-rw-r--r--  Documentation/RCU/arrayRCU.txt | 20
-rw-r--r--  Documentation/RCU/lockdep.txt | 10
-rw-r--r--  Documentation/RCU/rcu_dereference.txt | 38
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 6
-rw-r--r--  Documentation/devicetree/bindings/clock/at91-clock.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt | 4
-rw-r--r--  Documentation/devicetree/bindings/usb/renesas_usbhs.txt | 6
-rw-r--r--  Documentation/filesystems/Locking | 4
-rw-r--r--  Documentation/filesystems/automount-support.txt | 51
-rw-r--r--  Documentation/filesystems/porting | 17
-rw-r--r--  Documentation/filesystems/vfs.txt | 22
-rw-r--r--  Documentation/i2c/slave-interface | 6
-rw-r--r--  Documentation/kernel-parameters.txt | 39
-rw-r--r--  Documentation/memory-barriers.txt | 68
-rw-r--r--  Documentation/networking/udplite.txt | 2
-rw-r--r--  MAINTAINERS | 11
-rw-r--r--  Makefile | 7
-rw-r--r--  arch/alpha/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/arm/boot/dts/am335x-bone-common.dtsi | 19
-rw-r--r--  arch/arm/boot/dts/am35xx-clocks.dtsi | 14
-rw-r--r--  arch/arm/boot/dts/armada-xp-linksys-mamba.dts | 5
-rw-r--r--  arch/arm/boot/dts/dm816x.dtsi | 4
-rw-r--r--  arch/arm/boot/dts/omap3-n900.dts | 4
-rw-r--r--  arch/arm/include/asm/barrier.h | 2
-rw-r--r--  arch/arm/mach-exynos/suspend.c | 4
-rw-r--r--  arch/arm/mach-omap2/sleep34xx.S | 22
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 3
-rw-r--r--  arch/arm64/include/asm/barrier.h | 2
-rw-r--r--  arch/avr32/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/blackfin/include/asm/io.h | 1
-rw-r--r--  arch/hexagon/include/asm/cmpxchg.h | 1
-rw-r--r--  arch/ia64/include/asm/barrier.h | 7
-rw-r--r--  arch/ia64/include/uapi/asm/cmpxchg.h | 2
-rw-r--r--  arch/ia64/kernel/smpboot.c | 3
-rw-r--r--  arch/m32r/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/m68k/include/asm/cmpxchg.h | 1
-rw-r--r--  arch/metag/include/asm/barrier.h | 2
-rw-r--r--  arch/metag/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/mips/ath79/setup.c | 2
-rw-r--r--  arch/mips/cobalt/Makefile | 3
-rw-r--r--  arch/mips/include/asm/barrier.h | 4
-rw-r--r--  arch/mips/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/mips/include/asm/pgtable-bits.h | 14
-rw-r--r--  arch/mips/include/asm/switch_to.h | 2
-rw-r--r--  arch/mips/kernel/cpu-probe.c | 3
-rw-r--r--  arch/mips/kernel/irq.c | 2
-rw-r--r--  arch/mips/kvm/emulate.c | 2
-rw-r--r--  arch/mips/loongson/common/Makefile | 4
-rw-r--r--  arch/mips/loongson/loongson-3/smp.c | 2
-rw-r--r--  arch/mips/mm/c-r4k.c | 2
-rw-r--r--  arch/mips/net/bpf_jit.c | 6
-rw-r--r--  arch/mips/ralink/ill_acc.c | 2
-rw-r--r--  arch/parisc/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/powerpc/include/asm/barrier.h | 3
-rw-r--r--  arch/powerpc/include/asm/cmpxchg.h | 1
-rw-r--r--  arch/s390/include/asm/barrier.h | 2
-rw-r--r--  arch/s390/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/s390/net/bpf_jit.h | 4
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 11
-rw-r--r--  arch/score/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/score/lib/string.S | 2
-rw-r--r--  arch/sh/include/asm/barrier.h | 2
-rw-r--r--  arch/sh/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/sparc/include/asm/barrier_64.h | 4
-rw-r--r--  arch/sparc/include/asm/cmpxchg_32.h | 1
-rw-r--r--  arch/sparc/include/asm/cmpxchg_64.h | 2
-rw-r--r--  arch/tile/include/asm/atomic_64.h | 3
-rw-r--r--  arch/x86/Kconfig | 5
-rw-r--r--  arch/x86/boot/compressed/misc.h | 11
-rw-r--r--  arch/x86/include/asm/barrier.h | 4
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 29
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 10
-rw-r--r--  arch/x86/include/asm/ptrace.h | 2
-rw-r--r--  arch/x86/include/asm/qspinlock.h | 57
-rw-r--r--  arch/x86/include/asm/qspinlock_paravirt.h | 6
-rw-r--r--  arch/x86/include/asm/segment.h | 14
-rw-r--r--  arch/x86/include/asm/spinlock.h | 5
-rw-r--r--  arch/x86/include/asm/spinlock_types.h | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 15
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 174
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 47
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 269
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_bts.c | 9
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_cqm.c | 108
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 321
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c | 13
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c | 74
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 21
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h | 20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 20
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 6
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 33
-rw-r--r--  arch/x86/kernel/head_64.S | 20
-rw-r--r--  arch/x86/kernel/kvm.c | 43
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 24
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c | 22
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 22
-rw-r--r--  arch/x86/kvm/lapic.c | 26
-rw-r--r--  arch/x86/kvm/mmu.c | 14
-rw-r--r--  arch/x86/um/asm/barrier.h | 3
-rw-r--r--  arch/x86/xen/spinlock.c | 64
-rw-r--r--  block/blk-mq.c | 8
-rw-r--r--  block/genhd.c | 13
-rw-r--r--  drivers/ata/ahci_mvebu.c | 2
-rw-r--r--  drivers/ata/pata_octeon_cf.c | 2
-rw-r--r--  drivers/base/cacheinfo.c | 2
-rw-r--r--  drivers/base/init.c | 2
-rw-r--r--  drivers/block/Kconfig | 1
-rw-r--r--  drivers/block/nvme-core.c | 10
-rw-r--r--  drivers/block/zram/zram_drv.c | 2
-rw-r--r--  drivers/bus/mvebu-mbus.c | 109
-rw-r--r--  drivers/clk/at91/clk-peripheral.c | 8
-rw-r--r--  drivers/clk/at91/clk-pll.c | 12
-rw-r--r--  drivers/clk/at91/pmc.h | 2
-rw-r--r--  drivers/crypto/caam/caamhash.c | 2
-rw-r--r--  drivers/crypto/caam/caamrng.c | 2
-rw-r--r--  drivers/dma/at_xdmac.c | 231
-rw-r--r--  drivers/dma/dmaengine.c | 6
-rw-r--r--  drivers/dma/hsu/hsu.c | 5
-rw-r--r--  drivers/dma/pl330.c | 3
-rw-r--r--  drivers/firmware/iscsi_ibft.c | 36
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4
-rw-r--r--  drivers/gpu/drm/drm_sysfs.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c | 5
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 5
-rw-r--r--  drivers/gpu/drm/i915/intel_i2c.c | 20
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 6
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 14
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c | 2
-rw-r--r--  drivers/gpu/drm/mgag200/mgag200_mode.c | 5
-rw-r--r--  drivers/gpu/drm/radeon/atombios_crtc.c | 7
-rw-r--r--  drivers/gpu/drm/radeon/dce3_1_afmt.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c | 15
-rw-r--r--  drivers/gpu/drm/radeon/radeon_dp_mst.c | 5
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c | 3
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vm.c | 17
-rw-r--r--  drivers/i2c/busses/i2c-hix5hd2.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-s3c2410.c | 1
-rw-r--r--  drivers/iio/adc/twl6030-gpadc.c | 2
-rw-r--r--  drivers/iio/imu/adis16400.h | 2
-rw-r--r--  drivers/iio/imu/adis16400_buffer.c | 26
-rw-r--r--  drivers/iio/imu/adis16400_core.c | 41
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 25
-rw-r--r--  drivers/input/mouse/alps.c | 5
-rw-r--r--  drivers/input/mouse/elantech.c | 8
-rw-r--r--  drivers/input/mouse/synaptics.c | 7
-rw-r--r--  drivers/iommu/amd_iommu.c | 1
-rw-r--r--  drivers/iommu/intel-iommu.c | 31
-rw-r--r--  drivers/irqchip/irq-mips-gic.c | 21
-rw-r--r--  drivers/irqchip/irq-sunxi-nmi.c | 2
-rw-r--r--  drivers/md/md.c | 14
-rw-r--r--  drivers/md/raid10.c | 1
-rw-r--r--  drivers/md/raid5.c | 1
-rw-r--r--  drivers/media/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/b44.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmmii.c | 12
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_ethtool.c | 20
-rw-r--r--  drivers/net/ethernet/cisco/enic/enic_main.c | 11
-rw-r--r--  drivers/net/ethernet/cisco/enic/vnic_rq.c | 9
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_cmds.c | 87
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_ethtool.c | 18
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_main.c | 15
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h | 1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 9
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 35
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 25
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 25
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ptp.c | 4
-rw-r--r--  drivers/ntb/ntb_hw.c | 3
-rw-r--r--  drivers/of/base.c | 8
-rw-r--r--  drivers/of/dynamic.c | 2
-rw-r--r--  drivers/pci/setup-bus.c | 9
-rw-r--r--  drivers/phy/Kconfig | 10
-rw-r--r--  drivers/phy/phy-core.c | 4
-rw-r--r--  drivers/phy/phy-omap-usb2.c | 1
-rw-r--r--  drivers/phy/phy-rcar-gen2.c | 4
-rw-r--r--  drivers/soc/mediatek/Kconfig | 1
-rw-r--r--  drivers/soc/mediatek/mtk-pmic-wrap.c | 54
-rw-r--r--  drivers/ssb/driver_chipcommon_pmu.c | 6
-rw-r--r--  drivers/staging/lustre/lustre/include/linux/lustre_compat25.h | 15
-rw-r--r--  drivers/staging/lustre/lustre/llite/llite_internal.h | 6
-rw-r--r--  drivers/staging/lustre/lustre/llite/symlink.c | 26
-rw-r--r--  drivers/staging/ozwpan/ozhcd.c | 8
-rw-r--r--  drivers/staging/ozwpan/ozusbif.h | 4
-rw-r--r--  drivers/staging/ozwpan/ozusbsvc1.c | 19
-rw-r--r--  drivers/staging/rtl8712/rtl8712_led.c | 144
-rw-r--r--  drivers/staging/rtl8712/rtl871x_cmd.c | 2
-rw-r--r--  drivers/staging/rtl8712/rtl871x_mlme.c | 6
-rw-r--r--  drivers/staging/rtl8712/rtl871x_pwrctrl.c | 2
-rw-r--r--  drivers/staging/rtl8712/rtl871x_sta_mgt.c | 2
-rw-r--r--  drivers/tty/n_tty.c | 21
-rw-r--r--  drivers/tty/serial/8250/8250_omap.c | 82
-rw-r--r--  drivers/tty/serial/amba-pl011.c | 16
-rw-r--r--  drivers/tty/serial/imx.c | 8
-rw-r--r--  drivers/usb/dwc3/core.h | 4
-rw-r--r--  drivers/usb/gadget/function/f_fs.c | 15
-rw-r--r--  drivers/usb/gadget/function/f_midi.c | 8
-rw-r--r--  drivers/usb/gadget/function/f_uac1.c | 5
-rw-r--r--  drivers/usb/gadget/legacy/g_ffs.c | 4
-rw-r--r--  drivers/usb/gadget/udc/s3c2410_udc.c | 2
-rw-r--r--  drivers/usb/host/xhci.c | 57
-rw-r--r--  drivers/usb/host/xhci.h | 2
-rw-r--r--  drivers/usb/musb/musb_core.c | 14
-rw-r--r--  drivers/usb/phy/phy-ab8500-usb.c | 6
-rw-r--r--  drivers/usb/phy/phy-tahvo.c | 3
-rw-r--r--  drivers/usb/renesas_usbhs/fifo.c | 38
-rw-r--r--  drivers/usb/serial/cp210x.c | 1
-rw-r--r--  drivers/usb/serial/ftdi_sio.c | 1
-rw-r--r--  drivers/usb/serial/ftdi_sio_ids.h | 1
-rw-r--r--  drivers/virtio/virtio_pci_common.c | 1
-rw-r--r--  fs/9p/v9fs.h | 2
-rw-r--r--  fs/9p/vfs_inode.c | 123
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 39
-rw-r--r--  fs/autofs4/symlink.c | 5
-rw-r--r--  fs/befs/linuxvfs.c | 57
-rw-r--r--  fs/ceph/inode.c | 11
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/link.c | 28
-rw-r--r--  fs/configfs/symlink.c | 31
-rw-r--r--  fs/debugfs/file.c | 12
-rw-r--r--  fs/debugfs/inode.c | 6
-rw-r--r--  fs/ecryptfs/inode.c | 11
-rw-r--r--  fs/exofs/Kbuild | 2
-rw-r--r--  fs/exofs/exofs.h | 4
-rw-r--r--  fs/exofs/inode.c | 9
-rw-r--r--  fs/exofs/namei.c | 5
-rw-r--r--  fs/exofs/symlink.c | 55
-rw-r--r--  fs/ext2/inode.c | 1
-rw-r--r--  fs/ext2/namei.c | 3
-rw-r--r--  fs/ext2/symlink.c | 10
-rw-r--r--  fs/ext3/inode.c | 1
-rw-r--r--  fs/ext3/namei.c | 3
-rw-r--r--  fs/ext3/symlink.c | 10
-rw-r--r--  fs/ext4/ext4.h | 1
-rw-r--r--  fs/ext4/inode.c | 7
-rw-r--r--  fs/ext4/namei.c | 11
-rw-r--r--  fs/ext4/symlink.c | 48
-rw-r--r--  fs/f2fs/namei.c | 18
-rw-r--r--  fs/fhandle.c | 5
-rw-r--r--  fs/freevxfs/vxfs_extern.h | 3
-rw-r--r--  fs/freevxfs/vxfs_immed.c | 34
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 7
-rw-r--r--  fs/fuse/dir.c | 22
-rw-r--r--  fs/gfs2/inode.c | 10
-rw-r--r--  fs/hostfs/hostfs_kern.c | 15
-rw-r--r--  fs/hppfs/hppfs.c | 13
-rw-r--r--  fs/inode.c | 31
-rw-r--r--  fs/jffs2/dir.c | 1
-rw-r--r--  fs/jffs2/fs.c | 1
-rw-r--r--  fs/jffs2/symlink.c | 45
-rw-r--r--  fs/jfs/inode.c | 3
-rw-r--r--  fs/jfs/namei.c | 5
-rw-r--r--  fs/jfs/symlink.c | 10
-rw-r--r--  fs/kernfs/symlink.c | 25
-rw-r--r--  fs/libfs.c | 25
-rw-r--r--  fs/logfs/dir.c | 1
-rw-r--r--  fs/mount.h | 1
-rw-r--r--  fs/namei.c | 1453
-rw-r--r--  fs/namespace.c | 27
-rw-r--r--  fs/nfs/symlink.c | 19
-rw-r--r--  fs/ntfs/namei.c | 2
-rw-r--r--  fs/open.c | 2
-rw-r--r--  fs/overlayfs/inode.c | 35
-rw-r--r--  fs/proc/base.c | 4
-rw-r--r--  fs/proc/inode.c | 9
-rw-r--r--  fs/proc/namespaces.c | 4
-rw-r--r--  fs/proc/self.c | 24
-rw-r--r--  fs/proc/thread_self.c | 22
-rw-r--r--  fs/select.c | 6
-rw-r--r--  fs/sysv/Makefile | 2
-rw-r--r--  fs/sysv/inode.c | 5
-rw-r--r--  fs/sysv/symlink.c | 20
-rw-r--r--  fs/sysv/sysv.h | 1
-rw-r--r--  fs/ubifs/dir.c | 1
-rw-r--r--  fs/ubifs/file.c | 11
-rw-r--r--  fs/ubifs/super.c | 1
-rw-r--r--  fs/ufs/inode.c | 5
-rw-r--r--  fs/ufs/namei.c | 3
-rw-r--r--  fs/ufs/symlink.c | 13
-rw-r--r--  fs/xfs/xfs_iops.c | 11
-rw-r--r--  include/asm-generic/barrier.h | 4
-rw-r--r--  include/asm-generic/cmpxchg.h | 3
-rw-r--r--  include/asm-generic/qspinlock.h | 139
-rw-r--r--  include/asm-generic/qspinlock_types.h | 79
-rw-r--r--  include/linux/backing-dev.h | 1
-rw-r--r--  include/linux/compiler.h | 20
-rw-r--r--  include/linux/debugfs.h | 1
-rw-r--r--  include/linux/fs.h | 16
-rw-r--r--  include/linux/intel-iommu.h | 3
-rw-r--r--  include/linux/lockdep.h | 4
-rw-r--r--  include/linux/namei.h | 41
-rw-r--r--  include/linux/of.h | 6
-rw-r--r--  include/linux/osq_lock.h | 5
-rw-r--r--  include/linux/perf_event.h | 46
-rw-r--r--  include/linux/rculist.h | 10
-rw-r--r--  include/linux/rcupdate.h | 70
-rw-r--r--  include/linux/rcutiny.h | 16
-rw-r--r--  include/linux/rcutree.h | 7
-rw-r--r--  include/linux/sched.h | 18
-rw-r--r--  include/linux/security.h | 13
-rw-r--r--  include/linux/spinlock.h | 2
-rw-r--r--  include/sound/hda_regmap.h | 2
-rw-r--r--  include/trace/events/writeback.h | 1
-rw-r--r--  include/uapi/drm/radeon_drm.h | 1
-rw-r--r--  include/uapi/linux/perf_event.h | 18
-rw-r--r--  init/Kconfig | 74
-rw-r--r--  kernel/Kconfig.locks | 13
-rw-r--r--  kernel/compat.c | 6
-rw-r--r--  kernel/cpu.c | 4
-rw-r--r--  kernel/events/core.c | 56
-rw-r--r--  kernel/events/internal.h | 9
-rw-r--r--  kernel/events/ring_buffer.c | 16
-rw-r--r--  kernel/futex.c | 2
-rw-r--r--  kernel/locking/Makefile | 3
-rw-r--r--  kernel/locking/lockdep.c | 6
-rw-r--r--  kernel/locking/lockdep_proc.c | 22
-rw-r--r--  kernel/locking/locktorture.c | 14
-rw-r--r--  kernel/locking/mcs_spinlock.h | 1
-rw-r--r--  kernel/locking/qrwlock.c | 30
-rw-r--r--  kernel/locking/qspinlock.c | 473
-rw-r--r--  kernel/locking/qspinlock_paravirt.h | 325
-rw-r--r--  kernel/locking/rtmutex.c | 13
-rw-r--r--  kernel/locking/rwsem-xadd.c | 44
-rw-r--r--  kernel/rcu/rcutorture.c | 103
-rw-r--r--  kernel/rcu/srcu.c | 10
-rw-r--r--  kernel/rcu/tiny.c | 38
-rw-r--r--  kernel/rcu/tiny_plugin.h | 12
-rw-r--r--  kernel/rcu/tree.c | 365
-rw-r--r--  kernel/rcu/tree.h | 35
-rw-r--r--  kernel/rcu/tree_plugin.h | 216
-rw-r--r--  kernel/rcu/tree_trace.c | 6
-rw-r--r--  kernel/rcu/update.c | 30
-rw-r--r--  kernel/sched/core.c | 2
-rw-r--r--  kernel/sched/fair.c | 2
-rw-r--r--  kernel/sched/wait.c | 4
-rw-r--r--  kernel/torture.c | 26
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c | 2
-rw-r--r--  kernel/trace/trace_events_filter.c | 11
-rw-r--r--  lib/Kconfig.debug | 66
-rw-r--r--  lib/cpumask.c | 9
-rw-r--r--  lib/mpi/longlong.h | 4
-rw-r--r--  lib/rhashtable.c | 1
-rw-r--r--  lib/strnlen_user.c | 12
-rw-r--r--  lib/swiotlb.c | 5
-rw-r--r--  mm/backing-dev.c | 18
-rw-r--r--  mm/memcontrol.c | 6
-rw-r--r--  mm/memory_hotplug.c | 4
-rw-r--r--  mm/shmem.c | 40
-rw-r--r--  mm/zsmalloc.c | 3
-rw-r--r--  net/bridge/br_fdb.c | 2
-rw-r--r--  net/bridge/br_multicast.c | 7
-rw-r--r--  net/core/dev.c | 11
-rw-r--r--  net/core/skbuff.c | 2
-rw-r--r--  net/core/sock.c | 15
-rw-r--r--  net/ipv4/udp.c | 18
-rw-r--r--  net/ipv6/addrconf_core.c | 11
-rw-r--r--  net/mpls/af_mpls.c | 13
-rw-r--r--  net/mpls/internal.h | 1
-rw-r--r--  net/openvswitch/vport-netdev.c | 1
-rw-r--r--  net/sctp/auth.c | 11
-rw-r--r--  net/tipc/socket.c | 16
-rw-r--r--  net/wireless/wext-compat.c | 2
-rwxr-xr-x  scripts/checkpatch.pl | 4
-rw-r--r--  security/capability.c | 4
-rw-r--r--  security/security.c | 7
-rw-r--r--  security/selinux/avc.c | 18
-rw-r--r--  security/selinux/hooks.c | 18
-rw-r--r--  security/selinux/include/avc.h | 9
-rw-r--r--  sound/hda/hdac_regmap.c | 8
-rw-r--r--  sound/mips/Kconfig | 2
-rw-r--r--  sound/pci/hda/hda_codec.c | 27
-rw-r--r--  sound/pci/hda/hda_intel.c | 21
-rw-r--r--  sound/pci/hda/hda_local.h | 4
-rw-r--r--  sound/pci/hda/patch_realtek.c | 26
-rw-r--r--  sound/pci/hda/patch_sigmatel.c | 25
-rw-r--r--  sound/pci/hda/patch_via.c | 10
-rw-r--r--  sound/usb/mixer.c | 10
-rw-r--r--  sound/usb/mixer_maps.c | 5
-rw-r--r--  sound/usb/quirks.c | 6
-rw-r--r--  tools/Makefile | 20
-rw-r--r--  tools/arch/alpha/include/asm/barrier.h | 8
-rw-r--r--  tools/arch/arm/include/asm/barrier.h | 12
-rw-r--r--  tools/arch/arm64/include/asm/barrier.h | 16
-rw-r--r--  tools/arch/ia64/include/asm/barrier.h | 48
-rw-r--r--  tools/arch/mips/include/asm/barrier.h | 20
-rw-r--r--  tools/arch/powerpc/include/asm/barrier.h | 29
-rw-r--r--  tools/arch/s390/include/asm/barrier.h | 30
-rw-r--r--  tools/arch/sh/include/asm/barrier.h | 32
-rw-r--r--  tools/arch/sparc/include/asm/barrier.h | 8
-rw-r--r--  tools/arch/sparc/include/asm/barrier_32.h | 6
-rw-r--r--  tools/arch/sparc/include/asm/barrier_64.h | 42
-rw-r--r--  tools/arch/tile/include/asm/barrier.h | 15
-rw-r--r--  tools/arch/x86/include/asm/atomic.h | 65
-rw-r--r--  tools/arch/x86/include/asm/barrier.h | 28
-rw-r--r--  tools/arch/x86/include/asm/rmwcc.h | 41
-rw-r--r--  tools/arch/xtensa/include/asm/barrier.h | 18
-rw-r--r--  tools/build/Makefile.build | 10
-rw-r--r--  tools/build/Makefile.feature | 4
-rw-r--r--  tools/build/tests/ex/Build | 1
-rw-r--r--  tools/build/tests/ex/empty2/README | 2
-rw-r--r--  tools/include/asm-generic/atomic-gcc.h | 63
-rw-r--r--  tools/include/asm-generic/barrier.h | 44
-rw-r--r--  tools/include/asm/atomic.h | 10
-rw-r--r--  tools/include/asm/barrier.h | 27
-rw-r--r--  tools/include/linux/atomic.h | 6
-rw-r--r--  tools/include/linux/compiler.h | 4
-rw-r--r--  tools/include/linux/kernel.h (renamed from tools/perf/util/include/linux/kernel.h) | 4
-rw-r--r--  tools/include/linux/list.h (renamed from tools/perf/util/include/linux/list.h) | 6
-rw-r--r--  tools/include/linux/poison.h | 1
-rw-r--r--  tools/include/linux/types.h | 8
-rw-r--r--  tools/lib/traceevent/.gitignore | 1
-rw-r--r--  tools/lib/traceevent/Makefile | 34
-rw-r--r--  tools/lib/traceevent/event-parse.c | 6
-rw-r--r--  tools/lib/traceevent/event-parse.h | 1
-rw-r--r--  tools/lib/traceevent/plugin_cfg80211.c | 13
-rw-r--r--  tools/perf/.gitignore | 1
-rw-r--r--  tools/perf/Documentation/callchain-overhead-calculation.txt | 108
-rw-r--r--  tools/perf/Documentation/perf-bench.txt | 3
-rw-r--r--  tools/perf/Documentation/perf-inject.txt | 27
-rw-r--r--  tools/perf/Documentation/perf-kmem.txt | 11
-rw-r--r--  tools/perf/Documentation/perf-kvm.txt | 6
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 17
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 27
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 35
-rw-r--r--  tools/perf/Documentation/perf-script.txt | 37
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 9
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 7
-rw-r--r--  tools/perf/MANIFEST | 26
-rw-r--r--  tools/perf/Makefile.perf | 30
-rw-r--r--  tools/perf/arch/arm64/Build | 1
-rw-r--r--  tools/perf/arch/arm64/include/perf_regs.h | 3
-rw-r--r--  tools/perf/arch/arm64/tests/Build | 2
-rw-r--r--  tools/perf/arch/arm64/tests/dwarf-unwind.c | 61
-rw-r--r--  tools/perf/arch/arm64/tests/regs_load.S | 46
-rw-r--r--  tools/perf/arch/common.c | 2
-rw-r--r--  tools/perf/arch/powerpc/util/Build | 1
-rw-r--r--  tools/perf/arch/powerpc/util/sym-handling.c | 82
-rw-r--r--  tools/perf/bench/Build | 1
-rw-r--r--  tools/perf/bench/bench.h | 2
-rw-r--r--  tools/perf/bench/futex-wake-parallel.c | 294
-rw-r--r--  tools/perf/bench/futex-wake.c | 7
-rw-r--r--  tools/perf/bench/numa.c | 33
-rw-r--r--  tools/perf/builtin-annotate.c | 19
-rw-r--r--  tools/perf/builtin-bench.c | 1
-rw-r--r--  tools/perf/builtin-buildid-list.c | 9
-rw-r--r--  tools/perf/builtin-diff.c | 9
-rw-r--r--  tools/perf/builtin-inject.c | 175
-rw-r--r--  tools/perf/builtin-kmem.c | 988
-rw-r--r--  tools/perf/builtin-kvm.c | 11
-rw-r--r--  tools/perf/builtin-lock.c | 8
-rw-r--r--  tools/perf/builtin-mem.c | 5
-rw-r--r--  tools/perf/builtin-probe.c | 195
-rw-r--r--  tools/perf/builtin-record.c | 382
-rw-r--r--  tools/perf/builtin-report.c | 47
-rw-r--r--  tools/perf/builtin-sched.c | 159
-rw-r--r--  tools/perf/builtin-script.c | 94
-rw-r--r--  tools/perf/builtin-stat.c | 490
-rw-r--r--  tools/perf/builtin-timechart.c | 9
-rw-r--r--  tools/perf/builtin-top.c | 72
-rw-r--r--  tools/perf/builtin-trace.c | 139
-rw-r--r--  tools/perf/config/Makefile | 13
-rw-r--r--  tools/perf/config/utilities.mak | 19
-rw-r--r--  tools/perf/perf-sys.h | 73
-rw-r--r--  tools/perf/perf.h | 6
-rw-r--r--  tools/perf/tests/Build | 8
-rw-r--r--  tools/perf/tests/builtin-test.c | 16
-rw-r--r--  tools/perf/tests/code-reading.c | 26
-rw-r--r--  tools/perf/tests/dso-data.c | 15
-rw-r--r--  tools/perf/tests/dwarf-unwind.c | 3
-rw-r--r--  tools/perf/tests/evsel-roundtrip-name.c | 4
-rw-r--r--  tools/perf/tests/hists_common.c | 10
-rw-r--r--  tools/perf/tests/hists_cumulate.c | 10
-rw-r--r--  tools/perf/tests/hists_filter.c | 12
-rw-r--r--  tools/perf/tests/hists_link.c | 12
-rw-r--r--  tools/perf/tests/hists_output.c | 10
-rw-r--r--  tools/perf/tests/keep-tracking.c | 4
-rw-r--r--  tools/perf/tests/kmod-path.c | 72
-rw-r--r--  tools/perf/tests/make | 18
-rw-r--r--  tools/perf/tests/mmap-basic.c | 6
-rw-r--r--  tools/perf/tests/mmap-thread-lookup.c | 6
-rw-r--r--  tools/perf/tests/openat-syscall-all-cpus.c (renamed from tools/perf/tests/open-syscall-all-cpus.c) | 13
-rw-r--r--  tools/perf/tests/openat-syscall-tp-fields.c (renamed from tools/perf/tests/open-syscall-tp-fields.c) | 6
-rw-r--r--  tools/perf/tests/openat-syscall.c (renamed from tools/perf/tests/open-syscall.c) | 14
-rw-r--r--  tools/perf/tests/parse-events.c | 14
-rw-r--r--  tools/perf/tests/perf-time-to-tsc.c | 2
-rw-r--r--  tools/perf/tests/pmu.c | 3
-rw-r--r--  tools/perf/tests/switch-tracking.c | 8
-rw-r--r--  tools/perf/tests/tests.h | 17
-rw-r--r--  tools/perf/tests/thread-mg-share.c | 41
-rw-r--r--  tools/perf/tests/vmlinux-kallsyms.c | 34
-rw-r--r--  tools/perf/ui/browsers/annotate.c | 64
-rw-r--r--  tools/perf/ui/browsers/hists.c | 646
-rw-r--r--  tools/perf/ui/tui/setup.c | 2
-rw-r--r--  tools/perf/util/Build | 13
-rw-r--r--  tools/perf/util/annotate.c | 73
-rw-r--r--  tools/perf/util/annotate.h | 7
-rw-r--r--  tools/perf/util/auxtrace.c | 1352
-rw-r--r--  tools/perf/util/auxtrace.h | 643
-rw-r--r--  tools/perf/util/build-id.c | 64
-rw-r--r--  tools/perf/util/cache.h | 1
-rw-r--r--  tools/perf/util/callchain.h | 4
-rw-r--r--  tools/perf/util/cgroup.c | 10
-rw-r--r--  tools/perf/util/cgroup.h | 4
-rw-r--r--  tools/perf/util/comm.c | 13
-rw-r--r--  tools/perf/util/data-convert-bt.c | 410
-rw-r--r--  tools/perf/util/db-export.c | 31
-rw-r--r--  tools/perf/util/dso.c | 334
-rw-r--r--  tools/perf/util/dso.h | 47
-rw-r--r--  tools/perf/util/dwarf-aux.c | 220
-rw-r--r--  tools/perf/util/dwarf-aux.h | 13
-rw-r--r--  tools/perf/util/environment.c | 1
-rw-r--r--  tools/perf/util/event.c | 119
-rw-r--r--  tools/perf/util/event.h | 99
-rw-r--r--  tools/perf/util/evlist.c | 103
-rw-r--r--  tools/perf/util/evlist.h | 13
-rw-r--r--  tools/perf/util/evsel.c | 26
-rw-r--r--  tools/perf/util/evsel.h | 7
-rw-r--r--  tools/perf/util/header.c | 52
-rw-r--r--  tools/perf/util/header.h | 1
-rw-r--r--  tools/perf/util/hist.c | 63
-rw-r--r--  tools/perf/util/hist.h | 1
-rw-r--r--  tools/perf/util/include/linux/poison.h | 1
-rw-r--r--  tools/perf/util/include/linux/rbtree.h | 14
-rw-r--r--  tools/perf/util/machine.c | 281
-rw-r--r--  tools/perf/util/machine.h | 32
-rw-r--r--  tools/perf/util/map.c | 267
-rw-r--r--  tools/perf/util/map.h | 52
-rw-r--r--  tools/perf/util/pager.c | 5
-rw-r--r--  tools/perf/util/parse-branch-options.c | 94
-rw-r--r--  tools/perf/util/parse-branch-options.h | 5
-rw-r--r--  tools/perf/util/parse-events.c | 196
-rw-r--r--  tools/perf/util/parse-events.h | 42
-rw-r--r--  tools/perf/util/parse-events.l | 41
-rw-r--r--  tools/perf/util/parse-events.y | 50
-rw-r--r--  tools/perf/util/parse-options.h | 4
-rw-r--r--  tools/perf/util/pmu.c | 72
-rw-r--r--  tools/perf/util/pmu.h | 6
-rw-r--r--  tools/perf/util/probe-event.c | 662
-rw-r--r--  tools/perf/util/probe-event.h | 26
-rw-r--r--  tools/perf/util/probe-finder.c | 165
-rw-r--r--  tools/perf/util/probe-finder.h | 10
-rw-r--r--  tools/perf/util/pstack.c | 7
-rw-r--r--  tools/perf/util/pstack.h | 1
-rw-r--r--  tools/perf/util/python-ext-sources | 1
-rw-r--r--  tools/perf/util/record.c | 15
-rw-r--r--  tools/perf/util/session.c | 288
-rw-r--r--  tools/perf/util/session.h | 6
-rw-r--r--  tools/perf/util/sort.c | 12
-rw-r--r--  tools/perf/util/sort.h | 38
-rw-r--r--  tools/perf/util/stat-shadow.c | 434
-rw-r--r--  tools/perf/util/stat.c | 71
-rw-r--r--  tools/perf/util/stat.h | 46
-rw-r--r--  tools/perf/util/strfilter.c | 107
-rw-r--r--  tools/perf/util/strfilter.h | 35
-rw-r--r--  tools/perf/util/symbol-elf.c | 23
-rw-r--r--  tools/perf/util/symbol.c | 117
-rw-r--r--  tools/perf/util/symbol.h | 12
-rw-r--r--  tools/perf/util/thread-stack.c | 18
-rw-r--r--  tools/perf/util/thread-stack.h | 1
-rw-r--r--  tools/perf/util/thread.c | 12
-rw-r--r--  tools/perf/util/thread.h | 5
-rw-r--r--  tools/perf/util/thread_map.c | 24
-rw-r--r--  tools/perf/util/tool.h | 13
-rw-r--r--  tools/perf/util/trace-event-parse.c | 2
-rw-r--r--  tools/perf/util/unwind-libunwind.c | 11
-rw-r--r--  tools/perf/util/util.c | 121
-rw-r--r--  tools/perf/util/util.h | 6
-rw-r--r--  tools/perf/util/vdso.c | 60
-rw-r--r--  tools/perf/util/vdso.h | 4
-rw-r--r--  tools/perf/util/xyarray.c | 8
-rw-r--r--  tools/perf/util/xyarray.h | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/configinit.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 25
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/CFcommon | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-N | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-P | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01 | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS02 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS03 | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE02-T | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03 | 8
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 8
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08-T | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE09 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 36
-rw-r--r--  tools/testing/selftests/x86/Makefile | 6
-rw-r--r--  tools/testing/selftests/x86/entry_from_vm86.c | 114
606 files changed, 16068 insertions, 6145 deletions
diff --git a/Documentation/RCU/arrayRCU.txt b/Documentation/RCU/arrayRCU.txt
index 453ebe6953ee..f05a9afb2c39 100644
--- a/Documentation/RCU/arrayRCU.txt
+++ b/Documentation/RCU/arrayRCU.txt
@@ -10,7 +10,19 @@ also be used to protect arrays. Three situations are as follows:
 
 3. Resizeable Arrays
 
-Each of these situations are discussed below.
+Each of these three situations involves an RCU-protected pointer to an
+array that is separately indexed. It might be tempting to consider use
+of RCU to instead protect the index into an array, however, this use
+case is -not- supported. The problem with RCU-protected indexes into
+arrays is that compilers can play way too many optimization games with
+integers, which means that the rules governing handling of these indexes
+are far more trouble than they are worth. If RCU-protected indexes into
+arrays prove to be particularly valuable (which they have not thus far),
+explicit cooperation from the compiler will be required to permit them
+to be safely used.
+
+That aside, each of the three RCU-protected pointer situations are
+described in the following sections.
 
 
 Situation 1: Hash Tables
@@ -36,9 +48,9 @@ Quick Quiz: Why is it so important that updates be rare when
 Situation 3: Resizeable Arrays
 
 Use of RCU for resizeable arrays is demonstrated by the grow_ary()
-function used by the System V IPC code. The array is used to map from
-semaphore, message-queue, and shared-memory IDs to the data structure
-that represents the corresponding IPC construct. The grow_ary()
+function formerly used by the System V IPC code. The array is used
+to map from semaphore, message-queue, and shared-memory IDs to the data
+structure that represents the corresponding IPC construct. The grow_ary()
 function does not acquire any locks; instead its caller must hold the
 ids->sem semaphore.
 
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index cd83d2348fef..da51d3068850 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -47,11 +47,6 @@ checking of rcu_dereference() primitives:
 		Use explicit check expression "c" along with
 		srcu_read_lock_held()(). This is useful in code that
 		is invoked by both SRCU readers and updaters.
-	rcu_dereference_index_check(p, c):
-		Use explicit check expression "c", but the caller
-		must supply one of the rcu_read_lock_held() functions.
-		This is useful in code that uses RCU-protected arrays
-		that is invoked by both RCU readers and updaters.
 	rcu_dereference_raw(p):
 		Don't check. (Use sparingly, if at all.)
 	rcu_dereference_protected(p, c):
@@ -64,11 +59,6 @@ checking of rcu_dereference() primitives:
 		but retain the compiler constraints that prevent duplicating
 		or coalescsing. This is useful when when testing the
 		value of the pointer itself, for example, against NULL.
-	rcu_access_index(idx):
-		Return the value of the index and omit all barriers, but
-		retain the compiler constraints that prevent duplicating
-		or coalescsing. This is useful when when testing the
-		value of the index itself, for example, against -1.
 
 The rcu_dereference_check() check expression can be any boolean
 expression, but would normally include a lockdep expression. However,
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index ceb05da5a5ac..1e6c0da994f5 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -25,17 +25,6 @@ o You must use one of the rcu_dereference() family of primitives
 	for an example where the compiler can in fact deduce the exact
 	value of the pointer, and thus cause misordering.
 
-o	Do not use single-element RCU-protected arrays. The compiler
-	is within its right to assume that the value of an index into
-	such an array must necessarily evaluate to zero. The compiler
-	could then substitute the constant zero for the computation, so
-	that the array index no longer depended on the value returned
-	by rcu_dereference(). If the array index no longer depends
-	on rcu_dereference(), then both the compiler and the CPU
-	are within their rights to order the array access before the
-	rcu_dereference(), which can cause the array access to return
-	garbage.
-
 o	Avoid cancellation when using the "+" and "-" infix arithmetic
 	operators. For example, for a given variable "x", avoid
 	"(x-x)". There are similar arithmetic pitfalls from other
@@ -76,14 +65,15 @@ o Do not use the results from the boolean "&&" and "||" when
 	dereferencing. For example, the following (rather improbable)
 	code is buggy:
 
-	int a[2];
-	int index;
-	int force_zero_index = 1;
+	int *p;
+	int *q;
 
 	...
 
-	r1 = rcu_dereference(i1)
-	r2 = a[r1 && force_zero_index]; /* BUGGY!!! */
+	p = rcu_dereference(gp)
+	q = &global_q;
+	q += p != &oom_p1 && p != &oom_p2;
+	r1 = *q; /* BUGGY!!! */
 
 	The reason this is buggy is that "&&" and "||" are often compiled
 	using branches. While weak-memory machines such as ARM or PowerPC
@@ -94,14 +84,15 @@ o Do not use the results from relational operators ("==", "!=",
 	">", ">=", "<", or "<=") when dereferencing. For example,
 	the following (quite strange) code is buggy:
 
-	int a[2];
-	int index;
-	int flip_index = 0;
+	int *p;
+	int *q;
 
 	...
 
-	r1 = rcu_dereference(i1)
-	r2 = a[r1 != flip_index]; /* BUGGY!!! */
+	p = rcu_dereference(gp)
+	q = &global_q;
+	q += p > &oom_p;
+	r1 = *q; /* BUGGY!!! */
 
 	As before, the reason this is buggy is that relational operators
 	are often compiled using branches. And as before, although
@@ -193,6 +184,11 @@ o Be very careful about comparing pointers obtained from
 	pointer. Note that the volatile cast in rcu_dereference()
 	will normally prevent the compiler from knowing too much.
 
+	However, please note that if the compiler knows that the
+	pointer takes on only one of two values, a not-equal
+	comparison will provide exactly the information that the
+	compiler needs to deduce the value of the pointer.
+
 o	Disable any value-speculation optimizations that your compiler
 	might provide, especially if you are making use of feedback-based
 	optimizations that take data collected from prior runs. Such
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 88dfce182f66..5746b0c77f3e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -256,7 +256,9 @@ rcu_dereference()
 	If you are going to be fetching multiple fields from the
 	RCU-protected structure, using the local variable is of
 	course preferred. Repeated rcu_dereference() calls look
-	ugly and incur unnecessary overhead on Alpha CPUs.
+	ugly, do not guarantee that the same pointer will be returned
+	if an update happened while in the critical section, and incur
+	unnecessary overhead on Alpha CPUs.
 
 	Note that the value returned by rcu_dereference() is valid
 	only within the enclosing RCU read-side critical section.
@@ -879,9 +881,7 @@ SRCU: Initialization/cleanup
 
 All: lockdep-checked RCU-protected pointer access
 
-	rcu_access_index
 	rcu_access_pointer
-	rcu_dereference_index_check
 	rcu_dereference_raw
 	rcu_lockdep_assert
 	rcu_sleep_check
diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
index 7a4d4926f44e..5ba6450693b9 100644
--- a/Documentation/devicetree/bindings/clock/at91-clock.txt
+++ b/Documentation/devicetree/bindings/clock/at91-clock.txt
@@ -248,7 +248,7 @@ Required properties for peripheral clocks:
 - #address-cells : shall be 1 (reg is used to encode clk id).
 - clocks : shall be the master clock phandle.
 	e.g. clocks = <&mck>;
-- name: device tree node describing a specific system clock.
+- name: device tree node describing a specific peripheral clock.
 	* #clock-cells : from common clock binding; shall be set to 0.
 	* reg: peripheral id. See Atmel's datasheets to get a full
 	  list of peripheral ids.
diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
index 4b641c7bf1c2..09089a6d69ed 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
@@ -32,8 +32,8 @@ Example:
 		touchscreen-fuzz-x = <4>;
 		touchscreen-fuzz-y = <7>;
 		touchscreen-fuzz-pressure = <2>;
-		touchscreen-max-x = <4096>;
-		touchscreen-max-y = <4096>;
+		touchscreen-size-x = <4096>;
+		touchscreen-size-y = <4096>;
 		touchscreen-max-pressure = <2048>;
 
 		ti,x-plate-ohms = <280>;
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index dc2a18f0b3a1..ddbe304beb21 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -15,10 +15,8 @@ Optional properties:
  - phys: phandle + phy specifier pair
  - phy-names: must be "usb"
  - dmas: Must contain a list of references to DMA specifiers.
- - dma-names : Must contain a list of DMA names:
-   - tx0 ... tx<n>
-   - rx0 ... rx<n>
-   - This <n> means DnFIFO in USBHS module.
+ - dma-names : named "ch%d", where %d is the channel number ranging from zero
+   to the number of channels (DnFIFOs) minus one.
 
 Example:
 	usbhs: usb@e6590000 {
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 0a926e2ba3ab..6a34a0f4d37c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,8 @@ prototypes:
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, unsigned int);
 	int (*get_acl)(struct inode *, int);
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7cac200e2a85..7eb762eb3136 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
 
 
 ======================
 IN-KERNEL AUTOMOUNTING
 ======================
 
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
-     When the directory is accessed, the follow_link op will be called, and
-     it will be provided with the location of the mountpoint in the nameidata
-     structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
-     (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
-	 superblock and gain a vfsmount structure representing it.
-
-     (b) Copy the nameidata provided as an argument and substitute the dentry
-	 argument into it the copy.
-
-     (c) Call do_add_mount() to install the new vfsmount into the namespace's
-	 mountpoint tree, thus making it accessible to userspace. Use the
-	 nameidata set up in (b) as the destination.
-
-	 If the mountpoint will be automatically expired, then do_add_mount()
-	 should also be given the location of an expiration list (see further
-	 down).
-
-     (d) Release the path in the nameidata argument and substitute in the new
-	 vfsmount and its root dentry. The ref counts on these will need
-	 incrementing.
+See section "Mount Traps" of Documentation/filesystems/autofs4.txt
 
 Then from userspace, you can just do something like:
 
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
 ===========================
 
 Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
 
 To do expiration, you need to follow these steps:
 
- (3) Create at least one list off which the vfsmounts to be expired can be
-     hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+     hung.
 
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
-     pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+     the mnt to the list using mnt_set_expiry()
+         mnt_set_expiry(newmnt, &afs_vfsmounts);
 
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
      with a pointer to this list. This will process the list, marking every
      vfsmount thereon for potential expiry on the next call.
 
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index e69274de8d0c..3eae250254d5 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -483,3 +483,20 @@ in your dentry operations instead.
 --
 [mandatory]
 	->aio_read/->aio_write are gone. Use ->read_iter/->write_iter.
+---
+[recommended]
+	for embedded ("fast") symlinks just set inode->i_link to wherever the
+	symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+	calling conventions for ->follow_link() have changed. Instead of returning
+	cookie and using nd_set_link() to store the body to traverse, we return
+	the body to traverse and store the cookie using explicit void ** argument.
+	nameidata isn't passed at all - nd_jump_link() doesn't need it and
+	nd_[gs]et_link() is gone.
+--
+[mandatory]
+	calling conventions for ->put_link() have changed. It gets inode instead of
+	dentry, it does not get nameidata at all and it gets called only when cookie
+	is non-NULL. Note that link body isn't available anymore, so if you need it,
+	store it as cookie.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5d833b32bbcd..b403b29ef710 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
 	int (*rename2) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *, unsigned int);
 	int (*readlink) (struct dentry *, char __user *,int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	const char *(*follow_link) (struct dentry *, void **);
+	void (*put_link) (struct inode *, void *);
 	int (*permission) (struct inode *, int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
 
   follow_link: called by the VFS to follow a symbolic link to the
 	inode it points to. Only required if you want to support
-	symbolic links. This method returns a void pointer cookie
-	that is passed to put_link().
+	symbolic links. This method returns the symlink body
+	to traverse (and possibly resets the current position with
+	nd_jump_link()). If the body won't go away until the inode
+	is gone, nothing else is needed; if it needs to be otherwise
+	pinned, the data needed to release whatever we'd grabbed
+	is to be stored in void * variable passed by address to
+	follow_link() instance.
 
   put_link: called by the VFS to release resources allocated by
-	follow_link(). The cookie returned by follow_link() is passed
-	to this method as the last parameter. It is used by
-	filesystems such as NFS where page cache is not stable
-	(i.e. page that was installed when the symbolic link walk
-	started might not be in the page cache at the end of the
-	walk).
+	follow_link(). The cookie stored by follow_link() is passed
+	to this method as the last parameter; only called when
+	cookie isn't NULL.
 
   permission: called by the VFS to check for access rights on a POSIX-like
 	filesystem.
diff --git a/Documentation/i2c/slave-interface b/Documentation/i2c/slave-interface
index 389bb5d61854..b228ca54bcf4 100644
--- a/Documentation/i2c/slave-interface
+++ b/Documentation/i2c/slave-interface
@@ -31,10 +31,10 @@ User manual
 ===========
 
 I2C slave backends behave like standard I2C clients. So, you can instantiate
-them like described in the document 'instantiating-devices'. A quick example
-for instantiating the slave-eeprom driver from userspace:
+them as described in the document 'instantiating-devices'. A quick example for
+instantiating the slave-eeprom driver from userspace at address 0x64 on bus 1:
 
-  # echo 0-0064 > /sys/bus/i2c/drivers/i2c-slave-eeprom/bind
+  # echo slave-24c02 0x64 > /sys/bus/i2c/devices/i2c-1/new_device
 
 Each backend should come with separate documentation to describe its specific
 behaviour and setup.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 61ab1628a057..60c9d6d0fd96 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1481,6 +1481,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			By default, super page will be supported if Intel IOMMU
 			has the capability. With this option, super page will
 			not be supported.
+		ecs_off [Default Off]
+			By default, extended context tables will be supported if
+			the hardware advertises that it has support both for the
+			extended tables themselves, and also PASID support. With
+			this option set, extended tables will not be used even
+			on hardware which claims to support them.
 
 	intel_idle.max_cstate=	[KNL,HW,ACPI,X86]
 			0 disables intel_idle and fall back on acpi_idle.
@@ -2992,11 +2998,34 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Set maximum number of finished RCU callbacks to
 			process in one batch.
 
+	rcutree.dump_tree=	[KNL]
+			Dump the structure of the rcu_node combining tree
+			out at early boot. This is used for diagnostic
+			purposes, to verify correct tree setup.
+
+	rcutree.gp_cleanup_delay=	[KNL]
+			Set the number of jiffies to delay each step of
+			RCU grace-period cleanup. This only has effect
+			when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
+
 	rcutree.gp_init_delay=	[KNL]
 			Set the number of jiffies to delay each step of
 			RCU grace-period initialization. This only has
-			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is
-			set.
+			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
+			is set.
+
+	rcutree.gp_preinit_delay=	[KNL]
+			Set the number of jiffies to delay each step of
+			RCU grace-period pre-initialization, that is,
+			the propagation of recent CPU-hotplug changes up
+			the rcu_node combining tree. This only has effect
+			when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
+
+	rcutree.rcu_fanout_exact= [KNL]
+			Disable autobalancing of the rcu_node combining
+			tree. This is used by rcutorture, and might
+			possibly be useful for architectures having high
+			cache-to-cache transfer latencies.
 
 	rcutree.rcu_fanout_leaf= [KNL]
 			Increase the number of CPUs assigned to each
@@ -3101,7 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			test, hence the "fake".
 
 	rcutorture.nreaders= [KNL]
-			Set number of RCU readers.
+			Set number of RCU readers. The value -1 selects
+			N-1, where N is the number of CPUs. A value
+			"n" less than -1 selects N-n-2, where N is again
+			the number of CPUs. For example, -2 selects N
+			(the number of CPUs), -3 selects N+1, and so on.
 
 	rcutorture.object_debug= [KNL]
 			Enable debug-object double-call_rcu() testing.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f95746189b5d..13feb697271f 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -617,16 +617,16 @@ case what's actually required is:
 However, stores are not speculated. This means that ordering -is- provided
 for load-store control dependencies, as in the following example:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	if (q) {
 		ACCESS_ONCE(b) = p;
 	}
 
-Control dependencies pair normally with other types of barriers.
-That said, please note that ACCESS_ONCE() is not optional! Without the
-ACCESS_ONCE(), might combine the load from 'a' with other loads from
-'a', and the store to 'b' with other stores to 'b', with possible highly
-counterintuitive effects on ordering.
+Control dependencies pair normally with other types of barriers. That
+said, please note that READ_ONCE_CTRL() is not optional! Without the
+READ_ONCE_CTRL(), the compiler might combine the load from 'a' with
+other loads from 'a', and the store to 'b' with other stores to 'b',
+with possible highly counterintuitive effects on ordering.
 
 Worse yet, if the compiler is able to prove (say) that the value of
 variable 'a' is always non-zero, it would be well within its rights
@@ -636,12 +636,15 @@ as follows:
 	q = a;
 	b = p; /* BUG: Compiler and CPU can both reorder!!! */
 
-So don't leave out the ACCESS_ONCE().
+Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
+that DEC Alpha needs in order to respect control depedencies.
+
+So don't leave out the READ_ONCE_CTRL().
 
 It is tempting to try to enforce ordering on identical stores on both
 branches of the "if" statement as follows:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	if (q) {
 		barrier();
 		ACCESS_ONCE(b) = p;
@@ -655,7 +658,7 @@ branches of the "if" statement as follows:
 Unfortunately, current compilers will transform this as follows at high
 optimization levels:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	barrier();
 	ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */
 	if (q) {
@@ -685,7 +688,7 @@ memory barriers, for example, smp_store_release():
 In contrast, without explicit memory barriers, two-legged-if control
 ordering is guaranteed only when the stores differ, for example:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	if (q) {
 		ACCESS_ONCE(b) = p;
 		do_something();
@@ -694,14 +697,14 @@ ordering is guaranteed only when the stores differ, for example:
 		do_something_else();
 	}
 
-The initial ACCESS_ONCE() is still required to prevent the compiler from
-proving the value of 'a'.
+The initial READ_ONCE_CTRL() is still required to prevent the compiler
+from proving the value of 'a'.
 
 In addition, you need to be careful what you do with the local variable 'q',
 otherwise the compiler might be able to guess the value and again remove
 the needed conditional. For example:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	if (q % MAX) {
 		ACCESS_ONCE(b) = p;
 		do_something();
@@ -714,7 +717,7 @@ If MAX is defined to be 1, then the compiler knows that (q % MAX) is
 equal to zero, in which case the compiler is within its rights to
 transform the above code into the following:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	ACCESS_ONCE(b) = p;
 	do_something_else();
 
@@ -725,7 +728,7 @@ is gone, and the barrier won't bring it back. Therefore, if you are
 relying on this ordering, you should make sure that MAX is greater than
 one, perhaps as follows:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE_CTRL(a);
 	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
 	if (q % MAX) {
 		ACCESS_ONCE(b) = p;
@@ -742,14 +745,15 @@ of the 'if' statement.
742You must also be careful not to rely too much on boolean short-circuit 745You must also be careful not to rely too much on boolean short-circuit
743evaluation. Consider this example: 746evaluation. Consider this example:
744 747
745 q = ACCESS_ONCE(a); 748 q = READ_ONCE_CTRL(a);
746 if (a || 1 > 0) 749 if (a || 1 > 0)
747 ACCESS_ONCE(b) = 1; 750 ACCESS_ONCE(b) = 1;
748 751
749Because the second condition is always true, the compiler can transform 752Because the first condition cannot fault and the second condition is
750this example as following, defeating control dependency: 753always true, the compiler can transform this example as following,
754defeating control dependency:
751 755
752 q = ACCESS_ONCE(a); 756 q = READ_ONCE_CTRL(a);
753 ACCESS_ONCE(b) = 1; 757 ACCESS_ONCE(b) = 1;
754 758
755This example underscores the need to ensure that the compiler cannot 759This example underscores the need to ensure that the compiler cannot
@@ -762,8 +766,8 @@ demonstrated by two related examples, with the initial values of
762x and y both being zero: 766x and y both being zero:
763 767
764 CPU 0 CPU 1 768 CPU 0 CPU 1
765 ===================== ===================== 769 ======================= =======================
766 r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y); 770 r1 = READ_ONCE_CTRL(x); r2 = READ_ONCE_CTRL(y);
767 if (r1 > 0) if (r2 > 0) 771 if (r1 > 0) if (r2 > 0)
768 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1; 772 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1;
769 773
@@ -783,7 +787,8 @@ But because control dependencies do -not- provide transitivity, the above
783assertion can fail after the combined three-CPU example completes. If you 787assertion can fail after the combined three-CPU example completes. If you
784need the three-CPU example to provide ordering, you will need smp_mb() 788need the three-CPU example to provide ordering, you will need smp_mb()
785between the loads and stores in the CPU 0 and CPU 1 code fragments, 789between the loads and stores in the CPU 0 and CPU 1 code fragments,
786that is, just before or just after the "if" statements. 790that is, just before or just after the "if" statements. Furthermore,
791the original two-CPU example is very fragile and should be avoided.
787 792
788These two examples are the LB and WWC litmus tests from this paper: 793These two examples are the LB and WWC litmus tests from this paper:
789http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this 794http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this
@@ -791,6 +796,12 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
791 796
792In summary: 797In summary:
793 798
799 (*) Control dependencies must be headed by READ_ONCE_CTRL().
800 Or, as a much less preferable alternative, interpose
801 be headed by READ_ONCE() or an ACCESS_ONCE() read and must
802 have smp_read_barrier_depends() between this read and the
803 control-dependent write.
804
794 (*) Control dependencies can order prior loads against later stores. 805 (*) Control dependencies can order prior loads against later stores.
795 However, they do -not- guarantee any other sort of ordering: 806 However, they do -not- guarantee any other sort of ordering:
796 Not prior loads against later loads, nor prior stores against 807 Not prior loads against later loads, nor prior stores against
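For readers applying the first summary item above, a minimal sketch of a
correctly headed control dependency follows; variable names mirror the
examples earlier in this diff, and the surrounding context is illustrative:

	/*
	 * The load from 'a' is ordered before the store to 'b' only
	 * because the store is control-dependent on the loaded value.
	 */
	q = READ_ONCE_CTRL(a);		/* heads the control dependency */
	if (q)				/* branch actually depends on q */
		ACCESS_ONCE(b) = p;	/* guaranteed to follow the load from a */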
@@ -1662,7 +1673,7 @@ CPU from reordering them.
 
 There are some more advanced barrier functions:
 
- (*) set_mb(var, value)
+ (*) smp_store_mb(var, value)
 
     This assigns the value to the variable and then inserts a full memory
     barrier after it, depending on the function.  It isn't guaranteed to
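For comparison, a sketch of what the generic definition amounts to,
matching the per-architecture definitions later in this series (x86
instead folds the store and the barrier into a single implicitly locked
xchg instruction on SMP):

	#define smp_store_mb(var, value) \
		do { WRITE_ONCE(var, value); smp_mb(); } while (0)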
@@ -1784,10 +1795,9 @@ for each construct.  These operations all imply certain barriers:
 
      Memory operations issued before the ACQUIRE may be completed after
      the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
-     combined with a following ACQUIRE, orders prior loads against
-     subsequent loads and stores and also orders prior stores against
-     subsequent stores.  Note that this is weaker than smp_mb()!  The
-     smp_mb__before_spinlock() primitive is free on many architectures.
+     combined with a following ACQUIRE, orders prior stores against
+     subsequent loads and stores.  Note that this is weaker than smp_mb()!
+     The smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
 
@@ -1975,7 +1985,7 @@ after it has altered the task state:
 	CPU 1
 	===============================
 	set_current_state();
-	  set_mb();
+	  smp_store_mb();
 	    STORE current->state
 	    <general barrier>
 	LOAD event_indicated
@@ -2016,7 +2026,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING:
 	CPU 1				CPU 2
 	===============================	===============================
 	set_current_state();		STORE event_indicated
-	  set_mb();			wake_up();
+	  smp_store_mb();		wake_up();
 	    STORE current->state	<write barrier>
 	    <general barrier>		STORE current->state
 	LOAD event_indicated
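The two fragments above are the sleeper side of the classic wait/wake
pattern; a sketch of the full loop, with event_indicated standing in for
the example flag used throughout memory-barriers.txt:

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE); /* uses smp_store_mb() */
		if (event_indicated)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);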
diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt
index d727a3829100..53a726855e49 100644
--- a/Documentation/networking/udplite.txt
+++ b/Documentation/networking/udplite.txt
@@ -20,7 +20,7 @@
     files/UDP-Lite-HOWTO.txt
 
   o The Wireshark UDP-Lite WiKi (with capture files):
-    http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
+    https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
 
   o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
 
diff --git a/MAINTAINERS b/MAINTAINERS
index e30871880fdb..4303a642a2e5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -51,9 +51,9 @@ trivial patch so apply some common sense.
 	or does something very odd once a month document it.
 
 	PLEASE remember that submissions must be made under the terms
-	of the OSDL certificate of contribution and should include a
-	Signed-off-by: line.  The current version of this "Developer's
-	Certificate of Origin" (DCO) is listed in the file
+	of the Linux Foundation certificate of contribution and should
+	include a Signed-off-by: line.  The current version of this
+	"Developer's Certificate of Origin" (DCO) is listed in the file
 	Documentation/SubmittingPatches.
 
 6.	Make sure you have the right to send any changes you make. If you
@@ -7575,6 +7575,7 @@ F:	drivers/pci/host/pci-exynos.c
 
 PCI DRIVER FOR SYNOPSIS DESIGNWARE
 M:	Jingoo Han <jingoohan1@gmail.com>
+M:	Pratyush Anand <pratyush.anand@gmail.com>
 L:	linux-pci@vger.kernel.org
 S:	Maintained
 F:	drivers/pci/host/*designware*
@@ -7588,8 +7589,9 @@ F:	Documentation/devicetree/bindings/pci/host-generic-pci.txt
 F:	drivers/pci/host/pci-host-generic.c
 
 PCIE DRIVER FOR ST SPEAR13XX
+M:	Pratyush Anand <pratyush.anand@gmail.com>
 L:	linux-pci@vger.kernel.org
-S:	Orphan
+S:	Maintained
 F:	drivers/pci/host/*spear*
 
 PCMCIA SUBSYSTEM
@@ -7632,7 +7634,6 @@ F:	kernel/delayacct.c
 
 PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
-M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
 L:	linux-kernel@vger.kernel.org
diff --git a/Makefile b/Makefile
index aee7e5cb4c15..6c6f14628f32 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION =
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
@@ -215,7 +215,6 @@ VPATH		:= $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
 
 export srctree objtree VPATH
 
-
 # SUBARCH tells the usermode build what the underlying arch is. That is set
 # first, and if a usermode build is happening, the "ARCH=um" on the command
 # line overrides the setting of ARCH below. If a native build is happening,
@@ -1497,11 +1496,11 @@ image_name:
 # Clear a bunch of variables before executing the submake
 tools/: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/
 
 tools/%: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*
 
 # Single targets
 # ---------------------------------------------------------------------------
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 429e8cd0d78e..e5117766529e 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -66,6 +66,4 @@
 #undef __ASM__MB
 #undef ____cmpxchg
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index c3255e0c90aa..dbb3f4d2bf84 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -223,6 +223,25 @@
 /include/ "tps65217.dtsi"
 
 &tps {
+	/*
+	 * Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
+	 * mode") at poweroff.  Most BeagleBone versions do not support RTC-only
+	 * mode and risk hardware damage if this mode is entered.
+	 *
+	 * For details, see linux-omap mailing list May 2015 thread
+	 *	[PATCH] ARM: dts: am335x-bone* enable pmic-shutdown-controller
+	 * In particular, messages:
+	 *	http://www.spinics.net/lists/linux-omap/msg118585.html
+	 *	http://www.spinics.net/lists/linux-omap/msg118615.html
+	 *
+	 * You can override this later with
+	 *	&tps {  /delete-property/ ti,pmic-shutdown-controller;  }
+	 * if you want to use RTC-only mode and have made sure you are not
+	 * affected by the hardware problems.  (Tip: double-check by performing
+	 * a current measurement after shutdown: it should be less than 1 mA.)
+	 */
+	ti,pmic-shutdown-controller;
+
 	regulators {
 		dcdc1_reg: regulator@0 {
 			regulator-name = "vdds_dpr";
diff --git a/arch/arm/boot/dts/am35xx-clocks.dtsi b/arch/arm/boot/dts/am35xx-clocks.dtsi
index 518b8fde88b0..18cc826e9db5 100644
--- a/arch/arm/boot/dts/am35xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am35xx-clocks.dtsi
@@ -12,7 +12,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,am35xx-gate-clock";
 		clocks = <&ipss_ick>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <1>;
 	};
 
@@ -20,7 +20,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
 		clocks = <&rmii_ck>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <9>;
 	};
 
@@ -28,7 +28,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,am35xx-gate-clock";
 		clocks = <&ipss_ick>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <2>;
 	};
 
@@ -36,7 +36,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
 		clocks = <&pclk_ck>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <10>;
 	};
 
@@ -44,7 +44,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,am35xx-gate-clock";
 		clocks = <&ipss_ick>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <0>;
 	};
 
@@ -52,7 +52,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,gate-clock";
 		clocks = <&sys_ck>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <8>;
 	};
 
@@ -60,7 +60,7 @@
 		#clock-cells = <0>;
 		compatible = "ti,am35xx-gate-clock";
 		clocks = <&sys_ck>;
-		reg = <0x059c>;
+		reg = <0x032c>;
 		ti,bit-shift = <3>;
 	};
 };
diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
index a2cf2154dcdb..fdd187c55aa5 100644
--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
+++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts
@@ -95,6 +95,11 @@
 
 			internal-regs {
 
+				rtc@10300 {
+					/* No crystal connected to the internal RTC */
+					status = "disabled";
+				};
+
 				/* J10: VCC, NC, RX, NC, TX, GND */
 				serial@12000 {
 					status = "okay";
diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index de8427be830a..289806adb343 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -382,7 +382,7 @@
 			ti,hwmods = "usb_otg_hs";
 
 			usb0: usb@47401000 {
-				compatible = "ti,musb-am33xx";
+				compatible = "ti,musb-dm816";
 				reg = <0x47401400 0x400
 				       0x47401000 0x200>;
 				reg-names = "mc", "control";
@@ -422,7 +422,7 @@
 			};
 
 			usb1: usb@47401800 {
-				compatible = "ti,musb-am33xx";
+				compatible = "ti,musb-dm816";
 				reg = <0x47401c00 0x400
 				       0x47401800 0x200>;
 				reg-names = "mc", "control";
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 5c16145920ea..5f5e0f3d5b64 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -832,8 +832,8 @@
 		touchscreen-fuzz-x = <4>;
 		touchscreen-fuzz-y = <7>;
 		touchscreen-fuzz-pressure = <2>;
-		touchscreen-max-x = <4096>;
-		touchscreen-max-y = <4096>;
+		touchscreen-size-x = <4096>;
+		touchscreen-size-y = <4096>;
 		touchscreen-max-pressure = <2048>;
 
 		ti,x-plate-ohms = <280>;
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index d2f81e6b8c1c..6c2327e1c732 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -81,7 +81,7 @@ do { \
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
-#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_mb__before_atomic()	smp_mb()
 #define smp_mb__after_atomic()	smp_mb()
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index c0b6dccbf7bd..7d23ce04cad5 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3;
 static u32 exynos_irqwake_intmask = 0xffffffff;
 
 static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
-	{ 105, BIT(1) }, /* RTC alarm */
-	{ 106, BIT(2) }, /* RTC tick */
+	{ 73, BIT(1) }, /* RTC alarm */
+	{ 74, BIT(2) }, /* RTC tick */
 	{ /* sentinel */ },
 };
 
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index d1dedc8195ed..eafd120b53f1 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -203,23 +203,8 @@ save_context_wfi:
 	 */
 	ldr	r1, kernel_flush
 	blx	r1
-	/*
-	 * The kernel doesn't interwork: v7_flush_dcache_all in particluar will
-	 * always return in Thumb state when CONFIG_THUMB2_KERNEL is enabled.
-	 * This sequence switches back to ARM.  Note that .align may insert a
-	 * nop: bx pc needs to be word-aligned in order to work.
-	 */
-THUMB(	.thumb		)
-THUMB(	.align		)
-THUMB(	bx	pc	)
-THUMB(	nop	)
-	.arm
-
 	b	omap3_do_wfi
-
-/*
- * Local variables
- */
+ENDPROC(omap34xx_cpu_suspend)
 omap3_do_wfi_sram_addr:
 	.word omap3_do_wfi_sram
 kernel_flush:
@@ -364,10 +349,7 @@ exit_nonoff_modes:
  * ===================================
  */
 	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
-
-/*
- * Local variables
- */
+ENDPROC(omap3_do_wfi)
 sdrc_power:
 	.word SDRC_POWER_V
 cm_idlest1_core:
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 43d54017b779..d0ab012fa379 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -16,7 +16,8 @@
 #include "mt8173.dtsi"
 
 / {
-	model = "mediatek,mt8173-evb";
+	model = "MediaTek MT8173 evaluation board";
+	compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
 
 	aliases {
 		serial0 = &uart0;
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 71f19c4dc0de..0fa47c4275cb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -114,7 +114,7 @@ do { \
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
-#define set_mb(var, value)	do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 #define nop()		asm volatile("nop");
 
 #define smp_mb__before_atomic()	smp_mb()
diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h
index 962a6aeab787..366bbeaeb405 100644
--- a/arch/avr32/include/asm/cmpxchg.h
+++ b/arch/avr32/include/asm/cmpxchg.h
@@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
 	   if something tries to do an invalid cmpxchg(). */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, int size)
 {
diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h
index 4e8ad0523118..6abebe82d4e9 100644
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
+#include <asm/def_LPBlackfin.h>
 
 #define __raw_readb bfin_read8
 #define __raw_readw bfin_read16
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 9e7802911a57..a6e34e2acbba 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
  *  looks just like atomic_cmpxchg on our arch currently with a bunch of
  *  variable casting.
  */
-#define __HAVE_ARCH_CMPXCHG 1
 
 #define cmpxchg(ptr, old, new)					\
 ({								\
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index f6769eb2bbf9..843ba435e43b 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,12 +77,7 @@ do { \
 	___p1;								\
 })
 
-/*
- * XXX check on this ---I suspect what Linus really wants here is
- * acquire vs release semantics but we can't discuss this stuff with
- * Linus just yet.  Grrr...
- */
-#define set_mb(var, value)	do { (var) = (value); mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); mb(); } while (0)
 
 /*
  * The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index f35109b1d907..a0e3620f8f13 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
  * indicated by comparing RETURN with OLD.
  */
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /*
  * This function doesn't exist, so you'll get a linker error
  * if something tries to do an invalid cmpxchg().
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 15051e9c2c6f..b054c5c6e713 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -127,7 +127,7 @@ int smp_num_siblings = 1;
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
 EXPORT_SYMBOL(ia64_cpu_to_sapicid);
 
-static volatile cpumask_t cpu_callin_map;
+static cpumask_t cpu_callin_map;
 
 struct smp_boot_data smp_boot_data __initdata;
 
@@ -477,6 +477,7 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
 	for (timeout = 0; timeout < 100000; timeout++) {
 		if (cpumask_test_cpu(cpu, &cpu_callin_map))
 			break;  /* It has booted */
+		barrier(); /* Make sure we re-read cpu_callin_map */
 		udelay(100);
 	}
 	Dprintk("\n");
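The pairing above is the usual replacement for a volatile polling loop:
once the 'volatile' qualifier is dropped, a compiler barrier must force
the re-read instead.  A minimal sketch of the pattern, with an
illustrative 'done' flag set by another CPU:

	while (!done) {
		barrier();	/* compiler must reload 'done' each pass */
		udelay(100);
	}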
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h
index de651db20b43..14bf9b739dd2 100644
--- a/arch/m32r/include/asm/cmpxchg.h
+++ b/arch/m32r/include/asm/cmpxchg.h
@@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
 	((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr),	\
 			sizeof(*(ptr))))
 
-#define __HAVE_ARCH_CMPXCHG	1
-
 static inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
 {
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index bc755bc620ad..83b1df80f0ac 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
  * indicated by comparing RETURN with OLD.
  */
 #ifdef CONFIG_RMW_INSNS
-#define __HAVE_ARCH_CMPXCHG	1
 
 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
 				      unsigned long new, int size)
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index d703d8e26a65..5a696e507930 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -84,7 +84,7 @@ static inline void fence(void)
 #define read_barrier_depends()		do { } while (0)
 #define smp_read_barrier_depends()	do { } while (0)
 
-#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_store_release(p, v)						\
 do {									\
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
index b1bc1be8540f..be29e3e44321 100644
--- a/arch/metag/include/asm/cmpxchg.h
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return old;
 }
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #define cmpxchg(ptr, o, n)						\
 	({								\
 		__typeof__(*(ptr)) _o_ = (o);				\
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index a73c93c3d44a..7fc8397d16f2 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -225,7 +225,7 @@ void __init plat_time_init(void)
 	ddr_clk_rate = ath79_get_sys_clk_rate("ddr");
 	ref_clk_rate = ath79_get_sys_clk_rate("ref");
 
-	pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz",
+	pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz\n",
 		cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000,
 		ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000,
 		ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000,
diff --git a/arch/mips/cobalt/Makefile b/arch/mips/cobalt/Makefile
index 558e94977942..68f0c5871adc 100644
--- a/arch/mips/cobalt/Makefile
+++ b/arch/mips/cobalt/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the Cobalt micro systems family specific parts of the kernel
 #
 
-obj-y := buttons.o irq.o lcd.o led.o reset.o rtc.o serial.o setup.o time.o
+obj-y := buttons.o irq.o lcd.o led.o mtd.o reset.o rtc.o serial.o setup.o time.o
 
 obj-$(CONFIG_PCI) += pci.o
-obj-$(CONFIG_MTD_PHYSMAP) += mtd.o
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 2b8bbbcb9be0..7ecba84656d4 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -112,8 +112,8 @@
 #define __WEAK_LLSC_MB		"		\n"
 #endif
 
-#define set_mb(var, value) \
-	do { var = value; smp_mb(); } while (0)
+#define smp_store_mb(var, value) \
+	do { WRITE_ONCE(var, value); smp_mb(); } while (0)
 
 #define smp_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
 
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 412f945f1f5e..b71ab4a5fd50 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))));	\
 })
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 #define __cmpxchg_asm(ld, st, m, old, new)				\
 ({									\
 	__typeof(*(m)) __ret;						\
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 18ae5ddef118..c28a8499aec7 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -113,7 +113,7 @@
 #define _PAGE_PRESENT_SHIFT	0
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
 /* R2 or later cores check for RI/XI support to determine _PAGE_READ */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 #define _PAGE_WRITE_SHIFT	(_PAGE_PRESENT_SHIFT + 1)
 #define _PAGE_WRITE		(1 << _PAGE_WRITE_SHIFT)
 #else
@@ -135,16 +135,16 @@
 #define _PAGE_SPLITTING		(1 << _PAGE_SPLITTING_SHIFT)
 
 /* Only R2 or newer cores have the XI bit */
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 #define _PAGE_NO_EXEC_SHIFT	(_PAGE_SPLITTING_SHIFT + 1)
 #else
 #define _PAGE_GLOBAL_SHIFT	(_PAGE_SPLITTING_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-#endif	/* CONFIG_CPU_MIPSR2 */
+#endif	/* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 #endif	/* CONFIG_64BIT && CONFIG_MIPS_HUGE_TLB_SUPPORT */
 
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 /* XI - page cannot be executed */
 #ifndef _PAGE_NO_EXEC_SHIFT
 #define _PAGE_NO_EXEC_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
@@ -160,10 +160,10 @@
 #define _PAGE_GLOBAL_SHIFT	(_PAGE_NO_READ_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
 
-#else	/* !CONFIG_CPU_MIPSR2 */
+#else	/* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
 #define _PAGE_GLOBAL_SHIFT	(_PAGE_MODIFIED_SHIFT + 1)
 #define _PAGE_GLOBAL		(1 << _PAGE_GLOBAL_SHIFT)
-#endif	/* CONFIG_CPU_MIPSR2 */
+#endif	/* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 
 #define _PAGE_VALID_SHIFT	(_PAGE_GLOBAL_SHIFT + 1)
 #define _PAGE_VALID		(1 << _PAGE_VALID_SHIFT)
@@ -205,7 +205,7 @@
  */
 static inline uint64_t pte_to_entrylo(unsigned long pte_val)
 {
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	if (cpu_has_rixi) {
 		int sa;
 #ifdef CONFIG_32BIT
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index e92d6c4b5ed1..7163cd7fdd69 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -104,7 +104,6 @@ do { \
 	if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA))		\
 		__fpsave = FP_SAVE_VECTOR;				\
 	(last) = resume(prev, next, task_thread_info(next), __fpsave);	\
-	disable_msa();							\
 } while (0)
 
 #define finish_arch_switch(prev)					\
@@ -122,6 +121,7 @@ do { \
 	if (cpu_has_userlocal)						\
 		write_c0_userlocal(current_thread_info()->tp_value);	\
 	__restore_watch();						\
+	disable_msa();							\
 } while (0)
 
 #endif /* _ASM_SWITCH_TO_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index e36515dcd3b2..209e5b76c1bc 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -74,13 +74,12 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
 {
 	unsigned long sr, mask, fcsr, fcsr0, fcsr1;
 
+	fcsr = c->fpu_csr31;
 	mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM;
 
 	sr = read_c0_status();
 	__enable_fpu(FPU_AS_IS);
 
-	fcsr = read_32bit_cp1_register(CP1_STATUS);
-
 	fcsr0 = fcsr & mask;
 	write_32bit_cp1_register(CP1_STATUS, fcsr0);
 	fcsr0 = read_32bit_cp1_register(CP1_STATUS);
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 51f57d841662..3c8a18a00a65 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -109,7 +109,7 @@ void __init init_IRQ(void)
 #endif
 }
 
-#ifdef DEBUG_STACKOVERFLOW
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
 static inline void check_stack_overflow(void)
 {
 	unsigned long sp;
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 4b50c5787e25..d5fa3eaf39a1 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -2409,7 +2409,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
 		if (vcpu->mmio_needed == 2)
 			*gpr = *(int16_t *) run->mmio.data;
 		else
-			*gpr = *(int16_t *) run->mmio.data;
+			*gpr = *(uint16_t *)run->mmio.data;
 
 		break;
 	case 1:
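The one-character-looking change above is a sign-extension fix; a worked
example of why the cast matters when the loaded halfword is 0xffff (the
types follow the hunk, the value is illustrative):

	uint8_t data[2] = { 0xff, 0xff };

	long a = *(int16_t *)data;	/* sign-extends: a == -1 */
	long b = *(uint16_t *)data;	/* zero-extends: b == 0xffff */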
diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile
index e70c33fdb881..f2e8153e44f5 100644
--- a/arch/mips/loongson/common/Makefile
+++ b/arch/mips/loongson/common/Makefile
@@ -3,15 +3,13 @@
 #
 
 obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
-    bonito-irq.o mem.o machtype.o platform.o
+    bonito-irq.o mem.o machtype.o platform.o serial.o
 obj-$(CONFIG_PCI) += pci.o
 
 #
 # Serial port support
 #
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-loongson-serial-$(CONFIG_SERIAL_8250) := serial.o
-obj-y += $(loongson-serial-m) $(loongson-serial-y)
 obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o
 obj-$(CONFIG_LOONGSON_MC146818) += rtc.o
 
diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c
index e3c68b5da18d..509877c6e9d9 100644
--- a/arch/mips/loongson/loongson-3/smp.c
+++ b/arch/mips/loongson/loongson-3/smp.c
@@ -272,7 +272,7 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
 	if (action & SMP_ASK_C0COUNT) {
 		BUG_ON(cpu != 0);
 		c0count = read_c0_count();
-		for (i = 1; i < loongson_sysconf.nr_cpus; i++)
+		for (i = 1; i < num_possible_cpus(); i++)
 			per_cpu(core0_c0count, i) = c0count;
 	}
 }
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 0dbb65a51ce5..2e03ab173591 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1372,7 +1372,7 @@ static int probe_scache(void)
 	scache_size = addr;
 	c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22);
 	c->scache.ways = 1;
-	c->dcache.waybit = 0;	/* does not matter */
+	c->scache.waybit = 0;	/* does not matter */
 
 	return 1;
 }
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 5d6139390bf8..e23fdf2a9c80 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -681,11 +681,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
 	sp_off += config_enabled(CONFIG_64BIT) ?
 		(ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;
 
-	/*
-	 * Subtract the bytes for the last registers since we only care about
-	 * the location on the stack pointer.
-	 */
-	return sp_off - RSIZE;
+	return sp_off;
 }
 
 static void build_prologue(struct jit_ctx *ctx)
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c
index e20b02e3ae28..e10d10b9e82a 100644
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -41,7 +41,7 @@ static irqreturn_t ill_acc_irq_handler(int irq, void *_priv)
 		addr, (type >> ILL_ACC_OFF_S) & ILL_ACC_OFF_M,
 		type & ILL_ACC_LEN_M);
 
-	rt_memc_w32(REG_ILL_ACC_TYPE, REG_ILL_ACC_TYPE);
+	rt_memc_w32(ILL_INT_STATUS, REG_ILL_ACC_TYPE);
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index dbd13354ec41..0a90b965cccb 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
 #define xchg(ptr, x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /* bug catcher for when unsupported size is used - won't link */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index a3bf5be111ff..51ccc7232042 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
-#define set_mb(var, value)	do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); mb(); } while (0)
 
 #ifdef __SUBARCH_HAS_LWSYNC
 #    define SMPWMB      LWSYNC
@@ -89,5 +89,6 @@ do { \
 
 #define smp_mb__before_atomic()     smp_mb()
 #define smp_mb__after_atomic()      smp_mb()
+#define smp_mb__before_spinlock()   smp_mb()
 
 #endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68fe7f0..ad6263cffb0f 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-#define __HAVE_ARCH_CMPXCHG	1
 
 static __always_inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 8d724718ec21..e6f8615a11eb 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
 #define smp_mb__before_atomic()		smp_mb()
 #define smp_mb__after_atomic()		smp_mb()
 
-#define set_mb(var, value)		do { var = value; mb(); } while (0)
+#define smp_store_mb(var, value)	do { WRITE_ONCE(var, value); mb(); } while (0)
 
 #define smp_store_release(p, v)						\
 do {									\
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 4eadec466b8c..411464f4c97a 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,8 +32,6 @@
 	__old;								\
 })
 
-#define __HAVE_ARCH_CMPXCHG
-
 #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)		\
 ({									\
 	register __typeof__(*(p1)) __old1 asm("2") = (o1);		\
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index ba8593a515ba..de156ba3bd71 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  * We get 160 bytes stack space from calling function, but only use
  * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
  */
-#define STK_OFF		(MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_SPACE	(MAX_BPF_STACK + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED	(160 - 11 * 8)
+#define STK_OFF		(STK_SPACE - STK_160_UNUSED)
 #define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
 #define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
 
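The split macros make the stack arithmetic auditable; a worked instance,
assuming MAX_BPF_STACK is 512 (its value in this tree):

	STK_SPACE      = 512 + 8 + 4 + 4 + 160 = 688
	STK_160_UNUSED = 160 - 11 * 8          = 72
	STK_OFF        = 688 - 72              = 616

so the prologue in the next file drops r15 by 616 bytes and points the
BPF frame pointer at the unused 72-byte tail of the caller-provided
register save area.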
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 20c146d1251a..55423d8be580 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
 	}
 	/* Setup stack and backchain */
 	if (jit->seen & SEEN_STACK) {
-		/* lgr %bfp,%r15 (BPF frame pointer) */
-		EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+		if (jit->seen & SEEN_FUNC)
+			/* lgr %w1,%r15 (backchain) */
+			EMIT4(0xb9040000, REG_W1, REG_15);
+		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
 		if (jit->seen & SEEN_FUNC)
-			/* stg %bfp,152(%r15) (backchain) */
-			EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+			/* stg %w1,152(%r15) (backchain) */
+			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
 	}
 	/*
diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h
index f384839c3ee5..cc3f6420b71c 100644
--- a/arch/score/include/asm/cmpxchg.h
+++ b/arch/score/include/asm/cmpxchg.h
@@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
 		(unsigned long)(o),				\
 		(unsigned long)(n)))
 
-#define __HAVE_ARCH_CMPXCHG	1
-
 #include <asm-generic/cmpxchg-local.h>
 
 #endif /* _ASM_SCORE_CMPXCHG_H */
diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S
index 00b7d3a2fc60..16efa3ad037f 100644
--- a/arch/score/lib/string.S
+++ b/arch/score/lib/string.S
@@ -175,10 +175,10 @@ ENTRY(__clear_user)
 	br	r3
 
 	.section .fixup, "ax"
+99:
 	br	r3
 	.previous
 	.section __ex_table, "a"
 	.align	2
-99:
 	.word	0b, 99b
 	.previous
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index 43715308b068..bf91037db4e0 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -32,7 +32,7 @@
 #define ctrl_barrier()	__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
 #endif
 
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
 #include <asm-generic/barrier.h>
 
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index f6bd1406b897..85c97b188d71 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
  * if something tries to do an invalid cmpxchg(). */
 extern void __cmpxchg_called_with_bad_pointer(void);
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
 		unsigned long new, int size)
 {
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 76648941fea7..809941e33e12 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -40,8 +40,8 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 #define dma_rmb()	rmb()
 #define dma_wmb()	wmb()
 
-#define set_mb(__var, __value) \
-	do { __var = __value; membar_safe("#StoreLoad"); } while(0)
+#define smp_store_mb(__var, __value) \
+	do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index d38b52dca216..83ffb83c5397 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
  *
  * Cribbed from <asm-parisc/atomic.h>
  */
-#define __HAVE_ARCH_CMPXCHG	1
 
 /* bug catcher for when unsupported size is used - won't link */
 void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 0e1ed6cfbf68..faa2f61058c2 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
 
 #include <asm-generic/cmpxchg-local.h>
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 static inline unsigned long
 __cmpxchg_u32(volatile int *m, int old, int new)
 {
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5fadd42..0496970cef82 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 226d5696e1d1..4e986e809861 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,7 +127,8 @@ config X86
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
 	select ARCH_USE_BUILTIN_BSWAP
-	select ARCH_USE_QUEUE_RWLOCK
+	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_USE_QUEUED_RWLOCKS
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
@@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
-	select UNINLINE_SPIN_UNLOCK
+	select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 89dd0d78013a..805d25ca5f1d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -2,15 +2,14 @@
 #define BOOT_COMPRESSED_MISC_H
 
 /*
- * we have to be careful, because no indirections are allowed here, and
- * paravirt_ops is a kind of one. As it will only run in baremetal anyway,
- * we just keep it from happening
+ * Special hack: we have to be careful, because no indirections are allowed here,
+ * and paravirt_ops is a kind of one. As it will only run in baremetal anyway,
+ * we just keep it from happening. (This list needs to be extended when new
+ * paravirt and debugging variants are added.)
  */
 #undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
 #undef CONFIG_KASAN
-#ifdef CONFIG_X86_32
-#define _ASM_X86_DESC_H 1
-#endif
 
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 959e45b81fe2..e51a8f803f55 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -35,12 +35,12 @@
 #define smp_mb()	mb()
 #define smp_rmb()	dma_rmb()
 #define smp_wmb()	barrier()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
 #endif /* SMP */
 
 #define read_barrier_depends()	do { } while (0)
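Note the asymmetry above: on SMP the store and the full barrier are fused
into a single implicitly LOCK-prefixed xchg instruction, while the UP
variant only needs a compiler barrier.  A usage sketch (the flag name is
illustrative):

	static int flag;

	static void publish(void)
	{
		/* store to flag, then full barrier before any later access */
		smp_store_mb(flag, 1);
	}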
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99c105d78b7e..ad19841eddfe 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,8 +4,6 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h> /* Provides LOCK_PREFIX */
 
-#define __HAVE_ARCH_CMPXCHG 1
-
 /*
  * Non-existant functions to indicate usage errors at link time
  * (or compile-time if the compiler implements __compiletime_error().
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8957810ad7d1..d143bfad45d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,

 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)

+#ifdef CONFIG_QUEUED_SPINLOCKS
+
+static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
+							u32 val)
+{
+	PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
+}
+
+static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
+}
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+	PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+	PVOP_VCALL1(pv_lock_ops.kick, cpu);
+}
+
+#else /* !CONFIG_QUEUED_SPINLOCKS */
+
 static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
 							__ticket_t ticket)
 {
@@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
 	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }

-#endif
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#endif /* SMP && PARAVIRT_SPINLOCKS */

 #ifdef CONFIG_X86_32
 #define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f7b0b5c112f2..8766c7c395c2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -333,9 +333,19 @@ struct arch_spinlock;
 typedef u16 __ticket_t;
 #endif

+struct qspinlock;
+
 struct pv_lock_ops {
+#ifdef CONFIG_QUEUED_SPINLOCKS
+	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
+	struct paravirt_callee_save queued_spin_unlock;
+
+	void (*wait)(u8 *ptr, u8 val);
+	void (*kick)(int cpu);
+#else /* !CONFIG_QUEUED_SPINLOCKS */
 	struct paravirt_callee_save lock_spinning;
 	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
 };

 /* This contains all the paravirt structures: we get a convenient
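pv_lock_ops above is a plain function-pointer table: at boot either the native or the hypervisor-aware implementations are installed, and every later lock slow path dispatches through it. A hedged standalone sketch of that indirection (all names below are invented; the real dispatch goes through the PVOP_VCALL thunks, not a direct call):

#include <stdio.h>
#include <stddef.h>

struct lock;	/* opaque stand-in for struct qspinlock */

struct lock_ops {
	void (*lock_slowpath)(struct lock *l, unsigned int val);
};

static void native_slowpath(struct lock *l, unsigned int val)
{
	(void)l; (void)val;
	printf("native: spin on the lock word\n");
}

static void pv_slowpath(struct lock *l, unsigned int val)
{
	(void)l; (void)val;
	printf("pv: ask the hypervisor to halt this vCPU\n");
}

static struct lock_ops ops = { .lock_slowpath = native_slowpath };

int main(void)
{
	int running_as_guest = 1;	/* in reality detected at boot */

	if (running_as_guest)
		ops.lock_slowpath = pv_slowpath;	/* one-time patch, like pv_lock_ops */

	ops.lock_slowpath(NULL, 0);
	return 0;
}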
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19507ffa5d28..5fabf1362942 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -107,7 +107,7 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
 static inline int user_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
-	return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL;
+	return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL;
 #else
 	return !!(regs->cs & 3);
 #endif
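The reworked 32-bit user_mode() test folds VM86 handling into one comparison: in VM86 mode the saved CS can carry an RPL below 3, but EFLAGS.VM is set, and OR-ing that much larger bit in pushes the result past USER_RPL. A standalone check of the arithmetic, with the relevant constants restated here from the kernel headers:

#include <assert.h>

#define SEGMENT_RPL_MASK	0x3UL		/* low two bits of CS */
#define USER_RPL		0x3UL		/* ring 3 */
#define X86_VM_MASK		(1UL << 17)	/* EFLAGS.VM */

static int user_mode_32(unsigned long cs, unsigned long flags)
{
	return ((cs & SEGMENT_RPL_MASK) | (flags & X86_VM_MASK)) >= USER_RPL;
}

int main(void)
{
	assert(user_mode_32(0x7b, 0));			/* ordinary ring-3 CS */
	assert(!user_mode_32(0x10, 0));			/* kernel CS, RPL 0 */
	assert(user_mode_32(0x00, X86_VM_MASK));	/* VM86: RPL 0, but VM set */
	return 0;
}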
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
new file mode 100644
index 000000000000..9d51fae1cba3
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock.h
@@ -0,0 +1,57 @@
+#ifndef _ASM_X86_QSPINLOCK_H
+#define _ASM_X86_QSPINLOCK_H
+
+#include <asm/cpufeature.h>
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define	queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ *
+ * A smp_store_release() on the least-significant byte.
+ */
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+	smp_store_release((u8 *)lock, 0);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	pv_queued_spin_lock_slowpath(lock, val);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	pv_queued_spin_unlock(lock);
+}
+#else
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	native_queued_spin_unlock(lock);
+}
+#endif
+
+#define virt_queued_spin_lock virt_queued_spin_lock
+
+static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+{
+	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+		return false;
+
+	while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
+		cpu_relax();
+
+	return true;
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_X86_QSPINLOCK_H */
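Two ideas in this new header are worth unpacking: unlock is just a release store of zero to the lock word's least-significant byte, and virt_queued_spin_lock() degrades to a simple test-and-set lock when running under a hypervisor, where MCS-style queueing interacts badly with preempted vCPUs. A userspace approximation with C11 atomics (a sketch, not the kernel implementation):

#include <stdatomic.h>

#define Q_LOCKED_VAL 1	/* mirrors _Q_LOCKED_VAL: locked byte set, no waiters */

static atomic_uint lock_val;	/* stand-in for the qspinlock's atomic val */

static void ts_lock(void)
{
	unsigned int expected = 0;

	/* the hypervisor fallback: plain test-and-set, no queueing */
	while (!atomic_compare_exchange_weak_explicit(&lock_val, &expected,
						      Q_LOCKED_VAL,
						      memory_order_acquire,
						      memory_order_relaxed))
		expected = 0;	/* a failed CAS rewrote 'expected'; retry from zero */
}

static void ts_unlock(void)
{
	/* the whole unlock: a release store of 0, like smp_store_release() */
	atomic_store_explicit(&lock_val, 0, memory_order_release);
}

int main(void)
{
	ts_lock();
	ts_unlock();
	return 0;
}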
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
new file mode 100644
index 000000000000..b002e711ba88
--- /dev/null
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_QSPINLOCK_PARAVIRT_H
+#define __ASM_QSPINLOCK_PARAVIRT_H
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+
+#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5a9856eb12ba..7d5a1929d76b 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -231,11 +231,21 @@
 #define TLS_SIZE			(GDT_ENTRY_TLS_ENTRIES* 8)

 #ifdef __KERNEL__
+
+/*
+ * early_idt_handler_array is an array of entry points referenced in the
+ * early IDT.  For simplicity, it's a real array with one entry point
+ * every nine bytes.  That leaves room for an optional 'push $0' if the
+ * vector has no error code (two bytes), a 'push $vector_number' (two
+ * bytes), and a jump to the common entry code (up to five bytes).
+ */
+#define EARLY_IDT_HANDLER_SIZE 9
+
 #ifndef __ASSEMBLY__

-extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
 #ifdef CONFIG_TRACING
-# define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handler_array early_idt_handler_array
 #endif

 /*
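The comment added above fixes the handler-array layout at nine bytes per vector, so the stub for vector i sits at a constant offset and needs no per-vector symbol. A small standalone illustration of that indexing (the array here is a zeroed stand-in):

#include <stdio.h>

#define NUM_EXCEPTION_VECTORS	32
#define EARLY_IDT_HANDLER_SIZE	9	/* push(2) + push(2) + jmp(5) */

static const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];

int main(void)
{
	int i;

	/* the entry installed in the IDT for vector i is simply row i */
	for (i = 0; i < 3; i++)
		printf("vector %d stub at %p\n", i,
		       (const void *)early_idt_handler_array[i]);
	return 0;
}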
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 64b611782ef0..be0a05913b91 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -42,6 +42,10 @@
 extern struct static_key paravirt_ticketlocks_enabled;
 static __always_inline bool static_key_false(struct static_key *key);

+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
+
 #ifdef CONFIG_PARAVIRT_SPINLOCKS

 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
@@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		cpu_relax();
 	}
 }
+#endif /* CONFIG_QUEUED_SPINLOCKS */

 /*
  * Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 5f9d7572d82b..65c3e37f879a 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;

 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)

+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
 typedef struct arch_spinlock {
 	union {
 		__ticketpair_t head_tail;
@@ -33,6 +36,7 @@ typedef struct arch_spinlock {
 } arch_spinlock_t;

 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
+#endif /* CONFIG_QUEUED_SPINLOCKS */

 #include <asm-generic/qrwlock_types.h>

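With CONFIG_QUEUED_SPINLOCKS, arch_spinlock_t becomes the generic qspinlock: one 32-bit value packing the locked byte, a pending field and the wait-queue tail. The field boundaries below follow the common small-NR_CPUS layout and are restated from memory, so treat the exact masks as an assumption:

#include <stdio.h>

/* assumed field boundaries (NR_CPUS < 16k configuration): */
#define Q_LOCKED_MASK	0x000000ffu	/* byte 0: owner holds the lock */
#define Q_PENDING_MASK	0x0000ff00u	/* byte 1: first waiter spins here */
#define Q_TAIL_MASK	0xffff0000u	/* bytes 2-3: tail of the wait queue */

int main(void)
{
	unsigned int val = 0x00020001u;	/* example: locked, one CPU queued */

	printf("locked=%u pending=%u tail=0x%x\n",
	       val & Q_LOCKED_MASK,
	       (val & Q_PENDING_MASK) >> 8,
	       (val & Q_TAIL_MASK) >> 16);
	return 0;
}

The unlock-by-byte-store trick in asm/qspinlock.h works precisely because the locked indication occupies the value's least-significant byte.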
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 20190bdac9d5..95cf78d44ab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -53,9 +53,12 @@
 static DEFINE_MUTEX(mce_chrdev_read_mutex);

 #define rcu_dereference_check_mce(p) \
-	rcu_dereference_index_check((p), \
-			      rcu_read_lock_sched_held() || \
-			      lockdep_is_held(&mce_chrdev_read_mutex))
+({ \
+	rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+			   lockdep_is_held(&mce_chrdev_read_mutex), \
+			   "suspicious rcu_dereference_check_mce() usage"); \
+	smp_load_acquire(&(p)); \
+})

 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
@@ -1887,7 +1890,7 @@ out:
 static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &mce_chrdev_wait, wait);
-	if (rcu_access_index(mcelog.next))
+	if (READ_ONCE(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	if (!mce_apei_read_done && apei_check_mce())
 		return POLLIN | POLLRDNORM;
@@ -1932,8 +1935,8 @@ void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
 }
 EXPORT_SYMBOL_GPL(register_mce_write_callback);

-ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
-			 size_t usize, loff_t *off)
+static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
+				size_t usize, loff_t *off)
 {
 	if (mce_write)
 		return mce_write(filp, ubuf, usize, off);
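The mcelog change above replaces the RCU index accessors with smp_load_acquire()/READ_ONCE(): the writer fills a log entry and only then publishes the new next index with a release store, so an acquire load of the index guarantees every entry below it is visible. A userspace sketch of that pairing (single writer; ring management and wrap-around are omitted):

#include <stdatomic.h>
#include <stdio.h>

static int records[16];		/* stand-in for the mcelog entry array */
static atomic_uint next;	/* index of the first unused slot */

static void writer_add(int val)
{
	unsigned int n = atomic_load_explicit(&next, memory_order_relaxed);

	records[n] = val;	/* 1: fill the entry */
	atomic_store_explicit(&next, n + 1,	/* 2: then publish the index */
			      memory_order_release);
}

static void reader(void)
{
	/* acquire pairs with the release above: slots below n are stable */
	unsigned int n = atomic_load_explicit(&next, memory_order_acquire);
	unsigned int i;

	for (i = 0; i < n; i++)
		printf("record %u = %d\n", i, records[i]);
}

int main(void)
{
	writer_add(42);
	reader();
	return 0;
}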
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87848ebe2bb7..5801a14f7524 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -135,6 +135,7 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 }

 static atomic_t active_events;
+static atomic_t pmc_refcount;
 static DEFINE_MUTEX(pmc_reserve_mutex);

 #ifdef CONFIG_X86_LOCAL_APIC
@@ -190,6 +191,7 @@ static bool check_hw_exists(void)
 	u64 val, val_fail, val_new= ~0;
 	int i, reg, reg_fail, ret = 0;
 	int bios_fail = 0;
+	int reg_safe = -1;

 	/*
 	 * Check to see if the BIOS enabled any of the counters, if so
@@ -204,6 +206,8 @@ static bool check_hw_exists(void)
 			bios_fail = 1;
 			val_fail = val;
 			reg_fail = reg;
+		} else {
+			reg_safe = i;
 		}
 	}

@@ -222,11 +226,22 @@ static bool check_hw_exists(void)
 	}

 	/*
+	 * If all the counters are enabled, the below test will always
+	 * fail.  The tools will also become useless in this scenario.
+	 * Just fail and disable the hardware counters.
+	 */
+
+	if (reg_safe == -1) {
+		reg = reg_safe;
+		goto msr_fail;
+	}
+
+	/*
 	 * Read the current value, change it and read it back to see if it
 	 * matches, this is needed to detect certain hardware emulators
 	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 	 */
-	reg = x86_pmu_event_addr(0);
+	reg = x86_pmu_event_addr(reg_safe);
 	if (rdmsrl_safe(reg, &val))
 		goto msr_fail;
 	val ^= 0xffffUL;
@@ -256,11 +271,8 @@ msr_fail:

 static void hw_perf_event_destroy(struct perf_event *event)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
-		release_pmc_hardware();
-		release_ds_buffers();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	x86_release_hardware();
+	atomic_dec(&active_events);
 }

 void hw_perf_lbr_event_destroy(struct perf_event *event)
@@ -310,6 +322,35 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }

+int x86_reserve_hardware(void)
+{
+	int err = 0;
+
+	if (!atomic_inc_not_zero(&pmc_refcount)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&pmc_refcount) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else
+				reserve_ds_buffers();
+		}
+		if (!err)
+			atomic_inc(&pmc_refcount);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+
+	return err;
+}
+
+void x86_release_hardware(void)
+{
+	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
+		release_ds_buffers();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
 /*
  * Check if we can create event of a certain type (that no conflicting events
  * are present).
@@ -322,21 +363,34 @@ int x86_add_exclusive(unsigned int what)
 		return 0;

 	mutex_lock(&pmc_reserve_mutex);
-	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++)
+	for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
 		if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
 			goto out;
+	}

 	atomic_inc(&x86_pmu.lbr_exclusive[what]);
 	ret = 0;

 out:
 	mutex_unlock(&pmc_reserve_mutex);
+
+	/*
+	 * Assuming that all exclusive events will share the PMI handler
+	 * (which checks active_events for whether there is work to do),
+	 * we can bump active_events counter right here, except for
+	 * x86_lbr_exclusive_lbr events that go through x86_pmu_event_init()
+	 * path, which already bumps active_events for them.
+	 */
+	if (!ret && what != x86_lbr_exclusive_lbr)
+		atomic_inc(&active_events);
+
 	return ret;
 }

 void x86_del_exclusive(unsigned int what)
 {
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
+	atomic_dec(&active_events);
 }

 int x86_setup_perfctr(struct perf_event *event)
@@ -513,22 +567,11 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (!x86_pmu_initialized())
 		return -ENODEV;

-	err = 0;
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				reserve_ds_buffers();
-		}
-		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	err = x86_reserve_hardware();
 	if (err)
 		return err;

+	atomic_inc(&active_events);
 	event->destroy = hw_perf_event_destroy;

 	event->hw.idx = -1;
@@ -611,6 +654,7 @@ struct sched_state {
 	int	event;		/* event index */
 	int	counter;	/* counter index */
 	int	unassigned;	/* number of events to be assigned left */
+	int	nr_gp;		/* number of GP counters used */
 	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 };

@@ -620,27 +664,29 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct perf_event	**events;
-	struct sched_state	state;
+	int			max_gp;
 	int			saved_states;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
 	struct sched_state	saved[SCHED_STATES_MAX];
 };

 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
-			    int num, int wmin, int wmax)
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+			    int num, int wmin, int wmax, int gpmax)
 {
 	int idx;

 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->events		= events;
+	sched->max_gp		= gpmax;
+	sched->constraints	= constraints;

 	for (idx = 0; idx < num; idx++) {
-		if (events[idx]->hw.constraint->weight == wmin)
+		if (constraints[idx]->weight == wmin)
 			break;
 	}

@@ -687,7 +733,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;

-	c = sched->events[sched->state.event]->hw.constraint;
+	c = sched->constraints[sched->state.event];
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -696,11 +742,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 				goto done;
 		}
 	}
+
 	/* Grab the first unused counter starting with idx */
 	idx = sched->state.counter;
 	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-		if (!__test_and_set_bit(idx, sched->state.used))
+		if (!__test_and_set_bit(idx, sched->state.used)) {
+			if (sched->state.nr_gp++ >= sched->max_gp)
+				return false;
+
 			goto done;
+		}
 	}

 	return false;
@@ -745,7 +796,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->events[sched->state.event]->hw.constraint;
+		c = sched->constraints[sched->state.event];
 	} while (c->weight != sched->state.weight);

 	sched->state.counter = 0;	/* start with first counter */
@@ -756,12 +807,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign)
 {
 	struct perf_sched sched;

-	perf_sched_init(&sched, events, n, wmin, wmax);
+	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);

 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -788,9 +839,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		x86_pmu.start_scheduling(cpuc);

 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		hwc = &cpuc->event_list[i]->hw;
+		cpuc->event_constraint[i] = NULL;
 		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-		hwc->constraint = c;
+		cpuc->event_constraint[i] = c;

 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
@@ -801,7 +852,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = hwc->constraint;
+		c = cpuc->event_constraint[i];

 		/* never assigned */
 		if (hwc->idx == -1)
@@ -821,9 +872,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	}

 	/* slow path */
-	if (i != n)
-		unsched = perf_assign_events(cpuc->event_list, n, wmin,
-					     wmax, assign);
+	if (i != n) {
+		int gpmax = x86_pmu.num_counters;
+
+		/*
+		 * Do not allow scheduling of more than half the available
+		 * generic counters.
+		 *
+		 * This helps avoid counter starvation of sibling thread by
+		 * ensuring at most half the counters cannot be in exclusive
+		 * mode. There is no designated counters for the limits. Any
+		 * N/2 counters can be used. This helps with events with
+		 * specific counter constraints.
+		 */
+		if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+		    READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+			gpmax /= 2;
+
+		unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+					     wmax, gpmax, assign);
+	}

 	/*
 	 * In case of success (unsched = 0), mark events as committed,
@@ -840,12 +908,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			e = cpuc->event_list[i];
 			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
 			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, e, assign[i]);
+				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
 		}
-	}
-
-	if (!assign || unsched) {
-
+	} else {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
@@ -1058,13 +1123,16 @@ int x86_perf_event_set_period(struct perf_event *event)

 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;

-	/*
-	 * The hw event starts counting from this event offset,
-	 * mark it to be able to extra future deltas:
-	 */
-	local64_set(&hwc->prev_count, (u64)-left);
+	if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+	    local64_read(&hwc->prev_count) != (u64)-left) {
+		/*
+		 * The hw event starts counting from this event offset,
+		 * mark it to be able to extra future deltas:
+		 */
+		local64_set(&hwc->prev_count, (u64)-left);

-	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+	}

 	/*
 	 * Due to erratum on certan cpu we need
@@ -1292,8 +1360,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 		x86_pmu.put_event_constraints(cpuc, event);

 	/* Delete the array entry. */
-	while (++i < cpuc->n_events)
+	while (++i < cpuc->n_events) {
 		cpuc->event_list[i-1] = cpuc->event_list[i];
+		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+	}
 	--cpuc->n_events;

 	perf_event_update_userpage(event);
@@ -1374,6 +1444,10 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 	u64 finish_clock;
 	int ret;

+	/*
+	 * All PMUs/events that share this PMI handler should make sure to
+	 * increment active_events for their events.
+	 */
 	if (!atomic_read(&active_events))
 		return NMI_DONE;

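The new x86_reserve_hardware()/x86_release_hardware() pair factors out a common refcount shape: atomic_inc_not_zero() serves the already-reserved fast path without taking any lock, and the mutex is only used to serialize the contended 0 -> 1 transition. A self-contained pthread sketch of the same shape (the reserve/release bodies are stand-ins, and the put side is deliberately coarser than the kernel's atomic_dec_and_mutex_lock()):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int refcount;
static pthread_mutex_t reserve_lock = PTHREAD_MUTEX_INITIALIZER;

static int expensive_reserve(void) { return 0; }	/* stand-in: grab the PMU */
static void expensive_release(void) { }			/* stand-in: free the PMU */

static int resource_get(void)
{
	int old = atomic_load(&refcount);
	int err = 0;

	/* fast path, like atomic_inc_not_zero(): no lock if already held */
	while (old > 0) {
		if (atomic_compare_exchange_weak(&refcount, &old, old + 1))
			return 0;
	}

	pthread_mutex_lock(&reserve_lock);
	if (atomic_load(&refcount) == 0)
		err = expensive_reserve();	/* serialized 0 -> 1 transition */
	if (!err)
		atomic_fetch_add(&refcount, 1);
	pthread_mutex_unlock(&reserve_lock);
	return err;
}

static void resource_put(void)
{
	pthread_mutex_lock(&reserve_lock);
	if (atomic_fetch_sub(&refcount, 1) == 1)
		expensive_release();		/* last user tears down */
	pthread_mutex_unlock(&reserve_lock);
}

int main(void)
{
	if (!resource_get()) {
		/* ... use the reserved hardware ... */
		resource_put();
	}
	printf("refcount now %d\n", atomic_load(&refcount));
	return 0;
}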
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6ac5cb7a9e14..3e7fd27dfe20 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -74,6 +74,9 @@ struct event_constraint {
 #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */
 #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */


 struct amd_nb {
@@ -87,6 +90,18 @@ struct amd_nb {
 #define MAX_PEBS_EVENTS		8

 /*
+ * Flags PEBS can handle without an PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+	PERF_SAMPLE_TRANSACTION)
+
+/*
  * A debug store configuration.
  *
  * We only support architectures that use 64bit fields.
@@ -132,10 +147,7 @@ enum intel_excl_state_type {
 };

 struct intel_excl_states {
-	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
 	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-	int  num_alloc_cntrs;/* #counters allocated */
-	int  max_alloc_cntrs;/* max #counters allowed */
 	bool sched_started; /* true if scheduling has started */
 };

@@ -144,6 +156,11 @@ struct intel_excl_cntrs {

 	struct intel_excl_states states[2];

+	union {
+		u16	has_exclusive[2];
+		u32	exclusive_present;
+	};
+
 	int		refcnt;		/* per-core: #HT threads */
 	unsigned	core_id;	/* per-core: core id */
 };
@@ -172,7 +189,11 @@ struct cpu_hw_events {
 					     added in the current transaction */
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
+
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX];
+
+	int			n_excl; /* the number of exclusive events */

 	unsigned int		group_flag;
 	int			is_fake;
@@ -519,12 +540,10 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);

-	void		(*commit_scheduling)(struct cpu_hw_events *cpuc,
-					     struct perf_event *event,
-					     int cntr);
-
 	void		(*start_scheduling)(struct cpu_hw_events *cpuc);

+	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
 	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);

 	struct event_constraint *event_constraints;
@@ -697,6 +716,10 @@ int x86_add_exclusive(unsigned int what);

 void x86_del_exclusive(unsigned int what);

+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
 void hw_perf_lbr_event_destroy(struct perf_event *event);

 int x86_setup_perfctr(struct perf_event *event);
@@ -717,8 +740,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,

 void x86_pmu_enable_all(int added);

-int perf_assign_events(struct perf_event **events, int n,
-			int wmin, int wmax, int *assign);
+int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int gpmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

 void x86_pmu_stop(struct perf_event *event, int flags);
@@ -860,6 +883,8 @@ void intel_pmu_pebs_enable_all(void);

 void intel_pmu_pebs_disable_all(void);

+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_ds_init(void);

 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
@@ -929,4 +954,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
 	return NULL;
 }

+static inline int is_ht_workaround_enabled(void)
+{
+	return 0;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
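PEBS_FREERUNNING_FLAGS above is consumed as a capability mask in intel_pmu_hw_config(): an event may use free-running PEBS only if every bit it requests in sample_type falls inside the mask. The test is the usual "no bits outside the set" idiom, shown here standalone with invented bit values:

#include <assert.h>

/* invented bit values, standing in for PERF_SAMPLE_* */
#define SAMPLE_IP	0x1
#define SAMPLE_TID	0x2
#define SAMPLE_TIME	0x4	/* deliberately outside the handleable set */

#define FREERUNNING_FLAGS	(SAMPLE_IP | SAMPLE_TID)

static int can_freerun(unsigned long long sample_type)
{
	/* true iff no requested bit lies outside FREERUNNING_FLAGS */
	return !(sample_type & ~(unsigned long long)FREERUNNING_FLAGS);
}

int main(void)
{
	assert(can_freerun(SAMPLE_IP));
	assert(can_freerun(SAMPLE_IP | SAMPLE_TID));
	assert(!can_freerun(SAMPLE_IP | SAMPLE_TIME));
	return 0;
}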
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 324817735771..b9826a981fb2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1903,9 +1903,8 @@ static void
 intel_start_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */

 	/*
 	 * nothing needed if in group validation mode
@@ -1916,35 +1915,52 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;

-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];

 	xl->sched_started = true;
-	xl->num_alloc_cntrs = 0;
 	/*
 	 * lock shared state until we are done scheduling
 	 * in stop_event_scheduling()
 	 * makes scheduling appear as a transaction
 	 */
-	WARN_ON_ONCE(!irqs_disabled());
 	raw_spin_lock(&excl_cntrs->lock);
+}

-	/*
-	 * save initial state of sibling thread
-	 */
-	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct event_constraint *c = cpuc->event_constraint[idx];
+	struct intel_excl_states *xl;
+	int tid = cpuc->excl_thread_id;
+
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+
+	if (WARN_ON_ONCE(!excl_cntrs))
+		return;
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+		return;
+
+	xl = &excl_cntrs->states[tid];
+
+	lockdep_assert_held(&excl_cntrs->lock);
+
+	if (c->flags & PERF_X86_EVENT_EXCL)
+		xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+	else
+		xl->state[cntr] = INTEL_EXCL_SHARED;
 }

 static void
 intel_stop_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */

 	/*
 	 * nothing needed if in group validation mode
@@ -1954,17 +1970,11 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;

-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];

-	/*
-	 * make new sibling thread state visible
-	 */
-	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
-
 	xl->sched_started = false;
 	/*
 	 * release shared state lock (acquired in intel_start_scheduling())
@@ -1976,12 +1986,10 @@ static struct event_constraint *
 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 			   int idx, struct event_constraint *c)
 {
-	struct event_constraint *cx;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
-	int is_excl, i;
+	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* alternate */
+	int is_excl, i;

 	/*
 	 * validating a group does not require
@@ -1993,34 +2001,8 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return c;
-	/*
-	 * event requires exclusive counter access
-	 * across HT threads
-	 */
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
-	/*
-	 * xl = state of current HT
-	 * xlo = state of sibling HT
-	 */
-	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
-
-	/*
-	 * do not allow scheduling of more than max_alloc_cntrs
-	 * which is set to half the available generic counters.
-	 * this helps avoid counter starvation of sibling thread
-	 * by ensuring at most half the counters cannot be in
-	 * exclusive mode. There is not designated counters for the
-	 * limits. Any N/2 counters can be used. This helps with
-	 * events with specifix counter constraints
-	 */
-	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
-		return &emptyconstraint;
-
-	cx = c;

 	/*
 	 * because we modify the constraint, we need
@@ -2031,10 +2013,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * been cloned (marked dynamic)
 	 */
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-
-		/* sanity check */
-		if (idx < 0)
-			return &emptyconstraint;
+		struct event_constraint *cx;

 		/*
 		 * grab pre-allocated constraint entry
@@ -2045,13 +2024,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 		 * initialize dynamic constraint
 		 * with static constraint
 		 */
-		memcpy(cx, c, sizeof(*cx));
+		*cx = *c;

 		/*
 		 * mark constraint as dynamic, so we
 		 * can free it later on
 		 */
 		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+		c = cx;
 	}

 	/*
@@ -2062,6 +2042,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 */

 	/*
+	 * state of sibling HT
+	 */
+	xlo = &excl_cntrs->states[tid ^ 1];
+
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
+
+	/*
 	 * Modify static constraint with current dynamic
 	 * state of thread
 	 *
@@ -2069,44 +2065,44 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * SHARED   : sibling counter measuring non-exclusive event
 	 * UNUSED   : sibling counter unused
 	 */
-	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
 		/*
 		 * exclusive event in sibling counter
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
-		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
-			__clear_bit(i, cx->idxmsk);
+		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+			__clear_bit(i, c->idxmsk);
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
-		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
-			__clear_bit(i, cx->idxmsk);
+		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+			__clear_bit(i, c->idxmsk);
 	}

 	/*
 	 * recompute actual bit weight for scheduling algorithm
 	 */
-	cx->weight = hweight64(cx->idxmsk64);
+	c->weight = hweight64(c->idxmsk64);

 	/*
 	 * if we return an empty mask, then switch
 	 * back to static empty constraint to avoid
 	 * the cost of freeing later on
 	 */
-	if (cx->weight == 0)
-		cx = &emptyconstraint;
+	if (c->weight == 0)
+		c = &emptyconstraint;

-	return cx;
+	return c;
 }

 static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			    struct perf_event *event)
 {
-	struct event_constraint *c1 = event->hw.constraint;
+	struct event_constraint *c1 = cpuc->event_constraint[idx];
 	struct event_constraint *c2;

 	/*
@@ -2132,10 +2128,8 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xlo, *xl;
-	unsigned long flags = 0; /* keep compiler happy */
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
+	struct intel_excl_states *xl;

 	/*
 	 * nothing needed if in group validation mode
@@ -2143,31 +2137,35 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	if (cpuc->is_fake)
 		return;

-	WARN_ON_ONCE(!excl_cntrs);
-
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;

-	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
+	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+		if (!--cpuc->n_excl)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+	}

 	/*
-	 * put_constraint may be called from x86_schedule_events()
-	 * which already has the lock held so here make locking
-	 * conditional
+	 * If event was actually assigned, then mark the counter state as
+	 * unused now.
 	 */
-	if (!xl->sched_started)
-		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+	if (hwc->idx >= 0) {
+		xl = &excl_cntrs->states[tid];

-	/*
-	 * if event was actually assigned, then mark the
-	 * counter state as unused now
-	 */
-	if (hwc->idx >= 0)
-		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+		/*
+		 * put_constraint may be called from x86_schedule_events()
+		 * which already has the lock held so here make locking
+		 * conditional.
+		 */
+		if (!xl->sched_started)
+			raw_spin_lock(&excl_cntrs->lock);

-	if (!xl->sched_started)
-		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+		if (!xl->sched_started)
+			raw_spin_unlock(&excl_cntrs->lock);
+	}
 }

 static void
@@ -2188,8 +2186,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	struct event_constraint *c = event->hw.constraint;
-
 	intel_put_shared_regs_event_constraints(cpuc, event);

 	/*
@@ -2197,48 +2193,8 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * all events are subject to and must call the
 	 * put_excl_constraints() routine
 	 */
-	if (c && cpuc->excl_cntrs)
+	if (cpuc->excl_cntrs)
 		intel_put_excl_constraints(cpuc, event);
-
-	/* cleanup dynamic constraint */
-	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC))
-		event->hw.constraint = NULL;
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc,
-				    struct perf_event *event, int cntr)
-{
-	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct event_constraint *c = event->hw.constraint;
-	struct intel_excl_states *xlo, *xl;
-	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
-	int is_excl;
-
-	if (cpuc->is_fake || !c)
-		return;
-
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
-	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
-		return;
-
-	WARN_ON_ONCE(!excl_cntrs);
-
-	if (!excl_cntrs)
-		return;
-
-	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
-
-	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
-
-	if (cntr >= 0) {
-		if (is_excl)
-			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
-		else
-			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
-	}
 }

 static void intel_pebs_aliases_core2(struct perf_event *event)
@@ -2304,8 +2260,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;

-	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
-		x86_pmu.pebs_aliases(event);
+	if (event->attr.precise_ip) {
+		if (!event->attr.freq) {
+			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+		}
+		if (x86_pmu.pebs_aliases)
+			x86_pmu.pebs_aliases(event);
+	}

 	if (needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -2554,19 +2517,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
 {
 	struct intel_excl_cntrs *c;
-	int i;

 	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
 			 GFP_KERNEL, cpu_to_node(cpu));
 	if (c) {
 		raw_spin_lock_init(&c->lock);
-		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
-			c->states[0].state[i] = INTEL_EXCL_UNUSED;
-			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
-
-			c->states[1].state[i] = INTEL_EXCL_UNUSED;
-			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
-		}
 		c->core_id = -1;
 	}
 	return c;
@@ -2639,8 +2594,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];

 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-		int h = x86_pmu.num_counters >> 1;
-
 		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
 			struct intel_excl_cntrs *c;

@@ -2654,11 +2607,6 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 		cpuc->excl_cntrs->core_id = core_id;
 		cpuc->excl_cntrs->refcnt++;
-		/*
-		 * set hard limit to half the number of generic counters
-		 */
-		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
-		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
 	}
 }

@@ -2694,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
 	fini_debug_store_on_cpu(cpu);
 }

+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+				 bool sched_in)
+{
+	if (x86_pmu.pebs_active)
+		intel_pmu_pebs_sched_task(ctx, sched_in);
+	if (x86_pmu.lbr_nr)
+		intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2783,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
-	.sched_task		= intel_pmu_lbr_sched_task,
+	.sched_task		= intel_pmu_sched_task,
 };

 static __init void intel_clovertown_quirk(void)
@@ -2956,8 +2913,8 @@ static __init void intel_ht_bug(void)
 {
 	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;

-	x86_pmu.commit_scheduling = intel_commit_scheduling;
 	x86_pmu.start_scheduling = intel_start_scheduling;
+	x86_pmu.commit_scheduling = intel_commit_scheduling;
 	x86_pmu.stop_scheduling = intel_stop_scheduling;
 }

@@ -3270,6 +3227,8 @@ __init int intel_pmu_init(void)

 	case 61: /* 14nm Broadwell Core-M */
 	case 86: /* 14nm Broadwell Xeon D */
+	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+	case 79: /* 14nm Broadwell Server */
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -3339,13 +3298,13 @@ __init int intel_pmu_init(void)
 		 * counter, so do not extend mask to generic counters
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != FIXED_EVENT_FLAGS
-			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
-				continue;
-			}
-
-			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-			c->weight += x86_pmu.num_counters;
+			if (c->cmask == FIXED_EVENT_FLAGS
+			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+				c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			}
+			c->idxmsk64 &=
+				~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+			c->weight = hweight64(c->idxmsk64);
 		}
 	}

@@ -3413,8 +3372,8 @@ static __init int fixup_ht_bug(void)

 	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

-	x86_pmu.commit_scheduling = NULL;
 	x86_pmu.start_scheduling = NULL;
+	x86_pmu.commit_scheduling = NULL;
 	x86_pmu.stop_scheduling = NULL;

 	watchdog_nmi_enable_all();
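The has_exclusive/exclusive_present union added to struct intel_excl_cntrs earlier in this series is what makes the gpmax /= 2 check in x86_schedule_events() cheap: each hyperthread sibling owns one u16 slot, and a single 32-bit read of the overlapping member answers "does either sibling have exclusive events?" without a loop or a lock. A standalone sketch of the trick:

#include <assert.h>
#include <stdint.h>

union excl_flags {
	uint16_t has_exclusive[2];	/* one flag per HT sibling */
	uint32_t exclusive_present;	/* both flags in one 32-bit load */
};

int main(void)
{
	union excl_flags f = { .exclusive_present = 0 };

	assert(!f.exclusive_present);		/* neither sibling has EXCL events */
	f.has_exclusive[1] = 1;			/* sibling 1 gains an EXCL event */
	assert(f.exclusive_present != 0);	/* one read covers both siblings */
	return 0;
}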
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index ac1f0c55f379..7795f3f8b1d5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -483,17 +483,26 @@ static int bts_event_add(struct perf_event *event, int mode)

 static void bts_event_destroy(struct perf_event *event)
 {
+	x86_release_hardware();
 	x86_del_exclusive(x86_lbr_exclusive_bts);
 }

 static int bts_event_init(struct perf_event *event)
 {
+	int ret;
+
 	if (event->attr.type != bts_pmu.type)
 		return -ENOENT;

 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;

+	ret = x86_reserve_hardware();
+	if (ret) {
+		x86_del_exclusive(x86_lbr_exclusive_bts);
+		return ret;
+	}
+
 	event->destroy = bts_event_destroy;

 	return 0;
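bts_event_init() above follows the usual acquire-in-order, roll-back-in-reverse discipline: the BTS exclusivity slot is taken first, then the PMC hardware; if the second step fails, the first is undone before reporting the error. A generic standalone sketch of that shape (resource names invented):

#include <stdio.h>

static int take_exclusive_slot(void) { return 0; }	/* stand-in: step 1 */
static void drop_exclusive_slot(void) { }
static int reserve_hardware(void) { return -16; }	/* stand-in: step 2 fails */

static int event_init(void)
{
	int ret = take_exclusive_slot();

	if (ret)
		return ret;

	ret = reserve_hardware();
	if (ret) {
		drop_exclusive_slot();	/* undo step 1 before reporting failure */
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("init: %d\n", event_init());
	return 0;
}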
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b738fa..188076161c1b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -13,16 +13,35 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
-static unsigned int cqm_max_rmid = -1;
+static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
-	raw_spinlock_t		lock;
-	int			rmid;
-	int			cnt;
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:		The cached Resource Monitoring ID
+ * @closid:		The cached Class Of Service ID
+ * @rmid_usecnt:	The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+	u32			rmid;
+	u32			closid;
+	int			rmid_usecnt;
 };
 
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -57,7 +76,7 @@ static cpumask_t cqm_cpumask;
  * near-zero occupancy value, i.e. no cachelines are tagged with this
  * RMID, once __intel_cqm_rmid_rotate() returns.
  */
-static unsigned int intel_cqm_rotation_rmid;
+static u32 intel_cqm_rotation_rmid;
 
 #define INVALID_RMID		(-1)
 
@@ -69,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
  * Likewise, an rmid value of -1 is used to indicate "no rmid currently
  * assigned" and is used as part of the rotation code.
  */
-static inline bool __rmid_valid(unsigned int rmid)
+static inline bool __rmid_valid(u32 rmid)
 {
 	if (!rmid || rmid == INVALID_RMID)
 		return false;
@@ -77,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
 	return true;
 }
 
-static u64 __rmid_read(unsigned int rmid)
+static u64 __rmid_read(u32 rmid)
 {
 	u64 val;
 
@@ -102,7 +121,7 @@ enum rmid_recycle_state {
 };
 
 struct cqm_rmid_entry {
-	unsigned int rmid;
+	u32 rmid;
 	enum rmid_recycle_state state;
 	struct list_head list;
 	unsigned long queue_time;
@@ -147,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
  */
 static struct cqm_rmid_entry **cqm_rmid_ptrs;
 
-static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -162,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
  *
  * We expect to be called with cache_mutex held.
  */
-static int __get_rmid(void)
+static u32 __get_rmid(void)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -177,7 +196,7 @@ static int __get_rmid(void)
 	return entry->rmid;
 }
 
-static void __put_rmid(unsigned int rmid)
+static void __put_rmid(u32 rmid)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -372,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 }
 
 struct rmid_read {
-	unsigned int rmid;
+	u32 rmid;
 	atomic64_t value;
 };
 
@@ -381,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
 /*
  * Exchange the RMID of a group of events.
  */
-static unsigned int
-intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 {
 	struct perf_event *event;
-	unsigned int old_rmid = group->hw.cqm_rmid;
 	struct list_head *head = &group->hw.cqm_group_entry;
+	u32 old_rmid = group->hw.cqm_rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -451,7 +469,7 @@ static void intel_cqm_stable(void *arg)
  * If we have group events waiting for an RMID that don't conflict with
  * events already running, assign @rmid.
  */
-static bool intel_cqm_sched_in_event(unsigned int rmid)
+static bool intel_cqm_sched_in_event(u32 rmid)
 {
 	struct perf_event *leader, *event;
 
@@ -598,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
 static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 {
 	struct perf_event *rotor;
-	unsigned int rmid;
+	u32 rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -626,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
 {
 	struct perf_event *group, *g;
-	unsigned int rmid;
+	u32 rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -828,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
 				  struct perf_event **group)
 {
 	struct perf_event *iter;
-	unsigned int rmid;
 	bool conflict = false;
+	u32 rmid;
 
 	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
 		rmid = iter->hw.cqm_rmid;
@@ -860,7 +878,7 @@ static void intel_cqm_event_read(struct perf_event *event)
 static void intel_cqm_event_read(struct perf_event *event)
 {
 	unsigned long flags;
-	unsigned int rmid;
+	u32 rmid;
 	u64 val;
 
 	/*
@@ -961,55 +979,48 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-	unsigned int rmid = event->hw.cqm_rmid;
-	unsigned long flags;
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+	u32 rmid = event->hw.cqm_rmid;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
 		return;
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
-
-	if (state->cnt++)
-		WARN_ON_ONCE(state->rmid != rmid);
-	else
+	if (state->rmid_usecnt++) {
+		if (!WARN_ON_ONCE(state->rmid != rmid))
+			return;
+	} else {
 		WARN_ON_ONCE(state->rmid);
+	}
 
 	state->rmid = rmid;
-	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
+	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-	unsigned long flags;
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
 
 	event->hw.cqm_state |= PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
 	intel_cqm_event_read(event);
 
-	if (!--state->cnt) {
+	if (!--state->rmid_usecnt) {
 		state->rmid = 0;
-		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
 {
 	unsigned long flags;
-	unsigned int rmid;
+	u32 rmid;
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 
@@ -1024,11 +1035,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 	return 0;
 }
 
-static void intel_cqm_event_del(struct perf_event *event, int mode)
-{
-	intel_cqm_event_stop(event, mode);
-}
-
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
@@ -1057,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 		list_replace(&event->hw.cqm_groups_entry,
 			     &group_other->hw.cqm_groups_entry);
 	} else {
-		unsigned int rmid = event->hw.cqm_rmid;
+		u32 rmid = event->hw.cqm_rmid;
 
 		if (__rmid_valid(rmid))
 			__put_rmid(rmid);
@@ -1221,7 +1227,7 @@ static struct pmu intel_cqm_pmu = {
 	.task_ctx_nr	     = perf_sw_context,
 	.event_init	     = intel_cqm_event_init,
 	.add		     = intel_cqm_event_add,
-	.del		     = intel_cqm_event_del,
+	.del		     = intel_cqm_event_stop,
 	.start		     = intel_cqm_event_start,
 	.stop		     = intel_cqm_event_stop,
 	.read		     = intel_cqm_event_read,
@@ -1243,12 +1249,12 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	raw_spin_lock_init(&state->lock);
 	state->rmid = 0;
-	state->cnt  = 0;
+	state->closid = 0;
+	state->rmid_usecnt = 0;
 
 	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
 	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
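The intel_pqr_state kernel-doc above describes the MSR layout this patch relies on: the closid in the upper 32 bits of PQR_ASSOC, the RMID in the low bits, always written together. A minimal user-space sketch of that update path, with do_wrmsr() standing in for the real wrmsr instruction:

#include <stdint.h>
#include <stdio.h>

struct pqr_state {
	uint32_t rmid;
	uint32_t closid;
};

static void do_wrmsr(uint32_t lo, uint32_t hi)
{
	/* the real MSR write takes the low and high halves separately */
	printf("wrmsr PQR_ASSOC = %#018llx\n",
	       ((unsigned long long)hi << 32) | lo);
}

static void pqr_update(struct pqr_state *state, uint32_t rmid, uint32_t closid)
{
	/* the cache makes redundant MSR writes cheap to skip */
	if (state->rmid == rmid && state->closid == closid)
		return;

	state->rmid = rmid;
	state->closid = closid;
	do_wrmsr(rmid, closid);
}

int main(void)
{
	struct pqr_state state = { 0, 0 };

	pqr_update(&state, 5, 0);
	pqr_update(&state, 5, 0);	/* skipped: nothing changed */
	return 0;
}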
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 813f75d71175..71fc40238843 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -11,7 +11,7 @@
 #define BTS_RECORD_SIZE		24
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	PAGE_SIZE
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
-	int max, thresh = 1; /* always use a single PEBS record */
+	int max;
 	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
 	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
 		max * x86_pmu.pebs_record_size;
 
-	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-		thresh * x86_pmu.pebs_record_size;
-
 	return 0;
 }
 
@@ -549,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
 	return 1;
 }
 
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+	struct pt_regs regs;
+
+	x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	if (!sched_in)
+		intel_pmu_drain_pebs_buffer();
+}
+
 /*
  * PEBS
  */
@@ -684,33 +694,81 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
+	struct debug_store *ds = cpuc->ds;
+	bool first_pebs;
+	u64 threshold;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
+	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled |= 1ULL << 63;
+
+	/*
+	 * When the event is constrained enough we can use a larger
+	 * threshold and run the event with less frequent PMI.
+	 */
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+		if (first_pebs)
+			perf_sched_cb_inc(event->ctx->pmu);
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+		/*
+		 * If not all events can use larger buffer,
+		 * roll back to threshold = 1
+		 */
+		if (!first_pebs &&
+		    (ds->pebs_interrupt_threshold > threshold))
+			perf_sched_cb_dec(event->ctx->pmu);
+	}
+
+	/* Use auto-reload if possible to save a MSR write in the PMI */
+	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+		ds->pebs_event_reset[hwc->idx] =
+			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	}
+
+	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+		ds->pebs_interrupt_threshold = threshold;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
+	struct debug_store *ds = cpuc->ds;
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
-	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
+	if (ds->pebs_interrupt_threshold >
+	    ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
+		intel_pmu_drain_pebs_buffer();
+		if (!pebs_is_enabled(cpuc))
+			perf_sched_cb_dec(event->ctx->pmu);
+	}
+
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -846,8 +904,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
 	return txn;
 }
 
-static void __intel_pmu_pebs_event(struct perf_event *event,
-				   struct pt_regs *iregs, void *__pebs)
+static void setup_pebs_sample_data(struct perf_event *event,
+				   struct pt_regs *iregs, void *__pebs,
+				   struct perf_sample_data *data,
+				   struct pt_regs *regs)
 {
 #define PERF_X86_EVENT_PEBS_HSW_PREC \
 		(PERF_X86_EVENT_PEBS_ST_HSW | \
@@ -859,13 +919,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 */
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct pebs_record_hsw *pebs = __pebs;
-	struct perf_sample_data data;
-	struct pt_regs regs;
 	u64 sample_type;
 	int fll, fst, dsrc;
 	int fl = event->hw.flags;
 
-	if (!intel_pmu_save_and_restart(event))
+	if (pebs == NULL)
 		return;
 
 	sample_type = event->attr.sample_type;
@@ -874,15 +932,15 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
 	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
 
-	perf_sample_data_init(&data, 0, event->hw.last_period);
+	perf_sample_data_init(data, 0, event->hw.last_period);
 
-	data.period = event->hw.last_period;
+	data->period = event->hw.last_period;
 
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
 	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-		data.weight = pebs->lat;
+		data->weight = pebs->lat;
 
 	/*
 	 * data.data_src encodes the data source
@@ -895,7 +953,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 			val = precise_datala_hsw(event, pebs->dse);
 		else if (fst)
 			val = precise_store_data(pebs->dse);
-		data.data_src.val = val;
+		data->data_src.val = val;
 	}
 
 	/*
@@ -908,61 +966,123 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
 	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
 	 */
-	regs = *iregs;
-	regs.flags = pebs->flags;
-	set_linear_ip(&regs, pebs->ip);
-	regs.bp = pebs->bp;
-	regs.sp = pebs->sp;
+	*regs = *iregs;
+	regs->flags = pebs->flags;
+	set_linear_ip(regs, pebs->ip);
+	regs->bp = pebs->bp;
+	regs->sp = pebs->sp;
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
-		regs.ax = pebs->ax;
-		regs.bx = pebs->bx;
-		regs.cx = pebs->cx;
-		regs.dx = pebs->dx;
-		regs.si = pebs->si;
-		regs.di = pebs->di;
-		regs.bp = pebs->bp;
-		regs.sp = pebs->sp;
+		regs->ax = pebs->ax;
+		regs->bx = pebs->bx;
+		regs->cx = pebs->cx;
+		regs->dx = pebs->dx;
+		regs->si = pebs->si;
+		regs->di = pebs->di;
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
-		regs.flags = pebs->flags;
+		regs->flags = pebs->flags;
 #ifndef CONFIG_X86_32
-		regs.r8 = pebs->r8;
-		regs.r9 = pebs->r9;
-		regs.r10 = pebs->r10;
-		regs.r11 = pebs->r11;
-		regs.r12 = pebs->r12;
-		regs.r13 = pebs->r13;
-		regs.r14 = pebs->r14;
-		regs.r15 = pebs->r15;
+		regs->r8 = pebs->r8;
+		regs->r9 = pebs->r9;
+		regs->r10 = pebs->r10;
+		regs->r11 = pebs->r11;
+		regs->r12 = pebs->r12;
+		regs->r13 = pebs->r13;
+		regs->r14 = pebs->r14;
+		regs->r15 = pebs->r15;
 #endif
 	}
 
 	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs.ip = pebs->real_ip;
-		regs.flags |= PERF_EFLAGS_EXACT;
-	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
-		regs.flags |= PERF_EFLAGS_EXACT;
+		regs->ip = pebs->real_ip;
+		regs->flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+		regs->flags |= PERF_EFLAGS_EXACT;
 	else
-		regs.flags &= ~PERF_EFLAGS_EXACT;
+		regs->flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((sample_type & PERF_SAMPLE_ADDR) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
-		data.addr = pebs->dla;
+		data->addr = pebs->dla;
 
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
 		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-			data.weight = intel_hsw_weight(pebs);
+			data->weight = intel_hsw_weight(pebs);
 
 		if (sample_type & PERF_SAMPLE_TRANSACTION)
-			data.txn = intel_hsw_transaction(pebs);
+			data->txn = intel_hsw_transaction(pebs);
 	}
 
 	if (has_branch_stack(event))
-		data.br_stack = &cpuc->lbr_stack;
+		data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	void *at;
+	u64 pebs_status;
+
+	if (base == NULL)
+		return NULL;
+
+	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
 
-	if (perf_event_overflow(event, &data, &regs))
+		if (test_bit(bit, (unsigned long *)&p->status)) {
+			/* PEBS v3 has accurate status bits */
+			if (x86_pmu.intel_cap.pebs_format >= 3)
+				return at;
+
+			if (p->status == (1 << bit))
+				return at;
+
+			/* clear non-PEBS bit and re-check */
+			pebs_status = p->status & cpuc->pebs_enabled;
+			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			if (pebs_status == (1 << bit))
+				return at;
+		}
+	}
+	return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+				   struct pt_regs *iregs,
+				   void *base, void *top,
+				   int bit, int count)
+{
+	struct perf_sample_data data;
+	struct pt_regs regs;
+	void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+	if (!intel_pmu_save_and_restart(event) &&
+	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+		return;
+
+	while (count > 1) {
+		setup_pebs_sample_data(event, iregs, at, &data, &regs);
+		perf_event_output(event, &data, &regs);
+		at += x86_pmu.pebs_record_size;
+		at = get_next_pebs_record_by_bit(at, top, bit);
+		count--;
+	}
+
+	setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+	/*
+	 * All but the last records are processed.
+	 * The last one is left to be able to call the overflow handler.
+	 */
+	if (perf_event_overflow(event, &data, &regs)) {
 		x86_pmu_stop(event, 0);
+		return;
+	}
+
 }
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
@@ -992,72 +1112,99 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	if (!event->attr.precise_ip)
 		return;
 
-	n = top - at;
+	n = (top - at) / x86_pmu.pebs_record_size;
 	if (n <= 0)
 		return;
 
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
-	at += n - 1;
-
-	__intel_pmu_pebs_event(event, iregs, at);
+	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
 }
 
 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct perf_event *event = NULL;
-	void *at, *top;
-	u64 status = 0;
-	int bit;
+	struct perf_event *event;
+	void *base, *at, *top;
+	short counts[MAX_PEBS_EVENTS] = {};
+	short error[MAX_PEBS_EVENTS] = {};
+	int bit, i;
 
 	if (!x86_pmu.pebs_active)
 		return;
 
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	if (unlikely(at > top))
+	if (unlikely(base >= top))
 		return;
 
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
-		  "Unexpected number of pebs records %ld\n",
-		  (long)(top - at) / x86_pmu.pebs_record_size);
-
-	for (; at < top; at += x86_pmu.pebs_record_size) {
+	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
-		for_each_set_bit(bit, (unsigned long *)&p->status,
-				 x86_pmu.max_pebs_events) {
-			event = cpuc->events[bit];
-			if (!test_bit(bit, cpuc->active_mask))
-				continue;
-
-			WARN_ON_ONCE(!event);
+		/* PEBS v3 has accurate status bits */
+		if (x86_pmu.intel_cap.pebs_format >= 3) {
+			for_each_set_bit(bit, (unsigned long *)&p->status,
+					 MAX_PEBS_EVENTS)
+				counts[bit]++;
 
-			if (!event->attr.precise_ip)
-				continue;
+			continue;
+		}
 
-			if (__test_and_set_bit(bit, (unsigned long *)&status))
-				continue;
-
-			break;
-		}
+		bit = find_first_bit((unsigned long *)&p->status,
+				     x86_pmu.max_pebs_events);
+		if (bit >= x86_pmu.max_pebs_events)
+			continue;
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+		/*
+		 * The PEBS hardware does not deal well with the situation
+		 * when events happen near to each other and multiple bits
+		 * are set. But it should happen rarely.
+		 *
+		 * If these events include one PEBS and multiple non-PEBS
+		 * events, it doesn't impact PEBS record. The record will
+		 * be handled normally. (slow path)
+		 *
+		 * If these events include two or more PEBS events, the
+		 * records for the events can be collapsed into a single
+		 * one, and it's not possible to reconstruct all events
+		 * that caused the PEBS record. It's called collision.
+		 * If collision happened, the record will be dropped.
+		 *
+		 */
+		if (p->status != (1 << bit)) {
+			u64 pebs_status;
+
+			/* slow path */
+			pebs_status = p->status & cpuc->pebs_enabled;
+			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			if (pebs_status != (1 << bit)) {
+				for_each_set_bit(i, (unsigned long *)&pebs_status,
						 MAX_PEBS_EVENTS)
+					error[i]++;
+				continue;
+			}
+		}
+		counts[bit]++;
+	}
 
-		if (!event || bit >= x86_pmu.max_pebs_events)
+	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+		if ((counts[bit] == 0) && (error[bit] == 0))
 			continue;
+		event = cpuc->events[bit];
+		WARN_ON_ONCE(!event);
+		WARN_ON_ONCE(!event->attr.precise_ip);
 
-		__intel_pmu_pebs_event(event, iregs, at);
+		/* log dropped samples number */
+		if (error[bit])
+			perf_log_lost_samples(event, error[bit]);
+
+		if (counts[bit]) {
+			__intel_pmu_pebs_event(event, iregs, base,
+					       top, bit, counts[bit]);
+		}
 	}
 }
 
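The drain_pebs_nhm() rewrite attributes each record via its status bits and drops ambiguous ones as collisions. A small standalone sketch of just that status test, assuming the kernel's MAX_PEBS_EVENTS value of 8; classify_record() itself is hypothetical:

#include <stdint.h>
#include <stdio.h>

#define MAX_PEBS_EVENTS 8

static int classify_record(uint64_t status, uint64_t pebs_enabled, int bit)
{
	uint64_t pebs_status;

	if (status == (1ULL << bit))
		return 0;			/* fast path: clean record */

	/* slow path: drop non-PEBS bits and re-check */
	pebs_status = status & pebs_enabled;
	pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;

	return pebs_status == (1ULL << bit) ? 0 : -1;	/* -1: collision */
}

int main(void)
{
	/* counter 0 PEBS-enabled, bit 3 set by a non-PEBS event: keep */
	printf("%d\n", classify_record(0x9, 0x1, 0));
	/* counters 0 and 1 both PEBS-enabled and both set: dropped */
	printf("%d\n", classify_record(0x3, 0x3, 0));
	return 0;
}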
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 94e5b506caa6..452a7bd2dedb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -96,6 +96,7 @@ enum {
 	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
+	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -113,6 +114,7 @@ enum {
 	 X86_BR_IRQ	|\
 	 X86_BR_ABORT	|\
 	 X86_BR_IND_CALL |\
+	 X86_BR_IND_JMP |\
 	 X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -262,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
-	if (!x86_pmu.lbr_nr)
-		return;
-
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
@@ -523,6 +522,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 			X86_BR_CALL_STACK;
 	}
 
+	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+		mask |= X86_BR_IND_JMP;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -736,7 +738,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 		break;
 	case 4:
 	case 5:
-		ret = X86_BR_JMP;
+		ret = X86_BR_IND_JMP;
 		break;
 	}
 	break;
@@ -844,6 +846,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	 */
 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -856,6 +859,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 						| LBR_FAR,
 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
 };
 
 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -870,6 +874,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL
 					      | LBR_RETURN | LBR_CALL_STACK,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
 };
 
 /* core */
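The LBR changes add X86_BR_IND_JMP as one more bit in the software branch filter: the user's branch-sample request is translated into a type mask, and each decoded branch is kept only if its type bit is set in that mask. A toy sketch of the idea, with illustrative (non-kernel) bit values:

#include <stdio.h>

enum {
	BR_JCC      = 1 << 0,	/* conditional */
	BR_IND_CALL = 1 << 1,	/* indirect call */
	BR_IND_JMP  = 1 << 2,	/* indirect jump */
};

static int branch_passes_filter(int br_type, int mask)
{
	/* keep the record only if its decoded type is requested */
	return (br_type & mask) != 0;
}

int main(void)
{
	int mask = BR_IND_JMP;	/* user asked for indirect jumps only */

	printf("ind jmp kept: %d\n", branch_passes_filter(BR_IND_JMP, mask));
	printf("cond kept:    %d\n", branch_passes_filter(BR_JCC, mask));
	return 0;
}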
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index ffe666c2c6b5..159887c3a89d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)
 
 		de_attr->attr.attr.name = pt_caps[i].name;
 
-		sysfs_attr_init(&de_attrs->attr.attr);
+		sysfs_attr_init(&de_attr->attr.attr);
 
 		de_attr->attr.attr.mode		= S_IRUGO;
 		de_attr->attr.show		= pt_cap_show;
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
-static bool pt_is_running(void)
-{
-	u64 ctl;
-
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-
-	return !!(ctl & RTIT_CTL_TRACEEN);
-}
-
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -609,16 +600,19 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
  * @handle:	Current output handle.
  *
  * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected.
+ * hasn't yet collected and waking up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
  */
 static int pt_buffer_reset_markers(struct pt_buffer *buf,
 				   struct perf_output_handle *handle)
 
 {
-	unsigned long idx, npages, end;
-
-	if (buf->snapshot)
-		return 0;
+	unsigned long head = local64_read(&buf->head);
+	unsigned long idx, npages, wakeup;
 
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 <
@@ -634,17 +628,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 	buf->topa_index[buf->stop_pos]->stop = 0;
 	buf->topa_index[buf->intr_pos]->intr = 0;
 
-	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-		npages = (handle->size + 1) >> PAGE_SHIFT;
-		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages;
-		/*if (end > handle->wakeup >> PAGE_SHIFT)
-			end = handle->wakeup >> PAGE_SHIFT;*/
-		idx = end & (buf->nr_pages - 1);
-		buf->stop_pos = idx;
-		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1;
-		idx &= buf->nr_pages - 1;
-		buf->intr_pos = idx;
-	}
+	/* how many pages till the STOP marker */
+	npages = handle->size >> PAGE_SHIFT;
+
+	/* if it's on a page boundary, fill up one more page */
+	if (!offset_in_page(head + handle->size + 1))
+		npages++;
+
+	idx = (head >> PAGE_SHIFT) + npages;
+	idx &= buf->nr_pages - 1;
+	buf->stop_pos = idx;
+
+	wakeup = handle->wakeup >> PAGE_SHIFT;
+
+	/* in the worst case, wake up the consumer one page before hard stop */
+	idx = (head >> PAGE_SHIFT) + npages - 1;
+	if (idx > wakeup)
+		idx = wakeup;
+
+	idx &= buf->nr_pages - 1;
+	buf->intr_pos = idx;
 
 	buf->topa_index[buf->stop_pos]->stop = 1;
 	buf->topa_index[buf->intr_pos]->intr = 1;
@@ -664,7 +667,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
 	struct topa *cur = buf->first, *prev = buf->last;
 	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
 		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
-	int pg = 0, idx = 0, ntopa = 0;
+	int pg = 0, idx = 0;
 
 	while (pg < buf->nr_pages) {
 		int tidx;
@@ -679,9 +682,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
 			/* advance to next topa table */
 			idx = 0;
 			cur = list_entry(cur->list.next, struct topa, list);
-			ntopa++;
-		} else
+		} else {
 			idx++;
+		}
 		te_cur = TOPA_ENTRY(cur, idx);
 	}
 
@@ -693,7 +696,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
  * @head:	Write pointer (aux_head) from AUX buffer.
  *
 * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly.
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
 */
 static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 {
@@ -891,6 +901,7 @@ void intel_pt_interrupt(void)
 	}
 
 	pt_buffer_reset_offsets(buf, pt->handle.head);
+	/* snapshot counters don't use PMI, so it's safe */
 	ret = pt_buffer_reset_markers(buf, &pt->handle);
 	if (ret) {
 		perf_aux_output_end(&pt->handle, 0, true);
@@ -913,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
-	if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+	if (!buf || pt_buffer_is_full(buf, pt)) {
 		event->hw.state = PERF_HES_STOPPED;
 		return;
 	}
@@ -944,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	event->hw.state = PERF_HES_STOPPED;
 
 	if (mode & PERF_EF_UPDATE) {
-		struct pt *pt = this_cpu_ptr(&pt_ctx);
 		struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
 		if (!buf)
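pt_buffer_reset_markers() now derives the STOP and INT pages purely from head, size, and the handle's wakeup point. The same arithmetic, lifted into a standalone sketch; compute_markers() is hypothetical and PAGE_SHIFT is fixed at 12 for the example:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

static void compute_markers(unsigned long head, unsigned long size,
			    unsigned long wakeup, unsigned long nr_pages,
			    unsigned long *stop_pos, unsigned long *intr_pos)
{
	unsigned long npages = size >> PAGE_SHIFT;
	unsigned long idx;

	/* if it's on a page boundary, fill up one more page */
	if (!((head + size + 1) & (PAGE_SIZE - 1)))
		npages++;

	*stop_pos = ((head >> PAGE_SHIFT) + npages) & (nr_pages - 1);

	/* wake the consumer at most one page before the hard stop */
	idx = (head >> PAGE_SHIFT) + npages - 1;
	if (idx > (wakeup >> PAGE_SHIFT))
		idx = wakeup >> PAGE_SHIFT;
	*intr_pos = idx & (nr_pages - 1);
}

int main(void)
{
	unsigned long stop, intr;

	compute_markers(0x3000, 0x5000, 0x6000, 16, &stop, &intr);
	printf("stop page %lu, intr page %lu\n", stop, intr);
	return 0;
}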
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c635b8b49e93..7c1de1610178 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 
 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-		hwc = &box->event_list[i]->hw;
 		c = uncore_get_event_constraint(box, box->event_list[i]);
-		hwc->constraint = c;
+		box->event_constraint[i] = c;
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	/* fastpath, try to reuse previous register */
 	for (i = 0; i < n; i++) {
 		hwc = &box->event_list[i]->hw;
-		c = hwc->constraint;
+		c = box->event_constraint[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
 	}
 	/* slow path */
 	if (i != n)
-		ret = perf_assign_events(box->event_list, n,
-					 wmin, wmax, assign);
+		ret = perf_assign_events(box->event_constraint, n,
+					 wmin, wmax, n, assign);
 
 	if (!assign || ret) {
 		for (i = 0; i < n; i++)
@@ -840,6 +839,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	box->phys_id = phys_id;
 	box->pci_dev = pdev;
 	box->pmu = pmu;
+	uncore_box_init(box);
 	pci_set_drvdata(pdev, box);
 
 	raw_spin_lock(&uncore_box_lock);
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
 	case 69: /* Haswell Celeron */
 		ret = hsw_uncore_pci_init();
 		break;
+	case 61: /* Broadwell */
+		ret = bdw_uncore_pci_init();
+		break;
 	default:
 		return 0;
 	}
@@ -1003,8 +1006,10 @@ static int uncore_cpu_starting(int cpu)
 			pmu = &type->pmus[j];
 			box = *per_cpu_ptr(pmu->box, cpu);
 			/* called by uncore_cpu_init? */
-			if (box && box->phys_id >= 0)
+			if (box && box->phys_id >= 0) {
+				uncore_box_init(box);
 				continue;
+			}
 
 			for_each_online_cpu(k) {
 				exist = *per_cpu_ptr(pmu->box, k);
@@ -1020,8 +1025,10 @@ static int uncore_cpu_starting(int cpu)
 				}
 			}
 
-			if (box)
+			if (box) {
 				box->phys_id = phys_id;
+				uncore_box_init(box);
+			}
 		}
 	}
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 6c8c1e7e69d8..0f77f0a196e4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -97,6 +97,7 @@ struct intel_uncore_box {
 	atomic_t refcnt;
 	struct perf_event *events[UNCORE_PMC_IDX_MAX];
 	struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+	struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
 	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 	u64 tags[UNCORE_PMC_IDX_MAX];
 	struct pci_dev *pci_dev;
@@ -257,14 +258,6 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)
 	return box->pmu->type->num_counters;
 }
 
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
-	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
-		if (box->pmu->type->ops->init_box)
-			box->pmu->type->ops->init_box(box);
-	}
-}
-
 static inline void uncore_disable_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->type->ops->disable_box)
@@ -273,8 +266,6 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)
 
 static inline void uncore_enable_box(struct intel_uncore_box *box)
 {
-	uncore_box_init(box);
-
 	if (box->pmu->type->ops->enable_box)
 		box->pmu->type->ops->enable_box(box);
 }
@@ -297,6 +288,14 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,
 	return box->pmu->type->ops->read_counter(box, event);
 }
 
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+		if (box->pmu->type->ops->init_box)
+			box->pmu->type->ops->init_box(box);
+	}
+}
+
 static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
 {
 	return (box->phys_id < 0);
@@ -326,6 +325,7 @@ extern struct event_constraint uncore_constraint_empty;
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 4562e9e22c60..b005a78c7012 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -7,6 +7,7 @@
 #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -486,6 +487,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
 	{ /* end: all zeroes */ },
 };
 
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
 	.name		= "snb_uncore",
 	.id_table	= snb_uncore_pci_ids,
@@ -501,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
 	.id_table	= hsw_uncore_pci_ids,
 };
 
+static struct pci_driver bdw_uncore_pci_driver = {
+	.name		= "bdw_uncore",
+	.id_table	= bdw_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
 	__u32 pci_id;
 	struct pci_driver *driver;
@@ -514,6 +528,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver),	/* Xeon E3-1200 v2/3rd Gen Core processor */
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),	/* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),	/* 4th Gen Core ULT Mobile Processor */
+	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),	/* 5th Gen Core U */
 	{  /* end marker */ }
 };
 
@@ -561,6 +576,11 @@ int hsw_uncore_pci_init(void)
 	return imc_uncore_pci_init();
 }
 
+int bdw_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 12d9548457e7..6d6e85dd5849 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -164,8 +164,8 @@
 				((1ULL << (n)) - 1)))
 
 /* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0			0x705
-#define HSWEP_U_MSR_PMON_CTL0			0x709
+#define HSWEP_U_MSR_PMON_CTR0			0x709
+#define HSWEP_U_MSR_PMON_CTL0			0x705
 #define HSWEP_U_MSR_PMON_FILTER			0x707
 
 #define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL		0x703
@@ -1914,7 +1914,7 @@ static struct intel_uncore_type hswep_uncore_cbox = {
 	.name			= "cbox",
 	.num_counters		= 4,
 	.num_boxes		= 18,
-	.perf_ctr_bits		= 44,
+	.perf_ctr_bits		= 48,
 	.event_ctl		= HSWEP_C0_MSR_PMON_CTL0,
 	.perf_ctr		= HSWEP_C0_MSR_PMON_CTR0,
 	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2b55ee6db053..5a4668136e98 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	clear_bss();
 
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-		set_intr_gate(i, early_idt_handlers[i]);
+		set_intr_gate(i, early_idt_handler_array[i]);
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d031bad9e07e..53eeb226657c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -478,21 +478,22 @@ is486:
 __INIT
 setup_once:
 	/*
-	 * Set up a idt with 256 entries pointing to ignore_int,
-	 * interrupt gates. It doesn't actually load idt - that needs
-	 * to be done on each CPU. Interrupts are enabled elsewhere,
-	 * when we can be relatively sure everything is ok.
+	 * Set up a idt with 256 interrupt gates that push zero if there
+	 * is no error code and then jump to early_idt_handler_common.
+	 * It doesn't actually load the idt - that needs to be done on
+	 * each CPU. Interrupts are enabled elsewhere, when we can be
+	 * relatively sure everything is ok.
 	 */
 
 	movl $idt_table,%edi
-	movl $early_idt_handlers,%eax
+	movl $early_idt_handler_array,%eax
 	movl $NUM_EXCEPTION_VECTORS,%ecx
 1:
 	movl %eax,(%edi)
 	movl %eax,4(%edi)
 	/* interrupt gate, dpl=0, present */
 	movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-	addl $9,%eax
+	addl $EARLY_IDT_HANDLER_SIZE,%eax
 	addl $8,%edi
 	loop 1b
 
@@ -524,26 +525,28 @@ setup_once:
 	andl $0,setup_once_ref	/* Once is enough, thanks */
 	ret
 
-ENTRY(early_idt_handlers)
+ENTRY(early_idt_handler_array)
 	# 36(%esp) %eflags
 	# 32(%esp) %cs
 	# 28(%esp) %eip
 	# 24(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.if (EXCEPTION_ERRCODE_MASK >> i) & 1
-	ASM_NOP2
-	.else
+	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
 	pushl $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushl $i		# 20(%esp) Vector number
-	jmp early_idt_handler
+	jmp early_idt_handler_common
 	i = i + 1
+	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-ENDPROC(early_idt_handlers)
+ENDPROC(early_idt_handler_array)
 
-	/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+	/*
+	 * The stack is the hardware frame, an error code or zero, and the
+	 * vector number.
+	 */
 	cld
 
 	cmpl $2,(%esp)		# X86_TRAP_NMI
@@ -603,7 +606,7 @@ ex_entry:
 is_nmi:
 	addl $8,%esp		/* drop vector number and error code */
 	iret
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
 	ALIGN
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ae6588b301c2..df7e78057ae0 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -321,26 +321,28 @@ bad_address:
 	jmp bad_address
 
 	__INIT
-	.globl early_idt_handlers
-early_idt_handlers:
+ENTRY(early_idt_handler_array)
 	# 104(%rsp) %rflags
 	#  96(%rsp) %cs
 	#  88(%rsp) %rip
 	#  80(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.if (EXCEPTION_ERRCODE_MASK >> i) & 1
-	ASM_NOP2
-	.else
+	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
 	pushq $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushq $i		# 72(%rsp) Vector number
-	jmp early_idt_handler
+	jmp early_idt_handler_common
 	i = i + 1
+	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
+ENDPROC(early_idt_handler_array)
 
-/* This is global to keep gas from relaxing the jumps */
-ENTRY(early_idt_handler)
+early_idt_handler_common:
+	/*
+	 * The stack is the hardware frame, an error code or zero, and the
+	 * vector number.
+	 */
 	cld
 
 	cmpl $2,(%rsp)		# X86_TRAP_NMI
@@ -412,7 +414,7 @@ ENTRY(early_idt_handler)
 is_nmi:
 	addq $16,%rsp		# drop vector number and error code
 	INTERRUPT_RETURN
-ENDPROC(early_idt_handler)
+ENDPROC(early_idt_handler_common)
 
 	__INITDATA
 
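Both head_32.S and head_64.S now pad each early IDT stub to EARLY_IDT_HANDLER_SIZE bytes so the C side can index the renamed array arithmetically, as head64.c does with set_intr_gate(i, early_idt_handler_array[i]). A C sketch of that fixed-stride indexing; the stub contents are elided and the size value mirrors the 9-byte stubs above:

#include <stdio.h>

#define NUM_EXCEPTION_VECTORS	32
#define EARLY_IDT_HANDLER_SIZE	9	/* bytes per stub, incl. padding */

/* stand-in for the assembly stubs; contents elided */
static const unsigned char
early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];

int main(void)
{
	int i;

	/* the analogue of set_intr_gate(i, early_idt_handler_array[i]) */
	for (i = 0; i < 3; i++)
		printf("vector %d -> stub %p (stride %d bytes)\n", i,
		       (const void *)early_idt_handler_array[i],
		       EARLY_IDT_HANDLER_SIZE);
	return 0;
}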
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9435620062df..1681504e44a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
584 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); 584 kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
585} 585}
586 586
587
588#ifdef CONFIG_QUEUED_SPINLOCKS
589
590#include <asm/qspinlock.h>
591
592static void kvm_wait(u8 *ptr, u8 val)
593{
594 unsigned long flags;
595
596 if (in_nmi())
597 return;
598
599 local_irq_save(flags);
600
601 if (READ_ONCE(*ptr) != val)
602 goto out;
603
604 /*
 605	 * Halt until it's our turn and we get kicked. Note that we do a safe
 606	 * halt in the irq-enabled case to avoid hanging when lock info is
 607	 * overwritten in the irq spinlock slowpath and no spurious interrupt occurs to save us.
608 */
609 if (arch_irqs_disabled_flags(flags))
610 halt();
611 else
612 safe_halt();
613
614out:
615 local_irq_restore(flags);
616}
617
618#else /* !CONFIG_QUEUED_SPINLOCKS */
619
587enum kvm_contention_stat { 620enum kvm_contention_stat {
588 TAKEN_SLOW, 621 TAKEN_SLOW,
589 TAKEN_SLOW_PICKUP, 622 TAKEN_SLOW_PICKUP,
@@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
817 } 850 }
818} 851}
819 852
853#endif /* !CONFIG_QUEUED_SPINLOCKS */
854
820/* 855/*
821 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. 856 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
822 */ 857 */
@@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
828 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) 863 if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
829 return; 864 return;
830 865
866#ifdef CONFIG_QUEUED_SPINLOCKS
867 __pv_init_lock_hash();
868 pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
869 pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
870 pv_lock_ops.wait = kvm_wait;
871 pv_lock_ops.kick = kvm_kick_cpu;
872#else /* !CONFIG_QUEUED_SPINLOCKS */
831 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning); 873 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
832 pv_lock_ops.unlock_kick = kvm_unlock_kick; 874 pv_lock_ops.unlock_kick = kvm_unlock_kick;
875#endif
833} 876}
834 877
835static __init int kvm_spinlock_init_jump(void) 878static __init int kvm_spinlock_init_jump(void)
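
kvm_wait() and kvm_kick_cpu() are the two halves of the paravirtual queued-spinlock handshake: the waiter disables interrupts, re-checks the lock byte, and halts, while the holder's unlock path kicks the waiter's vCPU. Re-checking *ptr after disabling interrupts is what closes the race with a kick arriving between queueing and halting. A minimal sketch of how a slowpath drives the pair (pv_wait()/pv_kick() stand for the pv_lock_ops hooks wired up in kvm_spinlock_init() below; the loop is illustrative, not the real __pv_queued_spin_lock_slowpath):

	/* Waiter side: park until the byte leaves the 'parked' state. */
	static void wait_for_unpark(u8 *state, u8 parked)
	{
		while (READ_ONCE(*state) == parked)
			pv_wait(state, parked);	/* may return spuriously; re-check */
	}

	/* Waker side: publish the state change, then kick the halted vCPU. */
	static void unpark(u8 *state, int waiter_cpu)
	{
		smp_store_release(state, 0);	/* order the store before the kick */
		pv_kick(waiter_cpu);
	}
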
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index bbb6c7316341..33ee3e0efd65 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,11 +8,33 @@
8 8
9#include <asm/paravirt.h> 9#include <asm/paravirt.h>
10 10
11#ifdef CONFIG_QUEUED_SPINLOCKS
12__visible void __native_queued_spin_unlock(struct qspinlock *lock)
13{
14 native_queued_spin_unlock(lock);
15}
16
17PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
18
19bool pv_is_native_spin_unlock(void)
20{
21 return pv_lock_ops.queued_spin_unlock.func ==
22 __raw_callee_save___native_queued_spin_unlock;
23}
24#endif
25
11struct pv_lock_ops pv_lock_ops = { 26struct pv_lock_ops pv_lock_ops = {
12#ifdef CONFIG_SMP 27#ifdef CONFIG_SMP
28#ifdef CONFIG_QUEUED_SPINLOCKS
29 .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
30 .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
31 .wait = paravirt_nop,
32 .kick = paravirt_nop,
33#else /* !CONFIG_QUEUED_SPINLOCKS */
13 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop), 34 .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
14 .unlock_kick = paravirt_nop, 35 .unlock_kick = paravirt_nop,
15#endif 36#endif /* !CONFIG_QUEUED_SPINLOCKS */
37#endif /* SMP */
16}; 38};
17EXPORT_SYMBOL(pv_lock_ops); 39EXPORT_SYMBOL(pv_lock_ops);
18 40
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab6..e1b013696dde 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
12DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12DEF_NATIVE(pv_cpu_ops, clts, "clts");
13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); 13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
14 14
15#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
16DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
17#endif
18
15unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) 19unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
16{ 20{
17 /* arg in %eax, return in %eax */ 21 /* arg in %eax, return in %eax */
@@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
24 return 0; 28 return 0;
25} 29}
26 30
31extern bool pv_is_native_spin_unlock(void);
32
27unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 33unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
28 unsigned long addr, unsigned len) 34 unsigned long addr, unsigned len)
29{ 35{
@@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
47 PATCH_SITE(pv_mmu_ops, write_cr3); 53 PATCH_SITE(pv_mmu_ops, write_cr3);
48 PATCH_SITE(pv_cpu_ops, clts); 54 PATCH_SITE(pv_cpu_ops, clts);
49 PATCH_SITE(pv_cpu_ops, read_tsc); 55 PATCH_SITE(pv_cpu_ops, read_tsc);
50 56#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
51 patch_site: 57 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
52 ret = paravirt_patch_insns(ibuf, len, start, end); 58 if (pv_is_native_spin_unlock()) {
53 break; 59 start = start_pv_lock_ops_queued_spin_unlock;
60 end = end_pv_lock_ops_queued_spin_unlock;
61 goto patch_site;
62 }
63#endif
54 64
55 default: 65 default:
56 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 66 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
57 break; 67 break;
68
69patch_site:
70 ret = paravirt_patch_insns(ibuf, len, start, end);
71 break;
58 } 72 }
59#undef PATCH_SITE 73#undef PATCH_SITE
60 return ret; 74 return ret;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index a1da6737ba5b..a1fa86782186 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
21DEF_NATIVE(, mov32, "mov %edi, %eax"); 21DEF_NATIVE(, mov32, "mov %edi, %eax");
22DEF_NATIVE(, mov64, "mov %rdi, %rax"); 22DEF_NATIVE(, mov64, "mov %rdi, %rax");
23 23
24#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
25DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
26#endif
27
24unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) 28unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
25{ 29{
26 return paravirt_patch_insns(insnbuf, len, 30 return paravirt_patch_insns(insnbuf, len,
@@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
33 start__mov64, end__mov64); 37 start__mov64, end__mov64);
34} 38}
35 39
40extern bool pv_is_native_spin_unlock(void);
41
36unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 42unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
37 unsigned long addr, unsigned len) 43 unsigned long addr, unsigned len)
38{ 44{
@@ -59,14 +65,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
59 PATCH_SITE(pv_cpu_ops, clts); 65 PATCH_SITE(pv_cpu_ops, clts);
60 PATCH_SITE(pv_mmu_ops, flush_tlb_single); 66 PATCH_SITE(pv_mmu_ops, flush_tlb_single);
61 PATCH_SITE(pv_cpu_ops, wbinvd); 67 PATCH_SITE(pv_cpu_ops, wbinvd);
62 68#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
63 patch_site: 69 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
64 ret = paravirt_patch_insns(ibuf, len, start, end); 70 if (pv_is_native_spin_unlock()) {
65 break; 71 start = start_pv_lock_ops_queued_spin_unlock;
72 end = end_pv_lock_ops_queued_spin_unlock;
73 goto patch_site;
74 }
75#endif
66 76
67 default: 77 default:
68 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); 78 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
69 break; 79 break;
80
81patch_site:
82 ret = paravirt_patch_insns(ibuf, len, start, end);
83 break;
70 } 84 }
71#undef PATCH_SITE 85#undef PATCH_SITE
72 return ret; 86 return ret;
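
On both 32-bit and 64-bit the native unlock is a single byte store to the lock word (movb $0, (%eax) and movb $0, (%rdi) above), so when no hypervisor has replaced the op, the patcher copies that one instruction straight over the indirect call site and the paravirt indirection costs nothing on bare metal. A condensed C model of the new case handling (same identifiers as the diff; paravirt_patch_insns() copies template bytes, paravirt_patch_default() keeps the call):

	case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
		if (pv_is_native_spin_unlock()) {
			/* inline the one-instruction native template */
			ret = paravirt_patch_insns(ibuf, len,
					start_pv_lock_ops_queued_spin_unlock,
					end_pv_lock_ops_queued_spin_unlock);
			break;
		}
		/* a hypervisor installed its own unlock: keep the call */
		ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
		break;
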
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 629af0f1c5c4..4c7deb4f78a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1090,6 +1090,17 @@ static void update_divide_count(struct kvm_lapic *apic)
1090 apic->divide_count); 1090 apic->divide_count);
1091} 1091}
1092 1092
1093static void apic_update_lvtt(struct kvm_lapic *apic)
1094{
1095 u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) &
1096 apic->lapic_timer.timer_mode_mask;
1097
1098 if (apic->lapic_timer.timer_mode != timer_mode) {
1099 apic->lapic_timer.timer_mode = timer_mode;
1100 hrtimer_cancel(&apic->lapic_timer.timer);
1101 }
1102}
1103
1093static void apic_timer_expired(struct kvm_lapic *apic) 1104static void apic_timer_expired(struct kvm_lapic *apic)
1094{ 1105{
1095 struct kvm_vcpu *vcpu = apic->vcpu; 1106 struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1298,6 +1309,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1298 apic_set_reg(apic, APIC_LVTT + 0x10 * i, 1309 apic_set_reg(apic, APIC_LVTT + 0x10 * i,
1299 lvt_val | APIC_LVT_MASKED); 1310 lvt_val | APIC_LVT_MASKED);
1300 } 1311 }
1312 apic_update_lvtt(apic);
1301 atomic_set(&apic->lapic_timer.pending, 0); 1313 atomic_set(&apic->lapic_timer.pending, 0);
1302 1314
1303 } 1315 }
@@ -1330,20 +1342,13 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1330 1342
1331 break; 1343 break;
1332 1344
1333 case APIC_LVTT: { 1345 case APIC_LVTT:
1334 u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
1335
1336 if (apic->lapic_timer.timer_mode != timer_mode) {
1337 apic->lapic_timer.timer_mode = timer_mode;
1338 hrtimer_cancel(&apic->lapic_timer.timer);
1339 }
1340
1341 if (!kvm_apic_sw_enabled(apic)) 1346 if (!kvm_apic_sw_enabled(apic))
1342 val |= APIC_LVT_MASKED; 1347 val |= APIC_LVT_MASKED;
1343 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 1348 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1344 apic_set_reg(apic, APIC_LVTT, val); 1349 apic_set_reg(apic, APIC_LVTT, val);
1350 apic_update_lvtt(apic);
1345 break; 1351 break;
1346 }
1347 1352
1348 case APIC_TMICT: 1353 case APIC_TMICT:
1349 if (apic_lvtt_tscdeadline(apic)) 1354 if (apic_lvtt_tscdeadline(apic))
@@ -1576,7 +1581,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1576 1581
1577 for (i = 0; i < APIC_LVT_NUM; i++) 1582 for (i = 0; i < APIC_LVT_NUM; i++)
1578 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 1583 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1579 apic->lapic_timer.timer_mode = 0; 1584 apic_update_lvtt(apic);
1580 apic_set_reg(apic, APIC_LVT0, 1585 apic_set_reg(apic, APIC_LVT0,
1581 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 1586 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
1582 1587
@@ -1802,6 +1807,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1802 1807
1803 apic_update_ppr(apic); 1808 apic_update_ppr(apic);
1804 hrtimer_cancel(&apic->lapic_timer.timer); 1809 hrtimer_cancel(&apic->lapic_timer.timer);
1810 apic_update_lvtt(apic);
1805 update_divide_count(apic); 1811 update_divide_count(apic);
1806 start_apic_timer(apic); 1812 start_apic_timer(apic);
1807 apic->irr_pending = true; 1813 apic->irr_pending = true;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 44a7d2515497..b73337634214 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4215,13 +4215,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
4215 u64 entry, gentry, *spte; 4215 u64 entry, gentry, *spte;
4216 int npte; 4216 int npte;
4217 bool remote_flush, local_flush, zap_page; 4217 bool remote_flush, local_flush, zap_page;
4218 union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { 4218 union kvm_mmu_page_role mask = { };
4219 .cr0_wp = 1, 4219
4220 .cr4_pae = 1, 4220 mask.cr0_wp = 1;
4221 .nxe = 1, 4221 mask.cr4_pae = 1;
4222 .smep_andnot_wp = 1, 4222 mask.nxe = 1;
4223 .smap_andnot_wp = 1, 4223 mask.smep_andnot_wp = 1;
4224 }; 4224 mask.smap_andnot_wp = 1;
4225 4225
4226 /* 4226 /*
4227 * If we don't have indirect shadow pages, it means no page is 4227 * If we don't have indirect shadow pages, it means no page is
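
The initializer change sidesteps designated initializers on kvm_mmu_page_role, whose bitfields sit inside an anonymous member of a union; some older compilers (the gcc 4.4 era) mishandle that construct, while zero-initializing with = { } and then assigning fields is always well-defined. The two forms compute the same mask; sketch:

	union kvm_mmu_page_role mask = { };	/* everything zeroed */

	/* equivalent to the removed designated-initializer form */
	mask.cr0_wp = 1;
	mask.cr4_pae = 1;
	mask.nxe = 1;
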
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 7e8a1a650435..b9531d343134 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -39,7 +39,8 @@
39#define smp_mb() barrier() 39#define smp_mb() barrier()
40#define smp_rmb() barrier() 40#define smp_rmb() barrier()
41#define smp_wmb() barrier() 41#define smp_wmb() barrier()
42#define set_mb(var, value) do { var = value; barrier(); } while (0) 42
43#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
43 44
44#define read_barrier_depends() do { } while (0) 45#define read_barrier_depends() do { } while (0)
45#define smp_read_barrier_depends() do { } while (0) 46#define smp_read_barrier_depends() do { } while (0)
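
set_mb() becomes smp_store_mb() and the store now goes through WRITE_ONCE(), so the compiler can neither tear it nor move it past the barrier. Its canonical use is a store-then-check handshake, as in sleep/wakeup paths; a sketch of that shape (illustrative only, using the usual task-state helpers of the period):

	/* The full barrier orders the state store against the condition
	 * re-check, so a concurrent waker cannot slip between them. */
	for (;;) {
		smp_store_mb(current->state, TASK_UNINTERRUPTIBLE);
		if (done)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
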
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 956374c1edbc..9e2ba5c6e1dd 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,6 +17,56 @@
17#include "xen-ops.h" 17#include "xen-ops.h"
18#include "debugfs.h" 18#include "debugfs.h"
19 19
20static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
21static DEFINE_PER_CPU(char *, irq_name);
22static bool xen_pvspin = true;
23
24#ifdef CONFIG_QUEUED_SPINLOCKS
25
26#include <asm/qspinlock.h>
27
28static void xen_qlock_kick(int cpu)
29{
30 xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
31}
32
33/*
34 * Halt the current CPU & release it back to the host
35 */
36static void xen_qlock_wait(u8 *byte, u8 val)
37{
38 int irq = __this_cpu_read(lock_kicker_irq);
39
40 /* If kicker interrupts not initialized yet, just spin */
41 if (irq == -1)
42 return;
43
44 /* clear pending */
45 xen_clear_irq_pending(irq);
46 barrier();
47
48 /*
49 * We check the byte value after clearing pending IRQ to make sure
50 * that we won't miss a wakeup event because of the clearing.
51 *
52 * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
53 * So it is effectively a memory barrier for x86.
54 */
55 if (READ_ONCE(*byte) != val)
56 return;
57
58 /*
59 * If an interrupt happens here, it will leave the wakeup irq
60 * pending, which will cause xen_poll_irq() to return
61 * immediately.
62 */
63
64 /* Block until irq becomes pending (or perhaps a spurious wakeup) */
65 xen_poll_irq(irq);
66}
67
68#else /* CONFIG_QUEUED_SPINLOCKS */
69
20enum xen_contention_stat { 70enum xen_contention_stat {
21 TAKEN_SLOW, 71 TAKEN_SLOW,
22 TAKEN_SLOW_PICKUP, 72 TAKEN_SLOW_PICKUP,
@@ -100,12 +150,9 @@ struct xen_lock_waiting {
100 __ticket_t want; 150 __ticket_t want;
101}; 151};
102 152
103static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
104static DEFINE_PER_CPU(char *, irq_name);
105static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); 153static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
106static cpumask_t waiting_cpus; 154static cpumask_t waiting_cpus;
107 155
108static bool xen_pvspin = true;
109__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) 156__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
110{ 157{
111 int irq = __this_cpu_read(lock_kicker_irq); 158 int irq = __this_cpu_read(lock_kicker_irq);
@@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
217 } 264 }
218 } 265 }
219} 266}
267#endif /* CONFIG_QUEUED_SPINLOCKS */
220 268
221static irqreturn_t dummy_handler(int irq, void *dev_id) 269static irqreturn_t dummy_handler(int irq, void *dev_id)
222{ 270{
@@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
280 return; 328 return;
281 } 329 }
282 printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); 330 printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
331#ifdef CONFIG_QUEUED_SPINLOCKS
332 __pv_init_lock_hash();
333 pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
334 pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
335 pv_lock_ops.wait = xen_qlock_wait;
336 pv_lock_ops.kick = xen_qlock_kick;
337#else
283 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); 338 pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
284 pv_lock_ops.unlock_kick = xen_unlock_kick; 339 pv_lock_ops.unlock_kick = xen_unlock_kick;
340#endif
285} 341}
286 342
287/* 343/*
@@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
310} 366}
311early_param("xen_nopvspin", xen_parse_nopvspin); 367early_param("xen_nopvspin", xen_parse_nopvspin);
312 368
313#ifdef CONFIG_XEN_DEBUG_FS 369#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
314 370
315static struct dentry *d_spin_debug; 371static struct dentry *d_spin_debug;
316 372
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e68b71b85a7e..594eea04266e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1600,6 +1600,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
1600 return NOTIFY_OK; 1600 return NOTIFY_OK;
1601} 1601}
1602 1602
1603/* hctx->ctxs will be freed in queue's release handler */
1603static void blk_mq_exit_hctx(struct request_queue *q, 1604static void blk_mq_exit_hctx(struct request_queue *q,
1604 struct blk_mq_tag_set *set, 1605 struct blk_mq_tag_set *set,
1605 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) 1606 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
@@ -1618,7 +1619,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
1618 1619
1619 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); 1620 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1620 blk_free_flush_queue(hctx->fq); 1621 blk_free_flush_queue(hctx->fq);
1621 kfree(hctx->ctxs);
1622 blk_mq_free_bitmap(&hctx->ctx_map); 1622 blk_mq_free_bitmap(&hctx->ctx_map);
1623} 1623}
1624 1624
@@ -1891,8 +1891,12 @@ void blk_mq_release(struct request_queue *q)
1891 unsigned int i; 1891 unsigned int i;
1892 1892
1893 /* hctx kobj stays in hctx */ 1893 /* hctx kobj stays in hctx */
1894 queue_for_each_hw_ctx(q, hctx, i) 1894 queue_for_each_hw_ctx(q, hctx, i) {
1895 if (!hctx)
1896 continue;
1897 kfree(hctx->ctxs);
1895 kfree(hctx); 1898 kfree(hctx);
1899 }
1896 1900
1897 kfree(q->queue_hw_ctx); 1901 kfree(q->queue_hw_ctx);
1898 1902
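
Freeing hctx->ctxs in blk_mq_exit_hctx() was premature: teardown can run while a late reader still holds a queue reference, so the map is now freed in blk_mq_release(), the queue's release handler, alongside the hctx itself. The general lifetime rule, sketched with a kref (names are illustrative, not the block-layer API):

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct obj {
		struct kref ref;
		void *ctxs;
	};

	static void obj_release(struct kref *ref)
	{
		struct obj *o = container_of(ref, struct obj, ref);

		kfree(o->ctxs);	/* safe: the last reference just dropped */
		kfree(o);
	}

	static void obj_teardown(struct obj *o)
	{
		/* quiesce and unlink here, but free nothing that a
		 * remaining reference holder might still dereference */
		kref_put(&o->ref, obj_release);
	}
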
diff --git a/block/genhd.c b/block/genhd.c
index 0a536dc05f3b..ea982eadaf63 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -422,9 +422,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
422 /* allocate ext devt */ 422 /* allocate ext devt */
423 idr_preload(GFP_KERNEL); 423 idr_preload(GFP_KERNEL);
424 424
425 spin_lock(&ext_devt_lock); 425 spin_lock_bh(&ext_devt_lock);
426 idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); 426 idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
427 spin_unlock(&ext_devt_lock); 427 spin_unlock_bh(&ext_devt_lock);
428 428
429 idr_preload_end(); 429 idr_preload_end();
430 if (idx < 0) 430 if (idx < 0)
@@ -449,9 +449,9 @@ void blk_free_devt(dev_t devt)
449 return; 449 return;
450 450
451 if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 451 if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
452 spin_lock(&ext_devt_lock); 452 spin_lock_bh(&ext_devt_lock);
453 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 453 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
454 spin_unlock(&ext_devt_lock); 454 spin_unlock_bh(&ext_devt_lock);
455 } 455 }
456} 456}
457 457
@@ -653,7 +653,6 @@ void del_gendisk(struct gendisk *disk)
653 disk->flags &= ~GENHD_FL_UP; 653 disk->flags &= ~GENHD_FL_UP;
654 654
655 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 655 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
656 bdi_unregister(&disk->queue->backing_dev_info);
657 blk_unregister_queue(disk); 656 blk_unregister_queue(disk);
658 blk_unregister_region(disk_devt(disk), disk->minors); 657 blk_unregister_region(disk_devt(disk), disk->minors);
659 658
@@ -691,13 +690,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
691 } else { 690 } else {
692 struct hd_struct *part; 691 struct hd_struct *part;
693 692
694 spin_lock(&ext_devt_lock); 693 spin_lock_bh(&ext_devt_lock);
695 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 694 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
696 if (part && get_disk(part_to_disk(part))) { 695 if (part && get_disk(part_to_disk(part))) {
697 *partno = part->partno; 696 *partno = part->partno;
698 disk = part_to_disk(part); 697 disk = part_to_disk(part);
699 } 698 }
700 spin_unlock(&ext_devt_lock); 699 spin_unlock_bh(&ext_devt_lock);
701 } 700 }
702 701
703 return disk; 702 return disk;
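
The switch to spin_lock_bh()/spin_unlock_bh() exists because blk_free_devt() can be reached from softirq context; if a softirq that takes ext_devt_lock interrupts a process-context holder of the plain spin_lock() on the same CPU, that CPU deadlocks on itself. The rule, sketched:

	static DEFINE_SPINLOCK(shared_lock);

	/* Process context must block local bottom halves around the lock. */
	void process_side(void)
	{
		spin_lock_bh(&shared_lock);
		/* ... touch state also used by the softirq ... */
		spin_unlock_bh(&shared_lock);
	}

	/* Softirq context already excludes other local BHs; plain lock. */
	void softirq_side(void)
	{
		spin_lock(&shared_lock);
		/* ... */
		spin_unlock(&shared_lock);
	}
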
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 23716dd8a7ec..5928d0746a27 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -45,7 +45,7 @@ static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv,
45 writel((cs->mbus_attr << 8) | 45 writel((cs->mbus_attr << 8) |
46 (dram->mbus_dram_target_id << 4) | 1, 46 (dram->mbus_dram_target_id << 4) | 1,
47 hpriv->mmio + AHCI_WINDOW_CTRL(i)); 47 hpriv->mmio + AHCI_WINDOW_CTRL(i));
48 writel(cs->base, hpriv->mmio + AHCI_WINDOW_BASE(i)); 48 writel(cs->base >> 16, hpriv->mmio + AHCI_WINDOW_BASE(i));
49 writel(((cs->size - 1) & 0xffff0000), 49 writel(((cs->size - 1) & 0xffff0000),
50 hpriv->mmio + AHCI_WINDOW_SIZE(i)); 50 hpriv->mmio + AHCI_WINDOW_SIZE(i));
51 } 51 }
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 80a80548ad0a..27245957eee3 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -1053,7 +1053,7 @@ static struct of_device_id octeon_cf_match[] = {
1053 }, 1053 },
1054 {}, 1054 {},
1055}; 1055};
1056MODULE_DEVICE_TABLE(of, octeon_i2c_match); 1056MODULE_DEVICE_TABLE(of, octeon_cf_match);
1057 1057
1058static struct platform_driver octeon_cf_driver = { 1058static struct platform_driver octeon_cf_driver = {
1059 .probe = octeon_cf_probe, 1059 .probe = octeon_cf_probe,
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 9c2ba1c97c42..df0c66cb7ad3 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -179,7 +179,7 @@ static int detect_cache_attributes(unsigned int cpu)
179{ 179{
180 int ret; 180 int ret;
181 181
182 if (init_cache_level(cpu)) 182 if (init_cache_level(cpu) || !cache_leaves(cpu))
183 return -ENOENT; 183 return -ENOENT;
184 184
185 per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), 185 per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
diff --git a/drivers/base/init.c b/drivers/base/init.c
index da033d3bab3c..48c0e220acc0 100644
--- a/drivers/base/init.c
+++ b/drivers/base/init.c
@@ -8,6 +8,7 @@
8#include <linux/device.h> 8#include <linux/device.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/memory.h> 10#include <linux/memory.h>
11#include <linux/of.h>
11 12
12#include "base.h" 13#include "base.h"
13 14
@@ -34,4 +35,5 @@ void __init driver_init(void)
34 cpu_dev_init(); 35 cpu_dev_init();
35 memory_dev_init(); 36 memory_dev_init();
36 container_dev_init(); 37 container_dev_init();
38 of_core_init();
37} 39}
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index eb1fed5bd516..3ccef9eba6f9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX
406 406
407config BLK_DEV_PMEM 407config BLK_DEV_PMEM
408 tristate "Persistent memory block device support" 408 tristate "Persistent memory block device support"
409 depends on HAS_IOMEM
409 help 410 help
410 Saying Y here will allow you to use a contiguous range of reserved 411 Saying Y here will allow you to use a contiguous range of reserved
411 memory as one or more persistent block devices. 412 memory as one or more persistent block devices.
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 85b8036deaa3..683dff272562 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1750,6 +1750,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1750 struct nvme_iod *iod; 1750 struct nvme_iod *iod;
1751 dma_addr_t meta_dma = 0; 1751 dma_addr_t meta_dma = 0;
1752 void *meta = NULL; 1752 void *meta = NULL;
1753 void __user *metadata;
1753 1754
1754 if (copy_from_user(&io, uio, sizeof(io))) 1755 if (copy_from_user(&io, uio, sizeof(io)))
1755 return -EFAULT; 1756 return -EFAULT;
@@ -1763,6 +1764,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1763 meta_len = 0; 1764 meta_len = 0;
1764 } 1765 }
1765 1766
1767 metadata = (void __user *)(unsigned long)io.metadata;
1768
1766 write = io.opcode & 1; 1769 write = io.opcode & 1;
1767 1770
1768 switch (io.opcode) { 1771 switch (io.opcode) {
@@ -1786,13 +1789,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1786 if (meta_len) { 1789 if (meta_len) {
1787 meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, 1790 meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
1788 &meta_dma, GFP_KERNEL); 1791 &meta_dma, GFP_KERNEL);
1792
1789 if (!meta) { 1793 if (!meta) {
1790 status = -ENOMEM; 1794 status = -ENOMEM;
1791 goto unmap; 1795 goto unmap;
1792 } 1796 }
1793 if (write) { 1797 if (write) {
1794 if (copy_from_user(meta, (void __user *)io.metadata, 1798 if (copy_from_user(meta, metadata, meta_len)) {
1795 meta_len)) {
1796 status = -EFAULT; 1799 status = -EFAULT;
1797 goto unmap; 1800 goto unmap;
1798 } 1801 }
@@ -1819,8 +1822,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1819 nvme_free_iod(dev, iod); 1822 nvme_free_iod(dev, iod);
1820 if (meta) { 1823 if (meta) {
1821 if (status == NVME_SC_SUCCESS && !write) { 1824 if (status == NVME_SC_SUCCESS && !write) {
1822 if (copy_to_user((void __user *)io.metadata, meta, 1825 if (copy_to_user(metadata, meta, meta_len))
1823 meta_len))
1824 status = -EFAULT; 1826 status = -EFAULT;
1825 } 1827 }
1826 dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); 1828 dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
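
io.metadata is a __u64 coming from userspace, so the driver now converts it once, via unsigned long, into a void __user * and reuses that pointer for both copy directions; a direct u64-to-pointer cast would at minimum warn on 32-bit builds where pointers are narrower. The idiom:

	/* u64 from a uapi struct -> __user pointer: cast through
	 * unsigned long so the truncation on 32-bit is explicit. */
	void __user *metadata = (void __user *)(unsigned long)io.metadata;

	if (copy_from_user(meta, metadata, meta_len))
		return -EFAULT;
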
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 8dcbced0eafd..6e134f4759c0 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -805,7 +805,9 @@ static void zram_reset_device(struct zram *zram)
805 memset(&zram->stats, 0, sizeof(zram->stats)); 805 memset(&zram->stats, 0, sizeof(zram->stats));
806 zram->disksize = 0; 806 zram->disksize = 0;
807 zram->max_comp_streams = 1; 807 zram->max_comp_streams = 1;
808
808 set_capacity(zram->disk, 0); 809 set_capacity(zram->disk, 0);
810 part_stat_set_all(&zram->disk->part0, 0);
809 811
810 up_write(&zram->init_lock); 812 up_write(&zram->init_lock);
811 /* I/O operation under all of CPU are done so let's free */ 813 /* I/O operation under all of CPU are done so let's free */
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index fb9ec6221730..6f047dcb94c2 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -58,7 +58,6 @@
58#include <linux/debugfs.h> 58#include <linux/debugfs.h>
59#include <linux/log2.h> 59#include <linux/log2.h>
60#include <linux/syscore_ops.h> 60#include <linux/syscore_ops.h>
61#include <linux/memblock.h>
62 61
63/* 62/*
64 * DDR target is the same on all platforms. 63 * DDR target is the same on all platforms.
@@ -70,6 +69,7 @@
70 */ 69 */
71#define WIN_CTRL_OFF 0x0000 70#define WIN_CTRL_OFF 0x0000
72#define WIN_CTRL_ENABLE BIT(0) 71#define WIN_CTRL_ENABLE BIT(0)
72/* Only on HW I/O coherency capable platforms */
73#define WIN_CTRL_SYNCBARRIER BIT(1) 73#define WIN_CTRL_SYNCBARRIER BIT(1)
74#define WIN_CTRL_TGT_MASK 0xf0 74#define WIN_CTRL_TGT_MASK 0xf0
75#define WIN_CTRL_TGT_SHIFT 4 75#define WIN_CTRL_TGT_SHIFT 4
@@ -102,9 +102,7 @@
102 102
103/* Relative to mbusbridge_base */ 103/* Relative to mbusbridge_base */
104#define MBUS_BRIDGE_CTRL_OFF 0x0 104#define MBUS_BRIDGE_CTRL_OFF 0x0
105#define MBUS_BRIDGE_SIZE_MASK 0xffff0000
106#define MBUS_BRIDGE_BASE_OFF 0x4 105#define MBUS_BRIDGE_BASE_OFF 0x4
107#define MBUS_BRIDGE_BASE_MASK 0xffff0000
108 106
109/* Maximum number of windows, for all known platforms */ 107/* Maximum number of windows, for all known platforms */
110#define MBUS_WINS_MAX 20 108#define MBUS_WINS_MAX 20
@@ -323,8 +321,9 @@ static int mvebu_mbus_setup_window(struct mvebu_mbus_state *mbus,
323 ctrl = ((size - 1) & WIN_CTRL_SIZE_MASK) | 321 ctrl = ((size - 1) & WIN_CTRL_SIZE_MASK) |
324 (attr << WIN_CTRL_ATTR_SHIFT) | 322 (attr << WIN_CTRL_ATTR_SHIFT) |
325 (target << WIN_CTRL_TGT_SHIFT) | 323 (target << WIN_CTRL_TGT_SHIFT) |
326 WIN_CTRL_SYNCBARRIER |
327 WIN_CTRL_ENABLE; 324 WIN_CTRL_ENABLE;
325 if (mbus->hw_io_coherency)
326 ctrl |= WIN_CTRL_SYNCBARRIER;
328 327
329 writel(base & WIN_BASE_LOW, addr + WIN_BASE_OFF); 328 writel(base & WIN_BASE_LOW, addr + WIN_BASE_OFF);
330 writel(ctrl, addr + WIN_CTRL_OFF); 329 writel(ctrl, addr + WIN_CTRL_OFF);
@@ -577,106 +576,36 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
577 return MVEBU_MBUS_NO_REMAP; 576 return MVEBU_MBUS_NO_REMAP;
578} 577}
579 578
580/*
581 * Use the memblock information to find the MBus bridge hole in the
582 * physical address space.
583 */
584static void __init
585mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
586{
587 struct memblock_region *r;
588 uint64_t s = 0;
589
590 for_each_memblock(memory, r) {
591 /*
592 * This part of the memory is above 4 GB, so we don't
593 * care for the MBus bridge hole.
594 */
595 if (r->base >= 0x100000000)
596 continue;
597
598 /*
599 * The MBus bridge hole is at the end of the RAM under
600 * the 4 GB limit.
601 */
602 if (r->base + r->size > s)
603 s = r->base + r->size;
604 }
605
606 *start = s;
607 *end = 0x100000000;
608}
609
610static void __init 579static void __init
611mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) 580mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus)
612{ 581{
613 int i; 582 int i;
614 int cs; 583 int cs;
615 uint64_t mbus_bridge_base, mbus_bridge_end;
616 584
617 mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR; 585 mvebu_mbus_dram_info.mbus_dram_target_id = TARGET_DDR;
618 586
619 mvebu_mbus_find_bridge_hole(&mbus_bridge_base, &mbus_bridge_end);
620
621 for (i = 0, cs = 0; i < 4; i++) { 587 for (i = 0, cs = 0; i < 4; i++) {
622 u64 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i)); 588 u32 base = readl(mbus->sdramwins_base + DDR_BASE_CS_OFF(i));
623 u64 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i)); 589 u32 size = readl(mbus->sdramwins_base + DDR_SIZE_CS_OFF(i));
624 u64 end;
625 struct mbus_dram_window *w;
626
627 /* Ignore entries that are not enabled */
628 if (!(size & DDR_SIZE_ENABLED))
629 continue;
630
631 /*
632 * Ignore entries whose base address is above 2^32,
633 * since devices cannot DMA to such high addresses
634 */
635 if (base & DDR_BASE_CS_HIGH_MASK)
636 continue;
637
638 base = base & DDR_BASE_CS_LOW_MASK;
639 size = (size | ~DDR_SIZE_MASK) + 1;
640 end = base + size;
641
642 /*
643 * Adjust base/size of the current CS to make sure it
644 * doesn't overlap with the MBus bridge hole. This is
645 * particularly important for devices that do DMA from
646 * DRAM to a SRAM mapped in a MBus window, such as the
647 * CESA cryptographic engine.
648 */
649 590
650 /* 591 /*
651 * The CS is fully enclosed inside the MBus bridge 592 * We only take care of entries for which the chip
652 * area, so ignore it. 593 * select is enabled, and that don't have high base
 594	 * address bits set (devices can only address the
 595	 * first 4 GB of memory).
653 */ 596 */
654 if (base >= mbus_bridge_base && end <= mbus_bridge_end) 597 if ((size & DDR_SIZE_ENABLED) &&
655 continue; 598 !(base & DDR_BASE_CS_HIGH_MASK)) {
599 struct mbus_dram_window *w;
656 600
657 /* 601 w = &mvebu_mbus_dram_info.cs[cs++];
658 * Beginning of CS overlaps with end of MBus, raise CS 602 w->cs_index = i;
659 * base address, and shrink its size. 603 w->mbus_attr = 0xf & ~(1 << i);
660 */ 604 if (mbus->hw_io_coherency)
661 if (base >= mbus_bridge_base && end > mbus_bridge_end) { 605 w->mbus_attr |= ATTR_HW_COHERENCY;
662 size -= mbus_bridge_end - base; 606 w->base = base & DDR_BASE_CS_LOW_MASK;
663 base = mbus_bridge_end; 607 w->size = (size | ~DDR_SIZE_MASK) + 1;
664 } 608 }
665
666 /*
667 * End of CS overlaps with beginning of MBus, shrink
668 * CS size.
669 */
670 if (base < mbus_bridge_base && end > mbus_bridge_base)
671 size -= end - mbus_bridge_base;
672
673 w = &mvebu_mbus_dram_info.cs[cs++];
674 w->cs_index = i;
675 w->mbus_attr = 0xf & ~(1 << i);
676 if (mbus->hw_io_coherency)
677 w->mbus_attr |= ATTR_HW_COHERENCY;
678 w->base = base;
679 w->size = size;
680 } 609 }
681 mvebu_mbus_dram_info.num_cs = cs; 610 mvebu_mbus_dram_info.num_cs = cs;
682} 611}
diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c
index 597fed423d7d..df2c1afa52b4 100644
--- a/drivers/clk/at91/clk-peripheral.c
+++ b/drivers/clk/at91/clk-peripheral.c
@@ -29,7 +29,7 @@
29#define PERIPHERAL_RSHIFT_MASK 0x3 29#define PERIPHERAL_RSHIFT_MASK 0x3
30#define PERIPHERAL_RSHIFT(val) (((val) >> 16) & PERIPHERAL_RSHIFT_MASK) 30#define PERIPHERAL_RSHIFT(val) (((val) >> 16) & PERIPHERAL_RSHIFT_MASK)
31 31
32#define PERIPHERAL_MAX_SHIFT 4 32#define PERIPHERAL_MAX_SHIFT 3
33 33
34struct clk_peripheral { 34struct clk_peripheral {
35 struct clk_hw hw; 35 struct clk_hw hw;
@@ -242,7 +242,7 @@ static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw,
242 return *parent_rate; 242 return *parent_rate;
243 243
244 if (periph->range.max) { 244 if (periph->range.max) {
245 for (; shift < PERIPHERAL_MAX_SHIFT; shift++) { 245 for (; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
246 cur_rate = *parent_rate >> shift; 246 cur_rate = *parent_rate >> shift;
247 if (cur_rate <= periph->range.max) 247 if (cur_rate <= periph->range.max)
248 break; 248 break;
@@ -254,7 +254,7 @@ static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw,
254 254
255 best_diff = cur_rate - rate; 255 best_diff = cur_rate - rate;
256 best_rate = cur_rate; 256 best_rate = cur_rate;
257 for (; shift < PERIPHERAL_MAX_SHIFT; shift++) { 257 for (; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
258 cur_rate = *parent_rate >> shift; 258 cur_rate = *parent_rate >> shift;
259 if (cur_rate < rate) 259 if (cur_rate < rate)
260 cur_diff = rate - cur_rate; 260 cur_diff = rate - cur_rate;
@@ -289,7 +289,7 @@ static int clk_sam9x5_peripheral_set_rate(struct clk_hw *hw,
289 if (periph->range.max && rate > periph->range.max) 289 if (periph->range.max && rate > periph->range.max)
290 return -EINVAL; 290 return -EINVAL;
291 291
292 for (shift = 0; shift < PERIPHERAL_MAX_SHIFT; shift++) { 292 for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) {
293 if (parent_rate >> shift == rate) { 293 if (parent_rate >> shift == rate) {
294 periph->auto_div = false; 294 periph->auto_div = false;
295 periph->div = shift; 295 periph->div = shift;
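
PERIPHERAL_MAX_SHIFT dropping from 4 to 3 reflects that the hardware offers dividers 1, 2, 4 and 8, i.e. shifts 0 through 3; with the macro now naming the largest valid shift rather than the count of shifts, every loop bound flips from < to <=. The scan shape:

	/* try dividers /1, /2, /4, /8 until the rate fits the range */
	for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++)
		if ((parent_rate >> shift) <= periph->range.max)
			break;
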
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 6ec79dbc0840..cbbe40377ad6 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -173,8 +173,7 @@ static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
173 int i = 0; 173 int i = 0;
174 174
175 /* Check if parent_rate is a valid input rate */ 175 /* Check if parent_rate is a valid input rate */
176 if (parent_rate < characteristics->input.min || 176 if (parent_rate < characteristics->input.min)
177 parent_rate > characteristics->input.max)
178 return -ERANGE; 177 return -ERANGE;
179 178
180 /* 179 /*
@@ -187,6 +186,15 @@ static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
187 if (!mindiv) 186 if (!mindiv)
188 mindiv = 1; 187 mindiv = 1;
189 188
189 if (parent_rate > characteristics->input.max) {
190 tmpdiv = DIV_ROUND_UP(parent_rate, characteristics->input.max);
191 if (tmpdiv > PLL_DIV_MAX)
192 return -ERANGE;
193
194 if (tmpdiv > mindiv)
195 mindiv = tmpdiv;
196 }
197
190 /* 198 /*
191 * Calculate the maximum divider which is limited by PLL register 199 * Calculate the maximum divider which is limited by PLL register
192 * layout (limited by the MUL or DIV field size). 200 * layout (limited by the MUL or DIV field size).
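
Instead of rejecting a parent rate above input.max, the PLL code now raises the minimum divider so the divided input falls back into range. Worked example: with parent_rate = 25 MHz and input.max = 12 MHz, DIV_ROUND_UP(25, 12) = 3, and 25 MHz / 3 ≈ 8.33 MHz is in range, whereas truncating division would pick 2 and leave 12.5 MHz still above the limit; only when the required divider exceeds PLL_DIV_MAX does the function return -ERANGE.
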
diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h
index 69abb08cf146..eb8e5dc9076d 100644
--- a/drivers/clk/at91/pmc.h
+++ b/drivers/clk/at91/pmc.h
@@ -121,7 +121,7 @@ extern void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
121 struct at91_pmc *pmc); 121 struct at91_pmc *pmc);
122#endif 122#endif
123 123
124#if defined(CONFIG_HAVE_AT91_SMD) 124#if defined(CONFIG_HAVE_AT91_H32MX)
125extern void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, 125extern void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
126 struct at91_pmc *pmc); 126 struct at91_pmc *pmc);
127#endif 127#endif
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index ba0532efd3ae..332c8ef8dae2 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1544,6 +1544,8 @@ static int ahash_init(struct ahash_request *req)
1544 1544
1545 state->current_buf = 0; 1545 state->current_buf = 0;
1546 state->buf_dma = 0; 1546 state->buf_dma = 0;
1547 state->buflen_0 = 0;
1548 state->buflen_1 = 0;
1547 1549
1548 return 0; 1550 return 0;
1549} 1551}
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 26a544b505f1..5095337205b8 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -56,7 +56,7 @@
56 56
57/* Buffer, its dma address and lock */ 57/* Buffer, its dma address and lock */
58struct buf_data { 58struct buf_data {
59 u8 buf[RN_BUF_SIZE]; 59 u8 buf[RN_BUF_SIZE] ____cacheline_aligned;
60 dma_addr_t addr; 60 dma_addr_t addr;
61 struct completion filled; 61 struct completion filled;
62 u32 hw_desc[DESC_JOB_O_LEN]; 62 u32 hw_desc[DESC_JOB_O_LEN];
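
____cacheline_aligned on buf gives the RNG's DMA target its own cache line, so that on non-coherent platforms a cache invalidate covering the device-written buffer cannot wipe out concurrent CPU writes to the neighbouring addr, filled or hw_desc fields (assuming RN_BUF_SIZE is a whole number of cache lines, which the alignment attribute is meant to pair with). The shape of the fix:

	/* DMA-written buffer starts on a cache-line boundary; the CPU-owned
	 * bookkeeping that follows no longer shares its lines. */
	struct buf_data {
		u8 buf[RN_BUF_SIZE] ____cacheline_aligned;	/* device writes */
		dma_addr_t addr;				/* CPU-owned */
		struct completion filled;
		u32 hw_desc[DESC_JOB_O_LEN];
	};
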
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 933e4b338459..7992164ea9ec 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -174,6 +174,8 @@
174#define AT_XDMAC_MBR_UBC_NDV3 (0x3 << 27) /* Next Descriptor View 3 */ 174#define AT_XDMAC_MBR_UBC_NDV3 (0x3 << 27) /* Next Descriptor View 3 */
175 175
176#define AT_XDMAC_MAX_CHAN 0x20 176#define AT_XDMAC_MAX_CHAN 0x20
177#define AT_XDMAC_MAX_CSIZE 16 /* 16 data */
178#define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */
177 179
178#define AT_XDMAC_DMA_BUSWIDTHS\ 180#define AT_XDMAC_DMA_BUSWIDTHS\
179 (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ 181 (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
@@ -192,20 +194,17 @@ struct at_xdmac_chan {
192 struct dma_chan chan; 194 struct dma_chan chan;
193 void __iomem *ch_regs; 195 void __iomem *ch_regs;
194 u32 mask; /* Channel Mask */ 196 u32 mask; /* Channel Mask */
195 u32 cfg[2]; /* Channel Configuration Register */ 197 u32 cfg; /* Channel Configuration Register */
196 #define AT_XDMAC_DEV_TO_MEM_CFG 0 /* Predifined dev to mem channel conf */
197 #define AT_XDMAC_MEM_TO_DEV_CFG 1 /* Predifined mem to dev channel conf */
198 u8 perid; /* Peripheral ID */ 198 u8 perid; /* Peripheral ID */
199 u8 perif; /* Peripheral Interface */ 199 u8 perif; /* Peripheral Interface */
200 u8 memif; /* Memory Interface */ 200 u8 memif; /* Memory Interface */
201 u32 per_src_addr;
202 u32 per_dst_addr;
203 u32 save_cc; 201 u32 save_cc;
204 u32 save_cim; 202 u32 save_cim;
205 u32 save_cnda; 203 u32 save_cnda;
206 u32 save_cndc; 204 u32 save_cndc;
207 unsigned long status; 205 unsigned long status;
208 struct tasklet_struct tasklet; 206 struct tasklet_struct tasklet;
207 struct dma_slave_config sconfig;
209 208
210 spinlock_t lock; 209 spinlock_t lock;
211 210
@@ -415,8 +414,9 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
415 struct at_xdmac_desc *desc = txd_to_at_desc(tx); 414 struct at_xdmac_desc *desc = txd_to_at_desc(tx);
416 struct at_xdmac_chan *atchan = to_at_xdmac_chan(tx->chan); 415 struct at_xdmac_chan *atchan = to_at_xdmac_chan(tx->chan);
417 dma_cookie_t cookie; 416 dma_cookie_t cookie;
417 unsigned long irqflags;
418 418
419 spin_lock_bh(&atchan->lock); 419 spin_lock_irqsave(&atchan->lock, irqflags);
420 cookie = dma_cookie_assign(tx); 420 cookie = dma_cookie_assign(tx);
421 421
422 dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", 422 dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
@@ -425,7 +425,7 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
425 if (list_is_singular(&atchan->xfers_list)) 425 if (list_is_singular(&atchan->xfers_list))
426 at_xdmac_start_xfer(atchan, desc); 426 at_xdmac_start_xfer(atchan, desc);
427 427
428 spin_unlock_bh(&atchan->lock); 428 spin_unlock_irqrestore(&atchan->lock, irqflags);
429 return cookie; 429 return cookie;
430} 430}
431 431
@@ -494,61 +494,94 @@ static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
494 return chan; 494 return chan;
495} 495}
496 496
497static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
498 enum dma_transfer_direction direction)
499{
500 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
501 int csize, dwidth;
502
503 if (direction == DMA_DEV_TO_MEM) {
504 atchan->cfg =
505 AT91_XDMAC_DT_PERID(atchan->perid)
506 | AT_XDMAC_CC_DAM_INCREMENTED_AM
507 | AT_XDMAC_CC_SAM_FIXED_AM
508 | AT_XDMAC_CC_DIF(atchan->memif)
509 | AT_XDMAC_CC_SIF(atchan->perif)
510 | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
511 | AT_XDMAC_CC_DSYNC_PER2MEM
512 | AT_XDMAC_CC_MBSIZE_SIXTEEN
513 | AT_XDMAC_CC_TYPE_PER_TRAN;
514 csize = ffs(atchan->sconfig.src_maxburst) - 1;
515 if (csize < 0) {
516 dev_err(chan2dev(chan), "invalid src maxburst value\n");
517 return -EINVAL;
518 }
519 atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
520 dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
521 if (dwidth < 0) {
522 dev_err(chan2dev(chan), "invalid src addr width value\n");
523 return -EINVAL;
524 }
525 atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
526 } else if (direction == DMA_MEM_TO_DEV) {
527 atchan->cfg =
528 AT91_XDMAC_DT_PERID(atchan->perid)
529 | AT_XDMAC_CC_DAM_FIXED_AM
530 | AT_XDMAC_CC_SAM_INCREMENTED_AM
531 | AT_XDMAC_CC_DIF(atchan->perif)
532 | AT_XDMAC_CC_SIF(atchan->memif)
533 | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
534 | AT_XDMAC_CC_DSYNC_MEM2PER
535 | AT_XDMAC_CC_MBSIZE_SIXTEEN
536 | AT_XDMAC_CC_TYPE_PER_TRAN;
537 csize = ffs(atchan->sconfig.dst_maxburst) - 1;
538 if (csize < 0) {
 539			dev_err(chan2dev(chan), "invalid dst maxburst value\n");
540 return -EINVAL;
541 }
542 atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
543 dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
544 if (dwidth < 0) {
545 dev_err(chan2dev(chan), "invalid dst addr width value\n");
546 return -EINVAL;
547 }
548 atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
549 }
550
551 dev_dbg(chan2dev(chan), "%s: cfg=0x%08x\n", __func__, atchan->cfg);
552
553 return 0;
554}
555
556/*
557 * Only check that maxburst and addr width values are supported by the
 558 * controller, but not that the configuration is good to perform the
559 * transfer since we don't know the direction at this stage.
560 */
561static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
562{
563 if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
564 || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
565 return -EINVAL;
566
567 if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
568 || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
569 return -EINVAL;
570
571 return 0;
572}
573
497static int at_xdmac_set_slave_config(struct dma_chan *chan, 574static int at_xdmac_set_slave_config(struct dma_chan *chan,
498 struct dma_slave_config *sconfig) 575 struct dma_slave_config *sconfig)
499{ 576{
500 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 577 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
501 u8 dwidth;
502 int csize;
503 578
504 atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] = 579 if (at_xdmac_check_slave_config(sconfig)) {
505 AT91_XDMAC_DT_PERID(atchan->perid) 580 dev_err(chan2dev(chan), "invalid slave configuration\n");
506 | AT_XDMAC_CC_DAM_INCREMENTED_AM
507 | AT_XDMAC_CC_SAM_FIXED_AM
508 | AT_XDMAC_CC_DIF(atchan->memif)
509 | AT_XDMAC_CC_SIF(atchan->perif)
510 | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
511 | AT_XDMAC_CC_DSYNC_PER2MEM
512 | AT_XDMAC_CC_MBSIZE_SIXTEEN
513 | AT_XDMAC_CC_TYPE_PER_TRAN;
514 csize = at_xdmac_csize(sconfig->src_maxburst);
515 if (csize < 0) {
516 dev_err(chan2dev(chan), "invalid src maxburst value\n");
517 return -EINVAL; 581 return -EINVAL;
518 } 582 }
519 atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_CSIZE(csize);
520 dwidth = ffs(sconfig->src_addr_width) - 1;
521 atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
522
523
524 atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] =
525 AT91_XDMAC_DT_PERID(atchan->perid)
526 | AT_XDMAC_CC_DAM_FIXED_AM
527 | AT_XDMAC_CC_SAM_INCREMENTED_AM
528 | AT_XDMAC_CC_DIF(atchan->perif)
529 | AT_XDMAC_CC_SIF(atchan->memif)
530 | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
531 | AT_XDMAC_CC_DSYNC_MEM2PER
532 | AT_XDMAC_CC_MBSIZE_SIXTEEN
533 | AT_XDMAC_CC_TYPE_PER_TRAN;
534 csize = at_xdmac_csize(sconfig->dst_maxburst);
535 if (csize < 0) {
536 dev_err(chan2dev(chan), "invalid src maxburst value\n");
537 return -EINVAL;
538 }
539 atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_CSIZE(csize);
540 dwidth = ffs(sconfig->dst_addr_width) - 1;
541 atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
542
543 /* Src and dst addr are needed to configure the link list descriptor. */
544 atchan->per_src_addr = sconfig->src_addr;
545 atchan->per_dst_addr = sconfig->dst_addr;
546 583
547 dev_dbg(chan2dev(chan), 584 memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));
548 "%s: cfg[dev2mem]=0x%08x, cfg[mem2dev]=0x%08x, per_src_addr=0x%08x, per_dst_addr=0x%08x\n",
549 __func__, atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG],
550 atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG],
551 atchan->per_src_addr, atchan->per_dst_addr);
552 585
553 return 0; 586 return 0;
554} 587}
@@ -563,6 +596,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
563 struct scatterlist *sg; 596 struct scatterlist *sg;
564 int i; 597 int i;
565 unsigned int xfer_size = 0; 598 unsigned int xfer_size = 0;
599 unsigned long irqflags;
600 struct dma_async_tx_descriptor *ret = NULL;
566 601
567 if (!sgl) 602 if (!sgl)
568 return NULL; 603 return NULL;
@@ -578,7 +613,10 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
578 flags); 613 flags);
579 614
580 /* Protect dma_sconfig field that can be modified by set_slave_conf. */ 615 /* Protect dma_sconfig field that can be modified by set_slave_conf. */
581 spin_lock_bh(&atchan->lock); 616 spin_lock_irqsave(&atchan->lock, irqflags);
617
618 if (at_xdmac_compute_chan_conf(chan, direction))
619 goto spin_unlock;
582 620
583 /* Prepare descriptors. */ 621 /* Prepare descriptors. */
584 for_each_sg(sgl, sg, sg_len, i) { 622 for_each_sg(sgl, sg, sg_len, i) {
@@ -589,8 +627,7 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
589 mem = sg_dma_address(sg); 627 mem = sg_dma_address(sg);
590 if (unlikely(!len)) { 628 if (unlikely(!len)) {
591 dev_err(chan2dev(chan), "sg data length is zero\n"); 629 dev_err(chan2dev(chan), "sg data length is zero\n");
592 spin_unlock_bh(&atchan->lock); 630 goto spin_unlock;
593 return NULL;
594 } 631 }
595 dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n", 632 dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
596 __func__, i, len, mem); 633 __func__, i, len, mem);
@@ -600,20 +637,18 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
600 dev_err(chan2dev(chan), "can't get descriptor\n"); 637 dev_err(chan2dev(chan), "can't get descriptor\n");
601 if (first) 638 if (first)
602 list_splice_init(&first->descs_list, &atchan->free_descs_list); 639 list_splice_init(&first->descs_list, &atchan->free_descs_list);
603 spin_unlock_bh(&atchan->lock); 640 goto spin_unlock;
604 return NULL;
605 } 641 }
606 642
607 /* Linked list descriptor setup. */ 643 /* Linked list descriptor setup. */
608 if (direction == DMA_DEV_TO_MEM) { 644 if (direction == DMA_DEV_TO_MEM) {
609 desc->lld.mbr_sa = atchan->per_src_addr; 645 desc->lld.mbr_sa = atchan->sconfig.src_addr;
610 desc->lld.mbr_da = mem; 646 desc->lld.mbr_da = mem;
611 desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
612 } else { 647 } else {
613 desc->lld.mbr_sa = mem; 648 desc->lld.mbr_sa = mem;
614 desc->lld.mbr_da = atchan->per_dst_addr; 649 desc->lld.mbr_da = atchan->sconfig.dst_addr;
615 desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
616 } 650 }
651 desc->lld.mbr_cfg = atchan->cfg;
617 dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg); 652 dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
618 fixed_dwidth = IS_ALIGNED(len, 1 << dwidth) 653 fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
619 ? at_xdmac_get_dwidth(desc->lld.mbr_cfg) 654 ? at_xdmac_get_dwidth(desc->lld.mbr_cfg)
@@ -645,13 +680,15 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
645 xfer_size += len; 680 xfer_size += len;
646 } 681 }
647 682
648 spin_unlock_bh(&atchan->lock);
649 683
650 first->tx_dma_desc.flags = flags; 684 first->tx_dma_desc.flags = flags;
651 first->xfer_size = xfer_size; 685 first->xfer_size = xfer_size;
652 first->direction = direction; 686 first->direction = direction;
687 ret = &first->tx_dma_desc;
653 688
654 return &first->tx_dma_desc; 689spin_unlock:
690 spin_unlock_irqrestore(&atchan->lock, irqflags);
691 return ret;
655} 692}
656 693
657static struct dma_async_tx_descriptor * 694static struct dma_async_tx_descriptor *
@@ -664,6 +701,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
664 struct at_xdmac_desc *first = NULL, *prev = NULL; 701 struct at_xdmac_desc *first = NULL, *prev = NULL;
665 unsigned int periods = buf_len / period_len; 702 unsigned int periods = buf_len / period_len;
666 int i; 703 int i;
704 unsigned long irqflags;
667 705
668 dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n", 706 dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
669 __func__, &buf_addr, buf_len, period_len, 707 __func__, &buf_addr, buf_len, period_len,
@@ -679,32 +717,34 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
679 return NULL; 717 return NULL;
680 } 718 }
681 719
720 if (at_xdmac_compute_chan_conf(chan, direction))
721 return NULL;
722
682 for (i = 0; i < periods; i++) { 723 for (i = 0; i < periods; i++) {
683 struct at_xdmac_desc *desc = NULL; 724 struct at_xdmac_desc *desc = NULL;
684 725
685 spin_lock_bh(&atchan->lock); 726 spin_lock_irqsave(&atchan->lock, irqflags);
686 desc = at_xdmac_get_desc(atchan); 727 desc = at_xdmac_get_desc(atchan);
687 if (!desc) { 728 if (!desc) {
688 dev_err(chan2dev(chan), "can't get descriptor\n"); 729 dev_err(chan2dev(chan), "can't get descriptor\n");
689 if (first) 730 if (first)
690 list_splice_init(&first->descs_list, &atchan->free_descs_list); 731 list_splice_init(&first->descs_list, &atchan->free_descs_list);
691 spin_unlock_bh(&atchan->lock); 732 spin_unlock_irqrestore(&atchan->lock, irqflags);
692 return NULL; 733 return NULL;
693 } 734 }
694 spin_unlock_bh(&atchan->lock); 735 spin_unlock_irqrestore(&atchan->lock, irqflags);
695 dev_dbg(chan2dev(chan), 736 dev_dbg(chan2dev(chan),
696 "%s: desc=0x%p, tx_dma_desc.phys=%pad\n", 737 "%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
697 __func__, desc, &desc->tx_dma_desc.phys); 738 __func__, desc, &desc->tx_dma_desc.phys);
698 739
699 if (direction == DMA_DEV_TO_MEM) { 740 if (direction == DMA_DEV_TO_MEM) {
700 desc->lld.mbr_sa = atchan->per_src_addr; 741 desc->lld.mbr_sa = atchan->sconfig.src_addr;
701 desc->lld.mbr_da = buf_addr + i * period_len; 742 desc->lld.mbr_da = buf_addr + i * period_len;
702 desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
703 } else { 743 } else {
704 desc->lld.mbr_sa = buf_addr + i * period_len; 744 desc->lld.mbr_sa = buf_addr + i * period_len;
705 desc->lld.mbr_da = atchan->per_dst_addr; 745 desc->lld.mbr_da = atchan->sconfig.dst_addr;
706 desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
707 } 746 }
747 desc->lld.mbr_cfg = atchan->cfg;
708 desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 748 desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
709 | AT_XDMAC_MBR_UBC_NDEN 749 | AT_XDMAC_MBR_UBC_NDEN
710 | AT_XDMAC_MBR_UBC_NSEN 750 | AT_XDMAC_MBR_UBC_NSEN
@@ -766,6 +806,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
766 | AT_XDMAC_CC_SIF(0) 806 | AT_XDMAC_CC_SIF(0)
767 | AT_XDMAC_CC_MBSIZE_SIXTEEN 807 | AT_XDMAC_CC_MBSIZE_SIXTEEN
768 | AT_XDMAC_CC_TYPE_MEM_TRAN; 808 | AT_XDMAC_CC_TYPE_MEM_TRAN;
809 unsigned long irqflags;
769 810
770 dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n", 811 dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
771 __func__, &src, &dest, len, flags); 812 __func__, &src, &dest, len, flags);
@@ -798,9 +839,9 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
798 839
799 dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size); 840 dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
800 841
801 spin_lock_bh(&atchan->lock); 842 spin_lock_irqsave(&atchan->lock, irqflags);
802 desc = at_xdmac_get_desc(atchan); 843 desc = at_xdmac_get_desc(atchan);
803 spin_unlock_bh(&atchan->lock); 844 spin_unlock_irqrestore(&atchan->lock, irqflags);
804 if (!desc) { 845 if (!desc) {
805 dev_err(chan2dev(chan), "can't get descriptor\n"); 846 dev_err(chan2dev(chan), "can't get descriptor\n");
806 if (first) 847 if (first)
@@ -886,6 +927,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
886 int residue; 927 int residue;
887 u32 cur_nda, mask, value; 928 u32 cur_nda, mask, value;
888 u8 dwidth = 0; 929 u8 dwidth = 0;
930 unsigned long flags;
889 931
890 ret = dma_cookie_status(chan, cookie, txstate); 932 ret = dma_cookie_status(chan, cookie, txstate);
891 if (ret == DMA_COMPLETE) 933 if (ret == DMA_COMPLETE)
@@ -894,7 +936,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
894 if (!txstate) 936 if (!txstate)
895 return ret; 937 return ret;
896 938
897 spin_lock_bh(&atchan->lock); 939 spin_lock_irqsave(&atchan->lock, flags);
898 940
899 desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node); 941 desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
900 942
@@ -904,8 +946,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
904 */ 946 */
905 if (!desc->active_xfer) { 947 if (!desc->active_xfer) {
906 dma_set_residue(txstate, desc->xfer_size); 948 dma_set_residue(txstate, desc->xfer_size);
907 spin_unlock_bh(&atchan->lock); 949 goto spin_unlock;
908 return ret;
909 } 950 }
910 951
911 residue = desc->xfer_size; 952 residue = desc->xfer_size;
@@ -936,14 +977,14 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
936 } 977 }
937 residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth; 978 residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
938 979
939 spin_unlock_bh(&atchan->lock);
940
941 dma_set_residue(txstate, residue); 980 dma_set_residue(txstate, residue);
942 981
943 dev_dbg(chan2dev(chan), 982 dev_dbg(chan2dev(chan),
944 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n", 983 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
945 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue); 984 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
946 985
986spin_unlock:
987 spin_unlock_irqrestore(&atchan->lock, flags);
947 return ret; 988 return ret;
948} 989}
949 990
@@ -964,8 +1005,9 @@ static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
964static void at_xdmac_advance_work(struct at_xdmac_chan *atchan) 1005static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
965{ 1006{
966 struct at_xdmac_desc *desc; 1007 struct at_xdmac_desc *desc;
1008 unsigned long flags;
967 1009
968 spin_lock_bh(&atchan->lock); 1010 spin_lock_irqsave(&atchan->lock, flags);
969 1011
970 /* 1012 /*
971 * If channel is enabled, do nothing, advance_work will be triggered 1013 * If channel is enabled, do nothing, advance_work will be triggered
@@ -980,7 +1022,7 @@ static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
980 at_xdmac_start_xfer(atchan, desc); 1022 at_xdmac_start_xfer(atchan, desc);
981 } 1023 }
982 1024
983 spin_unlock_bh(&atchan->lock); 1025 spin_unlock_irqrestore(&atchan->lock, flags);
984} 1026}
985 1027
986static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) 1028static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
@@ -1116,12 +1158,13 @@ static int at_xdmac_device_config(struct dma_chan *chan,
1116{ 1158{
1117 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 1159 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
1118 int ret; 1160 int ret;
1161 unsigned long flags;
1119 1162
1120 dev_dbg(chan2dev(chan), "%s\n", __func__); 1163 dev_dbg(chan2dev(chan), "%s\n", __func__);
1121 1164
1122 spin_lock_bh(&atchan->lock); 1165 spin_lock_irqsave(&atchan->lock, flags);
1123 ret = at_xdmac_set_slave_config(chan, config); 1166 ret = at_xdmac_set_slave_config(chan, config);
1124 spin_unlock_bh(&atchan->lock); 1167 spin_unlock_irqrestore(&atchan->lock, flags);
1125 1168
1126 return ret; 1169 return ret;
1127} 1170}
@@ -1130,18 +1173,19 @@ static int at_xdmac_device_pause(struct dma_chan *chan)
1130{ 1173{
1131 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 1174 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
1132 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); 1175 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
1176 unsigned long flags;
1133 1177
1134 dev_dbg(chan2dev(chan), "%s\n", __func__); 1178 dev_dbg(chan2dev(chan), "%s\n", __func__);
1135 1179
1136 if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status)) 1180 if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
1137 return 0; 1181 return 0;
1138 1182
1139 spin_lock_bh(&atchan->lock); 1183 spin_lock_irqsave(&atchan->lock, flags);
1140 at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask); 1184 at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask);
1141 while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) 1185 while (at_xdmac_chan_read(atchan, AT_XDMAC_CC)
1142 & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) 1186 & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
1143 cpu_relax(); 1187 cpu_relax();
1144 spin_unlock_bh(&atchan->lock); 1188 spin_unlock_irqrestore(&atchan->lock, flags);
1145 1189
1146 return 0; 1190 return 0;
1147} 1191}
@@ -1150,18 +1194,19 @@ static int at_xdmac_device_resume(struct dma_chan *chan)
1150{ 1194{
1151 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 1195 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
1152 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); 1196 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
1197 unsigned long flags;
1153 1198
1154 dev_dbg(chan2dev(chan), "%s\n", __func__); 1199 dev_dbg(chan2dev(chan), "%s\n", __func__);
1155 1200
1156 spin_lock_bh(&atchan->lock); 1201 spin_lock_irqsave(&atchan->lock, flags);
1157 if (!at_xdmac_chan_is_paused(atchan)) { 1202 if (!at_xdmac_chan_is_paused(atchan)) {
1158 spin_unlock_bh(&atchan->lock); 1203 spin_unlock_irqrestore(&atchan->lock, flags);
1159 return 0; 1204 return 0;
1160 } 1205 }
1161 1206
1162 at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask); 1207 at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
1163 clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); 1208 clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
1164 spin_unlock_bh(&atchan->lock); 1209 spin_unlock_irqrestore(&atchan->lock, flags);
1165 1210
1166 return 0; 1211 return 0;
1167} 1212}
@@ -1171,10 +1216,11 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
1171 struct at_xdmac_desc *desc, *_desc; 1216 struct at_xdmac_desc *desc, *_desc;
1172 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 1217 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
1173 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); 1218 struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
1219 unsigned long flags;
1174 1220
1175 dev_dbg(chan2dev(chan), "%s\n", __func__); 1221 dev_dbg(chan2dev(chan), "%s\n", __func__);
1176 1222
1177 spin_lock_bh(&atchan->lock); 1223 spin_lock_irqsave(&atchan->lock, flags);
1178 at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); 1224 at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
1179 while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask) 1225 while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
1180 cpu_relax(); 1226 cpu_relax();
@@ -1184,7 +1230,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan)
1184 at_xdmac_remove_xfer(atchan, desc); 1230 at_xdmac_remove_xfer(atchan, desc);
1185 1231
1186 clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); 1232 clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
1187 spin_unlock_bh(&atchan->lock); 1233 spin_unlock_irqrestore(&atchan->lock, flags);
1188 1234
1189 return 0; 1235 return 0;
1190} 1236}
@@ -1194,8 +1240,9 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
1194 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); 1240 struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
1195 struct at_xdmac_desc *desc; 1241 struct at_xdmac_desc *desc;
1196 int i; 1242 int i;
1243 unsigned long flags;
1197 1244
1198 spin_lock_bh(&atchan->lock); 1245 spin_lock_irqsave(&atchan->lock, flags);
1199 1246
1200 if (at_xdmac_chan_is_enabled(atchan)) { 1247 if (at_xdmac_chan_is_enabled(atchan)) {
1201 dev_err(chan2dev(chan), 1248 dev_err(chan2dev(chan),
@@ -1226,7 +1273,7 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
1226 dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i); 1273 dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
1227 1274
1228spin_unlock: 1275spin_unlock:
1229 spin_unlock_bh(&atchan->lock); 1276 spin_unlock_irqrestore(&atchan->lock, flags);
1230 return i; 1277 return i;
1231} 1278}
1232 1279
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2890d744bb1b..3ddfd1f6c23c 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -487,7 +487,11 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
487 caps->directions = device->directions; 487 caps->directions = device->directions;
488 caps->residue_granularity = device->residue_granularity; 488 caps->residue_granularity = device->residue_granularity;
489 489
490 caps->cmd_pause = !!device->device_pause; 490 /*
 491 * Some devices implement only pause (e.g. to read back the residue)
 492 * but no resume. However, cmd_pause is advertised as pause AND resume.
493 */
494 caps->cmd_pause = !!(device->device_pause && device->device_resume);
491 caps->cmd_terminate = !!device->device_terminate_all; 495 caps->cmd_terminate = !!device->device_terminate_all;
492 496
493 return 0; 497 return 0;
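
The dmaengine change narrows what cmd_pause promises: it is now reported only when both ->device_pause and ->device_resume exist, since clients read the flag as "pause and resume both work". A hedged sketch of how a client might consult it, where chan is an illustrative, already-requested channel:

#include <linux/dmaengine.h>

static bool can_pause_and_resume(struct dma_chan *chan)
{
        struct dma_slave_caps caps;

        /* dma_get_slave_caps() reports what the controller advertises */
        if (dma_get_slave_caps(chan, &caps))
                return false;
        return caps.cmd_pause;          /* now implies resume works too */
}
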
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 9b84def7a353..f42f71e37e73 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -384,7 +384,10 @@ static int hsu_dma_terminate_all(struct dma_chan *chan)
384 spin_lock_irqsave(&hsuc->vchan.lock, flags); 384 spin_lock_irqsave(&hsuc->vchan.lock, flags);
385 385
386 hsu_dma_stop_channel(hsuc); 386 hsu_dma_stop_channel(hsuc);
387 hsuc->desc = NULL; 387 if (hsuc->desc) {
388 hsu_dma_desc_free(&hsuc->desc->vdesc);
389 hsuc->desc = NULL;
390 }
388 391
389 vchan_get_all_descriptors(&hsuc->vchan, &head); 392 vchan_get_all_descriptors(&hsuc->vchan, &head);
390 spin_unlock_irqrestore(&hsuc->vchan.lock, flags); 393 spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
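
The hsu fix plugs a leak: the descriptor currently on the hardware sits on no vchan list, so vchan_get_all_descriptors() never sees it and terminate_all has to free it explicitly before clearing the pointer. A sketch of the general shape, with my_chan and the quiesce step as illustrative stand-ins rather than hsu names:

#include <linux/spinlock.h>
#include "virt-dma.h"                   /* drivers/dma helper header */

struct my_chan {
        struct virt_dma_chan vc;
        struct virt_dma_desc *active;   /* desc on the hardware right now */
};

static void my_terminate_all(struct my_chan *c)
{
        unsigned long flags;
        LIST_HEAD(head);

        spin_lock_irqsave(&c->vc.lock, flags);
        /* illustrative: quiesce the channel hardware first */
        if (c->active) {                /* off the lists, would leak */
                c->vc.desc_free(c->active);
                c->active = NULL;
        }
        vchan_get_all_descriptors(&c->vc, &head);
        spin_unlock_irqrestore(&c->vc.lock, flags);

        vchan_dma_desc_free_list(&c->vc, &head);
}
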
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index a7d9d3029b14..340f9e607cd8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2127,6 +2127,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
2127 struct pl330_dmac *pl330 = pch->dmac; 2127 struct pl330_dmac *pl330 = pch->dmac;
2128 LIST_HEAD(list); 2128 LIST_HEAD(list);
2129 2129
2130 pm_runtime_get_sync(pl330->ddma.dev);
2130 spin_lock_irqsave(&pch->lock, flags); 2131 spin_lock_irqsave(&pch->lock, flags);
2131 spin_lock(&pl330->lock); 2132 spin_lock(&pl330->lock);
2132 _stop(pch->thread); 2133 _stop(pch->thread);
@@ -2151,6 +2152,8 @@ static int pl330_terminate_all(struct dma_chan *chan)
2151 list_splice_tail_init(&pch->work_list, &pl330->desc_pool); 2152 list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
2152 list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); 2153 list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
2153 spin_unlock_irqrestore(&pch->lock, flags); 2154 spin_unlock_irqrestore(&pch->lock, flags);
2155 pm_runtime_mark_last_busy(pl330->ddma.dev);
2156 pm_runtime_put_autosuspend(pl330->ddma.dev);
2154 2157
2155 return 0; 2158 return 0;
2156} 2159}
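
pl330_terminate_all now brackets its register pokes with a runtime-PM reference, guaranteeing the controller is awake while _stop() runs and letting it autosuspend again afterwards. The generic bracket, sketched with an illustrative dev:

#include <linux/pm_runtime.h>

static void poke_hw(struct device *dev)
{
        pm_runtime_get_sync(dev);        /* resume device, hold a reference */
        /* ... touch registers safely here ... */
        pm_runtime_mark_last_busy(dev);  /* restart the autosuspend timer */
        pm_runtime_put_autosuspend(dev); /* drop ref; suspend after timeout */
}
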
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index 071c2c969eec..72791232e46b 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -186,8 +186,20 @@ struct ibft_kobject {
186 186
187static struct iscsi_boot_kset *boot_kset; 187static struct iscsi_boot_kset *boot_kset;
188 188
189/* fully null address */
189static const char nulls[16]; 190static const char nulls[16];
190 191
192/* IPv4-mapped IPv6 ::ffff:0.0.0.0 */
193static const char mapped_nulls[16] = { 0x00, 0x00, 0x00, 0x00,
194 0x00, 0x00, 0x00, 0x00,
195 0x00, 0x00, 0xff, 0xff,
196 0x00, 0x00, 0x00, 0x00 };
197
198static int address_not_null(u8 *ip)
199{
200 return (memcmp(ip, nulls, 16) && memcmp(ip, mapped_nulls, 16));
201}
202
191/* 203/*
192 * Helper functions to parse data properly. 204 * Helper functions to parse data properly.
193 */ 205 */
@@ -445,7 +457,7 @@ static umode_t ibft_check_nic_for(void *data, int type)
445 rc = S_IRUGO; 457 rc = S_IRUGO;
446 break; 458 break;
447 case ISCSI_BOOT_ETH_IP_ADDR: 459 case ISCSI_BOOT_ETH_IP_ADDR:
448 if (memcmp(nic->ip_addr, nulls, sizeof(nic->ip_addr))) 460 if (address_not_null(nic->ip_addr))
449 rc = S_IRUGO; 461 rc = S_IRUGO;
450 break; 462 break;
451 case ISCSI_BOOT_ETH_SUBNET_MASK: 463 case ISCSI_BOOT_ETH_SUBNET_MASK:
@@ -456,21 +468,19 @@ static umode_t ibft_check_nic_for(void *data, int type)
456 rc = S_IRUGO; 468 rc = S_IRUGO;
457 break; 469 break;
458 case ISCSI_BOOT_ETH_GATEWAY: 470 case ISCSI_BOOT_ETH_GATEWAY:
459 if (memcmp(nic->gateway, nulls, sizeof(nic->gateway))) 471 if (address_not_null(nic->gateway))
460 rc = S_IRUGO; 472 rc = S_IRUGO;
461 break; 473 break;
462 case ISCSI_BOOT_ETH_PRIMARY_DNS: 474 case ISCSI_BOOT_ETH_PRIMARY_DNS:
463 if (memcmp(nic->primary_dns, nulls, 475 if (address_not_null(nic->primary_dns))
464 sizeof(nic->primary_dns)))
465 rc = S_IRUGO; 476 rc = S_IRUGO;
466 break; 477 break;
467 case ISCSI_BOOT_ETH_SECONDARY_DNS: 478 case ISCSI_BOOT_ETH_SECONDARY_DNS:
468 if (memcmp(nic->secondary_dns, nulls, 479 if (address_not_null(nic->secondary_dns))
469 sizeof(nic->secondary_dns)))
470 rc = S_IRUGO; 480 rc = S_IRUGO;
471 break; 481 break;
472 case ISCSI_BOOT_ETH_DHCP: 482 case ISCSI_BOOT_ETH_DHCP:
473 if (memcmp(nic->dhcp, nulls, sizeof(nic->dhcp))) 483 if (address_not_null(nic->dhcp))
474 rc = S_IRUGO; 484 rc = S_IRUGO;
475 break; 485 break;
476 case ISCSI_BOOT_ETH_VLAN: 486 case ISCSI_BOOT_ETH_VLAN:
@@ -536,23 +546,19 @@ static umode_t __init ibft_check_initiator_for(void *data, int type)
536 rc = S_IRUGO; 546 rc = S_IRUGO;
537 break; 547 break;
538 case ISCSI_BOOT_INI_ISNS_SERVER: 548 case ISCSI_BOOT_INI_ISNS_SERVER:
539 if (memcmp(init->isns_server, nulls, 549 if (address_not_null(init->isns_server))
540 sizeof(init->isns_server)))
541 rc = S_IRUGO; 550 rc = S_IRUGO;
542 break; 551 break;
543 case ISCSI_BOOT_INI_SLP_SERVER: 552 case ISCSI_BOOT_INI_SLP_SERVER:
544 if (memcmp(init->slp_server, nulls, 553 if (address_not_null(init->slp_server))
545 sizeof(init->slp_server)))
546 rc = S_IRUGO; 554 rc = S_IRUGO;
547 break; 555 break;
548 case ISCSI_BOOT_INI_PRI_RADIUS_SERVER: 556 case ISCSI_BOOT_INI_PRI_RADIUS_SERVER:
549 if (memcmp(init->pri_radius_server, nulls, 557 if (address_not_null(init->pri_radius_server))
550 sizeof(init->pri_radius_server)))
551 rc = S_IRUGO; 558 rc = S_IRUGO;
552 break; 559 break;
553 case ISCSI_BOOT_INI_SEC_RADIUS_SERVER: 560 case ISCSI_BOOT_INI_SEC_RADIUS_SERVER:
554 if (memcmp(init->sec_radius_server, nulls, 561 if (address_not_null(init->sec_radius_server))
555 sizeof(init->sec_radius_server)))
556 rc = S_IRUGO; 562 rc = S_IRUGO;
557 break; 563 break;
558 case ISCSI_BOOT_INI_INITIATOR_NAME: 564 case ISCSI_BOOT_INI_INITIATOR_NAME:
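
The iBFT helper now treats two byte patterns as "no address": all zeroes, and the IPv4-mapped form ::ffff:0.0.0.0 that firmware writes when an IPv4 field was left unset. A standalone demonstration of the same comparison, as plain user-space C rather than driver code:

#include <stdio.h>
#include <string.h>

static const unsigned char nulls[16];           /* all-zero address */
static const unsigned char mapped_nulls[16] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
};

static int address_not_null(const unsigned char *ip)
{
        return memcmp(ip, nulls, 16) && memcmp(ip, mapped_nulls, 16);
}

int main(void)
{
        unsigned char a[16] = { [10] = 0xff, [11] = 0xff }; /* ::ffff:0.0.0.0 */

        printf("%d\n", address_not_null(a));    /* prints 0: still unset */
        return 0;
}
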
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index e469c4b2e8cc..c25728bc388a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -684,8 +684,6 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
684 dev->node_props.cpu_core_id_base); 684 dev->node_props.cpu_core_id_base);
685 sysfs_show_32bit_prop(buffer, "simd_id_base", 685 sysfs_show_32bit_prop(buffer, "simd_id_base",
686 dev->node_props.simd_id_base); 686 dev->node_props.simd_id_base);
687 sysfs_show_32bit_prop(buffer, "capability",
688 dev->node_props.capability);
689 sysfs_show_32bit_prop(buffer, "max_waves_per_simd", 687 sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
690 dev->node_props.max_waves_per_simd); 688 dev->node_props.max_waves_per_simd);
691 sysfs_show_32bit_prop(buffer, "lds_size_in_kb", 689 sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
@@ -736,6 +734,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
736 dev->gpu->kfd2kgd->get_fw_version( 734 dev->gpu->kfd2kgd->get_fw_version(
737 dev->gpu->kgd, 735 dev->gpu->kgd,
738 KGD_ENGINE_MEC1)); 736 KGD_ENGINE_MEC1));
737 sysfs_show_32bit_prop(buffer, "capability",
738 dev->node_props.capability);
739 } 739 }
740 740
741 return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", 741 return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index ffc305fc2076..eb7e61078a5b 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -217,7 +217,7 @@ static ssize_t status_store(struct device *device,
217 217
218 mutex_unlock(&dev->mode_config.mutex); 218 mutex_unlock(&dev->mode_config.mutex);
219 219
220 return ret; 220 return ret ? ret : count;
221} 221}
222 222
223static ssize_t status_show(struct device *device, 223static ssize_t status_show(struct device *device,
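
The one-line drm_sysfs fix restores the sysfs contract: a ->store callback returns a negative errno or the number of bytes it consumed, and returning 0 on success can make userspace retry the write indefinitely. The convention, sketched with hypothetical foo_store() and parse_input():

#include <linux/device.h>

extern int parse_input(const char *buf);        /* hypothetical, 0 or -errno */

static ssize_t foo_store(struct device *dev, struct device_attribute *attr,
                         const char *buf, size_t count)
{
        int ret = parse_input(buf);

        return ret ? ret : count;       /* errno, or bytes consumed */
}
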
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 007c7d7d8295..dc55c51964ab 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1667,12 +1667,15 @@ static int i915_sr_status(struct seq_file *m, void *unused)
1667 1667
1668 if (HAS_PCH_SPLIT(dev)) 1668 if (HAS_PCH_SPLIT(dev))
1669 sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN; 1669 sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
1670 else if (IS_CRESTLINE(dev) || IS_I945G(dev) || IS_I945GM(dev)) 1670 else if (IS_CRESTLINE(dev) || IS_G4X(dev) ||
1671 IS_I945G(dev) || IS_I945GM(dev))
1671 sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN; 1672 sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
1672 else if (IS_I915GM(dev)) 1673 else if (IS_I915GM(dev))
1673 sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN; 1674 sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
1674 else if (IS_PINEVIEW(dev)) 1675 else if (IS_PINEVIEW(dev))
1675 sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN; 1676 sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
1677 else if (IS_VALLEYVIEW(dev))
1678 sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
1676 1679
1677 intel_runtime_pm_put(dev_priv); 1680 intel_runtime_pm_put(dev_priv);
1678 1681
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53394f998a1f..2d0995e7afc3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3003,8 +3003,8 @@ int i915_vma_unbind(struct i915_vma *vma)
3003 } else if (vma->ggtt_view.pages) { 3003 } else if (vma->ggtt_view.pages) {
3004 sg_free_table(vma->ggtt_view.pages); 3004 sg_free_table(vma->ggtt_view.pages);
3005 kfree(vma->ggtt_view.pages); 3005 kfree(vma->ggtt_view.pages);
3006 vma->ggtt_view.pages = NULL;
3007 } 3006 }
3007 vma->ggtt_view.pages = NULL;
3008 } 3008 }
3009 3009
3010 drm_mm_remove_node(&vma->node); 3010 drm_mm_remove_node(&vma->node);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index f27346e907b1..d714a4b5711e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -880,10 +880,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
880 DP_AUX_CH_CTL_RECEIVE_ERROR)) 880 DP_AUX_CH_CTL_RECEIVE_ERROR))
881 continue; 881 continue;
882 if (status & DP_AUX_CH_CTL_DONE) 882 if (status & DP_AUX_CH_CTL_DONE)
883 break; 883 goto done;
884 } 884 }
885 if (status & DP_AUX_CH_CTL_DONE)
886 break;
887 } 885 }
888 886
889 if ((status & DP_AUX_CH_CTL_DONE) == 0) { 887 if ((status & DP_AUX_CH_CTL_DONE) == 0) {
@@ -892,6 +890,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
892 goto out; 890 goto out;
893 } 891 }
894 892
893done:
895 /* Check for timeout or receive error. 894 /* Check for timeout or receive error.
896 * Timeouts occur when the sink is not connected 895 * Timeouts occur when the sink is not connected
897 */ 896 */
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 56e437e31580..ae628001fd97 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -435,7 +435,7 @@ gmbus_xfer(struct i2c_adapter *adapter,
435 struct intel_gmbus, 435 struct intel_gmbus,
436 adapter); 436 adapter);
437 struct drm_i915_private *dev_priv = bus->dev_priv; 437 struct drm_i915_private *dev_priv = bus->dev_priv;
438 int i, reg_offset; 438 int i = 0, inc, try = 0, reg_offset;
439 int ret = 0; 439 int ret = 0;
440 440
441 intel_aux_display_runtime_get(dev_priv); 441 intel_aux_display_runtime_get(dev_priv);
@@ -448,12 +448,14 @@ gmbus_xfer(struct i2c_adapter *adapter,
448 448
449 reg_offset = dev_priv->gpio_mmio_base; 449 reg_offset = dev_priv->gpio_mmio_base;
450 450
451retry:
451 I915_WRITE(GMBUS0 + reg_offset, bus->reg0); 452 I915_WRITE(GMBUS0 + reg_offset, bus->reg0);
452 453
453 for (i = 0; i < num; i++) { 454 for (; i < num; i += inc) {
455 inc = 1;
454 if (gmbus_is_index_read(msgs, i, num)) { 456 if (gmbus_is_index_read(msgs, i, num)) {
455 ret = gmbus_xfer_index_read(dev_priv, &msgs[i]); 457 ret = gmbus_xfer_index_read(dev_priv, &msgs[i]);
456 i += 1; /* set i to the index of the read xfer */ 458 inc = 2; /* an index read is two msgs */
457 } else if (msgs[i].flags & I2C_M_RD) { 459 } else if (msgs[i].flags & I2C_M_RD) {
458 ret = gmbus_xfer_read(dev_priv, &msgs[i], 0); 460 ret = gmbus_xfer_read(dev_priv, &msgs[i], 0);
459 } else { 461 } else {
@@ -525,6 +527,18 @@ clear_err:
525 adapter->name, msgs[i].addr, 527 adapter->name, msgs[i].addr,
526 (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); 528 (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len);
527 529
530 /*
531 * Passive adapters sometimes NAK the first probe. Retry the first
532 * message once on -ENXIO for GMBUS transfers; the bit banging algorithm
533 * has retries internally. See also the retry loop in
534 * drm_do_probe_ddc_edid, which bails out on the first -ENXIO.
535 */
536 if (ret == -ENXIO && i == 0 && try++ == 0) {
537 DRM_DEBUG_KMS("GMBUS [%s] NAK on first message, retry\n",
538 adapter->name);
539 goto retry;
540 }
541
528 goto out; 542 goto out;
529 543
530timeout: 544timeout:
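
The gmbus change retries the transfer exactly once when the very first message NAKs with -ENXIO; because i is no longer reset at the retry label, the loop would resume where it stopped, and the try counter makes the retry single-shot. The control flow in isolation, with a hypothetical do_xfer() helper:

#include <linux/i2c.h>

extern int do_xfer(struct i2c_msg *msg);        /* hypothetical helper */

static int xfer_with_retry(struct i2c_msg *msgs, int num)
{
        int i = 0, try = 0, ret = 0;

retry:
        for (; i < num; i++) {          /* resumes where it left off */
                ret = do_xfer(&msgs[i]);
                if (ret < 0)
                        break;
        }
        if (ret == -ENXIO && i == 0 && try++ == 0)
                goto retry;             /* one retry, first message only */
        return ret;
}
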
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 09df74b8e917..424e62197787 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1134,6 +1134,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
1134 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); 1134 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1135 I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff); 1135 I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
1136 1136
1137 if (ring->status_page.obj) {
1138 I915_WRITE(RING_HWS_PGA(ring->mmio_base),
1139 (u32)ring->status_page.gfx_addr);
1140 POSTING_READ(RING_HWS_PGA(ring->mmio_base));
1141 }
1142
1137 I915_WRITE(RING_MODE_GEN7(ring), 1143 I915_WRITE(RING_MODE_GEN7(ring),
1138 _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | 1144 _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1139 _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); 1145 _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e2502b889..005b5e04de4d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -901,13 +901,6 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
901 GEN6_WIZ_HASHING_MASK, 901 GEN6_WIZ_HASHING_MASK,
902 GEN6_WIZ_HASHING_16x4); 902 GEN6_WIZ_HASHING_16x4);
903 903
904 if (INTEL_REVID(dev) == SKL_REVID_C0 ||
905 INTEL_REVID(dev) == SKL_REVID_D0)
906 /* WaBarrierPerformanceFixDisable:skl */
907 WA_SET_BIT_MASKED(HDC_CHICKEN0,
908 HDC_FENCE_DEST_SLM_DISABLE |
909 HDC_BARRIER_PERFORMANCE_DISABLE);
910
911 return 0; 904 return 0;
912} 905}
913 906
@@ -1024,6 +1017,13 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
1024 WA_SET_BIT_MASKED(HIZ_CHICKEN, 1017 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1025 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); 1018 BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1026 1019
1020 if (INTEL_REVID(dev) == SKL_REVID_C0 ||
1021 INTEL_REVID(dev) == SKL_REVID_D0)
1022 /* WaBarrierPerformanceFixDisable:skl */
1023 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1024 HDC_FENCE_DEST_SLM_DISABLE |
1025 HDC_BARRIER_PERFORMANCE_DISABLE);
1026
1027 return skl_tune_iz_hashing(ring); 1027 return skl_tune_iz_hashing(ring);
1028} 1028}
1029 1029
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index e87d2f418de4..987b81f31b0e 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2550,7 +2550,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device)
2550 2550
2551 DRM_DEBUG_KMS("initialising analog device %d\n", device); 2551 DRM_DEBUG_KMS("initialising analog device %d\n", device);
2552 2552
2553 intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL); 2553 intel_sdvo_connector = intel_sdvo_connector_alloc();
2554 if (!intel_sdvo_connector) 2554 if (!intel_sdvo_connector)
2555 return false; 2555 return false;
2556 2556
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 6e84df9369a6..ad4b9010dfb0 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1526,6 +1526,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
1526 return MODE_BANDWIDTH; 1526 return MODE_BANDWIDTH;
1527 } 1527 }
1528 1528
1529 if ((mode->hdisplay % 8) != 0 || (mode->hsync_start % 8) != 0 ||
1530 (mode->hsync_end % 8) != 0 || (mode->htotal % 8) != 0) {
1531 return MODE_H_ILLEGAL;
1532 }
1533
1529 if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || 1534 if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 ||
1530 mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 || 1535 mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 ||
1531 mode->crtc_vdisplay > 2048 || mode->crtc_vsync_start > 4096 || 1536 mode->crtc_vdisplay > 2048 || mode->crtc_vsync_start > 4096 ||
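
mga_vga_mode_valid now also rejects modes whose horizontal timings are not multiples of 8, which matters on hardware that, presumably, programs the CRTC horizontal registers in 8-pixel units. The added constraint as a standalone predicate with illustrative parameter names:

static int htimings_valid(unsigned int hdisplay, unsigned int hsync_start,
                          unsigned int hsync_end, unsigned int htotal)
{
        /* every horizontal parameter must be 8-pixel aligned */
        return (hdisplay % 8) == 0 && (hsync_start % 8) == 0 &&
               (hsync_end % 8) == 0 && (htotal % 8) == 0;
}
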
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e597ffc26563..dac78ad24b31 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -580,9 +580,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
580 else 580 else
581 radeon_crtc->pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; 581 radeon_crtc->pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV;
582 582
583 /* if there is no audio, set MINM_OVER_MAXP */
584 if (!drm_detect_monitor_audio(radeon_connector_edid(connector)))
585 radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
586 if (rdev->family < CHIP_RV770) 583 if (rdev->family < CHIP_RV770)
587 radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP; 584 radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
588 /* use frac fb div on APUs */ 585 /* use frac fb div on APUs */
@@ -1798,9 +1795,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
1798 if ((crtc->mode.clock == test_crtc->mode.clock) && 1795 if ((crtc->mode.clock == test_crtc->mode.clock) &&
1799 (adjusted_clock == test_adjusted_clock) && 1796 (adjusted_clock == test_adjusted_clock) &&
1800 (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) && 1797 (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) &&
1801 (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) && 1798 (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID))
1802 (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) ==
1803 drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector))))
1804 return test_radeon_crtc->pll_id; 1799 return test_radeon_crtc->pll_id;
1805 } 1800 }
1806 } 1801 }
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index f04205170b8a..cfa3a84a2af0 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -173,7 +173,7 @@ void dce3_2_hdmi_update_acr(struct drm_encoder *encoder, long offset,
173 struct drm_device *dev = encoder->dev; 173 struct drm_device *dev = encoder->dev;
174 struct radeon_device *rdev = dev->dev_private; 174 struct radeon_device *rdev = dev->dev_private;
175 175
176 WREG32(HDMI0_ACR_PACKET_CONTROL + offset, 176 WREG32(DCE3_HDMI0_ACR_PACKET_CONTROL + offset,
177 HDMI0_ACR_SOURCE | /* select SW CTS value */ 177 HDMI0_ACR_SOURCE | /* select SW CTS value */
 178 HDMI0_ACR_AUTO_SEND); /* allow hw to send ACR packets when required */ 178 HDMI0_ACR_AUTO_SEND); /* allow hw to send ACR packets when required */
179 179
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b7ca4c514621..a7fdfa4f0857 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1463,6 +1463,21 @@ int radeon_device_init(struct radeon_device *rdev,
1463 if (r) 1463 if (r)
1464 DRM_ERROR("ib ring test failed (%d).\n", r); 1464 DRM_ERROR("ib ring test failed (%d).\n", r);
1465 1465
1466 /*
1467 * Turks/Thames GPUs will freeze the whole laptop if DPM is not restarted
1468 * after the CP ring has chewed through at least one packet. Hence we
1469 * stop and restart DPM here, after radeon_ib_ring_tests().
1470 */
1471 if (rdev->pm.dpm_enabled &&
1472 (rdev->pm.pm_method == PM_METHOD_DPM) &&
1473 (rdev->family == CHIP_TURKS) &&
1474 (rdev->flags & RADEON_IS_MOBILITY)) {
1475 mutex_lock(&rdev->pm.mutex);
1476 radeon_dpm_disable(rdev);
1477 radeon_dpm_enable(rdev);
1478 mutex_unlock(&rdev->pm.mutex);
1479 }
1480
1466 if ((radeon_testing & 1)) { 1481 if ((radeon_testing & 1)) {
1467 if (rdev->accel_working) 1482 if (rdev->accel_working)
1468 radeon_test_moves(rdev); 1483 radeon_test_moves(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index 2b98ed3e684d..257b10be5cda 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -663,12 +663,17 @@ int
663radeon_dp_mst_probe(struct radeon_connector *radeon_connector) 663radeon_dp_mst_probe(struct radeon_connector *radeon_connector)
664{ 664{
665 struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; 665 struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
666 struct drm_device *dev = radeon_connector->base.dev;
667 struct radeon_device *rdev = dev->dev_private;
666 int ret; 668 int ret;
667 u8 msg[1]; 669 u8 msg[1];
668 670
669 if (!radeon_mst) 671 if (!radeon_mst)
670 return 0; 672 return 0;
671 673
674 if (!ASIC_IS_DCE5(rdev))
675 return 0;
676
672 if (dig_connector->dpcd[DP_DPCD_REV] < 0x12) 677 if (dig_connector->dpcd[DP_DPCD_REV] < 0x12)
673 return 0; 678 return 0;
674 679
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 7b2a7335cc5d..b0acf50d9558 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -576,6 +576,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
576 if (radeon_get_allowed_info_register(rdev, *value, value)) 576 if (radeon_get_allowed_info_register(rdev, *value, value))
577 return -EINVAL; 577 return -EINVAL;
578 break; 578 break;
579 case RADEON_INFO_VA_UNMAP_WORKING:
580 *value = true;
581 break;
579 default: 582 default:
580 DRM_DEBUG_KMS("Invalid request %d\n", info->request); 583 DRM_DEBUG_KMS("Invalid request %d\n", info->request);
581 return -EINVAL; 584 return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index de42fc4a22b8..9c3377ca17b7 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -458,14 +458,16 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 458 /* make sure the object fits at this offset */ 458 /* make sure the object fits at this offset */
459 eoffset = soffset + size; 459 eoffset = soffset + size;
460 if (soffset >= eoffset) { 460 if (soffset >= eoffset) {
461 return -EINVAL; 461 r = -EINVAL;
462 goto error_unreserve;
462 } 463 }
463 464
464 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; 465 last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
465 if (last_pfn > rdev->vm_manager.max_pfn) { 466 if (last_pfn > rdev->vm_manager.max_pfn) {
466 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", 467 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
467 last_pfn, rdev->vm_manager.max_pfn); 468 last_pfn, rdev->vm_manager.max_pfn);
468 return -EINVAL; 469 r = -EINVAL;
470 goto error_unreserve;
469 } 471 }
470 472
471 } else { 473 } else {
@@ -486,7 +488,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
486 "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, 488 "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
487 soffset, tmp->bo, tmp->it.start, tmp->it.last); 489 soffset, tmp->bo, tmp->it.start, tmp->it.last);
488 mutex_unlock(&vm->mutex); 490 mutex_unlock(&vm->mutex);
489 return -EINVAL; 491 r = -EINVAL;
492 goto error_unreserve;
490 } 493 }
491 } 494 }
492 495
@@ -497,7 +500,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
497 tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 500 tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
498 if (!tmp) { 501 if (!tmp) {
499 mutex_unlock(&vm->mutex); 502 mutex_unlock(&vm->mutex);
500 return -ENOMEM; 503 r = -ENOMEM;
504 goto error_unreserve;
501 } 505 }
502 tmp->it.start = bo_va->it.start; 506 tmp->it.start = bo_va->it.start;
503 tmp->it.last = bo_va->it.last; 507 tmp->it.last = bo_va->it.last;
@@ -555,7 +559,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
555 r = radeon_vm_clear_bo(rdev, pt); 559 r = radeon_vm_clear_bo(rdev, pt);
556 if (r) { 560 if (r) {
557 radeon_bo_unref(&pt); 561 radeon_bo_unref(&pt);
558 radeon_bo_reserve(bo_va->bo, false);
559 return r; 562 return r;
560 } 563 }
561 564
@@ -575,6 +578,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
575 578
576 mutex_unlock(&vm->mutex); 579 mutex_unlock(&vm->mutex);
577 return 0; 580 return 0;
581
582error_unreserve:
583 radeon_bo_unreserve(bo_va->bo);
584 return r;
578} 585}
579 586
580/** 587/**
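
The radeon_vm rework funnels its scattered early returns into one error_unreserve exit, so every failure path drops the buffer reservation the caller took, exactly once. The single-exit shape in general, with check_offset(), alloc_entry() and unreserve_bo() as illustrative stand-ins:

extern int check_offset(void);          /* stand-in validation */
extern int alloc_entry(void);           /* stand-in allocation */
extern void unreserve_bo(void);         /* cleanup owed on every error */

static int set_addr_sketch(void)
{
        int r;

        if (check_offset()) {
                r = -EINVAL;
                goto error_unreserve;
        }
        if (alloc_entry()) {
                r = -ENOMEM;
                goto error_unreserve;
        }
        return 0;

error_unreserve:
        unreserve_bo();
        return r;
}
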
diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c
index 8fe78d08e01c..7c6966434ee7 100644
--- a/drivers/i2c/busses/i2c-hix5hd2.c
+++ b/drivers/i2c/busses/i2c-hix5hd2.c
@@ -554,4 +554,4 @@ module_platform_driver(hix5hd2_i2c_driver);
554MODULE_DESCRIPTION("Hix5hd2 I2C Bus driver"); 554MODULE_DESCRIPTION("Hix5hd2 I2C Bus driver");
555MODULE_AUTHOR("Wei Yan <sledge.yanwei@huawei.com>"); 555MODULE_AUTHOR("Wei Yan <sledge.yanwei@huawei.com>");
556MODULE_LICENSE("GPL"); 556MODULE_LICENSE("GPL");
557MODULE_ALIAS("platform:i2c-hix5hd2"); 557MODULE_ALIAS("platform:hix5hd2-i2c");
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 958c8db4ec30..297e9c9ac943 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -1143,6 +1143,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
1143 return -ENOMEM; 1143 return -ENOMEM;
1144 1144
1145 i2c->quirks = s3c24xx_get_device_quirks(pdev); 1145 i2c->quirks = s3c24xx_get_device_quirks(pdev);
1146 i2c->sysreg = ERR_PTR(-ENOENT);
1146 if (pdata) 1147 if (pdata)
1147 memcpy(i2c->pdata, pdata, sizeof(*pdata)); 1148 memcpy(i2c->pdata, pdata, sizeof(*pdata));
1148 else 1149 else
diff --git a/drivers/iio/adc/twl6030-gpadc.c b/drivers/iio/adc/twl6030-gpadc.c
index 89d8aa1d2818..df12c57e6ce0 100644
--- a/drivers/iio/adc/twl6030-gpadc.c
+++ b/drivers/iio/adc/twl6030-gpadc.c
@@ -1001,7 +1001,7 @@ static struct platform_driver twl6030_gpadc_driver = {
1001 1001
1002module_platform_driver(twl6030_gpadc_driver); 1002module_platform_driver(twl6030_gpadc_driver);
1003 1003
1004MODULE_ALIAS("platform: " DRIVER_NAME); 1004MODULE_ALIAS("platform:" DRIVER_NAME);
1005MODULE_AUTHOR("Balaji T K <balajitk@ti.com>"); 1005MODULE_AUTHOR("Balaji T K <balajitk@ti.com>");
1006MODULE_AUTHOR("Graeme Gregory <gg@slimlogic.co.uk>"); 1006MODULE_AUTHOR("Graeme Gregory <gg@slimlogic.co.uk>");
1007MODULE_AUTHOR("Oleksandr Kozaruk <oleksandr.kozaruk@ti.com"); 1007MODULE_AUTHOR("Oleksandr Kozaruk <oleksandr.kozaruk@ti.com");
diff --git a/drivers/iio/imu/adis16400.h b/drivers/iio/imu/adis16400.h
index 0916bf6b6c31..73b189c1c0fb 100644
--- a/drivers/iio/imu/adis16400.h
+++ b/drivers/iio/imu/adis16400.h
@@ -139,6 +139,7 @@
139#define ADIS16400_NO_BURST BIT(1) 139#define ADIS16400_NO_BURST BIT(1)
140#define ADIS16400_HAS_SLOW_MODE BIT(2) 140#define ADIS16400_HAS_SLOW_MODE BIT(2)
141#define ADIS16400_HAS_SERIAL_NUMBER BIT(3) 141#define ADIS16400_HAS_SERIAL_NUMBER BIT(3)
142#define ADIS16400_BURST_DIAG_STAT BIT(4)
142 143
143struct adis16400_state; 144struct adis16400_state;
144 145
@@ -165,6 +166,7 @@ struct adis16400_state {
165 int filt_int; 166 int filt_int;
166 167
167 struct adis adis; 168 struct adis adis;
169 unsigned long avail_scan_mask[2];
168}; 170};
169 171
170/* At the moment triggers are only used for ring buffer 172/* At the moment triggers are only used for ring buffer
diff --git a/drivers/iio/imu/adis16400_buffer.c b/drivers/iio/imu/adis16400_buffer.c
index 6e727ffe5262..90c24a23c679 100644
--- a/drivers/iio/imu/adis16400_buffer.c
+++ b/drivers/iio/imu/adis16400_buffer.c
@@ -18,7 +18,8 @@ int adis16400_update_scan_mode(struct iio_dev *indio_dev,
18{ 18{
19 struct adis16400_state *st = iio_priv(indio_dev); 19 struct adis16400_state *st = iio_priv(indio_dev);
20 struct adis *adis = &st->adis; 20 struct adis *adis = &st->adis;
21 uint16_t *tx; 21 unsigned int burst_length;
22 u8 *tx;
22 23
23 if (st->variant->flags & ADIS16400_NO_BURST) 24 if (st->variant->flags & ADIS16400_NO_BURST)
24 return adis_update_scan_mode(indio_dev, scan_mask); 25 return adis_update_scan_mode(indio_dev, scan_mask);
@@ -26,26 +27,29 @@ int adis16400_update_scan_mode(struct iio_dev *indio_dev,
26 kfree(adis->xfer); 27 kfree(adis->xfer);
27 kfree(adis->buffer); 28 kfree(adis->buffer);
28 29
30 /* All but the timestamp channel */
31 burst_length = (indio_dev->num_channels - 1) * sizeof(u16);
32 if (st->variant->flags & ADIS16400_BURST_DIAG_STAT)
33 burst_length += sizeof(u16);
34
29 adis->xfer = kcalloc(2, sizeof(*adis->xfer), GFP_KERNEL); 35 adis->xfer = kcalloc(2, sizeof(*adis->xfer), GFP_KERNEL);
30 if (!adis->xfer) 36 if (!adis->xfer)
31 return -ENOMEM; 37 return -ENOMEM;
32 38
33 adis->buffer = kzalloc(indio_dev->scan_bytes + sizeof(u16), 39 adis->buffer = kzalloc(burst_length + sizeof(u16), GFP_KERNEL);
34 GFP_KERNEL);
35 if (!adis->buffer) 40 if (!adis->buffer)
36 return -ENOMEM; 41 return -ENOMEM;
37 42
38 tx = adis->buffer + indio_dev->scan_bytes; 43 tx = adis->buffer + burst_length;
39
40 tx[0] = ADIS_READ_REG(ADIS16400_GLOB_CMD); 44 tx[0] = ADIS_READ_REG(ADIS16400_GLOB_CMD);
41 tx[1] = 0; 45 tx[1] = 0;
42 46
43 adis->xfer[0].tx_buf = tx; 47 adis->xfer[0].tx_buf = tx;
44 adis->xfer[0].bits_per_word = 8; 48 adis->xfer[0].bits_per_word = 8;
45 adis->xfer[0].len = 2; 49 adis->xfer[0].len = 2;
46 adis->xfer[1].tx_buf = tx; 50 adis->xfer[1].rx_buf = adis->buffer;
47 adis->xfer[1].bits_per_word = 8; 51 adis->xfer[1].bits_per_word = 8;
48 adis->xfer[1].len = indio_dev->scan_bytes; 52 adis->xfer[1].len = burst_length;
49 53
50 spi_message_init(&adis->msg); 54 spi_message_init(&adis->msg);
51 spi_message_add_tail(&adis->xfer[0], &adis->msg); 55 spi_message_add_tail(&adis->xfer[0], &adis->msg);
@@ -61,6 +65,7 @@ irqreturn_t adis16400_trigger_handler(int irq, void *p)
61 struct adis16400_state *st = iio_priv(indio_dev); 65 struct adis16400_state *st = iio_priv(indio_dev);
62 struct adis *adis = &st->adis; 66 struct adis *adis = &st->adis;
63 u32 old_speed_hz = st->adis.spi->max_speed_hz; 67 u32 old_speed_hz = st->adis.spi->max_speed_hz;
68 void *buffer;
64 int ret; 69 int ret;
65 70
66 if (!adis->buffer) 71 if (!adis->buffer)
@@ -81,7 +86,12 @@ irqreturn_t adis16400_trigger_handler(int irq, void *p)
81 spi_setup(st->adis.spi); 86 spi_setup(st->adis.spi);
82 } 87 }
83 88
84 iio_push_to_buffers_with_timestamp(indio_dev, adis->buffer, 89 if (st->variant->flags & ADIS16400_BURST_DIAG_STAT)
90 buffer = adis->buffer + sizeof(u16);
91 else
92 buffer = adis->buffer;
93
94 iio_push_to_buffers_with_timestamp(indio_dev, buffer,
85 pf->timestamp); 95 pf->timestamp);
86 96
87 iio_trigger_notify_done(indio_dev->trig); 97 iio_trigger_notify_done(indio_dev->trig);
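
The adis16400 buffer rework sizes the SPI burst from the channel list instead of indio_dev->scan_bytes: one 16-bit word per data channel (the timestamp channel contributes none), plus a leading DIAG_STAT word on parts that transmit it, with the 2-byte TX command placed after the RX area. The sizing arithmetic in isolation, as plain C with illustrative names:

#include <stdint.h>

static unsigned int burst_length(unsigned int num_channels, int has_diag_stat)
{
        /* every channel except the timestamp yields one 16-bit word */
        unsigned int len = (num_channels - 1) * sizeof(uint16_t);

        if (has_diag_stat)              /* DIAG_STAT leads the burst */
                len += sizeof(uint16_t);
        return len;                     /* allocate len + 2 bytes for TX */
}
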
diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c
index fa795dcd5f75..2fd68f2219a7 100644
--- a/drivers/iio/imu/adis16400_core.c
+++ b/drivers/iio/imu/adis16400_core.c
@@ -405,6 +405,11 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
405 *val = st->variant->temp_scale_nano / 1000000; 405 *val = st->variant->temp_scale_nano / 1000000;
406 *val2 = (st->variant->temp_scale_nano % 1000000); 406 *val2 = (st->variant->temp_scale_nano % 1000000);
407 return IIO_VAL_INT_PLUS_MICRO; 407 return IIO_VAL_INT_PLUS_MICRO;
408 case IIO_PRESSURE:
409 /* 20 uBar = 0.002kPascal */
410 *val = 0;
411 *val2 = 2000;
412 return IIO_VAL_INT_PLUS_MICRO;
408 default: 413 default:
409 return -EINVAL; 414 return -EINVAL;
410 } 415 }
@@ -454,10 +459,10 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
454 } 459 }
455} 460}
456 461
457#define ADIS16400_VOLTAGE_CHAN(addr, bits, name, si) { \ 462#define ADIS16400_VOLTAGE_CHAN(addr, bits, name, si, chn) { \
458 .type = IIO_VOLTAGE, \ 463 .type = IIO_VOLTAGE, \
459 .indexed = 1, \ 464 .indexed = 1, \
460 .channel = 0, \ 465 .channel = chn, \
461 .extend_name = name, \ 466 .extend_name = name, \
462 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ 467 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \
463 BIT(IIO_CHAN_INFO_SCALE), \ 468 BIT(IIO_CHAN_INFO_SCALE), \
@@ -474,10 +479,10 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
474} 479}
475 480
476#define ADIS16400_SUPPLY_CHAN(addr, bits) \ 481#define ADIS16400_SUPPLY_CHAN(addr, bits) \
477 ADIS16400_VOLTAGE_CHAN(addr, bits, "supply", ADIS16400_SCAN_SUPPLY) 482 ADIS16400_VOLTAGE_CHAN(addr, bits, "supply", ADIS16400_SCAN_SUPPLY, 0)
478 483
479#define ADIS16400_AUX_ADC_CHAN(addr, bits) \ 484#define ADIS16400_AUX_ADC_CHAN(addr, bits) \
480 ADIS16400_VOLTAGE_CHAN(addr, bits, NULL, ADIS16400_SCAN_ADC) 485 ADIS16400_VOLTAGE_CHAN(addr, bits, NULL, ADIS16400_SCAN_ADC, 1)
481 486
482#define ADIS16400_GYRO_CHAN(mod, addr, bits) { \ 487#define ADIS16400_GYRO_CHAN(mod, addr, bits) { \
483 .type = IIO_ANGL_VEL, \ 488 .type = IIO_ANGL_VEL, \
@@ -773,7 +778,8 @@ static struct adis16400_chip_info adis16400_chips[] = {
773 .channels = adis16448_channels, 778 .channels = adis16448_channels,
774 .num_channels = ARRAY_SIZE(adis16448_channels), 779 .num_channels = ARRAY_SIZE(adis16448_channels),
775 .flags = ADIS16400_HAS_PROD_ID | 780 .flags = ADIS16400_HAS_PROD_ID |
776 ADIS16400_HAS_SERIAL_NUMBER, 781 ADIS16400_HAS_SERIAL_NUMBER |
782 ADIS16400_BURST_DIAG_STAT,
777 .gyro_scale_micro = IIO_DEGREE_TO_RAD(10000), /* 0.01 deg/s */ 783 .gyro_scale_micro = IIO_DEGREE_TO_RAD(10000), /* 0.01 deg/s */
778 .accel_scale_micro = IIO_G_TO_M_S_2(833), /* 1/1200 g */ 784 .accel_scale_micro = IIO_G_TO_M_S_2(833), /* 1/1200 g */
779 .temp_scale_nano = 73860000, /* 0.07386 C */ 785 .temp_scale_nano = 73860000, /* 0.07386 C */
@@ -791,11 +797,6 @@ static const struct iio_info adis16400_info = {
791 .debugfs_reg_access = adis_debugfs_reg_access, 797 .debugfs_reg_access = adis_debugfs_reg_access,
792}; 798};
793 799
794static const unsigned long adis16400_burst_scan_mask[] = {
795 ~0UL,
796 0,
797};
798
799static const char * const adis16400_status_error_msgs[] = { 800static const char * const adis16400_status_error_msgs[] = {
800 [ADIS16400_DIAG_STAT_ZACCL_FAIL] = "Z-axis accelerometer self-test failure", 801 [ADIS16400_DIAG_STAT_ZACCL_FAIL] = "Z-axis accelerometer self-test failure",
801 [ADIS16400_DIAG_STAT_YACCL_FAIL] = "Y-axis accelerometer self-test failure", 802 [ADIS16400_DIAG_STAT_YACCL_FAIL] = "Y-axis accelerometer self-test failure",
@@ -843,6 +844,20 @@ static const struct adis_data adis16400_data = {
843 BIT(ADIS16400_DIAG_STAT_POWER_LOW), 844 BIT(ADIS16400_DIAG_STAT_POWER_LOW),
844}; 845};
845 846
847static void adis16400_setup_chan_mask(struct adis16400_state *st)
848{
849 const struct adis16400_chip_info *chip_info = st->variant;
850 unsigned i;
851
852 for (i = 0; i < chip_info->num_channels; i++) {
853 const struct iio_chan_spec *ch = &chip_info->channels[i];
854
855 if (ch->scan_index >= 0 &&
856 ch->scan_index != ADIS16400_SCAN_TIMESTAMP)
857 st->avail_scan_mask[0] |= BIT(ch->scan_index);
858 }
859}
860
846static int adis16400_probe(struct spi_device *spi) 861static int adis16400_probe(struct spi_device *spi)
847{ 862{
848 struct adis16400_state *st; 863 struct adis16400_state *st;
@@ -866,8 +881,10 @@ static int adis16400_probe(struct spi_device *spi)
866 indio_dev->info = &adis16400_info; 881 indio_dev->info = &adis16400_info;
867 indio_dev->modes = INDIO_DIRECT_MODE; 882 indio_dev->modes = INDIO_DIRECT_MODE;
868 883
869 if (!(st->variant->flags & ADIS16400_NO_BURST)) 884 if (!(st->variant->flags & ADIS16400_NO_BURST)) {
870 indio_dev->available_scan_masks = adis16400_burst_scan_mask; 885 adis16400_setup_chan_mask(st);
886 indio_dev->available_scan_masks = st->avail_scan_mask;
887 }
871 888
872 ret = adis_init(&st->adis, indio_dev, spi, &adis16400_data); 889 ret = adis_init(&st->adis, indio_dev, spi, &adis16400_data);
873 if (ret) 890 if (ret)
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 3f40319a55da..575a072d765f 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -65,6 +65,8 @@ static int
65isert_rdma_accept(struct isert_conn *isert_conn); 65isert_rdma_accept(struct isert_conn *isert_conn);
66struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); 66struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
67 67
68static void isert_release_work(struct work_struct *work);
69
68static inline bool 70static inline bool
69isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd) 71isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
70{ 72{
@@ -648,6 +650,7 @@ isert_init_conn(struct isert_conn *isert_conn)
648 mutex_init(&isert_conn->mutex); 650 mutex_init(&isert_conn->mutex);
649 spin_lock_init(&isert_conn->pool_lock); 651 spin_lock_init(&isert_conn->pool_lock);
650 INIT_LIST_HEAD(&isert_conn->fr_pool); 652 INIT_LIST_HEAD(&isert_conn->fr_pool);
653 INIT_WORK(&isert_conn->release_work, isert_release_work);
651} 654}
652 655
653static void 656static void
@@ -925,6 +928,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
925{ 928{
926 struct isert_np *isert_np = cma_id->context; 929 struct isert_np *isert_np = cma_id->context;
927 struct isert_conn *isert_conn; 930 struct isert_conn *isert_conn;
931 bool terminating = false;
928 932
929 if (isert_np->np_cm_id == cma_id) 933 if (isert_np->np_cm_id == cma_id)
930 return isert_np_cma_handler(cma_id->context, event); 934 return isert_np_cma_handler(cma_id->context, event);
@@ -932,12 +936,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
932 isert_conn = cma_id->qp->qp_context; 936 isert_conn = cma_id->qp->qp_context;
933 937
934 mutex_lock(&isert_conn->mutex); 938 mutex_lock(&isert_conn->mutex);
939 terminating = (isert_conn->state == ISER_CONN_TERMINATING);
935 isert_conn_terminate(isert_conn); 940 isert_conn_terminate(isert_conn);
936 mutex_unlock(&isert_conn->mutex); 941 mutex_unlock(&isert_conn->mutex);
937 942
938 isert_info("conn %p completing wait\n", isert_conn); 943 isert_info("conn %p completing wait\n", isert_conn);
939 complete(&isert_conn->wait); 944 complete(&isert_conn->wait);
940 945
946 if (terminating)
947 goto out;
948
949 mutex_lock(&isert_np->np_accept_mutex);
950 if (!list_empty(&isert_conn->accept_node)) {
951 list_del_init(&isert_conn->accept_node);
952 isert_put_conn(isert_conn);
953 queue_work(isert_release_wq, &isert_conn->release_work);
954 }
955 mutex_unlock(&isert_np->np_accept_mutex);
956
957out:
941 return 0; 958 return 0;
942} 959}
943 960
@@ -2380,7 +2397,6 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2380 page_off = offset % PAGE_SIZE; 2397 page_off = offset % PAGE_SIZE;
2381 2398
2382 send_wr->sg_list = ib_sge; 2399 send_wr->sg_list = ib_sge;
2383 send_wr->num_sge = sg_nents;
2384 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; 2400 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
2385 /* 2401 /*
2386 * Perform mapping of TCM scatterlist memory ib_sge dma_addr. 2402 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
@@ -2400,14 +2416,17 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2400 ib_sge->addr, ib_sge->length, ib_sge->lkey); 2416 ib_sge->addr, ib_sge->length, ib_sge->lkey);
2401 page_off = 0; 2417 page_off = 0;
2402 data_left -= ib_sge->length; 2418 data_left -= ib_sge->length;
2419 if (!data_left)
2420 break;
2403 ib_sge++; 2421 ib_sge++;
2404 isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); 2422 isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
2405 } 2423 }
2406 2424
2425 send_wr->num_sge = ++i;
2407 isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", 2426 isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
2408 send_wr->sg_list, send_wr->num_sge); 2427 send_wr->sg_list, send_wr->num_sge);
2409 2428
2410 return sg_nents; 2429 return send_wr->num_sge;
2411} 2430}
2412 2431
2413static int 2432static int
@@ -3366,7 +3385,6 @@ static void isert_wait_conn(struct iscsi_conn *conn)
3366 isert_wait4flush(isert_conn); 3385 isert_wait4flush(isert_conn);
3367 isert_wait4logout(isert_conn); 3386 isert_wait4logout(isert_conn);
3368 3387
3369 INIT_WORK(&isert_conn->release_work, isert_release_work);
3370 queue_work(isert_release_wq, &isert_conn->release_work); 3388 queue_work(isert_release_wq, &isert_conn->release_work);
3371} 3389}
3372 3390
@@ -3374,6 +3392,7 @@ static void isert_free_conn(struct iscsi_conn *conn)
3374{ 3392{
3375 struct isert_conn *isert_conn = conn->context; 3393 struct isert_conn *isert_conn = conn->context;
3376 3394
3395 isert_wait4flush(isert_conn);
3377 isert_put_conn(isert_conn); 3396 isert_put_conn(isert_conn);
3378} 3397}
3379 3398
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 7752bd59d4b7..a353b7de6d22 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1063,9 +1063,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
1063 right = (packet[1] & 0x02) >> 1; 1063 right = (packet[1] & 0x02) >> 1;
1064 middle = (packet[1] & 0x04) >> 2; 1064 middle = (packet[1] & 0x04) >> 2;
1065 1065
1066 /* Divide 2 since trackpoint's speed is too fast */ 1066 input_report_rel(dev2, REL_X, (char)x);
1067 input_report_rel(dev2, REL_X, (char)x / 2); 1067 input_report_rel(dev2, REL_Y, -((char)y));
1068 input_report_rel(dev2, REL_Y, -((char)y / 2));
1069 1068
1070 input_report_key(dev2, BTN_LEFT, left); 1069 input_report_key(dev2, BTN_LEFT, left);
1071 input_report_key(dev2, BTN_RIGHT, right); 1070 input_report_key(dev2, BTN_RIGHT, right);
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 79363b687195..ce3d40004458 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1376,10 +1376,11 @@ static bool elantech_is_signature_valid(const unsigned char *param)
1376 return true; 1376 return true;
1377 1377
1378 /* 1378 /*
 1379 * Some models have a revision higher than 20. Meaning param[2] may 1379 * Some hw_version >= 4 models have a revision higher than 20. Meaning
1380 * be 10 or 20, skip the rates check for these. 1380 * that param[2] may be 10 or 20, skip the rates check for these.
1381 */ 1381 */
1382 if (param[0] == 0x46 && (param[1] & 0xef) == 0x0f && param[2] < 40) 1382 if ((param[0] & 0x0f) >= 0x06 && (param[1] & 0xaf) == 0x0f &&
1383 param[2] < 40)
1383 return true; 1384 return true;
1384 1385
1385 for (i = 0; i < ARRAY_SIZE(rates); i++) 1386 for (i = 0; i < ARRAY_SIZE(rates); i++)
@@ -1555,6 +1556,7 @@ static int elantech_set_properties(struct elantech_data *etd)
1555 case 9: 1556 case 9:
1556 case 10: 1557 case 10:
1557 case 13: 1558 case 13:
1559 case 14:
1558 etd->hw_version = 4; 1560 etd->hw_version = 4;
1559 break; 1561 break;
1560 default: 1562 default:
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 630af73e98c4..35c8d0ceabee 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -151,6 +151,11 @@ static const struct min_max_quirk min_max_pnpid_table[] = {
151 1024, 5112, 2024, 4832 151 1024, 5112, 2024, 4832
152 }, 152 },
153 { 153 {
154 (const char * const []){"LEN2000", NULL},
155 {ANY_BOARD_ID, ANY_BOARD_ID},
156 1024, 5113, 2021, 4832
157 },
158 {
154 (const char * const []){"LEN2001", NULL}, 159 (const char * const []){"LEN2001", NULL},
155 {ANY_BOARD_ID, ANY_BOARD_ID}, 160 {ANY_BOARD_ID, ANY_BOARD_ID},
156 1024, 5022, 2508, 4832 161 1024, 5022, 2508, 4832
@@ -191,7 +196,7 @@ static const char * const topbuttonpad_pnp_ids[] = {
191 "LEN0045", 196 "LEN0045",
192 "LEN0047", 197 "LEN0047",
193 "LEN0049", 198 "LEN0049",
194 "LEN2000", 199 "LEN2000", /* S540 */
195 "LEN2001", /* Edge E431 */ 200 "LEN2001", /* Edge E431 */
196 "LEN2002", /* Edge E531 */ 201 "LEN2002", /* Edge E531 */
197 "LEN2003", 202 "LEN2003",
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e43d48956dea..e1c7e9e51045 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2930,6 +2930,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
2930 size = PAGE_ALIGN(size); 2930 size = PAGE_ALIGN(size);
2931 dma_mask = dev->coherent_dma_mask; 2931 dma_mask = dev->coherent_dma_mask;
2932 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 2932 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
2933 flag |= __GFP_ZERO;
2933 2934
2934 page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); 2935 page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
2935 if (!page) { 2936 if (!page) {
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 68d43beccb7e..5ecfaf29933a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -422,6 +422,14 @@ static int dmar_map_gfx = 1;
422static int dmar_forcedac; 422static int dmar_forcedac;
423static int intel_iommu_strict; 423static int intel_iommu_strict;
424static int intel_iommu_superpage = 1; 424static int intel_iommu_superpage = 1;
425static int intel_iommu_ecs = 1;
426
427/* We only actually use ECS when PASID support (on the new bit 40)
428 * is also advertised. Some early implementations — the ones with
429 * PASID support on bit 28 — have issues even when we *only* use
430 * extended root/context tables. */
431#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
432 ecap_pasid(iommu->ecap))
425 433
426int intel_iommu_gfx_mapped; 434int intel_iommu_gfx_mapped;
427EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); 435EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -465,6 +473,10 @@ static int __init intel_iommu_setup(char *str)
465 printk(KERN_INFO 473 printk(KERN_INFO
466 "Intel-IOMMU: disable supported super page\n"); 474 "Intel-IOMMU: disable supported super page\n");
467 intel_iommu_superpage = 0; 475 intel_iommu_superpage = 0;
476 } else if (!strncmp(str, "ecs_off", 7)) {
477 printk(KERN_INFO
478 "Intel-IOMMU: disable extended context table support\n");
479 intel_iommu_ecs = 0;
468 } 480 }
469 481
470 str += strcspn(str, ","); 482 str += strcspn(str, ",");
@@ -669,7 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu
669 struct context_entry *context; 681 struct context_entry *context;
670 u64 *entry; 682 u64 *entry;
671 683
672 if (ecap_ecs(iommu->ecap)) { 684 if (ecs_enabled(iommu)) {
673 if (devfn >= 0x80) { 685 if (devfn >= 0x80) {
674 devfn -= 0x80; 686 devfn -= 0x80;
675 entry = &root->hi; 687 entry = &root->hi;
@@ -696,6 +708,11 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu
696 return &context[devfn]; 708 return &context[devfn];
697} 709}
698 710
711static int iommu_dummy(struct device *dev)
712{
713 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
714}
715
699static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) 716static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
700{ 717{
701 struct dmar_drhd_unit *drhd = NULL; 718 struct dmar_drhd_unit *drhd = NULL;
@@ -705,6 +722,9 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
705 u16 segment = 0; 722 u16 segment = 0;
706 int i; 723 int i;
707 724
725 if (iommu_dummy(dev))
726 return NULL;
727
708 if (dev_is_pci(dev)) { 728 if (dev_is_pci(dev)) {
709 pdev = to_pci_dev(dev); 729 pdev = to_pci_dev(dev);
710 segment = pci_domain_nr(pdev->bus); 730 segment = pci_domain_nr(pdev->bus);
@@ -798,7 +818,7 @@ static void free_context_table(struct intel_iommu *iommu)
798 if (context) 818 if (context)
799 free_pgtable_page(context); 819 free_pgtable_page(context);
800 820
801 if (!ecap_ecs(iommu->ecap)) 821 if (!ecs_enabled(iommu))
802 continue; 822 continue;
803 823
804 context = iommu_context_addr(iommu, i, 0x80, 0); 824 context = iommu_context_addr(iommu, i, 0x80, 0);
@@ -1133,7 +1153,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
1133 unsigned long flag; 1153 unsigned long flag;
1134 1154
1135 addr = virt_to_phys(iommu->root_entry); 1155 addr = virt_to_phys(iommu->root_entry);
1136 if (ecap_ecs(iommu->ecap)) 1156 if (ecs_enabled(iommu))
1137 addr |= DMA_RTADDR_RTT; 1157 addr |= DMA_RTADDR_RTT;
1138 1158
1139 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1159 raw_spin_lock_irqsave(&iommu->register_lock, flag);
@@ -2969,11 +2989,6 @@ static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2969 return __get_valid_domain_for_dev(dev); 2989 return __get_valid_domain_for_dev(dev);
2970} 2990}
2971 2991
2972static int iommu_dummy(struct device *dev)
2973{
2974 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2975}
2976
2977/* Check if the dev needs to go through non-identity map and unmap process.*/ 2992/* Check if the dev needs to go through non-identity map and unmap process.*/
2978static int iommu_no_mapping(struct device *dev) 2993static int iommu_no_mapping(struct device *dev)
2979{ 2994{
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 57f09cb54464..269c2354c431 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -271,7 +271,7 @@ int gic_get_c0_fdc_int(void)
 				  GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC));
 }
 
-static void gic_handle_shared_int(void)
+static void gic_handle_shared_int(bool chained)
 {
 	unsigned int i, intr, virq;
 	unsigned long *pcpu_mask;
@@ -299,7 +299,10 @@ static void gic_handle_shared_int(void)
 	while (intr != gic_shared_intrs) {
 		virq = irq_linear_revmap(gic_irq_domain,
 					 GIC_SHARED_TO_HWIRQ(intr));
-		do_IRQ(virq);
+		if (chained)
+			generic_handle_irq(virq);
+		else
+			do_IRQ(virq);
 
 		/* go to next pending bit */
 		bitmap_clear(pending, intr, 1);
@@ -431,7 +434,7 @@ static struct irq_chip gic_edge_irq_controller = {
 #endif
 };
 
-static void gic_handle_local_int(void)
+static void gic_handle_local_int(bool chained)
 {
 	unsigned long pending, masked;
 	unsigned int intr, virq;
@@ -445,7 +448,10 @@ static void gic_handle_local_int(void)
 	while (intr != GIC_NUM_LOCAL_INTRS) {
 		virq = irq_linear_revmap(gic_irq_domain,
 					 GIC_LOCAL_TO_HWIRQ(intr));
-		do_IRQ(virq);
+		if (chained)
+			generic_handle_irq(virq);
+		else
+			do_IRQ(virq);
 
 		/* go to next pending bit */
 		bitmap_clear(&pending, intr, 1);
@@ -509,13 +515,14 @@ static struct irq_chip gic_all_vpes_local_irq_controller = {
 
 static void __gic_irq_dispatch(void)
 {
-	gic_handle_local_int();
-	gic_handle_shared_int();
+	gic_handle_local_int(false);
+	gic_handle_shared_int(false);
 }
 
 static void gic_irq_dispatch(unsigned int irq, struct irq_desc *desc)
 {
-	__gic_irq_dispatch();
+	gic_handle_local_int(true);
+	gic_handle_shared_int(true);
 }
 
 #ifdef CONFIG_MIPS_GIC_IPI
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 4a9ce5b50c5b..6b2b582433bd 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -104,7 +104,7 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
 	irqd_set_trigger_type(data, flow_type);
 	irq_setup_alt_chip(data, flow_type);
 
-	for (i = 0; i <= gc->num_ct; i++, ct++)
+	for (i = 0; i < gc->num_ct; i++, ct++)
 		if (ct->type & flow_type)
 			ctrl_off = ct->regs.type;
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 27506302eb7a..4dbed4a67aaf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 			err = -EBUSY;
 		}
 		spin_unlock(&mddev->lock);
-		return err;
+		return err ?: len;
 	}
 	err = mddev_lock(mddev);
 	if (err)
@@ -4217,13 +4217,14 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		else
 			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-		flush_workqueue(md_misc_wq);
-		if (mddev->sync_thread) {
-			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			if (mddev_lock(mddev) == 0) {
+		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+		    mddev_lock(mddev) == 0) {
+			flush_workqueue(md_misc_wq);
+			if (mddev->sync_thread) {
+				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 				md_reap_sync_thread(mddev);
-				mddev_unlock(mddev);
 			}
+			mddev_unlock(mddev);
 		}
 	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -8261,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 	if (mddev_is_clustered(mddev))
 		md_cluster_ops->metadata_update_finish(mddev);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e793ab6b3570..f55c3f35b746 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev)
 
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 553d54b87052..b6793d2e051f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7354,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 3ef0f90b128f..157099243d61 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -97,6 +97,7 @@ config MEDIA_CONTROLLER
 config MEDIA_CONTROLLER_DVB
 	bool "Enable Media controller for DVB"
 	depends on MEDIA_CONTROLLER
+	depends on BROKEN
 	---help---
 	  Enable the media controller API support for DVB.
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index db84ddcfec84..9fd6c69a8bac 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -423,7 +423,7 @@ static void xgbe_tx_timer(unsigned long data)
 	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
 		if (pdata->per_channel_irq)
-			disable_irq(channel->dma_irq);
+			disable_irq_nosync(channel->dma_irq);
 		else
 			xgbe_disable_rx_tx_ints(pdata);
 
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 77363d680532..a3b1c07ae0af 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2464,6 +2464,7 @@ err_out_powerdown:
 	ssb_bus_may_powerdown(sdev->bus);
 
 err_out_free_dev:
+	netif_napi_del(&bp->napi);
 	free_netdev(dev);
 
 out:
@@ -2480,6 +2481,7 @@ static void b44_remove_one(struct ssb_device *sdev)
 		b44_unregister_phy_one(bp);
 	ssb_device_disable(sdev, 0);
 	ssb_bus_may_powerdown(sdev->bus);
+	netif_napi_del(&bp->napi);
 	free_netdev(dev);
 	ssb_pcihost_set_power_state(sdev, PCI_D3hot);
 	ssb_set_drvdata(sdev, NULL);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e7651b3c6c57..420949cc55aa 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -299,9 +299,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
 			phy_name = "external RGMII (no delay)";
 		else
 			phy_name = "external RGMII (TX delay)";
-		reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
-		reg |= RGMII_MODE_EN | id_mode_dis;
-		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
 		bcmgenet_sys_writel(priv,
 				    PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
 		break;
@@ -310,6 +307,15 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
 		return -EINVAL;
 	}
 
+	/* This is an external PHY (xMII), so we need to enable the RGMII
+	 * block for the interface to work
+	 */
+	if (priv->ext_phy) {
+		reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
+		reg |= RGMII_MODE_EN | id_mode_dis;
+		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
+	}
+
 	if (init)
 		dev_info(kdev, "configuring instance for %s\n", phy_name);
 
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 28d9ca675a27..68d47b196dae 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -131,8 +131,15 @@ static void enic_get_drvinfo(struct net_device *netdev,
 {
 	struct enic *enic = netdev_priv(netdev);
 	struct vnic_devcmd_fw_info *fw_info;
+	int err;
 
-	enic_dev_fw_info(enic, &fw_info);
+	err = enic_dev_fw_info(enic, &fw_info);
+	/* return only when pci_zalloc_consistent fails in vnic_dev_fw_info
+	 * For other failures, like devcmd failure, we return previously
+	 * recorded info.
+	 */
+	if (err == -ENOMEM)
+		return;
 
 	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
@@ -181,8 +188,15 @@ static void enic_get_ethtool_stats(struct net_device *netdev,
 	struct enic *enic = netdev_priv(netdev);
 	struct vnic_stats *vstats;
 	unsigned int i;
-
-	enic_dev_stats_dump(enic, &vstats);
+	int err;
+
+	err = enic_dev_stats_dump(enic, &vstats);
+	/* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
+	 * For other failures, like devcmd failure, we return previously
+	 * recorded stats.
+	 */
+	if (err == -ENOMEM)
+		return;
 
 	for (i = 0; i < enic_n_tx_stats; i++)
 		*(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index];
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 204bd182473b..eadae1b412c6 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -615,8 +615,15 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev,
 {
 	struct enic *enic = netdev_priv(netdev);
 	struct vnic_stats *stats;
+	int err;
 
-	enic_dev_stats_dump(enic, &stats);
+	err = enic_dev_stats_dump(enic, &stats);
+	/* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
+	 * For other failures, like devcmd failure, we return previously
+	 * recorded stats.
+	 */
+	if (err == -ENOMEM)
+		return net_stats;
 
 	net_stats->tx_packets = stats->tx.tx_frames_ok;
 	net_stats->tx_bytes = stats->tx.tx_bytes_ok;
@@ -1407,6 +1414,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
 	 */
 		enic_calc_int_moderation(enic, &enic->rq[rq]);
 
+	enic_poll_unlock_napi(&enic->rq[rq]);
 	if (work_done < work_to_do) {
 
 		/* Some work done, but not enough to stay in polling,
@@ -1418,7 +1426,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
 		enic_set_int_moderation(enic, &enic->rq[rq]);
 		vnic_intr_unmask(&enic->intr[intr]);
 	}
-	enic_poll_unlock_napi(&enic->rq[rq]);
 
 	return work_done;
 }
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c
index 36a2ed606c91..c4b2183bf352 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c
@@ -188,16 +188,15 @@ void vnic_rq_clean(struct vnic_rq *rq,
 	struct vnic_rq_buf *buf;
 	u32 fetch_index;
 	unsigned int count = rq->ring.desc_count;
+	int i;
 
 	buf = rq->to_clean;
 
-	while (vnic_rq_desc_used(rq) > 0) {
-
+	for (i = 0; i < rq->ring.desc_count; i++) {
 		(*buf_clean)(rq, buf);
-
-		buf = rq->to_clean = buf->next;
-		rq->ring.desc_avail++;
+		buf = buf->next;
 	}
+	rq->ring.desc_avail = rq->ring.desc_count - 1;
 
 	/* Use current fetch_index as the ring starting point */
 	fetch_index = ioread32(&rq->ctrl->fetch_index);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index fb140faeafb1..c5e1d0ac75f9 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1720,9 +1720,9 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
 	total_size = buf_len;
 
 	get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
-	get_fat_cmd.va = pci_alloc_consistent(adapter->pdev,
-					      get_fat_cmd.size,
-					      &get_fat_cmd.dma);
+	get_fat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					     get_fat_cmd.size,
+					     &get_fat_cmd.dma, GFP_ATOMIC);
 	if (!get_fat_cmd.va) {
 		dev_err(&adapter->pdev->dev,
 			"Memory allocation failure while reading FAT data\n");
@@ -1767,8 +1767,8 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
 		log_offset += buf_size;
 	}
 err:
-	pci_free_consistent(adapter->pdev, get_fat_cmd.size,
-			    get_fat_cmd.va, get_fat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
+			  get_fat_cmd.va, get_fat_cmd.dma);
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
@@ -2215,12 +2215,12 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
 		return -EINVAL;
 
 	cmd.size = sizeof(struct be_cmd_resp_port_type);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failed\n");
 		return -ENOMEM;
 	}
-	memset(cmd.va, 0, cmd.size);
 
 	spin_lock_bh(&adapter->mcc_lock);
 
@@ -2245,7 +2245,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
 	}
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
-	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 	return status;
 }
 
@@ -2720,7 +2720,8 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
 		goto err;
 	}
 	cmd.size = sizeof(struct be_cmd_req_get_phy_info);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -2754,7 +2755,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
 					BE_SUPPORTED_SPEED_1GBPS;
 		}
 	}
-	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
@@ -2805,8 +2806,9 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 
 	memset(&attribs_cmd, 0, sizeof(struct be_dma_mem));
 	attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs);
-	attribs_cmd.va = pci_alloc_consistent(adapter->pdev, attribs_cmd.size,
-					      &attribs_cmd.dma);
+	attribs_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					     attribs_cmd.size,
+					     &attribs_cmd.dma, GFP_ATOMIC);
 	if (!attribs_cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -2833,8 +2835,8 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (attribs_cmd.va)
-		pci_free_consistent(adapter->pdev, attribs_cmd.size,
-				    attribs_cmd.va, attribs_cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, attribs_cmd.size,
+				  attribs_cmd.va, attribs_cmd.dma);
 	return status;
 }
 
@@ -2972,9 +2974,10 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
 	memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem));
 	get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list);
-	get_mac_list_cmd.va = pci_alloc_consistent(adapter->pdev,
-						   get_mac_list_cmd.size,
-						   &get_mac_list_cmd.dma);
+	get_mac_list_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+						  get_mac_list_cmd.size,
+						  &get_mac_list_cmd.dma,
+						  GFP_ATOMIC);
 
 	if (!get_mac_list_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -3047,8 +3050,8 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
 out:
 	spin_unlock_bh(&adapter->mcc_lock);
-	pci_free_consistent(adapter->pdev, get_mac_list_cmd.size,
-			    get_mac_list_cmd.va, get_mac_list_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
+			  get_mac_list_cmd.va, get_mac_list_cmd.dma);
 	return status;
 }
 
@@ -3101,8 +3104,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_mac_list);
-	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size,
-				    &cmd.dma, GFP_KERNEL);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_KERNEL);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3291,7 +3294,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -3326,7 +3330,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 
 }
@@ -3340,8 +3345,9 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-					     &extfat_cmd.dma);
+	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    extfat_cmd.size, &extfat_cmd.dma,
+					    GFP_ATOMIC);
 	if (!extfat_cmd.va)
 		return -ENOMEM;
 
@@ -3363,8 +3369,8 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
 	status = be_cmd_set_ext_fat_capabilites(adapter, &extfat_cmd, cfgs);
err:
-	pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-			    extfat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+			  extfat_cmd.dma);
 	return status;
 }
 
@@ -3377,8 +3383,9 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-					     &extfat_cmd.dma);
+	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    extfat_cmd.size, &extfat_cmd.dma,
+					    GFP_ATOMIC);
 
 	if (!extfat_cmd.va) {
 		dev_err(&adapter->pdev->dev, "%s: Memory allocation failure\n",
@@ -3396,8 +3403,8 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
 				level = cfgs->module[0].trace_lvl[j].dbg_lvl;
 		}
 	}
-	pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-			    extfat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+			  extfat_cmd.dma);
 err:
 	return level;
 }
@@ -3595,7 +3602,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_func_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -3635,7 +3643,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
@@ -3656,7 +3665,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3702,7 +3712,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 		res->vf_if_cap_flags = vf_res->cap_flags;
 err:
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
@@ -3717,7 +3728,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_profile_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3733,7 +3745,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
 	status = be_cmd_notify_wait(adapter, &wrb);
 
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index b765c24625bf..2835dee5dc39 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -264,8 +264,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
 	int status = 0;
 
 	read_cmd.size = LANCER_READ_FILE_CHUNK;
-	read_cmd.va = pci_alloc_consistent(adapter->pdev, read_cmd.size,
-					   &read_cmd.dma);
+	read_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, read_cmd.size,
+					  &read_cmd.dma, GFP_ATOMIC);
 
 	if (!read_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -289,8 +289,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
 			break;
 		}
 	}
-	pci_free_consistent(adapter->pdev, read_cmd.size, read_cmd.va,
-			    read_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, read_cmd.size, read_cmd.va,
+			  read_cmd.dma);
 
 	return status;
 }
@@ -818,8 +818,9 @@ static int be_test_ddr_dma(struct be_adapter *adapter)
 	};
 
 	ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test);
-	ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, ddrdma_cmd.size,
-					   &ddrdma_cmd.dma, GFP_KERNEL);
+	ddrdma_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    ddrdma_cmd.size, &ddrdma_cmd.dma,
+					    GFP_KERNEL);
 	if (!ddrdma_cmd.va)
 		return -ENOMEM;
 
@@ -941,8 +942,9 @@ static int be_read_eeprom(struct net_device *netdev,
 
 	memset(&eeprom_cmd, 0, sizeof(struct be_dma_mem));
 	eeprom_cmd.size = sizeof(struct be_cmd_req_seeprom_read);
-	eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, eeprom_cmd.size,
-					   &eeprom_cmd.dma, GFP_KERNEL);
+	eeprom_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    eeprom_cmd.size, &eeprom_cmd.dma,
+					    GFP_KERNEL);
 
 	if (!eeprom_cmd.va)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 6f9ffb9026cd..e43cc8a73ea7 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4605,8 +4605,8 @@ static int lancer_fw_download(struct be_adapter *adapter,
 
 	flash_cmd.size = sizeof(struct lancer_cmd_req_write_object)
 				+ LANCER_FW_DOWNLOAD_CHUNK;
-	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size,
-					  &flash_cmd.dma, GFP_KERNEL);
+	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size,
+					   &flash_cmd.dma, GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -4739,8 +4739,8 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw)
 	}
 
 	flash_cmd.size = sizeof(struct be_cmd_write_flashrom);
-	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
-					  GFP_KERNEL);
+	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
+					   GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -5291,16 +5291,15 @@ static int be_drv_init(struct be_adapter *adapter)
 	int status = 0;
 
 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
-	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
-						&mbox_mem_alloc->dma,
-						GFP_KERNEL);
+	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
+						 &mbox_mem_alloc->dma,
+						 GFP_KERNEL);
 	if (!mbox_mem_alloc->va)
 		return -ENOMEM;
 
 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
-	memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox));
 
 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 33c35d3b7420..5d47307121ab 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -317,6 +317,7 @@ struct i40e_pf {
 #endif
 #define I40E_FLAG_PORT_ID_VALID		(u64)(1 << 28)
 #define I40E_FLAG_DCB_CAPABLE		(u64)(1 << 29)
+#define I40E_FLAG_VEB_MODE_ENABLED	BIT_ULL(40)
 
 	/* tracks features that get auto disabled by errors */
 	u64 auto_disable_flags;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 34170eabca7d..da0faf478af0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1021,6 +1021,15 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 			goto command_write_done;
 		}
 
+		/* By default we are in VEPA mode, if this is the first VF/VMDq
+		 * VSI to be added switch to VEB mode.
+		 */
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf,
+					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+		}
+
 		vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
 		if (vsi)
 			dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a54c14491e3b..5b5bea159bd5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6097,6 +6097,10 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb)
 	if (ret)
 		goto end_reconstitute;
 
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+		veb->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		veb->bridge_mode = BRIDGE_MODE_VEPA;
 	i40e_config_bridge_mode(veb);
 
 	/* create the remaining VSIs attached to this VEB */
@@ -8031,7 +8035,12 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
 		} else if (mode != veb->bridge_mode) {
 			/* Existing HW bridge but different mode needs reset */
 			veb->bridge_mode = mode;
-			i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+			/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
+			if (mode == BRIDGE_MODE_VEB)
+				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			else
+				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
 			break;
 		}
 	}
@@ -8343,11 +8352,12 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		ctxt.uplink_seid = vsi->uplink_seid;
 		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
 		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
-		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
+		if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
+		    (i40e_is_vsi_uplink_mode_veb(vsi))) {
 			ctxt.info.valid_sections |=
				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
 			ctxt.info.switch_id =
				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
 		}
 		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
 		break;
@@ -8746,6 +8756,14 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
 				 __func__);
 			return NULL;
 		}
+		/* We come up by default in VEPA mode if SRIOV is not
+		 * already enabled, in which case we can't force VEPA
+		 * mode.
+		 */
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			veb->bridge_mode = BRIDGE_MODE_VEPA;
+			pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+		}
 		i40e_config_bridge_mode(veb);
 	}
 	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
@@ -9856,6 +9874,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_switch_setup;
 	}
 
+#ifdef CONFIG_PCI_IOV
+	/* prep for VF support */
+	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	    !test_bit(__I40E_BAD_EEPROM, &pf->state)) {
+		if (pci_num_vf(pdev))
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+	}
+#endif
 	err = i40e_setup_pf_switch(pf, false);
 	if (err) {
 		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4bd3a80aba82..9d95042d5a0f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2410,14 +2410,12 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-			       const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
 	struct skb_frag_struct *frag;
 	bool linearize = false;
@@ -2429,7 +2427,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 	gso_segs = skb_shinfo(skb)->gso_segs;
 
 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-		u16 j = 1;
+		u16 j = 0;
 
 		if (num_frags < (I40E_MAX_BUFFER_TXD))
 			goto linearize_chk_done;
@@ -2440,21 +2438,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 			goto linearize_chk_done;
 		}
 		frag = &skb_shinfo(skb)->frags[0];
-		size = hdr_len;
 		/* we might still have more fragments per segment */
 		do {
 			size += skb_frag_size(frag);
 			frag++; j++;
+			if ((size >= skb_shinfo(skb)->gso_size) &&
+			    (j < I40E_MAX_BUFFER_TXD)) {
+				size = (size % skb_shinfo(skb)->gso_size);
+				j = (size) ? 1 : 0;
+			}
 			if (j == I40E_MAX_BUFFER_TXD) {
-				if (size < skb_shinfo(skb)->gso_size) {
-					linearize = true;
-					break;
-				}
-				j = 1;
-				size -= skb_shinfo(skb)->gso_size;
-				if (size)
-					j++;
-				size += hdr_len;
+				linearize = true;
+				break;
 			}
 			num_frags--;
 		} while (num_frags);
@@ -2724,7 +2719,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	if (tsyn)
 		tx_flags |= I40E_TX_FLAGS_TSYN;
 
-	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+	if (i40e_chk_linearize(skb, tx_flags))
 		if (skb_linearize(skb))
 			goto out_drop;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 78d1c4ff565e..4e9376da0518 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1018,11 +1018,19 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 
-	if (num_vfs)
+	if (num_vfs) {
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf,
+					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+		}
 		return i40e_pci_sriov_enable(pdev, num_vfs);
+	}
 
 	if (!pci_vfs_assigned(pf->pdev)) {
 		i40e_free_vfs(pf);
+		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+		i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
 	} else {
 		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b077e02a0cc7..458fbb421090 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1619,14 +1619,12 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
 * @skb:      send buffer
 * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
 *
 * Note: Our HW can't scatter-gather more than 8 fragments to build
 * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-			       const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
 	struct skb_frag_struct *frag;
 	bool linearize = false;
@@ -1638,7 +1636,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 	gso_segs = skb_shinfo(skb)->gso_segs;
 
 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-		u16 j = 1;
+		u16 j = 0;
 
 		if (num_frags < (I40E_MAX_BUFFER_TXD))
 			goto linearize_chk_done;
@@ -1649,21 +1647,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 			goto linearize_chk_done;
 		}
 		frag = &skb_shinfo(skb)->frags[0];
-		size = hdr_len;
 		/* we might still have more fragments per segment */
 		do {
 			size += skb_frag_size(frag);
 			frag++; j++;
+			if ((size >= skb_shinfo(skb)->gso_size) &&
+			    (j < I40E_MAX_BUFFER_TXD)) {
+				size = (size % skb_shinfo(skb)->gso_size);
+				j = (size) ? 1 : 0;
+			}
 			if (j == I40E_MAX_BUFFER_TXD) {
-				if (size < skb_shinfo(skb)->gso_size) {
-					linearize = true;
-					break;
-				}
-				j = 1;
-				size -= skb_shinfo(skb)->gso_size;
-				if (size)
-					j++;
-				size += hdr_len;
+				linearize = true;
+				break;
 			}
 			num_frags--;
 		} while (num_frags);
@@ -1950,7 +1945,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	else if (tso)
 		tx_flags |= I40E_TX_FLAGS_TSO;
 
-	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+	if (i40e_chk_linearize(skb, tx_flags))
 		if (skb_linearize(skb))
 			goto out_drop;
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e3b9b63ad010..c3a9392cbc19 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -538,8 +538,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
 		igb->perout[i].start.tv_nsec = rq->perout.start.nsec;
 		igb->perout[i].period.tv_sec = ts.tv_sec;
 		igb->perout[i].period.tv_nsec = ts.tv_nsec;
-		wr32(trgttiml, rq->perout.start.sec);
-		wr32(trgttimh, rq->perout.start.nsec);
+		wr32(trgttimh, rq->perout.start.sec);
+		wr32(trgttiml, rq->perout.start.nsec);
 		tsauxc |= tsauxc_mask;
 		tsim |= tsim_mask;
 	} else {
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index cd29b1038c5e..15f9b7c9e4d3 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -1660,6 +1660,7 @@ static int ntb_atom_detect(struct ntb_device *ndev)
 	u32 ppd;
 
 	ndev->hw_type = BWD_HW;
+	ndev->limits.max_mw = BWD_MAX_MW;
 
 	rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd);
 	if (rc)
@@ -1778,7 +1779,7 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 			dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
 				 MW_TO_BAR(i));
 			rc = -EIO;
-			goto err3;
+			goto err4;
 		}
 	}
 
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 99764db0875a..f0650265febf 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -189,7 +189,7 @@ int __of_attach_node_sysfs(struct device_node *np)
 	return 0;
 }
 
-static int __init of_init(void)
+void __init of_core_init(void)
 {
 	struct device_node *np;
 
@@ -198,7 +198,8 @@ static int __init of_init(void)
 	of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
 	if (!of_kset) {
 		mutex_unlock(&of_mutex);
-		return -ENOMEM;
+		pr_err("devicetree: failed to register existing nodes\n");
+		return;
 	}
 	for_each_of_allnodes(np)
 		__of_attach_node_sysfs(np);
@@ -207,10 +208,7 @@ static int __init of_init(void)
 	/* Symlink in /proc as required by userspace ABI */
 	if (of_root)
 		proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base");
-
-	return 0;
 }
-core_initcall(of_init);
 
 static struct property *__of_find_property(const struct device_node *np,
 					    const char *name, int *lenp)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3351ef408125..53826b84e0ec 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -225,7 +225,7 @@ void __of_attach_node(struct device_node *np)
 	phandle = __of_get_property(np, "phandle", &sz);
 	if (!phandle)
 		phandle = __of_get_property(np, "linux,phandle", &sz);
-	if (IS_ENABLED(PPC_PSERIES) && !phandle)
+	if (IS_ENABLED(CONFIG_PPC_PSERIES) && !phandle)
 		phandle = __of_get_property(np, "ibm,phandle", &sz);
 	np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
 
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 4fd0cacf7ca0..508cc56130e3 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -428,16 +428,19 @@ static void __assign_resources_sorted(struct list_head *head,
 		 * consistent.
 		 */
 		if (add_align > dev_res->res->start) {
+			resource_size_t r_size = resource_size(dev_res->res);
+
 			dev_res->res->start = add_align;
-			dev_res->res->end = add_align +
-					    resource_size(dev_res->res);
+			dev_res->res->end = add_align + r_size - 1;
 
 			list_for_each_entry(dev_res2, head, list) {
 				align = pci_resource_alignment(dev_res2->dev,
 							       dev_res2->res);
-				if (add_align > align)
+				if (add_align > align) {
 					list_move_tail(&dev_res->list,
 						       &dev_res2->list);
+					break;
+				}
 			}
 		}
 
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index a53bd5b52df9..fc9b9f0ea91e 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -38,7 +38,9 @@ config ARMADA375_USBCLUSTER_PHY
 config PHY_DM816X_USB
 	tristate "TI dm816x USB PHY driver"
 	depends on ARCH_OMAP2PLUS
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_PHY
 	help
 	  Enable this for dm816x USB to work.
 
@@ -97,8 +99,9 @@ config OMAP_CONTROL_PHY
 config OMAP_USB2
 	tristate "OMAP USB2 PHY Driver"
 	depends on ARCH_OMAP2PLUS
-	depends on USB_PHY
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_PHY
 	select OMAP_CONTROL_PHY
 	depends on OMAP_OCP2SCP
 	help
@@ -122,8 +125,9 @@ config TI_PIPE3
 config TWL4030_USB
 	tristate "TWL4030 USB Transceiver Driver"
 	depends on TWL4030_CORE && REGULATOR_TWL4030 && USB_MUSB_OMAP2PLUS
-	depends on USB_PHY
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_PHY
 	help
 	  Enable this to support the USB OTG transceiver on TWL4030
 	  family chips (including the TWL5030 and TPS659x0 devices).
@@ -304,7 +308,7 @@ config PHY_STIH41X_USB
 
 config PHY_QCOM_UFS
 	tristate "Qualcomm UFS PHY driver"
-	depends on OF && ARCH_MSM
+	depends on OF && ARCH_QCOM
 	select GENERIC_PHY
 	help
 	  Support for UFS PHY on QCOM chipsets.
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index 3791838f4bd4..63bc12d7a73e 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -530,7 +530,7 @@ struct phy *phy_optional_get(struct device *dev, const char *string)
 {
 	struct phy *phy = phy_get(dev, string);
 
-	if (PTR_ERR(phy) == -ENODEV)
+	if (IS_ERR(phy) && (PTR_ERR(phy) == -ENODEV))
 		phy = NULL;
 
 	return phy;
@@ -584,7 +584,7 @@ struct phy *devm_phy_optional_get(struct device *dev, const char *string)
 {
 	struct phy *phy = devm_phy_get(dev, string);
 
-	if (PTR_ERR(phy) == -ENODEV)
+	if (IS_ERR(phy) && (PTR_ERR(phy) == -ENODEV))
 		phy = NULL;
 
 	return phy;
diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c
index 183ef4368101..c1a468686bdc 100644
--- a/drivers/phy/phy-omap-usb2.c
+++ b/drivers/phy/phy-omap-usb2.c
@@ -275,6 +275,7 @@ static int omap_usb2_probe(struct platform_device *pdev)
 	phy->wkupclk = devm_clk_get(phy->dev, "usb_phy_cm_clk32k");
 	if (IS_ERR(phy->wkupclk)) {
 		dev_err(&pdev->dev, "unable to get usb_phy_cm_clk32k\n");
+		pm_runtime_disable(phy->dev);
 		return PTR_ERR(phy->wkupclk);
 	} else {
 		dev_warn(&pdev->dev,
diff --git a/drivers/phy/phy-rcar-gen2.c b/drivers/phy/phy-rcar-gen2.c
index 778276aba3aa..97d45f47d1ad 100644
--- a/drivers/phy/phy-rcar-gen2.c
+++ b/drivers/phy/phy-rcar-gen2.c
@@ -23,7 +23,7 @@
 #define USBHS_LPSTS			0x02
 #define USBHS_UGCTRL			0x80
 #define USBHS_UGCTRL2			0x84
-#define USBHS_UGSTS			0x88	/* The manuals have 0x90 */
+#define USBHS_UGSTS			0x88	/* From technical update */
 
 /* Low Power Status register (LPSTS) */
 #define USBHS_LPSTS_SUSPM		0x4000
@@ -41,7 +41,7 @@
 #define USBHS_UGCTRL2_USB0SEL_HS_USB	0x00000030
 
 /* USB General status register (UGSTS) */
-#define USBHS_UGSTS_LOCK		0x00000300	/* The manuals have 0x3 */
+#define USBHS_UGSTS_LOCK		0x00000100	/* From technical update */
 
 #define PHYS_PER_CHANNEL	2
 
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index bcdb22d5e215..3c1850332a90 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -4,6 +4,7 @@
 config MTK_PMIC_WRAP
 	tristate "MediaTek PMIC Wrapper Support"
 	depends on ARCH_MEDIATEK
+	depends on RESET_CONTROLLER
 	select REGMAP
 	help
 	  Say yes here to add support for MediaTek PMIC Wrapper found
diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c
index db5be1eec54c..f432291feee9 100644
--- a/drivers/soc/mediatek/mtk-pmic-wrap.c
+++ b/drivers/soc/mediatek/mtk-pmic-wrap.c
@@ -443,11 +443,6 @@ static int pwrap_wait_for_state(struct pmic_wrapper *wrp,
443static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata) 443static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata)
444{ 444{
445 int ret; 445 int ret;
446 u32 val;
447
448 val = pwrap_readl(wrp, PWRAP_WACS2_RDATA);
449 if (PWRAP_GET_WACS_FSM(val) == PWRAP_WACS_FSM_WFVLDCLR)
450 pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
451 446
452 ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle); 447 ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
453 if (ret) 448 if (ret)
@@ -462,11 +457,6 @@ static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata)
462static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata) 457static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
463{ 458{
464 int ret; 459 int ret;
465 u32 val;
466
467 val = pwrap_readl(wrp, PWRAP_WACS2_RDATA);
468 if (PWRAP_GET_WACS_FSM(val) == PWRAP_WACS_FSM_WFVLDCLR)
469 pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
470 460
471 ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle); 461 ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
472 if (ret) 462 if (ret)
@@ -480,6 +470,8 @@ static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
480 470
481 *rdata = PWRAP_GET_WACS_RDATA(pwrap_readl(wrp, PWRAP_WACS2_RDATA)); 471 *rdata = PWRAP_GET_WACS_RDATA(pwrap_readl(wrp, PWRAP_WACS2_RDATA));
482 472
473 pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
474
483 return 0; 475 return 0;
484} 476}
485 477
@@ -563,45 +555,17 @@ static int pwrap_init_sidly(struct pmic_wrapper *wrp)
563 555
564static int pwrap_init_reg_clock(struct pmic_wrapper *wrp) 556static int pwrap_init_reg_clock(struct pmic_wrapper *wrp)
565{ 557{
566 unsigned long rate_spi; 558 if (pwrap_is_mt8135(wrp)) {
567 int ck_mhz; 559 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT);
568
569 rate_spi = clk_get_rate(wrp->clk_spi);
570
571 if (rate_spi > 26000000)
572 ck_mhz = 26;
573 else if (rate_spi > 18000000)
574 ck_mhz = 18;
575 else
576 ck_mhz = 0;
577
578 switch (ck_mhz) {
579 case 18:
580 if (pwrap_is_mt8135(wrp))
581 pwrap_writel(wrp, 0xc, PWRAP_CSHEXT);
582 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_WRITE);
583 pwrap_writel(wrp, 0xc, PWRAP_CSHEXT_READ);
584 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_START);
585 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_END);
586 break;
587 case 26:
588 if (pwrap_is_mt8135(wrp))
589 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT);
590 pwrap_writel(wrp, 0x0, PWRAP_CSHEXT_WRITE); 560 pwrap_writel(wrp, 0x0, PWRAP_CSHEXT_WRITE);
591 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_READ); 561 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_READ);
592 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_START); 562 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_START);
593 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_END); 563 pwrap_writel(wrp, 0x0, PWRAP_CSLEXT_END);
594 break; 564 } else {
595 case 0: 565 pwrap_writel(wrp, 0x0, PWRAP_CSHEXT_WRITE);
596 if (pwrap_is_mt8135(wrp)) 566 pwrap_writel(wrp, 0x4, PWRAP_CSHEXT_READ);
597 pwrap_writel(wrp, 0xf, PWRAP_CSHEXT); 567 pwrap_writel(wrp, 0x2, PWRAP_CSLEXT_START);
598 pwrap_writel(wrp, 0xf, PWRAP_CSHEXT_WRITE); 568 pwrap_writel(wrp, 0x2, PWRAP_CSLEXT_END);
599 pwrap_writel(wrp, 0xf, PWRAP_CSHEXT_READ);
600 pwrap_writel(wrp, 0xf, PWRAP_CSLEXT_START);
601 pwrap_writel(wrp, 0xf, PWRAP_CSLEXT_END);
602 break;
603 default:
604 return -EINVAL;
605 } 569 }
606 570
607 return 0; 571 return 0;
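
Taken together, the wrapper hunks move the VLDCLR acknowledge to the one place it belongs: pwrap_read() now latches RDATA and then clears the valid flag itself, so write/read no longer have to mop up a stale wait-for-valid-clear state on entry, and the SPI-rate switch in pwrap_init_reg_clock() collapses to a fixed per-SoC timing. A hedged sketch of the resulting read handshake (the command encoding and the vldclr predicate are assumptions extrapolated from the visible helpers):

    static int pwrap_read_sketch(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
    {
            int ret;

            ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
            if (ret)
                    return ret;

            pwrap_writel(wrp, (adr >> 1) << 16, PWRAP_WACS2_CMD); /* assumed layout */

            ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_vldclr); /* assumed helper */
            if (ret)
                    return ret;

            *rdata = PWRAP_GET_WACS_RDATA(pwrap_readl(wrp, PWRAP_WACS2_RDATA));
            pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR); /* ack; FSM returns to idle */

            return 0;
    }
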
diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c
index 09428412139e..c5352ea4821e 100644
--- a/drivers/ssb/driver_chipcommon_pmu.c
+++ b/drivers/ssb/driver_chipcommon_pmu.c
@@ -621,8 +621,8 @@ static u32 ssb_pmu_get_alp_clock_clk0(struct ssb_chipcommon *cc)
621 u32 crystalfreq; 621 u32 crystalfreq;
622 const struct pmu0_plltab_entry *e = NULL; 622 const struct pmu0_plltab_entry *e = NULL;
623 623
624 crystalfreq = chipco_read32(cc, SSB_CHIPCO_PMU_CTL) & 624 crystalfreq = (chipco_read32(cc, SSB_CHIPCO_PMU_CTL) &
625 SSB_CHIPCO_PMU_CTL_XTALFREQ >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT; 625 SSB_CHIPCO_PMU_CTL_XTALFREQ) >> SSB_CHIPCO_PMU_CTL_XTALFREQ_SHIFT;
626 e = pmu0_plltab_find_entry(crystalfreq); 626 e = pmu0_plltab_find_entry(crystalfreq);
627 BUG_ON(!e); 627 BUG_ON(!e);
628 return e->freq * 1000; 628 return e->freq * 1000;
@@ -634,7 +634,7 @@ u32 ssb_pmu_get_alp_clock(struct ssb_chipcommon *cc)
634 634
635 switch (bus->chip_id) { 635 switch (bus->chip_id) {
636 case 0x5354: 636 case 0x5354:
637 ssb_pmu_get_alp_clock_clk0(cc); 637 return ssb_pmu_get_alp_clock_clk0(cc);
638 default: 638 default:
639 ssb_err("ERROR: PMU alp clock unknown for device %04X\n", 639 ssb_err("ERROR: PMU alp clock unknown for device %04X\n",
640 bus->chip_id); 640 bus->chip_id);
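
Two distinct bugs in one file: the shift operators bind tighter than & in C, so the unparenthesized expression masked the control word with MASK >> SHIFT instead of extracting the XTALFREQ field, and the 0x5354 case fell through to the error branch because it never returned its result. A self-contained demonstration of the precedence pitfall (register value, mask, and shift are made up):

    #include <stdio.h>

    #define CTL   0x00AB1200u  /* made-up PMU control word */
    #define MASK  0x0000FC00u  /* made-up XTALFREQ field mask */
    #define SHIFT 10

    int main(void)
    {
            unsigned int buggy = CTL & MASK >> SHIFT;   /* CTL & (MASK >> SHIFT) */
            unsigned int fixed = (CTL & MASK) >> SHIFT; /* extract, then shift */

            printf("buggy = 0x%x, fixed = 0x%x\n", buggy, fixed);
            return 0;
    }
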
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 3925db160650..513c81f43d6e 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -189,22 +189,7 @@ static inline int ll_quota_off(struct super_block *sb, int off, int remount)
189#endif 189#endif
190 190
191 191
192
193/*
194 * After 3.1, kernel's nameidata.intent.open.flags is different
195 * with lustre's lookup_intent.it_flags, as lustre's it_flags'
196 * lower bits equal to FMODE_xxx while kernel doesn't transliterate
197 * lower bits of nameidata.intent.open.flags to FMODE_xxx.
198 * */
199#include <linux/version.h> 192#include <linux/version.h>
200static inline int ll_namei_to_lookup_intent_flag(int flag)
201{
202#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
203 flag = (flag & ~O_ACCMODE) | OPEN_FMODE(flag);
204#endif
205 return flag;
206}
207
208#include <linux/fs.h> 193#include <linux/fs.h>
209 194
210# define ll_umode_t umode_t 195# define ll_umode_t umode_t
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 5f918e3c4683..528af9011653 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -57,12 +57,6 @@
57#define VM_FAULT_RETRY 0 57#define VM_FAULT_RETRY 0
58#endif 58#endif
59 59
60/* Kernel 3.1 kills LOOKUP_CONTINUE, LOOKUP_PARENT is equivalent to it.
61 * seem kernel commit 49084c3bb2055c401f3493c13edae14d49128ca0 */
62#ifndef LOOKUP_CONTINUE
63#define LOOKUP_CONTINUE LOOKUP_PARENT
64#endif
65
66/** Only used on client-side for indicating the tail of dir hash/offset. */ 60/** Only used on client-side for indicating the tail of dir hash/offset. */
67#define LL_DIR_END_OFF 0x7fffffffffffffffULL 61#define LL_DIR_END_OFF 0x7fffffffffffffffULL
68#define LL_DIR_END_OFF_32BIT 0x7fffffffUL 62#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 3711e671a4df..69b203651905 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -118,7 +118,7 @@ failed:
118 return rc; 118 return rc;
119} 119}
120 120
121static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd) 121static const char *ll_follow_link(struct dentry *dentry, void **cookie)
122{ 122{
123 struct inode *inode = d_inode(dentry); 123 struct inode *inode = d_inode(dentry);
124 struct ptlrpc_request *request = NULL; 124 struct ptlrpc_request *request = NULL;
@@ -126,32 +126,22 @@ static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
126 char *symname = NULL; 126 char *symname = NULL;
127 127
128 CDEBUG(D_VFSTRACE, "VFS Op\n"); 128 CDEBUG(D_VFSTRACE, "VFS Op\n");
129 /* Limit the recursive symlink depth to 5 instead of default 129 ll_inode_size_lock(inode);
130 * 8 links when kernel has 4k stack to prevent stack overflow. 130 rc = ll_readlink_internal(inode, &request, &symname);
131 * For 8k stacks we need to limit it to 7 for local servers. */ 131 ll_inode_size_unlock(inode);
132 if (THREAD_SIZE < 8192 && current->link_count >= 6) {
133 rc = -ELOOP;
134 } else if (THREAD_SIZE == 8192 && current->link_count >= 8) {
135 rc = -ELOOP;
136 } else {
137 ll_inode_size_lock(inode);
138 rc = ll_readlink_internal(inode, &request, &symname);
139 ll_inode_size_unlock(inode);
140 }
141 if (rc) { 132 if (rc) {
142 ptlrpc_req_finished(request); 133 ptlrpc_req_finished(request);
143 request = NULL; 134 return ERR_PTR(rc);
144 symname = ERR_PTR(rc);
145 } 135 }
146 136
147 nd_set_link(nd, symname);
148 /* symname may contain a pointer to the request message buffer, 137 /* symname may contain a pointer to the request message buffer,
149 * we delay request releasing until ll_put_link then. 138 * we delay request releasing until ll_put_link then.
150 */ 139 */
151 return request; 140 *cookie = request;
141 return symname;
152} 142}
153 143
154static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 144static void ll_put_link(struct inode *unused, void *cookie)
155{ 145{
156 ptlrpc_req_finished(cookie); 146 ptlrpc_req_finished(cookie);
157} 147}
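
This follows the VFS API change that removed struct nameidata from the symlink hooks: ->follow_link() now returns the target string (or an ERR_PTR) directly and parks anything that must outlive the walk in *cookie, and ->put_link() receives that cookie back. Here the ptlrpc request doubles as the cookie because symname may point into its message buffer. A hedged sketch of the same shape with hypothetical names:

    static const char *foo_follow_link(struct dentry *dentry, void **cookie)
    {
            char *target = foo_read_target(d_inode(dentry)); /* assumed helper */

            if (IS_ERR(target))
                    return target;          /* error propagates as ERR_PTR */

            *cookie = target;               /* handed back to foo_put_link() */
            return target;
    }

    static void foo_put_link(struct inode *unused, void *cookie)
    {
            kfree(cookie);
    }
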
diff --git a/drivers/staging/ozwpan/ozhcd.c b/drivers/staging/ozwpan/ozhcd.c
index 5ff4716b72c3..784b5ecfa849 100644
--- a/drivers/staging/ozwpan/ozhcd.c
+++ b/drivers/staging/ozwpan/ozhcd.c
@@ -746,8 +746,8 @@ void oz_hcd_pd_reset(void *hpd, void *hport)
746/* 746/*
747 * Context: softirq 747 * Context: softirq
748 */ 748 */
749void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, const u8 *desc, 749void oz_hcd_get_desc_cnf(void *hport, u8 req_id, u8 status, const u8 *desc,
750 int length, int offset, int total_size) 750 u8 length, u16 offset, u16 total_size)
751{ 751{
752 struct oz_port *port = hport; 752 struct oz_port *port = hport;
753 struct urb *urb; 753 struct urb *urb;
@@ -759,8 +759,8 @@ void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, const u8 *desc,
759 if (!urb) 759 if (!urb)
760 return; 760 return;
761 if (status == 0) { 761 if (status == 0) {
762 int copy_len; 762 unsigned int copy_len;
763 int required_size = urb->transfer_buffer_length; 763 unsigned int required_size = urb->transfer_buffer_length;
764 764
765 if (required_size > total_size) 765 if (required_size > total_size)
766 required_size = total_size; 766 required_size = total_size;
diff --git a/drivers/staging/ozwpan/ozusbif.h b/drivers/staging/ozwpan/ozusbif.h
index 4249fa374012..d2a6085345be 100644
--- a/drivers/staging/ozwpan/ozusbif.h
+++ b/drivers/staging/ozwpan/ozusbif.h
@@ -29,8 +29,8 @@ void oz_usb_request_heartbeat(void *hpd);
29 29
30/* Confirmation functions. 30/* Confirmation functions.
31 */ 31 */
32void oz_hcd_get_desc_cnf(void *hport, u8 req_id, int status, 32void oz_hcd_get_desc_cnf(void *hport, u8 req_id, u8 status,
33 const u8 *desc, int length, int offset, int total_size); 33 const u8 *desc, u8 length, u16 offset, u16 total_size);
34void oz_hcd_control_cnf(void *hport, u8 req_id, u8 rcode, 34void oz_hcd_control_cnf(void *hport, u8 req_id, u8 rcode,
35 const u8 *data, int data_len); 35 const u8 *data, int data_len);
36 36
diff --git a/drivers/staging/ozwpan/ozusbsvc1.c b/drivers/staging/ozwpan/ozusbsvc1.c
index d434d8c6fff6..f660bb198c65 100644
--- a/drivers/staging/ozwpan/ozusbsvc1.c
+++ b/drivers/staging/ozwpan/ozusbsvc1.c
@@ -326,7 +326,11 @@ static void oz_usb_handle_ep_data(struct oz_usb_ctx *usb_ctx,
326 struct oz_multiple_fixed *body = 326 struct oz_multiple_fixed *body =
327 (struct oz_multiple_fixed *)data_hdr; 327 (struct oz_multiple_fixed *)data_hdr;
328 u8 *data = body->data; 328 u8 *data = body->data;
329 int n = (len - sizeof(struct oz_multiple_fixed)+1) 329 unsigned int n;
330 if (!body->unit_size ||
331 len < sizeof(struct oz_multiple_fixed) - 1)
332 break;
333 n = (len - (sizeof(struct oz_multiple_fixed) - 1))
330 / body->unit_size; 334 / body->unit_size;
331 while (n--) { 335 while (n--) {
332 oz_hcd_data_ind(usb_ctx->hport, body->endpoint, 336 oz_hcd_data_ind(usb_ctx->hport, body->endpoint,
@@ -390,10 +394,15 @@ void oz_usb_rx(struct oz_pd *pd, struct oz_elt *elt)
390 case OZ_GET_DESC_RSP: { 394 case OZ_GET_DESC_RSP: {
391 struct oz_get_desc_rsp *body = 395 struct oz_get_desc_rsp *body =
392 (struct oz_get_desc_rsp *)usb_hdr; 396 (struct oz_get_desc_rsp *)usb_hdr;
393 int data_len = elt->length - 397 u16 offs, total_size;
394 sizeof(struct oz_get_desc_rsp) + 1; 398 u8 data_len;
395 u16 offs = le16_to_cpu(get_unaligned(&body->offset)); 399
396 u16 total_size = 400 if (elt->length < sizeof(struct oz_get_desc_rsp) - 1)
401 break;
402 data_len = elt->length -
403 (sizeof(struct oz_get_desc_rsp) - 1);
404 offs = le16_to_cpu(get_unaligned(&body->offset));
405 total_size =
397 le16_to_cpu(get_unaligned(&body->total_size)); 406 le16_to_cpu(get_unaligned(&body->total_size));
398 oz_dbg(ON, "USB_REQ_GET_DESCRIPTOR - cnf\n"); 407 oz_dbg(ON, "USB_REQ_GET_DESCRIPTOR - cnf\n");
399 oz_hcd_get_desc_cnf(usb_ctx->hport, body->req_id, 408 oz_hcd_get_desc_cnf(usb_ctx->hport, body->req_id,
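
The ozwpan hunks are bounds-check fixes for attacker-controlled element lengths: with unsigned arithmetic, len - (sizeof(hdr) - 1) silently wraps to a huge count when len is shorter than the header, and a zero unit_size would divide by zero. A standalone illustration of the underflow (the struct layout is a toy stand-in):

    #include <stdio.h>

    struct hdr {                 /* toy stand-in for oz_multiple_fixed */
            unsigned char type, endpoint, unit_size;
            unsigned char data[1];
    };

    int main(void)
    {
            struct hdr h = { 0, 0, 8, { 0 } };
            size_t len = 2;      /* element shorter than the header */

            if (!h.unit_size || len < sizeof(struct hdr) - 1) {
                    puts("rejected short element");  /* the patched path */
                    return 0;
            }

            /* Without the check this wraps to an enormous count. */
            printf("n = %zu\n", (len - (sizeof(struct hdr) - 1)) / h.unit_size);
            return 0;
    }
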
diff --git a/drivers/staging/rtl8712/rtl8712_led.c b/drivers/staging/rtl8712/rtl8712_led.c
index f1d47a0676c3..ada8d5dafd49 100644
--- a/drivers/staging/rtl8712/rtl8712_led.c
+++ b/drivers/staging/rtl8712/rtl8712_led.c
@@ -898,11 +898,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
898 IS_LED_WPS_BLINKING(pLed)) 898 IS_LED_WPS_BLINKING(pLed))
899 return; 899 return;
900 if (pLed->bLedLinkBlinkInProgress == true) { 900 if (pLed->bLedLinkBlinkInProgress == true) {
901 del_timer_sync(&pLed->BlinkTimer); 901 del_timer(&pLed->BlinkTimer);
902 pLed->bLedLinkBlinkInProgress = false; 902 pLed->bLedLinkBlinkInProgress = false;
903 } 903 }
904 if (pLed->bLedBlinkInProgress == true) { 904 if (pLed->bLedBlinkInProgress == true) {
905 del_timer_sync(&pLed->BlinkTimer); 905 del_timer(&pLed->BlinkTimer);
906 pLed->bLedBlinkInProgress = false; 906 pLed->bLedBlinkInProgress = false;
907 } 907 }
908 pLed->bLedNoLinkBlinkInProgress = true; 908 pLed->bLedNoLinkBlinkInProgress = true;
@@ -921,11 +921,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
921 IS_LED_WPS_BLINKING(pLed)) 921 IS_LED_WPS_BLINKING(pLed))
922 return; 922 return;
923 if (pLed->bLedNoLinkBlinkInProgress == true) { 923 if (pLed->bLedNoLinkBlinkInProgress == true) {
924 del_timer_sync(&pLed->BlinkTimer); 924 del_timer(&pLed->BlinkTimer);
925 pLed->bLedNoLinkBlinkInProgress = false; 925 pLed->bLedNoLinkBlinkInProgress = false;
926 } 926 }
927 if (pLed->bLedBlinkInProgress == true) { 927 if (pLed->bLedBlinkInProgress == true) {
928 del_timer_sync(&pLed->BlinkTimer); 928 del_timer(&pLed->BlinkTimer);
929 pLed->bLedBlinkInProgress = false; 929 pLed->bLedBlinkInProgress = false;
930 } 930 }
931 pLed->bLedLinkBlinkInProgress = true; 931 pLed->bLedLinkBlinkInProgress = true;
@@ -946,15 +946,15 @@ static void SwLedControlMode1(struct _adapter *padapter,
946 if (IS_LED_WPS_BLINKING(pLed)) 946 if (IS_LED_WPS_BLINKING(pLed))
947 return; 947 return;
948 if (pLed->bLedNoLinkBlinkInProgress == true) { 948 if (pLed->bLedNoLinkBlinkInProgress == true) {
949 del_timer_sync(&pLed->BlinkTimer); 949 del_timer(&pLed->BlinkTimer);
950 pLed->bLedNoLinkBlinkInProgress = false; 950 pLed->bLedNoLinkBlinkInProgress = false;
951 } 951 }
952 if (pLed->bLedLinkBlinkInProgress == true) { 952 if (pLed->bLedLinkBlinkInProgress == true) {
953 del_timer_sync(&pLed->BlinkTimer); 953 del_timer(&pLed->BlinkTimer);
954 pLed->bLedLinkBlinkInProgress = false; 954 pLed->bLedLinkBlinkInProgress = false;
955 } 955 }
956 if (pLed->bLedBlinkInProgress == true) { 956 if (pLed->bLedBlinkInProgress == true) {
957 del_timer_sync(&pLed->BlinkTimer); 957 del_timer(&pLed->BlinkTimer);
958 pLed->bLedBlinkInProgress = false; 958 pLed->bLedBlinkInProgress = false;
959 } 959 }
960 pLed->bLedScanBlinkInProgress = true; 960 pLed->bLedScanBlinkInProgress = true;
@@ -975,11 +975,11 @@ static void SwLedControlMode1(struct _adapter *padapter,
975 IS_LED_WPS_BLINKING(pLed)) 975 IS_LED_WPS_BLINKING(pLed))
976 return; 976 return;
977 if (pLed->bLedNoLinkBlinkInProgress == true) { 977 if (pLed->bLedNoLinkBlinkInProgress == true) {
978 del_timer_sync(&pLed->BlinkTimer); 978 del_timer(&pLed->BlinkTimer);
979 pLed->bLedNoLinkBlinkInProgress = false; 979 pLed->bLedNoLinkBlinkInProgress = false;
980 } 980 }
981 if (pLed->bLedLinkBlinkInProgress == true) { 981 if (pLed->bLedLinkBlinkInProgress == true) {
982 del_timer_sync(&pLed->BlinkTimer); 982 del_timer(&pLed->BlinkTimer);
983 pLed->bLedLinkBlinkInProgress = false; 983 pLed->bLedLinkBlinkInProgress = false;
984 } 984 }
985 pLed->bLedBlinkInProgress = true; 985 pLed->bLedBlinkInProgress = true;
@@ -998,19 +998,19 @@ static void SwLedControlMode1(struct _adapter *padapter,
998 case LED_CTL_START_WPS_BOTTON: 998 case LED_CTL_START_WPS_BOTTON:
999 if (pLed->bLedWPSBlinkInProgress == false) { 999 if (pLed->bLedWPSBlinkInProgress == false) {
1000 if (pLed->bLedNoLinkBlinkInProgress == true) { 1000 if (pLed->bLedNoLinkBlinkInProgress == true) {
1001 del_timer_sync(&pLed->BlinkTimer); 1001 del_timer(&pLed->BlinkTimer);
1002 pLed->bLedNoLinkBlinkInProgress = false; 1002 pLed->bLedNoLinkBlinkInProgress = false;
1003 } 1003 }
1004 if (pLed->bLedLinkBlinkInProgress == true) { 1004 if (pLed->bLedLinkBlinkInProgress == true) {
1005 del_timer_sync(&pLed->BlinkTimer); 1005 del_timer(&pLed->BlinkTimer);
1006 pLed->bLedLinkBlinkInProgress = false; 1006 pLed->bLedLinkBlinkInProgress = false;
1007 } 1007 }
1008 if (pLed->bLedBlinkInProgress == true) { 1008 if (pLed->bLedBlinkInProgress == true) {
1009 del_timer_sync(&pLed->BlinkTimer); 1009 del_timer(&pLed->BlinkTimer);
1010 pLed->bLedBlinkInProgress = false; 1010 pLed->bLedBlinkInProgress = false;
1011 } 1011 }
1012 if (pLed->bLedScanBlinkInProgress == true) { 1012 if (pLed->bLedScanBlinkInProgress == true) {
1013 del_timer_sync(&pLed->BlinkTimer); 1013 del_timer(&pLed->BlinkTimer);
1014 pLed->bLedScanBlinkInProgress = false; 1014 pLed->bLedScanBlinkInProgress = false;
1015 } 1015 }
1016 pLed->bLedWPSBlinkInProgress = true; 1016 pLed->bLedWPSBlinkInProgress = true;
@@ -1025,23 +1025,23 @@ static void SwLedControlMode1(struct _adapter *padapter,
1025 break; 1025 break;
1026 case LED_CTL_STOP_WPS: 1026 case LED_CTL_STOP_WPS:
1027 if (pLed->bLedNoLinkBlinkInProgress == true) { 1027 if (pLed->bLedNoLinkBlinkInProgress == true) {
1028 del_timer_sync(&pLed->BlinkTimer); 1028 del_timer(&pLed->BlinkTimer);
1029 pLed->bLedNoLinkBlinkInProgress = false; 1029 pLed->bLedNoLinkBlinkInProgress = false;
1030 } 1030 }
1031 if (pLed->bLedLinkBlinkInProgress == true) { 1031 if (pLed->bLedLinkBlinkInProgress == true) {
1032 del_timer_sync(&pLed->BlinkTimer); 1032 del_timer(&pLed->BlinkTimer);
1033 pLed->bLedLinkBlinkInProgress = false; 1033 pLed->bLedLinkBlinkInProgress = false;
1034 } 1034 }
1035 if (pLed->bLedBlinkInProgress == true) { 1035 if (pLed->bLedBlinkInProgress == true) {
1036 del_timer_sync(&pLed->BlinkTimer); 1036 del_timer(&pLed->BlinkTimer);
1037 pLed->bLedBlinkInProgress = false; 1037 pLed->bLedBlinkInProgress = false;
1038 } 1038 }
1039 if (pLed->bLedScanBlinkInProgress == true) { 1039 if (pLed->bLedScanBlinkInProgress == true) {
1040 del_timer_sync(&pLed->BlinkTimer); 1040 del_timer(&pLed->BlinkTimer);
1041 pLed->bLedScanBlinkInProgress = false; 1041 pLed->bLedScanBlinkInProgress = false;
1042 } 1042 }
1043 if (pLed->bLedWPSBlinkInProgress) 1043 if (pLed->bLedWPSBlinkInProgress)
1044 del_timer_sync(&pLed->BlinkTimer); 1044 del_timer(&pLed->BlinkTimer);
1045 else 1045 else
1046 pLed->bLedWPSBlinkInProgress = true; 1046 pLed->bLedWPSBlinkInProgress = true;
1047 pLed->CurrLedState = LED_BLINK_WPS_STOP; 1047 pLed->CurrLedState = LED_BLINK_WPS_STOP;
@@ -1057,7 +1057,7 @@ static void SwLedControlMode1(struct _adapter *padapter,
1057 break; 1057 break;
1058 case LED_CTL_STOP_WPS_FAIL: 1058 case LED_CTL_STOP_WPS_FAIL:
1059 if (pLed->bLedWPSBlinkInProgress) { 1059 if (pLed->bLedWPSBlinkInProgress) {
1060 del_timer_sync(&pLed->BlinkTimer); 1060 del_timer(&pLed->BlinkTimer);
1061 pLed->bLedWPSBlinkInProgress = false; 1061 pLed->bLedWPSBlinkInProgress = false;
1062 } 1062 }
1063 pLed->bLedNoLinkBlinkInProgress = true; 1063 pLed->bLedNoLinkBlinkInProgress = true;
@@ -1073,23 +1073,23 @@ static void SwLedControlMode1(struct _adapter *padapter,
1073 pLed->CurrLedState = LED_OFF; 1073 pLed->CurrLedState = LED_OFF;
1074 pLed->BlinkingLedState = LED_OFF; 1074 pLed->BlinkingLedState = LED_OFF;
1075 if (pLed->bLedNoLinkBlinkInProgress) { 1075 if (pLed->bLedNoLinkBlinkInProgress) {
1076 del_timer_sync(&pLed->BlinkTimer); 1076 del_timer(&pLed->BlinkTimer);
1077 pLed->bLedNoLinkBlinkInProgress = false; 1077 pLed->bLedNoLinkBlinkInProgress = false;
1078 } 1078 }
1079 if (pLed->bLedLinkBlinkInProgress) { 1079 if (pLed->bLedLinkBlinkInProgress) {
1080 del_timer_sync(&pLed->BlinkTimer); 1080 del_timer(&pLed->BlinkTimer);
1081 pLed->bLedLinkBlinkInProgress = false; 1081 pLed->bLedLinkBlinkInProgress = false;
1082 } 1082 }
1083 if (pLed->bLedBlinkInProgress) { 1083 if (pLed->bLedBlinkInProgress) {
1084 del_timer_sync(&pLed->BlinkTimer); 1084 del_timer(&pLed->BlinkTimer);
1085 pLed->bLedBlinkInProgress = false; 1085 pLed->bLedBlinkInProgress = false;
1086 } 1086 }
1087 if (pLed->bLedWPSBlinkInProgress) { 1087 if (pLed->bLedWPSBlinkInProgress) {
1088 del_timer_sync(&pLed->BlinkTimer); 1088 del_timer(&pLed->BlinkTimer);
1089 pLed->bLedWPSBlinkInProgress = false; 1089 pLed->bLedWPSBlinkInProgress = false;
1090 } 1090 }
1091 if (pLed->bLedScanBlinkInProgress) { 1091 if (pLed->bLedScanBlinkInProgress) {
1092 del_timer_sync(&pLed->BlinkTimer); 1092 del_timer(&pLed->BlinkTimer);
1093 pLed->bLedScanBlinkInProgress = false; 1093 pLed->bLedScanBlinkInProgress = false;
1094 } 1094 }
1095 mod_timer(&pLed->BlinkTimer, 1095 mod_timer(&pLed->BlinkTimer,
@@ -1116,7 +1116,7 @@ static void SwLedControlMode2(struct _adapter *padapter,
1116 return; 1116 return;
1117 1117
1118 if (pLed->bLedBlinkInProgress == true) { 1118 if (pLed->bLedBlinkInProgress == true) {
1119 del_timer_sync(&pLed->BlinkTimer); 1119 del_timer(&pLed->BlinkTimer);
1120 pLed->bLedBlinkInProgress = false; 1120 pLed->bLedBlinkInProgress = false;
1121 } 1121 }
1122 pLed->bLedScanBlinkInProgress = true; 1122 pLed->bLedScanBlinkInProgress = true;
@@ -1154,11 +1154,11 @@ static void SwLedControlMode2(struct _adapter *padapter,
1154 pLed->CurrLedState = LED_ON; 1154 pLed->CurrLedState = LED_ON;
1155 pLed->BlinkingLedState = LED_ON; 1155 pLed->BlinkingLedState = LED_ON;
1156 if (pLed->bLedBlinkInProgress) { 1156 if (pLed->bLedBlinkInProgress) {
1157 del_timer_sync(&pLed->BlinkTimer); 1157 del_timer(&pLed->BlinkTimer);
1158 pLed->bLedBlinkInProgress = false; 1158 pLed->bLedBlinkInProgress = false;
1159 } 1159 }
1160 if (pLed->bLedScanBlinkInProgress) { 1160 if (pLed->bLedScanBlinkInProgress) {
1161 del_timer_sync(&pLed->BlinkTimer); 1161 del_timer(&pLed->BlinkTimer);
1162 pLed->bLedScanBlinkInProgress = false; 1162 pLed->bLedScanBlinkInProgress = false;
1163 } 1163 }
1164 1164
@@ -1170,11 +1170,11 @@ static void SwLedControlMode2(struct _adapter *padapter,
1170 case LED_CTL_START_WPS_BOTTON: 1170 case LED_CTL_START_WPS_BOTTON:
1171 if (pLed->bLedWPSBlinkInProgress == false) { 1171 if (pLed->bLedWPSBlinkInProgress == false) {
1172 if (pLed->bLedBlinkInProgress == true) { 1172 if (pLed->bLedBlinkInProgress == true) {
1173 del_timer_sync(&pLed->BlinkTimer); 1173 del_timer(&pLed->BlinkTimer);
1174 pLed->bLedBlinkInProgress = false; 1174 pLed->bLedBlinkInProgress = false;
1175 } 1175 }
1176 if (pLed->bLedScanBlinkInProgress == true) { 1176 if (pLed->bLedScanBlinkInProgress == true) {
1177 del_timer_sync(&pLed->BlinkTimer); 1177 del_timer(&pLed->BlinkTimer);
1178 pLed->bLedScanBlinkInProgress = false; 1178 pLed->bLedScanBlinkInProgress = false;
1179 } 1179 }
1180 pLed->bLedWPSBlinkInProgress = true; 1180 pLed->bLedWPSBlinkInProgress = true;
@@ -1214,15 +1214,15 @@ static void SwLedControlMode2(struct _adapter *padapter,
1214 pLed->CurrLedState = LED_OFF; 1214 pLed->CurrLedState = LED_OFF;
1215 pLed->BlinkingLedState = LED_OFF; 1215 pLed->BlinkingLedState = LED_OFF;
1216 if (pLed->bLedBlinkInProgress) { 1216 if (pLed->bLedBlinkInProgress) {
1217 del_timer_sync(&pLed->BlinkTimer); 1217 del_timer(&pLed->BlinkTimer);
1218 pLed->bLedBlinkInProgress = false; 1218 pLed->bLedBlinkInProgress = false;
1219 } 1219 }
1220 if (pLed->bLedScanBlinkInProgress) { 1220 if (pLed->bLedScanBlinkInProgress) {
1221 del_timer_sync(&pLed->BlinkTimer); 1221 del_timer(&pLed->BlinkTimer);
1222 pLed->bLedScanBlinkInProgress = false; 1222 pLed->bLedScanBlinkInProgress = false;
1223 } 1223 }
1224 if (pLed->bLedWPSBlinkInProgress) { 1224 if (pLed->bLedWPSBlinkInProgress) {
1225 del_timer_sync(&pLed->BlinkTimer); 1225 del_timer(&pLed->BlinkTimer);
1226 pLed->bLedWPSBlinkInProgress = false; 1226 pLed->bLedWPSBlinkInProgress = false;
1227 } 1227 }
1228 mod_timer(&pLed->BlinkTimer, 1228 mod_timer(&pLed->BlinkTimer,
@@ -1248,7 +1248,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
1248 if (IS_LED_WPS_BLINKING(pLed)) 1248 if (IS_LED_WPS_BLINKING(pLed))
1249 return; 1249 return;
1250 if (pLed->bLedBlinkInProgress == true) { 1250 if (pLed->bLedBlinkInProgress == true) {
1251 del_timer_sync(&pLed->BlinkTimer); 1251 del_timer(&pLed->BlinkTimer);
1252 pLed->bLedBlinkInProgress = false; 1252 pLed->bLedBlinkInProgress = false;
1253 } 1253 }
1254 pLed->bLedScanBlinkInProgress = true; 1254 pLed->bLedScanBlinkInProgress = true;
@@ -1286,11 +1286,11 @@ static void SwLedControlMode3(struct _adapter *padapter,
1286 pLed->CurrLedState = LED_ON; 1286 pLed->CurrLedState = LED_ON;
1287 pLed->BlinkingLedState = LED_ON; 1287 pLed->BlinkingLedState = LED_ON;
1288 if (pLed->bLedBlinkInProgress) { 1288 if (pLed->bLedBlinkInProgress) {
1289 del_timer_sync(&pLed->BlinkTimer); 1289 del_timer(&pLed->BlinkTimer);
1290 pLed->bLedBlinkInProgress = false; 1290 pLed->bLedBlinkInProgress = false;
1291 } 1291 }
1292 if (pLed->bLedScanBlinkInProgress) { 1292 if (pLed->bLedScanBlinkInProgress) {
1293 del_timer_sync(&pLed->BlinkTimer); 1293 del_timer(&pLed->BlinkTimer);
1294 pLed->bLedScanBlinkInProgress = false; 1294 pLed->bLedScanBlinkInProgress = false;
1295 } 1295 }
1296 mod_timer(&pLed->BlinkTimer, 1296 mod_timer(&pLed->BlinkTimer,
@@ -1300,11 +1300,11 @@ static void SwLedControlMode3(struct _adapter *padapter,
1300 case LED_CTL_START_WPS_BOTTON: 1300 case LED_CTL_START_WPS_BOTTON:
1301 if (pLed->bLedWPSBlinkInProgress == false) { 1301 if (pLed->bLedWPSBlinkInProgress == false) {
1302 if (pLed->bLedBlinkInProgress == true) { 1302 if (pLed->bLedBlinkInProgress == true) {
1303 del_timer_sync(&pLed->BlinkTimer); 1303 del_timer(&pLed->BlinkTimer);
1304 pLed->bLedBlinkInProgress = false; 1304 pLed->bLedBlinkInProgress = false;
1305 } 1305 }
1306 if (pLed->bLedScanBlinkInProgress == true) { 1306 if (pLed->bLedScanBlinkInProgress == true) {
1307 del_timer_sync(&pLed->BlinkTimer); 1307 del_timer(&pLed->BlinkTimer);
1308 pLed->bLedScanBlinkInProgress = false; 1308 pLed->bLedScanBlinkInProgress = false;
1309 } 1309 }
1310 pLed->bLedWPSBlinkInProgress = true; 1310 pLed->bLedWPSBlinkInProgress = true;
@@ -1319,7 +1319,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
1319 break; 1319 break;
1320 case LED_CTL_STOP_WPS: 1320 case LED_CTL_STOP_WPS:
1321 if (pLed->bLedWPSBlinkInProgress) { 1321 if (pLed->bLedWPSBlinkInProgress) {
1322 del_timer_sync(&(pLed->BlinkTimer)); 1322 del_timer(&pLed->BlinkTimer);
1323 pLed->bLedWPSBlinkInProgress = false; 1323 pLed->bLedWPSBlinkInProgress = false;
1324 } else 1324 } else
1325 pLed->bLedWPSBlinkInProgress = true; 1325 pLed->bLedWPSBlinkInProgress = true;
@@ -1336,7 +1336,7 @@ static void SwLedControlMode3(struct _adapter *padapter,
1336 break; 1336 break;
1337 case LED_CTL_STOP_WPS_FAIL: 1337 case LED_CTL_STOP_WPS_FAIL:
1338 if (pLed->bLedWPSBlinkInProgress) { 1338 if (pLed->bLedWPSBlinkInProgress) {
1339 del_timer_sync(&pLed->BlinkTimer); 1339 del_timer(&pLed->BlinkTimer);
1340 pLed->bLedWPSBlinkInProgress = false; 1340 pLed->bLedWPSBlinkInProgress = false;
1341 } 1341 }
1342 pLed->CurrLedState = LED_OFF; 1342 pLed->CurrLedState = LED_OFF;
@@ -1357,15 +1357,15 @@ static void SwLedControlMode3(struct _adapter *padapter,
1357 pLed->CurrLedState = LED_OFF; 1357 pLed->CurrLedState = LED_OFF;
1358 pLed->BlinkingLedState = LED_OFF; 1358 pLed->BlinkingLedState = LED_OFF;
1359 if (pLed->bLedBlinkInProgress) { 1359 if (pLed->bLedBlinkInProgress) {
1360 del_timer_sync(&pLed->BlinkTimer); 1360 del_timer(&pLed->BlinkTimer);
1361 pLed->bLedBlinkInProgress = false; 1361 pLed->bLedBlinkInProgress = false;
1362 } 1362 }
1363 if (pLed->bLedScanBlinkInProgress) { 1363 if (pLed->bLedScanBlinkInProgress) {
1364 del_timer_sync(&pLed->BlinkTimer); 1364 del_timer(&pLed->BlinkTimer);
1365 pLed->bLedScanBlinkInProgress = false; 1365 pLed->bLedScanBlinkInProgress = false;
1366 } 1366 }
1367 if (pLed->bLedWPSBlinkInProgress) { 1367 if (pLed->bLedWPSBlinkInProgress) {
1368 del_timer_sync(&pLed->BlinkTimer); 1368 del_timer(&pLed->BlinkTimer);
1369 pLed->bLedWPSBlinkInProgress = false; 1369 pLed->bLedWPSBlinkInProgress = false;
1370 } 1370 }
1371 mod_timer(&pLed->BlinkTimer, 1371 mod_timer(&pLed->BlinkTimer,
@@ -1388,7 +1388,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1388 case LED_CTL_START_TO_LINK: 1388 case LED_CTL_START_TO_LINK:
1389 if (pLed1->bLedWPSBlinkInProgress) { 1389 if (pLed1->bLedWPSBlinkInProgress) {
1390 pLed1->bLedWPSBlinkInProgress = false; 1390 pLed1->bLedWPSBlinkInProgress = false;
1391 del_timer_sync(&pLed1->BlinkTimer); 1391 del_timer(&pLed1->BlinkTimer);
1392 pLed1->BlinkingLedState = LED_OFF; 1392 pLed1->BlinkingLedState = LED_OFF;
1393 pLed1->CurrLedState = LED_OFF; 1393 pLed1->CurrLedState = LED_OFF;
1394 if (pLed1->bLedOn) 1394 if (pLed1->bLedOn)
@@ -1400,11 +1400,11 @@ static void SwLedControlMode4(struct _adapter *padapter,
1400 IS_LED_WPS_BLINKING(pLed)) 1400 IS_LED_WPS_BLINKING(pLed))
1401 return; 1401 return;
1402 if (pLed->bLedBlinkInProgress == true) { 1402 if (pLed->bLedBlinkInProgress == true) {
1403 del_timer_sync(&pLed->BlinkTimer); 1403 del_timer(&pLed->BlinkTimer);
1404 pLed->bLedBlinkInProgress = false; 1404 pLed->bLedBlinkInProgress = false;
1405 } 1405 }
1406 if (pLed->bLedNoLinkBlinkInProgress == true) { 1406 if (pLed->bLedNoLinkBlinkInProgress == true) {
1407 del_timer_sync(&pLed->BlinkTimer); 1407 del_timer(&pLed->BlinkTimer);
1408 pLed->bLedNoLinkBlinkInProgress = false; 1408 pLed->bLedNoLinkBlinkInProgress = false;
1409 } 1409 }
1410 pLed->bLedStartToLinkBlinkInProgress = true; 1410 pLed->bLedStartToLinkBlinkInProgress = true;
@@ -1426,7 +1426,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1426 if (LedAction == LED_CTL_LINK) { 1426 if (LedAction == LED_CTL_LINK) {
1427 if (pLed1->bLedWPSBlinkInProgress) { 1427 if (pLed1->bLedWPSBlinkInProgress) {
1428 pLed1->bLedWPSBlinkInProgress = false; 1428 pLed1->bLedWPSBlinkInProgress = false;
1429 del_timer_sync(&pLed1->BlinkTimer); 1429 del_timer(&pLed1->BlinkTimer);
1430 pLed1->BlinkingLedState = LED_OFF; 1430 pLed1->BlinkingLedState = LED_OFF;
1431 pLed1->CurrLedState = LED_OFF; 1431 pLed1->CurrLedState = LED_OFF;
1432 if (pLed1->bLedOn) 1432 if (pLed1->bLedOn)
@@ -1439,7 +1439,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1439 IS_LED_WPS_BLINKING(pLed)) 1439 IS_LED_WPS_BLINKING(pLed))
1440 return; 1440 return;
1441 if (pLed->bLedBlinkInProgress == true) { 1441 if (pLed->bLedBlinkInProgress == true) {
1442 del_timer_sync(&pLed->BlinkTimer); 1442 del_timer(&pLed->BlinkTimer);
1443 pLed->bLedBlinkInProgress = false; 1443 pLed->bLedBlinkInProgress = false;
1444 } 1444 }
1445 pLed->bLedNoLinkBlinkInProgress = true; 1445 pLed->bLedNoLinkBlinkInProgress = true;
@@ -1460,11 +1460,11 @@ static void SwLedControlMode4(struct _adapter *padapter,
1460 if (IS_LED_WPS_BLINKING(pLed)) 1460 if (IS_LED_WPS_BLINKING(pLed))
1461 return; 1461 return;
1462 if (pLed->bLedNoLinkBlinkInProgress == true) { 1462 if (pLed->bLedNoLinkBlinkInProgress == true) {
1463 del_timer_sync(&pLed->BlinkTimer); 1463 del_timer(&pLed->BlinkTimer);
1464 pLed->bLedNoLinkBlinkInProgress = false; 1464 pLed->bLedNoLinkBlinkInProgress = false;
1465 } 1465 }
1466 if (pLed->bLedBlinkInProgress == true) { 1466 if (pLed->bLedBlinkInProgress == true) {
1467 del_timer_sync(&pLed->BlinkTimer); 1467 del_timer(&pLed->BlinkTimer);
1468 pLed->bLedBlinkInProgress = false; 1468 pLed->bLedBlinkInProgress = false;
1469 } 1469 }
1470 pLed->bLedScanBlinkInProgress = true; 1470 pLed->bLedScanBlinkInProgress = true;
@@ -1485,7 +1485,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1485 IS_LED_WPS_BLINKING(pLed)) 1485 IS_LED_WPS_BLINKING(pLed))
1486 return; 1486 return;
1487 if (pLed->bLedNoLinkBlinkInProgress == true) { 1487 if (pLed->bLedNoLinkBlinkInProgress == true) {
1488 del_timer_sync(&pLed->BlinkTimer); 1488 del_timer(&pLed->BlinkTimer);
1489 pLed->bLedNoLinkBlinkInProgress = false; 1489 pLed->bLedNoLinkBlinkInProgress = false;
1490 } 1490 }
1491 pLed->bLedBlinkInProgress = true; 1491 pLed->bLedBlinkInProgress = true;
@@ -1503,7 +1503,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1503 case LED_CTL_START_WPS_BOTTON: 1503 case LED_CTL_START_WPS_BOTTON:
1504 if (pLed1->bLedWPSBlinkInProgress) { 1504 if (pLed1->bLedWPSBlinkInProgress) {
1505 pLed1->bLedWPSBlinkInProgress = false; 1505 pLed1->bLedWPSBlinkInProgress = false;
1506 del_timer_sync(&(pLed1->BlinkTimer)); 1506 del_timer(&pLed1->BlinkTimer);
1507 pLed1->BlinkingLedState = LED_OFF; 1507 pLed1->BlinkingLedState = LED_OFF;
1508 pLed1->CurrLedState = LED_OFF; 1508 pLed1->CurrLedState = LED_OFF;
1509 if (pLed1->bLedOn) 1509 if (pLed1->bLedOn)
@@ -1512,15 +1512,15 @@ static void SwLedControlMode4(struct _adapter *padapter,
1512 } 1512 }
1513 if (pLed->bLedWPSBlinkInProgress == false) { 1513 if (pLed->bLedWPSBlinkInProgress == false) {
1514 if (pLed->bLedNoLinkBlinkInProgress == true) { 1514 if (pLed->bLedNoLinkBlinkInProgress == true) {
1515 del_timer_sync(&pLed->BlinkTimer); 1515 del_timer(&pLed->BlinkTimer);
1516 pLed->bLedNoLinkBlinkInProgress = false; 1516 pLed->bLedNoLinkBlinkInProgress = false;
1517 } 1517 }
1518 if (pLed->bLedBlinkInProgress == true) { 1518 if (pLed->bLedBlinkInProgress == true) {
1519 del_timer_sync(&pLed->BlinkTimer); 1519 del_timer(&pLed->BlinkTimer);
1520 pLed->bLedBlinkInProgress = false; 1520 pLed->bLedBlinkInProgress = false;
1521 } 1521 }
1522 if (pLed->bLedScanBlinkInProgress == true) { 1522 if (pLed->bLedScanBlinkInProgress == true) {
1523 del_timer_sync(&pLed->BlinkTimer); 1523 del_timer(&pLed->BlinkTimer);
1524 pLed->bLedScanBlinkInProgress = false; 1524 pLed->bLedScanBlinkInProgress = false;
1525 } 1525 }
1526 pLed->bLedWPSBlinkInProgress = true; 1526 pLed->bLedWPSBlinkInProgress = true;
@@ -1538,7 +1538,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1538 break; 1538 break;
1539 case LED_CTL_STOP_WPS: /*WPS connect success*/ 1539 case LED_CTL_STOP_WPS: /*WPS connect success*/
1540 if (pLed->bLedWPSBlinkInProgress) { 1540 if (pLed->bLedWPSBlinkInProgress) {
1541 del_timer_sync(&pLed->BlinkTimer); 1541 del_timer(&pLed->BlinkTimer);
1542 pLed->bLedWPSBlinkInProgress = false; 1542 pLed->bLedWPSBlinkInProgress = false;
1543 } 1543 }
1544 pLed->bLedNoLinkBlinkInProgress = true; 1544 pLed->bLedNoLinkBlinkInProgress = true;
@@ -1552,7 +1552,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1552 break; 1552 break;
1553 case LED_CTL_STOP_WPS_FAIL: /*WPS authentication fail*/ 1553 case LED_CTL_STOP_WPS_FAIL: /*WPS authentication fail*/
1554 if (pLed->bLedWPSBlinkInProgress) { 1554 if (pLed->bLedWPSBlinkInProgress) {
1555 del_timer_sync(&pLed->BlinkTimer); 1555 del_timer(&pLed->BlinkTimer);
1556 pLed->bLedWPSBlinkInProgress = false; 1556 pLed->bLedWPSBlinkInProgress = false;
1557 } 1557 }
1558 pLed->bLedNoLinkBlinkInProgress = true; 1558 pLed->bLedNoLinkBlinkInProgress = true;
@@ -1565,7 +1565,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1565 msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA)); 1565 msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA));
1566 /*LED1 settings*/ 1566 /*LED1 settings*/
1567 if (pLed1->bLedWPSBlinkInProgress) 1567 if (pLed1->bLedWPSBlinkInProgress)
1568 del_timer_sync(&pLed1->BlinkTimer); 1568 del_timer(&pLed1->BlinkTimer);
1569 else 1569 else
1570 pLed1->bLedWPSBlinkInProgress = true; 1570 pLed1->bLedWPSBlinkInProgress = true;
1571 pLed1->CurrLedState = LED_BLINK_WPS_STOP; 1571 pLed1->CurrLedState = LED_BLINK_WPS_STOP;
@@ -1578,7 +1578,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1578 break; 1578 break;
1579 case LED_CTL_STOP_WPS_FAIL_OVERLAP: /*WPS session overlap*/ 1579 case LED_CTL_STOP_WPS_FAIL_OVERLAP: /*WPS session overlap*/
1580 if (pLed->bLedWPSBlinkInProgress) { 1580 if (pLed->bLedWPSBlinkInProgress) {
1581 del_timer_sync(&pLed->BlinkTimer); 1581 del_timer(&pLed->BlinkTimer);
1582 pLed->bLedWPSBlinkInProgress = false; 1582 pLed->bLedWPSBlinkInProgress = false;
1583 } 1583 }
1584 pLed->bLedNoLinkBlinkInProgress = true; 1584 pLed->bLedNoLinkBlinkInProgress = true;
@@ -1591,7 +1591,7 @@ static void SwLedControlMode4(struct _adapter *padapter,
1591 msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA)); 1591 msecs_to_jiffies(LED_BLINK_NO_LINK_INTERVAL_ALPHA));
1592 /*LED1 settings*/ 1592 /*LED1 settings*/
1593 if (pLed1->bLedWPSBlinkInProgress) 1593 if (pLed1->bLedWPSBlinkInProgress)
1594 del_timer_sync(&pLed1->BlinkTimer); 1594 del_timer(&pLed1->BlinkTimer);
1595 else 1595 else
1596 pLed1->bLedWPSBlinkInProgress = true; 1596 pLed1->bLedWPSBlinkInProgress = true;
1597 pLed1->CurrLedState = LED_BLINK_WPS_STOP_OVERLAP; 1597 pLed1->CurrLedState = LED_BLINK_WPS_STOP_OVERLAP;
@@ -1607,31 +1607,31 @@ static void SwLedControlMode4(struct _adapter *padapter,
1607 pLed->CurrLedState = LED_OFF; 1607 pLed->CurrLedState = LED_OFF;
1608 pLed->BlinkingLedState = LED_OFF; 1608 pLed->BlinkingLedState = LED_OFF;
1609 if (pLed->bLedNoLinkBlinkInProgress) { 1609 if (pLed->bLedNoLinkBlinkInProgress) {
1610 del_timer_sync(&pLed->BlinkTimer); 1610 del_timer(&pLed->BlinkTimer);
1611 pLed->bLedNoLinkBlinkInProgress = false; 1611 pLed->bLedNoLinkBlinkInProgress = false;
1612 } 1612 }
1613 if (pLed->bLedLinkBlinkInProgress) { 1613 if (pLed->bLedLinkBlinkInProgress) {
1614 del_timer_sync(&pLed->BlinkTimer); 1614 del_timer(&pLed->BlinkTimer);
1615 pLed->bLedLinkBlinkInProgress = false; 1615 pLed->bLedLinkBlinkInProgress = false;
1616 } 1616 }
1617 if (pLed->bLedBlinkInProgress) { 1617 if (pLed->bLedBlinkInProgress) {
1618 del_timer_sync(&pLed->BlinkTimer); 1618 del_timer(&pLed->BlinkTimer);
1619 pLed->bLedBlinkInProgress = false; 1619 pLed->bLedBlinkInProgress = false;
1620 } 1620 }
1621 if (pLed->bLedWPSBlinkInProgress) { 1621 if (pLed->bLedWPSBlinkInProgress) {
1622 del_timer_sync(&pLed->BlinkTimer); 1622 del_timer(&pLed->BlinkTimer);
1623 pLed->bLedWPSBlinkInProgress = false; 1623 pLed->bLedWPSBlinkInProgress = false;
1624 } 1624 }
1625 if (pLed->bLedScanBlinkInProgress) { 1625 if (pLed->bLedScanBlinkInProgress) {
1626 del_timer_sync(&pLed->BlinkTimer); 1626 del_timer(&pLed->BlinkTimer);
1627 pLed->bLedScanBlinkInProgress = false; 1627 pLed->bLedScanBlinkInProgress = false;
1628 } 1628 }
1629 if (pLed->bLedStartToLinkBlinkInProgress) { 1629 if (pLed->bLedStartToLinkBlinkInProgress) {
1630 del_timer_sync(&pLed->BlinkTimer); 1630 del_timer(&pLed->BlinkTimer);
1631 pLed->bLedStartToLinkBlinkInProgress = false; 1631 pLed->bLedStartToLinkBlinkInProgress = false;
1632 } 1632 }
1633 if (pLed1->bLedWPSBlinkInProgress) { 1633 if (pLed1->bLedWPSBlinkInProgress) {
1634 del_timer_sync(&pLed1->BlinkTimer); 1634 del_timer(&pLed1->BlinkTimer);
1635 pLed1->bLedWPSBlinkInProgress = false; 1635 pLed1->bLedWPSBlinkInProgress = false;
1636 } 1636 }
1637 pLed1->BlinkingLedState = LED_UNKNOWN; 1637 pLed1->BlinkingLedState = LED_UNKNOWN;
@@ -1671,7 +1671,7 @@ static void SwLedControlMode5(struct _adapter *padapter,
1671 ; /* dummy branch */ 1671 ; /* dummy branch */
1672 else if (pLed->bLedScanBlinkInProgress == false) { 1672 else if (pLed->bLedScanBlinkInProgress == false) {
1673 if (pLed->bLedBlinkInProgress == true) { 1673 if (pLed->bLedBlinkInProgress == true) {
1674 del_timer_sync(&pLed->BlinkTimer); 1674 del_timer(&pLed->BlinkTimer);
1675 pLed->bLedBlinkInProgress = false; 1675 pLed->bLedBlinkInProgress = false;
1676 } 1676 }
1677 pLed->bLedScanBlinkInProgress = true; 1677 pLed->bLedScanBlinkInProgress = true;
@@ -1705,7 +1705,7 @@ static void SwLedControlMode5(struct _adapter *padapter,
1705 pLed->CurrLedState = LED_OFF; 1705 pLed->CurrLedState = LED_OFF;
1706 pLed->BlinkingLedState = LED_OFF; 1706 pLed->BlinkingLedState = LED_OFF;
1707 if (pLed->bLedBlinkInProgress) { 1707 if (pLed->bLedBlinkInProgress) {
1708 del_timer_sync(&pLed->BlinkTimer); 1708 del_timer(&pLed->BlinkTimer);
1709 pLed->bLedBlinkInProgress = false; 1709 pLed->bLedBlinkInProgress = false;
1710 } 1710 }
1711 SwLedOff(padapter, pLed); 1711 SwLedOff(padapter, pLed);
@@ -1756,7 +1756,7 @@ static void SwLedControlMode6(struct _adapter *padapter,
1756 case LED_CTL_START_WPS_BOTTON: 1756 case LED_CTL_START_WPS_BOTTON:
1757 if (pLed->bLedWPSBlinkInProgress == false) { 1757 if (pLed->bLedWPSBlinkInProgress == false) {
1758 if (pLed->bLedBlinkInProgress == true) { 1758 if (pLed->bLedBlinkInProgress == true) {
1759 del_timer_sync(&pLed->BlinkTimer); 1759 del_timer(&pLed->BlinkTimer);
1760 pLed->bLedBlinkInProgress = false; 1760 pLed->bLedBlinkInProgress = false;
1761 } 1761 }
1762 pLed->bLedWPSBlinkInProgress = true; 1762 pLed->bLedWPSBlinkInProgress = true;
@@ -1772,7 +1772,7 @@ static void SwLedControlMode6(struct _adapter *padapter,
1772 case LED_CTL_STOP_WPS_FAIL: 1772 case LED_CTL_STOP_WPS_FAIL:
1773 case LED_CTL_STOP_WPS: 1773 case LED_CTL_STOP_WPS:
1774 if (pLed->bLedWPSBlinkInProgress) { 1774 if (pLed->bLedWPSBlinkInProgress) {
1775 del_timer_sync(&pLed->BlinkTimer); 1775 del_timer(&pLed->BlinkTimer);
1776 pLed->bLedWPSBlinkInProgress = false; 1776 pLed->bLedWPSBlinkInProgress = false;
1777 } 1777 }
1778 pLed->CurrLedState = LED_ON; 1778 pLed->CurrLedState = LED_ON;
@@ -1784,11 +1784,11 @@ static void SwLedControlMode6(struct _adapter *padapter,
1784 pLed->CurrLedState = LED_OFF; 1784 pLed->CurrLedState = LED_OFF;
1785 pLed->BlinkingLedState = LED_OFF; 1785 pLed->BlinkingLedState = LED_OFF;
1786 if (pLed->bLedBlinkInProgress) { 1786 if (pLed->bLedBlinkInProgress) {
1787 del_timer_sync(&pLed->BlinkTimer); 1787 del_timer(&pLed->BlinkTimer);
1788 pLed->bLedBlinkInProgress = false; 1788 pLed->bLedBlinkInProgress = false;
1789 } 1789 }
1790 if (pLed->bLedWPSBlinkInProgress) { 1790 if (pLed->bLedWPSBlinkInProgress) {
1791 del_timer_sync(&pLed->BlinkTimer); 1791 del_timer(&pLed->BlinkTimer);
1792 pLed->bLedWPSBlinkInProgress = false; 1792 pLed->bLedWPSBlinkInProgress = false;
1793 } 1793 }
1794 SwLedOff(padapter, pLed); 1794 SwLedOff(padapter, pLed);
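
Every del_timer_sync() in the LED state machine becomes del_timer(), presumably because these paths can run from contexts the blink handler itself serialises with, including the handler's own call chain: del_timer_sync() spins until the running handler finishes, which is a self-deadlock when the caller is that handler. Plain del_timer() only deactivates a pending timer and is safe there. A hedged sketch of the hazard (pre-4.15 timer style, names hypothetical):

    static void blink_timer_fn(unsigned long data)
    {
            struct led_state *led = (struct led_state *)data;

            /*
             * del_timer_sync(&led->timer) here would wait for the running
             * handler to finish -- i.e. for ourselves -- and never return.
             */
            del_timer(&led->timer);         /* just deactivate; safe */
            led->blinking = false;
    }
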
diff --git a/drivers/staging/rtl8712/rtl871x_cmd.c b/drivers/staging/rtl8712/rtl871x_cmd.c
index 1a1c38f885d6..e35854d28f90 100644
--- a/drivers/staging/rtl8712/rtl871x_cmd.c
+++ b/drivers/staging/rtl8712/rtl871x_cmd.c
@@ -910,7 +910,7 @@ void r8712_createbss_cmd_callback(struct _adapter *padapter,
910 if (pcmd->res != H2C_SUCCESS) 910 if (pcmd->res != H2C_SUCCESS)
911 mod_timer(&pmlmepriv->assoc_timer, 911 mod_timer(&pmlmepriv->assoc_timer,
912 jiffies + msecs_to_jiffies(1)); 912 jiffies + msecs_to_jiffies(1));
913 del_timer_sync(&pmlmepriv->assoc_timer); 913 del_timer(&pmlmepriv->assoc_timer);
914#ifdef __BIG_ENDIAN 914#ifdef __BIG_ENDIAN
915 /* endian_convert */ 915 /* endian_convert */
916 pnetwork->Length = le32_to_cpu(pnetwork->Length); 916 pnetwork->Length = le32_to_cpu(pnetwork->Length);
diff --git a/drivers/staging/rtl8712/rtl871x_mlme.c b/drivers/staging/rtl8712/rtl871x_mlme.c
index fb2b195b90af..c044b0e55ba9 100644
--- a/drivers/staging/rtl8712/rtl871x_mlme.c
+++ b/drivers/staging/rtl8712/rtl871x_mlme.c
@@ -582,7 +582,7 @@ void r8712_surveydone_event_callback(struct _adapter *adapter, u8 *pbuf)
582 spin_lock_irqsave(&pmlmepriv->lock, irqL); 582 spin_lock_irqsave(&pmlmepriv->lock, irqL);
583 583
584 if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY) == true) { 584 if (check_fwstate(pmlmepriv, _FW_UNDER_SURVEY) == true) {
585 del_timer_sync(&pmlmepriv->scan_to_timer); 585 del_timer(&pmlmepriv->scan_to_timer);
586 586
587 _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY); 587 _clr_fwstate_(pmlmepriv, _FW_UNDER_SURVEY);
588 } 588 }
@@ -696,7 +696,7 @@ void r8712_ind_disconnect(struct _adapter *padapter)
696 } 696 }
697 if (padapter->pwrctrlpriv.pwr_mode != 697 if (padapter->pwrctrlpriv.pwr_mode !=
698 padapter->registrypriv.power_mgnt) { 698 padapter->registrypriv.power_mgnt) {
699 del_timer_sync(&pmlmepriv->dhcp_timer); 699 del_timer(&pmlmepriv->dhcp_timer);
700 r8712_set_ps_mode(padapter, padapter->registrypriv.power_mgnt, 700 r8712_set_ps_mode(padapter, padapter->registrypriv.power_mgnt,
701 padapter->registrypriv.smart_ps); 701 padapter->registrypriv.smart_ps);
702 } 702 }
@@ -910,7 +910,7 @@ void r8712_joinbss_event_callback(struct _adapter *adapter, u8 *pbuf)
910 if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) 910 if (check_fwstate(pmlmepriv, WIFI_STATION_STATE)
911 == true) 911 == true)
912 r8712_indicate_connect(adapter); 912 r8712_indicate_connect(adapter);
913 del_timer_sync(&pmlmepriv->assoc_timer); 913 del_timer(&pmlmepriv->assoc_timer);
914 } else 914 } else
915 goto ignore_joinbss_callback; 915 goto ignore_joinbss_callback;
916 } else { 916 } else {
diff --git a/drivers/staging/rtl8712/rtl871x_pwrctrl.c b/drivers/staging/rtl8712/rtl871x_pwrctrl.c
index aaa584435c87..9bc04f474d18 100644
--- a/drivers/staging/rtl8712/rtl871x_pwrctrl.c
+++ b/drivers/staging/rtl8712/rtl871x_pwrctrl.c
@@ -103,7 +103,7 @@ void r8712_cpwm_int_hdl(struct _adapter *padapter,
103 103
104 if (pwrpriv->cpwm_tog == ((preportpwrstate->state) & 0x80)) 104 if (pwrpriv->cpwm_tog == ((preportpwrstate->state) & 0x80))
105 return; 105 return;
106 del_timer_sync(&padapter->pwrctrlpriv.rpwm_check_timer); 106 del_timer(&padapter->pwrctrlpriv.rpwm_check_timer);
107 _enter_pwrlock(&pwrpriv->lock); 107 _enter_pwrlock(&pwrpriv->lock);
108 pwrpriv->cpwm = (preportpwrstate->state) & 0xf; 108 pwrpriv->cpwm = (preportpwrstate->state) & 0xf;
109 if (pwrpriv->cpwm >= PS_STATE_S2) { 109 if (pwrpriv->cpwm >= PS_STATE_S2) {
diff --git a/drivers/staging/rtl8712/rtl871x_sta_mgt.c b/drivers/staging/rtl8712/rtl871x_sta_mgt.c
index 7bb96c47f188..a9b93d0f6f56 100644
--- a/drivers/staging/rtl8712/rtl871x_sta_mgt.c
+++ b/drivers/staging/rtl8712/rtl871x_sta_mgt.c
@@ -198,7 +198,7 @@ void r8712_free_stainfo(struct _adapter *padapter, struct sta_info *psta)
198 * cancel reordering_ctrl_timer */ 198 * cancel reordering_ctrl_timer */
199 for (i = 0; i < 16; i++) { 199 for (i = 0; i < 16; i++) {
200 preorder_ctrl = &psta->recvreorder_ctrl[i]; 200 preorder_ctrl = &psta->recvreorder_ctrl[i];
201 del_timer_sync(&preorder_ctrl->reordering_ctrl_timer); 201 del_timer(&preorder_ctrl->reordering_ctrl_timer);
202 } 202 }
203 spin_lock(&(pfree_sta_queue->lock)); 203 spin_lock(&(pfree_sta_queue->lock));
204 /* insert into free_sta_queue; 20061114 */ 204 /* insert into free_sta_queue; 20061114 */
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index cc57a3a6b02b..396344cb011f 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -162,6 +162,17 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
162 return put_user(x, ptr); 162 return put_user(x, ptr);
163} 163}
164 164
165static inline int tty_copy_to_user(struct tty_struct *tty,
166 void __user *to,
167 const void *from,
168 unsigned long n)
169{
170 struct n_tty_data *ldata = tty->disc_data;
171
172 tty_audit_add_data(tty, to, n, ldata->icanon);
173 return copy_to_user(to, from, n);
174}
175
165/** 176/**
166 * n_tty_kick_worker - start input worker (if required) 177 * n_tty_kick_worker - start input worker (if required)
167 * @tty: terminal 178 * @tty: terminal
@@ -2070,8 +2081,8 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
2070 2081
2071 size = N_TTY_BUF_SIZE - tail; 2082 size = N_TTY_BUF_SIZE - tail;
2072 n = eol - tail; 2083 n = eol - tail;
2073 if (n > 4096) 2084 if (n > N_TTY_BUF_SIZE)
2074 n += 4096; 2085 n += N_TTY_BUF_SIZE;
2075 n += found; 2086 n += found;
2076 c = n; 2087 c = n;
2077 2088
@@ -2084,12 +2095,12 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
2084 __func__, eol, found, n, c, size, more); 2095 __func__, eol, found, n, c, size, more);
2085 2096
2086 if (n > size) { 2097 if (n > size) {
2087 ret = copy_to_user(*b, read_buf_addr(ldata, tail), size); 2098 ret = tty_copy_to_user(tty, *b, read_buf_addr(ldata, tail), size);
2088 if (ret) 2099 if (ret)
2089 return -EFAULT; 2100 return -EFAULT;
2090 ret = copy_to_user(*b + size, ldata->read_buf, n - size); 2101 ret = tty_copy_to_user(tty, *b + size, ldata->read_buf, n - size);
2091 } else 2102 } else
2092 ret = copy_to_user(*b, read_buf_addr(ldata, tail), n); 2103 ret = tty_copy_to_user(tty, *b, read_buf_addr(ldata, tail), n);
2093 2104
2094 if (ret) 2105 if (ret)
2095 return -EFAULT; 2106 return -EFAULT;
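
Two fixes in canonical-mode reads: the copies now go through tty_copy_to_user() so bulk canonical data reaches the audit subsystem like the single-character tty_put_user() path, and the wrap-around arithmetic uses N_TTY_BUF_SIZE instead of a hard-coded 4096 that only worked because the two values happened to match. The wrap test relies on unsigned modular arithmetic; a standalone check:

    #include <stdio.h>

    #define N_TTY_BUF_SIZE 4096

    int main(void)
    {
            size_t tail = 4090, eol = 6;     /* EOL wrapped past buffer end */
            size_t n = eol - tail;           /* unsigned: wraps to a huge value */

            if (n > N_TTY_BUF_SIZE)
                    n += N_TTY_BUF_SIZE;     /* modular add undoes the wrap */

            printf("n = %zu\n", n);          /* 12 bytes to copy */
            return 0;
    }
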
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 9289999cb7c6..dce1a23706e8 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -562,12 +562,36 @@ static irqreturn_t omap_wake_irq(int irq, void *dev_id)
562 return IRQ_NONE; 562 return IRQ_NONE;
563} 563}
564 564
565#ifdef CONFIG_SERIAL_8250_DMA
566static int omap_8250_dma_handle_irq(struct uart_port *port);
567#endif
568
569static irqreturn_t omap8250_irq(int irq, void *dev_id)
570{
571 struct uart_port *port = dev_id;
572 struct uart_8250_port *up = up_to_u8250p(port);
573 unsigned int iir;
574 int ret;
575
576#ifdef CONFIG_SERIAL_8250_DMA
577 if (up->dma) {
578 ret = omap_8250_dma_handle_irq(port);
579 return IRQ_RETVAL(ret);
580 }
581#endif
582
583 serial8250_rpm_get(up);
584 iir = serial_port_in(port, UART_IIR);
585 ret = serial8250_handle_irq(port, iir);
586 serial8250_rpm_put(up);
587
588 return IRQ_RETVAL(ret);
589}
590
565static int omap_8250_startup(struct uart_port *port) 591static int omap_8250_startup(struct uart_port *port)
566{ 592{
567 struct uart_8250_port *up = 593 struct uart_8250_port *up = up_to_u8250p(port);
568 container_of(port, struct uart_8250_port, port);
569 struct omap8250_priv *priv = port->private_data; 594 struct omap8250_priv *priv = port->private_data;
570
571 int ret; 595 int ret;
572 596
573 if (priv->wakeirq) { 597 if (priv->wakeirq) {
@@ -580,10 +604,31 @@ static int omap_8250_startup(struct uart_port *port)
580 604
581 pm_runtime_get_sync(port->dev); 605 pm_runtime_get_sync(port->dev);
582 606
583 ret = serial8250_do_startup(port); 607 up->mcr = 0;
584 if (ret) 608 serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
609
610 serial_out(up, UART_LCR, UART_LCR_WLEN8);
611
612 up->lsr_saved_flags = 0;
613 up->msr_saved_flags = 0;
614
615 if (up->dma) {
616 ret = serial8250_request_dma(up);
617 if (ret) {
618 dev_warn_ratelimited(port->dev,
619 "failed to request DMA\n");
620 up->dma = NULL;
621 }
622 }
623
624 ret = request_irq(port->irq, omap8250_irq, IRQF_SHARED,
625 dev_name(port->dev), port);
626 if (ret < 0)
585 goto err; 627 goto err;
586 628
629 up->ier = UART_IER_RLSI | UART_IER_RDI;
630 serial_out(up, UART_IER, up->ier);
631
587#ifdef CONFIG_PM 632#ifdef CONFIG_PM
588 up->capabilities |= UART_CAP_RPM; 633 up->capabilities |= UART_CAP_RPM;
589#endif 634#endif
@@ -610,8 +655,7 @@ err:
610 655
611static void omap_8250_shutdown(struct uart_port *port) 656static void omap_8250_shutdown(struct uart_port *port)
612{ 657{
613 struct uart_8250_port *up = 658 struct uart_8250_port *up = up_to_u8250p(port);
614 container_of(port, struct uart_8250_port, port);
615 struct omap8250_priv *priv = port->private_data; 659 struct omap8250_priv *priv = port->private_data;
616 660
617 flush_work(&priv->qos_work); 661 flush_work(&priv->qos_work);
@@ -621,11 +665,24 @@ static void omap_8250_shutdown(struct uart_port *port)
621 pm_runtime_get_sync(port->dev); 665 pm_runtime_get_sync(port->dev);
622 666
623 serial_out(up, UART_OMAP_WER, 0); 667 serial_out(up, UART_OMAP_WER, 0);
624 serial8250_do_shutdown(port); 668
669 up->ier = 0;
670 serial_out(up, UART_IER, 0);
671
672 if (up->dma)
673 serial8250_release_dma(up);
674
675 /*
676 * Disable break condition and FIFOs
677 */
678 if (up->lcr & UART_LCR_SBC)
679 serial_out(up, UART_LCR, up->lcr & ~UART_LCR_SBC);
680 serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
625 681
626 pm_runtime_mark_last_busy(port->dev); 682 pm_runtime_mark_last_busy(port->dev);
627 pm_runtime_put_autosuspend(port->dev); 683 pm_runtime_put_autosuspend(port->dev);
628 684
685 free_irq(port->irq, port);
629 if (priv->wakeirq) 686 if (priv->wakeirq)
630 free_irq(priv->wakeirq, port); 687 free_irq(priv->wakeirq, port);
631} 688}
@@ -974,6 +1031,13 @@ static inline int omap_8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
974} 1031}
975#endif 1032#endif
976 1033
1034static int omap8250_no_handle_irq(struct uart_port *port)
1035{
1036 /* IRQ has not been requested but handling irq? */
1037 WARN_ONCE(1, "Unexpected irq handling before port startup\n");
1038 return 0;
1039}
1040
977static int omap8250_probe(struct platform_device *pdev) 1041static int omap8250_probe(struct platform_device *pdev)
978{ 1042{
979 struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1043 struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1075,6 +1139,7 @@ static int omap8250_probe(struct platform_device *pdev)
1075 pm_runtime_get_sync(&pdev->dev); 1139 pm_runtime_get_sync(&pdev->dev);
1076 1140
1077 omap_serial_fill_features_erratas(&up, priv); 1141 omap_serial_fill_features_erratas(&up, priv);
1142 up.port.handle_irq = omap8250_no_handle_irq;
1078#ifdef CONFIG_SERIAL_8250_DMA 1143#ifdef CONFIG_SERIAL_8250_DMA
1079 if (pdev->dev.of_node) { 1144 if (pdev->dev.of_node) {
1080 /* 1145 /*
@@ -1088,7 +1153,6 @@ static int omap8250_probe(struct platform_device *pdev)
1088 ret = of_property_count_strings(pdev->dev.of_node, "dma-names"); 1153 ret = of_property_count_strings(pdev->dev.of_node, "dma-names");
1089 if (ret == 2) { 1154 if (ret == 2) {
1090 up.dma = &priv->omap8250_dma; 1155 up.dma = &priv->omap8250_dma;
1091 up.port.handle_irq = omap_8250_dma_handle_irq;
1092 priv->omap8250_dma.fn = the_no_dma_filter_fn; 1156 priv->omap8250_dma.fn = the_no_dma_filter_fn;
1093 priv->omap8250_dma.tx_dma = omap_8250_tx_dma; 1157 priv->omap8250_dma.tx_dma = omap_8250_tx_dma;
1094 priv->omap8250_dma.rx_dma = omap_8250_rx_dma; 1158 priv->omap8250_dma.rx_dma = omap_8250_rx_dma;
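
The omap driver stops routing startup through serial8250_do_startup() and requests its own shared handler; until startup installs it, handle_irq points at a stub that warns once, so a shared-line neighbour firing before the port is ready is caught loudly instead of poking unconfigured hardware. A hedged sketch of the park-then-swap ordering (names hypothetical):

    static int foo_no_handle_irq(struct uart_port *port)
    {
            WARN_ONCE(1, "unexpected irq before startup\n");
            return 0;                        /* not handled */
    }

    static int foo_probe(struct platform_device *pdev)
    {
            /* ... resource setup elided ... */
            up.port.handle_irq = foo_no_handle_irq;  /* park it early */
            return serial8250_register_8250_port(&up);
            /* startup later installs the real handler via request_irq() */
    }
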
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 6f5a0720a8c8..763eb20fe321 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1249,20 +1249,19 @@ __acquires(&uap->port.lock)
1249 1249
1250/* 1250/*
1251 * Transmit a character 1251 * Transmit a character
1252 * There must be at least one free entry in the TX FIFO to accept the char.
1253 * 1252 *
1254 * Returns true if the FIFO might have space in it afterwards; 1253 * Returns true if the character was successfully queued to the FIFO.
1255 * returns false if the FIFO definitely became full. 1254 * Returns false otherwise.
1256 */ 1255 */
1257static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c) 1256static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c)
1258{ 1257{
1258 if (readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF)
1259 return false; /* unable to transmit character */
1260
1259 writew(c, uap->port.membase + UART01x_DR); 1261 writew(c, uap->port.membase + UART01x_DR);
1260 uap->port.icount.tx++; 1262 uap->port.icount.tx++;
1261 1263
1262 if (likely(uap->tx_irq_seen > 1)) 1264 return true;
1263 return true;
1264
1265 return !(readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF);
1266} 1265}
1267 1266
1268static bool pl011_tx_chars(struct uart_amba_port *uap) 1267static bool pl011_tx_chars(struct uart_amba_port *uap)
@@ -1296,7 +1295,8 @@ static bool pl011_tx_chars(struct uart_amba_port *uap)
1296 return false; 1295 return false;
1297 1296
1298 if (uap->port.x_char) { 1297 if (uap->port.x_char) {
1299 pl011_tx_char(uap, uap->port.x_char); 1298 if (!pl011_tx_char(uap, uap->port.x_char))
1299 goto done;
1300 uap->port.x_char = 0; 1300 uap->port.x_char = 0;
1301 --count; 1301 --count;
1302 } 1302 }
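
pl011_tx_char() changes contract: it now tests TXFF before writing and reports whether the character was actually queued, and the caller honours a failure for the x_char case by bailing out instead of clearing x_char while the character was silently dropped. A toy model of the new contract (FIFO depth and bookkeeping are made up):

    #include <stdbool.h>
    #include <stdio.h>

    #define FIFO_DEPTH 16
    static int fifo_fill;

    static bool tx_char(char c)
    {
            (void)c;
            if (fifo_fill >= FIFO_DEPTH)
                    return false;            /* full: caller retries later */
            fifo_fill++;                     /* count only what was queued */
            return true;
    }

    int main(void)
    {
            int sent = 0;

            while (tx_char('x'))
                    sent++;
            printf("queued %d chars before the FIFO filled\n", sent);
            return 0;
    }
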
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index c8cfa0637128..88250395b0ce 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -911,6 +911,14 @@ static void dma_rx_callback(void *data)
 
 	status = dmaengine_tx_status(chan, (dma_cookie_t)0, &state);
 	count = RX_BUF_SIZE - state.residue;
+
+	if (readl(sport->port.membase + USR2) & USR2_IDLE) {
+		/* In condition [3] the SDMA counted up too early */
+		count--;
+
+		writel(USR2_IDLE, sport->port.membase + USR2);
+	}
+
 	dev_dbg(sport->port.dev, "We get %d bytes.\n", count);
 
 	if (count) {
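The hunk above derives the received byte count from the DMA residue and then subtracts one when the IDLE status shows the SDMA advanced too early. A hedged sketch of that arithmetic only; RX_BUF_SIZE and the idle flag are modeled, not read from hardware:

/* Residue-to-count sketch; values are illustrative. */
#include <stdbool.h>
#include <stdio.h>

#define RX_BUF_SIZE 4096

static int bytes_received(int residue, bool idle_counted_early)
{
	int count = RX_BUF_SIZE - residue;	/* bytes the engine consumed */

	if (idle_counted_early)
		count--;	/* engine advanced one slot too far */
	return count;
}

int main(void)
{
	printf("%d\n", bytes_received(4000, true));	/* prints 95 */
	return 0;
}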
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index fdab715a0631..c0eafa6fd403 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -339,7 +339,7 @@
 #define DWC3_DGCMD_SET_ENDPOINT_NRDY	0x0c
 #define DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK	0x10
 
-#define DWC3_DGCMD_STATUS(n)		(((n) >> 15) & 1)
+#define DWC3_DGCMD_STATUS(n)		(((n) >> 12) & 0x0F)
 #define DWC3_DGCMD_CMDACT		(1 << 10)
 #define DWC3_DGCMD_CMDIOC		(1 << 8)
 
@@ -355,7 +355,7 @@
 #define DWC3_DEPCMD_PARAM_SHIFT		16
 #define DWC3_DEPCMD_PARAM(x)		((x) << DWC3_DEPCMD_PARAM_SHIFT)
 #define DWC3_DEPCMD_GET_RSC_IDX(x)	(((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
-#define DWC3_DEPCMD_STATUS(x)		(((x) >> 15) & 1)
+#define DWC3_DEPCMD_STATUS(x)		(((x) >> 12) & 0x0F)
 #define DWC3_DEPCMD_HIPRI_FORCERM	(1 << 11)
 #define DWC3_DEPCMD_CMDACT		(1 << 10)
 #define DWC3_DEPCMD_CMDIOC		(1 << 8)
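Both macros change from sampling a single bit at position 15 to extracting the full 4-bit status field at bits 15:12. A tiny demonstration of the difference; the register value is made up:

/* One-bit versus four-bit field extraction, as in the change above. */
#include <stdint.h>
#include <stdio.h>

#define STATUS_1BIT(n)	(((n) >> 15) & 1)
#define STATUS_4BIT(n)	(((n) >> 12) & 0x0F)

int main(void)
{
	uint32_t reg = 0x3000;	/* status code 3 sitting in bits 15:12 */

	/* The old macro sees only bit 15 and reports 0; the new one
	 * recovers the whole status code. */
	printf("1-bit: %u, 4-bit: %u\n", STATUS_1BIT(reg), STATUS_4BIT(reg));
	return 0;
}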
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 6bdb57069044..3507f880eb74 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -315,7 +315,6 @@ static ssize_t ffs_ep0_write(struct file *file, const char __user *buf,
 			return ret;
 		}
 
-		set_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags);
 		return len;
 	}
 	break;
@@ -847,7 +846,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 		ret = ep->status;
 		if (io_data->read && ret > 0) {
 			ret = copy_to_iter(data, ret, &io_data->data);
-			if (unlikely(iov_iter_count(&io_data->data)))
+			if (!ret)
 				ret = -EFAULT;
 		}
 	}
@@ -1463,8 +1462,7 @@ static void ffs_data_clear(struct ffs_data *ffs)
 {
 	ENTER();
 
-	if (test_and_clear_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags))
-		ffs_closed(ffs);
+	ffs_closed(ffs);
 
 	BUG_ON(ffs->gadget);
 
@@ -3422,9 +3420,13 @@ static int ffs_ready(struct ffs_data *ffs)
 	ffs_obj->desc_ready = true;
 	ffs_obj->ffs_data = ffs;
 
-	if (ffs_obj->ffs_ready_callback)
+	if (ffs_obj->ffs_ready_callback) {
 		ret = ffs_obj->ffs_ready_callback(ffs);
+		if (ret)
+			goto done;
+	}
 
+	set_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags);
 done:
 	ffs_dev_unlock();
 	return ret;
@@ -3443,7 +3445,8 @@ static void ffs_closed(struct ffs_data *ffs)
 
 	ffs_obj->desc_ready = false;
 
-	if (ffs_obj->ffs_closed_callback)
+	if (test_and_clear_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags) &&
+	    ffs_obj->ffs_closed_callback)
 		ffs_obj->ffs_closed_callback(ffs);
 
 	if (!ffs_obj->opts || ffs_obj->opts->no_configfs
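The f_fs hunks above set FFS_FL_CALL_CLOSED_CALLBACK only after the ready callback succeeds, and fire the closed callback only if test_and_clear_bit() wins, so each successful "ready" is paired with at most one "closed" even under races. A userspace sketch of that pairing using a C11 atomic flag in place of the kernel bitops; illustrative only:

/* Ready/closed callback pairing sketch with stdatomic. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool call_closed;

static int on_ready(int ready_ret)
{
	if (ready_ret)
		return ready_ret;	/* ready failed: closed must not fire */
	atomic_store(&call_closed, true);
	return 0;
}

static void on_closed(void)
{
	/* Only one caller can observe true and flip it back to false,
	 * so the closed callback runs at most once per successful ready. */
	if (atomic_exchange(&call_closed, false))
		printf("closed callback fired\n");
}

int main(void)
{
	on_ready(0);
	on_closed();	/* fires */
	on_closed();	/* silently skipped */
	return 0;
}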
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 259b656c0b3e..6316aa5b1c49 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -973,7 +973,13 @@ static ssize_t f_midi_opts_id_show(struct f_midi_opts *opts, char *page)
 	int result;
 
 	mutex_lock(&opts->lock);
-	result = strlcpy(page, opts->id, PAGE_SIZE);
+	if (opts->id) {
+		result = strlcpy(page, opts->id, PAGE_SIZE);
+	} else {
+		page[0] = 0;
+		result = 0;
+	}
+
 	mutex_unlock(&opts->lock);
 
 	return result;
diff --git a/drivers/usb/gadget/function/f_uac1.c b/drivers/usb/gadget/function/f_uac1.c
index 9719abfb6145..7856b3394494 100644
--- a/drivers/usb/gadget/function/f_uac1.c
+++ b/drivers/usb/gadget/function/f_uac1.c
@@ -588,7 +588,10 @@ static int f_audio_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 
 	if (intf == 1) {
 		if (alt == 1) {
-			config_ep_by_speed(cdev->gadget, f, out_ep);
+			err = config_ep_by_speed(cdev->gadget, f, out_ep);
+			if (err)
+				return err;
+
 			usb_ep_enable(out_ep);
 			out_ep->driver_data = audio;
 			audio->copy_buf = f_audio_buffer_alloc(audio_buf_size);
diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c
index 7b9ef7e257d2..e821931c965c 100644
--- a/drivers/usb/gadget/legacy/g_ffs.c
+++ b/drivers/usb/gadget/legacy/g_ffs.c
@@ -304,8 +304,10 @@ static int functionfs_ready_callback(struct ffs_data *ffs)
 	gfs_registered = true;
 
 	ret = usb_composite_probe(&gfs_driver);
-	if (unlikely(ret < 0))
+	if (unlikely(ret < 0)) {
+		++missing_funcs;
 		gfs_registered = false;
+	}
 
 	return ret;
 }
diff --git a/drivers/usb/gadget/udc/s3c2410_udc.c b/drivers/usb/gadget/udc/s3c2410_udc.c
index b808951491cc..99fd9a5667df 100644
--- a/drivers/usb/gadget/udc/s3c2410_udc.c
+++ b/drivers/usb/gadget/udc/s3c2410_udc.c
@@ -1487,7 +1487,7 @@ static int s3c2410_udc_pullup(struct usb_gadget *gadget, int is_on)
 
 	dprintk(DEBUG_NORMAL, "%s()\n", __func__);
 
-	s3c2410_udc_set_pullup(udc, is_on ? 0 : 1);
+	s3c2410_udc_set_pullup(udc, is_on);
 	return 0;
 }
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index ec8ac1674854..36bf089b708f 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3682,18 +3682,21 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
 {
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 	unsigned long flags;
-	int ret;
+	int ret, slot_id;
 	struct xhci_command *command;
 
 	command = xhci_alloc_command(xhci, false, false, GFP_KERNEL);
 	if (!command)
 		return 0;
 
+	/* xhci->slot_id and xhci->addr_dev are not thread-safe */
+	mutex_lock(&xhci->mutex);
 	spin_lock_irqsave(&xhci->lock, flags);
 	command->completion = &xhci->addr_dev;
 	ret = xhci_queue_slot_control(xhci, command, TRB_ENABLE_SLOT, 0);
 	if (ret) {
 		spin_unlock_irqrestore(&xhci->lock, flags);
+		mutex_unlock(&xhci->mutex);
 		xhci_dbg(xhci, "FIXME: allocate a command ring segment\n");
 		kfree(command);
 		return 0;
@@ -3702,8 +3705,10 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	spin_unlock_irqrestore(&xhci->lock, flags);
 
 	wait_for_completion(command->completion);
+	slot_id = xhci->slot_id;
+	mutex_unlock(&xhci->mutex);
 
-	if (!xhci->slot_id || command->status != COMP_SUCCESS) {
+	if (!slot_id || command->status != COMP_SUCCESS) {
 		xhci_err(xhci, "Error while assigning device slot ID\n");
 		xhci_err(xhci, "Max number of devices this xHCI host supports is %u.\n",
 				HCS_MAX_SLOTS(
@@ -3728,11 +3733,11 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
 	 * xhci_discover_or_reset_device(), which may be called as part of
 	 * mass storage driver error handling.
 	 */
-	if (!xhci_alloc_virt_device(xhci, xhci->slot_id, udev, GFP_NOIO)) {
+	if (!xhci_alloc_virt_device(xhci, slot_id, udev, GFP_NOIO)) {
 		xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n");
 		goto disable_slot;
 	}
-	udev->slot_id = xhci->slot_id;
+	udev->slot_id = slot_id;
 
 #ifndef CONFIG_USB_DEFAULT_PERSIST
 	/*
@@ -3778,12 +3783,15 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	struct xhci_slot_ctx *slot_ctx;
 	struct xhci_input_control_ctx *ctrl_ctx;
 	u64 temp_64;
-	struct xhci_command *command;
+	struct xhci_command *command = NULL;
+
+	mutex_lock(&xhci->mutex);
 
 	if (!udev->slot_id) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_address,
 				"Bad Slot ID %d", udev->slot_id);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	virt_dev = xhci->devs[udev->slot_id];
@@ -3796,7 +3804,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	 */
 		xhci_warn(xhci, "Virt dev invalid for slot_id 0x%x!\n",
 			udev->slot_id);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (setup == SETUP_CONTEXT_ONLY) {
@@ -3804,13 +3813,15 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 		if (GET_SLOT_STATE(le32_to_cpu(slot_ctx->dev_state)) ==
 		    SLOT_STATE_DEFAULT) {
 			xhci_dbg(xhci, "Slot already in default state\n");
-			return 0;
+			goto out;
 		}
 	}
 
 	command = xhci_alloc_command(xhci, false, false, GFP_KERNEL);
-	if (!command)
-		return -ENOMEM;
+	if (!command) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	command->in_ctx = virt_dev->in_ctx;
 	command->completion = &xhci->addr_dev;
@@ -3820,8 +3831,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	if (!ctrl_ctx) {
 		xhci_warn(xhci, "%s: Could not get input context, bad type.\n",
 				__func__);
-		kfree(command);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 	/*
 	 * If this is the first Set Address since device plug-in or
@@ -3848,8 +3859,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		xhci_dbg_trace(xhci, trace_xhci_dbg_address,
 				"FIXME: allocate a command ring segment");
-		kfree(command);
-		return ret;
+		goto out;
 	}
 	xhci_ring_cmd_db(xhci);
 	spin_unlock_irqrestore(&xhci->lock, flags);
@@ -3896,10 +3906,8 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 		ret = -EINVAL;
 		break;
 	}
-	if (ret) {
-		kfree(command);
-		return ret;
-	}
+	if (ret)
+		goto out;
 	temp_64 = xhci_read_64(xhci, &xhci->op_regs->dcbaa_ptr);
 	xhci_dbg_trace(xhci, trace_xhci_dbg_address,
 			"Op regs DCBAA ptr = %#016llx", temp_64);
@@ -3932,8 +3940,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 	xhci_dbg_trace(xhci, trace_xhci_dbg_address,
 		       "Internal device address = %d",
 		       le32_to_cpu(slot_ctx->dev_state) & DEV_ADDR_MASK);
+out:
+	mutex_unlock(&xhci->mutex);
 	kfree(command);
-	return 0;
+	return ret;
 }
 
 int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev)
@@ -4855,6 +4865,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 		return 0;
 	}
 
+	mutex_init(&xhci->mutex);
 	xhci->cap_regs = hcd->regs;
 	xhci->op_regs = hcd->regs +
 		HC_LENGTH(readl(&xhci->cap_regs->hc_capbase));
@@ -5011,4 +5022,12 @@ static int __init xhci_hcd_init(void)
 	BUILD_BUG_ON(sizeof(struct xhci_run_regs) != (8+8*128)*32/8);
 	return 0;
 }
+
+/*
+ * If an init function is provided, an exit function must also be provided
+ * to allow module unload.
+ */
+static void __exit xhci_hcd_fini(void) { }
+
 module_init(xhci_hcd_init);
+module_exit(xhci_hcd_fini);
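The xhci changes above serialize slot enabling and address-device with a mutex because xhci->slot_id and xhci->addr_dev are shared scratch fields: each waiter must snapshot slot_id into a local before dropping the lock. A pthreads sketch of that "lock, wait, snapshot, unlock" shape; names are illustrative, not the driver's (build with -lpthread):

/* Shared-result snapshot sketch: the field is only valid under the lock. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int shared_slot_id;	/* like xhci->slot_id: not thread-safe */
static int completed;

static void *controller(void *arg)
{
	pthread_mutex_lock(&lock);
	shared_slot_id = 7;	/* "command completion" fills the result */
	completed = 1;
	pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	int slot_id;

	pthread_mutex_lock(&lock);
	pthread_create(&t, NULL, controller, NULL);
	while (!completed)
		pthread_cond_wait(&done, &lock);
	slot_id = shared_slot_id;	/* snapshot while still locked */
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("slot_id=%d\n", slot_id);	/* safe local copy */
	return 0;
}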
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index ea75e8ccd3c1..6977f8491fa7 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1497,6 +1497,8 @@ struct xhci_hcd {
 	struct list_head	lpm_failed_devs;
 
 	/* slot enabling and address device helpers */
+	/* these are not thread safe so use mutex */
+	struct mutex		mutex;
 	struct completion	addr_dev;
 	int			slot_id;
 	/* For USB 3.0 LPM enable/disable. */
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 3789b08ef67b..6dca3d794ced 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2021,13 +2021,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (musb->ops->quirks)
 		musb->io.quirks = musb->ops->quirks;
 
-	/* At least tusb6010 has it's own offsets.. */
-	if (musb->ops->ep_offset)
-		musb->io.ep_offset = musb->ops->ep_offset;
-	if (musb->ops->ep_select)
-		musb->io.ep_select = musb->ops->ep_select;
-
-	/* ..and some devices use indexed offset or flat offset */
+	/* Most devices use indexed offset or flat offset */
 	if (musb->io.quirks & MUSB_INDEXED_EP) {
 		musb->io.ep_offset = musb_indexed_ep_offset;
 		musb->io.ep_select = musb_indexed_ep_select;
@@ -2036,6 +2030,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 		musb->io.ep_select = musb_flat_ep_select;
 	}
 
+	/* At least tusb6010 has its own offsets */
+	if (musb->ops->ep_offset)
+		musb->io.ep_offset = musb->ops->ep_offset;
+	if (musb->ops->ep_select)
+		musb->io.ep_select = musb->ops->ep_select;
+
 	if (musb->ops->fifo_mode)
 		fifo_mode = musb->ops->fifo_mode;
 	else
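The musb reorder above assigns the generic indexed/flat accessors first and applies the platform-specific ops afterwards, so an override such as tusb6010's is no longer clobbered by the defaults: the last writer wins. A small sketch of the defaults-then-overrides ordering; names are made up:

/* Defaults first, platform overrides second. */
#include <stdio.h>

struct io_ops {
	const char *(*ep_offset)(void);
};

static const char *flat_offset(void)     { return "flat"; }
static const char *platform_offset(void) { return "platform"; }

int main(void)
{
	struct io_ops io = { 0 };
	const char *(*platform_override)(void) = platform_offset;

	io.ep_offset = flat_offset;		/* step 1: generic default */
	if (platform_override)
		io.ep_offset = platform_override;	/* step 2: override wins */

	printf("%s\n", io.ep_offset());	/* prints "platform" */
	return 0;
}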
diff --git a/drivers/usb/phy/phy-ab8500-usb.c b/drivers/usb/phy/phy-ab8500-usb.c
index 7225d526df04..03ab0c699f74 100644
--- a/drivers/usb/phy/phy-ab8500-usb.c
+++ b/drivers/usb/phy/phy-ab8500-usb.c
@@ -1179,7 +1179,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
 	}
 	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 			ab8500_usb_link_status_irq,
-			IRQF_NO_SUSPEND | IRQF_SHARED,
+			IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
 			"usb-link-status", ab);
 	if (err < 0) {
 		dev_err(ab->dev, "request_irq failed for link status irq\n");
@@ -1195,7 +1195,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
 	}
 	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 			ab8500_usb_disconnect_irq,
-			IRQF_NO_SUSPEND | IRQF_SHARED,
+			IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
 			"usb-id-fall", ab);
 	if (err < 0) {
 		dev_err(ab->dev, "request_irq failed for ID fall irq\n");
@@ -1211,7 +1211,7 @@ static int ab8500_usb_irq_setup(struct platform_device *pdev,
 	}
 	err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
 			ab8500_usb_disconnect_irq,
-			IRQF_NO_SUSPEND | IRQF_SHARED,
+			IRQF_NO_SUSPEND | IRQF_SHARED | IRQF_ONESHOT,
 			"usb-vbus-fall", ab);
 	if (err < 0) {
 		dev_err(ab->dev, "request_irq failed for Vbus fall irq\n");
diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c
index 845f658276b1..2b28443d07b9 100644
--- a/drivers/usb/phy/phy-tahvo.c
+++ b/drivers/usb/phy/phy-tahvo.c
@@ -401,7 +401,8 @@ static int tahvo_usb_probe(struct platform_device *pdev)
 	dev_set_drvdata(&pdev->dev, tu);
 
 	tu->irq = platform_get_irq(pdev, 0);
-	ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt, 0,
+	ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt,
+				   IRQF_ONESHOT,
 				   "tahvo-vbus", tu);
 	if (ret) {
 		dev_err(&pdev->dev, "could not register tahvo-vbus irq: %d\n",
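Both PHY fixes above add IRQF_ONESHOT: each request passes a NULL primary handler, and the genirq core rejects threaded requests without a primary handler unless ONESHOT keeps the line masked until the thread finishes (otherwise a level-triggered source would re-fire immediately). A toy userspace model of that masking discipline; purely illustrative, not kernel code:

/* ONESHOT semantics sketch: line stays masked across the handler thread. */
#include <stdbool.h>
#include <stdio.h>

static bool line_masked;

static void threaded_handler(void)
{
	printf("handling (line masked=%d)\n", line_masked);
}

static void oneshot_dispatch(void)
{
	line_masked = true;	/* masked before the thread runs */
	threaded_handler();
	line_masked = false;	/* unmasked only after it returns */
}

int main(void)
{
	oneshot_dispatch();
	return 0;
}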
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 8597cf9cfceb..c0f5c652d272 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -611,6 +611,8 @@ struct usbhs_pkt_handle usbhs_fifo_pio_push_handler = {
 static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done)
 {
 	struct usbhs_pipe *pipe = pkt->pipe;
+	struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
+	struct usbhs_fifo *fifo = usbhsf_get_cfifo(priv);
 
 	if (usbhs_pipe_is_busy(pipe))
 		return 0;
@@ -624,6 +626,9 @@ static int usbhsf_prepare_pop(struct usbhs_pkt *pkt, int *is_done)
 	usbhs_pipe_data_sequence(pipe, pkt->sequence);
 	pkt->sequence = -1; /* -1 sequence will be ignored */
 
+	if (usbhs_pipe_is_dcp(pipe))
+		usbhsf_fifo_clear(pipe, fifo);
+
 	usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->length);
 	usbhs_pipe_enable(pipe);
 	usbhs_pipe_running(pipe, 1);
@@ -673,7 +678,14 @@ static int usbhsf_pio_try_pop(struct usbhs_pkt *pkt, int *is_done)
 		*is_done = 1;
 		usbhsf_rx_irq_ctrl(pipe, 0);
 		usbhs_pipe_running(pipe, 0);
-		usbhs_pipe_disable(pipe);	/* disable pipe first */
+		/*
+		 * If function mode, since this controller is possible to enter
+		 * Control Write status stage at this timing, this driver
+		 * should not disable the pipe. If such a case happens, this
+		 * controller is not able to complete the status stage.
+		 */
+		if (!usbhs_mod_is_host(priv) && !usbhs_pipe_is_dcp(pipe))
+			usbhs_pipe_disable(pipe);	/* disable pipe first */
 	}
 
 	/*
@@ -1227,15 +1239,21 @@ static void usbhsf_dma_init_dt(struct device *dev, struct usbhs_fifo *fifo,
 {
 	char name[16];
 
-	snprintf(name, sizeof(name), "tx%d", channel);
-	fifo->tx_chan = dma_request_slave_channel_reason(dev, name);
-	if (IS_ERR(fifo->tx_chan))
-		fifo->tx_chan = NULL;
-
-	snprintf(name, sizeof(name), "rx%d", channel);
-	fifo->rx_chan = dma_request_slave_channel_reason(dev, name);
-	if (IS_ERR(fifo->rx_chan))
-		fifo->rx_chan = NULL;
+	/*
+	 * To avoid complex handing for DnFIFOs, the driver uses each
+	 * DnFIFO as TX or RX direction (not bi-direction).
+	 * So, the driver uses odd channels for TX, even channels for RX.
+	 */
+	snprintf(name, sizeof(name), "ch%d", channel);
+	if (channel & 1) {
+		fifo->tx_chan = dma_request_slave_channel_reason(dev, name);
+		if (IS_ERR(fifo->tx_chan))
+			fifo->tx_chan = NULL;
+	} else {
+		fifo->rx_chan = dma_request_slave_channel_reason(dev, name);
+		if (IS_ERR(fifo->rx_chan))
+			fifo->rx_chan = NULL;
+	}
 }
 
 static void usbhsf_dma_init(struct usbhs_priv *priv, struct usbhs_fifo *fifo,
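The last usbhs hunk maps each DnFIFO to one direction only, odd channels becoming TX and even channels RX, instead of requesting both directions per channel. A trivial sketch of that parity mapping, following the "ch%d" naming in the patch:

/* Odd/even DnFIFO direction split sketch. */
#include <stdio.h>

int main(void)
{
	char name[16];
	int channel;

	for (channel = 0; channel < 4; channel++) {
		snprintf(name, sizeof(name), "ch%d", channel);
		printf("%s -> %s\n", name, (channel & 1) ? "TX" : "RX");
	}
	return 0;
}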
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 9031750e7404..ffd739e31bfc 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -128,6 +128,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
+	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 8eb68a31cab6..4c8b3b82103d 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -699,6 +699,7 @@ static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) },
 	{ USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) },
 	{ USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) },
+	{ USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) },
 	{ USB_DEVICE(XSENS_VID, XSENS_MTW_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
 	{ USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 4e4f46f3c89c..792e054126de 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -155,6 +155,7 @@
 #define XSENS_AWINDA_STATION_PID 0x0101
 #define XSENS_AWINDA_DONGLE_PID 0x0102
 #define XSENS_MTW_PID		0x0200	/* Xsens MTw */
+#define XSENS_MTDEVBOARD_PID	0x0300	/* Motion Tracker Development Board */
 #define XSENS_CONVERTER_PID	0xD00D	/* Xsens USB-serial converter */
 
 /* Xsens devices using FTDI VID */
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index e894eb278d83..eba1b7ac7294 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -423,6 +423,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 		if (cpu == -1)
 			irq_set_affinity_hint(irq, NULL);
 		else {
+			cpumask_clear(mask);
 			cpumask_set_cpu(cpu, mask);
 			irq_set_affinity_hint(irq, mask);
 		}
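cpumask_set_cpu() only ORs one bit into the mask, so without the cpumask_clear() above the affinity hint accumulates every CPU ever requested; clearing first leaves exactly one bit set. A userspace analogue with glibc's cpu_set_t showing the same clear-before-set discipline:

/* Clear-before-set affinity mask sketch using sched.h. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;
	int cpu;

	for (cpu = 0; cpu < 3; cpu++) {
		CPU_ZERO(&mask);	/* the fix: clear before set */
		CPU_SET(cpu, &mask);
		printf("cpu%d: mask has %d bit(s)\n", cpu, CPU_COUNT(&mask));
	}
	return 0;
}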
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index fb9ffcb43277..0923f2cf3c80 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -149,8 +149,6 @@ extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			   struct inode *new_dir, struct dentry *new_dentry);
-extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
-			      void *p);
 extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
 					 struct p9_fid *fid,
 					 struct super_block *sb, int new);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 703342e309f5..510040b04c96 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1224,100 +1224,43 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
 }
 
 /**
- * v9fs_readlink - read a symlink's location (internal version)
+ * v9fs_vfs_follow_link - follow a symlink path
  * @dentry: dentry for symlink
- * @buffer: buffer to load symlink location into
- * @buflen: length of buffer
- *
+ * @cookie: place to pass the data to put_link()
  */
 
-static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
 {
-	int retval;
-
-	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid;
+	struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
+	struct p9_fid *fid = v9fs_fid_lookup(dentry);
 	struct p9_wstat *st;
+	char *res;
+
+	p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
-	p9_debug(P9_DEBUG_VFS, " %pd\n", dentry);
-	retval = -EPERM;
-	v9ses = v9fs_dentry2v9ses(dentry);
-	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid))
-		return PTR_ERR(fid);
+		return ERR_CAST(fid);
 
 	if (!v9fs_proto_dotu(v9ses))
-		return -EBADF;
+		return ERR_PTR(-EBADF);
 
 	st = p9_client_stat(fid);
 	if (IS_ERR(st))
-		return PTR_ERR(st);
+		return ERR_CAST(st);
 
 	if (!(st->mode & P9_DMSYMLINK)) {
-		retval = -EINVAL;
-		goto done;
+		p9stat_free(st);
+		kfree(st);
+		return ERR_PTR(-EINVAL);
 	}
+	res = st->extension;
+	st->extension = NULL;
+	if (strlen(res) >= PATH_MAX)
+		res[PATH_MAX - 1] = '\0';
 
-	/* copy extension buffer into buffer */
-	retval = min(strlen(st->extension)+1, (size_t)buflen);
-	memcpy(buffer, st->extension, retval);
-
-	p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n",
-		 dentry, st->extension, buflen, buffer);
-
-done:
 	p9stat_free(st);
 	kfree(st);
-	return retval;
-}
-
-/**
- * v9fs_vfs_follow_link - follow a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- *
- */
-
-static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	int len = 0;
-	char *link = __getname();
-
-	p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
-
-	if (!link)
-		link = ERR_PTR(-ENOMEM);
-	else {
-		len = v9fs_readlink(dentry, link, PATH_MAX);
-
-		if (len < 0) {
-			__putname(link);
-			link = ERR_PTR(len);
-		} else
-			link[min(len, PATH_MAX-1)] = 0;
-	}
-	nd_set_link(nd, link);
-
-	return NULL;
-}
-
-/**
- * v9fs_vfs_put_link - release a symlink path
- * @dentry: dentry for symlink
- * @nd: nameidata
- * @p: unused
- *
- */
-
-void
-v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-	char *s = nd_get_link(nd);
-
-	p9_debug(P9_DEBUG_VFS, " %pd %s\n",
-		 dentry, IS_ERR(s) ? "<error>" : s);
-	if (!IS_ERR(s))
-		__putname(s);
+	return *cookie = res;
 }
 
 /**
@@ -1370,6 +1313,8 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
 }
 
+#define U32_MAX_DIGITS 10
+
 /**
  * v9fs_vfs_link - create a hardlink
  * @old_dentry: dentry for file to link to
@@ -1383,7 +1328,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	      struct dentry *dentry)
 {
 	int retval;
-	char *name;
+	char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */
 	struct p9_fid *oldfid;
 
 	p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n",
@@ -1393,20 +1338,12 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (IS_ERR(oldfid))
 		return PTR_ERR(oldfid);
 
-	name = __getname();
-	if (unlikely(!name)) {
-		retval = -ENOMEM;
-		goto clunk_fid;
-	}
-
 	sprintf(name, "%d\n", oldfid->fid);
 	retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
-	__putname(name);
 	if (!retval) {
 		v9fs_refresh_inode(oldfid, d_inode(old_dentry));
 		v9fs_invalidate_inode_attr(dir);
 	}
-clunk_fid:
 	p9_client_clunk(oldfid);
 	return retval;
 }
@@ -1425,7 +1362,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
 {
 	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
 	int retval;
-	char *name;
+	char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1];
 	u32 perm;
 
 	p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
@@ -1435,26 +1372,16 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	name = __getname();
-	if (!name)
-		return -ENOMEM;
 	/* build extension */
 	if (S_ISBLK(mode))
 		sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
 	else if (S_ISCHR(mode))
 		sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
-	else if (S_ISFIFO(mode))
-		*name = 0;
-	else if (S_ISSOCK(mode))
+	else
 		*name = 0;
-	else {
-		__putname(name);
-		return -EINVAL;
-	}
 
 	perm = unixmode2p9mode(v9ses, mode);
 	retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
-	__putname(name);
 
 	return retval;
 }
@@ -1530,7 +1457,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
 static const struct inode_operations v9fs_symlink_inode_operations = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link,
-	.put_link = v9fs_vfs_put_link,
+	.put_link = kfree_put_link,
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
 };
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 9861c7c951a6..09e4433717b8 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -905,41 +905,24 @@ error:
 /**
  * v9fs_vfs_follow_link_dotl - follow a symlink path
  * @dentry: dentry for symlink
- * @nd: nameidata
- *
+ * @cookie: place to pass the data to put_link()
  */
 
-static void *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+static const char *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
 {
-	int retval;
-	struct p9_fid *fid;
-	char *link = __getname();
+	struct p9_fid *fid = v9fs_fid_lookup(dentry);
 	char *target;
+	int retval;
 
 	p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
 
-	if (!link) {
-		link = ERR_PTR(-ENOMEM);
-		goto ndset;
-	}
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid)) {
-		__putname(link);
-		link = ERR_CAST(fid);
-		goto ndset;
-	}
+	if (IS_ERR(fid))
+		return ERR_CAST(fid);
 	retval = p9_client_readlink(fid, &target);
-	if (!retval) {
-		strcpy(link, target);
-		kfree(target);
-		goto ndset;
-	}
-	__putname(link);
-	link = ERR_PTR(retval);
-ndset:
-	nd_set_link(nd, link);
-	return NULL;
+	if (retval)
+		return ERR_PTR(retval);
+	return *cookie = target;
 }
 
 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -1006,7 +989,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
 const struct inode_operations v9fs_symlink_inode_operations_dotl = {
 	.readlink = generic_readlink,
 	.follow_link = v9fs_vfs_follow_link_dotl,
-	.put_link = v9fs_vfs_put_link,
+	.put_link = kfree_put_link,
 	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr_dotl,
 	.setxattr = generic_setxattr,
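The 9p conversions above show the follow_link contract used throughout this series: the resolved target (or an ERR_PTR) is returned directly, and any allocation that must outlive the call is also stored through **cookie so put_link (kfree_put_link here) can free it. A userspace sketch of that cookie handshake; the names are analogues, and the NULL return stands in for the kernel's ERR_PTR():

/* follow_link/put_link cookie contract sketch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *follow_link(void **cookie)
{
	char *target = strdup("/some/target");	/* stands in for the fs lookup */

	if (!target)
		return NULL;	/* an ERR_PTR() in the kernel version */
	return *cookie = target;
}

static void kfree_put_link_analogue(void *cookie)
{
	free(cookie);	/* what kfree_put_link does with the cookie */
}

int main(void)
{
	void *cookie = NULL;
	const char *link = follow_link(&cookie);

	if (link)
		printf("resolved to %s\n", link);
	kfree_put_link_analogue(cookie);
	return 0;
}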
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index de58cc7b8076..da0c33481bc0 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,14 +12,13 @@
 
 #include "autofs_i.h"
 
-static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	if (ino && !autofs4_oz_mode(sbi))
 		ino->last_used = jiffies;
-	nd_set_link(nd, d_inode(dentry)->i_private);
-	return NULL;
+	return d_inode(dentry)->i_private;
 }
 
 const struct inode_operations autofs4_symlink_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 7943533c3868..46aedacfa6a8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,8 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
 static void befs_destroy_inodecache(void);
-static void *befs_follow_link(struct dentry *, struct nameidata *);
-static void *befs_fast_follow_link(struct dentry *, struct nameidata *);
+static const char *befs_follow_link(struct dentry *, void **);
 static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
 			char **out, int *out_len);
 static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -80,11 +79,6 @@ static const struct address_space_operations befs_aops = {
 	.bmap		= befs_bmap,
 };
 
-static const struct inode_operations befs_fast_symlink_inode_operations = {
-	.readlink	= generic_readlink,
-	.follow_link	= befs_fast_follow_link,
-};
-
 static const struct inode_operations befs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= befs_follow_link,
@@ -403,10 +397,12 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_op = &befs_dir_inode_operations;
 		inode->i_fop = &befs_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (befs_ino->i_flags & BEFS_LONG_SYMLINK)
+		if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
 			inode->i_op = &befs_symlink_inode_operations;
-		else
-			inode->i_op = &befs_fast_symlink_inode_operations;
+		} else {
+			inode->i_link = befs_ino->i_data.symlink;
+			inode->i_op = &simple_symlink_inode_operations;
+		}
 	} else {
 		befs_error(sb, "Inode %lu is not a regular file, "
 			   "directory or symlink. THAT IS WRONG! BeFS has no "
@@ -467,8 +463,8 @@
  * The data stream become link name. Unless the LONG_SYMLINK
  * flag is set.
  */
-static void *
-befs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *
+befs_follow_link(struct dentry *dentry, void **cookie)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
@@ -478,33 +474,20 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 	if (len == 0) {
 		befs_error(sb, "Long symlink with illegal length");
-		link = ERR_PTR(-EIO);
-	} else {
-		befs_debug(sb, "Follow long symlink");
-
-		link = kmalloc(len, GFP_NOFS);
-		if (!link) {
-			link = ERR_PTR(-ENOMEM);
-		} else if (befs_read_lsymlink(sb, data, link, len) != len) {
-			kfree(link);
-			befs_error(sb, "Failed to read entire long symlink");
-			link = ERR_PTR(-EIO);
-		} else {
-			link[len - 1] = '\0';
-		}
+		return ERR_PTR(-EIO);
 	}
-	nd_set_link(nd, link);
-	return NULL;
-}
-
-
-static void *
-befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+	befs_debug(sb, "Follow long symlink");
 
-	nd_set_link(nd, befs_ino->i_data.symlink);
-	return NULL;
+	link = kmalloc(len, GFP_NOFS);
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	if (befs_read_lsymlink(sb, data, link, len) != len) {
+		kfree(link);
+		befs_error(sb, "Failed to read entire long symlink");
+		return ERR_PTR(-EIO);
+	}
+	link[len - 1] = '\0';
+	return *cookie = link;
 }
 
 /*
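The befs conversion above splits fast symlinks, whose target lives inside the inode body, from long symlinks read off the data stream: the fast path just points inode->i_link at the embedded buffer and reuses simple_symlink_inode_operations, while the long path still allocates and reads. A toy sketch of that inline-versus-allocated split; the type, sizes, and helper are made up:

/* Fast (inline) vs long (heap) symlink target sketch. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define INLINE_LEN 32

struct toy_inode {
	char inline_target[INLINE_LEN];	/* like befs_ino->i_data.symlink */
	char *link;			/* like inode->i_link */
};

static const char *get_target(struct toy_inode *ino, const char *stored,
			      char **to_free)
{
	if (strlen(stored) + 1 <= INLINE_LEN) {	/* fast symlink */
		strcpy(ino->inline_target, stored);
		ino->link = ino->inline_target;
		return ino->link;		/* nothing to free later */
	}
	return *to_free = strdup(stored);	/* long symlink */
}

int main(void)
{
	struct toy_inode ino = { .link = NULL };
	char *to_free = NULL;
	const char *t = get_target(&ino, "short", &to_free);

	printf("%s (heap=%d)\n", t, to_free != NULL);
	free(to_free);
	return 0;
}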
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e876e1944519..571acd88606c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -6,7 +6,6 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
-#include <linux/namei.h>
 #include <linux/writeback.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl.h>
@@ -819,6 +818,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 			else
 				kfree(sym); /* lost a race */
 		}
+		inode->i_link = ci->i_symlink;
 		break;
 	case S_IFDIR:
 		inode->i_op = &ceph_dir_iops;
@@ -1691,16 +1691,9 @@ retry:
 /*
  * symlinks
  */
-static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
-	nd_set_link(nd, ci->i_symlink);
-	return NULL;
-}
-
 static const struct inode_operations ceph_symlink_iops = {
 	.readlink = generic_readlink,
-	.follow_link = ceph_sym_follow_link,
+	.follow_link = simple_follow_link,
 	.setattr = ceph_setattr,
 	.getattr = ceph_getattr,
 	.setxattr = ceph_setxattr,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 252f5c15806b..a782b22904e4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -120,7 +120,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 #endif
 
 /* Functions related to symlinks */
-extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd);
+extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
 extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
 			 int buflen);
 extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e6c707cc62b3..e3548f73bdea 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -626,8 +626,8 @@ cifs_hl_exit:
 	return rc;
 }
 
-void *
-cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
+const char *
+cifs_follow_link(struct dentry *direntry, void **cookie)
 {
 	struct inode *inode = d_inode(direntry);
 	int rc = -ENOMEM;
@@ -643,16 +643,18 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink)) {
-		rc = PTR_ERR(tlink);
-		tlink = NULL;
-		goto out;
+		free_xid(xid);
+		return ERR_CAST(tlink);
 	}
 	tcon = tlink_tcon(tlink);
 	server = tcon->ses->server;
 
 	full_path = build_path_from_dentry(direntry);
-	if (!full_path)
-		goto out;
+	if (!full_path) {
+		free_xid(xid);
+		cifs_put_tlink(tlink);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
 
@@ -670,17 +672,13 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
 			&target_path, cifs_sb);
 
 	kfree(full_path);
-out:
+	free_xid(xid);
+	cifs_put_tlink(tlink);
 	if (rc != 0) {
 		kfree(target_path);
-		target_path = ERR_PTR(rc);
+		return ERR_PTR(rc);
 	}
-
-	free_xid(xid);
-	if (tlink)
-		cifs_put_tlink(tlink);
-	nd_set_link(nd, target_path);
-	return NULL;
+	return *cookie = target_path;
 }
 
 int
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index cc9f2546ea4a..ec5c8325b503 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -279,36 +279,27 @@ static int configfs_getlink(struct dentry *dentry, char * path)
 
 }
 
-static void *configfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
 {
-	int error = -ENOMEM;
 	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	int error;
 
-	if (page) {
-		error = configfs_getlink(dentry, (char *)page);
-		if (!error) {
-			nd_set_link(nd, (char *)page);
-			return (void *)page;
-		}
-	}
-
-	nd_set_link(nd, ERR_PTR(error));
-	return NULL;
-}
+	if (!page)
+		return ERR_PTR(-ENOMEM);
 
-static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
-			      void *cookie)
-{
-	if (cookie) {
-		unsigned long page = (unsigned long)cookie;
-		free_page(page);
+	error = configfs_getlink(dentry, (char *)page);
+	if (!error) {
+		return *cookie = (void *)page;
 	}
+
+	free_page(page);
+	return ERR_PTR(error);
 }
 
 const struct inode_operations configfs_symlink_inode_operations = {
 	.follow_link = configfs_follow_link,
 	.readlink = generic_readlink,
-	.put_link = configfs_put_link,
+	.put_link = free_page_put_link,
 	.setattr = configfs_setattr,
 };
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 830a7e76f5c6..284f9aa0028b 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -17,7 +17,6 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/pagemap.h>
-#include <linux/namei.h>
 #include <linux/debugfs.h>
 #include <linux/io.h>
 #include <linux/slab.h>
@@ -43,17 +42,6 @@ const struct file_operations debugfs_file_operations = {
 	.llseek =	noop_llseek,
 };
 
-static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	nd_set_link(nd, d_inode(dentry)->i_private);
-	return NULL;
-}
-
-const struct inode_operations debugfs_link_operations = {
-	.readlink       = generic_readlink,
-	.follow_link    = debugfs_follow_link,
-};
-
 static int debugfs_u8_set(void *data, u64 val)
 {
 	*(u8 *)data = val;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c1e7ffb0dab6..7eaec88ea970 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -174,7 +174,7 @@ static void debugfs_evict_inode(struct inode *inode)
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
 	if (S_ISLNK(inode->i_mode))
-		kfree(inode->i_private);
+		kfree(inode->i_link);
 }
 
 static const struct super_operations debugfs_super_operations = {
@@ -511,8 +511,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 		return failed_creating(dentry);
 	}
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
-	inode->i_op = &debugfs_link_operations;
-	inode->i_private = link;
+	inode->i_op = &simple_symlink_inode_operations;
+	inode->i_link = link;
 	d_instantiate(dentry, inode);
 	return end_creating(dentry);
 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index fc850b55db67..3c4db1172d22 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -170,7 +170,6 @@ out_unlock:
  * @directory_inode: inode of the new file's dentry's parent in ecryptfs
  * @ecryptfs_dentry: New file's dentry in ecryptfs
  * @mode: The mode of the new file
- * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
  *
  * Creates the underlying file and the eCryptfs inode which will link to
  * it. It will also update the eCryptfs directory inode to mimic the
@@ -384,7 +383,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
  * ecryptfs_lookup
  * @ecryptfs_dir_inode: The eCryptfs directory inode
  * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @ecryptfs_nd: nameidata; may be NULL
+ * @flags: lookup flags
  *
  * Find a file on disk. If the file does not exist, then we'll add it to the
  * dentry cache and continue on to read it from the disk.
@@ -675,18 +674,16 @@ out:
 	return rc ? ERR_PTR(rc) : buf;
 }
 
-static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
 {
 	size_t len;
 	char *buf = ecryptfs_readlink_lower(dentry, &len);
 	if (IS_ERR(buf))
-		goto out;
+		return buf;
 	fsstack_copy_attr_atime(d_inode(dentry),
 				d_inode(ecryptfs_dentry_to_lower(dentry)));
 	buf[len] = '\0';
-out:
-	nd_set_link(nd, buf);
-	return NULL;
+	return *cookie = buf;
 }
 
 /**
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index b47c7b8dc275..a364fd0965ec 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -16,5 +16,5 @@
 libore-y := ore.o ore_raid.o
 obj-$(CONFIG_ORE) += libore.o
 
-exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o
+exofs-y := inode.o file.o namei.o dir.o super.o sys.o
 obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index ad9cac670a47..2e86086bc940 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -207,10 +207,6 @@ extern const struct address_space_operations exofs_aops;
 extern const struct inode_operations exofs_dir_inode_operations;
 extern const struct inode_operations exofs_special_inode_operations;
 
-/* symlink.c */
-extern const struct inode_operations exofs_symlink_inode_operations;
-extern const struct inode_operations exofs_fast_symlink_inode_operations;
-
 /* exofs_init_comps will initialize an ore_components device array
  * pointing to a single ore_comp struct, and a round-robin view
  * of the device table.
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 786e4cc8c889..73c64daa0f55 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1222,10 +1222,11 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 		inode->i_fop = &exofs_dir_operations;
 		inode->i_mapping->a_ops = &exofs_aops;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (exofs_inode_is_fast_symlink(inode))
-			inode->i_op = &exofs_fast_symlink_inode_operations;
-		else {
-			inode->i_op = &exofs_symlink_inode_operations;
+		if (exofs_inode_is_fast_symlink(inode)) {
+			inode->i_op = &simple_symlink_inode_operations;
+			inode->i_link = (char *)oi->i_data;
+		} else {
+			inode->i_op = &page_symlink_inode_operations;
 			inode->i_mapping->a_ops = &exofs_aops;
 		}
 	} else {
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 5ae25e431191..09a6bb1ad63c 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -113,7 +113,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
113 oi = exofs_i(inode); 113 oi = exofs_i(inode);
114 if (l > sizeof(oi->i_data)) { 114 if (l > sizeof(oi->i_data)) {
115 /* slow symlink */ 115 /* slow symlink */
116 inode->i_op = &exofs_symlink_inode_operations; 116 inode->i_op = &page_symlink_inode_operations;
117 inode->i_mapping->a_ops = &exofs_aops; 117 inode->i_mapping->a_ops = &exofs_aops;
118 memset(oi->i_data, 0, sizeof(oi->i_data)); 118 memset(oi->i_data, 0, sizeof(oi->i_data));
119 119
@@ -122,7 +122,8 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
122 goto out_fail; 122 goto out_fail;
123 } else { 123 } else {
124 /* fast symlink */ 124 /* fast symlink */
125 inode->i_op = &exofs_fast_symlink_inode_operations; 125 inode->i_op = &simple_symlink_inode_operations;
126 inode->i_link = (char *)oi->i_data;
126 memcpy(oi->i_data, symname, l); 127 memcpy(oi->i_data, symname, l);
127 inode->i_size = l-1; 128 inode->i_size = l-1;
128 } 129 }
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
deleted file mode 100644
index 6f6f3a4c1365..000000000000
--- a/fs/exofs/symlink.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <ooo@electrozaur.com>
6 *
7 * Copyrights for code taken from ext2:
8 * Copyright (C) 1992, 1993, 1994, 1995
9 * Remy Card (card@masi.ibp.fr)
10 * Laboratoire MASI - Institut Blaise Pascal
11 * Universite Pierre et Marie Curie (Paris VI)
12 * from
13 * linux/fs/minix/inode.c
14 * Copyright (C) 1991, 1992 Linus Torvalds
15 *
16 * This file is part of exofs.
17 *
18 * exofs is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation. Since it is based on ext2, and the only
21 * valid version of GPL for the Linux kernel is version 2, the only valid
22 * version of GPL for exofs is version 2.
23 *
24 * exofs is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with exofs; if not, write to the Free Software
31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 */
33
34#include <linux/namei.h>
35
36#include "exofs.h"
37
38static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
39{
40 struct exofs_i_info *oi = exofs_i(d_inode(dentry));
41
42 nd_set_link(nd, (char *)oi->i_data);
43 return NULL;
44}
45
46const struct inode_operations exofs_symlink_inode_operations = {
47 .readlink = generic_readlink,
48 .follow_link = page_follow_link_light,
49 .put_link = page_put_link,
50};
51
52const struct inode_operations exofs_fast_symlink_inode_operations = {
53 .readlink = generic_readlink,
54 .follow_link = exofs_follow_link,
55};
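
With the new inode->i_link field, a "fast" symlink whose target lives inside the in-core inode no longer needs a per-filesystem trampoline, which is why fs/exofs/symlink.c can be deleted outright in favour of the shared simple_symlink_inode_operations (added in the fs/libfs.c hunk further down). Gathered from the hunks above, the resulting branch in exofs_iget() is:

	} else if (S_ISLNK(inode->i_mode)) {
		if (exofs_inode_is_fast_symlink(inode)) {
			/* target sits in the inode; i_link must stay
			 * NUL-terminated and valid until eviction */
			inode->i_op = &simple_symlink_inode_operations;
			inode->i_link = (char *)oi->i_data;
		} else {
			/* long target is read through the page cache */
			inode->i_op = &page_symlink_inode_operations;
			inode->i_mapping->a_ops = &exofs_aops;
		}
	}
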
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f460ae36d5b7..5c09776d347f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1403,6 +1403,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1403 inode->i_mapping->a_ops = &ext2_aops; 1403 inode->i_mapping->a_ops = &ext2_aops;
1404 } else if (S_ISLNK(inode->i_mode)) { 1404 } else if (S_ISLNK(inode->i_mode)) {
1405 if (ext2_inode_is_fast_symlink(inode)) { 1405 if (ext2_inode_is_fast_symlink(inode)) {
1406 inode->i_link = (char *)ei->i_data;
1406 inode->i_op = &ext2_fast_symlink_inode_operations; 1407 inode->i_op = &ext2_fast_symlink_inode_operations;
1407 nd_terminate_link(ei->i_data, inode->i_size, 1408 nd_terminate_link(ei->i_data, inode->i_size,
1408 sizeof(ei->i_data) - 1); 1409 sizeof(ei->i_data) - 1);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3e074a9ccbe6..13ec54a99c96 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -189,7 +189,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
189 } else { 189 } else {
190 /* fast symlink */ 190 /* fast symlink */
191 inode->i_op = &ext2_fast_symlink_inode_operations; 191 inode->i_op = &ext2_fast_symlink_inode_operations;
192 memcpy((char*)(EXT2_I(inode)->i_data),symname,l); 192 inode->i_link = (char*)EXT2_I(inode)->i_data;
193 memcpy(inode->i_link, symname, l);
193 inode->i_size = l-1; 194 inode->i_size = l-1;
194 } 195 }
195 mark_inode_dirty(inode); 196 mark_inode_dirty(inode);
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 20608f17c2e5..ae17179f3810 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -19,14 +19,6 @@
19 19
20#include "ext2.h" 20#include "ext2.h"
21#include "xattr.h" 21#include "xattr.h"
22#include <linux/namei.h>
23
24static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
25{
26 struct ext2_inode_info *ei = EXT2_I(d_inode(dentry));
27 nd_set_link(nd, (char *)ei->i_data);
28 return NULL;
29}
30 22
31const struct inode_operations ext2_symlink_inode_operations = { 23const struct inode_operations ext2_symlink_inode_operations = {
32 .readlink = generic_readlink, 24 .readlink = generic_readlink,
@@ -43,7 +35,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
43 35
44const struct inode_operations ext2_fast_symlink_inode_operations = { 36const struct inode_operations ext2_fast_symlink_inode_operations = {
45 .readlink = generic_readlink, 37 .readlink = generic_readlink,
46 .follow_link = ext2_follow_link, 38 .follow_link = simple_follow_link,
47 .setattr = ext2_setattr, 39 .setattr = ext2_setattr,
48#ifdef CONFIG_EXT2_FS_XATTR 40#ifdef CONFIG_EXT2_FS_XATTR
49 .setxattr = generic_setxattr, 41 .setxattr = generic_setxattr,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2ee2dc4351d1..6c7e5468a2f8 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2999,6 +2999,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2999 inode->i_op = &ext3_fast_symlink_inode_operations; 2999 inode->i_op = &ext3_fast_symlink_inode_operations;
3000 nd_terminate_link(ei->i_data, inode->i_size, 3000 nd_terminate_link(ei->i_data, inode->i_size,
3001 sizeof(ei->i_data) - 1); 3001 sizeof(ei->i_data) - 1);
3002 inode->i_link = (char *)ei->i_data;
3002 } else { 3003 } else {
3003 inode->i_op = &ext3_symlink_inode_operations; 3004 inode->i_op = &ext3_symlink_inode_operations;
3004 ext3_set_aops(inode); 3005 ext3_set_aops(inode);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 4264b9bd0002..c9e767cd4b67 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2308,7 +2308,8 @@ retry:
2308 } 2308 }
2309 } else { 2309 } else {
2310 inode->i_op = &ext3_fast_symlink_inode_operations; 2310 inode->i_op = &ext3_fast_symlink_inode_operations;
2311 memcpy((char*)&EXT3_I(inode)->i_data,symname,l); 2311 inode->i_link = (char*)&EXT3_I(inode)->i_data;
2312 memcpy(inode->i_link, symname, l);
2312 inode->i_size = l-1; 2313 inode->i_size = l-1;
2313 } 2314 }
2314 EXT3_I(inode)->i_disksize = inode->i_size; 2315 EXT3_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index ea96df3c58db..c08c59094ae6 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -17,17 +17,9 @@
17 * ext3 symlink handling code 17 * ext3 symlink handling code
18 */ 18 */
19 19
20#include <linux/namei.h>
21#include "ext3.h" 20#include "ext3.h"
22#include "xattr.h" 21#include "xattr.h"
23 22
24static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
25{
26 struct ext3_inode_info *ei = EXT3_I(d_inode(dentry));
27 nd_set_link(nd, (char*)ei->i_data);
28 return NULL;
29}
30
31const struct inode_operations ext3_symlink_inode_operations = { 23const struct inode_operations ext3_symlink_inode_operations = {
32 .readlink = generic_readlink, 24 .readlink = generic_readlink,
33 .follow_link = page_follow_link_light, 25 .follow_link = page_follow_link_light,
@@ -43,7 +35,7 @@ const struct inode_operations ext3_symlink_inode_operations = {
43 35
44const struct inode_operations ext3_fast_symlink_inode_operations = { 36const struct inode_operations ext3_fast_symlink_inode_operations = {
45 .readlink = generic_readlink, 37 .readlink = generic_readlink,
46 .follow_link = ext3_follow_link, 38 .follow_link = simple_follow_link,
47 .setattr = ext3_setattr, 39 .setattr = ext3_setattr,
48#ifdef CONFIG_EXT3_FS_XATTR 40#ifdef CONFIG_EXT3_FS_XATTR
49 .setxattr = generic_setxattr, 41 .setxattr = generic_setxattr,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9a83f149ac85..0a3b72d1d458 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2847,6 +2847,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping,
2847 unsigned nr_pages); 2847 unsigned nr_pages);
2848 2848
2849/* symlink.c */ 2849/* symlink.c */
2850extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
2850extern const struct inode_operations ext4_symlink_inode_operations; 2851extern const struct inode_operations ext4_symlink_inode_operations;
2851extern const struct inode_operations ext4_fast_symlink_inode_operations; 2852extern const struct inode_operations ext4_fast_symlink_inode_operations;
2852 2853
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0554b0b5957b..5168c9b56880 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4213,8 +4213,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4213 inode->i_op = &ext4_dir_inode_operations; 4213 inode->i_op = &ext4_dir_inode_operations;
4214 inode->i_fop = &ext4_dir_operations; 4214 inode->i_fop = &ext4_dir_operations;
4215 } else if (S_ISLNK(inode->i_mode)) { 4215 } else if (S_ISLNK(inode->i_mode)) {
4216 if (ext4_inode_is_fast_symlink(inode) && 4216 if (ext4_encrypted_inode(inode)) {
4217 !ext4_encrypted_inode(inode)) { 4217 inode->i_op = &ext4_encrypted_symlink_inode_operations;
4218 ext4_set_aops(inode);
4219 } else if (ext4_inode_is_fast_symlink(inode)) {
4220 inode->i_link = (char *)ei->i_data;
4218 inode->i_op = &ext4_fast_symlink_inode_operations; 4221 inode->i_op = &ext4_fast_symlink_inode_operations;
4219 nd_terminate_link(ei->i_data, inode->i_size, 4222 nd_terminate_link(ei->i_data, inode->i_size,
4220 sizeof(ei->i_data) - 1); 4223 sizeof(ei->i_data) - 1);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 814f3beb4369..5fdb9f6aa869 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3206,10 +3206,12 @@ static int ext4_symlink(struct inode *dir,
3206 goto err_drop_inode; 3206 goto err_drop_inode;
3207 sd->len = cpu_to_le16(ostr.len); 3207 sd->len = cpu_to_le16(ostr.len);
3208 disk_link.name = (char *) sd; 3208 disk_link.name = (char *) sd;
3209 inode->i_op = &ext4_encrypted_symlink_inode_operations;
3209 } 3210 }
3210 3211
3211 if ((disk_link.len > EXT4_N_BLOCKS * 4)) { 3212 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3212 inode->i_op = &ext4_symlink_inode_operations; 3213 if (!encryption_required)
3214 inode->i_op = &ext4_symlink_inode_operations;
3213 ext4_set_aops(inode); 3215 ext4_set_aops(inode);
3214 /* 3216 /*
3215 * We cannot call page_symlink() with transaction started 3217 * We cannot call page_symlink() with transaction started
@@ -3249,9 +3251,10 @@ static int ext4_symlink(struct inode *dir,
3249 } else { 3251 } else {
3250 /* clear the extent format for fast symlink */ 3252 /* clear the extent format for fast symlink */
3251 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 3253 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3252 inode->i_op = encryption_required ? 3254 if (!encryption_required) {
3253 &ext4_symlink_inode_operations : 3255 inode->i_op = &ext4_fast_symlink_inode_operations;
3254 &ext4_fast_symlink_inode_operations; 3256 inode->i_link = (char *)&EXT4_I(inode)->i_data;
3257 }
3255 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, 3258 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3256 disk_link.len); 3259 disk_link.len);
3257 inode->i_size = disk_link.len - 1; 3260 inode->i_size = disk_link.len - 1;
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 187b78920314..ba5bd18a9825 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,7 +23,7 @@
23#include "xattr.h" 23#include "xattr.h"
24 24
25#ifdef CONFIG_EXT4_FS_ENCRYPTION 25#ifdef CONFIG_EXT4_FS_ENCRYPTION
26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static const char *ext4_follow_link(struct dentry *dentry, void **cookie)
27{ 27{
28 struct page *cpage = NULL; 28 struct page *cpage = NULL;
29 char *caddr, *paddr = NULL; 29 char *caddr, *paddr = NULL;
@@ -35,12 +35,9 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
35 int res; 35 int res;
36 u32 plen, max_size = inode->i_sb->s_blocksize; 36 u32 plen, max_size = inode->i_sb->s_blocksize;
37 37
38 if (!ext4_encrypted_inode(inode))
39 return page_follow_link_light(dentry, nd);
40
41 ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize); 38 ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
42 if (IS_ERR(ctx)) 39 if (IS_ERR(ctx))
43 return ctx; 40 return ERR_CAST(ctx);
44 41
45 if (ext4_inode_is_fast_symlink(inode)) { 42 if (ext4_inode_is_fast_symlink(inode)) {
46 caddr = (char *) EXT4_I(inode)->i_data; 43 caddr = (char *) EXT4_I(inode)->i_data;
@@ -49,7 +46,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
49 cpage = read_mapping_page(inode->i_mapping, 0, NULL); 46 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
50 if (IS_ERR(cpage)) { 47 if (IS_ERR(cpage)) {
51 ext4_put_fname_crypto_ctx(&ctx); 48 ext4_put_fname_crypto_ctx(&ctx);
52 return cpage; 49 return ERR_CAST(cpage);
53 } 50 }
54 caddr = kmap(cpage); 51 caddr = kmap(cpage);
55 caddr[size] = 0; 52 caddr[size] = 0;
@@ -80,13 +77,12 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
80 /* Null-terminate the name */ 77 /* Null-terminate the name */
81 if (res <= plen) 78 if (res <= plen)
82 paddr[res] = '\0'; 79 paddr[res] = '\0';
83 nd_set_link(nd, paddr);
84 ext4_put_fname_crypto_ctx(&ctx); 80 ext4_put_fname_crypto_ctx(&ctx);
85 if (cpage) { 81 if (cpage) {
86 kunmap(cpage); 82 kunmap(cpage);
87 page_cache_release(cpage); 83 page_cache_release(cpage);
88 } 84 }
89 return NULL; 85 return *cookie = paddr;
90errout: 86errout:
91 ext4_put_fname_crypto_ctx(&ctx); 87 ext4_put_fname_crypto_ctx(&ctx);
92 if (cpage) { 88 if (cpage) {
@@ -97,36 +93,22 @@ errout:
97 return ERR_PTR(res); 93 return ERR_PTR(res);
98} 94}
99 95
100static void ext4_put_link(struct dentry *dentry, struct nameidata *nd, 96const struct inode_operations ext4_encrypted_symlink_inode_operations = {
101 void *cookie) 97 .readlink = generic_readlink,
102{ 98 .follow_link = ext4_follow_link,
103 struct page *page = cookie; 99 .put_link = kfree_put_link,
104 100 .setattr = ext4_setattr,
105 if (!page) { 101 .setxattr = generic_setxattr,
106 kfree(nd_get_link(nd)); 102 .getxattr = generic_getxattr,
107 } else { 103 .listxattr = ext4_listxattr,
108 kunmap(page); 104 .removexattr = generic_removexattr,
109 page_cache_release(page); 105};
110 }
111}
112#endif 106#endif
113 107
114static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
115{
116 struct ext4_inode_info *ei = EXT4_I(d_inode(dentry));
117 nd_set_link(nd, (char *) ei->i_data);
118 return NULL;
119}
120
121const struct inode_operations ext4_symlink_inode_operations = { 108const struct inode_operations ext4_symlink_inode_operations = {
122 .readlink = generic_readlink, 109 .readlink = generic_readlink,
123#ifdef CONFIG_EXT4_FS_ENCRYPTION
124 .follow_link = ext4_follow_link,
125 .put_link = ext4_put_link,
126#else
127 .follow_link = page_follow_link_light, 110 .follow_link = page_follow_link_light,
128 .put_link = page_put_link, 111 .put_link = page_put_link,
129#endif
130 .setattr = ext4_setattr, 112 .setattr = ext4_setattr,
131 .setxattr = generic_setxattr, 113 .setxattr = generic_setxattr,
132 .getxattr = generic_getxattr, 114 .getxattr = generic_getxattr,
@@ -136,7 +118,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
136 118
137const struct inode_operations ext4_fast_symlink_inode_operations = { 119const struct inode_operations ext4_fast_symlink_inode_operations = {
138 .readlink = generic_readlink, 120 .readlink = generic_readlink,
139 .follow_link = ext4_follow_fast_link, 121 .follow_link = simple_follow_link,
140 .setattr = ext4_setattr, 122 .setattr = ext4_setattr,
141 .setxattr = generic_setxattr, 123 .setxattr = generic_setxattr,
142 .getxattr = generic_getxattr, 124 .getxattr = generic_getxattr,
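
The ext4 encrypted-symlink split also demonstrates the idiom for links that must be decrypted, and therefore allocated, at follow time: the allocation doubles as both the returned body and the cookie, and the paired ->put_link() only has to free it; kfree_put_link() and free_page_put_link() (fs/libfs.c below) cover the two common allocators. A stripped-down sketch, with demo_resolve_target() a hypothetical stand-in for the decryption step:

	static const char *demo_follow_link(struct dentry *dentry, void **cookie)
	{
		char *buf = kmalloc(PATH_MAX, GFP_KERNEL);
		int err;

		if (!buf)
			return ERR_PTR(-ENOMEM);
		err = demo_resolve_target(dentry, buf, PATH_MAX);
		if (err < 0) {
			kfree(buf);
			return ERR_PTR(err);
		}
		return *cookie = buf;	/* freed later by kfree_put_link() */
	}

	static const struct inode_operations demo_symlink_iops = {
		.readlink	= generic_readlink,
		.follow_link	= demo_follow_link,
		.put_link	= kfree_put_link,
	};
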
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 658e8079aaf9..71765d062914 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -296,19 +296,15 @@ fail:
296 return err; 296 return err;
297} 297}
298 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) 299static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
300{ 300{
301 struct page *page = page_follow_link_light(dentry, nd); 301 const char *link = page_follow_link_light(dentry, cookie);
302 302 if (!IS_ERR(link) && !*link) {
303 if (IS_ERR_OR_NULL(page)) 303 /* this is broken symlink case */
304 return page; 304 page_put_link(NULL, *cookie);
305 305 link = ERR_PTR(-ENOENT);
306 /* this is broken symlink case */
307 if (*nd_get_link(nd) == 0) {
308 page_put_link(dentry, nd, page);
309 return ERR_PTR(-ENOENT);
310 } 306 }
311 return page; 307 return link;
312} 308}
313 309
314static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 310static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 999ff5c3cab0..d59712dfa3e7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -195,8 +195,9 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
195 goto out_err; 195 goto out_err;
196 } 196 }
197 /* copy the full handle */ 197 /* copy the full handle */
198 if (copy_from_user(handle, ufh, 198 *handle = f_handle;
199 sizeof(struct file_handle) + 199 if (copy_from_user(&handle->f_handle,
200 &ufh->f_handle,
200 f_handle.handle_bytes)) { 201 f_handle.handle_bytes)) {
201 retval = -EFAULT; 202 retval = -EFAULT;
202 goto out_handle; 203 goto out_handle;
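
The fhandle change is a hardening fix rather than part of the symlink rework: handle_bytes was validated against an earlier fixed-size copy, so copying the whole structure from userspace a second time would let a racing thread enlarge the length between the check and the copy. Assigning the already-checked header by value and then fetching only the payload reads the length from userspace exactly once. The general pattern, under hypothetical demo names:

	struct demo_hdr {
		u32 len;
		u8 payload[];
	};

	static int demo_copy_in(struct demo_hdr __user *uhdr, struct demo_hdr **out)
	{
		struct demo_hdr hdr, *khdr;

		if (copy_from_user(&hdr, uhdr, sizeof(hdr)))
			return -EFAULT;
		if (hdr.len > DEMO_MAX_LEN)		/* validate once */
			return -EINVAL;
		khdr = kmalloc(sizeof(hdr) + hdr.len, GFP_KERNEL);
		if (!khdr)
			return -ENOMEM;
		*khdr = hdr;				/* reuse the checked copy */
		if (copy_from_user(khdr->payload, uhdr->payload, hdr.len)) {
			kfree(khdr);
			return -EFAULT;			/* len never re-read */
		}
		*out = khdr;
		return 0;
	}
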
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 881aa3d217f0..e3dcb4467d92 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,9 +50,6 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
56/* vxfs_inode.c */ 53/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops; 54extern const struct address_space_operations vxfs_immed_aops;
58extern struct kmem_cache *vxfs_inode_cachep; 55extern struct kmem_cache *vxfs_inode_cachep;
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8b9229e2ca5c..cb84f0fcc72a 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -32,29 +32,15 @@
32 */ 32 */
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/pagemap.h> 34#include <linux/pagemap.h>
35#include <linux/namei.h>
36 35
37#include "vxfs.h" 36#include "vxfs.h"
38#include "vxfs_extern.h" 37#include "vxfs_extern.h"
39#include "vxfs_inode.h" 38#include "vxfs_inode.h"
40 39
41 40
42static void * vxfs_immed_follow_link(struct dentry *, struct nameidata *);
43
44static int vxfs_immed_readpage(struct file *, struct page *); 41static int vxfs_immed_readpage(struct file *, struct page *);
45 42
46/* 43/*
47 * Inode operations for immed symlinks.
48 *
49 * Unliked all other operations we do not go through the pagecache,
50 * but do all work directly on the inode.
51 */
52const struct inode_operations vxfs_immed_symlink_iops = {
53 .readlink = generic_readlink,
54 .follow_link = vxfs_immed_follow_link,
55};
56
57/*
58 * Address space operations for immed files and directories. 44 * Address space operations for immed files and directories.
59 */ 45 */
60const struct address_space_operations vxfs_immed_aops = { 46const struct address_space_operations vxfs_immed_aops = {
@@ -62,26 +48,6 @@ const struct address_space_operations vxfs_immed_aops = {
62}; 48};
63 49
64/** 50/**
65 * vxfs_immed_follow_link - follow immed symlink
66 * @dp: dentry for the link
67 * @np: pathname lookup data for the current path walk
68 *
69 * Description:
70 * vxfs_immed_follow_link restarts the pathname lookup with
71 * the data obtained from @dp.
72 *
73 * Returns:
 74 * Always NULL; the link body is handed to the VFS via nd_set_link().
75 */
76static void *
77vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
78{
79 struct vxfs_inode_info *vip = VXFS_INO(d_inode(dp));
80 nd_set_link(np, vip->vii_immed.vi_immed);
81 return NULL;
82}
83
84/**
85 * vxfs_immed_readpage - read part of an immed inode into pagecache 51 * vxfs_immed_readpage - read part of an immed inode into pagecache
86 * @file: file context (unused) 52 * @file: file context (unused)
87 * @page: page frame to fill in. 53 * @page: page frame to fill in.
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 363e3ae25f6b..ef73ed674a27 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -35,6 +35,7 @@
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/namei.h>
38 39
39#include "vxfs.h" 40#include "vxfs.h"
40#include "vxfs_inode.h" 41#include "vxfs_inode.h"
@@ -327,8 +328,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
327 ip->i_op = &page_symlink_inode_operations; 328 ip->i_op = &page_symlink_inode_operations;
328 ip->i_mapping->a_ops = &vxfs_aops; 329 ip->i_mapping->a_ops = &vxfs_aops;
329 } else { 330 } else {
330 ip->i_op = &vxfs_immed_symlink_iops; 331 ip->i_op = &simple_symlink_inode_operations;
331 vip->vii_immed.vi_immed[ip->i_size] = '\0'; 332 ip->i_link = vip->vii_immed.vi_immed;
333 nd_terminate_link(ip->i_link, ip->i_size,
334 sizeof(vip->vii_immed.vi_immed) - 1);
332 } 335 }
333 } else 336 } else
334 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); 337 init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
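
nd_terminate_link(), used in the vxfs hunk above and by the ext2/ext3/ext4 fast-symlink paths, writes the terminating NUL at min(len, maxlen); bounding by the size of the inline area means even a corrupted on-disk i_size cannot push the terminator outside vi_immed, and afterwards i_link is guaranteed to be a proper C string. The helper, in include/linux/namei.h, is essentially:

	static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
	{
		((char *) name)[min(len, maxlen)] = '\0';
	}
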
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0572bca49f15..5e2e08712d3b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1365,7 +1365,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
1365 return err; 1365 return err;
1366} 1366}
1367 1367
1368static char *read_link(struct dentry *dentry) 1368static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
1369{ 1369{
1370 struct inode *inode = d_inode(dentry); 1370 struct inode *inode = d_inode(dentry);
1371 struct fuse_conn *fc = get_fuse_conn(inode); 1371 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1389,28 +1389,12 @@ static char *read_link(struct dentry *dentry)
1389 link = ERR_PTR(ret); 1389 link = ERR_PTR(ret);
1390 } else { 1390 } else {
1391 link[ret] = '\0'; 1391 link[ret] = '\0';
1392 *cookie = link;
1392 } 1393 }
1393 fuse_invalidate_atime(inode); 1394 fuse_invalidate_atime(inode);
1394 return link; 1395 return link;
1395} 1396}
1396 1397
1397static void free_link(char *link)
1398{
1399 if (!IS_ERR(link))
1400 free_page((unsigned long) link);
1401}
1402
1403static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
1404{
1405 nd_set_link(nd, read_link(dentry));
1406 return NULL;
1407}
1408
1409static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
1410{
1411 free_link(nd_get_link(nd));
1412}
1413
1414static int fuse_dir_open(struct inode *inode, struct file *file) 1398static int fuse_dir_open(struct inode *inode, struct file *file)
1415{ 1399{
1416 return fuse_open_common(inode, file, true); 1400 return fuse_open_common(inode, file, true);
@@ -1926,7 +1910,7 @@ static const struct inode_operations fuse_common_inode_operations = {
1926static const struct inode_operations fuse_symlink_inode_operations = { 1910static const struct inode_operations fuse_symlink_inode_operations = {
1927 .setattr = fuse_setattr, 1911 .setattr = fuse_setattr,
1928 .follow_link = fuse_follow_link, 1912 .follow_link = fuse_follow_link,
1929 .put_link = fuse_put_link, 1913 .put_link = free_page_put_link,
1930 .readlink = generic_readlink, 1914 .readlink = generic_readlink,
1931 .getattr = fuse_getattr, 1915 .getattr = fuse_getattr,
1932 .setxattr = fuse_setxattr, 1916 .setxattr = fuse_setxattr,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 1b3ca7a2e3fc..3a1461de1551 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1548,7 +1548,7 @@ out:
1548 * Returns: 0 on success or error code 1548 * Returns: 0 on success or error code
1549 */ 1549 */
1550 1550
1551static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) 1551static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
1552{ 1552{
1553 struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); 1553 struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
1554 struct gfs2_holder i_gh; 1554 struct gfs2_holder i_gh;
@@ -1561,8 +1561,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1561 error = gfs2_glock_nq(&i_gh); 1561 error = gfs2_glock_nq(&i_gh);
1562 if (error) { 1562 if (error) {
1563 gfs2_holder_uninit(&i_gh); 1563 gfs2_holder_uninit(&i_gh);
1564 nd_set_link(nd, ERR_PTR(error)); 1564 return ERR_PTR(error);
1565 return NULL;
1566 } 1565 }
1567 1566
1568 size = (unsigned int)i_size_read(&ip->i_inode); 1567 size = (unsigned int)i_size_read(&ip->i_inode);
@@ -1586,8 +1585,9 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1586 brelse(dibh); 1585 brelse(dibh);
1587out: 1586out:
1588 gfs2_glock_dq_uninit(&i_gh); 1587 gfs2_glock_dq_uninit(&i_gh);
1589 nd_set_link(nd, buf); 1588 if (!IS_ERR(buf))
1590 return NULL; 1589 *cookie = buf;
1590 return buf;
1591} 1591}
1592 1592
1593/** 1593/**
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07d8d8f52faf..059597b23f67 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -892,7 +892,7 @@ static const struct inode_operations hostfs_dir_iops = {
892 .setattr = hostfs_setattr, 892 .setattr = hostfs_setattr,
893}; 893};
894 894
895static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) 895static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
896{ 896{
897 char *link = __getname(); 897 char *link = __getname();
898 if (link) { 898 if (link) {
@@ -906,21 +906,18 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
906 } 906 }
907 if (err < 0) { 907 if (err < 0) {
908 __putname(link); 908 __putname(link);
909 link = ERR_PTR(err); 909 return ERR_PTR(err);
910 } 910 }
911 } else { 911 } else {
912 link = ERR_PTR(-ENOMEM); 912 return ERR_PTR(-ENOMEM);
913 } 913 }
914 914
915 nd_set_link(nd, link); 915 return *cookie = link;
916 return NULL;
917} 916}
918 917
919static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 918static void hostfs_put_link(struct inode *unused, void *cookie)
920{ 919{
921 char *s = nd_get_link(nd); 920 __putname(cookie);
922 if (!IS_ERR(s))
923 __putname(s);
924} 921}
925 922
926static const struct inode_operations hostfs_link_iops = { 923static const struct inode_operations hostfs_link_iops = {
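
hostfs also shows the matching ->put_link() side of the conversion: the method now receives the inode and the cookie stored by ->follow_link(), and it is only invoked when a cookie was actually set, so the nd_get_link()/IS_ERR() boilerplate can go. Schematically:

	/* old: recover the pointer from the nameidata, guard against errors */
	static void demo_put_link_old(struct dentry *dentry, struct nameidata *nd,
				      void *cookie)
	{
		char *s = nd_get_link(nd);

		if (!IS_ERR(s))
			__putname(s);
	}

	/* new: the cookie is known-valid and the dentry is no longer needed */
	static void demo_put_link_new(struct inode *inode, void *cookie)
	{
		__putname(cookie);
	}
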
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index fa2bd5366ecf..2867837909a9 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -642,20 +642,19 @@ static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
642 buflen); 642 buflen);
643} 643}
644 644
645static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 645static const char *hppfs_follow_link(struct dentry *dentry, void **cookie)
646{ 646{
647 struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; 647 struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
648 648
649 return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd); 649 return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, cookie);
650} 650}
651 651
652static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd, 652static void hppfs_put_link(struct inode *inode, void *cookie)
653 void *cookie)
654{ 653{
655 struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; 654 struct inode *proc_inode = d_inode(HPPFS_I(inode)->proc_dentry);
656 655
657 if (d_inode(proc_dentry)->i_op->put_link) 656 if (proc_inode->i_op->put_link)
658 d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie); 657 proc_inode->i_op->put_link(proc_inode, cookie);
659} 658}
660 659
661static const struct inode_operations hppfs_dir_iops = { 660static const struct inode_operations hppfs_dir_iops = {
diff --git a/fs/inode.c b/fs/inode.c
index ea37cd17b53f..e8d62688ed91 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -152,6 +152,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
152 inode->i_pipe = NULL; 152 inode->i_pipe = NULL;
153 inode->i_bdev = NULL; 153 inode->i_bdev = NULL;
154 inode->i_cdev = NULL; 154 inode->i_cdev = NULL;
155 inode->i_link = NULL;
155 inode->i_rdev = 0; 156 inode->i_rdev = 0;
156 inode->dirtied_when = 0; 157 inode->dirtied_when = 0;
157 158
@@ -1584,36 +1585,47 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
1584 * This function automatically handles read only file systems and media, 1585 * This function automatically handles read only file systems and media,
1585 * as well as the "noatime" flag and inode specific "noatime" markers. 1586 * as well as the "noatime" flag and inode specific "noatime" markers.
1586 */ 1587 */
1587void touch_atime(const struct path *path) 1588bool atime_needs_update(const struct path *path, struct inode *inode)
1588{ 1589{
1589 struct vfsmount *mnt = path->mnt; 1590 struct vfsmount *mnt = path->mnt;
1590 struct inode *inode = d_inode(path->dentry);
1591 struct timespec now; 1591 struct timespec now;
1592 1592
1593 if (inode->i_flags & S_NOATIME) 1593 if (inode->i_flags & S_NOATIME)
1594 return; 1594 return false;
1595 if (IS_NOATIME(inode)) 1595 if (IS_NOATIME(inode))
1596 return; 1596 return false;
1597 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1597 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1598 return; 1598 return false;
1599 1599
1600 if (mnt->mnt_flags & MNT_NOATIME) 1600 if (mnt->mnt_flags & MNT_NOATIME)
1601 return; 1601 return false;
1602 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1602 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1603 return; 1603 return false;
1604 1604
1605 now = current_fs_time(inode->i_sb); 1605 now = current_fs_time(inode->i_sb);
1606 1606
1607 if (!relatime_need_update(mnt, inode, now)) 1607 if (!relatime_need_update(mnt, inode, now))
1608 return; 1608 return false;
1609 1609
1610 if (timespec_equal(&inode->i_atime, &now)) 1610 if (timespec_equal(&inode->i_atime, &now))
1611 return false;
1612
1613 return true;
1614}
1615
1616void touch_atime(const struct path *path)
1617{
1618 struct vfsmount *mnt = path->mnt;
1619 struct inode *inode = d_inode(path->dentry);
1620 struct timespec now;
1621
1622 if (!atime_needs_update(path, inode))
1611 return; 1623 return;
1612 1624
1613 if (!sb_start_write_trylock(inode->i_sb)) 1625 if (!sb_start_write_trylock(inode->i_sb))
1614 return; 1626 return;
1615 1627
1616 if (__mnt_want_write(mnt)) 1628 if (__mnt_want_write(mnt) != 0)
1617 goto skip_update; 1629 goto skip_update;
1618 /* 1630 /*
1619 * File systems can error out when updating inodes if they need to 1631 * File systems can error out when updating inodes if they need to
@@ -1624,6 +1636,7 @@ void touch_atime(const struct path *path)
1624 * We may also fail on filesystems that have the ability to make parts 1636 * We may also fail on filesystems that have the ability to make parts
1625 * of the fs read only, e.g. subvolumes in Btrfs. 1637 * of the fs read only, e.g. subvolumes in Btrfs.
1626 */ 1638 */
1639 now = current_fs_time(inode->i_sb);
1627 update_time(inode, &now, S_ATIME); 1640 update_time(inode, &now, S_ATIME);
1628 __mnt_drop_write(mnt); 1641 __mnt_drop_write(mnt);
1629skip_update: 1642skip_update:
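
Splitting atime_needs_update() out of touch_atime() is what lets RCU-mode pathwalk stay lazy in the common case: get_link() in the fs/namei.c hunk below only drops out of RCU when the atime actually has to be written. The caller-side shape, as it appears there:

	if (!(nd->flags & LOOKUP_RCU)) {
		touch_atime(&last->link);
		cond_resched();
	} else if (atime_needs_update(&last->link, inode)) {
		/* leaving RCU mode is the slow path; pay for it on demand */
		if (unlikely(unlazy_walk(nd, NULL, 0)))
			return ERR_PTR(-ECHILD);
		touch_atime(&last->link);
	}

Note that touch_atime() now re-samples current_fs_time() right before update_time(), since the value taken inside atime_needs_update() may be stale by the time write access to the mount has been obtained.
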
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97943b8..81180022923f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -354,6 +354,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
354 ret = -ENOMEM; 354 ret = -ENOMEM;
355 goto fail; 355 goto fail;
356 } 356 }
357 inode->i_link = f->target;
357 358
358 jffs2_dbg(1, "%s(): symlink's target '%s' cached\n", 359 jffs2_dbg(1, "%s(): symlink's target '%s' cached\n",
359 __func__, (char *)f->target); 360 __func__, (char *)f->target);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index fe5ea080b4ec..60d86e8fba6e 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -294,6 +294,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
294 294
295 case S_IFLNK: 295 case S_IFLNK:
296 inode->i_op = &jffs2_symlink_inode_operations; 296 inode->i_op = &jffs2_symlink_inode_operations;
297 inode->i_link = f->target;
297 break; 298 break;
298 299
299 case S_IFDIR: 300 case S_IFDIR:
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 1fefa25d0fa5..8ce2f240125b 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -9,58 +9,15 @@
9 * 9 *
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14#include <linux/kernel.h>
15#include <linux/fs.h>
16#include <linux/namei.h>
17#include "nodelist.h" 12#include "nodelist.h"
18 13
19static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
20
21const struct inode_operations jffs2_symlink_inode_operations = 14const struct inode_operations jffs2_symlink_inode_operations =
22{ 15{
23 .readlink = generic_readlink, 16 .readlink = generic_readlink,
24 .follow_link = jffs2_follow_link, 17 .follow_link = simple_follow_link,
25 .setattr = jffs2_setattr, 18 .setattr = jffs2_setattr,
26 .setxattr = jffs2_setxattr, 19 .setxattr = jffs2_setxattr,
27 .getxattr = jffs2_getxattr, 20 .getxattr = jffs2_getxattr,
28 .listxattr = jffs2_listxattr, 21 .listxattr = jffs2_listxattr,
29 .removexattr = jffs2_removexattr 22 .removexattr = jffs2_removexattr
30}; 23};
31
32static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
33{
34 struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
35 char *p = (char *)f->target;
36
37 /*
38 * We don't acquire the f->sem mutex here since the only data we
39 * use is f->target.
40 *
 41 * 1. If we are here the inode has already been built and f->target has
 42 * to point to the target path.
 43 * 2. Nobody uses f->target (if the inode is a symlink's inode). The
44 * exception is inode freeing function which frees f->target. But
45 * it can't be called while we are here and before VFS has
46 * stopped using our f->target string which we provide by means of
47 * nd_set_link() call.
48 */
49
50 if (!p) {
51 pr_err("%s(): can't find symlink target\n", __func__);
52 p = ERR_PTR(-EIO);
53 }
54 jffs2_dbg(1, "%s(): target path is '%s'\n",
55 __func__, (char *)f->target);
56
57 nd_set_link(nd, p);
58
59 /*
60 * We will unlock the f->sem mutex but VFS will use the f->target string. This is safe
 61 * since the only way f->target may be changed is the iput() operation.
62 * But VFS will not use f->target after iput() has been called.
63 */
64 return NULL;
65}
66
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 070dc4b33544..6f1cb2b5ee28 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -63,11 +63,12 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
63 inode->i_mapping->a_ops = &jfs_aops; 63 inode->i_mapping->a_ops = &jfs_aops;
64 } else { 64 } else {
65 inode->i_op = &jfs_fast_symlink_inode_operations; 65 inode->i_op = &jfs_fast_symlink_inode_operations;
66 inode->i_link = JFS_IP(inode)->i_inline;
66 /* 67 /*
67 * The inline data should be null-terminated, but 68 * The inline data should be null-terminated, but
68 * don't let on-disk corruption crash the kernel 69 * don't let on-disk corruption crash the kernel
69 */ 70 */
70 JFS_IP(inode)->i_inline[inode->i_size] = '\0'; 71 inode->i_link[inode->i_size] = '\0';
71 } 72 }
72 } else { 73 } else {
73 inode->i_op = &jfs_file_inode_operations; 74 inode->i_op = &jfs_file_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 66db7bc0ed10..e33be921aa41 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -880,7 +880,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
880 int ssize; /* source pathname size */ 880 int ssize; /* source pathname size */
881 struct btstack btstack; 881 struct btstack btstack;
882 struct inode *ip = d_inode(dentry); 882 struct inode *ip = d_inode(dentry);
883 unchar *i_fastsymlink;
884 s64 xlen = 0; 883 s64 xlen = 0;
885 int bmask = 0, xsize; 884 int bmask = 0, xsize;
886 s64 xaddr; 885 s64 xaddr;
@@ -946,8 +945,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
946 if (ssize <= IDATASIZE) { 945 if (ssize <= IDATASIZE) {
947 ip->i_op = &jfs_fast_symlink_inode_operations; 946 ip->i_op = &jfs_fast_symlink_inode_operations;
948 947
949 i_fastsymlink = JFS_IP(ip)->i_inline; 948 ip->i_link = JFS_IP(ip)->i_inline;
950 memcpy(i_fastsymlink, name, ssize); 949 memcpy(ip->i_link, name, ssize);
951 ip->i_size = ssize - 1; 950 ip->i_size = ssize - 1;
952 951
953 /* 952 /*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 80f42bcc4ef1..5929e2363cb8 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -17,21 +17,13 @@
17 */ 17 */
18 18
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/namei.h>
21#include "jfs_incore.h" 20#include "jfs_incore.h"
22#include "jfs_inode.h" 21#include "jfs_inode.h"
23#include "jfs_xattr.h" 22#include "jfs_xattr.h"
24 23
25static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
26{
27 char *s = JFS_IP(d_inode(dentry))->i_inline;
28 nd_set_link(nd, s);
29 return NULL;
30}
31
32const struct inode_operations jfs_fast_symlink_inode_operations = { 24const struct inode_operations jfs_fast_symlink_inode_operations = {
33 .readlink = generic_readlink, 25 .readlink = generic_readlink,
34 .follow_link = jfs_follow_link, 26 .follow_link = simple_follow_link,
35 .setattr = jfs_setattr, 27 .setattr = jfs_setattr,
36 .setxattr = jfs_setxattr, 28 .setxattr = jfs_setxattr,
37 .getxattr = jfs_getxattr, 29 .getxattr = jfs_getxattr,
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 8a198898e39a..db272528ab5b 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -112,25 +112,18 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
112 return error; 112 return error;
113} 113}
114 114
115static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd) 115static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
116{ 116{
117 int error = -ENOMEM; 117 int error = -ENOMEM;
118 unsigned long page = get_zeroed_page(GFP_KERNEL); 118 unsigned long page = get_zeroed_page(GFP_KERNEL);
119 if (page) { 119 if (!page)
120 error = kernfs_getlink(dentry, (char *) page); 120 return ERR_PTR(-ENOMEM);
121 if (error < 0) 121 error = kernfs_getlink(dentry, (char *)page);
122 free_page((unsigned long)page); 122 if (unlikely(error < 0)) {
123 }
124 nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
125 return NULL;
126}
127
128static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd,
129 void *cookie)
130{
131 char *page = nd_get_link(nd);
132 if (!IS_ERR(page))
133 free_page((unsigned long)page); 123 free_page((unsigned long)page);
124 return ERR_PTR(error);
125 }
126 return *cookie = (char *)page;
134} 127}
135 128
136const struct inode_operations kernfs_symlink_iops = { 129const struct inode_operations kernfs_symlink_iops = {
@@ -140,7 +133,7 @@ const struct inode_operations kernfs_symlink_iops = {
140 .listxattr = kernfs_iop_listxattr, 133 .listxattr = kernfs_iop_listxattr,
141 .readlink = generic_readlink, 134 .readlink = generic_readlink,
142 .follow_link = kernfs_iop_follow_link, 135 .follow_link = kernfs_iop_follow_link,
143 .put_link = kernfs_iop_put_link, 136 .put_link = free_page_put_link,
144 .setattr = kernfs_iop_setattr, 137 .setattr = kernfs_iop_setattr,
145 .getattr = kernfs_iop_getattr, 138 .getattr = kernfs_iop_getattr,
146 .permission = kernfs_iop_permission, 139 .permission = kernfs_iop_permission,
diff --git a/fs/libfs.c b/fs/libfs.c
index cb1fb4b9b637..65e1feca8b98 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1024,15 +1024,18 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1024} 1024}
1025EXPORT_SYMBOL(noop_fsync); 1025EXPORT_SYMBOL(noop_fsync);
1026 1026
1027void kfree_put_link(struct dentry *dentry, struct nameidata *nd, 1027void kfree_put_link(struct inode *unused, void *cookie)
1028 void *cookie)
1029{ 1028{
1030 char *s = nd_get_link(nd); 1029 kfree(cookie);
1031 if (!IS_ERR(s))
1032 kfree(s);
1033} 1030}
1034EXPORT_SYMBOL(kfree_put_link); 1031EXPORT_SYMBOL(kfree_put_link);
1035 1032
1033void free_page_put_link(struct inode *unused, void *cookie)
1034{
1035 free_page((unsigned long) cookie);
1036}
1037EXPORT_SYMBOL(free_page_put_link);
1038
1036/* 1039/*
1037 * nop .set_page_dirty method so that people can use .page_mkwrite on 1040 * nop .set_page_dirty method so that people can use .page_mkwrite on
1038 * anon inodes. 1041 * anon inodes.
@@ -1093,3 +1096,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
1093 return -EINVAL; 1096 return -EINVAL;
1094} 1097}
1095EXPORT_SYMBOL(simple_nosetlease); 1098EXPORT_SYMBOL(simple_nosetlease);
1099
1100const char *simple_follow_link(struct dentry *dentry, void **cookie)
1101{
1102 return d_inode(dentry)->i_link;
1103}
1104EXPORT_SYMBOL(simple_follow_link);
1105
1106const struct inode_operations simple_symlink_inode_operations = {
1107 .follow_link = simple_follow_link,
1108 .readlink = generic_readlink
1109};
1110EXPORT_SYMBOL(simple_symlink_inode_operations);
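
A usage example for the new libfs helpers: any filesystem whose target string is pinned in memory for the life of the inode can publish it through i_link and reuse the shared operations, with no symlink methods of its own; the body is then even usable without leaving RCU mode, since get_link() below reads inode->i_link directly. A sketch for a hypothetical in-memory filesystem (demo_new_inode() assumed):

	static int demo_symlink(struct inode *dir, struct dentry *dentry,
				const char *symname)
	{
		struct inode *inode = demo_new_inode(dir->i_sb);

		if (!inode)
			return -ENOMEM;
		/* i_link may be read locklessly: set it before the dentry goes
		 * live, keep it valid until eviction (freed in ->evict_inode,
		 * not shown) */
		inode->i_link = kstrdup(symname, GFP_KERNEL);
		if (!inode->i_link) {
			iput(inode);
			return -ENOMEM;
		}
		inode->i_op = &simple_symlink_inode_operations;
		d_instantiate(dentry, inode);
		return 0;
	}
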
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 4cf38f118549..f9b45d46d4c4 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -779,6 +779,7 @@ fail:
779const struct inode_operations logfs_symlink_iops = { 779const struct inode_operations logfs_symlink_iops = {
780 .readlink = generic_readlink, 780 .readlink = generic_readlink,
781 .follow_link = page_follow_link_light, 781 .follow_link = page_follow_link_light,
782 .put_link = page_put_link,
782}; 783};
783 784
784const struct inode_operations logfs_dir_iops = { 785const struct inode_operations logfs_dir_iops = {
diff --git a/fs/mount.h b/fs/mount.h
index 6a61c2b3e385..b5b8082bfa42 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -88,6 +88,7 @@ static inline int is_mounted(struct vfsmount *mnt)
88extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); 88extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
89extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); 89extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
90 90
91extern int __legitimize_mnt(struct vfsmount *, unsigned);
91extern bool legitimize_mnt(struct vfsmount *, unsigned); 92extern bool legitimize_mnt(struct vfsmount *, unsigned);
92 93
93extern void __detach_mounts(struct dentry *dentry); 94extern void __detach_mounts(struct dentry *dentry);
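
__legitimize_mnt() exports a three-way result that the new legitimize_path() in fs/namei.c relies on; reading the contract off that caller: zero means the mount reference was obtained, a positive value means the mount is already dead and nothing was taken (so the caller clears path->mnt to keep the eventual path_put() away from it), and a negative value leaves the half-taken reference for path_put() to drop. Caller-side, as in the hunk below:

	int res = __legitimize_mnt(path->mnt, nd->m_seq);
	if (unlikely(res)) {
		if (res > 0)		/* dead mount: no reference taken */
			path->mnt = NULL;
		path->dentry = NULL;	/* dentry was never pinned either */
		return false;		/* caller still path_put()s later */
	}
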
diff --git a/fs/namei.c b/fs/namei.c
index fe30d3be43a8..2dad0eaf91d3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -492,6 +492,7 @@ void path_put(const struct path *path)
492} 492}
493EXPORT_SYMBOL(path_put); 493EXPORT_SYMBOL(path_put);
494 494
495#define EMBEDDED_LEVELS 2
495struct nameidata { 496struct nameidata {
496 struct path path; 497 struct path path;
497 struct qstr last; 498 struct qstr last;
@@ -501,10 +502,139 @@ struct nameidata {
501 unsigned seq, m_seq; 502 unsigned seq, m_seq;
502 int last_type; 503 int last_type;
503 unsigned depth; 504 unsigned depth;
504 struct file *base; 505 int total_link_count;
505 char *saved_names[MAX_NESTED_LINKS + 1]; 506 struct saved {
507 struct path link;
508 void *cookie;
509 const char *name;
510 struct inode *inode;
511 unsigned seq;
512 } *stack, internal[EMBEDDED_LEVELS];
513 struct filename *name;
514 struct nameidata *saved;
515 unsigned root_seq;
516 int dfd;
506}; 517};
507 518
519static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
520{
521 struct nameidata *old = current->nameidata;
522 p->stack = p->internal;
523 p->dfd = dfd;
524 p->name = name;
525 p->total_link_count = old ? old->total_link_count : 0;
526 p->saved = old;
527 current->nameidata = p;
528}
529
530static void restore_nameidata(void)
531{
532 struct nameidata *now = current->nameidata, *old = now->saved;
533
534 current->nameidata = old;
535 if (old)
536 old->total_link_count = now->total_link_count;
537 if (now->stack != now->internal) {
538 kfree(now->stack);
539 now->stack = now->internal;
540 }
541}
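
set_nameidata()/restore_nameidata() replace the old scheme of threading a nameidata pointer through every caller: each walk links its (mostly on-stack) nameidata onto current->nameidata, nesting via ->saved when a walk is started from inside another walk, which is how nd_jump_link() further down can find the active walk without taking a pointer argument. The shape of an entry point under this scheme, simplified, with path_lookupat() assumed as the worker:

	static int demo_pathwalk(int dfd, struct filename *name, unsigned flags)
	{
		struct nameidata nd;
		int err;

		set_nameidata(&nd, dfd, name);	/* push onto current */
		err = path_lookupat(&nd, flags, &nd.path);
		restore_nameidata();	/* pop; frees a spilled link stack */
		return err;
	}
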
542
543static int __nd_alloc_stack(struct nameidata *nd)
544{
545 struct saved *p;
546
547 if (nd->flags & LOOKUP_RCU) {
 548 p = kmalloc(MAXSYMLINKS * sizeof(struct saved),
549 GFP_ATOMIC);
550 if (unlikely(!p))
551 return -ECHILD;
552 } else {
 553 p = kmalloc(MAXSYMLINKS * sizeof(struct saved),
554 GFP_KERNEL);
555 if (unlikely(!p))
556 return -ENOMEM;
557 }
558 memcpy(p, nd->internal, sizeof(nd->internal));
559 nd->stack = p;
560 return 0;
561}
562
563static inline int nd_alloc_stack(struct nameidata *nd)
564{
565 if (likely(nd->depth != EMBEDDED_LEVELS))
566 return 0;
567 if (likely(nd->stack != nd->internal))
568 return 0;
569 return __nd_alloc_stack(nd);
570}
571
572static void drop_links(struct nameidata *nd)
573{
574 int i = nd->depth;
575 while (i--) {
576 struct saved *last = nd->stack + i;
577 struct inode *inode = last->inode;
578 if (last->cookie && inode->i_op->put_link) {
579 inode->i_op->put_link(inode, last->cookie);
580 last->cookie = NULL;
581 }
582 }
583}
584
585static void terminate_walk(struct nameidata *nd)
586{
587 drop_links(nd);
588 if (!(nd->flags & LOOKUP_RCU)) {
589 int i;
590 path_put(&nd->path);
591 for (i = 0; i < nd->depth; i++)
592 path_put(&nd->stack[i].link);
593 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
594 path_put(&nd->root);
595 nd->root.mnt = NULL;
596 }
597 } else {
598 nd->flags &= ~LOOKUP_RCU;
599 if (!(nd->flags & LOOKUP_ROOT))
600 nd->root.mnt = NULL;
601 rcu_read_unlock();
602 }
603 nd->depth = 0;
604}
605
606/* path_put is needed afterwards regardless of success or failure */
607static bool legitimize_path(struct nameidata *nd,
608 struct path *path, unsigned seq)
609{
610 int res = __legitimize_mnt(path->mnt, nd->m_seq);
611 if (unlikely(res)) {
612 if (res > 0)
613 path->mnt = NULL;
614 path->dentry = NULL;
615 return false;
616 }
617 if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
618 path->dentry = NULL;
619 return false;
620 }
621 return !read_seqcount_retry(&path->dentry->d_seq, seq);
622}
623
624static bool legitimize_links(struct nameidata *nd)
625{
626 int i;
627 for (i = 0; i < nd->depth; i++) {
628 struct saved *last = nd->stack + i;
629 if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
630 drop_links(nd);
631 nd->depth = i + 1;
632 return false;
633 }
634 }
635 return true;
636}
637
508/* 638/*
509 * Path walking has 2 modes, rcu-walk and ref-walk (see 639 * Path walking has 2 modes, rcu-walk and ref-walk (see
510 * Documentation/filesystems/path-lookup.txt). In situations when we can't 640 * Documentation/filesystems/path-lookup.txt). In situations when we can't
@@ -520,35 +650,28 @@ struct nameidata {
520 * unlazy_walk - try to switch to ref-walk mode. 650 * unlazy_walk - try to switch to ref-walk mode.
521 * @nd: nameidata pathwalk data 651 * @nd: nameidata pathwalk data
522 * @dentry: child of nd->path.dentry or NULL 652 * @dentry: child of nd->path.dentry or NULL
653 * @seq: seq number to check dentry against
523 * Returns: 0 on success, -ECHILD on failure 654 * Returns: 0 on success, -ECHILD on failure
524 * 655 *
525 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry 656 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
526 * for ref-walk mode. @dentry must be a path found by a do_lookup call on 657 * for ref-walk mode. @dentry must be a path found by a do_lookup call on
527 * @nd or NULL. Must be called from rcu-walk context. 658 * @nd or NULL. Must be called from rcu-walk context.
659 * Nothing should touch nameidata between unlazy_walk() failure and
660 * terminate_walk().
528 */ 661 */
529static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) 662static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq)
530{ 663{
531 struct fs_struct *fs = current->fs;
532 struct dentry *parent = nd->path.dentry; 664 struct dentry *parent = nd->path.dentry;
533 665
534 BUG_ON(!(nd->flags & LOOKUP_RCU)); 666 BUG_ON(!(nd->flags & LOOKUP_RCU));
535 667
536 /*
537 * After legitimizing the bastards, terminate_walk()
538 * will do the right thing for non-RCU mode, and all our
539 * subsequent exit cases should rcu_read_unlock()
540 * before returning. Do vfsmount first; if dentry
541 * can't be legitimized, just set nd->path.dentry to NULL
542 * and rely on dput(NULL) being a no-op.
543 */
544 if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
545 return -ECHILD;
546 nd->flags &= ~LOOKUP_RCU; 668 nd->flags &= ~LOOKUP_RCU;
547 669 if (unlikely(!legitimize_links(nd)))
548 if (!lockref_get_not_dead(&parent->d_lockref)) { 670 goto out2;
549 nd->path.dentry = NULL; 671 if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
550 goto out; 672 goto out2;
551 } 673 if (unlikely(!lockref_get_not_dead(&parent->d_lockref)))
674 goto out1;
552 675
553 /* 676 /*
554 * For a negative lookup, the lookup sequence point is the parents 677 * For a negative lookup, the lookup sequence point is the parents
@@ -568,7 +691,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
568 } else { 691 } else {
569 if (!lockref_get_not_dead(&dentry->d_lockref)) 692 if (!lockref_get_not_dead(&dentry->d_lockref))
570 goto out; 693 goto out;
571 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) 694 if (read_seqcount_retry(&dentry->d_seq, seq))
572 goto drop_dentry; 695 goto drop_dentry;
573 } 696 }
574 697
@@ -577,22 +700,24 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
577 * still valid and get it if required. 700 * still valid and get it if required.
578 */ 701 */
579 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 702 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
580 spin_lock(&fs->lock); 703 if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) {
581 if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry) 704 rcu_read_unlock();
582 goto unlock_and_drop_dentry; 705 dput(dentry);
583 path_get(&nd->root); 706 return -ECHILD;
584 spin_unlock(&fs->lock); 707 }
585 } 708 }
586 709
587 rcu_read_unlock(); 710 rcu_read_unlock();
588 return 0; 711 return 0;
589 712
590unlock_and_drop_dentry:
591 spin_unlock(&fs->lock);
592drop_dentry: 713drop_dentry:
593 rcu_read_unlock(); 714 rcu_read_unlock();
594 dput(dentry); 715 dput(dentry);
595 goto drop_root_mnt; 716 goto drop_root_mnt;
717out2:
718 nd->path.mnt = NULL;
719out1:
720 nd->path.dentry = NULL;
596out: 721out:
597 rcu_read_unlock(); 722 rcu_read_unlock();
598drop_root_mnt: 723drop_root_mnt:
@@ -601,6 +726,24 @@ drop_root_mnt:
601 return -ECHILD; 726 return -ECHILD;
602} 727}
603 728
729static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq)
730{
731 if (unlikely(!legitimize_path(nd, link, seq))) {
732 drop_links(nd);
733 nd->depth = 0;
734 nd->flags &= ~LOOKUP_RCU;
735 nd->path.mnt = NULL;
736 nd->path.dentry = NULL;
737 if (!(nd->flags & LOOKUP_ROOT))
738 nd->root.mnt = NULL;
739 rcu_read_unlock();
 740 } else if (likely(unlazy_walk(nd, NULL, 0) == 0)) {
741 return 0;
742 }
743 path_put(link);
744 return -ECHILD;
745}
746
604static inline int d_revalidate(struct dentry *dentry, unsigned int flags) 747static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
605{ 748{
606 return dentry->d_op->d_revalidate(dentry, flags); 749 return dentry->d_op->d_revalidate(dentry, flags);
@@ -622,26 +765,10 @@ static int complete_walk(struct nameidata *nd)
622 int status; 765 int status;
623 766
624 if (nd->flags & LOOKUP_RCU) { 767 if (nd->flags & LOOKUP_RCU) {
625 nd->flags &= ~LOOKUP_RCU;
626 if (!(nd->flags & LOOKUP_ROOT)) 768 if (!(nd->flags & LOOKUP_ROOT))
627 nd->root.mnt = NULL; 769 nd->root.mnt = NULL;
628 770 if (unlikely(unlazy_walk(nd, NULL, 0)))
629 if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
630 rcu_read_unlock();
631 return -ECHILD;
632 }
633 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
634 rcu_read_unlock();
635 mntput(nd->path.mnt);
636 return -ECHILD; 771 return -ECHILD;
637 }
638 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
639 rcu_read_unlock();
640 dput(dentry);
641 mntput(nd->path.mnt);
642 return -ECHILD;
643 }
644 rcu_read_unlock();
645 } 772 }
646 773
647 if (likely(!(nd->flags & LOOKUP_JUMPED))) 774 if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -657,28 +784,25 @@ static int complete_walk(struct nameidata *nd)
657 if (!status) 784 if (!status)
658 status = -ESTALE; 785 status = -ESTALE;
659 786
660 path_put(&nd->path);
661 return status; 787 return status;
662} 788}
663 789
664static __always_inline void set_root(struct nameidata *nd) 790static void set_root(struct nameidata *nd)
665{ 791{
666 get_fs_root(current->fs, &nd->root); 792 get_fs_root(current->fs, &nd->root);
667} 793}
668 794
669static int link_path_walk(const char *, struct nameidata *); 795static unsigned set_root_rcu(struct nameidata *nd)
670
671static __always_inline unsigned set_root_rcu(struct nameidata *nd)
672{ 796{
673 struct fs_struct *fs = current->fs; 797 struct fs_struct *fs = current->fs;
674 unsigned seq, res; 798 unsigned seq;
675 799
676 do { 800 do {
677 seq = read_seqcount_begin(&fs->seq); 801 seq = read_seqcount_begin(&fs->seq);
678 nd->root = fs->root; 802 nd->root = fs->root;
679 res = __read_seqcount_begin(&nd->root.dentry->d_seq); 803 nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
680 } while (read_seqcount_retry(&fs->seq, seq)); 804 } while (read_seqcount_retry(&fs->seq, seq));
681 return res; 805 return nd->root_seq;
682} 806}
683 807
684static void path_put_conditional(struct path *path, struct nameidata *nd) 808static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -704,8 +828,9 @@ static inline void path_to_nameidata(const struct path *path,
704 * Helper to directly jump to a known parsed path from ->follow_link, 828 * Helper to directly jump to a known parsed path from ->follow_link,
705 * caller must have taken a reference to path beforehand. 829 * caller must have taken a reference to path beforehand.
706 */ 830 */
707void nd_jump_link(struct nameidata *nd, struct path *path) 831void nd_jump_link(struct path *path)
708{ 832{
833 struct nameidata *nd = current->nameidata;
709 path_put(&nd->path); 834 path_put(&nd->path);
710 835
711 nd->path = *path; 836 nd->path = *path;
@@ -713,24 +838,14 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
713 nd->flags |= LOOKUP_JUMPED; 838 nd->flags |= LOOKUP_JUMPED;
714} 839}
715 840
716void nd_set_link(struct nameidata *nd, char *path) 841static inline void put_link(struct nameidata *nd)
717{
718 nd->saved_names[nd->depth] = path;
719}
720EXPORT_SYMBOL(nd_set_link);
721
722char *nd_get_link(struct nameidata *nd)
723{
724 return nd->saved_names[nd->depth];
725}
726EXPORT_SYMBOL(nd_get_link);
727
728static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
729{ 842{
730 struct inode *inode = link->dentry->d_inode; 843 struct saved *last = nd->stack + --nd->depth;
731 if (inode->i_op->put_link) 844 struct inode *inode = last->inode;
732 inode->i_op->put_link(link->dentry, nd, cookie); 845 if (last->cookie && inode->i_op->put_link)
733 path_put(link); 846 inode->i_op->put_link(inode, last->cookie);
847 if (!(nd->flags & LOOKUP_RCU))
848 path_put(&last->link);
734} 849}
735 850
736int sysctl_protected_symlinks __read_mostly = 0; 851int sysctl_protected_symlinks __read_mostly = 0;
@@ -738,7 +853,6 @@ int sysctl_protected_hardlinks __read_mostly = 0;
738 853
739/** 854/**
740 * may_follow_link - Check symlink following for unsafe situations 855 * may_follow_link - Check symlink following for unsafe situations
741 * @link: The path of the symlink
742 * @nd: nameidata pathwalk data 856 * @nd: nameidata pathwalk data
743 * 857 *
744 * In the case of the sysctl_protected_symlinks sysctl being enabled, 858 * In the case of the sysctl_protected_symlinks sysctl being enabled,
@@ -752,7 +866,7 @@ int sysctl_protected_hardlinks __read_mostly = 0;
752 * 866 *
753 * Returns 0 if following the symlink is allowed, -ve on error. 867 * Returns 0 if following the symlink is allowed, -ve on error.
754 */ 868 */
755static inline int may_follow_link(struct path *link, struct nameidata *nd) 869static inline int may_follow_link(struct nameidata *nd)
756{ 870{
757 const struct inode *inode; 871 const struct inode *inode;
758 const struct inode *parent; 872 const struct inode *parent;
@@ -761,7 +875,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
761 return 0; 875 return 0;
762 876
763 /* Allowed if owner and follower match. */ 877 /* Allowed if owner and follower match. */
764 inode = link->dentry->d_inode; 878 inode = nd->stack[0].inode;
765 if (uid_eq(current_cred()->fsuid, inode->i_uid)) 879 if (uid_eq(current_cred()->fsuid, inode->i_uid))
766 return 0; 880 return 0;
767 881
@@ -774,9 +888,10 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
774 if (uid_eq(parent->i_uid, inode->i_uid)) 888 if (uid_eq(parent->i_uid, inode->i_uid))
775 return 0; 889 return 0;
776 890
777 audit_log_link_denied("follow_link", link); 891 if (nd->flags & LOOKUP_RCU)
778 path_put_conditional(link, nd); 892 return -ECHILD;
779 path_put(&nd->path); 893
894 audit_log_link_denied("follow_link", &nd->stack[0].link);
780 return -EACCES; 895 return -EACCES;
781} 896}
782 897
@@ -849,82 +964,68 @@ static int may_linkat(struct path *link)
849 return -EPERM; 964 return -EPERM;
850} 965}
851 966
852static __always_inline int 967static __always_inline
853follow_link(struct path *link, struct nameidata *nd, void **p) 968const char *get_link(struct nameidata *nd)
854{ 969{
855 struct dentry *dentry = link->dentry; 970 struct saved *last = nd->stack + nd->depth - 1;
971 struct dentry *dentry = last->link.dentry;
972 struct inode *inode = last->inode;
856 int error; 973 int error;
857 char *s; 974 const char *res;
858 975
859 BUG_ON(nd->flags & LOOKUP_RCU); 976 if (!(nd->flags & LOOKUP_RCU)) {
860 977 touch_atime(&last->link);
861 if (link->mnt == nd->path.mnt) 978 cond_resched();
862 mntget(link->mnt); 979 } else if (atime_needs_update(&last->link, inode)) {
863 980 if (unlikely(unlazy_walk(nd, NULL, 0)))
864 error = -ELOOP; 981 return ERR_PTR(-ECHILD);
865 if (unlikely(current->total_link_count >= 40)) 982 touch_atime(&last->link);
866 goto out_put_nd_path; 983 }
867
868 cond_resched();
869 current->total_link_count++;
870
871 touch_atime(link);
872 nd_set_link(nd, NULL);
873 984
874 error = security_inode_follow_link(link->dentry, nd); 985 error = security_inode_follow_link(dentry, inode,
875 if (error) 986 nd->flags & LOOKUP_RCU);
876 goto out_put_nd_path; 987 if (unlikely(error))
988 return ERR_PTR(error);
877 989
878 nd->last_type = LAST_BIND; 990 nd->last_type = LAST_BIND;
879 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 991 res = inode->i_link;
880 error = PTR_ERR(*p); 992 if (!res) {
881 if (IS_ERR(*p)) 993 if (nd->flags & LOOKUP_RCU) {
882 goto out_put_nd_path; 994 if (unlikely(unlazy_walk(nd, NULL, 0)))
883 995 return ERR_PTR(-ECHILD);
884 error = 0;
885 s = nd_get_link(nd);
886 if (s) {
887 if (unlikely(IS_ERR(s))) {
888 path_put(&nd->path);
889 put_link(nd, link, *p);
890 return PTR_ERR(s);
891 } 996 }
892 if (*s == '/') { 997 res = inode->i_op->follow_link(dentry, &last->cookie);
998 if (IS_ERR_OR_NULL(res)) {
999 last->cookie = NULL;
1000 return res;
1001 }
1002 }
1003 if (*res == '/') {
1004 if (nd->flags & LOOKUP_RCU) {
1005 struct dentry *d;
1006 if (!nd->root.mnt)
1007 set_root_rcu(nd);
1008 nd->path = nd->root;
1009 d = nd->path.dentry;
1010 nd->inode = d->d_inode;
1011 nd->seq = nd->root_seq;
1012 if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
1013 return ERR_PTR(-ECHILD);
1014 } else {
893 if (!nd->root.mnt) 1015 if (!nd->root.mnt)
894 set_root(nd); 1016 set_root(nd);
895 path_put(&nd->path); 1017 path_put(&nd->path);
896 nd->path = nd->root; 1018 nd->path = nd->root;
897 path_get(&nd->root); 1019 path_get(&nd->root);
898 nd->flags |= LOOKUP_JUMPED; 1020 nd->inode = nd->path.dentry->d_inode;
899 } 1021 }
900 nd->inode = nd->path.dentry->d_inode; 1022 nd->flags |= LOOKUP_JUMPED;
901 error = link_path_walk(s, nd); 1023 while (unlikely(*++res == '/'))
902 if (unlikely(error)) 1024 ;
903 put_link(nd, link, *p);
904 } 1025 }
905 1026 if (!*res)
906 return error; 1027 res = NULL;
907 1028 return res;
908out_put_nd_path:
909 *p = NULL;
910 path_put(&nd->path);
911 path_put(link);
912 return error;
913}
914
915static int follow_up_rcu(struct path *path)
916{
917 struct mount *mnt = real_mount(path->mnt);
918 struct mount *parent;
919 struct dentry *mountpoint;
920
921 parent = mnt->mnt_parent;
922 if (&parent->mnt == path->mnt)
923 return 0;
924 mountpoint = mnt->mnt_mountpoint;
925 path->dentry = mountpoint;
926 path->mnt = &parent->mnt;
927 return 1;
928} 1029}
929 1030
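Two details of the new get_link() are easy to miss: in RCU mode it only drops to ref-walk when the atime actually needs updating, and inode->i_link lets filesystems that keep the symlink body inline skip the ->follow_link() callback (and its cookie) entirely. A compilable toy of that fast/slow split, names invented for the sketch:

    struct toy_inode {
            const char *i_link;                        /* cached body or NULL */
            const char *(*follow_link)(void **cookie); /* slow path, may sleep */
    };

    static const char *toy_get_link(struct toy_inode *inode, void **cookie)
    {
            const char *res = inode->i_link;  /* fast path: no FS callback */
            if (!res)
                    res = inode->follow_link(cookie);
            return res;
    }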
930/* 1031/*
@@ -965,7 +1066,7 @@ EXPORT_SYMBOL(follow_up);
965 * - return -EISDIR to tell follow_managed() to stop and return the path we 1066 * - return -EISDIR to tell follow_managed() to stop and return the path we
966 * were called with. 1067 * were called with.
967 */ 1068 */
968static int follow_automount(struct path *path, unsigned flags, 1069static int follow_automount(struct path *path, struct nameidata *nd,
969 bool *need_mntput) 1070 bool *need_mntput)
970{ 1071{
971 struct vfsmount *mnt; 1072 struct vfsmount *mnt;
@@ -985,13 +1086,13 @@ static int follow_automount(struct path *path, unsigned flags,
985 * as being automount points. These will need the attentions 1086 * as being automount points. These will need the attentions
986 * of the daemon to instantiate them before they can be used. 1087 * of the daemon to instantiate them before they can be used.
987 */ 1088 */
988 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | 1089 if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
989 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && 1090 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
990 path->dentry->d_inode) 1091 path->dentry->d_inode)
991 return -EISDIR; 1092 return -EISDIR;
992 1093
993 current->total_link_count++; 1094 nd->total_link_count++;
994 if (current->total_link_count >= 40) 1095 if (nd->total_link_count >= 40)
995 return -ELOOP; 1096 return -ELOOP;
996 1097
997 mnt = path->dentry->d_op->d_automount(path); 1098 mnt = path->dentry->d_op->d_automount(path);
@@ -1005,7 +1106,7 @@ static int follow_automount(struct path *path, unsigned flags,
1005 * the path being looked up; if it wasn't then the remainder of 1106 * the path being looked up; if it wasn't then the remainder of
1006 * the path is inaccessible and we should say so. 1107 * the path is inaccessible and we should say so.
1007 */ 1108 */
1008 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT)) 1109 if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
1009 return -EREMOTE; 1110 return -EREMOTE;
1010 return PTR_ERR(mnt); 1111 return PTR_ERR(mnt);
1011 } 1112 }
@@ -1045,7 +1146,7 @@ static int follow_automount(struct path *path, unsigned flags,
1045 * 1146 *
1046 * Serialization is taken care of in namespace.c 1147 * Serialization is taken care of in namespace.c
1047 */ 1148 */
1048static int follow_managed(struct path *path, unsigned flags) 1149static int follow_managed(struct path *path, struct nameidata *nd)
1049{ 1150{
1050 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ 1151 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
1051 unsigned managed; 1152 unsigned managed;
@@ -1089,7 +1190,7 @@ static int follow_managed(struct path *path, unsigned flags)
1089 1190
1090 /* Handle an automount point */ 1191 /* Handle an automount point */
1091 if (managed & DCACHE_NEED_AUTOMOUNT) { 1192 if (managed & DCACHE_NEED_AUTOMOUNT) {
1092 ret = follow_automount(path, flags, &need_mntput); 1193 ret = follow_automount(path, nd, &need_mntput);
1093 if (ret < 0) 1194 if (ret < 0)
1094 break; 1195 break;
1095 continue; 1196 continue;
@@ -1103,7 +1204,11 @@ static int follow_managed(struct path *path, unsigned flags)
1103 mntput(path->mnt); 1204 mntput(path->mnt);
1104 if (ret == -EISDIR) 1205 if (ret == -EISDIR)
1105 ret = 0; 1206 ret = 0;
1106 return ret < 0 ? ret : need_mntput; 1207 if (need_mntput)
1208 nd->flags |= LOOKUP_JUMPED;
1209 if (unlikely(ret < 0))
1210 path_put_conditional(path, nd);
1211 return ret;
1107} 1212}
1108 1213
1109int follow_down_one(struct path *path) 1214int follow_down_one(struct path *path)
@@ -1133,7 +1238,7 @@ static inline int managed_dentry_rcu(struct dentry *dentry)
1133 * we meet a managed dentry that would need blocking. 1238 * we meet a managed dentry that would need blocking.
1134 */ 1239 */
1135static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 1240static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1136 struct inode **inode) 1241 struct inode **inode, unsigned *seqp)
1137{ 1242{
1138 for (;;) { 1243 for (;;) {
1139 struct mount *mounted; 1244 struct mount *mounted;
@@ -1160,7 +1265,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1160 path->mnt = &mounted->mnt; 1265 path->mnt = &mounted->mnt;
1161 path->dentry = mounted->mnt.mnt_root; 1266 path->dentry = mounted->mnt.mnt_root;
1162 nd->flags |= LOOKUP_JUMPED; 1267 nd->flags |= LOOKUP_JUMPED;
1163 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 1268 *seqp = read_seqcount_begin(&path->dentry->d_seq);
1164 /* 1269 /*
1165 * Update the inode too. We don't need to re-check the 1270 * Update the inode too. We don't need to re-check the
1166 * dentry sequence number here after this d_inode read, 1271 * dentry sequence number here after this d_inode read,
@@ -1179,10 +1284,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1179 set_root_rcu(nd); 1284 set_root_rcu(nd);
1180 1285
1181 while (1) { 1286 while (1) {
1182 if (nd->path.dentry == nd->root.dentry && 1287 if (path_equal(&nd->path, &nd->root))
1183 nd->path.mnt == nd->root.mnt) {
1184 break; 1288 break;
1185 }
1186 if (nd->path.dentry != nd->path.mnt->mnt_root) { 1289 if (nd->path.dentry != nd->path.mnt->mnt_root) {
1187 struct dentry *old = nd->path.dentry; 1290 struct dentry *old = nd->path.dentry;
1188 struct dentry *parent = old->d_parent; 1291 struct dentry *parent = old->d_parent;
@@ -1190,38 +1293,42 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1190 1293
1191 inode = parent->d_inode; 1294 inode = parent->d_inode;
1192 seq = read_seqcount_begin(&parent->d_seq); 1295 seq = read_seqcount_begin(&parent->d_seq);
1193 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1296 if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
1194 goto failed; 1297 return -ECHILD;
1195 nd->path.dentry = parent; 1298 nd->path.dentry = parent;
1196 nd->seq = seq; 1299 nd->seq = seq;
1197 break; 1300 break;
1301 } else {
1302 struct mount *mnt = real_mount(nd->path.mnt);
1303 struct mount *mparent = mnt->mnt_parent;
1304 struct dentry *mountpoint = mnt->mnt_mountpoint;
1305 struct inode *inode2 = mountpoint->d_inode;
1306 unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
1307 if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1308 return -ECHILD;
1309 if (&mparent->mnt == nd->path.mnt)
1310 break;
1311 /* we know that mountpoint was pinned */
1312 nd->path.dentry = mountpoint;
1313 nd->path.mnt = &mparent->mnt;
1314 inode = inode2;
1315 nd->seq = seq;
1198 } 1316 }
1199 if (!follow_up_rcu(&nd->path))
1200 break;
1201 inode = nd->path.dentry->d_inode;
1202 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1203 } 1317 }
1204 while (d_mountpoint(nd->path.dentry)) { 1318 while (unlikely(d_mountpoint(nd->path.dentry))) {
1205 struct mount *mounted; 1319 struct mount *mounted;
1206 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); 1320 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
1321 if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1322 return -ECHILD;
1207 if (!mounted) 1323 if (!mounted)
1208 break; 1324 break;
1209 nd->path.mnt = &mounted->mnt; 1325 nd->path.mnt = &mounted->mnt;
1210 nd->path.dentry = mounted->mnt.mnt_root; 1326 nd->path.dentry = mounted->mnt.mnt_root;
1211 inode = nd->path.dentry->d_inode; 1327 inode = nd->path.dentry->d_inode;
1212 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1328 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1213 if (read_seqretry(&mount_lock, nd->m_seq))
1214 goto failed;
1215 } 1329 }
1216 nd->inode = inode; 1330 nd->inode = inode;
1217 return 0; 1331 return 0;
1218
1219failed:
1220 nd->flags &= ~LOOKUP_RCU;
1221 if (!(nd->flags & LOOKUP_ROOT))
1222 nd->root.mnt = NULL;
1223 rcu_read_unlock();
1224 return -ECHILD;
1225} 1332}
1226 1333
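The rewritten follow_dotdot_rcu() folds the old follow_up_rcu() helper into its loop and makes the ordering explicit: sample the mountpoint and its d_seq first, then check mount_lock's sequence before trusting either, and report -ECHILD instead of tearing down RCU state locally. A toy of that sample-then-validate step (illustrative names, not kernel API):

    struct toy_mount { struct toy_mount *parent; int mountpoint_id; };

    /* 1: stepped up a mount; 0: already at the top; -1: fall back to ref-walk */
    static int toy_up_one(struct toy_mount **m, int *dentry_id,
                          unsigned gen_seen, unsigned gen_now)
    {
            struct toy_mount *parent = (*m)->parent;
            int mp = (*m)->mountpoint_id;   /* sample first ... */
            if (gen_seen != gen_now)
                    return -1;              /* ... validate before using it */
            if (parent == *m)
                    return 0;               /* absolute root */
            *dentry_id = mp;
            *m = parent;
            return 1;
    }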
1227/* 1334/*
@@ -1400,7 +1507,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
1400 * It _is_ time-critical. 1507 * It _is_ time-critical.
1401 */ 1508 */
1402static int lookup_fast(struct nameidata *nd, 1509static int lookup_fast(struct nameidata *nd,
1403 struct path *path, struct inode **inode) 1510 struct path *path, struct inode **inode,
1511 unsigned *seqp)
1404{ 1512{
1405 struct vfsmount *mnt = nd->path.mnt; 1513 struct vfsmount *mnt = nd->path.mnt;
1406 struct dentry *dentry, *parent = nd->path.dentry; 1514 struct dentry *dentry, *parent = nd->path.dentry;
@@ -1424,7 +1532,7 @@ static int lookup_fast(struct nameidata *nd,
1424 * This sequence count validates that the inode matches 1532 * This sequence count validates that the inode matches
1425 * the dentry name information from lookup. 1533 * the dentry name information from lookup.
1426 */ 1534 */
1427 *inode = dentry->d_inode; 1535 *inode = d_backing_inode(dentry);
1428 negative = d_is_negative(dentry); 1536 negative = d_is_negative(dentry);
1429 if (read_seqcount_retry(&dentry->d_seq, seq)) 1537 if (read_seqcount_retry(&dentry->d_seq, seq))
1430 return -ECHILD; 1538 return -ECHILD;
@@ -1440,8 +1548,8 @@ static int lookup_fast(struct nameidata *nd,
1440 */ 1548 */
1441 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1549 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1442 return -ECHILD; 1550 return -ECHILD;
1443 nd->seq = seq;
1444 1551
1552 *seqp = seq;
1445 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1553 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1446 status = d_revalidate(dentry, nd->flags); 1554 status = d_revalidate(dentry, nd->flags);
1447 if (unlikely(status <= 0)) { 1555 if (unlikely(status <= 0)) {
@@ -1452,10 +1560,10 @@ static int lookup_fast(struct nameidata *nd,
1452 } 1560 }
1453 path->mnt = mnt; 1561 path->mnt = mnt;
1454 path->dentry = dentry; 1562 path->dentry = dentry;
1455 if (likely(__follow_mount_rcu(nd, path, inode))) 1563 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1456 return 0; 1564 return 0;
1457unlazy: 1565unlazy:
1458 if (unlazy_walk(nd, dentry)) 1566 if (unlazy_walk(nd, dentry, seq))
1459 return -ECHILD; 1567 return -ECHILD;
1460 } else { 1568 } else {
1461 dentry = __d_lookup(parent, &nd->last); 1569 dentry = __d_lookup(parent, &nd->last);
@@ -1482,15 +1590,10 @@ unlazy:
1482 } 1590 }
1483 path->mnt = mnt; 1591 path->mnt = mnt;
1484 path->dentry = dentry; 1592 path->dentry = dentry;
1485 err = follow_managed(path, nd->flags); 1593 err = follow_managed(path, nd);
1486 if (unlikely(err < 0)) { 1594 if (likely(!err))
1487 path_put_conditional(path, nd); 1595 *inode = d_backing_inode(path->dentry);
1488 return err; 1596 return err;
1489 }
1490 if (err)
1491 nd->flags |= LOOKUP_JUMPED;
1492 *inode = path->dentry->d_inode;
1493 return 0;
1494 1597
1495need_lookup: 1598need_lookup:
1496 return 1; 1599 return 1;
@@ -1500,7 +1603,6 @@ need_lookup:
1500static int lookup_slow(struct nameidata *nd, struct path *path) 1603static int lookup_slow(struct nameidata *nd, struct path *path)
1501{ 1604{
1502 struct dentry *dentry, *parent; 1605 struct dentry *dentry, *parent;
1503 int err;
1504 1606
1505 parent = nd->path.dentry; 1607 parent = nd->path.dentry;
1506 BUG_ON(nd->inode != parent->d_inode); 1608 BUG_ON(nd->inode != parent->d_inode);
@@ -1512,14 +1614,7 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
1512 return PTR_ERR(dentry); 1614 return PTR_ERR(dentry);
1513 path->mnt = nd->path.mnt; 1615 path->mnt = nd->path.mnt;
1514 path->dentry = dentry; 1616 path->dentry = dentry;
1515 err = follow_managed(path, nd->flags); 1617 return follow_managed(path, nd);
1516 if (unlikely(err < 0)) {
1517 path_put_conditional(path, nd);
1518 return err;
1519 }
1520 if (err)
1521 nd->flags |= LOOKUP_JUMPED;
1522 return 0;
1523} 1618}
1524 1619
1525static inline int may_lookup(struct nameidata *nd) 1620static inline int may_lookup(struct nameidata *nd)
@@ -1528,7 +1623,7 @@ static inline int may_lookup(struct nameidata *nd)
1528 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); 1623 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1529 if (err != -ECHILD) 1624 if (err != -ECHILD)
1530 return err; 1625 return err;
1531 if (unlazy_walk(nd, NULL)) 1626 if (unlazy_walk(nd, NULL, 0))
1532 return -ECHILD; 1627 return -ECHILD;
1533 } 1628 }
1534 return inode_permission(nd->inode, MAY_EXEC); 1629 return inode_permission(nd->inode, MAY_EXEC);
@@ -1538,24 +1633,45 @@ static inline int handle_dots(struct nameidata *nd, int type)
1538{ 1633{
1539 if (type == LAST_DOTDOT) { 1634 if (type == LAST_DOTDOT) {
1540 if (nd->flags & LOOKUP_RCU) { 1635 if (nd->flags & LOOKUP_RCU) {
1541 if (follow_dotdot_rcu(nd)) 1636 return follow_dotdot_rcu(nd);
1542 return -ECHILD;
1543 } else 1637 } else
1544 follow_dotdot(nd); 1638 follow_dotdot(nd);
1545 } 1639 }
1546 return 0; 1640 return 0;
1547} 1641}
1548 1642
1549static void terminate_walk(struct nameidata *nd) 1643static int pick_link(struct nameidata *nd, struct path *link,
1644 struct inode *inode, unsigned seq)
1550{ 1645{
1646 int error;
1647 struct saved *last;
1648 if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
1649 path_to_nameidata(link, nd);
1650 return -ELOOP;
1651 }
1551 if (!(nd->flags & LOOKUP_RCU)) { 1652 if (!(nd->flags & LOOKUP_RCU)) {
1552 path_put(&nd->path); 1653 if (link->mnt == nd->path.mnt)
1553 } else { 1654 mntget(link->mnt);
1554 nd->flags &= ~LOOKUP_RCU; 1655 }
1555 if (!(nd->flags & LOOKUP_ROOT)) 1656 error = nd_alloc_stack(nd);
1556 nd->root.mnt = NULL; 1657 if (unlikely(error)) {
1557 rcu_read_unlock(); 1658 if (error == -ECHILD) {
1659 if (unlikely(unlazy_link(nd, link, seq)))
1660 return -ECHILD;
1661 error = nd_alloc_stack(nd);
1662 }
1663 if (error) {
1664 path_put(link);
1665 return error;
1666 }
1558 } 1667 }
1668
1669 last = nd->stack + nd->depth++;
1670 last->link = *link;
1671 last->cookie = NULL;
1672 last->inode = inode;
1673 last->seq = seq;
1674 return 1;
1559} 1675}
1560 1676
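pick_link() is now the single place a symlink enters the walk: it enforces the total-traversal limit, takes a reference only in ref-walk mode, and pushes a struct saved frame; an -ECHILD from nd_alloc_stack() means "leave RCU mode via unlazy_link() and retry the allocation". A self-contained model of the bounded push (MAXSYMLINKS is 40 in mainline; the fixed array stands in for the kernel's grown-on-demand stack):

    #include <errno.h>

    #define TOY_MAXSYMLINKS 40

    struct toy_saved { const char *body; };

    struct toy_walk {
            int depth, total_links;
            struct toy_saved stack[TOY_MAXSYMLINKS];
    };

    static int toy_pick_link(struct toy_walk *w, const char *body)
    {
            if (w->total_links++ >= TOY_MAXSYMLINKS)
                    return -ELOOP;       /* too many traversals in one walk */
            w->stack[w->depth++] = (struct toy_saved){ .body = body };
            return 1;                    /* tell the caller to read the link */
    }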
1561/* 1677/*
@@ -1564,98 +1680,68 @@ static void terminate_walk(struct nameidata *nd)
1564 * so we keep a cache of "no, this doesn't need follow_link" 1680 * so we keep a cache of "no, this doesn't need follow_link"
1565 * for the common case. 1681 * for the common case.
1566 */ 1682 */
1567static inline int should_follow_link(struct dentry *dentry, int follow) 1683static inline int should_follow_link(struct nameidata *nd, struct path *link,
1684 int follow,
1685 struct inode *inode, unsigned seq)
1568{ 1686{
1569 return unlikely(d_is_symlink(dentry)) ? follow : 0; 1687 if (likely(!d_is_symlink(link->dentry)))
1688 return 0;
1689 if (!follow)
1690 return 0;
1691 return pick_link(nd, link, inode, seq);
1570} 1692}
1571 1693
1572static inline int walk_component(struct nameidata *nd, struct path *path, 1694enum {WALK_GET = 1, WALK_PUT = 2};
1573 int follow) 1695
1696static int walk_component(struct nameidata *nd, int flags)
1574{ 1697{
1698 struct path path;
1575 struct inode *inode; 1699 struct inode *inode;
1700 unsigned seq;
1576 int err; 1701 int err;
1577 /* 1702 /*
1578 * "." and ".." are special - ".." especially so because it has 1703 * "." and ".." are special - ".." especially so because it has
1579 * to be able to know about the current root directory and 1704 * to be able to know about the current root directory and
1580 * parent relationships. 1705 * parent relationships.
1581 */ 1706 */
1582 if (unlikely(nd->last_type != LAST_NORM)) 1707 if (unlikely(nd->last_type != LAST_NORM)) {
1583 return handle_dots(nd, nd->last_type); 1708 err = handle_dots(nd, nd->last_type);
1584 err = lookup_fast(nd, path, &inode); 1709 if (flags & WALK_PUT)
1710 put_link(nd);
1711 return err;
1712 }
1713 err = lookup_fast(nd, &path, &inode, &seq);
1585 if (unlikely(err)) { 1714 if (unlikely(err)) {
1586 if (err < 0) 1715 if (err < 0)
1587 goto out_err; 1716 return err;
1588 1717
1589 err = lookup_slow(nd, path); 1718 err = lookup_slow(nd, &path);
1590 if (err < 0) 1719 if (err < 0)
1591 goto out_err; 1720 return err;
1592 1721
1593 inode = path->dentry->d_inode; 1722 inode = d_backing_inode(path.dentry);
1723 seq = 0; /* we are already out of RCU mode */
1594 err = -ENOENT; 1724 err = -ENOENT;
1595 if (d_is_negative(path->dentry)) 1725 if (d_is_negative(path.dentry))
1596 goto out_path_put; 1726 goto out_path_put;
1597 } 1727 }
1598 1728
1599 if (should_follow_link(path->dentry, follow)) { 1729 if (flags & WALK_PUT)
1600 if (nd->flags & LOOKUP_RCU) { 1730 put_link(nd);
1601 if (unlikely(nd->path.mnt != path->mnt || 1731 err = should_follow_link(nd, &path, flags & WALK_GET, inode, seq);
1602 unlazy_walk(nd, path->dentry))) { 1732 if (unlikely(err))
1603 err = -ECHILD; 1733 return err;
1604 goto out_err; 1734 path_to_nameidata(&path, nd);
1605 }
1606 }
1607 BUG_ON(inode != path->dentry->d_inode);
1608 return 1;
1609 }
1610 path_to_nameidata(path, nd);
1611 nd->inode = inode; 1735 nd->inode = inode;
1736 nd->seq = seq;
1612 return 0; 1737 return 0;
1613 1738
1614out_path_put: 1739out_path_put:
1615 path_to_nameidata(path, nd); 1740 path_to_nameidata(&path, nd);
1616out_err:
1617 terminate_walk(nd);
1618 return err; 1741 return err;
1619} 1742}
1620 1743
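walk_component() trades its old boolean for a flags word; restating how the callers in this patch combine the two bits (orientation only, no new behavior):

    /*
     *   mid-pathname component ............. WALK_GET
     *   last component of a nested link .... WALK_GET | WALK_PUT
     *   trailing component, LOOKUP_FOLLOW .. nd->depth ? WALK_PUT | WALK_GET
     *                                                  : WALK_GET
     *   trailing component, no follow ...... 0
     *
     * WALK_GET: push onto the link stack if this component is a symlink.
     * WALK_PUT: a link body just ended; pop its frame before stepping.
     */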
1621/* 1744/*
1622 * This limits recursive symlink follows to 8, while
1623 * limiting consecutive symlinks to 40.
1624 *
1625 * Without that kind of total limit, nasty chains of consecutive
1626 * symlinks can cause almost arbitrarily long lookups.
1627 */
1628static inline int nested_symlink(struct path *path, struct nameidata *nd)
1629{
1630 int res;
1631
1632 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1633 path_put_conditional(path, nd);
1634 path_put(&nd->path);
1635 return -ELOOP;
1636 }
1637 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1638
1639 nd->depth++;
1640 current->link_count++;
1641
1642 do {
1643 struct path link = *path;
1644 void *cookie;
1645
1646 res = follow_link(&link, nd, &cookie);
1647 if (res)
1648 break;
1649 res = walk_component(nd, path, LOOKUP_FOLLOW);
1650 put_link(nd, &link, cookie);
1651 } while (res > 0);
1652
1653 current->link_count--;
1654 nd->depth--;
1655 return res;
1656}
1657
1658/*
1659 * We can do the critical dentry name comparison and hashing 1745 * We can do the critical dentry name comparison and hashing
1660 * operations one word at a time, but we are limited to: 1746 * operations one word at a time, but we are limited to:
1661 * 1747 *
@@ -1781,9 +1867,8 @@ static inline u64 hash_name(const char *name)
1781 */ 1867 */
1782static int link_path_walk(const char *name, struct nameidata *nd) 1868static int link_path_walk(const char *name, struct nameidata *nd)
1783{ 1869{
1784 struct path next;
1785 int err; 1870 int err;
1786 1871
1787 while (*name=='/') 1872 while (*name=='/')
1788 name++; 1873 name++;
1789 if (!*name) 1874 if (!*name)
@@ -1796,7 +1881,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1796 1881
1797 err = may_lookup(nd); 1882 err = may_lookup(nd);
1798 if (err) 1883 if (err)
1799 break; 1884 return err;
1800 1885
1801 hash_len = hash_name(name); 1886 hash_len = hash_name(name);
1802 1887
@@ -1818,7 +1903,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1818 struct qstr this = { { .hash_len = hash_len }, .name = name }; 1903 struct qstr this = { { .hash_len = hash_len }, .name = name };
1819 err = parent->d_op->d_hash(parent, &this); 1904 err = parent->d_op->d_hash(parent, &this);
1820 if (err < 0) 1905 if (err < 0)
1821 break; 1906 return err;
1822 hash_len = this.hash_len; 1907 hash_len = this.hash_len;
1823 name = this.name; 1908 name = this.name;
1824 } 1909 }
@@ -1830,7 +1915,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1830 1915
1831 name += hashlen_len(hash_len); 1916 name += hashlen_len(hash_len);
1832 if (!*name) 1917 if (!*name)
1833 return 0; 1918 goto OK;
1834 /* 1919 /*
1835 * If it wasn't NUL, we know it was '/'. Skip that 1920 * If it wasn't NUL, we know it was '/'. Skip that
1836 * slash, and continue until no more slashes. 1921 * slash, and continue until no more slashes.
@@ -1838,57 +1923,73 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1838 do { 1923 do {
1839 name++; 1924 name++;
1840 } while (unlikely(*name == '/')); 1925 } while (unlikely(*name == '/'));
1841 if (!*name) 1926 if (unlikely(!*name)) {
1842 return 0; 1927OK:
1843 1928 /* pathname body, done */
1844 err = walk_component(nd, &next, LOOKUP_FOLLOW); 1929 if (!nd->depth)
1930 return 0;
1931 name = nd->stack[nd->depth - 1].name;
1932 /* trailing symlink, done */
1933 if (!name)
1934 return 0;
1935 /* last component of nested symlink */
1936 err = walk_component(nd, WALK_GET | WALK_PUT);
1937 } else {
1938 err = walk_component(nd, WALK_GET);
1939 }
1845 if (err < 0) 1940 if (err < 0)
1846 return err; 1941 return err;
1847 1942
1848 if (err) { 1943 if (err) {
1849 err = nested_symlink(&next, nd); 1944 const char *s = get_link(nd);
1850 if (err) 1945
1851 return err; 1946 if (unlikely(IS_ERR(s)))
1852 } 1947 return PTR_ERR(s);
1853 if (!d_can_lookup(nd->path.dentry)) { 1948 err = 0;
1854 err = -ENOTDIR; 1949 if (unlikely(!s)) {
1855 break; 1950 /* jumped */
1951 put_link(nd);
1952 } else {
1953 nd->stack[nd->depth - 1].name = name;
1954 name = s;
1955 continue;
1956 }
1856 } 1957 }
1958 if (unlikely(!d_can_lookup(nd->path.dentry)))
1959 return -ENOTDIR;
1857 } 1960 }
1858 terminate_walk(nd);
1859 return err;
1860} 1961}
1861 1962
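This tail of link_path_walk() is where the old recursion dies: hitting a symlink saves the unfinished pathname into nd->stack[depth - 1].name and continues the same loop on the link body; exhausting a body pops the saved name and resumes. A runnable miniature of that save/resume discipline — components are single characters and 'L' is the only symlink, so all parsing, mounts, and RCU are elided:

    #define TOY_MAXDEPTH 40

    static const char *toy_readlink(char comp)
    {
            return comp == 'L' ? "ab" : 0;   /* 'L' -> body "ab" */
    }

    static int toy_walk(const char *name)
    {
            const char *stack[TOY_MAXDEPTH];
            int depth = 0;

            for (;;) {
                    while (*name == '\0') {         /* end of current body */
                            if (!depth)
                                    return 0;       /* whole pathname done */
                            name = stack[--depth];  /* pop: resume caller */
                    }
                    const char *body = toy_readlink(*name++);
                    if (body) {                     /* symlink: nest */
                            if (depth == TOY_MAXDEPTH)
                                    return -1;      /* kernel caps traversals at 40 */
                            stack[depth++] = name;  /* save the resume point */
                            name = body;
                    }
            }
    }

    int main(void) { return toy_walk("aLb"); }   /* visits a, a, b, b */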
1862static int path_init(int dfd, const struct filename *name, unsigned int flags, 1963static const char *path_init(struct nameidata *nd, unsigned flags)
1863 struct nameidata *nd)
1864{ 1964{
1865 int retval = 0; 1965 int retval = 0;
1866 const char *s = name->name; 1966 const char *s = nd->name->name;
1867 1967
1868 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1968 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1869 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; 1969 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
1870 nd->depth = 0; 1970 nd->depth = 0;
1871 nd->base = NULL; 1971 nd->total_link_count = 0;
1872 if (flags & LOOKUP_ROOT) { 1972 if (flags & LOOKUP_ROOT) {
1873 struct dentry *root = nd->root.dentry; 1973 struct dentry *root = nd->root.dentry;
1874 struct inode *inode = root->d_inode; 1974 struct inode *inode = root->d_inode;
1875 if (*s) { 1975 if (*s) {
1876 if (!d_can_lookup(root)) 1976 if (!d_can_lookup(root))
1877 return -ENOTDIR; 1977 return ERR_PTR(-ENOTDIR);
1878 retval = inode_permission(inode, MAY_EXEC); 1978 retval = inode_permission(inode, MAY_EXEC);
1879 if (retval) 1979 if (retval)
1880 return retval; 1980 return ERR_PTR(retval);
1881 } 1981 }
1882 nd->path = nd->root; 1982 nd->path = nd->root;
1883 nd->inode = inode; 1983 nd->inode = inode;
1884 if (flags & LOOKUP_RCU) { 1984 if (flags & LOOKUP_RCU) {
1885 rcu_read_lock(); 1985 rcu_read_lock();
1886 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1986 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1987 nd->root_seq = nd->seq;
1887 nd->m_seq = read_seqbegin(&mount_lock); 1988 nd->m_seq = read_seqbegin(&mount_lock);
1888 } else { 1989 } else {
1889 path_get(&nd->path); 1990 path_get(&nd->path);
1890 } 1991 }
1891 goto done; 1992 return s;
1892 } 1993 }
1893 1994
1894 nd->root.mnt = NULL; 1995 nd->root.mnt = NULL;
@@ -1903,7 +2004,7 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
1903 path_get(&nd->root); 2004 path_get(&nd->root);
1904 } 2005 }
1905 nd->path = nd->root; 2006 nd->path = nd->root;
1906 } else if (dfd == AT_FDCWD) { 2007 } else if (nd->dfd == AT_FDCWD) {
1907 if (flags & LOOKUP_RCU) { 2008 if (flags & LOOKUP_RCU) {
1908 struct fs_struct *fs = current->fs; 2009 struct fs_struct *fs = current->fs;
1909 unsigned seq; 2010 unsigned seq;
@@ -1920,180 +2021,205 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags,
1920 } 2021 }
1921 } else { 2022 } else {
1922 /* Caller must check execute permissions on the starting path component */ 2023 /* Caller must check execute permissions on the starting path component */
1923 struct fd f = fdget_raw(dfd); 2024 struct fd f = fdget_raw(nd->dfd);
1924 struct dentry *dentry; 2025 struct dentry *dentry;
1925 2026
1926 if (!f.file) 2027 if (!f.file)
1927 return -EBADF; 2028 return ERR_PTR(-EBADF);
1928 2029
1929 dentry = f.file->f_path.dentry; 2030 dentry = f.file->f_path.dentry;
1930 2031
1931 if (*s) { 2032 if (*s) {
1932 if (!d_can_lookup(dentry)) { 2033 if (!d_can_lookup(dentry)) {
1933 fdput(f); 2034 fdput(f);
1934 return -ENOTDIR; 2035 return ERR_PTR(-ENOTDIR);
1935 } 2036 }
1936 } 2037 }
1937 2038
1938 nd->path = f.file->f_path; 2039 nd->path = f.file->f_path;
1939 if (flags & LOOKUP_RCU) { 2040 if (flags & LOOKUP_RCU) {
1940 if (f.flags & FDPUT_FPUT)
1941 nd->base = f.file;
1942 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1943 rcu_read_lock(); 2041 rcu_read_lock();
2042 nd->inode = nd->path.dentry->d_inode;
2043 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1944 } else { 2044 } else {
1945 path_get(&nd->path); 2045 path_get(&nd->path);
1946 fdput(f); 2046 nd->inode = nd->path.dentry->d_inode;
1947 } 2047 }
2048 fdput(f);
2049 return s;
1948 } 2050 }
1949 2051
1950 nd->inode = nd->path.dentry->d_inode; 2052 nd->inode = nd->path.dentry->d_inode;
1951 if (!(flags & LOOKUP_RCU)) 2053 if (!(flags & LOOKUP_RCU))
1952 goto done; 2054 return s;
1953 if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) 2055 if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
1954 goto done; 2056 return s;
1955 if (!(nd->flags & LOOKUP_ROOT)) 2057 if (!(nd->flags & LOOKUP_ROOT))
1956 nd->root.mnt = NULL; 2058 nd->root.mnt = NULL;
1957 rcu_read_unlock(); 2059 rcu_read_unlock();
1958 return -ECHILD; 2060 return ERR_PTR(-ECHILD);
1959done:
1960 current->total_link_count = 0;
1961 return link_path_walk(s, nd);
1962} 2061}
1963 2062
1964static void path_cleanup(struct nameidata *nd) 2063static const char *trailing_symlink(struct nameidata *nd)
1965{ 2064{
1966 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 2065 const char *s;
1967 path_put(&nd->root); 2066 int error = may_follow_link(nd);
1968 nd->root.mnt = NULL; 2067 if (unlikely(error))
1969 } 2068 return ERR_PTR(error);
1970 if (unlikely(nd->base)) 2069 nd->flags |= LOOKUP_PARENT;
1971 fput(nd->base); 2070 nd->stack[0].name = NULL;
2071 s = get_link(nd);
2072 return s ? s : "";
1972} 2073}
1973 2074
1974static inline int lookup_last(struct nameidata *nd, struct path *path) 2075static inline int lookup_last(struct nameidata *nd)
1975{ 2076{
1976 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) 2077 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1977 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 2078 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1978 2079
1979 nd->flags &= ~LOOKUP_PARENT; 2080 nd->flags &= ~LOOKUP_PARENT;
1980 return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW); 2081 return walk_component(nd,
2082 nd->flags & LOOKUP_FOLLOW
2083 ? nd->depth
2084 ? WALK_PUT | WALK_GET
2085 : WALK_GET
2086 : 0);
1981} 2087}
1982 2088
1983/* Returns 0 and nd will be valid on success; Returns error, otherwise. */ 2089/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
1984static int path_lookupat(int dfd, const struct filename *name, 2090static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
1985 unsigned int flags, struct nameidata *nd)
1986{ 2091{
1987 struct path path; 2092 const char *s = path_init(nd, flags);
1988 int err; 2093 int err;
1989 2094
1990 /* 2095 if (IS_ERR(s))
1991 * Path walking is largely split up into 2 different synchronisation 2096 return PTR_ERR(s);
1992 * schemes, rcu-walk and ref-walk (explained in 2097 while (!(err = link_path_walk(s, nd))
1993 * Documentation/filesystems/path-lookup.txt). These share much of the 2098 && ((err = lookup_last(nd)) > 0)) {
1994 * path walk code, but some things particularly setup, cleanup, and 2099 s = trailing_symlink(nd);
1995 * following mounts are sufficiently divergent that functions are 2100 if (IS_ERR(s)) {
1996 * duplicated. Typically there is a function foo(), and its RCU 2101 err = PTR_ERR(s);
1997 * analogue, foo_rcu(). 2102 break;
1998 *
1999 * -ECHILD is the error number of choice (just to avoid clashes) that
2000 * is returned if some aspect of an rcu-walk fails. Such an error must
2001 * be handled by restarting a traditional ref-walk (which will always
2002 * be able to complete).
2003 */
2004 err = path_init(dfd, name, flags, nd);
2005 if (!err && !(flags & LOOKUP_PARENT)) {
2006 err = lookup_last(nd, &path);
2007 while (err > 0) {
2008 void *cookie;
2009 struct path link = path;
2010 err = may_follow_link(&link, nd);
2011 if (unlikely(err))
2012 break;
2013 nd->flags |= LOOKUP_PARENT;
2014 err = follow_link(&link, nd, &cookie);
2015 if (err)
2016 break;
2017 err = lookup_last(nd, &path);
2018 put_link(nd, &link, cookie);
2019 } 2103 }
2020 } 2104 }
2021
2022 if (!err) 2105 if (!err)
2023 err = complete_walk(nd); 2106 err = complete_walk(nd);
2024 2107
2025 if (!err && nd->flags & LOOKUP_DIRECTORY) { 2108 if (!err && nd->flags & LOOKUP_DIRECTORY)
2026 if (!d_can_lookup(nd->path.dentry)) { 2109 if (!d_can_lookup(nd->path.dentry))
2027 path_put(&nd->path);
2028 err = -ENOTDIR; 2110 err = -ENOTDIR;
2029 } 2111 if (!err) {
2112 *path = nd->path;
2113 nd->path.mnt = NULL;
2114 nd->path.dentry = NULL;
2030 } 2115 }
2031 2116 terminate_walk(nd);
2032 path_cleanup(nd);
2033 return err; 2117 return err;
2034} 2118}
2035 2119
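The same driver shape now recurs for the lookup, parent, and mountpoint walks: link_path_walk() consumes a body, the last-component handler returns >0 exactly when it picked a trailing symlink, and trailing_symlink() feeds the next body back in (returning "" when the link jumped elsewhere, so the loop still terminates). Quoting the skeleton rather than adding behavior:

    /*
     *     while (!(err = link_path_walk(s, nd))
     *            && (err = <last-component handler>(nd, ...)) > 0) {
     *             s = trailing_symlink(nd);
     *             if (IS_ERR(s)) { err = PTR_ERR(s); break; }
     *     }
     *     ... success handling ...
     *     terminate_walk(nd);
     */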
2036static int filename_lookup(int dfd, struct filename *name, 2120static int filename_lookup(int dfd, struct filename *name, unsigned flags,
2037 unsigned int flags, struct nameidata *nd) 2121 struct path *path, struct path *root)
2038{ 2122{
2039 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); 2123 int retval;
2124 struct nameidata nd;
2125 if (IS_ERR(name))
2126 return PTR_ERR(name);
2127 if (unlikely(root)) {
2128 nd.root = *root;
2129 flags |= LOOKUP_ROOT;
2130 }
2131 set_nameidata(&nd, dfd, name);
2132 retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
2040 if (unlikely(retval == -ECHILD)) 2133 if (unlikely(retval == -ECHILD))
2041 retval = path_lookupat(dfd, name, flags, nd); 2134 retval = path_lookupat(&nd, flags, path);
2042 if (unlikely(retval == -ESTALE)) 2135 if (unlikely(retval == -ESTALE))
2043 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); 2136 retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
2044 2137
2045 if (likely(!retval)) 2138 if (likely(!retval))
2046 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); 2139 audit_inode(name, path->dentry, flags & LOOKUP_PARENT);
2140 restore_nameidata();
2141 putname(name);
2047 return retval; 2142 return retval;
2048} 2143}
2049 2144
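filename_lookup() keeps namei's long-standing escalation ladder — optimistic RCU walk first, a reference-taking retry on -ECHILD, then a forced-revalidation pass on -ESTALE — but now owns the nameidata, the audit call, and the putname(). A self-contained model of the ladder (all names invented for the sketch):

    #include <errno.h>

    enum { TOY_RCU = 1, TOY_REVAL = 2 };

    /* stand-in for one complete walk attempt */
    static int toy_walk_once(unsigned flags) { (void)flags; return 0; }

    static int toy_lookup(unsigned flags)
    {
            int err = toy_walk_once(flags | TOY_RCU);       /* lockless first */
            if (err == -ECHILD)
                    err = toy_walk_once(flags);             /* take references */
            if (err == -ESTALE)
                    err = toy_walk_once(flags | TOY_REVAL); /* force revalidation */
            return err;
    }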
2145/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
2146static int path_parentat(struct nameidata *nd, unsigned flags,
2147 struct path *parent)
2148{
2149 const char *s = path_init(nd, flags);
2150 int err;
2151 if (IS_ERR(s))
2152 return PTR_ERR(s);
2153 err = link_path_walk(s, nd);
2154 if (!err)
2155 err = complete_walk(nd);
2156 if (!err) {
2157 *parent = nd->path;
2158 nd->path.mnt = NULL;
2159 nd->path.dentry = NULL;
2160 }
2161 terminate_walk(nd);
2162 return err;
2163}
2164
2165static struct filename *filename_parentat(int dfd, struct filename *name,
2166 unsigned int flags, struct path *parent,
2167 struct qstr *last, int *type)
2168{
2169 int retval;
2170 struct nameidata nd;
2171
2172 if (IS_ERR(name))
2173 return name;
2174 set_nameidata(&nd, dfd, name);
2175 retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
2176 if (unlikely(retval == -ECHILD))
2177 retval = path_parentat(&nd, flags, parent);
2178 if (unlikely(retval == -ESTALE))
2179 retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
2180 if (likely(!retval)) {
2181 *last = nd.last;
2182 *type = nd.last_type;
2183 audit_inode(name, parent->dentry, LOOKUP_PARENT);
2184 } else {
2185 putname(name);
2186 name = ERR_PTR(retval);
2187 }
2188 restore_nameidata();
2189 return name;
2190}
2191
2050/* does lookup, returns the object with parent locked */ 2192/* does lookup, returns the object with parent locked */
2051struct dentry *kern_path_locked(const char *name, struct path *path) 2193struct dentry *kern_path_locked(const char *name, struct path *path)
2052{ 2194{
2053 struct filename *filename = getname_kernel(name); 2195 struct filename *filename;
2054 struct nameidata nd;
2055 struct dentry *d; 2196 struct dentry *d;
2056 int err; 2197 struct qstr last;
2198 int type;
2057 2199
2200 filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
2201 &last, &type);
2058 if (IS_ERR(filename)) 2202 if (IS_ERR(filename))
2059 return ERR_CAST(filename); 2203 return ERR_CAST(filename);
2060 2204 if (unlikely(type != LAST_NORM)) {
2061 err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd); 2205 path_put(path);
2062 if (err) { 2206 putname(filename);
2063 d = ERR_PTR(err); 2207 return ERR_PTR(-EINVAL);
2064 goto out;
2065 }
2066 if (nd.last_type != LAST_NORM) {
2067 path_put(&nd.path);
2068 d = ERR_PTR(-EINVAL);
2069 goto out;
2070 } 2208 }
2071 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2209 mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2072 d = __lookup_hash(&nd.last, nd.path.dentry, 0); 2210 d = __lookup_hash(&last, path->dentry, 0);
2073 if (IS_ERR(d)) { 2211 if (IS_ERR(d)) {
2074 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2212 mutex_unlock(&path->dentry->d_inode->i_mutex);
2075 path_put(&nd.path); 2213 path_put(path);
2076 goto out;
2077 } 2214 }
2078 *path = nd.path;
2079out:
2080 putname(filename); 2215 putname(filename);
2081 return d; 2216 return d;
2082} 2217}
2083 2218
2084int kern_path(const char *name, unsigned int flags, struct path *path) 2219int kern_path(const char *name, unsigned int flags, struct path *path)
2085{ 2220{
2086 struct nameidata nd; 2221 return filename_lookup(AT_FDCWD, getname_kernel(name),
2087 struct filename *filename = getname_kernel(name); 2222 flags, path, NULL);
2088 int res = PTR_ERR(filename);
2089
2090 if (!IS_ERR(filename)) {
2091 res = filename_lookup(AT_FDCWD, filename, flags, &nd);
2092 putname(filename);
2093 if (!res)
2094 *path = nd.path;
2095 }
2096 return res;
2097} 2223}
2098EXPORT_SYMBOL(kern_path); 2224EXPORT_SYMBOL(kern_path);
2099 2225
@@ -2109,36 +2235,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2109 const char *name, unsigned int flags, 2235 const char *name, unsigned int flags,
2110 struct path *path) 2236 struct path *path)
2111{ 2237{
2112 struct filename *filename = getname_kernel(name); 2238 struct path root = {.mnt = mnt, .dentry = dentry};
2113 int err = PTR_ERR(filename); 2239 /* the first argument of filename_lookup() is ignored with root */
2114 2240 return filename_lookup(AT_FDCWD, getname_kernel(name),
2115 BUG_ON(flags & LOOKUP_PARENT); 2241 flags, path, &root);
2116
2117 /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
2118 if (!IS_ERR(filename)) {
2119 struct nameidata nd;
2120 nd.root.dentry = dentry;
2121 nd.root.mnt = mnt;
2122 err = filename_lookup(AT_FDCWD, filename,
2123 flags | LOOKUP_ROOT, &nd);
2124 if (!err)
2125 *path = nd.path;
2126 putname(filename);
2127 }
2128 return err;
2129} 2242}
2130EXPORT_SYMBOL(vfs_path_lookup); 2243EXPORT_SYMBOL(vfs_path_lookup);
2131 2244
2132/*
2133 * Restricted form of lookup. Doesn't follow links, single-component only,
2134 * needs parent already locked. Doesn't follow mounts.
2135 * SMP-safe.
2136 */
2137static struct dentry *lookup_hash(struct nameidata *nd)
2138{
2139 return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
2140}
2141
2142/** 2245/**
2143 * lookup_one_len - filesystem helper to lookup single pathname component 2246 * lookup_one_len - filesystem helper to lookup single pathname component
2144 * @name: pathname component to lookup 2247 * @name: pathname component to lookup
@@ -2193,27 +2296,10 @@ EXPORT_SYMBOL(lookup_one_len);
2193int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2296int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
2194 struct path *path, int *empty) 2297 struct path *path, int *empty)
2195{ 2298{
2196 struct nameidata nd; 2299 return filename_lookup(dfd, getname_flags(name, flags, empty),
2197 struct filename *tmp = getname_flags(name, flags, empty); 2300 flags, path, NULL);
2198 int err = PTR_ERR(tmp);
2199 if (!IS_ERR(tmp)) {
2200
2201 BUG_ON(flags & LOOKUP_PARENT);
2202
2203 err = filename_lookup(dfd, tmp, flags, &nd);
2204 putname(tmp);
2205 if (!err)
2206 *path = nd.path;
2207 }
2208 return err;
2209}
2210
2211int user_path_at(int dfd, const char __user *name, unsigned flags,
2212 struct path *path)
2213{
2214 return user_path_at_empty(dfd, name, flags, path, NULL);
2215} 2301}
2216EXPORT_SYMBOL(user_path_at); 2302EXPORT_SYMBOL(user_path_at_empty);
2217 2303
2218/* 2304/*
2219 * NB: most callers don't do anything directly with the reference to the 2305 * NB: most callers don't do anything directly with the reference to the
@@ -2221,26 +2307,16 @@ EXPORT_SYMBOL(user_path_at);
2221 * allocated by getname. So we must hold the reference to it until all 2307 * allocated by getname. So we must hold the reference to it until all
2222 * path-walking is complete. 2308 * path-walking is complete.
2223 */ 2309 */
2224static struct filename * 2310static inline struct filename *
2225user_path_parent(int dfd, const char __user *path, struct nameidata *nd, 2311user_path_parent(int dfd, const char __user *path,
2312 struct path *parent,
2313 struct qstr *last,
2314 int *type,
2226 unsigned int flags) 2315 unsigned int flags)
2227{ 2316{
2228 struct filename *s = getname(path);
2229 int error;
2230
2231 /* only LOOKUP_REVAL is allowed in extra flags */ 2317 /* only LOOKUP_REVAL is allowed in extra flags */
2232 flags &= LOOKUP_REVAL; 2318 return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL,
2233 2319 parent, last, type);
2234 if (IS_ERR(s))
2235 return s;
2236
2237 error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd);
2238 if (error) {
2239 putname(s);
2240 return ERR_PTR(error);
2241 }
2242
2243 return s;
2244} 2320}
2245 2321
2246/** 2322/**
@@ -2279,10 +2355,8 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2279 2355
2280 /* If we're in rcuwalk, drop out of it to handle last component */ 2356 /* If we're in rcuwalk, drop out of it to handle last component */
2281 if (nd->flags & LOOKUP_RCU) { 2357 if (nd->flags & LOOKUP_RCU) {
2282 if (unlazy_walk(nd, NULL)) { 2358 if (unlazy_walk(nd, NULL, 0))
2283 error = -ECHILD; 2359 return -ECHILD;
2284 goto out;
2285 }
2286 } 2360 }
2287 2361
2288 nd->flags &= ~LOOKUP_PARENT; 2362 nd->flags &= ~LOOKUP_PARENT;
@@ -2290,7 +2364,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2290 if (unlikely(nd->last_type != LAST_NORM)) { 2364 if (unlikely(nd->last_type != LAST_NORM)) {
2291 error = handle_dots(nd, nd->last_type); 2365 error = handle_dots(nd, nd->last_type);
2292 if (error) 2366 if (error)
2293 goto out; 2367 return error;
2294 dentry = dget(nd->path.dentry); 2368 dentry = dget(nd->path.dentry);
2295 goto done; 2369 goto done;
2296 } 2370 }
@@ -2305,74 +2379,60 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2305 */ 2379 */
2306 dentry = d_alloc(dir, &nd->last); 2380 dentry = d_alloc(dir, &nd->last);
2307 if (!dentry) { 2381 if (!dentry) {
2308 error = -ENOMEM;
2309 mutex_unlock(&dir->d_inode->i_mutex); 2382 mutex_unlock(&dir->d_inode->i_mutex);
2310 goto out; 2383 return -ENOMEM;
2311 } 2384 }
2312 dentry = lookup_real(dir->d_inode, dentry, nd->flags); 2385 dentry = lookup_real(dir->d_inode, dentry, nd->flags);
2313 error = PTR_ERR(dentry);
2314 if (IS_ERR(dentry)) { 2386 if (IS_ERR(dentry)) {
2315 mutex_unlock(&dir->d_inode->i_mutex); 2387 mutex_unlock(&dir->d_inode->i_mutex);
2316 goto out; 2388 return PTR_ERR(dentry);
2317 } 2389 }
2318 } 2390 }
2319 mutex_unlock(&dir->d_inode->i_mutex); 2391 mutex_unlock(&dir->d_inode->i_mutex);
2320 2392
2321done: 2393done:
2322 if (d_is_negative(dentry)) { 2394 if (d_is_negative(dentry)) {
2323 error = -ENOENT;
2324 dput(dentry); 2395 dput(dentry);
2325 goto out; 2396 return -ENOENT;
2326 } 2397 }
2398 if (nd->depth)
2399 put_link(nd);
2327 path->dentry = dentry; 2400 path->dentry = dentry;
2328 path->mnt = nd->path.mnt; 2401 path->mnt = nd->path.mnt;
2329 if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) 2402 error = should_follow_link(nd, path, nd->flags & LOOKUP_FOLLOW,
2330 return 1; 2403 d_backing_inode(dentry), 0);
2404 if (unlikely(error))
2405 return error;
2331 mntget(path->mnt); 2406 mntget(path->mnt);
2332 follow_mount(path); 2407 follow_mount(path);
2333 error = 0; 2408 return 0;
2334out:
2335 terminate_walk(nd);
2336 return error;
2337} 2409}
2338 2410
2339/** 2411/**
2340 * path_mountpoint - look up a path to be umounted 2412 * path_mountpoint - look up a path to be umounted
2341 * @dfd: directory file descriptor to start walk from 2413 * @nd: lookup context
2342 * @name: full pathname to walk
2343 * @path: pointer to container for result
2344 * @flags: lookup flags 2414 * @flags: lookup flags
2415 * @path: pointer to container for result
2345 * 2416 *
2346 * Look up the given name, but don't attempt to revalidate the last component. 2417 * Look up the given name, but don't attempt to revalidate the last component.
2347 * Returns 0 and "path" will be valid on success; Returns error otherwise. 2418 * Returns 0 and "path" will be valid on success; Returns error otherwise.
2348 */ 2419 */
2349static int 2420static int
2350path_mountpoint(int dfd, const struct filename *name, struct path *path, 2421path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
2351 unsigned int flags)
2352{ 2422{
2353 struct nameidata nd; 2423 const char *s = path_init(nd, flags);
2354 int err; 2424 int err;
2355 2425 if (IS_ERR(s))
2356 err = path_init(dfd, name, flags, &nd); 2426 return PTR_ERR(s);
2357 if (unlikely(err)) 2427 while (!(err = link_path_walk(s, nd)) &&
2358 goto out; 2428 (err = mountpoint_last(nd, path)) > 0) {
2359 2429 s = trailing_symlink(nd);
2360 err = mountpoint_last(&nd, path); 2430 if (IS_ERR(s)) {
2361 while (err > 0) { 2431 err = PTR_ERR(s);
2362 void *cookie;
2363 struct path link = *path;
2364 err = may_follow_link(&link, &nd);
2365 if (unlikely(err))
2366 break;
2367 nd.flags |= LOOKUP_PARENT;
2368 err = follow_link(&link, &nd, &cookie);
2369 if (err)
2370 break; 2432 break;
2371 err = mountpoint_last(&nd, path); 2433 }
2372 put_link(&nd, &link, cookie);
2373 } 2434 }
2374out: 2435 terminate_walk(nd);
2375 path_cleanup(&nd);
2376 return err; 2436 return err;
2377} 2437}
2378 2438
@@ -2380,16 +2440,19 @@ static int
2380filename_mountpoint(int dfd, struct filename *name, struct path *path, 2440filename_mountpoint(int dfd, struct filename *name, struct path *path,
2381 unsigned int flags) 2441 unsigned int flags)
2382{ 2442{
2443 struct nameidata nd;
2383 int error; 2444 int error;
2384 if (IS_ERR(name)) 2445 if (IS_ERR(name))
2385 return PTR_ERR(name); 2446 return PTR_ERR(name);
2386 error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); 2447 set_nameidata(&nd, dfd, name);
2448 error = path_mountpoint(&nd, flags | LOOKUP_RCU, path);
2387 if (unlikely(error == -ECHILD)) 2449 if (unlikely(error == -ECHILD))
2388 error = path_mountpoint(dfd, name, path, flags); 2450 error = path_mountpoint(&nd, flags, path);
2389 if (unlikely(error == -ESTALE)) 2451 if (unlikely(error == -ESTALE))
2390 error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); 2452 error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
2391 if (likely(!error)) 2453 if (likely(!error))
2392 audit_inode(name, path->dentry, 0); 2454 audit_inode(name, path->dentry, 0);
2455 restore_nameidata();
2393 putname(name); 2456 putname(name);
2394 return error; 2457 return error;
2395} 2458}
@@ -2456,7 +2519,7 @@ EXPORT_SYMBOL(__check_sticky);
2456 */ 2519 */
2457static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) 2520static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2458{ 2521{
2459 struct inode *inode = victim->d_inode; 2522 struct inode *inode = d_backing_inode(victim);
2460 int error; 2523 int error;
2461 2524
2462 if (d_is_negative(victim)) 2525 if (d_is_negative(victim))
@@ -2922,18 +2985,19 @@ out_dput:
2922/* 2985/*
2923 * Handle the last step of open() 2986 * Handle the last step of open()
2924 */ 2987 */
2925static int do_last(struct nameidata *nd, struct path *path, 2988static int do_last(struct nameidata *nd,
2926 struct file *file, const struct open_flags *op, 2989 struct file *file, const struct open_flags *op,
2927 int *opened, struct filename *name) 2990 int *opened)
2928{ 2991{
2929 struct dentry *dir = nd->path.dentry; 2992 struct dentry *dir = nd->path.dentry;
2930 int open_flag = op->open_flag; 2993 int open_flag = op->open_flag;
2931 bool will_truncate = (open_flag & O_TRUNC) != 0; 2994 bool will_truncate = (open_flag & O_TRUNC) != 0;
2932 bool got_write = false; 2995 bool got_write = false;
2933 int acc_mode = op->acc_mode; 2996 int acc_mode = op->acc_mode;
2997 unsigned seq;
2934 struct inode *inode; 2998 struct inode *inode;
2935 bool symlink_ok = false;
2936 struct path save_parent = { .dentry = NULL, .mnt = NULL }; 2999 struct path save_parent = { .dentry = NULL, .mnt = NULL };
3000 struct path path;
2937 bool retried = false; 3001 bool retried = false;
2938 int error; 3002 int error;
2939 3003
@@ -2942,7 +3006,7 @@ static int do_last(struct nameidata *nd, struct path *path,
2942 3006
2943 if (nd->last_type != LAST_NORM) { 3007 if (nd->last_type != LAST_NORM) {
2944 error = handle_dots(nd, nd->last_type); 3008 error = handle_dots(nd, nd->last_type);
2945 if (error) 3009 if (unlikely(error))
2946 return error; 3010 return error;
2947 goto finish_open; 3011 goto finish_open;
2948 } 3012 }
@@ -2950,15 +3014,13 @@ static int do_last(struct nameidata *nd, struct path *path,
2950 if (!(open_flag & O_CREAT)) { 3014 if (!(open_flag & O_CREAT)) {
2951 if (nd->last.name[nd->last.len]) 3015 if (nd->last.name[nd->last.len])
2952 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 3016 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2953 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2954 symlink_ok = true;
2955 /* we _can_ be in RCU mode here */ 3017 /* we _can_ be in RCU mode here */
2956 error = lookup_fast(nd, path, &inode); 3018 error = lookup_fast(nd, &path, &inode, &seq);
2957 if (likely(!error)) 3019 if (likely(!error))
2958 goto finish_lookup; 3020 goto finish_lookup;
2959 3021
2960 if (error < 0) 3022 if (error < 0)
2961 goto out; 3023 return error;
2962 3024
2963 BUG_ON(nd->inode != dir->d_inode); 3025 BUG_ON(nd->inode != dir->d_inode);
2964 } else { 3026 } else {
@@ -2972,11 +3034,10 @@ static int do_last(struct nameidata *nd, struct path *path,
2972 if (error) 3034 if (error)
2973 return error; 3035 return error;
2974 3036
2975 audit_inode(name, dir, LOOKUP_PARENT); 3037 audit_inode(nd->name, dir, LOOKUP_PARENT);
2976 error = -EISDIR;
2977 /* trailing slashes? */ 3038 /* trailing slashes? */
2978 if (nd->last.name[nd->last.len]) 3039 if (unlikely(nd->last.name[nd->last.len]))
2979 goto out; 3040 return -EISDIR;
2980 } 3041 }
2981 3042
2982retry_lookup: 3043retry_lookup:
@@ -2991,7 +3052,7 @@ retry_lookup:
2991 */ 3052 */
2992 } 3053 }
2993 mutex_lock(&dir->d_inode->i_mutex); 3054 mutex_lock(&dir->d_inode->i_mutex);
2994 error = lookup_open(nd, path, file, op, got_write, opened); 3055 error = lookup_open(nd, &path, file, op, got_write, opened);
2995 mutex_unlock(&dir->d_inode->i_mutex); 3056 mutex_unlock(&dir->d_inode->i_mutex);
2996 3057
2997 if (error <= 0) { 3058 if (error <= 0) {
@@ -3002,7 +3063,7 @@ retry_lookup:
3002 !S_ISREG(file_inode(file)->i_mode)) 3063 !S_ISREG(file_inode(file)->i_mode))
3003 will_truncate = false; 3064 will_truncate = false;
3004 3065
3005 audit_inode(name, file->f_path.dentry, 0); 3066 audit_inode(nd->name, file->f_path.dentry, 0);
3006 goto opened; 3067 goto opened;
3007 } 3068 }
3008 3069
@@ -3011,15 +3072,15 @@ retry_lookup:
3011 open_flag &= ~O_TRUNC; 3072 open_flag &= ~O_TRUNC;
3012 will_truncate = false; 3073 will_truncate = false;
3013 acc_mode = MAY_OPEN; 3074 acc_mode = MAY_OPEN;
3014 path_to_nameidata(path, nd); 3075 path_to_nameidata(&path, nd);
3015 goto finish_open_created; 3076 goto finish_open_created;
3016 } 3077 }
3017 3078
3018 /* 3079 /*
3019 * create/update audit record if it already exists. 3080 * create/update audit record if it already exists.
3020 */ 3081 */
3021 if (d_is_positive(path->dentry)) 3082 if (d_is_positive(path.dentry))
3022 audit_inode(name, path->dentry, 0); 3083 audit_inode(nd->name, path.dentry, 0);
3023 3084
3024 /* 3085 /*
3025 * If atomic_open() acquired write access it is dropped now due to 3086 * If atomic_open() acquired write access it is dropped now due to
@@ -3031,47 +3092,45 @@ retry_lookup:
3031 got_write = false; 3092 got_write = false;
3032 } 3093 }
3033 3094
3034 error = -EEXIST; 3095 if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
3035 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) 3096 path_to_nameidata(&path, nd);
3036 goto exit_dput; 3097 return -EEXIST;
3037 3098 }
3038 error = follow_managed(path, nd->flags);
3039 if (error < 0)
3040 goto exit_dput;
3041 3099
3042 if (error) 3100 error = follow_managed(&path, nd);
3043 nd->flags |= LOOKUP_JUMPED; 3101 if (unlikely(error < 0))
3102 return error;
3044 3103
3045 BUG_ON(nd->flags & LOOKUP_RCU); 3104 BUG_ON(nd->flags & LOOKUP_RCU);
3046 inode = path->dentry->d_inode; 3105 inode = d_backing_inode(path.dentry);
3047 error = -ENOENT; 3106 seq = 0; /* out of RCU mode, so the value doesn't matter */
3048 if (d_is_negative(path->dentry)) { 3107 if (unlikely(d_is_negative(path.dentry))) {
3049 path_to_nameidata(path, nd); 3108 path_to_nameidata(&path, nd);
3050 goto out; 3109 return -ENOENT;
3051 } 3110 }
3052finish_lookup: 3111finish_lookup:
3053 /* we _can_ be in RCU mode here */ 3112 if (nd->depth)
3054 if (should_follow_link(path->dentry, !symlink_ok)) { 3113 put_link(nd);
3055 if (nd->flags & LOOKUP_RCU) { 3114 error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW,
3056 if (unlikely(nd->path.mnt != path->mnt || 3115 inode, seq);
3057 unlazy_walk(nd, path->dentry))) { 3116 if (unlikely(error))
3058 error = -ECHILD; 3117 return error;
3059 goto out; 3118
3060 } 3119 if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
3061 } 3120 path_to_nameidata(&path, nd);
3062 BUG_ON(inode != path->dentry->d_inode); 3121 return -ELOOP;
3063 return 1;
3064 } 3122 }
3065 3123
3066 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { 3124 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
3067 path_to_nameidata(path, nd); 3125 path_to_nameidata(&path, nd);
3068 } else { 3126 } else {
3069 save_parent.dentry = nd->path.dentry; 3127 save_parent.dentry = nd->path.dentry;
3070 save_parent.mnt = mntget(path->mnt); 3128 save_parent.mnt = mntget(path.mnt);
3071 nd->path.dentry = path->dentry; 3129 nd->path.dentry = path.dentry;
3072 3130
3073 } 3131 }
3074 nd->inode = inode; 3132 nd->inode = inode;
3133 nd->seq = seq;
3075 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ 3134 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
3076finish_open: 3135finish_open:
3077 error = complete_walk(nd); 3136 error = complete_walk(nd);
@@ -3079,7 +3138,7 @@ finish_open:
3079 path_put(&save_parent); 3138 path_put(&save_parent);
3080 return error; 3139 return error;
3081 } 3140 }
3082 audit_inode(name, nd->path.dentry, 0); 3141 audit_inode(nd->name, nd->path.dentry, 0);
3083 error = -EISDIR; 3142 error = -EISDIR;
3084 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) 3143 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
3085 goto out; 3144 goto out;
@@ -3126,12 +3185,8 @@ out:
3126 if (got_write) 3185 if (got_write)
3127 mnt_drop_write(nd->path.mnt); 3186 mnt_drop_write(nd->path.mnt);
3128 path_put(&save_parent); 3187 path_put(&save_parent);
3129 terminate_walk(nd);
3130 return error; 3188 return error;
3131 3189
3132exit_dput:
3133 path_put_conditional(path, nd);
3134 goto out;
3135exit_fput: 3190exit_fput:
3136 fput(file); 3191 fput(file);
3137 goto out; 3192 goto out;
@@ -3155,50 +3210,46 @@ stale_open:
3155 goto retry_lookup; 3210 goto retry_lookup;
3156} 3211}
3157 3212
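
The do_last() hunks above fold the old exit_dput path into direct returns: an existing name under O_CREAT|O_EXCL now returns -EEXIST on the spot, and a trailing symlink that was not followed returns -ELOOP unless O_PATH was given. A minimal userspace check of that contract (standalone demo; the file names are made up):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("demo", O_CREAT | O_EXCL | O_WRONLY, 0600);
		close(fd);
		/* a second exclusive create of the same name fails up front */
		if (open("demo", O_CREAT | O_EXCL | O_WRONLY, 0600) < 0 && errno == EEXIST)
			puts("-EEXIST, now returned directly by do_last()");
		symlink("demo", "demo.lnk");
		/* an unfollowed trailing symlink is -ELOOP ... */
		if (open("demo.lnk", O_RDONLY | O_NOFOLLOW) < 0 && errno == ELOOP)
			puts("-ELOOP on trailing symlink without O_PATH");
		/* ... unless O_PATH asks for the link object itself */
		fd = open("demo.lnk", O_PATH | O_NOFOLLOW);
		close(fd);
		unlink("demo.lnk");
		unlink("demo");
		return 0;
	}
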
3158static int do_tmpfile(int dfd, struct filename *pathname, 3213static int do_tmpfile(struct nameidata *nd, unsigned flags,
3159 struct nameidata *nd, int flags,
3160 const struct open_flags *op, 3214 const struct open_flags *op,
3161 struct file *file, int *opened) 3215 struct file *file, int *opened)
3162{ 3216{
3163 static const struct qstr name = QSTR_INIT("/", 1); 3217 static const struct qstr name = QSTR_INIT("/", 1);
3164 struct dentry *dentry, *child; 3218 struct dentry *child;
3165 struct inode *dir; 3219 struct inode *dir;
3166 int error = path_lookupat(dfd, pathname, 3220 struct path path;
3167 flags | LOOKUP_DIRECTORY, nd); 3221 int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
3168 if (unlikely(error)) 3222 if (unlikely(error))
3169 return error; 3223 return error;
3170 error = mnt_want_write(nd->path.mnt); 3224 error = mnt_want_write(path.mnt);
3171 if (unlikely(error)) 3225 if (unlikely(error))
3172 goto out; 3226 goto out;
3227 dir = path.dentry->d_inode;
3173 /* we want directory to be writable */ 3228 /* we want directory to be writable */
3174 error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC); 3229 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
3175 if (error) 3230 if (error)
3176 goto out2; 3231 goto out2;
3177 dentry = nd->path.dentry;
3178 dir = dentry->d_inode;
3179 if (!dir->i_op->tmpfile) { 3232 if (!dir->i_op->tmpfile) {
3180 error = -EOPNOTSUPP; 3233 error = -EOPNOTSUPP;
3181 goto out2; 3234 goto out2;
3182 } 3235 }
3183 child = d_alloc(dentry, &name); 3236 child = d_alloc(path.dentry, &name);
3184 if (unlikely(!child)) { 3237 if (unlikely(!child)) {
3185 error = -ENOMEM; 3238 error = -ENOMEM;
3186 goto out2; 3239 goto out2;
3187 } 3240 }
3188 nd->flags &= ~LOOKUP_DIRECTORY; 3241 dput(path.dentry);
3189 nd->flags |= op->intent; 3242 path.dentry = child;
3190 dput(nd->path.dentry); 3243 error = dir->i_op->tmpfile(dir, child, op->mode);
3191 nd->path.dentry = child;
3192 error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode);
3193 if (error) 3244 if (error)
3194 goto out2; 3245 goto out2;
3195 audit_inode(pathname, nd->path.dentry, 0); 3246 audit_inode(nd->name, child, 0);
3196 /* Don't check for other permissions, the inode was just created */ 3247 /* Don't check for other permissions, the inode was just created */
3197 error = may_open(&nd->path, MAY_OPEN, op->open_flag); 3248 error = may_open(&path, MAY_OPEN, op->open_flag);
3198 if (error) 3249 if (error)
3199 goto out2; 3250 goto out2;
3200 file->f_path.mnt = nd->path.mnt; 3251 file->f_path.mnt = path.mnt;
3201 error = finish_open(file, nd->path.dentry, NULL, opened); 3252 error = finish_open(file, child, NULL, opened);
3202 if (error) 3253 if (error)
3203 goto out2; 3254 goto out2;
3204 error = open_check_o_direct(file); 3255 error = open_check_o_direct(file);
@@ -3211,17 +3262,17 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3211 spin_unlock(&inode->i_lock); 3262 spin_unlock(&inode->i_lock);
3212 } 3263 }
3213out2: 3264out2:
3214 mnt_drop_write(nd->path.mnt); 3265 mnt_drop_write(path.mnt);
3215out: 3266out:
3216 path_put(&nd->path); 3267 path_put(&path);
3217 return error; 3268 return error;
3218} 3269}
3219 3270
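
do_tmpfile() now resolves the directory into a local struct path and allocates the anonymous dentry under it, instead of mutating the caller's nameidata. Userspace-visible behaviour is unchanged; a sketch of the intended use (Linux-specific, assuming the running kernel and filesystem support O_TMPFILE):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char proc[64];
		/* unnamed file in /tmp: no directory entry exists yet */
		int fd = open("/tmp", O_TMPFILE | O_RDWR, 0600);
		if (fd < 0) { perror("O_TMPFILE"); return 1; }
		write(fd, "scratch", 7);
		/* optionally give it a name afterwards */
		snprintf(proc, sizeof(proc), "/proc/self/fd/%d", fd);
		if (linkat(AT_FDCWD, proc, AT_FDCWD, "/tmp/now-visible",
			   AT_SYMLINK_FOLLOW))
			perror("linkat");
		close(fd);
		return 0;
	}
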
3220static struct file *path_openat(int dfd, struct filename *pathname, 3271static struct file *path_openat(struct nameidata *nd,
3221 struct nameidata *nd, const struct open_flags *op, int flags) 3272 const struct open_flags *op, unsigned flags)
3222{ 3273{
3274 const char *s;
3223 struct file *file; 3275 struct file *file;
3224 struct path path;
3225 int opened = 0; 3276 int opened = 0;
3226 int error; 3277 int error;
3227 3278
@@ -3232,37 +3283,25 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3232 file->f_flags = op->open_flag; 3283 file->f_flags = op->open_flag;
3233 3284
3234 if (unlikely(file->f_flags & __O_TMPFILE)) { 3285 if (unlikely(file->f_flags & __O_TMPFILE)) {
3235 error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); 3286 error = do_tmpfile(nd, flags, op, file, &opened);
3236 goto out2; 3287 goto out2;
3237 } 3288 }
3238 3289
3239 error = path_init(dfd, pathname, flags, nd); 3290 s = path_init(nd, flags);
3240 if (unlikely(error)) 3291 if (IS_ERR(s)) {
3241 goto out; 3292 put_filp(file);
3242 3293 return ERR_CAST(s);
3243 error = do_last(nd, &path, file, op, &opened, pathname); 3294 }
3244 while (unlikely(error > 0)) { /* trailing symlink */ 3295 while (!(error = link_path_walk(s, nd)) &&
3245 struct path link = path; 3296 (error = do_last(nd, file, op, &opened)) > 0) {
3246 void *cookie;
3247 if (!(nd->flags & LOOKUP_FOLLOW)) {
3248 path_put_conditional(&path, nd);
3249 path_put(&nd->path);
3250 error = -ELOOP;
3251 break;
3252 }
3253 error = may_follow_link(&link, nd);
3254 if (unlikely(error))
3255 break;
3256 nd->flags |= LOOKUP_PARENT;
3257 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 3297 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
3258 error = follow_link(&link, nd, &cookie); 3298 s = trailing_symlink(nd);
3259 if (unlikely(error)) 3299 if (IS_ERR(s)) {
3300 error = PTR_ERR(s);
3260 break; 3301 break;
3261 error = do_last(nd, &path, file, op, &opened, pathname); 3302 }
3262 put_link(nd, &link, cookie);
3263 } 3303 }
3264out: 3304 terminate_walk(nd);
3265 path_cleanup(nd);
3266out2: 3305out2:
3267 if (!(opened & FILE_OPENED)) { 3306 if (!(opened & FILE_OPENED)) {
3268 BUG_ON(!error); 3307 BUG_ON(!error);
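
The rewritten path_openat() replaces the old recursive trailing-symlink dance (follow_link()/put_link() wrapped around a re-run of do_last()) with a flat loop: link_path_walk() consumes the current path string, do_last() returns a positive value when it stopped at a symlink that should be followed, and trailing_symlink() hands back the next string to walk. Stripped of the file-handling details, the shape is roughly (a paraphrase of the new code above, not a verbatim copy):

	s = path_init(nd, flags);
	while (!(error = link_path_walk(s, nd)) &&
	       (error = do_last(nd, file, op, &opened)) > 0) {
		nd->flags &= ~(LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL);
		s = trailing_symlink(nd);	/* body of the link, or ERR_PTR */
		if (IS_ERR(s)) {
			error = PTR_ERR(s);
			break;
		}
	}
	terminate_walk(nd);	/* single exit: drops path/seq state either way */
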
@@ -3287,11 +3326,13 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
3287 int flags = op->lookup_flags; 3326 int flags = op->lookup_flags;
3288 struct file *filp; 3327 struct file *filp;
3289 3328
3290 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); 3329 set_nameidata(&nd, dfd, pathname);
3330 filp = path_openat(&nd, op, flags | LOOKUP_RCU);
3291 if (unlikely(filp == ERR_PTR(-ECHILD))) 3331 if (unlikely(filp == ERR_PTR(-ECHILD)))
3292 filp = path_openat(dfd, pathname, &nd, op, flags); 3332 filp = path_openat(&nd, op, flags);
3293 if (unlikely(filp == ERR_PTR(-ESTALE))) 3333 if (unlikely(filp == ERR_PTR(-ESTALE)))
3294 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); 3334 filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
3335 restore_nameidata();
3295 return filp; 3336 return filp;
3296} 3337}
3297 3338
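
do_filp_open() keeps the three-pass ladder (RCU walk, ref-walk on -ECHILD, LOOKUP_REVAL on -ESTALE), but the nameidata is now registered with the task via set_nameidata() and unregistered with restore_nameidata(). The point of the pairing, as far as this patch shows, is nesting: a lookup started from inside ->follow_link() gets its own nameidata while remembering the outer one. A hedged sketch of that save/restore discipline (field names are illustrative, not taken from fs/namei.c):

	/* one active walk per task, the previous one stashed for nesting */
	void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
	{
		p->saved = current->nameidata;	/* illustrative field names */
		p->dfd = dfd;
		p->name = name;
		current->nameidata = p;
	}

	void restore_nameidata(void)
	{
		current->nameidata = current->nameidata->saved;
	}
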
@@ -3313,11 +3354,13 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
3313 if (unlikely(IS_ERR(filename))) 3354 if (unlikely(IS_ERR(filename)))
3314 return ERR_CAST(filename); 3355 return ERR_CAST(filename);
3315 3356
3316 file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU); 3357 set_nameidata(&nd, -1, filename);
3358 file = path_openat(&nd, op, flags | LOOKUP_RCU);
3317 if (unlikely(file == ERR_PTR(-ECHILD))) 3359 if (unlikely(file == ERR_PTR(-ECHILD)))
3318 file = path_openat(-1, filename, &nd, op, flags); 3360 file = path_openat(&nd, op, flags);
3319 if (unlikely(file == ERR_PTR(-ESTALE))) 3361 if (unlikely(file == ERR_PTR(-ESTALE)))
3320 file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL); 3362 file = path_openat(&nd, op, flags | LOOKUP_REVAL);
3363 restore_nameidata();
3321 putname(filename); 3364 putname(filename);
3322 return file; 3365 return file;
3323} 3366}
@@ -3326,7 +3369,8 @@ static struct dentry *filename_create(int dfd, struct filename *name,
3326 struct path *path, unsigned int lookup_flags) 3369 struct path *path, unsigned int lookup_flags)
3327{ 3370{
3328 struct dentry *dentry = ERR_PTR(-EEXIST); 3371 struct dentry *dentry = ERR_PTR(-EEXIST);
3329 struct nameidata nd; 3372 struct qstr last;
3373 int type;
3330 int err2; 3374 int err2;
3331 int error; 3375 int error;
3332 bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); 3376 bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
@@ -3337,26 +3381,25 @@ static struct dentry *filename_create(int dfd, struct filename *name,
3337 */ 3381 */
3338 lookup_flags &= LOOKUP_REVAL; 3382 lookup_flags &= LOOKUP_REVAL;
3339 3383
3340 error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd); 3384 name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
3341 if (error) 3385 if (IS_ERR(name))
3342 return ERR_PTR(error); 3386 return ERR_CAST(name);
3343 3387
3344 /* 3388 /*
3345 * Yucky last component or no last component at all? 3389 * Yucky last component or no last component at all?
3346 * (foo/., foo/.., /////) 3390 * (foo/., foo/.., /////)
3347 */ 3391 */
3348 if (nd.last_type != LAST_NORM) 3392 if (unlikely(type != LAST_NORM))
3349 goto out; 3393 goto out;
3350 nd.flags &= ~LOOKUP_PARENT;
3351 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
3352 3394
3353 /* don't fail immediately if it's r/o, at least try to report other errors */ 3395 /* don't fail immediately if it's r/o, at least try to report other errors */
3354 err2 = mnt_want_write(nd.path.mnt); 3396 err2 = mnt_want_write(path->mnt);
3355 /* 3397 /*
3356 * Do the final lookup. 3398 * Do the final lookup.
3357 */ 3399 */
3358 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3400 lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
3359 dentry = lookup_hash(&nd); 3401 mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3402 dentry = __lookup_hash(&last, path->dentry, lookup_flags);
3360 if (IS_ERR(dentry)) 3403 if (IS_ERR(dentry))
3361 goto unlock; 3404 goto unlock;
3362 3405
@@ -3370,7 +3413,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
3370 * all is fine. Let's be bastards - you had / on the end, you've 3413 * all is fine. Let's be bastards - you had / on the end, you've
3371 * been asking for (non-existent) directory. -ENOENT for you. 3414 * been asking for (non-existent) directory. -ENOENT for you.
3372 */ 3415 */
3373 if (unlikely(!is_dir && nd.last.name[nd.last.len])) { 3416 if (unlikely(!is_dir && last.name[last.len])) {
3374 error = -ENOENT; 3417 error = -ENOENT;
3375 goto fail; 3418 goto fail;
3376 } 3419 }
@@ -3378,31 +3421,26 @@ static struct dentry *filename_create(int dfd, struct filename *name,
3378 error = err2; 3421 error = err2;
3379 goto fail; 3422 goto fail;
3380 } 3423 }
3381 *path = nd.path; 3424 putname(name);
3382 return dentry; 3425 return dentry;
3383fail: 3426fail:
3384 dput(dentry); 3427 dput(dentry);
3385 dentry = ERR_PTR(error); 3428 dentry = ERR_PTR(error);
3386unlock: 3429unlock:
3387 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3430 mutex_unlock(&path->dentry->d_inode->i_mutex);
3388 if (!err2) 3431 if (!err2)
3389 mnt_drop_write(nd.path.mnt); 3432 mnt_drop_write(path->mnt);
3390out: 3433out:
3391 path_put(&nd.path); 3434 path_put(path);
3435 putname(name);
3392 return dentry; 3436 return dentry;
3393} 3437}
3394 3438
3395struct dentry *kern_path_create(int dfd, const char *pathname, 3439struct dentry *kern_path_create(int dfd, const char *pathname,
3396 struct path *path, unsigned int lookup_flags) 3440 struct path *path, unsigned int lookup_flags)
3397{ 3441{
3398 struct filename *filename = getname_kernel(pathname); 3442 return filename_create(dfd, getname_kernel(pathname),
3399 struct dentry *res; 3443 path, lookup_flags);
3400
3401 if (IS_ERR(filename))
3402 return ERR_CAST(filename);
3403 res = filename_create(dfd, filename, path, lookup_flags);
3404 putname(filename);
3405 return res;
3406} 3444}
3407EXPORT_SYMBOL(kern_path_create); 3445EXPORT_SYMBOL(kern_path_create);
3408 3446
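
With filename_create() now doing its own putname() on every path out, the kern_path_create()/user_path_create() wrappers shrink to single calls. The exported API is unchanged; a hedged sketch of how a caller typically drives it (kernel-side pseudo-usage, error handling abbreviated):

	struct path parent;
	struct dentry *dentry;
	int err;

	dentry = kern_path_create(AT_FDCWD, "/tmp/example-node", &parent, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	/* parent i_mutex is held and write access on the mount is taken */
	err = vfs_create(d_inode(parent.dentry), dentry, 0600, true);
	done_path_create(&parent, dentry);	/* unlock, drop write, dput+path_put */
	return err;
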
@@ -3415,16 +3453,10 @@ void done_path_create(struct path *path, struct dentry *dentry)
3415} 3453}
3416EXPORT_SYMBOL(done_path_create); 3454EXPORT_SYMBOL(done_path_create);
3417 3455
3418struct dentry *user_path_create(int dfd, const char __user *pathname, 3456inline struct dentry *user_path_create(int dfd, const char __user *pathname,
3419 struct path *path, unsigned int lookup_flags) 3457 struct path *path, unsigned int lookup_flags)
3420{ 3458{
3421 struct filename *tmp = getname(pathname); 3459 return filename_create(dfd, getname(pathname), path, lookup_flags);
3422 struct dentry *res;
3423 if (IS_ERR(tmp))
3424 return ERR_CAST(tmp);
3425 res = filename_create(dfd, tmp, path, lookup_flags);
3426 putname(tmp);
3427 return res;
3428} 3460}
3429EXPORT_SYMBOL(user_path_create); 3461EXPORT_SYMBOL(user_path_create);
3430 3462
@@ -3645,14 +3677,17 @@ static long do_rmdir(int dfd, const char __user *pathname)
3645 int error = 0; 3677 int error = 0;
3646 struct filename *name; 3678 struct filename *name;
3647 struct dentry *dentry; 3679 struct dentry *dentry;
3648 struct nameidata nd; 3680 struct path path;
3681 struct qstr last;
3682 int type;
3649 unsigned int lookup_flags = 0; 3683 unsigned int lookup_flags = 0;
3650retry: 3684retry:
3651 name = user_path_parent(dfd, pathname, &nd, lookup_flags); 3685 name = user_path_parent(dfd, pathname,
3686 &path, &last, &type, lookup_flags);
3652 if (IS_ERR(name)) 3687 if (IS_ERR(name))
3653 return PTR_ERR(name); 3688 return PTR_ERR(name);
3654 3689
3655 switch(nd.last_type) { 3690 switch (type) {
3656 case LAST_DOTDOT: 3691 case LAST_DOTDOT:
3657 error = -ENOTEMPTY; 3692 error = -ENOTEMPTY;
3658 goto exit1; 3693 goto exit1;
@@ -3664,13 +3699,12 @@ retry:
3664 goto exit1; 3699 goto exit1;
3665 } 3700 }
3666 3701
3667 nd.flags &= ~LOOKUP_PARENT; 3702 error = mnt_want_write(path.mnt);
3668 error = mnt_want_write(nd.path.mnt);
3669 if (error) 3703 if (error)
3670 goto exit1; 3704 goto exit1;
3671 3705
3672 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3706 mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3673 dentry = lookup_hash(&nd); 3707 dentry = __lookup_hash(&last, path.dentry, lookup_flags);
3674 error = PTR_ERR(dentry); 3708 error = PTR_ERR(dentry);
3675 if (IS_ERR(dentry)) 3709 if (IS_ERR(dentry))
3676 goto exit2; 3710 goto exit2;
@@ -3678,17 +3712,17 @@ retry:
3678 error = -ENOENT; 3712 error = -ENOENT;
3679 goto exit3; 3713 goto exit3;
3680 } 3714 }
3681 error = security_path_rmdir(&nd.path, dentry); 3715 error = security_path_rmdir(&path, dentry);
3682 if (error) 3716 if (error)
3683 goto exit3; 3717 goto exit3;
3684 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 3718 error = vfs_rmdir(path.dentry->d_inode, dentry);
3685exit3: 3719exit3:
3686 dput(dentry); 3720 dput(dentry);
3687exit2: 3721exit2:
3688 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3722 mutex_unlock(&path.dentry->d_inode->i_mutex);
3689 mnt_drop_write(nd.path.mnt); 3723 mnt_drop_write(path.mnt);
3690exit1: 3724exit1:
3691 path_put(&nd.path); 3725 path_put(&path);
3692 putname(name); 3726 putname(name);
3693 if (retry_estale(error, lookup_flags)) { 3727 if (retry_estale(error, lookup_flags)) {
3694 lookup_flags |= LOOKUP_REVAL; 3728 lookup_flags |= LOOKUP_REVAL;
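
do_rmdir() (and do_unlinkat() below) keep the ESTALE convention: the first walk runs without LOOKUP_REVAL, and retry_estale() sends the caller around exactly once more with revalidation forced. The pattern, isolated (do_the_lookup_and_op is a hypothetical stand-in for the body above):

	unsigned int lookup_flags = 0;
retry:
	error = do_the_lookup_and_op(lookup_flags);
	if (retry_estale(error, lookup_flags)) {	/* true only for -ESTALE
							 * without LOOKUP_REVAL */
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
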
@@ -3771,43 +3805,45 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3771 int error; 3805 int error;
3772 struct filename *name; 3806 struct filename *name;
3773 struct dentry *dentry; 3807 struct dentry *dentry;
3774 struct nameidata nd; 3808 struct path path;
3809 struct qstr last;
3810 int type;
3775 struct inode *inode = NULL; 3811 struct inode *inode = NULL;
3776 struct inode *delegated_inode = NULL; 3812 struct inode *delegated_inode = NULL;
3777 unsigned int lookup_flags = 0; 3813 unsigned int lookup_flags = 0;
3778retry: 3814retry:
3779 name = user_path_parent(dfd, pathname, &nd, lookup_flags); 3815 name = user_path_parent(dfd, pathname,
3816 &path, &last, &type, lookup_flags);
3780 if (IS_ERR(name)) 3817 if (IS_ERR(name))
3781 return PTR_ERR(name); 3818 return PTR_ERR(name);
3782 3819
3783 error = -EISDIR; 3820 error = -EISDIR;
3784 if (nd.last_type != LAST_NORM) 3821 if (type != LAST_NORM)
3785 goto exit1; 3822 goto exit1;
3786 3823
3787 nd.flags &= ~LOOKUP_PARENT; 3824 error = mnt_want_write(path.mnt);
3788 error = mnt_want_write(nd.path.mnt);
3789 if (error) 3825 if (error)
3790 goto exit1; 3826 goto exit1;
3791retry_deleg: 3827retry_deleg:
3792 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3828 mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3793 dentry = lookup_hash(&nd); 3829 dentry = __lookup_hash(&last, path.dentry, lookup_flags);
3794 error = PTR_ERR(dentry); 3830 error = PTR_ERR(dentry);
3795 if (!IS_ERR(dentry)) { 3831 if (!IS_ERR(dentry)) {
3796 /* Why not before? Because we want correct error value */ 3832 /* Why not before? Because we want correct error value */
3797 if (nd.last.name[nd.last.len]) 3833 if (last.name[last.len])
3798 goto slashes; 3834 goto slashes;
3799 inode = dentry->d_inode; 3835 inode = dentry->d_inode;
3800 if (d_is_negative(dentry)) 3836 if (d_is_negative(dentry))
3801 goto slashes; 3837 goto slashes;
3802 ihold(inode); 3838 ihold(inode);
3803 error = security_path_unlink(&nd.path, dentry); 3839 error = security_path_unlink(&path, dentry);
3804 if (error) 3840 if (error)
3805 goto exit2; 3841 goto exit2;
3806 error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); 3842 error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
3807exit2: 3843exit2:
3808 dput(dentry); 3844 dput(dentry);
3809 } 3845 }
3810 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3846 mutex_unlock(&path.dentry->d_inode->i_mutex);
3811 if (inode) 3847 if (inode)
3812 iput(inode); /* truncate the inode here */ 3848 iput(inode); /* truncate the inode here */
3813 inode = NULL; 3849 inode = NULL;
@@ -3816,9 +3852,9 @@ exit2:
3816 if (!error) 3852 if (!error)
3817 goto retry_deleg; 3853 goto retry_deleg;
3818 } 3854 }
3819 mnt_drop_write(nd.path.mnt); 3855 mnt_drop_write(path.mnt);
3820exit1: 3856exit1:
3821 path_put(&nd.path); 3857 path_put(&path);
3822 putname(name); 3858 putname(name);
3823 if (retry_estale(error, lookup_flags)) { 3859 if (retry_estale(error, lookup_flags)) {
3824 lookup_flags |= LOOKUP_REVAL; 3860 lookup_flags |= LOOKUP_REVAL;
@@ -4248,14 +4284,15 @@ EXPORT_SYMBOL(vfs_rename);
4248SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, 4284SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4249 int, newdfd, const char __user *, newname, unsigned int, flags) 4285 int, newdfd, const char __user *, newname, unsigned int, flags)
4250{ 4286{
4251 struct dentry *old_dir, *new_dir;
4252 struct dentry *old_dentry, *new_dentry; 4287 struct dentry *old_dentry, *new_dentry;
4253 struct dentry *trap; 4288 struct dentry *trap;
4254 struct nameidata oldnd, newnd; 4289 struct path old_path, new_path;
4290 struct qstr old_last, new_last;
4291 int old_type, new_type;
4255 struct inode *delegated_inode = NULL; 4292 struct inode *delegated_inode = NULL;
4256 struct filename *from; 4293 struct filename *from;
4257 struct filename *to; 4294 struct filename *to;
4258 unsigned int lookup_flags = 0; 4295 unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
4259 bool should_retry = false; 4296 bool should_retry = false;
4260 int error; 4297 int error;
4261 4298
@@ -4269,47 +4306,45 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4269 if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) 4306 if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
4270 return -EPERM; 4307 return -EPERM;
4271 4308
4309 if (flags & RENAME_EXCHANGE)
4310 target_flags = 0;
4311
4272retry: 4312retry:
4273 from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); 4313 from = user_path_parent(olddfd, oldname,
4314 &old_path, &old_last, &old_type, lookup_flags);
4274 if (IS_ERR(from)) { 4315 if (IS_ERR(from)) {
4275 error = PTR_ERR(from); 4316 error = PTR_ERR(from);
4276 goto exit; 4317 goto exit;
4277 } 4318 }
4278 4319
4279 to = user_path_parent(newdfd, newname, &newnd, lookup_flags); 4320 to = user_path_parent(newdfd, newname,
4321 &new_path, &new_last, &new_type, lookup_flags);
4280 if (IS_ERR(to)) { 4322 if (IS_ERR(to)) {
4281 error = PTR_ERR(to); 4323 error = PTR_ERR(to);
4282 goto exit1; 4324 goto exit1;
4283 } 4325 }
4284 4326
4285 error = -EXDEV; 4327 error = -EXDEV;
4286 if (oldnd.path.mnt != newnd.path.mnt) 4328 if (old_path.mnt != new_path.mnt)
4287 goto exit2; 4329 goto exit2;
4288 4330
4289 old_dir = oldnd.path.dentry;
4290 error = -EBUSY; 4331 error = -EBUSY;
4291 if (oldnd.last_type != LAST_NORM) 4332 if (old_type != LAST_NORM)
4292 goto exit2; 4333 goto exit2;
4293 4334
4294 new_dir = newnd.path.dentry;
4295 if (flags & RENAME_NOREPLACE) 4335 if (flags & RENAME_NOREPLACE)
4296 error = -EEXIST; 4336 error = -EEXIST;
4297 if (newnd.last_type != LAST_NORM) 4337 if (new_type != LAST_NORM)
4298 goto exit2; 4338 goto exit2;
4299 4339
4300 error = mnt_want_write(oldnd.path.mnt); 4340 error = mnt_want_write(old_path.mnt);
4301 if (error) 4341 if (error)
4302 goto exit2; 4342 goto exit2;
4303 4343
4304 oldnd.flags &= ~LOOKUP_PARENT;
4305 newnd.flags &= ~LOOKUP_PARENT;
4306 if (!(flags & RENAME_EXCHANGE))
4307 newnd.flags |= LOOKUP_RENAME_TARGET;
4308
4309retry_deleg: 4344retry_deleg:
4310 trap = lock_rename(new_dir, old_dir); 4345 trap = lock_rename(new_path.dentry, old_path.dentry);
4311 4346
4312 old_dentry = lookup_hash(&oldnd); 4347 old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
4313 error = PTR_ERR(old_dentry); 4348 error = PTR_ERR(old_dentry);
4314 if (IS_ERR(old_dentry)) 4349 if (IS_ERR(old_dentry))
4315 goto exit3; 4350 goto exit3;
@@ -4317,7 +4352,7 @@ retry_deleg:
4317 error = -ENOENT; 4352 error = -ENOENT;
4318 if (d_is_negative(old_dentry)) 4353 if (d_is_negative(old_dentry))
4319 goto exit4; 4354 goto exit4;
4320 new_dentry = lookup_hash(&newnd); 4355 new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
4321 error = PTR_ERR(new_dentry); 4356 error = PTR_ERR(new_dentry);
4322 if (IS_ERR(new_dentry)) 4357 if (IS_ERR(new_dentry))
4323 goto exit4; 4358 goto exit4;
@@ -4331,16 +4366,16 @@ retry_deleg:
4331 4366
4332 if (!d_is_dir(new_dentry)) { 4367 if (!d_is_dir(new_dentry)) {
4333 error = -ENOTDIR; 4368 error = -ENOTDIR;
4334 if (newnd.last.name[newnd.last.len]) 4369 if (new_last.name[new_last.len])
4335 goto exit5; 4370 goto exit5;
4336 } 4371 }
4337 } 4372 }
4338 /* unless the source is a directory trailing slashes give -ENOTDIR */ 4373 /* unless the source is a directory trailing slashes give -ENOTDIR */
4339 if (!d_is_dir(old_dentry)) { 4374 if (!d_is_dir(old_dentry)) {
4340 error = -ENOTDIR; 4375 error = -ENOTDIR;
4341 if (oldnd.last.name[oldnd.last.len]) 4376 if (old_last.name[old_last.len])
4342 goto exit5; 4377 goto exit5;
4343 if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) 4378 if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
4344 goto exit5; 4379 goto exit5;
4345 } 4380 }
4346 /* source should not be ancestor of target */ 4381 /* source should not be ancestor of target */
@@ -4353,32 +4388,32 @@ retry_deleg:
4353 if (new_dentry == trap) 4388 if (new_dentry == trap)
4354 goto exit5; 4389 goto exit5;
4355 4390
4356 error = security_path_rename(&oldnd.path, old_dentry, 4391 error = security_path_rename(&old_path, old_dentry,
4357 &newnd.path, new_dentry, flags); 4392 &new_path, new_dentry, flags);
4358 if (error) 4393 if (error)
4359 goto exit5; 4394 goto exit5;
4360 error = vfs_rename(old_dir->d_inode, old_dentry, 4395 error = vfs_rename(old_path.dentry->d_inode, old_dentry,
4361 new_dir->d_inode, new_dentry, 4396 new_path.dentry->d_inode, new_dentry,
4362 &delegated_inode, flags); 4397 &delegated_inode, flags);
4363exit5: 4398exit5:
4364 dput(new_dentry); 4399 dput(new_dentry);
4365exit4: 4400exit4:
4366 dput(old_dentry); 4401 dput(old_dentry);
4367exit3: 4402exit3:
4368 unlock_rename(new_dir, old_dir); 4403 unlock_rename(new_path.dentry, old_path.dentry);
4369 if (delegated_inode) { 4404 if (delegated_inode) {
4370 error = break_deleg_wait(&delegated_inode); 4405 error = break_deleg_wait(&delegated_inode);
4371 if (!error) 4406 if (!error)
4372 goto retry_deleg; 4407 goto retry_deleg;
4373 } 4408 }
4374 mnt_drop_write(oldnd.path.mnt); 4409 mnt_drop_write(old_path.mnt);
4375exit2: 4410exit2:
4376 if (retry_estale(error, lookup_flags)) 4411 if (retry_estale(error, lookup_flags))
4377 should_retry = true; 4412 should_retry = true;
4378 path_put(&newnd.path); 4413 path_put(&new_path);
4379 putname(to); 4414 putname(to);
4380exit1: 4415exit1:
4381 path_put(&oldnd.path); 4416 path_put(&old_path);
4382 putname(from); 4417 putname(from);
4383 if (should_retry) { 4418 if (should_retry) {
4384 should_retry = false; 4419 should_retry = false;
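
On the flag side, the hunk above is the only behavioural wiring: LOOKUP_RENAME_TARGET is applied to the destination lookup unless RENAME_EXCHANGE is set, since an exchange treats both names symmetrically. From userspace (no glibc wrapper existed for renameat2 at the time, hence the raw syscall; the flag values are the established uapi ones):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#ifndef RENAME_NOREPLACE
	#define RENAME_NOREPLACE (1 << 0)	/* fail with EEXIST, don't replace */
	#define RENAME_EXCHANGE  (1 << 1)	/* atomically swap the two names */
	#endif

	int main(void)
	{
		if (syscall(SYS_renameat2, AT_FDCWD, "a", AT_FDCWD, "b",
			    RENAME_NOREPLACE) < 0)
			perror("renameat2 NOREPLACE");	/* EEXIST if "b" exists */
		if (syscall(SYS_renameat2, AT_FDCWD, "a", AT_FDCWD, "b",
			    RENAME_EXCHANGE) < 0)
			perror("renameat2 EXCHANGE");	/* both names must exist */
		return 0;
	}
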
@@ -4437,18 +4472,19 @@ EXPORT_SYMBOL(readlink_copy);
4437 */ 4472 */
4438int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 4473int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
4439{ 4474{
4440 struct nameidata nd;
4441 void *cookie; 4475 void *cookie;
4476 struct inode *inode = d_inode(dentry);
4477 const char *link = inode->i_link;
4442 int res; 4478 int res;
4443 4479
4444 nd.depth = 0; 4480 if (!link) {
4445 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 4481 link = inode->i_op->follow_link(dentry, &cookie);
4446 if (IS_ERR(cookie)) 4482 if (IS_ERR(link))
4447 return PTR_ERR(cookie); 4483 return PTR_ERR(link);
4448 4484 }
4449 res = readlink_copy(buffer, buflen, nd_get_link(&nd)); 4485 res = readlink_copy(buffer, buflen, link);
4450 if (dentry->d_inode->i_op->put_link) 4486 if (inode->i_op->put_link)
4451 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 4487 inode->i_op->put_link(inode, cookie);
4452 return res; 4488 return res;
4453} 4489}
4454EXPORT_SYMBOL(generic_readlink); 4490EXPORT_SYMBOL(generic_readlink);
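
generic_readlink() above is the template for every conversion that follows: ->follow_link() now returns the link body as a const char * (or an ERR_PTR) and parks any teardown state in *cookie, ->put_link() takes just the inode and that cookie, and a non-NULL inode->i_link short-circuits both. A hedged sketch of a filesystem on the new contract (EXAMPLEFS_I and its target field are invented for illustration):

	static const char *examplefs_follow_link(struct dentry *dentry, void **cookie)
	{
		char *body = kstrdup(EXAMPLEFS_I(d_inode(dentry))->target, GFP_KERNEL);
		if (!body)
			return ERR_PTR(-ENOMEM);
		return *cookie = body;	/* handed back to ->put_link() later */
	}

	static void examplefs_put_link(struct inode *unused, void *cookie)
	{
		kfree(cookie);
	}

	static const struct inode_operations examplefs_symlink_iops = {
		.readlink	= generic_readlink,
		.follow_link	= examplefs_follow_link,
		.put_link	= examplefs_put_link,
	};
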
@@ -4480,22 +4516,21 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
4480} 4516}
4481EXPORT_SYMBOL(page_readlink); 4517EXPORT_SYMBOL(page_readlink);
4482 4518
4483void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 4519const char *page_follow_link_light(struct dentry *dentry, void **cookie)
4484{ 4520{
4485 struct page *page = NULL; 4521 struct page *page = NULL;
4486 nd_set_link(nd, page_getlink(dentry, &page)); 4522 char *res = page_getlink(dentry, &page);
4487 return page; 4523 if (!IS_ERR(res))
4524 *cookie = page;
4525 return res;
4488} 4526}
4489EXPORT_SYMBOL(page_follow_link_light); 4527EXPORT_SYMBOL(page_follow_link_light);
4490 4528
4491void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 4529void page_put_link(struct inode *unused, void *cookie)
4492{ 4530{
4493 struct page *page = cookie; 4531 struct page *page = cookie;
4494 4532 kunmap(page);
4495 if (page) { 4533 page_cache_release(page);
4496 kunmap(page);
4497 page_cache_release(page);
4498 }
4499} 4534}
4500EXPORT_SYMBOL(page_put_link); 4535EXPORT_SYMBOL(page_put_link);
4501 4536
diff --git a/fs/namespace.c b/fs/namespace.c
index 1b9e11167bae..9c1c43d0d4f1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -590,24 +590,35 @@ static void delayed_free_vfsmnt(struct rcu_head *head)
590} 590}
591 591
592/* call under rcu_read_lock */ 592/* call under rcu_read_lock */
593bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) 593int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
594{ 594{
595 struct mount *mnt; 595 struct mount *mnt;
596 if (read_seqretry(&mount_lock, seq)) 596 if (read_seqretry(&mount_lock, seq))
597 return false; 597 return 1;
598 if (bastard == NULL) 598 if (bastard == NULL)
599 return true; 599 return 0;
600 mnt = real_mount(bastard); 600 mnt = real_mount(bastard);
601 mnt_add_count(mnt, 1); 601 mnt_add_count(mnt, 1);
602 if (likely(!read_seqretry(&mount_lock, seq))) 602 if (likely(!read_seqretry(&mount_lock, seq)))
603 return true; 603 return 0;
604 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { 604 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
605 mnt_add_count(mnt, -1); 605 mnt_add_count(mnt, -1);
606 return false; 606 return 1;
607 }
608 return -1;
609}
610
611/* call under rcu_read_lock */
612bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
613{
614 int res = __legitimize_mnt(bastard, seq);
615 if (likely(!res))
616 return true;
617 if (unlikely(res < 0)) {
618 rcu_read_unlock();
619 mntput(bastard);
620 rcu_read_lock();
607 } 621 }
608 rcu_read_unlock();
609 mntput(bastard);
610 rcu_read_lock();
611 return false; 622 return false;
612} 623}
613 624
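
The split gives RCU-mode callers a tri-state: 0 means the count was grabbed (or mnt was NULL) and the walk may continue, 1 means it failed cleanly, and -1 means it failed with a stray reference that must be dropped outside the RCU section, exactly as legitimize_mnt() demonstrates. A caller that wants to legitimize several mounts in one pass could follow the same recipe (sketch mirroring the code above, not taken from this patch):

	int res = __legitimize_mnt(mnt, seq);
	if (unlikely(res)) {
		if (res < 0) {		/* count already bumped: undo it safely */
			rcu_read_unlock();
			mntput(mnt);
			rcu_read_lock();
		}
		return false;		/* restart in ref-walk mode */
	}
	return true;
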
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 2d56200655fe..b6de433da5db 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -20,7 +20,6 @@
20#include <linux/stat.h> 20#include <linux/stat.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/namei.h>
24 23
25/* Symlink caching in the page cache is even more simplistic 24/* Symlink caching in the page cache is even more simplistic
26 * and straight-forward than readdir caching. 25 * and straight-forward than readdir caching.
@@ -43,7 +42,7 @@ error:
43 return -EIO; 42 return -EIO;
44} 43}
45 44
46static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) 45static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
47{ 46{
48 struct inode *inode = d_inode(dentry); 47 struct inode *inode = d_inode(dentry);
49 struct page *page; 48 struct page *page;
@@ -51,19 +50,13 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
51 50
52 err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); 51 err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
53 if (err) 52 if (err)
54 goto read_failed; 53 return err;
55 page = read_cache_page(&inode->i_data, 0, 54 page = read_cache_page(&inode->i_data, 0,
56 (filler_t *)nfs_symlink_filler, inode); 55 (filler_t *)nfs_symlink_filler, inode);
57 if (IS_ERR(page)) { 56 if (IS_ERR(page))
58 err = page; 57 return ERR_CAST(page);
59 goto read_failed; 58 *cookie = page;
60 } 59 return kmap(page);
61 nd_set_link(nd, kmap(page));
62 return page;
63
64read_failed:
65 nd_set_link(nd, err);
66 return NULL;
67} 60}
68 61
69/* 62/*
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 0f35b80d17fe..443abecf01b7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -35,7 +35,7 @@
35 * ntfs_lookup - find the inode represented by a dentry in a directory inode 35 * ntfs_lookup - find the inode represented by a dentry in a directory inode
36 * @dir_ino: directory inode in which to look for the inode 36 * @dir_ino: directory inode in which to look for the inode
37 * @dent: dentry representing the inode to look for 37 * @dent: dentry representing the inode to look for
38 * @nd: lookup nameidata 38 * @flags: lookup flags
39 * 39 *
40 * In short, ntfs_lookup() looks for the inode represented by the dentry @dent 40 * In short, ntfs_lookup() looks for the inode represented by the dentry @dent
41 * in the directory inode @dir_ino and if found attaches the inode to the 41 * in the directory inode @dir_ino and if found attaches the inode to the
diff --git a/fs/open.c b/fs/open.c
index 98e5a52dc68c..e0250bdcc440 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -367,7 +367,7 @@ retry:
367 if (res) 367 if (res)
368 goto out; 368 goto out;
369 369
370 inode = path.dentry->d_inode; 370 inode = d_backing_inode(path.dentry);
371 371
372 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { 372 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
373 /* 373 /*
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 04f124884687..308379b2d0b2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -140,11 +140,12 @@ struct ovl_link_data {
140 void *cookie; 140 void *cookie;
141}; 141};
142 142
143static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) 143static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
144{ 144{
145 void *ret;
146 struct dentry *realdentry; 145 struct dentry *realdentry;
147 struct inode *realinode; 146 struct inode *realinode;
147 struct ovl_link_data *data = NULL;
148 const char *ret;
148 149
149 realdentry = ovl_dentry_real(dentry); 150 realdentry = ovl_dentry_real(dentry);
150 realinode = realdentry->d_inode; 151 realinode = realdentry->d_inode;
@@ -152,28 +153,28 @@ static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
152 if (WARN_ON(!realinode->i_op->follow_link)) 153 if (WARN_ON(!realinode->i_op->follow_link))
153 return ERR_PTR(-EPERM); 154 return ERR_PTR(-EPERM);
154 155
155 ret = realinode->i_op->follow_link(realdentry, nd);
156 if (IS_ERR(ret))
157 return ret;
158
159 if (realinode->i_op->put_link) { 156 if (realinode->i_op->put_link) {
160 struct ovl_link_data *data;
161
162 data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); 157 data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
163 if (!data) { 158 if (!data)
164 realinode->i_op->put_link(realdentry, nd, ret);
165 return ERR_PTR(-ENOMEM); 159 return ERR_PTR(-ENOMEM);
166 }
167 data->realdentry = realdentry; 160 data->realdentry = realdentry;
168 data->cookie = ret; 161 }
169 162
170 return data; 163 ret = realinode->i_op->follow_link(realdentry, cookie);
171 } else { 164 if (IS_ERR_OR_NULL(ret)) {
172 return NULL; 165 kfree(data);
166 return ret;
173 } 167 }
168
169 if (data)
170 data->cookie = *cookie;
171
172 *cookie = data;
173
174 return ret;
174} 175}
175 176
176static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) 177static void ovl_put_link(struct inode *unused, void *c)
177{ 178{
178 struct inode *realinode; 179 struct inode *realinode;
179 struct ovl_link_data *data = c; 180 struct ovl_link_data *data = c;
@@ -182,7 +183,7 @@ static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
182 return; 183 return;
183 184
184 realinode = data->realdentry->d_inode; 185 realinode = data->realdentry->d_inode;
185 realinode->i_op->put_link(data->realdentry, nd, data->cookie); 186 realinode->i_op->put_link(realinode, data->cookie);
186 kfree(data); 187 kfree(data);
187} 188}
188 189
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 093ca14f5701..286a422f440e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1380,7 +1380,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1380 return -ENOENT; 1380 return -ENOENT;
1381} 1381}
1382 1382
1383static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1383static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
1384{ 1384{
1385 struct inode *inode = d_inode(dentry); 1385 struct inode *inode = d_inode(dentry);
1386 struct path path; 1386 struct path path;
@@ -1394,7 +1394,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1394 if (error) 1394 if (error)
1395 goto out; 1395 goto out;
1396 1396
1397 nd_jump_link(nd, &path); 1397 nd_jump_link(&path);
1398 return NULL; 1398 return NULL;
1399out: 1399out:
1400 return ERR_PTR(error); 1400 return ERR_PTR(error);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8272aaba1bb0..afe232b9df6e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -23,7 +23,6 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/magic.h> 25#include <linux/magic.h>
26#include <linux/namei.h>
27 26
28#include <asm/uaccess.h> 27#include <asm/uaccess.h>
29 28
@@ -394,16 +393,16 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
394}; 393};
395#endif 394#endif
396 395
397static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 396static const char *proc_follow_link(struct dentry *dentry, void **cookie)
398{ 397{
399 struct proc_dir_entry *pde = PDE(d_inode(dentry)); 398 struct proc_dir_entry *pde = PDE(d_inode(dentry));
400 if (unlikely(!use_pde(pde))) 399 if (unlikely(!use_pde(pde)))
401 return ERR_PTR(-EINVAL); 400 return ERR_PTR(-EINVAL);
402 nd_set_link(nd, pde->data); 401 *cookie = pde;
403 return pde; 402 return pde->data;
404} 403}
405 404
406static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 405static void proc_put_link(struct inode *unused, void *p)
407{ 406{
408 unuse_pde(p); 407 unuse_pde(p);
409} 408}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index e512642dbbdc..f6e8354b8cea 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,7 +30,7 @@ static const struct proc_ns_operations *ns_entries[] = {
30 &mntns_operations, 30 &mntns_operations,
31}; 31};
32 32
33static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) 33static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
34{ 34{
35 struct inode *inode = d_inode(dentry); 35 struct inode *inode = d_inode(dentry);
36 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; 36 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
@@ -45,7 +45,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
45 if (ptrace_may_access(task, PTRACE_MODE_READ)) { 45 if (ptrace_may_access(task, PTRACE_MODE_READ)) {
46 error = ns_get_path(&ns_path, task, ns_ops); 46 error = ns_get_path(&ns_path, task, ns_ops);
47 if (!error) 47 if (!error)
48 nd_jump_link(nd, &ns_path); 48 nd_jump_link(&ns_path);
49 } 49 }
50 put_task_struct(task); 50 put_task_struct(task);
51 return error; 51 return error;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6195b4a7c3b1..113b8d061fc0 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,5 +1,4 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/namei.h>
3#include <linux/slab.h> 2#include <linux/slab.h>
4#include <linux/pid_namespace.h> 3#include <linux/pid_namespace.h>
5#include "internal.h" 4#include "internal.h"
@@ -19,21 +18,20 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
19 return readlink_copy(buffer, buflen, tmp); 18 return readlink_copy(buffer, buflen, tmp);
20} 19}
21 20
22static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 21static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
23{ 22{
24 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 23 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
25 pid_t tgid = task_tgid_nr_ns(current, ns); 24 pid_t tgid = task_tgid_nr_ns(current, ns);
26 char *name = ERR_PTR(-ENOENT); 25 char *name;
27 if (tgid) { 26
28 /* 11 for max length of signed int in decimal + NULL term */ 27 if (!tgid)
29 name = kmalloc(12, GFP_KERNEL); 28 return ERR_PTR(-ENOENT);
30 if (!name) 29 /* 11 for max length of signed int in decimal + NULL term */
31 name = ERR_PTR(-ENOMEM); 30 name = kmalloc(12, GFP_KERNEL);
32 else 31 if (!name)
33 sprintf(name, "%d", tgid); 32 return ERR_PTR(-ENOMEM);
34 } 33 sprintf(name, "%d", tgid);
35 nd_set_link(nd, name); 34 return *cookie = name;
36 return NULL;
37} 35}
38 36
39static const struct inode_operations proc_self_inode_operations = { 37static const struct inode_operations proc_self_inode_operations = {
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index a8371993b4fb..947b0f4fd0a1 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -1,5 +1,4 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/namei.h>
3#include <linux/slab.h> 2#include <linux/slab.h>
4#include <linux/pid_namespace.h> 3#include <linux/pid_namespace.h>
5#include "internal.h" 4#include "internal.h"
@@ -20,21 +19,20 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
20 return readlink_copy(buffer, buflen, tmp); 19 return readlink_copy(buffer, buflen, tmp);
21} 20}
22 21
23static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) 22static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
24{ 23{
25 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 24 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
26 pid_t tgid = task_tgid_nr_ns(current, ns); 25 pid_t tgid = task_tgid_nr_ns(current, ns);
27 pid_t pid = task_pid_nr_ns(current, ns); 26 pid_t pid = task_pid_nr_ns(current, ns);
28 char *name = ERR_PTR(-ENOENT); 27 char *name;
29 if (pid) { 28
30 name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); 29 if (!pid)
31 if (!name) 30 return ERR_PTR(-ENOENT);
32 name = ERR_PTR(-ENOMEM); 31 name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
33 else 32 if (!name)
34 sprintf(name, "%d/task/%d", tgid, pid); 33 return ERR_PTR(-ENOMEM);
35 } 34 sprintf(name, "%d/task/%d", tgid, pid);
36 nd_set_link(nd, name); 35 return *cookie = name;
37 return NULL;
38} 36}
39 37
40static const struct inode_operations proc_thread_self_inode_operations = { 38static const struct inode_operations proc_thread_self_inode_operations = {
diff --git a/fs/select.c b/fs/select.c
index f684c750e08a..015547330e88 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
189 * doesn't imply write barrier and the users expect write 189 * doesn't imply write barrier and the users expect write
190 * barrier semantics on wakeup functions. The following 190 * barrier semantics on wakeup functions. The following
191 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() 191 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
192 * and is paired with set_mb() in poll_schedule_timeout. 192 * and is paired with smp_store_mb() in poll_schedule_timeout.
193 */ 193 */
194 smp_wmb(); 194 smp_wmb();
195 pwq->triggered = 1; 195 pwq->triggered = 1;
@@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
244 /* 244 /*
245 * Prepare for the next iteration. 245 * Prepare for the next iteration.
246 * 246 *
247 * The following set_mb() serves two purposes. First, it's 247 * The following smp_store_mb() serves two purposes. First, it's
248 * the counterpart rmb of the wmb in pollwake() such that data 248 * the counterpart rmb of the wmb in pollwake() such that data
249 * written before wake up is always visible after wake up. 249 * written before wake up is always visible after wake up.
250 * Second, the full barrier guarantees that triggered clearing 250 * Second, the full barrier guarantees that triggered clearing
@@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
252 * this problem doesn't exist for the first iteration as 252 * this problem doesn't exist for the first iteration as
253 * add_wait_queue() has full barrier semantics. 253 * add_wait_queue() has full barrier semantics.
254 */ 254 */
255 set_mb(pwq->triggered, 0); 255 smp_store_mb(pwq->triggered, 0);
256 256
257 return rc; 257 return rc;
258} 258}
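
The fs/select.c change is a pure rename: set_mb() becomes smp_store_mb(), whose generic definition appears in the asm-generic/barrier.h hunk further down and now wraps the store in WRITE_ONCE(). The wake/sleep pairing it implements, reduced to its skeleton (sketch; 'triggered' stands in for pwq->triggered):

	/* waker */
	smp_wmb();			/* order the payload before the flag */
	triggered = 1;

	/* sleeper, per iteration */
	smp_store_mb(triggered, 0);	/* clear the flag, then a full barrier,
					 * before re-checking the payload */
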
diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile
index 3591f9d7a48a..7a75e70a4b61 100644
--- a/fs/sysv/Makefile
+++ b/fs/sysv/Makefile
@@ -5,4 +5,4 @@
5obj-$(CONFIG_SYSV_FS) += sysv.o 5obj-$(CONFIG_SYSV_FS) += sysv.o
6 6
7sysv-objs := ialloc.o balloc.o inode.o itree.o file.o dir.o \ 7sysv-objs := ialloc.o balloc.o inode.o itree.o file.o dir.o \
8 namei.o super.o symlink.o 8 namei.o super.o
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 88956309cc86..590ad9206e3f 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -166,8 +166,9 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
166 inode->i_op = &sysv_symlink_inode_operations; 166 inode->i_op = &sysv_symlink_inode_operations;
167 inode->i_mapping->a_ops = &sysv_aops; 167 inode->i_mapping->a_ops = &sysv_aops;
168 } else { 168 } else {
169 inode->i_op = &sysv_fast_symlink_inode_operations; 169 inode->i_op = &simple_symlink_inode_operations;
170 nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size, 170 inode->i_link = (char *)SYSV_I(inode)->i_data;
171 nd_terminate_link(inode->i_link, inode->i_size,
171 sizeof(SYSV_I(inode)->i_data) - 1); 172 sizeof(SYSV_I(inode)->i_data) - 1);
172 } 173 }
173 } else 174 } else
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
deleted file mode 100644
index d3fa0d703314..000000000000
--- a/fs/sysv/symlink.c
+++ /dev/null
@@ -1,20 +0,0 @@
1/*
2 * linux/fs/sysv/symlink.c
3 *
4 * Handling of System V filesystem fast symlinks extensions.
5 * Aug 2001, Christoph Hellwig (hch@infradead.org)
6 */
7
8#include "sysv.h"
9#include <linux/namei.h>
10
11static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
12{
13 nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data);
14 return NULL;
15}
16
17const struct inode_operations sysv_fast_symlink_inode_operations = {
18 .readlink = generic_readlink,
19 .follow_link = sysv_follow_link,
20};
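
Deleting sysv_follow_link() is possible because the fast-symlink body now lives behind inode->i_link (set in sysv_set_inode() above) and the generic simple_symlink_inode_operations take over. The shared helper is presumably no more than this (a sketch of the fs/libfs.c addition, which is not shown in this diff):

	const char *simple_follow_link(struct dentry *dentry, void **cookie)
	{
		return d_inode(dentry)->i_link;	/* in-inode body; nothing to put */
	}

	const struct inode_operations simple_symlink_inode_operations = {
		.follow_link	= simple_follow_link,
		.readlink	= generic_readlink,
	};
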
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 69d488986cce..2c13525131cd 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -161,7 +161,6 @@ extern ino_t sysv_inode_by_name(struct dentry *);
161 161
162extern const struct inode_operations sysv_file_inode_operations; 162extern const struct inode_operations sysv_file_inode_operations;
163extern const struct inode_operations sysv_dir_inode_operations; 163extern const struct inode_operations sysv_dir_inode_operations;
164extern const struct inode_operations sysv_fast_symlink_inode_operations;
165extern const struct file_operations sysv_file_operations; 164extern const struct file_operations sysv_file_operations;
166extern const struct file_operations sysv_dir_operations; 165extern const struct file_operations sysv_dir_operations;
167extern const struct address_space_operations sysv_aops; 166extern const struct address_space_operations sysv_aops;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 27060fc855d4..5c27c66c224a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -889,6 +889,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
889 889
890 memcpy(ui->data, symname, len); 890 memcpy(ui->data, symname, len);
891 ((char *)ui->data)[len] = '\0'; 891 ((char *)ui->data)[len] = '\0';
892 inode->i_link = ui->data;
892 /* 893 /*
893 * The terminating zero byte is not written to the flash media and it 894 * The terminating zero byte is not written to the flash media and it
894 * is put just to make later in-memory string processing simpler. Thus, 895 * is put just to make later in-memory string processing simpler. Thus,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 35efc103c39c..a3dfe2ae79f2 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -51,7 +51,6 @@
51 51
52#include "ubifs.h" 52#include "ubifs.h"
53#include <linux/mount.h> 53#include <linux/mount.h>
54#include <linux/namei.h>
55#include <linux/slab.h> 54#include <linux/slab.h>
56 55
57static int read_block(struct inode *inode, void *addr, unsigned int block, 56static int read_block(struct inode *inode, void *addr, unsigned int block,
@@ -1300,14 +1299,6 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
1300 ClearPageChecked(page); 1299 ClearPageChecked(page);
1301} 1300}
1302 1301
1303static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
1304{
1305 struct ubifs_inode *ui = ubifs_inode(d_inode(dentry));
1306
1307 nd_set_link(nd, ui->data);
1308 return NULL;
1309}
1310
1311int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1302int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1312{ 1303{
1313 struct inode *inode = file->f_mapping->host; 1304 struct inode *inode = file->f_mapping->host;
@@ -1570,7 +1561,7 @@ const struct inode_operations ubifs_file_inode_operations = {
1570 1561
1571const struct inode_operations ubifs_symlink_inode_operations = { 1562const struct inode_operations ubifs_symlink_inode_operations = {
1572 .readlink = generic_readlink, 1563 .readlink = generic_readlink,
1573 .follow_link = ubifs_follow_link, 1564 .follow_link = simple_follow_link,
1574 .setattr = ubifs_setattr, 1565 .setattr = ubifs_setattr,
1575 .getattr = ubifs_getattr, 1566 .getattr = ubifs_getattr,
1576 .setxattr = ubifs_setxattr, 1567 .setxattr = ubifs_setxattr,
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 75e6f04bb795..20f5dbd7c6a8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -195,6 +195,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
195 } 195 }
196 memcpy(ui->data, ino->data, ui->data_len); 196 memcpy(ui->data, ino->data, ui->data_len);
197 ((char *)ui->data)[ui->data_len] = '\0'; 197 ((char *)ui->data)[ui->data_len] = '\0';
198 inode->i_link = ui->data;
198 break; 199 break;
199 case S_IFBLK: 200 case S_IFBLK:
200 case S_IFCHR: 201 case S_IFCHR:
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index be7d42c7d938..99aaf5c9bf4d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -572,9 +572,10 @@ static void ufs_set_inode_ops(struct inode *inode)
572 inode->i_fop = &ufs_dir_operations; 572 inode->i_fop = &ufs_dir_operations;
573 inode->i_mapping->a_ops = &ufs_aops; 573 inode->i_mapping->a_ops = &ufs_aops;
574 } else if (S_ISLNK(inode->i_mode)) { 574 } else if (S_ISLNK(inode->i_mode)) {
575 if (!inode->i_blocks) 575 if (!inode->i_blocks) {
576 inode->i_op = &ufs_fast_symlink_inode_operations; 576 inode->i_op = &ufs_fast_symlink_inode_operations;
577 else { 577 inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
578 } else {
578 inode->i_op = &ufs_symlink_inode_operations; 579 inode->i_op = &ufs_symlink_inode_operations;
579 inode->i_mapping->a_ops = &ufs_aops; 580 inode->i_mapping->a_ops = &ufs_aops;
580 } 581 }
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index e491a93a7e9a..f773deb1d2e3 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -144,7 +144,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
144 } else { 144 } else {
145 /* fast symlink */ 145 /* fast symlink */
146 inode->i_op = &ufs_fast_symlink_inode_operations; 146 inode->i_op = &ufs_fast_symlink_inode_operations;
147 memcpy(UFS_I(inode)->i_u1.i_symlink, symname, l); 147 inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
148 memcpy(inode->i_link, symname, l);
148 inode->i_size = l-1; 149 inode->i_size = l-1;
149 } 150 }
150 mark_inode_dirty(inode); 151 mark_inode_dirty(inode);
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index 5b537e2fdda3..874480bb43e9 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -25,23 +25,12 @@
25 * ext2 symlink handling code 25 * ext2 symlink handling code
26 */ 26 */
27 27
28#include <linux/fs.h>
29#include <linux/namei.h>
30
31#include "ufs_fs.h" 28#include "ufs_fs.h"
32#include "ufs.h" 29#include "ufs.h"
33 30
34
35static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
36{
37 struct ufs_inode_info *p = UFS_I(d_inode(dentry));
38 nd_set_link(nd, (char*)p->i_u1.i_symlink);
39 return NULL;
40}
41
42const struct inode_operations ufs_fast_symlink_inode_operations = { 31const struct inode_operations ufs_fast_symlink_inode_operations = {
43 .readlink = generic_readlink, 32 .readlink = generic_readlink,
44 .follow_link = ufs_follow_link, 33 .follow_link = simple_follow_link,
45 .setattr = ufs_setattr, 34 .setattr = ufs_setattr,
46}; 35};
47 36
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f4cd7204e236..7f51f39f8acc 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -41,7 +41,6 @@
41 41
42#include <linux/capability.h> 42#include <linux/capability.h>
43#include <linux/xattr.h> 43#include <linux/xattr.h>
44#include <linux/namei.h>
45#include <linux/posix_acl.h> 44#include <linux/posix_acl.h>
46#include <linux/security.h> 45#include <linux/security.h>
47#include <linux/fiemap.h> 46#include <linux/fiemap.h>
@@ -414,10 +413,10 @@ xfs_vn_rename(
414 * we need to be very careful about how much stack we use. 413 * we need to be very careful about how much stack we use.
415 * uio is kmalloced for this reason... 414 * uio is kmalloced for this reason...
416 */ 415 */
417STATIC void * 416STATIC const char *
418xfs_vn_follow_link( 417xfs_vn_follow_link(
419 struct dentry *dentry, 418 struct dentry *dentry,
420 struct nameidata *nd) 419 void **cookie)
421{ 420{
422 char *link; 421 char *link;
423 int error = -ENOMEM; 422 int error = -ENOMEM;
@@ -430,14 +429,12 @@ xfs_vn_follow_link(
430 if (unlikely(error)) 429 if (unlikely(error))
431 goto out_kfree; 430 goto out_kfree;
432 431
433 nd_set_link(nd, link); 432 return *cookie = link;
434 return NULL;
435 433
436 out_kfree: 434 out_kfree:
437 kfree(link); 435 kfree(link);
438 out_err: 436 out_err:
439 nd_set_link(nd, ERR_PTR(error)); 437 return ERR_PTR(error);
440 return NULL;
441} 438}
442 439
443STATIC int 440STATIC int
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index f5c40b0fadc2..e6a83d712ef6 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -66,8 +66,8 @@
66#define smp_read_barrier_depends() do { } while (0) 66#define smp_read_barrier_depends() do { } while (0)
67#endif 67#endif
68 68
69#ifndef set_mb 69#ifndef smp_store_mb
70#define set_mb(var, value) do { (var) = (value); mb(); } while (0) 70#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
71#endif 71#endif
72 72
73#ifndef smp_mb__before_atomic 73#ifndef smp_mb__before_atomic
diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index 811fb1e9b061..3766ab34aa45 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
86 86
87/* 87/*
88 * Atomic compare and exchange. 88 * Atomic compare and exchange.
89 *
90 * Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether
91 * a cmpxchg primitive faster than repeated local irq save/restore exists.
92 */ 89 */
93#include <asm-generic/cmpxchg-local.h> 90#include <asm-generic/cmpxchg-local.h>
94 91
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
new file mode 100644
index 000000000000..83bfb87f5bf1
--- /dev/null
+++ b/include/asm-generic/qspinlock.h
@@ -0,0 +1,139 @@
1/*
2 * Queued spinlock
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
15 *
16 * Authors: Waiman Long <waiman.long@hp.com>
17 */
18#ifndef __ASM_GENERIC_QSPINLOCK_H
19#define __ASM_GENERIC_QSPINLOCK_H
20
21#include <asm-generic/qspinlock_types.h>
22
23/**
24 * queued_spin_is_locked - is the spinlock locked?
25 * @lock: Pointer to queued spinlock structure
26 * Return: 1 if it is locked, 0 otherwise
27 */
28static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
29{
30 return atomic_read(&lock->val);
31}
32
33/**
34 * queued_spin_value_unlocked - is the spinlock structure unlocked?
35 * @lock: queued spinlock structure
36 * Return: 1 if it is unlocked, 0 otherwise
37 *
38 * N.B. Whenever there are tasks waiting for the lock, it is considered
 39 * locked wrt the lockref code, to prevent the lockref code from stealing
 40 * the lock and changing things underneath it. This also allows some
41 * optimizations to be applied without conflict with lockref.
42 */
43static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
44{
45 return !atomic_read(&lock.val);
46}
47
48/**
49 * queued_spin_is_contended - check if the lock is contended
50 * @lock : Pointer to queued spinlock structure
51 * Return: 1 if lock contended, 0 otherwise
52 */
53static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
54{
55 return atomic_read(&lock->val) & ~_Q_LOCKED_MASK;
56}
57/**
58 * queued_spin_trylock - try to acquire the queued spinlock
59 * @lock : Pointer to queued spinlock structure
60 * Return: 1 if lock acquired, 0 if failed
61 */
62static __always_inline int queued_spin_trylock(struct qspinlock *lock)
63{
64 if (!atomic_read(&lock->val) &&
65 (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
66 return 1;
67 return 0;
68}
69
70extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
71
72/**
73 * queued_spin_lock - acquire a queued spinlock
74 * @lock: Pointer to queued spinlock structure
75 */
76static __always_inline void queued_spin_lock(struct qspinlock *lock)
77{
78 u32 val;
79
80 val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
81 if (likely(val == 0))
82 return;
83 queued_spin_lock_slowpath(lock, val);
84}
85
86#ifndef queued_spin_unlock
87/**
88 * queued_spin_unlock - release a queued spinlock
89 * @lock : Pointer to queued spinlock structure
90 */
91static __always_inline void queued_spin_unlock(struct qspinlock *lock)
92{
93 /*
94 * smp_mb__before_atomic() in order to guarantee release semantics
95 */
96 smp_mb__before_atomic_dec();
97 atomic_sub(_Q_LOCKED_VAL, &lock->val);
98}
99#endif
100
101/**
102 * queued_spin_unlock_wait - wait until current lock holder releases the lock
103 * @lock : Pointer to queued spinlock structure
104 *
105 * There is a very slight possibility of live-lock if the lockers keep coming
106 * and the waiter is just unfortunate enough to not see any unlock state.
107 */
108static inline void queued_spin_unlock_wait(struct qspinlock *lock)
109{
110 while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
111 cpu_relax();
112}
113
114#ifndef virt_queued_spin_lock
115static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
116{
117 return false;
118}
119#endif
120
121/*
122 * Initializier
123 */
124#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) }
125
126/*
127 * Remapping spinlock architecture specific functions to the corresponding
128 * queued spinlock functions.
129 */
130#define arch_spin_is_locked(l) queued_spin_is_locked(l)
131#define arch_spin_is_contended(l) queued_spin_is_contended(l)
132#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l)
133#define arch_spin_lock(l) queued_spin_lock(l)
134#define arch_spin_trylock(l) queued_spin_trylock(l)
135#define arch_spin_unlock(l) queued_spin_unlock(l)
136#define arch_spin_lock_flags(l, f) queued_spin_lock(l)
137#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l)
138
139#endif /* __ASM_GENERIC_QSPINLOCK_H */
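
The lock word above is a single 32-bit atomic: queued_spin_lock() attempts one
cmpxchg from 0 to _Q_LOCKED_VAL and only falls back to
queued_spin_lock_slowpath() on contention. As a hedged user-space sketch of
that fast-path/slow-path split (C11 atomics instead of the kernel's atomic_t;
the toy_* names are illustrative, and a plain spin loop stands in for the real
MCS-style slow path):

	#include <stdatomic.h>

	#define TOY_LOCKED_VAL	1U

	struct toy_qspinlock {
		atomic_uint val;	/* 0 = unlocked */
	};

	/* Illustrative stand-in for queued_spin_lock_slowpath(). */
	static void toy_spin_lock_slowpath(struct toy_qspinlock *lock)
	{
		unsigned int expected;

		do {
			expected = 0;	/* a failed CAS overwrites expected */
		} while (!atomic_compare_exchange_weak(&lock->val, &expected,
						       TOY_LOCKED_VAL));
	}

	static void toy_spin_lock(struct toy_qspinlock *lock)
	{
		unsigned int expected = 0;

		/* Fast path: one uncontended cmpxchg from 0 to locked. */
		if (atomic_compare_exchange_strong(&lock->val, &expected,
						   TOY_LOCKED_VAL))
			return;
		toy_spin_lock_slowpath(lock);
	}

	static void toy_spin_unlock(struct toy_qspinlock *lock)
	{
		/*
		 * Subtract the locked byte with release semantics, the
		 * moral equivalent of the smp_mb__before_atomic_dec() +
		 * atomic_sub() pair above.
		 */
		atomic_fetch_sub_explicit(&lock->val, TOY_LOCKED_VAL,
					  memory_order_release);
	}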
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
new file mode 100644
index 000000000000..85f888e86761
--- /dev/null
+++ b/include/asm-generic/qspinlock_types.h
@@ -0,0 +1,79 @@
+/*
+ * Queued spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
+#define __ASM_GENERIC_QSPINLOCK_TYPES_H
+
+/*
+ * Including atomic.h with PARAVIRT on will cause compilation errors because
+ * of recursive header file inclusion via paravirt_types.h. So don't include
+ * it if PARAVIRT is on.
+ */
+#ifndef CONFIG_PARAVIRT
+#include <linux/types.h>
+#include <linux/atomic.h>
+#endif
+
+typedef struct qspinlock {
+	atomic_t	val;
+} arch_spinlock_t;
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * When NR_CPUS < 16K
+ *  0- 7: locked byte
+ *     8: pending
+ *  9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ *
+ * When NR_CPUS >= 16K
+ *  0- 7: locked byte
+ *     8: pending
+ *  9-10: tail index
+ * 11-31: tail cpu (+1)
+ */
+#define _Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
+				      << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET	0
+#define _Q_LOCKED_BITS		8
+#define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#if CONFIG_NR_CPUS < (1U << 14)
+#define _Q_PENDING_BITS		8
+#else
+#define _Q_PENDING_BITS		1
+#endif
+#define _Q_PENDING_MASK		_Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET	(_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS	2
+#define _Q_TAIL_IDX_MASK	_Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET	(_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET		_Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK		(_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL		(1U << _Q_PENDING_OFFSET)
+
+#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
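
Worked through for the NR_CPUS < 16K layout documented above, the masks come
out to 0x000000ff (locked byte), 0x0000ff00 (pending), 0x00030000 (tail index)
and 0xfffc0000 (tail cpu). This stand-alone program reproduces the macros
verbatim so the values can be checked:

	#include <stdio.h>

	#define _Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
					      << _Q_ ## type ## _OFFSET)
	#define _Q_LOCKED_OFFSET	0
	#define _Q_LOCKED_BITS		8
	#define _Q_PENDING_OFFSET	(_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
	#define _Q_PENDING_BITS		8	/* the NR_CPUS < 16K case */
	#define _Q_TAIL_IDX_OFFSET	(_Q_PENDING_OFFSET + _Q_PENDING_BITS)
	#define _Q_TAIL_IDX_BITS	2
	#define _Q_TAIL_CPU_OFFSET	(_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
	#define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)

	int main(void)
	{
		printf("locked:   0x%08x\n", _Q_SET_MASK(LOCKED));   /* 0x000000ff */
		printf("pending:  0x%08x\n", _Q_SET_MASK(PENDING));  /* 0x0000ff00 */
		printf("tail idx: 0x%08x\n", _Q_SET_MASK(TAIL_IDX)); /* 0x00030000 */
		printf("tail cpu: 0x%08x\n", _Q_SET_MASK(TAIL_CPU)); /* 0xfffc0000 */
		return 0;
	}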
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index aff923ae8c4b..d87d8eced064 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -116,7 +116,6 @@ __printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-void bdi_unregister(struct backing_dev_info *bdi);
 int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 867722591be2..05be2352fef8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -250,7 +250,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int size)
 	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-	({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
+	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+
+/**
+ * READ_ONCE_CTRL - Read a value heading a control dependency
+ * @x: The value to be read, heading the control dependency
+ *
+ * Control dependencies are tricky. See Documentation/memory-barriers.txt
+ * for important information on how to use them. Note that in many cases,
+ * use of smp_load_acquire() will be much simpler. Control dependencies
+ * should be avoided except on the hottest of hotpaths.
+ */
+#define READ_ONCE_CTRL(x) \
+({ \
+	typeof(x) __val = READ_ONCE(x); \
+	smp_read_barrier_depends(); /* Enforce control dependency. */ \
+	__val; \
+})
 
 #endif /* __KERNEL__ */
 
@@ -450,7 +466,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int size)
  * with an explicit memory barrier or atomic instruction that provides the
  * required ordering.
  *
- * If possible use READ_ONCE/ASSIGN_ONCE instead.
+ * If possible use READ_ONCE()/WRITE_ONCE() instead.
  */
 #define __ACCESS_ONCE(x) ({ \
	__maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
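
As a hedged sketch of where READ_ONCE_CTRL() is meant to sit (kernel-style
pseudocode, not part of this patch; flag, msg and ack are illustrative shared
variables):

	/* Producer */
	WRITE_ONCE(msg, 42);
	smp_store_release(&flag, 1);

	/* Consumer: the branch on the value returned by READ_ONCE_CTRL()
	 * heads a control dependency, so the store to ack cannot be
	 * reordered before the load of flag. */
	if (READ_ONCE_CTRL(flag))
		WRITE_ONCE(ack, 1);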
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index cb25af461054..420311bcee38 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -45,7 +45,6 @@ extern struct dentry *arch_debugfs_dir;
 
 /* declared over in file.c */
 extern const struct file_operations debugfs_file_operations;
-extern const struct inode_operations debugfs_link_operations;
 
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e490b1..b577e801b4af 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -38,7 +38,6 @@ struct backing_dev_info;
 struct export_operations;
 struct hd_geometry;
 struct iovec;
-struct nameidata;
 struct kiocb;
 struct kobject;
 struct pipe_inode_info;
@@ -656,6 +655,7 @@ struct inode {
 		struct pipe_inode_info	*i_pipe;
 		struct block_device	*i_bdev;
 		struct cdev		*i_cdev;
+		char			*i_link;
 	};
 
 	__u32			i_generation;
@@ -1607,12 +1607,12 @@ struct file_operations {
 
 struct inode_operations {
 	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-	void * (*follow_link) (struct dentry *, struct nameidata *);
+	const char * (*follow_link) (struct dentry *, void **);
 	int (*permission) (struct inode *, int);
 	struct posix_acl * (*get_acl)(struct inode *, int);
 
 	int (*readlink) (struct dentry *, char __user *,int);
-	void (*put_link) (struct dentry *, struct nameidata *, void *);
+	void (*put_link) (struct inode *, void *);
 
 	int (*create) (struct inode *,struct dentry *, umode_t, bool);
 	int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -1879,6 +1879,7 @@ enum file_time_flags {
 	S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
@@ -2704,13 +2705,14 @@ extern const struct file_operations generic_ro_fops;
 
 extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
-extern void *page_follow_link_light(struct dentry *, struct nameidata *);
-extern void page_put_link(struct dentry *, struct nameidata *, void *);
+extern const char *page_follow_link_light(struct dentry *, void **);
+extern void page_put_link(struct inode *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
 		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
+extern void kfree_put_link(struct inode *, void *);
+extern void free_page_put_link(struct inode *, void *);
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 int vfs_getattr_nosec(struct path *path, struct kstat *stat);
@@ -2721,6 +2723,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
 void inode_set_bytes(struct inode *inode, loff_t bytes);
+const char *simple_follow_link(struct dentry *, void **);
+extern const struct inode_operations simple_symlink_inode_operations;
 
 extern int iterate_dir(struct file *, struct dir_context *);
 
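
To make the ->follow_link()/->put_link() signature change concrete, here is a
hedged sketch (illustrative, not part of this patch) of a symlink whose body
is built on the fly under the new API: follow_link returns the string and
stashes it in *cookie, and put_link - matching the new kfree_put_link()
prototype above - frees it:

	static const char *foo_follow_link(struct dentry *dentry, void **cookie)
	{
		char *body = kmalloc(64, GFP_KERNEL);	/* foo_* names are made up */

		if (!body)
			return ERR_PTR(-ENOMEM);
		snprintf(body, 64, "target-%lu", dentry->d_inode->i_ino);
		return *cookie = body;	/* freed later by ->put_link() */
	}

	static const struct inode_operations foo_symlink_iops = {
		.readlink	= generic_readlink,
		.follow_link	= foo_follow_link,
		.put_link	= kfree_put_link,  /* void (*)(struct inode *, void *) */
	};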
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 796ef9645827..a240e61a7700 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -115,13 +115,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
  * Extended Capability Register
  */
 
+#define ecap_pasid(e)		((e >> 40) & 0x1)
 #define ecap_pss(e)		((e >> 35) & 0x1f)
 #define ecap_eafs(e)		((e >> 34) & 0x1)
 #define ecap_nwfs(e)		((e >> 33) & 0x1)
 #define ecap_srs(e)		((e >> 31) & 0x1)
 #define ecap_ers(e)		((e >> 30) & 0x1)
 #define ecap_prs(e)		((e >> 29) & 0x1)
-#define ecap_pasid(e)		((e >> 28) & 0x1)
+/* PASID support used to be on bit 28 */
 #define ecap_dis(e)		((e >> 27) & 0x1)
 #define ecap_nest(e)		((e >> 26) & 0x1)
 #define ecap_mts(e)		((e >> 25) & 0x1)
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 066ba4157541..2722111591a3 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -130,8 +130,8 @@ enum bounce_type {
 };
 
 struct lock_class_stats {
-	unsigned long			contention_point[4];
-	unsigned long			contending_point[4];
+	unsigned long			contention_point[LOCKSTAT_POINTS];
+	unsigned long			contending_point[LOCKSTAT_POINTS];
 	struct lock_time		read_waittime;
 	struct lock_time		write_waittime;
 	struct lock_time		read_holdtime;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index c8990779f0c3..d8c6334cd150 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -1,16 +1,15 @@
 #ifndef _LINUX_NAMEI_H
 #define _LINUX_NAMEI_H
 
-#include <linux/dcache.h>
-#include <linux/errno.h>
-#include <linux/linkage.h>
+#include <linux/kernel.h>
 #include <linux/path.h>
-
-struct vfsmount;
-struct nameidata;
+#include <linux/fcntl.h>
+#include <linux/errno.h>
 
 enum { MAX_NESTED_LINKS = 8 };
 
+#define MAXSYMLINKS 40
+
 /*
  * Type of the last component on LOOKUP_PARENT
  */
@@ -45,13 +44,29 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_ROOT		0x2000
 #define LOOKUP_EMPTY		0x4000
 
-extern int user_path_at(int, const char __user *, unsigned, struct path *);
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
 
-#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
-#define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
-#define user_path_dir(name, path) \
-	user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path)
+static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
+		 struct path *path)
+{
+	return user_path_at_empty(dfd, name, flags, path, NULL);
+}
+
+static inline int user_path(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL);
+}
+
+static inline int user_lpath(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name, 0, path, NULL);
+}
+
+static inline int user_path_dir(const char __user *name, struct path *path)
+{
+	return user_path_at_empty(AT_FDCWD, name,
+				  LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL);
+}
 
 extern int kern_path(const char *, unsigned, struct path *);
 
@@ -70,9 +85,7 @@ extern int follow_up(struct path *);
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
-extern void nd_jump_link(struct nameidata *nd, struct path *path);
-extern void nd_set_link(struct nameidata *nd, char *path);
-extern char *nd_get_link(struct nameidata *nd);
+extern void nd_jump_link(struct path *path);
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
diff --git a/include/linux/of.h b/include/linux/of.h
index ddeaae6d2083..b871ff9d81d7 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -121,6 +121,8 @@ extern struct device_node *of_stdout;
 extern raw_spinlock_t devtree_lock;
 
 #ifdef CONFIG_OF
+void of_core_init(void);
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
 	return fwnode && fwnode->type == FWNODE_OF;
@@ -376,6 +378,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 #else /* CONFIG_OF */
 
+static inline void of_core_init(void)
+{
+}
+
 static inline bool is_of_node(struct fwnode_handle *fwnode)
 {
 	return false;
diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h
index 3a6490e81b28..703ea5c30a33 100644
--- a/include/linux/osq_lock.h
+++ b/include/linux/osq_lock.h
@@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
 extern bool osq_lock(struct optimistic_spin_queue *lock);
 extern void osq_unlock(struct optimistic_spin_queue *lock);
 
+static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
+{
+	return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
+}
+
 #endif
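
A hedged sketch of the kind of check the new osq_is_locked() enables
(illustrative only, not code from this series): a sleeping-lock slow path can
cheaply ask whether optimistic spinners are already queued before deciding
whether to spin or block.

	/* Illustrative: bail out of spinning if others are already queued. */
	static bool should_keep_spinning(struct optimistic_spin_queue *osq)
	{
		/*
		 * A non-empty OSQ means another task is already spinning on
		 * the owner; a latecomer may prefer to block rather than
		 * lengthen the spinner queue.
		 */
		return !osq_is_locked(osq);
	}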
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..a204d5266f5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -92,8 +92,6 @@ struct hw_perf_event_extra {
 	int		idx;	/* index in shared_regs->regs[] */
 };
 
-struct event_constraint;
-
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -112,8 +110,6 @@ struct hw_perf_event {
 
 			struct hw_perf_event_extra extra_reg;
 			struct hw_perf_event_extra branch_reg;
-
-			struct event_constraint *constraint;
 		};
 		struct { /* software */
 			struct hrtimer	hrtimer;
@@ -124,7 +120,7 @@ struct hw_perf_event {
 		};
 		struct { /* intel_cqm */
 			int			cqm_state;
-			int			cqm_rmid;
+			u32			cqm_rmid;
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
@@ -734,6 +730,22 @@ extern int perf_event_overflow(struct perf_event *event,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output(struct perf_event *event,
+				struct perf_sample_data *data,
+				struct pt_regs *regs);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+			   struct perf_sample_data *data,
+			   struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+			     struct perf_output_handle *handle,
+			     struct perf_sample_data *sample);
+
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
@@ -798,11 +810,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 
 extern struct static_key_deferred perf_sched_events;
 
+static __always_inline bool
+perf_sw_migrate_enabled(void)
+{
+	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
+		return true;
+	return false;
+}
+
+static inline void perf_event_task_migrate(struct task_struct *task)
+{
+	if (perf_sw_migrate_enabled())
+		task->sched_migrated = 1;
+}
+
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
 	if (static_key_false(&perf_sched_events.key))
 		__perf_event_task_sched_in(prev, task);
+
+	if (perf_sw_migrate_enabled() && task->sched_migrated) {
+		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+		perf_fetch_caller_regs(regs);
+		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+		task->sched_migrated = 0;
+	}
 }
 
 static inline void perf_event_task_sched_out(struct task_struct *prev,
@@ -925,6 +959,8 @@ perf_aux_output_skip(struct perf_output_handle *handle,
 static inline void *
 perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
 static inline void
+perf_event_task_migrate(struct task_struct *task)			{ }
+static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
 static inline void
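
The consumer-visible behaviour is unchanged: PERF_COUNT_SW_CPU_MIGRATIONS is
now raised lazily from the migrating task's own context at its next sched-in,
rather than from the scheduler's migration path. A small user-space program
counting its own migrations through this event (the perf_event_open() call
shown is the standard raw-syscall idiom, since glibc provides no wrapper):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_CPU_MIGRATIONS;
		attr.disabled = 1;

		/* self-monitoring: pid 0, any cpu */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		sleep(1);	/* do some work; migrations may occur */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
		read(fd, &count, sizeof(count));
		printf("migrations: %lld\n", count);
		close(fd);
		return 0;
	}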
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index a18b16f1dc0e..17c6b1f84a77 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -29,8 +29,8 @@
  */
 static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
 {
-	ACCESS_ONCE(list->next) = list;
-	ACCESS_ONCE(list->prev) = list;
+	WRITE_ONCE(list->next, list);
+	WRITE_ONCE(list->prev, list);
 }
 
 /*
@@ -288,7 +288,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_or_null_rcu(ptr, type, member) \
 ({ \
 	struct list_head *__ptr = (ptr); \
-	struct list_head *__next = ACCESS_ONCE(__ptr->next); \
+	struct list_head *__next = READ_ONCE(__ptr->next); \
 	likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \
 })
 
@@ -549,8 +549,8 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  */
 #define hlist_for_each_entry_from_rcu(pos, member)			\
 	for (; pos;							\
-	     pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
-			typeof(*(pos)), member))
+	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(	\
+			&(pos)->member)), typeof(*(pos)), member))
 
 #endif	/* __KERNEL__ */
 #endif
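
The ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() conversions here (and in
rcupdate.h below) follow from a known limitation: ACCESS_ONCE()'s volatile
cast is only reliable for scalar types, and some GCC versions drop the
volatile qualifier on aggregate-typed accesses. READ_ONCE()/WRITE_ONCE()
sidestep this with size-dispatched copies through a union, roughly as in this
simplified user-space sketch (not the kernel's exact implementation):

	#include <string.h>

	#define TOY_READ_ONCE(x)					\
	({								\
		union { __typeof__(x) val; char c[sizeof(x)]; } u;	\
		if (sizeof(x) == sizeof(unsigned int))			\
			/* scalar-sized: one real volatile load */	\
			*(unsigned int *)u.c =				\
				*(volatile unsigned int *)&(x);		\
		else							\
			/* odd-sized aggregates still get copied once */\
			memcpy(u.c, (void *)&(x), sizeof(x));		\
		u.val;							\
	})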
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 573a5afd5ed8..03a899aabd17 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -292,10 +292,6 @@ void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 struct notifier_block;
-void rcu_idle_enter(void);
-void rcu_idle_exit(void);
-void rcu_irq_enter(void);
-void rcu_irq_exit(void);
 int rcu_cpu_notify(struct notifier_block *self,
 		   unsigned long action, void *hcpu);
 
@@ -364,8 +360,8 @@ extern struct srcu_struct tasks_rcu_exit_srcu;
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		rcu_all_qs(); \
-		if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
-			ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
+		if (READ_ONCE((t)->rcu_tasks_holdout)) \
+			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
@@ -609,7 +605,7 @@ static inline void rcu_preempt_sleep_check(void)
 
 #define __rcu_access_pointer(p, space) \
 ({ \
-	typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \
+	typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
 	rcu_dereference_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(_________p1)); \
 })
@@ -628,21 +624,6 @@ static inline void rcu_preempt_sleep_check(void)
 	((typeof(*p) __force __kernel *)(p)); \
 })
 
-#define __rcu_access_index(p, space) \
-({ \
-	typeof(p) _________p1 = ACCESS_ONCE(p); \
-	rcu_dereference_sparse(p, space); \
-	(_________p1); \
-})
-#define __rcu_dereference_index_check(p, c) \
-({ \
-	/* Dependency order vs. p above. */ \
-	typeof(p) _________p1 = lockless_dereference(p); \
-	rcu_lockdep_assert(c, \
-			   "suspicious rcu_dereference_index_check() usage"); \
-	(_________p1); \
-})
-
 /**
  * RCU_INITIALIZER() - statically initialize an RCU-protected global variable
  * @v: The value to statically initialize with.
@@ -659,7 +640,7 @@ static inline void rcu_preempt_sleep_check(void)
  */
 #define lockless_dereference(p) \
 ({ \
-	typeof(p) _________p1 = ACCESS_ONCE(p); \
+	typeof(p) _________p1 = READ_ONCE(p); \
 	smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
 	(_________p1); \
 })
@@ -702,7 +683,7 @@ static inline void rcu_preempt_sleep_check(void)
  * @p: The pointer to read
 *
 * Return the value of the specified RCU-protected pointer, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
+ * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful
 * when the value of this pointer is accessed, but the pointer is not
 * dereferenced, for example, when testing an RCU-protected pointer against
 * NULL. Although rcu_access_pointer() may also be used in cases where
@@ -787,47 +768,12 @@ static inline void rcu_preempt_sleep_check(void)
 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
 
 /**
- * rcu_access_index() - fetch RCU index with no dereferencing
- * @p: The index to read
- *
- * Return the value of the specified RCU-protected index, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
- * when the value of this index is accessed, but the index is not
- * dereferenced, for example, when testing an RCU-protected index against
- * -1. Although rcu_access_index() may also be used in cases where
- * update-side locks prevent the value of the index from changing, you
- * should instead use rcu_dereference_index_protected() for this use case.
- */
-#define rcu_access_index(p) __rcu_access_index((p), __rcu)
-
-/**
- * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * Similar to rcu_dereference_check(), but omits the sparse checking.
- * This allows rcu_dereference_index_check() to be used on integers,
- * which can then be used as array indices. Attempting to use
- * rcu_dereference_check() on an integer will give compiler warnings
- * because the sparse address-space mechanism relies on dereferencing
- * the RCU-protected pointer. Dereferencing integers is not something
- * that even gcc will put up with.
- *
- * Note that this function does not implicitly check for RCU read-side
- * critical sections. If this function gains lots of uses, it might
- * make sense to provide versions for each flavor of RCU, but it does
- * not make sense as of early 2010.
- */
-#define rcu_dereference_index_check(p, c) \
-	__rcu_dereference_index_check((p), (c))
-
-/**
  * rcu_dereference_protected() - fetch RCU pointer when updates prevented
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * Return the value of the specified RCU-protected pointer, but omit
- * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
+ * both the smp_read_barrier_depends() and the READ_ONCE(). This
 * is useful in cases where update-side locks prevent the value of the
 * pointer from changing. Please note that this primitive does -not-
 * prevent the compiler from repeating this reference or combining it
@@ -1153,13 +1099,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #define kfree_rcu(ptr, rcu_head) \
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
 
-#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL)
+#ifdef CONFIG_TINY_RCU
 static inline int rcu_needs_cpu(unsigned long *delta_jiffies)
 {
 	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
-#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */
+#endif /* #ifdef CONFIG_TINY_RCU */
 
 #if defined(CONFIG_RCU_NOCB_CPU_ALL)
 static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
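
For reference, lockless_dereference() (now built on READ_ONCE()) is the
dependency-ordered half of the publish/subscribe pattern. A hedged kernel-style
sketch of its use, for a pointer that is never freed so no rcu_read_lock()
bookkeeping or grace period is required (global_cfg and struct config are
illustrative):

	struct config *global_cfg;	/* written via rcu_assign_pointer() */

	static struct config *get_cfg(void)
	{
		/*
		 * Dependency-ordered load: everything the updater stored
		 * into *global_cfg before rcu_assign_pointer() is visible
		 * through the returned pointer.
		 */
		return lockless_dereference(global_cfg);
	}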
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 937edaeb150d..3df6c1ec4e25 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -159,6 +159,22 @@ static inline void rcu_cpu_stall_reset(void)
 {
 }
 
+static inline void rcu_idle_enter(void)
+{
+}
+
+static inline void rcu_idle_exit(void)
+{
+}
+
+static inline void rcu_irq_enter(void)
+{
+}
+
+static inline void rcu_irq_exit(void)
+{
+}
+
 static inline void exit_rcu(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d2e583a6aaca..3fa4a43ab415 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -31,9 +31,7 @@
 #define __LINUX_RCUTREE_H
 
 void rcu_note_context_switch(void);
-#ifndef CONFIG_RCU_NOCB_CPU_ALL
 int rcu_needs_cpu(unsigned long *delta_jiffies);
-#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
 void rcu_cpu_stall_reset(void);
 
 /*
@@ -93,6 +91,11 @@ void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 
+void rcu_idle_enter(void);
+void rcu_idle_exit(void);
+void rcu_irq_enter(void);
+void rcu_irq_exit(void);
+
 void exit_rcu(void);
 
 void rcu_scheduler_starting(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7de815c6fa78..d4193d5613cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,7 @@ struct fs_struct;
 struct perf_event_context;
 struct blk_plug;
 struct filename;
+struct nameidata;
 
 #define VMACACHE_BITS 2
 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
@@ -261,7 +262,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_task_state(tsk, state_value)			\
 	do {							\
 		(tsk)->task_state_change = _THIS_IP_;		\
-		set_mb((tsk)->state, (state_value));		\
+		smp_store_mb((tsk)->state, (state_value));	\
 	} while (0)
 
 /*
@@ -283,7 +284,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_current_state(state_value)				\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
-		set_mb(current->state, (state_value));		\
+		smp_store_mb(current->state, (state_value));	\
 	} while (0)
 
 #else
@@ -291,7 +292,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
-	set_mb((tsk)->state, (state_value))
+	smp_store_mb((tsk)->state, (state_value))
 
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -307,7 +308,7 @@ extern char ___assert_task_state[1 - 2*!!(
 #define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
 #define set_current_state(state_value)			\
-	set_mb(current->state, (state_value))
+	smp_store_mb(current->state, (state_value))
 
 #endif
 
@@ -1422,9 +1423,6 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
-#ifdef CONFIG_COMPAT_BRK
-	unsigned brk_randomized:1;
-#endif
 	/* per-thread vma caching */
 	u32 vmacache_seqnum;
 	struct vm_area_struct *vmacache[VMACACHE_SIZE];
@@ -1447,10 +1445,14 @@ struct task_struct {
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
+	unsigned sched_migrated:1;
 
 #ifdef CONFIG_MEMCG_KMEM
 	unsigned memcg_kmem_skip_account:1;
 #endif
+#ifdef CONFIG_COMPAT_BRK
+	unsigned brk_randomized:1;
+#endif
 
 	unsigned long atomic_flags; /* Flags needing atomic access. */
 
@@ -1527,7 +1529,7 @@ struct task_struct {
 				     it with task_lock())
 	 - initialized normally by setup_new_exec */
 /* file system info */
-	int link_count, total_link_count;
+	struct nameidata *nameidata;
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
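
The set_mb() to smp_store_mb() rename does not change the guarantee these
macros provide. The classic consumer is the sleep/wakeup loop, sketched here
in kernel style (condition is a placeholder for whatever the waiter checks):
the full barrier after the state store keeps it ordered before the condition
re-check, pairing with the waker's wake_up_process().

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (condition)		/* re-checked after the barrier */
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);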
diff --git a/include/linux/security.h b/include/linux/security.h
index 18264ea9e314..52febde52479 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -43,7 +43,6 @@ struct file;
 struct vfsmount;
 struct path;
 struct qstr;
-struct nameidata;
 struct iattr;
 struct fown_struct;
 struct file_operations;
@@ -477,7 +476,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @inode_follow_link:
  *	Check permission to follow a symbolic link when looking up a pathname.
  *	@dentry contains the dentry structure for the link.
- *	@nd contains the nameidata structure for the parent directory.
+ *	@inode contains the inode, which itself is not stable in RCU-walk
+ *	@rcu indicates whether we are in RCU-walk mode.
  *	Return 0 if permission is granted.
  * @inode_permission:
  *	Check permission before accessing an inode. This hook is called by the
@@ -1553,7 +1553,8 @@ struct security_operations {
 	int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry,
 			     struct inode *new_dir, struct dentry *new_dentry);
 	int (*inode_readlink) (struct dentry *dentry);
-	int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
+	int (*inode_follow_link) (struct dentry *dentry, struct inode *inode,
+				  bool rcu);
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr) (struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (const struct path *path);
@@ -1839,7 +1840,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry,
 			  unsigned int flags);
 int security_inode_readlink(struct dentry *dentry);
-int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
+int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
+			       bool rcu);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(const struct path *path);
@@ -2242,7 +2244,8 @@ static inline int security_inode_readlink(struct dentry *dentry)
 }
 
 static inline int security_inode_follow_link(struct dentry *dentry,
-					     struct nameidata *nd)
+					     struct inode *inode,
+					     bool rcu)
 {
 	return 0;
 }
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3e18379dfa6f..0063b24b4f36 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -120,7 +120,7 @@ do {								\
 /*
  * Despite its name it doesn't necessarily has to be a full barrier.
  * It should only guarantee that a STORE before the critical section
- * can not be reordered with a LOAD inside this section.
+ * can not be reordered with LOADs and STOREs inside this section.
  * spin_lock() is the one-way barrier, this LOAD can not escape out
  * of the region. So the default implementation simply ensures that
  * a STORE can not move into the critical section, smp_wmb() should
diff --git a/include/sound/hda_regmap.h b/include/sound/hda_regmap.h
index 53a18b3635e2..df705908480a 100644
--- a/include/sound/hda_regmap.h
+++ b/include/sound/hda_regmap.h
@@ -9,6 +9,8 @@
 #include <sound/core.h>
 #include <sound/hdaudio.h>
 
+#define AC_AMP_FAKE_MUTE	0x10	/* fake mute bit set to amp verbs */
+
 int snd_hdac_regmap_init(struct hdac_device *codec);
 void snd_hdac_regmap_exit(struct hdac_device *codec);
 int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec,
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 880dd7437172..c178d13d6f4c 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -250,7 +250,6 @@ DEFINE_EVENT(writeback_class, name, \
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
-DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
 
 DECLARE_EVENT_CLASS(wbc_class,
 	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 871e73f99a4d..94d44ab2fda1 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1038,6 +1038,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_CURRENT_GPU_SCLK	0x22
 #define RADEON_INFO_CURRENT_GPU_MCLK	0x23
 #define RADEON_INFO_READ_REG		0x24
+#define RADEON_INFO_VA_UNMAP_WORKING	0x25
 
 struct drm_radeon_info {
 	uint32_t		request;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 309211b3eb67..d97f84c080da 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
 
 	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
+	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */
 
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
@@ -186,6 +187,7 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
 	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
@@ -564,6 +566,10 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
 /*
+ * Indicates that /proc/PID/maps parsing was truncated by a timeout.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
+/*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
  */
@@ -800,6 +806,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_ITRACE_START		= 12,
 
+	/*
+	 * Records the dropped/lost sample number.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u64				lost;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_LOST_SAMPLES		= 13,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
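
A hedged sketch of the consumer-side view of the new record type, mirroring
the layout documented in the comment above (requires a uapi header that
already defines PERF_RECORD_LOST_SAMPLES; the struct name is illustrative):

	#include <stdint.h>
	#include <linux/perf_event.h>

	struct lost_samples_event {
		struct perf_event_header header;
		uint64_t lost;
		/* followed by struct sample_id, if sample_id_all is set */
	};

	/* Returns the lost-sample count, or 0 for any other record type. */
	static uint64_t parse_lost_samples(const void *rec)
	{
		const struct lost_samples_event *ev = rec;

		return ev->header.type == PERF_RECORD_LOST_SAMPLES ? ev->lost : 0;
	}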
diff --git a/init/Kconfig b/init/Kconfig
index dc24dec60232..b999fa381bf9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -465,13 +465,9 @@ endmenu # "CPU/Task time and stats accounting"
 
 menu "RCU Subsystem"
 
-choice
-	prompt "RCU Implementation"
-	default TREE_RCU
-
 config TREE_RCU
-	bool "Tree-based hierarchical RCU"
-	depends on !PREEMPT && SMP
+	bool
+	default y if !PREEMPT && SMP
 	help
 	  This option selects the RCU implementation that is
 	  designed for very large SMP system with hundreds or
@@ -479,8 +475,8 @@ config TREE_RCU
 	  smaller systems.
 
 config PREEMPT_RCU
-	bool "Preemptible tree-based hierarchical RCU"
-	depends on PREEMPT
+	bool
+	default y if PREEMPT
 	help
 	  This option selects the RCU implementation that is
 	  designed for very large SMP systems with hundreds or
@@ -491,15 +487,28 @@ config PREEMPT_RCU
 	  Select this option if you are unsure.
 
 config TINY_RCU
-	bool "UP-only small-memory-footprint RCU"
-	depends on !PREEMPT && !SMP
+	bool
+	default y if !PREEMPT && !SMP
 	help
 	  This option selects the RCU implementation that is
 	  designed for UP systems from which real-time response
 	  is not required.  This option greatly reduces the
 	  memory footprint of RCU.
 
-endchoice
+config RCU_EXPERT
+	bool "Make expert-level adjustments to RCU configuration"
+	default n
+	help
+	  This option needs to be enabled if you wish to make
+	  expert-level adjustments to RCU configuration.  By default,
+	  no such adjustments can be made, which has the often-beneficial
+	  side-effect of preventing "make oldconfig" from asking you all
+	  sorts of detailed questions about how you would like numerous
+	  obscure RCU options to be set up.
+
+	  Say Y if you need to make expert-level adjustments to RCU.
+
+	  Say N if you are unsure.
 
 config SRCU
 	bool
@@ -509,7 +518,7 @@ config SRCU
 	  sections.
 
 config TASKS_RCU
-	bool "Task_based RCU implementation using voluntary context switch"
+	bool
 	default n
 	select SRCU
 	help
@@ -517,8 +526,6 @@ config TASKS_RCU
 	  only voluntary context switch (not preemption!), idle, and
 	  user-mode execution as quiescent states.
 
-	  If unsure, say N.
-
 config RCU_STALL_COMMON
 	def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE )
 	help
@@ -531,9 +538,7 @@ config CONTEXT_TRACKING
 	bool
 
 config RCU_USER_QS
-	bool "Consider userspace as in RCU extended quiescent state"
-	depends on HAVE_CONTEXT_TRACKING && SMP
-	select CONTEXT_TRACKING
+	bool
 	help
 	  This option sets hooks on kernel / userspace boundaries and
 	  puts RCU in extended quiescent state when the CPU runs in
@@ -541,12 +546,6 @@ config RCU_USER_QS
 	  excluded from the global RCU state machine and thus doesn't
 	  try to keep the timer tick on for RCU.
 
-	  Unless you want to hack and help the development of the full
-	  dynticks mode, you shouldn't enable this option.  It also
-	  adds unnecessary overhead.
-
-	  If unsure say N
-
 config CONTEXT_TRACKING_FORCE
 	bool "Force context tracking"
 	depends on CONTEXT_TRACKING
@@ -578,7 +577,7 @@ config RCU_FANOUT
 	int "Tree-based hierarchical RCU fanout value"
 	range 2 64 if 64BIT
 	range 2 32 if !64BIT
-	depends on TREE_RCU || PREEMPT_RCU
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
 	default 64 if 64BIT
 	default 32 if !64BIT
 	help
@@ -596,9 +595,9 @@ config RCU_FANOUT
 
 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 RCU_FANOUT if 64BIT
-	range 2 RCU_FANOUT if !64BIT
-	depends on TREE_RCU || PREEMPT_RCU
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
 	default 16
 	help
 	  This option controls the leaf-level fanout of hierarchical
@@ -621,23 +620,9 @@ config RCU_FANOUT_LEAF
 
 	  Take the default if unsure.
 
-config RCU_FANOUT_EXACT
-	bool "Disable tree-based hierarchical RCU auto-balancing"
-	depends on TREE_RCU || PREEMPT_RCU
-	default n
-	help
-	  This option forces use of the exact RCU_FANOUT value specified,
-	  regardless of imbalances in the hierarchy.  This is useful for
-	  testing RCU itself, and might one day be useful on systems with
-	  strong NUMA behavior.
-
-	  Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
-
-	  Say N if unsure.
-
 config RCU_FAST_NO_HZ
 	bool "Accelerate last non-dyntick-idle CPU's grace periods"
-	depends on NO_HZ_COMMON && SMP
+	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
 	default n
 	help
 	  This option permits CPUs to enter dynticks-idle state even if
@@ -663,7 +648,7 @@ config TREE_RCU_TRACE
 
 config RCU_BOOST
 	bool "Enable RCU priority boosting"
-	depends on RT_MUTEXES && PREEMPT_RCU
+	depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
 	default n
 	help
 	  This option boosts the priority of preempted RCU readers that
@@ -680,6 +665,7 @@ config RCU_KTHREAD_PRIO
 	range 0 99 if !RCU_BOOST
 	default 1 if RCU_BOOST
 	default 0 if !RCU_BOOST
+	depends on RCU_EXPERT
 	help
 	  This option specifies the SCHED_FIFO priority value that will be
 	  assigned to the rcuc/n and rcub/n threads and is also the value
@@ -1637,7 +1623,7 @@ config PERF_EVENTS
 config DEBUG_PERF_USE_VMALLOC
 	default n
 	bool "Debug: use vmalloc to back perf mmap() buffers"
-	depends on PERF_EVENTS && DEBUG_KERNEL
+	depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
 	select PERF_USE_VMALLOC
 	help
 	  Use vmalloc memory to back perf mmap() buffers.
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 08561f1acd13..ebdb0043203a 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -235,9 +235,16 @@ config LOCK_SPIN_ON_OWNER
 	def_bool y
 	depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
 
-config ARCH_USE_QUEUE_RWLOCK
+config ARCH_USE_QUEUED_SPINLOCKS
 	bool
 
-config QUEUE_RWLOCK
-	def_bool y if ARCH_USE_QUEUE_RWLOCK
+config QUEUED_SPINLOCKS
+	def_bool y if ARCH_USE_QUEUED_SPINLOCKS
+	depends on SMP
+
+config ARCH_USE_QUEUED_RWLOCKS
+	bool
+
+config QUEUED_RWLOCKS
+	def_bool y if ARCH_USE_QUEUED_RWLOCKS
 	depends on SMP
diff --git a/kernel/compat.c b/kernel/compat.c
index 24f00610c575..333d364be29d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -912,7 +912,8 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
912 * bitmap. We must however ensure the end of the 912 * bitmap. We must however ensure the end of the
913 * kernel bitmap is zeroed. 913 * kernel bitmap is zeroed.
914 */ 914 */
915 if (nr_compat_longs-- > 0) { 915 if (nr_compat_longs) {
916 nr_compat_longs--;
916 if (__get_user(um, umask)) 917 if (__get_user(um, umask))
917 return -EFAULT; 918 return -EFAULT;
918 } else { 919 } else {
@@ -954,7 +955,8 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
954 * We dont want to write past the end of the userspace 955 * We dont want to write past the end of the userspace
955 * bitmap. 956 * bitmap.
956 */ 957 */
957 if (nr_compat_longs-- > 0) { 958 if (nr_compat_longs) {
959 nr_compat_longs--;
958 if (__put_user(um, umask)) 960 if (__put_user(um, umask))
959 return -EFAULT; 961 return -EFAULT;
960 } 962 }
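
The two compat.c hunks replace the post-decrement test "if (nr_compat_longs-- > 0)" with an explicit test-then-decrement: the old form takes the same branches but keeps decrementing after the count is exhausted, leaving the counter underflowed. A standalone sketch with made-up counts (not kernel code):

#include <stdio.h>

int main(void)
{
	int buggy = 2, fixed = 2;
	int copied_buggy = 0, copied_fixed = 0;

	for (int i = 0; i < 5; i++) {
		if (buggy-- > 0)	/* decrements even when exhausted */
			copied_buggy++;

		if (fixed) {		/* test first ... */
			fixed--;	/* ... consume only when one is left */
			copied_fixed++;
		}
	}

	/* Both copy the same two words, but the post-decrement form
	 * leaves the counter underflowed at -3 instead of 0. */
	printf("buggy: copied=%d counter=%d\n", copied_buggy, buggy);
	printf("fixed: copied=%d counter=%d\n", copied_fixed, fixed);
	return 0;
}
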
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 94bbe4695232..9c9c9fab16cc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -398,7 +398,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
398 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); 398 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
399 if (err) { 399 if (err) {
400 /* CPU didn't die: tell everyone. Can't complain. */ 400 /* CPU didn't die: tell everyone. Can't complain. */
401 smpboot_unpark_threads(cpu);
402 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); 401 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
403 goto out_release; 402 goto out_release;
404 } 403 }
@@ -463,6 +462,7 @@ static int smpboot_thread_call(struct notifier_block *nfb,
463 462
464 switch (action & ~CPU_TASKS_FROZEN) { 463 switch (action & ~CPU_TASKS_FROZEN) {
465 464
465 case CPU_DOWN_FAILED:
466 case CPU_ONLINE: 466 case CPU_ONLINE:
467 smpboot_unpark_threads(cpu); 467 smpboot_unpark_threads(cpu);
468 break; 468 break;
@@ -479,7 +479,7 @@ static struct notifier_block smpboot_thread_notifier = {
479 .priority = CPU_PRI_SMPBOOT, 479 .priority = CPU_PRI_SMPBOOT,
480}; 480};
481 481
482void __cpuinit smpboot_thread_init(void) 482void smpboot_thread_init(void)
483{ 483{
484 register_cpu_notifier(&smpboot_thread_notifier); 484 register_cpu_notifier(&smpboot_thread_notifier);
485} 485}
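
The cpu.c change stops unparking smpboot threads directly in the failure path of _cpu_down() and instead lets the notifier treat CPU_DOWN_FAILED exactly like CPU_ONLINE. A rough user-space model of that dispatch (the park/unpark helpers and action values below are stand-ins, not the kernel's):

#include <stdio.h>

enum cpu_action { CPU_ONLINE, CPU_DOWN_PREPARE, CPU_DOWN_FAILED };

/* Hypothetical stand-ins for the kernel's park/unpark operations. */
static void unpark_threads(int cpu) { printf("cpu%d: threads unparked\n", cpu); }
static void park_threads(int cpu)   { printf("cpu%d: threads parked\n", cpu); }

static void smpboot_notifier(enum cpu_action action, int cpu)
{
	switch (action) {
	case CPU_DOWN_FAILED:	/* aborted offline needs the same ... */
	case CPU_ONLINE:	/* ... recovery as a fresh online */
		unpark_threads(cpu);
		break;
	case CPU_DOWN_PREPARE:
		park_threads(cpu);
		break;
	}
}

int main(void)
{
	smpboot_notifier(CPU_ONLINE, 1);
	smpboot_notifier(CPU_DOWN_PREPARE, 1);
	smpboot_notifier(CPU_DOWN_FAILED, 1);	/* offline failed: recover */
	return 0;
}
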
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1a3bf48743ce..f2003b97ddc9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3442,7 +3442,6 @@ static void free_event_rcu(struct rcu_head *head)
3442 if (event->ns) 3442 if (event->ns)
3443 put_pid_ns(event->ns); 3443 put_pid_ns(event->ns);
3444 perf_event_free_filter(event); 3444 perf_event_free_filter(event);
3445 perf_event_free_bpf_prog(event);
3446 kfree(event); 3445 kfree(event);
3447} 3446}
3448 3447
@@ -3573,6 +3572,8 @@ static void __free_event(struct perf_event *event)
3573 put_callchain_buffers(); 3572 put_callchain_buffers();
3574 } 3573 }
3575 3574
3575 perf_event_free_bpf_prog(event);
3576
3576 if (event->destroy) 3577 if (event->destroy)
3577 event->destroy(event); 3578 event->destroy(event);
3578 3579
@@ -4330,20 +4331,20 @@ static void ring_buffer_attach(struct perf_event *event,
4330 WARN_ON_ONCE(event->rcu_pending); 4331 WARN_ON_ONCE(event->rcu_pending);
4331 4332
4332 old_rb = event->rb; 4333 old_rb = event->rb;
4333 event->rcu_batches = get_state_synchronize_rcu();
4334 event->rcu_pending = 1;
4335
4336 spin_lock_irqsave(&old_rb->event_lock, flags); 4334 spin_lock_irqsave(&old_rb->event_lock, flags);
4337 list_del_rcu(&event->rb_entry); 4335 list_del_rcu(&event->rb_entry);
4338 spin_unlock_irqrestore(&old_rb->event_lock, flags); 4336 spin_unlock_irqrestore(&old_rb->event_lock, flags);
4339 }
4340 4337
4341 if (event->rcu_pending && rb) { 4338 event->rcu_batches = get_state_synchronize_rcu();
4342 cond_synchronize_rcu(event->rcu_batches); 4339 event->rcu_pending = 1;
4343 event->rcu_pending = 0;
4344 } 4340 }
4345 4341
4346 if (rb) { 4342 if (rb) {
4343 if (event->rcu_pending) {
4344 cond_synchronize_rcu(event->rcu_batches);
4345 event->rcu_pending = 0;
4346 }
4347
4347 spin_lock_irqsave(&rb->event_lock, flags); 4348 spin_lock_irqsave(&rb->event_lock, flags);
4348 list_add_rcu(&event->rb_entry, &rb->event_list); 4349 list_add_rcu(&event->rb_entry, &rb->event_list);
4349 spin_unlock_irqrestore(&rb->event_lock, flags); 4350 spin_unlock_irqrestore(&rb->event_lock, flags);
@@ -5380,9 +5381,9 @@ void perf_prepare_sample(struct perf_event_header *header,
5380 } 5381 }
5381} 5382}
5382 5383
5383static void perf_event_output(struct perf_event *event, 5384void perf_event_output(struct perf_event *event,
5384 struct perf_sample_data *data, 5385 struct perf_sample_data *data,
5385 struct pt_regs *regs) 5386 struct pt_regs *regs)
5386{ 5387{
5387 struct perf_output_handle handle; 5388 struct perf_output_handle handle;
5388 struct perf_event_header header; 5389 struct perf_event_header header;
@@ -5974,6 +5975,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
5974} 5975}
5975 5976
5976/* 5977/*
5978 * Lost/dropped samples logging
5979 */
5980void perf_log_lost_samples(struct perf_event *event, u64 lost)
5981{
5982 struct perf_output_handle handle;
5983 struct perf_sample_data sample;
5984 int ret;
5985
5986 struct {
5987 struct perf_event_header header;
5988 u64 lost;
5989 } lost_samples_event = {
5990 .header = {
5991 .type = PERF_RECORD_LOST_SAMPLES,
5992 .misc = 0,
5993 .size = sizeof(lost_samples_event),
5994 },
5995 .lost = lost,
5996 };
5997
5998 perf_event_header__init_id(&lost_samples_event.header, &sample, event);
5999
6000 ret = perf_output_begin(&handle, event,
6001 lost_samples_event.header.size);
6002 if (ret)
6003 return;
6004
6005 perf_output_put(&handle, lost_samples_event);
6006 perf_event__output_id_sample(event, &handle, &sample);
6007 perf_output_end(&handle);
6008}
6009
6010/*
5977 * IRQ throttle logging 6011 * IRQ throttle logging
5978 */ 6012 */
5979 6013
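
Among the core.c changes, perf_event_free_bpf_prog() moves out of the RCU callback into __free_event(), and perf_log_lost_samples() emits a new PERF_RECORD_LOST_SAMPLES record. A standalone sketch of the record shape that function writes (the header layout follows the uapi perf_event_header; the type value 13 is an assumption here, and the id/sample trailer is omitted):

#include <stdio.h>
#include <stdint.h>

struct perf_event_header {
	uint32_t type;
	uint16_t misc;
	uint16_t size;	/* total record size, header included */
};

struct lost_samples_event {
	struct perf_event_header header;
	uint64_t lost;	/* count of dropped samples */
};

int main(void)
{
	struct lost_samples_event ev = {
		.header = {
			.type = 13,	/* PERF_RECORD_LOST_SAMPLES (assumed) */
			.misc = 0,
			.size = sizeof(ev),
		},
		.lost = 42,
	};

	printf("record size %u bytes, lost %llu samples\n",
	       ev.header.size, (unsigned long long)ev.lost);
	return 0;
}
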
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 9f6ce9ba4a04..2deb24c7a40d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -72,15 +72,6 @@ static inline bool rb_has_aux(struct ring_buffer *rb)
72void perf_event_aux_event(struct perf_event *event, unsigned long head, 72void perf_event_aux_event(struct perf_event *event, unsigned long head,
73 unsigned long size, u64 flags); 73 unsigned long size, u64 flags);
74 74
75extern void
76perf_event_header__init_id(struct perf_event_header *header,
77 struct perf_sample_data *data,
78 struct perf_event *event);
79extern void
80perf_event__output_id_sample(struct perf_event *event,
81 struct perf_output_handle *handle,
82 struct perf_sample_data *sample);
83
84extern struct page * 75extern struct page *
85perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff); 76perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
86 77
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 232f00f273cb..96472824a752 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle,
141 perf_output_get_handle(handle); 141 perf_output_get_handle(handle);
142 142
143 do { 143 do {
144 tail = ACCESS_ONCE(rb->user_page->data_tail); 144 tail = READ_ONCE_CTRL(rb->user_page->data_tail);
145 offset = head = local_read(&rb->head); 145 offset = head = local_read(&rb->head);
146 if (!rb->overwrite && 146 if (!rb->overwrite &&
147 unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) 147 unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
@@ -493,6 +493,20 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
493 rb->aux_pages[rb->aux_nr_pages] = page_address(page++); 493 rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
494 } 494 }
495 495
496 /*
497 * In overwrite mode, PMUs that don't support SG may not handle more
498 * than one contiguous allocation, since they rely on PMI to do double
499 * buffering. In this case, the entire buffer has to be one contiguous
500 * chunk.
501 */
502 if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
503 overwrite) {
504 struct page *page = virt_to_page(rb->aux_pages[0]);
505
506 if (page_private(page) != max_order)
507 goto out;
508 }
509
496 rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, 510 rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
497 overwrite); 511 overwrite);
498 if (!rb->aux_priv) 512 if (!rb->aux_priv)
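
In perf_output_begin(), READ_ONCE_CTRL() gives the data_tail read control-dependency ordering against the stores that later fill the buffer, and the CIRC_SPACE() test decides whether the record fits. A standalone sketch of just the space check, with made-up head/tail values (the macro matches include/linux/circ_buf.h for a power-of-two buffer, which keeps one slot empty to distinguish full from empty):

#include <stdio.h>

#define CIRC_SPACE(head, tail, size) \
	(((tail) - ((head) + 1)) & ((size) - 1))

int main(void)
{
	unsigned long size = 16;	/* must be a power of two */
	unsigned long head = 14, tail = 3;
	unsigned long want = 6;		/* record size to reserve */

	/* Writer side: mirrors the !rb->overwrite check above. */
	if (CIRC_SPACE(head, tail, size) < want)
		printf("record does not fit; would be lost\n");
	else
		printf("record fits, advancing head\n");
	return 0;
}
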
diff --git a/kernel/futex.c b/kernel/futex.c
index f9984c363e9a..aacc706f85fc 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2056,7 +2056,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
2056{ 2056{
2057 /* 2057 /*
2058 * The task state is guaranteed to be set before another task can 2058 * The task state is guaranteed to be set before another task can
2059 * wake it. set_current_state() is implemented using set_mb() and 2059 * wake it. set_current_state() is implemented using smp_store_mb() and
2060 * queue_me() calls spin_unlock() upon completion, both serializing 2060 * queue_me() calls spin_unlock() upon completion, both serializing
2061 * access to the hash list and forcing another memory barrier. 2061 * access to the hash list and forcing another memory barrier.
2062 */ 2062 */
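
The futex.c hunk only renames set_mb() to smp_store_mb() in the comment; the primitive stores a value and then executes a full memory barrier. A C11 user-space analogue of that semantic (an approximation for illustration, not the kernel's implementation):

#include <stdatomic.h>
#include <stdio.h>

/* Store, then a full fence: later loads and stores on this thread
 * cannot be reordered before the store becomes visible. */
#define smp_store_mb(var, value) do {					\
	atomic_store_explicit(&(var), (value), memory_order_relaxed);	\
	atomic_thread_fence(memory_order_seq_cst);			\
} while (0)

static _Atomic int task_state;

int main(void)
{
	smp_store_mb(task_state, 1);	/* like set_current_state() */
	printf("state=%d\n", atomic_load(&task_state));
	return 0;
}
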
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index de7a416cca2a..7dd5c9918e4c 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o
17obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o 17obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
18obj-$(CONFIG_SMP) += lglock.o 18obj-$(CONFIG_SMP) += lglock.o
19obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 19obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
20obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
20obj-$(CONFIG_RT_MUTEXES) += rtmutex.o 21obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
21obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 22obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
22obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 23obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
@@ -25,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
25obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o 26obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
26obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o 27obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
27obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o 28obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
28obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o 29obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
29obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o 30obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index a0831e1b99f4..456614136f1a 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3900,7 +3900,8 @@ static void zap_class(struct lock_class *class)
3900 list_del_rcu(&class->hash_entry); 3900 list_del_rcu(&class->hash_entry);
3901 list_del_rcu(&class->lock_entry); 3901 list_del_rcu(&class->lock_entry);
3902 3902
3903 class->key = NULL; 3903 RCU_INIT_POINTER(class->key, NULL);
3904 RCU_INIT_POINTER(class->name, NULL);
3904} 3905}
3905 3906
3906static inline int within(const void *addr, void *start, unsigned long size) 3907static inline int within(const void *addr, void *start, unsigned long size)
@@ -4066,8 +4067,7 @@ void __init lockdep_info(void)
4066 4067
4067#ifdef CONFIG_DEBUG_LOCKDEP 4068#ifdef CONFIG_DEBUG_LOCKDEP
4068 if (lockdep_init_error) { 4069 if (lockdep_init_error) {
4069 printk("WARNING: lockdep init error! lock-%s was acquired" 4070 printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
4070 "before lockdep_init\n", lock_init_error);
4071 printk("Call stack leading to lockdep invocation was:\n"); 4071 printk("Call stack leading to lockdep invocation was:\n");
4072 print_stack_trace(&lockdep_init_trace, 0); 4072 print_stack_trace(&lockdep_init_trace, 0);
4073 } 4073 }
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index ef43ac4bafb5..d83d798bef95 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -426,10 +426,12 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
426 426
427static void seq_stats(struct seq_file *m, struct lock_stat_data *data) 427static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
428{ 428{
429 char name[39]; 429 struct lockdep_subclass_key *ckey;
430 struct lock_class *class;
431 struct lock_class_stats *stats; 430 struct lock_class_stats *stats;
431 struct lock_class *class;
432 const char *cname;
432 int i, namelen; 433 int i, namelen;
434 char name[39];
433 435
434 class = data->class; 436 class = data->class;
435 stats = &data->stats; 437 stats = &data->stats;
@@ -440,15 +442,25 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
440 if (class->subclass) 442 if (class->subclass)
441 namelen -= 2; 443 namelen -= 2;
442 444
443 if (!class->name) { 445 rcu_read_lock_sched();
446 cname = rcu_dereference_sched(class->name);
447 ckey = rcu_dereference_sched(class->key);
448
449 if (!cname && !ckey) {
450 rcu_read_unlock_sched();
451 return;
452
453 } else if (!cname) {
444 char str[KSYM_NAME_LEN]; 454 char str[KSYM_NAME_LEN];
445 const char *key_name; 455 const char *key_name;
446 456
447 key_name = __get_key_name(class->key, str); 457 key_name = __get_key_name(ckey, str);
448 snprintf(name, namelen, "%s", key_name); 458 snprintf(name, namelen, "%s", key_name);
449 } else { 459 } else {
450 snprintf(name, namelen, "%s", class->name); 460 snprintf(name, namelen, "%s", cname);
451 } 461 }
462 rcu_read_unlock_sched();
463
452 namelen = strlen(name); 464 namelen = strlen(name);
453 if (class->name_version > 1) { 465 if (class->name_version > 1) {
454 snprintf(name+namelen, 3, "#%d", class->name_version); 466 snprintf(name+namelen, 3, "#%d", class->name_version);
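
Both lockdep hunks convert class->key and class->name into RCU-published pointers: the writer NULLs them with RCU_INIT_POINTER() and readers sample them under rcu_read_lock_sched() via rcu_dereference_sched(). A C11 analogue of the pointer primitives (a sketch, not the kernel's definitions; acquire is used below in place of the kernel's lighter dependency ordering):

#include <stdatomic.h>
#include <stdio.h>

#define rcu_assign_pointer(p, v) \
	atomic_store_explicit(&(p), (v), memory_order_release)
#define rcu_dereference(p) \
	atomic_load_explicit(&(p), memory_order_acquire)
/* Plain store: legal when publishing NULL, as in zap_class() above. */
#define RCU_INIT_POINTER(p, v) \
	atomic_store_explicit(&(p), (v), memory_order_relaxed)

static const char *_Atomic class_name;

int main(void)
{
	rcu_assign_pointer(class_name, "mutex_a");	/* publish */

	const char *name = rcu_dereference(class_name);	/* subscribe */
	printf("name=%s\n", name ? name : "(null)");

	RCU_INIT_POINTER(class_name, NULL);		/* tear down */
	return 0;
}
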
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index ec8cce259779..32244186f1f2 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -122,12 +122,12 @@ static int torture_lock_busted_write_lock(void)
122 122
123static void torture_lock_busted_write_delay(struct torture_random_state *trsp) 123static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
124{ 124{
125 const unsigned long longdelay_us = 100; 125 const unsigned long longdelay_ms = 100;
126 126
127 /* We want a long delay occasionally to force massive contention. */ 127 /* We want a long delay occasionally to force massive contention. */
128 if (!(torture_random(trsp) % 128 if (!(torture_random(trsp) %
129 (cxt.nrealwriters_stress * 2000 * longdelay_us))) 129 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
130 mdelay(longdelay_us); 130 mdelay(longdelay_ms);
131#ifdef CONFIG_PREEMPT 131#ifdef CONFIG_PREEMPT
132 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) 132 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
133 preempt_schedule(); /* Allow test to be preempted. */ 133 preempt_schedule(); /* Allow test to be preempted. */
@@ -160,14 +160,14 @@ static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock)
160static void torture_spin_lock_write_delay(struct torture_random_state *trsp) 160static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
161{ 161{
162 const unsigned long shortdelay_us = 2; 162 const unsigned long shortdelay_us = 2;
163 const unsigned long longdelay_us = 100; 163 const unsigned long longdelay_ms = 100;
164 164
165 /* We want a short delay mostly to emulate likely code, and 165 /* We want a short delay mostly to emulate likely code, and
166 * we want a long delay occasionally to force massive contention. 166 * we want a long delay occasionally to force massive contention.
167 */ 167 */
168 if (!(torture_random(trsp) % 168 if (!(torture_random(trsp) %
169 (cxt.nrealwriters_stress * 2000 * longdelay_us))) 169 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
170 mdelay(longdelay_us); 170 mdelay(longdelay_ms);
171 if (!(torture_random(trsp) % 171 if (!(torture_random(trsp) %
172 (cxt.nrealwriters_stress * 2 * shortdelay_us))) 172 (cxt.nrealwriters_stress * 2 * shortdelay_us)))
173 udelay(shortdelay_us); 173 udelay(shortdelay_us);
@@ -309,7 +309,7 @@ static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
309static void torture_rwlock_read_unlock_irq(void) 309static void torture_rwlock_read_unlock_irq(void)
310__releases(torture_rwlock) 310__releases(torture_rwlock)
311{ 311{
312 write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); 312 read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
313} 313}
314 314
315static struct lock_torture_ops rw_lock_irq_ops = { 315static struct lock_torture_ops rw_lock_irq_ops = {
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 75e114bdf3f2..fd91aaa4554c 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -17,6 +17,7 @@
17struct mcs_spinlock { 17struct mcs_spinlock {
18 struct mcs_spinlock *next; 18 struct mcs_spinlock *next;
19 int locked; /* 1 if lock acquired */ 19 int locked; /* 1 if lock acquired */
20 int count; /* nesting count, see qspinlock.c */
20}; 21};
21 22
22#ifndef arch_mcs_spin_lock_contended 23#ifndef arch_mcs_spin_lock_contended
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index f956ede7f90d..6c5da483966b 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Queue read/write lock 2 * Queued read/write locks
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
@@ -22,6 +22,26 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <asm/qrwlock.h> 23#include <asm/qrwlock.h>
24 24
25/*
26 * This internal data structure is used for optimizing access to some of
27 * the subfields within the atomic_t cnts.
28 */
29struct __qrwlock {
30 union {
31 atomic_t cnts;
32 struct {
33#ifdef __LITTLE_ENDIAN
34 u8 wmode; /* Writer mode */
35 u8 rcnts[3]; /* Reader counts */
36#else
37 u8 rcnts[3]; /* Reader counts */
38 u8 wmode; /* Writer mode */
39#endif
40 };
41 };
42 arch_spinlock_t lock;
43};
44
25/** 45/**
26 * rspin_until_writer_unlock - inc reader count & spin until writer is gone 46 * rspin_until_writer_unlock - inc reader count & spin until writer is gone
27 * @lock : Pointer to queue rwlock structure 47 * @lock : Pointer to queue rwlock structure
@@ -107,10 +127,10 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
107 * or wait for a previous writer to go away. 127 * or wait for a previous writer to go away.
108 */ 128 */
109 for (;;) { 129 for (;;) {
110 cnts = atomic_read(&lock->cnts); 130 struct __qrwlock *l = (struct __qrwlock *)lock;
111 if (!(cnts & _QW_WMASK) && 131
112 (atomic_cmpxchg(&lock->cnts, cnts, 132 if (!READ_ONCE(l->wmode) &&
113 cnts | _QW_WAITING) == cnts)) 133 (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
114 break; 134 break;
115 135
116 cpu_relax_lowlatency(); 136 cpu_relax_lowlatency();
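
The __qrwlock union added above exists so the slowpath can operate on the writer-mode byte alone, leaving the reader counts untouched by the cmpxchg(). A standalone little-endian demonstration of the overlay (field names follow the patch, values are made up; the kernel flips the fields on big-endian):

#include <stdint.h>
#include <stdio.h>

union qrw {
	uint32_t cnts;
	struct {
		uint8_t wmode;		/* writer mode, low byte */
		uint8_t rcnts[3];	/* reader counts above it */
	};
};

int main(void)
{
	union qrw l = { .cnts = 3 << 8 };	/* three readers, no writer */

	printf("wmode=%u rcnts[0]=%u\n", l.wmode, l.rcnts[0]);

	l.wmode = 0xff;		/* set a _QW_WAITING-style writer byte */
	printf("cnts=0x%08x\n", (unsigned)l.cnts);	/* 0x000003ff */
	return 0;
}
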
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
new file mode 100644
index 000000000000..38c49202d532
--- /dev/null
+++ b/kernel/locking/qspinlock.c
@@ -0,0 +1,473 @@
1/*
2 * Queued spinlock
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
15 * (C) Copyright 2013-2014 Red Hat, Inc.
16 * (C) Copyright 2015 Intel Corp.
17 *
18 * Authors: Waiman Long <waiman.long@hp.com>
19 * Peter Zijlstra <peterz@infradead.org>
20 */
21
22#ifndef _GEN_PV_LOCK_SLOWPATH
23
24#include <linux/smp.h>
25#include <linux/bug.h>
26#include <linux/cpumask.h>
27#include <linux/percpu.h>
28#include <linux/hardirq.h>
29#include <linux/mutex.h>
30#include <asm/byteorder.h>
31#include <asm/qspinlock.h>
32
33/*
34 * The basic principle of a queue-based spinlock can best be understood
35 * by studying a classic queue-based spinlock implementation called the
36 * MCS lock. The paper below provides a good description for this kind
37 * of lock.
38 *
39 * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
40 *
 41 * This queued spinlock implementation is based on the MCS lock; however, to make
 42 * it fit the 4 bytes we assume spinlock_t to be, and to preserve its existing
43 * API, we must modify it somehow.
44 *
 45 * In particular, where the traditional MCS lock consists of a tail pointer
46 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
47 * unlock the next pending (next->locked), we compress both these: {tail,
48 * next->locked} into a single u32 value.
49 *
 50 * Since a spinlock disables recursion of its own context and there is a limit
 51 * to the contexts that can nest (task, softirq, hardirq, nmi), there are at
 52 * most 4 nesting levels, which a 2-bit number can encode. Now
53 * we can encode the tail by combining the 2-bit nesting level with the cpu
54 * number. With one byte for the lock value and 3 bytes for the tail, only a
55 * 32-bit word is now needed. Even though we only need 1 bit for the lock,
56 * we extend it to a full byte to achieve better performance for architectures
57 * that support atomic byte write.
58 *
59 * We also change the first spinner to spin on the lock bit instead of its
 60 * node, thereby avoiding the need to carry a node from lock to unlock, and
61 * preserving existing lock API. This also makes the unlock code simpler and
62 * faster.
63 *
64 * N.B. The current implementation only supports architectures that allow
65 * atomic operations on smaller 8-bit and 16-bit data types.
66 *
67 */
68
69#include "mcs_spinlock.h"
70
71#ifdef CONFIG_PARAVIRT_SPINLOCKS
72#define MAX_NODES 8
73#else
74#define MAX_NODES 4
75#endif
76
77/*
78 * Per-CPU queue node structures; we can never have more than 4 nested
79 * contexts: task, softirq, hardirq, nmi.
80 *
81 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
82 *
83 * PV doubles the storage and uses the second cacheline for PV state.
84 */
85static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
86
87/*
88 * We must be able to distinguish between no-tail and the tail at 0:0,
89 * therefore increment the cpu number by one.
90 */
91
92static inline u32 encode_tail(int cpu, int idx)
93{
94 u32 tail;
95
96#ifdef CONFIG_DEBUG_SPINLOCK
97 BUG_ON(idx > 3);
98#endif
99 tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
100 tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
101
102 return tail;
103}
104
105static inline struct mcs_spinlock *decode_tail(u32 tail)
106{
107 int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
108 int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
109
110 return per_cpu_ptr(&mcs_nodes[idx], cpu);
111}
112
113#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
114
115/*
116 * By using the whole 2nd least significant byte for the pending bit, we
117 * can allow better optimization of the lock acquisition for the pending
118 * bit holder.
119 *
120 * This internal structure is also used by the set_locked function which
121 * is not restricted to _Q_PENDING_BITS == 8.
122 */
123struct __qspinlock {
124 union {
125 atomic_t val;
126#ifdef __LITTLE_ENDIAN
127 struct {
128 u8 locked;
129 u8 pending;
130 };
131 struct {
132 u16 locked_pending;
133 u16 tail;
134 };
135#else
136 struct {
137 u16 tail;
138 u16 locked_pending;
139 };
140 struct {
141 u8 reserved[2];
142 u8 pending;
143 u8 locked;
144 };
145#endif
146 };
147};
148
149#if _Q_PENDING_BITS == 8
150/**
151 * clear_pending_set_locked - take ownership and clear the pending bit.
152 * @lock: Pointer to queued spinlock structure
153 *
154 * *,1,0 -> *,0,1
155 *
156 * Lock stealing is not allowed if this function is used.
157 */
158static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
159{
160 struct __qspinlock *l = (void *)lock;
161
162 WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
163}
164
165/*
166 * xchg_tail - Put in the new queue tail code word & retrieve previous one
167 * @lock : Pointer to queued spinlock structure
168 * @tail : The new queue tail code word
169 * Return: The previous queue tail code word
170 *
171 * xchg(lock, tail)
172 *
173 * p,*,* -> n,*,* ; prev = xchg(lock, node)
174 */
175static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
176{
177 struct __qspinlock *l = (void *)lock;
178
179 return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
180}
181
182#else /* _Q_PENDING_BITS == 8 */
183
184/**
185 * clear_pending_set_locked - take ownership and clear the pending bit.
186 * @lock: Pointer to queued spinlock structure
187 *
188 * *,1,0 -> *,0,1
189 */
190static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
191{
192 atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
193}
194
195/**
196 * xchg_tail - Put in the new queue tail code word & retrieve previous one
197 * @lock : Pointer to queued spinlock structure
198 * @tail : The new queue tail code word
199 * Return: The previous queue tail code word
200 *
201 * xchg(lock, tail)
202 *
203 * p,*,* -> n,*,* ; prev = xchg(lock, node)
204 */
205static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
206{
207 u32 old, new, val = atomic_read(&lock->val);
208
209 for (;;) {
210 new = (val & _Q_LOCKED_PENDING_MASK) | tail;
211 old = atomic_cmpxchg(&lock->val, val, new);
212 if (old == val)
213 break;
214
215 val = old;
216 }
217 return old;
218}
219#endif /* _Q_PENDING_BITS == 8 */
220
221/**
222 * set_locked - Set the lock bit and own the lock
223 * @lock: Pointer to queued spinlock structure
224 *
225 * *,*,0 -> *,0,1
226 */
227static __always_inline void set_locked(struct qspinlock *lock)
228{
229 struct __qspinlock *l = (void *)lock;
230
231 WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
232}
233
234
235/*
236 * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
237 * all the PV callbacks.
238 */
239
240static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
241static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
242static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
243
244static __always_inline void __pv_wait_head(struct qspinlock *lock,
245 struct mcs_spinlock *node) { }
246
247#define pv_enabled() false
248
249#define pv_init_node __pv_init_node
250#define pv_wait_node __pv_wait_node
251#define pv_kick_node __pv_kick_node
252#define pv_wait_head __pv_wait_head
253
254#ifdef CONFIG_PARAVIRT_SPINLOCKS
255#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
256#endif
257
258#endif /* _GEN_PV_LOCK_SLOWPATH */
259
260/**
261 * queued_spin_lock_slowpath - acquire the queued spinlock
262 * @lock: Pointer to queued spinlock structure
263 * @val: Current value of the queued spinlock 32-bit word
264 *
265 * (queue tail, pending bit, lock value)
266 *
267 * fast : slow : unlock
268 * : :
269 * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
270 * : | ^--------.------. / :
271 * : v \ \ | :
272 * pending : (0,1,1) +--> (0,1,0) \ | :
273 * : | ^--' | | :
274 * : v | | :
275 * uncontended : (n,x,y) +--> (n,0,0) --' | :
276 * queue : | ^--' | :
277 * : v | :
278 * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
279 * queue : ^--' :
280 */
281void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
282{
283 struct mcs_spinlock *prev, *next, *node;
284 u32 new, old, tail;
285 int idx;
286
287 BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
288
289 if (pv_enabled())
290 goto queue;
291
292 if (virt_queued_spin_lock(lock))
293 return;
294
295 /*
296 * wait for in-progress pending->locked hand-overs
297 *
298 * 0,1,0 -> 0,0,1
299 */
300 if (val == _Q_PENDING_VAL) {
301 while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
302 cpu_relax();
303 }
304
305 /*
306 * trylock || pending
307 *
308 * 0,0,0 -> 0,0,1 ; trylock
309 * 0,0,1 -> 0,1,1 ; pending
310 */
311 for (;;) {
312 /*
313 * If we observe any contention; queue.
314 */
315 if (val & ~_Q_LOCKED_MASK)
316 goto queue;
317
318 new = _Q_LOCKED_VAL;
319 if (val == new)
320 new |= _Q_PENDING_VAL;
321
322 old = atomic_cmpxchg(&lock->val, val, new);
323 if (old == val)
324 break;
325
326 val = old;
327 }
328
329 /*
330 * we won the trylock
331 */
332 if (new == _Q_LOCKED_VAL)
333 return;
334
335 /*
336 * we're pending, wait for the owner to go away.
337 *
338 * *,1,1 -> *,1,0
339 *
340 * this wait loop must be a load-acquire such that we match the
341 * store-release that clears the locked bit and create lock
342 * sequentiality; this is because not all clear_pending_set_locked()
343 * implementations imply full barriers.
344 */
345 while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
346 cpu_relax();
347
348 /*
349 * take ownership and clear the pending bit.
350 *
351 * *,1,0 -> *,0,1
352 */
353 clear_pending_set_locked(lock);
354 return;
355
356 /*
357 * End of pending bit optimistic spinning and beginning of MCS
358 * queuing.
359 */
360queue:
361 node = this_cpu_ptr(&mcs_nodes[0]);
362 idx = node->count++;
363 tail = encode_tail(smp_processor_id(), idx);
364
365 node += idx;
366 node->locked = 0;
367 node->next = NULL;
368 pv_init_node(node);
369
370 /*
371 * We touched a (possibly) cold cacheline in the per-cpu queue node;
372 * attempt the trylock once more in the hope someone let go while we
373 * weren't watching.
374 */
375 if (queued_spin_trylock(lock))
376 goto release;
377
378 /*
379 * We have already touched the queueing cacheline; don't bother with
380 * pending stuff.
381 *
382 * p,*,* -> n,*,*
383 */
384 old = xchg_tail(lock, tail);
385
386 /*
387 * if there was a previous node; link it and wait until reaching the
388 * head of the waitqueue.
389 */
390 if (old & _Q_TAIL_MASK) {
391 prev = decode_tail(old);
392 WRITE_ONCE(prev->next, node);
393
394 pv_wait_node(node);
395 arch_mcs_spin_lock_contended(&node->locked);
396 }
397
398 /*
399 * we're at the head of the waitqueue, wait for the owner & pending to
400 * go away.
401 *
402 * *,x,y -> *,0,0
403 *
404 * this wait loop must use a load-acquire such that we match the
405 * store-release that clears the locked bit and create lock
406 * sequentiality; this is because the set_locked() function below
407 * does not imply a full barrier.
408 *
409 */
410 pv_wait_head(lock, node);
411 while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
412 cpu_relax();
413
414 /*
415 * claim the lock:
416 *
417 * n,0,0 -> 0,0,1 : lock, uncontended
418 * *,0,0 -> *,0,1 : lock, contended
419 *
420 * If the queue head is the only one in the queue (lock value == tail),
421 * clear the tail code and grab the lock. Otherwise, we only need
422 * to grab the lock.
423 */
424 for (;;) {
425 if (val != tail) {
426 set_locked(lock);
427 break;
428 }
429 old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
430 if (old == val)
431 goto release; /* No contention */
432
433 val = old;
434 }
435
436 /*
437 * contended path; wait for next, release.
438 */
439 while (!(next = READ_ONCE(node->next)))
440 cpu_relax();
441
442 arch_mcs_spin_unlock_contended(&next->locked);
443 pv_kick_node(next);
444
445release:
446 /*
447 * release the node
448 */
449 this_cpu_dec(mcs_nodes[0].count);
450}
451EXPORT_SYMBOL(queued_spin_lock_slowpath);
452
453/*
454 * Generate the paravirt code for queued_spin_unlock_slowpath().
455 */
456#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
457#define _GEN_PV_LOCK_SLOWPATH
458
459#undef pv_enabled
460#define pv_enabled() true
461
462#undef pv_init_node
463#undef pv_wait_node
464#undef pv_kick_node
465#undef pv_wait_head
466
467#undef queued_spin_lock_slowpath
468#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
469
470#include "qspinlock_paravirt.h"
471#include "qspinlock.c"
472
473#endif
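
The encode_tail()/decode_tail() pair above packs (cpu + 1, nesting index) into the upper bits of the lock word; the +1 keeps a queued tail distinguishable from the empty value 0. A standalone sketch, with field offsets assumed to match the usual _Q_PENDING_BITS == 8 layout:

#include <stdint.h>
#include <stdio.h>

#define _Q_TAIL_IDX_OFFSET	16
#define _Q_TAIL_IDX_BITS	2
#define _Q_TAIL_IDX_MASK	(((1U << _Q_TAIL_IDX_BITS) - 1) << _Q_TAIL_IDX_OFFSET)
#define _Q_TAIL_CPU_OFFSET	(_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)

static uint32_t encode_tail(int cpu, int idx)
{
	return ((uint32_t)(cpu + 1) << _Q_TAIL_CPU_OFFSET) |
	       ((uint32_t)idx << _Q_TAIL_IDX_OFFSET);
}

static void decode_tail(uint32_t tail, int *cpu, int *idx)
{
	*cpu = (int)(tail >> _Q_TAIL_CPU_OFFSET) - 1;
	*idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
}

int main(void)
{
	int cpu, idx;

	/* cpu 0 at nesting level 0 still yields a non-zero tail. */
	decode_tail(encode_tail(0, 0), &cpu, &idx);
	printf("cpu=%d idx=%d tail=0x%x\n", cpu, idx, (unsigned)encode_tail(0, 0));

	decode_tail(encode_tail(5, 3), &cpu, &idx);
	printf("cpu=%d idx=%d\n", cpu, idx);
	return 0;
}
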
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
new file mode 100644
index 000000000000..04ab18151cc8
--- /dev/null
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -0,0 +1,325 @@
1#ifndef _GEN_PV_LOCK_SLOWPATH
2#error "do not include this file"
3#endif
4
5#include <linux/hash.h>
6#include <linux/bootmem.h>
7
8/*
9 * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
10 * of spinning them.
11 *
12 * This relies on the architecture to provide two paravirt hypercalls:
13 *
14 * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val
15 * pv_kick(cpu) -- wakes a suspended vcpu
16 *
17 * Using these we implement __pv_queued_spin_lock_slowpath() and
18 * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
19 * native_queued_spin_unlock().
20 */
21
22#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
23
24enum vcpu_state {
25 vcpu_running = 0,
26 vcpu_halted,
27};
28
29struct pv_node {
30 struct mcs_spinlock mcs;
31 struct mcs_spinlock __res[3];
32
33 int cpu;
34 u8 state;
35};
36
37/*
38 * Lock and MCS node addresses hash table for fast lookup
39 *
40 * Hashing is done on a per-cacheline basis to minimize the need to access
41 * more than one cacheline.
42 *
43 * Dynamically allocate a hash table big enough to hold at least 4X the
44 * number of possible cpus in the system. Allocation is done on page
45 * granularity. So the minimum number of hash buckets should be at least
46 * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
47 *
48 * Since we should not be holding locks from NMI context (very rare indeed) the
49 * max load factor is 0.75, which is around the point where open addressing
50 * breaks down.
51 *
52 */
53struct pv_hash_entry {
54 struct qspinlock *lock;
55 struct pv_node *node;
56};
57
58#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
59#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry))
60
61static struct pv_hash_entry *pv_lock_hash;
62static unsigned int pv_lock_hash_bits __read_mostly;
63
64/*
65 * Allocate memory for the PV qspinlock hash buckets
66 *
67 * This function should be called from the paravirt spinlock initialization
68 * routine.
69 */
70void __init __pv_init_lock_hash(void)
71{
72 int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
73
74 if (pv_hash_size < PV_HE_MIN)
75 pv_hash_size = PV_HE_MIN;
76
77 /*
78 * Allocate space from bootmem which should be page-size aligned
79 * and hence cacheline aligned.
80 */
81 pv_lock_hash = alloc_large_system_hash("PV qspinlock",
82 sizeof(struct pv_hash_entry),
83 pv_hash_size, 0, HASH_EARLY,
84 &pv_lock_hash_bits, NULL,
85 pv_hash_size, pv_hash_size);
86}
87
88#define for_each_hash_entry(he, offset, hash) \
89 for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0; \
90 offset < (1 << pv_lock_hash_bits); \
91 offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
92
93static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
94{
95 unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
96 struct pv_hash_entry *he;
97
98 for_each_hash_entry(he, offset, hash) {
99 if (!cmpxchg(&he->lock, NULL, lock)) {
100 WRITE_ONCE(he->node, node);
101 return &he->lock;
102 }
103 }
104 /*
105 * Hard assume there is a free entry for us.
106 *
107 * This is guaranteed by ensuring every blocked lock only ever consumes
108 * a single entry, and since we only have 4 nesting levels per CPU
109 * and allocated 4*nr_possible_cpus(), this must be so.
110 *
111 * The single entry is guaranteed by having the lock owner unhash
112 * before it releases.
113 */
114 BUG();
115}
116
117static struct pv_node *pv_unhash(struct qspinlock *lock)
118{
119 unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
120 struct pv_hash_entry *he;
121 struct pv_node *node;
122
123 for_each_hash_entry(he, offset, hash) {
124 if (READ_ONCE(he->lock) == lock) {
125 node = READ_ONCE(he->node);
126 WRITE_ONCE(he->lock, NULL);
127 return node;
128 }
129 }
130 /*
131 * Hard assume we'll find an entry.
132 *
133 * This guarantees a limited lookup time and is itself guaranteed by
134 * having the lock owner do the unhash -- IFF the unlock sees the
135 * SLOW flag, there MUST be a hash entry.
136 */
137 BUG();
138}
139
140/*
141 * Initialize the PV part of the mcs_spinlock node.
142 */
143static void pv_init_node(struct mcs_spinlock *node)
144{
145 struct pv_node *pn = (struct pv_node *)node;
146
147 BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
148
149 pn->cpu = smp_processor_id();
150 pn->state = vcpu_running;
151}
152
153/*
154 * Wait for node->locked to become true, halt the vcpu after a short spin.
155 * pv_kick_node() is used to wake the vcpu again.
156 */
157static void pv_wait_node(struct mcs_spinlock *node)
158{
159 struct pv_node *pn = (struct pv_node *)node;
160 int loop;
161
162 for (;;) {
163 for (loop = SPIN_THRESHOLD; loop; loop--) {
164 if (READ_ONCE(node->locked))
165 return;
166 cpu_relax();
167 }
168
169 /*
170 * Order pn->state vs pn->locked thusly:
171 *
172 * [S] pn->state = vcpu_halted [S] next->locked = 1
173 * MB MB
174 * [L] pn->locked [RmW] pn->state = vcpu_running
175 *
176 * Matches the xchg() from pv_kick_node().
177 */
178 smp_store_mb(pn->state, vcpu_halted);
179
180 if (!READ_ONCE(node->locked))
181 pv_wait(&pn->state, vcpu_halted);
182
183 /*
184 * Reset the vCPU state to avoid unncessary CPU kicking
185 */
186 WRITE_ONCE(pn->state, vcpu_running);
187
188 /*
189 * If the locked flag is still not set after wakeup, it is a
190 * spurious wakeup and the vCPU should wait again. However,
191 * there is a pretty high overhead for CPU halting and kicking.
192 * So it is better to spin for a while in the hope that the
193 * MCS lock will be released soon.
194 */
195 }
196 /*
197 * By now our node->locked should be 1 and our caller will not actually
198 * spin-wait for it. We do however rely on our caller to do a
199 * load-acquire for us.
200 */
201}
202
203/*
204 * Called after setting next->locked = 1, used to wake those stuck in
205 * pv_wait_node().
206 */
207static void pv_kick_node(struct mcs_spinlock *node)
208{
209 struct pv_node *pn = (struct pv_node *)node;
210
211 /*
212 * Note that because node->locked is already set, this actual
213 * mcs_spinlock entry could be re-used already.
214 *
 215 * This should be fine, however; kicking people for no reason is
216 * harmless.
217 *
218 * See the comment in pv_wait_node().
219 */
220 if (xchg(&pn->state, vcpu_running) == vcpu_halted)
221 pv_kick(pn->cpu);
222}
223
224/*
225 * Wait for l->locked to become clear; halt the vcpu after a short spin.
226 * __pv_queued_spin_unlock() will wake us.
227 */
228static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
229{
230 struct pv_node *pn = (struct pv_node *)node;
231 struct __qspinlock *l = (void *)lock;
232 struct qspinlock **lp = NULL;
233 int loop;
234
235 for (;;) {
236 for (loop = SPIN_THRESHOLD; loop; loop--) {
237 if (!READ_ONCE(l->locked))
238 return;
239 cpu_relax();
240 }
241
242 WRITE_ONCE(pn->state, vcpu_halted);
243 if (!lp) { /* ONCE */
244 lp = pv_hash(lock, pn);
245 /*
246 * lp must be set before setting _Q_SLOW_VAL
247 *
248 * [S] lp = lock [RmW] l = l->locked = 0
249 * MB MB
250 * [S] l->locked = _Q_SLOW_VAL [L] lp
251 *
252 * Matches the cmpxchg() in __pv_queued_spin_unlock().
253 */
254 if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
255 /*
256 * The lock is free and _Q_SLOW_VAL has never
257 * been set. Therefore we need to unhash before
258 * getting the lock.
259 */
260 WRITE_ONCE(*lp, NULL);
261 return;
262 }
263 }
264 pv_wait(&l->locked, _Q_SLOW_VAL);
265
266 /*
267 * The unlocker should have freed the lock before kicking the
268 * CPU. So if the lock is still not free, it is a spurious
269 * wakeup and so the vCPU should wait again after spinning for
270 * a while.
271 */
272 }
273
274 /*
275 * Lock is unlocked now; the caller will acquire it without waiting.
276 * As with pv_wait_node() we rely on the caller to do a load-acquire
277 * for us.
278 */
279}
280
281/*
 282 * PV version of the unlock function to be used instead of
283 * queued_spin_unlock().
284 */
285__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
286{
287 struct __qspinlock *l = (void *)lock;
288 struct pv_node *node;
289
290 /*
291 * We must not unlock if SLOW, because in that case we must first
292 * unhash. Otherwise it would be possible to have multiple @lock
293 * entries, which would be BAD.
294 */
295 if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
296 return;
297
298 /*
299 * Since the above failed to release, this must be the SLOW path.
300 * Therefore start by looking up the blocked node and unhashing it.
301 */
302 node = pv_unhash(lock);
303
304 /*
305 * Now that we have a reference to the (likely) blocked pv_node,
306 * release the lock.
307 */
308 smp_store_release(&l->locked, 0);
309
310 /*
311 * At this point the memory pointed at by lock can be freed/reused,
312 * however we can still use the pv_node to kick the CPU.
313 */
314 if (READ_ONCE(node->state) == vcpu_halted)
315 pv_kick(node->cpu);
316}
317/*
318 * Include the architecture specific callee-save thunk of the
319 * __pv_queued_spin_unlock(). This thunk is put together with
320 * __pv_queued_spin_unlock() near the top of the file to make sure
321 * that the callee-save thunk and the real unlock function are close
322 * to each other sharing consecutive instruction cachelines.
323 */
324#include <asm/qspinlock_paravirt.h>
325
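
pv_hash()/pv_unhash() above implement a fixed-size open-addressing table keyed by lock address, sized so a free slot always exists. A toy standalone version (plain linear probing and an arbitrary hash for illustration; the kernel probes cacheline-sized groups and BUG()s instead of failing):

#include <stdio.h>
#include <stdint.h>

#define NBUCKETS 8

struct entry {
	const void *lock;	/* key: lock address, NULL when free */
	int node;		/* value: waiter identifier */
};

static struct entry table[NBUCKETS];

static unsigned hash_ptr(const void *p)
{
	return ((uintptr_t)p >> 4) % NBUCKETS;	/* placeholder hash */
}

static void pv_hash_sketch(const void *lock, int node)
{
	for (unsigned i = 0, h = hash_ptr(lock); i < NBUCKETS;
	     i++, h = (h + 1) % NBUCKETS) {
		if (!table[h].lock) {	/* claim first empty slot */
			table[h].lock = lock;
			table[h].node = node;
			return;
		}
	}
	/* The kernel BUG()s here: its sizing guarantees a free slot. */
}

static int pv_unhash_sketch(const void *lock)
{
	for (unsigned i = 0, h = hash_ptr(lock); i < NBUCKETS;
	     i++, h = (h + 1) % NBUCKETS) {
		if (table[h].lock == lock) {
			table[h].lock = NULL;	/* owner unhashes */
			return table[h].node;
		}
	}
	return -1;	/* the kernel version BUG()s instead */
}

int main(void)
{
	int lock_a, lock_b;	/* stand-ins for qspinlock addresses */

	pv_hash_sketch(&lock_a, 1);
	pv_hash_sketch(&lock_b, 2);
	printf("unhash a -> node %d\n", pv_unhash_sketch(&lock_a));
	printf("unhash b -> node %d\n", pv_unhash_sketch(&lock_b));
	return 0;
}
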
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b025295f4966..30ec5b46cd8c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
70} 70}
71 71
72/* 72/*
73 * We can speed up the acquire/release, if the architecture 73 * We can speed up the acquire/release, if there's no debugging state to be
74 * supports cmpxchg and if there's no debugging state to be set up 74 * set up.
75 */ 75 */
76#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) 76#ifndef CONFIG_DEBUG_RT_MUTEXES
77# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) 77# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
78static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 78static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
79{ 79{
@@ -1443,10 +1443,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1443 * 1443 *
1444 * @lock: the rt_mutex to be locked 1444 * @lock: the rt_mutex to be locked
1445 * 1445 *
1446 * This function can only be called in thread context. It's safe to
1447 * call it from atomic regions, but not from hard interrupt or soft
1448 * interrupt context.
1449 *
1446 * Returns 1 on success and 0 on contention 1450 * Returns 1 on success and 0 on contention
1447 */ 1451 */
1448int __sched rt_mutex_trylock(struct rt_mutex *lock) 1452int __sched rt_mutex_trylock(struct rt_mutex *lock)
1449{ 1453{
1454 if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
1455 return 0;
1456
1450 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); 1457 return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1451} 1458}
1452EXPORT_SYMBOL_GPL(rt_mutex_trylock); 1459EXPORT_SYMBOL_GPL(rt_mutex_trylock);
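
The guard added to rt_mutex_trylock() refuses the lock from hard-interrupt, NMI, and serving-softirq context, matching the new kernel-doc note. Its shape, modelled standalone with hypothetical context flags in place of in_irq()/in_nmi()/in_serving_softirq():

#include <stdio.h>
#include <stdbool.h>

static bool in_irq_ctx, in_nmi_ctx, in_softirq_ctx;	/* hypothetical */

static int do_trylock(void) { return 1; /* pretend it succeeded */ }

static int rt_mutex_trylock_sketch(void)
{
	if (in_irq_ctx || in_nmi_ctx || in_softirq_ctx) {
		fprintf(stderr, "WARN: trylock from interrupt context\n");
		return 0;	/* report contention rather than deadlock */
	}
	return do_trylock();
}

int main(void)
{
	printf("thread context: %d\n", rt_mutex_trylock_sketch());
	in_irq_ctx = true;
	printf("irq context:    %d\n", rt_mutex_trylock_sketch());
	return 0;
}
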
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 3417d0172a5d..0f189714e457 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -409,11 +409,24 @@ done:
409 return taken; 409 return taken;
410} 410}
411 411
412/*
413 * Return true if the rwsem has active spinner
414 */
415static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
416{
417 return osq_is_locked(&sem->osq);
418}
419
412#else 420#else
413static bool rwsem_optimistic_spin(struct rw_semaphore *sem) 421static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
414{ 422{
415 return false; 423 return false;
416} 424}
425
426static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
427{
428 return false;
429}
417#endif 430#endif
418 431
419/* 432/*
@@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
496{ 509{
497 unsigned long flags; 510 unsigned long flags;
498 511
512 /*
513 * If a spinner is present, it is not necessary to do the wakeup.
514 * Try to do wakeup only if the trylock succeeds to minimize
515 * spinlock contention which may introduce too much delay in the
516 * unlock operation.
517 *
518 * spinning writer up_write/up_read caller
519 * --------------- -----------------------
520 * [S] osq_unlock() [L] osq
521 * MB RMB
522 * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
523 *
524 * Here, it is important to make sure that there won't be a missed
525 * wakeup while the rwsem is free and the only spinning writer goes
526 * to sleep without taking the rwsem. Even when the spinning writer
527 * is just going to break out of the waiting loop, it will still do
528 * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
529 * rwsem_has_spinner() is true, it will guarantee at least one
530 * trylock attempt on the rwsem later on.
531 */
532 if (rwsem_has_spinner(sem)) {
533 /*
534 * The smp_rmb() here is to make sure that the spinner
535 * state is consulted before reading the wait_lock.
536 */
537 smp_rmb();
538 if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
539 return sem;
540 goto locked;
541 }
499 raw_spin_lock_irqsave(&sem->wait_lock, flags); 542 raw_spin_lock_irqsave(&sem->wait_lock, flags);
543locked:
500 544
501 /* do nothing if list empty */ 545 /* do nothing if list empty */
502 if (!list_empty(&sem->wait_list)) 546 if (!list_empty(&sem->wait_list))
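
The rwsem_wake() change avoids contending on wait_lock when an optimistic spinner is present, since rwsem_has_spinner() guarantees that spinner will attempt a trylock later anyway. A pthread-based sketch of the control flow (the spinner flag stands in for osq_is_locked(); compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static bool spinner_present;	/* stand-in for osq_is_locked(&sem->osq) */

static void rwsem_wake_sketch(void)
{
	if (spinner_present) {
		/* A failed trylock can give up: the spinner retries. */
		if (pthread_mutex_trylock(&wait_lock) != 0) {
			puts("spinner present, skipping wakeup work");
			return;
		}
	} else {
		pthread_mutex_lock(&wait_lock);
	}

	puts("doing wakeup under wait_lock");
	pthread_mutex_unlock(&wait_lock);
}

int main(void)
{
	rwsem_wake_sketch();
	spinner_present = true;
	rwsem_wake_sketch();	/* trylock succeeds here, still wakes */
	return 0;
}
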
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 8dbe27611ec3..59e32684c23b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -241,6 +241,7 @@ rcu_torture_free(struct rcu_torture *p)
241struct rcu_torture_ops { 241struct rcu_torture_ops {
242 int ttype; 242 int ttype;
243 void (*init)(void); 243 void (*init)(void);
244 void (*cleanup)(void);
244 int (*readlock)(void); 245 int (*readlock)(void);
245 void (*read_delay)(struct torture_random_state *rrsp); 246 void (*read_delay)(struct torture_random_state *rrsp);
246 void (*readunlock)(int idx); 247 void (*readunlock)(int idx);
@@ -477,10 +478,12 @@ static struct rcu_torture_ops rcu_busted_ops = {
477 */ 478 */
478 479
479DEFINE_STATIC_SRCU(srcu_ctl); 480DEFINE_STATIC_SRCU(srcu_ctl);
481static struct srcu_struct srcu_ctld;
482static struct srcu_struct *srcu_ctlp = &srcu_ctl;
480 483
481static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) 484static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
482{ 485{
483 return srcu_read_lock(&srcu_ctl); 486 return srcu_read_lock(srcu_ctlp);
484} 487}
485 488
486static void srcu_read_delay(struct torture_random_state *rrsp) 489static void srcu_read_delay(struct torture_random_state *rrsp)
@@ -499,49 +502,49 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
499 rcu_read_delay(rrsp); 502 rcu_read_delay(rrsp);
500} 503}
501 504
502static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) 505static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
503{ 506{
504 srcu_read_unlock(&srcu_ctl, idx); 507 srcu_read_unlock(srcu_ctlp, idx);
505} 508}
506 509
507static unsigned long srcu_torture_completed(void) 510static unsigned long srcu_torture_completed(void)
508{ 511{
509 return srcu_batches_completed(&srcu_ctl); 512 return srcu_batches_completed(srcu_ctlp);
510} 513}
511 514
512static void srcu_torture_deferred_free(struct rcu_torture *rp) 515static void srcu_torture_deferred_free(struct rcu_torture *rp)
513{ 516{
514 call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); 517 call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb);
515} 518}
516 519
517static void srcu_torture_synchronize(void) 520static void srcu_torture_synchronize(void)
518{ 521{
519 synchronize_srcu(&srcu_ctl); 522 synchronize_srcu(srcu_ctlp);
520} 523}
521 524
522static void srcu_torture_call(struct rcu_head *head, 525static void srcu_torture_call(struct rcu_head *head,
523 void (*func)(struct rcu_head *head)) 526 void (*func)(struct rcu_head *head))
524{ 527{
525 call_srcu(&srcu_ctl, head, func); 528 call_srcu(srcu_ctlp, head, func);
526} 529}
527 530
528static void srcu_torture_barrier(void) 531static void srcu_torture_barrier(void)
529{ 532{
530 srcu_barrier(&srcu_ctl); 533 srcu_barrier(srcu_ctlp);
531} 534}
532 535
533static void srcu_torture_stats(void) 536static void srcu_torture_stats(void)
534{ 537{
535 int cpu; 538 int cpu;
536 int idx = srcu_ctl.completed & 0x1; 539 int idx = srcu_ctlp->completed & 0x1;
537 540
538 pr_alert("%s%s per-CPU(idx=%d):", 541 pr_alert("%s%s per-CPU(idx=%d):",
539 torture_type, TORTURE_FLAG, idx); 542 torture_type, TORTURE_FLAG, idx);
540 for_each_possible_cpu(cpu) { 543 for_each_possible_cpu(cpu) {
541 long c0, c1; 544 long c0, c1;
542 545
543 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; 546 c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx];
544 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; 547 c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx];
545 pr_cont(" %d(%ld,%ld)", cpu, c0, c1); 548 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
546 } 549 }
547 pr_cont("\n"); 550 pr_cont("\n");
@@ -549,7 +552,7 @@ static void srcu_torture_stats(void)
549 552
550static void srcu_torture_synchronize_expedited(void) 553static void srcu_torture_synchronize_expedited(void)
551{ 554{
552 synchronize_srcu_expedited(&srcu_ctl); 555 synchronize_srcu_expedited(srcu_ctlp);
553} 556}
554 557
555static struct rcu_torture_ops srcu_ops = { 558static struct rcu_torture_ops srcu_ops = {
@@ -569,6 +572,38 @@ static struct rcu_torture_ops srcu_ops = {
569 .name = "srcu" 572 .name = "srcu"
570}; 573};
571 574
575static void srcu_torture_init(void)
576{
577 rcu_sync_torture_init();
578 WARN_ON(init_srcu_struct(&srcu_ctld));
579 srcu_ctlp = &srcu_ctld;
580}
581
582static void srcu_torture_cleanup(void)
583{
584 cleanup_srcu_struct(&srcu_ctld);
585 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
586}
587
588/* As above, but dynamically allocated. */
589static struct rcu_torture_ops srcud_ops = {
590 .ttype = SRCU_FLAVOR,
591 .init = srcu_torture_init,
592 .cleanup = srcu_torture_cleanup,
593 .readlock = srcu_torture_read_lock,
594 .read_delay = srcu_read_delay,
595 .readunlock = srcu_torture_read_unlock,
596 .started = NULL,
597 .completed = srcu_torture_completed,
598 .deferred_free = srcu_torture_deferred_free,
599 .sync = srcu_torture_synchronize,
600 .exp_sync = srcu_torture_synchronize_expedited,
601 .call = srcu_torture_call,
602 .cb_barrier = srcu_torture_barrier,
603 .stats = srcu_torture_stats,
604 .name = "srcud"
605};
606
572/* 607/*
573 * Definitions for sched torture testing. 608 * Definitions for sched torture testing.
574 */ 609 */
@@ -672,8 +707,8 @@ static void rcu_torture_boost_cb(struct rcu_head *head)
672 struct rcu_boost_inflight *rbip = 707 struct rcu_boost_inflight *rbip =
673 container_of(head, struct rcu_boost_inflight, rcu); 708 container_of(head, struct rcu_boost_inflight, rcu);
674 709
675 smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ 710 /* Ensure RCU-core accesses precede clearing ->inflight */
676 rbip->inflight = 0; 711 smp_store_release(&rbip->inflight, 0);
677} 712}
678 713
679static int rcu_torture_boost(void *arg) 714static int rcu_torture_boost(void *arg)
@@ -710,9 +745,9 @@ static int rcu_torture_boost(void *arg)
710 call_rcu_time = jiffies; 745 call_rcu_time = jiffies;
711 while (ULONG_CMP_LT(jiffies, endtime)) { 746 while (ULONG_CMP_LT(jiffies, endtime)) {
712 /* If we don't have a callback in flight, post one. */ 747 /* If we don't have a callback in flight, post one. */
713 if (!rbi.inflight) { 748 if (!smp_load_acquire(&rbi.inflight)) {
714 smp_mb(); /* RCU core before ->inflight = 1. */ 749 /* RCU core before ->inflight = 1. */
715 rbi.inflight = 1; 750 smp_store_release(&rbi.inflight, 1);
716 call_rcu(&rbi.rcu, rcu_torture_boost_cb); 751 call_rcu(&rbi.rcu, rcu_torture_boost_cb);
717 if (jiffies - call_rcu_time > 752 if (jiffies - call_rcu_time >
718 test_boost_duration * HZ - HZ / 2) { 753 test_boost_duration * HZ - HZ / 2) {
@@ -751,11 +786,10 @@ checkwait: stutter_wait("rcu_torture_boost");
751 } while (!torture_must_stop()); 786 } while (!torture_must_stop());
752 787
753 /* Clean up and exit. */ 788 /* Clean up and exit. */
754 while (!kthread_should_stop() || rbi.inflight) { 789 while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) {
755 torture_shutdown_absorb("rcu_torture_boost"); 790 torture_shutdown_absorb("rcu_torture_boost");
756 schedule_timeout_uninterruptible(1); 791 schedule_timeout_uninterruptible(1);
757 } 792 }
758 smp_mb(); /* order accesses to ->inflight before stack-frame death. */
759 destroy_rcu_head_on_stack(&rbi.rcu); 793 destroy_rcu_head_on_stack(&rbi.rcu);
760 torture_kthread_stopping("rcu_torture_boost"); 794 torture_kthread_stopping("rcu_torture_boost");
761 return 0; 795 return 0;
@@ -1054,7 +1088,7 @@ static void rcu_torture_timer(unsigned long unused)
1054 p = rcu_dereference_check(rcu_torture_current, 1088 p = rcu_dereference_check(rcu_torture_current,
1055 rcu_read_lock_bh_held() || 1089 rcu_read_lock_bh_held() ||
1056 rcu_read_lock_sched_held() || 1090 rcu_read_lock_sched_held() ||
1057 srcu_read_lock_held(&srcu_ctl)); 1091 srcu_read_lock_held(srcu_ctlp));
1058 if (p == NULL) { 1092 if (p == NULL) {
1059 /* Leave because rcu_torture_writer is not yet underway */ 1093 /* Leave because rcu_torture_writer is not yet underway */
1060 cur_ops->readunlock(idx); 1094 cur_ops->readunlock(idx);
@@ -1128,7 +1162,7 @@ rcu_torture_reader(void *arg)
1128 p = rcu_dereference_check(rcu_torture_current, 1162 p = rcu_dereference_check(rcu_torture_current,
1129 rcu_read_lock_bh_held() || 1163 rcu_read_lock_bh_held() ||
1130 rcu_read_lock_sched_held() || 1164 rcu_read_lock_sched_held() ||
1131 srcu_read_lock_held(&srcu_ctl)); 1165 srcu_read_lock_held(srcu_ctlp));
1132 if (p == NULL) { 1166 if (p == NULL) {
1133 /* Wait for rcu_torture_writer to get underway */ 1167 /* Wait for rcu_torture_writer to get underway */
1134 cur_ops->readunlock(idx); 1168 cur_ops->readunlock(idx);
@@ -1413,12 +1447,15 @@ static int rcu_torture_barrier_cbs(void *arg)
1413 do { 1447 do {
1414 wait_event(barrier_cbs_wq[myid], 1448 wait_event(barrier_cbs_wq[myid],
1415 (newphase = 1449 (newphase =
1416 ACCESS_ONCE(barrier_phase)) != lastphase || 1450 smp_load_acquire(&barrier_phase)) != lastphase ||
1417 torture_must_stop()); 1451 torture_must_stop());
1418 lastphase = newphase; 1452 lastphase = newphase;
1419 smp_mb(); /* ensure barrier_phase load before ->call(). */
1420 if (torture_must_stop()) 1453 if (torture_must_stop())
1421 break; 1454 break;
1455 /*
1456 * The above smp_load_acquire() ensures barrier_phase load
 1457 * is ordered before the following ->call().
1458 */
1422 cur_ops->call(&rcu, rcu_torture_barrier_cbf); 1459 cur_ops->call(&rcu, rcu_torture_barrier_cbf);
1423 if (atomic_dec_and_test(&barrier_cbs_count)) 1460 if (atomic_dec_and_test(&barrier_cbs_count))
1424 wake_up(&barrier_wq); 1461 wake_up(&barrier_wq);
@@ -1439,8 +1476,8 @@ static int rcu_torture_barrier(void *arg)
1439 do { 1476 do {
1440 atomic_set(&barrier_cbs_invoked, 0); 1477 atomic_set(&barrier_cbs_invoked, 0);
1441 atomic_set(&barrier_cbs_count, n_barrier_cbs); 1478 atomic_set(&barrier_cbs_count, n_barrier_cbs);
1442 smp_mb(); /* Ensure barrier_phase after prior assignments. */ 1479 /* Ensure barrier_phase ordered after prior assignments. */
1443 barrier_phase = !barrier_phase; 1480 smp_store_release(&barrier_phase, !barrier_phase);
1444 for (i = 0; i < n_barrier_cbs; i++) 1481 for (i = 0; i < n_barrier_cbs; i++)
1445 wake_up(&barrier_cbs_wq[i]); 1482 wake_up(&barrier_cbs_wq[i]);
1446 wait_event(barrier_wq, 1483 wait_event(barrier_wq,
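
These two hunks form a release/acquire pair that retires both explicit smp_mb() calls: the writer's smp_store_release() of barrier_phase keeps the preceding atomic_set()s ahead of the flip, and each helper's smp_load_acquire() (earlier hunk) keeps the flip ahead of the subsequent ->call(). A compact C11 sketch of the same protocol, assuming a single coordinator thread (names illustrative):

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool barrier_phase;
	static atomic_int barrier_cbs_count;

	void coordinator_start_round(int n_helpers)
	{
		atomic_store_explicit(&barrier_cbs_count, n_helpers,
				      memory_order_relaxed);
		/* Release: the count reset above cannot be reordered past
		 * the phase flip (only one thread ever writes the phase). */
		atomic_store_explicit(&barrier_phase,
				      !atomic_load_explicit(&barrier_phase,
							    memory_order_relaxed),
				      memory_order_release);
	}

	bool helper_saw_new_phase(bool lastphase)
	{
		/* Acquire: pairs with the release store above, so a helper
		 * that sees the new phase also sees the fresh count. */
		return atomic_load_explicit(&barrier_phase,
					    memory_order_acquire) != lastphase;
	}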
@@ -1588,10 +1625,14 @@ rcu_torture_cleanup(void)
1588 rcutorture_booster_cleanup(i); 1625 rcutorture_booster_cleanup(i);
1589 } 1626 }
1590 1627
1591 /* Wait for all RCU callbacks to fire. */ 1628 /*
1592 1629 * Wait for all RCU callbacks to fire, then do flavor-specific
1630 * cleanup operations.
1631 */
1593 if (cur_ops->cb_barrier != NULL) 1632 if (cur_ops->cb_barrier != NULL)
1594 cur_ops->cb_barrier(); 1633 cur_ops->cb_barrier();
1634 if (cur_ops->cleanup != NULL)
1635 cur_ops->cleanup();
1595 1636
1596 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 1637 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
1597 1638
@@ -1668,8 +1709,8 @@ rcu_torture_init(void)
1668 int cpu; 1709 int cpu;
1669 int firsterr = 0; 1710 int firsterr = 0;
1670 static struct rcu_torture_ops *torture_ops[] = { 1711 static struct rcu_torture_ops *torture_ops[] = {
1671 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1712 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
1672 RCUTORTURE_TASKS_OPS 1713 &sched_ops, RCUTORTURE_TASKS_OPS
1673 }; 1714 };
1674 1715
1675 if (!torture_init_begin(torture_type, verbose, &torture_runnable)) 1716 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
@@ -1701,7 +1742,7 @@ rcu_torture_init(void)
1701 if (nreaders >= 0) { 1742 if (nreaders >= 0) {
1702 nrealreaders = nreaders; 1743 nrealreaders = nreaders;
1703 } else { 1744 } else {
1704 nrealreaders = num_online_cpus() - 1; 1745 nrealreaders = num_online_cpus() - 2 - nreaders;
1705 if (nrealreaders <= 0) 1746 if (nrealreaders <= 0)
1706 nrealreaders = 1; 1747 nrealreaders = 1;
1707 } 1748 }
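
For reference, the reader-count change above preserves the old default while letting negative nreaders values scale the reader population: with the usual module default of nreaders = -1, the result is still num_online_cpus() - 1. A worked sketch of the arithmetic (hypothetical helper, not part of the patch):

	static int compute_nrealreaders(int nreaders, int ncpus)
	{
		int n;

		if (nreaders >= 0)
			return nreaders;
		n = ncpus - 2 - nreaders;       /* nreaders == -1: ncpus - 1 */
		return n > 0 ? n : 1;
	}

	/* compute_nrealreaders(-1, 8) == 7, matching the old ncpus - 1;
	 * compute_nrealreaders(-2, 8) == 8, adding one more reader. */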
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index cad76e76b4e7..fb33d35ee0b7 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -151,7 +151,7 @@ static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx)
151 unsigned long t; 151 unsigned long t;
152 152
153 for_each_possible_cpu(cpu) { 153 for_each_possible_cpu(cpu) {
154 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); 154 t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]);
155 sum += t; 155 sum += t;
156 } 156 }
157 return sum; 157 return sum;
@@ -168,7 +168,7 @@ static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx)
168 unsigned long t; 168 unsigned long t;
169 169
170 for_each_possible_cpu(cpu) { 170 for_each_possible_cpu(cpu) {
171 t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); 171 t = READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]);
172 sum += t; 172 sum += t;
173 } 173 }
174 return sum; 174 return sum;
@@ -265,8 +265,8 @@ static int srcu_readers_active(struct srcu_struct *sp)
265 unsigned long sum = 0; 265 unsigned long sum = 0;
266 266
267 for_each_possible_cpu(cpu) { 267 for_each_possible_cpu(cpu) {
268 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); 268 sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]);
269 sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); 269 sum += READ_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]);
270 } 270 }
271 return sum; 271 return sum;
272} 272}
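
All of the per-CPU summations above tolerate racing with concurrent updates, but each counter must be fetched exactly once and without tearing, which is what READ_ONCE() (like the ACCESS_ONCE() it replaces) provides. The closest well-defined userspace analogue is a relaxed C11 atomic load, e.g.:

	#include <stdatomic.h>

	#define NR_CPUS 4
	static _Atomic unsigned long per_cpu_c[2][NR_CPUS];

	static unsigned long readers_active_idx(int idx)
	{
		unsigned long sum = 0;
		int cpu;

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			/* one untorn, non-refetched read per counter */
			sum += atomic_load_explicit(&per_cpu_c[idx][cpu],
						    memory_order_relaxed);
		return sum;
	}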
@@ -296,7 +296,7 @@ int __srcu_read_lock(struct srcu_struct *sp)
296{ 296{
297 int idx; 297 int idx;
298 298
299 idx = ACCESS_ONCE(sp->completed) & 0x1; 299 idx = READ_ONCE(sp->completed) & 0x1;
300 preempt_disable(); 300 preempt_disable();
301 __this_cpu_inc(sp->per_cpu_ref->c[idx]); 301 __this_cpu_inc(sp->per_cpu_ref->c[idx]);
302 smp_mb(); /* B */ /* Avoid leaking the critical section. */ 302 smp_mb(); /* B */ /* Avoid leaking the critical section. */
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 069742d61c68..591af0cb7b9f 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head,
49 49
50#include "tiny_plugin.h" 50#include "tiny_plugin.h"
51 51
52/*
53 * Enter idle, which is an extended quiescent state if we have fully
54 * entered that mode.
55 */
56void rcu_idle_enter(void)
57{
58}
59EXPORT_SYMBOL_GPL(rcu_idle_enter);
60
61/*
62 * Exit an interrupt handler towards idle.
63 */
64void rcu_irq_exit(void)
65{
66}
67EXPORT_SYMBOL_GPL(rcu_irq_exit);
68
69/*
70 * Exit idle, so that we are no longer in an extended quiescent state.
71 */
72void rcu_idle_exit(void)
73{
74}
75EXPORT_SYMBOL_GPL(rcu_idle_exit);
76
77/*
78 * Enter an interrupt handler, moving away from idle.
79 */
80void rcu_irq_enter(void)
81{
82}
83EXPORT_SYMBOL_GPL(rcu_irq_enter);
84
85#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) 52#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
86 53
87/* 54/*
@@ -170,6 +137,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
170 137
171 /* Move the ready-to-invoke callbacks to a local list. */ 138 /* Move the ready-to-invoke callbacks to a local list. */
172 local_irq_save(flags); 139 local_irq_save(flags);
140 if (rcp->donetail == &rcp->rcucblist) {
141 /* No callbacks ready, so just leave. */
142 local_irq_restore(flags);
143 return;
144 }
173 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); 145 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
174 list = rcp->rcucblist; 146 list = rcp->rcucblist;
175 rcp->rcucblist = *rcp->donetail; 147 rcp->rcucblist = *rcp->donetail;
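
The early exit added above leans on the tiny-RCU list invariant: ->rcucblist is a singly linked callback list and ->donetail points at the pointer slot just past the last ready-to-invoke callback, so donetail == &rcucblist means the ready segment is empty and the splice (plus its trace event) can be skipped. A simplified model of the two-pointer layout (the real code also maintains ->curtail, omitted here):

	#include <stddef.h>
	#include <stdbool.h>

	struct cb {
		struct cb *next;
	};

	struct ctrlblk {
		struct cb *rcucblist;   /* head of all queued callbacks */
		struct cb **donetail;   /* slot just past the last ready cb */
	};

	static bool no_ready_callbacks(struct ctrlblk *rcp)
	{
		return rcp->donetail == &rcp->rcucblist;
	}

	static struct cb *splice_ready(struct ctrlblk *rcp)
	{
		struct cb *list;

		if (no_ready_callbacks(rcp))    /* the check added above */
			return NULL;
		list = rcp->rcucblist;          /* head of ready segment */
		rcp->rcucblist = *rcp->donetail;/* keep not-yet-ready tail */
		*rcp->donetail = NULL;          /* terminate ready segment */
		rcp->donetail = &rcp->rcucblist;
		return list;
	}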
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index f94e209a10d6..e492a5253e0f 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -144,16 +144,17 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
144 return; 144 return;
145 rcp->ticks_this_gp++; 145 rcp->ticks_this_gp++;
146 j = jiffies; 146 j = jiffies;
147 js = ACCESS_ONCE(rcp->jiffies_stall); 147 js = READ_ONCE(rcp->jiffies_stall);
148 if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { 148 if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", 149 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
150 rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, 150 rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
151 jiffies - rcp->gp_start, rcp->qlen); 151 jiffies - rcp->gp_start, rcp->qlen);
152 dump_stack(); 152 dump_stack();
153 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 153 WRITE_ONCE(rcp->jiffies_stall,
154 3 * rcu_jiffies_till_stall_check() + 3; 154 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
155 } else if (ULONG_CMP_GE(j, js)) { 155 } else if (ULONG_CMP_GE(j, js)) {
156 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); 156 WRITE_ONCE(rcp->jiffies_stall,
157 jiffies + rcu_jiffies_till_stall_check());
157 } 158 }
158} 159}
159 160
@@ -161,7 +162,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
161{ 162{
162 rcp->ticks_this_gp = 0; 163 rcp->ticks_this_gp = 0;
163 rcp->gp_start = jiffies; 164 rcp->gp_start = jiffies;
164 ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); 165 WRITE_ONCE(rcp->jiffies_stall,
166 jiffies + rcu_jiffies_till_stall_check());
165} 167}
166 168
167static void check_cpu_stalls(void) 169static void check_cpu_stalls(void)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8cf7304b2867..add042926a66 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,7 +91,7 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
91 91
92#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ 92#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
93DEFINE_RCU_TPS(sname) \ 93DEFINE_RCU_TPS(sname) \
94DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ 94static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
95struct rcu_state sname##_state = { \ 95struct rcu_state sname##_state = { \
96 .level = { &sname##_state.node[0] }, \ 96 .level = { &sname##_state.node[0] }, \
97 .rda = &sname##_data, \ 97 .rda = &sname##_data, \
@@ -110,11 +110,18 @@ struct rcu_state sname##_state = { \
110RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); 110RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
111RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); 111RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
112 112
113static struct rcu_state *rcu_state_p; 113static struct rcu_state *const rcu_state_p;
114static struct rcu_data __percpu *const rcu_data_p;
114LIST_HEAD(rcu_struct_flavors); 115LIST_HEAD(rcu_struct_flavors);
115 116
116/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ 117/* Dump rcu_node combining tree at boot to verify correct setup. */
117static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; 118static bool dump_tree;
119module_param(dump_tree, bool, 0444);
120/* Control rcu_node-tree auto-balancing at boot time. */
121static bool rcu_fanout_exact;
122module_param(rcu_fanout_exact, bool, 0444);
123/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
124static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
118module_param(rcu_fanout_leaf, int, 0444); 125module_param(rcu_fanout_leaf, int, 0444);
119int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; 126int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
120static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ 127static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */
@@ -159,17 +166,46 @@ static void invoke_rcu_core(void);
159static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 166static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
160 167
161/* rcuc/rcub kthread realtime priority */ 168/* rcuc/rcub kthread realtime priority */
169#ifdef CONFIG_RCU_KTHREAD_PRIO
162static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; 170static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
171#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */
172static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
173#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */
163module_param(kthread_prio, int, 0644); 174module_param(kthread_prio, int, 0644);
164 175
165/* Delay in jiffies for grace-period initialization delays, debug only. */ 176/* Delay in jiffies for grace-period initialization delays, debug only. */
177
178#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
179static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY;
180module_param(gp_preinit_delay, int, 0644);
181#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
182static const int gp_preinit_delay;
183#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */
184
166#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT 185#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
167static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; 186static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
168module_param(gp_init_delay, int, 0644); 187module_param(gp_init_delay, int, 0644);
169#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ 188#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
170static const int gp_init_delay; 189static const int gp_init_delay;
171#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ 190#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
172#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ 191
192#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
193static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
194module_param(gp_cleanup_delay, int, 0644);
195#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
196static const int gp_cleanup_delay;
197#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
198
199/*
200 * Number of grace periods between delays, normalized by the duration of
 201 * the delay. The longer the delay, the more grace periods elapse
 202 * between delays. The reason for this normalization is that,
203 * for non-zero delays, the overall slowdown of grace periods is constant
204 * regardless of the duration of the delay. This arrangement balances
205 * the need for long delays to increase some race probabilities with the
206 * need for fast grace periods to increase other race probabilities.
207 */
208#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */
173 209
174/* 210/*
175 * Track the rcutorture test sequence number and the update version 211 * Track the rcutorture test sequence number and the update version
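
A back-of-envelope check of the normalization claim in that comment, assuming the rcu_gp_slow() helper added later in this patch (which sleeps for delay jiffies at each rcu_node, but only in grace periods whose gpnum is a multiple of rcu_num_nodes * PER_RCU_NODE_PERIOD * delay):

	/*
	 *   cost of one "slow" grace period ~= rcu_num_nodes * delay jiffies
	 *   fraction of GPs that are slow    = 1 / (rcu_num_nodes *
	 *                                          PER_RCU_NODE_PERIOD * delay)
	 *   average cost per grace period   ~= 1 / PER_RCU_NODE_PERIOD
	 *                                    = 1/3 jiffy
	 *
	 * The configured delay cancels out, so the mean slowdown is the
	 * same for any non-zero delay, exactly as the comment claims.
	 */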
@@ -191,17 +227,17 @@ unsigned long rcutorture_vernum;
191 */ 227 */
192unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) 228unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
193{ 229{
194 return ACCESS_ONCE(rnp->qsmaskinitnext); 230 return READ_ONCE(rnp->qsmaskinitnext);
195} 231}
196 232
197/* 233/*
198 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 234 * Return true if an RCU grace period is in progress. The READ_ONCE()s
199 * permit this function to be invoked without holding the root rcu_node 235 * permit this function to be invoked without holding the root rcu_node
200 * structure's ->lock, but of course results can be subject to change. 236 * structure's ->lock, but of course results can be subject to change.
201 */ 237 */
202static int rcu_gp_in_progress(struct rcu_state *rsp) 238static int rcu_gp_in_progress(struct rcu_state *rsp)
203{ 239{
204 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); 240 return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum);
205} 241}
206 242
207/* 243/*
@@ -278,8 +314,8 @@ static void rcu_momentary_dyntick_idle(void)
278 if (!(resched_mask & rsp->flavor_mask)) 314 if (!(resched_mask & rsp->flavor_mask))
279 continue; 315 continue;
280 smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */ 316 smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
281 if (ACCESS_ONCE(rdp->mynode->completed) != 317 if (READ_ONCE(rdp->mynode->completed) !=
282 ACCESS_ONCE(rdp->cond_resched_completed)) 318 READ_ONCE(rdp->cond_resched_completed))
283 continue; 319 continue;
284 320
285 /* 321 /*
@@ -491,9 +527,9 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
491 break; 527 break;
492 } 528 }
493 if (rsp != NULL) { 529 if (rsp != NULL) {
494 *flags = ACCESS_ONCE(rsp->gp_flags); 530 *flags = READ_ONCE(rsp->gp_flags);
495 *gpnum = ACCESS_ONCE(rsp->gpnum); 531 *gpnum = READ_ONCE(rsp->gpnum);
496 *completed = ACCESS_ONCE(rsp->completed); 532 *completed = READ_ONCE(rsp->completed);
497 return; 533 return;
498 } 534 }
499 *flags = 0; 535 *flags = 0;
@@ -539,10 +575,10 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
539static int rcu_future_needs_gp(struct rcu_state *rsp) 575static int rcu_future_needs_gp(struct rcu_state *rsp)
540{ 576{
541 struct rcu_node *rnp = rcu_get_root(rsp); 577 struct rcu_node *rnp = rcu_get_root(rsp);
542 int idx = (ACCESS_ONCE(rnp->completed) + 1) & 0x1; 578 int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
543 int *fp = &rnp->need_future_gp[idx]; 579 int *fp = &rnp->need_future_gp[idx];
544 580
545 return ACCESS_ONCE(*fp); 581 return READ_ONCE(*fp);
546} 582}
547 583
548/* 584/*
@@ -565,7 +601,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
565 return 1; /* Yes, this CPU has newly registered callbacks. */ 601 return 1; /* Yes, this CPU has newly registered callbacks. */
566 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) 602 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
567 if (rdp->nxttail[i - 1] != rdp->nxttail[i] && 603 if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
568 ULONG_CMP_LT(ACCESS_ONCE(rsp->completed), 604 ULONG_CMP_LT(READ_ONCE(rsp->completed),
569 rdp->nxtcompleted[i])) 605 rdp->nxtcompleted[i]))
570 return 1; /* Yes, CBs for future grace period. */ 606 return 1; /* Yes, CBs for future grace period. */
571 return 0; /* No grace period needed. */ 607 return 0; /* No grace period needed. */
@@ -585,7 +621,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
585 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 621 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
586 622
587 trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); 623 trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
588 if (!user && !is_idle_task(current)) { 624 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
625 !user && !is_idle_task(current)) {
589 struct task_struct *idle __maybe_unused = 626 struct task_struct *idle __maybe_unused =
590 idle_task(smp_processor_id()); 627 idle_task(smp_processor_id());
591 628
@@ -604,7 +641,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
604 smp_mb__before_atomic(); /* See above. */ 641 smp_mb__before_atomic(); /* See above. */
605 atomic_inc(&rdtp->dynticks); 642 atomic_inc(&rdtp->dynticks);
606 smp_mb__after_atomic(); /* Force ordering with next sojourn. */ 643 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
607 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 644 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
645 atomic_read(&rdtp->dynticks) & 0x1);
608 rcu_dynticks_task_enter(); 646 rcu_dynticks_task_enter();
609 647
610 /* 648 /*
@@ -630,7 +668,8 @@ static void rcu_eqs_enter(bool user)
630 668
631 rdtp = this_cpu_ptr(&rcu_dynticks); 669 rdtp = this_cpu_ptr(&rcu_dynticks);
632 oldval = rdtp->dynticks_nesting; 670 oldval = rdtp->dynticks_nesting;
633 WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); 671 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
672 (oldval & DYNTICK_TASK_NEST_MASK) == 0);
634 if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { 673 if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
635 rdtp->dynticks_nesting = 0; 674 rdtp->dynticks_nesting = 0;
636 rcu_eqs_enter_common(oldval, user); 675 rcu_eqs_enter_common(oldval, user);
@@ -703,7 +742,8 @@ void rcu_irq_exit(void)
703 rdtp = this_cpu_ptr(&rcu_dynticks); 742 rdtp = this_cpu_ptr(&rcu_dynticks);
704 oldval = rdtp->dynticks_nesting; 743 oldval = rdtp->dynticks_nesting;
705 rdtp->dynticks_nesting--; 744 rdtp->dynticks_nesting--;
706 WARN_ON_ONCE(rdtp->dynticks_nesting < 0); 745 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
746 rdtp->dynticks_nesting < 0);
707 if (rdtp->dynticks_nesting) 747 if (rdtp->dynticks_nesting)
708 trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); 748 trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
709 else 749 else
@@ -728,10 +768,12 @@ static void rcu_eqs_exit_common(long long oldval, int user)
728 atomic_inc(&rdtp->dynticks); 768 atomic_inc(&rdtp->dynticks);
729 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 769 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
730 smp_mb__after_atomic(); /* See above. */ 770 smp_mb__after_atomic(); /* See above. */
731 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 771 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
772 !(atomic_read(&rdtp->dynticks) & 0x1));
732 rcu_cleanup_after_idle(); 773 rcu_cleanup_after_idle();
733 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); 774 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
734 if (!user && !is_idle_task(current)) { 775 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
776 !user && !is_idle_task(current)) {
735 struct task_struct *idle __maybe_unused = 777 struct task_struct *idle __maybe_unused =
736 idle_task(smp_processor_id()); 778 idle_task(smp_processor_id());
737 779
@@ -755,7 +797,7 @@ static void rcu_eqs_exit(bool user)
755 797
756 rdtp = this_cpu_ptr(&rcu_dynticks); 798 rdtp = this_cpu_ptr(&rcu_dynticks);
757 oldval = rdtp->dynticks_nesting; 799 oldval = rdtp->dynticks_nesting;
758 WARN_ON_ONCE(oldval < 0); 800 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
759 if (oldval & DYNTICK_TASK_NEST_MASK) { 801 if (oldval & DYNTICK_TASK_NEST_MASK) {
760 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; 802 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
761 } else { 803 } else {
@@ -828,7 +870,8 @@ void rcu_irq_enter(void)
828 rdtp = this_cpu_ptr(&rcu_dynticks); 870 rdtp = this_cpu_ptr(&rcu_dynticks);
829 oldval = rdtp->dynticks_nesting; 871 oldval = rdtp->dynticks_nesting;
830 rdtp->dynticks_nesting++; 872 rdtp->dynticks_nesting++;
831 WARN_ON_ONCE(rdtp->dynticks_nesting == 0); 873 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
874 rdtp->dynticks_nesting == 0);
832 if (oldval) 875 if (oldval)
833 trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); 876 trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
834 else 877 else
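
The WARN_ON_ONCE() rewrites in the hunks above all use the same idiom: prefixing a debug-only condition with IS_ENABLED(CONFIG_RCU_EQS_DEBUG) makes it a compile-time constant false when the option is off, so the compiler drops the check entirely while still parsing and type-checking it, unlike an #ifdef block. A userspace sketch of the idea (CONFIG_MY_DEBUG and MY_IS_ENABLED are stand-ins for the kernel machinery):

	#include <assert.h>

	#define CONFIG_MY_DEBUG 0       /* 0: checks vanish; 1: checks fire */
	#define MY_IS_ENABLED(opt) (opt)

	static inline void check_nesting(long nesting)
	{
		/* With CONFIG_MY_DEBUG == 0 this folds to assert(1) and
		 * compiles to nothing, but the expression is still seen
		 * by the compiler in every configuration. */
		assert(!(MY_IS_ENABLED(CONFIG_MY_DEBUG) && nesting < 0));
	}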
@@ -1011,9 +1054,9 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
1011 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 1054 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
1012 return 1; 1055 return 1;
1013 } else { 1056 } else {
1014 if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, 1057 if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
1015 rdp->mynode->gpnum)) 1058 rdp->mynode->gpnum))
1016 ACCESS_ONCE(rdp->gpwrap) = true; 1059 WRITE_ONCE(rdp->gpwrap, true);
1017 return 0; 1060 return 0;
1018 } 1061 }
1019} 1062}
@@ -1093,12 +1136,12 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
1093 if (ULONG_CMP_GE(jiffies, 1136 if (ULONG_CMP_GE(jiffies,
1094 rdp->rsp->gp_start + jiffies_till_sched_qs) || 1137 rdp->rsp->gp_start + jiffies_till_sched_qs) ||
1095 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 1138 ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
1096 if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { 1139 if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
1097 ACCESS_ONCE(rdp->cond_resched_completed) = 1140 WRITE_ONCE(rdp->cond_resched_completed,
1098 ACCESS_ONCE(rdp->mynode->completed); 1141 READ_ONCE(rdp->mynode->completed));
1099 smp_mb(); /* ->cond_resched_completed before *rcrmp. */ 1142 smp_mb(); /* ->cond_resched_completed before *rcrmp. */
1100 ACCESS_ONCE(*rcrmp) = 1143 WRITE_ONCE(*rcrmp,
1101 ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask; 1144 READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
1102 resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ 1145 resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
1103 rdp->rsp->jiffies_resched += 5; /* Enable beating. */ 1146 rdp->rsp->jiffies_resched += 5; /* Enable beating. */
1104 } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { 1147 } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
@@ -1119,9 +1162,9 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
1119 rsp->gp_start = j; 1162 rsp->gp_start = j;
1120 smp_wmb(); /* Record start time before stall time. */ 1163 smp_wmb(); /* Record start time before stall time. */
1121 j1 = rcu_jiffies_till_stall_check(); 1164 j1 = rcu_jiffies_till_stall_check();
1122 ACCESS_ONCE(rsp->jiffies_stall) = j + j1; 1165 WRITE_ONCE(rsp->jiffies_stall, j + j1);
1123 rsp->jiffies_resched = j + j1 / 2; 1166 rsp->jiffies_resched = j + j1 / 2;
1124 rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); 1167 rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs);
1125} 1168}
1126 1169
1127/* 1170/*
@@ -1133,10 +1176,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1133 unsigned long j; 1176 unsigned long j;
1134 1177
1135 j = jiffies; 1178 j = jiffies;
1136 gpa = ACCESS_ONCE(rsp->gp_activity); 1179 gpa = READ_ONCE(rsp->gp_activity);
1137 if (j - gpa > 2 * HZ) 1180 if (j - gpa > 2 * HZ)
1138 pr_err("%s kthread starved for %ld jiffies!\n", 1181 pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n",
1139 rsp->name, j - gpa); 1182 rsp->name, j - gpa,
1183 rsp->gpnum, rsp->completed, rsp->gp_flags);
1140} 1184}
1141 1185
1142/* 1186/*
@@ -1173,12 +1217,13 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1173 /* Only let one CPU complain about others per time interval. */ 1217 /* Only let one CPU complain about others per time interval. */
1174 1218
1175 raw_spin_lock_irqsave(&rnp->lock, flags); 1219 raw_spin_lock_irqsave(&rnp->lock, flags);
1176 delta = jiffies - ACCESS_ONCE(rsp->jiffies_stall); 1220 delta = jiffies - READ_ONCE(rsp->jiffies_stall);
1177 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 1221 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
1178 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1222 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1179 return; 1223 return;
1180 } 1224 }
1181 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; 1225 WRITE_ONCE(rsp->jiffies_stall,
1226 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1182 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1227 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1183 1228
1184 /* 1229 /*
@@ -1212,12 +1257,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1212 if (ndetected) { 1257 if (ndetected) {
1213 rcu_dump_cpu_stacks(rsp); 1258 rcu_dump_cpu_stacks(rsp);
1214 } else { 1259 } else {
1215 if (ACCESS_ONCE(rsp->gpnum) != gpnum || 1260 if (READ_ONCE(rsp->gpnum) != gpnum ||
1216 ACCESS_ONCE(rsp->completed) == gpnum) { 1261 READ_ONCE(rsp->completed) == gpnum) {
1217 pr_err("INFO: Stall ended before state dump start\n"); 1262 pr_err("INFO: Stall ended before state dump start\n");
1218 } else { 1263 } else {
1219 j = jiffies; 1264 j = jiffies;
1220 gpa = ACCESS_ONCE(rsp->gp_activity); 1265 gpa = READ_ONCE(rsp->gp_activity);
1221 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", 1266 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
1222 rsp->name, j - gpa, j, gpa, 1267 rsp->name, j - gpa, j, gpa,
1223 jiffies_till_next_fqs, 1268 jiffies_till_next_fqs,
@@ -1262,9 +1307,9 @@ static void print_cpu_stall(struct rcu_state *rsp)
1262 rcu_dump_cpu_stacks(rsp); 1307 rcu_dump_cpu_stacks(rsp);
1263 1308
1264 raw_spin_lock_irqsave(&rnp->lock, flags); 1309 raw_spin_lock_irqsave(&rnp->lock, flags);
1265 if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall))) 1310 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
1266 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + 1311 WRITE_ONCE(rsp->jiffies_stall,
1267 3 * rcu_jiffies_till_stall_check() + 3; 1312 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1268 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1313 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1269 1314
1270 /* 1315 /*
@@ -1307,20 +1352,20 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1307 * Given this check, comparisons of jiffies, rsp->jiffies_stall, 1352 * Given this check, comparisons of jiffies, rsp->jiffies_stall,
1308 * and rsp->gp_start suffice to forestall false positives. 1353 * and rsp->gp_start suffice to forestall false positives.
1309 */ 1354 */
1310 gpnum = ACCESS_ONCE(rsp->gpnum); 1355 gpnum = READ_ONCE(rsp->gpnum);
1311 smp_rmb(); /* Pick up ->gpnum first... */ 1356 smp_rmb(); /* Pick up ->gpnum first... */
1312 js = ACCESS_ONCE(rsp->jiffies_stall); 1357 js = READ_ONCE(rsp->jiffies_stall);
1313 smp_rmb(); /* ...then ->jiffies_stall before the rest... */ 1358 smp_rmb(); /* ...then ->jiffies_stall before the rest... */
1314 gps = ACCESS_ONCE(rsp->gp_start); 1359 gps = READ_ONCE(rsp->gp_start);
1315 smp_rmb(); /* ...and finally ->gp_start before ->completed. */ 1360 smp_rmb(); /* ...and finally ->gp_start before ->completed. */
1316 completed = ACCESS_ONCE(rsp->completed); 1361 completed = READ_ONCE(rsp->completed);
1317 if (ULONG_CMP_GE(completed, gpnum) || 1362 if (ULONG_CMP_GE(completed, gpnum) ||
1318 ULONG_CMP_LT(j, js) || 1363 ULONG_CMP_LT(j, js) ||
1319 ULONG_CMP_GE(gps, js)) 1364 ULONG_CMP_GE(gps, js))
1320 return; /* No stall or GP completed since entering function. */ 1365 return; /* No stall or GP completed since entering function. */
1321 rnp = rdp->mynode; 1366 rnp = rdp->mynode;
1322 if (rcu_gp_in_progress(rsp) && 1367 if (rcu_gp_in_progress(rsp) &&
1323 (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) { 1368 (READ_ONCE(rnp->qsmask) & rdp->grpmask)) {
1324 1369
1325 /* We haven't checked in, so go dump stack. */ 1370 /* We haven't checked in, so go dump stack. */
1326 print_cpu_stall(rsp); 1371 print_cpu_stall(rsp);
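
The four stall-check snapshots above are deliberately ordered: ->gpnum, then ->jiffies_stall, then ->gp_start, then ->completed, each fetched with READ_ONCE() and separated by smp_rmb(). In C11 terms this is roughly a chain of relaxed loads separated by acquire fences, e.g.:

	#include <stdatomic.h>

	static _Atomic unsigned long a_gpnum, a_jiffies_stall,
				     a_gp_start, a_completed;

	static void snapshot_stall_state(unsigned long *gpnum,
					 unsigned long *js,
					 unsigned long *gps,
					 unsigned long *completed)
	{
		*gpnum = atomic_load_explicit(&a_gpnum, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);   /* ~ smp_rmb() */
		*js = atomic_load_explicit(&a_jiffies_stall,
					   memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);
		*gps = atomic_load_explicit(&a_gp_start, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);
		*completed = atomic_load_explicit(&a_completed,
						  memory_order_relaxed);
	}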
@@ -1347,7 +1392,7 @@ void rcu_cpu_stall_reset(void)
1347 struct rcu_state *rsp; 1392 struct rcu_state *rsp;
1348 1393
1349 for_each_rcu_flavor(rsp) 1394 for_each_rcu_flavor(rsp)
1350 ACCESS_ONCE(rsp->jiffies_stall) = jiffies + ULONG_MAX / 2; 1395 WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
1351} 1396}
1352 1397
1353/* 1398/*
@@ -1457,7 +1502,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1457 * doing some extra useless work. 1502 * doing some extra useless work.
1458 */ 1503 */
1459 if (rnp->gpnum != rnp->completed || 1504 if (rnp->gpnum != rnp->completed ||
1460 ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) { 1505 READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
1461 rnp->need_future_gp[c & 0x1]++; 1506 rnp->need_future_gp[c & 0x1]++;
1462 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1507 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1463 goto out; 1508 goto out;
@@ -1542,7 +1587,7 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1542static void rcu_gp_kthread_wake(struct rcu_state *rsp) 1587static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1543{ 1588{
1544 if (current == rsp->gp_kthread || 1589 if (current == rsp->gp_kthread ||
1545 !ACCESS_ONCE(rsp->gp_flags) || 1590 !READ_ONCE(rsp->gp_flags) ||
1546 !rsp->gp_kthread) 1591 !rsp->gp_kthread)
1547 return; 1592 return;
1548 wake_up(&rsp->gp_wq); 1593 wake_up(&rsp->gp_wq);
@@ -1677,7 +1722,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1677 1722
1678 /* Handle the ends of any preceding grace periods first. */ 1723 /* Handle the ends of any preceding grace periods first. */
1679 if (rdp->completed == rnp->completed && 1724 if (rdp->completed == rnp->completed &&
1680 !unlikely(ACCESS_ONCE(rdp->gpwrap))) { 1725 !unlikely(READ_ONCE(rdp->gpwrap))) {
1681 1726
1682 /* No grace period end, so just accelerate recent callbacks. */ 1727 /* No grace period end, so just accelerate recent callbacks. */
1683 ret = rcu_accelerate_cbs(rsp, rnp, rdp); 1728 ret = rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -1692,7 +1737,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1692 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend")); 1737 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1693 } 1738 }
1694 1739
1695 if (rdp->gpnum != rnp->gpnum || unlikely(ACCESS_ONCE(rdp->gpwrap))) { 1740 if (rdp->gpnum != rnp->gpnum || unlikely(READ_ONCE(rdp->gpwrap))) {
1696 /* 1741 /*
1697 * If the current grace period is waiting for this CPU, 1742 * If the current grace period is waiting for this CPU,
1698 * set up to detect a quiescent state, otherwise don't 1743 * set up to detect a quiescent state, otherwise don't
@@ -1704,7 +1749,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1704 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); 1749 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
1705 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1750 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1706 zero_cpu_stall_ticks(rdp); 1751 zero_cpu_stall_ticks(rdp);
1707 ACCESS_ONCE(rdp->gpwrap) = false; 1752 WRITE_ONCE(rdp->gpwrap, false);
1708 } 1753 }
1709 return ret; 1754 return ret;
1710} 1755}
@@ -1717,9 +1762,9 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1717 1762
1718 local_irq_save(flags); 1763 local_irq_save(flags);
1719 rnp = rdp->mynode; 1764 rnp = rdp->mynode;
1720 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) && 1765 if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
1721 rdp->completed == ACCESS_ONCE(rnp->completed) && 1766 rdp->completed == READ_ONCE(rnp->completed) &&
1722 !unlikely(ACCESS_ONCE(rdp->gpwrap))) || /* w/out lock. */ 1767 !unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
1723 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1768 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1724 local_irq_restore(flags); 1769 local_irq_restore(flags);
1725 return; 1770 return;
@@ -1731,6 +1776,13 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1731 rcu_gp_kthread_wake(rsp); 1776 rcu_gp_kthread_wake(rsp);
1732} 1777}
1733 1778
1779static void rcu_gp_slow(struct rcu_state *rsp, int delay)
1780{
1781 if (delay > 0 &&
1782 !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
1783 schedule_timeout_uninterruptible(delay);
1784}
1785
1734/* 1786/*
1735 * Initialize a new grace period. Return 0 if no grace period required. 1787 * Initialize a new grace period. Return 0 if no grace period required.
1736 */ 1788 */
@@ -1740,15 +1792,15 @@ static int rcu_gp_init(struct rcu_state *rsp)
1740 struct rcu_data *rdp; 1792 struct rcu_data *rdp;
1741 struct rcu_node *rnp = rcu_get_root(rsp); 1793 struct rcu_node *rnp = rcu_get_root(rsp);
1742 1794
1743 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1795 WRITE_ONCE(rsp->gp_activity, jiffies);
1744 raw_spin_lock_irq(&rnp->lock); 1796 raw_spin_lock_irq(&rnp->lock);
1745 smp_mb__after_unlock_lock(); 1797 smp_mb__after_unlock_lock();
1746 if (!ACCESS_ONCE(rsp->gp_flags)) { 1798 if (!READ_ONCE(rsp->gp_flags)) {
1747 /* Spurious wakeup, tell caller to go back to sleep. */ 1799 /* Spurious wakeup, tell caller to go back to sleep. */
1748 raw_spin_unlock_irq(&rnp->lock); 1800 raw_spin_unlock_irq(&rnp->lock);
1749 return 0; 1801 return 0;
1750 } 1802 }
1751 ACCESS_ONCE(rsp->gp_flags) = 0; /* Clear all flags: New grace period. */ 1803 WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
1752 1804
1753 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 1805 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
1754 /* 1806 /*
@@ -1773,6 +1825,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1773 * will handle subsequent offline CPUs. 1825 * will handle subsequent offline CPUs.
1774 */ 1826 */
1775 rcu_for_each_leaf_node(rsp, rnp) { 1827 rcu_for_each_leaf_node(rsp, rnp) {
1828 rcu_gp_slow(rsp, gp_preinit_delay);
1776 raw_spin_lock_irq(&rnp->lock); 1829 raw_spin_lock_irq(&rnp->lock);
1777 smp_mb__after_unlock_lock(); 1830 smp_mb__after_unlock_lock();
1778 if (rnp->qsmaskinit == rnp->qsmaskinitnext && 1831 if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@@ -1829,14 +1882,15 @@ static int rcu_gp_init(struct rcu_state *rsp)
1829 * process finishes, because this kthread handles both. 1882 * process finishes, because this kthread handles both.
1830 */ 1883 */
1831 rcu_for_each_node_breadth_first(rsp, rnp) { 1884 rcu_for_each_node_breadth_first(rsp, rnp) {
1885 rcu_gp_slow(rsp, gp_init_delay);
1832 raw_spin_lock_irq(&rnp->lock); 1886 raw_spin_lock_irq(&rnp->lock);
1833 smp_mb__after_unlock_lock(); 1887 smp_mb__after_unlock_lock();
1834 rdp = this_cpu_ptr(rsp->rda); 1888 rdp = this_cpu_ptr(rsp->rda);
1835 rcu_preempt_check_blocked_tasks(rnp); 1889 rcu_preempt_check_blocked_tasks(rnp);
1836 rnp->qsmask = rnp->qsmaskinit; 1890 rnp->qsmask = rnp->qsmaskinit;
1837 ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; 1891 WRITE_ONCE(rnp->gpnum, rsp->gpnum);
1838 if (WARN_ON_ONCE(rnp->completed != rsp->completed)) 1892 if (WARN_ON_ONCE(rnp->completed != rsp->completed))
1839 ACCESS_ONCE(rnp->completed) = rsp->completed; 1893 WRITE_ONCE(rnp->completed, rsp->completed);
1840 if (rnp == rdp->mynode) 1894 if (rnp == rdp->mynode)
1841 (void)__note_gp_changes(rsp, rnp, rdp); 1895 (void)__note_gp_changes(rsp, rnp, rdp);
1842 rcu_preempt_boost_start_gp(rnp); 1896 rcu_preempt_boost_start_gp(rnp);
@@ -1845,10 +1899,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1845 rnp->grphi, rnp->qsmask); 1899 rnp->grphi, rnp->qsmask);
1846 raw_spin_unlock_irq(&rnp->lock); 1900 raw_spin_unlock_irq(&rnp->lock);
1847 cond_resched_rcu_qs(); 1901 cond_resched_rcu_qs();
1848 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1902 WRITE_ONCE(rsp->gp_activity, jiffies);
1849 if (gp_init_delay > 0 &&
1850 !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD)))
1851 schedule_timeout_uninterruptible(gp_init_delay);
1852 } 1903 }
1853 1904
1854 return 1; 1905 return 1;
@@ -1864,7 +1915,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1864 unsigned long maxj; 1915 unsigned long maxj;
1865 struct rcu_node *rnp = rcu_get_root(rsp); 1916 struct rcu_node *rnp = rcu_get_root(rsp);
1866 1917
1867 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1918 WRITE_ONCE(rsp->gp_activity, jiffies);
1868 rsp->n_force_qs++; 1919 rsp->n_force_qs++;
1869 if (fqs_state == RCU_SAVE_DYNTICK) { 1920 if (fqs_state == RCU_SAVE_DYNTICK) {
1870 /* Collect dyntick-idle snapshots. */ 1921 /* Collect dyntick-idle snapshots. */
@@ -1882,11 +1933,11 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1882 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); 1933 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1883 } 1934 }
1884 /* Clear flag to prevent immediate re-entry. */ 1935 /* Clear flag to prevent immediate re-entry. */
1885 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1936 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1886 raw_spin_lock_irq(&rnp->lock); 1937 raw_spin_lock_irq(&rnp->lock);
1887 smp_mb__after_unlock_lock(); 1938 smp_mb__after_unlock_lock();
1888 ACCESS_ONCE(rsp->gp_flags) = 1939 WRITE_ONCE(rsp->gp_flags,
1889 ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS; 1940 READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
1890 raw_spin_unlock_irq(&rnp->lock); 1941 raw_spin_unlock_irq(&rnp->lock);
1891 } 1942 }
1892 return fqs_state; 1943 return fqs_state;
@@ -1903,7 +1954,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1903 struct rcu_data *rdp; 1954 struct rcu_data *rdp;
1904 struct rcu_node *rnp = rcu_get_root(rsp); 1955 struct rcu_node *rnp = rcu_get_root(rsp);
1905 1956
1906 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1957 WRITE_ONCE(rsp->gp_activity, jiffies);
1907 raw_spin_lock_irq(&rnp->lock); 1958 raw_spin_lock_irq(&rnp->lock);
1908 smp_mb__after_unlock_lock(); 1959 smp_mb__after_unlock_lock();
1909 gp_duration = jiffies - rsp->gp_start; 1960 gp_duration = jiffies - rsp->gp_start;
@@ -1934,7 +1985,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1934 smp_mb__after_unlock_lock(); 1985 smp_mb__after_unlock_lock();
1935 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 1986 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
1936 WARN_ON_ONCE(rnp->qsmask); 1987 WARN_ON_ONCE(rnp->qsmask);
1937 ACCESS_ONCE(rnp->completed) = rsp->gpnum; 1988 WRITE_ONCE(rnp->completed, rsp->gpnum);
1938 rdp = this_cpu_ptr(rsp->rda); 1989 rdp = this_cpu_ptr(rsp->rda);
1939 if (rnp == rdp->mynode) 1990 if (rnp == rdp->mynode)
1940 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 1991 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
@@ -1942,7 +1993,8 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1942 nocb += rcu_future_gp_cleanup(rsp, rnp); 1993 nocb += rcu_future_gp_cleanup(rsp, rnp);
1943 raw_spin_unlock_irq(&rnp->lock); 1994 raw_spin_unlock_irq(&rnp->lock);
1944 cond_resched_rcu_qs(); 1995 cond_resched_rcu_qs();
1945 ACCESS_ONCE(rsp->gp_activity) = jiffies; 1996 WRITE_ONCE(rsp->gp_activity, jiffies);
1997 rcu_gp_slow(rsp, gp_cleanup_delay);
1946 } 1998 }
1947 rnp = rcu_get_root(rsp); 1999 rnp = rcu_get_root(rsp);
1948 raw_spin_lock_irq(&rnp->lock); 2000 raw_spin_lock_irq(&rnp->lock);
@@ -1950,16 +2002,16 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1950 rcu_nocb_gp_set(rnp, nocb); 2002 rcu_nocb_gp_set(rnp, nocb);
1951 2003
1952 /* Declare grace period done. */ 2004 /* Declare grace period done. */
1953 ACCESS_ONCE(rsp->completed) = rsp->gpnum; 2005 WRITE_ONCE(rsp->completed, rsp->gpnum);
1954 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 2006 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1955 rsp->fqs_state = RCU_GP_IDLE; 2007 rsp->fqs_state = RCU_GP_IDLE;
1956 rdp = this_cpu_ptr(rsp->rda); 2008 rdp = this_cpu_ptr(rsp->rda);
1957 /* Advance CBs to reduce false positives below. */ 2009 /* Advance CBs to reduce false positives below. */
1958 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; 2010 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
1959 if (needgp || cpu_needs_another_gp(rsp, rdp)) { 2011 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
1960 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; 2012 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
1961 trace_rcu_grace_period(rsp->name, 2013 trace_rcu_grace_period(rsp->name,
1962 ACCESS_ONCE(rsp->gpnum), 2014 READ_ONCE(rsp->gpnum),
1963 TPS("newreq")); 2015 TPS("newreq"));
1964 } 2016 }
1965 raw_spin_unlock_irq(&rnp->lock); 2017 raw_spin_unlock_irq(&rnp->lock);
@@ -1983,20 +2035,20 @@ static int __noreturn rcu_gp_kthread(void *arg)
1983 /* Handle grace-period start. */ 2035 /* Handle grace-period start. */
1984 for (;;) { 2036 for (;;) {
1985 trace_rcu_grace_period(rsp->name, 2037 trace_rcu_grace_period(rsp->name,
1986 ACCESS_ONCE(rsp->gpnum), 2038 READ_ONCE(rsp->gpnum),
1987 TPS("reqwait")); 2039 TPS("reqwait"));
1988 rsp->gp_state = RCU_GP_WAIT_GPS; 2040 rsp->gp_state = RCU_GP_WAIT_GPS;
1989 wait_event_interruptible(rsp->gp_wq, 2041 wait_event_interruptible(rsp->gp_wq,
1990 ACCESS_ONCE(rsp->gp_flags) & 2042 READ_ONCE(rsp->gp_flags) &
1991 RCU_GP_FLAG_INIT); 2043 RCU_GP_FLAG_INIT);
1992 /* Locking provides needed memory barrier. */ 2044 /* Locking provides needed memory barrier. */
1993 if (rcu_gp_init(rsp)) 2045 if (rcu_gp_init(rsp))
1994 break; 2046 break;
1995 cond_resched_rcu_qs(); 2047 cond_resched_rcu_qs();
1996 ACCESS_ONCE(rsp->gp_activity) = jiffies; 2048 WRITE_ONCE(rsp->gp_activity, jiffies);
1997 WARN_ON(signal_pending(current)); 2049 WARN_ON(signal_pending(current));
1998 trace_rcu_grace_period(rsp->name, 2050 trace_rcu_grace_period(rsp->name,
1999 ACCESS_ONCE(rsp->gpnum), 2051 READ_ONCE(rsp->gpnum),
2000 TPS("reqwaitsig")); 2052 TPS("reqwaitsig"));
2001 } 2053 }
2002 2054
@@ -2012,39 +2064,39 @@ static int __noreturn rcu_gp_kthread(void *arg)
2012 if (!ret) 2064 if (!ret)
2013 rsp->jiffies_force_qs = jiffies + j; 2065 rsp->jiffies_force_qs = jiffies + j;
2014 trace_rcu_grace_period(rsp->name, 2066 trace_rcu_grace_period(rsp->name,
2015 ACCESS_ONCE(rsp->gpnum), 2067 READ_ONCE(rsp->gpnum),
2016 TPS("fqswait")); 2068 TPS("fqswait"));
2017 rsp->gp_state = RCU_GP_WAIT_FQS; 2069 rsp->gp_state = RCU_GP_WAIT_FQS;
2018 ret = wait_event_interruptible_timeout(rsp->gp_wq, 2070 ret = wait_event_interruptible_timeout(rsp->gp_wq,
2019 ((gf = ACCESS_ONCE(rsp->gp_flags)) & 2071 ((gf = READ_ONCE(rsp->gp_flags)) &
2020 RCU_GP_FLAG_FQS) || 2072 RCU_GP_FLAG_FQS) ||
2021 (!ACCESS_ONCE(rnp->qsmask) && 2073 (!READ_ONCE(rnp->qsmask) &&
2022 !rcu_preempt_blocked_readers_cgp(rnp)), 2074 !rcu_preempt_blocked_readers_cgp(rnp)),
2023 j); 2075 j);
2024 /* Locking provides needed memory barriers. */ 2076 /* Locking provides needed memory barriers. */
2025 /* If grace period done, leave loop. */ 2077 /* If grace period done, leave loop. */
2026 if (!ACCESS_ONCE(rnp->qsmask) && 2078 if (!READ_ONCE(rnp->qsmask) &&
2027 !rcu_preempt_blocked_readers_cgp(rnp)) 2079 !rcu_preempt_blocked_readers_cgp(rnp))
2028 break; 2080 break;
2029 /* If time for quiescent-state forcing, do it. */ 2081 /* If time for quiescent-state forcing, do it. */
2030 if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || 2082 if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
2031 (gf & RCU_GP_FLAG_FQS)) { 2083 (gf & RCU_GP_FLAG_FQS)) {
2032 trace_rcu_grace_period(rsp->name, 2084 trace_rcu_grace_period(rsp->name,
2033 ACCESS_ONCE(rsp->gpnum), 2085 READ_ONCE(rsp->gpnum),
2034 TPS("fqsstart")); 2086 TPS("fqsstart"));
2035 fqs_state = rcu_gp_fqs(rsp, fqs_state); 2087 fqs_state = rcu_gp_fqs(rsp, fqs_state);
2036 trace_rcu_grace_period(rsp->name, 2088 trace_rcu_grace_period(rsp->name,
2037 ACCESS_ONCE(rsp->gpnum), 2089 READ_ONCE(rsp->gpnum),
2038 TPS("fqsend")); 2090 TPS("fqsend"));
2039 cond_resched_rcu_qs(); 2091 cond_resched_rcu_qs();
2040 ACCESS_ONCE(rsp->gp_activity) = jiffies; 2092 WRITE_ONCE(rsp->gp_activity, jiffies);
2041 } else { 2093 } else {
2042 /* Deal with stray signal. */ 2094 /* Deal with stray signal. */
2043 cond_resched_rcu_qs(); 2095 cond_resched_rcu_qs();
2044 ACCESS_ONCE(rsp->gp_activity) = jiffies; 2096 WRITE_ONCE(rsp->gp_activity, jiffies);
2045 WARN_ON(signal_pending(current)); 2097 WARN_ON(signal_pending(current));
2046 trace_rcu_grace_period(rsp->name, 2098 trace_rcu_grace_period(rsp->name,
2047 ACCESS_ONCE(rsp->gpnum), 2099 READ_ONCE(rsp->gpnum),
2048 TPS("fqswaitsig")); 2100 TPS("fqswaitsig"));
2049 } 2101 }
2050 j = jiffies_till_next_fqs; 2102 j = jiffies_till_next_fqs;
@@ -2086,8 +2138,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
2086 */ 2138 */
2087 return false; 2139 return false;
2088 } 2140 }
2089 ACCESS_ONCE(rsp->gp_flags) = RCU_GP_FLAG_INIT; 2141 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2090 trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), 2142 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
2091 TPS("newreq")); 2143 TPS("newreq"));
2092 2144
2093 /* 2145 /*
@@ -2137,6 +2189,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
2137 __releases(rcu_get_root(rsp)->lock) 2189 __releases(rcu_get_root(rsp)->lock)
2138{ 2190{
2139 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 2191 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
2192 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
2140 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 2193 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2141 rcu_gp_kthread_wake(rsp); 2194 rcu_gp_kthread_wake(rsp);
2142} 2195}
@@ -2334,8 +2387,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2334 rcu_report_qs_rdp(rdp->cpu, rsp, rdp); 2387 rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
2335} 2388}
2336 2389
2337#ifdef CONFIG_HOTPLUG_CPU
2338
2339/* 2390/*
2340 * Send the specified CPU's RCU callbacks to the orphanage. The 2391 * Send the specified CPU's RCU callbacks to the orphanage. The
2341 * specified CPU must be offline, and the caller must hold the 2392 * specified CPU must be offline, and the caller must hold the
@@ -2346,7 +2397,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2346 struct rcu_node *rnp, struct rcu_data *rdp) 2397 struct rcu_node *rnp, struct rcu_data *rdp)
2347{ 2398{
2348 /* No-CBs CPUs do not have orphanable callbacks. */ 2399 /* No-CBs CPUs do not have orphanable callbacks. */
2349 if (rcu_is_nocb_cpu(rdp->cpu)) 2400 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
2350 return; 2401 return;
2351 2402
2352 /* 2403 /*
@@ -2359,7 +2410,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2359 rsp->qlen += rdp->qlen; 2410 rsp->qlen += rdp->qlen;
2360 rdp->n_cbs_orphaned += rdp->qlen; 2411 rdp->n_cbs_orphaned += rdp->qlen;
2361 rdp->qlen_lazy = 0; 2412 rdp->qlen_lazy = 0;
2362 ACCESS_ONCE(rdp->qlen) = 0; 2413 WRITE_ONCE(rdp->qlen, 0);
2363 } 2414 }
2364 2415
2365 /* 2416 /*
@@ -2405,7 +2456,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
2405 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2456 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2406 2457
2407 /* No-CBs CPUs are handled specially. */ 2458 /* No-CBs CPUs are handled specially. */
2408 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) 2459 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2460 rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
2409 return; 2461 return;
2410 2462
2411 /* Do the accounting first. */ 2463 /* Do the accounting first. */
@@ -2452,6 +2504,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2452 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); 2504 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
2453 RCU_TRACE(struct rcu_node *rnp = rdp->mynode); 2505 RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
2454 2506
2507 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2508 return;
2509
2455 RCU_TRACE(mask = rdp->grpmask); 2510 RCU_TRACE(mask = rdp->grpmask);
2456 trace_rcu_grace_period(rsp->name, 2511 trace_rcu_grace_period(rsp->name,
2457 rnp->gpnum + 1 - !!(rnp->qsmask & mask), 2512 rnp->gpnum + 1 - !!(rnp->qsmask & mask),
@@ -2480,7 +2535,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2480 long mask; 2535 long mask;
2481 struct rcu_node *rnp = rnp_leaf; 2536 struct rcu_node *rnp = rnp_leaf;
2482 2537
2483 if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) 2538 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2539 rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
2484 return; 2540 return;
2485 for (;;) { 2541 for (;;) {
2486 mask = rnp->grpmask; 2542 mask = rnp->grpmask;
@@ -2511,6 +2567,9 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
2511 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2567 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2512 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 2568 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2513 2569
2570 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2571 return;
2572
2514 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ 2573 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
2515 mask = rdp->grpmask; 2574 mask = rdp->grpmask;
2516 raw_spin_lock_irqsave(&rnp->lock, flags); 2575 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -2532,6 +2591,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2532 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2591 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2533 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 2592 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2534 2593
2594 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2595 return;
2596
2535 /* Adjust any no-longer-needed kthreads. */ 2597 /* Adjust any no-longer-needed kthreads. */
2536 rcu_boost_kthread_setaffinity(rnp, -1); 2598 rcu_boost_kthread_setaffinity(rnp, -1);
2537 2599
@@ -2546,26 +2608,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2546 cpu, rdp->qlen, rdp->nxtlist); 2608 cpu, rdp->qlen, rdp->nxtlist);
2547} 2609}
2548 2610
2549#else /* #ifdef CONFIG_HOTPLUG_CPU */
2550
2551static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
2552{
2553}
2554
2555static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2556{
2557}
2558
2559static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
2560{
2561}
2562
2563static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2564{
2565}
2566
2567#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
2568
2569/* 2611/*
2570 * Invoke any RCU callbacks that have made it to the end of their grace 2612 * Invoke any RCU callbacks that have made it to the end of their grace
 2571 * period. Throttle as specified by rdp->blimit. 2613 * period. Throttle as specified by rdp->blimit.
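
The deletions above are the payoff of the IS_ENABLED(CONFIG_HOTPLUG_CPU) guards added earlier in this file: instead of maintaining a second, #ifdef'd set of empty stubs, each real function bails out early when hotplug is configured out, and dead-code elimination discards the rest. Schematically (CONFIG_FEATURE is a stand-in):

	/* before:                          after:
	 *
	 *   #ifdef CONFIG_FEATURE            static void cleanup(void)
	 *   static void cleanup(void)        {
	 *   {                                    if (!IS_ENABLED(CONFIG_FEATURE))
	 *           ... real work ...                   return;
	 *   }                                    ... real work ...
	 *   #else                            }
	 *   static void cleanup(void) { }
	 *   #endif
	 *
	 * Both compile to the same object code, but the second form keeps
	 * the real body visible to the compiler in every configuration.
	 */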
@@ -2580,7 +2622,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2580 /* If no callbacks are ready, just return. */ 2622 /* If no callbacks are ready, just return. */
2581 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 2623 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
2582 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); 2624 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
2583 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), 2625 trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist),
2584 need_resched(), is_idle_task(current), 2626 need_resched(), is_idle_task(current),
2585 rcu_is_callbacks_kthread()); 2627 rcu_is_callbacks_kthread());
2586 return; 2628 return;
@@ -2636,7 +2678,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2636 } 2678 }
2637 smp_mb(); /* List handling before counting for rcu_barrier(). */ 2679 smp_mb(); /* List handling before counting for rcu_barrier(). */
2638 rdp->qlen_lazy -= count_lazy; 2680 rdp->qlen_lazy -= count_lazy;
2639 ACCESS_ONCE(rdp->qlen) = rdp->qlen - count; 2681 WRITE_ONCE(rdp->qlen, rdp->qlen - count);
2640 rdp->n_cbs_invoked += count; 2682 rdp->n_cbs_invoked += count;
2641 2683
2642 /* Reinstate batch limit if we have worked down the excess. */ 2684 /* Reinstate batch limit if we have worked down the excess. */
@@ -2730,10 +2772,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
2730 mask = 0; 2772 mask = 0;
2731 raw_spin_lock_irqsave(&rnp->lock, flags); 2773 raw_spin_lock_irqsave(&rnp->lock, flags);
2732 smp_mb__after_unlock_lock(); 2774 smp_mb__after_unlock_lock();
2733 if (!rcu_gp_in_progress(rsp)) {
2734 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2735 return;
2736 }
2737 if (rnp->qsmask == 0) { 2775 if (rnp->qsmask == 0) {
2738 if (rcu_state_p == &rcu_sched_state || 2776 if (rcu_state_p == &rcu_sched_state ||
2739 rsp != rcu_state_p || 2777 rsp != rcu_state_p ||
@@ -2763,8 +2801,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
2763 bit = 1; 2801 bit = 1;
2764 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 2802 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2765 if ((rnp->qsmask & bit) != 0) { 2803 if ((rnp->qsmask & bit) != 0) {
2766 if ((rnp->qsmaskinit & bit) == 0)
2767 *isidle = false; /* Pending hotplug. */
2768 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 2804 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2769 mask |= bit; 2805 mask |= bit;
2770 } 2806 }
@@ -2793,7 +2829,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
2793 /* Funnel through hierarchy to reduce memory contention. */ 2829 /* Funnel through hierarchy to reduce memory contention. */
2794 rnp = __this_cpu_read(rsp->rda->mynode); 2830 rnp = __this_cpu_read(rsp->rda->mynode);
2795 for (; rnp != NULL; rnp = rnp->parent) { 2831 for (; rnp != NULL; rnp = rnp->parent) {
2796 ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || 2832 ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
2797 !raw_spin_trylock(&rnp->fqslock); 2833 !raw_spin_trylock(&rnp->fqslock);
2798 if (rnp_old != NULL) 2834 if (rnp_old != NULL)
2799 raw_spin_unlock(&rnp_old->fqslock); 2835 raw_spin_unlock(&rnp_old->fqslock);
@@ -2809,13 +2845,12 @@ static void force_quiescent_state(struct rcu_state *rsp)
2809 raw_spin_lock_irqsave(&rnp_old->lock, flags); 2845 raw_spin_lock_irqsave(&rnp_old->lock, flags);
2810 smp_mb__after_unlock_lock(); 2846 smp_mb__after_unlock_lock();
2811 raw_spin_unlock(&rnp_old->fqslock); 2847 raw_spin_unlock(&rnp_old->fqslock);
2812 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2848 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2813 rsp->n_force_qs_lh++; 2849 rsp->n_force_qs_lh++;
2814 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2850 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2815 return; /* Someone beat us to it. */ 2851 return; /* Someone beat us to it. */
2816 } 2852 }
2817 ACCESS_ONCE(rsp->gp_flags) = 2853 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
2818 ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
2819 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2854 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2820 rcu_gp_kthread_wake(rsp); 2855 rcu_gp_kthread_wake(rsp);
2821} 2856}
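
force_quiescent_state() above is an example of "funnel locking": each CPU walks from its leaf rcu_node toward the root, trylocking ->fqslock at every level and giving up as soon as either a trylock fails or the FQS flag is already set, so at most one caller reaches the root and the root lock sees O(1) rather than O(nr_cpus) contention. A simplified userspace skeleton of the walk (assumes a non-NULL leaf; the kernel's root-lock handling is elided):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct fnode {
		struct fnode *parent;           /* NULL at the root */
		pthread_mutex_t fqslock;
	};

	static atomic_bool fqs_requested;

	/* Returns true iff this caller won the race to the root. */
	static bool funnel_to_root(struct fnode *leaf)
	{
		struct fnode *n, *held = NULL;
		bool lost;

		for (n = leaf; n; n = n->parent) {
			lost = atomic_load_explicit(&fqs_requested,
						    memory_order_relaxed) ||
			       pthread_mutex_trylock(&n->fqslock) != 0;
			if (held)
				pthread_mutex_unlock(&held->fqslock);
			if (lost)
				return false;   /* another CPU got here first */
			held = n;               /* hand-over-hand to the root */
		}
		atomic_store_explicit(&fqs_requested, true,
				      memory_order_relaxed);
		pthread_mutex_unlock(&held->fqslock);
		return true;
	}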
@@ -2881,7 +2916,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
2881 */ 2916 */
2882static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 2917static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
2883{ 2918{
2884 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) 2919 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
2885 return; 2920 return;
2886 if (likely(!rsp->boost)) { 2921 if (likely(!rsp->boost)) {
2887 rcu_do_batch(rsp, rdp); 2922 rcu_do_batch(rsp, rdp);
@@ -2972,7 +3007,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
2972 WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */ 3007 WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
2973 if (debug_rcu_head_queue(head)) { 3008 if (debug_rcu_head_queue(head)) {
2974 /* Probable double call_rcu(), so leak the callback. */ 3009 /* Probable double call_rcu(), so leak the callback. */
2975 ACCESS_ONCE(head->func) = rcu_leak_callback; 3010 WRITE_ONCE(head->func, rcu_leak_callback);
2976 WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n"); 3011 WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
2977 return; 3012 return;
2978 } 3013 }
@@ -3011,7 +3046,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
3011 if (!likely(rdp->nxtlist)) 3046 if (!likely(rdp->nxtlist))
3012 init_default_callback_list(rdp); 3047 init_default_callback_list(rdp);
3013 } 3048 }
3014 ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; 3049 WRITE_ONCE(rdp->qlen, rdp->qlen + 1);
3015 if (lazy) 3050 if (lazy)
3016 rdp->qlen_lazy++; 3051 rdp->qlen_lazy++;
3017 else 3052 else
@@ -3287,7 +3322,7 @@ void synchronize_sched_expedited(void)
3287 if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start), 3322 if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
3288 (ulong)atomic_long_read(&rsp->expedited_done) + 3323 (ulong)atomic_long_read(&rsp->expedited_done) +
3289 ULONG_MAX / 8)) { 3324 ULONG_MAX / 8)) {
3290 synchronize_sched(); 3325 wait_rcu_gp(call_rcu_sched);
3291 atomic_long_inc(&rsp->expedited_wrap); 3326 atomic_long_inc(&rsp->expedited_wrap);
3292 return; 3327 return;
3293 } 3328 }
@@ -3450,14 +3485,14 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3450 } 3485 }
3451 3486
3452 /* Has another RCU grace period completed? */ 3487 /* Has another RCU grace period completed? */
3453 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 3488 if (READ_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
3454 rdp->n_rp_gp_completed++; 3489 rdp->n_rp_gp_completed++;
3455 return 1; 3490 return 1;
3456 } 3491 }
3457 3492
3458 /* Has a new RCU grace period started? */ 3493 /* Has a new RCU grace period started? */
3459 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum || 3494 if (READ_ONCE(rnp->gpnum) != rdp->gpnum ||
3460 unlikely(ACCESS_ONCE(rdp->gpwrap))) { /* outside lock */ 3495 unlikely(READ_ONCE(rdp->gpwrap))) { /* outside lock */
3461 rdp->n_rp_gp_started++; 3496 rdp->n_rp_gp_started++;
3462 return 1; 3497 return 1;
3463 } 3498 }
@@ -3493,7 +3528,7 @@ static int rcu_pending(void)
3493 * non-NULL, store an indication of whether all callbacks are lazy. 3528 * non-NULL, store an indication of whether all callbacks are lazy.
3494 * (If there are no callbacks, all of them are deemed to be lazy.) 3529 * (If there are no callbacks, all of them are deemed to be lazy.)
3495 */ 3530 */
3496static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) 3531static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
3497{ 3532{
3498 bool al = true; 3533 bool al = true;
3499 bool hc = false; 3534 bool hc = false;
@@ -3564,7 +3599,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3564{ 3599{
3565 int cpu; 3600 int cpu;
3566 struct rcu_data *rdp; 3601 struct rcu_data *rdp;
3567 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); 3602 unsigned long snap = READ_ONCE(rsp->n_barrier_done);
3568 unsigned long snap_done; 3603 unsigned long snap_done;
3569 3604
3570 _rcu_barrier_trace(rsp, "Begin", -1, snap); 3605 _rcu_barrier_trace(rsp, "Begin", -1, snap);
@@ -3606,10 +3641,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
3606 3641
3607 /* 3642 /*
3608 * Increment ->n_barrier_done to avoid duplicate work. Use 3643 * Increment ->n_barrier_done to avoid duplicate work. Use
3609 * ACCESS_ONCE() to prevent the compiler from speculating 3644 * WRITE_ONCE() to prevent the compiler from speculating
3610 * the increment to precede the early-exit check. 3645 * the increment to precede the early-exit check.
3611 */ 3646 */
3612 ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1; 3647 WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
3613 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); 3648 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
3614 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); 3649 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
3615 smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ 3650 smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
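
The ordering concern in the comment above is compiler-level, not CPU-level: a plain "counter = counter + 1" store could legally be torn, duplicated, or hoisted above the early-exit check, whereas WRITE_ONCE() forces exactly one volatile store. A hedged sketch of the increment shape (names simplified):

	/* Illustrative only: bump a counter that lockless readers
	 * sample with READ_ONCE().  The barrier mutex serializes
	 * writers, so the load side of the increment may be plain;
	 * the store must be a single one-time write. */
	static void bump_barrier_done(unsigned long *n_barrier_done)
	{
		WRITE_ONCE(*n_barrier_done, *n_barrier_done + 1);
	}
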
@@ -3645,7 +3680,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3645 __call_rcu(&rdp->barrier_head, 3680 __call_rcu(&rdp->barrier_head,
3646 rcu_barrier_callback, rsp, cpu, 0); 3681 rcu_barrier_callback, rsp, cpu, 0);
3647 } 3682 }
3648 } else if (ACCESS_ONCE(rdp->qlen)) { 3683 } else if (READ_ONCE(rdp->qlen)) {
3649 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 3684 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
3650 rsp->n_barrier_done); 3685 rsp->n_barrier_done);
3651 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 3686 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3665,7 +3700,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3665 3700
3666 /* Increment ->n_barrier_done to prevent duplicate work. */ 3701 /* Increment ->n_barrier_done to prevent duplicate work. */
3667 smp_mb(); /* Keep increment after above mechanism. */ 3702 smp_mb(); /* Keep increment after above mechanism. */
3668 ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1; 3703 WRITE_ONCE(rsp->n_barrier_done, rsp->n_barrier_done + 1);
3669 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); 3704 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
3670 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); 3705 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
3671 smp_mb(); /* Keep increment before caller's subsequent code. */ 3706 smp_mb(); /* Keep increment before caller's subsequent code. */
@@ -3780,7 +3815,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3780 rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ 3815 rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
3781 rdp->completed = rnp->completed; 3816 rdp->completed = rnp->completed;
3782 rdp->passed_quiesce = false; 3817 rdp->passed_quiesce = false;
3783 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); 3818 rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
3784 rdp->qs_pending = false; 3819 rdp->qs_pending = false;
3785 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); 3820 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
3786 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3821 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3924,16 +3959,16 @@ void rcu_scheduler_starting(void)
3924 3959
3925/* 3960/*
3926 * Compute the per-level fanout, either using the exact fanout specified 3961 * Compute the per-level fanout, either using the exact fanout specified
3927 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 3962 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
3928 */ 3963 */
3929static void __init rcu_init_levelspread(struct rcu_state *rsp) 3964static void __init rcu_init_levelspread(struct rcu_state *rsp)
3930{ 3965{
3931 int i; 3966 int i;
3932 3967
3933 if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { 3968 if (rcu_fanout_exact) {
3934 rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; 3969 rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
3935 for (i = rcu_num_lvls - 2; i >= 0; i--) 3970 for (i = rcu_num_lvls - 2; i >= 0; i--)
3936 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 3971 rsp->levelspread[i] = RCU_FANOUT;
3937 } else { 3972 } else {
3938 int ccur; 3973 int ccur;
3939 int cprv; 3974 int cprv;
@@ -3971,9 +4006,9 @@ static void __init rcu_init_one(struct rcu_state *rsp,
3971 4006
3972 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 4007 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
3973 4008
3974 /* Silence gcc 4.8 warning about array index out of range. */ 4009 /* Silence gcc 4.8 false positive about array index out of range. */
3975 if (rcu_num_lvls > RCU_NUM_LVLS) 4010 if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
3976 panic("rcu_init_one: rcu_num_lvls overflow"); 4011 panic("rcu_init_one: rcu_num_lvls out of range");
3977 4012
3978 /* Initialize the level-tracking arrays. */ 4013 /* Initialize the level-tracking arrays. */
3979 4014
@@ -4059,7 +4094,7 @@ static void __init rcu_init_geometry(void)
4059 jiffies_till_next_fqs = d; 4094 jiffies_till_next_fqs = d;
4060 4095
4061 /* If the compile-time values are accurate, just leave. */ 4096 /* If the compile-time values are accurate, just leave. */
4062 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && 4097 if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
4063 nr_cpu_ids == NR_CPUS) 4098 nr_cpu_ids == NR_CPUS)
4064 return; 4099 return;
4065 pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n", 4100 pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
@@ -4073,7 +4108,7 @@ static void __init rcu_init_geometry(void)
4073 rcu_capacity[0] = 1; 4108 rcu_capacity[0] = 1;
4074 rcu_capacity[1] = rcu_fanout_leaf; 4109 rcu_capacity[1] = rcu_fanout_leaf;
4075 for (i = 2; i <= MAX_RCU_LVLS; i++) 4110 for (i = 2; i <= MAX_RCU_LVLS; i++)
4076 rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; 4111 rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
4077 4112
4078 /* 4113 /*
4079 * The boot-time rcu_fanout_leaf parameter is only permitted 4114 * The boot-time rcu_fanout_leaf parameter is only permitted
@@ -4083,7 +4118,7 @@ static void __init rcu_init_geometry(void)
4083 * the configured number of CPUs. Complain and fall back to the 4118 * the configured number of CPUs. Complain and fall back to the
4084 * compile-time values if these limits are exceeded. 4119 * compile-time values if these limits are exceeded.
4085 */ 4120 */
4086 if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || 4121 if (rcu_fanout_leaf < RCU_FANOUT_LEAF ||
4087 rcu_fanout_leaf > sizeof(unsigned long) * 8 || 4122 rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
4088 n > rcu_capacity[MAX_RCU_LVLS]) { 4123 n > rcu_capacity[MAX_RCU_LVLS]) {
4089 WARN_ON(1); 4124 WARN_ON(1);
@@ -4109,6 +4144,28 @@ static void __init rcu_init_geometry(void)
4109 rcu_num_nodes -= n; 4144 rcu_num_nodes -= n;
4110} 4145}
4111 4146
4147/*
4148 * Dump out the structure of the rcu_node combining tree associated
4149 * with the rcu_state structure referenced by rsp.
4150 */
4151static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
4152{
4153 int level = 0;
4154 struct rcu_node *rnp;
4155
4156 pr_info("rcu_node tree layout dump\n");
4157 pr_info(" ");
4158 rcu_for_each_node_breadth_first(rsp, rnp) {
4159 if (rnp->level != level) {
4160 pr_cont("\n");
4161 pr_info(" ");
4162 level = rnp->level;
4163 }
4164 pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum);
4165 }
4166 pr_cont("\n");
4167}
4168
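
The pr_cont() format above emits one "grplo:grphi ^grpnum" triple per rcu_node, one row per tree level. On a hypothetical 16-CPU system with a leaf fanout of 8, the dump would look roughly like:

	rcu_node tree layout dump
	 0:15 ^0
	 0:7 ^0 8:15 ^1
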
4112void __init rcu_init(void) 4169void __init rcu_init(void)
4113{ 4170{
4114 int cpu; 4171 int cpu;
@@ -4119,6 +4176,8 @@ void __init rcu_init(void)
4119 rcu_init_geometry(); 4176 rcu_init_geometry();
4120 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 4177 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
4121 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 4178 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
4179 if (dump_tree)
4180 rcu_dump_rcu_node_tree(&rcu_sched_state);
4122 __rcu_init_preempt(); 4181 __rcu_init_preempt();
4123 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 4182 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
4124 4183
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a69d3dab2ec4..4adb7ca0bf47 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -35,11 +35,33 @@
35 * In practice, this did work well going from three levels to four. 35 * In practice, this did work well going from three levels to four.
36 * Of course, your mileage may vary. 36 * Of course, your mileage may vary.
37 */ 37 */
38
38#define MAX_RCU_LVLS 4 39#define MAX_RCU_LVLS 4
39#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) 40
40#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 41#ifdef CONFIG_RCU_FANOUT
41#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 42#define RCU_FANOUT CONFIG_RCU_FANOUT
42#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 43#else /* #ifdef CONFIG_RCU_FANOUT */
44# ifdef CONFIG_64BIT
45# define RCU_FANOUT 64
46# else
47# define RCU_FANOUT 32
48# endif
49#endif /* #else #ifdef CONFIG_RCU_FANOUT */
50
51#ifdef CONFIG_RCU_FANOUT_LEAF
52#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
53#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */
54# ifdef CONFIG_64BIT
55# define RCU_FANOUT_LEAF 64
56# else
57# define RCU_FANOUT_LEAF 32
58# endif
59#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */
60
61#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
62#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT)
63#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT)
64#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT)
43 65
44#if NR_CPUS <= RCU_FANOUT_1 66#if NR_CPUS <= RCU_FANOUT_1
45# define RCU_NUM_LVLS 1 67# define RCU_NUM_LVLS 1
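
With the fallback definitions above, RCU_FANOUT and RCU_FANOUT_LEAF are always defined, so the RCU_FANOUT_1..4 ladder and the NR_CPUS comparisons below keep working even when the Kconfig options are hidden. A small user-space sketch of the resulting capacity ladder, assuming the 64-bit defaults (fanout and leaf fanout both 64):

	#include <stdio.h>

	/* Illustrative only: the CPU capacity reachable by a tree of
	 * each depth, mirroring RCU_FANOUT_1..RCU_FANOUT_4. */
	int main(void)
	{
		unsigned long fanout = 64, cap = 64;	/* leaf level */
		int level;

		for (level = 1; level <= 4; level++) {
			printf("RCU_FANOUT_%d = %lu CPUs\n", level, cap);
			cap *= fanout;
		}
		return 0;
	}
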
@@ -170,7 +192,6 @@ struct rcu_node {
170 /* if there is no such task. If there */ 192 /* if there is no such task. If there */
171 /* is no current expedited grace period, */ 193 /* is no current expedited grace period, */
172 /* then there cannot be any such task. */ 194 /* then there cannot be any such task. */
173#ifdef CONFIG_RCU_BOOST
174 struct list_head *boost_tasks; 195 struct list_head *boost_tasks;
175 /* Pointer to first task that needs to be */ 196 /* Pointer to first task that needs to be */
176 /* priority boosted, or NULL if no priority */ 197 /* priority boosted, or NULL if no priority */
@@ -208,7 +229,6 @@ struct rcu_node {
208 unsigned long n_balk_nos; 229 unsigned long n_balk_nos;
209 /* Refused to boost: not sure why, though. */ 230 /* Refused to boost: not sure why, though. */
210 /* This can happen due to race conditions. */ 231 /* This can happen due to race conditions. */
211#endif /* #ifdef CONFIG_RCU_BOOST */
212#ifdef CONFIG_RCU_NOCB_CPU 232#ifdef CONFIG_RCU_NOCB_CPU
213 wait_queue_head_t nocb_gp_wq[2]; 233 wait_queue_head_t nocb_gp_wq[2];
214 /* Place for rcu_nocb_kthread() to wait GP. */ 234 /* Place for rcu_nocb_kthread() to wait GP. */
@@ -519,14 +539,11 @@ extern struct list_head rcu_struct_flavors;
519 * RCU implementation internal declarations: 539 * RCU implementation internal declarations:
520 */ 540 */
521extern struct rcu_state rcu_sched_state; 541extern struct rcu_state rcu_sched_state;
522DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
523 542
524extern struct rcu_state rcu_bh_state; 543extern struct rcu_state rcu_bh_state;
525DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
526 544
527#ifdef CONFIG_PREEMPT_RCU 545#ifdef CONFIG_PREEMPT_RCU
528extern struct rcu_state rcu_preempt_state; 546extern struct rcu_state rcu_preempt_state;
529DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
530#endif /* #ifdef CONFIG_PREEMPT_RCU */ 547#endif /* #ifdef CONFIG_PREEMPT_RCU */
531 548
532#ifdef CONFIG_RCU_BOOST 549#ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 8c0ec0f5a027..32664347091a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -43,7 +43,17 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
43DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 43DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
44DEFINE_PER_CPU(char, rcu_cpu_has_work); 44DEFINE_PER_CPU(char, rcu_cpu_has_work);
45 45
46#endif /* #ifdef CONFIG_RCU_BOOST */ 46#else /* #ifdef CONFIG_RCU_BOOST */
47
48/*
49 * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
50 * all uses are in dead code. Provide a definition to keep the compiler
51 * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
52 * This probably needs to be excluded from -rt builds.
53 */
54#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
55
56#endif /* #else #ifdef CONFIG_RCU_BOOST */
47 57
48#ifdef CONFIG_RCU_NOCB_CPU 58#ifdef CONFIG_RCU_NOCB_CPU
49static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 59static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
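
The rt_mutex_owner() stub above relies on a GCC statement expression, which lets the macro both fire the warning and evaluate to NULL. A minimal user-space sketch of the construct (stub_owner() is a hypothetical name):

	#include <stdio.h>

	/* A statement expression runs the block and yields the value
	 * of its last expression -- here, a warning plus NULL. */
	#define stub_owner(a) ({ fprintf(stderr, "unexpected call\n"); (void *)0; })

	int main(void)
	{
		void *owner = stub_owner(0);

		printf("owner = %p\n", owner);
		return 0;
	}
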
@@ -60,11 +70,11 @@ static void __init rcu_bootup_announce_oddness(void)
60{ 70{
61 if (IS_ENABLED(CONFIG_RCU_TRACE)) 71 if (IS_ENABLED(CONFIG_RCU_TRACE))
62 pr_info("\tRCU debugfs-based tracing is enabled.\n"); 72 pr_info("\tRCU debugfs-based tracing is enabled.\n");
63 if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || 73 if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
64 (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) 74 (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
65 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 75 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
66 CONFIG_RCU_FANOUT); 76 RCU_FANOUT);
67 if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) 77 if (rcu_fanout_exact)
68 pr_info("\tHierarchical RCU autobalancing is disabled.\n"); 78 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
69 if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) 79 if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
70 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 80 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
@@ -76,10 +86,10 @@ static void __init rcu_bootup_announce_oddness(void)
76 pr_info("\tAdditional per-CPU info printed with stalls.\n"); 86 pr_info("\tAdditional per-CPU info printed with stalls.\n");
77 if (NUM_RCU_LVL_4 != 0) 87 if (NUM_RCU_LVL_4 != 0)
78 pr_info("\tFour-level hierarchy is enabled.\n"); 88 pr_info("\tFour-level hierarchy is enabled.\n");
79 if (CONFIG_RCU_FANOUT_LEAF != 16) 89 if (RCU_FANOUT_LEAF != 16)
80 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", 90 pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
81 CONFIG_RCU_FANOUT_LEAF); 91 RCU_FANOUT_LEAF);
82 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) 92 if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
83 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 93 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
84 if (nr_cpu_ids != NR_CPUS) 94 if (nr_cpu_ids != NR_CPUS)
85 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 95 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
@@ -90,7 +100,8 @@ static void __init rcu_bootup_announce_oddness(void)
90#ifdef CONFIG_PREEMPT_RCU 100#ifdef CONFIG_PREEMPT_RCU
91 101
92RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 102RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
93static struct rcu_state *rcu_state_p = &rcu_preempt_state; 103static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
104static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
94 105
95static int rcu_preempted_readers_exp(struct rcu_node *rnp); 106static int rcu_preempted_readers_exp(struct rcu_node *rnp);
96static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 107static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -116,11 +127,11 @@ static void __init rcu_bootup_announce(void)
116 */ 127 */
117static void rcu_preempt_qs(void) 128static void rcu_preempt_qs(void)
118{ 129{
119 if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { 130 if (!__this_cpu_read(rcu_data_p->passed_quiesce)) {
120 trace_rcu_grace_period(TPS("rcu_preempt"), 131 trace_rcu_grace_period(TPS("rcu_preempt"),
121 __this_cpu_read(rcu_preempt_data.gpnum), 132 __this_cpu_read(rcu_data_p->gpnum),
122 TPS("cpuqs")); 133 TPS("cpuqs"));
123 __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); 134 __this_cpu_write(rcu_data_p->passed_quiesce, 1);
124 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ 135 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
125 current->rcu_read_unlock_special.b.need_qs = false; 136 current->rcu_read_unlock_special.b.need_qs = false;
126 } 137 }
@@ -150,7 +161,7 @@ static void rcu_preempt_note_context_switch(void)
150 !t->rcu_read_unlock_special.b.blocked) { 161 !t->rcu_read_unlock_special.b.blocked) {
151 162
152 /* Possibly blocking in an RCU read-side critical section. */ 163 /* Possibly blocking in an RCU read-side critical section. */
153 rdp = this_cpu_ptr(rcu_preempt_state.rda); 164 rdp = this_cpu_ptr(rcu_state_p->rda);
154 rnp = rdp->mynode; 165 rnp = rdp->mynode;
155 raw_spin_lock_irqsave(&rnp->lock, flags); 166 raw_spin_lock_irqsave(&rnp->lock, flags);
156 smp_mb__after_unlock_lock(); 167 smp_mb__after_unlock_lock();
@@ -180,10 +191,9 @@ static void rcu_preempt_note_context_switch(void)
180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 191 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 192 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
182 rnp->gp_tasks = &t->rcu_node_entry; 193 rnp->gp_tasks = &t->rcu_node_entry;
183#ifdef CONFIG_RCU_BOOST 194 if (IS_ENABLED(CONFIG_RCU_BOOST) &&
184 if (rnp->boost_tasks != NULL) 195 rnp->boost_tasks != NULL)
185 rnp->boost_tasks = rnp->gp_tasks; 196 rnp->boost_tasks = rnp->gp_tasks;
186#endif /* #ifdef CONFIG_RCU_BOOST */
187 } else { 197 } else {
188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 198 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
189 if (rnp->qsmask & rdp->grpmask) 199 if (rnp->qsmask & rdp->grpmask)
@@ -263,9 +273,7 @@ void rcu_read_unlock_special(struct task_struct *t)
263 bool empty_exp_now; 273 bool empty_exp_now;
264 unsigned long flags; 274 unsigned long flags;
265 struct list_head *np; 275 struct list_head *np;
266#ifdef CONFIG_RCU_BOOST
267 bool drop_boost_mutex = false; 276 bool drop_boost_mutex = false;
268#endif /* #ifdef CONFIG_RCU_BOOST */
269 struct rcu_node *rnp; 277 struct rcu_node *rnp;
270 union rcu_special special; 278 union rcu_special special;
271 279
@@ -307,9 +315,11 @@ void rcu_read_unlock_special(struct task_struct *t)
307 t->rcu_read_unlock_special.b.blocked = false; 315 t->rcu_read_unlock_special.b.blocked = false;
308 316
309 /* 317 /*
310 * Remove this task from the list it blocked on. The 318 * Remove this task from the list it blocked on. The task
311 * task can migrate while we acquire the lock, but at 319 * now remains queued on the rcu_node corresponding to
312 * most one time. So at most two passes through loop. 320 * the CPU it first blocked on, so the first attempt to
321 * acquire the task's rcu_node's ->lock will succeed.
322 * Keep the loop and add a WARN_ON() out of sheer paranoia.
313 */ 323 */
314 for (;;) { 324 for (;;) {
315 rnp = t->rcu_blocked_node; 325 rnp = t->rcu_blocked_node;
@@ -317,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t)
317 smp_mb__after_unlock_lock(); 327 smp_mb__after_unlock_lock();
318 if (rnp == t->rcu_blocked_node) 328 if (rnp == t->rcu_blocked_node)
319 break; 329 break;
330 WARN_ON_ONCE(1);
320 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
321 } 332 }
322 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); 333 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
@@ -331,12 +342,12 @@ void rcu_read_unlock_special(struct task_struct *t)
331 rnp->gp_tasks = np; 342 rnp->gp_tasks = np;
332 if (&t->rcu_node_entry == rnp->exp_tasks) 343 if (&t->rcu_node_entry == rnp->exp_tasks)
333 rnp->exp_tasks = np; 344 rnp->exp_tasks = np;
334#ifdef CONFIG_RCU_BOOST 345 if (IS_ENABLED(CONFIG_RCU_BOOST)) {
335 if (&t->rcu_node_entry == rnp->boost_tasks) 346 if (&t->rcu_node_entry == rnp->boost_tasks)
336 rnp->boost_tasks = np; 347 rnp->boost_tasks = np;
337 /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ 348 /* Snapshot ->boost_mtx ownership w/rnp->lock held. */
338 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; 349 drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
339#endif /* #ifdef CONFIG_RCU_BOOST */ 350 }
340 351
341 /* 352 /*
342 * If this was the last task on the current list, and if 353 * If this was the last task on the current list, and if
@@ -353,24 +364,21 @@ void rcu_read_unlock_special(struct task_struct *t)
353 rnp->grplo, 364 rnp->grplo,
354 rnp->grphi, 365 rnp->grphi,
355 !!rnp->gp_tasks); 366 !!rnp->gp_tasks);
356 rcu_report_unblock_qs_rnp(&rcu_preempt_state, 367 rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
357 rnp, flags);
358 } else { 368 } else {
359 raw_spin_unlock_irqrestore(&rnp->lock, flags); 369 raw_spin_unlock_irqrestore(&rnp->lock, flags);
360 } 370 }
361 371
362#ifdef CONFIG_RCU_BOOST
363 /* Unboost if we were boosted. */ 372 /* Unboost if we were boosted. */
364 if (drop_boost_mutex) 373 if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
365 rt_mutex_unlock(&rnp->boost_mtx); 374 rt_mutex_unlock(&rnp->boost_mtx);
366#endif /* #ifdef CONFIG_RCU_BOOST */
367 375
368 /* 376 /*
369 * If this was the last task on the expedited lists, 377 * If this was the last task on the expedited lists,
370 * then we need to report up the rcu_node hierarchy. 378 * then we need to report up the rcu_node hierarchy.
371 */ 379 */
372 if (!empty_exp && empty_exp_now) 380 if (!empty_exp && empty_exp_now)
373 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); 381 rcu_report_exp_rnp(rcu_state_p, rnp, true);
374 } else { 382 } else {
375 local_irq_restore(flags); 383 local_irq_restore(flags);
376 } 384 }
@@ -390,7 +398,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
390 raw_spin_unlock_irqrestore(&rnp->lock, flags); 398 raw_spin_unlock_irqrestore(&rnp->lock, flags);
391 return; 399 return;
392 } 400 }
393 t = list_entry(rnp->gp_tasks, 401 t = list_entry(rnp->gp_tasks->prev,
394 struct task_struct, rcu_node_entry); 402 struct task_struct, rcu_node_entry);
395 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 403 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
396 sched_show_task(t); 404 sched_show_task(t);
@@ -447,7 +455,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
447 if (!rcu_preempt_blocked_readers_cgp(rnp)) 455 if (!rcu_preempt_blocked_readers_cgp(rnp))
448 return 0; 456 return 0;
449 rcu_print_task_stall_begin(rnp); 457 rcu_print_task_stall_begin(rnp);
450 t = list_entry(rnp->gp_tasks, 458 t = list_entry(rnp->gp_tasks->prev,
451 struct task_struct, rcu_node_entry); 459 struct task_struct, rcu_node_entry);
452 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 460 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
453 pr_cont(" P%d", t->pid); 461 pr_cont(" P%d", t->pid);
@@ -491,8 +499,8 @@ static void rcu_preempt_check_callbacks(void)
491 return; 499 return;
492 } 500 }
493 if (t->rcu_read_lock_nesting > 0 && 501 if (t->rcu_read_lock_nesting > 0 &&
494 __this_cpu_read(rcu_preempt_data.qs_pending) && 502 __this_cpu_read(rcu_data_p->qs_pending) &&
495 !__this_cpu_read(rcu_preempt_data.passed_quiesce)) 503 !__this_cpu_read(rcu_data_p->passed_quiesce))
496 t->rcu_read_unlock_special.b.need_qs = true; 504 t->rcu_read_unlock_special.b.need_qs = true;
497} 505}
498 506
@@ -500,7 +508,7 @@ static void rcu_preempt_check_callbacks(void)
500 508
501static void rcu_preempt_do_callbacks(void) 509static void rcu_preempt_do_callbacks(void)
502{ 510{
503 rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); 511 rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
504} 512}
505 513
506#endif /* #ifdef CONFIG_RCU_BOOST */ 514#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -510,7 +518,7 @@ static void rcu_preempt_do_callbacks(void)
510 */ 518 */
511void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 519void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
512{ 520{
513 __call_rcu(head, func, &rcu_preempt_state, -1, 0); 521 __call_rcu(head, func, rcu_state_p, -1, 0);
514} 522}
515EXPORT_SYMBOL_GPL(call_rcu); 523EXPORT_SYMBOL_GPL(call_rcu);
516 524
@@ -570,7 +578,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp)
570static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 578static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
571{ 579{
572 return !rcu_preempted_readers_exp(rnp) && 580 return !rcu_preempted_readers_exp(rnp) &&
573 ACCESS_ONCE(rnp->expmask) == 0; 581 READ_ONCE(rnp->expmask) == 0;
574} 582}
575 583
576/* 584/*
@@ -711,12 +719,12 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
711void synchronize_rcu_expedited(void) 719void synchronize_rcu_expedited(void)
712{ 720{
713 struct rcu_node *rnp; 721 struct rcu_node *rnp;
714 struct rcu_state *rsp = &rcu_preempt_state; 722 struct rcu_state *rsp = rcu_state_p;
715 unsigned long snap; 723 unsigned long snap;
716 int trycount = 0; 724 int trycount = 0;
717 725
718 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 726 smp_mb(); /* Caller's modifications seen first by other CPUs. */
719 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 727 snap = READ_ONCE(sync_rcu_preempt_exp_count) + 1;
720 smp_mb(); /* Above access cannot bleed into critical section. */ 728 smp_mb(); /* Above access cannot bleed into critical section. */
721 729
722 /* 730 /*
@@ -740,7 +748,7 @@ void synchronize_rcu_expedited(void)
740 */ 748 */
741 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 749 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
742 if (ULONG_CMP_LT(snap, 750 if (ULONG_CMP_LT(snap,
743 ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 751 READ_ONCE(sync_rcu_preempt_exp_count))) {
744 put_online_cpus(); 752 put_online_cpus();
745 goto mb_ret; /* Others did our work for us. */ 753 goto mb_ret; /* Others did our work for us. */
746 } 754 }
@@ -752,7 +760,7 @@ void synchronize_rcu_expedited(void)
752 return; 760 return;
753 } 761 }
754 } 762 }
755 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 763 if (ULONG_CMP_LT(snap, READ_ONCE(sync_rcu_preempt_exp_count))) {
756 put_online_cpus(); 764 put_online_cpus();
757 goto unlock_mb_ret; /* Others did our work for us. */ 765 goto unlock_mb_ret; /* Others did our work for us. */
758 } 766 }
@@ -780,8 +788,7 @@ void synchronize_rcu_expedited(void)
780 788
781 /* Clean up and exit. */ 789 /* Clean up and exit. */
782 smp_mb(); /* ensure expedited GP seen before counter increment. */ 790 smp_mb(); /* ensure expedited GP seen before counter increment. */
783 ACCESS_ONCE(sync_rcu_preempt_exp_count) = 791 WRITE_ONCE(sync_rcu_preempt_exp_count, sync_rcu_preempt_exp_count + 1);
784 sync_rcu_preempt_exp_count + 1;
785unlock_mb_ret: 792unlock_mb_ret:
786 mutex_unlock(&sync_rcu_preempt_exp_mutex); 793 mutex_unlock(&sync_rcu_preempt_exp_mutex);
787mb_ret: 794mb_ret:
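
The sync_rcu_preempt_exp_count accesses converted above implement a snapshot shortcut: record count + 1 on entry, and if the counter later passes that value, another caller's expedited grace period already covered this one. A user-space sketch of the comparison, using a wrap-safe modular compare equivalent to the kernel's ULONG_CMP_LT():

	/* Wrap-safe modular "less than". */
	#define ULONG_CMP_LT(a, b)  ((long)((a) - (b)) < 0)

	/* Illustrative only: snap was READ_ONCE(count) + 1 at entry.
	 * If the counter has since passed snap, a full expedited grace
	 * period began and ended after our snapshot. */
	static int covered_by_others(unsigned long count_now, unsigned long snap)
	{
		return ULONG_CMP_LT(snap, count_now);
	}
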
@@ -799,7 +806,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
799 */ 806 */
800void rcu_barrier(void) 807void rcu_barrier(void)
801{ 808{
802 _rcu_barrier(&rcu_preempt_state); 809 _rcu_barrier(rcu_state_p);
803} 810}
804EXPORT_SYMBOL_GPL(rcu_barrier); 811EXPORT_SYMBOL_GPL(rcu_barrier);
805 812
@@ -808,7 +815,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
808 */ 815 */
809static void __init __rcu_init_preempt(void) 816static void __init __rcu_init_preempt(void)
810{ 817{
811 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 818 rcu_init_one(rcu_state_p, rcu_data_p);
812} 819}
813 820
814/* 821/*
@@ -831,7 +838,8 @@ void exit_rcu(void)
831 838
832#else /* #ifdef CONFIG_PREEMPT_RCU */ 839#else /* #ifdef CONFIG_PREEMPT_RCU */
833 840
834static struct rcu_state *rcu_state_p = &rcu_sched_state; 841static struct rcu_state *const rcu_state_p = &rcu_sched_state;
842static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
835 843
836/* 844/*
837 * Tell them what RCU they are running. 845 * Tell them what RCU they are running.
@@ -994,8 +1002,8 @@ static int rcu_boost(struct rcu_node *rnp)
994 struct task_struct *t; 1002 struct task_struct *t;
995 struct list_head *tb; 1003 struct list_head *tb;
996 1004
997 if (ACCESS_ONCE(rnp->exp_tasks) == NULL && 1005 if (READ_ONCE(rnp->exp_tasks) == NULL &&
998 ACCESS_ONCE(rnp->boost_tasks) == NULL) 1006 READ_ONCE(rnp->boost_tasks) == NULL)
999 return 0; /* Nothing left to boost. */ 1007 return 0; /* Nothing left to boost. */
1000 1008
1001 raw_spin_lock_irqsave(&rnp->lock, flags); 1009 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1048,8 +1056,8 @@ static int rcu_boost(struct rcu_node *rnp)
1048 rt_mutex_lock(&rnp->boost_mtx); 1056 rt_mutex_lock(&rnp->boost_mtx);
1049 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ 1057 rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
1050 1058
1051 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1059 return READ_ONCE(rnp->exp_tasks) != NULL ||
1052 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1060 READ_ONCE(rnp->boost_tasks) != NULL;
1053} 1061}
1054 1062
1055/* 1063/*
@@ -1173,7 +1181,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1173 struct sched_param sp; 1181 struct sched_param sp;
1174 struct task_struct *t; 1182 struct task_struct *t;
1175 1183
1176 if (&rcu_preempt_state != rsp) 1184 if (rcu_state_p != rsp)
1177 return 0; 1185 return 0;
1178 1186
1179 if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) 1187 if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
@@ -1367,13 +1375,12 @@ static void rcu_prepare_kthreads(int cpu)
1367 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1375
1368 * any flavor of RCU. 1376 * any flavor of RCU.
1369 */ 1377 */
1370#ifndef CONFIG_RCU_NOCB_CPU_ALL
1371int rcu_needs_cpu(unsigned long *delta_jiffies) 1378int rcu_needs_cpu(unsigned long *delta_jiffies)
1372{ 1379{
1373 *delta_jiffies = ULONG_MAX; 1380 *delta_jiffies = ULONG_MAX;
1374 return rcu_cpu_has_callbacks(NULL); 1381 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
1382 ? 0 : rcu_cpu_has_callbacks(NULL);
1375} 1383}
1376#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1377 1384
1378/* 1385/*
1379 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up 1386 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
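
This is the IS_ENABLED() pattern used throughout the series: the #ifdef disappears, both branches always compile, and the compiler's constant folding discards the dead one. A hedged user-space sketch of the shape, with the kernel macro simplified and the config symbol assumed off:

	#include <stdio.h>

	/* User-space stand-in: the kernel's IS_ENABLED() expands to 1
	 * or 0 at preprocessing time; simplified to a constant here. */
	#define IS_ENABLED(option) (option)
	#define CONFIG_RCU_NOCB_CPU_ALL 0	/* assumed off for this sketch */

	static int rcu_cpu_has_callbacks_stub(void)
	{
		return 1;	/* pretend callbacks are pending */
	}

	int rcu_needs_cpu_sketch(void)
	{
		/* Both arms always compile; the constant condition lets
		 * the compiler drop the dead one, replacing #ifndef. */
		return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
		       ? 0 : rcu_cpu_has_callbacks_stub();
	}

	int main(void)
	{
		printf("%d\n", rcu_needs_cpu_sketch());
		return 0;
	}
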
@@ -1462,7 +1469,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1462 * callbacks not yet ready to invoke. 1469 * callbacks not yet ready to invoke.
1463 */ 1470 */
1464 if ((rdp->completed != rnp->completed || 1471 if ((rdp->completed != rnp->completed ||
1465 unlikely(ACCESS_ONCE(rdp->gpwrap))) && 1472 unlikely(READ_ONCE(rdp->gpwrap))) &&
1466 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1473 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1467 note_gp_changes(rsp, rdp); 1474 note_gp_changes(rsp, rdp);
1468 1475
@@ -1480,11 +1487,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1480 * 1487 *
1481 * The caller must have disabled interrupts. 1488 * The caller must have disabled interrupts.
1482 */ 1489 */
1483#ifndef CONFIG_RCU_NOCB_CPU_ALL
1484int rcu_needs_cpu(unsigned long *dj) 1490int rcu_needs_cpu(unsigned long *dj)
1485{ 1491{
1486 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1492 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
1487 1493
1494 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
1495 *dj = ULONG_MAX;
1496 return 0;
1497 }
1498
1488 /* Snapshot to detect later posting of non-lazy callback. */ 1499 /* Snapshot to detect later posting of non-lazy callback. */
1489 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1500 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1490 1501
@@ -1511,7 +1522,6 @@ int rcu_needs_cpu(unsigned long *dj)
1511 } 1522 }
1512 return 0; 1523 return 0;
1513} 1524}
1514#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1515 1525
1516/* 1526/*
1517 * Prepare a CPU for idle from an RCU perspective. The first major task 1527 * Prepare a CPU for idle from an RCU perspective. The first major task
@@ -1525,7 +1535,6 @@ int rcu_needs_cpu(unsigned long *dj)
1525 */ 1535 */
1526static void rcu_prepare_for_idle(void) 1536static void rcu_prepare_for_idle(void)
1527{ 1537{
1528#ifndef CONFIG_RCU_NOCB_CPU_ALL
1529 bool needwake; 1538 bool needwake;
1530 struct rcu_data *rdp; 1539 struct rcu_data *rdp;
1531 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1540 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
@@ -1533,8 +1542,11 @@ static void rcu_prepare_for_idle(void)
1533 struct rcu_state *rsp; 1542 struct rcu_state *rsp;
1534 int tne; 1543 int tne;
1535 1544
1545 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL))
1546 return;
1547
1536 /* Handle nohz enablement switches conservatively. */ 1548 /* Handle nohz enablement switches conservatively. */
1537 tne = ACCESS_ONCE(tick_nohz_active); 1549 tne = READ_ONCE(tick_nohz_active);
1538 if (tne != rdtp->tick_nohz_enabled_snap) { 1550 if (tne != rdtp->tick_nohz_enabled_snap) {
1539 if (rcu_cpu_has_callbacks(NULL)) 1551 if (rcu_cpu_has_callbacks(NULL))
1540 invoke_rcu_core(); /* force nohz to see update. */ 1552 invoke_rcu_core(); /* force nohz to see update. */
@@ -1580,7 +1592,6 @@ static void rcu_prepare_for_idle(void)
1580 if (needwake) 1592 if (needwake)
1581 rcu_gp_kthread_wake(rsp); 1593 rcu_gp_kthread_wake(rsp);
1582 } 1594 }
1583#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1584} 1595}
1585 1596
1586/* 1597/*
@@ -1590,12 +1601,11 @@ static void rcu_prepare_for_idle(void)
1590 */ 1601 */
1591static void rcu_cleanup_after_idle(void) 1602static void rcu_cleanup_after_idle(void)
1592{ 1603{
1593#ifndef CONFIG_RCU_NOCB_CPU_ALL 1604 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
1594 if (rcu_is_nocb_cpu(smp_processor_id())) 1605 rcu_is_nocb_cpu(smp_processor_id()))
1595 return; 1606 return;
1596 if (rcu_try_advance_all_cbs()) 1607 if (rcu_try_advance_all_cbs())
1597 invoke_rcu_core(); 1608 invoke_rcu_core();
1598#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1599} 1609}
1600 1610
1601/* 1611/*
@@ -1760,7 +1770,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1760 atomic_read(&rdtp->dynticks) & 0xfff, 1770 atomic_read(&rdtp->dynticks) & 0xfff,
1761 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1771 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1762 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1772 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1763 ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, 1773 READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
1764 fast_no_hz); 1774 fast_no_hz);
1765} 1775}
1766 1776
@@ -1898,11 +1908,11 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
1898{ 1908{
1899 struct rcu_data *rdp_leader = rdp->nocb_leader; 1909 struct rcu_data *rdp_leader = rdp->nocb_leader;
1900 1910
1901 if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) 1911 if (!READ_ONCE(rdp_leader->nocb_kthread))
1902 return; 1912 return;
1903 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { 1913 if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
1904 /* Prior smp_mb__after_atomic() orders against prior enqueue. */ 1914 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
1905 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; 1915 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
1906 wake_up(&rdp_leader->nocb_wq); 1916 wake_up(&rdp_leader->nocb_wq);
1907 } 1917 }
1908} 1918}
@@ -1934,14 +1944,14 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
1934 ret = atomic_long_read(&rdp->nocb_q_count); 1944 ret = atomic_long_read(&rdp->nocb_q_count);
1935 1945
1936#ifdef CONFIG_PROVE_RCU 1946#ifdef CONFIG_PROVE_RCU
1937 rhp = ACCESS_ONCE(rdp->nocb_head); 1947 rhp = READ_ONCE(rdp->nocb_head);
1938 if (!rhp) 1948 if (!rhp)
1939 rhp = ACCESS_ONCE(rdp->nocb_gp_head); 1949 rhp = READ_ONCE(rdp->nocb_gp_head);
1940 if (!rhp) 1950 if (!rhp)
1941 rhp = ACCESS_ONCE(rdp->nocb_follower_head); 1951 rhp = READ_ONCE(rdp->nocb_follower_head);
1942 1952
1943 /* Having no rcuo kthread but CBs after scheduler starts is bad! */ 1953 /* Having no rcuo kthread but CBs after scheduler starts is bad! */
1944 if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && 1954 if (!READ_ONCE(rdp->nocb_kthread) && rhp &&
1945 rcu_scheduler_fully_active) { 1955 rcu_scheduler_fully_active) {
1946 /* RCU callback enqueued before CPU first came online??? */ 1956 /* RCU callback enqueued before CPU first came online??? */
1947 pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", 1957 pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
@@ -1975,12 +1985,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1975 atomic_long_add(rhcount, &rdp->nocb_q_count); 1985 atomic_long_add(rhcount, &rdp->nocb_q_count);
1976 /* rcu_barrier() relies on ->nocb_q_count add before xchg. */ 1986 /* rcu_barrier() relies on ->nocb_q_count add before xchg. */
1977 old_rhpp = xchg(&rdp->nocb_tail, rhtp); 1987 old_rhpp = xchg(&rdp->nocb_tail, rhtp);
1978 ACCESS_ONCE(*old_rhpp) = rhp; 1988 WRITE_ONCE(*old_rhpp, rhp);
1979 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 1989 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
1980 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ 1990 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
1981 1991
1982 /* If we are not being polled and there is a kthread, awaken it ... */ 1992 /* If we are not being polled and there is a kthread, awaken it ... */
1983 t = ACCESS_ONCE(rdp->nocb_kthread); 1993 t = READ_ONCE(rdp->nocb_kthread);
1984 if (rcu_nocb_poll || !t) { 1994 if (rcu_nocb_poll || !t) {
1985 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1995 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1986 TPS("WakeNotPoll")); 1996 TPS("WakeNotPoll"));
@@ -2118,7 +2128,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2118 for (;;) { 2128 for (;;) {
2119 wait_event_interruptible( 2129 wait_event_interruptible(
2120 rnp->nocb_gp_wq[c & 0x1], 2130 rnp->nocb_gp_wq[c & 0x1],
2121 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2131 (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
2122 if (likely(d)) 2132 if (likely(d))
2123 break; 2133 break;
2124 WARN_ON(signal_pending(current)); 2134 WARN_ON(signal_pending(current));
@@ -2145,7 +2155,7 @@ wait_again:
2145 if (!rcu_nocb_poll) { 2155 if (!rcu_nocb_poll) {
2146 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); 2156 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
2147 wait_event_interruptible(my_rdp->nocb_wq, 2157 wait_event_interruptible(my_rdp->nocb_wq,
2148 !ACCESS_ONCE(my_rdp->nocb_leader_sleep)); 2158 !READ_ONCE(my_rdp->nocb_leader_sleep));
2149 /* Memory barrier handled by smp_mb() calls below and repoll. */ 2159 /* Memory barrier handled by smp_mb() calls below and repoll. */
2150 } else if (firsttime) { 2160 } else if (firsttime) {
2151 firsttime = false; /* Don't drown trace log with "Poll"! */ 2161 firsttime = false; /* Don't drown trace log with "Poll"! */
@@ -2159,12 +2169,12 @@ wait_again:
2159 */ 2169 */
2160 gotcbs = false; 2170 gotcbs = false;
2161 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { 2171 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2162 rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head); 2172 rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
2163 if (!rdp->nocb_gp_head) 2173 if (!rdp->nocb_gp_head)
2164 continue; /* No CBs here, try next follower. */ 2174 continue; /* No CBs here, try next follower. */
2165 2175
2166 /* Move callbacks to wait-for-GP list, which is empty. */ 2176 /* Move callbacks to wait-for-GP list, which is empty. */
2167 ACCESS_ONCE(rdp->nocb_head) = NULL; 2177 WRITE_ONCE(rdp->nocb_head, NULL);
2168 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); 2178 rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2169 gotcbs = true; 2179 gotcbs = true;
2170 } 2180 }
@@ -2184,7 +2194,7 @@ wait_again:
2184 my_rdp->nocb_leader_sleep = true; 2194 my_rdp->nocb_leader_sleep = true;
2185 smp_mb(); /* Ensure _sleep true before scan. */ 2195 smp_mb(); /* Ensure _sleep true before scan. */
2186 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) 2196 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
2187 if (ACCESS_ONCE(rdp->nocb_head)) { 2197 if (READ_ONCE(rdp->nocb_head)) {
2188 /* Found CB, so short-circuit next wait. */ 2198 /* Found CB, so short-circuit next wait. */
2189 my_rdp->nocb_leader_sleep = false; 2199 my_rdp->nocb_leader_sleep = false;
2190 break; 2200 break;
@@ -2205,7 +2215,7 @@ wait_again:
2205 2215
2206 /* Each pass through the following loop wakes a follower, if needed. */ 2216 /* Each pass through the following loop wakes a follower, if needed. */
2207 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { 2217 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2208 if (ACCESS_ONCE(rdp->nocb_head)) 2218 if (READ_ONCE(rdp->nocb_head))
2209 my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ 2219 my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
2210 if (!rdp->nocb_gp_head) 2220 if (!rdp->nocb_gp_head)
2211 continue; /* No CBs, so no need to wake follower. */ 2221 continue; /* No CBs, so no need to wake follower. */
@@ -2241,7 +2251,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2241 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2251 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2242 "FollowerSleep"); 2252 "FollowerSleep");
2243 wait_event_interruptible(rdp->nocb_wq, 2253 wait_event_interruptible(rdp->nocb_wq,
2244 ACCESS_ONCE(rdp->nocb_follower_head)); 2254 READ_ONCE(rdp->nocb_follower_head));
2245 } else if (firsttime) { 2255 } else if (firsttime) {
2246 /* Don't drown trace log with "Poll"! */ 2256 /* Don't drown trace log with "Poll"! */
2247 firsttime = false; 2257 firsttime = false;
@@ -2282,10 +2292,10 @@ static int rcu_nocb_kthread(void *arg)
2282 nocb_follower_wait(rdp); 2292 nocb_follower_wait(rdp);
2283 2293
2284 /* Pull the ready-to-invoke callbacks onto local list. */ 2294 /* Pull the ready-to-invoke callbacks onto local list. */
2285 list = ACCESS_ONCE(rdp->nocb_follower_head); 2295 list = READ_ONCE(rdp->nocb_follower_head);
2286 BUG_ON(!list); 2296 BUG_ON(!list);
2287 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); 2297 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
2288 ACCESS_ONCE(rdp->nocb_follower_head) = NULL; 2298 WRITE_ONCE(rdp->nocb_follower_head, NULL);
2289 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); 2299 tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
2290 2300
2291 /* Each pass through the following loop invokes a callback. */ 2301 /* Each pass through the following loop invokes a callback. */
@@ -2324,7 +2334,7 @@ static int rcu_nocb_kthread(void *arg)
2324/* Is a deferred wakeup of rcu_nocb_kthread() required? */ 2334/* Is a deferred wakeup of rcu_nocb_kthread() required? */
2325static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2335static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2326{ 2336{
2327 return ACCESS_ONCE(rdp->nocb_defer_wakeup); 2337 return READ_ONCE(rdp->nocb_defer_wakeup);
2328} 2338}
2329 2339
2330/* Do a deferred wakeup of rcu_nocb_kthread(). */ 2340/* Do a deferred wakeup of rcu_nocb_kthread(). */
@@ -2334,8 +2344,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2334 2344
2335 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2345 if (!rcu_nocb_need_deferred_wakeup(rdp))
2336 return; 2346 return;
2337 ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); 2347 ndw = READ_ONCE(rdp->nocb_defer_wakeup);
2338 ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; 2348 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
2339 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); 2349 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
2340 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); 2350 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2341} 2351}
@@ -2448,7 +2458,7 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2448 t = kthread_run(rcu_nocb_kthread, rdp_spawn, 2458 t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2449 "rcuo%c/%d", rsp->abbr, cpu); 2459 "rcuo%c/%d", rsp->abbr, cpu);
2450 BUG_ON(IS_ERR(t)); 2460 BUG_ON(IS_ERR(t));
2451 ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; 2461 WRITE_ONCE(rdp_spawn->nocb_kthread, t);
2452} 2462}
2453 2463
2454/* 2464/*
@@ -2663,7 +2673,7 @@ static void rcu_sysidle_enter(int irq)
2663 2673
2664 /* Record start of fully idle period. */ 2674 /* Record start of fully idle period. */
2665 j = jiffies; 2675 j = jiffies;
2666 ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j; 2676 WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
2667 smp_mb__before_atomic(); 2677 smp_mb__before_atomic();
2668 atomic_inc(&rdtp->dynticks_idle); 2678 atomic_inc(&rdtp->dynticks_idle);
2669 smp_mb__after_atomic(); 2679 smp_mb__after_atomic();
@@ -2681,7 +2691,7 @@ static void rcu_sysidle_enter(int irq)
2681 */ 2691 */
2682void rcu_sysidle_force_exit(void) 2692void rcu_sysidle_force_exit(void)
2683{ 2693{
2684 int oldstate = ACCESS_ONCE(full_sysidle_state); 2694 int oldstate = READ_ONCE(full_sysidle_state);
2685 int newoldstate; 2695 int newoldstate;
2686 2696
2687 /* 2697 /*
@@ -2794,7 +2804,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2794 smp_mb(); /* Read counters before timestamps. */ 2804 smp_mb(); /* Read counters before timestamps. */
2795 2805
2796 /* Pick up timestamps. */ 2806 /* Pick up timestamps. */
2797 j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies); 2807 j = READ_ONCE(rdtp->dynticks_idle_jiffies);
2798 /* If this CPU entered idle more recently, update maxj timestamp. */ 2808 /* If this CPU entered idle more recently, update maxj timestamp. */
2799 if (ULONG_CMP_LT(*maxj, j)) 2809 if (ULONG_CMP_LT(*maxj, j))
2800 *maxj = j; 2810 *maxj = j;
@@ -2831,11 +2841,11 @@ static unsigned long rcu_sysidle_delay(void)
2831static void rcu_sysidle(unsigned long j) 2841static void rcu_sysidle(unsigned long j)
2832{ 2842{
2833 /* Check the current state. */ 2843 /* Check the current state. */
2834 switch (ACCESS_ONCE(full_sysidle_state)) { 2844 switch (READ_ONCE(full_sysidle_state)) {
2835 case RCU_SYSIDLE_NOT: 2845 case RCU_SYSIDLE_NOT:
2836 2846
2837 /* First time all are idle, so note a short idle period. */ 2847 /* First time all are idle, so note a short idle period. */
2838 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT; 2848 WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
2839 break; 2849 break;
2840 2850
2841 case RCU_SYSIDLE_SHORT: 2851 case RCU_SYSIDLE_SHORT:
@@ -2873,7 +2883,7 @@ static void rcu_sysidle_cancel(void)
2873{ 2883{
2874 smp_mb(); 2884 smp_mb();
2875 if (full_sysidle_state > RCU_SYSIDLE_SHORT) 2885 if (full_sysidle_state > RCU_SYSIDLE_SHORT)
2876 ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT; 2886 WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
2877} 2887}
2878 2888
2879/* 2889/*
@@ -2925,7 +2935,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
2925 smp_mb(); /* grace period precedes setting inuse. */ 2935 smp_mb(); /* grace period precedes setting inuse. */
2926 2936
2927 rshp = container_of(rhp, struct rcu_sysidle_head, rh); 2937 rshp = container_of(rhp, struct rcu_sysidle_head, rh);
2928 ACCESS_ONCE(rshp->inuse) = 0; 2938 WRITE_ONCE(rshp->inuse, 0);
2929} 2939}
2930 2940
2931/* 2941/*
@@ -2936,7 +2946,7 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
2936bool rcu_sys_is_idle(void) 2946bool rcu_sys_is_idle(void)
2937{ 2947{
2938 static struct rcu_sysidle_head rsh; 2948 static struct rcu_sysidle_head rsh;
2939 int rss = ACCESS_ONCE(full_sysidle_state); 2949 int rss = READ_ONCE(full_sysidle_state);
2940 2950
2941 if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu)) 2951 if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
2942 return false; 2952 return false;
@@ -2964,7 +2974,7 @@ bool rcu_sys_is_idle(void)
2964 } 2974 }
2965 rcu_sysidle_report(rcu_state_p, isidle, maxj, false); 2975 rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
2966 oldrss = rss; 2976 oldrss = rss;
2967 rss = ACCESS_ONCE(full_sysidle_state); 2977 rss = READ_ONCE(full_sysidle_state);
2968 } 2978 }
2969 } 2979 }
2970 2980
@@ -3048,10 +3058,10 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
3048#ifdef CONFIG_NO_HZ_FULL 3058#ifdef CONFIG_NO_HZ_FULL
3049 if (tick_nohz_full_cpu(smp_processor_id()) && 3059 if (tick_nohz_full_cpu(smp_processor_id()) &&
3050 (!rcu_gp_in_progress(rsp) || 3060 (!rcu_gp_in_progress(rsp) ||
3051 ULONG_CMP_LT(jiffies, ACCESS_ONCE(rsp->gp_start) + HZ))) 3061 ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))
3052 return 1; 3062 return true;
3053#endif /* #ifdef CONFIG_NO_HZ_FULL */ 3063#endif /* #ifdef CONFIG_NO_HZ_FULL */
3054 return 0; 3064 return false;
3055} 3065}
3056 3066
3057/* 3067/*
@@ -3077,7 +3087,7 @@ static void rcu_bind_gp_kthread(void)
3077static void rcu_dynticks_task_enter(void) 3087static void rcu_dynticks_task_enter(void)
3078{ 3088{
3079#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) 3089#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3080 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); 3090 WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
3081#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ 3091#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3082} 3092}
3083 3093
@@ -3085,6 +3095,6 @@ static void rcu_dynticks_task_enter(void)
3085static void rcu_dynticks_task_exit(void) 3095static void rcu_dynticks_task_exit(void)
3086{ 3096{
3087#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) 3097#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3088 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; 3098 WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
3089#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ 3099#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3090} 3100}
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index f92361efd0f5..3ea7ffc7d5c4 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -277,7 +277,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
277 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", 277 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
278 rsp->n_force_qs, rsp->n_force_qs_ngp, 278 rsp->n_force_qs, rsp->n_force_qs_ngp,
279 rsp->n_force_qs - rsp->n_force_qs_ngp, 279 rsp->n_force_qs - rsp->n_force_qs_ngp,
280 ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); 280 READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
281 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { 281 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
282 if (rnp->level != level) { 282 if (rnp->level != level) {
283 seq_puts(m, "\n"); 283 seq_puts(m, "\n");
@@ -323,8 +323,8 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
323 struct rcu_node *rnp = &rsp->node[0]; 323 struct rcu_node *rnp = &rsp->node[0];
324 324
325 raw_spin_lock_irqsave(&rnp->lock, flags); 325 raw_spin_lock_irqsave(&rnp->lock, flags);
326 completed = ACCESS_ONCE(rsp->completed); 326 completed = READ_ONCE(rsp->completed);
327 gpnum = ACCESS_ONCE(rsp->gpnum); 327 gpnum = READ_ONCE(rsp->gpnum);
328 if (completed == gpnum) 328 if (completed == gpnum)
329 gpage = 0; 329 gpage = 0;
330 else 330 else
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 1f133350da01..afaecb7a799a 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -150,14 +150,14 @@ void __rcu_read_unlock(void)
150 barrier(); /* critical section before exit code. */ 150 barrier(); /* critical section before exit code. */
151 t->rcu_read_lock_nesting = INT_MIN; 151 t->rcu_read_lock_nesting = INT_MIN;
152 barrier(); /* assign before ->rcu_read_unlock_special load */ 152 barrier(); /* assign before ->rcu_read_unlock_special load */
153 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) 153 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
154 rcu_read_unlock_special(t); 154 rcu_read_unlock_special(t);
155 barrier(); /* ->rcu_read_unlock_special load before assign */ 155 barrier(); /* ->rcu_read_unlock_special load before assign */
156 t->rcu_read_lock_nesting = 0; 156 t->rcu_read_lock_nesting = 0;
157 } 157 }
158#ifdef CONFIG_PROVE_LOCKING 158#ifdef CONFIG_PROVE_LOCKING
159 { 159 {
160 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); 160 int rrln = READ_ONCE(t->rcu_read_lock_nesting);
161 161
162 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); 162 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
163 } 163 }
@@ -389,17 +389,17 @@ module_param(rcu_cpu_stall_timeout, int, 0644);
389 389
390int rcu_jiffies_till_stall_check(void) 390int rcu_jiffies_till_stall_check(void)
391{ 391{
392 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); 392 int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
393 393
394 /* 394 /*
395 * Limit check must be consistent with the Kconfig limits 395 * Limit check must be consistent with the Kconfig limits
396 * for CONFIG_RCU_CPU_STALL_TIMEOUT. 396 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
397 */ 397 */
398 if (till_stall_check < 3) { 398 if (till_stall_check < 3) {
399 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; 399 WRITE_ONCE(rcu_cpu_stall_timeout, 3);
400 till_stall_check = 3; 400 till_stall_check = 3;
401 } else if (till_stall_check > 300) { 401 } else if (till_stall_check > 300) {
402 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; 402 WRITE_ONCE(rcu_cpu_stall_timeout, 300);
403 till_stall_check = 300; 403 till_stall_check = 300;
404 } 404 }
405 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 405 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
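
The conversion above keeps the clamp-and-writeback shape: an out-of-range module-parameter value is repaired in place so later readers of the knob see the corrected setting. A user-space sketch with assumed names; the kernel uses READ_ONCE()/WRITE_ONCE() for the marked accesses:

	/* Illustrative only: clamp a stall-timeout tunable into the
	 * supported [3, 300] second range and write the fix back. */
	static int clamp_stall_timeout(int *timeout_p)
	{
		int t = *timeout_p;		/* READ_ONCE() in the kernel */

		if (t < 3) {
			*timeout_p = 3;		/* WRITE_ONCE() in the kernel */
			t = 3;
		} else if (t > 300) {
			*timeout_p = 300;	/* WRITE_ONCE() in the kernel */
			t = 300;
		}
		return t;
	}
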
@@ -550,12 +550,12 @@ static void check_holdout_task(struct task_struct *t,
550{ 550{
551 int cpu; 551 int cpu;
552 552
553 if (!ACCESS_ONCE(t->rcu_tasks_holdout) || 553 if (!READ_ONCE(t->rcu_tasks_holdout) ||
554 t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || 554 t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
555 !ACCESS_ONCE(t->on_rq) || 555 !READ_ONCE(t->on_rq) ||
556 (IS_ENABLED(CONFIG_NO_HZ_FULL) && 556 (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
557 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { 557 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
558 ACCESS_ONCE(t->rcu_tasks_holdout) = false; 558 WRITE_ONCE(t->rcu_tasks_holdout, false);
559 list_del_init(&t->rcu_tasks_holdout_list); 559 list_del_init(&t->rcu_tasks_holdout_list);
560 put_task_struct(t); 560 put_task_struct(t);
561 return; 561 return;
@@ -639,11 +639,11 @@ static int __noreturn rcu_tasks_kthread(void *arg)
639 */ 639 */
640 rcu_read_lock(); 640 rcu_read_lock();
641 for_each_process_thread(g, t) { 641 for_each_process_thread(g, t) {
642 if (t != current && ACCESS_ONCE(t->on_rq) && 642 if (t != current && READ_ONCE(t->on_rq) &&
643 !is_idle_task(t)) { 643 !is_idle_task(t)) {
644 get_task_struct(t); 644 get_task_struct(t);
645 t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw); 645 t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
646 ACCESS_ONCE(t->rcu_tasks_holdout) = true; 646 WRITE_ONCE(t->rcu_tasks_holdout, true);
647 list_add(&t->rcu_tasks_holdout_list, 647 list_add(&t->rcu_tasks_holdout_list,
648 &rcu_tasks_holdouts); 648 &rcu_tasks_holdouts);
649 } 649 }
@@ -672,7 +672,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
672 struct task_struct *t1; 672 struct task_struct *t1;
673 673
674 schedule_timeout_interruptible(HZ); 674 schedule_timeout_interruptible(HZ);
675 rtst = ACCESS_ONCE(rcu_task_stall_timeout); 675 rtst = READ_ONCE(rcu_task_stall_timeout);
676 needreport = rtst > 0 && 676 needreport = rtst > 0 &&
677 time_after(jiffies, lastreport + rtst); 677 time_after(jiffies, lastreport + rtst);
678 if (needreport) 678 if (needreport)
@@ -728,7 +728,7 @@ static void rcu_spawn_tasks_kthread(void)
728 static struct task_struct *rcu_tasks_kthread_ptr; 728 static struct task_struct *rcu_tasks_kthread_ptr;
729 struct task_struct *t; 729 struct task_struct *t;
730 730
731 if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) { 731 if (READ_ONCE(rcu_tasks_kthread_ptr)) {
732 smp_mb(); /* Ensure caller sees full kthread. */ 732 smp_mb(); /* Ensure caller sees full kthread. */
733 return; 733 return;
734 } 734 }
@@ -740,7 +740,7 @@ static void rcu_spawn_tasks_kthread(void)
740 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); 740 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
741 BUG_ON(IS_ERR(t)); 741 BUG_ON(IS_ERR(t));
742 smp_mb(); /* Ensure others see full kthread. */ 742 smp_mb(); /* Ensure others see full kthread. */
743 ACCESS_ONCE(rcu_tasks_kthread_ptr) = t; 743 WRITE_ONCE(rcu_tasks_kthread_ptr, t);
744 mutex_unlock(&rcu_tasks_kthread_mutex); 744 mutex_unlock(&rcu_tasks_kthread_mutex);
745} 745}
746 746
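
The ACCESS_ONCE() conversions in the hunks above follow one mechanical
rule: loads become READ_ONCE() and stores become WRITE_ONCE(). Beyond
uniformity, this matters because ACCESS_ONCE()'s volatile-cast trick is
only reliable for scalar types, while the replacement macros also handle
aggregates. A minimal sketch of the pattern, with shared_flag as a
hypothetical variable rather than anything from the patch:

	/* Annotate lockless accesses so the compiler emits exactly one
	 * load or store and cannot tear, fuse, or re-load the access.
	 */
	static int shared_flag;

	static void producer(void)
	{
		WRITE_ONCE(shared_flag, 1);	/* was: ACCESS_ONCE(shared_flag) = 1; */
	}

	static int consumer(void)
	{
		return READ_ONCE(shared_flag);	/* was: ACCESS_ONCE(shared_flag) */
	}

Note that neither macro orders the access against other variables, which
is why the barrier() and smp_mb() calls around them are kept unchanged
above.
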
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 10338ce78be4..f89ca9bcf42a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1095,7 +1095,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1095 if (p->sched_class->migrate_task_rq) 1095 if (p->sched_class->migrate_task_rq)
1096 p->sched_class->migrate_task_rq(p, new_cpu); 1096 p->sched_class->migrate_task_rq(p, new_cpu);
1097 p->se.nr_migrations++; 1097 p->se.nr_migrations++;
1098 perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); 1098 perf_event_task_migrate(p);
1099 } 1099 }
1100 1100
1101 __set_task_cpu(p, new_cpu); 1101 __set_task_cpu(p, new_cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4b6e5f63d9af..433061d984ea 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2209,7 +2209,7 @@ void task_numa_work(struct callback_head *work)
2209 } 2209 }
2210 for (; vma; vma = vma->vm_next) { 2210 for (; vma; vma = vma->vm_next) {
2211 if (!vma_migratable(vma) || !vma_policy_mof(vma) || 2211 if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
2212 is_vm_hugetlb_page(vma)) { 2212 is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
2213 continue; 2213 continue;
2214 } 2214 }
2215 2215
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 2ccec988d6b7..052e02672d12 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
341 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss 341 * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
342 * an event. 342 * an event.
343 */ 343 */
344 set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ 344 smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
345 345
346 return timeout; 346 return timeout;
347} 347}
@@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
354 * doesn't imply write barrier and the users expects write 354 * doesn't imply write barrier and the users expects write
355 * barrier semantics on wakeup functions. The following 355 * barrier semantics on wakeup functions. The following
356 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() 356 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
357 * and is paired with set_mb() in wait_woken(). 357 * and is paired with smp_store_mb() in wait_woken().
358 */ 358 */
359 smp_wmb(); /* C */ 359 smp_wmb(); /* C */
360 wait->flags |= WQ_FLAG_WOKEN; 360 wait->flags |= WQ_FLAG_WOKEN;
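
set_mb() is renamed to smp_store_mb() throughout this series with, as far
as these hunks show, unchanged semantics: store the value, then execute a
full memory barrier. Open-coded, the helper behaves roughly like this
sketch (the real definition is per-architecture):

	#define my_store_mb(var, val)				\
	do {							\
		WRITE_ONCE(var, val);				\
		smp_mb();	/* order store before later accesses */ \
	} while (0)

In wait_woken() the barrier half pairs with the smp_wmb() in
woken_wake_function(), as the updated comment now spells out.
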
diff --git a/kernel/torture.c b/kernel/torture.c
index dd70993c266c..3e4840633d3e 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -409,7 +409,7 @@ static void (*torture_shutdown_hook)(void);
409 */ 409 */
410void torture_shutdown_absorb(const char *title) 410void torture_shutdown_absorb(const char *title)
411{ 411{
412 while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 412 while (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
413 pr_notice("torture thread %s parking due to system shutdown\n", 413 pr_notice("torture thread %s parking due to system shutdown\n",
414 title); 414 title);
415 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); 415 schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
@@ -480,9 +480,9 @@ static int torture_shutdown_notify(struct notifier_block *unused1,
480 unsigned long unused2, void *unused3) 480 unsigned long unused2, void *unused3)
481{ 481{
482 mutex_lock(&fullstop_mutex); 482 mutex_lock(&fullstop_mutex);
483 if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) { 483 if (READ_ONCE(fullstop) == FULLSTOP_DONTSTOP) {
484 VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected"); 484 VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected");
485 ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN; 485 WRITE_ONCE(fullstop, FULLSTOP_SHUTDOWN);
486 } else { 486 } else {
487 pr_warn("Concurrent rmmod and shutdown illegal!\n"); 487 pr_warn("Concurrent rmmod and shutdown illegal!\n");
488 } 488 }
@@ -523,13 +523,13 @@ static int stutter;
523 */ 523 */
524void stutter_wait(const char *title) 524void stutter_wait(const char *title)
525{ 525{
526 while (ACCESS_ONCE(stutter_pause_test) || 526 while (READ_ONCE(stutter_pause_test) ||
527 (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { 527 (torture_runnable && !READ_ONCE(*torture_runnable))) {
528 if (stutter_pause_test) 528 if (stutter_pause_test)
529 if (ACCESS_ONCE(stutter_pause_test) == 1) 529 if (READ_ONCE(stutter_pause_test) == 1)
530 schedule_timeout_interruptible(1); 530 schedule_timeout_interruptible(1);
531 else 531 else
532 while (ACCESS_ONCE(stutter_pause_test)) 532 while (READ_ONCE(stutter_pause_test))
533 cond_resched(); 533 cond_resched();
534 else 534 else
535 schedule_timeout_interruptible(round_jiffies_relative(HZ)); 535 schedule_timeout_interruptible(round_jiffies_relative(HZ));
@@ -549,14 +549,14 @@ static int torture_stutter(void *arg)
549 if (!torture_must_stop()) { 549 if (!torture_must_stop()) {
550 if (stutter > 1) { 550 if (stutter > 1) {
551 schedule_timeout_interruptible(stutter - 1); 551 schedule_timeout_interruptible(stutter - 1);
552 ACCESS_ONCE(stutter_pause_test) = 2; 552 WRITE_ONCE(stutter_pause_test, 2);
553 } 553 }
554 schedule_timeout_interruptible(1); 554 schedule_timeout_interruptible(1);
555 ACCESS_ONCE(stutter_pause_test) = 1; 555 WRITE_ONCE(stutter_pause_test, 1);
556 } 556 }
557 if (!torture_must_stop()) 557 if (!torture_must_stop())
558 schedule_timeout_interruptible(stutter); 558 schedule_timeout_interruptible(stutter);
559 ACCESS_ONCE(stutter_pause_test) = 0; 559 WRITE_ONCE(stutter_pause_test, 0);
560 torture_shutdown_absorb("torture_stutter"); 560 torture_shutdown_absorb("torture_stutter");
561 } while (!torture_must_stop()); 561 } while (!torture_must_stop());
562 torture_kthread_stopping("torture_stutter"); 562 torture_kthread_stopping("torture_stutter");
@@ -642,13 +642,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
642bool torture_cleanup_begin(void) 642bool torture_cleanup_begin(void)
643{ 643{
644 mutex_lock(&fullstop_mutex); 644 mutex_lock(&fullstop_mutex);
645 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 645 if (READ_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
646 pr_warn("Concurrent rmmod and shutdown illegal!\n"); 646 pr_warn("Concurrent rmmod and shutdown illegal!\n");
647 mutex_unlock(&fullstop_mutex); 647 mutex_unlock(&fullstop_mutex);
648 schedule_timeout_uninterruptible(10); 648 schedule_timeout_uninterruptible(10);
649 return true; 649 return true;
650 } 650 }
651 ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD; 651 WRITE_ONCE(fullstop, FULLSTOP_RMMOD);
652 mutex_unlock(&fullstop_mutex); 652 mutex_unlock(&fullstop_mutex);
653 torture_shutdown_cleanup(); 653 torture_shutdown_cleanup();
654 torture_shuffle_cleanup(); 654 torture_shuffle_cleanup();
@@ -681,7 +681,7 @@ EXPORT_SYMBOL_GPL(torture_must_stop);
681 */ 681 */
682bool torture_must_stop_irq(void) 682bool torture_must_stop_irq(void)
683{ 683{
684 return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP; 684 return READ_ONCE(fullstop) != FULLSTOP_DONTSTOP;
685} 685}
686EXPORT_SYMBOL_GPL(torture_must_stop_irq); 686EXPORT_SYMBOL_GPL(torture_must_stop_irq);
687 687
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 13d945c0d03f..1b28df2d9104 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -450,7 +450,7 @@ static int __init ring_buffer_benchmark_init(void)
450 450
451 if (producer_fifo >= 0) { 451 if (producer_fifo >= 0) {
452 struct sched_param param = { 452 struct sched_param param = {
453 .sched_priority = consumer_fifo 453 .sched_priority = producer_fifo
454 }; 454 };
455 sched_setscheduler(producer, SCHED_FIFO, &param); 455 sched_setscheduler(producer, SCHED_FIFO, &param);
456 } else 456 } else
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index ced69da0ff55..7f2e97ce71a7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1369,19 +1369,26 @@ static int check_preds(struct filter_parse_state *ps)
1369{ 1369{
1370 int n_normal_preds = 0, n_logical_preds = 0; 1370 int n_normal_preds = 0, n_logical_preds = 0;
1371 struct postfix_elt *elt; 1371 struct postfix_elt *elt;
1372 int cnt = 0;
1372 1373
1373 list_for_each_entry(elt, &ps->postfix, list) { 1374 list_for_each_entry(elt, &ps->postfix, list) {
1374 if (elt->op == OP_NONE) 1375 if (elt->op == OP_NONE) {
1376 cnt++;
1375 continue; 1377 continue;
1378 }
1376 1379
1377 if (elt->op == OP_AND || elt->op == OP_OR) { 1380 if (elt->op == OP_AND || elt->op == OP_OR) {
1378 n_logical_preds++; 1381 n_logical_preds++;
1382 cnt--;
1379 continue; 1383 continue;
1380 } 1384 }
1385 if (elt->op != OP_NOT)
1386 cnt--;
1381 n_normal_preds++; 1387 n_normal_preds++;
1388 WARN_ON_ONCE(cnt < 0);
1382 } 1389 }
1383 1390
1384 if (!n_normal_preds || n_logical_preds >= n_normal_preds) { 1391 if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
1385 parse_error(ps, FILT_ERR_INVALID_FILTER, 0); 1392 parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
1386 return -EINVAL; 1393 return -EINVAL;
1387 } 1394 }
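
The new cnt logic in check_preds() is a stack-depth validation of the
postfix predicate list: operands (OP_NONE) push one value, binary
operators (the comparisons plus OP_AND/OP_OR) pop two and push one, and
the unary OP_NOT pops one and pushes one, so a well-formed filter must
finish with exactly one value. Restated as a standalone sketch with
hypothetical token types:

	enum tok { TOK_OPERAND, TOK_BINARY, TOK_UNARY };

	static bool postfix_well_formed(const enum tok *t, int n)
	{
		int depth = 0;
		int i;

		for (i = 0; i < n; i++) {
			switch (t[i]) {
			case TOK_OPERAND:	/* e.g. "pid", "100" */
				depth++;
				break;
			case TOK_BINARY:	/* "==", "&&", ...: pop 2, push 1 */
				depth--;
				break;
			case TOK_UNARY:		/* "!": pop 1, push 1 */
				break;
			}
			if (depth < 1)
				return false;	/* operator ran out of operands */
		}
		return depth == 1;		/* exactly one result left */
	}

With the final test tightened to cnt != 1, a filter whose postfix form
leaves several disconnected results (for example two predicates with no
joining operator) is now rejected with FILT_ERR_INVALID_FILTER instead of
slipping through.
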
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba2b0c87e65b..b908048f8d6a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1233,6 +1233,7 @@ config RCU_TORTURE_TEST
1233 depends on DEBUG_KERNEL 1233 depends on DEBUG_KERNEL
1234 select TORTURE_TEST 1234 select TORTURE_TEST
1235 select SRCU 1235 select SRCU
1236 select TASKS_RCU
1236 default n 1237 default n
1237 help 1238 help
1238 This option provides a kernel module that runs torture tests 1239 This option provides a kernel module that runs torture tests
@@ -1261,12 +1262,38 @@ config RCU_TORTURE_TEST_RUNNABLE
1261 Say N here if you want the RCU torture tests to start only 1262 Say N here if you want the RCU torture tests to start only
1262 after being manually enabled via /proc. 1263 after being manually enabled via /proc.
1263 1264
1265config RCU_TORTURE_TEST_SLOW_PREINIT
1266 bool "Slow down RCU grace-period pre-initialization to expose races"
1267 depends on RCU_TORTURE_TEST
1268 help
1269 This option delays grace-period pre-initialization (the
1270 propagation of CPU-hotplug changes up the rcu_node combining
1271 tree) for a few jiffies between initializing each pair of
1272 consecutive rcu_node structures. This helps to expose races
1273 involving grace-period pre-initialization, in other words, it
1274 makes your kernel less stable. It can also greatly increase
1275 grace-period latency, especially on systems with large numbers
1276 of CPUs. This is useful when torture-testing RCU, but in
1277 almost no other circumstance.
1278
1279 Say Y here if you want your system to crash and hang more often.
1280 Say N if you want a sane system.
1281
1282config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
1283 int "How much to slow down RCU grace-period pre-initialization"
1284 range 0 5
1285 default 3
1286 depends on RCU_TORTURE_TEST_SLOW_PREINIT
1287 help
1288 This option specifies the number of jiffies to wait between
1289 each rcu_node structure pre-initialization step.
1290
1264config RCU_TORTURE_TEST_SLOW_INIT 1291config RCU_TORTURE_TEST_SLOW_INIT
1265 bool "Slow down RCU grace-period initialization to expose races" 1292 bool "Slow down RCU grace-period initialization to expose races"
1266 depends on RCU_TORTURE_TEST 1293 depends on RCU_TORTURE_TEST
1267 help 1294 help
1268 This option makes grace-period initialization block for a 1295 This option delays grace-period initialization for a few
1269 few jiffies between initializing each pair of consecutive 1296 jiffies between initializing each pair of consecutive
1270 rcu_node structures. This helps to expose races involving 1297 rcu_node structures. This helps to expose races involving
1271 grace-period initialization, in other words, it makes your 1298 grace-period initialization, in other words, it makes your
1272 kernel less stable. It can also greatly increase grace-period 1299 kernel less stable. It can also greatly increase grace-period
@@ -1286,6 +1313,30 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY
1286 This option specifies the number of jiffies to wait between 1313 This option specifies the number of jiffies to wait between
1287 each rcu_node structure initialization. 1314 each rcu_node structure initialization.
1288 1315
1316config RCU_TORTURE_TEST_SLOW_CLEANUP
1317 bool "Slow down RCU grace-period cleanup to expose races"
1318 depends on RCU_TORTURE_TEST
1319 help
1320 This option delays grace-period cleanup for a few jiffies
1321 between cleaning up each pair of consecutive rcu_node
1322 structures. This helps to expose races involving grace-period
1323 cleanup, in other words, it makes your kernel less stable.
1324 It can also greatly increase grace-period latency, especially
1325 on systems with large numbers of CPUs. This is useful when
1326 torture-testing RCU, but in almost no other circumstance.
1327
1328 Say Y here if you want your system to crash and hang more often.
1329 Say N if you want a sane system.
1330
1331config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
1332 int "How much to slow down RCU grace-period cleanup"
1333 range 0 5
1334 default 3
1335 depends on RCU_TORTURE_TEST_SLOW_CLEANUP
1336 help
1337 This option specifies the number of jiffies to wait between
1338 each rcu_node structure cleanup operation.
1339
1289config RCU_CPU_STALL_TIMEOUT 1340config RCU_CPU_STALL_TIMEOUT
1290 int "RCU CPU stall timeout in seconds" 1341 int "RCU CPU stall timeout in seconds"
1291 depends on RCU_STALL_COMMON 1342 depends on RCU_STALL_COMMON
@@ -1322,6 +1373,17 @@ config RCU_TRACE
1322 Say Y here if you want to enable RCU tracing 1373 Say Y here if you want to enable RCU tracing
1323 Say N if you are unsure. 1374 Say N if you are unsure.
1324 1375
1376config RCU_EQS_DEBUG
1377 bool "Use this when adding any sort of NO_HZ support to your arch"
1378 depends on DEBUG_KERNEL
1379 help
1380 This option provides consistency checks in RCU's handling of
1381 NO_HZ. These checks have proven quite helpful in detecting
1382 bugs in arch-specific NO_HZ code.
1383
1384 Say N here if you need ultimate kernel/user switch latencies
1385 Say Y if you are unsure
1386
1325endmenu # "RCU Debugging" 1387endmenu # "RCU Debugging"
1326 1388
1327config DEBUG_BLOCK_EXT_DEVT 1389config DEBUG_BLOCK_EXT_DEVT
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 5f627084f2e9..5a70f6196f57 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -16,11 +16,10 @@
16int cpumask_next_and(int n, const struct cpumask *src1p, 16int cpumask_next_and(int n, const struct cpumask *src1p,
17 const struct cpumask *src2p) 17 const struct cpumask *src2p)
18{ 18{
19 struct cpumask tmp; 19 while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
20 20 if (cpumask_test_cpu(n, src2p))
21 if (cpumask_and(&tmp, src1p, src2p)) 21 break;
22 return cpumask_next(n, &tmp); 22 return n;
23 return nr_cpu_ids;
24} 23}
25EXPORT_SYMBOL(cpumask_next_and); 24EXPORT_SYMBOL(cpumask_next_and);
26 25
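
The cpumask_next_and() rewrite replaces a temporary mask with an
iterate-and-test loop. A struct cpumask holds NR_CPUS bits regardless of
CONFIG_CPUMASK_OFFSTACK, so the old on-stack tmp cost 1 KiB at
NR_CPUS=8192; the new form needs no scratch storage at all. The usual
caller pattern, with illustrative names:

	int cpu = -1;

	/* visit every CPU present in both masks */
	while ((cpu = cpumask_next_and(cpu, cpu_online_mask,
				       affinity_mask)) < nr_cpu_ids)
		do_per_cpu_setup(cpu);	/* hypothetical helper */
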
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index aac511417ad1..a89d041592c8 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -639,7 +639,7 @@ do { \
639 ************** MIPS ***************** 639 ************** MIPS *****************
640 ***************************************/ 640 ***************************************/
641#if defined(__mips__) && W_TYPE_SIZE == 32 641#if defined(__mips__) && W_TYPE_SIZE == 32
642#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 642#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
643#define umul_ppmm(w1, w0, u, v) \ 643#define umul_ppmm(w1, w0, u, v) \
644do { \ 644do { \
645 UDItype __ll = (UDItype)(u) * (v); \ 645 UDItype __ll = (UDItype)(u) * (v); \
@@ -671,7 +671,7 @@ do { \
671 ************** MIPS/64 ************** 671 ************** MIPS/64 **************
672 ***************************************/ 672 ***************************************/
673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 673#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
674#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 674#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
675#define umul_ppmm(w1, w0, u, v) \ 675#define umul_ppmm(w1, w0, u, v) \
676do { \ 676do { \
677 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ 677 typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
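
The longlong.h hunks fix a classic compiler-version test bug:
__GNUC__ >= 4 && __GNUC_MINOR__ >= 4 is false for gcc 5.0 (major 5,
minor 0), silently disabling the fast umul_ppmm() on newer compilers.
The robust shape of an "at least version X.Y" check is:

	/* true for 4.4, 4.9, 5.0, 6.x, ...; false for 4.3 and earlier */
	#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
	#define HAVE_FAST_UMUL_PPMM 1	/* illustrative macro name */
	#endif

The patch's (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
form is equivalent over this range.
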
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4396434e4715..8609378e6505 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -26,6 +26,7 @@
26#include <linux/random.h> 26#include <linux/random.h>
27#include <linux/rhashtable.h> 27#include <linux/rhashtable.h>
28#include <linux/err.h> 28#include <linux/err.h>
29#include <linux/export.h>
29 30
30#define HASH_DEFAULT_SIZE 64UL 31#define HASH_DEFAULT_SIZE 64UL
31#define HASH_MIN_SIZE 4U 32#define HASH_MIN_SIZE 4U
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 36c15a2889e4..3a5f2b366d84 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -57,7 +57,8 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
57 return res + find_zero(data) + 1 - align; 57 return res + find_zero(data) + 1 - align;
58 } 58 }
59 res += sizeof(unsigned long); 59 res += sizeof(unsigned long);
60 if (unlikely(max < sizeof(unsigned long))) 60 /* We already handled 'unsigned long' bytes. Did we do it all ? */
61 if (unlikely(max <= sizeof(unsigned long)))
61 break; 62 break;
62 max -= sizeof(unsigned long); 63 max -= sizeof(unsigned long);
63 if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) 64 if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
@@ -90,8 +91,15 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
90 * Get the size of a NUL-terminated string in user space. 91 * Get the size of a NUL-terminated string in user space.
91 * 92 *
92 * Returns the size of the string INCLUDING the terminating NUL. 93 * Returns the size of the string INCLUDING the terminating NUL.
93 * If the string is too long, returns 'count+1'. 94 * If the string is too long, returns a number larger than @count. User
95 * has to check the return value against "> count".
94 * On exception (or invalid count), returns 0. 96 * On exception (or invalid count), returns 0.
97 *
98 * NOTE! You should basically never use this function. There is
99 * almost never any valid case for using the length of a user space
100 * string, since the string can be changed at any time by other
101 * threads. Use "strncpy_from_user()" instead to get a stable copy
102 * of the string.
95 */ 103 */
96long strnlen_user(const char __user *str, long count) 104long strnlen_user(const char __user *str, long count)
97{ 105{
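
The strnlen_user() hunk closes an off-by-one in the word-at-a-time loop:
once sizeof(unsigned long) bytes have been accounted for, max equal to
sizeof(unsigned long) means nothing remains, so the exit test must be <=
rather than <, or the loop fetches one word past the allowed maximum.
The updated kernel-doc also pins down the caller contract, which reads
roughly like this sketch:

	long len = strnlen_user(ustr, count);

	if (len == 0)
		return -EFAULT;		/* fault, or invalid count */
	if (len > count)
		return -ENAMETOOLONG;	/* no NUL within count bytes */
	/* otherwise len includes the terminating NUL */
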
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4abda074ea45..3c365ab6cf5f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -537,8 +537,9 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
537 * Allocates bounce buffer and returns its kernel virtual address. 537 * Allocates bounce buffer and returns its kernel virtual address.
538 */ 538 */
539 539
540phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, 540static phys_addr_t
541 enum dma_data_direction dir) 541map_single(struct device *hwdev, phys_addr_t phys, size_t size,
542 enum dma_data_direction dir)
542{ 543{
543 dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); 544 dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
544 545
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 6dc4580df2af..000e7b3b9896 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -359,23 +359,6 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
359 flush_delayed_work(&bdi->wb.dwork); 359 flush_delayed_work(&bdi->wb.dwork);
360} 360}
361 361
362/*
363 * Called when the device behind @bdi has been removed or ejected.
364 *
365 * We can't really do much here except for reducing the dirty ratio at
366 * the moment. In the future we should be able to set a flag so that
367 * the filesystem can handle errors at mark_inode_dirty time instead
368 * of only at writeback time.
369 */
370void bdi_unregister(struct backing_dev_info *bdi)
371{
372 if (WARN_ON_ONCE(!bdi->dev))
373 return;
374
375 bdi_set_min_ratio(bdi, 0);
376}
377EXPORT_SYMBOL(bdi_unregister);
378
379static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) 362static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
380{ 363{
381 memset(wb, 0, sizeof(*wb)); 364 memset(wb, 0, sizeof(*wb));
@@ -443,6 +426,7 @@ void bdi_destroy(struct backing_dev_info *bdi)
443 int i; 426 int i;
444 427
445 bdi_wb_shutdown(bdi); 428 bdi_wb_shutdown(bdi);
429 bdi_set_min_ratio(bdi, 0);
446 430
447 WARN_ON(!list_empty(&bdi->work_list)); 431 WARN_ON(!list_empty(&bdi->work_list));
448 WARN_ON(delayed_work_pending(&bdi->wb.dwork)); 432 WARN_ON(delayed_work_pending(&bdi->wb.dwork));
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 14c2f2017e37..a04225d372ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2323,6 +2323,8 @@ done_restock:
2323 css_get_many(&memcg->css, batch); 2323 css_get_many(&memcg->css, batch);
2324 if (batch > nr_pages) 2324 if (batch > nr_pages)
2325 refill_stock(memcg, batch - nr_pages); 2325 refill_stock(memcg, batch - nr_pages);
2326 if (!(gfp_mask & __GFP_WAIT))
2327 goto done;
2326 /* 2328 /*
2327 * If the hierarchy is above the normal consumption range, 2329 * If the hierarchy is above the normal consumption range,
2328 * make the charging task trim their excess contribution. 2330 * make the charging task trim their excess contribution.
@@ -5833,9 +5835,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
5833 if (!mem_cgroup_is_root(memcg)) 5835 if (!mem_cgroup_is_root(memcg))
5834 page_counter_uncharge(&memcg->memory, 1); 5836 page_counter_uncharge(&memcg->memory, 1);
5835 5837
5836 /* XXX: caller holds IRQ-safe mapping->tree_lock */ 5838 /* Caller disabled preemption with mapping->tree_lock */
5837 VM_BUG_ON(!irqs_disabled());
5838
5839 mem_cgroup_charge_statistics(memcg, page, -1); 5839 mem_cgroup_charge_statistics(memcg, page, -1);
5840 memcg_check_events(memcg, page); 5840 memcg_check_events(memcg, page);
5841} 5841}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 457bde530cbe..9e88f749aa51 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1969,8 +1969,10 @@ void try_offline_node(int nid)
1969 * wait_table may be allocated from boot memory, 1969 * wait_table may be allocated from boot memory,
1970 * here only free if it's allocated by vmalloc. 1970 * here only free if it's allocated by vmalloc.
1971 */ 1971 */
1972 if (is_vmalloc_addr(zone->wait_table)) 1972 if (is_vmalloc_addr(zone->wait_table)) {
1973 vfree(zone->wait_table); 1973 vfree(zone->wait_table);
1974 zone->wait_table = NULL;
1975 }
1974 } 1976 }
1975} 1977}
1976EXPORT_SYMBOL(try_offline_node); 1978EXPORT_SYMBOL(try_offline_node);
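
Setting zone->wait_table to NULL right after vfree() is the standard
dangling-pointer rule for objects that go through repeated teardown and
setup cycles (node offline followed by re-online): later code can test
the pointer rather than free or dereference a stale vmalloc address.
Generic shape, with obj hypothetical:

	if (is_vmalloc_addr(obj->table)) {
		vfree(obj->table);
		obj->table = NULL;	/* re-init paths test this */
	}
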
diff --git a/mm/shmem.c b/mm/shmem.c
index de981370fbc5..3759099d8ce4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2451,6 +2451,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2451 return -ENOMEM; 2451 return -ENOMEM;
2452 } 2452 }
2453 inode->i_op = &shmem_short_symlink_operations; 2453 inode->i_op = &shmem_short_symlink_operations;
2454 inode->i_link = info->symlink;
2454 } else { 2455 } else {
2455 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); 2456 error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
2456 if (error) { 2457 if (error) {
@@ -2474,30 +2475,23 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
2474 return 0; 2475 return 0;
2475} 2476}
2476 2477
2477static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) 2478static const char *shmem_follow_link(struct dentry *dentry, void **cookie)
2478{
2479 nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
2480 return NULL;
2481}
2482
2483static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
2484{ 2479{
2485 struct page *page = NULL; 2480 struct page *page = NULL;
2486 int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL); 2481 int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
2487 nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); 2482 if (error)
2488 if (page) 2483 return ERR_PTR(error);
2489 unlock_page(page); 2484 unlock_page(page);
2490 return page; 2485 *cookie = page;
2486 return kmap(page);
2491} 2487}
2492 2488
2493static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 2489static void shmem_put_link(struct inode *unused, void *cookie)
2494{ 2490{
2495 if (!IS_ERR(nd_get_link(nd))) { 2491 struct page *page = cookie;
2496 struct page *page = cookie; 2492 kunmap(page);
2497 kunmap(page); 2493 mark_page_accessed(page);
2498 mark_page_accessed(page); 2494 page_cache_release(page);
2499 page_cache_release(page);
2500 }
2501} 2495}
2502 2496
2503#ifdef CONFIG_TMPFS_XATTR 2497#ifdef CONFIG_TMPFS_XATTR
@@ -2642,7 +2636,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
2642 2636
2643static const struct inode_operations shmem_short_symlink_operations = { 2637static const struct inode_operations shmem_short_symlink_operations = {
2644 .readlink = generic_readlink, 2638 .readlink = generic_readlink,
2645 .follow_link = shmem_follow_short_symlink, 2639 .follow_link = simple_follow_link,
2646#ifdef CONFIG_TMPFS_XATTR 2640#ifdef CONFIG_TMPFS_XATTR
2647 .setxattr = shmem_setxattr, 2641 .setxattr = shmem_setxattr,
2648 .getxattr = shmem_getxattr, 2642 .getxattr = shmem_getxattr,
@@ -3401,7 +3395,13 @@ int shmem_zero_setup(struct vm_area_struct *vma)
3401 struct file *file; 3395 struct file *file;
3402 loff_t size = vma->vm_end - vma->vm_start; 3396 loff_t size = vma->vm_end - vma->vm_start;
3403 3397
3404 file = shmem_file_setup("dev/zero", size, vma->vm_flags); 3398 /*
3399 * Cloning a new file under mmap_sem leads to a lock ordering conflict
3400 * between XFS directory reading and selinux: since this file is only
3401 * accessible to the user through its mapping, use S_PRIVATE flag to
3402 * bypass file security, in the same way as shmem_kernel_file_setup().
3403 */
3404 file = __shmem_file_setup("dev/zero", size, vma->vm_flags, S_PRIVATE);
3405 if (IS_ERR(file)) 3405 if (IS_ERR(file))
3406 return PTR_ERR(file); 3406 return PTR_ERR(file);
3407 3407
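
The shmem changes track the reworked VFS symlink API of this cycle:
->follow_link() now returns the target string (or an ERR_PTR()) directly
and stashes any state to release in *cookie, and ->put_link() receives
just the inode and that cookie, replacing the nameidata-based
nd_set_link()/nd_get_link() calls. A skeleton of the new contract, with
get_link_page() standing in for a filesystem's own lookup:

	static const char *ex_follow_link(struct dentry *dentry, void **cookie)
	{
		struct page *page = get_link_page(d_inode(dentry));	/* hypothetical */

		if (IS_ERR(page))
			return ERR_CAST(page);
		*cookie = page;			/* handed to ->put_link() later */
		return kmap(page);		/* the symlink body */
	}

	static void ex_put_link(struct inode *inode, void *cookie)
	{
		struct page *page = cookie;

		kunmap(page);
		page_cache_release(page);
	}

Short in-inode symlinks need no cookie at all, which is why
shmem_follow_short_symlink() collapses into the generic
simple_follow_link() once inode->i_link points at the stored string.
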
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 08bd7a3d464a..a8b5e749e84e 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -289,7 +289,8 @@ static int create_handle_cache(struct zs_pool *pool)
289 289
290static void destroy_handle_cache(struct zs_pool *pool) 290static void destroy_handle_cache(struct zs_pool *pool)
291{ 291{
292 kmem_cache_destroy(pool->handle_cachep); 292 if (pool->handle_cachep)
293 kmem_cache_destroy(pool->handle_cachep);
293} 294}
294 295
295static unsigned long alloc_handle(struct zs_pool *pool) 296static unsigned long alloc_handle(struct zs_pool *pool)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f9..659fb96672e4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
796 int err = 0; 796 int err = 0;
797 797
798 if (ndm->ndm_flags & NTF_USE) { 798 if (ndm->ndm_flags & NTF_USE) {
799 local_bh_disable();
799 rcu_read_lock(); 800 rcu_read_lock();
800 br_fdb_update(p->br, p, addr, vid, true); 801 br_fdb_update(p->br, p, addr, vid, true);
801 rcu_read_unlock(); 802 rcu_read_unlock();
803 local_bh_enable();
802 } else { 804 } else {
803 spin_lock_bh(&p->br->hash_lock); 805 spin_lock_bh(&p->br->hash_lock);
804 err = fdb_add_entry(p, addr, ndm->ndm_state, 806 err = fdb_add_entry(p, addr, ndm->ndm_state,
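
The local_bh_disable()/local_bh_enable() pair added around
br_fdb_update() encodes a general locking rule: the update path is
normally entered from the bridge's packet-receive softirq and takes the
FDB hash lock with plain spin_lock(), so a process-context caller (here
the NTF_USE netlink path) must block softirqs first, or a learning
softirq arriving on the same CPU could deadlock on the held lock. The
shape of the rule:

	/* lock is also taken with plain spin_lock() in softirq context */
	local_bh_disable();
	spin_lock(&hash_lock);
	/* ... modify the table ... */
	spin_unlock(&hash_lock);
	local_bh_enable();
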
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 22fd0419b314..ff667e18b2d6 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br,
1167 struct net_bridge_port *p; 1167 struct net_bridge_port *p;
1168 struct hlist_node *slot = NULL; 1168 struct hlist_node *slot = NULL;
1169 1169
1170 if (!hlist_unhashed(&port->rlist))
1171 return;
1172
1170 hlist_for_each_entry(p, &br->router_list, rlist) { 1173 hlist_for_each_entry(p, &br->router_list, rlist) {
1171 if ((unsigned long) port >= (unsigned long) p) 1174 if ((unsigned long) port >= (unsigned long) p)
1172 break; 1175 break;
@@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br,
1194 if (port->multicast_router != 1) 1197 if (port->multicast_router != 1)
1195 return; 1198 return;
1196 1199
1197 if (!hlist_unhashed(&port->rlist))
1198 goto timer;
1199
1200 br_multicast_add_router(br, port); 1200 br_multicast_add_router(br, port);
1201 1201
1202timer:
1203 mod_timer(&port->multicast_router_timer, 1202 mod_timer(&port->multicast_router_timer,
1204 now + br->multicast_querier_interval); 1203 now + br->multicast_querier_interval);
1205} 1204}
diff --git a/net/core/dev.c b/net/core/dev.c
index 2c1c67fad64d..aa82f9ab6a36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
1718 1718
1719int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1719int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1720{ 1720{
1721 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 1721 if (skb_orphan_frags(skb, GFP_ATOMIC) ||
1722 if (skb_copy_ubufs(skb, GFP_ATOMIC)) { 1722 unlikely(!is_skb_forwardable(dev, skb))) {
1723 atomic_long_inc(&dev->rx_dropped);
1724 kfree_skb(skb);
1725 return NET_RX_DROP;
1726 }
1727 }
1728
1729 if (unlikely(!is_skb_forwardable(dev, skb))) {
1730 atomic_long_inc(&dev->rx_dropped); 1723 atomic_long_inc(&dev->rx_dropped);
1731 kfree_skb(skb); 1724 kfree_skb(skb);
1732 return NET_RX_DROP; 1725 return NET_RX_DROP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3cfff2a3d651..41ec02242ea7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
4398 4398
4399 while (order) { 4399 while (order) {
4400 if (npages >= 1 << order) { 4400 if (npages >= 1 << order) {
4401 page = alloc_pages(gfp_mask | 4401 page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
4402 __GFP_COMP | 4402 __GFP_COMP |
4403 __GFP_NOWARN | 4403 __GFP_NOWARN |
4404 __GFP_NORETRY, 4404 __GFP_NORETRY,
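
This hunk and the matching one in net/core/sock.c below strip __GFP_WAIT
from the opportunistic high-order attempt: a compound page is a
nice-to-have here, so the first try should fail fast instead of entering
reclaim or compaction, and only the order-0 fallback keeps the caller's
full gfp mask (and with it the right to sleep). The resulting pattern:

	struct page *page;

	/* best-effort high-order try: no sleeping, quiet, no retries */
	page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
			   __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY,
			   order);
	if (!page)
		page = alloc_pages(gfp_mask, 0); /* may sleep if allowed */
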
diff --git a/net/core/sock.c b/net/core/sock.c
index 292f42228bfb..dc30dc5bb1b8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk)
354 354
355 /* 355 /*
356 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward 356 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
357 * progress of swapping. However, if SOCK_MEMALLOC is cleared while 357 * progress of swapping. SOCK_MEMALLOC may be cleared while
358 * it has rmem allocations there is a risk that the user of the 358 * it has rmem allocations due to the last swapfile being deactivated
359 * socket cannot make forward progress due to exceeding the rmem 359 * but there is a risk that the socket is unusable due to exceeding
360 * limits. By rights, sk_clear_memalloc() should only be called 360 * the rmem limits. Reclaim the reserves and obey rmem limits again.
361 * on sockets being torn down but warn and reset the accounting if
362 * that assumption breaks.
363 */ 361 */
364 if (WARN_ON(sk->sk_forward_alloc)) 362 sk_mem_reclaim(sk);
365 sk_mem_reclaim(sk);
366} 363}
367EXPORT_SYMBOL_GPL(sk_clear_memalloc); 364EXPORT_SYMBOL_GPL(sk_clear_memalloc);
368 365
@@ -1883,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1883 1880
1884 pfrag->offset = 0; 1881 pfrag->offset = 0;
1885 if (SKB_FRAG_PAGE_ORDER) { 1882 if (SKB_FRAG_PAGE_ORDER) {
1886 pfrag->page = alloc_pages(gfp | __GFP_COMP | 1883 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
1887 __GFP_NOWARN | __GFP_NORETRY, 1884 __GFP_NOWARN | __GFP_NORETRY,
1888 SKB_FRAG_PAGE_ORDER); 1885 SKB_FRAG_PAGE_ORDER);
1889 if (likely(pfrag->page)) { 1886 if (likely(pfrag->page)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1c92ea67baef..83aa604f9273 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
90#include <linux/socket.h> 90#include <linux/socket.h>
91#include <linux/sockios.h> 91#include <linux/sockios.h>
92#include <linux/igmp.h> 92#include <linux/igmp.h>
93#include <linux/inetdevice.h>
93#include <linux/in.h> 94#include <linux/in.h>
94#include <linux/errno.h> 95#include <linux/errno.h>
95#include <linux/timer.h> 96#include <linux/timer.h>
@@ -1960,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
1960 struct sock *sk; 1961 struct sock *sk;
1961 struct dst_entry *dst; 1962 struct dst_entry *dst;
1962 int dif = skb->dev->ifindex; 1963 int dif = skb->dev->ifindex;
1964 int ours;
1963 1965
1964 /* validate the packet */ 1966 /* validate the packet */
1965 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 1967 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1969,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
1969 uh = udp_hdr(skb); 1971 uh = udp_hdr(skb);
1970 1972
1971 if (skb->pkt_type == PACKET_BROADCAST || 1973 if (skb->pkt_type == PACKET_BROADCAST ||
1972 skb->pkt_type == PACKET_MULTICAST) 1974 skb->pkt_type == PACKET_MULTICAST) {
1975 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
1976
1977 if (!in_dev)
1978 return;
1979
1980 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
1981 iph->protocol);
1982 if (!ours)
1983 return;
1973 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 1984 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
1974 uh->source, iph->saddr, dif); 1985 uh->source, iph->saddr, dif);
1975 else if (skb->pkt_type == PACKET_HOST) 1986 } else if (skb->pkt_type == PACKET_HOST) {
1976 sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, 1987 sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
1977 uh->source, iph->saddr, dif); 1988 uh->source, iph->saddr, dif);
1978 else 1989 } else {
1979 return; 1990 return;
1991 }
1980 1992
1981 if (!sk) 1993 if (!sk)
1982 return; 1994 return;
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d873ceea86e6..ca09bf49ac68 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev)
133 free_percpu(idev->stats.ipv6); 133 free_percpu(idev->stats.ipv6);
134} 134}
135 135
136static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
137{
138 struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
139
140 snmp6_free_dev(idev);
141 kfree(idev);
142}
143
136/* Nobody refers to this device, we may destroy it. */ 144/* Nobody refers to this device, we may destroy it. */
137 145
138void in6_dev_finish_destroy(struct inet6_dev *idev) 146void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
151 pr_warn("Freeing alive inet6 device %p\n", idev); 159 pr_warn("Freeing alive inet6 device %p\n", idev);
152 return; 160 return;
153 } 161 }
154 snmp6_free_dev(idev); 162 call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
155 kfree_rcu(idev, rcu);
156} 163}
157EXPORT_SYMBOL(in6_dev_finish_destroy); 164EXPORT_SYMBOL(in6_dev_finish_destroy);
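
Switching from kfree_rcu() to call_rcu() here is about what runs after
the grace period: kfree_rcu() can only kfree() the enclosing object, but
inet6_dev teardown must also free_percpu() the per-CPU SNMP statistics,
and freeing those immediately risked use-after-free by RCU readers still
holding the idev. When post-grace-period work is more than a plain
kfree(), a real callback is required:

	static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
	{
		struct inet6_dev *idev =
			container_of(head, struct inet6_dev, rcu);

		snmp6_free_dev(idev);	/* teardown kfree_rcu() cannot do */
		kfree(idev);
	}

	/* instead of kfree_rcu(idev, rcu): */
	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
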
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b3f732269e4..1f93a5978f2a 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev)
541 541
542 RCU_INIT_POINTER(dev->mpls_ptr, NULL); 542 RCU_INIT_POINTER(dev->mpls_ptr, NULL);
543 543
544 kfree(mdev); 544 kfree_rcu(mdev, rcu);
545} 545}
546 546
547static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 547static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
@@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
564 case NETDEV_UNREGISTER: 564 case NETDEV_UNREGISTER:
565 mpls_ifdown(dev); 565 mpls_ifdown(dev);
566 break; 566 break;
567 case NETDEV_CHANGENAME:
568 mdev = mpls_dev_get(dev);
569 if (mdev) {
570 int err;
571
572 mpls_dev_sysctl_unregister(mdev);
573 err = mpls_dev_sysctl_register(dev, mdev);
574 if (err)
575 return notifier_from_errno(err);
576 }
577 break;
567 } 578 }
568 return NOTIFY_OK; 579 return NOTIFY_OK;
569} 580}
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index b064c345042c..8cabeb5a1cb9 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -16,6 +16,7 @@ struct mpls_dev {
16 int input_enabled; 16 int input_enabled;
17 17
18 struct ctl_table_header *sysctl; 18 struct ctl_table_header *sysctl;
19 struct rcu_head rcu;
19}; 20};
20 21
21struct sk_buff; 22struct sk_buff;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4776282c6417..33e6d6e2908f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
125 if (err) 125 if (err)
126 goto error_master_upper_dev_unlink; 126 goto error_master_upper_dev_unlink;
127 127
128 dev_disable_lro(netdev_vport->dev);
128 dev_set_promiscuity(netdev_vport->dev, 1); 129 dev_set_promiscuity(netdev_vport->dev, 1);
129 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 130 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
130 rtnl_unlock(); 131 rtnl_unlock();
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index fb7976aee61c..4f15b7d730e1 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -381,13 +381,14 @@ nomem:
381} 381}
382 382
383 383
384/* Public interface to creat the association shared key. 384/* Public interface to create the association shared key.
385 * See code above for the algorithm. 385 * See code above for the algorithm.
386 */ 386 */
387int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) 387int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
388{ 388{
389 struct sctp_auth_bytes *secret; 389 struct sctp_auth_bytes *secret;
390 struct sctp_shared_key *ep_key; 390 struct sctp_shared_key *ep_key;
391 struct sctp_chunk *chunk;
391 392
392 /* If we don't support AUTH, or peer is not capable 393 /* If we don't support AUTH, or peer is not capable
393 * we don't need to do anything. 394 * we don't need to do anything.
@@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
410 sctp_auth_key_put(asoc->asoc_shared_key); 411 sctp_auth_key_put(asoc->asoc_shared_key);
411 asoc->asoc_shared_key = secret; 412 asoc->asoc_shared_key = secret;
412 413
414 /* Update send queue in case any chunk already in there now
415 * needs authenticating
416 */
417 list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
418 if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
419 chunk->auth = 1;
420 }
421
413 return 0; 422 return 0;
414} 423}
415 424
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9074b5cede38..f485600c4507 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data)
2142 peer_node = tsk_peer_node(tsk); 2142 peer_node = tsk_peer_node(tsk);
2143 2143
2144 if (tsk->probing_state == TIPC_CONN_PROBING) { 2144 if (tsk->probing_state == TIPC_CONN_PROBING) {
2145 /* Previous probe not answered -> self abort */ 2145 if (!sock_owned_by_user(sk)) {
2146 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 2146 sk->sk_socket->state = SS_DISCONNECTING;
2147 TIPC_CONN_MSG, SHORT_H_SIZE, 0, 2147 tsk->connected = 0;
2148 own_node, peer_node, tsk->portid, 2148 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
2149 peer_port, TIPC_ERR_NO_PORT); 2149 tsk_peer_port(tsk));
2150 sk->sk_state_change(sk);
2151 } else {
2152 /* Try again later */
2153 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
2154 }
2155
2150 } else { 2156 } else {
2151 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, 2157 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
2152 INT_H_SIZE, 0, peer_node, own_node, 2158 INT_H_SIZE, 0, peer_node, own_node,
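
The tipc_sk_timeout() change applies the standard socket-timer
discipline: a timer that fires while user context owns the socket must
not rewrite connection state, so it re-arms itself briefly instead; only
the un-owned case may tear the connection down and call
sk_state_change(). A skeleton of that pattern (illustrative, using the
timer-callback signature of this era):

	static void ex_sock_timer(unsigned long data)
	{
		struct sock *sk = (struct sock *)data;

		bh_lock_sock(sk);
		if (sock_owned_by_user(sk)) {
			/* user context holds the lock: retry shortly */
			sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
		} else {
			/* exclusive access: safe to change socket state */
		}
		bh_unlock_sock(sk);
		sock_put(sk);	/* drop the reference held for this run */
	}
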
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index fff1bef6ed6d..fd682832a0e3 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
1333 memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); 1333 memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
1334 wdev_unlock(wdev); 1334 wdev_unlock(wdev);
1335 1335
1336 memset(&sinfo, 0, sizeof(sinfo));
1337
1336 if (rdev_get_station(rdev, dev, bssid, &sinfo)) 1338 if (rdev_get_station(rdev, dev, bssid, &sinfo))
1337 return NULL; 1339 return NULL;
1338 1340
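
The memset() added to cfg80211_wireless_stats() guards a
fill-in-what-you-have out-parameter: drivers set only the struct
station_info fields they flag in sinfo.filled, so any field (and the
filled bitmap itself) left as stack garbage would be consumed as valid
data by the wext code that follows. Zeroing such out-parameters before
calling the filler is the safe default:

	struct station_info sinfo;

	memset(&sinfo, 0, sizeof(sinfo));	/* no stale 'filled' bits */
	if (rdev_get_station(rdev, dev, bssid, &sinfo))
		return NULL;
	/* only fields flagged in sinfo.filled are meaningful */
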
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 89b1df4e72ab..c5ec977b9c37 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3169,12 +3169,12 @@ sub process {
3169 } 3169 }
3170 3170
3171# check for global initialisers. 3171# check for global initialisers.
3172 if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) { 3172 if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) {
3173 if (ERROR("GLOBAL_INITIALISERS", 3173 if (ERROR("GLOBAL_INITIALISERS",
3174 "do not initialise globals to 0 or NULL\n" . 3174 "do not initialise globals to 0 or NULL\n" .
3175 $herecurr) && 3175 $herecurr) &&
3176 $fix) { 3176 $fix) {
3177 $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; 3177 $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/;
3178 } 3178 }
3179 } 3179 }
3180# check for static initialisers. 3180# check for static initialisers.
diff --git a/security/capability.c b/security/capability.c
index 0d03fcc489a4..7d3f38fe02ba 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -209,8 +209,8 @@ static int cap_inode_readlink(struct dentry *dentry)
209 return 0; 209 return 0;
210} 210}
211 211
212static int cap_inode_follow_link(struct dentry *dentry, 212static int cap_inode_follow_link(struct dentry *dentry, struct inode *inode,
213 struct nameidata *nameidata) 213 bool rcu)
214{ 214{
215 return 0; 215 return 0;
216} 216}
diff --git a/security/security.c b/security/security.c
index 8e9b1f4b9b45..04c8feca081a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -581,11 +581,12 @@ int security_inode_readlink(struct dentry *dentry)
581 return security_ops->inode_readlink(dentry); 581 return security_ops->inode_readlink(dentry);
582} 582}
583 583
584int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd) 584int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
585 bool rcu)
585{ 586{
586 if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) 587 if (unlikely(IS_PRIVATE(inode)))
587 return 0; 588 return 0;
588 return security_ops->inode_follow_link(dentry, nd); 589 return security_ops->inode_follow_link(dentry, inode, rcu);
589} 590}
590 591
591int security_inode_permission(struct inode *inode, int mask) 592int security_inode_permission(struct inode *inode, int mask)
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 3c17dda9571d..0b122b1421a9 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -761,7 +761,23 @@ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
761 761
762 rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); 762 rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
763 763
764 rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); 764 rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0);
765 if (rc2)
766 return rc2;
767 return rc;
768}
769
770int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
771 u32 requested, struct common_audit_data *auditdata,
772 int flags)
773{
774 struct av_decision avd;
775 int rc, rc2;
776
777 rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
778
779 rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc,
780 auditdata, flags);
765 if (rc2) 781 if (rc2)
766 return rc2; 782 return rc2;
767 return rc; 783 return rc;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7dade28affba..ffa5a642629a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1564,7 +1564,7 @@ static int cred_has_capability(const struct cred *cred,
1564 1564
1565 rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); 1565 rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
1566 if (audit == SECURITY_CAP_AUDIT) { 1566 if (audit == SECURITY_CAP_AUDIT) {
1567 int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad); 1567 int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0);
1568 if (rc2) 1568 if (rc2)
1569 return rc2; 1569 return rc2;
1570 } 1570 }
@@ -2861,11 +2861,23 @@ static int selinux_inode_readlink(struct dentry *dentry)
2861 return dentry_has_perm(cred, dentry, FILE__READ); 2861 return dentry_has_perm(cred, dentry, FILE__READ);
2862} 2862}
2863 2863
2864static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *nameidata) 2864static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode,
2865 bool rcu)
2865{ 2866{
2866 const struct cred *cred = current_cred(); 2867 const struct cred *cred = current_cred();
2868 struct common_audit_data ad;
2869 struct inode_security_struct *isec;
2870 u32 sid;
2867 2871
2868 return dentry_has_perm(cred, dentry, FILE__READ); 2872 validate_creds(cred);
2873
2874 ad.type = LSM_AUDIT_DATA_DENTRY;
2875 ad.u.dentry = dentry;
2876 sid = cred_sid(cred);
2877 isec = inode->i_security;
2878
2879 return avc_has_perm_flags(sid, isec->sid, isec->sclass, FILE__READ, &ad,
2880 rcu ? MAY_NOT_BLOCK : 0);
2869} 2881}
2870 2882
2871static noinline int audit_inode_permission(struct inode *inode, 2883static noinline int audit_inode_permission(struct inode *inode,
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index ddf8eec03f21..5973c327c54e 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -130,7 +130,8 @@ static inline int avc_audit(u32 ssid, u32 tsid,
130 u16 tclass, u32 requested, 130 u16 tclass, u32 requested,
131 struct av_decision *avd, 131 struct av_decision *avd,
132 int result, 132 int result,
133 struct common_audit_data *a) 133 struct common_audit_data *a,
134 int flags)
134{ 135{
135 u32 audited, denied; 136 u32 audited, denied;
136 audited = avc_audit_required(requested, avd, result, 0, &denied); 137 audited = avc_audit_required(requested, avd, result, 0, &denied);
@@ -138,7 +139,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
138 return 0; 139 return 0;
139 return slow_avc_audit(ssid, tsid, tclass, 140 return slow_avc_audit(ssid, tsid, tclass,
140 requested, audited, denied, result, 141 requested, audited, denied, result,
141 a, 0); 142 a, flags);
142} 143}
143 144
144#define AVC_STRICT 1 /* Ignore permissive mode. */ 145#define AVC_STRICT 1 /* Ignore permissive mode. */
@@ -150,6 +151,10 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
150int avc_has_perm(u32 ssid, u32 tsid, 151int avc_has_perm(u32 ssid, u32 tsid,
151 u16 tclass, u32 requested, 152 u16 tclass, u32 requested,
152 struct common_audit_data *auditdata); 153 struct common_audit_data *auditdata);
154int avc_has_perm_flags(u32 ssid, u32 tsid,
155 u16 tclass, u32 requested,
156 struct common_audit_data *auditdata,
157 int flags);
153 158
154u32 avc_policy_seqno(void); 159u32 avc_policy_seqno(void);
155 160
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 7371e0c3926f..1eabcdf69457 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -246,6 +246,9 @@ static int hda_reg_read(void *context, unsigned int reg, unsigned int *val)
246 return hda_reg_read_stereo_amp(codec, reg, val); 246 return hda_reg_read_stereo_amp(codec, reg, val);
247 if (verb == AC_VERB_GET_PROC_COEF) 247 if (verb == AC_VERB_GET_PROC_COEF)
248 return hda_reg_read_coef(codec, reg, val); 248 return hda_reg_read_coef(codec, reg, val);
249 if ((verb & 0x700) == AC_VERB_SET_AMP_GAIN_MUTE)
250 reg &= ~AC_AMP_FAKE_MUTE;
251
249 err = snd_hdac_exec_verb(codec, reg, 0, val); 252 err = snd_hdac_exec_verb(codec, reg, 0, val);
250 if (err < 0) 253 if (err < 0)
251 return err; 254 return err;
@@ -265,6 +268,9 @@ static int hda_reg_write(void *context, unsigned int reg, unsigned int val)
265 unsigned int verb; 268 unsigned int verb;
266 int i, bytes, err; 269 int i, bytes, err;
267 270
271 if (codec->caps_overwriting)
272 return 0;
273
268 reg &= ~0x00080000U; /* drop GET bit */ 274 reg &= ~0x00080000U; /* drop GET bit */
269 reg |= (codec->addr << 28); 275 reg |= (codec->addr << 28);
270 verb = get_verb(reg); 276 verb = get_verb(reg);
@@ -280,6 +286,8 @@ static int hda_reg_write(void *context, unsigned int reg, unsigned int val)
280 286
281 switch (verb & 0xf00) { 287 switch (verb & 0xf00) {
282 case AC_VERB_SET_AMP_GAIN_MUTE: 288 case AC_VERB_SET_AMP_GAIN_MUTE:
289 if ((reg & AC_AMP_FAKE_MUTE) && (val & AC_AMP_MUTE))
290 val = 0;
283 verb = AC_VERB_SET_AMP_GAIN_MUTE; 291 verb = AC_VERB_SET_AMP_GAIN_MUTE;
284 if (reg & AC_AMP_GET_LEFT) 292 if (reg & AC_AMP_GET_LEFT)
285 verb |= AC_AMP_SET_LEFT >> 8; 293 verb |= AC_AMP_SET_LEFT >> 8;
diff --git a/sound/mips/Kconfig b/sound/mips/Kconfig
index d2f615ab177a..2153d31fb663 100644
--- a/sound/mips/Kconfig
+++ b/sound/mips/Kconfig
@@ -12,12 +12,14 @@ if SND_MIPS
12config SND_SGI_O2 12config SND_SGI_O2
13 tristate "SGI O2 Audio" 13 tristate "SGI O2 Audio"
14 depends on SGI_IP32 14 depends on SGI_IP32
15 select SND_PCM
15 help 16 help
16 Sound support for the SGI O2 Workstation. 17 Sound support for the SGI O2 Workstation.
17 18
18config SND_SGI_HAL2 19config SND_SGI_HAL2
19 tristate "SGI HAL2 Audio" 20 tristate "SGI HAL2 Audio"
20 depends on SGI_HAS_HAL2 21 depends on SGI_HAS_HAL2
22 select SND_PCM
21 help 23 help
22 Sound support for the SGI Indy and Indigo2 Workstation. 24 Sound support for the SGI Indy and Indigo2 Workstation.
23 25
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index b49feff0a319..5645481af3d9 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -436,7 +436,7 @@ static unsigned int get_num_devices(struct hda_codec *codec, hda_nid_t nid)
436 get_wcaps_type(wcaps) != AC_WID_PIN) 436 get_wcaps_type(wcaps) != AC_WID_PIN)
437 return 0; 437 return 0;
438 438
439 parm = snd_hda_param_read(codec, nid, AC_PAR_DEVLIST_LEN); 439 parm = snd_hdac_read_parm_uncached(&codec->core, nid, AC_PAR_DEVLIST_LEN);
440 if (parm == -1 && codec->bus->rirb_error) 440 if (parm == -1 && codec->bus->rirb_error)
441 parm = 0; 441 parm = 0;
442 return parm & AC_DEV_LIST_LEN_MASK; 442 return parm & AC_DEV_LIST_LEN_MASK;
@@ -1376,6 +1376,31 @@ int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir,
1376EXPORT_SYMBOL_GPL(snd_hda_override_amp_caps); 1376EXPORT_SYMBOL_GPL(snd_hda_override_amp_caps);
1377 1377
1378/** 1378/**
1379 * snd_hda_codec_amp_update - update the AMP mono value
1380 * @codec: HD-audio codec
1381 * @nid: NID to read the AMP value
1382 * @ch: channel to update (0 or 1)
1383 * @dir: #HDA_INPUT or #HDA_OUTPUT
1384 * @idx: the index value (only for input direction)
1385 * @mask: bit mask to set
1386 * @val: the bits value to set
1387 *
1388 * Update the AMP values for the given channel, direction and index.
1389 */
1390int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid,
1391 int ch, int dir, int idx, int mask, int val)
1392{
1393 unsigned int cmd = snd_hdac_regmap_encode_amp(nid, ch, dir, idx);
1394
1395 /* enable fake mute if no h/w mute but min=mute */
1396 if ((query_amp_caps(codec, nid, dir) &
1397 (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) == AC_AMPCAP_MIN_MUTE)
1398 cmd |= AC_AMP_FAKE_MUTE;
1399 return snd_hdac_regmap_update_raw(&codec->core, cmd, mask, val);
1400}
1401EXPORT_SYMBOL_GPL(snd_hda_codec_amp_update);
1402
1403/**
1379 * snd_hda_codec_amp_stereo - update the AMP stereo values 1404 * snd_hda_codec_amp_stereo - update the AMP stereo values
1380 * @codec: HD-audio codec 1405 * @codec: HD-audio codec
1381 * @nid: NID to read the AMP value 1406 * @nid: NID to read the AMP value
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index fea198c58196..b6db25b23dd3 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -340,6 +340,11 @@ enum {
340#define use_vga_switcheroo(chip) 0 340#define use_vga_switcheroo(chip) 0
341#endif 341#endif
342 342
343#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
344 ((pci)->device == 0x0c0c) || \
345 ((pci)->device == 0x0d0c) || \
346 ((pci)->device == 0x160c))
347
343static char *driver_short_names[] = { 348static char *driver_short_names[] = {
344 [AZX_DRIVER_ICH] = "HDA Intel", 349 [AZX_DRIVER_ICH] = "HDA Intel",
345 [AZX_DRIVER_PCH] = "HDA Intel PCH", 350 [AZX_DRIVER_PCH] = "HDA Intel PCH",
@@ -1854,8 +1859,17 @@ static int azx_probe_continue(struct azx *chip)
1854 if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) { 1859 if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
1855#ifdef CONFIG_SND_HDA_I915 1860#ifdef CONFIG_SND_HDA_I915
1856 err = hda_i915_init(hda); 1861 err = hda_i915_init(hda);
1857 if (err < 0) 1862 if (err < 0) {
1858 goto out_free; 1863 /* if the controller is bound only with HDMI/DP
1864 * (for HSW and BDW), we need to abort the probe;
1865 * for other chips, still continue probing as other
1866 * codecs can be on the same link.
1867 */
1868 if (CONTROLLER_IN_GPU(pci))
1869 goto out_free;
1870 else
1871 goto skip_i915;
1872 }
1859 err = hda_display_power(hda, true); 1873 err = hda_display_power(hda, true);
1860 if (err < 0) { 1874 if (err < 0) {
1861 dev_err(chip->card->dev, 1875 dev_err(chip->card->dev,
@@ -1865,6 +1879,9 @@ static int azx_probe_continue(struct azx *chip)
 #endif
 	}
 
+#ifdef CONFIG_SND_HDA_I915
+ skip_i915:
+#endif
 	err = azx_first_init(chip);
 	if (err < 0)
 		goto out_free;
diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h
index 3b567f42296b..bed66c314431 100644
--- a/sound/pci/hda/hda_local.h
+++ b/sound/pci/hda/hda_local.h
@@ -129,8 +129,8 @@ int snd_hda_mixer_amp_switch_put_beep(struct snd_kcontrol *kcontrol,
 /* lowlevel accessor with caching; use carefully */
 #define snd_hda_codec_amp_read(codec, nid, ch, dir, idx) \
 	snd_hdac_regmap_get_amp(&(codec)->core, nid, ch, dir, idx)
-#define snd_hda_codec_amp_update(codec, nid, ch, dir, idx, mask, val) \
-	snd_hdac_regmap_update_amp(&(codec)->core, nid, ch, dir, idx, mask, val)
+int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid,
+			     int ch, int dir, int idx, int mask, int val);
 int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid,
 			     int dir, int idx, int mask, int val);
 int snd_hda_codec_amp_init(struct hda_codec *codec, hda_nid_t nid, int ch,
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 464168426465..6d010452c1f5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2168,6 +2168,7 @@ static const struct hda_fixup alc882_fixups[] = {
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x006c, "Acer Aspire 9810", ALC883_FIXUP_ACER_EAPD),
 	SND_PCI_QUIRK(0x1025, 0x0090, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
+	SND_PCI_QUIRK(0x1025, 0x0107, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
 	SND_PCI_QUIRK(0x1025, 0x010a, "Acer Ferrari 5000", ALC883_FIXUP_ACER_EAPD),
 	SND_PCI_QUIRK(0x1025, 0x0110, "Acer Aspire", ALC883_FIXUP_ACER_EAPD),
 	SND_PCI_QUIRK(0x1025, 0x0112, "Acer Aspire 9303", ALC883_FIXUP_ACER_EAPD),
@@ -4514,6 +4515,8 @@ enum {
 	ALC288_FIXUP_DELL_HEADSET_MODE,
 	ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC288_FIXUP_DELL_XPS_13_GPIO6,
+	ALC292_FIXUP_DELL_E7X,
+	ALC292_FIXUP_DISABLE_AAMIX,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5036,6 +5039,16 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC288_FIXUP_DELL1_MIC_NO_PRESENCE
 	},
+	[ALC292_FIXUP_DISABLE_AAMIX] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+	},
+	[ALC292_FIXUP_DELL_E7X] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_dell_xps13,
+		.chained = true,
+		.chain_id = ALC292_FIXUP_DISABLE_AAMIX
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5048,6 +5061,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+	SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X),
+	SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X),
 	SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER),
 	SND_PCI_QUIRK(0x1028, 0x05f4, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x05f5, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5057,6 +5072,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC292_FIXUP_DELL_E7X),
 	SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5377,6 +5393,13 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x1d, 0x40700001},
 		{0x21, 0x02211040}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		ALC255_STANDARD_PINS,
+		{0x12, 0x90a60160},
+		{0x14, 0x90170120},
+		{0x17, 0x40000000},
+		{0x1d, 0x40700001},
+		{0x21, 0x02211030}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC256_STANDARD_PINS,
 		{0x13, 0x40000000}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
@@ -5629,8 +5652,7 @@ static int patch_alc269(struct hda_codec *codec)
 
 	spec = codec->spec;
 	spec->gen.shared_mic_vref_pin = 0x18;
-	if (codec->core.vendor_id != 0x10ec0292)
-		codec->power_save_node = 1;
+	codec->power_save_node = 1;
 
 	snd_hda_pick_fixup(codec, alc269_fixup_models,
 			   alc269_fixup_tbl, alc269_fixups);
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 6833c74ed6ff..6c66d7e16439 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -100,6 +100,7 @@ enum {
 	STAC_HP_ENVY_BASS,
 	STAC_HP_BNB13_EQ,
 	STAC_HP_ENVY_TS_BASS,
+	STAC_HP_ENVY_TS_DAC_BIND,
 	STAC_92HD83XXX_GPIO10_EAPD,
 	STAC_92HD83XXX_MODELS
 };
@@ -2171,6 +2172,22 @@ static void stac92hd83xxx_fixup_gpio10_eapd(struct hda_codec *codec,
 	spec->eapd_switch = 0;
 }
 
+static void hp_envy_ts_fixup_dac_bind(struct hda_codec *codec,
+				      const struct hda_fixup *fix,
+				      int action)
+{
+	struct sigmatel_spec *spec = codec->spec;
+	static hda_nid_t preferred_pairs[] = {
+		0xd, 0x13,
+		0
+	};
+
+	if (action != HDA_FIXUP_ACT_PRE_PROBE)
+		return;
+
+	spec->gen.preferred_dacs = preferred_pairs;
+}
+
 static const struct hda_verb hp_bnb13_eq_verbs[] = {
 	/* 44.1KHz base */
 	{ 0x22, 0x7A6, 0x3E },
@@ -2686,6 +2703,12 @@ static const struct hda_fixup stac92hd83xxx_fixups[] = {
 			{}
 		},
 	},
+	[STAC_HP_ENVY_TS_DAC_BIND] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = hp_envy_ts_fixup_dac_bind,
+		.chained = true,
+		.chain_id = STAC_HP_ENVY_TS_BASS,
+	},
 	[STAC_92HD83XXX_GPIO10_EAPD] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = stac92hd83xxx_fixup_gpio10_eapd,
@@ -2764,6 +2787,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = {
2764 "HP bNB13", STAC_HP_BNB13_EQ), 2787 "HP bNB13", STAC_HP_BNB13_EQ),
2765 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x190e, 2788 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x190e,
2766 "HP ENVY TS", STAC_HP_ENVY_TS_BASS), 2789 "HP ENVY TS", STAC_HP_ENVY_TS_BASS),
2790 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1967,
2791 "HP ENVY TS", STAC_HP_ENVY_TS_DAC_BIND),
2767 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1940, 2792 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1940,
2768 "HP bNB13", STAC_HP_BNB13_EQ), 2793 "HP bNB13", STAC_HP_BNB13_EQ),
2769 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1941, 2794 SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1941,
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 31a95cca015d..bab6c04932aa 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -449,6 +449,15 @@ static int via_suspend(struct hda_codec *codec)
 
 	return 0;
 }
+
+static int via_resume(struct hda_codec *codec)
+{
+	/* some delay here to make jack detection working (bko#98921) */
+	msleep(10);
+	codec->patch_ops.init(codec);
+	regcache_sync(codec->core.regmap);
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_PM
@@ -475,6 +484,7 @@ static const struct hda_codec_ops via_patch_ops = {
 	.stream_pm = snd_hda_gen_stream_pm,
 #ifdef CONFIG_PM
 	.suspend = via_suspend,
+	.resume = via_resume,
 	.check_power_status = via_check_power_status,
 #endif
 };
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 3e2ef61c627b..8b7e391dd0b8 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -918,6 +918,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval,
 	case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */
 	case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */
 	case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */
+	case USB_ID(0x046d, 0x08ca): /* Logitech Quickcam Fusion */
 	case USB_ID(0x046d, 0x0991):
 	/* Most audio usb devices lie about volume resolution.
 	 * Most Logitech webcams have res = 384.
@@ -1582,12 +1583,6 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid,
 			      unitid);
 		return -EINVAL;
 	}
-	/* no bmControls field (e.g. Maya44) -> ignore */
-	if (desc->bLength <= 10 + input_pins) {
-		usb_audio_dbg(state->chip, "MU %d has no bmControls field\n",
-			      unitid);
-		return 0;
-	}
 
 	num_ins = 0;
 	ich = 0;
@@ -1595,6 +1590,9 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid,
 		err = parse_audio_unit(state, desc->baSourceID[pin]);
 		if (err < 0)
 			continue;
+		/* no bmControls field (e.g. Maya44) -> ignore */
+		if (desc->bLength <= 10 + input_pins)
+			continue;
 		err = check_input_term(state, desc->baSourceID[pin], &iterm);
 		if (err < 0)
 			return err;
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index b703cb3cda19..e5000da9e9d7 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -437,6 +437,11 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
 		.map = ebox44_map,
 	},
 	{
+		/* MAYA44 USB+ */
+		.id = USB_ID(0x2573, 0x0008),
+		.map = maya44_map,
+	},
+	{
 		/* KEF X300A */
 		.id = USB_ID(0x27ac, 0x1000),
 		.map = scms_usb3318_map,
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 29175346cc4f..754e689596a2 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1120,6 +1120,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
 	case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
 		return true;
 	}
 	return false;
@@ -1266,8 +1267,9 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
-	/* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
-	case USB_ID(0x20b1, 0x2009):
+
+	case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
+	case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
diff --git a/tools/Makefile b/tools/Makefile
index 9a617adc6675..b35102721cbb 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -1,3 +1,8 @@
+# Some of the tools (perf) use same make variables
+# as in kernel build.
+export srctree=
+export objtree=
+
 include scripts/Makefile.include
 
 help:
@@ -47,11 +52,16 @@ cgroup firewire hv guest usb virtio vm net: FORCE
 liblockdep: FORCE
 	$(call descend,lib/lockdep)
 
-libapikfs: FORCE
+libapi: FORCE
 	$(call descend,lib/api)
 
-perf: libapikfs FORCE
-	$(call descend,$@)
+# The perf build does not follow the descend function setup,
+# invoking it via its own make rule.
+PERF_O = $(if $(O),$(O)/tools/perf,)
+
+perf: FORCE
+	$(Q)mkdir -p $(PERF_O) .
+	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
 
 selftests: FORCE
 	$(call descend,testing/$@)
@@ -97,10 +107,10 @@ cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clea
 liblockdep_clean:
 	$(call descend,lib/lockdep,clean)
 
-libapikfs_clean:
+libapi_clean:
 	$(call descend,lib/api,clean)
 
-perf_clean: libapikfs_clean
+perf_clean:
 	$(call descend,$(@:_clean=),clean)
 
 selftests_clean:
diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h
new file mode 100644
index 000000000000..95df19c95482
--- /dev/null
+++ b/tools/arch/alpha/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+
+#define mb()	__asm__ __volatile__("mb": : :"memory")
+#define rmb()	__asm__ __volatile__("mb": : :"memory")
+#define wmb()	__asm__ __volatile__("wmb": : :"memory")
+
+#endif /* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */
diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h
new file mode 100644
index 000000000000..005c618a0ab0
--- /dev/null
+++ b/tools/arch/arm/include/asm/barrier.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H
+#define _TOOLS_LINUX_ASM_ARM_BARRIER_H
+
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb()	((void(*)(void))0xffff0fa0)()
+#define wmb()	((void(*)(void))0xffff0fa0)()
+#define rmb()	((void(*)(void))0xffff0fa0)()
+
+#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
new file mode 100644
index 000000000000..a0483c8e0142
--- /dev/null
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -0,0 +1,16 @@
+#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+
+/*
+ * From tools/perf/perf-sys.h, last modified in:
+ * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers
+ *
+ * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this
+ * a case like for arm32 where we do things differently in userspace?
+ */
+
+#define mb()	asm volatile("dmb ish" ::: "memory")
+#define wmb()	asm volatile("dmb ishst" ::: "memory")
+#define rmb()	asm volatile("dmb ishld" ::: "memory")
+
+#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
new file mode 100644
index 000000000000..e4422b4b634e
--- /dev/null
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -0,0 +1,48 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * Memory barrier definitions.  This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H
+#define _TOOLS_LINUX_ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():	Guarantees that all preceding stores to memory-
+ *		like regions are visible before any subsequent
+ *		stores and that all following stores will be
+ *		visible only after all previous stores.
+ *   rmb():	Like wmb(), but for reads.
+ *   mb():	wmb()/rmb() combo, i.e., all previous memory
+ *		accesses are visible before all subsequent
+ *		accesses and vice versa.  This is also known as
+ *		a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+
+/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */
+#define ia64_mf()	asm volatile ("mf" ::: "memory")
+
+#define mb()	ia64_mf()
+#define rmb()	mb()
+#define wmb()	mb()
+
+#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h
new file mode 100644
index 000000000000..80f96f7556e3
--- /dev/null
+++ b/tools/arch/mips/include/asm/barrier.h
@@ -0,0 +1,20 @@
+#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it
+ * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+#define mb() asm volatile(		\
+		".set	mips2\n\t"	\
+		"sync\n\t"		\
+		".set	mips0"		\
+		: /* no output */	\
+		: /* no input */	\
+		: "memory")
+#define wmb()	mb()
+#define rmb()	mb()
+
+#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 000000000000..b23aee8e6d90
--- /dev/null
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,29 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory).  The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ */
+#define mb()	__asm__ __volatile__ ("sync" : : : "memory")
+#define rmb()	__asm__ __volatile__ ("sync" : : : "memory")
+#define wmb()	__asm__ __volatile__ ("sync" : : : "memory")
+
+#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
new file mode 100644
index 000000000000..f85141266b92
--- /dev/null
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -0,0 +1,30 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __TOOLS_LINUX_ASM_BARRIER_H
+#define __TOOLS_LINUX_ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb()	mb()
+#define wmb()	mb()
+
+#endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
new file mode 100644
index 000000000000..c18fd7599b97
--- /dev/null
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -0,0 +1,32 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka  &  Kaz Kojima
+ * Copyright (C) 2002 Paul Mundt
+ */
+#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H
+#define __TOOLS_LINUX_ASM_SH_BARRIER_H
+
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
+#if defined(__SH4A__) || defined(__SH5__)
+#define mb()	__asm__ __volatile__ ("synco": : :"memory")
+#define rmb()	mb()
+#define wmb()	mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */
diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h
new file mode 100644
index 000000000000..8c017b3b1391
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include "barrier_64.h"
+#else
+#include "barrier_32.h"
+#endif
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h
new file mode 100644
index 000000000000..c5eadd0a7233
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_32.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_PERF_SPARC_BARRIER_H
+#define __TOOLS_PERF_SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
new file mode 100644
index 000000000000..9a7d7322c3f7
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -0,0 +1,42 @@
+#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
+#define __TOOLS_LINUX_SPARC64_BARRIER_H
+
+/* Copied from the kernel sources to tools/:
+ *
+ * These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb()	membar_safe("#StoreLoad")
+#define rmb()	__asm__ __volatile__("":::"memory")
+#define wmb()	__asm__ __volatile__("":::"memory")
+
+#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/tile/include/asm/barrier.h b/tools/arch/tile/include/asm/barrier.h
new file mode 100644
index 000000000000..7d3692c3d4ac
--- /dev/null
+++ b/tools/arch/tile/include/asm/barrier.h
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_ASM_TILE_BARRIER_H
+#define _TOOLS_LINUX_ASM_TILE_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in 620830b6954913647b7c7f68920cf48eddf6ad92, more work needed to make it
+ * more closely follow the Linux kernel arch/tile/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+
+#define mb()	asm volatile ("mf" ::: "memory")
+#define wmb()	mb()
+#define rmb()	mb()
+
+#endif /* _TOOLS_LINUX_ASM_TILE_BARRIER_H */
diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
new file mode 100644
index 000000000000..059e33e94260
--- /dev/null
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -0,0 +1,65 @@
+#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H
+#define _TOOLS_LINUX_ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "rmwcc.h"
+
+#define LOCK_PREFIX "\n\tlock; "
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "incl %0"
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
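As an aside (not part of the patch), a minimal user-space refcount exercising this API; it assumes a tools/ build where <linux/atomic.h> resolves to the dispatch header added later in this series:

#include <stdio.h>
#include <linux/atomic.h>	/* tools/include dispatch header */

struct obj {
	atomic_t refcnt;
};

int main(void)
{
	struct obj o;

	atomic_set(&o.refcnt, 1);
	atomic_inc(&o.refcnt);			/* take a second reference */
	printf("refs: %d\n", atomic_read(&o.refcnt));
	atomic_dec_and_test(&o.refcnt);		/* drop one: not zero yet */
	if (atomic_dec_and_test(&o.refcnt))	/* drop the last one */
		printf("released\n");
	return 0;
}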
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
new file mode 100644
index 000000000000..f366d8e550e4
--- /dev/null
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -0,0 +1,28 @@
+#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H
+#define _TOOLS_LINUX_ASM_X86_BARRIER_H
+
+/*
+ * Copied from the Linux kernel sources, and also moving code
+ * out from tools/perf/perf-sys.h so as to make it be located
+ * in a place similar as in the kernel sources.
+ *
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb()	asm volatile("mfence":::"memory")
+#define rmb()	asm volatile("lfence":::"memory")
+#define wmb()	asm volatile("sfence" ::: "memory")
+#endif
+
+#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
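These barriers back the perf mmap ring buffer; sketched below, with made-up field names rather than perf's actual structs, is the canonical pairing they enable (a simplified model, ignoring wrap-around and overflow handling):

#include <asm/barrier.h>	/* the dispatch header added in this series */

struct ring {
	volatile unsigned long head;	/* advanced by the producer */
	char data[4096];
};

static void produce(struct ring *r, char byte)
{
	r->data[r->head % sizeof(r->data)] = byte;
	wmb();		/* publish the data before moving the head */
	r->head++;
}

static char consume(struct ring *r, unsigned long tail)
{
	while (r->head == tail)
		;	/* spin until the producer advances the head */
	rmb();		/* read the head before reading the data */
	return r->data[tail % sizeof(r->data)];
}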
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000000..a6669bc06939
--- /dev/null
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _TOOLS_LINUX_ASM_X86_RMWcc
+#define _TOOLS_LINUX_ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)			\
+do {								\
+	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"	\
+			: : "m" (var), ## __VA_ARGS__		\
+			: "memory" : cc_label);			\
+	return 0;						\
+cc_label:							\
+	return 1;						\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)			\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)			\
+do {								\
+	char c;							\
+	asm volatile (fullop "; set" cc " %1"			\
+			: "+m" (var), "=qm" (c)			\
+			: __VA_ARGS__ : "memory");		\
+	return c != 0;						\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)			\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)		\
+	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */
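To make the macros concrete: in the !CC_HAVE_ASM_GOTO case, the GEN_UNARY_RMWcc() call in atomic_dec_and_test() above expands roughly to the following (hand-expanded here for illustration only):

static inline int atomic_dec_and_test(atomic_t *v)
{
	char c;

	/* "decl" updates ZF; "sete" captures it, so the function
	 * returns true exactly when the counter reached zero.
	 */
	asm volatile(LOCK_PREFIX "decl %0; sete %1"
		     : "+m" (v->counter), "=qm" (c)
		     : : "memory");
	return c != 0;
}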
diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h
new file mode 100644
index 000000000000..583800bd7259
--- /dev/null
+++ b/tools/arch/xtensa/include/asm/barrier.h
@@ -0,0 +1,18 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
+ */
+
+#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H
+#define _TOOLS_LINUX_XTENSA_SYSTEM_H
+
+#define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 10df57237a66..a51244a8022f 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -37,7 +37,7 @@ subdir-obj-y :=
 
 # Build definitions
 build-file := $(dir)/Build
-include $(build-file)
+-include $(build-file)
 
 quiet_cmd_flex  = FLEX     $@
 quiet_cmd_bison = BISON    $@
@@ -94,12 +94,12 @@ obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
 subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
 
 # '$(OUTPUT)/dir' prefix to all objects
-prefix       := $(subst ./,,$(OUTPUT)$(dir)/)
-obj-y        := $(addprefix $(prefix),$(obj-y))
-subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y))
+objprefix    := $(subst ./,,$(OUTPUT)$(dir)/)
+obj-y        := $(addprefix $(objprefix),$(obj-y))
+subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y))
 
 # Final '$(obj)-in.o' object
-in-target := $(prefix)$(obj)-in.o
+in-target := $(objprefix)$(obj)-in.o
 
 PHONY += $(subdir-y)
 
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3a0b0ca2a28c..2975632d51e2 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -27,7 +27,7 @@ endef
 # the rule that uses them - an example for that is the 'bionic'
 # feature check. ]
 #
-FEATURE_TESTS =			\
+FEATURE_TESTS ?=		\
 	backtrace		\
 	dwarf			\
 	fortify-source		\
@@ -53,7 +53,7 @@ FEATURE_TESTS = \
 	zlib			\
 	lzma
 
-FEATURE_DISPLAY =		\
+FEATURE_DISPLAY ?=		\
 	dwarf			\
 	glibc			\
 	gtk2			\
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
index 0e6c3e6767e6..70d876237c57 100644
--- a/tools/build/tests/ex/Build
+++ b/tools/build/tests/ex/Build
@@ -2,6 +2,7 @@ ex-y += ex.o
 ex-y += a.o
 ex-y += b.o
 ex-y += empty/
+ex-y += empty2/
 
 libex-y += c.o
 libex-y += d.o
diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README
new file mode 100644
index 000000000000..2107cc5bf5a9
--- /dev/null
+++ b/tools/build/tests/ex/empty2/README
@@ -0,0 +1,2 @@
+This directory is left intentionally without Build file
+to test proper nesting into Build-less directories.
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
new file mode 100644
index 000000000000..2ba78c9f5701
--- /dev/null
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -0,0 +1,63 @@
+#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H
+#define __TOOLS_ASM_GENERIC_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * Excerpts obtained from the Linux kernel sources.
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+	__sync_add_and_fetch(&v->counter, 1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+	return __sync_sub_and_fetch(&v->counter, 1) == 0;
+}
+
+#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h
new file mode 100644
index 000000000000..47b933903eaf
--- /dev/null
+++ b/tools/include/asm-generic/barrier.h
@@ -0,0 +1,44 @@
+/*
+ * Copied from the kernel sources to tools/perf/:
+ *
+ * Generic barrier definitions, originally based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/*
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
+ *
+ * Fall back to compiler barriers if nothing better is provided.
+ */
+
+#ifndef mb
+#define mb()	barrier()
+#endif
+
+#ifndef rmb
+#define rmb()	mb()
+#endif
+
+#ifndef wmb
+#define wmb()	mb()
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */
diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h
new file mode 100644
index 000000000000..70794f538a86
--- /dev/null
+++ b/tools/include/asm/atomic.h
@@ -0,0 +1,10 @@
+#ifndef __TOOLS_LINUX_ASM_ATOMIC_H
+#define __TOOLS_LINUX_ASM_ATOMIC_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/atomic.h"
+#else
+#include <asm-generic/atomic-gcc.h>
+#endif
+
+#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
new file mode 100644
index 000000000000..ac66ac594685
--- /dev/null
+++ b/tools/include/asm/barrier.h
@@ -0,0 +1,27 @@
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__arm__)
+#include "../../arch/arm/include/asm/barrier.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/asm/barrier.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/asm/barrier.h"
+#elif defined(__sh__)
+#include "../../arch/sh/include/asm/barrier.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__tile__)
+#include "../../arch/tile/include/asm/barrier.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/asm/barrier.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/asm/barrier.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/asm/barrier.h"
+#else
+#include <asm-generic/barrier.h>
+#endif
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
new file mode 100644
index 000000000000..4e3d3d18ebab
--- /dev/null
+++ b/tools/include/linux/atomic.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <asm/atomic.h>
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 88461f09cc86..f0e72674c52d 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -1,6 +1,10 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
 #ifndef __always_inline
 # define __always_inline inline __attribute__((always_inline))
 #endif
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/include/linux/kernel.h
index 09e8e7aea7c6..76df53539c2a 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -1,5 +1,5 @@
-#ifndef PERF_LINUX_KERNEL_H_
-#define PERF_LINUX_KERNEL_H_
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
 
 #include <stdarg.h>
 #include <stdio.h>
diff --git a/tools/perf/util/include/linux/list.h b/tools/include/linux/list.h
index 76ddbc726343..76b014c96893 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/include/linux/list.h
@@ -1,10 +1,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 
-#include "../../../../include/linux/list.h"
+#include "../../../include/linux/list.h"
 
-#ifndef PERF_LIST_H
-#define PERF_LIST_H
+#ifndef TOOLS_LIST_H
+#define TOOLS_LIST_H
 /**
  * list_del_range - deletes range of entries from list.
  * @begin: first element in the range to delete from the list.
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
new file mode 100644
index 000000000000..0c27bdf14233
--- /dev/null
+++ b/tools/include/linux/poison.h
@@ -0,0 +1 @@
+#include "../../../include/linux/poison.h"
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index b5cf25e05df2..8ebf6278b2ef 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -60,6 +60,14 @@ typedef __u32 __bitwise __be32;
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 
+typedef struct {
+	int counter;
+} atomic_t;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
 struct list_head {
 	struct list_head *next, *prev;
 };
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 35f56be5a4cd..3c60335fe7be 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1 +1,2 @@
 TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index d410da335e3d..6daaff652aff 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -23,6 +23,7 @@ endef
 # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
 
 EXT = -std=gnu99
 INSTALL = install
@@ -34,9 +35,15 @@ INSTALL = install
 DESTDIR ?=
 DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
 
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
 prefix ?= /usr/local
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
+libdir = $(prefix)/$(libdir_relative)
 man_dir = $(prefix)/share/man
 man_dir_SQ = '$(subst ','\'',$(man_dir))'
 
@@ -58,7 +65,7 @@ ifeq ($(prefix),$(HOME))
 override plugin_dir = $(HOME)/.traceevent/plugins
 set_plugin_dir := 0
 else
-override plugin_dir = $(prefix)/lib/traceevent/plugins
+override plugin_dir = $(libdir)/traceevent/plugins
 endif
 endif
 
@@ -85,11 +92,11 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 #$(info Determined 'srctree' to be $(srctree))
 endif
 
-export prefix bindir src obj
+export prefix libdir src obj
 
 # Shell quotes
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 LIB_FILE = libtraceevent.a libtraceevent.so
@@ -151,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o)
 
 TE_IN    := $(OUTPUT)libtraceevent-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
@@ -169,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+	$(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
 plugins: $(PLUGINS)
 
 __plugin_obj =  $(notdir $@)
@@ -238,9 +249,16 @@ define do_install_plugins
 	done
 endef
 
+define do_generate_dynamic_list_file
+	(echo '{';						\
+	$(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;\
+	echo '};';						\
+	) > $2
+endef
+
 install_lib: all_cmd install_plugins
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
-		$(call do_install,$(LIB_FILE),$(bindir_SQ))
+		$(call do_install,$(LIB_FILE),$(libdir_SQ))
 
 install_plugins: $(PLUGINS)
 	$(call QUIET_INSTALL, trace_plugins) \
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 29f94f6f0d9e..cc25f059ab3d 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1387,7 +1387,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 		do_warning_event(event, "%s: no type found", __func__);
 		goto fail;
 	}
-	field->name = last_token;
+	field->name = field->alias = last_token;
 
 	if (test_type(type, EVENT_OP))
 		goto fail;
@@ -1469,7 +1469,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			size_dynamic = type_size(field->name);
 			free_token(field->name);
 			strcat(field->type, brackets);
-			field->name = token;
+			field->name = field->alias = token;
 			type = read_token(&token);
 		} else {
 			char *new_type;
@@ -6444,6 +6444,8 @@ void pevent_ref(struct pevent *pevent)
 void pevent_free_format_field(struct format_field *field)
 {
 	free(field->type);
+	if (field->alias != field->name)
+		free(field->alias);
 	free(field->name);
 	free(field);
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 86a5839fb048..063b1971eb35 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -191,6 +191,7 @@ struct format_field {
 	struct event_format	*event;
 	char			*type;
 	char			*name;
+	char			*alias;
 	int			offset;
 	int			size;
 	unsigned int		arraylen;
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
index 4592d8438318..ec57d0c1fbc2 100644
--- a/tools/lib/traceevent/plugin_cfg80211.c
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -4,6 +4,19 @@
 #include <endian.h>
 #include "event-parse.h"
 
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
+ * Fedora6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16toh(x) (x)
+# else
+#  define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
 static unsigned long long
 process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
 {
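A stand-alone sanity check of the fallback (not in the patch); on glibc, __bswap_16 comes from <byteswap.h>:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <endian.h>
#include <byteswap.h>

#ifndef le16toh
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define le16toh(x) (x)
# else
#  define le16toh(x) __bswap_16 (x)
# endif
#endif

int main(void)
{
	unsigned char wire[2] = { 0x34, 0x12 };	/* 0x1234, little-endian */
	uint16_t v;

	memcpy(&v, wire, sizeof(v));
	printf("%04x\n", le16toh(v));	/* prints 1234 on either endianness */
	return 0;
}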
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 812f904193e8..09db62ba5786 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -28,3 +28,4 @@ config.mak.autogen
 *-flex.*
 *.pyc
 *.pyo
+.config-detected
diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt
new file mode 100644
index 000000000000..1a757927195e
--- /dev/null
+++ b/tools/perf/Documentation/callchain-overhead-calculation.txt
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains.  The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions.  But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+    /* do something */
+}
+
+void bar(void) {
+    /* do something */
+    foo();
+}
+
+int main(void) {
+    bar();
+    return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'.  In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only.  When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead  Symbol
+........  .....................
+  60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+
+  40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead.  In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children      Self  Symbol
+........  ........  ....................
+ 100.00%     0.00%  __libc_start_main
+          |
+          --- __libc_start_main
+
+ 100.00%     0.00%  main
+          |
+          --- main
+              __libc_start_main
+
+ 100.00%    40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+
+  60.00%    60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was added to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have
+same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values.  The 'children' overhead is disabled by
+specifying --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' in the perf config
+file.
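The accumulation this document describes can be condensed into a few lines of C. This is a toy model with invented types, not perf's real hist code; perf additionally guards against counting a recursive function twice on one chain:

struct func_stat {
	const char		*name;
	unsigned long long	self;		/* periods sampled in the function itself */
	unsigned long long	children;	/* periods sampled in it or anything below it */
};

/* chain[0] is the sampled leaf, chain[depth - 1] the outermost caller */
static void account_sample(struct func_stat *chain[], int depth,
			   unsigned long long period)
{
	int i;

	chain[0]->self += period;		/* 'Self' counts the leaf only */
	for (i = 0; i < depth; i++)
		chain[i]->children += period;	/* 'Children' counts every frame */
}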
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f6480cbf309b..bf3d0644bf10 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -210,6 +210,9 @@ Suite for evaluating hash tables.
 *wake*::
 Suite for evaluating wake calls.
 
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
 *requeue*::
 Suite for evaluating requeue calls.
 
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index dc7442cf3d7f..b876ae312699 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -44,6 +44,33 @@ OPTIONS
 --kallsyms=<file>::
 	kallsyms pathname
 
+--itrace::
+	Decode Instruction Tracing data, replacing it with synthesized events.
+	Options are:
+
+		i	synthesize instructions events
+		b	synthesize branches events
+		c	synthesize branches events (calls only)
+		r	synthesize branches events (returns only)
+		x	synthesize transactions events
+		e	synthesize error events
+		d	create a debug log
+		g	synthesize a call chain (use with i or x)
+
+	The default is all events i.e. the same as --itrace=ibxe
+
+	In addition, the period (default 100000) for instructions events
+	can be specified in units of:
+
+		i	instructions
+		t	ticks
+		ms	milliseconds
+		us	microseconds
+		ns	nanoseconds (default)
+
+	Also the call chain size (default 16, max. 1024) for instructions or
+	transactions events can be specified.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 23219c65c16f..ff0f433b3fce 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -37,7 +37,11 @@ OPTIONS
37 37
38-s <key[,key2...]>:: 38-s <key[,key2...]>::
39--sort=<key[,key2...]>:: 39--sort=<key[,key2...]>::
40 Sort the output (default: frag,hit,bytes) 40 Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
41 for page). Available sort keys are 'ptr, callsite, bytes, hit,
42 pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
43 migtype, gfp' for page. This option should be preceded by one of the
44 mode selection options - i.e. --slab, --page, --alloc and/or --caller.
41 45
42-l <num>:: 46-l <num>::
43--line=<num>:: 47--line=<num>::
@@ -52,6 +56,11 @@ OPTIONS
52--page:: 56--page::
53 Analyze page allocator events 57 Analyze page allocator events
54 58
59--live::
 60 Show live page statistics. perf kmem shows total allocation statistics
 61 by default, but this option shows live (currently allocated) pages
 62 instead. (This option works with the --page option only.)
63
55SEE ALSO 64SEE ALSO
56-------- 65--------
57linkperf:perf-record[1] 66linkperf:perf-record[1]
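An illustrative pairing of the new sort keys with --live (the workload is a
placeholder, and it is assumed the --page mode selector applies to 'record'
as well as 'stat'):

    perf kmem record --page -- make -j8
    perf kmem stat --page --live --sort=bytes,hit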
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 6252e776009c..6a5bb2b17039 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -151,6 +151,12 @@ STAT LIVE OPTIONS
151 Show events other than HLT (x86 only) or Wait state (s390 only) 151 Show events other than HLT (x86 only) or Wait state (s390 only)
152 that take longer than duration usecs. 152 that take longer than duration usecs.
153 153
154--proc-map-timeout::
155 Processing the /proc/XXX/mmap entries of pre-existing threads may
156 take a long time, because the files can be huge. A timeout is
157 needed in such cases.
158 This option sets the timeout limit. The default value is 500 ms.
159
154SEE ALSO 160SEE ALSO
155-------- 161--------
156linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], 162linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
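A hedged sketch of the new timeout knob, raising the default 500 ms limit
for a guest with a large address space (the PID is a placeholder):

    perf kvm stat live -p <qemu-pid> --proc-map-timeout 2000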
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 239609c09f83..3a8a9ba2b041 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -14,11 +14,13 @@ or
14or 14or
15'perf probe' [options] --del='[GROUP:]EVENT' [...] 15'perf probe' [options] --del='[GROUP:]EVENT' [...]
16or 16or
17'perf probe' --list 17'perf probe' --list[=[GROUP:]EVENT]
18or 18or
19'perf probe' [options] --line='LINE' 19'perf probe' [options] --line='LINE'
20or 20or
21'perf probe' [options] --vars='PROBEPOINT' 21'perf probe' [options] --vars='PROBEPOINT'
22or
23'perf probe' [options] --funcs
22 24
23DESCRIPTION 25DESCRIPTION
24----------- 26-----------
@@ -64,8 +66,8 @@ OPTIONS
64 classes(e.g. [a-z], [!A-Z]). 66 classes(e.g. [a-z], [!A-Z]).
65 67
66-l:: 68-l::
67--list:: 69--list[=[GROUP:]EVENT]::
 68 List up current probe events. 70 List the current probe events. This can also accept filtering patterns of event names.
69 71
70-L:: 72-L::
71--line=:: 73--line=::
@@ -81,10 +83,15 @@ OPTIONS
81 (Only for --vars) Show external defined variables in addition to local 83 (Only for --vars) Show external defined variables in addition to local
82 variables. 84 variables.
83 85
86--no-inlines::
87 (Only for --add) Search only for non-inlined functions. The functions
88 which do not have instances are ignored.
89
84-F:: 90-F::
85--funcs:: 91--funcs[=FILTER]::
86 Show available functions in given module or kernel. With -x/--exec, 92 Show available functions in given module or kernel. With -x/--exec,
87 can also list functions in a user space executable / shared library. 93 can also list functions in a user space executable / shared library.
 94 This can also accept a FILTER rule argument.
88 95
89--filter=FILTER:: 96--filter=FILTER::
90 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob 97 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
@@ -148,7 +155,7 @@ Each probe argument follows below syntax.
148 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 155 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
149 156
150'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) 157'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
151'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. 158The '$vars' and '$params' special arguments are also available for NAME: '$vars' is expanded to the local variables (including function parameters) which can be accessed at the given probe point, and '$params' is expanded to only the function parameters.
152'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. 159'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
153 160
154On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. 161On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
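A few hypothetical invocations of the extended probe options (the function
names are examples only, not mandated by the patch):

    perf probe --funcs='*fork*'        # list kernel functions matching a glob
    perf probe --list='probe:*'        # list current events, filtered by pattern
    perf probe -a 'do_fork $params'    # collect only the function parameters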
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 4847a793de65..9b9d9d086680 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -108,6 +108,8 @@ OPTIONS
108 Number of mmap data pages (must be a power of two) or size 108 Number of mmap data pages (must be a power of two) or size
109 specification with appended unit character - B/K/M/G. The 109 specification with appended unit character - B/K/M/G. The
110 size is rounded up to have nearest pages power of two value. 110 size is rounded up to have nearest pages power of two value.
111 Also, by adding a comma, the number of mmap pages for AUX
112 area tracing can be specified.
111 113
112--group:: 114--group::
113 Put all events in a single event group. This precedes the --event 115 Put all events in a single event group. This precedes the --event
@@ -145,16 +147,21 @@ OPTIONS
145 147
146-s:: 148-s::
147--stat:: 149--stat::
148 Per thread counts. 150 Record per-thread event counts. Use it with 'perf report -T' to see
151 the values.
149 152
150-d:: 153-d::
151--data:: 154--data::
152 Sample addresses. 155 Record the sample addresses.
153 156
154-T:: 157-T::
155--timestamp:: 158--timestamp::
156 Sample timestamps. Use it with 'perf report -D' to see the timestamps, 159 Record the sample timestamps. Use it with 'perf report -D' to see the
157 for instance. 160 timestamps, for instance.
161
162-P::
163--period::
164 Record the sample period.
158 165
159-n:: 166-n::
160--no-samples:: 167--no-samples::
@@ -257,6 +264,18 @@ records. See clock_gettime(). In particular CLOCK_MONOTONIC and
257CLOCK_MONOTONIC_RAW are supported, some events might also allow 264CLOCK_MONOTONIC_RAW are supported, some events might also allow
258CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. 265CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
259 266
267-S::
268--snapshot::
269Select AUX area tracing Snapshot Mode. This option is valid only with an
270AUX area tracing event. Optionally the number of bytes to capture per
271snapshot can be specified. In Snapshot Mode, trace data is captured only when
272the SIGUSR2 signal is received.
273
274--proc-map-timeout::
275Processing the /proc/XXX/mmap entries of pre-existing threads may take a
276long time, because the files can be huge. A timeout is needed in such cases.
277This option sets the timeout limit. The default value is 500 ms.
278
260SEE ALSO 279SEE ALSO
261-------- 280--------
262linkperf:perf-stat[1], linkperf:perf-list[1] 281linkperf:perf-stat[1], linkperf:perf-list[1]
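Putting the AUX area additions together, a sketch (intel_pt is an
assumption, and 16 data pages plus 64 AUX pages are arbitrary powers of two):

    perf record -e intel_pt// -m 16,64 -S -- ./workload &
    kill -USR2 $!    # in Snapshot Mode, SIGUSR2 captures one snapshot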
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4879cf638824..c33b69f3374f 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -34,7 +34,8 @@ OPTIONS
34 34
35-T:: 35-T::
36--threads:: 36--threads::
37 Show per-thread event counters 37 Show per-thread event counters. The input data file should be recorded
 38 with the -s option.
38-c:: 39-c::
39--comms=:: 40--comms=::
40 Only consider symbols in these comms. CSV that understands 41 Only consider symbols in these comms. CSV that understands
@@ -193,6 +194,7 @@ OPTIONS
 193 Accumulate callchain of children to parent entry so that they can 194 Accumulate callchain of children to parent entry so that they can
 194 show up in the output. The output will have a new "Children" column 195 show up in the output. The output will have a new "Children" column
195 and will be sorted on the data. It requires callchains are recorded. 196 and will be sorted on the data. It requires callchains are recorded.
197 See the `overhead calculation' section for more details.
196 198
197--max-stack:: 199--max-stack::
198 Set the stack depth limit when parsing the callchain, anything 200 Set the stack depth limit when parsing the callchain, anything
@@ -323,6 +325,37 @@ OPTIONS
323--header-only:: 325--header-only::
324 Show only perf.data header (forces --stdio). 326 Show only perf.data header (forces --stdio).
325 327
328--itrace::
329 Options for decoding instruction tracing data. The options are:
330
331 i synthesize instructions events
332 b synthesize branches events
333 c synthesize branches events (calls only)
334 r synthesize branches events (returns only)
335 x synthesize transactions events
336 e synthesize error events
337 d create a debug log
338 g synthesize a call chain (use with i or x)
339
 340 The default is all events, i.e. the same as --itrace=ibxe
341
342 In addition, the period (default 100000) for instructions events
343 can be specified in units of:
344
345 i instructions
346 t ticks
347 ms milliseconds
348 us microseconds
349 ns nanoseconds (default)
350
351 Also the call chain size (default 16, max. 1024) for instructions or
352 transactions events can be specified.
353
354 To disable decoding entirely, use --no-itrace.
355
356
357include::callchain-overhead-calculation.txt[]
358
326SEE ALSO 359SEE ALSO
327-------- 360--------
328linkperf:perf-stat[1], linkperf:perf-annotate[1] 361linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 79445750fcb3..c82df572fac2 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,8 @@ OPTIONS
115-f:: 115-f::
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period. 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, flags.
119 Field list can be prepended with the type, trace, sw or hw, 120 Field list can be prepended with the type, trace, sw or hw,
120 to indicate to which event type the field list applies. 121 to indicate to which event type the field list applies.
121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace 122 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -165,6 +166,12 @@ OPTIONS
165 166
166 At this point usage is displayed, and perf-script exits. 167 At this point usage is displayed, and perf-script exits.
167 168
 169 The flags field is synthesized and may have a value when Instruction
 170 Trace decoding is used. The flags are "bcrosyiABEx", which stand for branch,
171 call, return, conditional, system, asynchronous, interrupt,
172 transaction abort, trace begin, trace end, and in transaction,
173 respectively.
174
168 Finally, a user may not set fields to none for all event types. 175 Finally, a user may not set fields to none for all event types.
169 i.e., -f "" is not allowed. 176 i.e., -f "" is not allowed.
170 177
@@ -221,6 +228,34 @@ OPTIONS
221--header-only 228--header-only
222 Show only perf.data header. 229 Show only perf.data header.
223 230
231--itrace::
232 Options for decoding instruction tracing data. The options are:
233
234 i synthesize instructions events
235 b synthesize branches events
236 c synthesize branches events (calls only)
237 r synthesize branches events (returns only)
238 x synthesize transactions events
239 e synthesize error events
240 d create a debug log
241 g synthesize a call chain (use with i or x)
242
 243 The default is all events, i.e. the same as --itrace=ibxe
244
245 In addition, the period (default 100000) for instructions events
246 can be specified in units of:
247
248 i instructions
249 t ticks
250 ms milliseconds
251 us microseconds
252 ns nanoseconds (default)
253
254 Also the call chain size (default 16, max. 1024) for instructions or
255 transactions events can be specified.
256
257 To disable decoding entirely, use --no-itrace.
258
224SEE ALSO 259SEE ALSO
225-------- 260--------
226linkperf:perf-record[1], linkperf:perf-script-perl[1], 261linkperf:perf-record[1], linkperf:perf-script-perl[1],
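For instance, to print the synthesized flags next to the usual fields while
decoding only calls and returns (a sketch):

    perf script --itrace=cr -f comm,tid,time,ip,sym,flags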
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 3265b1070518..776aec4d0927 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -168,7 +168,7 @@ Default is to monitor all CPUS.
 168 Accumulate callchain of children to parent entry so that they can 168 Accumulate callchain of children to parent entry so that they can
169 show up in the output. The output will have a new "Children" column 169 show up in the output. The output will have a new "Children" column
170 and will be sorted on the data. It requires -g/--call-graph option 170 and will be sorted on the data. It requires -g/--call-graph option
171 enabled. 171 enabled. See the `overhead calculation' section for more details.
172 172
173--max-stack:: 173--max-stack::
174 Set the stack depth limit when parsing the callchain, anything 174 Set the stack depth limit when parsing the callchain, anything
@@ -201,6 +201,12 @@ Default is to monitor all CPUS.
201 Force each column width to the provided list, for large terminal 201 Force each column width to the provided list, for large terminal
202 readability. 0 means no limit (default behavior). 202 readability. 0 means no limit (default behavior).
203 203
204--proc-map-timeout::
 205 Processing the /proc/XXX/mmap entries of pre-existing threads may
 206 take a long time, because the files can be huge. A timeout is
 207 needed in such cases.
 208 This option sets the timeout limit. The default value is 500 ms.
209
204 210
205INTERACTIVE PROMPTING KEYS 211INTERACTIVE PROMPTING KEYS
206-------------------------- 212--------------------------
@@ -234,6 +240,7 @@ INTERACTIVE PROMPTING KEYS
234 240
235Pressing any unmapped key displays a menu, and prompts for input. 241Pressing any unmapped key displays a menu, and prompts for input.
236 242
243include::callchain-overhead-calculation.txt[]
237 244
238SEE ALSO 245SEE ALSO
239-------- 246--------
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index ba03fd5d1a54..7ea078658a87 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -35,7 +35,7 @@ OPTIONS
35 35
36-e:: 36-e::
37--expr:: 37--expr::
38 List of events to show, currently only syscall names. 38 List of syscalls to show, currently only syscall names.
39 Prefixing with ! shows all syscalls but the ones specified. You may 39 Prefixing with ! shows all syscalls but the ones specified. You may
40 need to escape it. 40 need to escape it.
41 41
@@ -121,6 +121,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
121--event:: 121--event::
122 Trace other events, see 'perf list' for a complete list. 122 Trace other events, see 'perf list' for a complete list.
123 123
124--proc-map-timeout::
 125 Processing the /proc/XXX/mmap entries of pre-existing threads may take a
 126 long time, because the files can be huge. A timeout is needed in such cases.
 127 This option sets the timeout limit. The default value is 500 ms.
128
124PAGEFAULTS 129PAGEFAULTS
125---------- 130----------
126 131
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 11ccbb22ea2b..fe50a1b34aa0 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,12 +1,30 @@
1tools/perf 1tools/perf
2tools/arch/alpha/include/asm/barrier.h
3tools/arch/arm/include/asm/barrier.h
4tools/arch/ia64/include/asm/barrier.h
5tools/arch/mips/include/asm/barrier.h
6tools/arch/powerpc/include/asm/barrier.h
7tools/arch/s390/include/asm/barrier.h
8tools/arch/sh/include/asm/barrier.h
9tools/arch/sparc/include/asm/barrier.h
10tools/arch/sparc/include/asm/barrier_32.h
11tools/arch/sparc/include/asm/barrier_64.h
12tools/arch/tile/include/asm/barrier.h
13tools/arch/x86/include/asm/barrier.h
14tools/arch/xtensa/include/asm/barrier.h
2tools/scripts 15tools/scripts
3tools/build 16tools/build
17tools/arch/x86/include/asm/atomic.h
18tools/arch/x86/include/asm/rmwcc.h
4tools/lib/traceevent 19tools/lib/traceevent
5tools/lib/api 20tools/lib/api
6tools/lib/symbol/kallsyms.c 21tools/lib/symbol/kallsyms.c
7tools/lib/symbol/kallsyms.h 22tools/lib/symbol/kallsyms.h
8tools/lib/util/find_next_bit.c 23tools/lib/util/find_next_bit.c
24tools/include/asm/atomic.h
25tools/include/asm/barrier.h
9tools/include/asm/bug.h 26tools/include/asm/bug.h
27tools/include/asm-generic/barrier.h
10tools/include/asm-generic/bitops/arch_hweight.h 28tools/include/asm-generic/bitops/arch_hweight.h
11tools/include/asm-generic/bitops/atomic.h 29tools/include/asm-generic/bitops/atomic.h
12tools/include/asm-generic/bitops/const_hweight.h 30tools/include/asm-generic/bitops/const_hweight.h
@@ -17,35 +35,35 @@ tools/include/asm-generic/bitops/fls64.h
17tools/include/asm-generic/bitops/fls.h 35tools/include/asm-generic/bitops/fls.h
18tools/include/asm-generic/bitops/hweight.h 36tools/include/asm-generic/bitops/hweight.h
19tools/include/asm-generic/bitops.h 37tools/include/asm-generic/bitops.h
38tools/include/linux/atomic.h
20tools/include/linux/bitops.h 39tools/include/linux/bitops.h
21tools/include/linux/compiler.h 40tools/include/linux/compiler.h
22tools/include/linux/export.h 41tools/include/linux/export.h
23tools/include/linux/hash.h 42tools/include/linux/hash.h
43tools/include/linux/kernel.h
44tools/include/linux/list.h
24tools/include/linux/log2.h 45tools/include/linux/log2.h
46tools/include/linux/poison.h
25tools/include/linux/types.h 47tools/include/linux/types.h
26include/asm-generic/bitops/arch_hweight.h 48include/asm-generic/bitops/arch_hweight.h
27include/asm-generic/bitops/const_hweight.h 49include/asm-generic/bitops/const_hweight.h
28include/asm-generic/bitops/fls64.h 50include/asm-generic/bitops/fls64.h
29include/asm-generic/bitops/__fls.h 51include/asm-generic/bitops/__fls.h
30include/asm-generic/bitops/fls.h 52include/asm-generic/bitops/fls.h
31include/linux/const.h
32include/linux/perf_event.h 53include/linux/perf_event.h
33include/linux/rbtree.h 54include/linux/rbtree.h
34include/linux/list.h 55include/linux/list.h
35include/linux/hash.h 56include/linux/hash.h
36include/linux/stringify.h 57include/linux/stringify.h
37lib/find_next_bit.c
38lib/hweight.c 58lib/hweight.c
39lib/rbtree.c 59lib/rbtree.c
40include/linux/swab.h 60include/linux/swab.h
41arch/*/include/asm/unistd*.h 61arch/*/include/asm/unistd*.h
42arch/*/include/asm/perf_regs.h
43arch/*/include/uapi/asm/unistd*.h 62arch/*/include/uapi/asm/unistd*.h
44arch/*/include/uapi/asm/perf_regs.h 63arch/*/include/uapi/asm/perf_regs.h
45arch/*/lib/memcpy*.S 64arch/*/lib/memcpy*.S
46arch/*/lib/memset*.S 65arch/*/lib/memset*.S
47include/linux/poison.h 66include/linux/poison.h
48include/linux/magic.h
49include/linux/hw_breakpoint.h 67include/linux/hw_breakpoint.h
50include/linux/rbtree_augmented.h 68include/linux/rbtree_augmented.h
51include/uapi/linux/perf_event.h 69include/uapi/linux/perf_event.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index c43a20517591..1af0cfeb7a57 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -73,6 +73,8 @@ include config/utilities.mak
73# for CTF data format. 73# for CTF data format.
74# 74#
75# Define NO_LZMA if you do not want to support compressed (xz) kernel modules 75# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
76#
77# Define NO_AUXTRACE if you do not want AUX area tracing support
76 78
77ifeq ($(srctree),) 79ifeq ($(srctree),)
78srctree := $(patsubst %/,%,$(dir $(shell pwd))) 80srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -171,6 +173,9 @@ endif
171LIBTRACEEVENT = $(TE_PATH)libtraceevent.a 173LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
172export LIBTRACEEVENT 174export LIBTRACEEVENT
173 175
176LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
177LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
178
174LIBAPI = $(LIB_PATH)libapi.a 179LIBAPI = $(LIB_PATH)libapi.a
175export LIBAPI 180export LIBAPI
176 181
@@ -185,8 +190,9 @@ python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT
185PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) 190PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
186PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) 191PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
187 192
188$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) 193$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
189 $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ 194 $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
195 $(PYTHON_WORD) util/setup.py \
190 --quiet build_ext; \ 196 --quiet build_ext; \
191 mkdir -p $(OUTPUT)python && \ 197 mkdir -p $(OUTPUT)python && \
192 cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ 198 cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -276,8 +282,9 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
276$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE 282$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
277 $(Q)$(MAKE) $(build)=perf 283 $(Q)$(MAKE) $(build)=perf
278 284
279$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) 285$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
280 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ 286 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
287 $(PERF_IN) $(LIBS) -o $@
281 288
282$(GTK_IN): FORCE 289$(GTK_IN): FORCE
283 $(Q)$(MAKE) $(build)=gtk 290 $(Q)$(MAKE) $(build)=gtk
@@ -371,7 +378,13 @@ $(LIB_FILE): $(LIBPERF_IN)
371LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 378LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
372 379
373$(LIBTRACEEVENT): FORCE 380$(LIBTRACEEVENT): FORCE
374 $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins 381 $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
382
383libtraceevent_plugins: FORCE
384 $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
385
386$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
387 $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
375 388
376$(LIBTRACEEVENT)-clean: 389$(LIBTRACEEVENT)-clean:
377 $(call QUIET_CLEAN, libtraceevent) 390 $(call QUIET_CLEAN, libtraceevent)
@@ -462,7 +475,7 @@ check: $(OUTPUT)common-cmds.h
462 475
463install-gtk: 476install-gtk:
464 477
465install-bin: all install-gtk 478install-tools: all install-gtk
466 $(call QUIET_INSTALL, binaries) \ 479 $(call QUIET_INSTALL, binaries) \
467 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ 480 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
468 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ 481 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
@@ -500,12 +513,16 @@ endif
500 $(call QUIET_INSTALL, perf_completion-script) \ 513 $(call QUIET_INSTALL, perf_completion-script) \
501 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ 514 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
502 $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' 515 $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
516
517install-tests: all install-gtk
503 $(call QUIET_INSTALL, tests) \ 518 $(call QUIET_INSTALL, tests) \
504 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ 519 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
505 $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ 520 $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
506 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ 521 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
507 $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' 522 $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
508 523
524install-bin: install-tools install-tests
525
509install: install-bin try-install-man install-traceevent-plugins 526install: install-bin try-install-man install-traceevent-plugins
510 527
511install-python_ext: 528install-python_ext:
@@ -549,4 +566,5 @@ FORCE:
549.PHONY: all install clean config-clean strip install-gtk 566.PHONY: all install clean config-clean strip install-gtk
550.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell 567.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
551.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep 568.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
569.PHONY: libtraceevent_plugins
552 570
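The new knob follows the existing NO_* convention, e.g.:

    make -C tools/perf NO_AUXTRACE=1    # build perf without AUX area tracing support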
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 54afe4a467e7..41bf61da476a 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1 +1,2 @@
1libperf-y += util/ 1libperf-y += util/
2libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index 1d3f39c3aa56..4e5af27e3fbf 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -5,8 +5,11 @@
5#include <linux/types.h> 5#include <linux/types.h>
6#include <asm/perf_regs.h> 6#include <asm/perf_regs.h>
7 7
8void perf_regs_load(u64 *regs);
9
8#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1) 10#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
9#define PERF_REGS_MAX PERF_REG_ARM64_MAX 11#define PERF_REGS_MAX PERF_REG_ARM64_MAX
12#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
10 13
11#define PERF_REG_IP PERF_REG_ARM64_PC 14#define PERF_REG_IP PERF_REG_ARM64_PC
12#define PERF_REG_SP PERF_REG_ARM64_SP 15#define PERF_REG_SP PERF_REG_ARM64_SP
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
new file mode 100644
index 000000000000..b30eff9bcc83
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/Build
@@ -0,0 +1,2 @@
1libperf-y += regs_load.o
2libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
new file mode 100644
index 000000000000..cf04a4c91c59
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -0,0 +1,61 @@
1#include <string.h>
2#include "perf_regs.h"
3#include "thread.h"
4#include "map.h"
5#include "event.h"
6#include "debug.h"
7#include "tests/tests.h"
8
9#define STACK_SIZE 8192
10
11static int sample_ustack(struct perf_sample *sample,
12 struct thread *thread, u64 *regs)
13{
14 struct stack_dump *stack = &sample->user_stack;
15 struct map *map;
16 unsigned long sp;
17 u64 stack_size, *buf;
18
19 buf = malloc(STACK_SIZE);
20 if (!buf) {
21 pr_debug("failed to allocate sample uregs data\n");
22 return -1;
23 }
24
25 sp = (unsigned long) regs[PERF_REG_ARM64_SP];
26
27 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
28 if (!map) {
29 pr_debug("failed to get stack map\n");
30 free(buf);
31 return -1;
32 }
33
34 stack_size = map->end - sp;
35 stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
36
37 memcpy(buf, (void *) sp, stack_size);
38 stack->data = (char *) buf;
39 stack->size = stack_size;
40 return 0;
41}
42
43int test__arch_unwind_sample(struct perf_sample *sample,
44 struct thread *thread)
45{
46 struct regs_dump *regs = &sample->user_regs;
47 u64 *buf;
48
49 buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
50 if (!buf) {
51 pr_debug("failed to allocate sample uregs data\n");
52 return -1;
53 }
54
55 perf_regs_load(buf);
56 regs->abi = PERF_SAMPLE_REGS_ABI;
57 regs->regs = buf;
58 regs->mask = PERF_REGS_MASK;
59
60 return sample_ustack(sample, thread, buf);
61}
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644
index 000000000000..025b46e579a6
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -0,0 +1,46 @@
1#include <linux/linkage.h>
2
3.text
4.type perf_regs_load,%function
5#define STR_REG(r) str x##r, [x0, 8 * r]
6#define LDR_REG(r) ldr x##r, [x0, 8 * r]
7#define SP (8 * 31)
8#define PC (8 * 32)
9ENTRY(perf_regs_load)
10 STR_REG(0)
11 STR_REG(1)
12 STR_REG(2)
13 STR_REG(3)
14 STR_REG(4)
15 STR_REG(5)
16 STR_REG(6)
17 STR_REG(7)
18 STR_REG(8)
19 STR_REG(9)
20 STR_REG(10)
21 STR_REG(11)
22 STR_REG(12)
23 STR_REG(13)
24 STR_REG(14)
25 STR_REG(15)
26 STR_REG(16)
27 STR_REG(17)
28 STR_REG(18)
29 STR_REG(19)
30 STR_REG(20)
31 STR_REG(21)
32 STR_REG(22)
33 STR_REG(23)
34 STR_REG(24)
35 STR_REG(25)
36 STR_REG(26)
37 STR_REG(27)
38 STR_REG(28)
39 STR_REG(29)
40 STR_REG(30)
41 mov x1, sp
42 str x1, [x0, #SP]
43 str x30, [x0, #PC]
44 LDR_REG(1)
45 ret
46ENDPROC(perf_regs_load)
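Assuming the usual name-fragment matching of 'perf test', the new arm64
objects should be exercised by the dwarf unwind test when perf is built with
CONFIG_DWARF_UNWIND:

    perf test dwarf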
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 49776f190abf..b7bb42c44694 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -61,7 +61,7 @@ const char *const mips_triplets[] = {
61static bool lookup_path(char *name) 61static bool lookup_path(char *name)
62{ 62{
63 bool found = false; 63 bool found = false;
64 char *path, *tmp; 64 char *path, *tmp = NULL;
65 char buf[PATH_MAX]; 65 char buf[PATH_MAX];
66 char *env = getenv("PATH"); 66 char *env = getenv("PATH");
67 67
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 0af6e9b3f728..7b8b0d1a1b62 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
1libperf-y += header.o 1libperf-y += header.o
2libperf-y += sym-handling.o
2 3
3libperf-$(CONFIG_DWARF) += dwarf-regs.o 4libperf-$(CONFIG_DWARF) += dwarf-regs.o
4libperf-$(CONFIG_DWARF) += skip-callchain-idx.o 5libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
new file mode 100644
index 000000000000..bbc1a50768dd
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -0,0 +1,82 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
7 */
8
9#include "debug.h"
10#include "symbol.h"
11#include "map.h"
12#include "probe-event.h"
13
14#ifdef HAVE_LIBELF_SUPPORT
15bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
16{
17 return ehdr.e_type == ET_EXEC ||
18 ehdr.e_type == ET_REL ||
19 ehdr.e_type == ET_DYN;
20}
21
22#if defined(_CALL_ELF) && _CALL_ELF == 2
23void arch__elf_sym_adjust(GElf_Sym *sym)
24{
25 sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
26}
27#endif
28#endif
29
30#if !defined(_CALL_ELF) || _CALL_ELF != 2
31int arch__choose_best_symbol(struct symbol *syma,
32 struct symbol *symb __maybe_unused)
33{
34 char *sym = syma->name;
35
36 /* Skip over any initial dot */
37 if (*sym == '.')
38 sym++;
39
40 /* Avoid "SyS" kernel syscall aliases */
41 if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
42 return SYMBOL_B;
43 if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
44 return SYMBOL_B;
45
46 return SYMBOL_A;
47}
48
49/* Allow matching against dot variants */
50int arch__compare_symbol_names(const char *namea, const char *nameb)
51{
52 /* Skip over initial dot */
53 if (*namea == '.')
54 namea++;
55 if (*nameb == '.')
56 nameb++;
57
58 return strcmp(namea, nameb);
59}
60#endif
61
62#if defined(_CALL_ELF) && _CALL_ELF == 2
63bool arch__prefers_symtab(void)
64{
65 return true;
66}
67
68#define PPC64LE_LEP_OFFSET 8
69
70void arch__fix_tev_from_maps(struct perf_probe_event *pev,
71 struct probe_trace_event *tev, struct map *map)
72{
73 /*
74 * ppc64 ABIv2 local entry point is currently always 2 instructions
75 * (8 bytes) after the global entry point.
76 */
77 if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
78 tev->point.address += PPC64LE_LEP_OFFSET;
79 tev->point.offset += PPC64LE_LEP_OFFSET;
80 }
81}
82#endif
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 5ce98023d518..c3ab760e06b4 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-y += sched-pipe.o
3perf-y += mem-memcpy.o 3perf-y += mem-memcpy.o
4perf-y += futex-hash.o 4perf-y += futex-hash.o
5perf-y += futex-wake.o 5perf-y += futex-wake.o
6perf-y += futex-wake-parallel.o
6perf-y += futex-requeue.o 7perf-y += futex-requeue.o
7 8
8perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o 9perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c4dd44d45cb..70b2f718cc21 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv,
33extern int bench_mem_memset(int argc, const char **argv, const char *prefix); 33extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
34extern int bench_futex_hash(int argc, const char **argv, const char *prefix); 34extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
35extern int bench_futex_wake(int argc, const char **argv, const char *prefix); 35extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
36extern int bench_futex_wake_parallel(int argc, const char **argv,
37 const char *prefix);
36extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); 38extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
37 39
38#define BENCH_FORMAT_DEFAULT_STR "default" 40#define BENCH_FORMAT_DEFAULT_STR "default"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 000000000000..6d8c9fa2a16c
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,294 @@
1/*
2 * Copyright (C) 2015 Davidlohr Bueso.
3 *
 4 * Block a bunch of threads and let parallel waker threads wake up an
 5 * equal number of them. The program output reflects the avg latency
6 * for each individual thread to service its share of work. Ultimately
7 * it can be used to measure futex_wake() changes.
8 */
9
10#include "../perf.h"
11#include "../util/util.h"
12#include "../util/stat.h"
13#include "../util/parse-options.h"
14#include "../util/header.h"
15#include "bench.h"
16#include "futex.h"
17
18#include <err.h>
19#include <stdlib.h>
20#include <sys/time.h>
21#include <pthread.h>
22
23struct thread_data {
24 pthread_t worker;
25 unsigned int nwoken;
26 struct timeval runtime;
27};
28
29static unsigned int nwakes = 1;
30
31/* all threads will block on the same futex -- hash bucket chaos ;) */
32static u_int32_t futex = 0;
33
34static pthread_t *blocked_worker;
35static bool done = false, silent = false, fshared = false;
36static unsigned int nblocked_threads = 0, nwaking_threads = 0;
37static pthread_mutex_t thread_lock;
38static pthread_cond_t thread_parent, thread_worker;
39static struct stats waketime_stats, wakeup_stats;
40static unsigned int ncpus, threads_starting;
41static int futex_flag = 0;
42
43static const struct option options[] = {
44 OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
45 OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
46 OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
47 OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
48 OPT_END()
49};
50
51static const char * const bench_futex_wake_parallel_usage[] = {
52 "perf bench futex wake-parallel <options>",
53 NULL
54};
55
56static void *waking_workerfn(void *arg)
57{
58 struct thread_data *waker = (struct thread_data *) arg;
59 struct timeval start, end;
60
61 gettimeofday(&start, NULL);
62
63 waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
64 if (waker->nwoken != nwakes)
65 warnx("couldn't wakeup all tasks (%d/%d)",
66 waker->nwoken, nwakes);
67
68 gettimeofday(&end, NULL);
69 timersub(&end, &start, &waker->runtime);
70
71 pthread_exit(NULL);
72 return NULL;
73}
74
75static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
76{
77 unsigned int i;
78
79 pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
80
81 /* create and block all threads */
82 for (i = 0; i < nwaking_threads; i++) {
83 /*
84 * Thread creation order will impact per-thread latency
85 * as it will affect the order to acquire the hb spinlock.
86 * For now let the scheduler decide.
87 */
88 if (pthread_create(&td[i].worker, &thread_attr,
89 waking_workerfn, (void *)&td[i]))
90 err(EXIT_FAILURE, "pthread_create");
91 }
92
93 for (i = 0; i < nwaking_threads; i++)
94 if (pthread_join(td[i].worker, NULL))
95 err(EXIT_FAILURE, "pthread_join");
96}
97
98static void *blocked_workerfn(void *arg __maybe_unused)
99{
100 pthread_mutex_lock(&thread_lock);
101 threads_starting--;
102 if (!threads_starting)
103 pthread_cond_signal(&thread_parent);
104 pthread_cond_wait(&thread_worker, &thread_lock);
105 pthread_mutex_unlock(&thread_lock);
106
107 while (1) { /* handle spurious wakeups */
108 if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
109 break;
110 }
111
112 pthread_exit(NULL);
113 return NULL;
114}
115
116static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
117{
118 cpu_set_t cpu;
119 unsigned int i;
120
121 threads_starting = nblocked_threads;
122
123 /* create and block all threads */
124 for (i = 0; i < nblocked_threads; i++) {
125 CPU_ZERO(&cpu);
126 CPU_SET(i % ncpus, &cpu);
127
128 if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
129 err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
130
131 if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
132 err(EXIT_FAILURE, "pthread_create");
133 }
134}
135
136static void print_run(struct thread_data *waking_worker, unsigned int run_num)
137{
138 unsigned int i, wakeup_avg;
139 double waketime_avg, waketime_stddev;
140 struct stats __waketime_stats, __wakeup_stats;
141
142 init_stats(&__wakeup_stats);
143 init_stats(&__waketime_stats);
144
145 for (i = 0; i < nwaking_threads; i++) {
146 update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
147 update_stats(&__wakeup_stats, waking_worker[i].nwoken);
148 }
149
150 waketime_avg = avg_stats(&__waketime_stats);
151 waketime_stddev = stddev_stats(&__waketime_stats);
152 wakeup_avg = avg_stats(&__wakeup_stats);
153
154 printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
155 "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
156 nblocked_threads, waketime_avg/1e3,
157 rel_stddev_stats(waketime_stddev, waketime_avg));
158}
159
160static void print_summary(void)
161{
162 unsigned int wakeup_avg;
163 double waketime_avg, waketime_stddev;
164
165 waketime_avg = avg_stats(&waketime_stats);
166 waketime_stddev = stddev_stats(&waketime_stats);
167 wakeup_avg = avg_stats(&wakeup_stats);
168
169 printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
170 wakeup_avg,
171 nblocked_threads,
172 waketime_avg/1e3,
173 rel_stddev_stats(waketime_stddev, waketime_avg));
174}
175
176
177static void do_run_stats(struct thread_data *waking_worker)
178{
179 unsigned int i;
180
181 for (i = 0; i < nwaking_threads; i++) {
182 update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
183 update_stats(&wakeup_stats, waking_worker[i].nwoken);
184 }
185
186}
187
188static void toggle_done(int sig __maybe_unused,
189 siginfo_t *info __maybe_unused,
190 void *uc __maybe_unused)
191{
192 done = true;
193}
194
195int bench_futex_wake_parallel(int argc, const char **argv,
196 const char *prefix __maybe_unused)
197{
198 int ret = 0;
199 unsigned int i, j;
200 struct sigaction act;
201 pthread_attr_t thread_attr;
202 struct thread_data *waking_worker;
203
204 argc = parse_options(argc, argv, options,
205 bench_futex_wake_parallel_usage, 0);
206 if (argc) {
207 usage_with_options(bench_futex_wake_parallel_usage, options);
208 exit(EXIT_FAILURE);
209 }
210
211 sigfillset(&act.sa_mask);
212 act.sa_sigaction = toggle_done;
213 sigaction(SIGINT, &act, NULL);
214
215 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
216 if (!nblocked_threads)
217 nblocked_threads = ncpus;
218
219 /* some sanity checks */
220 if (nwaking_threads > nblocked_threads || !nwaking_threads)
221 nwaking_threads = nblocked_threads;
222
223 if (nblocked_threads % nwaking_threads)
224 errx(EXIT_FAILURE, "Must be perfectly divisible");
225 /*
226 * Each thread will wakeup nwakes tasks in
227 * a single futex_wait call.
228 */
229 nwakes = nblocked_threads/nwaking_threads;
230
231 blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
232 if (!blocked_worker)
233 err(EXIT_FAILURE, "calloc");
234
235 if (!fshared)
236 futex_flag = FUTEX_PRIVATE_FLAG;
237
238 printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
239 "futex %p), %d threads waking up %d at a time.\n\n",
240 getpid(), nblocked_threads, fshared ? "shared":"private",
241 &futex, nwaking_threads, nwakes);
242
243 init_stats(&wakeup_stats);
244 init_stats(&waketime_stats);
245
246 pthread_attr_init(&thread_attr);
247 pthread_mutex_init(&thread_lock, NULL);
248 pthread_cond_init(&thread_parent, NULL);
249 pthread_cond_init(&thread_worker, NULL);
250
251 for (j = 0; j < bench_repeat && !done; j++) {
252 waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
253 if (!waking_worker)
254 err(EXIT_FAILURE, "calloc");
255
256 /* create, launch & block all threads */
257 block_threads(blocked_worker, thread_attr);
258
259 /* make sure all threads are already blocked */
260 pthread_mutex_lock(&thread_lock);
261 while (threads_starting)
262 pthread_cond_wait(&thread_parent, &thread_lock);
263 pthread_cond_broadcast(&thread_worker);
264 pthread_mutex_unlock(&thread_lock);
265
266 usleep(100000);
267
268 /* Ok, all threads are patiently blocked, start waking folks up */
269 wakeup_threads(waking_worker, thread_attr);
270
271 for (i = 0; i < nblocked_threads; i++) {
272 ret = pthread_join(blocked_worker[i], NULL);
273 if (ret)
274 err(EXIT_FAILURE, "pthread_join");
275 }
276
277 do_run_stats(waking_worker);
278 if (!silent)
279 print_run(waking_worker, j);
280
281 free(waking_worker);
282 }
283
284 /* cleanup & report results */
285 pthread_cond_destroy(&thread_parent);
286 pthread_cond_destroy(&thread_worker);
287 pthread_mutex_destroy(&thread_lock);
288 pthread_attr_destroy(&thread_attr);
289
290 print_summary();
291
292 free(blocked_worker);
293 return ret;
294}
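A usage sketch: with 512 blocked threads and 8 wakers, each waker services
512/8 = 64 wakeups per run, and -S switches from private to shared futexes:

    perf bench futex wake-parallel -t 512 -w 8
    perf bench futex wake-parallel -t 512 -w 8 -S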
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 929f762be47e..e5e41d3bdce7 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
60 pthread_cond_wait(&thread_worker, &thread_lock); 60 pthread_cond_wait(&thread_worker, &thread_lock);
61 pthread_mutex_unlock(&thread_lock); 61 pthread_mutex_unlock(&thread_lock);
62 62
63 futex_wait(&futex1, 0, NULL, futex_flag); 63 while (1) {
64 if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
65 break;
66 }
67
68 pthread_exit(NULL);
64 return NULL; 69 return NULL;
65} 70}
66 71
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index ba5efa4710b5..870b7e665a20 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -8,6 +8,7 @@
8#include "../builtin.h" 8#include "../builtin.h"
9#include "../util/util.h" 9#include "../util/util.h"
10#include "../util/parse-options.h" 10#include "../util/parse-options.h"
11#include "../util/cloexec.h"
11 12
12#include "bench.h" 13#include "bench.h"
13 14
@@ -23,6 +24,7 @@
23#include <pthread.h> 24#include <pthread.h>
24#include <sys/mman.h> 25#include <sys/mman.h>
25#include <sys/time.h> 26#include <sys/time.h>
27#include <sys/resource.h>
26#include <sys/wait.h> 28#include <sys/wait.h>
27#include <sys/prctl.h> 29#include <sys/prctl.h>
28#include <sys/types.h> 30#include <sys/types.h>
@@ -51,6 +53,9 @@ struct thread_data {
51 unsigned int loops_done; 53 unsigned int loops_done;
52 u64 val; 54 u64 val;
53 u64 runtime_ns; 55 u64 runtime_ns;
56 u64 system_time_ns;
57 u64 user_time_ns;
58 double speed_gbs;
54 pthread_mutex_t *process_lock; 59 pthread_mutex_t *process_lock;
55}; 60};
56 61
@@ -1042,6 +1047,7 @@ static void *worker_thread(void *__tdata)
1042 u64 bytes_done; 1047 u64 bytes_done;
1043 long work_done; 1048 long work_done;
1044 u32 l; 1049 u32 l;
1050 struct rusage rusage;
1045 1051
1046 bind_to_cpumask(td->bind_cpumask); 1052 bind_to_cpumask(td->bind_cpumask);
1047 bind_to_memnode(td->bind_node); 1053 bind_to_memnode(td->bind_node);
@@ -1194,6 +1200,13 @@ static void *worker_thread(void *__tdata)
1194 timersub(&stop, &start0, &diff); 1200 timersub(&stop, &start0, &diff);
1195 td->runtime_ns = diff.tv_sec * 1000000000ULL; 1201 td->runtime_ns = diff.tv_sec * 1000000000ULL;
1196 td->runtime_ns += diff.tv_usec * 1000ULL; 1202 td->runtime_ns += diff.tv_usec * 1000ULL;
1203 td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
1204
1205 getrusage(RUSAGE_THREAD, &rusage);
1206 td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
1207 td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
1208 td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
1209 td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
1197 1210
1198 free_data(thread_data, g->p.bytes_thread); 1211 free_data(thread_data, g->p.bytes_thread);
1199 1212
@@ -1420,7 +1433,7 @@ static int __bench_numa(const char *name)
1420 double runtime_sec_min; 1433 double runtime_sec_min;
1421 int wait_stat; 1434 int wait_stat;
1422 double bytes; 1435 double bytes;
1423 int i, t; 1436 int i, t, p;
1424 1437
1425 if (init()) 1438 if (init())
1426 return -1; 1439 return -1;
@@ -1556,6 +1569,24 @@ static int __bench_numa(const char *name)
1556 print_res(name, bytes / runtime_sec_max / 1e9, 1569 print_res(name, bytes / runtime_sec_max / 1e9,
1557 "GB/sec,", "total-speed", "GB/sec total speed"); 1570 "GB/sec,", "total-speed", "GB/sec total speed");
1558 1571
1572 if (g->p.show_details >= 2) {
1573 char tname[32];
1574 struct thread_data *td;
1575 for (p = 0; p < g->p.nr_proc; p++) {
1576 for (t = 0; t < g->p.nr_threads; t++) {
1577 memset(tname, 0, 32);
1578 td = g->threads + p*g->p.nr_threads + t;
1579 snprintf(tname, 32, "process%d:thread%d", p, t);
1580 print_res(tname, td->speed_gbs,
1581 "GB/sec", "thread-speed", "GB/sec/thread speed");
1582 print_res(tname, td->system_time_ns / 1e9,
1583 "secs", "thread-system-time", "system CPU time/thread");
1584 print_res(tname, td->user_time_ns / 1e9,
1585 "secs", "thread-user-time", "user CPU time/thread");
1586 }
1587 }
1588 }
1589
1559 free(pids); 1590 free(pids);
1560 1591
1561 deinit(); 1592 deinit();
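The per-thread speed and CPU-time lines only print at detail level 2 or
higher; assuming the bench's integer -d/--show_details option, that is
reached with something like:

    perf bench numa mem -d 2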
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 71bf7451c0ca..2c1bec39c30e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -59,6 +59,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
59 (al->sym == NULL || 59 (al->sym == NULL ||
60 strcmp(ann->sym_hist_filter, al->sym->name) != 0)) { 60 strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
61 /* We're only interested in a symbol named sym_hist_filter */ 61 /* We're only interested in a symbol named sym_hist_filter */
62 /*
63 * FIXME: why isn't this done in the symbol_filter when loading
64 * the DSO?
65 */
62 if (al->sym != NULL) { 66 if (al->sym != NULL) {
63 rb_erase(&al->sym->rb_node, 67 rb_erase(&al->sym->rb_node,
64 &al->map->dso->symbols[al->map->type]); 68 &al->map->dso->symbols[al->map->type]);
@@ -84,6 +88,7 @@ static int process_sample_event(struct perf_tool *tool,
84{ 88{
85 struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); 89 struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
86 struct addr_location al; 90 struct addr_location al;
91 int ret = 0;
87 92
88 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 93 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
89 pr_warning("problem processing %d event, skipping it.\n", 94 pr_warning("problem processing %d event, skipping it.\n",
@@ -92,15 +97,16 @@ static int process_sample_event(struct perf_tool *tool,
92 } 97 }
93 98
94 if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) 99 if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
95 return 0; 100 goto out_put;
96 101
97 if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) { 102 if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
98 pr_warning("problem incrementing symbol count, " 103 pr_warning("problem incrementing symbol count, "
99 "skipping event\n"); 104 "skipping event\n");
100 return -1; 105 ret = -1;
101 } 106 }
102 107out_put:
103 return 0; 108 addr_location__put(&al);
109 return ret;
104} 110}
105 111
106static int hist_entry__tty_annotate(struct hist_entry *he, 112static int hist_entry__tty_annotate(struct hist_entry *he,
@@ -283,7 +289,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
283 }, 289 },
284 }; 290 };
285 struct perf_data_file file = { 291 struct perf_data_file file = {
286 .path = input_name,
287 .mode = PERF_DATA_MODE_READ, 292 .mode = PERF_DATA_MODE_READ,
288 }; 293 };
289 const struct option options[] = { 294 const struct option options[] = {
@@ -324,6 +329,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
324 "objdump binary to use for disassembly and annotations"), 329 "objdump binary to use for disassembly and annotations"),
325 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 330 OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
326 "Show event group information together"), 331 "Show event group information together"),
332 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
333 "Show a column with the sum of periods"),
327 OPT_END() 334 OPT_END()
328 }; 335 };
329 int ret = hists__init(); 336 int ret = hists__init();
@@ -340,6 +347,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
340 else if (annotate.use_gtk) 347 else if (annotate.use_gtk)
341 use_browser = 2; 348 use_browser = 2;
342 349
350 file.path = input_name;
351
343 setup_browser(true); 352 setup_browser(true);
344 353
345 annotate.session = perf_session__new(&file, false, &annotate.tool); 354 annotate.session = perf_session__new(&file, false, &annotate.tool);
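A sketch of the new column (the symbol name is hypothetical):

    perf annotate --show-total-period --stdio my_hot_function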
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b9a56fa83330..b5314e452ec7 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = {
58static struct bench futex_benchmarks[] = { 58static struct bench futex_benchmarks[] = {
59 { "hash", "Benchmark for futex hash table", bench_futex_hash }, 59 { "hash", "Benchmark for futex hash table", bench_futex_hash },
60 { "wake", "Benchmark for futex wake calls", bench_futex_wake }, 60 { "wake", "Benchmark for futex wake calls", bench_futex_wake },
61 { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
61 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, 62 { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
62 { "all", "Test all futex benchmarks", NULL }, 63 { "all", "Test all futex benchmarks", NULL },
63 { NULL, NULL, NULL } 64 { NULL, NULL, NULL }
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index feb420f74c2d..9fe93c8d4fcf 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -69,6 +69,15 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
69 session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); 69 session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
70 if (session == NULL) 70 if (session == NULL)
71 return -1; 71 return -1;
72
73 /*
74 * We take all buildids when the file contains AUX area tracing data
 75 * because we do not decode the trace, as that would take too long.
76 */
77 if (!perf_data_file__is_pipe(&file) &&
78 perf_header__has_feat(&session->header, HEADER_AUXTRACE))
79 with_hits = false;
80
72 /* 81 /*
73 * in pipe-mode, the only way to get the buildids is to parse 82 * in pipe-mode, the only way to get the buildids is to parse
74 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID 83 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index df6307b4050a..daaa7dca9c3b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -328,6 +328,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
328{ 328{
329 struct addr_location al; 329 struct addr_location al;
330 struct hists *hists = evsel__hists(evsel); 330 struct hists *hists = evsel__hists(evsel);
331 int ret = -1;
331 332
332 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 333 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
333 pr_warning("problem processing %d event, skipping it.\n", 334 pr_warning("problem processing %d event, skipping it.\n",
@@ -338,7 +339,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
338 if (hists__add_entry(hists, &al, sample->period, 339 if (hists__add_entry(hists, &al, sample->period,
339 sample->weight, sample->transaction)) { 340 sample->weight, sample->transaction)) {
340 pr_warning("problem incrementing symbol period, skipping event\n"); 341 pr_warning("problem incrementing symbol period, skipping event\n");
341 return -1; 342 goto out_put;
342 } 343 }
343 344
344 /* 345 /*
@@ -350,8 +351,10 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
350 hists->stats.total_period += sample->period; 351 hists->stats.total_period += sample->period;
351 if (!al.filtered) 352 if (!al.filtered)
352 hists->stats.total_non_filtered_period += sample->period; 353 hists->stats.total_non_filtered_period += sample->period;
353 354 ret = 0;
354 return 0; 355out_put:
356 addr_location__put(&al);
357 return ret;
355} 358}
356 359
357static struct perf_tool tool = { 360static struct perf_tool tool = {
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40a33d7334cc..52ec66b23607 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,6 +16,7 @@
16#include "util/debug.h" 16#include "util/debug.h"
17#include "util/build-id.h" 17#include "util/build-id.h"
18#include "util/data.h" 18#include "util/data.h"
19#include "util/auxtrace.h"
19 20
20 21 #include "util/parse-options.h"
21 22
@@ -26,10 +27,12 @@ struct perf_inject {
26 27 struct perf_session *session;
27 28 bool build_ids;
28 29 bool sched_stat;
30 bool have_auxtrace;
29 31 const char *input_name;
30 32 struct perf_data_file output;
31 33 u64 bytes_written;
32 34 struct list_head samples;
35 struct itrace_synth_opts itrace_synth_opts;
33 36 };
34 37
35 38 struct event_entry {
@@ -38,14 +41,11 @@ struct event_entry {
38 41 union perf_event event[0];
39 42 };
40 43
41 static int perf_event__repipe_synth(struct perf_tool *tool,
44 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
42 union perf_event *event)
43 45 {
44 struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
45 46 ssize_t size;
46 47
47 size = perf_data_file__write(&inject->output, event,
48 size = perf_data_file__write(&inject->output, buf, sz);
48 event->header.size);
49 49 if (size < 0)
50 50 return -errno;
51 51
@@ -53,6 +53,15 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
53 53 return 0;
54 54 }
55 55
56static int perf_event__repipe_synth(struct perf_tool *tool,
57 union perf_event *event)
58{
59 struct perf_inject *inject = container_of(tool, struct perf_inject,
60 tool);
61
62 return output_bytes(inject, event, event->header.size);
63}
64
56 65 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
57 66 union perf_event *event,
58 67 struct ordered_events *oe __maybe_unused)
@@ -86,6 +95,79 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
86 95 return perf_event__repipe_synth(tool, event);
87 96 }
88 97
98#ifdef HAVE_AUXTRACE_SUPPORT
99
100static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
101{
102 char buf[4096];
103 ssize_t ssz;
104 int ret;
105
106 while (size > 0) {
107 ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
108 if (ssz < 0)
109 return -errno;
110 ret = output_bytes(inject, buf, ssz);
111 if (ret)
112 return ret;
113 size -= ssz;
114 }
115
116 return 0;
117}
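
copy_bytes() above streams an AUX payload through a small stack buffer because in pipe mode the data cannot be forwarded with a single contiguous write. A self-contained sketch of the same bounded-copy pattern, with a hypothetical fd-to-fd helper that also handles short writes (the patch delegates the output side to output_bytes() instead):

    #include <errno.h>
    #include <unistd.h>

    static int copy_payload(int from, int to, off_t size)
    {
            char buf[4096];

            while (size > 0) {
                    size_t want = size < (off_t)sizeof(buf) ?
                                  (size_t)size : sizeof(buf);
                    ssize_t n = read(from, buf, want);
                    ssize_t off = 0;

                    if (n < 0)
                            return -errno;
                    if (n == 0)
                            return -EINVAL; /* truncated input */
                    while (off < n) {
                            ssize_t w = write(to, buf + off, n - off);

                            if (w < 0)
                                    return -errno;
                            off += w;
                    }
                    size -= n;
            }
            return 0;
    }
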
118
119static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
120 union perf_event *event,
121 struct perf_session *session
122 __maybe_unused)
123{
124 struct perf_inject *inject = container_of(tool, struct perf_inject,
125 tool);
126 int ret;
127
128 inject->have_auxtrace = true;
129
130 if (!inject->output.is_pipe) {
131 off_t offset;
132
133 offset = lseek(inject->output.fd, 0, SEEK_CUR);
134 if (offset == -1)
135 return -errno;
136 ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
137 event, offset);
138 if (ret < 0)
139 return ret;
140 }
141
142 if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
143 ret = output_bytes(inject, event, event->header.size);
144 if (ret < 0)
145 return ret;
146 ret = copy_bytes(inject, perf_data_file__fd(session->file),
147 event->auxtrace.size);
148 } else {
149 ret = output_bytes(inject, event,
150 event->header.size + event->auxtrace.size);
151 }
152 if (ret < 0)
153 return ret;
154
155 return event->auxtrace.size;
156}
157
158#else
159
160static s64
161perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
162 union perf_event *event __maybe_unused,
163 struct perf_session *session __maybe_unused)
164{
165 pr_err("AUX area tracing not supported\n");
166 return -EINVAL;
167}
168
169#endif
170
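The HAVE_AUXTRACE_SUPPORT block above follows the usual perf convention for optional features: the real handler is compiled in when support is available, otherwise a stub with the same signature reports an error, so callers never need their own #ifdefs. The same convention in miniature, with an invented feature name purely for illustration:

    #include <errno.h>
    #include <stdio.h>

    #ifdef HAVE_FROBNICATE_SUPPORT
    static int frobnicate(int value)
    {
            printf("frobnicating %d\n", value);
            return 0;
    }
    #else
    static int frobnicate(int value)
    {
            (void)value;
            fprintf(stderr, "frobnicate support not compiled in\n");
            return -EINVAL;
    }
    #endif

    int main(void)
    {
            return frobnicate(42) ? 1 : 0;
    }
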
89 171 static int perf_event__repipe(struct perf_tool *tool,
90 172 union perf_event *event,
91 173 struct perf_sample *sample __maybe_unused,
@@ -155,6 +237,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
155 237 return err;
156 238 }
157 239
240static int perf_event__repipe_comm(struct perf_tool *tool,
241 union perf_event *event,
242 struct perf_sample *sample,
243 struct machine *machine)
244{
245 int err;
246
247 err = perf_event__process_comm(tool, event, sample, machine);
248 perf_event__repipe(tool, event, sample, machine);
249
250 return err;
251}
252
253static int perf_event__repipe_exit(struct perf_tool *tool,
254 union perf_event *event,
255 struct perf_sample *sample,
256 struct machine *machine)
257{
258 int err;
259
260 err = perf_event__process_exit(tool, event, sample, machine);
261 perf_event__repipe(tool, event, sample, machine);
262
263 return err;
264}
265
158 266 static int perf_event__repipe_tracing_data(struct perf_tool *tool,
159 267 union perf_event *event,
160 268 struct perf_session *session)
@@ -167,6 +275,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
167 275 return err;
168 276 }
169 277
278static int perf_event__repipe_id_index(struct perf_tool *tool,
279 union perf_event *event,
280 struct perf_session *session)
281{
282 int err;
283
284 perf_event__repipe_synth(tool, event);
285 err = perf_event__process_id_index(tool, event, session);
286
287 return err;
288}
289
170 290 static int dso__read_build_id(struct dso *dso)
171 291 {
172 292 if (dso->has_build_id)
@@ -245,6 +365,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
245 365 }
246 366 }
247 367
368 thread__put(thread);
248 369 repipe:
249 370 perf_event__repipe(tool, event, sample, machine);
250 371 return 0;
@@ -351,16 +472,20 @@ static int __cmd_inject(struct perf_inject *inject)
351 472 struct perf_session *session = inject->session;
352 473 struct perf_data_file *file_out = &inject->output;
353 474 int fd = perf_data_file__fd(file_out);
475 u64 output_data_offset;
354 476
355 477 signal(SIGINT, sig_handler);
356 478
357 if (inject->build_ids || inject->sched_stat) {
479 if (inject->build_ids || inject->sched_stat ||
480 inject->itrace_synth_opts.set) {
358 481 inject->tool.mmap = perf_event__repipe_mmap;
359 482 inject->tool.mmap2 = perf_event__repipe_mmap2;
360 483 inject->tool.fork = perf_event__repipe_fork;
361 484 inject->tool.tracing_data = perf_event__repipe_tracing_data;
362 485 }
363 486
487 output_data_offset = session->header.data_offset;
488
364 489 if (inject->build_ids) {
365 490 inject->tool.sample = perf_event__inject_buildid;
366 491 } else if (inject->sched_stat) {
@@ -379,17 +504,43 @@ static int __cmd_inject(struct perf_inject *inject)
379 504 else if (!strncmp(name, "sched:sched_stat_", 17))
380 505 evsel->handler = perf_inject__sched_stat;
381 506 }
507 } else if (inject->itrace_synth_opts.set) {
508 session->itrace_synth_opts = &inject->itrace_synth_opts;
509 inject->itrace_synth_opts.inject = true;
510 inject->tool.comm = perf_event__repipe_comm;
511 inject->tool.exit = perf_event__repipe_exit;
512 inject->tool.id_index = perf_event__repipe_id_index;
513 inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
514 inject->tool.auxtrace = perf_event__process_auxtrace;
515 inject->tool.ordered_events = true;
516 inject->tool.ordering_requires_timestamps = true;
517 /* Allow space in the header for new attributes */
518 output_data_offset = 4096;
382 519 }
383 520
521 if (!inject->itrace_synth_opts.set)
522 auxtrace_index__free(&session->auxtrace_index);
523
384 524 if (!file_out->is_pipe)
385 lseek(fd, session->header.data_offset, SEEK_SET);
525 lseek(fd, output_data_offset, SEEK_SET);
386 526
387 527 ret = perf_session__process_events(session);
388 528
389 529 if (!file_out->is_pipe) {
390 if (inject->build_ids)
530 if (inject->build_ids) {
391 531 perf_header__set_feat(&session->header,
392 532 HEADER_BUILD_ID);
533 if (inject->have_auxtrace)
534 dsos__hit_all(session);
535 }
536 /*
537 * The AUX areas have been removed and replaced with
538 * synthesized hardware events, so clear the feature flag.
539 */
540 if (inject->itrace_synth_opts.set)
541 perf_header__clear_feat(&session->header,
542 HEADER_AUXTRACE);
543 session->header.data_offset = output_data_offset;
393 544 session->header.data_size = inject->bytes_written;
394 545 perf_session__write_header(session, session->evlist, fd, true);
395 546 }
@@ -408,11 +559,16 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
408 559 .fork = perf_event__repipe,
409 560 .exit = perf_event__repipe,
410 561 .lost = perf_event__repipe,
562 .aux = perf_event__repipe,
563 .itrace_start = perf_event__repipe,
411 564 .read = perf_event__repipe_sample,
412 565 .throttle = perf_event__repipe,
413 566 .unthrottle = perf_event__repipe,
414 567 .attr = perf_event__repipe_attr,
415 568 .tracing_data = perf_event__repipe_op2_synth,
569 .auxtrace_info = perf_event__repipe_op2_synth,
570 .auxtrace = perf_event__repipe_auxtrace,
571 .auxtrace_error = perf_event__repipe_op2_synth,
416 572 .finished_round = perf_event__repipe_oe_synth,
417 573 .build_id = perf_event__repipe_op2_synth,
418 574 .id_index = perf_event__repipe_op2_synth,
@@ -444,6 +600,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
444 600 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
445 601 "kallsyms pathname"),
446 602 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
603 OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
604 NULL, "opts", "Instruction Tracing options",
605 itrace_parse_synth_opts),
447 606 OPT_END()
448 607 };
449 608 const char * const inject_usage[] = {
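
With the --itrace option wired into perf inject, AUX area data recorded earlier can be replaced by synthesized hardware events. An illustrative session, assuming a kernel and CPU with Intel PT support (the event name and file names are examples only):

    perf record -e intel_pt// -- ./workload
    perf inject --itrace -i perf.data -o perf.data.synth
    perf report -i perf.data.synth
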
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1634186d537c..950f296dfcf7 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -10,6 +10,7 @@
10 10 #include "util/header.h"
11 11 #include "util/session.h"
12 12 #include "util/tool.h"
13#include "util/callchain.h"
13 14
14 15 #include "util/parse-options.h"
15 16 #include "util/trace-event.h"
@@ -21,14 +22,19 @@
21 22 #include <linux/rbtree.h>
22 23 #include <linux/string.h>
23 24 #include <locale.h>
25#include <regex.h>
24 26
25 27 static int kmem_slab;
26 28 static int kmem_page;
27 29
28 30 static long kmem_page_size;
31static enum {
32 KMEM_SLAB,
33 KMEM_PAGE,
34} kmem_default = KMEM_SLAB; /* for backward compatibility */
29 35
30 36 struct alloc_stat;
31 typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
37 typedef int (*sort_fn_t)(void *, void *);
32 38
33 39 static int alloc_flag;
34 40 static int caller_flag;
@@ -179,8 +185,8 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
179 185 return ret;
180 186 }
181 187
182 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
188 static int ptr_cmp(void *, void *);
183 static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
189 static int slab_callsite_cmp(void *, void *);
184 190
185 191 static struct alloc_stat *search_alloc_stat(unsigned long ptr,
186 192 unsigned long call_site,
@@ -221,7 +227,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
221 227 s_alloc->pingpong++;
222 228
223 229 s_caller = search_alloc_stat(0, s_alloc->call_site,
224 &root_caller_stat, callsite_cmp);
230 &root_caller_stat,
231 slab_callsite_cmp);
225 232 if (!s_caller)
226 233 return -1;
227 234 s_caller->pingpong++;
@@ -241,6 +248,8 @@ static unsigned long nr_page_fails;
241 248 static unsigned long nr_page_nomatch;
242 249
243 250 static bool use_pfn;
251static bool live_page;
252static struct perf_session *kmem_session;
244 253
245 254 #define MAX_MIGRATE_TYPES 6
246 255 #define MAX_PAGE_ORDER 11
@@ -250,6 +259,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
250 259 struct page_stat {
251 260 struct rb_node node;
252 261 u64 page;
262 u64 callsite;
253 263 int order;
254 264 unsigned gfp_flags;
255 265 unsigned migrate_type;
@@ -259,13 +269,158 @@ struct page_stat {
259 269 int nr_free;
260 270 };
261 271
262 static struct rb_root page_tree;
272 static struct rb_root page_live_tree;
263 273 static struct rb_root page_alloc_tree;
264 274 static struct rb_root page_alloc_sorted;
275static struct rb_root page_caller_tree;
276static struct rb_root page_caller_sorted;
265 277
266 static struct page_stat *search_page(unsigned long page, bool create)
278 struct alloc_func {
279 u64 start;
280 u64 end;
281 char *name;
282};
283
284static int nr_alloc_funcs;
285static struct alloc_func *alloc_func_list;
286
287static int funcmp(const void *a, const void *b)
288{
289 const struct alloc_func *fa = a;
290 const struct alloc_func *fb = b;
291
292 if (fa->start > fb->start)
293 return 1;
294 else
295 return -1;
296}
297
298static int callcmp(const void *a, const void *b)
299{
300 const struct alloc_func *fa = a;
301 const struct alloc_func *fb = b;
302
303 if (fb->start <= fa->start && fa->end < fb->end)
304 return 0;
305
306 if (fa->start > fb->start)
307 return 1;
308 else
309 return -1;
310}
311
312static int build_alloc_func_list(void)
267 313 {
268 struct rb_node **node = &page_tree.rb_node;
314 int ret;
315 struct map *kernel_map;
316 struct symbol *sym;
317 struct rb_node *node;
318 struct alloc_func *func;
319 struct machine *machine = &kmem_session->machines.host;
320 regex_t alloc_func_regex;
321 const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
322
323 ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
324 if (ret) {
325 char err[BUFSIZ];
326
327 regerror(ret, &alloc_func_regex, err, sizeof(err));
328 pr_err("Invalid regex: %s\n%s", pattern, err);
329 return -EINVAL;
330 }
331
332 kernel_map = machine->vmlinux_maps[MAP__FUNCTION];
333 if (map__load(kernel_map, NULL) < 0) {
334 pr_err("cannot load kernel map\n");
335 return -ENOENT;
336 }
337
338 map__for_each_symbol(kernel_map, sym, node) {
339 if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
340 continue;
341
342 func = realloc(alloc_func_list,
343 (nr_alloc_funcs + 1) * sizeof(*func));
344 if (func == NULL)
345 return -ENOMEM;
346
347 pr_debug("alloc func: %s\n", sym->name);
348 func[nr_alloc_funcs].start = sym->start;
349 func[nr_alloc_funcs].end = sym->end;
350 func[nr_alloc_funcs].name = sym->name;
351
352 alloc_func_list = func;
353 nr_alloc_funcs++;
354 }
355
356 qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
357
358 regfree(&alloc_func_regex);
359 return 0;
360}
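
build_alloc_func_list() above collects every kernel symbol matching the allocator regex together with its [start, end) address range, then qsort()s the array by start address so find_callsite() can probe it with bsearch(). The interesting part is the comparator, which reports a match whenever the probed address falls inside a stored range. A stand-alone sketch of that interval-lookup trick, with hypothetical addresses and names:

    #include <stdio.h>
    #include <stdlib.h>

    struct func_range {
            unsigned long start, end;
            const char *name;
    };

    /* key is a degenerate range with start == end == address */
    static int addr_vs_range(const void *k, const void *e)
    {
            const struct func_range *key = k;
            const struct func_range *elem = e;

            if (key->start >= elem->start && key->start < elem->end)
                    return 0;       /* address inside [start, end) */
            return key->start < elem->start ? -1 : 1;
    }

    int main(void)
    {
            /* must be sorted by start, as the qsort() above guarantees */
            struct func_range funcs[] = {
                    { 0x1000, 0x1080, "__alloc_pages_nodemask" },
                    { 0x2000, 0x2040, "get_free_pages" },
            };
            struct func_range key = { 0x2010, 0x2010, NULL };
            struct func_range *hit = bsearch(&key, funcs, 2, sizeof(key),
                                             addr_vs_range);

            printf("%s\n", hit ? hit->name : "not an allocator");
            return 0;
    }
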
361
362/*
363 * Find first non-memory allocation function from callchain.
364 * The allocation functions are in the 'alloc_func_list'.
365 */
366static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
367{
368 struct addr_location al;
369 struct machine *machine = &kmem_session->machines.host;
370 struct callchain_cursor_node *node;
371
372 if (alloc_func_list == NULL) {
373 if (build_alloc_func_list() < 0)
374 goto out;
375 }
376
377 al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
378 sample__resolve_callchain(sample, NULL, evsel, &al, 16);
379
380 callchain_cursor_commit(&callchain_cursor);
381 while (true) {
382 struct alloc_func key, *caller;
383 u64 addr;
384
385 node = callchain_cursor_current(&callchain_cursor);
386 if (node == NULL)
387 break;
388
389 key.start = key.end = node->ip;
390 caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
391 sizeof(key), callcmp);
392 if (!caller) {
393 /* found */
394 if (node->map)
395 addr = map__unmap_ip(node->map, node->ip);
396 else
397 addr = node->ip;
398
399 return addr;
400 } else
401 pr_debug3("skipping alloc function: %s\n", caller->name);
402
403 callchain_cursor_advance(&callchain_cursor);
404 }
405
406out:
407 pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
408 return sample->ip;
409}
410
411struct sort_dimension {
412 const char name[20];
413 sort_fn_t cmp;
414 struct list_head list;
415};
416
417static LIST_HEAD(page_alloc_sort_input);
418static LIST_HEAD(page_caller_sort_input);
419
420static struct page_stat *
421__page_stat__findnew_page(struct page_stat *pstat, bool create)
422{
423 struct rb_node **node = &page_live_tree.rb_node;
269 424 struct rb_node *parent = NULL;
270 425 struct page_stat *data;
271 426
@@ -275,7 +430,7 @@ static struct page_stat *search_page(unsigned long page, bool create)
275 430 parent = *node;
276 431 data = rb_entry(*node, struct page_stat, node);
277 432
278 cmp = data->page - page;
433 cmp = data->page - pstat->page;
279 434 if (cmp < 0)
280 435 node = &parent->rb_left;
281 436 else if (cmp > 0)
@@ -289,49 +444,48 @@ static struct page_stat *search_page(unsigned long page, bool create)
289 444
290 445 data = zalloc(sizeof(*data));
291 446 if (data != NULL) {
292 data->page = page;
447 data->page = pstat->page;
448 data->order = pstat->order;
449 data->gfp_flags = pstat->gfp_flags;
450 data->migrate_type = pstat->migrate_type;
293 451
294 452 rb_link_node(&data->node, parent, node);
295 rb_insert_color(&data->node, &page_tree);
453 rb_insert_color(&data->node, &page_live_tree);
296 454 }
297 455
298 456 return data;
299 457 }
300 458
301 static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
459 static struct page_stat *page_stat__find_page(struct page_stat *pstat)
302 460 {
303 if (a->page > b->page)
461 return __page_stat__findnew_page(pstat, false);
304 return -1;
462 }
305 if (a->page < b->page)
463
306 return 1;
464 static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
307 if (a->order > b->order)
465 {
308 return -1;
466 return __page_stat__findnew_page(pstat, true);
309 if (a->order < b->order)
310 return 1;
311 if (a->migrate_type > b->migrate_type)
312 return -1;
313 if (a->migrate_type < b->migrate_type)
314 return 1;
315 if (a->gfp_flags > b->gfp_flags)
316 return -1;
317 if (a->gfp_flags < b->gfp_flags)
318 return 1;
319 return 0;
320 467 }
321 468
322 static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
469 static struct page_stat *
470 __page_stat__findnew_alloc(struct page_stat *pstat, bool create)
323 471 {
324 472 struct rb_node **node = &page_alloc_tree.rb_node;
325 473 struct rb_node *parent = NULL;
326 474 struct page_stat *data;
475 struct sort_dimension *sort;
327 476
328 477 while (*node) {
329 s64 cmp;
478 int cmp = 0;
330 479
331 480 parent = *node;
332 481 data = rb_entry(*node, struct page_stat, node);
333 482
334 cmp = page_stat_cmp(data, pstat);
483 list_for_each_entry(sort, &page_alloc_sort_input, list) {
484 cmp = sort->cmp(pstat, data);
485 if (cmp)
486 break;
487 }
488
335 489 if (cmp < 0)
336 490 node = &parent->rb_left;
337 491 else if (cmp > 0)
@@ -357,6 +511,71 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool cr
357 511 return data;
358 512 }
359 513
514static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
515{
516 return __page_stat__findnew_alloc(pstat, false);
517}
518
519static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
520{
521 return __page_stat__findnew_alloc(pstat, true);
522}
523
524static struct page_stat *
525__page_stat__findnew_caller(struct page_stat *pstat, bool create)
526{
527 struct rb_node **node = &page_caller_tree.rb_node;
528 struct rb_node *parent = NULL;
529 struct page_stat *data;
530 struct sort_dimension *sort;
531
532 while (*node) {
533 int cmp = 0;
534
535 parent = *node;
536 data = rb_entry(*node, struct page_stat, node);
537
538 list_for_each_entry(sort, &page_caller_sort_input, list) {
539 cmp = sort->cmp(pstat, data);
540 if (cmp)
541 break;
542 }
543
544 if (cmp < 0)
545 node = &parent->rb_left;
546 else if (cmp > 0)
547 node = &parent->rb_right;
548 else
549 return data;
550 }
551
552 if (!create)
553 return NULL;
554
555 data = zalloc(sizeof(*data));
556 if (data != NULL) {
557 data->callsite = pstat->callsite;
558 data->order = pstat->order;
559 data->gfp_flags = pstat->gfp_flags;
560 data->migrate_type = pstat->migrate_type;
561
562 rb_link_node(&data->node, parent, node);
563 rb_insert_color(&data->node, &page_caller_tree);
564 }
565
566 return data;
567}
568
569static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
570{
571 return __page_stat__findnew_caller(pstat, false);
572}
573
574static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
575{
576 return __page_stat__findnew_caller(pstat, true);
577}
578
360 579 static bool valid_page(u64 pfn_or_page)
361 580 {
362 581 if (use_pfn && pfn_or_page == -1UL)
@@ -366,6 +585,176 @@ static bool valid_page(u64 pfn_or_page)
366 585 return true;
367 586 }
368 587
588struct gfp_flag {
589 unsigned int flags;
590 char *compact_str;
591 char *human_readable;
592};
593
594static struct gfp_flag *gfps;
595static int nr_gfps;
596
597static int gfpcmp(const void *a, const void *b)
598{
599 const struct gfp_flag *fa = a;
600 const struct gfp_flag *fb = b;
601
602 return fa->flags - fb->flags;
603}
604
605/* see include/trace/events/gfpflags.h */
606static const struct {
607 const char *original;
608 const char *compact;
609} gfp_compact_table[] = {
610 { "GFP_TRANSHUGE", "THP" },
611 { "GFP_HIGHUSER_MOVABLE", "HUM" },
612 { "GFP_HIGHUSER", "HU" },
613 { "GFP_USER", "U" },
614 { "GFP_TEMPORARY", "TMP" },
615 { "GFP_KERNEL", "K" },
616 { "GFP_NOFS", "NF" },
617 { "GFP_ATOMIC", "A" },
618 { "GFP_NOIO", "NI" },
619 { "GFP_HIGH", "H" },
620 { "GFP_WAIT", "W" },
621 { "GFP_IO", "I" },
622 { "GFP_COLD", "CO" },
623 { "GFP_NOWARN", "NWR" },
624 { "GFP_REPEAT", "R" },
625 { "GFP_NOFAIL", "NF" },
626 { "GFP_NORETRY", "NR" },
627 { "GFP_COMP", "C" },
628 { "GFP_ZERO", "Z" },
629 { "GFP_NOMEMALLOC", "NMA" },
630 { "GFP_MEMALLOC", "MA" },
631 { "GFP_HARDWALL", "HW" },
632 { "GFP_THISNODE", "TN" },
633 { "GFP_RECLAIMABLE", "RC" },
634 { "GFP_MOVABLE", "M" },
635 { "GFP_NOTRACK", "NT" },
636 { "GFP_NO_KSWAPD", "NK" },
637 { "GFP_OTHER_NODE", "ON" },
638 { "GFP_NOWAIT", "NW" },
639};
640
641static size_t max_gfp_len;
642
643static char *compact_gfp_flags(char *gfp_flags)
644{
645 char *orig_flags = strdup(gfp_flags);
646 char *new_flags = NULL;
647 char *str, *pos = NULL;
648 size_t len = 0;
649
650 if (orig_flags == NULL)
651 return NULL;
652
653 str = strtok_r(orig_flags, "|", &pos);
654 while (str) {
655 size_t i;
656 char *new;
657 const char *cpt;
658
659 for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
660 if (strcmp(gfp_compact_table[i].original, str))
661 continue;
662
663 cpt = gfp_compact_table[i].compact;
664 new = realloc(new_flags, len + strlen(cpt) + 2);
665 if (new == NULL) {
666 free(new_flags);
667 return NULL;
668 }
669
670 new_flags = new;
671
672 if (!len) {
673 strcpy(new_flags, cpt);
674 } else {
675 strcat(new_flags, "|");
676 strcat(new_flags, cpt);
677 len++;
678 }
679
680 len += strlen(cpt);
681 }
682
683 str = strtok_r(NULL, "|", &pos);
684 }
685
686 if (max_gfp_len < len)
687 max_gfp_len = len;
688
689 free(orig_flags);
690 return new_flags;
691}
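
compact_gfp_flags() above splits the rendered flag string on '|' and replaces each known name with its short form from gfp_compact_table, so wide flag combinations fit in one report column. A runnable miniature with a trimmed table, showing that "GFP_KERNEL|GFP_NOWARN|GFP_COMP" comes out as "K|NWR|C":

    #include <stdio.h>
    #include <string.h>

    static const struct { const char *orig, *compact; } tbl[] = {
            { "GFP_KERNEL", "K" },
            { "GFP_NOWARN", "NWR" },
            { "GFP_COMP",   "C" },
    };

    int main(void)
    {
            char flags[] = "GFP_KERNEL|GFP_NOWARN|GFP_COMP";
            char out[64] = "";
            char *pos = NULL;

            for (char *tok = strtok_r(flags, "|", &pos); tok;
                 tok = strtok_r(NULL, "|", &pos)) {
                    for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
                            if (strcmp(tbl[i].orig, tok))
                                    continue;
                            if (out[0])
                                    strcat(out, "|");
                            strcat(out, tbl[i].compact);
                    }
            }
            printf("%s\n", out);    /* K|NWR|C */
            return 0;
    }
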
692
693static char *compact_gfp_string(unsigned long gfp_flags)
694{
695 struct gfp_flag key = {
696 .flags = gfp_flags,
697 };
698 struct gfp_flag *gfp;
699
700 gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
701 if (gfp)
702 return gfp->compact_str;
703
704 return NULL;
705}
706
707static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
708 unsigned int gfp_flags)
709{
710 struct pevent_record record = {
711 .cpu = sample->cpu,
712 .data = sample->raw_data,
713 .size = sample->raw_size,
714 };
715 struct trace_seq seq;
716 char *str, *pos = NULL;
717
718 if (nr_gfps) {
719 struct gfp_flag key = {
720 .flags = gfp_flags,
721 };
722
723 if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
724 return 0;
725 }
726
727 trace_seq_init(&seq);
728 pevent_event_info(&seq, evsel->tp_format, &record);
729
730 str = strtok_r(seq.buffer, " ", &pos);
731 while (str) {
732 if (!strncmp(str, "gfp_flags=", 10)) {
733 struct gfp_flag *new;
734
735 new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
736 if (new == NULL)
737 return -ENOMEM;
738
739 gfps = new;
740 new += nr_gfps++;
741
742 new->flags = gfp_flags;
743 new->human_readable = strdup(str + 10);
744 new->compact_str = compact_gfp_flags(str + 10);
745 if (!new->human_readable || !new->compact_str)
746 return -ENOMEM;
747
748 qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
749 }
750
751 str = strtok_r(NULL, " ", &pos);
752 }
753
754 trace_seq_destroy(&seq);
755 return 0;
756}
757
369 758 static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
370 759 struct perf_sample *sample)
371 760 {
@@ -375,6 +764,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
375 764 unsigned int migrate_type = perf_evsel__intval(evsel, sample,
376 765 "migratetype");
377 766 u64 bytes = kmem_page_size << order;
767 u64 callsite;
378 768 struct page_stat *pstat;
379 769 struct page_stat this = {
380 770 .order = order,
@@ -397,20 +787,36 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
397 787 return 0;
398 788 }
399 789
790 if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
791 return -1;
792
793 callsite = find_callsite(evsel, sample);
794
400 795 /*
401 796 * This is to find the current page (with correct gfp flags and
402 797 * migrate type) at free event.
403 798 */
404 pstat = search_page(page, true);
799 this.page = page;
800 pstat = page_stat__findnew_page(&this);
405 801 if (pstat == NULL)
406 802 return -ENOMEM;
407 803
408 pstat->order = order;
804 pstat->nr_alloc++;
409 pstat->gfp_flags = gfp_flags;
805 pstat->alloc_bytes += bytes;
410 pstat->migrate_type = migrate_type;
806 pstat->callsite = callsite;
807
808 if (!live_page) {
809 pstat = page_stat__findnew_alloc(&this);
810 if (pstat == NULL)
811 return -ENOMEM;
411 812
412 this.page = page;
813 pstat->nr_alloc++;
413 pstat = search_page_alloc_stat(&this, true);
814 pstat->alloc_bytes += bytes;
815 pstat->callsite = callsite;
816 }
817
818 this.callsite = callsite;
819 pstat = page_stat__findnew_caller(&this);
414 820 if (pstat == NULL)
415 821 return -ENOMEM;
416 822
@@ -441,7 +847,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
441 847 nr_page_frees++;
442 848 total_page_free_bytes += bytes;
443 849
444 pstat = search_page(page, false);
850 this.page = page;
851 pstat = page_stat__find_page(&this);
445 852 if (pstat == NULL) {
446 853 pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
447 854 page, order);
@@ -452,20 +859,41 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
452 859 return 0;
453 860 }
454 861
455 this.page = page;
456 862 this.gfp_flags = pstat->gfp_flags;
457 863 this.migrate_type = pstat->migrate_type;
864 this.callsite = pstat->callsite;
458 865
459 rb_erase(&pstat->node, &page_tree);
866 rb_erase(&pstat->node, &page_live_tree);
460 867 free(pstat);
461 868
462 pstat = search_page_alloc_stat(&this, false);
869 if (live_page) {
870 order_stats[this.order][this.migrate_type]--;
871 } else {
872 pstat = page_stat__find_alloc(&this);
873 if (pstat == NULL)
874 return -ENOMEM;
875
876 pstat->nr_free++;
877 pstat->free_bytes += bytes;
878 }
879
880 pstat = page_stat__find_caller(&this);
463 881 if (pstat == NULL)
464 882 return -ENOENT;
465 883
466 884 pstat->nr_free++;
467 885 pstat->free_bytes += bytes;
468 886
887 if (live_page) {
888 pstat->nr_alloc--;
889 pstat->alloc_bytes -= bytes;
890
891 if (pstat->nr_alloc == 0) {
892 rb_erase(&pstat->node, &page_caller_tree);
893 free(pstat);
894 }
895 }
896
469 897 return 0;
470 898 }
471 899
@@ -478,6 +906,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
478 906 struct perf_evsel *evsel,
479 907 struct machine *machine)
480 908 {
909 int err = 0;
481 910 struct thread *thread = machine__findnew_thread(machine, sample->pid,
482 911 sample->tid);
483 912
@@ -491,10 +920,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
491 920
492 921 if (evsel->handler != NULL) {
493 922 tracepoint_handler f = evsel->handler;
494 return f(evsel, sample);
923 err = f(evsel, sample);
495 924 }
496 925
497 return 0;
926 thread__put(thread);
927
928 return err;
498 929 }
499 930
500 931 static struct perf_tool perf_kmem = {
@@ -576,41 +1007,111 @@ static const char * const migrate_type_str[] = {
576 1007 "UNKNOWN",
577 1008 };
578 1009
579 static void __print_page_result(struct rb_root *root,
1010 static void __print_page_alloc_result(struct perf_session *session, int n_lines)
580 struct perf_session *session __maybe_unused,
581 int n_lines)
582 1011 {
583 struct rb_node *next = rb_first(root);
1012 struct rb_node *next = rb_first(&page_alloc_sorted);
1013 struct machine *machine = &session->machines.host;
584 1014 const char *format;
1015 int gfp_len = max(strlen("GFP flags"), max_gfp_len);
585 1016
586 printf("\n%.80s\n", graph_dotted_line);
1017 printf("\n%.105s\n", graph_dotted_line);
587 printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n",
1018 printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
588 use_pfn ? "PFN" : "Page");
1019 use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
589 printf("%.80s\n", graph_dotted_line);
1020 gfp_len, "GFP flags");
1021 printf("%.105s\n", graph_dotted_line);
590 1022
591 1023 if (use_pfn)
592 format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n";
1024 format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
593 1025 else
594 format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n";
1026 format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
595 1027
596 1028 while (next && n_lines--) {
597 1029 struct page_stat *data;
1030 struct symbol *sym;
1031 struct map *map;
1032 char buf[32];
1033 char *caller = buf;
598 1034
599 1035 data = rb_entry(next, struct page_stat, node);
1036 sym = machine__find_kernel_function(machine, data->callsite,
1037 &map, NULL);
1038 if (sym && sym->name)
1039 caller = sym->name;
1040 else
1041 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
600 1042
601 1043 printf(format, (unsigned long long)data->page,
602 1044 (unsigned long long)data->alloc_bytes / 1024,
603 1045 data->nr_alloc, data->order,
604 1046 migrate_type_str[data->migrate_type],
605 (unsigned long)data->gfp_flags);
1047 gfp_len, compact_gfp_string(data->gfp_flags), caller);
606 1048
607 1049 next = rb_next(next);
608 1050 }
609 1051
610 if (n_lines == -1)
1052 if (n_lines == -1) {
611 printf(" ... | ... | ... | ... | ... | ... \n");
1053 printf(" ... | ... | ... | ... | ... | %-*s | ...\n",
1054 gfp_len, "...");
1055 }
1056
1057 printf("%.105s\n", graph_dotted_line);
1058}
1059
1060static void __print_page_caller_result(struct perf_session *session, int n_lines)
1061{
1062 struct rb_node *next = rb_first(&page_caller_sorted);
1063 struct machine *machine = &session->machines.host;
1064 int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1065
1066 printf("\n%.105s\n", graph_dotted_line);
1067 printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
1068 live_page ? "Live" : "Total", gfp_len, "GFP flags");
1069 printf("%.105s\n", graph_dotted_line);
1070
1071 while (next && n_lines--) {
1072 struct page_stat *data;
1073 struct symbol *sym;
1074 struct map *map;
1075 char buf[32];
1076 char *caller = buf;
1077
1078 data = rb_entry(next, struct page_stat, node);
1079 sym = machine__find_kernel_function(machine, data->callsite,
1080 &map, NULL);
1081 if (sym && sym->name)
1082 caller = sym->name;
1083 else
1084 scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1085
1086 printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
1087 (unsigned long long)data->alloc_bytes / 1024,
1088 data->nr_alloc, data->order,
1089 migrate_type_str[data->migrate_type],
1090 gfp_len, compact_gfp_string(data->gfp_flags), caller);
1091
1092 next = rb_next(next);
1093 }
1094
1095 if (n_lines == -1) {
1096 printf(" ... | ... | ... | ... | %-*s | ...\n",
1097 gfp_len, "...");
1098 }
612 1099
613 printf("%.80s\n", graph_dotted_line);
1100 printf("%.105s\n", graph_dotted_line);
1101}
1102
1103static void print_gfp_flags(void)
1104{
1105 int i;
1106
1107 printf("#\n");
1108 printf("# GFP flags\n");
1109 printf("# ---------\n");
1110 for (i = 0; i < nr_gfps; i++) {
1111 printf("# %08x: %*s: %s\n", gfps[i].flags,
1112 (int) max_gfp_len, gfps[i].compact_str,
1113 gfps[i].human_readable);
1114 }
614 1115 }
615 1116
616 1117 static void print_slab_summary(void)
@@ -682,8 +1183,12 @@ static void print_slab_result(struct perf_session *session)
682 1183
683 1184 static void print_page_result(struct perf_session *session)
684 1185 {
1186 if (caller_flag || alloc_flag)
1187 print_gfp_flags();
1188 if (caller_flag)
1189 __print_page_caller_result(session, caller_lines);
685 1190 if (alloc_flag)
686 __print_page_result(&page_alloc_sorted, session, alloc_lines);
1191 __print_page_alloc_result(session, alloc_lines);
687 1192 print_page_summary();
688 1193 }
689 1194
@@ -695,14 +1200,10 @@ static void print_result(struct perf_session *session)
695 1200 print_page_result(session);
696 1201 }
697 1202
698 struct sort_dimension {
1203 static LIST_HEAD(slab_caller_sort);
699 const char name[20];
1204 static LIST_HEAD(slab_alloc_sort);
700 sort_fn_t cmp;
1205 static LIST_HEAD(page_caller_sort);
701 struct list_head list;
1206 static LIST_HEAD(page_alloc_sort);
702};
703
704static LIST_HEAD(caller_sort);
705static LIST_HEAD(alloc_sort);
706 1207
707 1208 static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
708 1209 struct list_head *sort_list)
@@ -751,10 +1252,12 @@ static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted
751 1252 }
752 1253 }
753 1254
754 static void sort_page_insert(struct rb_root *root, struct page_stat *data)
1255 static void sort_page_insert(struct rb_root *root, struct page_stat *data,
1256 struct list_head *sort_list)
755 1257 {
756 1258 struct rb_node **new = &root->rb_node;
757 1259 struct rb_node *parent = NULL;
1260 struct sort_dimension *sort;
758 1261
759 1262 while (*new) {
760 1263 struct page_stat *this;
@@ -763,8 +1266,11 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
763 1266 this = rb_entry(*new, struct page_stat, node);
764 1267 parent = *new;
765 1268
766 /* TODO: support more sort key */
1269 list_for_each_entry(sort, sort_list, list) {
767 cmp = data->alloc_bytes - this->alloc_bytes;
1270 cmp = sort->cmp(data, this);
1271 if (cmp)
1272 break;
1273 }
768 1274
769 1275 if (cmp > 0)
770 1276 new = &parent->rb_left;
@@ -776,7 +1282,8 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
776 1282 rb_insert_color(&data->node, root);
777 1283 }
778 1284
779 static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
1285 static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
1286 struct list_head *sort_list)
780 1287 {
781 1288 struct rb_node *node;
782 1289 struct page_stat *data;
@@ -788,7 +1295,7 @@ static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted
788 1295
789 1296 rb_erase(node, root);
790 1297 data = rb_entry(node, struct page_stat, node);
791 sort_page_insert(root_sorted, data);
1298 sort_page_insert(root_sorted, data, sort_list);
792 1299 }
793 1300 }
794 1301
@@ -796,12 +1303,20 @@ static void sort_result(void)
796 1303 {
797 1304 if (kmem_slab) {
798 1305 __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
799 &alloc_sort);
1306 &slab_alloc_sort);
800 1307 __sort_slab_result(&root_caller_stat, &root_caller_sorted,
801 &caller_sort);
1308 &slab_caller_sort);
802 1309 }
803 1310 if (kmem_page) {
804 __sort_page_result(&page_alloc_tree, &page_alloc_sorted);
1311 if (live_page)
1312 __sort_page_result(&page_live_tree, &page_alloc_sorted,
1313 &page_alloc_sort);
1314 else
1315 __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
1316 &page_alloc_sort);
1317
1318 __sort_page_result(&page_caller_tree, &page_caller_sorted,
1319 &page_caller_sort);
805 1320 }
806 1321 }
807 1322
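Both rb-tree insertion paths now walk a list of sort_dimension comparators built from the --sort string: the first key that distinguishes two entries decides their order, like a multi-column ORDER BY. A stand-alone sketch of that comparator chain over a hypothetical item type:

    #include <stddef.h>
    #include <stdio.h>

    struct item { unsigned long callsite; int order; };

    typedef int (*cmp_fn)(const struct item *, const struct item *);

    static int cmp_callsite(const struct item *a, const struct item *b)
    {
            return (a->callsite > b->callsite) - (a->callsite < b->callsite);
    }

    static int cmp_order(const struct item *a, const struct item *b)
    {
            return (a->order > b->order) - (a->order < b->order);
    }

    /* first non-zero comparator in user-chosen key order wins */
    static int multi_key_cmp(const struct item *a, const struct item *b,
                             cmp_fn *keys, size_t nr_keys)
    {
            for (size_t i = 0; i < nr_keys; i++) {
                    int ret = keys[i](a, b);

                    if (ret)
                            return ret;
            }
            return 0;
    }

    int main(void)
    {
            struct item x = { 0x1234, 3 }, y = { 0x1234, 1 };
            cmp_fn keys[] = { cmp_callsite, cmp_order };

            printf("%d\n", multi_key_cmp(&x, &y, keys, 2)); /* 1: order decides */
            return 0;
    }
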
@@ -850,8 +1365,12 @@ out:
850 1365 return err;
851 1366 }
852 1367
853 static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
1368 /* slab sort keys */
1369static int ptr_cmp(void *a, void *b)
854 1370 {
1371 struct alloc_stat *l = a;
1372 struct alloc_stat *r = b;
1373
855 1374 if (l->ptr < r->ptr)
856 1375 return -1;
857 1376 else if (l->ptr > r->ptr)
@@ -864,8 +1383,11 @@ static struct sort_dimension ptr_sort_dimension = {
864 1383 .cmp = ptr_cmp,
865 1384 };
866 1385
867 static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
1386 static int slab_callsite_cmp(void *a, void *b)
868 1387 {
1388 struct alloc_stat *l = a;
1389 struct alloc_stat *r = b;
1390
869 1391 if (l->call_site < r->call_site)
870 1392 return -1;
871 1393 else if (l->call_site > r->call_site)
@@ -875,11 +1397,14 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
875 1397
876 1398 static struct sort_dimension callsite_sort_dimension = {
877 1399 .name = "callsite",
878 .cmp = callsite_cmp,
1400 .cmp = slab_callsite_cmp,
879 1401 };
880 1402
881 static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
1403 static int hit_cmp(void *a, void *b)
882 1404 {
1405 struct alloc_stat *l = a;
1406 struct alloc_stat *r = b;
1407
883 1408 if (l->hit < r->hit)
884 1409 return -1;
885 1410 else if (l->hit > r->hit)
@@ -892,8 +1417,11 @@ static struct sort_dimension hit_sort_dimension = {
892 1417 .cmp = hit_cmp,
893 1418 };
894 1419
895 static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
1420 static int bytes_cmp(void *a, void *b)
896 1421 {
1422 struct alloc_stat *l = a;
1423 struct alloc_stat *r = b;
1424
897 1425 if (l->bytes_alloc < r->bytes_alloc)
898 1426 return -1;
899 1427 else if (l->bytes_alloc > r->bytes_alloc)
@@ -906,9 +1434,11 @@ static struct sort_dimension bytes_sort_dimension = {
906 1434 .cmp = bytes_cmp,
907 1435 };
908 1436
909 static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
1437 static int frag_cmp(void *a, void *b)
910 1438 {
911 1439 double x, y;
1440 struct alloc_stat *l = a;
1441 struct alloc_stat *r = b;
912 1442
913 1443 x = fragmentation(l->bytes_req, l->bytes_alloc);
914 1444 y = fragmentation(r->bytes_req, r->bytes_alloc);
@@ -925,8 +1455,11 @@ static struct sort_dimension frag_sort_dimension = {
925 1455 .cmp = frag_cmp,
926 1456 };
927 1457
928 static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
1458 static int pingpong_cmp(void *a, void *b)
929 1459 {
1460 struct alloc_stat *l = a;
1461 struct alloc_stat *r = b;
1462
930 1463 if (l->pingpong < r->pingpong)
931 1464 return -1;
932 1465 else if (l->pingpong > r->pingpong)
@@ -939,7 +1472,135 @@ static struct sort_dimension pingpong_sort_dimension = {
939 1472 .cmp = pingpong_cmp,
940 1473 };
941 1474
942 static struct sort_dimension *avail_sorts[] = {
1475 /* page sort keys */
1476static int page_cmp(void *a, void *b)
1477{
1478 struct page_stat *l = a;
1479 struct page_stat *r = b;
1480
1481 if (l->page < r->page)
1482 return -1;
1483 else if (l->page > r->page)
1484 return 1;
1485 return 0;
1486}
1487
1488static struct sort_dimension page_sort_dimension = {
1489 .name = "page",
1490 .cmp = page_cmp,
1491};
1492
1493static int page_callsite_cmp(void *a, void *b)
1494{
1495 struct page_stat *l = a;
1496 struct page_stat *r = b;
1497
1498 if (l->callsite < r->callsite)
1499 return -1;
1500 else if (l->callsite > r->callsite)
1501 return 1;
1502 return 0;
1503}
1504
1505static struct sort_dimension page_callsite_sort_dimension = {
1506 .name = "callsite",
1507 .cmp = page_callsite_cmp,
1508};
1509
1510static int page_hit_cmp(void *a, void *b)
1511{
1512 struct page_stat *l = a;
1513 struct page_stat *r = b;
1514
1515 if (l->nr_alloc < r->nr_alloc)
1516 return -1;
1517 else if (l->nr_alloc > r->nr_alloc)
1518 return 1;
1519 return 0;
1520}
1521
1522static struct sort_dimension page_hit_sort_dimension = {
1523 .name = "hit",
1524 .cmp = page_hit_cmp,
1525};
1526
1527static int page_bytes_cmp(void *a, void *b)
1528{
1529 struct page_stat *l = a;
1530 struct page_stat *r = b;
1531
1532 if (l->alloc_bytes < r->alloc_bytes)
1533 return -1;
1534 else if (l->alloc_bytes > r->alloc_bytes)
1535 return 1;
1536 return 0;
1537}
1538
1539static struct sort_dimension page_bytes_sort_dimension = {
1540 .name = "bytes",
1541 .cmp = page_bytes_cmp,
1542};
1543
1544static int page_order_cmp(void *a, void *b)
1545{
1546 struct page_stat *l = a;
1547 struct page_stat *r = b;
1548
1549 if (l->order < r->order)
1550 return -1;
1551 else if (l->order > r->order)
1552 return 1;
1553 return 0;
1554}
1555
1556static struct sort_dimension page_order_sort_dimension = {
1557 .name = "order",
1558 .cmp = page_order_cmp,
1559};
1560
1561static int migrate_type_cmp(void *a, void *b)
1562{
1563 struct page_stat *l = a;
1564 struct page_stat *r = b;
1565
1566 /* for internal use to find free'd page */
1567 if (l->migrate_type == -1U)
1568 return 0;
1569
1570 if (l->migrate_type < r->migrate_type)
1571 return -1;
1572 else if (l->migrate_type > r->migrate_type)
1573 return 1;
1574 return 0;
1575}
1576
1577static struct sort_dimension migrate_type_sort_dimension = {
1578 .name = "migtype",
1579 .cmp = migrate_type_cmp,
1580};
1581
1582static int gfp_flags_cmp(void *a, void *b)
1583{
1584 struct page_stat *l = a;
1585 struct page_stat *r = b;
1586
1587 /* for internal use to find free'd page */
1588 if (l->gfp_flags == -1U)
1589 return 0;
1590
1591 if (l->gfp_flags < r->gfp_flags)
1592 return -1;
1593 else if (l->gfp_flags > r->gfp_flags)
1594 return 1;
1595 return 0;
1596}
1597
1598static struct sort_dimension gfp_flags_sort_dimension = {
1599 .name = "gfp",
1600 .cmp = gfp_flags_cmp,
1601};
1602
1603static struct sort_dimension *slab_sorts[] = {
943 1604 &ptr_sort_dimension,
944 1605 &callsite_sort_dimension,
945 1606 &hit_sort_dimension,
@@ -948,16 +1609,44 @@ static struct sort_dimension *avail_sorts[] = {
948 1609 &pingpong_sort_dimension,
949 1610 };
950 1611
951 #define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
1612 static struct sort_dimension *page_sorts[] = {
1613 &page_sort_dimension,
1614 &page_callsite_sort_dimension,
1615 &page_hit_sort_dimension,
1616 &page_bytes_sort_dimension,
1617 &page_order_sort_dimension,
1618 &migrate_type_sort_dimension,
1619 &gfp_flags_sort_dimension,
1620};
1621
1622static int slab_sort_dimension__add(const char *tok, struct list_head *list)
1623{
1624 struct sort_dimension *sort;
1625 int i;
1626
1627 for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
1628 if (!strcmp(slab_sorts[i]->name, tok)) {
1629 sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
1630 if (!sort) {
1631 pr_err("%s: memdup failed\n", __func__);
1632 return -1;
1633 }
1634 list_add_tail(&sort->list, list);
1635 return 0;
1636 }
1637 }
1638
1639 return -1;
1640}
952 1641
953 static int sort_dimension__add(const char *tok, struct list_head *list)
1642 static int page_sort_dimension__add(const char *tok, struct list_head *list)
954 1643 {
955 1644 struct sort_dimension *sort;
956 1645 int i;
957 1646
958 for (i = 0; i < NUM_AVAIL_SORTS; i++) {
1647 for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
959 if (!strcmp(avail_sorts[i]->name, tok)) {
1648 if (!strcmp(page_sorts[i]->name, tok)) {
960 sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
1649 sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
961 1650 if (!sort) {
962 1651 pr_err("%s: memdup failed\n", __func__);
963 1652 return -1;
@@ -970,7 +1659,33 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
970 1659 return -1;
971 1660 }
972 1661
973 static int setup_sorting(struct list_head *sort_list, const char *arg)
1662 static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
1663{
1664 char *tok;
1665 char *str = strdup(arg);
1666 char *pos = str;
1667
1668 if (!str) {
1669 pr_err("%s: strdup failed\n", __func__);
1670 return -1;
1671 }
1672
1673 while (true) {
1674 tok = strsep(&pos, ",");
1675 if (!tok)
1676 break;
1677 if (slab_sort_dimension__add(tok, sort_list) < 0) {
1678 error("Unknown slab --sort key: '%s'", tok);
1679 free(str);
1680 return -1;
1681 }
1682 }
1683
1684 free(str);
1685 return 0;
1686}
1687
1688static int setup_page_sorting(struct list_head *sort_list, const char *arg)
974 1689 {
975 1690 char *tok;
976 1691 char *str = strdup(arg);
@@ -985,8 +1700,8 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
985 1700 tok = strsep(&pos, ",");
986 1701 if (!tok)
987 1702 break;
988 if (sort_dimension__add(tok, sort_list) < 0) {
1703 if (page_sort_dimension__add(tok, sort_list) < 0) {
989 error("Unknown --sort key: '%s'", tok);
1704 error("Unknown page --sort key: '%s'", tok);
990 1705 free(str);
991 1706 return -1;
992 1707 }
@@ -1002,10 +1717,18 @@ static int parse_sort_opt(const struct option *opt __maybe_unused,
1002 1717 if (!arg)
1003 1718 return -1;
1004 1719
1005 if (caller_flag > alloc_flag)
1720 if (kmem_page > kmem_slab ||
1006 return setup_sorting(&caller_sort, arg);
1721 (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
1007 else
1722 if (caller_flag > alloc_flag)
1008 return setup_sorting(&alloc_sort, arg);
1723 return setup_page_sorting(&page_caller_sort, arg);
1724 else
1725 return setup_page_sorting(&page_alloc_sort, arg);
1726 } else {
1727 if (caller_flag > alloc_flag)
1728 return setup_slab_sorting(&slab_caller_sort, arg);
1729 else
1730 return setup_slab_sorting(&slab_alloc_sort, arg);
1731 }
1009 1732
1010 1733 return 0;
1011 1734 }
@@ -1084,7 +1807,7 @@ static int __cmd_record(int argc, const char **argv)
1084 1807 if (kmem_slab)
1085 1808 rec_argc += ARRAY_SIZE(slab_events);
1086 1809 if (kmem_page)
1087 rec_argc += ARRAY_SIZE(page_events);
1810 rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
1088 1811
1089 1812 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1090 1813
@@ -1099,6 +1822,8 @@ static int __cmd_record(int argc, const char **argv)
1099 1822 rec_argv[i] = strdup(slab_events[j]);
1100 1823 }
1101 1824 if (kmem_page) {
1825 rec_argv[i++] = strdup("-g");
1826
1102 1827 for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
1103 1828 rec_argv[i] = strdup(page_events[j]);
1104 1829 }
@@ -1109,9 +1834,26 @@ static int __cmd_record(int argc, const char **argv)
1109 1834 return cmd_record(i, rec_argv, NULL);
1110 1835 }
1111 1836
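Because page-mode analysis resolves callsites from callchains, __cmd_record() above now always slips -g into the record command line when page events are requested. A plausible session with the options this series adds (workload is illustrative):

    perf kmem record --page sleep 10
    perf kmem stat --page --caller --live
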
1837static int kmem_config(const char *var, const char *value, void *cb)
1838{
1839 if (!strcmp(var, "kmem.default")) {
1840 if (!strcmp(value, "slab"))
1841 kmem_default = KMEM_SLAB;
1842 else if (!strcmp(value, "page"))
1843 kmem_default = KMEM_PAGE;
1844 else
1845 pr_err("invalid default value ('slab' or 'page' required): %s\n",
1846 value);
1847 return 0;
1848 }
1849
1850 return perf_default_config(var, value, cb);
1851}
1852
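kmem_config() plugs into perf's config machinery, so the mode used when neither --slab nor --page is given can be chosen per user. Going by the handler above, a ~/.perfconfig entry would look like:

    [kmem]
            default = page
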
1112 1853 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1113 1854 {
1114 const char * const default_sort_order = "frag,hit,bytes";
1855 const char * const default_slab_sort = "frag,hit,bytes";
1856 const char * const default_page_sort = "bytes,hit";
1115 1857 struct perf_data_file file = {
1116 1858 .mode = PERF_DATA_MODE_READ,
1117 1859 };
@@ -1124,8 +1866,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1124 1866 OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
1125 1867 "show per-allocation statistics", parse_alloc_opt),
1126 1868 OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
1127 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
1869 "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
1128 parse_sort_opt),
1870 "page, order, migtype, gfp", parse_sort_opt),
1129 1871 OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
1130 1872 OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
1131 1873 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
@@ -1133,6 +1875,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1133 1875 parse_slab_opt),
1134 1876 OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
1135 1877 parse_page_opt),
1878 OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
1136 1879 OPT_END()
1137 1880 };
1138 1881 const char *const kmem_subcommands[] = { "record", "stat", NULL };
@@ -1142,15 +1885,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1142 1885 };
1143 1886 struct perf_session *session;
1144 1887 int ret = -1;
1888 const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
1145 1889
1890 perf_config(kmem_config, NULL);
1146 1891 argc = parse_options_subcommand(argc, argv, kmem_options,
1147 1892 kmem_subcommands, kmem_usage, 0);
1148 1893
1149 1894 if (!argc)
1150 1895 usage_with_options(kmem_usage, kmem_options);
1151 1896
1152 if (kmem_slab == 0 && kmem_page == 0)
1897 if (kmem_slab == 0 && kmem_page == 0) {
1153 kmem_slab = 1; /* for backward compatibility */
1898 if (kmem_default == KMEM_SLAB)
1899 kmem_slab = 1;
1900 else
1901 kmem_page = 1;
1902 }
1154 1903
1155 1904 if (!strncmp(argv[0], "rec", 3)) {
1156 1905 symbol__init(NULL);
@@ -1159,19 +1908,30 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1159 1908
1160 1909 file.path = input_name;
1161 1910
1162 session = perf_session__new(&file, false, &perf_kmem);
1911 kmem_session = session = perf_session__new(&file, false, &perf_kmem);
1163 1912 if (session == NULL)
1164 1913 return -1;
1165 1914
1915 if (kmem_slab) {
1916 if (!perf_evlist__find_tracepoint_by_name(session->evlist,
1917 "kmem:kmalloc")) {
1918 pr_err(errmsg, "slab", "slab");
1919 return -1;
1920 }
1921 }
1922
1166 if (kmem_page) { 1923 if (kmem_page) {
1167 struct perf_evsel *evsel = perf_evlist__first(session->evlist); 1924 struct perf_evsel *evsel;
1168 1925
1169 if (evsel == NULL || evsel->tp_format == NULL) { 1926 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
1170 pr_err("invalid event found.. aborting\n"); 1927 "kmem:mm_page_alloc");
1928 if (evsel == NULL) {
1929 pr_err(errmsg, "page", "page");
1171 return -1; 1930 return -1;
1172 } 1931 }
1173 1932
1174 kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); 1933 kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
1934 symbol_conf.use_callchain = true;
1175 } 1935 }
1176 1936
1177 symbol__init(&session->header.env); 1937 symbol__init(&session->header.env);
@@ -1182,11 +1942,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
1182 if (cpu__setup_cpunode_map()) 1942 if (cpu__setup_cpunode_map())
1183 goto out_delete; 1943 goto out_delete;
1184 1944
1185 if (list_empty(&caller_sort)) 1945 if (list_empty(&slab_caller_sort))
1186 setup_sorting(&caller_sort, default_sort_order); 1946 setup_slab_sorting(&slab_caller_sort, default_slab_sort);
1187 if (list_empty(&alloc_sort)) 1947 if (list_empty(&slab_alloc_sort))
1188 setup_sorting(&alloc_sort, default_sort_order); 1948 setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
1189 1949 if (list_empty(&page_caller_sort))
1950 setup_page_sorting(&page_caller_sort, default_page_sort);
1951 if (list_empty(&page_alloc_sort))
1952 setup_page_sorting(&page_alloc_sort, default_page_sort);
1953
1954 if (kmem_page) {
1955 setup_page_sorting(&page_alloc_sort_input,
1956 "page,order,migtype,gfp");
1957 setup_page_sorting(&page_caller_sort_input,
1958 "callsite,order,migtype,gfp");
1959 }
1190 ret = __cmd_kmem(session); 1960 ret = __cmd_kmem(session);
1191 } else 1961 } else
1192 usage_with_options(kmem_usage, kmem_options); 1962 usage_with_options(kmem_usage, kmem_options);
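The kmem hunks above lean on two pieces that sit outside this excerpt: kmem_config(), registered through perf_config(), and the tracepoint lookups that validate the recorded session. As a rough sketch (assuming the standard perf config callback signature; the "kmem.default" key and the KMEM_SLAB/KMEM_PAGE constants follow the diff, but the body below is illustrative, not the upstream code), kmem_config() could look like:

	static int kmem_config(const char *var, const char *value,
			       void *cb __maybe_unused)
	{
		if (!strcmp(var, "kmem.default")) {
			if (!strcmp(value, "slab"))
				kmem_default = KMEM_SLAB;
			else if (!strcmp(value, "page"))
				kmem_default = KMEM_PAGE;
			else
				pr_err("invalid kmem.default value: %s\n", value);
		}
		return 0;
	}

A session recorded with 'perf kmem record --page' carries kmem:mm_page_alloc but no kmem:kmalloc, so the perf_evlist__find_tracepoint_by_name() checks above turn what used to be a confusing failure into the pointed "No %s allocation events found" hint.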
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 1f9338f6109c..74878cd75078 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -651,6 +651,7 @@ static int process_sample_event(struct perf_tool *tool,
651 struct perf_evsel *evsel, 651 struct perf_evsel *evsel,
652 struct machine *machine) 652 struct machine *machine)
653{ 653{
654 int err = 0;
654 struct thread *thread; 655 struct thread *thread;
655 struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, 656 struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
656 tool); 657 tool);
@@ -666,9 +667,10 @@ static int process_sample_event(struct perf_tool *tool,
666 } 667 }
667 668
668 if (!handle_kvm_event(kvm, thread, evsel, sample)) 669 if (!handle_kvm_event(kvm, thread, evsel, sample))
669 return -1; 670 err = -1;
670 671
671 return 0; 672 thread__put(thread);
673 return err;
672} 674}
673 675
674static int cpu_isa_config(struct perf_kvm_stat *kvm) 676static int cpu_isa_config(struct perf_kvm_stat *kvm)
@@ -1309,6 +1311,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1309 "show events other than" 1311 "show events other than"
1310 " HLT (x86 only) or Wait state (s390 only)" 1312 " HLT (x86 only) or Wait state (s390 only)"
1311 " that take longer than duration usecs"), 1313 " that take longer than duration usecs"),
1314 OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
1315 "per thread proc mmap processing timeout in ms"),
1312 OPT_END() 1316 OPT_END()
1313 }; 1317 };
1314 const char * const live_usage[] = { 1318 const char * const live_usage[] = {
@@ -1336,6 +1340,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1336 kvm->opts.target.uses_mmap = false; 1340 kvm->opts.target.uses_mmap = false;
1337 kvm->opts.target.uid_str = NULL; 1341 kvm->opts.target.uid_str = NULL;
1338 kvm->opts.target.uid = UINT_MAX; 1342 kvm->opts.target.uid = UINT_MAX;
1343 kvm->opts.proc_map_timeout = 500;
1339 1344
1340 symbol__init(NULL); 1345 symbol__init(NULL);
1341 disable_buildid_cache(); 1346 disable_buildid_cache();
@@ -1391,7 +1396,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1391 perf_session__set_id_hdr_size(kvm->session); 1396 perf_session__set_id_hdr_size(kvm->session);
1392 ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); 1397 ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
1393 machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, 1398 machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
1394 kvm->evlist->threads, false); 1399 kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
1395 err = kvm_live_open_events(kvm); 1400 err = kvm_live_open_events(kvm);
1396 if (err) 1401 if (err)
1397 goto out; 1402 goto out;
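The process_sample_event() change here recurs across this series: machine__findnew_thread() now returns a reference the caller owns, so the early "return -1" is folded into an err variable that falls through to thread__put(). A minimal illustration of the discipline, with do_work() as a hypothetical stand-in for the per-sample handling:

	/* Illustrative only: pair every successful findnew with a put. */
	static int process_one(struct machine *machine, pid_t pid, pid_t tid)
	{
		int err = 0;
		struct thread *thread = machine__findnew_thread(machine, pid, tid);

		if (thread == NULL)
			return -1;	/* nothing acquired, nothing to release */

		if (do_work(thread))	/* hypothetical handler */
			err = -1;	/* remember the failure... */

		thread__put(thread);	/* ...but always drop the reference */
		return err;
	}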
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d49c2ab85fc2..de16aaed516e 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -769,6 +769,7 @@ static void dump_threads(void)
769 t = perf_session__findnew(session, st->tid); 769 t = perf_session__findnew(session, st->tid);
770 pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); 770 pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
771 node = rb_next(node); 771 node = rb_next(node);
772 thread__put(t);
772 }; 773 };
773} 774}
774 775
@@ -810,6 +811,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
810 struct perf_evsel *evsel, 811 struct perf_evsel *evsel,
811 struct machine *machine) 812 struct machine *machine)
812{ 813{
814 int err = 0;
813 struct thread *thread = machine__findnew_thread(machine, sample->pid, 815 struct thread *thread = machine__findnew_thread(machine, sample->pid,
814 sample->tid); 816 sample->tid);
815 817
@@ -821,10 +823,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
821 823
822 if (evsel->handler != NULL) { 824 if (evsel->handler != NULL) {
823 tracepoint_handler f = evsel->handler; 825 tracepoint_handler f = evsel->handler;
824 return f(evsel, sample); 826 err = f(evsel, sample);
825 } 827 }
826 828
827 return 0; 829 thread__put(thread);
830
831 return err;
828} 832}
829 833
830static void sort_result(void) 834static void sort_result(void)
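builtin-lock gets the same reference fixes, both in dump_threads() and in its sample dispatcher. For context, the evsel->handler pointer used above is bound once per tracepoint name at session setup; abridged from memory of builtin-lock.c, so the entry names may differ:

	/* Each tracepoint name is bound to a handler once; from then on
	 * process_sample_event() just calls evsel->handler, as above. */
	static const struct perf_evsel_str_handler lock_tracepoints[] = {
		{ "lock:lock_acquire",	perf_evsel__process_lock_acquire },
		{ "lock:lock_release",	perf_evsel__process_lock_release },
		/* acquired/contended entries omitted here */
	};

	perf_session__set_tracepoints_handlers(session, lock_tracepoints);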
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 675216e08bfc..da2ec06f0742 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -74,7 +74,7 @@ dump_raw_samples(struct perf_tool *tool,
74 } 74 }
75 75
76 if (al.filtered || (mem->hide_unresolved && al.sym == NULL)) 76 if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
77 return 0; 77 goto out_put;
78 78
79 if (al.map != NULL) 79 if (al.map != NULL)
80 al.map->dso->hit = 1; 80 al.map->dso->hit = 1;
@@ -103,7 +103,8 @@ dump_raw_samples(struct perf_tool *tool,
103 symbol_conf.field_sep, 103 symbol_conf.field_sep,
104 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", 104 al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
105 al.sym ? al.sym->name : "???"); 105 al.sym ? al.sym->name : "???");
106 106out_put:
107 addr_location__put(&al);
107 return 0; 108 return 0;
108} 109}
109 110
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f7b1af67e9f6..1272559fa22d 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -44,25 +44,19 @@
44 44
45#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" 45#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
46#define DEFAULT_FUNC_FILTER "!_*" 46#define DEFAULT_FUNC_FILTER "!_*"
47#define DEFAULT_LIST_FILTER "*:*"
47 48
48/* Session management structure */ 49/* Session management structure */
49static struct { 50static struct {
51 int command; /* Command short_name */
50 bool list_events; 52 bool list_events;
51 bool force_add;
52 bool show_lines;
53 bool show_vars;
54 bool show_ext_vars;
55 bool show_funcs;
56 bool mod_events;
57 bool uprobes; 53 bool uprobes;
58 bool quiet; 54 bool quiet;
59 bool target_used; 55 bool target_used;
60 int nevents; 56 int nevents;
61 struct perf_probe_event events[MAX_PROBES]; 57 struct perf_probe_event events[MAX_PROBES];
62 struct strlist *dellist;
63 struct line_range line_range; 58 struct line_range line_range;
64 char *target; 59 char *target;
65 int max_probe_points;
66 struct strfilter *filter; 60 struct strfilter *filter;
67} params; 61} params;
68 62
@@ -93,6 +87,28 @@ static int parse_probe_event(const char *str)
93 return ret; 87 return ret;
94} 88}
95 89
90static int params_add_filter(const char *str)
91{
92 const char *err = NULL;
93 int ret = 0;
94
95 pr_debug2("Add filter: %s\n", str);
96 if (!params.filter) {
97 params.filter = strfilter__new(str, &err);
98 if (!params.filter)
99 ret = err ? -EINVAL : -ENOMEM;
100 } else
101 ret = strfilter__or(params.filter, str, &err);
102
103 if (ret == -EINVAL) {
104 pr_err("Filter parse error at %td.\n", err - str + 1);
105 pr_err("Source: \"%s\"\n", str);
106 pr_err(" %*c\n", (int)(err - str + 1), '^');
107 }
108
109 return ret;
110}
111
96static int set_target(const char *ptr) 112static int set_target(const char *ptr)
97{ 113{
98 int found = 0; 114 int found = 0;
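Note the behavior change params_add_filter() introduces: the old opt_set_filter() threw away any previous filter, while repeated filter arguments now accumulate through strfilter__or(). A hedged usage sketch of the strfilter API as it is used above, with error handling trimmed:

	const char *err = NULL;
	struct strfilter *filter;

	filter = strfilter__new("vfs_*", &err);		/* first pattern */
	if (filter && strfilter__or(filter, "sys_*", &err) == 0) {
		/* filter now accepts names matching either pattern,
		 * i.e. the effective rule is "vfs_* | sys_*" */
	}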
@@ -152,34 +168,11 @@ static int parse_probe_event_argv(int argc, const char **argv)
152 168
153 len += sprintf(&buf[len], "%s ", argv[i]); 169 len += sprintf(&buf[len], "%s ", argv[i]);
154 } 170 }
155 params.mod_events = true;
156 ret = parse_probe_event(buf); 171 ret = parse_probe_event(buf);
157 free(buf); 172 free(buf);
158 return ret; 173 return ret;
159} 174}
160 175
161static int opt_add_probe_event(const struct option *opt __maybe_unused,
162 const char *str, int unset __maybe_unused)
163{
164 if (str) {
165 params.mod_events = true;
166 return parse_probe_event(str);
167 } else
168 return 0;
169}
170
171static int opt_del_probe_event(const struct option *opt __maybe_unused,
172 const char *str, int unset __maybe_unused)
173{
174 if (str) {
175 params.mod_events = true;
176 if (!params.dellist)
177 params.dellist = strlist__new(true, NULL);
178 strlist__add(params.dellist, str);
179 }
180 return 0;
181}
182
183static int opt_set_target(const struct option *opt, const char *str, 176static int opt_set_target(const struct option *opt, const char *str,
184 int unset __maybe_unused) 177 int unset __maybe_unused)
185{ 178{
@@ -217,8 +210,10 @@ static int opt_set_target(const struct option *opt, const char *str,
217 return ret; 210 return ret;
218} 211}
219 212
213/* Command option callbacks */
214
220#ifdef HAVE_DWARF_SUPPORT 215#ifdef HAVE_DWARF_SUPPORT
221static int opt_show_lines(const struct option *opt __maybe_unused, 216static int opt_show_lines(const struct option *opt,
222 const char *str, int unset __maybe_unused) 217 const char *str, int unset __maybe_unused)
223{ 218{
224 int ret = 0; 219 int ret = 0;
@@ -226,19 +221,19 @@ static int opt_show_lines(const struct option *opt __maybe_unused,
226 if (!str) 221 if (!str)
227 return 0; 222 return 0;
228 223
229 if (params.show_lines) { 224 if (params.command == 'L') {
230 pr_warning("Warning: more than one --line options are" 225 pr_warning("Warning: more than one --line options are"
231 " detected. Only the first one is valid.\n"); 226 " detected. Only the first one is valid.\n");
232 return 0; 227 return 0;
233 } 228 }
234 229
235 params.show_lines = true; 230 params.command = opt->short_name;
236 ret = parse_line_range_desc(str, &params.line_range); 231 ret = parse_line_range_desc(str, &params.line_range);
237 232
238 return ret; 233 return ret;
239} 234}
240 235
241static int opt_show_vars(const struct option *opt __maybe_unused, 236static int opt_show_vars(const struct option *opt,
242 const char *str, int unset __maybe_unused) 237 const char *str, int unset __maybe_unused)
243{ 238{
244 struct perf_probe_event *pev = &params.events[params.nevents]; 239 struct perf_probe_event *pev = &params.events[params.nevents];
@@ -252,29 +247,39 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
252 pr_err(" Error: '--vars' doesn't accept arguments.\n"); 247 pr_err(" Error: '--vars' doesn't accept arguments.\n");
253 return -EINVAL; 248 return -EINVAL;
254 } 249 }
255 params.show_vars = true; 250 params.command = opt->short_name;
256 251
257 return ret; 252 return ret;
258} 253}
259#endif 254#endif
255static int opt_add_probe_event(const struct option *opt,
256 const char *str, int unset __maybe_unused)
257{
258 if (str) {
259 params.command = opt->short_name;
260 return parse_probe_event(str);
261 }
262
263 return 0;
264}
265
266static int opt_set_filter_with_command(const struct option *opt,
267 const char *str, int unset)
268{
269 if (!unset)
270 params.command = opt->short_name;
271
272 if (str)
273 return params_add_filter(str);
274
275 return 0;
276}
260 277
261static int opt_set_filter(const struct option *opt __maybe_unused, 278static int opt_set_filter(const struct option *opt __maybe_unused,
262 const char *str, int unset __maybe_unused) 279 const char *str, int unset __maybe_unused)
263{ 280{
264 const char *err; 281 if (str)
265 282 return params_add_filter(str);
266 if (str) {
267 pr_debug2("Set filter: %s\n", str);
268 if (params.filter)
269 strfilter__delete(params.filter);
270 params.filter = strfilter__new(str, &err);
271 if (!params.filter) {
272 pr_err("Filter parse error at %td.\n", err - str + 1);
273 pr_err("Source: \"%s\"\n", str);
274 pr_err(" %*c\n", (int)(err - str + 1), '^');
275 return -EINVAL;
276 }
277 }
278 283
279 return 0; 284 return 0;
280} 285}
@@ -290,8 +295,6 @@ static void cleanup_params(void)
290 295
291 for (i = 0; i < params.nevents; i++) 296 for (i = 0; i < params.nevents; i++)
292 clear_perf_probe_event(params.events + i); 297 clear_perf_probe_event(params.events + i);
293 if (params.dellist)
294 strlist__delete(params.dellist);
295 line_range__clear(&params.line_range); 298 line_range__clear(&params.line_range);
296 free(params.target); 299 free(params.target);
297 if (params.filter) 300 if (params.filter)
@@ -316,22 +319,24 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
316 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", 319 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
317 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 320 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
318 "perf probe [<options>] --del '[GROUP:]EVENT' ...", 321 "perf probe [<options>] --del '[GROUP:]EVENT' ...",
319 "perf probe --list", 322 "perf probe --list [GROUP:]EVENT ...",
320#ifdef HAVE_DWARF_SUPPORT 323#ifdef HAVE_DWARF_SUPPORT
321 "perf probe [<options>] --line 'LINEDESC'", 324 "perf probe [<options>] --line 'LINEDESC'",
322 "perf probe [<options>] --vars 'PROBEPOINT'", 325 "perf probe [<options>] --vars 'PROBEPOINT'",
323#endif 326#endif
327 "perf probe [<options>] --funcs",
324 NULL 328 NULL
325}; 329 };
326 struct option options[] = { 330 struct option options[] = {
327 OPT_INCR('v', "verbose", &verbose, 331 OPT_INCR('v', "verbose", &verbose,
328 "be more verbose (show parsed arguments, etc)"), 332 "be more verbose (show parsed arguments, etc)"),
329 OPT_BOOLEAN('q', "quiet", &params.quiet, 333 OPT_BOOLEAN('q', "quiet", &params.quiet,
330 "be quiet (do not show any mesages)"), 334 "be quiet (do not show any mesages)"),
331 OPT_BOOLEAN('l', "list", &params.list_events, 335 OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
332 "list up current probe events"), 336 "list up probe events",
337 opt_set_filter_with_command, DEFAULT_LIST_FILTER),
333 OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", 338 OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
334 opt_del_probe_event), 339 opt_set_filter_with_command),
335 OPT_CALLBACK('a', "add", NULL, 340 OPT_CALLBACK('a', "add", NULL,
336#ifdef HAVE_DWARF_SUPPORT 341#ifdef HAVE_DWARF_SUPPORT
337 "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT" 342 "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
@@ -356,7 +361,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
356 "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n", 361 "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
357#endif 362#endif
358 opt_add_probe_event), 363 opt_add_probe_event),
359 OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events" 364 OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
360 " with existing name"), 365 " with existing name"),
361#ifdef HAVE_DWARF_SUPPORT 366#ifdef HAVE_DWARF_SUPPORT
362 OPT_CALLBACK('L', "line", NULL, 367 OPT_CALLBACK('L', "line", NULL,
@@ -365,8 +370,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
365 OPT_CALLBACK('V', "vars", NULL, 370 OPT_CALLBACK('V', "vars", NULL,
366 "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT", 371 "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
367 "Show accessible variables on PROBEDEF", opt_show_vars), 372 "Show accessible variables on PROBEDEF", opt_show_vars),
368 OPT_BOOLEAN('\0', "externs", &params.show_ext_vars, 373 OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
369 "Show external variables too (with --vars only)"), 374 "Show external variables too (with --vars only)"),
375 OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
376 "Show variables location range in scope (with --vars only)"),
370 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 377 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
371 "file", "vmlinux pathname"), 378 "file", "vmlinux pathname"),
372 OPT_STRING('s', "source", &symbol_conf.source_prefix, 379 OPT_STRING('s', "source", &symbol_conf.source_prefix,
@@ -374,12 +381,15 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
374 OPT_CALLBACK('m', "module", NULL, "modname|path", 381 OPT_CALLBACK('m', "module", NULL, "modname|path",
375 "target module name (for online) or path (for offline)", 382 "target module name (for online) or path (for offline)",
376 opt_set_target), 383 opt_set_target),
384 OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
385 "Don't search inlined functions"),
377#endif 386#endif
378 OPT__DRY_RUN(&probe_event_dry_run), 387 OPT__DRY_RUN(&probe_event_dry_run),
379 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 388 OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
380 "Set how many probe points can be found for a probe."), 389 "Set how many probe points can be found for a probe."),
381 OPT_BOOLEAN('F', "funcs", &params.show_funcs, 390 OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
382 "Show potential probe-able functions."), 391 "Show potential probe-able functions.",
392 opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
383 OPT_CALLBACK('\0', "filter", NULL, 393 OPT_CALLBACK('\0', "filter", NULL,
384 "[!]FILTER", "Set a filter (with --vars/funcs only)\n" 394 "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
385 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" 395 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
@@ -402,6 +412,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
402 set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); 412 set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
403 set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); 413 set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
404#endif 414#endif
415 set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
405 416
406 argc = parse_options(argc, argv, options, probe_usage, 417 argc = parse_options(argc, argv, options, probe_usage,
407 PARSE_OPT_STOP_AT_NON_OPTION); 418 PARSE_OPT_STOP_AT_NON_OPTION);
@@ -410,11 +421,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
410 pr_warning(" Error: '-' is not supported.\n"); 421 pr_warning(" Error: '-' is not supported.\n");
411 usage_with_options(probe_usage, options); 422 usage_with_options(probe_usage, options);
412 } 423 }
424 if (params.command && params.command != 'a') {
425 pr_warning(" Error: another command except --add is set.\n");
426 usage_with_options(probe_usage, options);
427 }
413 ret = parse_probe_event_argv(argc, argv); 428 ret = parse_probe_event_argv(argc, argv);
414 if (ret < 0) { 429 if (ret < 0) {
415 pr_err_with_code(" Error: Command Parse Error.", ret); 430 pr_err_with_code(" Error: Command Parse Error.", ret);
416 return ret; 431 return ret;
417 } 432 }
433 params.command = 'a';
418 } 434 }
419 435
420 if (params.quiet) { 436 if (params.quiet) {
@@ -425,89 +441,70 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
425 verbose = -1; 441 verbose = -1;
426 } 442 }
427 443
428 if (params.max_probe_points == 0) 444 if (probe_conf.max_probes == 0)
429 params.max_probe_points = MAX_PROBES; 445 probe_conf.max_probes = MAX_PROBES;
430
431 if ((!params.nevents && !params.dellist && !params.list_events &&
432 !params.show_lines && !params.show_funcs))
433 usage_with_options(probe_usage, options);
434 446
435 /* 447 /*
436 * Only consider the user's kernel image path if given. 448 * Only consider the user's kernel image path if given.
437 */ 449 */
438 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 450 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
439 451
440 if (params.list_events) { 452 switch (params.command) {
453 case 'l':
441 if (params.uprobes) { 454 if (params.uprobes) {
442 pr_warning(" Error: Don't use --list with --exec.\n"); 455 pr_warning(" Error: Don't use --list with --exec.\n");
443 usage_with_options(probe_usage, options); 456 usage_with_options(probe_usage, options);
444 } 457 }
445 ret = show_perf_probe_events(); 458 ret = show_perf_probe_events(params.filter);
446 if (ret < 0) 459 if (ret < 0)
447 pr_err_with_code(" Error: Failed to show event list.", ret); 460 pr_err_with_code(" Error: Failed to show event list.", ret);
448 return ret; 461 return ret;
449 } 462 case 'F':
450 if (params.show_funcs) {
451 if (!params.filter)
452 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
453 NULL);
454 ret = show_available_funcs(params.target, params.filter, 463 ret = show_available_funcs(params.target, params.filter,
455 params.uprobes); 464 params.uprobes);
456 strfilter__delete(params.filter);
457 params.filter = NULL;
458 if (ret < 0) 465 if (ret < 0)
459 pr_err_with_code(" Error: Failed to show functions.", ret); 466 pr_err_with_code(" Error: Failed to show functions.", ret);
460 return ret; 467 return ret;
461 }
462
463#ifdef HAVE_DWARF_SUPPORT 468#ifdef HAVE_DWARF_SUPPORT
464 if (params.show_lines) { 469 case 'L':
465 ret = show_line_range(&params.line_range, params.target, 470 ret = show_line_range(&params.line_range, params.target,
466 params.uprobes); 471 params.uprobes);
467 if (ret < 0) 472 if (ret < 0)
468 pr_err_with_code(" Error: Failed to show lines.", ret); 473 pr_err_with_code(" Error: Failed to show lines.", ret);
469 return ret; 474 return ret;
470 } 475 case 'V':
471 if (params.show_vars) {
472 if (!params.filter) 476 if (!params.filter)
473 params.filter = strfilter__new(DEFAULT_VAR_FILTER, 477 params.filter = strfilter__new(DEFAULT_VAR_FILTER,
474 NULL); 478 NULL);
475 479
476 ret = show_available_vars(params.events, params.nevents, 480 ret = show_available_vars(params.events, params.nevents,
477 params.max_probe_points, 481 params.filter);
478 params.target,
479 params.filter,
480 params.show_ext_vars);
481 strfilter__delete(params.filter);
482 params.filter = NULL;
483 if (ret < 0) 482 if (ret < 0)
484 pr_err_with_code(" Error: Failed to show vars.", ret); 483 pr_err_with_code(" Error: Failed to show vars.", ret);
485 return ret; 484 return ret;
486 }
487#endif 485#endif
488 486 case 'd':
489 if (params.dellist) { 487 ret = del_perf_probe_events(params.filter);
490 ret = del_perf_probe_events(params.dellist);
491 if (ret < 0) { 488 if (ret < 0) {
492 pr_err_with_code(" Error: Failed to delete events.", ret); 489 pr_err_with_code(" Error: Failed to delete events.", ret);
493 return ret; 490 return ret;
494 } 491 }
495 } 492 break;
496 493 case 'a':
497 if (params.nevents) {
498 /* Ensure the last given target is used */ 494 /* Ensure the last given target is used */
499 if (params.target && !params.target_used) { 495 if (params.target && !params.target_used) {
500 pr_warning(" Error: -x/-m must follow the probe definitions.\n"); 496 pr_warning(" Error: -x/-m must follow the probe definitions.\n");
501 usage_with_options(probe_usage, options); 497 usage_with_options(probe_usage, options);
502 } 498 }
503 499
504 ret = add_perf_probe_events(params.events, params.nevents, 500 ret = add_perf_probe_events(params.events, params.nevents);
505 params.max_probe_points,
506 params.force_add);
507 if (ret < 0) { 501 if (ret < 0) {
508 pr_err_with_code(" Error: Failed to add events.", ret); 502 pr_err_with_code(" Error: Failed to add events.", ret);
509 return ret; 503 return ret;
510 } 504 }
505 break;
506 default:
507 usage_with_options(probe_usage, options);
511 } 508 }
512 return 0; 509 return 0;
513} 510}
@@ -522,5 +519,5 @@ int cmd_probe(int argc, const char **argv, const char *prefix)
522 cleanup_params(); 519 cleanup_params();
523 } 520 }
524 521
525 return ret; 522 return ret < 0 ? ret : 0;
526} 523}
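The net effect of the builtin-probe rework above: every mode option records itself in params.command via opt->short_name, __cmd_probe() dispatches through a single switch, and mutual exclusion comes from PARSE_OPT_EXCLUSIVE rather than a pile of booleans. Condensed to its skeleton (shortened, hypothetical helper names; not the actual code):

	static int command;	/* one of 'l', 'd', 'a', 'F', 'L', 'V', or 0 */

	static int opt_cmd(const struct option *opt, const char *arg,
			   int unset)
	{
		if (!unset)
			command = opt->short_name;	/* remember the mode */
		return arg ? add_filter(arg) : 0;	/* optional argument */
	}

	/* ...after option parsing... */
	switch (command) {
	case 'l': return do_list();	/* hypothetical per-mode helpers */
	case 'd': return do_delete();
	case 'a': return do_add();
	default:  usage();
	}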
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c3efdfb630b5..de165a1b9240 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -27,6 +27,8 @@
27#include "util/cpumap.h" 27#include "util/cpumap.h"
28#include "util/thread_map.h" 28#include "util/thread_map.h"
29#include "util/data.h" 29#include "util/data.h"
30#include "util/auxtrace.h"
31#include "util/parse-branch-options.h"
30 32
31#include <unistd.h> 33#include <unistd.h>
32#include <sched.h> 34#include <sched.h>
@@ -38,6 +40,7 @@ struct record {
38 struct record_opts opts; 40 struct record_opts opts;
39 u64 bytes_written; 41 u64 bytes_written;
40 struct perf_data_file file; 42 struct perf_data_file file;
43 struct auxtrace_record *itr;
41 struct perf_evlist *evlist; 44 struct perf_evlist *evlist;
42 struct perf_session *session; 45 struct perf_session *session;
43 const char *progname; 46 const char *progname;
@@ -110,9 +113,12 @@ out:
110 return rc; 113 return rc;
111} 114}
112 115
113static volatile int done = 0; 116static volatile int done;
114static volatile int signr = -1; 117static volatile int signr = -1;
115static volatile int child_finished = 0; 118static volatile int child_finished;
119static volatile int auxtrace_snapshot_enabled;
120static volatile int auxtrace_snapshot_err;
121static volatile int auxtrace_record__snapshot_started;
116 122
117static void sig_handler(int sig) 123static void sig_handler(int sig)
118{ 124{
@@ -133,6 +139,133 @@ static void record__sig_exit(void)
133 raise(signr); 139 raise(signr);
134} 140}
135 141
142#ifdef HAVE_AUXTRACE_SUPPORT
143
144static int record__process_auxtrace(struct perf_tool *tool,
145 union perf_event *event, void *data1,
146 size_t len1, void *data2, size_t len2)
147{
148 struct record *rec = container_of(tool, struct record, tool);
149 struct perf_data_file *file = &rec->file;
150 size_t padding;
151 u8 pad[8] = {0};
152
153 if (!perf_data_file__is_pipe(file)) {
154 off_t file_offset;
155 int fd = perf_data_file__fd(file);
156 int err;
157
158 file_offset = lseek(fd, 0, SEEK_CUR);
159 if (file_offset == -1)
160 return -1;
161 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
162 event, file_offset);
163 if (err)
164 return err;
165 }
166
167 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
168 padding = (len1 + len2) & 7;
169 if (padding)
170 padding = 8 - padding;
171
172 record__write(rec, event, event->header.size);
173 record__write(rec, data1, len1);
174 if (len2)
175 record__write(rec, data2, len2);
176 record__write(rec, &pad, padding);
177
178 return 0;
179}
180
181static int record__auxtrace_mmap_read(struct record *rec,
182 struct auxtrace_mmap *mm)
183{
184 int ret;
185
186 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
187 record__process_auxtrace);
188 if (ret < 0)
189 return ret;
190
191 if (ret)
192 rec->samples++;
193
194 return 0;
195}
196
197static int record__auxtrace_mmap_read_snapshot(struct record *rec,
198 struct auxtrace_mmap *mm)
199{
200 int ret;
201
202 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
203 record__process_auxtrace,
204 rec->opts.auxtrace_snapshot_size);
205 if (ret < 0)
206 return ret;
207
208 if (ret)
209 rec->samples++;
210
211 return 0;
212}
213
214static int record__auxtrace_read_snapshot_all(struct record *rec)
215{
216 int i;
217 int rc = 0;
218
219 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
220 struct auxtrace_mmap *mm =
221 &rec->evlist->mmap[i].auxtrace_mmap;
222
223 if (!mm->base)
224 continue;
225
226 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
227 rc = -1;
228 goto out;
229 }
230 }
231out:
232 return rc;
233}
234
235static void record__read_auxtrace_snapshot(struct record *rec)
236{
237 pr_debug("Recording AUX area tracing snapshot\n");
238 if (record__auxtrace_read_snapshot_all(rec) < 0) {
239 auxtrace_snapshot_err = -1;
240 } else {
241 auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
242 if (!auxtrace_snapshot_err)
243 auxtrace_snapshot_enabled = 1;
244 }
245}
246
247#else
248
249static inline
250int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
251 struct auxtrace_mmap *mm __maybe_unused)
252{
253 return 0;
254}
255
256static inline
257void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
258{
259}
260
261static inline
262int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
263{
264 return 0;
265}
266
267#endif
268
136static int record__open(struct record *rec) 269static int record__open(struct record *rec)
137{ 270{
138 char msg[512]; 271 char msg[512];
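In record__process_auxtrace() above, the AUX payload (len1 + len2) is rounded up to an 8-byte boundary so that later records in perf.data stay aligned; per the comment, event->header.size already includes that padding (see __auxtrace_mmap__read()). The arithmetic in isolation:

	size_t padding = (len1 + len2) & 7;	/* bytes past the last 8-byte boundary */
	if (padding)
		padding = 8 - padding;		/* bytes up to the next boundary */
	/* len1 + len2 + padding is now a multiple of 8; at most 7 pad
	 * bytes are written, drawn from the zero-filled pad[8] buffer. */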
@@ -169,13 +302,16 @@ try_again:
169 goto out; 302 goto out;
170 } 303 }
171 304
172 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { 305 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
306 opts->auxtrace_mmap_pages,
307 opts->auxtrace_snapshot_mode) < 0) {
173 if (errno == EPERM) { 308 if (errno == EPERM) {
174 pr_err("Permission error mapping pages.\n" 309 pr_err("Permission error mapping pages.\n"
175 "Consider increasing " 310 "Consider increasing "
176 "/proc/sys/kernel/perf_event_mlock_kb,\n" 311 "/proc/sys/kernel/perf_event_mlock_kb,\n"
177 "or try again with a smaller value of -m/--mmap_pages.\n" 312 "or try again with a smaller value of -m/--mmap_pages.\n"
178 "(current value: %u)\n", opts->mmap_pages); 313 "(current value: %u,%u)\n",
314 opts->mmap_pages, opts->auxtrace_mmap_pages);
179 rc = -errno; 315 rc = -errno;
180 } else { 316 } else {
181 pr_err("failed to mmap with %d (%s)\n", errno, 317 pr_err("failed to mmap with %d (%s)\n", errno,
@@ -209,12 +345,9 @@ static int process_buildids(struct record *rec)
209 struct perf_data_file *file = &rec->file; 345 struct perf_data_file *file = &rec->file;
210 struct perf_session *session = rec->session; 346 struct perf_session *session = rec->session;
211 347
212 u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 348 if (file->size == 0)
213 if (size == 0)
214 return 0; 349 return 0;
215 350
216 file->size = size;
217
218 /* 351 /*
219 * During this process, it'll load kernel map and replace the 352 * During this process, it'll load kernel map and replace the
220 * dso->long_name to a real pathname it found. In this case 353 * dso->long_name to a real pathname it found. In this case
@@ -270,12 +403,20 @@ static int record__mmap_read_all(struct record *rec)
270 int rc = 0; 403 int rc = 0;
271 404
272 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 405 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
406 struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
407
273 if (rec->evlist->mmap[i].base) { 408 if (rec->evlist->mmap[i].base) {
274 if (record__mmap_read(rec, i) != 0) { 409 if (record__mmap_read(rec, i) != 0) {
275 rc = -1; 410 rc = -1;
276 goto out; 411 goto out;
277 } 412 }
278 } 413 }
414
415 if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
416 record__auxtrace_mmap_read(rec, mm) != 0) {
417 rc = -1;
418 goto out;
419 }
279 } 420 }
280 421
281 /* 422 /*
@@ -305,6 +446,9 @@ static void record__init_features(struct record *rec)
305 446
306 if (!rec->opts.branch_stack) 447 if (!rec->opts.branch_stack)
307 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 448 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
449
450 if (!rec->opts.full_auxtrace)
451 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
308} 452}
309 453
310static volatile int workload_exec_errno; 454static volatile int workload_exec_errno;
@@ -323,6 +467,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
323 child_finished = 1; 467 child_finished = 1;
324} 468}
325 469
470static void snapshot_sig_handler(int sig);
471
326static int __cmd_record(struct record *rec, int argc, const char **argv) 472static int __cmd_record(struct record *rec, int argc, const char **argv)
327{ 473{
328 int err; 474 int err;
@@ -343,6 +489,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
343 signal(SIGCHLD, sig_handler); 489 signal(SIGCHLD, sig_handler);
344 signal(SIGINT, sig_handler); 490 signal(SIGINT, sig_handler);
345 signal(SIGTERM, sig_handler); 491 signal(SIGTERM, sig_handler);
492 if (rec->opts.auxtrace_snapshot_mode)
493 signal(SIGUSR2, snapshot_sig_handler);
494 else
495 signal(SIGUSR2, SIG_IGN);
346 496
347 session = perf_session__new(file, false, tool); 497 session = perf_session__new(file, false, tool);
348 if (session == NULL) { 498 if (session == NULL) {
@@ -421,6 +571,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
421 } 571 }
422 } 572 }
423 573
574 if (rec->opts.full_auxtrace) {
575 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
576 session, process_synthesized_event);
577 if (err)
578 goto out_delete_session;
579 }
580
424 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 581 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
425 machine); 582 machine);
426 if (err < 0) 583 if (err < 0)
@@ -441,7 +598,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
441 } 598 }
442 599
443 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 600 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
444 process_synthesized_event, opts->sample_address); 601 process_synthesized_event, opts->sample_address,
602 opts->proc_map_timeout);
445 if (err != 0) 603 if (err != 0)
446 goto out_child; 604 goto out_child;
447 605
@@ -475,14 +633,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
475 perf_evlist__enable(rec->evlist); 633 perf_evlist__enable(rec->evlist);
476 } 634 }
477 635
636 auxtrace_snapshot_enabled = 1;
478 for (;;) { 637 for (;;) {
479 int hits = rec->samples; 638 int hits = rec->samples;
480 639
481 if (record__mmap_read_all(rec) < 0) { 640 if (record__mmap_read_all(rec) < 0) {
641 auxtrace_snapshot_enabled = 0;
482 err = -1; 642 err = -1;
483 goto out_child; 643 goto out_child;
484 } 644 }
485 645
646 if (auxtrace_record__snapshot_started) {
647 auxtrace_record__snapshot_started = 0;
648 if (!auxtrace_snapshot_err)
649 record__read_auxtrace_snapshot(rec);
650 if (auxtrace_snapshot_err) {
651 pr_err("AUX area tracing snapshot failed\n");
652 err = -1;
653 goto out_child;
654 }
655 }
656
486 if (hits == rec->samples) { 657 if (hits == rec->samples) {
487 if (done || draining) 658 if (done || draining)
488 break; 659 break;
@@ -505,10 +676,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
505 * disable events in this case. 676 * disable events in this case.
506 */ 677 */
507 if (done && !disabled && !target__none(&opts->target)) { 678 if (done && !disabled && !target__none(&opts->target)) {
679 auxtrace_snapshot_enabled = 0;
508 perf_evlist__disable(rec->evlist); 680 perf_evlist__disable(rec->evlist);
509 disabled = true; 681 disabled = true;
510 } 682 }
511 } 683 }
684 auxtrace_snapshot_enabled = 0;
512 685
513 if (forks && workload_exec_errno) { 686 if (forks && workload_exec_errno) {
514 char msg[STRERR_BUFSIZE]; 687 char msg[STRERR_BUFSIZE];
@@ -544,16 +717,25 @@ out_child:
544 717
545 if (!err && !file->is_pipe) { 718 if (!err && !file->is_pipe) {
546 rec->session->header.data_size += rec->bytes_written; 719 rec->session->header.data_size += rec->bytes_written;
720 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
547 721
548 if (!rec->no_buildid) 722 if (!rec->no_buildid) {
549 process_buildids(rec); 723 process_buildids(rec);
724 /*
725 * We take all buildids when the file contains
726 * AUX area tracing data because we do not decode the
727 * trace because it would take too long.
728 */
729 if (rec->opts.full_auxtrace)
730 dsos__hit_all(rec->session);
731 }
550 perf_session__write_header(rec->session, rec->evlist, fd, true); 732 perf_session__write_header(rec->session, rec->evlist, fd, true);
551 } 733 }
552 734
553 if (!err && !quiet) { 735 if (!err && !quiet) {
554 char samples[128]; 736 char samples[128];
555 737
556 if (rec->samples) 738 if (rec->samples && !rec->opts.full_auxtrace)
557 scnprintf(samples, sizeof(samples), 739 scnprintf(samples, sizeof(samples),
558 " (%" PRIu64 " samples)", rec->samples); 740 " (%" PRIu64 " samples)", rec->samples);
559 else 741 else
@@ -569,94 +751,6 @@ out_delete_session:
569 return status; 751 return status;
570} 752}
571 753
572#define BRANCH_OPT(n, m) \
573 { .name = n, .mode = (m) }
574
575#define BRANCH_END { .name = NULL }
576
577struct branch_mode {
578 const char *name;
579 int mode;
580};
581
582static const struct branch_mode branch_modes[] = {
583 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
584 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
585 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
586 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
587 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
588 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
589 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
590 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
591 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
592 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
593 BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
594 BRANCH_END
595};
596
597static int
598parse_branch_stack(const struct option *opt, const char *str, int unset)
599{
600#define ONLY_PLM \
601 (PERF_SAMPLE_BRANCH_USER |\
602 PERF_SAMPLE_BRANCH_KERNEL |\
603 PERF_SAMPLE_BRANCH_HV)
604
605 uint64_t *mode = (uint64_t *)opt->value;
606 const struct branch_mode *br;
607 char *s, *os = NULL, *p;
608 int ret = -1;
609
610 if (unset)
611 return 0;
612
613 /*
614 * cannot set it twice, -b + --branch-filter for instance
615 */
616 if (*mode)
617 return -1;
618
619 /* str may be NULL in case no arg is passed to -b */
620 if (str) {
621 /* because str is read-only */
622 s = os = strdup(str);
623 if (!s)
624 return -1;
625
626 for (;;) {
627 p = strchr(s, ',');
628 if (p)
629 *p = '\0';
630
631 for (br = branch_modes; br->name; br++) {
632 if (!strcasecmp(s, br->name))
633 break;
634 }
635 if (!br->name) {
636 ui__warning("unknown branch filter %s,"
637 " check man page\n", s);
638 goto error;
639 }
640
641 *mode |= br->mode;
642
643 if (!p)
644 break;
645
646 s = p + 1;
647 }
648 }
649 ret = 0;
650
651 /* default to any branch */
652 if ((*mode & ~ONLY_PLM) == 0) {
653 *mode = PERF_SAMPLE_BRANCH_ANY;
654 }
655error:
656 free(os);
657 return ret;
658}
659
660static void callchain_debug(void) 754static void callchain_debug(void)
661{ 755{
662 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 756 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
@@ -795,6 +889,49 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
795 return -1; 889 return -1;
796} 890}
797 891
892static int record__parse_mmap_pages(const struct option *opt,
893 const char *str,
894 int unset __maybe_unused)
895{
896 struct record_opts *opts = opt->value;
897 char *s, *p;
898 unsigned int mmap_pages;
899 int ret;
900
901 if (!str)
902 return -EINVAL;
903
904 s = strdup(str);
905 if (!s)
906 return -ENOMEM;
907
908 p = strchr(s, ',');
909 if (p)
910 *p = '\0';
911
912 if (*s) {
913 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
914 if (ret)
915 goto out_free;
916 opts->mmap_pages = mmap_pages;
917 }
918
919 if (!p) {
920 ret = 0;
921 goto out_free;
922 }
923
924 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
925 if (ret)
926 goto out_free;
927
928 opts->auxtrace_mmap_pages = mmap_pages;
929
930out_free:
931 free(s);
932 return ret;
933}
934
798static const char * const __record_usage[] = { 935static const char * const __record_usage[] = {
799 "perf record [<options>] [<command>]", 936 "perf record [<options>] [<command>]",
800 "perf record [<options>] -- <command> [<options>]", 937 "perf record [<options>] -- <command> [<options>]",
@@ -823,6 +960,7 @@ static struct record record = {
823 .uses_mmap = true, 960 .uses_mmap = true,
824 .default_per_cpu = true, 961 .default_per_cpu = true,
825 }, 962 },
963 .proc_map_timeout = 500,
826 }, 964 },
827 .tool = { 965 .tool = {
828 .sample = process_sample_event, 966 .sample = process_sample_event,
@@ -875,9 +1013,9 @@ struct option __record_options[] = {
875 &record.opts.no_inherit_set, 1013 &record.opts.no_inherit_set,
876 "child tasks do not inherit counters"), 1014 "child tasks do not inherit counters"),
877 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1015 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
878 OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages", 1016 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
879 "number of mmap data pages", 1017 "number of mmap data pages and AUX area tracing mmap pages",
880 perf_evlist__parse_mmap_pages), 1018 record__parse_mmap_pages),
881 OPT_BOOLEAN(0, "group", &record.opts.group, 1019 OPT_BOOLEAN(0, "group", &record.opts.group,
882 "put the counters into a counter group"), 1020 "put the counters into a counter group"),
883 OPT_CALLBACK_NOOPT('g', NULL, &record.opts, 1021 OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
@@ -891,10 +1029,9 @@ struct option __record_options[] = {
891 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1029 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
892 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1030 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
893 "per thread counts"), 1031 "per thread counts"),
894 OPT_BOOLEAN('d', "data", &record.opts.sample_address, 1032 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
895 "Sample addresses"), 1033 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"),
896 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), 1034 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
897 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
898 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1035 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
899 "don't sample"), 1036 "don't sample"),
900 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, 1037 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
@@ -929,6 +1066,10 @@ struct option __record_options[] = {
929 OPT_CALLBACK('k', "clockid", &record.opts, 1066 OPT_CALLBACK('k', "clockid", &record.opts,
930 "clockid", "clockid to use for events, see clock_gettime()", 1067 "clockid", "clockid to use for events, see clock_gettime()",
931 parse_clockid), 1068 parse_clockid),
1069 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1070 "opts", "AUX area tracing Snapshot Mode", ""),
1071 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1072 "per thread proc mmap processing timeout in ms"),
932 OPT_END() 1073 OPT_END()
933}; 1074};
934 1075
@@ -936,7 +1077,7 @@ struct option *record_options = __record_options;
936 1077
937int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) 1078int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
938{ 1079{
939 int err = -ENOMEM; 1080 int err;
940 struct record *rec = &record; 1081 struct record *rec = &record;
941 char errbuf[BUFSIZ]; 1082 char errbuf[BUFSIZ];
942 1083
@@ -957,6 +1098,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
957 usage_with_options(record_usage, record_options); 1098 usage_with_options(record_usage, record_options);
958 } 1099 }
959 1100
1101 if (!rec->itr) {
1102 rec->itr = auxtrace_record__init(rec->evlist, &err);
1103 if (err)
1104 return err;
1105 }
1106
1107 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1108 rec->opts.auxtrace_snapshot_opts);
1109 if (err)
1110 return err;
1111
1112 err = -ENOMEM;
1113
960 symbol__init(NULL); 1114 symbol__init(NULL);
961 1115
962 if (symbol_conf.kptr_restrict) 1116 if (symbol_conf.kptr_restrict)
@@ -1002,6 +1156,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1002 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1156 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1003 usage_with_options(record_usage, record_options); 1157 usage_with_options(record_usage, record_options);
1004 1158
1159 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1160 if (err)
1161 goto out_symbol_exit;
1162
1005 if (record_opts__config(&rec->opts)) { 1163 if (record_opts__config(&rec->opts)) {
1006 err = -EINVAL; 1164 err = -EINVAL;
1007 goto out_symbol_exit; 1165 goto out_symbol_exit;
@@ -1011,5 +1169,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1011out_symbol_exit: 1169out_symbol_exit:
1012 perf_evlist__delete(rec->evlist); 1170 perf_evlist__delete(rec->evlist);
1013 symbol__exit(); 1171 symbol__exit();
1172 auxtrace_record__free(rec->itr);
1014 return err; 1173 return err;
1015} 1174}
1175
1176static void snapshot_sig_handler(int sig __maybe_unused)
1177{
1178 if (!auxtrace_snapshot_enabled)
1179 return;
1180 auxtrace_snapshot_enabled = 0;
1181 auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1182 auxtrace_record__snapshot_started = 1;
1183}
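The snapshot plumbing above is deliberately split in two: snapshot_sig_handler() only flips volatile flags, which keeps the signal context async-safe, while the main record loop notices the request and performs the actual AUX reads. Reduced to a skeleton (hypothetical helper name, error paths dropped):

	static volatile int snapshot_enabled;	/* armed between reads         */
	static volatile int snapshot_started;	/* request from signal context */

	static void on_sigusr2(int sig __maybe_unused)
	{
		if (!snapshot_enabled)
			return;			/* a read is already in flight */
		snapshot_enabled = 0;
		snapshot_started = 1;		/* ask the main loop to snapshot */
	}

	/* in the record loop: */
	if (snapshot_started) {
		snapshot_started = 0;
		read_aux_snapshot();		/* stand-in for the real reads */
		snapshot_enabled = 1;		/* re-arm for the next SIGUSR2 */
	}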
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b63aeda719be..32626ea3e227 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -36,6 +36,8 @@
36#include "util/data.h" 36#include "util/data.h"
37#include "arch/common.h" 37#include "arch/common.h"
38 38
39#include "util/auxtrace.h"
40
39#include <dlfcn.h> 41#include <dlfcn.h>
40#include <linux/bitmap.h> 42#include <linux/bitmap.h>
41 43
@@ -137,10 +139,12 @@ static int process_sample_event(struct perf_tool *tool,
137 struct report *rep = container_of(tool, struct report, tool); 139 struct report *rep = container_of(tool, struct report, tool);
138 struct addr_location al; 140 struct addr_location al;
139 struct hist_entry_iter iter = { 141 struct hist_entry_iter iter = {
140 .hide_unresolved = rep->hide_unresolved, 142 .evsel = evsel,
141 .add_entry_cb = hist_iter__report_callback, 143 .sample = sample,
144 .hide_unresolved = rep->hide_unresolved,
145 .add_entry_cb = hist_iter__report_callback,
142 }; 146 };
143 int ret; 147 int ret = 0;
144 148
145 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { 149 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
146 pr_debug("problem processing %d event, skipping it.\n", 150 pr_debug("problem processing %d event, skipping it.\n",
@@ -149,10 +153,10 @@ static int process_sample_event(struct perf_tool *tool,
149 } 153 }
150 154
151 if (rep->hide_unresolved && al.sym == NULL) 155 if (rep->hide_unresolved && al.sym == NULL)
152 return 0; 156 goto out_put;
153 157
154 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 158 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
155 return 0; 159 goto out_put;
156 160
157 if (sort__mode == SORT_MODE__BRANCH) 161 if (sort__mode == SORT_MODE__BRANCH)
158 iter.ops = &hist_iter_branch; 162 iter.ops = &hist_iter_branch;
@@ -166,11 +170,11 @@ static int process_sample_event(struct perf_tool *tool,
166 if (al.map != NULL) 170 if (al.map != NULL)
167 al.map->dso->hit = 1; 171 al.map->dso->hit = 1;
168 172
169 ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, 173 ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
170 rep);
171 if (ret < 0) 174 if (ret < 0)
172 pr_debug("problem adding hist entry, skipping event\n"); 175 pr_debug("problem adding hist entry, skipping event\n");
173 176out_put:
177 addr_location__put(&al);
174 return ret; 178 return ret;
175} 179}
176 180
@@ -316,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
316{ 320{
317 struct perf_evsel *pos; 321 struct perf_evsel *pos;
318 322
323 fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
319 evlist__for_each(evlist, pos) { 324 evlist__for_each(evlist, pos) {
320 struct hists *hists = evsel__hists(pos); 325 struct hists *hists = evsel__hists(pos);
321 const char *evname = perf_evsel__name(pos); 326 const char *evname = perf_evsel__name(pos);
@@ -330,15 +335,14 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
330 } 335 }
331 336
332 if (sort_order == NULL && 337 if (sort_order == NULL &&
333 parent_pattern == default_parent_pattern) { 338 parent_pattern == default_parent_pattern)
334 fprintf(stdout, "#\n# (%s)\n#\n", help); 339 fprintf(stdout, "#\n# (%s)\n#\n", help);
335 340
336 if (rep->show_threads) { 341 if (rep->show_threads) {
337 bool style = !strcmp(rep->pretty_printing_style, "raw"); 342 bool style = !strcmp(rep->pretty_printing_style, "raw");
338 perf_read_values_display(stdout, &rep->show_threads_values, 343 perf_read_values_display(stdout, &rep->show_threads_values,
339 style); 344 style);
340 perf_read_values_destroy(&rep->show_threads_values); 345 perf_read_values_destroy(&rep->show_threads_values);
341 }
342 } 346 }
343 347
344 return 0; 348 return 0;
@@ -585,6 +589,7 @@ parse_percent_limit(const struct option *opt, const char *str,
585int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) 589int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
586{ 590{
587 struct perf_session *session; 591 struct perf_session *session;
592 struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
588 struct stat st; 593 struct stat st;
589 bool has_br_stack = false; 594 bool has_br_stack = false;
590 int branch_mode = -1; 595 int branch_mode = -1;
@@ -607,6 +612,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
607 .attr = perf_event__process_attr, 612 .attr = perf_event__process_attr,
608 .tracing_data = perf_event__process_tracing_data, 613 .tracing_data = perf_event__process_tracing_data,
609 .build_id = perf_event__process_build_id, 614 .build_id = perf_event__process_build_id,
615 .id_index = perf_event__process_id_index,
616 .auxtrace_info = perf_event__process_auxtrace_info,
617 .auxtrace = perf_event__process_auxtrace,
610 .ordered_events = true, 618 .ordered_events = true,
611 .ordering_requires_timestamps = true, 619 .ordering_requires_timestamps = true,
612 }, 620 },
@@ -717,6 +725,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
717 "Don't show entries under that percent", parse_percent_limit), 725 "Don't show entries under that percent", parse_percent_limit),
718 OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", 726 OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
719 "how to display percentage of filtered entries", parse_filter_percentage), 727 "how to display percentage of filtered entries", parse_filter_percentage),
728 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
729 "Instruction Tracing options",
730 itrace_parse_synth_opts),
720 OPT_END() 731 OPT_END()
721 }; 732 };
722 struct perf_data_file file = { 733 struct perf_data_file file = {
@@ -761,6 +772,8 @@ repeat:
761 report.queue_size); 772 report.queue_size);
762 } 773 }
763 774
775 session->itrace_synth_opts = &itrace_synth_opts;
776
764 report.session = session; 777 report.session = session;
765 778
766 has_br_stack = perf_header__has_feat(&session->header, 779 has_br_stack = perf_header__has_feat(&session->header,
@@ -803,8 +816,8 @@ repeat:
803 goto error; 816 goto error;
804 } 817 }
805 818
806 /* Force tty output for header output. */ 819 /* Force tty output for header output and per-thread stat. */
807 if (report.header || report.header_only) 820 if (report.header || report.header_only || report.show_threads)
808 use_browser = 0; 821 use_browser = 0;
809 822
810 if (strcmp(input_name, "-") != 0) 823 if (strcmp(input_name, "-") != 0)
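For builtin-report, note how little a tool needs in order to consume AUX trace data after this series: three perf_tool callbacks, an itrace_synth_opts hung on the session, and an --itrace option. Collected from the hunks above:

	struct itrace_synth_opts itrace_synth_opts = { .set = 0, };

	/* in the struct perf_tool initializer: */
	.id_index	= perf_event__process_id_index,
	.auxtrace_info	= perf_event__process_auxtrace_info,
	.auxtrace	= perf_event__process_auxtrace,

	/* in the option table: */
	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
			    "Instruction Tracing options",
			    itrace_parse_synth_opts),

	/* once the session exists: */
	session->itrace_synth_opts = &itrace_synth_opts;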
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5275bab70313..33962612a5e9 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -95,6 +95,7 @@ struct work_atoms {
95 u64 total_lat; 95 u64 total_lat;
96 u64 nb_atoms; 96 u64 nb_atoms;
97 u64 total_runtime; 97 u64 total_runtime;
98 int num_merged;
98}; 99};
99 100
100typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); 101typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
@@ -168,9 +169,10 @@ struct perf_sched {
168 u64 all_runtime; 169 u64 all_runtime;
169 u64 all_count; 170 u64 all_count;
170 u64 cpu_last_switched[MAX_CPUS]; 171 u64 cpu_last_switched[MAX_CPUS];
171 struct rb_root atom_root, sorted_atom_root; 172 struct rb_root atom_root, sorted_atom_root, merged_atom_root;
172 struct list_head sort_list, cmp_pid; 173 struct list_head sort_list, cmp_pid;
173 bool force; 174 bool force;
175 bool skip_merge;
174}; 176};
175 177
176static u64 get_nsecs(void) 178static u64 get_nsecs(void)
@@ -770,7 +772,7 @@ static int replay_fork_event(struct perf_sched *sched,
770 if (child == NULL || parent == NULL) { 772 if (child == NULL || parent == NULL) {
771 pr_debug("thread does not exist on fork event: child %p, parent %p\n", 773 pr_debug("thread does not exist on fork event: child %p, parent %p\n",
772 child, parent); 774 child, parent);
773 return 0; 775 goto out_put;
774 } 776 }
775 777
776 if (verbose) { 778 if (verbose) {
@@ -781,6 +783,9 @@ static int replay_fork_event(struct perf_sched *sched,
781 783
782 register_pid(sched, parent->tid, thread__comm_str(parent)); 784 register_pid(sched, parent->tid, thread__comm_str(parent));
783 register_pid(sched, child->tid, thread__comm_str(child)); 785 register_pid(sched, child->tid, thread__comm_str(child));
786out_put:
787 thread__put(child);
788 thread__put(parent);
784 return 0; 789 return 0;
785} 790}
786 791
@@ -957,7 +962,7 @@ static int latency_switch_event(struct perf_sched *sched,
957 struct work_atoms *out_events, *in_events; 962 struct work_atoms *out_events, *in_events;
958 struct thread *sched_out, *sched_in; 963 struct thread *sched_out, *sched_in;
959 u64 timestamp0, timestamp = sample->time; 964 u64 timestamp0, timestamp = sample->time;
960 int cpu = sample->cpu; 965 int cpu = sample->cpu, err = -1;
961 s64 delta; 966 s64 delta;
962 967
963 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 968 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -976,15 +981,17 @@ static int latency_switch_event(struct perf_sched *sched,
976 981
977 sched_out = machine__findnew_thread(machine, -1, prev_pid); 982 sched_out = machine__findnew_thread(machine, -1, prev_pid);
978 sched_in = machine__findnew_thread(machine, -1, next_pid); 983 sched_in = machine__findnew_thread(machine, -1, next_pid);
984 if (sched_out == NULL || sched_in == NULL)
985 goto out_put;
979 986
980 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 987 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
981 if (!out_events) { 988 if (!out_events) {
982 if (thread_atoms_insert(sched, sched_out)) 989 if (thread_atoms_insert(sched, sched_out))
983 return -1; 990 goto out_put;
984 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); 991 out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
985 if (!out_events) { 992 if (!out_events) {
986 pr_err("out-event: Internal tree error"); 993 pr_err("out-event: Internal tree error");
987 return -1; 994 goto out_put;
988 } 995 }
989 } 996 }
990 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) 997 if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
@@ -993,22 +1000,25 @@ static int latency_switch_event(struct perf_sched *sched,
993 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1000 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
994 if (!in_events) { 1001 if (!in_events) {
995 if (thread_atoms_insert(sched, sched_in)) 1002 if (thread_atoms_insert(sched, sched_in))
996 return -1; 1003 goto out_put;
997 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); 1004 in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
998 if (!in_events) { 1005 if (!in_events) {
999 pr_err("in-event: Internal tree error"); 1006 pr_err("in-event: Internal tree error");
1000 return -1; 1007 goto out_put;
1001 } 1008 }
1002 /* 1009 /*
 1003 * Task came in that we have not heard about yet, 1010 * Task came in that we have not heard about yet,
 1004 * add in an initial atom in runnable state: 1011 * add in an initial atom in runnable state:
1005 */ 1012 */
1006 if (add_sched_out_event(in_events, 'R', timestamp)) 1013 if (add_sched_out_event(in_events, 'R', timestamp))
1007 return -1; 1014 goto out_put;
1008 } 1015 }
1009 add_sched_in_event(in_events, timestamp); 1016 add_sched_in_event(in_events, timestamp);
1010 1017 err = 0;
1011 return 0; 1018out_put:
1019 thread__put(sched_out);
1020 thread__put(sched_in);
1021 return err;
1012} 1022}
1013 1023
1014static int latency_runtime_event(struct perf_sched *sched, 1024static int latency_runtime_event(struct perf_sched *sched,
@@ -1021,23 +1031,29 @@ static int latency_runtime_event(struct perf_sched *sched,
1021 struct thread *thread = machine__findnew_thread(machine, -1, pid); 1031 struct thread *thread = machine__findnew_thread(machine, -1, pid);
1022 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1032 struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
1023 u64 timestamp = sample->time; 1033 u64 timestamp = sample->time;
1024 int cpu = sample->cpu; 1034 int cpu = sample->cpu, err = -1;
1035
1036 if (thread == NULL)
1037 return -1;
1025 1038
1026 BUG_ON(cpu >= MAX_CPUS || cpu < 0); 1039 BUG_ON(cpu >= MAX_CPUS || cpu < 0);
1027 if (!atoms) { 1040 if (!atoms) {
1028 if (thread_atoms_insert(sched, thread)) 1041 if (thread_atoms_insert(sched, thread))
1029 return -1; 1042 goto out_put;
1030 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); 1043 atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
1031 if (!atoms) { 1044 if (!atoms) {
1032 pr_err("in-event: Internal tree error"); 1045 pr_err("in-event: Internal tree error");
1033 return -1; 1046 goto out_put;
1034 } 1047 }
1035 if (add_sched_out_event(atoms, 'R', timestamp)) 1048 if (add_sched_out_event(atoms, 'R', timestamp))
1036 return -1; 1049 goto out_put;
1037 } 1050 }
1038 1051
1039 add_runtime_event(atoms, runtime, timestamp); 1052 add_runtime_event(atoms, runtime, timestamp);
1040 return 0; 1053 err = 0;
1054out_put:
1055 thread__put(thread);
1056 return err;
1041} 1057}
1042 1058
1043static int latency_wakeup_event(struct perf_sched *sched, 1059static int latency_wakeup_event(struct perf_sched *sched,
@@ -1050,19 +1066,22 @@ static int latency_wakeup_event(struct perf_sched *sched,
1050 struct work_atom *atom; 1066 struct work_atom *atom;
1051 struct thread *wakee; 1067 struct thread *wakee;
1052 u64 timestamp = sample->time; 1068 u64 timestamp = sample->time;
1069 int err = -1;
1053 1070
1054 wakee = machine__findnew_thread(machine, -1, pid); 1071 wakee = machine__findnew_thread(machine, -1, pid);
1072 if (wakee == NULL)
1073 return -1;
1055 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1074 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
1056 if (!atoms) { 1075 if (!atoms) {
1057 if (thread_atoms_insert(sched, wakee)) 1076 if (thread_atoms_insert(sched, wakee))
1058 return -1; 1077 goto out_put;
1059 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1078 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
1060 if (!atoms) { 1079 if (!atoms) {
1061 pr_err("wakeup-event: Internal tree error"); 1080 pr_err("wakeup-event: Internal tree error");
1062 return -1; 1081 goto out_put;
1063 } 1082 }
1064 if (add_sched_out_event(atoms, 'S', timestamp)) 1083 if (add_sched_out_event(atoms, 'S', timestamp))
1065 return -1; 1084 goto out_put;
1066 } 1085 }
1067 1086
1068 BUG_ON(list_empty(&atoms->work_list)); 1087 BUG_ON(list_empty(&atoms->work_list));
@@ -1081,17 +1100,21 @@ static int latency_wakeup_event(struct perf_sched *sched,
1081 * skip in this case. 1100 * skip in this case.
1082 */ 1101 */
1083 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1102 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1084 return 0; 1103 goto out_ok;
1085 1104
1086 sched->nr_timestamps++; 1105 sched->nr_timestamps++;
1087 if (atom->sched_out_time > timestamp) { 1106 if (atom->sched_out_time > timestamp) {
1088 sched->nr_unordered_timestamps++; 1107 sched->nr_unordered_timestamps++;
1089 return 0; 1108 goto out_ok;
1090 } 1109 }
1091 1110
1092 atom->state = THREAD_WAIT_CPU; 1111 atom->state = THREAD_WAIT_CPU;
1093 atom->wake_up_time = timestamp; 1112 atom->wake_up_time = timestamp;
1094 return 0; 1113out_ok:
1114 err = 0;
1115out_put:
1116 thread__put(wakee);
1117 return err;
1095} 1118}
1096 1119
1097static int latency_migrate_task_event(struct perf_sched *sched, 1120static int latency_migrate_task_event(struct perf_sched *sched,
@@ -1104,6 +1127,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
1104 struct work_atoms *atoms; 1127 struct work_atoms *atoms;
1105 struct work_atom *atom; 1128 struct work_atom *atom;
1106 struct thread *migrant; 1129 struct thread *migrant;
1130 int err = -1;
1107 1131
1108 /* 1132 /*
1109 * Only need to worry about migration when profiling one CPU. 1133 * Only need to worry about migration when profiling one CPU.
@@ -1112,18 +1136,20 @@ static int latency_migrate_task_event(struct perf_sched *sched,
1112 return 0; 1136 return 0;
1113 1137
1114 migrant = machine__findnew_thread(machine, -1, pid); 1138 migrant = machine__findnew_thread(machine, -1, pid);
1139 if (migrant == NULL)
1140 return -1;
1115 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1141 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
1116 if (!atoms) { 1142 if (!atoms) {
1117 if (thread_atoms_insert(sched, migrant)) 1143 if (thread_atoms_insert(sched, migrant))
1118 return -1; 1144 goto out_put;
1119 register_pid(sched, migrant->tid, thread__comm_str(migrant)); 1145 register_pid(sched, migrant->tid, thread__comm_str(migrant));
1120 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); 1146 atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
1121 if (!atoms) { 1147 if (!atoms) {
1122 pr_err("migration-event: Internal tree error"); 1148 pr_err("migration-event: Internal tree error");
1123 return -1; 1149 goto out_put;
1124 } 1150 }
1125 if (add_sched_out_event(atoms, 'R', timestamp)) 1151 if (add_sched_out_event(atoms, 'R', timestamp))
1126 return -1; 1152 goto out_put;
1127 } 1153 }
1128 1154
1129 BUG_ON(list_empty(&atoms->work_list)); 1155 BUG_ON(list_empty(&atoms->work_list));
@@ -1135,8 +1161,10 @@ static int latency_migrate_task_event(struct perf_sched *sched,
1135 1161
1136 if (atom->sched_out_time > timestamp) 1162 if (atom->sched_out_time > timestamp)
1137 sched->nr_unordered_timestamps++; 1163 sched->nr_unordered_timestamps++;
1138 1164 err = 0;
1139 return 0; 1165out_put:
1166 thread__put(migrant);
1167 return err;
1140} 1168}
1141 1169
1142static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) 1170static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
@@ -1156,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
1156 sched->all_runtime += work_list->total_runtime; 1184 sched->all_runtime += work_list->total_runtime;
1157 sched->all_count += work_list->nb_atoms; 1185 sched->all_count += work_list->nb_atoms;
1158 1186
1159 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); 1187 if (work_list->num_merged > 1)
1188 ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
1189 else
1190 ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
1160 1191
1161 for (i = 0; i < 24 - ret; i++) 1192 for (i = 0; i < 24 - ret; i++)
1162 printf(" "); 1193 printf(" ");
@@ -1276,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
1276static void perf_sched__sort_lat(struct perf_sched *sched) 1307static void perf_sched__sort_lat(struct perf_sched *sched)
1277{ 1308{
1278 struct rb_node *node; 1309 struct rb_node *node;
1279 1310 struct rb_root *root = &sched->atom_root;
1311again:
1280 for (;;) { 1312 for (;;) {
1281 struct work_atoms *data; 1313 struct work_atoms *data;
1282 node = rb_first(&sched->atom_root); 1314 node = rb_first(root);
1283 if (!node) 1315 if (!node)
1284 break; 1316 break;
1285 1317
1286 rb_erase(node, &sched->atom_root); 1318 rb_erase(node, root);
1287 data = rb_entry(node, struct work_atoms, node); 1319 data = rb_entry(node, struct work_atoms, node);
1288 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1320 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
1289 } 1321 }
1322 if (root == &sched->atom_root) {
1323 root = &sched->merged_atom_root;
1324 goto again;
1325 }
1290} 1326}
1291 1327
1292static int process_sched_wakeup_event(struct perf_tool *tool, 1328static int process_sched_wakeup_event(struct perf_tool *tool,
@@ -1330,8 +1366,10 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
1330 } 1366 }
1331 1367
1332 sched_in = machine__findnew_thread(machine, -1, next_pid); 1368 sched_in = machine__findnew_thread(machine, -1, next_pid);
1369 if (sched_in == NULL)
1370 return -1;
1333 1371
1334 sched->curr_thread[this_cpu] = sched_in; 1372 sched->curr_thread[this_cpu] = thread__get(sched_in);
1335 1373
1336 printf(" "); 1374 printf(" ");
1337 1375
@@ -1381,6 +1419,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
1381 printf("\n"); 1419 printf("\n");
1382 } 1420 }
1383 1421
1422 thread__put(sched_in);
1423
1384 return 0; 1424 return 0;
1385} 1425}
1386 1426
@@ -1542,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
1542 } 1582 }
1543} 1583}
1544 1584
1585static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
1586{
1587 struct rb_node **new = &(root->rb_node), *parent = NULL;
1588 struct work_atoms *this;
1589 const char *comm = thread__comm_str(data->thread), *this_comm;
1590
1591 while (*new) {
1592 int cmp;
1593
1594 this = container_of(*new, struct work_atoms, node);
1595 parent = *new;
1596
1597 this_comm = thread__comm_str(this->thread);
1598 cmp = strcmp(comm, this_comm);
1599 if (cmp > 0) {
1600 new = &((*new)->rb_left);
1601 } else if (cmp < 0) {
1602 new = &((*new)->rb_right);
1603 } else {
1604 this->num_merged++;
1605 this->total_runtime += data->total_runtime;
1606 this->nb_atoms += data->nb_atoms;
1607 this->total_lat += data->total_lat;
1608 list_splice(&data->work_list, &this->work_list);
1609 if (this->max_lat < data->max_lat) {
1610 this->max_lat = data->max_lat;
1611 this->max_lat_at = data->max_lat_at;
1612 }
1613 zfree(&data);
1614 return;
1615 }
1616 }
1617
1618 data->num_merged++;
1619 rb_link_node(&data->node, parent, new);
1620 rb_insert_color(&data->node, root);
1621}
1622
1623static void perf_sched__merge_lat(struct perf_sched *sched)
1624{
1625 struct work_atoms *data;
1626 struct rb_node *node;
1627
1628 if (sched->skip_merge)
1629 return;
1630
1631 while ((node = rb_first(&sched->atom_root))) {
1632 rb_erase(node, &sched->atom_root);
1633 data = rb_entry(node, struct work_atoms, node);
1634 __merge_work_atoms(&sched->merged_atom_root, data);
1635 }
1636}
1637
1545static int perf_sched__lat(struct perf_sched *sched) 1638static int perf_sched__lat(struct perf_sched *sched)
1546{ 1639{
1547 struct rb_node *next; 1640 struct rb_node *next;
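The hunk above adds __merge_work_atoms(), which folds latency records that share a comm into a single entry: atom counts and runtimes are summed, the worst latency wins, and num_merged tracks how many threads were folded in. A minimal sketch of that accounting, assuming a plain linked list in place of the kernel rb-tree; struct atoms and merge() are simplified stand-ins for struct work_atoms and __merge_work_atoms():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct atoms {
	const char *comm;                       /* merge key */
	unsigned long long nb_atoms, max_lat;
	int num_merged;
	struct atoms *next;
};

static void merge(struct atoms **head, struct atoms *data)
{
	struct atoms *this;

	for (this = *head; this; this = this->next) {
		if (strcmp(data->comm, this->comm))
			continue;
		/* Same comm: fold the accounting into the survivor. */
		this->num_merged++;
		this->nb_atoms += data->nb_atoms;
		if (this->max_lat < data->max_lat)
			this->max_lat = data->max_lat;
		free(data);
		return;
	}
	data->num_merged++;                     /* first thread seen for this comm */
	data->next = *head;
	*head = data;
}

int main(void)
{
	struct atoms *head = NULL;
	struct atoms *a = calloc(1, sizeof(*a));
	struct atoms *b = calloc(1, sizeof(*b));

	a->comm = "bash"; a->nb_atoms = 3; a->max_lat = 10;
	b->comm = "bash"; b->nb_atoms = 2; b->max_lat = 40;
	merge(&head, a);
	merge(&head, b);
	/* prints: bash merged=2 atoms=5 max_lat=40 */
	printf("%s merged=%d atoms=%llu max_lat=%llu\n",
	       head->comm, head->num_merged, head->nb_atoms, head->max_lat);
	return 0;
}

This is also why the output_lat_thread() hunk prints "comm:(N)" when num_merged > 1: a merged entry no longer has a single meaningful tid.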
@@ -1551,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
1551 if (perf_sched__read_events(sched)) 1644 if (perf_sched__read_events(sched))
1552 return -1; 1645 return -1;
1553 1646
1647 perf_sched__merge_lat(sched);
1554 perf_sched__sort_lat(sched); 1648 perf_sched__sort_lat(sched);
1555 1649
1556 printf("\n -----------------------------------------------------------------------------------------------------------------\n"); 1650 printf("\n -----------------------------------------------------------------------------------------------------------------\n");
@@ -1702,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1702 .profile_cpu = -1, 1796 .profile_cpu = -1,
1703 .next_shortname1 = 'A', 1797 .next_shortname1 = 'A',
1704 .next_shortname2 = '0', 1798 .next_shortname2 = '0',
1799 .skip_merge = 0,
1705 }; 1800 };
1706 const struct option latency_options[] = { 1801 const struct option latency_options[] = {
1707 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", 1802 OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
@@ -1712,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
1712 "CPU to profile on"), 1807 "CPU to profile on"),
1713 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1808 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1714 "dump raw trace in ASCII"), 1809 "dump raw trace in ASCII"),
1810 OPT_BOOLEAN('p', "pids", &sched.skip_merge,
1811 "latency stats per pid instead of per comm"),
1715 OPT_END() 1812 OPT_END()
1716 }; 1813 };
1717 const struct option replay_options[] = { 1814 const struct option replay_options[] = {
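Taken together, the builtin-sched.c error-path changes convert every early "return" into a "goto out_put" so the references returned by machine__findnew_thread() are dropped exactly once on all paths. The shape of that pattern, reduced to a self-contained sketch; struct obj, obj_new(), obj_put() and do_work() are hypothetical stand-ins for perf's refcounted struct thread helpers:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;
};

static struct obj *obj_new(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o)
		o->refcnt = 1;
	return o;
}

static void obj_put(struct obj *o)
{
	if (o && --o->refcnt == 0)
		free(o);
}

static int do_work(struct obj *o)
{
	return o ? 0 : -1;      /* placeholder for the real event handling */
}

/* Consumes one reference on each argument, like the latency_*_event()
 * handlers consume the lookups they perform: */
static int handle_event(struct obj *a, struct obj *b)
{
	int err = -1;

	if (a == NULL || b == NULL)
		goto out_put;
	if (do_work(a))
		goto out_put;
	if (do_work(b))
		goto out_put;
	err = 0;
out_put:
	obj_put(a);             /* single exit: each reference dropped once */
	obj_put(b);
	return err;
}

int main(void)
{
	printf("err=%d\n", handle_event(obj_new(), obj_new()));
	return 0;
}

The same single-exit discipline appears in latency_switch_event(), latency_runtime_event(), latency_wakeup_event() and latency_migrate_task_event() above.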
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 58f10b8e6ff2..24809787369f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -16,6 +16,7 @@
16#include "util/evsel.h" 16#include "util/evsel.h"
17#include "util/sort.h" 17#include "util/sort.h"
18#include "util/data.h" 18#include "util/data.h"
19#include "util/auxtrace.h"
19#include <linux/bitmap.h> 20#include <linux/bitmap.h>
20 21
21static char const *script_name; 22static char const *script_name;
@@ -26,6 +27,7 @@ static u64 nr_unordered;
26static bool no_callchain; 27static bool no_callchain;
27static bool latency_format; 28static bool latency_format;
28static bool system_wide; 29static bool system_wide;
30static bool print_flags;
29static const char *cpu_list; 31static const char *cpu_list;
30static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 32static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
31 33
@@ -146,9 +148,10 @@ static const char *output_field2str(enum perf_output_field field)
146 148
147#define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x) 149#define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x)
148 150
149static int perf_evsel__check_stype(struct perf_evsel *evsel, 151static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
150 u64 sample_type, const char *sample_msg, 152 u64 sample_type, const char *sample_msg,
151 enum perf_output_field field) 153 enum perf_output_field field,
154 bool allow_user_set)
152{ 155{
153 struct perf_event_attr *attr = &evsel->attr; 156 struct perf_event_attr *attr = &evsel->attr;
154 int type = attr->type; 157 int type = attr->type;
@@ -158,6 +161,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
158 return 0; 161 return 0;
159 162
160 if (output[type].user_set) { 163 if (output[type].user_set) {
164 if (allow_user_set)
165 return 0;
161 evname = perf_evsel__name(evsel); 166 evname = perf_evsel__name(evsel);
162 pr_err("Samples for '%s' event do not have %s attribute set. " 167 pr_err("Samples for '%s' event do not have %s attribute set. "
163 "Cannot print '%s' field.\n", 168 "Cannot print '%s' field.\n",
@@ -175,10 +180,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
175 return 0; 180 return 0;
176} 181}
177 182
183static int perf_evsel__check_stype(struct perf_evsel *evsel,
184 u64 sample_type, const char *sample_msg,
185 enum perf_output_field field)
186{
187 return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
188 false);
189}
190
178static int perf_evsel__check_attr(struct perf_evsel *evsel, 191static int perf_evsel__check_attr(struct perf_evsel *evsel,
179 struct perf_session *session) 192 struct perf_session *session)
180{ 193{
181 struct perf_event_attr *attr = &evsel->attr; 194 struct perf_event_attr *attr = &evsel->attr;
195 bool allow_user_set;
196
197 allow_user_set = perf_header__has_feat(&session->header,
198 HEADER_AUXTRACE);
182 199
183 if (PRINT_FIELD(TRACE) && 200 if (PRINT_FIELD(TRACE) &&
184 !perf_session__has_traces(session, "record -R")) 201 !perf_session__has_traces(session, "record -R"))
@@ -191,8 +208,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
191 } 208 }
192 209
193 if (PRINT_FIELD(ADDR) && 210 if (PRINT_FIELD(ADDR) &&
194 perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", 211 perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
195 PERF_OUTPUT_ADDR)) 212 PERF_OUTPUT_ADDR, allow_user_set))
196 return -EINVAL; 213 return -EINVAL;
197 214
198 if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { 215 if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -229,8 +246,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
229 return -EINVAL; 246 return -EINVAL;
230 247
231 if (PRINT_FIELD(CPU) && 248 if (PRINT_FIELD(CPU) &&
232 perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU", 249 perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
233 PERF_OUTPUT_CPU)) 250 PERF_OUTPUT_CPU, allow_user_set))
234 return -EINVAL; 251 return -EINVAL;
235 252
236 if (PRINT_FIELD(PERIOD) && 253 if (PRINT_FIELD(PERIOD) &&
@@ -445,6 +462,25 @@ static void print_sample_bts(union perf_event *event,
445 printf("\n"); 462 printf("\n");
446} 463}
447 464
465static void print_sample_flags(u32 flags)
466{
467 const char *chars = PERF_IP_FLAG_CHARS;
468 const int n = strlen(PERF_IP_FLAG_CHARS);
469 char str[33];
470 int i, pos = 0;
471
472 for (i = 0; i < n; i++, flags >>= 1) {
473 if (flags & 1)
474 str[pos++] = chars[i];
475 }
476 for (; i < 32; i++, flags >>= 1) {
477 if (flags & 1)
478 str[pos++] = '?';
479 }
480 str[pos] = 0;
481 printf(" %-4s ", str);
482}
483
448static void process_event(union perf_event *event, struct perf_sample *sample, 484static void process_event(union perf_event *event, struct perf_sample *sample,
449 struct perf_evsel *evsel, struct addr_location *al) 485 struct perf_evsel *evsel, struct addr_location *al)
450{ 486{
@@ -464,6 +500,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
464 printf("%s: ", evname ? evname : "[unknown]"); 500 printf("%s: ", evname ? evname : "[unknown]");
465 } 501 }
466 502
503 if (print_flags)
504 print_sample_flags(sample->flags);
505
467 if (is_bts_event(attr)) { 506 if (is_bts_event(attr)) {
468 print_sample_bts(event, sample, evsel, thread, al); 507 print_sample_bts(event, sample, evsel, thread, al);
469 return; 508 return;
@@ -568,13 +607,14 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
568 } 607 }
569 608
570 if (al.filtered) 609 if (al.filtered)
571 return 0; 610 goto out_put;
572 611
573 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) 612 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
574 return 0; 613 goto out_put;
575 614
576 scripting_ops->process_event(event, sample, evsel, &al); 615 scripting_ops->process_event(event, sample, evsel, &al);
577 616out_put:
617 addr_location__put(&al);
578 return 0; 618 return 0;
579} 619}
580 620
@@ -642,8 +682,8 @@ static int process_comm_event(struct perf_tool *tool,
642 print_sample_start(sample, thread, evsel); 682 print_sample_start(sample, thread, evsel);
643 perf_event__fprintf(event, stdout); 683 perf_event__fprintf(event, stdout);
644 ret = 0; 684 ret = 0;
645
646out: 685out:
686 thread__put(thread);
647 return ret; 687 return ret;
648} 688}
649 689
@@ -674,6 +714,7 @@ static int process_fork_event(struct perf_tool *tool,
674 } 714 }
675 print_sample_start(sample, thread, evsel); 715 print_sample_start(sample, thread, evsel);
676 perf_event__fprintf(event, stdout); 716 perf_event__fprintf(event, stdout);
717 thread__put(thread);
677 718
678 return 0; 719 return 0;
679} 720}
@@ -682,6 +723,7 @@ static int process_exit_event(struct perf_tool *tool,
682 struct perf_sample *sample, 723 struct perf_sample *sample,
683 struct machine *machine) 724 struct machine *machine)
684{ 725{
726 int err = 0;
685 struct thread *thread; 727 struct thread *thread;
686 struct perf_script *script = container_of(tool, struct perf_script, tool); 728 struct perf_script *script = container_of(tool, struct perf_script, tool);
687 struct perf_session *session = script->session; 729 struct perf_session *session = script->session;
@@ -703,9 +745,10 @@ static int process_exit_event(struct perf_tool *tool,
703 perf_event__fprintf(event, stdout); 745 perf_event__fprintf(event, stdout);
704 746
705 if (perf_event__process_exit(tool, event, sample, machine) < 0) 747 if (perf_event__process_exit(tool, event, sample, machine) < 0)
706 return -1; 748 err = -1;
707 749
708 return 0; 750 thread__put(thread);
751 return err;
709} 752}
710 753
711static int process_mmap_event(struct perf_tool *tool, 754static int process_mmap_event(struct perf_tool *tool,
@@ -735,7 +778,7 @@ static int process_mmap_event(struct perf_tool *tool,
735 } 778 }
736 print_sample_start(sample, thread, evsel); 779 print_sample_start(sample, thread, evsel);
737 perf_event__fprintf(event, stdout); 780 perf_event__fprintf(event, stdout);
738 781 thread__put(thread);
739 return 0; 782 return 0;
740} 783}
741 784
@@ -766,7 +809,7 @@ static int process_mmap2_event(struct perf_tool *tool,
766 } 809 }
767 print_sample_start(sample, thread, evsel); 810 print_sample_start(sample, thread, evsel);
768 perf_event__fprintf(event, stdout); 811 perf_event__fprintf(event, stdout);
769 812 thread__put(thread);
770 return 0; 813 return 0;
771} 814}
772 815
@@ -999,12 +1042,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
999 } 1042 }
1000 } 1043 }
1001 1044
1002 tok = strtok(tok, ","); 1045 for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) {
1003 while (tok) {
1004 for (i = 0; i < imax; ++i) { 1046 for (i = 0; i < imax; ++i) {
1005 if (strcmp(tok, all_output_options[i].str) == 0) 1047 if (strcmp(tok, all_output_options[i].str) == 0)
1006 break; 1048 break;
1007 } 1049 }
1050 if (i == imax && strcmp(tok, "flags") == 0) {
1051 print_flags = true;
1052 continue;
1053 }
1008 if (i == imax) { 1054 if (i == imax) {
1009 fprintf(stderr, "Invalid field requested.\n"); 1055 fprintf(stderr, "Invalid field requested.\n");
1010 rc = -EINVAL; 1056 rc = -EINVAL;
@@ -1032,8 +1078,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
1032 } 1078 }
1033 output[type].fields |= all_output_options[i].field; 1079 output[type].fields |= all_output_options[i].field;
1034 } 1080 }
1035
1036 tok = strtok(NULL, ",");
1037 } 1081 }
1038 1082
1039 if (type >= 0) { 1083 if (type >= 0) {
@@ -1497,6 +1541,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1497 char *rec_script_path = NULL; 1541 char *rec_script_path = NULL;
1498 char *rep_script_path = NULL; 1542 char *rep_script_path = NULL;
1499 struct perf_session *session; 1543 struct perf_session *session;
1544 struct itrace_synth_opts itrace_synth_opts = { .set = false, };
1500 char *script_path = NULL; 1545 char *script_path = NULL;
1501 const char **__argv; 1546 const char **__argv;
1502 int i, j, err = 0; 1547 int i, j, err = 0;
@@ -1511,6 +1556,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1511 .attr = process_attr, 1556 .attr = process_attr,
1512 .tracing_data = perf_event__process_tracing_data, 1557 .tracing_data = perf_event__process_tracing_data,
1513 .build_id = perf_event__process_build_id, 1558 .build_id = perf_event__process_build_id,
1559 .id_index = perf_event__process_id_index,
1560 .auxtrace_info = perf_event__process_auxtrace_info,
1561 .auxtrace = perf_event__process_auxtrace,
1562 .auxtrace_error = perf_event__process_auxtrace_error,
1514 .ordered_events = true, 1563 .ordered_events = true,
1515 .ordering_requires_timestamps = true, 1564 .ordering_requires_timestamps = true,
1516 }, 1565 },
@@ -1549,7 +1598,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1549 "comma separated output fields prepend with 'type:'. " 1598 "comma separated output fields prepend with 'type:'. "
1550 "Valid types: hw,sw,trace,raw. " 1599 "Valid types: hw,sw,trace,raw. "
1551 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 1600 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
1552 "addr,symoff,period", parse_output_fields), 1601 "addr,symoff,period,flags", parse_output_fields),
1553 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1602 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1554 "system-wide collection from all CPUs"), 1603 "system-wide collection from all CPUs"),
1555 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 1604 OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
@@ -1570,6 +1619,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1570 OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, 1619 OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
1571 "Show the mmap events"), 1620 "Show the mmap events"),
1572 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), 1621 OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
1622 OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
1623 "Instruction Tracing options",
1624 itrace_parse_synth_opts),
1573 OPT_END() 1625 OPT_END()
1574 }; 1626 };
1575 const char * const script_subcommands[] = { "record", "report", NULL }; 1627 const char * const script_subcommands[] = { "record", "report", NULL };
@@ -1765,6 +1817,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1765 1817
1766 script.session = session; 1818 script.session = session;
1767 1819
1820 session->itrace_synth_opts = &itrace_synth_opts;
1821
1768 if (cpu_list) { 1822 if (cpu_list) {
1769 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); 1823 err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
1770 if (err < 0) 1824 if (err < 0)
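print_sample_flags() above renders each set bit as one character from PERF_IP_FLAG_CHARS and falls back to '?' for set bits beyond the known set. A standalone sketch of that rendering; FLAG_CHARS here is a made-up stand-in, not perf's actual flag encoding:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define FLAG_CHARS "bcrosyiABEx"      /* illustrative bit-to-letter map */

static void render_flags(uint32_t flags, char *str, size_t len)
{
	const int n = (int)strlen(FLAG_CHARS);
	int i, pos = 0;

	/* one character per known flag bit... */
	for (i = 0; i < n && pos + 1 < (int)len; i++, flags >>= 1) {
		if (flags & 1)
			str[pos++] = FLAG_CHARS[i];
	}
	/* ...and '?' for any set bit past the known range */
	for (; i < 32 && pos + 1 < (int)len; i++, flags >>= 1) {
		if (flags & 1)
			str[pos++] = '?';
	}
	str[pos] = '\0';
}

int main(void)
{
	char buf[33];                   /* 32 bits + NUL, as in the patch */

	render_flags(0x5, buf, sizeof(buf));
	printf(" %-4s\n", buf);         /* prints " br  " with this map */
	return 0;
}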
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f7b8218785f6..fcf99bdeb19e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix);
73static void print_aggr(char *prefix); 73static void print_aggr(char *prefix);
74 74
75/* Default events used for perf stat -T */ 75/* Default events used for perf stat -T */
76static const char * const transaction_attrs[] = { 76static const char *transaction_attrs = {
77 "task-clock", 77 "task-clock,"
78 "{" 78 "{"
79 "instructions," 79 "instructions,"
80 "cycles," 80 "cycles,"
@@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = {
86}; 86};
87 87
88/* More limited version when the CPU does not have all events. */ 88/* More limited version when the CPU does not have all events. */
89static const char * const transaction_limited_attrs[] = { 89static const char * transaction_limited_attrs = {
90 "task-clock", 90 "task-clock,"
91 "{" 91 "{"
92 "instructions," 92 "instructions,"
93 "cycles," 93 "cycles,"
@@ -96,30 +96,12 @@ static const char * const transaction_limited_attrs[] = {
96 "}" 96 "}"
97}; 97};
98 98
99/* must match transaction_attrs and the beginning limited_attrs */
100enum {
101 T_TASK_CLOCK,
102 T_INSTRUCTIONS,
103 T_CYCLES,
104 T_CYCLES_IN_TX,
105 T_TRANSACTION_START,
106 T_ELISION_START,
107 T_CYCLES_IN_TX_CP,
108};
109
110static struct perf_evlist *evsel_list; 99static struct perf_evlist *evsel_list;
111 100
112static struct target target = { 101static struct target target = {
113 .uid = UINT_MAX, 102 .uid = UINT_MAX,
114}; 103};
115 104
116enum aggr_mode {
117 AGGR_NONE,
118 AGGR_GLOBAL,
119 AGGR_SOCKET,
120 AGGR_CORE,
121};
122
123static int run_count = 1; 105static int run_count = 1;
124static bool no_inherit = false; 106static bool no_inherit = false;
125static bool scale = true; 107static bool scale = true;
@@ -147,10 +129,6 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu);
147 129
148static volatile int done = 0; 130static volatile int done = 0;
149 131
150struct perf_stat {
151 struct stats res_stats[3];
152};
153
154static inline void diff_timespec(struct timespec *r, struct timespec *a, 132static inline void diff_timespec(struct timespec *r, struct timespec *a,
155 struct timespec *b) 133 struct timespec *b)
156{ 134{
@@ -180,6 +158,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
180 158
181 for (i = 0; i < 3; i++) 159 for (i = 0; i < 3; i++)
182 init_stats(&ps->res_stats[i]); 160 init_stats(&ps->res_stats[i]);
161
162 perf_stat_evsel_id_init(evsel);
183} 163}
184 164
185static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 165static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
@@ -198,24 +178,19 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
198 178
199static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) 179static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
200{ 180{
201 void *addr; 181 struct perf_counts *counts;
202 size_t sz;
203 182
204 sz = sizeof(*evsel->counts) + 183 counts = perf_counts__new(perf_evsel__nr_cpus(evsel));
205 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); 184 if (counts)
185 evsel->prev_raw_counts = counts;
206 186
207 addr = zalloc(sz); 187 return counts ? 0 : -ENOMEM;
208 if (!addr)
209 return -ENOMEM;
210
211 evsel->prev_raw_counts = addr;
212
213 return 0;
214} 188}
215 189
216static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) 190static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
217{ 191{
218 zfree(&evsel->prev_raw_counts); 192 perf_counts__delete(evsel->prev_raw_counts);
193 evsel->prev_raw_counts = NULL;
219} 194}
220 195
221static void perf_evlist__free_stats(struct perf_evlist *evlist) 196static void perf_evlist__free_stats(struct perf_evlist *evlist)
@@ -247,22 +222,6 @@ out_free:
247 return -1; 222 return -1;
248} 223}
249 224
250static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
251static struct stats runtime_cycles_stats[MAX_NR_CPUS];
252static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
253static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
254static struct stats runtime_branches_stats[MAX_NR_CPUS];
255static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
256static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
257static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
258static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
259static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
260static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
261static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
262static struct stats walltime_nsecs_stats;
263static struct stats runtime_transaction_stats[MAX_NR_CPUS];
264static struct stats runtime_elision_stats[MAX_NR_CPUS];
265
266static void perf_stat__reset_stats(struct perf_evlist *evlist) 225static void perf_stat__reset_stats(struct perf_evlist *evlist)
267{ 226{
268 struct perf_evsel *evsel; 227 struct perf_evsel *evsel;
@@ -272,23 +231,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
272 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); 231 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
273 } 232 }
274 233
275 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); 234 perf_stat__reset_shadow_stats();
276 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
277 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
278 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
279 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
280 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
281 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
282 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
283 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
284 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
285 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
286 memset(runtime_cycles_in_tx_stats, 0,
287 sizeof(runtime_cycles_in_tx_stats));
288 memset(runtime_transaction_stats, 0,
289 sizeof(runtime_transaction_stats));
290 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
291 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
292} 235}
293 236
294static int create_perf_stat_counter(struct perf_evsel *evsel) 237static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -325,70 +268,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
325 return 0; 268 return 0;
326} 269}
327 270
328static struct perf_evsel *nth_evsel(int n)
329{
330 static struct perf_evsel **array;
331 static int array_len;
332 struct perf_evsel *ev;
333 int j;
334
335 /* Assumes this only called when evsel_list does not change anymore. */
336 if (!array) {
337 evlist__for_each(evsel_list, ev)
338 array_len++;
339 array = malloc(array_len * sizeof(void *));
340 if (!array)
341 exit(ENOMEM);
342 j = 0;
343 evlist__for_each(evsel_list, ev)
344 array[j++] = ev;
345 }
346 if (n < array_len)
347 return array[n];
348 return NULL;
349}
350
351/*
352 * Update various tracking values we maintain to print
353 * more semantic information such as miss/hit ratios,
354 * instruction rates, etc:
355 */
356static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
357 int cpu)
358{
359 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
360 update_stats(&runtime_nsecs_stats[cpu], count[0]);
361 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
362 update_stats(&runtime_cycles_stats[cpu], count[0]);
363 else if (transaction_run &&
364 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
365 update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
366 else if (transaction_run &&
367 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
368 update_stats(&runtime_transaction_stats[cpu], count[0]);
369 else if (transaction_run &&
370 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
371 update_stats(&runtime_elision_stats[cpu], count[0]);
372 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
373 update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
374 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
375 update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
376 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
377 update_stats(&runtime_branches_stats[cpu], count[0]);
378 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
379 update_stats(&runtime_cacherefs_stats[cpu], count[0]);
380 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
381 update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
382 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
383 update_stats(&runtime_l1_icache_stats[cpu], count[0]);
384 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
385 update_stats(&runtime_ll_cache_stats[cpu], count[0]);
386 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
387 update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
388 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
389 update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
390}
391
392static void zero_per_pkg(struct perf_evsel *counter) 271static void zero_per_pkg(struct perf_evsel *counter)
393{ 272{
394 if (counter->per_pkg_mask) 273 if (counter->per_pkg_mask)
@@ -449,7 +328,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
449 perf_counts_values__scale(count, scale, NULL); 328 perf_counts_values__scale(count, scale, NULL);
450 evsel->counts->cpu[cpu] = *count; 329 evsel->counts->cpu[cpu] = *count;
451 if (aggr_mode == AGGR_NONE) 330 if (aggr_mode == AGGR_NONE)
452 update_shadow_stats(evsel, count->values, cpu); 331 perf_stat__update_shadow_stats(evsel, count->values, cpu);
453 break; 332 break;
454 case AGGR_GLOBAL: 333 case AGGR_GLOBAL:
455 aggr->val += count->val; 334 aggr->val += count->val;
@@ -497,7 +376,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
497 /* 376 /*
498 * Save the full runtime - to allow normalization during printout: 377 * Save the full runtime - to allow normalization during printout:
499 */ 378 */
500 update_shadow_stats(counter, count, 0); 379 perf_stat__update_shadow_stats(counter, count, 0);
501 380
502 return 0; 381 return 0;
503} 382}
@@ -665,7 +544,10 @@ static int __run_perf_stat(int argc, const char **argv)
665 ui__warning("%s event is not supported by the kernel.\n", 544 ui__warning("%s event is not supported by the kernel.\n",
666 perf_evsel__name(counter)); 545 perf_evsel__name(counter));
667 counter->supported = false; 546 counter->supported = false;
668 continue; 547
548 if ((counter->leader != counter) ||
549 !(counter->leader->nr_members > 1))
550 continue;
669 } 551 }
670 552
671 perf_evsel__open_strerror(counter, &target, 553 perf_evsel__open_strerror(counter, &target,
@@ -875,188 +757,8 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
875 fprintf(output, " "); 757 fprintf(output, " ");
876} 758}
877 759
878/* used for get_ratio_color() */
879enum grc_type {
880 GRC_STALLED_CYCLES_FE,
881 GRC_STALLED_CYCLES_BE,
882 GRC_CACHE_MISSES,
883 GRC_MAX_NR
884};
885
886static const char *get_ratio_color(enum grc_type type, double ratio)
887{
888 static const double grc_table[GRC_MAX_NR][3] = {
889 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
890 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
891 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
892 };
893 const char *color = PERF_COLOR_NORMAL;
894
895 if (ratio > grc_table[type][0])
896 color = PERF_COLOR_RED;
897 else if (ratio > grc_table[type][1])
898 color = PERF_COLOR_MAGENTA;
899 else if (ratio > grc_table[type][2])
900 color = PERF_COLOR_YELLOW;
901
902 return color;
903}
904
905static void print_stalled_cycles_frontend(int cpu,
906 struct perf_evsel *evsel
907 __maybe_unused, double avg)
908{
909 double total, ratio = 0.0;
910 const char *color;
911
912 total = avg_stats(&runtime_cycles_stats[cpu]);
913
914 if (total)
915 ratio = avg / total * 100.0;
916
917 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
918
919 fprintf(output, " # ");
920 color_fprintf(output, color, "%6.2f%%", ratio);
921 fprintf(output, " frontend cycles idle ");
922}
923
924static void print_stalled_cycles_backend(int cpu,
925 struct perf_evsel *evsel
926 __maybe_unused, double avg)
927{
928 double total, ratio = 0.0;
929 const char *color;
930
931 total = avg_stats(&runtime_cycles_stats[cpu]);
932
933 if (total)
934 ratio = avg / total * 100.0;
935
936 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
937
938 fprintf(output, " # ");
939 color_fprintf(output, color, "%6.2f%%", ratio);
940 fprintf(output, " backend cycles idle ");
941}
942
943static void print_branch_misses(int cpu,
944 struct perf_evsel *evsel __maybe_unused,
945 double avg)
946{
947 double total, ratio = 0.0;
948 const char *color;
949
950 total = avg_stats(&runtime_branches_stats[cpu]);
951
952 if (total)
953 ratio = avg / total * 100.0;
954
955 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
956
957 fprintf(output, " # ");
958 color_fprintf(output, color, "%6.2f%%", ratio);
959 fprintf(output, " of all branches ");
960}
961
962static void print_l1_dcache_misses(int cpu,
963 struct perf_evsel *evsel __maybe_unused,
964 double avg)
965{
966 double total, ratio = 0.0;
967 const char *color;
968
969 total = avg_stats(&runtime_l1_dcache_stats[cpu]);
970
971 if (total)
972 ratio = avg / total * 100.0;
973
974 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
975
976 fprintf(output, " # ");
977 color_fprintf(output, color, "%6.2f%%", ratio);
978 fprintf(output, " of all L1-dcache hits ");
979}
980
981static void print_l1_icache_misses(int cpu,
982 struct perf_evsel *evsel __maybe_unused,
983 double avg)
984{
985 double total, ratio = 0.0;
986 const char *color;
987
988 total = avg_stats(&runtime_l1_icache_stats[cpu]);
989
990 if (total)
991 ratio = avg / total * 100.0;
992
993 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
994
995 fprintf(output, " # ");
996 color_fprintf(output, color, "%6.2f%%", ratio);
997 fprintf(output, " of all L1-icache hits ");
998}
999
1000static void print_dtlb_cache_misses(int cpu,
1001 struct perf_evsel *evsel __maybe_unused,
1002 double avg)
1003{
1004 double total, ratio = 0.0;
1005 const char *color;
1006
1007 total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
1008
1009 if (total)
1010 ratio = avg / total * 100.0;
1011
1012 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1013
1014 fprintf(output, " # ");
1015 color_fprintf(output, color, "%6.2f%%", ratio);
1016 fprintf(output, " of all dTLB cache hits ");
1017}
1018
1019static void print_itlb_cache_misses(int cpu,
1020 struct perf_evsel *evsel __maybe_unused,
1021 double avg)
1022{
1023 double total, ratio = 0.0;
1024 const char *color;
1025
1026 total = avg_stats(&runtime_itlb_cache_stats[cpu]);
1027
1028 if (total)
1029 ratio = avg / total * 100.0;
1030
1031 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1032
1033 fprintf(output, " # ");
1034 color_fprintf(output, color, "%6.2f%%", ratio);
1035 fprintf(output, " of all iTLB cache hits ");
1036}
1037
1038static void print_ll_cache_misses(int cpu,
1039 struct perf_evsel *evsel __maybe_unused,
1040 double avg)
1041{
1042 double total, ratio = 0.0;
1043 const char *color;
1044
1045 total = avg_stats(&runtime_ll_cache_stats[cpu]);
1046
1047 if (total)
1048 ratio = avg / total * 100.0;
1049
1050 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1051
1052 fprintf(output, " # ");
1053 color_fprintf(output, color, "%6.2f%%", ratio);
1054 fprintf(output, " of all LL-cache hits ");
1055}
1056
1057static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 760static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1058{ 761{
1059 double total, ratio = 0.0, total2;
1060 double sc = evsel->scale; 762 double sc = evsel->scale;
1061 const char *fmt; 763 const char *fmt;
1062 int cpu = cpu_map__id_to_cpu(id); 764 int cpu = cpu_map__id_to_cpu(id);
@@ -1090,138 +792,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
1090 if (csv_output || interval) 792 if (csv_output || interval)
1091 return; 793 return;
1092 794
1093 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 795 perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
1094 total = avg_stats(&runtime_cycles_stats[cpu]);
1095 if (total) {
1096 ratio = avg / total;
1097 fprintf(output, " # %5.2f insns per cycle ", ratio);
1098 } else {
1099 fprintf(output, " ");
1100 }
1101 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
1102 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
1103
1104 if (total && avg) {
1105 ratio = total / avg;
1106 fprintf(output, "\n");
1107 if (aggr_mode == AGGR_NONE)
1108 fprintf(output, " ");
1109 fprintf(output, " # %5.2f stalled cycles per insn", ratio);
1110 }
1111
1112 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
1113 runtime_branches_stats[cpu].n != 0) {
1114 print_branch_misses(cpu, evsel, avg);
1115 } else if (
1116 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1117 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
1118 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1119 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1120 runtime_l1_dcache_stats[cpu].n != 0) {
1121 print_l1_dcache_misses(cpu, evsel, avg);
1122 } else if (
1123 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1124 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
1125 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1126 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1127 runtime_l1_icache_stats[cpu].n != 0) {
1128 print_l1_icache_misses(cpu, evsel, avg);
1129 } else if (
1130 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1131 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
1132 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1133 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1134 runtime_dtlb_cache_stats[cpu].n != 0) {
1135 print_dtlb_cache_misses(cpu, evsel, avg);
1136 } else if (
1137 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1138 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
1139 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1140 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1141 runtime_itlb_cache_stats[cpu].n != 0) {
1142 print_itlb_cache_misses(cpu, evsel, avg);
1143 } else if (
1144 evsel->attr.type == PERF_TYPE_HW_CACHE &&
1145 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
1146 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
1147 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1148 runtime_ll_cache_stats[cpu].n != 0) {
1149 print_ll_cache_misses(cpu, evsel, avg);
1150 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
1151 runtime_cacherefs_stats[cpu].n != 0) {
1152 total = avg_stats(&runtime_cacherefs_stats[cpu]);
1153
1154 if (total)
1155 ratio = avg * 100 / total;
1156
1157 fprintf(output, " # %8.3f %% of all cache refs ", ratio);
1158
1159 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
1160 print_stalled_cycles_frontend(cpu, evsel, avg);
1161 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
1162 print_stalled_cycles_backend(cpu, evsel, avg);
1163 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
1164 total = avg_stats(&runtime_nsecs_stats[cpu]);
1165
1166 if (total) {
1167 ratio = avg / total;
1168 fprintf(output, " # %8.3f GHz ", ratio);
1169 } else {
1170 fprintf(output, " ");
1171 }
1172 } else if (transaction_run &&
1173 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
1174 total = avg_stats(&runtime_cycles_stats[cpu]);
1175 if (total)
1176 fprintf(output,
1177 " # %5.2f%% transactional cycles ",
1178 100.0 * (avg / total));
1179 } else if (transaction_run &&
1180 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
1181 total = avg_stats(&runtime_cycles_stats[cpu]);
1182 total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1183 if (total2 < avg)
1184 total2 = avg;
1185 if (total)
1186 fprintf(output,
1187 " # %5.2f%% aborted cycles ",
1188 100.0 * ((total2-avg) / total));
1189 } else if (transaction_run &&
1190 perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
1191 avg > 0 &&
1192 runtime_cycles_in_tx_stats[cpu].n != 0) {
1193 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1194
1195 if (total)
1196 ratio = total / avg;
1197
1198 fprintf(output, " # %8.0f cycles / transaction ", ratio);
1199 } else if (transaction_run &&
1200 perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
1201 avg > 0 &&
1202 runtime_cycles_in_tx_stats[cpu].n != 0) {
1203 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
1204
1205 if (total)
1206 ratio = total / avg;
1207
1208 fprintf(output, " # %8.0f cycles / elision ", ratio);
1209 } else if (runtime_nsecs_stats[cpu].n != 0) {
1210 char unit = 'M';
1211
1212 total = avg_stats(&runtime_nsecs_stats[cpu]);
1213
1214 if (total)
1215 ratio = 1000.0 * avg / total;
1216 if (ratio < 0.001) {
1217 ratio *= 1000;
1218 unit = 'K';
1219 }
1220
1221 fprintf(output, " # %8.3f %c/sec ", ratio, unit);
1222 } else {
1223 fprintf(output, " ");
1224 }
1225} 796}
1226 797
1227static void print_aggr(char *prefix) 798static void print_aggr(char *prefix)
@@ -1536,17 +1107,6 @@ static int perf_stat_init_aggr_mode(void)
1536 return 0; 1107 return 0;
1537} 1108}
1538 1109
1539static int setup_events(const char * const *attrs, unsigned len)
1540{
1541 unsigned i;
1542
1543 for (i = 0; i < len; i++) {
1544 if (parse_events(evsel_list, attrs[i]))
1545 return -1;
1546 }
1547 return 0;
1548}
1549
1550/* 1110/*
1551 * Add default attributes, if there were no attributes specified or 1111 * Add default attributes, if there were no attributes specified or
1552 * if -d/--detailed, -d -d or -d -d -d is used: 1112 * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1668,12 +1228,10 @@ static int add_default_attributes(void)
1668 int err; 1228 int err;
1669 if (pmu_have_event("cpu", "cycles-ct") && 1229 if (pmu_have_event("cpu", "cycles-ct") &&
1670 pmu_have_event("cpu", "el-start")) 1230 pmu_have_event("cpu", "el-start"))
1671 err = setup_events(transaction_attrs, 1231 err = parse_events(evsel_list, transaction_attrs, NULL);
1672 ARRAY_SIZE(transaction_attrs));
1673 else 1232 else
1674 err = setup_events(transaction_limited_attrs, 1233 err = parse_events(evsel_list, transaction_limited_attrs, NULL);
1675 ARRAY_SIZE(transaction_limited_attrs)); 1234 if (err) {
1676 if (err < 0) {
1677 fprintf(stderr, "Cannot set up transaction events\n"); 1235 fprintf(stderr, "Cannot set up transaction events\n");
1678 return -1; 1236 return -1;
1679 } 1237 }
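The transaction_attrs rewrite above leans on C's adjacent-string-literal concatenation: what used to be an array of separate event names fed to setup_events() one at a time becomes a single comma-separated spec that one parse_events() call can consume. In isolation, with the event list trimmed for illustration:

#include <stdio.h>

static const char *transaction_events =
	"task-clock,"
	"{"
	"instructions,"
	"cycles"
	"}";

int main(void)
{
	/* prints: task-clock,{instructions,cycles} */
	printf("%s\n", transaction_events);
	return 0;
}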
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index e50fe1187b0b..30e59620179d 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -61,13 +61,13 @@ struct timechart {
61 tasks_only, 61 tasks_only,
62 with_backtrace, 62 with_backtrace,
63 topology; 63 topology;
64 bool force;
64 /* IO related settings */ 65 /* IO related settings */
65 u64 io_events;
66 bool io_only, 66 bool io_only,
67 skip_eagain; 67 skip_eagain;
68 u64 io_events;
68 u64 min_time, 69 u64 min_time,
69 merge_dist; 70 merge_dist;
70 bool force;
71}; 71};
72 72
73struct per_pidcomm; 73struct per_pidcomm;
@@ -523,7 +523,7 @@ static const char *cat_backtrace(union perf_event *event,
523 * Discard all. 523 * Discard all.
524 */ 524 */
525 zfree(&p); 525 zfree(&p);
526 goto exit; 526 goto exit_put;
527 } 527 }
528 continue; 528 continue;
529 } 529 }
@@ -538,7 +538,8 @@ static const char *cat_backtrace(union perf_event *event,
538 else 538 else
539 fprintf(f, "..... %016" PRIx64 "\n", ip); 539 fprintf(f, "..... %016" PRIx64 "\n", ip);
540 } 540 }
541 541exit_put:
542 addr_location__put(&al);
542exit: 543exit:
543 fclose(f); 544 fclose(f);
544 545
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6a4d5d41c671..619a8696fda7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -235,10 +235,13 @@ static void perf_top__show_details(struct perf_top *top)
235 235
236 more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, 236 more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
237 0, top->sym_pcnt_filter, top->print_entries, 4); 237 0, top->sym_pcnt_filter, top->print_entries, 4);
238 if (top->zero) 238
239 symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); 239 if (top->evlist->enabled) {
240 else 240 if (top->zero)
241 symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); 241 symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
242 else
243 symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
244 }
242 if (more != 0) 245 if (more != 0)
243 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 246 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
244out_unlock: 247out_unlock:
@@ -276,11 +279,13 @@ static void perf_top__print_sym_table(struct perf_top *top)
276 return; 279 return;
277 } 280 }
278 281
279 if (top->zero) { 282 if (top->evlist->enabled) {
280 hists__delete_entries(hists); 283 if (top->zero) {
281 } else { 284 hists__delete_entries(hists);
282 hists__decay_entries(hists, top->hide_user_symbols, 285 } else {
283 top->hide_kernel_symbols); 286 hists__decay_entries(hists, top->hide_user_symbols,
287 top->hide_kernel_symbols);
288 }
284 } 289 }
285 290
286 hists__collapse_resort(hists, NULL); 291 hists__collapse_resort(hists, NULL);
@@ -545,11 +550,13 @@ static void perf_top__sort_new_samples(void *arg)
545 550
546 hists = evsel__hists(t->sym_evsel); 551 hists = evsel__hists(t->sym_evsel);
547 552
548 if (t->zero) { 553 if (t->evlist->enabled) {
549 hists__delete_entries(hists); 554 if (t->zero) {
550 } else { 555 hists__delete_entries(hists);
551 hists__decay_entries(hists, t->hide_user_symbols, 556 } else {
552 t->hide_kernel_symbols); 557 hists__decay_entries(hists, t->hide_user_symbols,
558 t->hide_kernel_symbols);
559 }
553 } 560 }
554 561
555 hists__collapse_resort(hists, NULL); 562 hists__collapse_resort(hists, NULL);
@@ -579,8 +586,27 @@ static void *display_thread_tui(void *arg)
579 hists->uid_filter_str = top->record_opts.target.uid_str; 586 hists->uid_filter_str = top->record_opts.target.uid_str;
580 } 587 }
581 588
582 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, 589 while (true) {
583 &top->session->header.env); 590 int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
591 top->min_percent,
592 &top->session->header.env);
593
594 if (key != 'f')
595 break;
596
597 perf_evlist__toggle_enable(top->evlist);
598 /*
599 * No need to refresh, resort/decay histogram entries
600 * if we are not collecting samples:
601 */
602 if (top->evlist->enabled) {
603 hbt.refresh = top->delay_secs;
604 help = "Press 'f' to disable the events or 'h' to see other hotkeys";
605 } else {
606 help = "Press 'f' again to re-enable the events";
607 hbt.refresh = 0;
608 }
609 }
584 610
585 done = 1; 611 done = 1;
586 return NULL; 612 return NULL;
@@ -775,7 +801,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
775 if (al.sym == NULL || !al.sym->ignore) { 801 if (al.sym == NULL || !al.sym->ignore) {
776 struct hists *hists = evsel__hists(evsel); 802 struct hists *hists = evsel__hists(evsel);
777 struct hist_entry_iter iter = { 803 struct hist_entry_iter iter = {
778 .add_entry_cb = hist_iter__top_callback, 804 .evsel = evsel,
805 .sample = sample,
806 .add_entry_cb = hist_iter__top_callback,
779 }; 807 };
780 808
781 if (symbol_conf.cumulate_callchain) 809 if (symbol_conf.cumulate_callchain)
@@ -785,15 +813,14 @@ static void perf_event__process_sample(struct perf_tool *tool,
785 813
786 pthread_mutex_lock(&hists->lock); 814 pthread_mutex_lock(&hists->lock);
787 815
788 err = hist_entry_iter__add(&iter, &al, evsel, sample, 816 err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
789 top->max_stack, top);
790 if (err < 0) 817 if (err < 0)
791 pr_err("Problem incrementing symbol period, skipping event\n"); 818 pr_err("Problem incrementing symbol period, skipping event\n");
792 819
793 pthread_mutex_unlock(&hists->lock); 820 pthread_mutex_unlock(&hists->lock);
794 } 821 }
795 822
796 return; 823 addr_location__put(&al);
797} 824}
798 825
799static void perf_top__mmap_read_idx(struct perf_top *top, int idx) 826static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
@@ -950,7 +977,7 @@ static int __cmd_top(struct perf_top *top)
950 goto out_delete; 977 goto out_delete;
951 978
952 machine__synthesize_threads(&top->session->machines.host, &opts->target, 979 machine__synthesize_threads(&top->session->machines.host, &opts->target,
953 top->evlist->threads, false); 980 top->evlist->threads, false, opts->proc_map_timeout);
954 ret = perf_top__start_counters(top); 981 ret = perf_top__start_counters(top);
955 if (ret) 982 if (ret)
956 goto out_delete; 983 goto out_delete;
@@ -1060,6 +1087,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1060 .target = { 1087 .target = {
1061 .uses_mmap = true, 1088 .uses_mmap = true,
1062 }, 1089 },
1090 .proc_map_timeout = 500,
1063 }, 1091 },
1064 .max_stack = PERF_MAX_STACK_DEPTH, 1092 .max_stack = PERF_MAX_STACK_DEPTH,
1065 .sym_pcnt_filter = 5, 1093 .sym_pcnt_filter = 5,
@@ -1159,6 +1187,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1159 OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, 1187 OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
1160 "width[,width...]", 1188 "width[,width...]",
1161 "don't try to adjust column width, use these fixed values"), 1189 "don't try to adjust column width, use these fixed values"),
1190 OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
1191 "per thread proc mmap processing timeout in ms"),
1162 OPT_END() 1192 OPT_END()
1163 }; 1193 };
1164 const char * const top_usage[] = { 1194 const char * const top_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e122970361f2..de5d277d1ad7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -16,7 +16,6 @@
16 16
17#include <libaudit.h> 17#include <libaudit.h>
18#include <stdlib.h> 18#include <stdlib.h>
19#include <sys/eventfd.h>
20#include <sys/mman.h> 19#include <sys/mman.h>
21#include <linux/futex.h> 20#include <linux/futex.h>
22 21
@@ -41,6 +40,51 @@
41# define EFD_SEMAPHORE 1 40# define EFD_SEMAPHORE 1
42#endif 41#endif
43 42
43#ifndef EFD_NONBLOCK
44# define EFD_NONBLOCK 00004000
45#endif
46
47#ifndef EFD_CLOEXEC
48# define EFD_CLOEXEC 02000000
49#endif
50
51#ifndef O_CLOEXEC
52# define O_CLOEXEC 02000000
53#endif
54
55#ifndef SOCK_DCCP
56# define SOCK_DCCP 6
57#endif
58
59#ifndef SOCK_CLOEXEC
60# define SOCK_CLOEXEC 02000000
61#endif
62
63#ifndef SOCK_NONBLOCK
64# define SOCK_NONBLOCK 00004000
65#endif
66
67#ifndef MSG_CMSG_CLOEXEC
68# define MSG_CMSG_CLOEXEC 0x40000000
69#endif
70
71#ifndef PERF_FLAG_FD_NO_GROUP
72# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
73#endif
74
75#ifndef PERF_FLAG_FD_OUTPUT
76# define PERF_FLAG_FD_OUTPUT (1UL << 1)
77#endif
78
79#ifndef PERF_FLAG_PID_CGROUP
80# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
81#endif
82
83#ifndef PERF_FLAG_FD_CLOEXEC
84# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
85#endif
86
87
44struct tp_field { 88struct tp_field {
45 int offset; 89 int offset;
46 union { 90 union {
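[editor's note] The run of #ifndef/#define fallbacks added above lets builtin-trace.c keep compiling, and keep decoding these flags, against system headers old enough to lack the constants; the values are the fixed kernel ABI numbers. A minimal sketch of the same pattern (illustrative, not taken from the patch):

	/*
	 * Keep the system header's value when it exists, otherwise supply
	 * the known kernel ABI value so the decoder still builds on old
	 * toolchains.
	 */
	#include <stdio.h>
	#include <fcntl.h>

	#ifndef O_CLOEXEC
	# define O_CLOEXEC 02000000	/* same ABI value the patch hardcodes */
	#endif

	int main(void)
	{
		int flags = O_RDONLY | O_CLOEXEC;

		printf("cloexec set: %s\n", (flags & O_CLOEXEC) ? "yes" : "no");
		return 0;
	}
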
@@ -331,6 +375,14 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
331 375
332#define SCA_HEX syscall_arg__scnprintf_hex 376#define SCA_HEX syscall_arg__scnprintf_hex
333 377
378static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
379 struct syscall_arg *arg)
380{
381 return scnprintf(bf, size, "%d", arg->val);
382}
383
384#define SCA_INT syscall_arg__scnprintf_int
385
334static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, 386static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335 struct syscall_arg *arg) 387 struct syscall_arg *arg)
336{ 388{
@@ -783,6 +835,34 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
783 835
784#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags 836#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785 837
838static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
839 struct syscall_arg *arg)
840{
841 int printed = 0, flags = arg->val;
842
843 if (flags == 0)
844 return 0;
845
846#define P_FLAG(n) \
847 if (flags & PERF_FLAG_##n) { \
848 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
849 flags &= ~PERF_FLAG_##n; \
850 }
851
852 P_FLAG(FD_NO_GROUP);
853 P_FLAG(FD_OUTPUT);
854 P_FLAG(PID_CGROUP);
855 P_FLAG(FD_CLOEXEC);
856#undef P_FLAG
857
858 if (flags)
859 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860
861 return printed;
862}
863
864#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
865
786static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, 866static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787 struct syscall_arg *arg) 867 struct syscall_arg *arg)
788{ 868{
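[editor's note] syscall_arg__scnprintf_perf_flags() above follows the tool's usual flag-beautifier shape: test each known bit, append its name, clear it, then dump any leftover bits in hex. A self-contained sketch of that shape with hypothetical MYF_* bits (perf itself uses scnprintf(), which clamps its return value; plain snprintf() is close enough here):

	#include <stdio.h>

	/* Hypothetical flag bits standing in for PERF_FLAG_*. */
	#define MYF_FD_NO_GROUP	(1UL << 0)
	#define MYF_FD_OUTPUT	(1UL << 1)

	static int show_flags(char *bf, size_t size, unsigned long flags)
	{
		int printed = 0;

	#define P_FLAG(n)							\
		if (flags & MYF_##n) {						\
			printed += snprintf(bf + printed, size - printed,	\
					    "%s%s", printed ? "|" : "", #n);	\
			flags &= ~MYF_##n;					\
		}
		P_FLAG(FD_NO_GROUP);
		P_FLAG(FD_OUTPUT);
	#undef P_FLAG

		/* Whatever bits remain are unknown: print them raw. */
		if (flags)
			printed += snprintf(bf + printed, size - printed,
					    "%s%#lx", printed ? "|" : "", flags);
		return printed;
	}

	int main(void)
	{
		char buf[64];

		show_flags(buf, sizeof(buf), MYF_FD_NO_GROUP | (1UL << 5));
		puts(buf);	/* prints: FD_NO_GROUP|0x20 */
		return 0;
	}
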
@@ -1050,6 +1130,11 @@ static struct syscall_fmt {
1050 { .name = "openat", .errmsg = true, 1130 { .name = "openat", .errmsg = true,
1051 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ 1131 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052 [2] = SCA_OPEN_FLAGS, /* flags */ }, }, 1132 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1133 { .name = "perf_event_open", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1135 [2] = SCA_INT, /* cpu */
1136 [3] = SCA_FD, /* group_fd */
1137 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1053 { .name = "pipe2", .errmsg = true, 1138 { .name = "pipe2", .errmsg = true,
1054 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, 1139 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055 { .name = "poll", .errmsg = true, .timeout = true, }, 1140 { .name = "poll", .errmsg = true, .timeout = true, },
@@ -1433,7 +1518,8 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1433 return -ENOMEM; 1518 return -ENOMEM;
1434 1519
1435 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, 1520 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1436 evlist->threads, trace__tool_process, false); 1521 evlist->threads, trace__tool_process, false,
1522 trace->opts.proc_map_timeout);
1437 if (err) 1523 if (err)
1438 symbol__exit(); 1524 symbol__exit();
1439 1525
@@ -1712,7 +1798,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1712 void *args; 1798 void *args;
1713 size_t printed = 0; 1799 size_t printed = 0;
1714 struct thread *thread; 1800 struct thread *thread;
1715 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1801 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1716 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1802 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1717 struct thread_trace *ttrace; 1803 struct thread_trace *ttrace;
1718 1804
@@ -1725,14 +1811,14 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1725 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1811 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1726 ttrace = thread__trace(thread, trace->output); 1812 ttrace = thread__trace(thread, trace->output);
1727 if (ttrace == NULL) 1813 if (ttrace == NULL)
1728 return -1; 1814 goto out_put;
1729 1815
1730 args = perf_evsel__sc_tp_ptr(evsel, args, sample); 1816 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1731 1817
1732 if (ttrace->entry_str == NULL) { 1818 if (ttrace->entry_str == NULL) {
1733 ttrace->entry_str = malloc(1024); 1819 ttrace->entry_str = malloc(1024);
1734 if (!ttrace->entry_str) 1820 if (!ttrace->entry_str)
1735 return -1; 1821 goto out_put;
1736 } 1822 }
1737 1823
1738 if (!trace->summary_only) 1824 if (!trace->summary_only)
@@ -1757,8 +1843,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1757 thread__put(trace->current); 1843 thread__put(trace->current);
1758 trace->current = thread__get(thread); 1844 trace->current = thread__get(thread);
1759 } 1845 }
1760 1846 err = 0;
1761 return 0; 1847out_put:
1848 thread__put(thread);
1849 return err;
1762} 1850}
1763 1851
1764static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, 1852static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
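[editor's note] The repeated return -1 → goto out_put rewrites in trace__sys_enter()/trace__sys_exit() follow from machine__findnew_thread() now handing back a reference that must be dropped on every exit path. The shape of the fix, sketched with a toy refcounted type (nothing below is perf API):

	#include <stdio.h>
	#include <stdlib.h>

	/* Toy refcounted object standing in for perf's struct thread. */
	struct obj { int refcnt; };

	static struct obj *obj_get_new(void)	/* like machine__findnew_thread() */
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (o)
			o->refcnt = 1;
		return o;
	}

	static void obj_put(struct obj *o)	/* NULL-safe, like thread__put() */
	{
		if (o && --o->refcnt == 0)
			free(o);
	}

	static int handle(int fail)
	{
		int err = -1;
		struct obj *o = obj_get_new();

		if (o == NULL)
			goto out_put;	/* putting NULL is a no-op */

		if (fail)
			goto out_put;	/* error path still drops the reference */

		err = 0;
	out_put:
		obj_put(o);		/* single exit: every path pairs the get */
		return err;
	}

	int main(void)
	{
		printf("%d %d\n", handle(0), handle(1));	/* 0 -1 */
		return 0;
	}
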
@@ -1768,7 +1856,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1768 long ret; 1856 long ret;
1769 u64 duration = 0; 1857 u64 duration = 0;
1770 struct thread *thread; 1858 struct thread *thread;
1771 int id = perf_evsel__sc_tp_uint(evsel, id, sample); 1859 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1772 struct syscall *sc = trace__syscall_info(trace, evsel, id); 1860 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1773 struct thread_trace *ttrace; 1861 struct thread_trace *ttrace;
1774 1862
@@ -1781,7 +1869,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1781 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 1869 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1782 ttrace = thread__trace(thread, trace->output); 1870 ttrace = thread__trace(thread, trace->output);
1783 if (ttrace == NULL) 1871 if (ttrace == NULL)
1784 return -1; 1872 goto out_put;
1785 1873
1786 if (trace->summary) 1874 if (trace->summary)
1787 thread__update_stats(ttrace, id, sample); 1875 thread__update_stats(ttrace, id, sample);
@@ -1835,8 +1923,10 @@ signed_print:
1835 fputc('\n', trace->output); 1923 fputc('\n', trace->output);
1836out: 1924out:
1837 ttrace->entry_pending = false; 1925 ttrace->entry_pending = false;
1838 1926 err = 0;
1839 return 0; 1927out_put:
1928 thread__put(thread);
1929 return err;
1840} 1930}
1841 1931
1842static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, 1932static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
@@ -1863,6 +1953,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
1863 1953
1864 ttrace->runtime_ms += runtime_ms; 1954 ttrace->runtime_ms += runtime_ms;
1865 trace->runtime_ms += runtime_ms; 1955 trace->runtime_ms += runtime_ms;
1956 thread__put(thread);
1866 return 0; 1957 return 0;
1867 1958
1868out_dump: 1959out_dump:
@@ -1872,6 +1963,7 @@ out_dump:
1872 (pid_t)perf_evsel__intval(evsel, sample, "pid"), 1963 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1873 runtime, 1964 runtime,
1874 perf_evsel__intval(evsel, sample, "vruntime")); 1965 perf_evsel__intval(evsel, sample, "vruntime"));
1966 thread__put(thread);
1875 return 0; 1967 return 0;
1876} 1968}
1877 1969
@@ -1924,11 +2016,12 @@ static int trace__pgfault(struct trace *trace,
1924 struct addr_location al; 2016 struct addr_location al;
1925 char map_type = 'd'; 2017 char map_type = 'd';
1926 struct thread_trace *ttrace; 2018 struct thread_trace *ttrace;
2019 int err = -1;
1927 2020
1928 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); 2021 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1929 ttrace = thread__trace(thread, trace->output); 2022 ttrace = thread__trace(thread, trace->output);
1930 if (ttrace == NULL) 2023 if (ttrace == NULL)
1931 return -1; 2024 goto out_put;
1932 2025
1933 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) 2026 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1934 ttrace->pfmaj++; 2027 ttrace->pfmaj++;
@@ -1936,7 +2029,7 @@ static int trace__pgfault(struct trace *trace,
1936 ttrace->pfmin++; 2029 ttrace->pfmin++;
1937 2030
1938 if (trace->summary_only) 2031 if (trace->summary_only)
1939 return 0; 2032 goto out;
1940 2033
1941 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 2034 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1942 sample->ip, &al); 2035 sample->ip, &al);
@@ -1967,8 +2060,11 @@ static int trace__pgfault(struct trace *trace,
1967 print_location(trace->output, sample, &al, true, false); 2060 print_location(trace->output, sample, &al, true, false);
1968 2061
1969 fprintf(trace->output, " (%c%c)\n", map_type, al.level); 2062 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1970 2063out:
1971 return 0; 2064 err = 0;
2065out_put:
2066 thread__put(thread);
2067 return err;
1972} 2068}
1973 2069
1974static bool skip_sample(struct trace *trace, struct perf_sample *sample) 2070static bool skip_sample(struct trace *trace, struct perf_sample *sample)
@@ -2652,6 +2748,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2652 .user_interval = ULLONG_MAX, 2748 .user_interval = ULLONG_MAX,
2653 .no_buffering = true, 2749 .no_buffering = true,
2654 .mmap_pages = UINT_MAX, 2750 .mmap_pages = UINT_MAX,
2751 .proc_map_timeout = 500,
2655 }, 2752 },
2656 .output = stdout, 2753 .output = stdout,
2657 .show_comm = true, 2754 .show_comm = true,
@@ -2666,16 +2763,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2666 OPT_BOOLEAN(0, "comm", &trace.show_comm, 2763 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2667 "show the thread COMM next to its id"), 2764 "show the thread COMM next to its id"),
2668 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), 2765 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2669 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", 2766 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2670 "list of events to trace"),
2671 OPT_STRING('o', "output", &output_name, "file", "output file name"), 2767 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2672 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), 2768 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2673 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", 2769 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2674 "trace events on existing process id"), 2770 "trace events on existing process id"),
2675 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", 2771 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2676 "trace events on existing thread id"), 2772 "trace events on existing thread id"),
2677 OPT_CALLBACK(0, "filter-pids", &trace, "float", 2773 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2678 "show only events with duration > N.M ms", trace__set_filter_pids), 2774 "pids to filter (by the kernel)", trace__set_filter_pids),
2679 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, 2775 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2680 "system-wide collection from all CPUs"), 2776 "system-wide collection from all CPUs"),
2681 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", 2777 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
@@ -2702,6 +2798,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2702 "Trace pagefaults", parse_pagefaults, "maj"), 2798 "Trace pagefaults", parse_pagefaults, "maj"),
2703 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), 2799 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2704 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), 2800 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2801 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2802 "per thread proc mmap processing timeout in ms"),
2705 OPT_END() 2803 OPT_END()
2706 }; 2804 };
2707 const char * const trace_subcommands[] = { "record", NULL }; 2805 const char * const trace_subcommands[] = { "record", NULL };
@@ -2712,11 +2810,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2712 signal(SIGFPE, sighandler_dump_stack); 2810 signal(SIGFPE, sighandler_dump_stack);
2713 2811
2714 trace.evlist = perf_evlist__new(); 2812 trace.evlist = perf_evlist__new();
2715 if (trace.evlist == NULL)
2716 return -ENOMEM;
2717 2813
2718 if (trace.evlist == NULL) { 2814 if (trace.evlist == NULL) {
2719 pr_err("Not enough memory to run!\n"); 2815 pr_err("Not enough memory to run!\n");
2816 err = -ENOMEM;
2720 goto out; 2817 goto out;
2721 } 2818 }
2722 2819
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 59a98c643240..317001c94660 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -32,7 +32,7 @@ ifeq ($(ARCH),x86)
32 LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 32 LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
33 $(call detected,CONFIG_X86_64) 33 $(call detected,CONFIG_X86_64)
34 else 34 else
35 LIBUNWIND_LIBS = -lunwind -lunwind-x86 35 LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
36 endif 36 endif
37 NO_PERF_REGS := 0 37 NO_PERF_REGS := 0
38endif 38endif
@@ -130,6 +130,8 @@ endif
130 130
131ifeq ($(DEBUG),0) 131ifeq ($(DEBUG),0)
132 CFLAGS += -O6 132 CFLAGS += -O6
133else
134 CFLAGS += $(call cc-option,-Og,-O0)
133endif 135endif
134 136
135ifdef PARSER_DEBUG 137ifdef PARSER_DEBUG
@@ -268,6 +270,10 @@ else
268 endif # libelf support 270 endif # libelf support
269endif # NO_LIBELF 271endif # NO_LIBELF
270 272
273ifdef NO_DWARF
274 NO_LIBDW_DWARF_UNWIND := 1
275endif
276
271ifndef NO_LIBELF 277ifndef NO_LIBELF
272 CFLAGS += -DHAVE_LIBELF_SUPPORT 278 CFLAGS += -DHAVE_LIBELF_SUPPORT
273 EXTLIBS += -lelf 279 EXTLIBS += -lelf
@@ -610,6 +616,11 @@ ifdef LIBBABELTRACE
610 endif 616 endif
611endif 617endif
612 618
619ifndef NO_AUXTRACE
620 $(call detected,CONFIG_AUXTRACE)
621 CFLAGS += -DHAVE_AUXTRACE_SUPPORT
622endif
623
613# Among the variables below, these: 624# Among the variables below, these:
614# perfexecdir 625# perfexecdir
615# template_dir 626# template_dir
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index c16ce833079c..0ebef09c0842 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -177,3 +177,22 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
177endef 177endef
178_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) 178_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
179_gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) 179_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
180
181# try-run
182# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
 183# Exit code chooses option. "$$TMP" can be used as a temporary file and
184# is automatically cleaned up.
185try-run = $(shell set -e; \
186 TMP="$(TMPOUT).$$$$.tmp"; \
187 TMPO="$(TMPOUT).$$$$.o"; \
188 if ($(1)) >/dev/null 2>&1; \
189 then echo "$(2)"; \
190 else echo "$(3)"; \
191 fi; \
192 rm -f "$$TMP" "$$TMPO")
193
194# cc-option
195# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
196
197cc-option = $(call try-run,\
198 $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 6ef68165c9db..83a25cef82fd 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -6,11 +6,9 @@
6#include <sys/syscall.h> 6#include <sys/syscall.h>
7#include <linux/types.h> 7#include <linux/types.h>
8#include <linux/perf_event.h> 8#include <linux/perf_event.h>
9#include <asm/barrier.h>
9 10
10#if defined(__i386__) 11#if defined(__i386__)
11#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
12#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
13#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
14#define cpu_relax() asm volatile("rep; nop" ::: "memory"); 12#define cpu_relax() asm volatile("rep; nop" ::: "memory");
15#define CPUINFO_PROC {"model name"} 13#define CPUINFO_PROC {"model name"}
16#ifndef __NR_perf_event_open 14#ifndef __NR_perf_event_open
@@ -25,9 +23,6 @@
25#endif 23#endif
26 24
27#if defined(__x86_64__) 25#if defined(__x86_64__)
28#define mb() asm volatile("mfence" ::: "memory")
29#define wmb() asm volatile("sfence" ::: "memory")
30#define rmb() asm volatile("lfence" ::: "memory")
31#define cpu_relax() asm volatile("rep; nop" ::: "memory"); 26#define cpu_relax() asm volatile("rep; nop" ::: "memory");
32#define CPUINFO_PROC {"model name"} 27#define CPUINFO_PROC {"model name"}
33#ifndef __NR_perf_event_open 28#ifndef __NR_perf_event_open
@@ -43,129 +38,63 @@
43 38
44#ifdef __powerpc__ 39#ifdef __powerpc__
45#include "../../arch/powerpc/include/uapi/asm/unistd.h" 40#include "../../arch/powerpc/include/uapi/asm/unistd.h"
46#define mb() asm volatile ("sync" ::: "memory")
47#define wmb() asm volatile ("sync" ::: "memory")
48#define rmb() asm volatile ("sync" ::: "memory")
49#define CPUINFO_PROC {"cpu"} 41#define CPUINFO_PROC {"cpu"}
50#endif 42#endif
51 43
52#ifdef __s390__ 44#ifdef __s390__
53#define mb() asm volatile("bcr 15,0" ::: "memory")
54#define wmb() asm volatile("bcr 15,0" ::: "memory")
55#define rmb() asm volatile("bcr 15,0" ::: "memory")
56#define CPUINFO_PROC {"vendor_id"} 45#define CPUINFO_PROC {"vendor_id"}
57#endif 46#endif
58 47
59#ifdef __sh__ 48#ifdef __sh__
60#if defined(__SH4A__) || defined(__SH5__)
61# define mb() asm volatile("synco" ::: "memory")
62# define wmb() asm volatile("synco" ::: "memory")
63# define rmb() asm volatile("synco" ::: "memory")
64#else
65# define mb() asm volatile("" ::: "memory")
66# define wmb() asm volatile("" ::: "memory")
67# define rmb() asm volatile("" ::: "memory")
68#endif
69#define CPUINFO_PROC {"cpu type"} 49#define CPUINFO_PROC {"cpu type"}
70#endif 50#endif
71 51
72#ifdef __hppa__ 52#ifdef __hppa__
73#define mb() asm volatile("" ::: "memory")
74#define wmb() asm volatile("" ::: "memory")
75#define rmb() asm volatile("" ::: "memory")
76#define CPUINFO_PROC {"cpu"} 53#define CPUINFO_PROC {"cpu"}
77#endif 54#endif
78 55
79#ifdef __sparc__ 56#ifdef __sparc__
80#ifdef __LP64__
81#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
82 "membar #StoreLoad\n" \
83 "1:\n":::"memory")
84#else
85#define mb() asm volatile("":::"memory")
86#endif
87#define wmb() asm volatile("":::"memory")
88#define rmb() asm volatile("":::"memory")
89#define CPUINFO_PROC {"cpu"} 57#define CPUINFO_PROC {"cpu"}
90#endif 58#endif
91 59
92#ifdef __alpha__ 60#ifdef __alpha__
93#define mb() asm volatile("mb" ::: "memory")
94#define wmb() asm volatile("wmb" ::: "memory")
95#define rmb() asm volatile("mb" ::: "memory")
96#define CPUINFO_PROC {"cpu model"} 61#define CPUINFO_PROC {"cpu model"}
97#endif 62#endif
98 63
99#ifdef __ia64__ 64#ifdef __ia64__
100#define mb() asm volatile ("mf" ::: "memory")
101#define wmb() asm volatile ("mf" ::: "memory")
102#define rmb() asm volatile ("mf" ::: "memory")
103#define cpu_relax() asm volatile ("hint @pause" ::: "memory") 65#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
104#define CPUINFO_PROC {"model name"} 66#define CPUINFO_PROC {"model name"}
105#endif 67#endif
106 68
107#ifdef __arm__ 69#ifdef __arm__
108/*
109 * Use the __kuser_memory_barrier helper in the CPU helper page. See
110 * arch/arm/kernel/entry-armv.S in the kernel source for details.
111 */
112#define mb() ((void(*)(void))0xffff0fa0)()
113#define wmb() ((void(*)(void))0xffff0fa0)()
114#define rmb() ((void(*)(void))0xffff0fa0)()
115#define CPUINFO_PROC {"model name", "Processor"} 70#define CPUINFO_PROC {"model name", "Processor"}
116#endif 71#endif
117 72
118#ifdef __aarch64__ 73#ifdef __aarch64__
119#define mb() asm volatile("dmb ish" ::: "memory")
120#define wmb() asm volatile("dmb ishst" ::: "memory")
121#define rmb() asm volatile("dmb ishld" ::: "memory")
122#define cpu_relax() asm volatile("yield" ::: "memory") 74#define cpu_relax() asm volatile("yield" ::: "memory")
123#endif 75#endif
124 76
125#ifdef __mips__ 77#ifdef __mips__
126#define mb() asm volatile( \
127 ".set mips2\n\t" \
128 "sync\n\t" \
129 ".set mips0" \
130 : /* no output */ \
131 : /* no input */ \
132 : "memory")
133#define wmb() mb()
134#define rmb() mb()
135#define CPUINFO_PROC {"cpu model"} 78#define CPUINFO_PROC {"cpu model"}
136#endif 79#endif
137 80
138#ifdef __arc__ 81#ifdef __arc__
139#define mb() asm volatile("" ::: "memory")
140#define wmb() asm volatile("" ::: "memory")
141#define rmb() asm volatile("" ::: "memory")
142#define CPUINFO_PROC {"Processor"} 82#define CPUINFO_PROC {"Processor"}
143#endif 83#endif
144 84
145#ifdef __metag__ 85#ifdef __metag__
146#define mb() asm volatile("" ::: "memory")
147#define wmb() asm volatile("" ::: "memory")
148#define rmb() asm volatile("" ::: "memory")
149#define CPUINFO_PROC {"CPU"} 86#define CPUINFO_PROC {"CPU"}
150#endif 87#endif
151 88
152#ifdef __xtensa__ 89#ifdef __xtensa__
153#define mb() asm volatile("memw" ::: "memory")
154#define wmb() asm volatile("memw" ::: "memory")
155#define rmb() asm volatile("" ::: "memory")
156#define CPUINFO_PROC {"core ID"} 90#define CPUINFO_PROC {"core ID"}
157#endif 91#endif
158 92
159#ifdef __tile__ 93#ifdef __tile__
160#define mb() asm volatile ("mf" ::: "memory")
161#define wmb() asm volatile ("mf" ::: "memory")
162#define rmb() asm volatile ("mf" ::: "memory")
163#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory") 94#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
164#define CPUINFO_PROC {"model name"} 95#define CPUINFO_PROC {"model name"}
165#endif 96#endif
166 97
167#define barrier() asm volatile ("" ::: "memory")
168
169#ifndef cpu_relax 98#ifndef cpu_relax
170#define cpu_relax() barrier() 99#define cpu_relax() barrier()
171#endif 100#endif
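[editor's note] All of the hand-rolled per-arch mb()/wmb()/rmb() definitions above are deleted in favour of the #include <asm/barrier.h> added at the top of the file, which resolves to the new tools/arch/*/include/asm/barrier.h copies of the kernel's barriers. Their main consumer in perf is the mmap ring buffer; a hedged sketch of that read side, assuming the tools/ include paths are in effect (perf's real reader also wraps the load in ACCESS_ONCE()):

	#include <linux/perf_event.h>
	#include <asm/barrier.h>	/* now the tools/ copy of the kernel barriers */

	/*
	 * Read the producer position of a perf mmap ring buffer.  The rmb()
	 * orders the data_head load before any subsequent reads of the ring
	 * data, pairing with the kernel's write side.
	 */
	static inline __u64 ring_read_head(struct perf_event_mmap_page *pg)
	{
		__u64 head = pg->data_head;

		rmb();
		return head;
	}
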
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index e14bb637255c..4a5827fff799 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,16 +54,22 @@ struct record_opts {
54 bool period; 54 bool period;
55 bool sample_intr_regs; 55 bool sample_intr_regs;
56 bool running_time; 56 bool running_time;
57 bool full_auxtrace;
58 bool auxtrace_snapshot_mode;
57 unsigned int freq; 59 unsigned int freq;
58 unsigned int mmap_pages; 60 unsigned int mmap_pages;
61 unsigned int auxtrace_mmap_pages;
59 unsigned int user_freq; 62 unsigned int user_freq;
60 u64 branch_stack; 63 u64 branch_stack;
61 u64 default_interval; 64 u64 default_interval;
62 u64 user_interval; 65 u64 user_interval;
66 size_t auxtrace_snapshot_size;
67 const char *auxtrace_snapshot_opts;
63 bool sample_transaction; 68 bool sample_transaction;
64 unsigned initial_delay; 69 unsigned initial_delay;
65 bool use_clockid; 70 bool use_clockid;
66 clockid_t clockid; 71 clockid_t clockid;
72 unsigned int proc_map_timeout;
67}; 73};
68 74
69struct option; 75struct option;
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 6a8801b32017..ee41e705b2eb 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -3,9 +3,9 @@ perf-y += parse-events.o
3perf-y += dso-data.o 3perf-y += dso-data.o
4perf-y += attr.o 4perf-y += attr.o
5perf-y += vmlinux-kallsyms.o 5perf-y += vmlinux-kallsyms.o
6perf-y += open-syscall.o 6perf-y += openat-syscall.o
7perf-y += open-syscall-all-cpus.o 7perf-y += openat-syscall-all-cpus.o
8perf-y += open-syscall-tp-fields.o 8perf-y += openat-syscall-tp-fields.o
9perf-y += mmap-basic.o 9perf-y += mmap-basic.o
10perf-y += perf-record.o 10perf-y += perf-record.o
11perf-y += rdpmc.o 11perf-y += rdpmc.o
@@ -34,7 +34,7 @@ perf-y += kmod-path.o
34 34
35perf-$(CONFIG_X86) += perf-time-to-tsc.o 35perf-$(CONFIG_X86) += perf-time-to-tsc.o
36 36
37ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) 37ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
38perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o 38perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
39endif 39endif
40 40
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4f4098167112..87b9961646e4 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -23,12 +23,12 @@ static struct test {
23 .func = test__vmlinux_matches_kallsyms, 23 .func = test__vmlinux_matches_kallsyms,
24 }, 24 },
25 { 25 {
26 .desc = "detect open syscall event", 26 .desc = "detect openat syscall event",
27 .func = test__open_syscall_event, 27 .func = test__openat_syscall_event,
28 }, 28 },
29 { 29 {
30 .desc = "detect open syscall event on all cpus", 30 .desc = "detect openat syscall event on all cpus",
31 .func = test__open_syscall_event_on_all_cpus, 31 .func = test__openat_syscall_event_on_all_cpus,
32 }, 32 },
33 { 33 {
34 .desc = "read samples using the mmap interface", 34 .desc = "read samples using the mmap interface",
@@ -73,8 +73,8 @@ static struct test {
73 .func = test__perf_evsel__tp_sched_test, 73 .func = test__perf_evsel__tp_sched_test,
74 }, 74 },
75 { 75 {
76 .desc = "Generate and check syscalls:sys_enter_open event fields", 76 .desc = "Generate and check syscalls:sys_enter_openat event fields",
77 .func = test__syscall_open_tp_fields, 77 .func = test__syscall_openat_tp_fields,
78 }, 78 },
79 { 79 {
80 .desc = "struct perf_event_attr setup", 80 .desc = "struct perf_event_attr setup",
@@ -126,7 +126,7 @@ static struct test {
126 .desc = "Test parsing with no sample_id_all bit set", 126 .desc = "Test parsing with no sample_id_all bit set",
127 .func = test__parse_no_sample_id_all, 127 .func = test__parse_no_sample_id_all,
128 }, 128 },
129#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) 129#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
130#ifdef HAVE_DWARF_UNWIND_SUPPORT 130#ifdef HAVE_DWARF_UNWIND_SUPPORT
131 { 131 {
132 .desc = "Test dwarf unwind", 132 .desc = "Test dwarf unwind",
@@ -219,7 +219,7 @@ static int run_test(struct test *test)
219 wait(&status); 219 wait(&status);
220 220
221 if (WIFEXITED(status)) { 221 if (WIFEXITED(status)) {
222 err = WEXITSTATUS(status); 222 err = (signed char)WEXITSTATUS(status);
223 pr_debug("test child finished with %d\n", err); 223 pr_debug("test child finished with %d\n", err);
224 } else if (WIFSIGNALED(status)) { 224 } else if (WIFSIGNALED(status)) {
225 err = -1; 225 err = -1;
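[editor's note] The (signed char)WEXITSTATUS(status) change works because wait() preserves only the low 8 bits of the child's exit code: a test child exiting with -1 arrives as 255, and narrowing through signed char maps it back to -1 (on the usual two's-complement ABI), so run_test() can tell negative error codes apart again. A standalone illustration in plain C, not the perf harness:

	#include <stdio.h>

	int main(void)
	{
		/* A child that calls exit(-1) reports status byte 255. */
		int status_byte = 255;

		printf("raw %d -> recovered %d\n",
		       status_byte, (signed char)status_byte);	/* raw 255 -> recovered -1 */
		return 0;
	}
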
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index f671ec37a7c4..22f8a00446e1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -248,6 +248,7 @@ static int process_sample_event(struct machine *machine,
248 struct perf_sample sample; 248 struct perf_sample sample;
249 struct thread *thread; 249 struct thread *thread;
250 u8 cpumode; 250 u8 cpumode;
251 int ret;
251 252
252 if (perf_evlist__parse_sample(evlist, event, &sample)) { 253 if (perf_evlist__parse_sample(evlist, event, &sample)) {
253 pr_debug("perf_evlist__parse_sample failed\n"); 254 pr_debug("perf_evlist__parse_sample failed\n");
@@ -262,7 +263,9 @@ static int process_sample_event(struct machine *machine,
262 263
263 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 264 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
264 265
265 return read_object_code(sample.ip, READLEN, cpumode, thread, state); 266 ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
267 thread__put(thread);
268 return ret;
266} 269}
267 270
268static int process_event(struct machine *machine, struct perf_evlist *evlist, 271static int process_event(struct machine *machine, struct perf_evlist *evlist,
@@ -448,7 +451,7 @@ static int do_test_code_reading(bool try_kcore)
448 } 451 }
449 452
450 ret = perf_event__synthesize_thread_map(NULL, threads, 453 ret = perf_event__synthesize_thread_map(NULL, threads,
451 perf_event__process, machine, false); 454 perf_event__process, machine, false, 500);
452 if (ret < 0) { 455 if (ret < 0) {
453 pr_debug("perf_event__synthesize_thread_map failed\n"); 456 pr_debug("perf_event__synthesize_thread_map failed\n");
454 goto out_err; 457 goto out_err;
@@ -457,13 +460,13 @@ static int do_test_code_reading(bool try_kcore)
457 thread = machine__findnew_thread(machine, pid, pid); 460 thread = machine__findnew_thread(machine, pid, pid);
458 if (!thread) { 461 if (!thread) {
459 pr_debug("machine__findnew_thread failed\n"); 462 pr_debug("machine__findnew_thread failed\n");
460 goto out_err; 463 goto out_put;
461 } 464 }
462 465
463 cpus = cpu_map__new(NULL); 466 cpus = cpu_map__new(NULL);
464 if (!cpus) { 467 if (!cpus) {
465 pr_debug("cpu_map__new failed\n"); 468 pr_debug("cpu_map__new failed\n");
466 goto out_err; 469 goto out_put;
467 } 470 }
468 471
469 while (1) { 472 while (1) {
@@ -472,7 +475,7 @@ static int do_test_code_reading(bool try_kcore)
472 evlist = perf_evlist__new(); 475 evlist = perf_evlist__new();
473 if (!evlist) { 476 if (!evlist) {
474 pr_debug("perf_evlist__new failed\n"); 477 pr_debug("perf_evlist__new failed\n");
475 goto out_err; 478 goto out_put;
476 } 479 }
477 480
478 perf_evlist__set_maps(evlist, cpus, threads); 481 perf_evlist__set_maps(evlist, cpus, threads);
@@ -482,10 +485,10 @@ static int do_test_code_reading(bool try_kcore)
482 else 485 else
483 str = "cycles"; 486 str = "cycles";
484 pr_debug("Parsing event '%s'\n", str); 487 pr_debug("Parsing event '%s'\n", str);
485 ret = parse_events(evlist, str); 488 ret = parse_events(evlist, str, NULL);
486 if (ret < 0) { 489 if (ret < 0) {
487 pr_debug("parse_events failed\n"); 490 pr_debug("parse_events failed\n");
488 goto out_err; 491 goto out_put;
489 } 492 }
490 493
491 perf_evlist__config(evlist, &opts); 494 perf_evlist__config(evlist, &opts);
@@ -506,7 +509,7 @@ static int do_test_code_reading(bool try_kcore)
506 continue; 509 continue;
507 } 510 }
508 pr_debug("perf_evlist__open failed\n"); 511 pr_debug("perf_evlist__open failed\n");
509 goto out_err; 512 goto out_put;
510 } 513 }
511 break; 514 break;
512 } 515 }
@@ -514,7 +517,7 @@ static int do_test_code_reading(bool try_kcore)
514 ret = perf_evlist__mmap(evlist, UINT_MAX, false); 517 ret = perf_evlist__mmap(evlist, UINT_MAX, false);
515 if (ret < 0) { 518 if (ret < 0) {
516 pr_debug("perf_evlist__mmap failed\n"); 519 pr_debug("perf_evlist__mmap failed\n");
517 goto out_err; 520 goto out_put;
518 } 521 }
519 522
520 perf_evlist__enable(evlist); 523 perf_evlist__enable(evlist);
@@ -525,7 +528,7 @@ static int do_test_code_reading(bool try_kcore)
525 528
526 ret = process_events(machine, evlist, &state); 529 ret = process_events(machine, evlist, &state);
527 if (ret < 0) 530 if (ret < 0)
528 goto out_err; 531 goto out_put;
529 532
530 if (!have_vmlinux && !have_kcore && !try_kcore) 533 if (!have_vmlinux && !have_kcore && !try_kcore)
531 err = TEST_CODE_READING_NO_KERNEL_OBJ; 534 err = TEST_CODE_READING_NO_KERNEL_OBJ;
@@ -535,7 +538,10 @@ static int do_test_code_reading(bool try_kcore)
535 err = TEST_CODE_READING_NO_ACCESS; 538 err = TEST_CODE_READING_NO_ACCESS;
536 else 539 else
537 err = TEST_CODE_READING_OK; 540 err = TEST_CODE_READING_OK;
541out_put:
542 thread__put(thread);
538out_err: 543out_err:
544
539 if (evlist) { 545 if (evlist) {
540 perf_evlist__delete(evlist); 546 perf_evlist__delete(evlist);
541 } else { 547 } else {
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 513e5febbe5a..a218aeaf56a0 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -99,6 +99,17 @@ struct test_data_offset offsets[] = {
99 }, 99 },
100}; 100};
101 101
 102/* moved from util/dso.c for compatibility with this test */
103static int dso__data_fd(struct dso *dso, struct machine *machine)
104{
105 int fd = dso__data_get_fd(dso, machine);
106
107 if (fd >= 0)
108 dso__data_put_fd(dso);
109
110 return fd;
111}
112
102int test__dso_data(void) 113int test__dso_data(void)
103{ 114{
104 struct machine machine; 115 struct machine machine;
@@ -155,7 +166,7 @@ int test__dso_data(void)
155 free(buf); 166 free(buf);
156 } 167 }
157 168
158 dso__delete(dso); 169 dso__put(dso);
159 unlink(file); 170 unlink(file);
160 return 0; 171 return 0;
161} 172}
@@ -215,7 +226,7 @@ static void dsos__delete(int cnt)
215 struct dso *dso = dsos[i]; 226 struct dso *dso = dsos[i];
216 227
217 unlink(dso->name); 228 unlink(dso->name);
218 dso__delete(dso); 229 dso__put(dso);
219 } 230 }
220 231
221 free(dsos); 232 free(dsos);
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 0bf06bec68c7..40b36c462427 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine)
28 pid_t pid = getpid(); 28 pid_t pid = getpid();
29 29
30 return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, 30 return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
31 mmap_handler, machine, true); 31 mmap_handler, machine, true, 500);
32} 32}
33 33
34#define MAX_STACK 8 34#define MAX_STACK 8
@@ -170,6 +170,7 @@ int test__dwarf_unwind(void)
170 } 170 }
171 171
172 err = krava_1(thread); 172 err = krava_1(thread);
173 thread__put(thread);
173 174
174 out: 175 out:
175 machine__delete_threads(machine); 176 machine__delete_threads(machine);
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index b8d8341b383e..3fa715987a5e 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
23 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { 23 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
24 __perf_evsel__hw_cache_type_op_res_name(type, op, i, 24 __perf_evsel__hw_cache_type_op_res_name(type, op, i,
25 name, sizeof(name)); 25 name, sizeof(name));
26 err = parse_events(evlist, name); 26 err = parse_events(evlist, name, NULL);
27 if (err) 27 if (err)
28 ret = err; 28 ret = err;
29 } 29 }
@@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
71 return -ENOMEM; 71 return -ENOMEM;
72 72
73 for (i = 0; i < nr_names; ++i) { 73 for (i = 0; i < nr_names; ++i) {
74 err = parse_events(evlist, names[i]); 74 err = parse_events(evlist, names[i], NULL);
75 if (err) { 75 if (err) {
76 pr_debug("failed to parse event '%s', err %d\n", 76 pr_debug("failed to parse event '%s', err %d\n",
77 names[i], err); 77 names[i], err);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index a62c09134516..ce80b274b097 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -96,6 +96,7 @@ struct machine *setup_fake_machine(struct machines *machines)
96 goto out; 96 goto out;
97 97
98 thread__set_comm(thread, fake_threads[i].comm, 0); 98 thread__set_comm(thread, fake_threads[i].comm, 0);
99 thread__put(thread);
99 } 100 }
100 101
101 for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { 102 for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
@@ -120,8 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines)
120 size_t k; 121 size_t k;
121 struct dso *dso; 122 struct dso *dso;
122 123
123 dso = __dsos__findnew(&machine->user_dsos, 124 dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
124 fake_symbols[i].dso_name);
125 if (dso == NULL) 125 if (dso == NULL)
126 goto out; 126 goto out;
127 127
@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines)
134 134
135 sym = symbol__new(fsym->start, fsym->length, 135 sym = symbol__new(fsym->start, fsym->length,
136 STB_GLOBAL, fsym->name); 136 STB_GLOBAL, fsym->name);
137 if (sym == NULL) 137 if (sym == NULL) {
138 dso__put(dso);
138 goto out; 139 goto out;
140 }
139 141
140 symbols__insert(&dso->symbols[MAP__FUNCTION], sym); 142 symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
141 } 143 }
144
145 dso__put(dso);
142 } 146 }
143 147
144 return machine; 148 return machine;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 18619966454c..7d82c8be5e36 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -87,6 +87,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
87 }, 87 },
88 }; 88 };
89 struct hist_entry_iter iter = { 89 struct hist_entry_iter iter = {
90 .evsel = evsel,
91 .sample = &sample,
90 .hide_unresolved = false, 92 .hide_unresolved = false,
91 }; 93 };
92 94
@@ -104,9 +106,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
104 &sample) < 0) 106 &sample) < 0)
105 goto out; 107 goto out;
106 108
107 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 109 if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
108 PERF_MAX_STACK_DEPTH, NULL) < 0) 110 NULL) < 0) {
111 addr_location__put(&al);
109 goto out; 112 goto out;
113 }
110 114
111 fake_samples[i].thread = al.thread; 115 fake_samples[i].thread = al.thread;
112 fake_samples[i].map = al.map; 116 fake_samples[i].map = al.map;
@@ -695,7 +699,7 @@ int test__hists_cumulate(void)
695 699
696 TEST_ASSERT_VAL("No memory", evlist); 700 TEST_ASSERT_VAL("No memory", evlist);
697 701
698 err = parse_events(evlist, "cpu-clock"); 702 err = parse_events(evlist, "cpu-clock", NULL);
699 if (err) 703 if (err)
700 goto out; 704 goto out;
701 705
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 59e53db7914c..ce48775e6ada 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -63,6 +63,8 @@ static int add_hist_entries(struct perf_evlist *evlist,
63 }, 63 },
64 }; 64 };
65 struct hist_entry_iter iter = { 65 struct hist_entry_iter iter = {
66 .evsel = evsel,
67 .sample = &sample,
66 .ops = &hist_iter_normal, 68 .ops = &hist_iter_normal,
67 .hide_unresolved = false, 69 .hide_unresolved = false,
68 }; 70 };
@@ -81,9 +83,11 @@ static int add_hist_entries(struct perf_evlist *evlist,
81 &sample) < 0) 83 &sample) < 0)
82 goto out; 84 goto out;
83 85
84 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 86 if (hist_entry_iter__add(&iter, &al,
85 PERF_MAX_STACK_DEPTH, NULL) < 0) 87 PERF_MAX_STACK_DEPTH, NULL) < 0) {
88 addr_location__put(&al);
86 goto out; 89 goto out;
90 }
87 91
88 fake_samples[i].thread = al.thread; 92 fake_samples[i].thread = al.thread;
89 fake_samples[i].map = al.map; 93 fake_samples[i].map = al.map;
@@ -108,10 +112,10 @@ int test__hists_filter(void)
108 112
109 TEST_ASSERT_VAL("No memory", evlist); 113 TEST_ASSERT_VAL("No memory", evlist);
110 114
111 err = parse_events(evlist, "cpu-clock"); 115 err = parse_events(evlist, "cpu-clock", NULL);
112 if (err) 116 if (err)
113 goto out; 117 goto out;
114 err = parse_events(evlist, "task-clock"); 118 err = parse_events(evlist, "task-clock", NULL);
115 if (err) 119 if (err)
116 goto out; 120 goto out;
117 121
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 278ba8344c23..8c102b011424 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -91,8 +91,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
91 91
92 he = __hists__add_entry(hists, &al, NULL, 92 he = __hists__add_entry(hists, &al, NULL,
93 NULL, NULL, 1, 1, 0, true); 93 NULL, NULL, 1, 1, 0, true);
94 if (he == NULL) 94 if (he == NULL) {
95 addr_location__put(&al);
95 goto out; 96 goto out;
97 }
96 98
97 fake_common_samples[k].thread = al.thread; 99 fake_common_samples[k].thread = al.thread;
98 fake_common_samples[k].map = al.map; 100 fake_common_samples[k].map = al.map;
@@ -115,8 +117,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
115 117
116 he = __hists__add_entry(hists, &al, NULL, 118 he = __hists__add_entry(hists, &al, NULL,
117 NULL, NULL, 1, 1, 0, true); 119 NULL, NULL, 1, 1, 0, true);
118 if (he == NULL) 120 if (he == NULL) {
121 addr_location__put(&al);
119 goto out; 122 goto out;
123 }
120 124
121 fake_samples[i][k].thread = al.thread; 125 fake_samples[i][k].thread = al.thread;
122 fake_samples[i][k].map = al.map; 126 fake_samples[i][k].map = al.map;
@@ -282,10 +286,10 @@ int test__hists_link(void)
282 if (evlist == NULL) 286 if (evlist == NULL)
283 return -ENOMEM; 287 return -ENOMEM;
284 288
285 err = parse_events(evlist, "cpu-clock"); 289 err = parse_events(evlist, "cpu-clock", NULL);
286 if (err) 290 if (err)
287 goto out; 291 goto out;
288 err = parse_events(evlist, "task-clock"); 292 err = parse_events(evlist, "task-clock", NULL);
289 if (err) 293 if (err)
290 goto out; 294 goto out;
291 295
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index b52c9faea224..adbebc852cc8 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -57,6 +57,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
57 }, 57 },
58 }; 58 };
59 struct hist_entry_iter iter = { 59 struct hist_entry_iter iter = {
60 .evsel = evsel,
61 .sample = &sample,
60 .ops = &hist_iter_normal, 62 .ops = &hist_iter_normal,
61 .hide_unresolved = false, 63 .hide_unresolved = false,
62 }; 64 };
@@ -70,9 +72,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
70 &sample) < 0) 72 &sample) < 0)
71 goto out; 73 goto out;
72 74
73 if (hist_entry_iter__add(&iter, &al, evsel, &sample, 75 if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
74 PERF_MAX_STACK_DEPTH, NULL) < 0) 76 NULL) < 0) {
77 addr_location__put(&al);
75 goto out; 78 goto out;
79 }
76 80
77 fake_samples[i].thread = al.thread; 81 fake_samples[i].thread = al.thread;
78 fake_samples[i].map = al.map; 82 fake_samples[i].map = al.map;
@@ -590,7 +594,7 @@ int test__hists_output(void)
590 594
591 TEST_ASSERT_VAL("No memory", evlist); 595 TEST_ASSERT_VAL("No memory", evlist);
592 596
593 err = parse_events(evlist, "cpu-clock"); 597 err = parse_events(evlist, "cpu-clock", NULL);
594 if (err) 598 if (err)
595 goto out; 599 goto out;
596 600
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 7a5ab7b0b8f6..5b171d1e338b 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -78,8 +78,8 @@ int test__keep_tracking(void)
78 78
79 perf_evlist__set_maps(evlist, cpus, threads); 79 perf_evlist__set_maps(evlist, cpus, threads);
80 80
81 CHECK__(parse_events(evlist, "dummy:u")); 81 CHECK__(parse_events(evlist, "dummy:u", NULL));
82 CHECK__(parse_events(evlist, "cycles:u")); 82 CHECK__(parse_events(evlist, "cycles:u", NULL));
83 83
84 perf_evlist__config(evlist, &opts); 84 perf_evlist__config(evlist, &opts);
85 85
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index e8d7cbb9320c..08c433b4bf4f 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -34,9 +34,21 @@ static int test(const char *path, bool alloc_name, bool alloc_ext,
34 return 0; 34 return 0;
35} 35}
36 36
37static int test_is_kernel_module(const char *path, int cpumode, bool expect)
38{
39 TEST_ASSERT_VAL("is_kernel_module",
40 (!!is_kernel_module(path, cpumode)) == (!!expect));
41 pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
42 path, cpumode, expect ? "true" : "false");
43 return 0;
44}
45
37#define T(path, an, ae, k, c, n, e) \ 46#define T(path, an, ae, k, c, n, e) \
38 TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e)) 47 TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
39 48
49#define M(path, c, e) \
50 TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
51
40int test__kmod_path__parse(void) 52int test__kmod_path__parse(void)
41{ 53{
42 /* path alloc_name alloc_ext kmod comp name ext */ 54 /* path alloc_name alloc_ext kmod comp name ext */
@@ -44,30 +56,90 @@ int test__kmod_path__parse(void)
44 T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL); 56 T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL);
45 T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL); 57 T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL);
46 T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL); 58 T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL);
59 M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
60 M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
61 M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
47 62
48 /* path alloc_name alloc_ext kmod comp name ext */ 63 /* path alloc_name alloc_ext kmod comp name ext */
49 T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); 64 T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz");
50 T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); 65 T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz");
51 T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL); 66 T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL);
52 T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL); 67 T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL);
68 M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
69 M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
70 M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
53 71
54 /* path alloc_name alloc_ext kmod comp name ext */ 72 /* path alloc_name alloc_ext kmod comp name ext */
55 T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz"); 73 T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz");
56 T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz"); 74 T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz");
57 T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL); 75 T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL);
58 T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL); 76 T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL);
77 M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
78 M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
79 M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
59 80
60 /* path alloc_name alloc_ext kmod comp name ext */ 81 /* path alloc_name alloc_ext kmod comp name ext */
61 T("x.gz", true , true , false, true, "x.gz", "gz"); 82 T("x.gz", true , true , false, true, "x.gz", "gz");
62 T("x.gz", false , true , false, true, NULL , "gz"); 83 T("x.gz", false , true , false, true, NULL , "gz");
63 T("x.gz", true , false , false, true, "x.gz", NULL); 84 T("x.gz", true , false , false, true, "x.gz", NULL);
64 T("x.gz", false , false , false, true, NULL , NULL); 85 T("x.gz", false , false , false, true, NULL , NULL);
86 M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
87 M("x.gz", PERF_RECORD_MISC_KERNEL, false);
88 M("x.gz", PERF_RECORD_MISC_USER, false);
65 89
66 /* path alloc_name alloc_ext kmod comp name ext */ 90 /* path alloc_name alloc_ext kmod comp name ext */
67 T("x.ko.gz", true , true , true, true, "[x]", "gz"); 91 T("x.ko.gz", true , true , true, true, "[x]", "gz");
68 T("x.ko.gz", false , true , true, true, NULL , "gz"); 92 T("x.ko.gz", false , true , true, true, NULL , "gz");
69 T("x.ko.gz", true , false , true, true, "[x]", NULL); 93 T("x.ko.gz", true , false , true, true, "[x]", NULL);
70 T("x.ko.gz", false , false , true, true, NULL , NULL); 94 T("x.ko.gz", false , false , true, true, NULL , NULL);
95 M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
96 M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
97 M("x.ko.gz", PERF_RECORD_MISC_USER, false);
98
99 /* path alloc_name alloc_ext kmod comp name ext */
100 T("[test_module]", true , true , true, false, "[test_module]", NULL);
101 T("[test_module]", false , true , true, false, NULL , NULL);
102 T("[test_module]", true , false , true, false, "[test_module]", NULL);
103 T("[test_module]", false , false , true, false, NULL , NULL);
104 M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
105 M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
106 M("[test_module]", PERF_RECORD_MISC_USER, false);
107
108 /* path alloc_name alloc_ext kmod comp name ext */
109 T("[test.module]", true , true , true, false, "[test.module]", NULL);
110 T("[test.module]", false , true , true, false, NULL , NULL);
111 T("[test.module]", true , false , true, false, "[test.module]", NULL);
112 T("[test.module]", false , false , true, false, NULL , NULL);
113 M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
114 M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
115 M("[test.module]", PERF_RECORD_MISC_USER, false);
116
117 /* path alloc_name alloc_ext kmod comp name ext */
118 T("[vdso]", true , true , false, false, "[vdso]", NULL);
119 T("[vdso]", false , true , false, false, NULL , NULL);
120 T("[vdso]", true , false , false, false, "[vdso]", NULL);
121 T("[vdso]", false , false , false, false, NULL , NULL);
122 M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
123 M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
124 M("[vdso]", PERF_RECORD_MISC_USER, false);
125
126 /* path alloc_name alloc_ext kmod comp name ext */
127 T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL);
128 T("[vsyscall]", false , true , false, false, NULL , NULL);
129 T("[vsyscall]", true , false , false, false, "[vsyscall]", NULL);
130 T("[vsyscall]", false , false , false, false, NULL , NULL);
131 M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
132 M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
133 M("[vsyscall]", PERF_RECORD_MISC_USER, false);
134
135 /* path alloc_name alloc_ext kmod comp name ext */
136 T("[kernel.kallsyms]", true , true , false, false, "[kernel.kallsyms]", NULL);
137 T("[kernel.kallsyms]", false , true , false, false, NULL , NULL);
138 T("[kernel.kallsyms]", true , false , false, false, "[kernel.kallsyms]", NULL);
139 T("[kernel.kallsyms]", false , false , false, false, NULL , NULL);
140 M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
141 M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
142 M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
71 143
72 return 0; 144 return 0;
73} 145}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index bff85324f799..65280d28662e 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -32,6 +32,7 @@ make_no_backtrace := NO_BACKTRACE=1
32make_no_libnuma := NO_LIBNUMA=1 32make_no_libnuma := NO_LIBNUMA=1
33make_no_libaudit := NO_LIBAUDIT=1 33make_no_libaudit := NO_LIBAUDIT=1
34make_no_libbionic := NO_LIBBIONIC=1 34make_no_libbionic := NO_LIBBIONIC=1
35make_no_auxtrace := NO_AUXTRACE=1
35make_tags := tags 36make_tags := tags
36make_cscope := cscope 37make_cscope := cscope
37make_help := help 38make_help := help
@@ -52,7 +53,7 @@ make_static := LDFLAGS=-static
52make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 53make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
53make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 54make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
54make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 55make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
55make_minimal += NO_LIBDW_DWARF_UNWIND=1 56make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1
56 57
57# $(run) contains all available tests 58# $(run) contains all available tests
58run := make_pure 59run := make_pure
@@ -74,6 +75,7 @@ run += make_no_backtrace
74run += make_no_libnuma 75run += make_no_libnuma
75run += make_no_libaudit 76run += make_no_libaudit
76run += make_no_libbionic 77run += make_no_libbionic
78run += make_no_auxtrace
77run += make_help 79run += make_help
78run += make_doc 80run += make_doc
79run += make_perf_o 81run += make_perf_o
@@ -223,7 +225,19 @@ tarpkg:
223 echo "- $@: $$cmd" && echo $$cmd > $@ && \ 225 echo "- $@: $$cmd" && echo $$cmd > $@ && \
224 ( eval $$cmd ) >> $@ 2>&1 226 ( eval $$cmd ) >> $@ 2>&1
225 227
226all: $(run) $(run_O) tarpkg 228make_kernelsrc:
229 @echo " - make -C <kernelsrc> tools/perf"
230 $(call clean); \
231 (make -C ../.. tools/perf) > $@ 2>&1 && \
232 test -x perf && rm -f $@ || (cat $@ ; false)
233
234make_kernelsrc_tools:
235 @echo " - make -C <kernelsrc>/tools perf"
236 $(call clean); \
237 (make -C ../../tools perf) > $@ 2>&1 && \
238 test -x perf && rm -f $@ || (cat $@ ; false)
239
240all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
227 @echo OK 241 @echo OK
228 242
229out: $(run_O) 243out: $(run_O)
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 9b9622a33932..5855cf471210 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -23,10 +23,8 @@ int test__basic_mmap(void)
23 struct cpu_map *cpus; 23 struct cpu_map *cpus;
24 struct perf_evlist *evlist; 24 struct perf_evlist *evlist;
25 cpu_set_t cpu_set; 25 cpu_set_t cpu_set;
26 const char *syscall_names[] = { "getsid", "getppid", "getpgrp", 26 const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
27 "getpgid", }; 27 pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
28 pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
29 (void*)getpgid };
30#define nsyscalls ARRAY_SIZE(syscall_names) 28#define nsyscalls ARRAY_SIZE(syscall_names)
31 unsigned int nr_events[nsyscalls], 29 unsigned int nr_events[nsyscalls],
32 expected_nr_events[nsyscalls], i, j; 30 expected_nr_events[nsyscalls], i, j;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 2113f1c8611f..7f48efa7e295 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -129,7 +129,7 @@ static int synth_all(struct machine *machine)
129{ 129{
130 return perf_event__synthesize_threads(NULL, 130 return perf_event__synthesize_threads(NULL,
131 perf_event__process, 131 perf_event__process,
132 machine, 0); 132 machine, 0, 500);
133} 133}
134 134
135static int synth_process(struct machine *machine) 135static int synth_process(struct machine *machine)
@@ -141,7 +141,7 @@ static int synth_process(struct machine *machine)
141 141
142 err = perf_event__synthesize_thread_map(NULL, map, 142 err = perf_event__synthesize_thread_map(NULL, map,
143 perf_event__process, 143 perf_event__process,
144 machine, 0); 144 machine, 0, 500);
145 145
146 thread_map__delete(map); 146 thread_map__delete(map);
147 return err; 147 return err;
@@ -191,6 +191,8 @@ static int mmap_events(synth_cb synth)
191 PERF_RECORD_MISC_USER, MAP__FUNCTION, 191 PERF_RECORD_MISC_USER, MAP__FUNCTION,
192 (unsigned long) (td->map + 1), &al); 192 (unsigned long) (td->map + 1), &al);
193 193
194 thread__put(thread);
195
194 if (!al.map) { 196 if (!al.map) {
195 pr_debug("failed, couldn't find map\n"); 197 pr_debug("failed, couldn't find map\n");
196 err = -1; 198 err = -1;
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 3ec885c48f8f..9a7a116e09b8 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -3,13 +3,14 @@
3#include "thread_map.h" 3#include "thread_map.h"
4#include "cpumap.h" 4#include "cpumap.h"
5#include "debug.h" 5#include "debug.h"
6#include "stat.h"
6 7
7int test__open_syscall_event_on_all_cpus(void) 8int test__openat_syscall_event_on_all_cpus(void)
8{ 9{
9 int err = -1, fd, cpu; 10 int err = -1, fd, cpu;
10 struct cpu_map *cpus; 11 struct cpu_map *cpus;
11 struct perf_evsel *evsel; 12 struct perf_evsel *evsel;
12 unsigned int nr_open_calls = 111, i; 13 unsigned int nr_openat_calls = 111, i;
13 cpu_set_t cpu_set; 14 cpu_set_t cpu_set;
14 struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); 15 struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
15 char sbuf[STRERR_BUFSIZE]; 16 char sbuf[STRERR_BUFSIZE];
@@ -27,7 +28,7 @@ int test__open_syscall_event_on_all_cpus(void)
27 28
28 CPU_ZERO(&cpu_set); 29 CPU_ZERO(&cpu_set);
29 30
30 evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); 31 evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
31 if (evsel == NULL) { 32 if (evsel == NULL) {
32 if (tracefs_configured()) 33 if (tracefs_configured())
33 pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); 34 pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -46,7 +47,7 @@ int test__open_syscall_event_on_all_cpus(void)
46 } 47 }
47 48
48 for (cpu = 0; cpu < cpus->nr; ++cpu) { 49 for (cpu = 0; cpu < cpus->nr; ++cpu) {
49 unsigned int ncalls = nr_open_calls + cpu; 50 unsigned int ncalls = nr_openat_calls + cpu;
50 /* 51 /*
51 * XXX eventually lift this restriction in a way that 52 * XXX eventually lift this restriction in a way that
52 * keeps perf building on older glibc installations 53 * keeps perf building on older glibc installations
@@ -66,7 +67,7 @@ int test__open_syscall_event_on_all_cpus(void)
66 goto out_close_fd; 67 goto out_close_fd;
67 } 68 }
68 for (i = 0; i < ncalls; ++i) { 69 for (i = 0; i < ncalls; ++i) {
69 fd = open("/etc/passwd", O_RDONLY); 70 fd = openat(0, "/etc/passwd", O_RDONLY);
70 close(fd); 71 close(fd);
71 } 72 }
72 CPU_CLR(cpus->map[cpu], &cpu_set); 73 CPU_CLR(cpus->map[cpu], &cpu_set);
@@ -96,7 +97,7 @@ int test__open_syscall_event_on_all_cpus(void)
96 break; 97 break;
97 } 98 }
98 99
99 expected = nr_open_calls + cpu; 100 expected = nr_openat_calls + cpu;
100 if (evsel->counts->cpu[cpu].val != expected) { 101 if (evsel->counts->cpu[cpu].val != expected) {
101 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", 102 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
102 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); 103 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
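
This test and the renamed siblings below move from syscalls:sys_enter_open to syscalls:sys_enter_openat: modern libcs commonly implement open(3) via the openat(2) syscall (and some newer architectures only provide openat), so the old tracepoint never fires and the tests fail. With an absolute path the dirfd argument of openat() is ignored, which is why the converted tests can pass a dummy descriptor such as 0 or AT_FDCWD. A standalone demonstration, plain libc:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* With an absolute path, the dirfd is ignored: both calls open
	 * the same file, and on an openat-based libc both enter the
	 * same sys_enter_openat tracepoint. */
	int fd1 = open("/etc/passwd", O_RDONLY);
	int fd2 = openat(AT_FDCWD, "/etc/passwd", O_RDONLY);

	printf("open: %d, openat: %d\n", fd1, fd2);
	if (fd1 >= 0)
		close(fd1);
	if (fd2 >= 0)
		close(fd2);
	return 0;
}
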
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 127dcae0b760..6245221479d7 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -5,7 +5,7 @@
5#include "tests.h" 5#include "tests.h"
6#include "debug.h" 6#include "debug.h"
7 7
8int test__syscall_open_tp_fields(void) 8int test__syscall_openat_tp_fields(void)
9{ 9{
10 struct record_opts opts = { 10 struct record_opts opts = {
11 .target = { 11 .target = {
@@ -29,7 +29,7 @@ int test__syscall_open_tp_fields(void)
29 goto out; 29 goto out;
30 } 30 }
31 31
32 evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); 32 evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
33 if (evsel == NULL) { 33 if (evsel == NULL) {
34 pr_debug("%s: perf_evsel__newtp\n", __func__); 34 pr_debug("%s: perf_evsel__newtp\n", __func__);
35 goto out_delete_evlist; 35 goto out_delete_evlist;
@@ -66,7 +66,7 @@ int test__syscall_open_tp_fields(void)
66 /* 66 /*
67 * Generate the event: 67 * Generate the event:
68 */ 68 */
69 open(filename, flags); 69 openat(AT_FDCWD, filename, flags);
70 70
71 while (1) { 71 while (1) {
72 int before = nr_events; 72 int before = nr_events;
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/openat-syscall.c
index 07aa319bf334..9f9491bb8e48 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -3,11 +3,11 @@
3#include "debug.h" 3#include "debug.h"
4#include "tests.h" 4#include "tests.h"
5 5
6int test__open_syscall_event(void) 6int test__openat_syscall_event(void)
7{ 7{
8 int err = -1, fd; 8 int err = -1, fd;
9 struct perf_evsel *evsel; 9 struct perf_evsel *evsel;
10 unsigned int nr_open_calls = 111, i; 10 unsigned int nr_openat_calls = 111, i;
11 struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); 11 struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
12 char sbuf[STRERR_BUFSIZE]; 12 char sbuf[STRERR_BUFSIZE];
13 13
@@ -16,7 +16,7 @@ int test__open_syscall_event(void)
16 return -1; 16 return -1;
17 } 17 }
18 18
19 evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); 19 evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
20 if (evsel == NULL) { 20 if (evsel == NULL) {
21 if (tracefs_configured()) 21 if (tracefs_configured())
22 pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); 22 pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -34,8 +34,8 @@ int test__open_syscall_event(void)
34 goto out_evsel_delete; 34 goto out_evsel_delete;
35 } 35 }
36 36
37 for (i = 0; i < nr_open_calls; ++i) { 37 for (i = 0; i < nr_openat_calls; ++i) {
38 fd = open("/etc/passwd", O_RDONLY); 38 fd = openat(0, "/etc/passwd", O_RDONLY);
39 close(fd); 39 close(fd);
40 } 40 }
41 41
@@ -44,9 +44,9 @@ int test__open_syscall_event(void)
44 goto out_close_fd; 44 goto out_close_fd;
45 } 45 }
46 46
47 if (evsel->counts->cpu[0].val != nr_open_calls) { 47 if (evsel->counts->cpu[0].val != nr_openat_calls) {
48 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", 48 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
49 nr_open_calls, evsel->counts->cpu[0].val); 49 nr_openat_calls, evsel->counts->cpu[0].val);
50 goto out_close_fd; 50 goto out_close_fd;
51 } 51 }
52 52
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 3de744961739..d76963f7ad3d 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -427,7 +427,7 @@ static int test__checkevent_list(struct perf_evlist *evlist)
427 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); 427 TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
428 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); 428 TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
429 429
430 /* syscalls:sys_enter_open:k */ 430 /* syscalls:sys_enter_openat:k */
431 evsel = perf_evsel__next(evsel); 431 evsel = perf_evsel__next(evsel);
432 TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); 432 TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
433 TEST_ASSERT_VAL("wrong sample_type", 433 TEST_ASSERT_VAL("wrong sample_type",
@@ -665,7 +665,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
665 TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); 665 TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
666 TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); 666 TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
667 667
668 /* group1 syscalls:sys_enter_open:H */ 668 /* group1 syscalls:sys_enter_openat:H */
669 evsel = leader = perf_evlist__first(evlist); 669 evsel = leader = perf_evlist__first(evlist);
670 TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); 670 TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
671 TEST_ASSERT_VAL("wrong sample_type", 671 TEST_ASSERT_VAL("wrong sample_type",
@@ -1293,7 +1293,7 @@ struct evlist_test {
1293 1293
1294static struct evlist_test test__events[] = { 1294static struct evlist_test test__events[] = {
1295 { 1295 {
1296 .name = "syscalls:sys_enter_open", 1296 .name = "syscalls:sys_enter_openat",
1297 .check = test__checkevent_tracepoint, 1297 .check = test__checkevent_tracepoint,
1298 .id = 0, 1298 .id = 0,
1299 }, 1299 },
@@ -1353,7 +1353,7 @@ static struct evlist_test test__events[] = {
1353 .id = 11, 1353 .id = 11,
1354 }, 1354 },
1355 { 1355 {
1356 .name = "syscalls:sys_enter_open:k", 1356 .name = "syscalls:sys_enter_openat:k",
1357 .check = test__checkevent_tracepoint_modifier, 1357 .check = test__checkevent_tracepoint_modifier,
1358 .id = 12, 1358 .id = 12,
1359 }, 1359 },
@@ -1408,7 +1408,7 @@ static struct evlist_test test__events[] = {
1408 .id = 22, 1408 .id = 22,
1409 }, 1409 },
1410 { 1410 {
1411 .name = "r1,syscalls:sys_enter_open:k,1:1:hp", 1411 .name = "r1,syscalls:sys_enter_openat:k,1:1:hp",
1412 .check = test__checkevent_list, 1412 .check = test__checkevent_list,
1413 .id = 23, 1413 .id = 23,
1414 }, 1414 },
@@ -1443,7 +1443,7 @@ static struct evlist_test test__events[] = {
1443 .id = 29, 1443 .id = 29,
1444 }, 1444 },
1445 { 1445 {
1446 .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", 1446 .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
1447 .check = test__group3, 1447 .check = test__group3,
1448 .id = 30, 1448 .id = 30,
1449 }, 1449 },
@@ -1571,7 +1571,7 @@ static int test_event(struct evlist_test *e)
1571 if (evlist == NULL) 1571 if (evlist == NULL)
1572 return -ENOMEM; 1572 return -ENOMEM;
1573 1573
1574 ret = parse_events(evlist, e->name); 1574 ret = parse_events(evlist, e->name, NULL);
1575 if (ret) { 1575 if (ret) {
1576 pr_debug("failed to parse event '%s', err %d\n", 1576 pr_debug("failed to parse event '%s', err %d\n",
1577 e->name, ret); 1577 e->name, ret);
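
parse_events() grows a third parameter here and in the callers below, and every call site in this diff passes NULL; judging by the shape of the change (an assumption, since the excerpt does not show the prototype), the new argument is an optional struct for reporting where parsing failed. The updated call shape, a perf-internal fragment copied from these hunks rather than standalone code:

	err = parse_events(evlist, "cycles:u", NULL);
	if (err) {
		pr_debug("Failed to parse event cycles:u\n");
		goto out_err;
	}
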
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index f238442b238a..5f49484f1abc 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void)
68 68
69 perf_evlist__set_maps(evlist, cpus, threads); 69 perf_evlist__set_maps(evlist, cpus, threads);
70 70
71 CHECK__(parse_events(evlist, "cycles:u")); 71 CHECK__(parse_events(evlist, "cycles:u", NULL));
72 72
73 perf_evlist__config(evlist, &opts); 73 perf_evlist__config(evlist, &opts);
74 74
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index eeb68bb1972d..faa04e9d5d5f 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -152,7 +152,8 @@ int test__pmu(void)
152 if (ret) 152 if (ret)
153 break; 153 break;
154 154
155 ret = perf_pmu__config_terms(&formats, &attr, terms, false); 155 ret = perf_pmu__config_terms(&formats, &attr, terms,
156 false, NULL);
156 if (ret) 157 if (ret)
157 break; 158 break;
158 159
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index cc68648c7c55..0d31403ea593 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -347,7 +347,7 @@ int test__switch_tracking(void)
347 perf_evlist__set_maps(evlist, cpus, threads); 347 perf_evlist__set_maps(evlist, cpus, threads);
348 348
349 /* First event */ 349 /* First event */
350 err = parse_events(evlist, "cpu-clock:u"); 350 err = parse_events(evlist, "cpu-clock:u", NULL);
351 if (err) { 351 if (err) {
352 pr_debug("Failed to parse event dummy:u\n"); 352 pr_debug("Failed to parse event dummy:u\n");
353 goto out_err; 353 goto out_err;
@@ -356,7 +356,7 @@ int test__switch_tracking(void)
356 cpu_clocks_evsel = perf_evlist__last(evlist); 356 cpu_clocks_evsel = perf_evlist__last(evlist);
357 357
358 /* Second event */ 358 /* Second event */
359 err = parse_events(evlist, "cycles:u"); 359 err = parse_events(evlist, "cycles:u", NULL);
360 if (err) { 360 if (err) {
361 pr_debug("Failed to parse event cycles:u\n"); 361 pr_debug("Failed to parse event cycles:u\n");
362 goto out_err; 362 goto out_err;
@@ -371,7 +371,7 @@ int test__switch_tracking(void)
371 goto out; 371 goto out;
372 } 372 }
373 373
374 err = parse_events(evlist, sched_switch); 374 err = parse_events(evlist, sched_switch, NULL);
375 if (err) { 375 if (err) {
376 pr_debug("Failed to parse event %s\n", sched_switch); 376 pr_debug("Failed to parse event %s\n", sched_switch);
377 goto out_err; 377 goto out_err;
@@ -401,7 +401,7 @@ int test__switch_tracking(void)
401 perf_evsel__set_sample_bit(cycles_evsel, TIME); 401 perf_evsel__set_sample_bit(cycles_evsel, TIME);
402 402
403 /* Fourth event */ 403 /* Fourth event */
404 err = parse_events(evlist, "dummy:u"); 404 err = parse_events(evlist, "dummy:u", NULL);
405 if (err) { 405 if (err) {
406 pr_debug("Failed to parse event dummy:u\n"); 406 pr_debug("Failed to parse event dummy:u\n");
407 goto out_err; 407 goto out_err;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 52758a33f64c..8e5038b48ba8 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -9,6 +9,15 @@ do { \
9 } \ 9 } \
10} while (0) 10} while (0)
11 11
12#define TEST_ASSERT_EQUAL(text, val, expected) \
13do { \
14 if (val != expected) { \
15 pr_debug("FAILED %s:%d %s (%d != %d)\n", \
16 __FILE__, __LINE__, text, val, expected); \
17 return -1; \
18 } \
19} while (0)
20
12enum { 21enum {
13 TEST_OK = 0, 22 TEST_OK = 0,
14 TEST_FAIL = -1, 23 TEST_FAIL = -1,
@@ -17,14 +26,14 @@ enum {
17 26
18/* Tests */ 27/* Tests */
19int test__vmlinux_matches_kallsyms(void); 28int test__vmlinux_matches_kallsyms(void);
20int test__open_syscall_event(void); 29int test__openat_syscall_event(void);
21int test__open_syscall_event_on_all_cpus(void); 30int test__openat_syscall_event_on_all_cpus(void);
22int test__basic_mmap(void); 31int test__basic_mmap(void);
23int test__PERF_RECORD(void); 32int test__PERF_RECORD(void);
24int test__rdpmc(void); 33int test__rdpmc(void);
25int test__perf_evsel__roundtrip_name_test(void); 34int test__perf_evsel__roundtrip_name_test(void);
26int test__perf_evsel__tp_sched_test(void); 35int test__perf_evsel__tp_sched_test(void);
27int test__syscall_open_tp_fields(void); 36int test__syscall_openat_tp_fields(void);
28int test__pmu(void); 37int test__pmu(void);
29int test__attr(void); 38int test__attr(void);
30int test__dso_data(void); 39int test__dso_data(void);
@@ -53,7 +62,7 @@ int test__fdarray__filter(void);
53int test__fdarray__add(void); 62int test__fdarray__add(void);
54int test__kmod_path__parse(void); 63int test__kmod_path__parse(void);
55 64
56#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) 65#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
57#ifdef HAVE_DWARF_UNWIND_SUPPORT 66#ifdef HAVE_DWARF_UNWIND_SUPPORT
58struct thread; 67struct thread;
59struct perf_sample; 68struct perf_sample;
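
TEST_ASSERT_EQUAL complements the existing TEST_ASSERT_VAL by printing both the actual and the expected value on failure, which the refcount checks in thread-mg-share.c below rely on. A standalone demo of the macro; pr_debug is shimmed with fprintf purely so this compiles outside the perf tree:

#include <stdio.h>

#define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)

#define TEST_ASSERT_EQUAL(text, val, expected)				\
do {									\
	if (val != expected) {						\
		pr_debug("FAILED %s:%d %s (%d != %d)\n",		\
			 __FILE__, __LINE__, text, val, expected);	\
		return -1;						\
	}								\
} while (0)

static int check(int refcnt)
{
	TEST_ASSERT_EQUAL("wrong refcnt", refcnt, 4);
	return 0;
}

int main(void)
{
	return check(3) ? 1 : 0;	/* prints FAILED ... (3 != 4) */
}
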
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index b028499dd3cf..01fabb19d746 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -43,7 +43,7 @@ int test__thread_mg_share(void)
43 leader && t1 && t2 && t3 && other); 43 leader && t1 && t2 && t3 && other);
44 44
45 mg = leader->mg; 45 mg = leader->mg;
46 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4); 46 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4);
47 47
48 /* test the map groups pointer is shared */ 48 /* test the map groups pointer is shared */
49 TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); 49 TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
@@ -58,33 +58,40 @@ int test__thread_mg_share(void)
58 other_leader = machine__find_thread(machine, 4, 4); 58 other_leader = machine__find_thread(machine, 4, 4);
59 TEST_ASSERT_VAL("failed to find other leader", other_leader); 59 TEST_ASSERT_VAL("failed to find other leader", other_leader);
60 60
61 /*
62 * Ok, now that all the rbtree related operations were done,
63 * lets remove all of them from there so that we can do the
64 * refcounting tests.
65 */
66 machine__remove_thread(machine, leader);
67 machine__remove_thread(machine, t1);
68 machine__remove_thread(machine, t2);
69 machine__remove_thread(machine, t3);
70 machine__remove_thread(machine, other);
71 machine__remove_thread(machine, other_leader);
72
61 other_mg = other->mg; 73 other_mg = other->mg;
62 TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2); 74 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2);
63 75
64 TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); 76 TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
65 77
66 /* release thread group */ 78 /* release thread group */
67 thread__delete(leader); 79 thread__put(leader);
68 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3); 80 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3);
69 81
70 thread__delete(t1); 82 thread__put(t1);
71 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2); 83 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2);
72 84
73 thread__delete(t2); 85 thread__put(t2);
74 TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1); 86 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1);
75 87
76 thread__delete(t3); 88 thread__put(t3);
77 89
78 /* release other group */ 90 /* release other group */
79 thread__delete(other_leader); 91 thread__put(other_leader);
80 TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1); 92 TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1);
81 93
82 thread__delete(other); 94 thread__put(other);
83
84 /*
85 * Cannot call machine__delete_threads(machine) now,
86 * because we've already released all the threads.
87 */
88 95
89 machines__exit(&machines); 96 machines__exit(&machines);
90 return 0; 97 return 0;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 3d9088003a5b..b34c5fc829ae 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -23,9 +23,10 @@ int test__vmlinux_matches_kallsyms(void)
23 int err = -1; 23 int err = -1;
24 struct rb_node *nd; 24 struct rb_node *nd;
25 struct symbol *sym; 25 struct symbol *sym;
26 struct map *kallsyms_map, *vmlinux_map; 26 struct map *kallsyms_map, *vmlinux_map, *map;
27 struct machine kallsyms, vmlinux; 27 struct machine kallsyms, vmlinux;
28 enum map_type type = MAP__FUNCTION; 28 enum map_type type = MAP__FUNCTION;
29 struct maps *maps = &vmlinux.kmaps.maps[type];
29 u64 mem_start, mem_end; 30 u64 mem_start, mem_end;
30 31
31 /* 32 /*
@@ -184,8 +185,8 @@ detour:
184 185
185 pr_info("Maps only in vmlinux:\n"); 186 pr_info("Maps only in vmlinux:\n");
186 187
187 for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) { 188 for (map = maps__first(maps); map; map = map__next(map)) {
188 struct map *pos = rb_entry(nd, struct map, rb_node), *pair; 189 struct map *pair;
189 /* 190 /*
190 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while 191 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
191 * the kernel will have the path for the vmlinux file being used, 192 * the kernel will have the path for the vmlinux file being used,
@@ -193,22 +194,22 @@ detour:
193 * both cases. 194 * both cases.
194 */ 195 */
195 pair = map_groups__find_by_name(&kallsyms.kmaps, type, 196 pair = map_groups__find_by_name(&kallsyms.kmaps, type,
196 (pos->dso->kernel ? 197 (map->dso->kernel ?
197 pos->dso->short_name : 198 map->dso->short_name :
198 pos->dso->name)); 199 map->dso->name));
199 if (pair) 200 if (pair)
200 pair->priv = 1; 201 pair->priv = 1;
201 else 202 else
202 map__fprintf(pos, stderr); 203 map__fprintf(map, stderr);
203 } 204 }
204 205
205 pr_info("Maps in vmlinux with a different name in kallsyms:\n"); 206 pr_info("Maps in vmlinux with a different name in kallsyms:\n");
206 207
207 for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) { 208 for (map = maps__first(maps); map; map = map__next(map)) {
208 struct map *pos = rb_entry(nd, struct map, rb_node), *pair; 209 struct map *pair;
209 210
210 mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start); 211 mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
211 mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end); 212 mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
212 213
213 pair = map_groups__find(&kallsyms.kmaps, type, mem_start); 214 pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
214 if (pair == NULL || pair->priv) 215 if (pair == NULL || pair->priv)
@@ -217,7 +218,7 @@ detour:
217 if (pair->start == mem_start) { 218 if (pair->start == mem_start) {
218 pair->priv = 1; 219 pair->priv = 1;
219 pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", 220 pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
220 pos->start, pos->end, pos->pgoff, pos->dso->name); 221 map->start, map->end, map->pgoff, map->dso->name);
221 if (mem_end != pair->end) 222 if (mem_end != pair->end)
222 pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64, 223 pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
223 pair->start, pair->end, pair->pgoff); 224 pair->start, pair->end, pair->pgoff);
@@ -228,12 +229,11 @@ detour:
228 229
229 pr_info("Maps only in kallsyms:\n"); 230 pr_info("Maps only in kallsyms:\n");
230 231
231 for (nd = rb_first(&kallsyms.kmaps.maps[type]); 232 maps = &kallsyms.kmaps.maps[type];
232 nd; nd = rb_next(nd)) {
233 struct map *pos = rb_entry(nd, struct map, rb_node);
234 233
235 if (!pos->priv) 234 for (map = maps__first(maps); map; map = map__next(map)) {
236 map__fprintf(pos, stderr); 235 if (!map->priv)
236 map__fprintf(map, stderr);
237 } 237 }
238out: 238out:
239 machine__exit(&kallsyms); 239 machine__exit(&kallsyms);
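
All three loops above trade the open-coded rb-tree walk (rb_first()/rb_next() plus rb_entry()) for the maps__first()/map__next() accessors, so callers no longer depend on how struct maps stores its entries. The new idiom, a perf-internal fragment taken from this hunk rather than standalone code:

	struct maps *maps = &vmlinux.kmaps.maps[MAP__FUNCTION];
	struct map *map;

	for (map = maps__first(maps); map; map = map__next(map)) {
		/* inspect map->start, map->end, map->dso, ... */
	}
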
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb2dd57..5995a8bd7c69 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
11#include "../../util/evsel.h" 11#include "../../util/evsel.h"
12#include <pthread.h> 12#include <pthread.h>
13 13
14struct disasm_line_samples {
15 double percent;
16 u64 nr;
17};
18
14struct browser_disasm_line { 19struct browser_disasm_line {
15 struct rb_node rb_node; 20 struct rb_node rb_node;
16 u32 idx; 21 u32 idx;
17 int idx_asm; 22 int idx_asm;
18 int jump_sources; 23 int jump_sources;
19 /* 24 /*
20 * actual length of this array is saved on the nr_events field 25 * actual length of this array is saved on the nr_events field
21 * of the struct annotate_browser 26 * of the struct annotate_browser
22 */ 27 */
23 double percent[1]; 28 struct disasm_line_samples samples[1];
24}; 29};
25 30
26static struct annotate_browser_opt { 31static struct annotate_browser_opt {
@@ -28,7 +33,8 @@ static struct annotate_browser_opt {
28 use_offset, 33 use_offset,
29 jump_arrows, 34 jump_arrows,
30 show_linenr, 35 show_linenr,
31 show_nr_jumps; 36 show_nr_jumps,
37 show_total_period;
32} annotate_browser__opts = { 38} annotate_browser__opts = {
33 .use_offset = true, 39 .use_offset = true,
34 .jump_arrows = true, 40 .jump_arrows = true,
@@ -105,15 +111,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
105 char bf[256]; 111 char bf[256];
106 112
107 for (i = 0; i < ab->nr_events; i++) { 113 for (i = 0; i < ab->nr_events; i++) {
108 if (bdl->percent[i] > percent_max) 114 if (bdl->samples[i].percent > percent_max)
109 percent_max = bdl->percent[i]; 115 percent_max = bdl->samples[i].percent;
110 } 116 }
111 117
112 if (dl->offset != -1 && percent_max != 0.0) { 118 if (dl->offset != -1 && percent_max != 0.0) {
113 for (i = 0; i < ab->nr_events; i++) { 119 for (i = 0; i < ab->nr_events; i++) {
114 ui_browser__set_percent_color(browser, bdl->percent[i], 120 ui_browser__set_percent_color(browser,
121 bdl->samples[i].percent,
115 current_entry); 122 current_entry);
116 slsmg_printf("%6.2f ", bdl->percent[i]); 123 if (annotate_browser__opts.show_total_period)
124 slsmg_printf("%6" PRIu64 " ",
125 bdl->samples[i].nr);
126 else
127 slsmg_printf("%6.2f ", bdl->samples[i].percent);
117 } 128 }
118 } else { 129 } else {
119 ui_browser__set_percent_color(browser, 0, current_entry); 130 ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +284,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
273 int i; 284 int i;
274 285
275 for (i = 0; i < nr_pcnt; i++) { 286 for (i = 0; i < nr_pcnt; i++) {
276 if (a->percent[i] == b->percent[i]) 287 if (a->samples[i].percent == b->samples[i].percent)
277 continue; 288 continue;
278 return a->percent[i] < b->percent[i]; 289 return a->samples[i].percent < b->samples[i].percent;
279 } 290 }
280 return 0; 291 return 0;
281} 292}
@@ -366,14 +377,17 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
366 next = disasm__get_next_ip_line(&notes->src->source, pos); 377 next = disasm__get_next_ip_line(&notes->src->source, pos);
367 378
368 for (i = 0; i < browser->nr_events; i++) { 379 for (i = 0; i < browser->nr_events; i++) {
369 bpos->percent[i] = disasm__calc_percent(notes, 380 u64 nr_samples;
381
382 bpos->samples[i].percent = disasm__calc_percent(notes,
370 evsel->idx + i, 383 evsel->idx + i,
371 pos->offset, 384 pos->offset,
372 next ? next->offset : len, 385 next ? next->offset : len,
373 &path); 386 &path, &nr_samples);
387 bpos->samples[i].nr = nr_samples;
374 388
375 if (max_percent < bpos->percent[i]) 389 if (max_percent < bpos->samples[i].percent)
376 max_percent = bpos->percent[i]; 390 max_percent = bpos->samples[i].percent;
377 } 391 }
378 392
379 if (max_percent < 0.01) { 393 if (max_percent < 0.01) {
@@ -737,6 +751,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
737 "n Search next string\n" 751 "n Search next string\n"
738 "o Toggle disassembler output/simplified view\n" 752 "o Toggle disassembler output/simplified view\n"
739 "s Toggle source code view\n" 753 "s Toggle source code view\n"
754 "t Toggle total period view\n"
740 "/ Search string\n" 755 "/ Search string\n"
741 "k Toggle line numbers\n" 756 "k Toggle line numbers\n"
742 "r Run available scripts\n" 757 "r Run available scripts\n"
@@ -812,6 +827,11 @@ show_sup_ins:
812 ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions."); 827 ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
813 } 828 }
814 continue; 829 continue;
830 case 't':
831 annotate_browser__opts.show_total_period =
832 !annotate_browser__opts.show_total_period;
833 annotate_browser__update_addr_width(browser);
834 continue;
815 case K_LEFT: 835 case K_LEFT:
816 case K_ESC: 836 case K_ESC:
817 case 'q': 837 case 'q':
@@ -832,12 +852,20 @@ out:
832int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, 852int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
833 struct hist_browser_timer *hbt) 853 struct hist_browser_timer *hbt)
834{ 854{
855 /* Set default value for show_total_period. */
856 annotate_browser__opts.show_total_period =
857 symbol_conf.show_total_period;
858
835 return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); 859 return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
836} 860}
837 861
838int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, 862int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
839 struct hist_browser_timer *hbt) 863 struct hist_browser_timer *hbt)
840{ 864{
865 /* reset abort key so that it can get Ctrl-C as a key */
866 SLang_reset_tty();
867 SLang_init_tty(0, 0, 0);
868
841 return map_symbol__tui_annotate(&he->ms, evsel, hbt); 869 return map_symbol__tui_annotate(&he->ms, evsel, hbt);
842} 870}
843 871
@@ -925,7 +953,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
925 953
926 if (perf_evsel__is_group_event(evsel)) { 954 if (perf_evsel__is_group_event(evsel)) {
927 nr_pcnt = evsel->nr_members; 955 nr_pcnt = evsel->nr_members;
928 sizeof_bdl += sizeof(double) * (nr_pcnt - 1); 956 sizeof_bdl += sizeof(struct disasm_line_samples) *
957 (nr_pcnt - 1);
929 } 958 }
930 959
931 if (symbol__annotate(sym, map, sizeof_bdl) < 0) { 960 if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
@@ -1002,6 +1031,7 @@ static struct annotate_config {
1002 ANNOTATE_CFG(show_linenr), 1031 ANNOTATE_CFG(show_linenr),
1003 ANNOTATE_CFG(show_nr_jumps), 1032 ANNOTATE_CFG(show_nr_jumps),
1004 ANNOTATE_CFG(use_offset), 1033 ANNOTATE_CFG(use_offset),
1034 ANNOTATE_CFG(show_total_period),
1005}; 1035};
1006 1036
1007#undef ANNOTATE_CFG 1037#undef ANNOTATE_CFG
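
The annotate browser replaces its per-event double percent[1] tail with struct disasm_line_samples samples[1], keeping the raw sample count next to the percentage so the new 't' hotkey can toggle between the two displays. The [1] tail is the classic pre-C99 trailing-array trick, which is why the allocation above scales by (nr_pcnt - 1): one entry already lives inside the struct. A standalone toy of that sizing (hypothetical, not perf itself):

#include <stdlib.h>

struct disasm_line_samples {
	double			percent;
	unsigned long long	nr;
};

struct browser_disasm_line {
	int idx;
	struct disasm_line_samples samples[1];	/* really nr_events entries */
};

static struct browser_disasm_line *bdl__new(int nr_events)
{
	size_t sz = sizeof(struct browser_disasm_line) +
		    sizeof(struct disasm_line_samples) * (nr_events - 1);

	return calloc(1, sz);	/* room for the nr_events - 1 extra entries */
}

int main(void)
{
	struct browser_disasm_line *bdl = bdl__new(4);

	bdl->samples[3].percent = 12.5;	/* last of the 4 allocated entries */
	free(bdl);
	return 0;
}
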
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 995b7a8596b1..c42adb600091 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -25,6 +25,9 @@ struct hist_browser {
25 struct hists *hists; 25 struct hists *hists;
26 struct hist_entry *he_selection; 26 struct hist_entry *he_selection;
27 struct map_symbol *selection; 27 struct map_symbol *selection;
28 struct hist_browser_timer *hbt;
29 struct pstack *pstack;
30 struct perf_session_env *env;
28 int print_seq; 31 int print_seq;
29 bool show_dso; 32 bool show_dso;
30 bool show_headers; 33 bool show_headers;
@@ -60,7 +63,7 @@ static int hist_browser__get_folding(struct hist_browser *browser)
60 struct hist_entry *he = 63 struct hist_entry *he =
61 rb_entry(nd, struct hist_entry, rb_node); 64 rb_entry(nd, struct hist_entry, rb_node);
62 65
63 if (he->ms.unfolded) 66 if (he->unfolded)
64 unfolded_rows += he->nr_rows; 67 unfolded_rows += he->nr_rows;
65 } 68 }
66 return unfolded_rows; 69 return unfolded_rows;
@@ -136,24 +139,19 @@ static char tree__folded_sign(bool unfolded)
136 return unfolded ? '-' : '+'; 139 return unfolded ? '-' : '+';
137} 140}
138 141
139static char map_symbol__folded(const struct map_symbol *ms)
140{
141 return ms->has_children ? tree__folded_sign(ms->unfolded) : ' ';
142}
143
144static char hist_entry__folded(const struct hist_entry *he) 142static char hist_entry__folded(const struct hist_entry *he)
145{ 143{
146 return map_symbol__folded(&he->ms); 144 return he->has_children ? tree__folded_sign(he->unfolded) : ' ';
147} 145}
148 146
149static char callchain_list__folded(const struct callchain_list *cl) 147static char callchain_list__folded(const struct callchain_list *cl)
150{ 148{
151 return map_symbol__folded(&cl->ms); 149 return cl->has_children ? tree__folded_sign(cl->unfolded) : ' ';
152} 150}
153 151
154static void map_symbol__set_folding(struct map_symbol *ms, bool unfold) 152static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
155{ 153{
156 ms->unfolded = unfold ? ms->has_children : false; 154 cl->unfolded = unfold ? cl->has_children : false;
157} 155}
158 156
159static int callchain_node__count_rows_rb_tree(struct callchain_node *node) 157static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
@@ -189,7 +187,7 @@ static int callchain_node__count_rows(struct callchain_node *node)
189 187
190 list_for_each_entry(chain, &node->val, list) { 188 list_for_each_entry(chain, &node->val, list) {
191 ++n; 189 ++n;
192 unfolded = chain->ms.unfolded; 190 unfolded = chain->unfolded;
193 } 191 }
194 192
195 if (unfolded) 193 if (unfolded)
@@ -211,15 +209,27 @@ static int callchain__count_rows(struct rb_root *chain)
211 return n; 209 return n;
212} 210}
213 211
214static bool map_symbol__toggle_fold(struct map_symbol *ms) 212static bool hist_entry__toggle_fold(struct hist_entry *he)
215{ 213{
216 if (!ms) 214 if (!he)
217 return false; 215 return false;
218 216
219 if (!ms->has_children) 217 if (!he->has_children)
220 return false; 218 return false;
221 219
222 ms->unfolded = !ms->unfolded; 220 he->unfolded = !he->unfolded;
221 return true;
222}
223
224static bool callchain_list__toggle_fold(struct callchain_list *cl)
225{
226 if (!cl)
227 return false;
228
229 if (!cl->has_children)
230 return false;
231
232 cl->unfolded = !cl->unfolded;
223 return true; 233 return true;
224} 234}
225 235
@@ -235,10 +245,10 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no
235 list_for_each_entry(chain, &child->val, list) { 245 list_for_each_entry(chain, &child->val, list) {
236 if (first) { 246 if (first) {
237 first = false; 247 first = false;
238 chain->ms.has_children = chain->list.next != &child->val || 248 chain->has_children = chain->list.next != &child->val ||
239 !RB_EMPTY_ROOT(&child->rb_root); 249 !RB_EMPTY_ROOT(&child->rb_root);
240 } else 250 } else
241 chain->ms.has_children = chain->list.next == &child->val && 251 chain->has_children = chain->list.next == &child->val &&
242 !RB_EMPTY_ROOT(&child->rb_root); 252 !RB_EMPTY_ROOT(&child->rb_root);
243 } 253 }
244 254
@@ -252,11 +262,11 @@ static void callchain_node__init_have_children(struct callchain_node *node,
252 struct callchain_list *chain; 262 struct callchain_list *chain;
253 263
254 chain = list_entry(node->val.next, struct callchain_list, list); 264 chain = list_entry(node->val.next, struct callchain_list, list);
255 chain->ms.has_children = has_sibling; 265 chain->has_children = has_sibling;
256 266
257 if (!list_empty(&node->val)) { 267 if (!list_empty(&node->val)) {
258 chain = list_entry(node->val.prev, struct callchain_list, list); 268 chain = list_entry(node->val.prev, struct callchain_list, list);
259 chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root); 269 chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
260 } 270 }
261 271
262 callchain_node__init_have_children_rb_tree(node); 272 callchain_node__init_have_children_rb_tree(node);
@@ -276,7 +286,7 @@ static void callchain__init_have_children(struct rb_root *root)
276static void hist_entry__init_have_children(struct hist_entry *he) 286static void hist_entry__init_have_children(struct hist_entry *he)
277{ 287{
278 if (!he->init_have_children) { 288 if (!he->init_have_children) {
279 he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain); 289 he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
280 callchain__init_have_children(&he->sorted_chain); 290 callchain__init_have_children(&he->sorted_chain);
281 he->init_have_children = true; 291 he->init_have_children = true;
282 } 292 }
@@ -284,14 +294,22 @@ static void hist_entry__init_have_children(struct hist_entry *he)
284 294
285static bool hist_browser__toggle_fold(struct hist_browser *browser) 295static bool hist_browser__toggle_fold(struct hist_browser *browser)
286{ 296{
287 if (map_symbol__toggle_fold(browser->selection)) { 297 struct hist_entry *he = browser->he_selection;
288 struct hist_entry *he = browser->he_selection; 298 struct map_symbol *ms = browser->selection;
299 struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
300 bool has_children;
289 301
302 if (ms == &he->ms)
303 has_children = hist_entry__toggle_fold(he);
304 else
305 has_children = callchain_list__toggle_fold(cl);
306
307 if (has_children) {
290 hist_entry__init_have_children(he); 308 hist_entry__init_have_children(he);
291 browser->b.nr_entries -= he->nr_rows; 309 browser->b.nr_entries -= he->nr_rows;
292 browser->nr_callchain_rows -= he->nr_rows; 310 browser->nr_callchain_rows -= he->nr_rows;
293 311
294 if (he->ms.unfolded) 312 if (he->unfolded)
295 he->nr_rows = callchain__count_rows(&he->sorted_chain); 313 he->nr_rows = callchain__count_rows(&he->sorted_chain);
296 else 314 else
297 he->nr_rows = 0; 315 he->nr_rows = 0;
@@ -318,8 +336,8 @@ static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool
318 336
319 list_for_each_entry(chain, &child->val, list) { 337 list_for_each_entry(chain, &child->val, list) {
320 ++n; 338 ++n;
321 map_symbol__set_folding(&chain->ms, unfold); 339 callchain_list__set_folding(chain, unfold);
322 has_children = chain->ms.has_children; 340 has_children = chain->has_children;
323 } 341 }
324 342
325 if (has_children) 343 if (has_children)
@@ -337,8 +355,8 @@ static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
337 355
338 list_for_each_entry(chain, &node->val, list) { 356 list_for_each_entry(chain, &node->val, list) {
339 ++n; 357 ++n;
340 map_symbol__set_folding(&chain->ms, unfold); 358 callchain_list__set_folding(chain, unfold);
341 has_children = chain->ms.has_children; 359 has_children = chain->has_children;
342 } 360 }
343 361
344 if (has_children) 362 if (has_children)
@@ -363,9 +381,9 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
363static void hist_entry__set_folding(struct hist_entry *he, bool unfold) 381static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
364{ 382{
365 hist_entry__init_have_children(he); 383 hist_entry__init_have_children(he);
366 map_symbol__set_folding(&he->ms, unfold); 384 he->unfolded = unfold ? he->has_children : false;
367 385
368 if (he->ms.has_children) { 386 if (he->has_children) {
369 int n = callchain__set_folding(&he->sorted_chain, unfold); 387 int n = callchain__set_folding(&he->sorted_chain, unfold);
370 he->nr_rows = unfold ? n : 0; 388 he->nr_rows = unfold ? n : 0;
371 } else 389 } else
@@ -406,11 +424,11 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
406 "Or reduce the sampling frequency."); 424 "Or reduce the sampling frequency.");
407} 425}
408 426
409static int hist_browser__run(struct hist_browser *browser, 427static int hist_browser__run(struct hist_browser *browser, const char *help)
410 struct hist_browser_timer *hbt)
411{ 428{
412 int key; 429 int key;
413 char title[160]; 430 char title[160];
431 struct hist_browser_timer *hbt = browser->hbt;
414 int delay_secs = hbt ? hbt->refresh : 0; 432 int delay_secs = hbt ? hbt->refresh : 0;
415 433
416 browser->b.entries = &browser->hists->entries; 434 browser->b.entries = &browser->hists->entries;
@@ -418,8 +436,7 @@ static int hist_browser__run(struct hist_browser *browser,
418 436
419 hists__browser_title(browser->hists, hbt, title, sizeof(title)); 437 hists__browser_title(browser->hists, hbt, title, sizeof(title));
420 438
421 if (ui_browser__show(&browser->b, title, 439 if (ui_browser__show(&browser->b, title, help) < 0)
422 "Press '?' for help on key bindings") < 0)
423 return -1; 440 return -1;
424 441
425 while (1) { 442 while (1) {
@@ -1016,7 +1033,7 @@ do_offset:
1016 if (offset > 0) { 1033 if (offset > 0) {
1017 do { 1034 do {
1018 h = rb_entry(nd, struct hist_entry, rb_node); 1035 h = rb_entry(nd, struct hist_entry, rb_node);
1019 if (h->ms.unfolded) { 1036 if (h->unfolded) {
1020 u16 remaining = h->nr_rows - h->row_offset; 1037 u16 remaining = h->nr_rows - h->row_offset;
1021 if (offset > remaining) { 1038 if (offset > remaining) {
1022 offset -= remaining; 1039 offset -= remaining;
@@ -1037,7 +1054,7 @@ do_offset:
1037 } else if (offset < 0) { 1054 } else if (offset < 0) {
1038 while (1) { 1055 while (1) {
1039 h = rb_entry(nd, struct hist_entry, rb_node); 1056 h = rb_entry(nd, struct hist_entry, rb_node);
1040 if (h->ms.unfolded) { 1057 if (h->unfolded) {
1041 if (first) { 1058 if (first) {
1042 if (-offset > h->row_offset) { 1059 if (-offset > h->row_offset) {
1043 offset += h->row_offset; 1060 offset += h->row_offset;
@@ -1074,7 +1091,7 @@ do_offset:
1074 * row_offset at its last entry. 1091 * row_offset at its last entry.
1075 */ 1092 */
1076 h = rb_entry(nd, struct hist_entry, rb_node); 1093 h = rb_entry(nd, struct hist_entry, rb_node);
1077 if (h->ms.unfolded) 1094 if (h->unfolded)
1078 h->row_offset = h->nr_rows; 1095 h->row_offset = h->nr_rows;
1079 break; 1096 break;
1080 } 1097 }
@@ -1195,7 +1212,9 @@ static int hist_browser__dump(struct hist_browser *browser)
1195 return 0; 1212 return 0;
1196} 1213}
1197 1214
1198static struct hist_browser *hist_browser__new(struct hists *hists) 1215static struct hist_browser *hist_browser__new(struct hists *hists,
1216 struct hist_browser_timer *hbt,
1217 struct perf_session_env *env)
1199{ 1218{
1200 struct hist_browser *browser = zalloc(sizeof(*browser)); 1219 struct hist_browser *browser = zalloc(sizeof(*browser));
1201 1220
@@ -1206,6 +1225,8 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
1206 browser->b.seek = ui_browser__hists_seek; 1225 browser->b.seek = ui_browser__hists_seek;
1207 browser->b.use_navkeypressed = true; 1226 browser->b.use_navkeypressed = true;
1208 browser->show_headers = symbol_conf.show_hist_headers; 1227 browser->show_headers = symbol_conf.show_hist_headers;
1228 browser->hbt = hbt;
1229 browser->env = env;
1209 } 1230 }
1210 1231
1211 return browser; 1232 return browser;
@@ -1395,6 +1416,257 @@ close_file_and_continue:
1395 return ret; 1416 return ret;
1396} 1417}
1397 1418
1419struct popup_action {
1420 struct thread *thread;
1421 struct dso *dso;
1422 struct map_symbol ms;
1423
1424 int (*fn)(struct hist_browser *browser, struct popup_action *act);
1425};
1426
1427static int
1428do_annotate(struct hist_browser *browser, struct popup_action *act)
1429{
1430 struct perf_evsel *evsel;
1431 struct annotation *notes;
1432 struct hist_entry *he;
1433 int err;
1434
1435 if (!objdump_path && perf_session_env__lookup_objdump(browser->env))
1436 return 0;
1437
1438 notes = symbol__annotation(act->ms.sym);
1439 if (!notes->src)
1440 return 0;
1441
1442 evsel = hists_to_evsel(browser->hists);
1443 err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
1444 he = hist_browser__selected_entry(browser);
1445 /*
1446 * offer option to annotate the other branch source or target
1447 * (if they exists) when returning from annotate
1448 */
1449 if ((err == 'q' || err == CTRL('c')) && he->branch_info)
1450 return 1;
1451
1452 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
1453 if (err)
1454 ui_browser__handle_resize(&browser->b);
1455 return 0;
1456}
1457
1458static int
1459add_annotate_opt(struct hist_browser *browser __maybe_unused,
1460 struct popup_action *act, char **optstr,
1461 struct map *map, struct symbol *sym)
1462{
1463 if (sym == NULL || map->dso->annotate_warned)
1464 return 0;
1465
1466 if (asprintf(optstr, "Annotate %s", sym->name) < 0)
1467 return 0;
1468
1469 act->ms.map = map;
1470 act->ms.sym = sym;
1471 act->fn = do_annotate;
1472 return 1;
1473}
1474
1475static int
1476do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
1477{
1478 struct thread *thread = act->thread;
1479
1480 if (browser->hists->thread_filter) {
1481 pstack__remove(browser->pstack, &browser->hists->thread_filter);
1482 perf_hpp__set_elide(HISTC_THREAD, false);
1483 thread__zput(browser->hists->thread_filter);
1484 ui_helpline__pop();
1485 } else {
1486 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
1487 thread->comm_set ? thread__comm_str(thread) : "",
1488 thread->tid);
1489 browser->hists->thread_filter = thread__get(thread);
1490 perf_hpp__set_elide(HISTC_THREAD, false);
1491 pstack__push(browser->pstack, &browser->hists->thread_filter);
1492 }
1493
1494 hists__filter_by_thread(browser->hists);
1495 hist_browser__reset(browser);
1496 return 0;
1497}
1498
1499static int
1500add_thread_opt(struct hist_browser *browser, struct popup_action *act,
1501 char **optstr, struct thread *thread)
1502{
1503 if (thread == NULL)
1504 return 0;
1505
1506 if (asprintf(optstr, "Zoom %s %s(%d) thread",
1507 browser->hists->thread_filter ? "out of" : "into",
1508 thread->comm_set ? thread__comm_str(thread) : "",
1509 thread->tid) < 0)
1510 return 0;
1511
1512 act->thread = thread;
1513 act->fn = do_zoom_thread;
1514 return 1;
1515}
1516
1517static int
1518do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
1519{
1520 struct dso *dso = act->dso;
1521
1522 if (browser->hists->dso_filter) {
1523 pstack__remove(browser->pstack, &browser->hists->dso_filter);
1524 perf_hpp__set_elide(HISTC_DSO, false);
1525 browser->hists->dso_filter = NULL;
1526 ui_helpline__pop();
1527 } else {
1528 if (dso == NULL)
1529 return 0;
1530 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
1531 dso->kernel ? "the Kernel" : dso->short_name);
1532 browser->hists->dso_filter = dso;
1533 perf_hpp__set_elide(HISTC_DSO, true);
1534 pstack__push(browser->pstack, &browser->hists->dso_filter);
1535 }
1536
1537 hists__filter_by_dso(browser->hists);
1538 hist_browser__reset(browser);
1539 return 0;
1540}
1541
1542static int
1543add_dso_opt(struct hist_browser *browser, struct popup_action *act,
1544 char **optstr, struct dso *dso)
1545{
1546 if (dso == NULL)
1547 return 0;
1548
1549 if (asprintf(optstr, "Zoom %s %s DSO",
1550 browser->hists->dso_filter ? "out of" : "into",
1551 dso->kernel ? "the Kernel" : dso->short_name) < 0)
1552 return 0;
1553
1554 act->dso = dso;
1555 act->fn = do_zoom_dso;
1556 return 1;
1557}
1558
1559static int
1560do_browse_map(struct hist_browser *browser __maybe_unused,
1561 struct popup_action *act)
1562{
1563 map__browse(act->ms.map);
1564 return 0;
1565}
1566
1567static int
1568add_map_opt(struct hist_browser *browser __maybe_unused,
1569 struct popup_action *act, char **optstr, struct map *map)
1570{
1571 if (map == NULL)
1572 return 0;
1573
1574 if (asprintf(optstr, "Browse map details") < 0)
1575 return 0;
1576
1577 act->ms.map = map;
1578 act->fn = do_browse_map;
1579 return 1;
1580}
1581
1582static int
1583do_run_script(struct hist_browser *browser __maybe_unused,
1584 struct popup_action *act)
1585{
1586 char script_opt[64];
1587 memset(script_opt, 0, sizeof(script_opt));
1588
1589 if (act->thread) {
1590 scnprintf(script_opt, sizeof(script_opt), " -c %s ",
1591 thread__comm_str(act->thread));
1592 } else if (act->ms.sym) {
1593 scnprintf(script_opt, sizeof(script_opt), " -S %s ",
1594 act->ms.sym->name);
1595 }
1596
1597 script_browse(script_opt);
1598 return 0;
1599}
1600
1601static int
1602add_script_opt(struct hist_browser *browser __maybe_unused,
1603 struct popup_action *act, char **optstr,
1604 struct thread *thread, struct symbol *sym)
1605{
1606 if (thread) {
1607 if (asprintf(optstr, "Run scripts for samples of thread [%s]",
1608 thread__comm_str(thread)) < 0)
1609 return 0;
1610 } else if (sym) {
1611 if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
1612 sym->name) < 0)
1613 return 0;
1614 } else {
1615 if (asprintf(optstr, "Run scripts for all samples") < 0)
1616 return 0;
1617 }
1618
1619 act->thread = thread;
1620 act->ms.sym = sym;
1621 act->fn = do_run_script;
1622 return 1;
1623}
1624
1625static int
1626do_switch_data(struct hist_browser *browser __maybe_unused,
1627 struct popup_action *act __maybe_unused)
1628{
1629 if (switch_data_file()) {
1630 ui__warning("Won't switch the data files due to\n"
1631 "no valid data file got selected!\n");
1632 return 0;
1633 }
1634
1635 return K_SWITCH_INPUT_DATA;
1636}
1637
1638static int
1639add_switch_opt(struct hist_browser *browser,
1640 struct popup_action *act, char **optstr)
1641{
1642 if (!is_report_browser(browser->hbt))
1643 return 0;
1644
1645 if (asprintf(optstr, "Switch to another data file in PWD") < 0)
1646 return 0;
1647
1648 act->fn = do_switch_data;
1649 return 1;
1650}
1651
1652static int
1653do_exit_browser(struct hist_browser *browser __maybe_unused,
1654 struct popup_action *act __maybe_unused)
1655{
1656 return 0;
1657}
1658
1659static int
1660add_exit_opt(struct hist_browser *browser __maybe_unused,
1661 struct popup_action *act, char **optstr)
1662{
1663 if (asprintf(optstr, "Exit") < 0)
1664 return 0;
1665
1666 act->fn = do_exit_browser;
1667 return 1;
1668}
1669
1398static void hist_browser__update_nr_entries(struct hist_browser *hb) 1670static void hist_browser__update_nr_entries(struct hist_browser *hb)
1399{ 1671{
1400 u64 nr_entries = 0; 1672 u64 nr_entries = 0;
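
The large hunk above is the heart of the hists-browser refactor: the tangle of goto labels and per-option index variables (annotate, annotate_f, zoom_dso, scripts_comm, ...) becomes a table of struct popup_action entries, each pairing its arguments with a callback, so both the hotkeys and the popup menu dispatch through act->fn(). A standalone sketch of the dispatch-table pattern, with made-up names:

#include <stdio.h>

struct action {
	const char *label;
	int arg;
	int (*fn)(struct action *act);
};

static int do_print(struct action *act)
{
	printf("%s: %d\n", act->label, act->arg);
	return 0;
}

int main(void)
{
	struct action actions[] = {
		{ .label = "Annotate", .arg = 1, .fn = do_print },
		{ .label = "Exit",     .arg = 0, .fn = do_print },
	};
	int choice = 0;	/* stand-in for the ui__popup_menu() result */

	return actions[choice].fn(&actions[choice]);
}
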
@@ -1421,14 +1693,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1421 struct perf_session_env *env) 1693 struct perf_session_env *env)
1422{ 1694{
1423 struct hists *hists = evsel__hists(evsel); 1695 struct hists *hists = evsel__hists(evsel);
1424 struct hist_browser *browser = hist_browser__new(hists); 1696 struct hist_browser *browser = hist_browser__new(hists, hbt, env);
1425 struct branch_info *bi; 1697 struct branch_info *bi;
1426 struct pstack *fstack; 1698#define MAX_OPTIONS 16
1427 char *options[16]; 1699 char *options[MAX_OPTIONS];
1700 struct popup_action actions[MAX_OPTIONS];
1428 int nr_options = 0; 1701 int nr_options = 0;
1429 int key = -1; 1702 int key = -1;
1430 char buf[64]; 1703 char buf[64];
1431 char script_opt[64];
1432 int delay_secs = hbt ? hbt->refresh : 0; 1704 int delay_secs = hbt ? hbt->refresh : 0;
1433 struct perf_hpp_fmt *fmt; 1705 struct perf_hpp_fmt *fmt;
1434 1706
@@ -1463,23 +1735,29 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1463 "t Zoom into current Thread\n" 1735 "t Zoom into current Thread\n"
1464 "V Verbose (DSO names in callchains, etc)\n" 1736 "V Verbose (DSO names in callchains, etc)\n"
1465 "z Toggle zeroing of samples\n" 1737 "z Toggle zeroing of samples\n"
1738 "f Enable/Disable events\n"
1466 "/ Filter symbol by name"; 1739 "/ Filter symbol by name";
1467 1740
1468 if (browser == NULL) 1741 if (browser == NULL)
1469 return -1; 1742 return -1;
1470 1743
1744 /* reset abort key so that it can get Ctrl-C as a key */
1745 SLang_reset_tty();
1746 SLang_init_tty(0, 0, 0);
1747
1471 if (min_pcnt) { 1748 if (min_pcnt) {
1472 browser->min_pcnt = min_pcnt; 1749 browser->min_pcnt = min_pcnt;
1473 hist_browser__update_nr_entries(browser); 1750 hist_browser__update_nr_entries(browser);
1474 } 1751 }
1475 1752
1476 fstack = pstack__new(2); 1753 browser->pstack = pstack__new(2);
1477 if (fstack == NULL) 1754 if (browser->pstack == NULL)
1478 goto out; 1755 goto out;
1479 1756
1480 ui_helpline__push(helpline); 1757 ui_helpline__push(helpline);
1481 1758
1482 memset(options, 0, sizeof(options)); 1759 memset(options, 0, sizeof(options));
1760 memset(actions, 0, sizeof(actions));
1483 1761
1484 perf_hpp__for_each_format(fmt) 1762 perf_hpp__for_each_format(fmt)
1485 perf_hpp__reset_width(fmt, hists); 1763 perf_hpp__reset_width(fmt, hists);
@@ -1489,16 +1767,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1489 1767
1490 while (1) { 1768 while (1) {
1491 struct thread *thread = NULL; 1769 struct thread *thread = NULL;
1492 const struct dso *dso = NULL; 1770 struct dso *dso = NULL;
1493 int choice = 0, 1771 int choice = 0;
1494 annotate = -2, zoom_dso = -2, zoom_thread = -2,
1495 annotate_f = -2, annotate_t = -2, browse_map = -2;
1496 int scripts_comm = -2, scripts_symbol = -2,
1497 scripts_all = -2, switch_data = -2;
1498 1772
1499 nr_options = 0; 1773 nr_options = 0;
1500 1774
1501 key = hist_browser__run(browser, hbt); 1775 key = hist_browser__run(browser, helpline);
1502 1776
1503 if (browser->he_selection != NULL) { 1777 if (browser->he_selection != NULL) {
1504 thread = hist_browser__selected_thread(browser); 1778 thread = hist_browser__selected_thread(browser);
@@ -1526,17 +1800,25 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1526 browser->selection->sym == NULL || 1800 browser->selection->sym == NULL ||
1527 browser->selection->map->dso->annotate_warned) 1801 browser->selection->map->dso->annotate_warned)
1528 continue; 1802 continue;
1529 goto do_annotate; 1803
1804 actions->ms.map = browser->selection->map;
1805 actions->ms.sym = browser->selection->sym;
1806 do_annotate(browser, actions);
1807 continue;
1530 case 'P': 1808 case 'P':
1531 hist_browser__dump(browser); 1809 hist_browser__dump(browser);
1532 continue; 1810 continue;
1533 case 'd': 1811 case 'd':
1534 goto zoom_dso; 1812 actions->dso = dso;
1813 do_zoom_dso(browser, actions);
1814 continue;
1535 case 'V': 1815 case 'V':
1536 browser->show_dso = !browser->show_dso; 1816 browser->show_dso = !browser->show_dso;
1537 continue; 1817 continue;
1538 case 't': 1818 case 't':
1539 goto zoom_thread; 1819 actions->thread = thread;
1820 do_zoom_thread(browser, actions);
1821 continue;
1540 case '/': 1822 case '/':
1541 if (ui_browser__input_window("Symbol to show", 1823 if (ui_browser__input_window("Symbol to show",
1542 "Please enter the name of symbol you want to see", 1824 "Please enter the name of symbol you want to see",
@@ -1548,12 +1830,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1548 } 1830 }
1549 continue; 1831 continue;
1550 case 'r': 1832 case 'r':
1551 if (is_report_browser(hbt)) 1833 if (is_report_browser(hbt)) {
1552 goto do_scripts; 1834 actions->thread = NULL;
1835 actions->ms.sym = NULL;
1836 do_run_script(browser, actions);
1837 }
1553 continue; 1838 continue;
1554 case 's': 1839 case 's':
1555 if (is_report_browser(hbt)) 1840 if (is_report_browser(hbt)) {
1556 goto do_data_switch; 1841 key = do_switch_data(browser, actions);
1842 if (key == K_SWITCH_INPUT_DATA)
1843 goto out_free_stack;
1844 }
1557 continue; 1845 continue;
1558 case 'i': 1846 case 'i':
1559 /* env->arch is NULL for live-mode (i.e. perf top) */ 1847 /* env->arch is NULL for live-mode (i.e. perf top) */
@@ -1583,7 +1871,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1583 case K_LEFT: { 1871 case K_LEFT: {
1584 const void *top; 1872 const void *top;
1585 1873
1586 if (pstack__empty(fstack)) { 1874 if (pstack__empty(browser->pstack)) {
1587 /* 1875 /*
1588 * Go back to the perf_evsel_menu__run or other user 1876 * Go back to the perf_evsel_menu__run or other user
1589 */ 1877 */
@@ -1591,11 +1879,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1591 goto out_free_stack; 1879 goto out_free_stack;
1592 continue; 1880 continue;
1593 } 1881 }
1594 top = pstack__pop(fstack); 1882 top = pstack__peek(browser->pstack);
1595 if (top == &browser->hists->dso_filter) 1883 if (top == &browser->hists->dso_filter) {
1596 goto zoom_out_dso; 1884 /*
1885 * No need to set actions->dso here since
1886 * it's just to remove the current filter.
1887 * Ditto for thread below.
1888 */
1889 do_zoom_dso(browser, actions);
1890 }
1597 if (top == &browser->hists->thread_filter) 1891 if (top == &browser->hists->thread_filter)
1598 goto zoom_out_thread; 1892 do_zoom_thread(browser, actions);
1599 continue; 1893 continue;
1600 } 1894 }
1601 case K_ESC: 1895 case K_ESC:
@@ -1607,7 +1901,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1607 case 'q': 1901 case 'q':
1608 case CTRL('c'): 1902 case CTRL('c'):
1609 goto out_free_stack; 1903 goto out_free_stack;
1904 case 'f':
1905 if (!is_report_browser(hbt))
1906 goto out_free_stack;
1907 /* Fall thru */
1610 default: 1908 default:
1909 helpline = "Press '?' for help on key bindings";
1611 continue; 1910 continue;
1612 } 1911 }
1613 1912
@@ -1623,196 +1922,71 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1623 if (bi == NULL) 1922 if (bi == NULL)
1624 goto skip_annotation; 1923 goto skip_annotation;
1625 1924
1626 if (bi->from.sym != NULL && 1925 nr_options += add_annotate_opt(browser,
1627 !bi->from.map->dso->annotate_warned && 1926 &actions[nr_options],
1628 asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) { 1927 &options[nr_options],
1629 annotate_f = nr_options++; 1928 bi->from.map,
1630 } 1929 bi->from.sym);
1631 1930 if (bi->to.sym != bi->from.sym)
1632 if (bi->to.sym != NULL && 1931 nr_options += add_annotate_opt(browser,
1633 !bi->to.map->dso->annotate_warned && 1932 &actions[nr_options],
1634 (bi->to.sym != bi->from.sym || 1933 &options[nr_options],
1635 bi->to.map->dso != bi->from.map->dso) && 1934 bi->to.map,
1636 asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) { 1935 bi->to.sym);
1637 annotate_t = nr_options++;
1638 }
1639 } else { 1936 } else {
1640 if (browser->selection->sym != NULL && 1937 nr_options += add_annotate_opt(browser,
1641 !browser->selection->map->dso->annotate_warned) { 1938 &actions[nr_options],
1642 struct annotation *notes; 1939 &options[nr_options],
1643 1940 browser->selection->map,
1644 notes = symbol__annotation(browser->selection->sym); 1941 browser->selection->sym);
1645
1646 if (notes->src &&
1647 asprintf(&options[nr_options], "Annotate %s",
1648 browser->selection->sym->name) > 0) {
1649 annotate = nr_options++;
1650 }
1651 }
1652 } 1942 }
1653skip_annotation: 1943skip_annotation:
1654 if (thread != NULL && 1944 nr_options += add_thread_opt(browser, &actions[nr_options],
1655 asprintf(&options[nr_options], "Zoom %s %s(%d) thread", 1945 &options[nr_options], thread);
1656 (browser->hists->thread_filter ? "out of" : "into"), 1946 nr_options += add_dso_opt(browser, &actions[nr_options],
1657 (thread->comm_set ? thread__comm_str(thread) : ""), 1947 &options[nr_options], dso);
1658 thread->tid) > 0) 1948 nr_options += add_map_opt(browser, &actions[nr_options],
1659 zoom_thread = nr_options++; 1949 &options[nr_options],
1660 1950 browser->selection->map);
1661 if (dso != NULL &&
1662 asprintf(&options[nr_options], "Zoom %s %s DSO",
1663 (browser->hists->dso_filter ? "out of" : "into"),
1664 (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
1665 zoom_dso = nr_options++;
1666
1667 if (browser->selection != NULL &&
1668 browser->selection->map != NULL &&
1669 asprintf(&options[nr_options], "Browse map details") > 0)
1670 browse_map = nr_options++;
1671 1951
1672 /* perf script support */ 1952 /* perf script support */
1673 if (browser->he_selection) { 1953 if (browser->he_selection) {
1674 struct symbol *sym; 1954 nr_options += add_script_opt(browser,
1675 1955 &actions[nr_options],
1676 if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]", 1956 &options[nr_options],
1677 thread__comm_str(browser->he_selection->thread)) > 0) 1957 thread, NULL);
1678 scripts_comm = nr_options++; 1958 nr_options += add_script_opt(browser,
1679 1959 &actions[nr_options],
1680 sym = browser->he_selection->ms.sym; 1960 &options[nr_options],
1681 if (sym && sym->namelen && 1961 NULL, browser->selection->sym);
1682 asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]",
1683 sym->name) > 0)
1684 scripts_symbol = nr_options++;
1685 } 1962 }
1686 1963 nr_options += add_script_opt(browser, &actions[nr_options],
1687 if (asprintf(&options[nr_options], "Run scripts for all samples") > 0) 1964 &options[nr_options], NULL, NULL);
1688 scripts_all = nr_options++; 1965 nr_options += add_switch_opt(browser, &actions[nr_options],
1689 1966 &options[nr_options]);
1690 if (is_report_browser(hbt) && asprintf(&options[nr_options],
1691 "Switch to another data file in PWD") > 0)
1692 switch_data = nr_options++;
1693add_exit_option: 1967add_exit_option:
1694 options[nr_options++] = (char *)"Exit"; 1968 nr_options += add_exit_opt(browser, &actions[nr_options],
1695retry_popup_menu: 1969 &options[nr_options]);
1696 choice = ui__popup_menu(nr_options, options);
1697
1698 if (choice == nr_options - 1)
1699 break;
1700
1701 if (choice == -1) {
1702 free_popup_options(options, nr_options - 1);
1703 continue;
1704 }
1705
1706 if (choice == annotate || choice == annotate_t || choice == annotate_f) {
1707 struct hist_entry *he;
1708 struct annotation *notes;
1709 struct map_symbol ms;
1710 int err;
1711do_annotate:
1712 if (!objdump_path && perf_session_env__lookup_objdump(env))
1713 continue;
1714
1715 he = hist_browser__selected_entry(browser);
1716 if (he == NULL)
1717 continue;
1718
1719 if (choice == annotate_f) {
1720 ms.map = he->branch_info->from.map;
1721 ms.sym = he->branch_info->from.sym;
1722 } else if (choice == annotate_t) {
1723 ms.map = he->branch_info->to.map;
1724 ms.sym = he->branch_info->to.sym;
1725 } else {
1726 ms = *browser->selection;
1727 }
1728 1970
1729 notes = symbol__annotation(ms.sym); 1971 do {
1730 if (!notes->src) 1972 struct popup_action *act;
1731 continue;
1732
1733 err = map_symbol__tui_annotate(&ms, evsel, hbt);
1734 /*
1735 * offer option to annotate the other branch source or target
1736 * (if they exist) when returning from annotate
1737 */
1738 if ((err == 'q' || err == CTRL('c'))
1739 && annotate_t != -2 && annotate_f != -2)
1740 goto retry_popup_menu;
1741
1742 ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
1743 if (err)
1744 ui_browser__handle_resize(&browser->b);
1745
1746 } else if (choice == browse_map)
1747 map__browse(browser->selection->map);
1748 else if (choice == zoom_dso) {
1749zoom_dso:
1750 if (browser->hists->dso_filter) {
1751 pstack__remove(fstack, &browser->hists->dso_filter);
1752zoom_out_dso:
1753 ui_helpline__pop();
1754 browser->hists->dso_filter = NULL;
1755 perf_hpp__set_elide(HISTC_DSO, false);
1756 } else {
1757 if (dso == NULL)
1758 continue;
1759 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
1760 dso->kernel ? "the Kernel" : dso->short_name);
1761 browser->hists->dso_filter = dso;
1762 perf_hpp__set_elide(HISTC_DSO, true);
1763 pstack__push(fstack, &browser->hists->dso_filter);
1764 }
1765 hists__filter_by_dso(hists);
1766 hist_browser__reset(browser);
1767 } else if (choice == zoom_thread) {
1768zoom_thread:
1769 if (browser->hists->thread_filter) {
1770 pstack__remove(fstack, &browser->hists->thread_filter);
1771zoom_out_thread:
1772 ui_helpline__pop();
1773 thread__zput(browser->hists->thread_filter);
1774 perf_hpp__set_elide(HISTC_THREAD, false);
1775 } else {
1776 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
1777 thread->comm_set ? thread__comm_str(thread) : "",
1778 thread->tid);
1779 browser->hists->thread_filter = thread__get(thread);
1780 perf_hpp__set_elide(HISTC_THREAD, false);
1781 pstack__push(fstack, &browser->hists->thread_filter);
1782 }
1783 hists__filter_by_thread(hists);
1784 hist_browser__reset(browser);
1785 }
1786 /* perf scripts support */
1787 else if (choice == scripts_all || choice == scripts_comm ||
1788 choice == scripts_symbol) {
1789do_scripts:
1790 memset(script_opt, 0, 64);
1791 1973
1792 if (choice == scripts_comm) 1974 choice = ui__popup_menu(nr_options, options);
1793 sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread)); 1975 if (choice == -1 || choice >= nr_options)
1976 break;
1794 1977
1795 if (choice == scripts_symbol) 1978 act = &actions[choice];
1796 sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name); 1979 key = act->fn(browser, act);
1980 } while (key == 1);
1797 1981
1798 script_browse(script_opt); 1982 if (key == K_SWITCH_INPUT_DATA)
1799 } 1983 break;
1800 /* Switch to another data file */
1801 else if (choice == switch_data) {
1802do_data_switch:
1803 if (!switch_data_file()) {
1804 key = K_SWITCH_INPUT_DATA;
1805 break;
1806 } else
1807 ui__warning("Won't switch the data files due to\n"
1808 "no valid data file get selected!\n");
1809 }
1810 } 1984 }
1811out_free_stack: 1985out_free_stack:
1812 pstack__delete(fstack); 1986 pstack__delete(browser->pstack);
1813out: 1987out:
1814 hist_browser__delete(browser); 1988 hist_browser__delete(browser);
1815 free_popup_options(options, nr_options - 1); 1989 free_popup_options(options, MAX_OPTIONS);
1816 return key; 1990 return key;
1817} 1991}
1818 1992
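
The rework above replaces the goto-based menu handling with a table of struct popup_action entries: each menu option gets a callback, and the new do/while loop keeps redisplaying the popup for as long as the chosen callback returns 1. A minimal standalone sketch of the pattern (simplified types and a canned menu in place of ui__popup_menu(); not the perf structs themselves):

#include <stdio.h>

struct browser;				/* stand-in for struct hist_browser */

struct popup_action {			/* one entry per menu option */
	int (*fn)(struct browser *b, struct popup_action *act);
};

static int do_annotate(struct browser *b, struct popup_action *act)
{
	(void)b; (void)act;
	printf("annotate selected symbol\n");
	return 1;			/* 1 == keep the popup loop going */
}

static int do_exit(struct browser *b, struct popup_action *act)
{
	(void)b; (void)act;
	return 0;			/* anything else leaves the loop */
}

/* Canned replacement for ui__popup_menu(): chooses 0, then 1. */
static int fake_menu(int nr_options)
{
	static int calls;
	return calls < nr_options ? calls++ : -1;
}

int main(void)
{
	struct popup_action actions[] = { { do_annotate }, { do_exit } };
	int nr_options = 2, key = 0;

	do {
		int choice = fake_menu(nr_options);
		struct popup_action *act;

		if (choice == -1 || choice >= nr_options)
			break;
		act = &actions[choice];
		key = act->fn(NULL, act);
	} while (key == 1);
	return 0;
}
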
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index b77e1d771363..60d1f29b4b50 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -129,7 +129,7 @@ int ui__init(void)
129 err = SLsmg_init_smg(); 129 err = SLsmg_init_smg();
130 if (err < 0) 130 if (err < 0)
131 goto out; 131 goto out;
132 err = SLang_init_tty(0, 0, 0); 132 err = SLang_init_tty(-1, 0, 0);
133 if (err < 0) 133 if (err < 0)
134 goto out; 134 goto out;
135 135
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 797490a40075..586a59d46022 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -68,12 +68,15 @@ libperf-y += rblist.o
68libperf-y += intlist.o 68libperf-y += intlist.o
69libperf-y += vdso.o 69libperf-y += vdso.o
70libperf-y += stat.o 70libperf-y += stat.o
71libperf-y += stat-shadow.o
71libperf-y += record.o 72libperf-y += record.o
72libperf-y += srcline.o 73libperf-y += srcline.o
73libperf-y += data.o 74libperf-y += data.o
74libperf-$(CONFIG_X86) += tsc.o 75libperf-$(CONFIG_X86) += tsc.o
75libperf-y += cloexec.o 76libperf-y += cloexec.o
76libperf-y += thread-stack.o 77libperf-y += thread-stack.o
78libperf-$(CONFIG_AUXTRACE) += auxtrace.o
79libperf-y += parse-branch-options.o
77 80
78libperf-$(CONFIG_LIBELF) += symbol-elf.o 81libperf-$(CONFIG_LIBELF) += symbol-elf.o
79libperf-$(CONFIG_LIBELF) += probe-event.o 82libperf-$(CONFIG_LIBELF) += probe-event.o
@@ -101,23 +104,23 @@ CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="B
101 104
102$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c 105$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
103 $(call rule_mkdir) 106 $(call rule_mkdir)
104 @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l 107 $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
105 108
106$(OUTPUT)util/parse-events-bison.c: util/parse-events.y 109$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
107 $(call rule_mkdir) 110 $(call rule_mkdir)
108 @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ 111 $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
109 112
110$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c 113$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
111 $(call rule_mkdir) 114 $(call rule_mkdir)
112 @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l 115 $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
113 116
114$(OUTPUT)util/pmu-bison.c: util/pmu.y 117$(OUTPUT)util/pmu-bison.c: util/pmu.y
115 $(call rule_mkdir) 118 $(call rule_mkdir)
116 @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ 119 $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
117 120
118CFLAGS_parse-events-flex.o += -w 121CFLAGS_parse-events-flex.o += -w
119CFLAGS_pmu-flex.o += -w 122CFLAGS_pmu-flex.o += -w
120CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 123CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
121CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w 124CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
122 125
123$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c 126$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc9bc87..03b7bc70eb66 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -506,6 +506,17 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
506 return 0; 506 return 0;
507} 507}
508 508
509static struct annotation *symbol__get_annotation(struct symbol *sym)
510{
511 struct annotation *notes = symbol__annotation(sym);
512
513 if (notes->src == NULL) {
514 if (symbol__alloc_hist(sym) < 0)
515 return NULL;
516 }
517 return notes;
518}
519
509static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, 520static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
510 int evidx, u64 addr) 521 int evidx, u64 addr)
511{ 522{
@@ -513,13 +524,9 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
513 524
514 if (sym == NULL) 525 if (sym == NULL)
515 return 0; 526 return 0;
516 527 notes = symbol__get_annotation(sym);
517 notes = symbol__annotation(sym); 528 if (notes == NULL)
518 if (notes->src == NULL) { 529 return -ENOMEM;
519 if (symbol__alloc_hist(sym) < 0)
520 return -ENOMEM;
521 }
522
523 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); 530 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
524} 531}
525 532
@@ -647,14 +654,15 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
647} 654}
648 655
649double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, 656double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
650 s64 end, const char **path) 657 s64 end, const char **path, u64 *nr_samples)
651{ 658{
652 struct source_line *src_line = notes->src->lines; 659 struct source_line *src_line = notes->src->lines;
653 double percent = 0.0; 660 double percent = 0.0;
661 *nr_samples = 0;
654 662
655 if (src_line) { 663 if (src_line) {
656 size_t sizeof_src_line = sizeof(*src_line) + 664 size_t sizeof_src_line = sizeof(*src_line) +
657 sizeof(src_line->p) * (src_line->nr_pcnt - 1); 665 sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
658 666
659 while (offset < end) { 667 while (offset < end) {
660 src_line = (void *)notes->src->lines + 668 src_line = (void *)notes->src->lines +
@@ -663,7 +671,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
663 if (*path == NULL) 671 if (*path == NULL)
664 *path = src_line->path; 672 *path = src_line->path;
665 673
666 percent += src_line->p[evidx].percent; 674 percent += src_line->samples[evidx].percent;
675 *nr_samples += src_line->samples[evidx].nr;
667 offset++; 676 offset++;
668 } 677 }
669 } else { 678 } else {
@@ -673,8 +682,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
673 while (offset < end) 682 while (offset < end)
674 hits += h->addr[offset++]; 683 hits += h->addr[offset++];
675 684
676 if (h->sum) 685 if (h->sum) {
686 *nr_samples = hits;
677 percent = 100.0 * hits / h->sum; 687 percent = 100.0 * hits / h->sum;
688 }
678 } 689 }
679 690
680 return percent; 691 return percent;
@@ -689,8 +700,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
689 700
690 if (dl->offset != -1) { 701 if (dl->offset != -1) {
691 const char *path = NULL; 702 const char *path = NULL;
703 u64 nr_samples;
692 double percent, max_percent = 0.0; 704 double percent, max_percent = 0.0;
693 double *ppercents = &percent; 705 double *ppercents = &percent;
706 u64 *psamples = &nr_samples;
694 int i, nr_percent = 1; 707 int i, nr_percent = 1;
695 const char *color; 708 const char *color;
696 struct annotation *notes = symbol__annotation(sym); 709 struct annotation *notes = symbol__annotation(sym);
@@ -703,8 +716,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
703 if (perf_evsel__is_group_event(evsel)) { 716 if (perf_evsel__is_group_event(evsel)) {
704 nr_percent = evsel->nr_members; 717 nr_percent = evsel->nr_members;
705 ppercents = calloc(nr_percent, sizeof(double)); 718 ppercents = calloc(nr_percent, sizeof(double));
706 if (ppercents == NULL) 719 psamples = calloc(nr_percent, sizeof(u64));
720 if (ppercents == NULL || psamples == NULL) {
707 return -1; 721 return -1;
722 }
708 } 723 }
709 724
710 for (i = 0; i < nr_percent; i++) { 725 for (i = 0; i < nr_percent; i++) {
@@ -712,9 +727,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
712 notes->src->lines ? i : evsel->idx + i, 727 notes->src->lines ? i : evsel->idx + i,
713 offset, 728 offset,
714 next ? next->offset : (s64) len, 729 next ? next->offset : (s64) len,
715 &path); 730 &path, &nr_samples);
716 731
717 ppercents[i] = percent; 732 ppercents[i] = percent;
733 psamples[i] = nr_samples;
718 if (percent > max_percent) 734 if (percent > max_percent)
719 max_percent = percent; 735 max_percent = percent;
720 } 736 }
@@ -752,8 +768,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
752 768
753 for (i = 0; i < nr_percent; i++) { 769 for (i = 0; i < nr_percent; i++) {
754 percent = ppercents[i]; 770 percent = ppercents[i];
771 nr_samples = psamples[i];
755 color = get_percent_color(percent); 772 color = get_percent_color(percent);
756 color_fprintf(stdout, color, " %7.2f", percent); 773
774 if (symbol_conf.show_total_period)
775 color_fprintf(stdout, color, " %7" PRIu64,
776 nr_samples);
777 else
778 color_fprintf(stdout, color, " %7.2f", percent);
757 } 779 }
758 780
759 printf(" : "); 781 printf(" : ");
@@ -763,6 +785,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
763 if (ppercents != &percent) 785 if (ppercents != &percent)
764 free(ppercents); 786 free(ppercents);
765 787
788 if (psamples != &nr_samples)
789 free(psamples);
790
766 } else if (max_lines && printed >= max_lines) 791 } else if (max_lines && printed >= max_lines)
767 return 1; 792 return 1;
768 else { 793 else {
@@ -1096,7 +1121,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
1096 ret = strcmp(iter->path, src_line->path); 1121 ret = strcmp(iter->path, src_line->path);
1097 if (ret == 0) { 1122 if (ret == 0) {
1098 for (i = 0; i < src_line->nr_pcnt; i++) 1123 for (i = 0; i < src_line->nr_pcnt; i++)
1099 iter->p[i].percent_sum += src_line->p[i].percent; 1124 iter->samples[i].percent_sum += src_line->samples[i].percent;
1100 return; 1125 return;
1101 } 1126 }
1102 1127
@@ -1107,7 +1132,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
1107 } 1132 }
1108 1133
1109 for (i = 0; i < src_line->nr_pcnt; i++) 1134 for (i = 0; i < src_line->nr_pcnt; i++)
1110 src_line->p[i].percent_sum = src_line->p[i].percent; 1135 src_line->samples[i].percent_sum = src_line->samples[i].percent;
1111 1136
1112 rb_link_node(&src_line->node, parent, p); 1137 rb_link_node(&src_line->node, parent, p);
1113 rb_insert_color(&src_line->node, root); 1138 rb_insert_color(&src_line->node, root);
@@ -1118,9 +1143,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
1118 int i; 1143 int i;
1119 1144
1120 for (i = 0; i < a->nr_pcnt; i++) { 1145 for (i = 0; i < a->nr_pcnt; i++) {
1121 if (a->p[i].percent_sum == b->p[i].percent_sum) 1146 if (a->samples[i].percent_sum == b->samples[i].percent_sum)
1122 continue; 1147 continue;
1123 return a->p[i].percent_sum > b->p[i].percent_sum; 1148 return a->samples[i].percent_sum > b->samples[i].percent_sum;
1124 } 1149 }
1125 1150
1126 return 0; 1151 return 0;
@@ -1172,7 +1197,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
1172 int i; 1197 int i;
1173 1198
1174 sizeof_src_line = sizeof(*src_line) + 1199 sizeof_src_line = sizeof(*src_line) +
1175 (sizeof(src_line->p) * (src_line->nr_pcnt - 1)); 1200 (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
1176 1201
1177 for (i = 0; i < len; i++) { 1202 for (i = 0; i < len; i++) {
1178 free_srcline(src_line->path); 1203 free_srcline(src_line->path);
@@ -1204,7 +1229,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
1204 h_sum += h->sum; 1229 h_sum += h->sum;
1205 } 1230 }
1206 nr_pcnt = evsel->nr_members; 1231 nr_pcnt = evsel->nr_members;
1207 sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p); 1232 sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
1208 } 1233 }
1209 1234
1210 if (!h_sum) 1235 if (!h_sum)
@@ -1224,10 +1249,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
1224 1249
1225 for (k = 0; k < nr_pcnt; k++) { 1250 for (k = 0; k < nr_pcnt; k++) {
1226 h = annotation__histogram(notes, evidx + k); 1251 h = annotation__histogram(notes, evidx + k);
1227 src_line->p[k].percent = 100.0 * h->addr[i] / h->sum; 1252 src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum;
1228 1253
1229 if (src_line->p[k].percent > percent_max) 1254 if (src_line->samples[k].percent > percent_max)
1230 percent_max = src_line->p[k].percent; 1255 percent_max = src_line->samples[k].percent;
1231 } 1256 }
1232 1257
1233 if (percent_max <= 0.5) 1258 if (percent_max <= 0.5)
@@ -1267,7 +1292,7 @@ static void print_summary(struct rb_root *root, const char *filename)
1267 1292
1268 src_line = rb_entry(node, struct source_line, node); 1293 src_line = rb_entry(node, struct source_line, node);
1269 for (i = 0; i < src_line->nr_pcnt; i++) { 1294 for (i = 0; i < src_line->nr_pcnt; i++) {
1270 percent = src_line->p[i].percent_sum; 1295 percent = src_line->samples[i].percent_sum;
1271 color = get_percent_color(percent); 1296 color = get_percent_color(percent);
1272 color_fprintf(stdout, color, " %7.2f", percent); 1297 color_fprintf(stdout, color, " %7.2f", percent);
1273 1298
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cadbdc90a5cb..7e78e6c27078 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -72,23 +72,24 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
72int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); 72int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
73size_t disasm__fprintf(struct list_head *head, FILE *fp); 73size_t disasm__fprintf(struct list_head *head, FILE *fp);
74double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, 74double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
75 s64 end, const char **path); 75 s64 end, const char **path, u64 *nr_samples);
76 76
77struct sym_hist { 77struct sym_hist {
78 u64 sum; 78 u64 sum;
79 u64 addr[0]; 79 u64 addr[0];
80}; 80};
81 81
82struct source_line_percent { 82struct source_line_samples {
83 double percent; 83 double percent;
84 double percent_sum; 84 double percent_sum;
85 double nr;
85}; 86};
86 87
87struct source_line { 88struct source_line {
88 struct rb_node node; 89 struct rb_node node;
89 char *path; 90 char *path;
90 int nr_pcnt; 91 int nr_pcnt;
91 struct source_line_percent p[1]; 92 struct source_line_samples samples[1];
92}; 93};
93 94
94/** struct annotated_source - symbols with hits have this attached as in sannotation 95/** struct annotated_source - symbols with hits have this attached as in sannotation
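
struct source_line sizes its trailing samples[] with the old-style one-element-array idiom: allocations reserve room for (nr_pcnt - 1) extra elements, which is what the sizeof arithmetic in the annotate.c hunks above computes. A minimal illustration of the idiom (hypothetical names):

#include <stdio.h>
#include <stdlib.h>

struct samples { double percent; };

struct line {
	int nr;
	struct samples s[1];	/* old-style flexible array, as in source_line */
};

int main(void)
{
	int nr_pcnt = 4, i;
	struct line *l = calloc(1, sizeof(*l) +
				   sizeof(l->s[0]) * (nr_pcnt - 1));

	if (!l)
		return 1;
	l->nr = nr_pcnt;
	for (i = 0; i < l->nr; i++)	/* s[1]..s[3] live in the extra space */
		l->s[i].percent = 25.0 * i;
	printf("%f\n", l->s[3].percent);
	free(l);
	return 0;
}
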
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 000000000000..df66966cfde7
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,1352 @@
1/*
2 * auxtrace.c: AUX area trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include <sys/types.h>
17#include <sys/mman.h>
18#include <stdbool.h>
19
20#include <linux/kernel.h>
21#include <linux/perf_event.h>
22#include <linux/types.h>
23#include <linux/bitops.h>
24#include <linux/log2.h>
25#include <linux/string.h>
26
27#include <sys/param.h>
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <limits.h>
32#include <errno.h>
33#include <linux/list.h>
34
35#include "../perf.h"
36#include "util.h"
37#include "evlist.h"
38#include "cpumap.h"
39#include "thread_map.h"
40#include "asm/bug.h"
41#include "auxtrace.h"
42
43#include <linux/hash.h>
44
45#include "event.h"
46#include "session.h"
47#include "debug.h"
48#include "parse-options.h"
49
50int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
51 struct auxtrace_mmap_params *mp,
52 void *userpg, int fd)
53{
54 struct perf_event_mmap_page *pc = userpg;
55
56#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
57 pr_err("Cannot use AUX area tracing mmaps\n");
58 return -1;
59#endif
60
61 WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
62
63 mm->userpg = userpg;
64 mm->mask = mp->mask;
65 mm->len = mp->len;
66 mm->prev = 0;
67 mm->idx = mp->idx;
68 mm->tid = mp->tid;
69 mm->cpu = mp->cpu;
70
71 if (!mp->len) {
72 mm->base = NULL;
73 return 0;
74 }
75
76 pc->aux_offset = mp->offset;
77 pc->aux_size = mp->len;
78
79 mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
80 if (mm->base == MAP_FAILED) {
81 pr_debug2("failed to mmap AUX area\n");
82 mm->base = NULL;
83 return -1;
84 }
85
86 return 0;
87}
88
89void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
90{
91 if (mm->base) {
92 munmap(mm->base, mm->len);
93 mm->base = NULL;
94 }
95}
96
97void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
98 off_t auxtrace_offset,
99 unsigned int auxtrace_pages,
100 bool auxtrace_overwrite)
101{
102 if (auxtrace_pages) {
103 mp->offset = auxtrace_offset;
104 mp->len = auxtrace_pages * (size_t)page_size;
105 mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
106 mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
107 pr_debug2("AUX area mmap length %zu\n", mp->len);
108 } else {
109 mp->len = 0;
110 }
111}
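
auxtrace_mmap_params__init() only sets a mask when the buffer length is a power of two, letting the read path use 'head & mask' instead of the slower 'head % len'. A quick standalone check of the equivalence:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t len = 1ULL << 16;	/* power-of-2 buffer length */
	uint64_t mask = len - 1;
	uint64_t head;

	for (head = 0; head < 10 * len; head += 12345)
		assert((head & mask) == (head % len));
	return 0;
}
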
112
113void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
114 struct perf_evlist *evlist, int idx,
115 bool per_cpu)
116{
117 mp->idx = idx;
118
119 if (per_cpu) {
120 mp->cpu = evlist->cpus->map[idx];
121 if (evlist->threads)
122 mp->tid = evlist->threads->map[0];
123 else
124 mp->tid = -1;
125 } else {
126 mp->cpu = -1;
127 mp->tid = evlist->threads->map[idx];
128 }
129}
130
131#define AUXTRACE_INIT_NR_QUEUES 32
132
133static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
134{
135 struct auxtrace_queue *queue_array;
136 unsigned int max_nr_queues, i;
137
138 max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
139 if (nr_queues > max_nr_queues)
140 return NULL;
141
142 queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
143 if (!queue_array)
144 return NULL;
145
146 for (i = 0; i < nr_queues; i++) {
147 INIT_LIST_HEAD(&queue_array[i].head);
148 queue_array[i].priv = NULL;
149 }
150
151 return queue_array;
152}
153
154int auxtrace_queues__init(struct auxtrace_queues *queues)
155{
156 queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
157 queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
158 if (!queues->queue_array)
159 return -ENOMEM;
160 return 0;
161}
162
163static int auxtrace_queues__grow(struct auxtrace_queues *queues,
164 unsigned int new_nr_queues)
165{
166 unsigned int nr_queues = queues->nr_queues;
167 struct auxtrace_queue *queue_array;
168 unsigned int i;
169
170 if (!nr_queues)
171 nr_queues = AUXTRACE_INIT_NR_QUEUES;
172
173 while (nr_queues && nr_queues < new_nr_queues)
174 nr_queues <<= 1;
175
176 if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
177 return -EINVAL;
178
179 queue_array = auxtrace_alloc_queue_array(nr_queues);
180 if (!queue_array)
181 return -ENOMEM;
182
183 for (i = 0; i < queues->nr_queues; i++) {
184 list_splice_tail(&queues->queue_array[i].head,
185 &queue_array[i].head);
186 queue_array[i].priv = queues->queue_array[i].priv;
187 }
188
189 queues->nr_queues = nr_queues;
190 queues->queue_array = queue_array;
191
192 return 0;
193}
194
195static void *auxtrace_copy_data(u64 size, struct perf_session *session)
196{
197 int fd = perf_data_file__fd(session->file);
198 void *p;
199 ssize_t ret;
200
201 if (size > SSIZE_MAX)
202 return NULL;
203
204 p = malloc(size);
205 if (!p)
206 return NULL;
207
208 ret = readn(fd, p, size);
209 if (ret != (ssize_t)size) {
210 free(p);
211 return NULL;
212 }
213
214 return p;
215}
216
217static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
218 unsigned int idx,
219 struct auxtrace_buffer *buffer)
220{
221 struct auxtrace_queue *queue;
222 int err;
223
224 if (idx >= queues->nr_queues) {
225 err = auxtrace_queues__grow(queues, idx + 1);
226 if (err)
227 return err;
228 }
229
230 queue = &queues->queue_array[idx];
231
232 if (!queue->set) {
233 queue->set = true;
234 queue->tid = buffer->tid;
235 queue->cpu = buffer->cpu;
236 } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
237 pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
238 queue->cpu, queue->tid, buffer->cpu, buffer->tid);
239 return -EINVAL;
240 }
241
242 buffer->buffer_nr = queues->next_buffer_nr++;
243
244 list_add_tail(&buffer->list, &queue->head);
245
246 queues->new_data = true;
247 queues->populated = true;
248
249 return 0;
250}
251
252/* Limit buffers to 32MiB on 32-bit */
253#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
254
255static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
256 unsigned int idx,
257 struct auxtrace_buffer *buffer)
258{
259 u64 sz = buffer->size;
260 bool consecutive = false;
261 struct auxtrace_buffer *b;
262 int err;
263
264 while (sz > BUFFER_LIMIT_FOR_32_BIT) {
265 b = memdup(buffer, sizeof(struct auxtrace_buffer));
266 if (!b)
267 return -ENOMEM;
268 b->size = BUFFER_LIMIT_FOR_32_BIT;
269 b->consecutive = consecutive;
270 err = auxtrace_queues__add_buffer(queues, idx, b);
271 if (err) {
272 auxtrace_buffer__free(b);
273 return err;
274 }
275 buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
276 sz -= BUFFER_LIMIT_FOR_32_BIT;
277 consecutive = true;
278 }
279
280 buffer->size = sz;
281 buffer->consecutive = consecutive;
282
283 return 0;
284}
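
On 32-bit builds, auxtrace_queues__split_buffer() carves an oversized buffer into 32MiB chunks, marking every chunk after the first as consecutive so decoders know the data is contiguous. A worked example of the arithmetic only (no queueing):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)

int main(void)
{
	uint64_t sz = 100ULL * 1024 * 1024;	/* a 100MiB AUX buffer */
	bool consecutive = false;

	while (sz > BUFFER_LIMIT_FOR_32_BIT) {
		printf("chunk: %d bytes, consecutive=%d\n",
		       BUFFER_LIMIT_FOR_32_BIT, consecutive);
		sz -= BUFFER_LIMIT_FOR_32_BIT;
		consecutive = true;
	}
	printf("tail:  %llu bytes, consecutive=%d\n",
	       (unsigned long long)sz, consecutive);
	return 0;	/* splits as 32 + 32 + 32 + 4 MiB */
}
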
285
286static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
287 struct perf_session *session,
288 unsigned int idx,
289 struct auxtrace_buffer *buffer)
290{
291 if (session->one_mmap) {
292 buffer->data = buffer->data_offset - session->one_mmap_offset +
293 session->one_mmap_addr;
294 } else if (perf_data_file__is_pipe(session->file)) {
295 buffer->data = auxtrace_copy_data(buffer->size, session);
296 if (!buffer->data)
297 return -ENOMEM;
298 buffer->data_needs_freeing = true;
299 } else if (BITS_PER_LONG == 32 &&
300 buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
301 int err;
302
303 err = auxtrace_queues__split_buffer(queues, idx, buffer);
304 if (err)
305 return err;
306 }
307
308 return auxtrace_queues__add_buffer(queues, idx, buffer);
309}
310
311int auxtrace_queues__add_event(struct auxtrace_queues *queues,
312 struct perf_session *session,
313 union perf_event *event, off_t data_offset,
314 struct auxtrace_buffer **buffer_ptr)
315{
316 struct auxtrace_buffer *buffer;
317 unsigned int idx;
318 int err;
319
320 buffer = zalloc(sizeof(struct auxtrace_buffer));
321 if (!buffer)
322 return -ENOMEM;
323
324 buffer->pid = -1;
325 buffer->tid = event->auxtrace.tid;
326 buffer->cpu = event->auxtrace.cpu;
327 buffer->data_offset = data_offset;
328 buffer->offset = event->auxtrace.offset;
329 buffer->reference = event->auxtrace.reference;
330 buffer->size = event->auxtrace.size;
331 idx = event->auxtrace.idx;
332
333 err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
334 if (err)
335 goto out_err;
336
337 if (buffer_ptr)
338 *buffer_ptr = buffer;
339
340 return 0;
341
342out_err:
343 auxtrace_buffer__free(buffer);
344 return err;
345}
346
347static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
348 struct perf_session *session,
349 off_t file_offset, size_t sz)
350{
351 union perf_event *event;
352 int err;
353 char buf[PERF_SAMPLE_MAX_SIZE];
354
355 err = perf_session__peek_event(session, file_offset, buf,
356 PERF_SAMPLE_MAX_SIZE, &event, NULL);
357 if (err)
358 return err;
359
360 if (event->header.type == PERF_RECORD_AUXTRACE) {
361 if (event->header.size < sizeof(struct auxtrace_event) ||
362 event->header.size != sz) {
363 err = -EINVAL;
364 goto out;
365 }
366 file_offset += event->header.size;
367 err = auxtrace_queues__add_event(queues, session, event,
368 file_offset, NULL);
369 }
370out:
371 return err;
372}
373
374void auxtrace_queues__free(struct auxtrace_queues *queues)
375{
376 unsigned int i;
377
378 for (i = 0; i < queues->nr_queues; i++) {
379 while (!list_empty(&queues->queue_array[i].head)) {
380 struct auxtrace_buffer *buffer;
381
382 buffer = list_entry(queues->queue_array[i].head.next,
383 struct auxtrace_buffer, list);
384 list_del(&buffer->list);
385 auxtrace_buffer__free(buffer);
386 }
387 }
388
389 zfree(&queues->queue_array);
390 queues->nr_queues = 0;
391}
392
393static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
394 unsigned int pos, unsigned int queue_nr,
395 u64 ordinal)
396{
397 unsigned int parent;
398
399 while (pos) {
400 parent = (pos - 1) >> 1;
401 if (heap_array[parent].ordinal <= ordinal)
402 break;
403 heap_array[pos] = heap_array[parent];
404 pos = parent;
405 }
406 heap_array[pos].queue_nr = queue_nr;
407 heap_array[pos].ordinal = ordinal;
408}
409
410int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
411 u64 ordinal)
412{
413 struct auxtrace_heap_item *heap_array;
414
415 if (queue_nr >= heap->heap_sz) {
416 unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
417
418 while (heap_sz <= queue_nr)
419 heap_sz <<= 1;
420 heap_array = realloc(heap->heap_array,
421 heap_sz * sizeof(struct auxtrace_heap_item));
422 if (!heap_array)
423 return -ENOMEM;
424 heap->heap_array = heap_array;
425 heap->heap_sz = heap_sz;
426 }
427
428 auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
429
430 return 0;
431}
432
433void auxtrace_heap__free(struct auxtrace_heap *heap)
434{
435 zfree(&heap->heap_array);
436 heap->heap_cnt = 0;
437 heap->heap_sz = 0;
438}
439
440void auxtrace_heap__pop(struct auxtrace_heap *heap)
441{
442 unsigned int pos, last, heap_cnt = heap->heap_cnt;
443 struct auxtrace_heap_item *heap_array;
444
445 if (!heap_cnt)
446 return;
447
448 heap->heap_cnt -= 1;
449
450 heap_array = heap->heap_array;
451
452 pos = 0;
453 while (1) {
454 unsigned int left, right;
455
456 left = (pos << 1) + 1;
457 if (left >= heap_cnt)
458 break;
459 right = left + 1;
460 if (right >= heap_cnt) {
461 heap_array[pos] = heap_array[left];
462 return;
463 }
464 if (heap_array[left].ordinal < heap_array[right].ordinal) {
465 heap_array[pos] = heap_array[left];
466 pos = left;
467 } else {
468 heap_array[pos] = heap_array[right];
469 pos = right;
470 }
471 }
472
473 last = heap_cnt - 1;
474 auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
475 heap_array[last].ordinal);
476}
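
auxtrace_heapify() is a textbook binary min-heap sift-up keyed on ordinal, and auxtrace_heap__pop() sifts the hole at the root down before re-inserting the old last element. A trimmed-down standalone version with the same logic (simplified types), showing that pops come out in ascending ordinal order:

#include <stdint.h>
#include <stdio.h>

struct item { unsigned int queue_nr; uint64_t ordinal; };

static void heapify(struct item *h, unsigned int pos,
		    unsigned int queue_nr, uint64_t ordinal)
{
	while (pos) {
		unsigned int parent = (pos - 1) >> 1;

		if (h[parent].ordinal <= ordinal)
			break;
		h[pos] = h[parent];	/* sift the hole up */
		pos = parent;
	}
	h[pos].queue_nr = queue_nr;
	h[pos].ordinal = ordinal;
}

static void pop(struct item *h, unsigned int *cnt)
{
	unsigned int pos = 0, last, n = *cnt;

	if (!n)
		return;
	*cnt = n - 1;
	while (1) {
		unsigned int left = (pos << 1) + 1, right = left + 1;

		if (left >= n)
			break;
		if (right >= n) {
			h[pos] = h[left];
			return;
		}
		/* move the smaller child up, follow the hole down */
		if (h[left].ordinal < h[right].ordinal) {
			h[pos] = h[left];
			pos = left;
		} else {
			h[pos] = h[right];
			pos = right;
		}
	}
	last = n - 1;
	heapify(h, pos, h[last].queue_nr, h[last].ordinal);
}

int main(void)
{
	struct item heap[8];
	unsigned int cnt = 0, i;
	uint64_t ordinals[] = { 42, 7, 19, 3 };

	for (i = 0; i < 4; i++)
		heapify(heap, cnt++, i, ordinals[i]);
	while (cnt) {			/* prints 3, 7, 19, 42 */
		printf("%llu\n", (unsigned long long)heap[0].ordinal);
		pop(heap, &cnt);
	}
	return 0;
}
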
477
478size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
479{
480 if (itr)
481 return itr->info_priv_size(itr);
482 return 0;
483}
484
485static int auxtrace_not_supported(void)
486{
487 pr_err("AUX area tracing is not supported on this architecture\n");
488 return -EINVAL;
489}
490
491int auxtrace_record__info_fill(struct auxtrace_record *itr,
492 struct perf_session *session,
493 struct auxtrace_info_event *auxtrace_info,
494 size_t priv_size)
495{
496 if (itr)
497 return itr->info_fill(itr, session, auxtrace_info, priv_size);
498 return auxtrace_not_supported();
499}
500
501void auxtrace_record__free(struct auxtrace_record *itr)
502{
503 if (itr)
504 itr->free(itr);
505}
506
507int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
508{
509 if (itr && itr->snapshot_start)
510 return itr->snapshot_start(itr);
511 return 0;
512}
513
514int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
515{
516 if (itr && itr->snapshot_finish)
517 return itr->snapshot_finish(itr);
518 return 0;
519}
520
521int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
522 struct auxtrace_mmap *mm,
523 unsigned char *data, u64 *head, u64 *old)
524{
525 if (itr && itr->find_snapshot)
526 return itr->find_snapshot(itr, idx, mm, data, head, old);
527 return 0;
528}
529
530int auxtrace_record__options(struct auxtrace_record *itr,
531 struct perf_evlist *evlist,
532 struct record_opts *opts)
533{
534 if (itr)
535 return itr->recording_options(itr, evlist, opts);
536 return 0;
537}
538
539u64 auxtrace_record__reference(struct auxtrace_record *itr)
540{
541 if (itr)
542 return itr->reference(itr);
543 return 0;
544}
545
546int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
547 struct record_opts *opts, const char *str)
548{
549 if (!str)
550 return 0;
551
552 if (itr)
553 return itr->parse_snapshot_options(itr, opts, str);
554
555 pr_err("No AUX area tracing to snapshot\n");
556 return -EINVAL;
557}
558
559struct auxtrace_record *__weak
560auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
561{
562 *err = 0;
563 return NULL;
564}
565
566static int auxtrace_index__alloc(struct list_head *head)
567{
568 struct auxtrace_index *auxtrace_index;
569
570 auxtrace_index = malloc(sizeof(struct auxtrace_index));
571 if (!auxtrace_index)
572 return -ENOMEM;
573
574 auxtrace_index->nr = 0;
575 INIT_LIST_HEAD(&auxtrace_index->list);
576
577 list_add_tail(&auxtrace_index->list, head);
578
579 return 0;
580}
581
582void auxtrace_index__free(struct list_head *head)
583{
584 struct auxtrace_index *auxtrace_index, *n;
585
586 list_for_each_entry_safe(auxtrace_index, n, head, list) {
587 list_del(&auxtrace_index->list);
588 free(auxtrace_index);
589 }
590}
591
592static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
593{
594 struct auxtrace_index *auxtrace_index;
595 int err;
596
597 if (list_empty(head)) {
598 err = auxtrace_index__alloc(head);
599 if (err)
600 return NULL;
601 }
602
603 auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
604
605 if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
606 err = auxtrace_index__alloc(head);
607 if (err)
608 return NULL;
609 auxtrace_index = list_entry(head->prev, struct auxtrace_index,
610 list);
611 }
612
613 return auxtrace_index;
614}
615
616int auxtrace_index__auxtrace_event(struct list_head *head,
617 union perf_event *event, off_t file_offset)
618{
619 struct auxtrace_index *auxtrace_index;
620 size_t nr;
621
622 auxtrace_index = auxtrace_index__last(head);
623 if (!auxtrace_index)
624 return -ENOMEM;
625
626 nr = auxtrace_index->nr;
627 auxtrace_index->entries[nr].file_offset = file_offset;
628 auxtrace_index->entries[nr].sz = event->header.size;
629 auxtrace_index->nr += 1;
630
631 return 0;
632}
633
634static int auxtrace_index__do_write(int fd,
635 struct auxtrace_index *auxtrace_index)
636{
637 struct auxtrace_index_entry ent;
638 size_t i;
639
640 for (i = 0; i < auxtrace_index->nr; i++) {
641 ent.file_offset = auxtrace_index->entries[i].file_offset;
642 ent.sz = auxtrace_index->entries[i].sz;
643 if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
644 return -errno;
645 }
646 return 0;
647}
648
649int auxtrace_index__write(int fd, struct list_head *head)
650{
651 struct auxtrace_index *auxtrace_index;
652 u64 total = 0;
653 int err;
654
655 list_for_each_entry(auxtrace_index, head, list)
656 total += auxtrace_index->nr;
657
658 if (writen(fd, &total, sizeof(total)) != sizeof(total))
659 return -errno;
660
661 list_for_each_entry(auxtrace_index, head, list) {
662 err = auxtrace_index__do_write(fd, auxtrace_index);
663 if (err)
664 return err;
665 }
666
667 return 0;
668}
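
The on-disk index layout written above is simply a u64 entry count followed by packed { file_offset, sz } pairs, which auxtrace_index__process() reads back (byte-swapping when needed). A minimal reader/writer of the same layout, using plain stdio in place of perf's writen()/readn() (illustrative only):

#include <stdint.h>
#include <stdio.h>

struct ent { uint64_t file_offset, sz; };	/* mirrors auxtrace_index_entry */

int main(void)
{
	struct ent out[2] = { { 4096, 128 }, { 8192, 256 } }, in[2];
	uint64_t total = 2, nr;
	FILE *f = tmpfile();

	if (!f)
		return 1;
	fwrite(&total, sizeof(total), 1, f);	/* u64 entry count first */
	fwrite(out, sizeof(out[0]), 2, f);	/* then the packed entries */

	rewind(f);
	if (fread(&nr, sizeof(nr), 1, f) != 1 ||
	    fread(in, sizeof(in[0]), nr, f) != nr)
		return 1;
	printf("%llu entries, first at offset %llu\n",
	       (unsigned long long)nr, (unsigned long long)in[0].file_offset);
	fclose(f);
	return 0;
}
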
669
670static int auxtrace_index__process_entry(int fd, struct list_head *head,
671 bool needs_swap)
672{
673 struct auxtrace_index *auxtrace_index;
674 struct auxtrace_index_entry ent;
675 size_t nr;
676
677 if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
678 return -1;
679
680 auxtrace_index = auxtrace_index__last(head);
681 if (!auxtrace_index)
682 return -1;
683
684 nr = auxtrace_index->nr;
685 if (needs_swap) {
686 auxtrace_index->entries[nr].file_offset =
687 bswap_64(ent.file_offset);
688 auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
689 } else {
690 auxtrace_index->entries[nr].file_offset = ent.file_offset;
691 auxtrace_index->entries[nr].sz = ent.sz;
692 }
693
694 auxtrace_index->nr = nr + 1;
695
696 return 0;
697}
698
699int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
700 bool needs_swap)
701{
702 struct list_head *head = &session->auxtrace_index;
703 u64 nr;
704
705 if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
706 return -1;
707
708 if (needs_swap)
709 nr = bswap_64(nr);
710
711 if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
712 return -1;
713
714 while (nr--) {
715 int err;
716
717 err = auxtrace_index__process_entry(fd, head, needs_swap);
718 if (err)
719 return -1;
720 }
721
722 return 0;
723}
724
725static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
726 struct perf_session *session,
727 struct auxtrace_index_entry *ent)
728{
729 return auxtrace_queues__add_indexed_event(queues, session,
730 ent->file_offset, ent->sz);
731}
732
733int auxtrace_queues__process_index(struct auxtrace_queues *queues,
734 struct perf_session *session)
735{
736 struct auxtrace_index *auxtrace_index;
737 struct auxtrace_index_entry *ent;
738 size_t i;
739 int err;
740
741 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
742 for (i = 0; i < auxtrace_index->nr; i++) {
743 ent = &auxtrace_index->entries[i];
744 err = auxtrace_queues__process_index_entry(queues,
745 session,
746 ent);
747 if (err)
748 return err;
749 }
750 }
751 return 0;
752}
753
754struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
755 struct auxtrace_buffer *buffer)
756{
757 if (buffer) {
758 if (list_is_last(&buffer->list, &queue->head))
759 return NULL;
760 return list_entry(buffer->list.next, struct auxtrace_buffer,
761 list);
762 } else {
763 if (list_empty(&queue->head))
764 return NULL;
765 return list_entry(queue->head.next, struct auxtrace_buffer,
766 list);
767 }
768}
769
770void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
771{
772 size_t adj = buffer->data_offset & (page_size - 1);
773 size_t size = buffer->size + adj;
774 off_t file_offset = buffer->data_offset - adj;
775 void *addr;
776
777 if (buffer->data)
778 return buffer->data;
779
780 addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
781 if (addr == MAP_FAILED)
782 return NULL;
783
784 buffer->mmap_addr = addr;
785 buffer->mmap_size = size;
786
787 buffer->data = addr + adj;
788
789 return buffer->data;
790}
791
792void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
793{
794 if (!buffer->data || !buffer->mmap_addr)
795 return;
796 munmap(buffer->mmap_addr, buffer->mmap_size);
797 buffer->mmap_addr = NULL;
798 buffer->mmap_size = 0;
799 buffer->data = NULL;
800 buffer->use_data = NULL;
801}
802
803void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
804{
805 auxtrace_buffer__put_data(buffer);
806 if (buffer->data_needs_freeing) {
807 buffer->data_needs_freeing = false;
808 zfree(&buffer->data);
809 buffer->use_data = NULL;
810 buffer->size = 0;
811 }
812}
813
814void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
815{
816 auxtrace_buffer__drop_data(buffer);
817 free(buffer);
818}
819
820void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
821 int code, int cpu, pid_t pid, pid_t tid, u64 ip,
822 const char *msg)
823{
824 size_t size;
825
826 memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
827
828 auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
829 auxtrace_error->type = type;
830 auxtrace_error->code = code;
831 auxtrace_error->cpu = cpu;
832 auxtrace_error->pid = pid;
833 auxtrace_error->tid = tid;
834 auxtrace_error->ip = ip;
835 strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
836
837 size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
838 strlen(auxtrace_error->msg) + 1;
839 auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
840}
841
842int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
843 struct perf_tool *tool,
844 struct perf_session *session,
845 perf_event__handler_t process)
846{
847 union perf_event *ev;
848 size_t priv_size;
849 int err;
850
851 pr_debug2("Synthesizing auxtrace information\n");
852 priv_size = auxtrace_record__info_priv_size(itr);
853 ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
854 if (!ev)
855 return -ENOMEM;
856
857 ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
858 ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) +
859 priv_size;
860 err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
861 priv_size);
862 if (err)
863 goto out_free;
864
865 err = process(tool, ev, NULL, NULL);
866out_free:
867 free(ev);
868 return err;
869}
870
871static bool auxtrace__dont_decode(struct perf_session *session)
872{
873 return !session->itrace_synth_opts ||
874 session->itrace_synth_opts->dont_decode;
875}
876
877int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
878 union perf_event *event,
879 struct perf_session *session __maybe_unused)
880{
881 enum auxtrace_type type = event->auxtrace_info.type;
882
883 if (dump_trace)
884 fprintf(stdout, " type: %u\n", type);
885
886 switch (type) {
887 case PERF_AUXTRACE_UNKNOWN:
888 default:
889 return -EINVAL;
890 }
891}
892
893s64 perf_event__process_auxtrace(struct perf_tool *tool,
894 union perf_event *event,
895 struct perf_session *session)
896{
897 s64 err;
898
899 if (dump_trace)
900 fprintf(stdout, " size: %#"PRIx64" offset: %#"PRIx64" ref: %#"PRIx64" idx: %u tid: %d cpu: %d\n",
901 event->auxtrace.size, event->auxtrace.offset,
902 event->auxtrace.reference, event->auxtrace.idx,
903 event->auxtrace.tid, event->auxtrace.cpu);
904
905 if (auxtrace__dont_decode(session))
906 return event->auxtrace.size;
907
908 if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
909 return -EINVAL;
910
911 err = session->auxtrace->process_auxtrace_event(session, event, tool);
912 if (err < 0)
913 return err;
914
915 return event->auxtrace.size;
916}
917
918#define PERF_ITRACE_DEFAULT_PERIOD_TYPE PERF_ITRACE_PERIOD_NANOSECS
919#define PERF_ITRACE_DEFAULT_PERIOD 100000
920#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16
921#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024
922
923void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
924{
925 synth_opts->instructions = true;
926 synth_opts->branches = true;
927 synth_opts->transactions = true;
928 synth_opts->errors = true;
929 synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
930 synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
931 synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
932}
933
934/*
935 * Please check tools/perf/Documentation/perf-script.txt for information
936 * about the options parsed here; that documentation is introduced after
937 * this cset, when support in 'perf script' for these options is added.
938 */
939int itrace_parse_synth_opts(const struct option *opt, const char *str,
940 int unset)
941{
942 struct itrace_synth_opts *synth_opts = opt->value;
943 const char *p;
944 char *endptr;
945
946 synth_opts->set = true;
947
948 if (unset) {
949 synth_opts->dont_decode = true;
950 return 0;
951 }
952
953 if (!str) {
954 itrace_synth_opts__set_default(synth_opts);
955 return 0;
956 }
957
958 for (p = str; *p;) {
959 switch (*p++) {
960 case 'i':
961 synth_opts->instructions = true;
962 while (*p == ' ' || *p == ',')
963 p += 1;
964 if (isdigit(*p)) {
965 synth_opts->period = strtoull(p, &endptr, 10);
966 p = endptr;
967 while (*p == ' ' || *p == ',')
968 p += 1;
969 switch (*p++) {
970 case 'i':
971 synth_opts->period_type =
972 PERF_ITRACE_PERIOD_INSTRUCTIONS;
973 break;
974 case 't':
975 synth_opts->period_type =
976 PERF_ITRACE_PERIOD_TICKS;
977 break;
978 case 'm':
979 synth_opts->period *= 1000;
980 /* Fall through */
981 case 'u':
982 synth_opts->period *= 1000;
983 /* Fall through */
984 case 'n':
985 if (*p++ != 's')
986 goto out_err;
987 synth_opts->period_type =
988 PERF_ITRACE_PERIOD_NANOSECS;
989 break;
990 case '\0':
991 goto out;
992 default:
993 goto out_err;
994 }
995 }
996 break;
997 case 'b':
998 synth_opts->branches = true;
999 break;
1000 case 'x':
1001 synth_opts->transactions = true;
1002 break;
1003 case 'e':
1004 synth_opts->errors = true;
1005 break;
1006 case 'd':
1007 synth_opts->log = true;
1008 break;
1009 case 'c':
1010 synth_opts->branches = true;
1011 synth_opts->calls = true;
1012 break;
1013 case 'r':
1014 synth_opts->branches = true;
1015 synth_opts->returns = true;
1016 break;
1017 case 'g':
1018 synth_opts->callchain = true;
1019 synth_opts->callchain_sz =
1020 PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
1021 while (*p == ' ' || *p == ',')
1022 p += 1;
1023 if (isdigit(*p)) {
1024 unsigned int val;
1025
1026 val = strtoul(p, &endptr, 10);
1027 p = endptr;
1028 if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
1029 goto out_err;
1030 synth_opts->callchain_sz = val;
1031 }
1032 break;
1033 case ' ':
1034 case ',':
1035 break;
1036 default:
1037 goto out_err;
1038 }
1039 }
1040out:
1041 if (synth_opts->instructions) {
1042 if (!synth_opts->period_type)
1043 synth_opts->period_type =
1044 PERF_ITRACE_DEFAULT_PERIOD_TYPE;
1045 if (!synth_opts->period)
1046 synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
1047 }
1048
1049 return 0;
1050
1051out_err:
1052 pr_err("Bad Instruction Tracing options '%s'\n", str);
1053 return -EINVAL;
1054}
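
The period suffix parsing relies on switch fall-through: 'm' multiplies by 1000 and falls into 'u', which multiplies by 1000 again and falls into 'n', which insists on a trailing 's'. So "i100us" requests a 100,000ns instruction period, and "g32" caps synthesized callchains at 32 entries. A compact demo of just the multiplier fall-through (hypothetical helper, not the perf parser itself):

#include <stdio.h>

/* Returns the period in nanoseconds for a suffix of "ms", "us" or "ns". */
static unsigned long long to_ns(unsigned long long period, const char *unit)
{
	switch (*unit++) {
	case 'm':
		period *= 1000;
		/* Fall through */
	case 'u':
		period *= 1000;
		/* Fall through */
	case 'n':
		if (*unit != 's')
			return 0;	/* parse error, as in out_err */
		return period;
	default:
		return 0;
	}
}

int main(void)
{
	printf("%llu\n", to_ns(100, "us"));	/* 100000 */
	printf("%llu\n", to_ns(100, "ms"));	/* 100000000 */
	printf("%llu\n", to_ns(100, "ns"));	/* 100 */
	return 0;
}
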
1055
1056static const char * const auxtrace_error_type_name[] = {
1057 [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
1058};
1059
1060static const char *auxtrace_error_name(int type)
1061{
1062 const char *error_type_name = NULL;
1063
1064 if (type < PERF_AUXTRACE_ERROR_MAX)
1065 error_type_name = auxtrace_error_type_name[type];
1066 if (!error_type_name)
1067 error_type_name = "unknown AUX";
1068 return error_type_name;
1069}
1070
1071size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
1072{
1073 struct auxtrace_error_event *e = &event->auxtrace_error;
1074 int ret;
1075
1076 ret = fprintf(fp, " %s error type %u",
1077 auxtrace_error_name(e->type), e->type);
1078 ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
1079 e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
1080 return ret;
1081}
1082
1083void perf_session__auxtrace_error_inc(struct perf_session *session,
1084 union perf_event *event)
1085{
1086 struct auxtrace_error_event *e = &event->auxtrace_error;
1087
1088 if (e->type < PERF_AUXTRACE_ERROR_MAX)
1089 session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
1090}
1091
1092void events_stats__auxtrace_error_warn(const struct events_stats *stats)
1093{
1094 int i;
1095
1096 for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
1097 if (!stats->nr_auxtrace_errors[i])
1098 continue;
1099 ui__warning("%u %s errors\n",
1100 stats->nr_auxtrace_errors[i],
1101 auxtrace_error_name(i));
1102 }
1103}
1104
1105int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
1106 union perf_event *event,
1107 struct perf_session *session)
1108{
1109 if (auxtrace__dont_decode(session))
1110 return 0;
1111
1112 perf_event__fprintf_auxtrace_error(event, stdout);
1113 return 0;
1114}
1115
1116static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
1117 struct auxtrace_record *itr,
1118 struct perf_tool *tool, process_auxtrace_t fn,
1119 bool snapshot, size_t snapshot_size)
1120{
1121 u64 head, old = mm->prev, offset, ref;
1122 unsigned char *data = mm->base;
1123 size_t size, head_off, old_off, len1, len2, padding;
1124 union perf_event ev;
1125 void *data1, *data2;
1126
1127 if (snapshot) {
1128 head = auxtrace_mmap__read_snapshot_head(mm);
1129 if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
1130 &head, &old))
1131 return -1;
1132 } else {
1133 head = auxtrace_mmap__read_head(mm);
1134 }
1135
1136 if (old == head)
1137 return 0;
1138
1139 pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
1140 mm->idx, old, head, head - old);
1141
1142 if (mm->mask) {
1143 head_off = head & mm->mask;
1144 old_off = old & mm->mask;
1145 } else {
1146 head_off = head % mm->len;
1147 old_off = old % mm->len;
1148 }
1149
1150 if (head_off > old_off)
1151 size = head_off - old_off;
1152 else
1153 size = mm->len - (old_off - head_off);
1154
1155 if (snapshot && size > snapshot_size)
1156 size = snapshot_size;
1157
1158 ref = auxtrace_record__reference(itr);
1159
1160 if (head > old || size <= head || mm->mask) {
1161 offset = head - size;
1162 } else {
1163 /*
1164 * When the buffer size is not a power of 2, 'head' wraps at the
1165 * highest multiple of the buffer size, so we have to subtract
1166 * the remainder here.
1167 */
1168 u64 rem = (0ULL - mm->len) % mm->len;
1169
1170 offset = head - size - rem;
1171 }
1172
1173 if (size > head_off) {
1174 len1 = size - head_off;
1175 data1 = &data[mm->len - len1];
1176 len2 = head_off;
1177 data2 = &data[0];
1178 } else {
1179 len1 = size;
1180 data1 = &data[head_off - len1];
1181 len2 = 0;
1182 data2 = NULL;
1183 }
1184
1185 /* padding must be written by fn() e.g. record__process_auxtrace() */
1186 padding = size & 7;
1187 if (padding)
1188 padding = 8 - padding;
1189
1190 memset(&ev, 0, sizeof(ev));
1191 ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
1192 ev.auxtrace.header.size = sizeof(ev.auxtrace);
1193 ev.auxtrace.size = size + padding;
1194 ev.auxtrace.offset = offset;
1195 ev.auxtrace.reference = ref;
1196 ev.auxtrace.idx = mm->idx;
1197 ev.auxtrace.tid = mm->tid;
1198 ev.auxtrace.cpu = mm->cpu;
1199
1200 if (fn(tool, &ev, data1, len1, data2, len2))
1201 return -1;
1202
1203 mm->prev = head;
1204
1205 if (!snapshot) {
1206 auxtrace_mmap__write_tail(mm, head);
1207 if (itr->read_finish) {
1208 int err;
1209
1210 err = itr->read_finish(itr, mm->idx);
1211 if (err < 0)
1212 return err;
1213 }
1214 }
1215
1216 return 1;
1217}
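
The non-power-of-2 branch in __auxtrace_mmap__read() depends on 'head' being a free-running u64 byte counter: since 2^64 is generally not a multiple of the buffer length, the counter wraps at the highest multiple of the length below 2^64, and rem = (0ULL - len) % len is exactly 2^64 mod len. A quick check of the remainder for a few lengths:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t lens[] = { 65536, 24576, 3 };	/* pow2, non-pow2, tiny */
	int i;

	for (i = 0; i < 3; i++) {
		uint64_t len = lens[i];
		/* (0 - len) mod len == 2^64 mod len in u64 arithmetic */
		uint64_t rem = (0ULL - len) % len;

		printf("len=%llu rem=%llu\n",
		       (unsigned long long)len, (unsigned long long)rem);
	}
	return 0;	/* rem: 0, 16384 and 1 respectively */
}
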
1218
1219int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
1220 struct perf_tool *tool, process_auxtrace_t fn)
1221{
1222 return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
1223}
1224
1225int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
1226 struct auxtrace_record *itr,
1227 struct perf_tool *tool, process_auxtrace_t fn,
1228 size_t snapshot_size)
1229{
1230 return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
1231}
1232
1233/**
1234 * struct auxtrace_cache - hash table to implement a cache
1235 * @hashtable: the hashtable
1236 * @sz: hashtable size (number of hlists)
1237 * @entry_size: size of an entry
1238 * @limit: limit the number of entries to this maximum, when reached the cache
1239 * is dropped and caching begins again with an empty cache
1240 * @cnt: current number of entries
1241 * @bits: hashtable size (@sz = 2^@bits)
1242 */
1243struct auxtrace_cache {
1244 struct hlist_head *hashtable;
1245 size_t sz;
1246 size_t entry_size;
1247 size_t limit;
1248 size_t cnt;
1249 unsigned int bits;
1250};
1251
1252struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
1253 unsigned int limit_percent)
1254{
1255 struct auxtrace_cache *c;
1256 struct hlist_head *ht;
1257 size_t sz, i;
1258
1259 c = zalloc(sizeof(struct auxtrace_cache));
1260 if (!c)
1261 return NULL;
1262
1263 sz = 1UL << bits;
1264
1265 ht = calloc(sz, sizeof(struct hlist_head));
1266 if (!ht)
1267 goto out_free;
1268
1269 for (i = 0; i < sz; i++)
1270 INIT_HLIST_HEAD(&ht[i]);
1271
1272 c->hashtable = ht;
1273 c->sz = sz;
1274 c->entry_size = entry_size;
1275 c->limit = (c->sz * limit_percent) / 100;
1276 c->bits = bits;
1277
1278 return c;
1279
1280out_free:
1281 free(c);
1282 return NULL;
1283}
1284
1285static void auxtrace_cache__drop(struct auxtrace_cache *c)
1286{
1287 struct auxtrace_cache_entry *entry;
1288 struct hlist_node *tmp;
1289 size_t i;
1290
1291 if (!c)
1292 return;
1293
1294 for (i = 0; i < c->sz; i++) {
1295 hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
1296 hlist_del(&entry->hash);
1297 auxtrace_cache__free_entry(c, entry);
1298 }
1299 }
1300
1301 c->cnt = 0;
1302}
1303
1304void auxtrace_cache__free(struct auxtrace_cache *c)
1305{
1306 if (!c)
1307 return;
1308
1309 auxtrace_cache__drop(c);
1310 free(c->hashtable);
1311 free(c);
1312}
1313
1314void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
1315{
1316 return malloc(c->entry_size);
1317}
1318
1319void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
1320 void *entry)
1321{
1322 free(entry);
1323}
1324
1325int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
1326 struct auxtrace_cache_entry *entry)
1327{
1328 if (c->limit && ++c->cnt > c->limit)
1329 auxtrace_cache__drop(c);
1330
1331 entry->key = key;
1332 hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
1333
1334 return 0;
1335}
1336
1337void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
1338{
1339 struct auxtrace_cache_entry *entry;
1340 struct hlist_head *hlist;
1341
1342 if (!c)
1343 return NULL;
1344
1345 hlist = &c->hashtable[hash_32(key, c->bits)];
1346 hlist_for_each_entry(entry, hlist, hash) {
1347 if (entry->key == key)
1348 return entry;
1349 }
1350
1351 return NULL;
1352}
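
auxtrace_cache trades eviction bookkeeping for simplicity: once cnt exceeds the limit (a percentage of the table size), auxtrace_cache__add() drops the whole cache and filling starts over. A simplified standalone model of that policy (a flat array in place of the hlist buckets; hypothetical names):

#include <stdio.h>
#include <string.h>

#define SZ 8

struct cache {
	long long slot[SZ];	/* -1 == empty; the real code chains hlists */
	unsigned int cnt, limit;
};

static void drop(struct cache *c)
{
	memset(c->slot, 0xff, sizeof(c->slot));	/* all slots to -1 */
	c->cnt = 0;
}

static void add(struct cache *c, unsigned int key, long long val)
{
	if (c->limit && ++c->cnt > c->limit)
		drop(c);		/* same policy as auxtrace_cache__add() */
	c->slot[key % SZ] = val;
}

int main(void)
{
	struct cache c = { .cnt = 0, .limit = 4 };	/* e.g. 50% of SZ */
	unsigned int key;

	drop(&c);
	for (key = 0; key < 6; key++)
		add(&c, key, key * 100);
	/* keys 0-3 were dropped when the 5th insert crossed the limit */
	printf("cnt=%u slot[5]=%lld slot[0]=%lld\n", c.cnt, c.slot[5], c.slot[0]);
	return 0;
}
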
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 000000000000..a171abbe7301
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,643 @@
1/*
2 * auxtrace.h: AUX area trace support
3 * Copyright (c) 2013-2015, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef __PERF_AUXTRACE_H
17#define __PERF_AUXTRACE_H
18
19#include <sys/types.h>
20#include <stdbool.h>
21#include <stddef.h>
22#include <linux/list.h>
23#include <linux/perf_event.h>
24#include <linux/types.h>
25
26#include "../perf.h"
27#include "event.h"
28#include "session.h"
29#include "debug.h"
30
31union perf_event;
32struct perf_session;
33struct perf_evlist;
34struct perf_tool;
35struct option;
36struct record_opts;
37struct auxtrace_info_event;
38struct events_stats;
39
40enum auxtrace_type {
41 PERF_AUXTRACE_UNKNOWN,
42};
43
44enum itrace_period_type {
45 PERF_ITRACE_PERIOD_INSTRUCTIONS,
46 PERF_ITRACE_PERIOD_TICKS,
47 PERF_ITRACE_PERIOD_NANOSECS,
48};
49
50/**
51 * struct itrace_synth_opts - AUX area tracing synthesis options.
52 * @set: indicates whether or not options have been set
53 * @inject: indicates the event (not just the sample) must be fully synthesized
54 * because 'perf inject' will write it out
55 * @instructions: whether to synthesize 'instructions' events
56 * @branches: whether to synthesize 'branches' events
57 * @transactions: whether to synthesize events for transactions
58 * @errors: whether to synthesize decoder error events
59 * @dont_decode: whether to skip decoding entirely
60 * @log: write a decoding log
61 * @calls: limit branch samples to calls (can be combined with @returns)
62 * @returns: limit branch samples to returns (can be combined with @calls)
63 * @callchain: add callchain to 'instructions' events
64 * @callchain_sz: maximum callchain size
65 * @period: 'instructions' events period
66 * @period_type: 'instructions' events period type
67 */
68struct itrace_synth_opts {
69 bool set;
70 bool inject;
71 bool instructions;
72 bool branches;
73 bool transactions;
74 bool errors;
75 bool dont_decode;
76 bool log;
77 bool calls;
78 bool returns;
79 bool callchain;
80 unsigned int callchain_sz;
81 unsigned long long period;
82 enum itrace_period_type period_type;
83};
84
85/**
 86 * struct auxtrace_index_entry - indexes an AUX area tracing event within a
87 * perf.data file.
88 * @file_offset: offset within the perf.data file
89 * @sz: size of the event
90 */
91struct auxtrace_index_entry {
92 u64 file_offset;
93 u64 sz;
94};
95
96#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
97
98/**
99 * struct auxtrace_index - index of AUX area tracing events within a perf.data
100 * file.
101 * @list: links together a number of arrays of entries
102 * @nr: number of entries
103 * @entries: array of entries
104 */
105struct auxtrace_index {
106 struct list_head list;
107 size_t nr;
108 struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
109};
110
111/**
112 * struct auxtrace - session callbacks to allow AUX area data decoding.
113 * @process_event: lets the decoder see all session events
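 * @process_auxtrace_event: lets the decoder see each PERF_RECORD_AUXTRACE event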
114 * @flush_events: process any remaining data
115 * @free_events: free resources associated with event processing
116 * @free: free resources associated with the session
117 */
118struct auxtrace {
119 int (*process_event)(struct perf_session *session,
120 union perf_event *event,
121 struct perf_sample *sample,
122 struct perf_tool *tool);
123 int (*process_auxtrace_event)(struct perf_session *session,
124 union perf_event *event,
125 struct perf_tool *tool);
126 int (*flush_events)(struct perf_session *session,
127 struct perf_tool *tool);
128 void (*free_events)(struct perf_session *session);
129 void (*free)(struct perf_session *session);
130};
131
132/**
133 * struct auxtrace_buffer - a buffer containing AUX area tracing data.
134 * @list: buffers are queued in a list held by struct auxtrace_queue
135 * @size: size of the buffer in bytes
136 * @pid: in per-thread mode, the pid this buffer is associated with
137 * @tid: in per-thread mode, the tid this buffer is associated with
138 * @cpu: in per-cpu mode, the cpu this buffer is associated with
139 * @data: actual buffer data (can be null if the data has not been loaded)
140 * @data_offset: file offset at which the buffer can be read
141 * @mmap_addr: mmap address at which the buffer can be read
142 * @mmap_size: size of the mmap at @mmap_addr
143 * @data_needs_freeing: @data was malloc'd so free it when it is no longer
144 * needed
145 * @consecutive: the original data was split up and this buffer is consecutive
146 * to the previous buffer
147 * @offset: offset as determined by aux_head / aux_tail members of struct
148 * perf_event_mmap_page
149 * @reference: an implementation-specific reference determined when the data is
150 * recorded
151 * @buffer_nr: used to number each buffer
152 * @use_size: implementation actually only uses this number of bytes
153 * @use_data: implementation actually only uses data starting at this address
154 */
155struct auxtrace_buffer {
156 struct list_head list;
157 size_t size;
158 pid_t pid;
159 pid_t tid;
160 int cpu;
161 void *data;
162 off_t data_offset;
163 void *mmap_addr;
164 size_t mmap_size;
165 bool data_needs_freeing;
166 bool consecutive;
167 u64 offset;
168 u64 reference;
169 u64 buffer_nr;
170 size_t use_size;
171 void *use_data;
172};
173
174/**
175 * struct auxtrace_queue - a queue of AUX area tracing data buffers.
176 * @head: head of buffer list
177 * @tid: in per-thread mode, the tid this queue is associated with
178 * @cpu: in per-cpu mode, the cpu this queue is associated with
179 * @set: %true once this queue has been dedicated to a specific thread or cpu
180 * @priv: implementation-specific data
181 */
182struct auxtrace_queue {
183 struct list_head head;
184 pid_t tid;
185 int cpu;
186 bool set;
187 void *priv;
188};
189
190/**
191 * struct auxtrace_queues - an array of AUX area tracing queues.
192 * @queue_array: array of queues
193 * @nr_queues: number of queues
194 * @new_data: set whenever new data is queued
195 * @populated: queues have been fully populated using the auxtrace_index
196 * @next_buffer_nr: used to number each buffer
197 */
198struct auxtrace_queues {
199 struct auxtrace_queue *queue_array;
200 unsigned int nr_queues;
201 bool new_data;
202 bool populated;
203 u64 next_buffer_nr;
204};
205
206/**
207 * struct auxtrace_heap_item - element of struct auxtrace_heap.
208 * @queue_nr: queue number
209 * @queue_nr: queue number
210 * @ordinal: value used for sorting (lowest ordinal is top of the heap);
211 * expected to be a timestamp
211 */
212struct auxtrace_heap_item {
213 unsigned int queue_nr;
214 u64 ordinal;
215};
216
217/**
218 * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
219 * @heap_array: the heap
220 * @heap_cnt: the number of elements in the heap
221 * @heap_sz: maximum number of elements (grows as needed)
222 */
223struct auxtrace_heap {
224 struct auxtrace_heap_item *heap_array;
225 unsigned int heap_cnt;
226 unsigned int heap_sz;
227};
228
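
An orientation sketch (editorial, not part of the patch): decoders use this heap to merge per-queue data in timestamp order. Seed it with each queue's first timestamp, then repeatedly pop the lowest ordinal, decode that queue up to its next timestamp, and re-add the queue. process_queue() below is a hypothetical decode step:

while (heap.heap_cnt) {
	unsigned int queue_nr = heap.heap_array[0].queue_nr;	/* lowest ordinal */
	u64 next_ts;

	auxtrace_heap__pop(&heap);
	next_ts = process_queue(queue_nr);	/* hypothetical: returns 0 when drained */
	if (next_ts && auxtrace_heap__add(&heap, queue_nr, next_ts))
		break;				/* failed to grow heap_array */
}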
229/**
230 * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
231 * @base: address of mapped area
232 * @userpg: pointer to buffer's perf_event_mmap_page
233 * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
234 * @len: size of mapped area
235 * @prev: previous aux_head
236 * @idx: index of this mmap
237 * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
238 * mmap), otherwise %0
239 * @cpu: cpu number for a per-cpu mmap, otherwise %-1
240 */
241struct auxtrace_mmap {
242 void *base;
243 void *userpg;
244 size_t mask;
245 size_t len;
246 u64 prev;
247 int idx;
248 pid_t tid;
249 int cpu;
250};
251
252/**
253 * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
254 * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
255 * @offset: file offset of mapped area
256 * @len: size of mapped area
257 * @prot: mmap memory protection
258 * @idx: index of this mmap
259 * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
260 * mmap), otherwise %0
261 * @cpu: cpu number for a per-cpu mmap, otherwise %-1
262 */
263struct auxtrace_mmap_params {
264 size_t mask;
265 off_t offset;
266 size_t len;
267 int prot;
268 int idx;
269 pid_t tid;
270 int cpu;
271};
272
273/**
274 * struct auxtrace_record - callbacks for recording AUX area data.
275 * @recording_options: validate and process recording options
276 * @info_priv_size: return the size of the private data in auxtrace_info_event
277 * @info_fill: fill in the private data in auxtrace_info_event
278 * @free: free this auxtrace record structure
279 * @snapshot_start: start a snapshot
280 * @snapshot_finish: finish a snapshot
281 * @find_snapshot: find data to snapshot within auxtrace mmap
282 * @parse_snapshot_options: parse snapshot options
283 * @reference: provide a 64-bit reference number for auxtrace_event
284 * @read_finish: called after reading from an auxtrace mmap
285 */
286struct auxtrace_record {
287 int (*recording_options)(struct auxtrace_record *itr,
288 struct perf_evlist *evlist,
289 struct record_opts *opts);
290 size_t (*info_priv_size)(struct auxtrace_record *itr);
291 int (*info_fill)(struct auxtrace_record *itr,
292 struct perf_session *session,
293 struct auxtrace_info_event *auxtrace_info,
294 size_t priv_size);
295 void (*free)(struct auxtrace_record *itr);
296 int (*snapshot_start)(struct auxtrace_record *itr);
297 int (*snapshot_finish)(struct auxtrace_record *itr);
298 int (*find_snapshot)(struct auxtrace_record *itr, int idx,
299 struct auxtrace_mmap *mm, unsigned char *data,
300 u64 *head, u64 *old);
301 int (*parse_snapshot_options)(struct auxtrace_record *itr,
302 struct record_opts *opts,
303 const char *str);
304 u64 (*reference)(struct auxtrace_record *itr);
305 int (*read_finish)(struct auxtrace_record *itr, int idx);
306};
307
308#ifdef HAVE_AUXTRACE_SUPPORT
309
310/*
311 * In snapshot mode the mmapped page is read-only, which makes using
312 * __sync_val_compare_and_swap() problematic. However, snapshot mode expects
313 * that the buffer is not updated while the snapshot is made (e.g. Intel PT
314 * disables the event), so there is no race anyway.
315 */
316static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
317{
318 struct perf_event_mmap_page *pc = mm->userpg;
319 u64 head = ACCESS_ONCE(pc->aux_head);
320
321 /* Ensure all reads are done after we read the head */
322 rmb();
323 return head;
324}
325
326static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
327{
328 struct perf_event_mmap_page *pc = mm->userpg;
329#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
330 u64 head = ACCESS_ONCE(pc->aux_head);
331#else
332 u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
333#endif
334
335 /* Ensure all reads are done after we read the head */
336 rmb();
337 return head;
338}
339
340static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
341{
342 struct perf_event_mmap_page *pc = mm->userpg;
343#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
344 u64 old_tail;
345#endif
346
347 /* Ensure all reads are done before we write the tail out */
348 mb();
349#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
350 pc->aux_tail = tail;
351#else
352 do {
353 old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
354 } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
355#endif
356}
357
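Taken together, the three helpers above implement the usual ring-buffer consumer protocol. A consumer loop looks roughly like this sketch; auxtrace_mmap__read(), declared below, packages this pattern:

u64 head = auxtrace_mmap__read_head(mm);
u64 old  = mm->prev;

if (head != old) {
	/* ... copy the bytes in [old, head) out of mm->base, wrapping at mm->len ... */
	mm->prev = head;
	auxtrace_mmap__write_tail(mm, head);	/* tell the kernel the space is free */
}
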
358int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
359 struct auxtrace_mmap_params *mp,
360 void *userpg, int fd);
361void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
362void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
363 off_t auxtrace_offset,
364 unsigned int auxtrace_pages,
365 bool auxtrace_overwrite);
366void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
367 struct perf_evlist *evlist, int idx,
368 bool per_cpu);
369
370typedef int (*process_auxtrace_t)(struct perf_tool *tool,
371 union perf_event *event, void *data1,
372 size_t len1, void *data2, size_t len2);
373
374int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
375 struct perf_tool *tool, process_auxtrace_t fn);
376
377int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
378 struct auxtrace_record *itr,
379 struct perf_tool *tool, process_auxtrace_t fn,
380 size_t snapshot_size);
381
382int auxtrace_queues__init(struct auxtrace_queues *queues);
383int auxtrace_queues__add_event(struct auxtrace_queues *queues,
384 struct perf_session *session,
385 union perf_event *event, off_t data_offset,
386 struct auxtrace_buffer **buffer_ptr);
387void auxtrace_queues__free(struct auxtrace_queues *queues);
388int auxtrace_queues__process_index(struct auxtrace_queues *queues,
389 struct perf_session *session);
390struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
391 struct auxtrace_buffer *buffer);
392void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
393void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
394void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
395void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
396
397int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
398 u64 ordinal);
399void auxtrace_heap__pop(struct auxtrace_heap *heap);
400void auxtrace_heap__free(struct auxtrace_heap *heap);
401
402struct auxtrace_cache_entry {
403 struct hlist_node hash;
404 u32 key;
405};
406
407struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
408 unsigned int limit_percent);
409void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
410void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
411void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
412int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
413 struct auxtrace_cache_entry *entry);
414void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
415
416struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
417 int *err);
418
419int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
420 struct record_opts *opts,
421 const char *str);
422int auxtrace_record__options(struct auxtrace_record *itr,
423 struct perf_evlist *evlist,
424 struct record_opts *opts);
425size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
426int auxtrace_record__info_fill(struct auxtrace_record *itr,
427 struct perf_session *session,
428 struct auxtrace_info_event *auxtrace_info,
429 size_t priv_size);
430void auxtrace_record__free(struct auxtrace_record *itr);
431int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
432int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
433int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
434 struct auxtrace_mmap *mm,
435 unsigned char *data, u64 *head, u64 *old);
436u64 auxtrace_record__reference(struct auxtrace_record *itr);
437
438int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
439 off_t file_offset);
440int auxtrace_index__write(int fd, struct list_head *head);
441int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
442 bool needs_swap);
443void auxtrace_index__free(struct list_head *head);
444
445void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
446 int code, int cpu, pid_t pid, pid_t tid, u64 ip,
447 const char *msg);
448
449int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
450 struct perf_tool *tool,
451 struct perf_session *session,
452 perf_event__handler_t process);
453int perf_event__process_auxtrace_info(struct perf_tool *tool,
454 union perf_event *event,
455 struct perf_session *session);
456s64 perf_event__process_auxtrace(struct perf_tool *tool,
457 union perf_event *event,
458 struct perf_session *session);
459int perf_event__process_auxtrace_error(struct perf_tool *tool,
460 union perf_event *event,
461 struct perf_session *session);
462int itrace_parse_synth_opts(const struct option *opt, const char *str,
463 int unset);
464void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
465
466size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
467void perf_session__auxtrace_error_inc(struct perf_session *session,
468 union perf_event *event);
469void events_stats__auxtrace_error_warn(const struct events_stats *stats);
470
471static inline int auxtrace__process_event(struct perf_session *session,
472 union perf_event *event,
473 struct perf_sample *sample,
474 struct perf_tool *tool)
475{
476 if (!session->auxtrace)
477 return 0;
478
479 return session->auxtrace->process_event(session, event, sample, tool);
480}
481
482static inline int auxtrace__flush_events(struct perf_session *session,
483 struct perf_tool *tool)
484{
485 if (!session->auxtrace)
486 return 0;
487
488 return session->auxtrace->flush_events(session, tool);
489}
490
491static inline void auxtrace__free_events(struct perf_session *session)
492{
493 if (!session->auxtrace)
494 return;
495
496 return session->auxtrace->free_events(session);
497}
498
499static inline void auxtrace__free(struct perf_session *session)
500{
501 if (!session->auxtrace)
502 return;
503
504 return session->auxtrace->free(session);
505}
506
507#else
508
509static inline struct auxtrace_record *
510auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
511 int *err __maybe_unused)
512{
513 *err = 0;
514 return NULL;
515}
516
517static inline
518void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
519{
520}
521
522static inline int
523perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
524 struct perf_tool *tool __maybe_unused,
525 struct perf_session *session __maybe_unused,
526 perf_event__handler_t process __maybe_unused)
527{
528 return -EINVAL;
529}
530
531static inline
532int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
533 struct perf_evlist *evlist __maybe_unused,
534 struct record_opts *opts __maybe_unused)
535{
536 return 0;
537}
538
539#define perf_event__process_auxtrace_info 0
540#define perf_event__process_auxtrace 0
541#define perf_event__process_auxtrace_error 0
542
543static inline
544void perf_session__auxtrace_error_inc(struct perf_session *session
545 __maybe_unused,
546 union perf_event *event
547 __maybe_unused)
548{
549}
550
551static inline
552void events_stats__auxtrace_error_warn(const struct events_stats *stats
553 __maybe_unused)
554{
555}
556
557static inline
558int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
559 const char *str __maybe_unused,
560 int unset __maybe_unused)
561{
562 pr_err("AUX area tracing not supported\n");
563 return -EINVAL;
564}
565
566static inline
567int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
568 struct record_opts *opts __maybe_unused,
569 const char *str)
570{
571 if (!str)
572 return 0;
573 pr_err("AUX area tracing not supported\n");
574 return -EINVAL;
575}
576
577static inline
578int auxtrace__process_event(struct perf_session *session __maybe_unused,
579 union perf_event *event __maybe_unused,
580 struct perf_sample *sample __maybe_unused,
581 struct perf_tool *tool __maybe_unused)
582{
583 return 0;
584}
585
586static inline
587int auxtrace__flush_events(struct perf_session *session __maybe_unused,
588 struct perf_tool *tool __maybe_unused)
589{
590 return 0;
591}
592
593static inline
594void auxtrace__free_events(struct perf_session *session __maybe_unused)
595{
596}
597
598static inline
599void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
600{
601}
602
603static inline
604void auxtrace__free(struct perf_session *session __maybe_unused)
605{
606}
607
608static inline
609int auxtrace_index__write(int fd __maybe_unused,
610 struct list_head *head __maybe_unused)
611{
612 return -EINVAL;
613}
614
615static inline
616int auxtrace_index__process(int fd __maybe_unused,
617 u64 size __maybe_unused,
618 struct perf_session *session __maybe_unused,
619 bool needs_swap __maybe_unused)
620{
621 return -EINVAL;
622}
623
624static inline
625void auxtrace_index__free(struct list_head *head __maybe_unused)
626{
627}
628
629int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
630 struct auxtrace_mmap_params *mp,
631 void *userpg, int fd);
632void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
633void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
634 off_t auxtrace_offset,
635 unsigned int auxtrace_pages,
636 bool auxtrace_overwrite);
637void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
638 struct perf_evlist *evlist, int idx,
639 bool per_cpu);
640
641#endif
642
643#endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 61867dff5d5a..1f6fc2323ef9 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -43,6 +43,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
43 if (al.map != NULL) 43 if (al.map != NULL)
44 al.map->dso->hit = 1; 44 al.map->dso->hit = 1;
45 45
46 thread__put(thread);
46 return 0; 47 return 0;
47} 48}
48 49
@@ -59,8 +60,10 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
59 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, 60 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
60 event->fork.ppid, event->fork.ptid); 61 event->fork.ppid, event->fork.ptid);
61 62
62 if (thread) 63 if (thread) {
63 machine__remove_thread(machine, thread); 64 machine__remove_thread(machine, thread);
65 thread__put(thread);
66 }
64 67
65 return 0; 68 return 0;
66} 69}
@@ -159,15 +162,20 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
159 return write_padded(fd, name, name_len + 1, len); 162 return write_padded(fd, name, name_len + 1, len);
160} 163}
161 164
162static int __dsos__write_buildid_table(struct list_head *head, 165static int machine__write_buildid_table(struct machine *machine, int fd)
163 struct machine *machine,
164 pid_t pid, u16 misc, int fd)
165{ 166{
167 int err = 0;
166 char nm[PATH_MAX]; 168 char nm[PATH_MAX];
167 struct dso *pos; 169 struct dso *pos;
170 u16 kmisc = PERF_RECORD_MISC_KERNEL,
171 umisc = PERF_RECORD_MISC_USER;
168 172
169 dsos__for_each_with_build_id(pos, head) { 173 if (!machine__is_host(machine)) {
170 int err; 174 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
175 umisc = PERF_RECORD_MISC_GUEST_USER;
176 }
177
178 dsos__for_each_with_build_id(pos, &machine->dsos.head) {
171 const char *name; 179 const char *name;
172 size_t name_len; 180 size_t name_len;
173 181
@@ -186,32 +194,12 @@ static int __dsos__write_buildid_table(struct list_head *head,
186 name_len = pos->long_name_len + 1; 194 name_len = pos->long_name_len + 1;
187 } 195 }
188 196
189 err = write_buildid(name, name_len, pos->build_id, 197 err = write_buildid(name, name_len, pos->build_id, machine->pid,
190 pid, misc, fd); 198 pos->kernel ? kmisc : umisc, fd);
191 if (err) 199 if (err)
192 return err; 200 break;
193 }
194
195 return 0;
196}
197
198static int machine__write_buildid_table(struct machine *machine, int fd)
199{
200 int err;
201 u16 kmisc = PERF_RECORD_MISC_KERNEL,
202 umisc = PERF_RECORD_MISC_USER;
203
204 if (!machine__is_host(machine)) {
205 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
206 umisc = PERF_RECORD_MISC_GUEST_USER;
207 } 201 }
208 202
209 err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
210 machine->pid, kmisc, fd);
211 if (err == 0)
212 err = __dsos__write_buildid_table(&machine->user_dsos.head,
213 machine, machine->pid, umisc,
214 fd);
215 return err; 203 return err;
216} 204}
217 205
@@ -244,13 +232,7 @@ static int __dsos__hit_all(struct list_head *head)
244 232
245static int machine__hit_all_dsos(struct machine *machine) 233static int machine__hit_all_dsos(struct machine *machine)
246{ 234{
247 int err; 235 return __dsos__hit_all(&machine->dsos.head);
248
249 err = __dsos__hit_all(&machine->kernel_dsos.head);
250 if (err)
251 return err;
252
253 return __dsos__hit_all(&machine->user_dsos.head);
254} 236}
255 237
256int dsos__hit_all(struct perf_session *session) 238int dsos__hit_all(struct perf_session *session)
@@ -490,9 +472,7 @@ static int __dsos__cache_build_ids(struct list_head *head,
490 472
491static int machine__cache_build_ids(struct machine *machine) 473static int machine__cache_build_ids(struct machine *machine)
492{ 474{
493 int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine); 475 return __dsos__cache_build_ids(&machine->dsos.head, machine);
494 ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine);
495 return ret;
496} 476}
497 477
498int perf_session__cache_build_ids(struct perf_session *session) 478int perf_session__cache_build_ids(struct perf_session *session)
@@ -517,11 +497,7 @@ int perf_session__cache_build_ids(struct perf_session *session)
517 497
518static bool machine__read_build_ids(struct machine *machine, bool with_hits) 498static bool machine__read_build_ids(struct machine *machine, bool with_hits)
519{ 499{
520 bool ret; 500 return __dsos__read_build_ids(&machine->dsos.head, with_hits);
521
522 ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
523 ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
524 return ret;
525} 501}
526 502
527bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) 503bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index fbcca21d66ab..c861373aaed3 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -30,7 +30,6 @@ extern const char *perf_config_dirname(const char *, const char *);
30 30
31/* pager.c */ 31/* pager.c */
32extern void setup_pager(void); 32extern void setup_pager(void);
33extern const char *pager_program;
34extern int pager_in_use(void); 33extern int pager_in_use(void);
35extern int pager_use_color; 34extern int pager_use_color;
36 35
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6033a0a212ca..679c2c6d8ade 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -72,6 +72,10 @@ extern struct callchain_param callchain_param;
72struct callchain_list { 72struct callchain_list {
73 u64 ip; 73 u64 ip;
74 struct map_symbol ms; 74 struct map_symbol ms;
75 struct /* for TUI */ {
76 bool unfolded;
77 bool has_children;
78 };
75 char *srcline; 79 char *srcline;
76 struct list_head list; 80 struct list_head list;
77}; 81};
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 88f7be399432..32e12ecfe9c5 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -115,23 +115,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
115 goto found; 115 goto found;
116 n++; 116 n++;
117 } 117 }
118 if (cgrp->refcnt == 0) 118 if (atomic_read(&cgrp->refcnt) == 0)
119 free(cgrp); 119 free(cgrp);
120 120
121 return -1; 121 return -1;
122found: 122found:
123 cgrp->refcnt++; 123 atomic_inc(&cgrp->refcnt);
124 counter->cgrp = cgrp; 124 counter->cgrp = cgrp;
125 return 0; 125 return 0;
126} 126}
127 127
128void close_cgroup(struct cgroup_sel *cgrp) 128void close_cgroup(struct cgroup_sel *cgrp)
129{ 129{
130 if (!cgrp) 130 if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) {
131 return;
132
133 /* XXX: not reentrant */
134 if (--cgrp->refcnt == 0) {
135 close(cgrp->fd); 131 close(cgrp->fd);
136 zfree(&cgrp->name); 132 zfree(&cgrp->name);
137 free(cgrp); 133 free(cgrp);
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 89acd6debdc5..b4b8cb42fe5e 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -1,12 +1,14 @@
1#ifndef __CGROUP_H__ 1#ifndef __CGROUP_H__
2#define __CGROUP_H__ 2#define __CGROUP_H__
3 3
4#include <linux/atomic.h>
5
4struct option; 6struct option;
5 7
6struct cgroup_sel { 8struct cgroup_sel {
7 char *name; 9 char *name;
8 int fd; 10 int fd;
9 int refcnt; 11 atomic_t refcnt;
10}; 12};
11 13
12 14
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index b2bb59df65e1..21b7ff382c3f 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -2,24 +2,27 @@
2#include "util.h" 2#include "util.h"
3#include <stdlib.h> 3#include <stdlib.h>
4#include <stdio.h> 4#include <stdio.h>
5#include <linux/atomic.h>
5 6
6struct comm_str { 7struct comm_str {
7 char *str; 8 char *str;
8 struct rb_node rb_node; 9 struct rb_node rb_node;
9 int ref; 10 atomic_t refcnt;
10}; 11};
11 12
12/* Should perhaps be moved to struct machine */ 13/* Should perhaps be moved to struct machine */
13static struct rb_root comm_str_root; 14static struct rb_root comm_str_root;
14 15
15static void comm_str__get(struct comm_str *cs) 16static struct comm_str *comm_str__get(struct comm_str *cs)
16{ 17{
17 cs->ref++; 18 if (cs)
19 atomic_inc(&cs->refcnt);
20 return cs;
18} 21}
19 22
20static void comm_str__put(struct comm_str *cs) 23static void comm_str__put(struct comm_str *cs)
21{ 24{
22 if (!--cs->ref) { 25 if (cs && atomic_dec_and_test(&cs->refcnt)) {
23 rb_erase(&cs->rb_node, &comm_str_root); 26 rb_erase(&cs->rb_node, &comm_str_root);
24 zfree(&cs->str); 27 zfree(&cs->str);
25 free(cs); 28 free(cs);
@@ -40,6 +43,8 @@ static struct comm_str *comm_str__alloc(const char *str)
40 return NULL; 43 return NULL;
41 } 44 }
42 45
46 atomic_set(&cs->refcnt, 0);
47
43 return cs; 48 return cs;
44} 49}
45 50
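
A side note on this hunk: besides switching to atomics, comm_str__get() now returns its argument and tolerates NULL, so a caller can take a reference and assign in one step, as in this sketch:

struct comm_str *ref = comm_str__get(cs);	/* NULL-safe; returns cs */
/* ... use ref ... */
comm_str__put(ref);				/* unlinks and frees on the last put */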
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dd17c9a32fbc..5bfc1198ab46 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -14,6 +14,7 @@
14#include <babeltrace/ctf-writer/event.h> 14#include <babeltrace/ctf-writer/event.h>
15#include <babeltrace/ctf-writer/event-types.h> 15#include <babeltrace/ctf-writer/event-types.h>
16#include <babeltrace/ctf-writer/event-fields.h> 16#include <babeltrace/ctf-writer/event-fields.h>
17#include <babeltrace/ctf-ir/utils.h>
17#include <babeltrace/ctf/events.h> 18#include <babeltrace/ctf/events.h>
18#include <traceevent/event-parse.h> 19#include <traceevent/event-parse.h>
19#include "asm/bug.h" 20#include "asm/bug.h"
@@ -38,12 +39,21 @@ struct evsel_priv {
38 struct bt_ctf_event_class *event_class; 39 struct bt_ctf_event_class *event_class;
39}; 40};
40 41
42#define MAX_CPUS 4096
43
44struct ctf_stream {
45 struct bt_ctf_stream *stream;
46 int cpu;
47 u32 count;
48};
49
41struct ctf_writer { 50struct ctf_writer {
42 /* writer primitives */ 51 /* writer primitives */
43 struct bt_ctf_writer *writer; 52 struct bt_ctf_writer *writer;
44 struct bt_ctf_stream *stream; 53 struct ctf_stream **stream;
45 struct bt_ctf_stream_class *stream_class; 54 int stream_cnt;
46 struct bt_ctf_clock *clock; 55 struct bt_ctf_stream_class *stream_class;
56 struct bt_ctf_clock *clock;
47 57
48 /* data types */ 58 /* data types */
49 union { 59 union {
@@ -65,6 +75,9 @@ struct convert {
65 75
66 u64 events_size; 76 u64 events_size;
67 u64 events_count; 77 u64 events_count;
78
79 /* Ordered events configured queue size. */
80 u64 queue_size;
68}; 81};
69 82
70static int value_set(struct bt_ctf_field_type *type, 83static int value_set(struct bt_ctf_field_type *type,
@@ -153,6 +166,43 @@ get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
153 return cw->data.u32; 166 return cw->data.u32;
154} 167}
155 168
169static unsigned long long adjust_signedness(unsigned long long value_int, int size)
170{
171 unsigned long long value_mask;
172
173 /*
174 * value_mask = (1 << (size * 8 - 1)) - 1.
175 * It is set explicitly for each size below for readability.
176 */
177 switch (size) {
178 case 1:
179 value_mask = 0x7fULL;
180 break;
181 case 2:
182 value_mask = 0x7fffULL;
183 break;
184 case 4:
185 value_mask = 0x7fffffffULL;
186 break;
187 case 8:
188 /*
189 * For a 64 bit value, return it as is. There is no
190 * need to fill high bits.
191 */
192 /* Fall through */
193 default:
194 /* BUG! */
195 return value_int;
196 }
197
198 /* If it is a positive value, don't adjust. */
199 if ((value_int & (~0ULL - value_mask)) == 0)
200 return value_int;
201
202 /* Fill the upper bits of value_int with ones to make it a negative long long. */
203 return (value_int & value_mask) | ~value_mask;
204}
205
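To make the sign extension concrete (illustrative values, not part of the patch):

/* 1-byte field: 0x7f is positive and returned unchanged; 0xff sign-extends to -1 */
assert(adjust_signedness(0x7fULL, 1) == 0x7fULL);
assert(adjust_signedness(0xffULL, 1) == 0xffffffffffffffffULL);
/* 2-byte field: 0x8000 sign-extends to 0xffffffffffff8000 */
assert(adjust_signedness(0x8000ULL, 2) == 0xffffffffffff8000ULL);
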
156static int add_tracepoint_field_value(struct ctf_writer *cw, 206static int add_tracepoint_field_value(struct ctf_writer *cw,
157 struct bt_ctf_event_class *event_class, 207 struct bt_ctf_event_class *event_class,
158 struct bt_ctf_event *event, 208 struct bt_ctf_event *event,
@@ -164,7 +214,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
164 struct bt_ctf_field *field; 214 struct bt_ctf_field *field;
165 const char *name = fmtf->name; 215 const char *name = fmtf->name;
166 void *data = sample->raw_data; 216 void *data = sample->raw_data;
167 unsigned long long value_int;
168 unsigned long flags = fmtf->flags; 217 unsigned long flags = fmtf->flags;
169 unsigned int n_items; 218 unsigned int n_items;
170 unsigned int i; 219 unsigned int i;
@@ -172,6 +221,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
172 unsigned int len; 221 unsigned int len;
173 int ret; 222 int ret;
174 223
224 name = fmtf->alias;
175 offset = fmtf->offset; 225 offset = fmtf->offset;
176 len = fmtf->size; 226 len = fmtf->size;
177 if (flags & FIELD_IS_STRING) 227 if (flags & FIELD_IS_STRING)
@@ -208,11 +258,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
208 type = get_tracepoint_field_type(cw, fmtf); 258 type = get_tracepoint_field_type(cw, fmtf);
209 259
210 for (i = 0; i < n_items; i++) { 260 for (i = 0; i < n_items; i++) {
211 if (!(flags & FIELD_IS_STRING))
212 value_int = pevent_read_number(
213 fmtf->event->pevent,
214 data + offset + i * len, len);
215
216 if (flags & FIELD_IS_ARRAY) 261 if (flags & FIELD_IS_ARRAY)
217 field = bt_ctf_field_array_get_field(array_field, i); 262 field = bt_ctf_field_array_get_field(array_field, i);
218 else 263 else
@@ -226,12 +271,21 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
226 if (flags & FIELD_IS_STRING) 271 if (flags & FIELD_IS_STRING)
227 ret = bt_ctf_field_string_set_value(field, 272 ret = bt_ctf_field_string_set_value(field,
228 data + offset + i * len); 273 data + offset + i * len);
229 else if (!(flags & FIELD_IS_SIGNED)) 274 else {
230 ret = bt_ctf_field_unsigned_integer_set_value( 275 unsigned long long value_int;
231 field, value_int); 276
232 else 277 value_int = pevent_read_number(
233 ret = bt_ctf_field_signed_integer_set_value( 278 fmtf->event->pevent,
234 field, value_int); 279 data + offset + i * len, len);
280
281 if (!(flags & FIELD_IS_SIGNED))
282 ret = bt_ctf_field_unsigned_integer_set_value(
283 field, value_int);
284 else
285 ret = bt_ctf_field_signed_integer_set_value(
286 field, adjust_signedness(value_int, len));
287 }
288
235 if (ret) { 289 if (ret) {
236 pr_err("failed to set file value %s\n", name); 290 pr_err("failed to set file value %s\n", name);
237 goto err_put_field; 291 goto err_put_field;
@@ -346,12 +400,6 @@ static int add_generic_values(struct ctf_writer *cw,
346 return -1; 400 return -1;
347 } 401 }
348 402
349 if (type & PERF_SAMPLE_CPU) {
350 ret = value_set_u32(cw, event, "perf_cpu", sample->cpu);
351 if (ret)
352 return -1;
353 }
354
355 if (type & PERF_SAMPLE_PERIOD) { 403 if (type & PERF_SAMPLE_PERIOD) {
356 ret = value_set_u64(cw, event, "perf_period", sample->period); 404 ret = value_set_u64(cw, event, "perf_period", sample->period);
357 if (ret) 405 if (ret)
@@ -381,6 +429,129 @@ static int add_generic_values(struct ctf_writer *cw,
381 return 0; 429 return 0;
382} 430}
383 431
432static int ctf_stream__flush(struct ctf_stream *cs)
433{
434 int err = 0;
435
436 if (cs) {
437 err = bt_ctf_stream_flush(cs->stream);
438 if (err)
439 pr_err("CTF stream %d flush failed\n", cs->cpu);
440
441 pr("Flush stream for cpu %d (%u samples)\n",
442 cs->cpu, cs->count);
443
444 cs->count = 0;
445 }
446
447 return err;
448}
449
450static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
451{
452 struct ctf_stream *cs;
453 struct bt_ctf_field *pkt_ctx = NULL;
454 struct bt_ctf_field *cpu_field = NULL;
455 struct bt_ctf_stream *stream = NULL;
456 int ret;
457
458 cs = zalloc(sizeof(*cs));
459 if (!cs) {
460 pr_err("Failed to allocate ctf stream\n");
461 return NULL;
462 }
463
464 stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
465 if (!stream) {
466 pr_err("Failed to create CTF stream\n");
467 goto out;
468 }
469
470 pkt_ctx = bt_ctf_stream_get_packet_context(stream);
471 if (!pkt_ctx) {
472 pr_err("Failed to obtain packet context\n");
473 goto out;
474 }
475
476 cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
477 bt_ctf_field_put(pkt_ctx);
478 if (!cpu_field) {
479 pr_err("Failed to obtain cpu field\n");
480 goto out;
481 }
482
483 ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
484 if (ret) {
485 pr_err("Failed to update CPU number\n");
486 goto out;
487 }
488
489 bt_ctf_field_put(cpu_field);
490
491 cs->cpu = cpu;
492 cs->stream = stream;
493 return cs;
494
495out:
496 if (cpu_field)
497 bt_ctf_field_put(cpu_field);
498 if (stream)
499 bt_ctf_stream_put(stream);
500
501 free(cs);
502 return NULL;
503}
504
505static void ctf_stream__delete(struct ctf_stream *cs)
506{
507 if (cs) {
508 bt_ctf_stream_put(cs->stream);
509 free(cs);
510 }
511}
512
513static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
514{
515 struct ctf_stream *cs = cw->stream[cpu];
516
517 if (!cs) {
518 cs = ctf_stream__create(cw, cpu);
519 cw->stream[cpu] = cs;
520 }
521
522 return cs;
523}
524
525static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
526 struct perf_evsel *evsel)
527{
528 int cpu = 0;
529
530 if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
531 cpu = sample->cpu;
532
533 if (cpu > cw->stream_cnt) {
534 pr_err("Event was recorded for CPU %d, limit is at %d.\n",
535 cpu, cw->stream_cnt);
536 cpu = 0;
537 }
538
539 return cpu;
540}
541
542#define STREAM_FLUSH_COUNT 100000
543
544/*
545 * Currently we have no way to determine when to
546 * flush a stream other than to keep track of the
547 * number of events and check it against a
548 * threshold.
549 */
550static bool is_flush_needed(struct ctf_stream *cs)
551{
552 return cs->count >= STREAM_FLUSH_COUNT;
553}
554
384static int process_sample_event(struct perf_tool *tool, 555static int process_sample_event(struct perf_tool *tool,
385 union perf_event *_event __maybe_unused, 556 union perf_event *_event __maybe_unused,
386 struct perf_sample *sample, 557 struct perf_sample *sample,
@@ -390,6 +561,7 @@ static int process_sample_event(struct perf_tool *tool,
390 struct convert *c = container_of(tool, struct convert, tool); 561 struct convert *c = container_of(tool, struct convert, tool);
391 struct evsel_priv *priv = evsel->priv; 562 struct evsel_priv *priv = evsel->priv;
392 struct ctf_writer *cw = &c->writer; 563 struct ctf_writer *cw = &c->writer;
564 struct ctf_stream *cs;
393 struct bt_ctf_event_class *event_class; 565 struct bt_ctf_event_class *event_class;
394 struct bt_ctf_event *event; 566 struct bt_ctf_event *event;
395 int ret; 567 int ret;
@@ -424,9 +596,93 @@ static int process_sample_event(struct perf_tool *tool,
424 return -1; 596 return -1;
425 } 597 }
426 598
427 bt_ctf_stream_append_event(cw->stream, event); 599 cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
600 if (cs) {
601 if (is_flush_needed(cs))
602 ctf_stream__flush(cs);
603
604 cs->count++;
605 bt_ctf_stream_append_event(cs->stream, event);
606 }
607
428 bt_ctf_event_put(event); 608 bt_ctf_event_put(event);
429 return 0; 609 return cs ? 0 : -1;
610}
611
612/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
613static char *change_name(char *name, char *orig_name, int dup)
614{
615 char *new_name = NULL;
616 size_t len;
617
618 if (!name)
619 name = orig_name;
620
621 if (dup >= 10)
622 goto out;
623 /*
624 * Add a '_' prefix to a potential keyword. According to
625 * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
626 * further CTF spec updates may require us to use '$'.
627 */
628 if (dup < 0)
629 len = strlen(name) + sizeof("_");
630 else
631 len = strlen(orig_name) + sizeof("_dupl_X");
632
633 new_name = malloc(len);
634 if (!new_name)
635 goto out;
636
637 if (dup < 0)
638 snprintf(new_name, len, "_%s", name);
639 else
640 snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
641
642out:
643 if (name != orig_name)
644 free(name);
645 return new_name;
646}
647
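For illustration (editorial, not from the patch): a field named after a CTF keyword such as "event" gets the prefix treatment, while a name colliding with an already-added field gets the duplicate suffix:

name = change_name(name, "event", -1);	/* keyword:   becomes "_event"    */
name = change_name(NULL, "id", 1);	/* duplicate: becomes "id_dupl_1" */
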
648static int event_class_add_field(struct bt_ctf_event_class *event_class,
649 struct bt_ctf_field_type *type,
650 struct format_field *field)
651{
652 struct bt_ctf_field_type *t = NULL;
653 char *name;
654 int dup = 1;
655 int ret;
656
657 /* alias was already assigned */
658 if (field->alias != field->name)
659 return bt_ctf_event_class_add_field(event_class, type,
660 (char *)field->alias);
661
662 name = field->name;
663
664 /* If 'name' is a keyword, add a prefix. */
665 if (bt_ctf_validate_identifier(name))
666 name = change_name(name, field->name, -1);
667
668 if (!name) {
669 pr_err("Failed to fix invalid identifier.");
670 return -1;
671 }
672 while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
673 bt_ctf_field_type_put(t);
674 name = change_name(name, field->name, dup++);
675 if (!name) {
676 pr_err("Failed to create dup name for '%s'\n", field->name);
677 return -1;
678 }
679 }
680
681 ret = bt_ctf_event_class_add_field(event_class, type, name);
682 if (!ret)
683 field->alias = name;
684
685 return ret;
430} 686}
431 687
432static int add_tracepoint_fields_types(struct ctf_writer *cw, 688static int add_tracepoint_fields_types(struct ctf_writer *cw,
@@ -457,14 +713,14 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
457 if (flags & FIELD_IS_ARRAY) 713 if (flags & FIELD_IS_ARRAY)
458 type = bt_ctf_field_type_array_create(type, field->arraylen); 714 type = bt_ctf_field_type_array_create(type, field->arraylen);
459 715
460 ret = bt_ctf_event_class_add_field(event_class, type, 716 ret = event_class_add_field(event_class, type, field);
461 field->name);
462 717
463 if (flags & FIELD_IS_ARRAY) 718 if (flags & FIELD_IS_ARRAY)
464 bt_ctf_field_type_put(type); 719 bt_ctf_field_type_put(type);
465 720
466 if (ret) { 721 if (ret) {
467 pr_err("Failed to add field '%s\n", field->name); 722 pr_err("Failed to add field '%s': %d\n",
723 field->name, ret);
468 return -1; 724 return -1;
469 } 725 }
470 } 726 }
@@ -508,7 +764,7 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
508 do { \ 764 do { \
509 pr2(" field '%s'\n", n); \ 765 pr2(" field '%s'\n", n); \
510 if (bt_ctf_event_class_add_field(cl, t, n)) { \ 766 if (bt_ctf_event_class_add_field(cl, t, n)) { \
511 pr_err("Failed to add field '%s;\n", n); \ 767 pr_err("Failed to add field '%s';\n", n); \
512 return -1; \ 768 return -1; \
513 } \ 769 } \
514 } while (0) 770 } while (0)
@@ -528,9 +784,6 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
528 if (type & PERF_SAMPLE_STREAM_ID) 784 if (type & PERF_SAMPLE_STREAM_ID)
529 ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); 785 ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
530 786
531 if (type & PERF_SAMPLE_CPU)
532 ADD_FIELD(event_class, cw->data.u32, "perf_cpu");
533
534 if (type & PERF_SAMPLE_PERIOD) 787 if (type & PERF_SAMPLE_PERIOD)
535 ADD_FIELD(event_class, cw->data.u64, "perf_period"); 788 ADD_FIELD(event_class, cw->data.u64, "perf_period");
536 789
@@ -604,6 +857,39 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
604 return 0; 857 return 0;
605} 858}
606 859
860static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
861{
862 struct ctf_stream **stream;
863 struct perf_header *ph = &session->header;
864 int ncpus;
865
866 /*
867 * Try to get the number of CPUs used in the data file;
868 * if not present, fall back to MAX_CPUS.
869 */
870 ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
871
872 stream = zalloc(sizeof(*stream) * ncpus);
873 if (!stream) {
874 pr_err("Failed to allocate streams.\n");
875 return -ENOMEM;
876 }
877
878 cw->stream = stream;
879 cw->stream_cnt = ncpus;
880 return 0;
881}
882
883static void free_streams(struct ctf_writer *cw)
884{
885 int cpu;
886
887 for (cpu = 0; cpu < cw->stream_cnt; cpu++)
888 ctf_stream__delete(cw->stream[cpu]);
889
890 free(cw->stream);
891}
892
607static int ctf_writer__setup_env(struct ctf_writer *cw, 893static int ctf_writer__setup_env(struct ctf_writer *cw,
608 struct perf_session *session) 894 struct perf_session *session)
609{ 895{
@@ -713,7 +999,7 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
713 ctf_writer__cleanup_data(cw); 999 ctf_writer__cleanup_data(cw);
714 1000
715 bt_ctf_clock_put(cw->clock); 1001 bt_ctf_clock_put(cw->clock);
716 bt_ctf_stream_put(cw->stream); 1002 free_streams(cw);
717 bt_ctf_stream_class_put(cw->stream_class); 1003 bt_ctf_stream_class_put(cw->stream_class);
718 bt_ctf_writer_put(cw->writer); 1004 bt_ctf_writer_put(cw->writer);
719 1005
@@ -725,8 +1011,9 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
725{ 1011{
726 struct bt_ctf_writer *writer; 1012 struct bt_ctf_writer *writer;
727 struct bt_ctf_stream_class *stream_class; 1013 struct bt_ctf_stream_class *stream_class;
728 struct bt_ctf_stream *stream;
729 struct bt_ctf_clock *clock; 1014 struct bt_ctf_clock *clock;
1015 struct bt_ctf_field_type *pkt_ctx_type;
1016 int ret;
730 1017
731 /* CTF writer */ 1018 /* CTF writer */
732 writer = bt_ctf_writer_create(path); 1019 writer = bt_ctf_writer_create(path);
@@ -767,14 +1054,15 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
767 if (ctf_writer__init_data(cw)) 1054 if (ctf_writer__init_data(cw))
768 goto err_cleanup; 1055 goto err_cleanup;
769 1056
770 /* CTF stream instance */ 1057 /* Add cpu_id for packet context */
771 stream = bt_ctf_writer_create_stream(writer, stream_class); 1058 pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
772 if (!stream) { 1059 if (!pkt_ctx_type)
773 pr("Failed to create CTF stream.\n");
774 goto err_cleanup; 1060 goto err_cleanup;
775 }
776 1061
777 cw->stream = stream; 1062 ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
1063 bt_ctf_field_type_put(pkt_ctx_type);
1064 if (ret)
1065 goto err_cleanup;
778 1066
779 /* CTF clock writer setup */ 1067 /* CTF clock writer setup */
780 if (bt_ctf_writer_add_clock(writer, clock)) { 1068 if (bt_ctf_writer_add_clock(writer, clock)) {
@@ -791,6 +1079,28 @@ err:
791 return -1; 1079 return -1;
792} 1080}
793 1081
1082static int ctf_writer__flush_streams(struct ctf_writer *cw)
1083{
1084 int cpu, ret = 0;
1085
1086 for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
1087 ret = ctf_stream__flush(cw->stream[cpu]);
1088
1089 return ret;
1090}
1091
1092static int convert__config(const char *var, const char *value, void *cb)
1093{
1094 struct convert *c = cb;
1095
1096 if (!strcmp(var, "convert.queue-size")) {
1097 c->queue_size = perf_config_u64(var, value);
1098 return 0;
1099 }
1100
1101 return perf_default_config(var, value, cb);
1102}
1103
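Because convert__config() is registered through perf_config() below, the new knob can be set in perf's git-style config file, e.g. in ~/.perfconfig (value illustrative):

[convert]
	queue-size = 100000
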
794int bt_convert__perf2ctf(const char *input, const char *path, bool force) 1104int bt_convert__perf2ctf(const char *input, const char *path, bool force)
795{ 1105{
796 struct perf_session *session; 1106 struct perf_session *session;
@@ -817,6 +1127,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
817 struct ctf_writer *cw = &c.writer; 1127 struct ctf_writer *cw = &c.writer;
818 int err = -1; 1128 int err = -1;
819 1129
1130 perf_config(convert__config, &c);
1131
820 /* CTF writer */ 1132 /* CTF writer */
821 if (ctf_writer__init(cw, path)) 1133 if (ctf_writer__init(cw, path))
822 return -1; 1134 return -1;
@@ -826,6 +1138,11 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
826 if (!session) 1138 if (!session)
827 goto free_writer; 1139 goto free_writer;
828 1140
1141 if (c.queue_size) {
1142 ordered_events__set_alloc_size(&session->ordered_events,
1143 c.queue_size);
1144 }
1145
829 /* CTF writer env/clock setup */ 1146 /* CTF writer env/clock setup */
830 if (ctf_writer__setup_env(cw, session)) 1147 if (ctf_writer__setup_env(cw, session))
831 goto free_session; 1148 goto free_session;
@@ -834,9 +1151,14 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
834 if (setup_events(cw, session)) 1151 if (setup_events(cw, session))
835 goto free_session; 1152 goto free_session;
836 1153
1154 if (setup_streams(cw, session))
1155 goto free_session;
1156
837 err = perf_session__process_events(session); 1157 err = perf_session__process_events(session);
838 if (!err) 1158 if (!err)
839 err = bt_ctf_stream_flush(cw->stream); 1159 err = ctf_writer__flush_streams(cw);
1160 else
1161 pr_err("Error during conversion.\n");
840 1162
841 fprintf(stderr, 1163 fprintf(stderr,
842 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", 1164 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
@@ -847,11 +1169,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
847 (double) c.events_size / 1024.0 / 1024.0, 1169 (double) c.events_size / 1024.0 / 1024.0,
848 c.events_count); 1170 c.events_count);
849 1171
850 /* its all good */
851free_session:
852 perf_session__delete(session); 1172 perf_session__delete(session);
1173 ctf_writer__cleanup(cw);
1174
1175 return err;
853 1176
1177free_session:
1178 perf_session__delete(session);
854free_writer: 1179free_writer:
855 ctf_writer__cleanup(cw); 1180 ctf_writer__cleanup(cw);
1181 pr_err("Error during conversion setup.\n");
856 return err; 1182 return err;
857} 1183}
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index bb39a3ffc70b..1c9689e4cc17 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -122,6 +122,7 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
122int db_export__thread(struct db_export *dbe, struct thread *thread, 122int db_export__thread(struct db_export *dbe, struct thread *thread,
123 struct machine *machine, struct comm *comm) 123 struct machine *machine, struct comm *comm)
124{ 124{
125 struct thread *main_thread;
125 u64 main_thread_db_id = 0; 126 u64 main_thread_db_id = 0;
126 int err; 127 int err;
127 128
@@ -131,8 +132,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
131 thread->db_id = ++dbe->thread_last_db_id; 132 thread->db_id = ++dbe->thread_last_db_id;
132 133
133 if (thread->pid_ != -1) { 134 if (thread->pid_ != -1) {
134 struct thread *main_thread;
135
136 if (thread->pid_ == thread->tid) { 135 if (thread->pid_ == thread->tid) {
137 main_thread = thread; 136 main_thread = thread;
138 } else { 137 } else {
@@ -144,14 +143,16 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
144 err = db_export__thread(dbe, main_thread, machine, 143 err = db_export__thread(dbe, main_thread, machine,
145 comm); 144 comm);
146 if (err) 145 if (err)
147 return err; 146 goto out_put;
148 if (comm) { 147 if (comm) {
149 err = db_export__comm_thread(dbe, comm, thread); 148 err = db_export__comm_thread(dbe, comm, thread);
150 if (err) 149 if (err)
151 return err; 150 goto out_put;
152 } 151 }
153 } 152 }
154 main_thread_db_id = main_thread->db_id; 153 main_thread_db_id = main_thread->db_id;
154 if (main_thread != thread)
155 thread__put(main_thread);
155 } 156 }
156 157
157 if (dbe->export_thread) 158 if (dbe->export_thread)
@@ -159,6 +160,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
159 machine); 160 machine);
160 161
161 return 0; 162 return 0;
163
164out_put:
165 thread__put(main_thread);
166 return err;
162} 167}
163 168
164int db_export__comm(struct db_export *dbe, struct comm *comm, 169int db_export__comm(struct db_export *dbe, struct comm *comm,
@@ -229,7 +234,7 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym,
229static struct thread *get_main_thread(struct machine *machine, struct thread *thread) 234static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
230{ 235{
231 if (thread->pid_ == thread->tid) 236 if (thread->pid_ == thread->tid)
232 return thread; 237 return thread__get(thread);
233 238
234 if (thread->pid_ == -1) 239 if (thread->pid_ == -1)
235 return NULL; 240 return NULL;
@@ -309,12 +314,12 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
309 314
310 err = db_export__thread(dbe, thread, al->machine, comm); 315 err = db_export__thread(dbe, thread, al->machine, comm);
311 if (err) 316 if (err)
312 return err; 317 goto out_put;
313 318
314 if (comm) { 319 if (comm) {
315 err = db_export__comm(dbe, comm, main_thread); 320 err = db_export__comm(dbe, comm, main_thread);
316 if (err) 321 if (err)
317 return err; 322 goto out_put;
318 es.comm_db_id = comm->db_id; 323 es.comm_db_id = comm->db_id;
319 } 324 }
320 325
@@ -322,7 +327,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
322 327
323 err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset); 328 err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
324 if (err) 329 if (err)
325 return err; 330 goto out_put;
326 331
327 if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && 332 if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
328 sample_addr_correlates_sym(&evsel->attr)) { 333 sample_addr_correlates_sym(&evsel->attr)) {
@@ -332,20 +337,22 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
332 err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, 337 err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
333 &es.addr_sym_db_id, &es.addr_offset); 338 &es.addr_sym_db_id, &es.addr_offset);
334 if (err) 339 if (err)
335 return err; 340 goto out_put;
336 if (dbe->crp) { 341 if (dbe->crp) {
337 err = thread_stack__process(thread, comm, sample, al, 342 err = thread_stack__process(thread, comm, sample, al,
338 &addr_al, es.db_id, 343 &addr_al, es.db_id,
339 dbe->crp); 344 dbe->crp);
340 if (err) 345 if (err)
341 return err; 346 goto out_put;
342 } 347 }
343 } 348 }
344 349
345 if (dbe->export_sample) 350 if (dbe->export_sample)
346 return dbe->export_sample(dbe, &es); 351 err = dbe->export_sample(dbe, &es);
347 352
348 return 0; 353out_put:
354 thread__put(main_thread);
355 return err;
349} 356}
350 357
351static struct { 358static struct {
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index fc0ddd5792a9..7c0c08386a1d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -4,6 +4,7 @@
4#include "symbol.h" 4#include "symbol.h"
5#include "dso.h" 5#include "dso.h"
6#include "machine.h" 6#include "machine.h"
7#include "auxtrace.h"
7#include "util.h" 8#include "util.h"
8#include "debug.h" 9#include "debug.h"
9 10
@@ -165,12 +166,28 @@ bool is_supported_compression(const char *ext)
165 return false; 166 return false;
166} 167}
167 168
168bool is_kernel_module(const char *pathname) 169bool is_kernel_module(const char *pathname, int cpumode)
169{ 170{
170 struct kmod_path m; 171 struct kmod_path m;
171 172 int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
172 if (kmod_path__parse(&m, pathname)) 173
173 return NULL; 174 WARN_ONCE(mode != cpumode,
175 "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
176 cpumode);
177
178 switch (mode) {
179 case PERF_RECORD_MISC_USER:
180 case PERF_RECORD_MISC_HYPERVISOR:
181 case PERF_RECORD_MISC_GUEST_USER:
182 return false;
183 /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
184 default:
185 if (kmod_path__parse(&m, pathname)) {
186 pr_err("Failed to check whether %s is a kernel module or not. Assuming it is.",
187 pathname);
188 return true;
189 }
190 }
174 191
175 return m.kmod; 192 return m.kmod;
176} 193}
@@ -214,12 +231,33 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
214{ 231{
215 const char *name = strrchr(path, '/'); 232 const char *name = strrchr(path, '/');
216 const char *ext = strrchr(path, '.'); 233 const char *ext = strrchr(path, '.');
234 bool is_simple_name = false;
217 235
218 memset(m, 0x0, sizeof(*m)); 236 memset(m, 0x0, sizeof(*m));
219 name = name ? name + 1 : path; 237 name = name ? name + 1 : path;
220 238
239 /*
240 * '.' is also a valid character for a module name. For example,
241 * [aaa.bbb] is a valid module name. '[' should have higher
242 * priority than the '.ko' suffix.
243 *
244 * The bracketed kernel names come from machine__mmap_name; such a
245 * name belongs to the kernel itself, not a kernel module.
246 */
247 if (name[0] == '[') {
248 is_simple_name = true;
249 if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
250 (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
251 (strncmp(name, "[vdso]", 6) == 0) ||
252 (strncmp(name, "[vsyscall]", 10) == 0)) {
253 m->kmod = false;
254
255 } else
256 m->kmod = true;
257 }
258
221 /* No extension, just return name. */ 259 /* No extension, just return name. */
222 if (ext == NULL) { 260 if ((ext == NULL) || is_simple_name) {
223 if (alloc_name) { 261 if (alloc_name) {
224 m->name = strdup(name); 262 m->name = strdup(name);
225 return m->name ? 0 : -ENOMEM; 263 return m->name ? 0 : -ENOMEM;
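
A reading aid for the two hunks above (illustrative calls; results follow the new rules):

is_kernel_module("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL);	/* false: the kernel itself  */
is_kernel_module("[aaa.bbb]", PERF_RECORD_MISC_KERNEL);		/* true:  '[' wins over '.'  */
is_kernel_module("/usr/lib/libc.so", PERF_RECORD_MISC_USER);	/* false: user cpumode       */
is_kernel_module("/lib/modules/4.1.0/fs/fuse.ko",
		 PERF_RECORD_MISC_KERNEL);			/* true:  '.ko' suffix       */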
@@ -264,6 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  */
 static LIST_HEAD(dso__data_open);
 static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
 
 static void dso__list_add(struct dso *dso)
 {
@@ -433,18 +472,12 @@ static void check_data_close(void)
  */
 void dso__data_close(struct dso *dso)
 {
+	pthread_mutex_lock(&dso__data_open_lock);
 	close_dso(dso);
+	pthread_mutex_unlock(&dso__data_open_lock);
 }
 
-/**
- * dso__data_fd - Get dso's data file descriptor
- * @dso: dso object
- * @machine: machine object
- *
- * External interface to find dso's file, open it and
- * returns file descriptor.
- */
-int dso__data_fd(struct dso *dso, struct machine *machine)
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
 {
 	enum dso_binary_type binary_type_data[] = {
 		DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -453,11 +486,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
 	};
 	int i = 0;
 
-	if (dso->data.status == DSO_DATA_STATUS_ERROR)
-		return -1;
-
 	if (dso->data.fd >= 0)
-		goto out;
+		return;
 
 	if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
 		dso->data.fd = open_dso(dso, machine);
@@ -477,10 +507,38 @@ out:
 		dso->data.status = DSO_DATA_STATUS_OK;
 	else
 		dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor.  It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+		return -1;
+
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0)
+		pthread_mutex_unlock(&dso__data_open_lock);
 
 	return dso->data.fd;
 }
 
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 {
 	u32 flag = 1 << by;
@@ -494,10 +552,12 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 }
 
 static void
-dso_cache__free(struct rb_root *root)
+dso_cache__free(struct dso *dso)
 {
+	struct rb_root *root = &dso->data.cache;
 	struct rb_node *next = rb_first(root);
 
+	pthread_mutex_lock(&dso->lock);
 	while (next) {
 		struct dso_cache *cache;
 
@@ -506,10 +566,12 @@ dso_cache__free(struct rb_root *root)
 		rb_erase(&cache->rb_node, root);
 		free(cache);
 	}
+	pthread_mutex_unlock(&dso->lock);
 }
 
-static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
 {
+	const struct rb_root *root = &dso->data.cache;
 	struct rb_node * const *p = &root->rb_node;
 	const struct rb_node *parent = NULL;
 	struct dso_cache *cache;
@@ -528,17 +590,20 @@ static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
 		else
 			return cache;
 	}
+
 	return NULL;
 }
 
-static void
-dso_cache__insert(struct rb_root *root, struct dso_cache *new)
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
 {
+	struct rb_root *root = &dso->data.cache;
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct dso_cache *cache;
 	u64 offset = new->offset;
 
+	pthread_mutex_lock(&dso->lock);
 	while (*p != NULL) {
 		u64 end;
 
@@ -550,10 +615,17 @@ dso_cache__insert(struct rb_root *root, struct dso_cache *new)
 			p = &(*p)->rb_left;
 		else if (offset >= end)
 			p = &(*p)->rb_right;
+		else
+			goto out;
 	}
 
 	rb_link_node(&new->rb_node, parent, p);
 	rb_insert_color(&new->rb_node, root);
+
+	cache = NULL;
+out:
+	pthread_mutex_unlock(&dso->lock);
+	return cache;
 }
 
 static ssize_t
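dso_cache__insert() now reports a collision instead of silently dropping the new node, so two threads racing to cache the same offset can reconcile without holding the lock across the read. The caller-side idiom, as the next hunk applies it in dso_cache__read():

	struct dso_cache *old = dso_cache__insert(dso, cache);

	if (old) {
		free(cache);	/* another thread inserted first; discard ours */
		cache = old;	/* and use the node that is actually in the tree */
	}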
@@ -568,19 +640,33 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
 }
 
 static ssize_t
-dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+dso_cache__read(struct dso *dso, struct machine *machine,
+		u64 offset, u8 *data, ssize_t size)
 {
 	struct dso_cache *cache;
+	struct dso_cache *old;
 	ssize_t ret;
 
 	do {
 		u64 cache_offset;
 
-		ret = -ENOMEM;
-
 		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
 		if (!cache)
+			return -ENOMEM;
+
+		pthread_mutex_lock(&dso__data_open_lock);
+
+		/*
+		 * dso->data.fd might be closed if other thread opened another
+		 * file (dso) due to open file limit (RLIMIT_NOFILE).
+		 */
+		try_to_open_dso(dso, machine);
+
+		if (dso->data.fd < 0) {
+			ret = -errno;
+			dso->data.status = DSO_DATA_STATUS_ERROR;
 			break;
+		}
 
 		cache_offset = offset & DSO__DATA_CACHE_MASK;
 
@@ -590,11 +676,20 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 
 		cache->offset = cache_offset;
 		cache->size   = ret;
-		dso_cache__insert(&dso->data.cache, cache);
+	} while (0);
 
-		ret = dso_cache__memcpy(cache, offset, data, size);
+	pthread_mutex_unlock(&dso__data_open_lock);
 
-	} while (0);
+	if (ret > 0) {
+		old = dso_cache__insert(dso, cache);
+		if (old) {
+			/* we lose the race */
+			free(cache);
+			cache = old;
+		}
+
+		ret = dso_cache__memcpy(cache, offset, data, size);
+	}
 
 	if (ret <= 0)
 		free(cache);
@@ -602,16 +697,16 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	return ret;
 }
 
-static ssize_t dso_cache_read(struct dso *dso, u64 offset,
-			      u8 *data, ssize_t size)
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
 {
 	struct dso_cache *cache;
 
-	cache = dso_cache__find(&dso->data.cache, offset);
+	cache = dso_cache__find(dso, offset);
 	if (cache)
 		return dso_cache__memcpy(cache, offset, data, size);
 	else
-		return dso_cache__read(dso, offset, data, size);
+		return dso_cache__read(dso, machine, offset, data, size);
 }
 
 /*
@@ -619,7 +714,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
  * in the rb_tree. Any read to already cached data is served
  * by cached data.
  */
-static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+			   u64 offset, u8 *data, ssize_t size)
 {
 	ssize_t r = 0;
 	u8 *p = data;
@@ -627,7 +723,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	do {
 		ssize_t ret;
 
-		ret = dso_cache_read(dso, offset, p, size);
+		ret = dso_cache_read(dso, machine, offset, p, size);
 		if (ret < 0)
 			return ret;
 
@@ -647,21 +743,44 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	return r;
 }
 
-static int data_file_size(struct dso *dso)
+static int data_file_size(struct dso *dso, struct machine *machine)
 {
+	int ret = 0;
 	struct stat st;
 	char sbuf[STRERR_BUFSIZE];
 
-	if (!dso->data.file_size) {
-		if (fstat(dso->data.fd, &st)) {
-			pr_err("dso mmap failed, fstat: %s\n",
-			       strerror_r(errno, sbuf, sizeof(sbuf)));
-			return -1;
-		}
-		dso->data.file_size = st.st_size;
+	if (dso->data.file_size)
+		return 0;
+
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	pthread_mutex_lock(&dso__data_open_lock);
+
+	/*
+	 * dso->data.fd might be closed if other thread opened another
+	 * file (dso) due to open file limit (RLIMIT_NOFILE).
+	 */
+	try_to_open_dso(dso, machine);
+
+	if (dso->data.fd < 0) {
+		ret = -errno;
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
 	}
 
-	return 0;
+	if (fstat(dso->data.fd, &st) < 0) {
+		ret = -errno;
+		pr_err("dso cache fstat failed: %s\n",
+		       strerror_r(errno, sbuf, sizeof(sbuf)));
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+	dso->data.file_size = st.st_size;
+
+out:
+	pthread_mutex_unlock(&dso__data_open_lock);
+	return ret;
 }
 
 /**
@@ -673,23 +792,17 @@ static int data_file_size(struct dso *dso)
  */
 off_t dso__data_size(struct dso *dso, struct machine *machine)
 {
-	int fd;
-
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return fd;
-
-	if (data_file_size(dso))
+	if (data_file_size(dso, machine))
 		return -1;
 
 	/* For now just estimate dso data size is close to file size */
 	return dso->data.file_size;
 }
 
-static ssize_t data_read_offset(struct dso *dso, u64 offset,
-				u8 *data, ssize_t size)
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+				u64 offset, u8 *data, ssize_t size)
 {
-	if (data_file_size(dso))
+	if (data_file_size(dso, machine))
 		return -1;
 
 	/* Check the offset sanity. */
@@ -699,7 +812,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
 	if (offset + size < offset)
 		return -1;
 
-	return cached_read(dso, offset, data, size);
+	return cached_read(dso, machine, offset, data, size);
 }
 
 /**
@@ -716,10 +829,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
 ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
 			      u64 offset, u8 *data, ssize_t size)
 {
-	if (dso__data_fd(dso, machine) < 0)
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
 		return -1;
 
-	return data_read_offset(dso, offset, data, size);
+	return data_read_offset(dso, machine, offset, data, size);
 }
 
 /**
@@ -751,13 +864,13 @@ struct map *dso__new_map(const char *name)
 	return map;
 }
 
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-				const char *short_name, int dso_type)
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type)
 {
 	/*
 	 * The kernel dso could be created by build_id processing.
 	 */
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
+	struct dso *dso = machine__findnew_dso(machine, name);
 
 	/*
 	 * We need to run this in all cases, since during the build_id
@@ -776,8 +889,8 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
  * Either one of the dso or name parameter must be non-NULL or the
  * function will not work.
  */
-static struct dso *dso__findlink_by_longname(struct rb_root *root,
-					     struct dso *dso, const char *name)
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+					       struct dso *dso, const char *name)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -824,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root,
 	return NULL;
 }
 
-static inline struct dso *
-dso__find_by_longname(const struct rb_root *root, const char *name)
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+						  const char *name)
 {
-	return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+	return __dso__findlink_by_longname(root, NULL, name);
 }
 
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
@@ -935,6 +1048,8 @@ struct dso *dso__new(const char *name)
 		RB_CLEAR_NODE(&dso->rb_node);
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
+		pthread_mutex_init(&dso->lock, NULL);
+		atomic_set(&dso->refcnt, 1);
 	}
 
 	return dso;
@@ -961,12 +1076,27 @@ void dso__delete(struct dso *dso)
 	}
 
 	dso__data_close(dso);
-	dso_cache__free(&dso->data.cache);
+	auxtrace_cache__free(dso->auxtrace_cache);
+	dso_cache__free(dso);
 	dso__free_a2l(dso);
 	zfree(&dso->symsrc_filename);
+	pthread_mutex_destroy(&dso->lock);
 	free(dso);
 }
 
+struct dso *dso__get(struct dso *dso)
+{
+	if (dso)
+		atomic_inc(&dso->refcnt);
+	return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+	if (dso && atomic_dec_and_test(&dso->refcnt))
+		dso__delete(dso);
+}
+
 void dso__set_build_id(struct dso *dso, void *build_id)
 {
 	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
@@ -1033,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct dsos *dsos, struct dso *dso)
+void __dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
-	dso__findlink_by_longname(&dsos->root, dso, NULL);
+	__dso__findlink_by_longname(&dsos->root, dso, NULL);
+	/*
+	 * It is now in the linked list, grab a reference, then garbage collect
+	 * this when needing memory, by looking at LRU dso instances in the
+	 * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+	 * anywhere besides the one for the list, do, under a lock for the
+	 * list: remove it from the list, then a dso__put(), that probably will
+	 * be the last and will then call dso__delete(), end of life.
+	 *
+	 * That, or at the end of the 'struct machine' lifetime, when all
+	 * 'struct dso' instances will be removed from the list, in
+	 * dsos__exit(), if they have no other reference from some other data
+	 * structure.
+	 *
+	 * E.g.: after processing a 'perf.data' file and storing references
+	 * to objects instantiated while processing events, we will have
+	 * references to the 'thread', 'map', 'dso' structs all from 'struct
+	 * hist_entry' instances, but we may not need anything not referenced,
+	 * so we might as well call machines__exit()/machines__delete() and
+	 * garbage collect it.
+	 */
+	dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	pthread_rwlock_wrlock(&dsos->lock);
+	__dsos__add(dsos, dso);
+	pthread_rwlock_unlock(&dsos->lock);
 }
 
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short)
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
 {
 	struct dso *pos;
 
@@ -1050,15 +1207,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	return dso__find_by_longname(&dsos->root, name);
+	return __dso__find_by_longname(&dsos->root, name);
+}
+
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	struct dso *dso;
+	pthread_rwlock_rdlock(&dsos->lock);
+	dso = __dsos__find(dsos, name, cmp_short);
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
 }
 
-struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 {
 	struct dso *dso = dso__new(name);
 
 	if (dso != NULL) {
-		dsos__add(dsos, dso);
+		__dsos__add(dsos, dso);
 		dso__set_basename(dso);
 	}
 	return dso;
@@ -1066,9 +1232,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name)
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(dsos, name, false);
+	struct dso *dso = __dsos__find(dsos, name, false);
 
-	return dso ? dso : dsos__addnew(dsos, name);
+	return dso ? dso : __dsos__addnew(dsos, name);
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso;
+	pthread_rwlock_wrlock(&dsos->lock);
+	dso = dso__get(__dsos__findnew(dsos, name));
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1130,12 +1305,15 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
 enum dso_type dso__type(struct dso *dso, struct machine *machine)
 {
 	int fd;
+	enum dso_type type = DSO__TYPE_UNKNOWN;
 
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return DSO__TYPE_UNKNOWN;
+	fd = dso__data_get_fd(dso, machine);
+	if (fd >= 0) {
+		type = dso__type_fd(fd);
+		dso__data_put_fd(dso);
+	}
 
-	return dso__type_fd(fd);
+	return type;
 }
 
 int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
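The pairing rules introduced above reduce to one call sequence. A minimal sketch, assuming the unified machine->dsos list from this series (error handling trimmed):

	struct dso *dso = dsos__findnew(&machine->dsos, name);	/* returns with a reference held */

	if (dso) {
		int fd = dso__data_get_fd(dso, machine);	/* takes dso__data_open_lock */

		if (fd >= 0) {
			/* use fd: the lock guarantees it is not closed underneath us */
			dso__data_put_fd(dso);			/* releases dso__data_open_lock */
		}
		dso__put(dso);					/* drops the dsos__findnew() reference */
	}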
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index e0901b4ed8de..2fe98bb0e95b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -1,9 +1,11 @@
 #ifndef __PERF_DSO
 #define __PERF_DSO
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <stdbool.h>
+#include <pthread.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include "map.h"
@@ -124,9 +126,13 @@ struct dso_cache {
 struct dsos {
 	struct list_head head;
 	struct rb_root	 root;	/* rbtree root sorted by long name */
+	pthread_rwlock_t lock;
 };
 
+struct auxtrace_cache;
+
 struct dso {
+	pthread_mutex_t	 lock;
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
@@ -156,6 +162,7 @@ struct dso {
 	u16		 long_name_len;
 	u16		 short_name_len;
 	void		*dwfl;			/* DWARF debug info */
+	struct auxtrace_cache *auxtrace_cache;
 
 	/* dso data file */
 	struct {
@@ -173,7 +180,7 @@ struct dso {
 		void	 *priv;
 		u64	 db_id;
 	};
-
+	atomic_t	 refcnt;
 	char		 name[0];
 };
 
@@ -200,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
 
 int dso__name_len(const struct dso *dso);
 
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+	dso__put(*dso);
+	*dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
 bool dso__loaded(const struct dso *dso, enum map_type type);
 
 bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
@@ -216,7 +234,7 @@ char dso__symtab_origin(const struct dso *dso);
 int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
 				   char *root_dir, char *filename, size_t size);
 bool is_supported_compression(const char *ext);
-bool is_kernel_module(const char *pathname);
+bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
 
@@ -236,7 +254,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 
 /*
  * The dso__data_* external interface provides following functions:
- *   dso__data_fd
+ *   dso__data_get_fd
+ *   dso__data_put_fd
  *   dso__data_close
  *   dso__data_size
  *   dso__data_read_offset
@@ -253,8 +272,11 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  * The current usage of the dso__data_* interface is as follows:
  *
  * Get DSO's fd:
- *   int fd = dso__data_fd(dso, machine);
- *   USE 'fd' SOMEHOW
+ *   int fd = dso__data_get_fd(dso, machine);
+ *   if (fd >= 0) {
+ *       USE 'fd' SOMEHOW
+ *       dso__data_put_fd(dso);
+ *   }
  *
  * Read DSO's data:
  *   n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
@@ -273,7 +295,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  *
  * TODO
 */
-int dso__data_fd(struct dso *dso, struct machine *machine);
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso __maybe_unused);
 void dso__data_close(struct dso *dso);
 
 off_t dso__data_size(struct dso *dso, struct machine *machine);
@@ -285,14 +308,16 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
 
 struct map *dso__new_map(const char *name);
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-				const char *short_name, int dso_type);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type);
 
+void __dsos__add(struct dsos *dsos, struct dso *dso);
 void dsos__add(struct dsos *dsos, struct dso *dso);
-struct dso *dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
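dso__zput() exists for structs that cache a dso pointer: it puts the reference and NULLs the pointer in one step, so a stale pointer cannot be used after the put. A minimal sketch with a hypothetical holder struct:

	struct browser_ctx {		/* hypothetical */
		struct dso *dso;
	};

	static void browser_ctx__exit(struct browser_ctx *ctx)
	{
		dso__zput(ctx->dso);	/* dso__put() + ctx->dso = NULL */
	}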
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index c34e024020c7..57f3ef41c2bc 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -139,11 +139,27 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
 bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
 {
 	const char *name;
+
 	name = dwarf_diename(dw_die);
 	return name ? (strcmp(tname, name) == 0) : false;
 }
 
 /**
+ * die_match_name - Match diename and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Returns false if matching fails.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+	const char *name;
+
+	name = dwarf_diename(dw_die);
+	return name ? strglobmatch(name, glob) : false;
+}
+
+/**
  * die_get_call_lineno - Get callsite line number of inline-function instance
  * @in_die: a DIE of an inlined function instance
  *
@@ -417,6 +433,43 @@ struct __addr_die_search_param {
 	Dwarf_Die	*die_mem;
 };
 
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *ad = data;
+	Dwarf_Addr addr = 0;
+
+	if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+	    !dwarf_highpc(fn_die, &addr) &&
+	    addr == ad->addr) {
+		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param ad;
+	ad.addr = addr;
+	ad.die_mem = die_mem;
+	/* dwarf_getscopes can't find subprogram. */
+	if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+		return NULL;
+	else
+		return die_mem;
+}
+
 /* die_find callback for non-inlined function search */
 static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
 {
@@ -832,19 +885,17 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 /**
  * die_get_typename - Get the name of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for result type name
- * @len: a max-length of @buf
+ * @buf: a strbuf for result type name
  *
- * Get the name of @vr_die and stores it to @buf. Return the actual length
- * of type name if succeeded. Return -E2BIG if @len is not enough long, and
- * Return -ENOENT if failed to find type name.
+ * Get the name of @vr_die and stores it to @buf. Returns 0 if it succeeded,
+ * or -ENOENT if it failed to find the type name.
  * Note that the result will stores typedef name if possible, and stores
  * "*(function_type)" if the type is a function pointer.
  */
-int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 {
 	Dwarf_Die type;
-	int tag, ret, ret2;
+	int tag, ret;
 	const char *tmp = "";
 
 	if (__die_get_real_type(vr_die, &type) == NULL)
@@ -855,8 +906,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
-		ret = snprintf(buf, len, "(function_type)");
-		return (ret >= len) ? -E2BIG : ret;
+		strbuf_addf(buf, "(function_type)");
+		return 0;
 	} else {
 		if (!dwarf_diename(&type))
 			return -ENOENT;
@@ -867,39 +918,156 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
 		else if (tag == DW_TAG_enumeration_type)
 			tmp = "enum ";
 		/* Write a base name */
-		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
-		return (ret >= len) ? -E2BIG : ret;
-	}
-	ret = die_get_typename(&type, buf, len);
-	if (ret > 0) {
-		ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
-		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+		strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+		return 0;
 	}
+	ret = die_get_typename(&type, buf);
+	if (ret == 0)
+		strbuf_addf(buf, "%s", tmp);
+
 	return ret;
 }
 
 /**
  * die_get_varname - Get the name and type of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for type and variable name
- * @len: the max-length of @buf
+ * @buf: a strbuf for type and variable name
  *
  * Get the name and type of @vr_die and stores it in @buf as "type\tname".
  */
-int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 {
-	int ret, ret2;
+	int ret;
 
-	ret = die_get_typename(vr_die, buf, len);
+	ret = die_get_typename(vr_die, buf);
 	if (ret < 0) {
 		pr_debug("Failed to get type, make it unknown.\n");
-		ret = snprintf(buf, len, "(unknown_type)");
+		strbuf_addf(buf, "(unknown_type)");
 	}
-	if (ret > 0) {
-		ret2 = snprintf(buf + ret, len - ret, "\t%s",
-			dwarf_diename(vr_die));
-		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
-	}
+
+	strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+
+	return 0;
+}
+
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+				       struct strbuf *buf)
+{
+	Dwarf_Die *scopes;
+	int count;
+	size_t offset = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	int ret;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	count = dwarf_getscopes_die(vr_die, &scopes);
+
+	/* (*SCOPES)[1] is the DIE for the scope containing that scope */
+	if (count <= 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+				      &start, &end)) > 0) {
+		start -= entry;
+		end -= entry;
+
+		if (first) {
+			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+				    name, start, end);
+			first = false;
+		} else {
+			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+				    start, end);
+		}
+	}
+
+	if (!first)
+		strbuf_addf(buf, "]>");
+
+out:
+	free(scopes);
 	return ret;
 }
 
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	Dwarf_Op *op;
+	size_t nops;
+	size_t offset = 0;
+	Dwarf_Attribute attr;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;
+
+	while ((offset = dwarf_getlocations(
+				&attr, offset, &base,
+				&start, &end, &op, &nops)) > 0) {
+		if (start == 0) {
+			/* Single Location Descriptions */
+			ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+			return ret;
+		}
+
+		/* Location Lists */
+		start -= entry;
+		end -= entry;
+		if (first) {
+			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+				    name, start, end);
+			first = false;
+		} else {
+			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+				    start, end);
+		}
+	}
+
+	if (!first)
+		strbuf_addf(buf, "]>");
+
+	return ret;
+}
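With the strbuf conversion, callers own the buffer lifetime instead of guessing a length up front, and -E2BIG disappears from the interface. A minimal caller sketch, assuming perf's strbuf API (strbuf_init()/strbuf_release()):

	struct strbuf buf;

	strbuf_init(&buf, 64);
	if (die_get_varname(vr_die, &buf) == 0)
		pr_debug("variable: %s\n", buf.buf);	/* formatted as "type\tname" */
	strbuf_release(&buf);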
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index af7dbcd5f929..c42ec366f2a7 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -47,6 +47,9 @@ extern bool die_is_func_instance(Dwarf_Die *dw_die);
 /* Compare diename and tname */
 extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
+/* Matching diename with glob pattern */
+extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
 /* Get callsite line number of inline-function instance */
 extern int die_get_call_lineno(Dwarf_Die *in_die);
 
@@ -82,6 +85,10 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
 extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
 				    Dwarf_Die *die_mem);
 
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+			     Dwarf_Die *die_mem);
+
 /* Search the top inlined function including given address */
 extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
 					  Dwarf_Die *die_mem);
@@ -114,8 +121,10 @@ extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 				  Dwarf_Die *die_mem);
 
 /* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+			     struct strbuf *buf);
 #endif
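die_match_name() is the glob-aware counterpart of die_compare_name(), useful when a probe spec contains wildcards. A hedged usage sketch (die_mem is illustrative):

	/* e.g. while iterating CU functions looking for "tcp_*" */
	if (die_match_name(&die_mem, "tcp_*"))
		/* this DIE's name matches the pattern ... */;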
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
index 275b0ee345f5..7405123692f1 100644
--- a/tools/perf/util/environment.c
+++ b/tools/perf/util/environment.c
@@ -5,5 +5,4 @@
  */
 #include "cache.h"
 
-const char *pager_program;
 int pager_use_color = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ff866c4d2e2f..d7d986d8f23e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -23,12 +23,18 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_FORK]			= "FORK",
 	[PERF_RECORD_READ]			= "READ",
 	[PERF_RECORD_SAMPLE]			= "SAMPLE",
+	[PERF_RECORD_AUX]			= "AUX",
+	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
+	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
 	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
 	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
 	[PERF_RECORD_ID_INDEX]			= "ID_INDEX",
+	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
+	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
+	[PERF_RECORD_AUXTRACE_ERROR]		= "AUXTRACE_ERROR",
 };
 
 const char *perf_event__name(unsigned int id)
@@ -212,10 +218,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       pid_t pid, pid_t tgid,
 				       perf_event__handler_t process,
 				       struct machine *machine,
-				       bool mmap_data)
+				       bool mmap_data,
+				       unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
+	unsigned long long t;
+	bool truncation = false;
+	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
 
 	if (machine__is_default_guest(machine))
@@ -234,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	}
 
 	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
 
 	while (1) {
 		char bf[BUFSIZ];
@@ -247,6 +258,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
+		if ((rdclock() - t) > timeout) {
+			pr_warning("Reading %s time out. "
+				   "You may want to increase "
+				   "the time limit by --proc-map-timeout\n",
+				   filename);
+			truncation = true;
+			goto out;
+		}
+
 		/* ensure null termination since stack will be reused. */
 		strcpy(execname, "");
 
@@ -295,6 +315,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
 		}
 
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
 		if (!strcmp(execname, ""))
 			strcpy(execname, anonstr);
 
@@ -313,6 +337,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			rc = -1;
 			break;
 		}
+
+		if (truncation)
+			break;
 	}
 
 	fclose(fp);
@@ -324,8 +351,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 				   struct machine *machine)
 {
 	int rc = 0;
-	struct rb_node *nd;
+	struct map *pos;
 	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps[MAP__FUNCTION];
 	union perf_event *event = zalloc((sizeof(event->mmap) +
 					  machine->id_hdr_size));
 	if (event == NULL) {
@@ -345,10 +373,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	else
 		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
-	for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
-	     nd; nd = rb_next(nd)) {
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
-		struct map *pos = rb_entry(nd, struct map, rb_node);
 
 		if (pos->dso->kernel)
 			continue;
@@ -381,7 +407,9 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 					      pid_t pid, int full,
 					      perf_event__handler_t process,
 					      struct perf_tool *tool,
-					      struct machine *machine, bool mmap_data)
+					      struct machine *machine,
+					      bool mmap_data,
+					      unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	DIR *tasks;
@@ -398,7 +426,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 			return -1;
 
 		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-							   process, machine, mmap_data);
+							   process, machine, mmap_data,
+							   proc_map_timeout);
 	}
 
 	if (machine__is_default_guest(machine))
@@ -439,7 +468,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 		if (_pid == pid) {
 			/* process the parent's maps too */
 			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-					process, machine, mmap_data);
+					process, machine, mmap_data, proc_map_timeout);
 			if (rc)
 				break;
 		}
@@ -453,7 +482,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
 				      struct machine *machine,
-				      bool mmap_data)
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
 {
 	union perf_event *comm_event, *mmap_event, *fork_event;
 	int err = -1, thread, j;
@@ -476,7 +506,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 					       fork_event,
 					       threads->map[thread], 0,
 					       process, tool, machine,
-					       mmap_data)) {
+					       mmap_data, proc_map_timeout)) {
 			err = -1;
 			break;
 		}
@@ -502,7 +532,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 						 fork_event,
 						 comm_event->comm.pid, 0,
 						 process, tool, machine,
-						 mmap_data)) {
+						 mmap_data, proc_map_timeout)) {
 				err = -1;
 				break;
 			}
@@ -519,7 +549,9 @@ out:
 
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
-				   struct machine *machine, bool mmap_data)
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout)
 {
 	DIR *proc;
 	char proc_path[PATH_MAX];
@@ -559,7 +591,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 		 * one thread couldn't be synthesized.
 		 */
 		__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
-					   1, process, tool, machine, mmap_data);
+					   1, process, tool, machine, mmap_data,
+					   proc_map_timeout);
 	}
 
 	err = 0;
@@ -692,6 +725,30 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 	return machine__process_lost_event(machine, event, sample);
 }
 
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine)
+{
+	return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine)
+{
+	return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{
+	return machine__process_lost_samples_event(machine, event, sample);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -755,6 +812,21 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
 	return machine__process_exit_event(machine, event, sample);
 }
 
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n",
+		       event->aux.aux_offset, event->aux.aux_size,
+		       event->aux.flags,
+		       event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+		       event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " pid: %u tid: %u\n",
+		       event->itrace_start.pid, event->itrace_start.tid);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -774,6 +846,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_MMAP2:
 		ret += perf_event__fprintf_mmap2(event, fp);
 		break;
+	case PERF_RECORD_AUX:
+		ret += perf_event__fprintf_aux(event, fp);
+		break;
+	case PERF_RECORD_ITRACE_START:
+		ret += perf_event__fprintf_itrace_start(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
@@ -877,6 +955,10 @@ void thread__find_addr_location(struct thread *thread,
 	al->sym = NULL;
 }
 
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
 int perf_event__preprocess_sample(const union perf_event *event,
 				  struct machine *machine,
 				  struct addr_location *al,
 				  struct perf_sample *sample)
@@ -937,6 +1019,17 @@ int perf_event__preprocess_sample(const union perf_event *event,
 	return 0;
 }
 
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * entries in it; when done using them (and perhaps after grabbing extra
+ * references to keep a pointer to one of those entries) it must be paired
+ * with addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+	thread__zput(al->thread);
+}
+
 bool is_bts_event(struct perf_event_attr *attr)
 {
 	return attr->type == PERF_TYPE_HARDWARE &&
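The new addr_location__put() must be paired with perf_event__preprocess_sample(), mirroring the comment added above. A minimal caller sketch:

	struct addr_location al;

	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
		return -1;

	/* ... use al.thread, al.map, al.sym ... */

	addr_location__put(&al);	/* drops the al.thread reference via thread__zput() */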
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 09b9e8d3fcf7..c53f36384b64 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -52,6 +52,11 @@ struct lost_event {
 	u64 lost;
 };
 
+struct lost_samples_event {
+	struct perf_event_header header;
+	u64 lost;
+};
+
 /*
  * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
  */
@@ -157,6 +162,8 @@ enum {
 	PERF_IP_FLAG_IN_TX		= 1ULL << 10,
 };
 
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+
 #define PERF_BRANCH_MASK		(\
 	PERF_IP_FLAG_BRANCH		|\
 	PERF_IP_FLAG_CALL		|\
@@ -215,9 +222,17 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
 	PERF_RECORD_ID_INDEX			= 69,
+	PERF_RECORD_AUXTRACE_INFO		= 70,
+	PERF_RECORD_AUXTRACE			= 71,
+	PERF_RECORD_AUXTRACE_ERROR		= 72,
 	PERF_RECORD_HEADER_MAX
 };
 
+enum auxtrace_error_type {
+	PERF_AUXTRACE_ERROR_ITRACE  = 1,
+	PERF_AUXTRACE_ERROR_MAX
+};
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -225,6 +240,12 @@ enum perf_user_event_type { /* above any possible kernel type */
  * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
  * the sum of all struct lost_event.lost fields reported.
  *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
  * The total_period is needed because by default auto-freq is used, so
  * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
  * the total number of low level events, it is necessary to to sum all struct
@@ -234,6 +255,7 @@ struct events_stats {
 	u64 total_period;
 	u64 total_non_filtered_period;
 	u64 total_lost;
+	u64 total_lost_samples;
 	u64 total_invalid_chains;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_non_filtered_samples;
@@ -242,6 +264,8 @@ struct events_stats {
 	u32 nr_invalid_chains;
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
+	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+	u32 nr_proc_map_timeout;
 };
 
 struct attr_event {
@@ -280,6 +304,50 @@ struct id_index_event {
280 struct id_index_entry entries[0]; 304 struct id_index_entry entries[0];
281}; 305};
282 306
307struct auxtrace_info_event {
308 struct perf_event_header header;
309 u32 type;
310 u32 reserved__; /* For alignment */
311 u64 priv[];
312};
313
314struct auxtrace_event {
315 struct perf_event_header header;
316 u64 size;
317 u64 offset;
318 u64 reference;
319 u32 idx;
320 u32 tid;
321 u32 cpu;
322 u32 reserved__; /* For alignment */
323};
324
325#define MAX_AUXTRACE_ERROR_MSG 64
326
327struct auxtrace_error_event {
328 struct perf_event_header header;
329 u32 type;
330 u32 code;
331 u32 cpu;
332 u32 pid;
333 u32 tid;
334 u32 reserved__; /* For alignment */
335 u64 ip;
336 char msg[MAX_AUXTRACE_ERROR_MSG];
337};
338
339struct aux_event {
340 struct perf_event_header header;
341 u64 aux_offset;
342 u64 aux_size;
343 u64 flags;
344};
345
346struct itrace_start_event {
347 struct perf_event_header header;
348 u32 pid, tid;
349};
350
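
A note on the auxtrace_event layout above: in a perf.data stream, a PERF_RECORD_AUXTRACE record is immediately followed by the raw AUX data it describes, so a reader advances past both the fixed-size record and its payload. A sketch under that assumption (the helper name is hypothetical):

    static off_t auxtrace_event__skip(const struct auxtrace_event *ev, off_t pos)
    {
            /* pos points at the record header; ev->size bytes of raw
             * AUX data follow the ev->header.size bytes of the record */
            return pos + ev->header.size + ev->size;
    }
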
283union perf_event { 351union perf_event {
284 struct perf_event_header header; 352 struct perf_event_header header;
285 struct mmap_event mmap; 353 struct mmap_event mmap;
@@ -287,6 +355,7 @@ union perf_event {
287 struct comm_event comm; 355 struct comm_event comm;
288 struct fork_event fork; 356 struct fork_event fork;
289 struct lost_event lost; 357 struct lost_event lost;
358 struct lost_samples_event lost_samples;
290 struct read_event read; 359 struct read_event read;
291 struct throttle_event throttle; 360 struct throttle_event throttle;
292 struct sample_event sample; 361 struct sample_event sample;
@@ -295,6 +364,11 @@ union perf_event {
295 struct tracing_data_event tracing_data; 364 struct tracing_data_event tracing_data;
296 struct build_id_event build_id; 365 struct build_id_event build_id;
297 struct id_index_event id_index; 366 struct id_index_event id_index;
367 struct auxtrace_info_event auxtrace_info;
368 struct auxtrace_event auxtrace;
369 struct auxtrace_error_event auxtrace_error;
370 struct aux_event aux;
371 struct itrace_start_event itrace_start;
298}; 372};
299 373
300void perf_event__print_totals(void); 374void perf_event__print_totals(void);
@@ -310,10 +384,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
310int perf_event__synthesize_thread_map(struct perf_tool *tool, 384int perf_event__synthesize_thread_map(struct perf_tool *tool,
311 struct thread_map *threads, 385 struct thread_map *threads,
312 perf_event__handler_t process, 386 perf_event__handler_t process,
313 struct machine *machine, bool mmap_data); 387 struct machine *machine, bool mmap_data,
388 unsigned int proc_map_timeout);
314int perf_event__synthesize_threads(struct perf_tool *tool, 389int perf_event__synthesize_threads(struct perf_tool *tool,
315 perf_event__handler_t process, 390 perf_event__handler_t process,
316 struct machine *machine, bool mmap_data); 391 struct machine *machine, bool mmap_data,
392 unsigned int proc_map_timeout);
317int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 393int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
318 perf_event__handler_t process, 394 perf_event__handler_t process,
319 struct machine *machine); 395 struct machine *machine);
@@ -330,6 +406,18 @@ int perf_event__process_lost(struct perf_tool *tool,
330 union perf_event *event, 406 union perf_event *event,
331 struct perf_sample *sample, 407 struct perf_sample *sample,
332 struct machine *machine); 408 struct machine *machine);
409int perf_event__process_lost_samples(struct perf_tool *tool,
410 union perf_event *event,
411 struct perf_sample *sample,
412 struct machine *machine);
413int perf_event__process_aux(struct perf_tool *tool,
414 union perf_event *event,
415 struct perf_sample *sample,
416 struct machine *machine);
417int perf_event__process_itrace_start(struct perf_tool *tool,
418 union perf_event *event,
419 struct perf_sample *sample,
420 struct machine *machine);
333int perf_event__process_mmap(struct perf_tool *tool, 421int perf_event__process_mmap(struct perf_tool *tool,
334 union perf_event *event, 422 union perf_event *event,
335 struct perf_sample *sample, 423 struct perf_sample *sample,
@@ -358,6 +446,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
358 struct addr_location *al, 446 struct addr_location *al,
359 struct perf_sample *sample); 447 struct perf_sample *sample);
360 448
449void addr_location__put(struct addr_location *al);
450
361struct thread; 451struct thread;
362 452
363bool is_bts_event(struct perf_event_attr *attr); 453bool is_bts_event(struct perf_event_attr *attr);
@@ -381,12 +471,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
381 pid_t pid, pid_t tgid, 471 pid_t pid, pid_t tgid,
382 perf_event__handler_t process, 472 perf_event__handler_t process,
383 struct machine *machine, 473 struct machine *machine,
384 bool mmap_data); 474 bool mmap_data,
475 unsigned int proc_map_timeout);
385 476
386size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); 477size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
387size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); 478size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
388size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); 479size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
389size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); 480size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
481size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
482size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
390size_t perf_event__fprintf(union perf_event *event, FILE *fp); 483size_t perf_event__fprintf(union perf_event *event, FILE *fp);
391 484
392u64 kallsyms__get_function_start(const char *kallsyms_filename, 485u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 080be93eea96..8366511b45f8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -297,6 +297,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
297 PERF_EVENT_IOC_DISABLE, 0); 297 PERF_EVENT_IOC_DISABLE, 0);
298 } 298 }
299 } 299 }
300
301 evlist->enabled = false;
300} 302}
301 303
302void perf_evlist__enable(struct perf_evlist *evlist) 304void perf_evlist__enable(struct perf_evlist *evlist)
@@ -316,6 +318,13 @@ void perf_evlist__enable(struct perf_evlist *evlist)
316 PERF_EVENT_IOC_ENABLE, 0); 318 PERF_EVENT_IOC_ENABLE, 0);
317 } 319 }
318 } 320 }
321
322 evlist->enabled = true;
323}
324
325void perf_evlist__toggle_enable(struct perf_evlist *evlist)
326{
327 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
319} 328}
320 329
321int perf_evlist__disable_event(struct perf_evlist *evlist, 330int perf_evlist__disable_event(struct perf_evlist *evlist,
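
perf_evlist__toggle_enable() dispatches through a function-pointer ternary keyed on the new evlist->enabled flag. A hypothetical caller, say a TUI key handler that freezes and unfreezes counting:

    static void handle_freeze_key(struct perf_evlist *evlist)
    {
            /* calls perf_evlist__disable() when enabled, else
             * perf_evlist__enable(); the callee updates the flag */
            perf_evlist__toggle_enable(evlist);
    }
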
@@ -634,11 +643,18 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
634union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 643union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
635{ 644{
636 struct perf_mmap *md = &evlist->mmap[idx]; 645 struct perf_mmap *md = &evlist->mmap[idx];
637 u64 head = perf_mmap__read_head(md); 646 u64 head;
638 u64 old = md->prev; 647 u64 old = md->prev;
639 unsigned char *data = md->base + page_size; 648 unsigned char *data = md->base + page_size;
640 union perf_event *event = NULL; 649 union perf_event *event = NULL;
641 650
651 /*
652 * Check if event was unmapped due to a POLLHUP/POLLERR.
653 */
654 if (!atomic_read(&md->refcnt))
655 return NULL;
656
657 head = perf_mmap__read_head(md);
642 if (evlist->overwrite) { 658 if (evlist->overwrite) {
643 /* 659 /*
644 * If we're further behind than half the buffer, there's a chance 660 * If we're further behind than half the buffer, there's a chance
@@ -695,19 +711,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
695 711
696static bool perf_mmap__empty(struct perf_mmap *md) 712static bool perf_mmap__empty(struct perf_mmap *md)
697{ 713{
698 return perf_mmap__read_head(md) == md->prev; 714 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
699} 715}
700 716
701static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) 717static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
702{ 718{
703 ++evlist->mmap[idx].refcnt; 719 atomic_inc(&evlist->mmap[idx].refcnt);
704} 720}
705 721
706static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) 722static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
707{ 723{
708 BUG_ON(evlist->mmap[idx].refcnt == 0); 724 BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
709 725
710 if (--evlist->mmap[idx].refcnt == 0) 726 if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
711 __perf_evlist__munmap(evlist, idx); 727 __perf_evlist__munmap(evlist, idx);
712} 728}
713 729
@@ -721,17 +737,46 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
721 perf_mmap__write_tail(md, old); 737 perf_mmap__write_tail(md, old);
722 } 738 }
723 739
724 if (md->refcnt == 1 && perf_mmap__empty(md)) 740 if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
725 perf_evlist__mmap_put(evlist, idx); 741 perf_evlist__mmap_put(evlist, idx);
726} 742}
727 743
744int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
745 struct auxtrace_mmap_params *mp __maybe_unused,
746 void *userpg __maybe_unused,
747 int fd __maybe_unused)
748{
749 return 0;
750}
751
752void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
753{
754}
755
756void __weak auxtrace_mmap_params__init(
757 struct auxtrace_mmap_params *mp __maybe_unused,
758 off_t auxtrace_offset __maybe_unused,
759 unsigned int auxtrace_pages __maybe_unused,
760 bool auxtrace_overwrite __maybe_unused)
761{
762}
763
764void __weak auxtrace_mmap_params__set_idx(
765 struct auxtrace_mmap_params *mp __maybe_unused,
766 struct perf_evlist *evlist __maybe_unused,
767 int idx __maybe_unused,
768 bool per_cpu __maybe_unused)
769{
770}
771
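
The four __weak stubs above are the usual pattern for optional functionality: evlist.c links and behaves sanely even when AUX area support isn't built in, and strong definitions (provided by auxtrace.c in this series) override the stubs at link time. A two-file illustration of the mechanism, with hypothetical names:

    /* defaults.c - weak default, used when nothing else defines hook() */
    int __weak hook(void)
    {
            return 0;
    }

    /* feature.c - a strong definition anywhere in the link wins */
    int hook(void)
    {
            return 1;
    }
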
728static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) 772static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
729{ 773{
730 if (evlist->mmap[idx].base != NULL) { 774 if (evlist->mmap[idx].base != NULL) {
731 munmap(evlist->mmap[idx].base, evlist->mmap_len); 775 munmap(evlist->mmap[idx].base, evlist->mmap_len);
732 evlist->mmap[idx].base = NULL; 776 evlist->mmap[idx].base = NULL;
733 evlist->mmap[idx].refcnt = 0; 777 atomic_set(&evlist->mmap[idx].refcnt, 0);
734 } 778 }
779 auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
735} 780}
736 781
737void perf_evlist__munmap(struct perf_evlist *evlist) 782void perf_evlist__munmap(struct perf_evlist *evlist)
@@ -759,6 +804,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
759struct mmap_params { 804struct mmap_params {
760 int prot; 805 int prot;
761 int mask; 806 int mask;
807 struct auxtrace_mmap_params auxtrace_mp;
762}; 808};
763 809
764static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, 810static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
@@ -777,7 +823,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
777 * evlist layer can't just drop it when filtering events in 823 * evlist layer can't just drop it when filtering events in
778 * perf_evlist__filter_pollfd(). 824 * perf_evlist__filter_pollfd().
779 */ 825 */
780 evlist->mmap[idx].refcnt = 2; 826 atomic_set(&evlist->mmap[idx].refcnt, 2);
781 evlist->mmap[idx].prev = 0; 827 evlist->mmap[idx].prev = 0;
782 evlist->mmap[idx].mask = mp->mask; 828 evlist->mmap[idx].mask = mp->mask;
783 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, 829 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@@ -789,6 +835,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
789 return -1; 835 return -1;
790 } 836 }
791 837
838 if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
839 &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
840 return -1;
841
792 return 0; 842 return 0;
793} 843}
794 844
@@ -853,6 +903,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
853 for (cpu = 0; cpu < nr_cpus; cpu++) { 903 for (cpu = 0; cpu < nr_cpus; cpu++) {
854 int output = -1; 904 int output = -1;
855 905
906 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
907 true);
908
856 for (thread = 0; thread < nr_threads; thread++) { 909 for (thread = 0; thread < nr_threads; thread++) {
857 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 910 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
858 thread, &output)) 911 thread, &output))
@@ -878,6 +931,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
878 for (thread = 0; thread < nr_threads; thread++) { 931 for (thread = 0; thread < nr_threads; thread++) {
879 int output = -1; 932 int output = -1;
880 933
934 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
935 false);
936
881 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 937 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
882 &output)) 938 &output))
883 goto out_unmap; 939 goto out_unmap;
@@ -960,10 +1016,8 @@ static long parse_pages_arg(const char *str, unsigned long min,
960 return pages; 1016 return pages;
961} 1017}
962 1018
963int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1019int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
964 int unset __maybe_unused)
965{ 1020{
966 unsigned int *mmap_pages = opt->value;
967 unsigned long max = UINT_MAX; 1021 unsigned long max = UINT_MAX;
968 long pages; 1022 long pages;
969 1023
@@ -980,20 +1034,32 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
980 return 0; 1034 return 0;
981} 1035}
982 1036
1037int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1038 int unset __maybe_unused)
1039{
1040 return __perf_evlist__parse_mmap_pages(opt->value, str);
1041}
1042
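
Splitting __perf_evlist__parse_mmap_pages() out of the option callback lets other code reuse the size/suffix parsing without constructing a struct option. A hypothetical call site for an AUX-area pages argument:

    static int parse_aux_pages(const char *str, unsigned int *pages)
    {
            /* same parsing rules as --mmap-pages, no struct option */
            return __perf_evlist__parse_mmap_pages(pages, str);
    }
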
983/** 1043/**
984 * perf_evlist__mmap - Create mmaps to receive events. 1044 * perf_evlist__mmap_ex - Create mmaps to receive events.
985 * @evlist: list of events 1045 * @evlist: list of events
986 * @pages: map length in pages 1046 * @pages: map length in pages
987 * @overwrite: overwrite older events? 1047 * @overwrite: overwrite older events?
 1048 * @auxtrace_pages: auxtrace map length in pages
 1049 * @auxtrace_overwrite: overwrite older auxtrace data?
988 * 1050 *
989 * If @overwrite is %false the user needs to signal event consumption using 1051 * If @overwrite is %false the user needs to signal event consumption using
990 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1052 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
991 * automatically. 1053 * automatically.
992 * 1054 *
1055 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
1056 * consumption using auxtrace_mmap__write_tail().
1057 *
993 * Return: %0 on success, negative error code otherwise. 1058 * Return: %0 on success, negative error code otherwise.
994 */ 1059 */
995int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1060int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
996 bool overwrite) 1061 bool overwrite, unsigned int auxtrace_pages,
1062 bool auxtrace_overwrite)
997{ 1063{
998 struct perf_evsel *evsel; 1064 struct perf_evsel *evsel;
999 const struct cpu_map *cpus = evlist->cpus; 1065 const struct cpu_map *cpus = evlist->cpus;
@@ -1013,6 +1079,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1013 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1079 pr_debug("mmap size %zuB\n", evlist->mmap_len);
1014 mp.mask = evlist->mmap_len - page_size - 1; 1080 mp.mask = evlist->mmap_len - page_size - 1;
1015 1081
1082 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
1083 auxtrace_pages, auxtrace_overwrite);
1084
1016 evlist__for_each(evlist, evsel) { 1085 evlist__for_each(evlist, evsel) {
1017 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1086 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1018 evsel->sample_id == NULL && 1087 evsel->sample_id == NULL &&
@@ -1026,6 +1095,12 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1026 return perf_evlist__mmap_per_cpu(evlist, &mp); 1095 return perf_evlist__mmap_per_cpu(evlist, &mp);
1027} 1096}
1028 1097
1098int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1099 bool overwrite)
1100{
1101 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
1102}
1103
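
With this wrapper, existing perf_evlist__mmap() callers keep the old no-AUX behavior while AUX-aware callers opt in explicitly. A call sketch with illustrative sizes:

    static int map_buffers(struct perf_evlist *evlist, bool want_aux)
    {
            if (want_aux)   /* 512 data pages plus a 128-page AUX area */
                    return perf_evlist__mmap_ex(evlist, 512, false, 128, true);
            return perf_evlist__mmap(evlist, 512, false);
    }
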
1029int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1104int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1030{ 1105{
1031 evlist->threads = thread_map__new_str(target->pid, target->tid, 1106 evlist->threads = thread_map__new_str(target->pid, target->tid,
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b5cce95d644e..a8489b9d2812 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
1#ifndef __PERF_EVLIST_H 1#ifndef __PERF_EVLIST_H
2#define __PERF_EVLIST_H 1 2#define __PERF_EVLIST_H 1
3 3
4#include <linux/atomic.h>
4#include <linux/list.h> 5#include <linux/list.h>
5#include <api/fd/array.h> 6#include <api/fd/array.h>
6#include <stdio.h> 7#include <stdio.h>
@@ -8,6 +9,7 @@
8#include "event.h" 9#include "event.h"
9#include "evsel.h" 10#include "evsel.h"
10#include "util.h" 11#include "util.h"
12#include "auxtrace.h"
11#include <unistd.h> 13#include <unistd.h>
12 14
13struct pollfd; 15struct pollfd;
@@ -26,8 +28,9 @@ struct record_opts;
26struct perf_mmap { 28struct perf_mmap {
27 void *base; 29 void *base;
28 int mask; 30 int mask;
29 int refcnt; 31 atomic_t refcnt;
30 u64 prev; 32 u64 prev;
33 struct auxtrace_mmap auxtrace_mmap;
31 char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); 34 char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
32}; 35};
33 36
@@ -37,6 +40,8 @@ struct perf_evlist {
37 int nr_entries; 40 int nr_entries;
38 int nr_groups; 41 int nr_groups;
39 int nr_mmaps; 42 int nr_mmaps;
43 bool overwrite;
44 bool enabled;
40 size_t mmap_len; 45 size_t mmap_len;
41 int id_pos; 46 int id_pos;
42 int is_pos; 47 int is_pos;
@@ -45,7 +50,6 @@ struct perf_evlist {
45 int cork_fd; 50 int cork_fd;
46 pid_t pid; 51 pid_t pid;
47 } workload; 52 } workload;
48 bool overwrite;
49 struct fdarray pollfd; 53 struct fdarray pollfd;
50 struct perf_mmap *mmap; 54 struct perf_mmap *mmap;
51 struct thread_map *threads; 55 struct thread_map *threads;
@@ -122,16 +126,21 @@ int perf_evlist__start_workload(struct perf_evlist *evlist);
122 126
123struct option; 127struct option;
124 128
129int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
125int perf_evlist__parse_mmap_pages(const struct option *opt, 130int perf_evlist__parse_mmap_pages(const struct option *opt,
126 const char *str, 131 const char *str,
127 int unset); 132 int unset);
128 133
134int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
135 bool overwrite, unsigned int auxtrace_pages,
136 bool auxtrace_overwrite);
129int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 137int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
130 bool overwrite); 138 bool overwrite);
131void perf_evlist__munmap(struct perf_evlist *evlist); 139void perf_evlist__munmap(struct perf_evlist *evlist);
132 140
133void perf_evlist__disable(struct perf_evlist *evlist); 141void perf_evlist__disable(struct perf_evlist *evlist);
134void perf_evlist__enable(struct perf_evlist *evlist); 142void perf_evlist__enable(struct perf_evlist *evlist);
143void perf_evlist__toggle_enable(struct perf_evlist *evlist);
135 144
136int perf_evlist__disable_event(struct perf_evlist *evlist, 145int perf_evlist__disable_event(struct perf_evlist *evlist,
137 struct perf_evsel *evsel); 146 struct perf_evsel *evsel);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 33e3fd8c2e68..33449decf7bd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -26,6 +26,7 @@
26#include "perf_regs.h" 26#include "perf_regs.h"
27#include "debug.h" 27#include "debug.h"
28#include "trace-event.h" 28#include "trace-event.h"
29#include "stat.h"
29 30
30static struct { 31static struct {
31 bool sample_id_all; 32 bool sample_id_all;
@@ -851,19 +852,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
851 return 0; 852 return 0;
852} 853}
853 854
854void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
855{
856 memset(evsel->counts, 0, (sizeof(*evsel->counts) +
857 (ncpus * sizeof(struct perf_counts_values))));
858}
859
860int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
861{
862 evsel->counts = zalloc((sizeof(*evsel->counts) +
863 (ncpus * sizeof(struct perf_counts_values))));
864 return evsel->counts != NULL ? 0 : -ENOMEM;
865}
866
867static void perf_evsel__free_fd(struct perf_evsel *evsel) 855static void perf_evsel__free_fd(struct perf_evsel *evsel)
868{ 856{
869 xyarray__delete(evsel->fd); 857 xyarray__delete(evsel->fd);
@@ -891,11 +879,6 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
891 } 879 }
892} 880}
893 881
894void perf_evsel__free_counts(struct perf_evsel *evsel)
895{
896 zfree(&evsel->counts);
897}
898
899void perf_evsel__exit(struct perf_evsel *evsel) 882void perf_evsel__exit(struct perf_evsel *evsel)
900{ 883{
901 assert(list_empty(&evsel->node)); 884 assert(list_empty(&evsel->node));
@@ -1058,7 +1041,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
1058 1041
1059#define BUF_SIZE 1024 1042#define BUF_SIZE 1024
1060 1043
1061#define p_hex(val) snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val)) 1044#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
1062#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) 1045#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
1063#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) 1046#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
1064#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) 1047#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
@@ -1121,6 +1104,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
1121 PRINT_ATTRf(sample_stack_user, p_unsigned); 1104 PRINT_ATTRf(sample_stack_user, p_unsigned);
1122 PRINT_ATTRf(clockid, p_signed); 1105 PRINT_ATTRf(clockid, p_signed);
1123 PRINT_ATTRf(sample_regs_intr, p_hex); 1106 PRINT_ATTRf(sample_regs_intr, p_hex);
1107 PRINT_ATTRf(aux_watermark, p_unsigned);
1124 1108
1125 return ret; 1109 return ret;
1126} 1110}
@@ -2148,7 +2132,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
2148 case EMFILE: 2132 case EMFILE:
2149 return scnprintf(msg, size, "%s", 2133 return scnprintf(msg, size, "%s",
2150 "Too many events are opened.\n" 2134 "Too many events are opened.\n"
2151 "Try again after reducing the number of events."); 2135 "Probably the maximum number of open file descriptors has been reached.\n"
2136 "Hint: Try again after reducing the number of events.\n"
2137 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2152 case ENODEV: 2138 case ENODEV:
2153 if (target->cpu_list) 2139 if (target->cpu_list)
2154 return scnprintf(msg, size, "%s", 2140 return scnprintf(msg, size, "%s",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index e486151b0308..bb0579e8a10a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -73,7 +73,6 @@ struct perf_evsel {
73 char *name; 73 char *name;
74 double scale; 74 double scale;
75 const char *unit; 75 const char *unit;
76 bool snapshot;
77 struct event_format *tp_format; 76 struct event_format *tp_format;
78 union { 77 union {
79 void *priv; 78 void *priv;
@@ -86,6 +85,7 @@ struct perf_evsel {
86 unsigned int sample_size; 85 unsigned int sample_size;
87 int id_pos; 86 int id_pos;
88 int is_pos; 87 int is_pos;
88 bool snapshot;
89 bool supported; 89 bool supported;
90 bool needs_swap; 90 bool needs_swap;
91 bool no_aux_samples; 91 bool no_aux_samples;
@@ -93,11 +93,11 @@ struct perf_evsel {
93 bool system_wide; 93 bool system_wide;
94 bool tracking; 94 bool tracking;
95 bool per_pkg; 95 bool per_pkg;
96 unsigned long *per_pkg_mask;
97 /* parse modifier helper */ 96 /* parse modifier helper */
98 int exclude_GH; 97 int exclude_GH;
99 int nr_members; 98 int nr_members;
100 int sample_read; 99 int sample_read;
100 unsigned long *per_pkg_mask;
101 struct perf_evsel *leader; 101 struct perf_evsel *leader;
102 char *group_name; 102 char *group_name;
103}; 103};
@@ -170,9 +170,6 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel);
170int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); 170int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
171 171
172int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); 172int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
173int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
174void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
175void perf_evsel__free_counts(struct perf_evsel *evsel);
176void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 173void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
177 174
178void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, 175void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 918fd8ae2d80..21a77e7a171e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -869,6 +869,20 @@ static int write_branch_stack(int fd __maybe_unused,
869 return 0; 869 return 0;
870} 870}
871 871
872static int write_auxtrace(int fd, struct perf_header *h,
873 struct perf_evlist *evlist __maybe_unused)
874{
875 struct perf_session *session;
876 int err;
877
878 session = container_of(h, struct perf_session, header);
879
880 err = auxtrace_index__write(fd, &session->auxtrace_index);
881 if (err < 0)
882 pr_err("Failed to write auxtrace index\n");
883 return err;
884}
885
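
write_auxtrace() recovers the perf_session from its embedded perf_header with container_of(), which is plain pointer arithmetic against the member offset. An equivalent sketch:

    #include <stddef.h>

    #define container_of_sketch(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    /* given struct perf_session { ...; struct perf_header header; ... }:
     * session = container_of_sketch(h, struct perf_session, header);  */
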
872static void print_hostname(struct perf_header *ph, int fd __maybe_unused, 886static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
873 FILE *fp) 887 FILE *fp)
874{ 888{
@@ -1151,6 +1165,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused,
1151 fprintf(fp, "# contains samples with branch stack\n"); 1165 fprintf(fp, "# contains samples with branch stack\n");
1152} 1166}
1153 1167
1168static void print_auxtrace(struct perf_header *ph __maybe_unused,
1169 int fd __maybe_unused, FILE *fp)
1170{
1171 fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
1172}
1173
1154static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, 1174static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
1155 FILE *fp) 1175 FILE *fp)
1156{ 1176{
@@ -1218,9 +1238,8 @@ static int __event_process_build_id(struct build_id_event *bev,
1218 struct perf_session *session) 1238 struct perf_session *session)
1219{ 1239{
1220 int err = -1; 1240 int err = -1;
1221 struct dsos *dsos;
1222 struct machine *machine; 1241 struct machine *machine;
1223 u16 misc; 1242 u16 cpumode;
1224 struct dso *dso; 1243 struct dso *dso;
1225 enum dso_kernel_type dso_type; 1244 enum dso_kernel_type dso_type;
1226 1245
@@ -1228,39 +1247,37 @@ static int __event_process_build_id(struct build_id_event *bev,
1228 if (!machine) 1247 if (!machine)
1229 goto out; 1248 goto out;
1230 1249
1231 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 1250 cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1232 1251
1233 switch (misc) { 1252 switch (cpumode) {
1234 case PERF_RECORD_MISC_KERNEL: 1253 case PERF_RECORD_MISC_KERNEL:
1235 dso_type = DSO_TYPE_KERNEL; 1254 dso_type = DSO_TYPE_KERNEL;
1236 dsos = &machine->kernel_dsos;
1237 break; 1255 break;
1238 case PERF_RECORD_MISC_GUEST_KERNEL: 1256 case PERF_RECORD_MISC_GUEST_KERNEL:
1239 dso_type = DSO_TYPE_GUEST_KERNEL; 1257 dso_type = DSO_TYPE_GUEST_KERNEL;
1240 dsos = &machine->kernel_dsos;
1241 break; 1258 break;
1242 case PERF_RECORD_MISC_USER: 1259 case PERF_RECORD_MISC_USER:
1243 case PERF_RECORD_MISC_GUEST_USER: 1260 case PERF_RECORD_MISC_GUEST_USER:
1244 dso_type = DSO_TYPE_USER; 1261 dso_type = DSO_TYPE_USER;
1245 dsos = &machine->user_dsos;
1246 break; 1262 break;
1247 default: 1263 default:
1248 goto out; 1264 goto out;
1249 } 1265 }
1250 1266
1251 dso = __dsos__findnew(dsos, filename); 1267 dso = machine__findnew_dso(machine, filename);
1252 if (dso != NULL) { 1268 if (dso != NULL) {
1253 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 1269 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1254 1270
1255 dso__set_build_id(dso, &bev->build_id); 1271 dso__set_build_id(dso, &bev->build_id);
1256 1272
1257 if (!is_kernel_module(filename)) 1273 if (!is_kernel_module(filename, cpumode))
1258 dso->kernel = dso_type; 1274 dso->kernel = dso_type;
1259 1275
1260 build_id__sprintf(dso->build_id, sizeof(dso->build_id), 1276 build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1261 sbuild_id); 1277 sbuild_id);
1262 pr_debug("build id event received for %s: %s\n", 1278 pr_debug("build id event received for %s: %s\n",
1263 dso->long_name, sbuild_id); 1279 dso->long_name, sbuild_id);
1280 dso__put(dso);
1264 } 1281 }
1265 1282
1266 err = 0; 1283 err = 0;
@@ -1821,6 +1838,22 @@ out_free:
1821 return ret; 1838 return ret;
1822} 1839}
1823 1840
1841static int process_auxtrace(struct perf_file_section *section,
1842 struct perf_header *ph, int fd,
1843 void *data __maybe_unused)
1844{
1845 struct perf_session *session;
1846 int err;
1847
1848 session = container_of(ph, struct perf_session, header);
1849
1850 err = auxtrace_index__process(fd, section->size, session,
1851 ph->needs_swap);
1852 if (err < 0)
1853 pr_err("Failed to process auxtrace index\n");
1854 return err;
1855}
1856
1824struct feature_ops { 1857struct feature_ops {
1825 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); 1858 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
1826 void (*print)(struct perf_header *h, int fd, FILE *fp); 1859 void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1861,6 +1894,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1861 FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), 1894 FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
1862 FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), 1895 FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
1863 FEAT_OPP(HEADER_GROUP_DESC, group_desc), 1896 FEAT_OPP(HEADER_GROUP_DESC, group_desc),
1897 FEAT_OPP(HEADER_AUXTRACE, auxtrace),
1864}; 1898};
1865 1899
1866struct header_print_data { 1900struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3bb90ac172a1..d4d57962c591 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -30,6 +30,7 @@ enum {
30 HEADER_BRANCH_STACK, 30 HEADER_BRANCH_STACK,
31 HEADER_PMU_MAPPINGS, 31 HEADER_PMU_MAPPINGS,
32 HEADER_GROUP_DESC, 32 HEADER_GROUP_DESC,
33 HEADER_AUXTRACE,
33 HEADER_LAST_FEATURE, 34 HEADER_LAST_FEATURE,
34 HEADER_FEAT_BITS = 256, 35 HEADER_FEAT_BITS = 256,
35}; 36};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cc22b9158b93..6f28d53d4e46 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -313,8 +313,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
313 memset(&he->stat, 0, sizeof(he->stat)); 313 memset(&he->stat, 0, sizeof(he->stat));
314 } 314 }
315 315
316 if (he->ms.map) 316 map__get(he->ms.map);
317 he->ms.map->referenced = true;
318 317
319 if (he->branch_info) { 318 if (he->branch_info) {
320 /* 319 /*
@@ -324,6 +323,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
324 */ 323 */
325 he->branch_info = malloc(sizeof(*he->branch_info)); 324 he->branch_info = malloc(sizeof(*he->branch_info));
326 if (he->branch_info == NULL) { 325 if (he->branch_info == NULL) {
326 map__zput(he->ms.map);
327 free(he->stat_acc); 327 free(he->stat_acc);
328 free(he); 328 free(he);
329 return NULL; 329 return NULL;
@@ -332,17 +332,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
332 memcpy(he->branch_info, template->branch_info, 332 memcpy(he->branch_info, template->branch_info,
333 sizeof(*he->branch_info)); 333 sizeof(*he->branch_info));
334 334
335 if (he->branch_info->from.map) 335 map__get(he->branch_info->from.map);
336 he->branch_info->from.map->referenced = true; 336 map__get(he->branch_info->to.map);
337 if (he->branch_info->to.map)
338 he->branch_info->to.map->referenced = true;
339 } 337 }
340 338
341 if (he->mem_info) { 339 if (he->mem_info) {
342 if (he->mem_info->iaddr.map) 340 map__get(he->mem_info->iaddr.map);
343 he->mem_info->iaddr.map->referenced = true; 341 map__get(he->mem_info->daddr.map);
344 if (he->mem_info->daddr.map)
345 he->mem_info->daddr.map->referenced = true;
346 } 342 }
347 343
348 if (symbol_conf.use_callchain) 344 if (symbol_conf.use_callchain)
@@ -362,10 +358,10 @@ static u8 symbol__parent_filter(const struct symbol *parent)
362 return 0; 358 return 0;
363} 359}
364 360
365static struct hist_entry *add_hist_entry(struct hists *hists, 361static struct hist_entry *hists__findnew_entry(struct hists *hists,
366 struct hist_entry *entry, 362 struct hist_entry *entry,
367 struct addr_location *al, 363 struct addr_location *al,
368 bool sample_self) 364 bool sample_self)
369{ 365{
370 struct rb_node **p; 366 struct rb_node **p;
371 struct rb_node *parent = NULL; 367 struct rb_node *parent = NULL;
@@ -407,9 +403,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
407 * the history counter to increment. 403 * the history counter to increment.
408 */ 404 */
409 if (he->ms.map != entry->ms.map) { 405 if (he->ms.map != entry->ms.map) {
410 he->ms.map = entry->ms.map; 406 map__put(he->ms.map);
411 if (he->ms.map) 407 he->ms.map = map__get(entry->ms.map);
412 he->ms.map->referenced = true;
413 } 408 }
414 goto out; 409 goto out;
415 } 410 }
@@ -468,7 +463,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
468 .transaction = transaction, 463 .transaction = transaction,
469 }; 464 };
470 465
471 return add_hist_entry(hists, &entry, al, sample_self); 466 return hists__findnew_entry(hists, &entry, al, sample_self);
472} 467}
473 468
474static int 469static int
@@ -548,9 +543,9 @@ iter_finish_mem_entry(struct hist_entry_iter *iter,
548 543
549out: 544out:
550 /* 545 /*
551 * We don't need to free iter->priv (mem_info) here since 546 * We don't need to free iter->priv (mem_info) here since the mem info
552 * the mem info was either already freed in add_hist_entry() or 547 * was either already freed in hists__findnew_entry() or passed to a
553 * passed to a new hist entry by hist_entry__new(). 548 * new hist entry by hist_entry__new().
554 */ 549 */
555 iter->priv = NULL; 550 iter->priv = NULL;
556 551
@@ -851,19 +846,15 @@ const struct hist_iter_ops hist_iter_cumulative = {
851}; 846};
852 847
853int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 848int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
854 struct perf_evsel *evsel, struct perf_sample *sample,
855 int max_stack_depth, void *arg) 849 int max_stack_depth, void *arg)
856{ 850{
857 int err, err2; 851 int err, err2;
858 852
859 err = sample__resolve_callchain(sample, &iter->parent, evsel, al, 853 err = sample__resolve_callchain(iter->sample, &iter->parent,
860 max_stack_depth); 854 iter->evsel, al, max_stack_depth);
861 if (err) 855 if (err)
862 return err; 856 return err;
863 857
864 iter->evsel = evsel;
865 iter->sample = sample;
866
867 err = iter->ops->prepare_entry(iter, al); 858 err = iter->ops->prepare_entry(iter, al);
868 if (err) 859 if (err)
869 goto out; 860 goto out;
@@ -937,8 +928,20 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
937void hist_entry__delete(struct hist_entry *he) 928void hist_entry__delete(struct hist_entry *he)
938{ 929{
939 thread__zput(he->thread); 930 thread__zput(he->thread);
940 zfree(&he->branch_info); 931 map__zput(he->ms.map);
941 zfree(&he->mem_info); 932
933 if (he->branch_info) {
934 map__zput(he->branch_info->from.map);
935 map__zput(he->branch_info->to.map);
936 zfree(&he->branch_info);
937 }
938
939 if (he->mem_info) {
940 map__zput(he->mem_info->iaddr.map);
941 map__zput(he->mem_info->daddr.map);
942 zfree(&he->mem_info);
943 }
944
942 zfree(&he->stat_acc); 945 zfree(&he->stat_acc);
943 free_srcline(he->srcline); 946 free_srcline(he->srcline);
944 free_callchain(he->callchain); 947 free_callchain(he->callchain);
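
The hist.c changes above replace the old map->referenced flag with plain reference counting: every map pointer a hist_entry stores is pinned with map__get() (NULL-safe in perf) and released in hist_entry__delete() with map__zput(), which also NULLs the field so stale uses trip early. The swap in hists__findnew_entry() follows the same discipline, sketched:

    static void entry__set_map(struct hist_entry *he, struct map *map)
    {
            map__put(he->ms.map);           /* release the old reference */
            he->ms.map = map__get(map);     /* pin the new one */
    }
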
@@ -1163,7 +1166,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h
1163 return; 1166 return;
1164 1167
1165 /* force fold unfiltered entry for simplicity */ 1168 /* force fold unfiltered entry for simplicity */
1166 h->ms.unfolded = false; 1169 h->unfolded = false;
1167 h->row_offset = 0; 1170 h->row_offset = 0;
1168 h->nr_rows = 0; 1171 h->nr_rows = 0;
1169 1172
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9f31b89a527a..5ed8d9c22981 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -111,7 +111,6 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
111 u64 weight, u64 transaction, 111 u64 weight, u64 transaction,
112 bool sample_self); 112 bool sample_self);
113int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 113int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
114 struct perf_evsel *evsel, struct perf_sample *sample,
115 int max_stack_depth, void *arg); 114 int max_stack_depth, void *arg);
116 115
117int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); 116int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h
deleted file mode 100644
index fef6dbc9ce13..000000000000
--- a/tools/perf/util/include/linux/poison.h
+++ /dev/null
@@ -1 +0,0 @@
1#include "../../../../include/linux/poison.h"
diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
index 2a030c5af3aa..f06d89f0b867 100644
--- a/tools/perf/util/include/linux/rbtree.h
+++ b/tools/perf/util/include/linux/rbtree.h
@@ -1,2 +1,16 @@
1#ifndef __TOOLS_LINUX_PERF_RBTREE_H
2#define __TOOLS_LINUX_PERF_RBTREE_H
1#include <stdbool.h> 3#include <stdbool.h>
2#include "../../../../include/linux/rbtree.h" 4#include "../../../../include/linux/rbtree.h"
5
6/*
7 * Handy for checking that we are not deleting an entry that is
8 * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
9 * probably should be moved to lib/rbtree.c...
10 */
11static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
12{
13 rb_erase(n, root);
14 RB_CLEAR_NODE(n);
15}
16#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
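
rb_erase_init() pairs removal with RB_CLEAR_NODE(), so a node taken off a tree reads as "empty" afterwards and double-erase or membership bugs become checkable. Usage sketch (assumes linux/rbtree.h and assert.h):

    static void detach(struct rb_root *root, struct rb_node *n)
    {
            rb_erase_init(n, root);
            assert(RB_EMPTY_NODE(n));   /* cleared: provably off the tree */
    }
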
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 527e032e24f6..4744673aff1b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -14,20 +14,23 @@
14#include "unwind.h" 14#include "unwind.h"
15#include "linux/hash.h" 15#include "linux/hash.h"
16 16
17static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
18
17static void dsos__init(struct dsos *dsos) 19static void dsos__init(struct dsos *dsos)
18{ 20{
19 INIT_LIST_HEAD(&dsos->head); 21 INIT_LIST_HEAD(&dsos->head);
20 dsos->root = RB_ROOT; 22 dsos->root = RB_ROOT;
23 pthread_rwlock_init(&dsos->lock, NULL);
21} 24}
22 25
23int machine__init(struct machine *machine, const char *root_dir, pid_t pid) 26int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
24{ 27{
25 map_groups__init(&machine->kmaps, machine); 28 map_groups__init(&machine->kmaps, machine);
26 RB_CLEAR_NODE(&machine->rb_node); 29 RB_CLEAR_NODE(&machine->rb_node);
27 dsos__init(&machine->user_dsos); 30 dsos__init(&machine->dsos);
28 dsos__init(&machine->kernel_dsos);
29 31
30 machine->threads = RB_ROOT; 32 machine->threads = RB_ROOT;
33 pthread_rwlock_init(&machine->threads_lock, NULL);
31 INIT_LIST_HEAD(&machine->dead_threads); 34 INIT_LIST_HEAD(&machine->dead_threads);
32 machine->last_match = NULL; 35 machine->last_match = NULL;
33 36
@@ -54,6 +57,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
54 57
55 snprintf(comm, sizeof(comm), "[guest/%d]", pid); 58 snprintf(comm, sizeof(comm), "[guest/%d]", pid);
56 thread__set_comm(thread, comm, 0); 59 thread__set_comm(thread, comm, 0);
60 thread__put(thread);
57 } 61 }
58 62
59 machine->current_tid = NULL; 63 machine->current_tid = NULL;
@@ -78,37 +82,50 @@ out_delete:
78 return NULL; 82 return NULL;
79} 83}
80 84
81static void dsos__delete(struct dsos *dsos) 85static void dsos__purge(struct dsos *dsos)
82{ 86{
83 struct dso *pos, *n; 87 struct dso *pos, *n;
84 88
89 pthread_rwlock_wrlock(&dsos->lock);
90
85 list_for_each_entry_safe(pos, n, &dsos->head, node) { 91 list_for_each_entry_safe(pos, n, &dsos->head, node) {
86 RB_CLEAR_NODE(&pos->rb_node); 92 RB_CLEAR_NODE(&pos->rb_node);
87 list_del(&pos->node); 93 list_del_init(&pos->node);
88 dso__delete(pos); 94 dso__put(pos);
89 } 95 }
96
97 pthread_rwlock_unlock(&dsos->lock);
98}
99
100static void dsos__exit(struct dsos *dsos)
101{
102 dsos__purge(dsos);
103 pthread_rwlock_destroy(&dsos->lock);
90} 104}
91 105
92void machine__delete_threads(struct machine *machine) 106void machine__delete_threads(struct machine *machine)
93{ 107{
94 struct rb_node *nd = rb_first(&machine->threads); 108 struct rb_node *nd;
95 109
110 pthread_rwlock_wrlock(&machine->threads_lock);
111 nd = rb_first(&machine->threads);
96 while (nd) { 112 while (nd) {
97 struct thread *t = rb_entry(nd, struct thread, rb_node); 113 struct thread *t = rb_entry(nd, struct thread, rb_node);
98 114
99 nd = rb_next(nd); 115 nd = rb_next(nd);
100 machine__remove_thread(machine, t); 116 __machine__remove_thread(machine, t, false);
101 } 117 }
118 pthread_rwlock_unlock(&machine->threads_lock);
102} 119}
103 120
104void machine__exit(struct machine *machine) 121void machine__exit(struct machine *machine)
105{ 122{
106 map_groups__exit(&machine->kmaps); 123 map_groups__exit(&machine->kmaps);
107 dsos__delete(&machine->user_dsos); 124 dsos__exit(&machine->dsos);
108 dsos__delete(&machine->kernel_dsos); 125 machine__exit_vdso(machine);
109 vdso__exit(machine);
110 zfree(&machine->root_dir); 126 zfree(&machine->root_dir);
111 zfree(&machine->current_tid); 127 zfree(&machine->current_tid);
128 pthread_rwlock_destroy(&machine->threads_lock);
112} 129}
113 130
114void machine__delete(struct machine *machine) 131void machine__delete(struct machine *machine)
@@ -303,7 +320,7 @@ static void machine__update_thread_pid(struct machine *machine,
303 if (th->pid_ == th->tid) 320 if (th->pid_ == th->tid)
304 return; 321 return;
305 322
306 leader = machine__findnew_thread(machine, th->pid_, th->pid_); 323 leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
307 if (!leader) 324 if (!leader)
308 goto out_err; 325 goto out_err;
309 326
@@ -325,7 +342,7 @@ static void machine__update_thread_pid(struct machine *machine,
325 if (!map_groups__empty(th->mg)) 342 if (!map_groups__empty(th->mg))
326 pr_err("Discarding thread maps for %d:%d\n", 343 pr_err("Discarding thread maps for %d:%d\n",
327 th->pid_, th->tid); 344 th->pid_, th->tid);
328 map_groups__delete(th->mg); 345 map_groups__put(th->mg);
329 } 346 }
330 347
331 th->mg = map_groups__get(leader->mg); 348 th->mg = map_groups__get(leader->mg);
@@ -336,9 +353,9 @@ out_err:
336 pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid); 353 pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
337} 354}
338 355
339static struct thread *__machine__findnew_thread(struct machine *machine, 356static struct thread *____machine__findnew_thread(struct machine *machine,
340 pid_t pid, pid_t tid, 357 pid_t pid, pid_t tid,
341 bool create) 358 bool create)
342{ 359{
343 struct rb_node **p = &machine->threads.rb_node; 360 struct rb_node **p = &machine->threads.rb_node;
344 struct rb_node *parent = NULL; 361 struct rb_node *parent = NULL;
@@ -356,7 +373,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
356 return th; 373 return th;
357 } 374 }
358 375
359 thread__zput(machine->last_match); 376 machine->last_match = NULL;
360 } 377 }
361 378
362 while (*p != NULL) { 379 while (*p != NULL) {
@@ -364,7 +381,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
364 th = rb_entry(parent, struct thread, rb_node); 381 th = rb_entry(parent, struct thread, rb_node);
365 382
366 if (th->tid == tid) { 383 if (th->tid == tid) {
367 machine->last_match = thread__get(th); 384 machine->last_match = th;
368 machine__update_thread_pid(machine, th, pid); 385 machine__update_thread_pid(machine, th, pid);
369 return th; 386 return th;
370 } 387 }
@@ -392,7 +409,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
392 * leader and that would screw up the rb tree. 409 * leader and that would screw up the rb tree.
393 */ 410 */
394 if (thread__init_map_groups(th, machine)) { 411 if (thread__init_map_groups(th, machine)) {
395 rb_erase(&th->rb_node, &machine->threads); 412 rb_erase_init(&th->rb_node, &machine->threads);
413 RB_CLEAR_NODE(&th->rb_node);
396 thread__delete(th); 414 thread__delete(th);
397 return NULL; 415 return NULL;
398 } 416 }
@@ -400,22 +418,36 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
400 * It is now in the rbtree, get a ref 418 * It is now in the rbtree, get a ref
401 */ 419 */
402 thread__get(th); 420 thread__get(th);
403 machine->last_match = thread__get(th); 421 machine->last_match = th;
404 } 422 }
405 423
406 return th; 424 return th;
407} 425}
408 426
427struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
428{
429 return ____machine__findnew_thread(machine, pid, tid, true);
430}
431
409struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, 432struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
410 pid_t tid) 433 pid_t tid)
411{ 434{
412 return __machine__findnew_thread(machine, pid, tid, true); 435 struct thread *th;
436
437 pthread_rwlock_wrlock(&machine->threads_lock);
438 th = thread__get(__machine__findnew_thread(machine, pid, tid));
439 pthread_rwlock_unlock(&machine->threads_lock);
440 return th;
413} 441}
414 442
415struct thread *machine__find_thread(struct machine *machine, pid_t pid, 443struct thread *machine__find_thread(struct machine *machine, pid_t pid,
416 pid_t tid) 444 pid_t tid)
417{ 445{
418 return __machine__findnew_thread(machine, pid, tid, false); 446 struct thread *th;
447 pthread_rwlock_rdlock(&machine->threads_lock);
448 th = thread__get(____machine__findnew_thread(machine, pid, tid, false));
449 pthread_rwlock_unlock(&machine->threads_lock);
450 return th;
419} 451}
420 452
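
The scheme above holds machine->threads_lock only across the rbtree walk and pins the result with thread__get() before unlocking, so callers run lock-free but now own a reference they must drop. Caller-side sketch:

    static void with_thread(struct machine *machine, pid_t pid, pid_t tid)
    {
            struct thread *th = machine__find_thread(machine, pid, tid);

            if (th == NULL)
                    return;
            /* ... use th; the reference pins it, no lock needed ... */
            thread__put(th);    /* balance the get taken under the lock */
    }
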
421struct comm *machine__thread_exec_comm(struct machine *machine, 453struct comm *machine__thread_exec_comm(struct machine *machine,
@@ -434,6 +466,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
434 event->comm.pid, 466 event->comm.pid,
435 event->comm.tid); 467 event->comm.tid);
436 bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC; 468 bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
469 int err = 0;
437 470
438 if (exec) 471 if (exec)
439 machine->comm_exec = true; 472 machine->comm_exec = true;
@@ -444,10 +477,12 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
444 if (thread == NULL || 477 if (thread == NULL ||
445 __thread__set_comm(thread, event->comm.comm, sample->time, exec)) { 478 __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
446 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 479 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
447 return -1; 480 err = -1;
448 } 481 }
449 482
450 return 0; 483 thread__put(thread);
484
485 return err;
451} 486}
452 487
453int machine__process_lost_event(struct machine *machine __maybe_unused, 488int machine__process_lost_event(struct machine *machine __maybe_unused,
@@ -458,17 +493,27 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
458 return 0; 493 return 0;
459} 494}
460 495
461static struct dso* 496int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
462machine__module_dso(struct machine *machine, struct kmod_path *m, 497 union perf_event *event, struct perf_sample *sample)
463 const char *filename) 498{
499 dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
500 sample->id, event->lost_samples.lost);
501 return 0;
502}
503
504static struct dso *machine__findnew_module_dso(struct machine *machine,
505 struct kmod_path *m,
506 const char *filename)
464{ 507{
465 struct dso *dso; 508 struct dso *dso;
466 509
467 dso = dsos__find(&machine->kernel_dsos, m->name, true); 510 pthread_rwlock_wrlock(&machine->dsos.lock);
511
512 dso = __dsos__find(&machine->dsos, m->name, true);
468 if (!dso) { 513 if (!dso) {
469 dso = dsos__addnew(&machine->kernel_dsos, m->name); 514 dso = __dsos__addnew(&machine->dsos, m->name);
470 if (dso == NULL) 515 if (dso == NULL)
471 return NULL; 516 goto out_unlock;
472 517
473 if (machine__is_host(machine)) 518 if (machine__is_host(machine))
474 dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; 519 dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
@@ -483,11 +528,30 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
483 dso__set_long_name(dso, strdup(filename), true); 528 dso__set_long_name(dso, strdup(filename), true);
484 } 529 }
485 530
531 dso__get(dso);
532out_unlock:
533 pthread_rwlock_unlock(&machine->dsos.lock);
486 return dso; 534 return dso;
487} 535}
488 536
489struct map *machine__new_module(struct machine *machine, u64 start, 537int machine__process_aux_event(struct machine *machine __maybe_unused,
490 const char *filename) 538 union perf_event *event)
539{
540 if (dump_trace)
541 perf_event__fprintf_aux(event, stdout);
542 return 0;
543}
544
545int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
546 union perf_event *event)
547{
548 if (dump_trace)
549 perf_event__fprintf_itrace_start(event, stdout);
550 return 0;
551}
552
553struct map *machine__findnew_module_map(struct machine *machine, u64 start,
554 const char *filename)
491{ 555{
492 struct map *map = NULL; 556 struct map *map = NULL;
493 struct dso *dso; 557 struct dso *dso;
@@ -501,7 +565,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
501 if (map) 565 if (map)
502 goto out; 566 goto out;
503 567
504 dso = machine__module_dso(machine, &m, filename); 568 dso = machine__findnew_module_dso(machine, &m, filename);
505 if (dso == NULL) 569 if (dso == NULL)
506 goto out; 570 goto out;
507 571
@@ -519,13 +583,11 @@ out:
519size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) 583size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
520{ 584{
521 struct rb_node *nd; 585 struct rb_node *nd;
522 size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) + 586 size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
523 __dsos__fprintf(&machines->host.user_dsos.head, fp);
524 587
525 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 588 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
526 struct machine *pos = rb_entry(nd, struct machine, rb_node); 589 struct machine *pos = rb_entry(nd, struct machine, rb_node);
527 ret += __dsos__fprintf(&pos->kernel_dsos.head, fp); 590 ret += __dsos__fprintf(&pos->dsos.head, fp);
528 ret += __dsos__fprintf(&pos->user_dsos.head, fp);
529 } 591 }
530 592
531 return ret; 593 return ret;
@@ -534,8 +596,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
534size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp, 596size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
535 bool (skip)(struct dso *dso, int parm), int parm) 597 bool (skip)(struct dso *dso, int parm), int parm)
536{ 598{
537 return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) + 599 return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
538 __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
539} 600}
540 601
541size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, 602size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -575,12 +636,16 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
575 size_t ret = 0; 636 size_t ret = 0;
576 struct rb_node *nd; 637 struct rb_node *nd;
577 638
639 pthread_rwlock_rdlock(&machine->threads_lock);
640
578 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { 641 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
579 struct thread *pos = rb_entry(nd, struct thread, rb_node); 642 struct thread *pos = rb_entry(nd, struct thread, rb_node);
580 643
581 ret += thread__fprintf(pos, fp); 644 ret += thread__fprintf(pos, fp);
582 } 645 }
583 646
647 pthread_rwlock_unlock(&machine->threads_lock);
648
584 return ret; 649 return ret;
585} 650}
586 651
@@ -594,9 +659,8 @@ static struct dso *machine__get_kernel(struct machine *machine)
594 if (!vmlinux_name) 659 if (!vmlinux_name)
595 vmlinux_name = "[kernel.kallsyms]"; 660 vmlinux_name = "[kernel.kallsyms]";
596 661
597 kernel = dso__kernel_findnew(machine, vmlinux_name, 662 kernel = machine__findnew_kernel(machine, vmlinux_name,
598 "[kernel]", 663 "[kernel]", DSO_TYPE_KERNEL);
599 DSO_TYPE_KERNEL);
600 } else { 664 } else {
601 char bf[PATH_MAX]; 665 char bf[PATH_MAX];
602 666
@@ -606,9 +670,9 @@ static struct dso *machine__get_kernel(struct machine *machine)
606 vmlinux_name = machine__mmap_name(machine, bf, 670 vmlinux_name = machine__mmap_name(machine, bf,
607 sizeof(bf)); 671 sizeof(bf));
608 672
609 kernel = dso__kernel_findnew(machine, vmlinux_name, 673 kernel = machine__findnew_kernel(machine, vmlinux_name,
610 "[guest.kernel]", 674 "[guest.kernel]",
611 DSO_TYPE_GUEST_KERNEL); 675 DSO_TYPE_GUEST_KERNEL);
612 } 676 }
613 677
614 if (kernel != NULL && (!kernel->has_build_id)) 678 if (kernel != NULL && (!kernel->has_build_id))
@@ -713,7 +777,6 @@ void machine__destroy_kernel_maps(struct machine *machine)
713 kmap->ref_reloc_sym = NULL; 777 kmap->ref_reloc_sym = NULL;
714 } 778 }
715 779
716 map__delete(machine->vmlinux_maps[type]);
717 machine->vmlinux_maps[type] = NULL; 780 machine->vmlinux_maps[type] = NULL;
718 } 781 }
719} 782}
@@ -970,7 +1033,7 @@ static int machine__create_module(void *arg, const char *name, u64 start)
970 struct machine *machine = arg; 1033 struct machine *machine = arg;
971 struct map *map; 1034 struct map *map;
972 1035
973 map = machine__new_module(machine, start, name); 1036 map = machine__findnew_module_map(machine, start, name);
974 if (map == NULL) 1037 if (map == NULL)
975 return -1; 1038 return -1;
976 1039
@@ -1062,7 +1125,7 @@ static bool machine__uses_kcore(struct machine *machine)
1062{ 1125{
1063 struct dso *dso; 1126 struct dso *dso;
1064 1127
1065 list_for_each_entry(dso, &machine->kernel_dsos.head, node) { 1128 list_for_each_entry(dso, &machine->dsos.head, node) {
1066 if (dso__is_kcore(dso)) 1129 if (dso__is_kcore(dso))
1067 return true; 1130 return true;
1068 } 1131 }
@@ -1093,8 +1156,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1093 strlen(kmmap_prefix) - 1) == 0; 1156 strlen(kmmap_prefix) - 1) == 0;
1094 if (event->mmap.filename[0] == '/' || 1157 if (event->mmap.filename[0] == '/' ||
1095 (!is_kernel_mmap && event->mmap.filename[0] == '[')) { 1158 (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
1096 map = machine__new_module(machine, event->mmap.start, 1159 map = machine__findnew_module_map(machine, event->mmap.start,
1097 event->mmap.filename); 1160 event->mmap.filename);
1098 if (map == NULL) 1161 if (map == NULL)
1099 goto out_problem; 1162 goto out_problem;
1100 1163
@@ -1109,23 +1172,48 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1109 struct dso *kernel = NULL; 1172 struct dso *kernel = NULL;
1110 struct dso *dso; 1173 struct dso *dso;
1111 1174
1112 list_for_each_entry(dso, &machine->kernel_dsos.head, node) { 1175 pthread_rwlock_rdlock(&machine->dsos.lock);
1113 if (is_kernel_module(dso->long_name)) 1176
1177 list_for_each_entry(dso, &machine->dsos.head, node) {
1178
1179 /*
1180 * The cpumode passed to is_kernel_module is not the
1181 * cpumode of *this* event. If we insist on passing
1182 * correct cpumode to is_kernel_module, we should
 1183 * record the cpumode when we add this dso to the
1184 * linked list.
1185 *
 1186 * However, we don't really need to pass the correct
 1187 * cpumode. We know the correct cpumode must be kernel
 1188 * mode (if not, we should not have linked it onto the
 1189 * kernel_dsos list).
1190 *
1191 * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
1192 * is_kernel_module() treats it as a kernel cpumode.
1193 */
1194
1195 if (!dso->kernel ||
1196 is_kernel_module(dso->long_name,
1197 PERF_RECORD_MISC_CPUMODE_UNKNOWN))
1114 continue; 1198 continue;
1115 1199
1200
1116 kernel = dso; 1201 kernel = dso;
1117 break; 1202 break;
1118 } 1203 }
1119 1204
1205 pthread_rwlock_unlock(&machine->dsos.lock);
1206
1120 if (kernel == NULL) 1207 if (kernel == NULL)
1121 kernel = __dsos__findnew(&machine->kernel_dsos, 1208 kernel = machine__findnew_dso(machine, kmmap_prefix);
1122 kmmap_prefix);
1123 if (kernel == NULL) 1209 if (kernel == NULL)
1124 goto out_problem; 1210 goto out_problem;
1125 1211
1126 kernel->kernel = kernel_type; 1212 kernel->kernel = kernel_type;
1127 if (__machine__create_kernel_maps(machine, kernel) < 0) 1213 if (__machine__create_kernel_maps(machine, kernel) < 0) {
1214 dso__put(kernel);
1128 goto out_problem; 1215 goto out_problem;
1216 }
1129 1217
1130 if (strstr(kernel->long_name, "vmlinux")) 1218 if (strstr(kernel->long_name, "vmlinux"))
1131 dso__set_short_name(kernel, "[kernel.vmlinux]", false); 1219 dso__set_short_name(kernel, "[kernel.vmlinux]", false);
@@ -1197,11 +1285,15 @@ int machine__process_mmap2_event(struct machine *machine,
1197 event->mmap2.filename, type, thread); 1285 event->mmap2.filename, type, thread);
1198 1286
1199 if (map == NULL) 1287 if (map == NULL)
1200 goto out_problem; 1288 goto out_problem_map;
1201 1289
1202 thread__insert_map(thread, map); 1290 thread__insert_map(thread, map);
1291 thread__put(thread);
1292 map__put(map);
1203 return 0; 1293 return 0;
1204 1294
1295out_problem_map:
1296 thread__put(thread);
1205out_problem: 1297out_problem:
1206 dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n"); 1298 dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
1207 return 0; 1299 return 0;
@@ -1244,31 +1336,46 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
1244 type, thread); 1336 type, thread);
1245 1337
1246 if (map == NULL) 1338 if (map == NULL)
1247 goto out_problem; 1339 goto out_problem_map;
1248 1340
1249 thread__insert_map(thread, map); 1341 thread__insert_map(thread, map);
1342 thread__put(thread);
1343 map__put(map);
1250 return 0; 1344 return 0;
1251 1345
1346out_problem_map:
1347 thread__put(thread);
1252out_problem: 1348out_problem:
1253 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); 1349 dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
1254 return 0; 1350 return 0;
1255} 1351}
1256 1352
1257void machine__remove_thread(struct machine *machine, struct thread *th) 1353static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
1258{ 1354{
1259 if (machine->last_match == th) 1355 if (machine->last_match == th)
1260 thread__zput(machine->last_match); 1356 machine->last_match = NULL;
1261 1357
1262 rb_erase(&th->rb_node, &machine->threads); 1358 BUG_ON(atomic_read(&th->refcnt) == 0);
1359 if (lock)
1360 pthread_rwlock_wrlock(&machine->threads_lock);
1361 rb_erase_init(&th->rb_node, &machine->threads);
1362 RB_CLEAR_NODE(&th->rb_node);
1263 /* 1363 /*
1264 * Move it first to the dead_threads list, then drop the reference, 1364 * Move it first to the dead_threads list, then drop the reference,
1265 * if this is the last reference, then the thread__delete destructor 1365 * if this is the last reference, then the thread__delete destructor
1266 * will be called and we will remove it from the dead_threads list. 1366 * will be called and we will remove it from the dead_threads list.
1267 */ 1367 */
1268 list_add_tail(&th->node, &machine->dead_threads); 1368 list_add_tail(&th->node, &machine->dead_threads);
1369 if (lock)
1370 pthread_rwlock_unlock(&machine->threads_lock);
1269 thread__put(th); 1371 thread__put(th);
1270} 1372}
1271 1373
1374void machine__remove_thread(struct machine *machine, struct thread *th)
1375{
1376 return __machine__remove_thread(machine, th, true);
1377}
1378
1272int machine__process_fork_event(struct machine *machine, union perf_event *event, 1379int machine__process_fork_event(struct machine *machine, union perf_event *event,
1273 struct perf_sample *sample) 1380 struct perf_sample *sample)
1274{ 1381{
@@ -1278,10 +1385,13 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
1278 struct thread *parent = machine__findnew_thread(machine, 1385 struct thread *parent = machine__findnew_thread(machine,
1279 event->fork.ppid, 1386 event->fork.ppid,
1280 event->fork.ptid); 1387 event->fork.ptid);
1388 int err = 0;
1281 1389
1282 /* if a thread currently exists for the thread id remove it */ 1390 /* if a thread currently exists for the thread id remove it */
1283 if (thread != NULL) 1391 if (thread != NULL) {
1284 machine__remove_thread(machine, thread); 1392 machine__remove_thread(machine, thread);
1393 thread__put(thread);
1394 }
1285 1395
1286 thread = machine__findnew_thread(machine, event->fork.pid, 1396 thread = machine__findnew_thread(machine, event->fork.pid,
1287 event->fork.tid); 1397 event->fork.tid);
@@ -1291,10 +1401,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
1291 if (thread == NULL || parent == NULL || 1401 if (thread == NULL || parent == NULL ||
1292 thread__fork(thread, parent, sample->time) < 0) { 1402 thread__fork(thread, parent, sample->time) < 0) {
1293 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); 1403 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
1294 return -1; 1404 err = -1;
1295 } 1405 }
1406 thread__put(thread);
1407 thread__put(parent);
1296 1408
1297 return 0; 1409 return err;
1298} 1410}
1299 1411
1300int machine__process_exit_event(struct machine *machine, union perf_event *event, 1412int machine__process_exit_event(struct machine *machine, union perf_event *event,
@@ -1307,8 +1419,10 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
1307 if (dump_trace) 1419 if (dump_trace)
1308 perf_event__fprintf_task(event, stdout); 1420 perf_event__fprintf_task(event, stdout);
1309 1421
1310 if (thread != NULL) 1422 if (thread != NULL) {
1311 thread__exited(thread); 1423 thread__exited(thread);
1424 thread__put(thread);
1425 }
1312 1426
1313 return 0; 1427 return 0;
1314} 1428}
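
The ownership rule these hunks converge on: machine__findnew_thread() now returns a reference that the caller owns, so every lookup is paired with a thread__put(), as the fork and exit handlers above now do. A minimal sketch, with a hypothetical helper name:

#include "machine.h"
#include "thread.h"

/* Hypothetical caller: look a thread up, use it, then drop the
 * reference that machine__findnew_thread() handed back. */
static int set_thread_cpu(struct machine *machine, pid_t pid,
			  pid_t tid, int cpu)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);

	if (thread == NULL)
		return -1;

	thread->cpu = cpu;
	thread__put(thread);	/* drop our reference */
	return 0;
}
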
@@ -1331,6 +1445,13 @@ int machine__process_event(struct machine *machine, union perf_event *event,
1331 ret = machine__process_exit_event(machine, event, sample); break; 1445 ret = machine__process_exit_event(machine, event, sample); break;
1332 case PERF_RECORD_LOST: 1446 case PERF_RECORD_LOST:
1333 ret = machine__process_lost_event(machine, event, sample); break; 1447 ret = machine__process_lost_event(machine, event, sample); break;
1448 case PERF_RECORD_AUX:
1449 ret = machine__process_aux_event(machine, event); break;
1450 case PERF_RECORD_ITRACE_START:
 1451 ret = machine__process_itrace_start_event(machine, event);
 1452 break;
 1453 case PERF_RECORD_LOST_SAMPLES:
 1454 ret = machine__process_lost_samples_event(machine, event, sample); break;
1334 default: 1455 default:
1335 ret = -1; 1456 ret = -1;
1336 break; 1457 break;
@@ -1769,14 +1890,36 @@ int machine__for_each_thread(struct machine *machine,
1769 return rc; 1890 return rc;
1770} 1891}
1771 1892
1893int machines__for_each_thread(struct machines *machines,
1894 int (*fn)(struct thread *thread, void *p),
1895 void *priv)
1896{
1897 struct rb_node *nd;
1898 int rc = 0;
1899
1900 rc = machine__for_each_thread(&machines->host, fn, priv);
1901 if (rc != 0)
1902 return rc;
1903
1904 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
1905 struct machine *machine = rb_entry(nd, struct machine, rb_node);
1906
1907 rc = machine__for_each_thread(machine, fn, priv);
1908 if (rc != 0)
1909 return rc;
1910 }
1911 return rc;
1912}
1913
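
A sketch of a callback for the new machines__for_each_thread(), which extends the per-machine walk to the host plus every guest; the counting callback is illustrative, not part of this patch:

#include "machine.h"
#include "thread.h"

/* Illustrative callback: count every thread across host and guests. */
static int count_thread(struct thread *thread __maybe_unused, void *priv)
{
	int *count = priv;

	(*count)++;
	return 0;	/* a non-zero return stops the walk early */
}

/* usage: int n = 0; machines__for_each_thread(machines, count_thread, &n); */
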
1772int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 1914int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
1773 struct target *target, struct thread_map *threads, 1915 struct target *target, struct thread_map *threads,
1774 perf_event__handler_t process, bool data_mmap) 1916 perf_event__handler_t process, bool data_mmap,
1917 unsigned int proc_map_timeout)
1775{ 1918{
1776 if (target__has_task(target)) 1919 if (target__has_task(target))
1777 return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); 1920 return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
1778 else if (target__has_cpu(target)) 1921 else if (target__has_cpu(target))
1779 return perf_event__synthesize_threads(tool, process, machine, data_mmap); 1922 return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
1780 /* command specified */ 1923 /* command specified */
1781 return 0; 1924 return 0;
1782} 1925}
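
The new proc_map_timeout parameter threads a limit for /proc/<pid>/maps parsing down to the event synthesizers. A hedged sketch of a call site; the 500 ms constant is illustrative and not taken from this patch:

#include "machine.h"
#include "target.h"
#include "thread_map.h"

/* Hypothetical caller: synthesize existing threads, giving up on any
 * single /proc/<pid>/maps scan after (an illustrative) 500 ms. */
static int synthesize(struct machine *machine, struct target *target,
		      struct thread_map *threads)
{
	return machine__synthesize_threads(machine, target, threads,
					   false, 500);
}
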
@@ -1820,6 +1963,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
1820 return -ENOMEM; 1963 return -ENOMEM;
1821 1964
1822 thread->cpu = cpu; 1965 thread->cpu = cpu;
1966 thread__put(thread);
1823 1967
1824 return 0; 1968 return 0;
1825} 1969}
@@ -1845,3 +1989,8 @@ int machine__get_kernel_start(struct machine *machine)
1845 } 1989 }
1846 return err; 1990 return err;
1847} 1991}
1992
1993struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
1994{
1995 return dsos__findnew(&machine->dsos, filename);
1996}
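
machine__findnew_dso() follows the same convention as the thread lookups: the dso comes back with a reference held for the caller, which map__new() in map.c below drops with dso__put() once the map holds its own. A minimal sketch, assuming a hypothetical caller:

#include "dso.h"
#include "machine.h"

/* Hypothetical caller: find (or create) a dso, use it, then drop the
 * reference machine__findnew_dso() returned. */
static int mark_loaded(struct machine *machine, const char *filename)
{
	struct dso *dso = machine__findnew_dso(machine, filename);

	if (dso == NULL)
		return -1;

	dso__set_loaded(dso, MAP__FUNCTION);
	dso__put(dso);
	return 0;
}
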
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6d64cedb9d1e..887798e511e9 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -30,11 +30,11 @@ struct machine {
30 bool comm_exec; 30 bool comm_exec;
31 char *root_dir; 31 char *root_dir;
32 struct rb_root threads; 32 struct rb_root threads;
33 pthread_rwlock_t threads_lock;
33 struct list_head dead_threads; 34 struct list_head dead_threads;
34 struct thread *last_match; 35 struct thread *last_match;
35 struct vdso_info *vdso_info; 36 struct vdso_info *vdso_info;
36 struct dsos user_dsos; 37 struct dsos dsos;
37 struct dsos kernel_dsos;
38 struct map_groups kmaps; 38 struct map_groups kmaps;
39 struct map *vmlinux_maps[MAP__NR_TYPES]; 39 struct map *vmlinux_maps[MAP__NR_TYPES];
40 u64 kernel_start; 40 u64 kernel_start;
@@ -81,6 +81,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
81 struct perf_sample *sample); 81 struct perf_sample *sample);
82int machine__process_lost_event(struct machine *machine, union perf_event *event, 82int machine__process_lost_event(struct machine *machine, union perf_event *event,
83 struct perf_sample *sample); 83 struct perf_sample *sample);
84int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
85 struct perf_sample *sample);
86int machine__process_aux_event(struct machine *machine,
87 union perf_event *event);
88int machine__process_itrace_start_event(struct machine *machine,
89 union perf_event *event);
84int machine__process_mmap_event(struct machine *machine, union perf_event *event, 90int machine__process_mmap_event(struct machine *machine, union perf_event *event,
85 struct perf_sample *sample); 91 struct perf_sample *sample);
86int machine__process_mmap2_event(struct machine *machine, union perf_event *event, 92int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
@@ -147,8 +153,10 @@ static inline bool machine__is_host(struct machine *machine)
147 return machine ? machine->pid == HOST_KERNEL_ID : false; 153 return machine ? machine->pid == HOST_KERNEL_ID : false;
148} 154}
149 155
150struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, 156struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
151 pid_t tid); 157struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
158
159struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
152 160
153size_t machine__fprintf(struct machine *machine, FILE *fp); 161size_t machine__fprintf(struct machine *machine, FILE *fp);
154 162
@@ -181,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
181 filter); 189 filter);
182} 190}
183 191
184struct map *machine__new_module(struct machine *machine, u64 start, 192struct map *machine__findnew_module_map(struct machine *machine, u64 start,
185 const char *filename); 193 const char *filename);
186 194
187int machine__load_kallsyms(struct machine *machine, const char *filename, 195int machine__load_kallsyms(struct machine *machine, const char *filename,
188 enum map_type type, symbol_filter_t filter); 196 enum map_type type, symbol_filter_t filter);
@@ -208,16 +216,22 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
208int machine__for_each_thread(struct machine *machine, 216int machine__for_each_thread(struct machine *machine,
209 int (*fn)(struct thread *thread, void *p), 217 int (*fn)(struct thread *thread, void *p),
210 void *priv); 218 void *priv);
219int machines__for_each_thread(struct machines *machines,
220 int (*fn)(struct thread *thread, void *p),
221 void *priv);
211 222
212int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, 223int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
213 struct target *target, struct thread_map *threads, 224 struct target *target, struct thread_map *threads,
214 perf_event__handler_t process, bool data_mmap); 225 perf_event__handler_t process, bool data_mmap,
226 unsigned int proc_map_timeout);
215static inline 227static inline
216int machine__synthesize_threads(struct machine *machine, struct target *target, 228int machine__synthesize_threads(struct machine *machine, struct target *target,
217 struct thread_map *threads, bool data_mmap) 229 struct thread_map *threads, bool data_mmap,
230 unsigned int proc_map_timeout)
218{ 231{
219 return __machine__synthesize_threads(machine, NULL, target, threads, 232 return __machine__synthesize_threads(machine, NULL, target, threads,
220 perf_event__process, data_mmap); 233 perf_event__process, data_mmap,
234 proc_map_timeout);
221} 235}
222 236
223pid_t machine__get_current_tid(struct machine *machine, int cpu); 237pid_t machine__get_current_tid(struct machine *machine, int cpu);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index a14f08f41686..b5a5e9c02437 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -16,6 +16,8 @@
16#include "machine.h" 16#include "machine.h"
17#include <linux/string.h> 17#include <linux/string.h>
18 18
19static void __maps__insert(struct maps *maps, struct map *map);
20
19const char *map_type__name[MAP__NR_TYPES] = { 21const char *map_type__name[MAP__NR_TYPES] = {
20 [MAP__FUNCTION] = "Functions", 22 [MAP__FUNCTION] = "Functions",
21 [MAP__VARIABLE] = "Variables", 23 [MAP__VARIABLE] = "Variables",
@@ -130,13 +132,13 @@ void map__init(struct map *map, enum map_type type,
130 map->end = end; 132 map->end = end;
131 map->pgoff = pgoff; 133 map->pgoff = pgoff;
132 map->reloc = 0; 134 map->reloc = 0;
133 map->dso = dso; 135 map->dso = dso__get(dso);
134 map->map_ip = map__map_ip; 136 map->map_ip = map__map_ip;
135 map->unmap_ip = map__unmap_ip; 137 map->unmap_ip = map__unmap_ip;
136 RB_CLEAR_NODE(&map->rb_node); 138 RB_CLEAR_NODE(&map->rb_node);
137 map->groups = NULL; 139 map->groups = NULL;
138 map->referenced = false;
139 map->erange_warned = false; 140 map->erange_warned = false;
141 atomic_set(&map->refcnt, 1);
140} 142}
141 143
142struct map *map__new(struct machine *machine, u64 start, u64 len, 144struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -175,9 +177,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
175 177
176 if (vdso) { 178 if (vdso) {
177 pgoff = 0; 179 pgoff = 0;
178 dso = vdso__dso_findnew(machine, thread); 180 dso = machine__findnew_vdso(machine, thread);
179 } else 181 } else
180 dso = __dsos__findnew(&machine->user_dsos, filename); 182 dso = machine__findnew_dso(machine, filename);
181 183
182 if (dso == NULL) 184 if (dso == NULL)
183 goto out_delete; 185 goto out_delete;
@@ -195,6 +197,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
195 if (type != MAP__FUNCTION) 197 if (type != MAP__FUNCTION)
196 dso__set_loaded(dso, map->type); 198 dso__set_loaded(dso, map->type);
197 } 199 }
200 dso__put(dso);
198 } 201 }
199 return map; 202 return map;
200out_delete: 203out_delete:
@@ -221,11 +224,24 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
221 return map; 224 return map;
222} 225}
223 226
227static void map__exit(struct map *map)
228{
229 BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
230 dso__zput(map->dso);
231}
232
224void map__delete(struct map *map) 233void map__delete(struct map *map)
225{ 234{
235 map__exit(map);
226 free(map); 236 free(map);
227} 237}
228 238
239void map__put(struct map *map)
240{
241 if (map && atomic_dec_and_test(&map->refcnt))
242 map__delete(map);
243}
244
229void map__fixup_start(struct map *map) 245void map__fixup_start(struct map *map)
230{ 246{
231 struct rb_root *symbols = &map->dso->symbols[map->type]; 247 struct rb_root *symbols = &map->dso->symbols[map->type];
@@ -292,6 +308,11 @@ int map__load(struct map *map, symbol_filter_t filter)
292 return 0; 308 return 0;
293} 309}
294 310
311int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
312{
313 return strcmp(namea, nameb);
314}
315
295struct symbol *map__find_symbol(struct map *map, u64 addr, 316struct symbol *map__find_symbol(struct map *map, u64 addr,
296 symbol_filter_t filter) 317 symbol_filter_t filter)
297{ 318{
@@ -413,48 +434,49 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
413 return ip + map->reloc; 434 return ip + map->reloc;
414} 435}
415 436
437static void maps__init(struct maps *maps)
438{
439 maps->entries = RB_ROOT;
440 pthread_rwlock_init(&maps->lock, NULL);
441}
442
416void map_groups__init(struct map_groups *mg, struct machine *machine) 443void map_groups__init(struct map_groups *mg, struct machine *machine)
417{ 444{
418 int i; 445 int i;
419 for (i = 0; i < MAP__NR_TYPES; ++i) { 446 for (i = 0; i < MAP__NR_TYPES; ++i) {
420 mg->maps[i] = RB_ROOT; 447 maps__init(&mg->maps[i]);
421 INIT_LIST_HEAD(&mg->removed_maps[i]);
422 } 448 }
423 mg->machine = machine; 449 mg->machine = machine;
424 mg->refcnt = 1; 450 atomic_set(&mg->refcnt, 1);
425} 451}
426 452
427static void maps__delete(struct rb_root *maps) 453static void __maps__purge(struct maps *maps)
428{ 454{
429 struct rb_node *next = rb_first(maps); 455 struct rb_root *root = &maps->entries;
456 struct rb_node *next = rb_first(root);
430 457
431 while (next) { 458 while (next) {
432 struct map *pos = rb_entry(next, struct map, rb_node); 459 struct map *pos = rb_entry(next, struct map, rb_node);
433 460
434 next = rb_next(&pos->rb_node); 461 next = rb_next(&pos->rb_node);
435 rb_erase(&pos->rb_node, maps); 462 rb_erase_init(&pos->rb_node, root);
436 map__delete(pos); 463 map__put(pos);
437 } 464 }
438} 465}
439 466
440static void maps__delete_removed(struct list_head *maps) 467static void maps__exit(struct maps *maps)
441{ 468{
442 struct map *pos, *n; 469 pthread_rwlock_wrlock(&maps->lock);
443 470 __maps__purge(maps);
444 list_for_each_entry_safe(pos, n, maps, node) { 471 pthread_rwlock_unlock(&maps->lock);
445 list_del(&pos->node);
446 map__delete(pos);
447 }
448} 472}
449 473
450void map_groups__exit(struct map_groups *mg) 474void map_groups__exit(struct map_groups *mg)
451{ 475{
452 int i; 476 int i;
453 477
454 for (i = 0; i < MAP__NR_TYPES; ++i) { 478 for (i = 0; i < MAP__NR_TYPES; ++i)
455 maps__delete(&mg->maps[i]); 479 maps__exit(&mg->maps[i]);
456 maps__delete_removed(&mg->removed_maps[i]);
457 }
458} 480}
459 481
460bool map_groups__empty(struct map_groups *mg) 482bool map_groups__empty(struct map_groups *mg)
@@ -464,8 +486,6 @@ bool map_groups__empty(struct map_groups *mg)
464 for (i = 0; i < MAP__NR_TYPES; ++i) { 486 for (i = 0; i < MAP__NR_TYPES; ++i) {
465 if (maps__first(&mg->maps[i])) 487 if (maps__first(&mg->maps[i]))
466 return false; 488 return false;
467 if (!list_empty(&mg->removed_maps[i]))
468 return false;
469 } 489 }
470 490
471 return true; 491 return true;
@@ -489,32 +509,10 @@ void map_groups__delete(struct map_groups *mg)
489 509
490void map_groups__put(struct map_groups *mg) 510void map_groups__put(struct map_groups *mg)
491{ 511{
492 if (--mg->refcnt == 0) 512 if (mg && atomic_dec_and_test(&mg->refcnt))
493 map_groups__delete(mg); 513 map_groups__delete(mg);
494} 514}
495 515
496void map_groups__flush(struct map_groups *mg)
497{
498 int type;
499
500 for (type = 0; type < MAP__NR_TYPES; type++) {
501 struct rb_root *root = &mg->maps[type];
502 struct rb_node *next = rb_first(root);
503
504 while (next) {
505 struct map *pos = rb_entry(next, struct map, rb_node);
506 next = rb_next(&pos->rb_node);
507 rb_erase(&pos->rb_node, root);
508 /*
509 * We may have references to this map, for
510 * instance in some hist_entry instances, so
511 * just move them to a separate list.
512 */
513 list_add_tail(&pos->node, &mg->removed_maps[pos->type]);
514 }
515 }
516}
517
518struct symbol *map_groups__find_symbol(struct map_groups *mg, 516struct symbol *map_groups__find_symbol(struct map_groups *mg,
519 enum map_type type, u64 addr, 517 enum map_type type, u64 addr,
520 struct map **mapp, 518 struct map **mapp,
@@ -538,20 +536,28 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
538 struct map **mapp, 536 struct map **mapp,
539 symbol_filter_t filter) 537 symbol_filter_t filter)
540{ 538{
539 struct maps *maps = &mg->maps[type];
540 struct symbol *sym;
541 struct rb_node *nd; 541 struct rb_node *nd;
542 542
543 for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { 543 pthread_rwlock_rdlock(&maps->lock);
544
545 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
544 struct map *pos = rb_entry(nd, struct map, rb_node); 546 struct map *pos = rb_entry(nd, struct map, rb_node);
545 struct symbol *sym = map__find_symbol_by_name(pos, name, filter); 547
548 sym = map__find_symbol_by_name(pos, name, filter);
546 549
547 if (sym == NULL) 550 if (sym == NULL)
548 continue; 551 continue;
549 if (mapp != NULL) 552 if (mapp != NULL)
550 *mapp = pos; 553 *mapp = pos;
551 return sym; 554 goto out;
552 } 555 }
553 556
554 return NULL; 557 sym = NULL;
558out:
559 pthread_rwlock_unlock(&maps->lock);
560 return sym;
555} 561}
556 562
557int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) 563int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
@@ -571,73 +577,54 @@ int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
571 return ams->sym ? 0 : -1; 577 return ams->sym ? 0 : -1;
572} 578}
573 579
574size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, 580static size_t maps__fprintf(struct maps *maps, FILE *fp)
575 FILE *fp)
576{ 581{
577 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); 582 size_t printed = 0;
578 struct rb_node *nd; 583 struct rb_node *nd;
579 584
580 for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { 585 pthread_rwlock_rdlock(&maps->lock);
586
587 for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
581 struct map *pos = rb_entry(nd, struct map, rb_node); 588 struct map *pos = rb_entry(nd, struct map, rb_node);
582 printed += fprintf(fp, "Map:"); 589 printed += fprintf(fp, "Map:");
583 printed += map__fprintf(pos, fp); 590 printed += map__fprintf(pos, fp);
584 if (verbose > 2) { 591 if (verbose > 2) {
585 printed += dso__fprintf(pos->dso, type, fp); 592 printed += dso__fprintf(pos->dso, pos->type, fp);
586 printed += fprintf(fp, "--\n"); 593 printed += fprintf(fp, "--\n");
587 } 594 }
588 } 595 }
589 596
590 return printed; 597 pthread_rwlock_unlock(&maps->lock);
591}
592 598
593static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
594{
595 size_t printed = 0, i;
596 for (i = 0; i < MAP__NR_TYPES; ++i)
597 printed += __map_groups__fprintf_maps(mg, i, fp);
598 return printed; 599 return printed;
599} 600}
600 601
601static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg, 602size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
602 enum map_type type, FILE *fp) 603 FILE *fp)
603{ 604{
604 struct map *pos; 605 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
605 size_t printed = 0; 606 return printed += maps__fprintf(&mg->maps[type], fp);
606
607 list_for_each_entry(pos, &mg->removed_maps[type], node) {
608 printed += fprintf(fp, "Map:");
609 printed += map__fprintf(pos, fp);
610 if (verbose > 1) {
611 printed += dso__fprintf(pos->dso, type, fp);
612 printed += fprintf(fp, "--\n");
613 }
614 }
615 return printed;
616} 607}
617 608
618static size_t map_groups__fprintf_removed_maps(struct map_groups *mg, 609size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
619 FILE *fp)
620{ 610{
621 size_t printed = 0, i; 611 size_t printed = 0, i;
622 for (i = 0; i < MAP__NR_TYPES; ++i) 612 for (i = 0; i < MAP__NR_TYPES; ++i)
623 printed += __map_groups__fprintf_removed_maps(mg, i, fp); 613 printed += __map_groups__fprintf_maps(mg, i, fp);
624 return printed; 614 return printed;
625} 615}
626 616
627size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) 617static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
628{ 618{
629 size_t printed = map_groups__fprintf_maps(mg, fp); 619 struct rb_root *root;
630 printed += fprintf(fp, "Removed maps:\n"); 620 struct rb_node *next;
631 return printed + map_groups__fprintf_removed_maps(mg, fp);
632}
633
634int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
635 FILE *fp)
636{
637 struct rb_root *root = &mg->maps[map->type];
638 struct rb_node *next = rb_first(root);
639 int err = 0; 621 int err = 0;
640 622
623 pthread_rwlock_wrlock(&maps->lock);
624
625 root = &maps->entries;
626 next = rb_first(root);
627
641 while (next) { 628 while (next) {
642 struct map *pos = rb_entry(next, struct map, rb_node); 629 struct map *pos = rb_entry(next, struct map, rb_node);
643 next = rb_next(&pos->rb_node); 630 next = rb_next(&pos->rb_node);
@@ -651,7 +638,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
651 map__fprintf(pos, fp); 638 map__fprintf(pos, fp);
652 } 639 }
653 640
654 rb_erase(&pos->rb_node, root); 641 rb_erase_init(&pos->rb_node, root);
655 /* 642 /*
656 * Now check if we need to create new maps for areas not 643 * Now check if we need to create new maps for areas not
657 * overlapped by the new map: 644 * overlapped by the new map:
@@ -661,11 +648,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
661 648
662 if (before == NULL) { 649 if (before == NULL) {
663 err = -ENOMEM; 650 err = -ENOMEM;
664 goto move_map; 651 goto put_map;
665 } 652 }
666 653
667 before->end = map->start; 654 before->end = map->start;
668 map_groups__insert(mg, before); 655 __maps__insert(maps, before);
669 if (verbose >= 2) 656 if (verbose >= 2)
670 map__fprintf(before, fp); 657 map__fprintf(before, fp);
671 } 658 }
@@ -675,28 +662,31 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
675 662
676 if (after == NULL) { 663 if (after == NULL) {
677 err = -ENOMEM; 664 err = -ENOMEM;
678 goto move_map; 665 goto put_map;
679 } 666 }
680 667
681 after->start = map->end; 668 after->start = map->end;
682 map_groups__insert(mg, after); 669 __maps__insert(maps, after);
683 if (verbose >= 2) 670 if (verbose >= 2)
684 map__fprintf(after, fp); 671 map__fprintf(after, fp);
685 } 672 }
686move_map: 673put_map:
687 /* 674 map__put(pos);
688 * If we have references, just move them to a separate list.
689 */
690 if (pos->referenced)
691 list_add_tail(&pos->node, &mg->removed_maps[map->type]);
692 else
693 map__delete(pos);
694 675
695 if (err) 676 if (err)
696 return err; 677 goto out;
697 } 678 }
698 679
699 return 0; 680 err = 0;
681out:
682 pthread_rwlock_unlock(&maps->lock);
683 return err;
684}
685
686int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
687 FILE *fp)
688{
689 return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
700} 690}
701 691
702/* 692/*
@@ -705,20 +695,28 @@ move_map:
705int map_groups__clone(struct map_groups *mg, 695int map_groups__clone(struct map_groups *mg,
706 struct map_groups *parent, enum map_type type) 696 struct map_groups *parent, enum map_type type)
707{ 697{
708 struct rb_node *nd; 698 int err = -ENOMEM;
709 for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) { 699 struct map *map;
710 struct map *map = rb_entry(nd, struct map, rb_node); 700 struct maps *maps = &parent->maps[type];
701
702 pthread_rwlock_rdlock(&maps->lock);
703
704 for (map = maps__first(maps); map; map = map__next(map)) {
711 struct map *new = map__clone(map); 705 struct map *new = map__clone(map);
712 if (new == NULL) 706 if (new == NULL)
713 return -ENOMEM; 707 goto out_unlock;
714 map_groups__insert(mg, new); 708 map_groups__insert(mg, new);
715 } 709 }
716 return 0; 710
711 err = 0;
712out_unlock:
713 pthread_rwlock_unlock(&maps->lock);
714 return err;
717} 715}
718 716
719void maps__insert(struct rb_root *maps, struct map *map) 717static void __maps__insert(struct maps *maps, struct map *map)
720{ 718{
721 struct rb_node **p = &maps->rb_node; 719 struct rb_node **p = &maps->entries.rb_node;
722 struct rb_node *parent = NULL; 720 struct rb_node *parent = NULL;
723 const u64 ip = map->start; 721 const u64 ip = map->start;
724 struct map *m; 722 struct map *m;
@@ -733,20 +731,38 @@ void maps__insert(struct rb_root *maps, struct map *map)
733 } 731 }
734 732
735 rb_link_node(&map->rb_node, parent, p); 733 rb_link_node(&map->rb_node, parent, p);
736 rb_insert_color(&map->rb_node, maps); 734 rb_insert_color(&map->rb_node, &maps->entries);
735 map__get(map);
737} 736}
738 737
739void maps__remove(struct rb_root *maps, struct map *map) 738void maps__insert(struct maps *maps, struct map *map)
740{ 739{
741 rb_erase(&map->rb_node, maps); 740 pthread_rwlock_wrlock(&maps->lock);
741 __maps__insert(maps, map);
742 pthread_rwlock_unlock(&maps->lock);
742} 743}
743 744
744struct map *maps__find(struct rb_root *maps, u64 ip) 745static void __maps__remove(struct maps *maps, struct map *map)
745{ 746{
746 struct rb_node **p = &maps->rb_node; 747 rb_erase_init(&map->rb_node, &maps->entries);
747 struct rb_node *parent = NULL; 748 map__put(map);
749}
750
751void maps__remove(struct maps *maps, struct map *map)
752{
753 pthread_rwlock_wrlock(&maps->lock);
754 __maps__remove(maps, map);
755 pthread_rwlock_unlock(&maps->lock);
756}
757
758struct map *maps__find(struct maps *maps, u64 ip)
759{
760 struct rb_node **p, *parent = NULL;
748 struct map *m; 761 struct map *m;
749 762
763 pthread_rwlock_rdlock(&maps->lock);
764
765 p = &maps->entries.rb_node;
750 while (*p != NULL) { 766 while (*p != NULL) {
751 parent = *p; 767 parent = *p;
752 m = rb_entry(parent, struct map, rb_node); 768 m = rb_entry(parent, struct map, rb_node);
@@ -755,22 +771,25 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
755 else if (ip >= m->end) 771 else if (ip >= m->end)
756 p = &(*p)->rb_right; 772 p = &(*p)->rb_right;
757 else 773 else
758 return m; 774 goto out;
759 } 775 }
760 776
761 return NULL; 777 m = NULL;
778out:
779 pthread_rwlock_unlock(&maps->lock);
780 return m;
762} 781}
763 782
764struct map *maps__first(struct rb_root *maps) 783struct map *maps__first(struct maps *maps)
765{ 784{
766 struct rb_node *first = rb_first(maps); 785 struct rb_node *first = rb_first(&maps->entries);
767 786
768 if (first) 787 if (first)
769 return rb_entry(first, struct map, rb_node); 788 return rb_entry(first, struct map, rb_node);
770 return NULL; 789 return NULL;
771} 790}
772 791
773struct map *maps__next(struct map *map) 792struct map *map__next(struct map *map)
774{ 793{
775 struct rb_node *next = rb_next(&map->rb_node); 794 struct rb_node *next = rb_next(&map->rb_node);
776 795
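
The __weak arch__compare_symbol_names() above exists so an architecture can swap in its own comparison. A hedged sketch of what such an override might look like, modeled on ppc64's leading-dot function entry symbols (not part of this patch):

#include <string.h>

/* Hypothetical arch override: compare symbol names while ignoring a
 * leading '.', as ppc64 ABIv1 function entry symbols carry one. */
int arch__compare_symbol_names(const char *namea, const char *nameb)
{
	if (*namea == '.')
		namea++;
	if (*nameb == '.')
		nameb++;

	return strcmp(namea, nameb);
}
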
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index ec19c59ca38e..d73e687b224e 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -1,9 +1,11 @@
1#ifndef __PERF_MAP_H 1#ifndef __PERF_MAP_H
2#define __PERF_MAP_H 2#define __PERF_MAP_H
3 3
4#include <linux/atomic.h>
4#include <linux/compiler.h> 5#include <linux/compiler.h>
5#include <linux/list.h> 6#include <linux/list.h>
6#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include <pthread.h>
7#include <stdio.h> 9#include <stdio.h>
8#include <stdbool.h> 10#include <stdbool.h>
9#include <linux/types.h> 11#include <linux/types.h>
@@ -32,7 +34,6 @@ struct map {
32 u64 start; 34 u64 start;
33 u64 end; 35 u64 end;
34 u8 /* enum map_type */ type; 36 u8 /* enum map_type */ type;
35 bool referenced;
36 bool erange_warned; 37 bool erange_warned;
37 u32 priv; 38 u32 priv;
38 u32 prot; 39 u32 prot;
@@ -50,6 +51,7 @@ struct map {
50 51
51 struct dso *dso; 52 struct dso *dso;
52 struct map_groups *groups; 53 struct map_groups *groups;
54 atomic_t refcnt;
53}; 55};
54 56
55struct kmap { 57struct kmap {
@@ -57,11 +59,15 @@ struct kmap {
57 struct map_groups *kmaps; 59 struct map_groups *kmaps;
58}; 60};
59 61
62struct maps {
63 struct rb_root entries;
64 pthread_rwlock_t lock;
65};
66
60struct map_groups { 67struct map_groups {
61 struct rb_root maps[MAP__NR_TYPES]; 68 struct maps maps[MAP__NR_TYPES];
62 struct list_head removed_maps[MAP__NR_TYPES];
63 struct machine *machine; 69 struct machine *machine;
64 int refcnt; 70 atomic_t refcnt;
65}; 71};
66 72
67struct map_groups *map_groups__new(struct machine *machine); 73struct map_groups *map_groups__new(struct machine *machine);
@@ -70,7 +76,8 @@ bool map_groups__empty(struct map_groups *mg);
70 76
71static inline struct map_groups *map_groups__get(struct map_groups *mg) 77static inline struct map_groups *map_groups__get(struct map_groups *mg)
72{ 78{
73 ++mg->refcnt; 79 if (mg)
80 atomic_inc(&mg->refcnt);
74 return mg; 81 return mg;
75} 82}
76 83
@@ -124,7 +131,7 @@ struct thread;
124 */ 131 */
125#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \ 132#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \
126 for (pos = map__find_symbol_by_name(map, sym_name, filter); \ 133 for (pos = map__find_symbol_by_name(map, sym_name, filter); \
127 pos && strcmp(pos->name, sym_name) == 0; \ 134 pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \
128 pos = symbol__next_by_name(pos)) 135 pos = symbol__next_by_name(pos))
129 136
130#define map__for_each_symbol_by_name(map, sym_name, pos) \ 137#define map__for_each_symbol_by_name(map, sym_name, pos) \
@@ -132,6 +139,7 @@ struct thread;
132 139
133typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); 140typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
134 141
142int arch__compare_symbol_names(const char *namea, const char *nameb);
135void map__init(struct map *map, enum map_type type, 143void map__init(struct map *map, enum map_type type,
136 u64 start, u64 end, u64 pgoff, struct dso *dso); 144 u64 start, u64 end, u64 pgoff, struct dso *dso);
137struct map *map__new(struct machine *machine, u64 start, u64 len, 145struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -141,6 +149,24 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
141struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 149struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
142void map__delete(struct map *map); 150void map__delete(struct map *map);
143struct map *map__clone(struct map *map); 151struct map *map__clone(struct map *map);
152
153static inline struct map *map__get(struct map *map)
154{
155 if (map)
156 atomic_inc(&map->refcnt);
157 return map;
158}
159
160void map__put(struct map *map);
161
162static inline void __map__zput(struct map **map)
163{
164 map__put(*map);
165 *map = NULL;
166}
167
168#define map__zput(map) __map__zput(&map)
169
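
Taken together with __maps__insert() grabbing its own reference in map.c, the lifetime pattern becomes: create, insert, then drop the creation reference, exactly as the mmap event handlers do above. A minimal sketch with a hypothetical helper name:

#include "map.h"

/* Hypothetical helper: hand a freshly created map over to a tree.
 * maps__insert() takes its own reference via map__get(), so the
 * creation reference can be dropped right after publishing. */
static void publish_map(struct maps *maps, struct map *map)
{
	maps__insert(maps, map);	/* the tree now owns a reference */
	map__put(map);			/* drop the creation reference */
}
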
144int map__overlap(struct map *l, struct map *r); 170int map__overlap(struct map *l, struct map *r);
145size_t map__fprintf(struct map *map, FILE *fp); 171size_t map__fprintf(struct map *map, FILE *fp);
146size_t map__fprintf_dsoname(struct map *map, FILE *fp); 172size_t map__fprintf_dsoname(struct map *map, FILE *fp);
@@ -159,11 +185,11 @@ void map__reloc_vmlinux(struct map *map);
159 185
160size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, 186size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
161 FILE *fp); 187 FILE *fp);
162void maps__insert(struct rb_root *maps, struct map *map); 188void maps__insert(struct maps *maps, struct map *map);
163void maps__remove(struct rb_root *maps, struct map *map); 189void maps__remove(struct maps *maps, struct map *map);
164struct map *maps__find(struct rb_root *maps, u64 addr); 190struct map *maps__find(struct maps *maps, u64 addr);
165struct map *maps__first(struct rb_root *maps); 191struct map *maps__first(struct maps *maps);
166struct map *maps__next(struct map *map); 192struct map *map__next(struct map *map);
167void map_groups__init(struct map_groups *mg, struct machine *machine); 193void map_groups__init(struct map_groups *mg, struct machine *machine);
168void map_groups__exit(struct map_groups *mg); 194void map_groups__exit(struct map_groups *mg);
169int map_groups__clone(struct map_groups *mg, 195int map_groups__clone(struct map_groups *mg,
@@ -198,7 +224,7 @@ static inline struct map *map_groups__first(struct map_groups *mg,
198 224
199static inline struct map *map_groups__next(struct map *map) 225static inline struct map *map_groups__next(struct map *map)
200{ 226{
201 return maps__next(map); 227 return map__next(map);
202} 228}
203 229
204struct symbol *map_groups__find_symbol(struct map_groups *mg, 230struct symbol *map_groups__find_symbol(struct map_groups *mg,
@@ -230,6 +256,4 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
230struct map *map_groups__find_by_name(struct map_groups *mg, 256struct map *map_groups__find_by_name(struct map_groups *mg,
231 enum map_type type, const char *name); 257 enum map_type type, const char *name);
232 258
233void map_groups__flush(struct map_groups *mg);
234
235#endif /* __PERF_MAP_H */ 259#endif /* __PERF_MAP_H */
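
With struct maps now carrying its own rwlock, any out-of-line traversal has to take the lock the way maps__fprintf() does in map.c. A sketch, assuming read-only iteration:

#include <pthread.h>
#include "map.h"

/* Illustrative reader: walk a struct maps under its rwlock. */
static size_t count_maps(struct maps *maps)
{
	struct map *pos;
	size_t n = 0;

	pthread_rwlock_rdlock(&maps->lock);
	for (pos = maps__first(maps); pos; pos = map__next(pos))
		n++;
	pthread_rwlock_unlock(&maps->lock);

	return n;
}
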
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 31ee02d4e988..53ef006a951c 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -50,11 +50,6 @@ void setup_pager(void)
50 50
51 if (!isatty(1)) 51 if (!isatty(1))
52 return; 52 return;
53 if (!pager) {
54 if (!pager_program)
55 perf_config(perf_default_config, NULL);
56 pager = pager_program;
57 }
58 if (!pager) 53 if (!pager)
59 pager = getenv("PAGER"); 54 pager = getenv("PAGER");
60 if (!(pager || access("/usr/bin/pager", X_OK))) 55 if (!(pager || access("/usr/bin/pager", X_OK)))
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 000000000000..a3b1e13a05c0
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,94 @@
1#include "perf.h"
2#include "util/util.h"
3#include "util/debug.h"
4#include "util/parse-options.h"
5#include "util/parse-branch-options.h"
6
7#define BRANCH_OPT(n, m) \
8 { .name = n, .mode = (m) }
9
10#define BRANCH_END { .name = NULL }
11
12struct branch_mode {
13 const char *name;
14 int mode;
15};
16
17static const struct branch_mode branch_modes[] = {
18 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
19 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
20 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
21 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
22 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
23 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
24 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
25 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
26 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
27 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
28 BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
29 BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
30 BRANCH_END
31};
32
33int
34parse_branch_stack(const struct option *opt, const char *str, int unset)
35{
36#define ONLY_PLM \
37 (PERF_SAMPLE_BRANCH_USER |\
38 PERF_SAMPLE_BRANCH_KERNEL |\
39 PERF_SAMPLE_BRANCH_HV)
40
41 uint64_t *mode = (uint64_t *)opt->value;
42 const struct branch_mode *br;
43 char *s, *os = NULL, *p;
44 int ret = -1;
45
46 if (unset)
47 return 0;
48
49 /*
50 * cannot set it twice, -b + --branch-filter for instance
51 */
52 if (*mode)
53 return -1;
54
55 /* str may be NULL in case no arg is passed to -b */
56 if (str) {
57 /* because str is read-only */
58 s = os = strdup(str);
59 if (!s)
60 return -1;
61
62 for (;;) {
63 p = strchr(s, ',');
64 if (p)
65 *p = '\0';
66
67 for (br = branch_modes; br->name; br++) {
68 if (!strcasecmp(s, br->name))
69 break;
70 }
71 if (!br->name) {
72 ui__warning("unknown branch filter %s,"
73 " check man page\n", s);
74 goto error;
75 }
76
77 *mode |= br->mode;
78
79 if (!p)
80 break;
81
82 s = p + 1;
83 }
84 }
85 ret = 0;
86
87 /* default to any branch */
88 if ((*mode & ~ONLY_PLM) == 0) {
89 *mode = PERF_SAMPLE_BRANCH_ANY;
90 }
91error:
92 free(os);
93 return ret;
94}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 000000000000..b9d9470c2e82
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,5 @@
1#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
2#define _PERF_PARSE_BRANCH_OPTIONS_H 1
3struct option;
4int parse_branch_stack(const struct option *opt, const char *str, int unset);
5#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
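
The point of splitting the parser into its own file is that any builtin can now register it as an option callback. A sketch of the wiring; the option letter and variable name are illustrative:

#include <linux/types.h>
#include "util/parse-options.h"
#include "util/parse-branch-options.h"

static u64 branch_mode;

/* Illustrative option table entry: route -j/--branch-filter through
 * the shared parser. */
static const struct option options[] = {
	OPT_CALLBACK('j', "branch-filter", &branch_mode, "mode[,mode...]",
		     "branch stack filter modes", parse_branch_stack),
	OPT_END()
};
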
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index be0655388b38..2a4d1ec02846 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,7 @@
17#include "parse-events-flex.h" 17#include "parse-events-flex.h"
18#include "pmu.h" 18#include "pmu.h"
19#include "thread_map.h" 19#include "thread_map.h"
20#include "asm/bug.h"
20 21
21#define MAX_NAME_LEN 100 22#define MAX_NAME_LEN 100
22 23
@@ -538,16 +539,40 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
538 return add_event(list, idx, &attr, NULL); 539 return add_event(list, idx, &attr, NULL);
539} 540}
540 541
542static int check_type_val(struct parse_events_term *term,
543 struct parse_events_error *err,
544 int type)
545{
546 if (type == term->type_val)
547 return 0;
548
549 if (err) {
550 err->idx = term->err_val;
551 if (type == PARSE_EVENTS__TERM_TYPE_NUM)
552 err->str = strdup("expected numeric value");
553 else
554 err->str = strdup("expected string value");
555 }
556 return -EINVAL;
557}
558
541static int config_term(struct perf_event_attr *attr, 559static int config_term(struct perf_event_attr *attr,
542 struct parse_events_term *term) 560 struct parse_events_term *term,
561 struct parse_events_error *err)
543{ 562{
544#define CHECK_TYPE_VAL(type) \ 563#define CHECK_TYPE_VAL(type) \
545do { \ 564do { \
546 if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val) \ 565 if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
547 return -EINVAL; \ 566 return -EINVAL; \
548} while (0) 567} while (0)
549 568
550 switch (term->type_term) { 569 switch (term->type_term) {
570 case PARSE_EVENTS__TERM_TYPE_USER:
571 /*
 572 * Always succeed for sysfs terms, as we don't know
573 * at this point what type they need to have.
574 */
575 return 0;
551 case PARSE_EVENTS__TERM_TYPE_CONFIG: 576 case PARSE_EVENTS__TERM_TYPE_CONFIG:
552 CHECK_TYPE_VAL(NUM); 577 CHECK_TYPE_VAL(NUM);
553 attr->config = term->val.num; 578 attr->config = term->val.num;
@@ -582,18 +607,20 @@ do { \
582} 607}
583 608
584static int config_attr(struct perf_event_attr *attr, 609static int config_attr(struct perf_event_attr *attr,
585 struct list_head *head, int fail) 610 struct list_head *head,
611 struct parse_events_error *err)
586{ 612{
587 struct parse_events_term *term; 613 struct parse_events_term *term;
588 614
589 list_for_each_entry(term, head, list) 615 list_for_each_entry(term, head, list)
590 if (config_term(attr, term) && fail) 616 if (config_term(attr, term, err))
591 return -EINVAL; 617 return -EINVAL;
592 618
593 return 0; 619 return 0;
594} 620}
595 621
596int parse_events_add_numeric(struct list_head *list, int *idx, 622int parse_events_add_numeric(struct parse_events_evlist *data,
623 struct list_head *list,
597 u32 type, u64 config, 624 u32 type, u64 config,
598 struct list_head *head_config) 625 struct list_head *head_config)
599{ 626{
@@ -604,10 +631,10 @@ int parse_events_add_numeric(struct list_head *list, int *idx,
604 attr.config = config; 631 attr.config = config;
605 632
606 if (head_config && 633 if (head_config &&
607 config_attr(&attr, head_config, 1)) 634 config_attr(&attr, head_config, data->error))
608 return -EINVAL; 635 return -EINVAL;
609 636
610 return add_event(list, idx, &attr, NULL); 637 return add_event(list, &data->idx, &attr, NULL);
611} 638}
612 639
613static int parse_events__is_name_term(struct parse_events_term *term) 640static int parse_events__is_name_term(struct parse_events_term *term)
@@ -626,8 +653,9 @@ static char *pmu_event_name(struct list_head *head_terms)
626 return NULL; 653 return NULL;
627} 654}
628 655
629int parse_events_add_pmu(struct list_head *list, int *idx, 656int parse_events_add_pmu(struct parse_events_evlist *data,
630 char *name, struct list_head *head_config) 657 struct list_head *list, char *name,
658 struct list_head *head_config)
631{ 659{
632 struct perf_event_attr attr; 660 struct perf_event_attr attr;
633 struct perf_pmu_info info; 661 struct perf_pmu_info info;
@@ -647,7 +675,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
647 675
648 if (!head_config) { 676 if (!head_config) {
649 attr.type = pmu->type; 677 attr.type = pmu->type;
650 evsel = __add_event(list, idx, &attr, NULL, pmu->cpus); 678 evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus);
651 return evsel ? 0 : -ENOMEM; 679 return evsel ? 0 : -ENOMEM;
652 } 680 }
653 681
@@ -658,13 +686,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
658 * Configure hardcoded terms first, no need to check 686 * Configure hardcoded terms first, no need to check
659 * return value when called with fail == 0 ;) 687 * return value when called with fail == 0 ;)
660 */ 688 */
661 config_attr(&attr, head_config, 0); 689 if (config_attr(&attr, head_config, data->error))
690 return -EINVAL;
662 691
663 if (perf_pmu__config(pmu, &attr, head_config)) 692 if (perf_pmu__config(pmu, &attr, head_config, data->error))
664 return -EINVAL; 693 return -EINVAL;
665 694
666 evsel = __add_event(list, idx, &attr, pmu_event_name(head_config), 695 evsel = __add_event(list, &data->idx, &attr,
667 pmu->cpus); 696 pmu_event_name(head_config), pmu->cpus);
668 if (evsel) { 697 if (evsel) {
669 evsel->unit = info.unit; 698 evsel->unit = info.unit;
670 evsel->scale = info.scale; 699 evsel->scale = info.scale;
@@ -1019,11 +1048,13 @@ int parse_events_terms(struct list_head *terms, const char *str)
1019 return ret; 1048 return ret;
1020} 1049}
1021 1050
1022int parse_events(struct perf_evlist *evlist, const char *str) 1051int parse_events(struct perf_evlist *evlist, const char *str,
1052 struct parse_events_error *err)
1023{ 1053{
1024 struct parse_events_evlist data = { 1054 struct parse_events_evlist data = {
1025 .list = LIST_HEAD_INIT(data.list), 1055 .list = LIST_HEAD_INIT(data.list),
1026 .idx = evlist->nr_entries, 1056 .idx = evlist->nr_entries,
1057 .error = err,
1027 }; 1058 };
1028 int ret; 1059 int ret;
1029 1060
@@ -1044,16 +1075,87 @@ int parse_events(struct perf_evlist *evlist, const char *str)
1044 return ret; 1075 return ret;
1045} 1076}
1046 1077
1078#define MAX_WIDTH 1000
1079static int get_term_width(void)
1080{
1081 struct winsize ws;
1082
1083 get_term_dimensions(&ws);
1084 return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
1085}
1086
1087static void parse_events_print_error(struct parse_events_error *err,
1088 const char *event)
1089{
1090 const char *str = "invalid or unsupported event: ";
1091 char _buf[MAX_WIDTH];
1092 char *buf = (char *) event;
1093 int idx = 0;
1094
1095 if (err->str) {
1096 /* -2 for extra '' in the final fprintf */
1097 int width = get_term_width() - 2;
1098 int len_event = strlen(event);
1099 int len_str, max_len, cut = 0;
1100
1101 /*
1102 * Maximum error index indent, we will cut
1103 * the event string if it's bigger.
1104 */
1105 int max_err_idx = 10;
1106
1107 /*
1108 * Let's be specific with the message when
1109 * we have the precise error.
1110 */
1111 str = "event syntax error: ";
1112 len_str = strlen(str);
1113 max_len = width - len_str;
1114
1115 buf = _buf;
1116
 1117 /* We're cutting from the beginning. */
1118 if (err->idx > max_err_idx)
1119 cut = err->idx - max_err_idx;
1120
1121 strncpy(buf, event + cut, max_len);
1122
1123 /* Mark cut parts with '..' on both sides. */
1124 if (cut)
1125 buf[0] = buf[1] = '.';
1126
1127 if ((len_event - cut) > max_len) {
1128 buf[max_len - 1] = buf[max_len - 2] = '.';
1129 buf[max_len] = 0;
1130 }
1131
1132 idx = len_str + err->idx - cut;
1133 }
1134
1135 fprintf(stderr, "%s'%s'\n", str, buf);
1136 if (idx) {
1137 fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
1138 if (err->help)
1139 fprintf(stderr, "\n%s\n", err->help);
1140 free(err->str);
1141 free(err->help);
1142 }
1143
1144 fprintf(stderr, "Run 'perf list' for a list of valid events\n");
1145}
1146
1147#undef MAX_WIDTH
1148
1047int parse_events_option(const struct option *opt, const char *str, 1149int parse_events_option(const struct option *opt, const char *str,
1048 int unset __maybe_unused) 1150 int unset __maybe_unused)
1049{ 1151{
1050 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; 1152 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
1051 int ret = parse_events(evlist, str); 1153 struct parse_events_error err = { .idx = 0, };
1154 int ret = parse_events(evlist, str, &err);
1155
1156 if (ret)
1157 parse_events_print_error(&err, str);
1052 1158
1053 if (ret) {
1054 fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
1055 fprintf(stderr, "Run 'perf list' for a list of valid events\n");
1056 }
1057 return ret; 1159 return ret;
1058} 1160}
1059 1161
@@ -1460,7 +1562,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
1460 1562
1461static int new_term(struct parse_events_term **_term, int type_val, 1563static int new_term(struct parse_events_term **_term, int type_val,
1462 int type_term, char *config, 1564 int type_term, char *config,
1463 char *str, u64 num) 1565 char *str, u64 num, int err_term, int err_val)
1464{ 1566{
1465 struct parse_events_term *term; 1567 struct parse_events_term *term;
1466 1568
@@ -1472,6 +1574,8 @@ static int new_term(struct parse_events_term **_term, int type_val,
1472 term->type_val = type_val; 1574 term->type_val = type_val;
1473 term->type_term = type_term; 1575 term->type_term = type_term;
1474 term->config = config; 1576 term->config = config;
1577 term->err_term = err_term;
1578 term->err_val = err_val;
1475 1579
1476 switch (type_val) { 1580 switch (type_val) {
1477 case PARSE_EVENTS__TERM_TYPE_NUM: 1581 case PARSE_EVENTS__TERM_TYPE_NUM:
@@ -1490,17 +1594,29 @@ static int new_term(struct parse_events_term **_term, int type_val,
1490} 1594}
1491 1595
1492int parse_events_term__num(struct parse_events_term **term, 1596int parse_events_term__num(struct parse_events_term **term,
1493 int type_term, char *config, u64 num) 1597 int type_term, char *config, u64 num,
1598 void *loc_term_, void *loc_val_)
1494{ 1599{
1600 YYLTYPE *loc_term = loc_term_;
1601 YYLTYPE *loc_val = loc_val_;
1602
1495 return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, 1603 return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
1496 config, NULL, num); 1604 config, NULL, num,
1605 loc_term ? loc_term->first_column : 0,
1606 loc_val ? loc_val->first_column : 0);
1497} 1607}
1498 1608
1499int parse_events_term__str(struct parse_events_term **term, 1609int parse_events_term__str(struct parse_events_term **term,
1500 int type_term, char *config, char *str) 1610 int type_term, char *config, char *str,
1611 void *loc_term_, void *loc_val_)
1501{ 1612{
1613 YYLTYPE *loc_term = loc_term_;
1614 YYLTYPE *loc_val = loc_val_;
1615
1502 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, 1616 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
1503 config, str, 0); 1617 config, str, 0,
1618 loc_term ? loc_term->first_column : 0,
1619 loc_val ? loc_val->first_column : 0);
1504} 1620}
1505 1621
1506int parse_events_term__sym_hw(struct parse_events_term **term, 1622int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -1514,18 +1630,20 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
1514 if (config) 1630 if (config)
1515 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, 1631 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
1516 PARSE_EVENTS__TERM_TYPE_USER, config, 1632 PARSE_EVENTS__TERM_TYPE_USER, config,
1517 (char *) sym->symbol, 0); 1633 (char *) sym->symbol, 0, 0, 0);
1518 else 1634 else
1519 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, 1635 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
1520 PARSE_EVENTS__TERM_TYPE_USER, 1636 PARSE_EVENTS__TERM_TYPE_USER,
1521 (char *) "event", (char *) sym->symbol, 0); 1637 (char *) "event", (char *) sym->symbol,
1638 0, 0, 0);
1522} 1639}
1523 1640
1524int parse_events_term__clone(struct parse_events_term **new, 1641int parse_events_term__clone(struct parse_events_term **new,
1525 struct parse_events_term *term) 1642 struct parse_events_term *term)
1526{ 1643{
1527 return new_term(new, term->type_val, term->type_term, term->config, 1644 return new_term(new, term->type_val, term->type_term, term->config,
1528 term->val.str, term->val.num); 1645 term->val.str, term->val.num,
1646 term->err_term, term->err_val);
1529} 1647}
1530 1648
1531void parse_events__free_terms(struct list_head *terms) 1649void parse_events__free_terms(struct list_head *terms)
@@ -1535,3 +1653,15 @@ void parse_events__free_terms(struct list_head *terms)
1535 list_for_each_entry_safe(term, h, terms, list) 1653 list_for_each_entry_safe(term, h, terms, list)
1536 free(term); 1654 free(term);
1537} 1655}
1656
1657void parse_events_evlist_error(struct parse_events_evlist *data,
1658 int idx, const char *str)
1659{
1660 struct parse_events_error *err = data->error;
1661
1662 if (!err)
1663 return;
1664 err->idx = idx;
1665 err->str = strdup(str);
1666 WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
1667}
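
For callers outside parse-events.c the new contract looks like the sketch below: pass in a zeroed parse_events_error, and on failure report (and free) the strings it hands back. parse_events_print_error() stays static in this file, so the printing is inlined here for illustration:

#include <stdio.h>
#include <stdlib.h>
#include "util/evlist.h"
#include "util/parse-events.h"

/* Hypothetical caller: parse an event string and report the
 * index-annotated error much like parse_events_option() above. */
static int add_events(struct perf_evlist *evlist, const char *str)
{
	struct parse_events_error err = { .idx = 0, };
	int ret = parse_events(evlist, str, &err);

	if (ret && err.str) {
		fprintf(stderr, "event syntax error: '%s'\n", str);
		fprintf(stderr, "%*s\\___ %s\n", err.idx + 1, "", err.str);
		free(err.str);
		free(err.help);
	}

	return ret;
}
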
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 52a2dda4f954..131f29b2f132 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -12,6 +12,7 @@
12struct list_head; 12struct list_head;
13struct perf_evsel; 13struct perf_evsel;
14struct perf_evlist; 14struct perf_evlist;
15struct parse_events_error;
15 16
16struct option; 17struct option;
17 18
@@ -29,7 +30,8 @@ const char *event_type(int type);
29 30
30extern int parse_events_option(const struct option *opt, const char *str, 31extern int parse_events_option(const struct option *opt, const char *str,
31 int unset); 32 int unset);
32extern int parse_events(struct perf_evlist *evlist, const char *str); 33extern int parse_events(struct perf_evlist *evlist, const char *str,
34 struct parse_events_error *error);
33extern int parse_events_terms(struct list_head *terms, const char *str); 35extern int parse_events_terms(struct list_head *terms, const char *str);
34extern int parse_filter(const struct option *opt, const char *str, int unset); 36extern int parse_filter(const struct option *opt, const char *str, int unset);
35 37
@@ -72,12 +74,23 @@ struct parse_events_term {
72 int type_term; 74 int type_term;
73 struct list_head list; 75 struct list_head list;
74 bool used; 76 bool used;
77
78 /* error string indexes for within parsed string */
79 int err_term;
80 int err_val;
81};
82
83struct parse_events_error {
84 int idx; /* index in the parsed string */
85 char *str; /* string to display at the index */
86 char *help; /* optional help string */
75}; 87};
76 88
77struct parse_events_evlist { 89struct parse_events_evlist {
78 struct list_head list; 90 struct list_head list;
79 int idx; 91 int idx;
80 int nr_groups; 92 int nr_groups;
93 struct parse_events_error *error;
81}; 94};
82 95
83struct parse_events_terms { 96struct parse_events_terms {
@@ -85,10 +98,12 @@ struct parse_events_terms {
85}; 98};
86 99
87int parse_events__is_hardcoded_term(struct parse_events_term *term); 100int parse_events__is_hardcoded_term(struct parse_events_term *term);
88int parse_events_term__num(struct parse_events_term **_term, 101int parse_events_term__num(struct parse_events_term **term,
89 int type_term, char *config, u64 num); 102 int type_term, char *config, u64 num,
90int parse_events_term__str(struct parse_events_term **_term, 103 void *loc_term, void *loc_val);
91 int type_term, char *config, char *str); 104int parse_events_term__str(struct parse_events_term **term,
105 int type_term, char *config, char *str,
106 void *loc_term, void *loc_val);
92int parse_events_term__sym_hw(struct parse_events_term **term, 107int parse_events_term__sym_hw(struct parse_events_term **term,
93 char *config, unsigned idx); 108 char *config, unsigned idx);
94int parse_events_term__clone(struct parse_events_term **new, 109int parse_events_term__clone(struct parse_events_term **new,
@@ -99,21 +114,24 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
99int parse_events_name(struct list_head *list, char *name); 114int parse_events_name(struct list_head *list, char *name);
100int parse_events_add_tracepoint(struct list_head *list, int *idx, 115int parse_events_add_tracepoint(struct list_head *list, int *idx,
101 char *sys, char *event); 116 char *sys, char *event);
102int parse_events_add_numeric(struct list_head *list, int *idx, 117int parse_events_add_numeric(struct parse_events_evlist *data,
118 struct list_head *list,
103 u32 type, u64 config, 119 u32 type, u64 config,
104 struct list_head *head_config); 120 struct list_head *head_config);
105int parse_events_add_cache(struct list_head *list, int *idx, 121int parse_events_add_cache(struct list_head *list, int *idx,
106 char *type, char *op_result1, char *op_result2); 122 char *type, char *op_result1, char *op_result2);
107int parse_events_add_breakpoint(struct list_head *list, int *idx, 123int parse_events_add_breakpoint(struct list_head *list, int *idx,
108 void *ptr, char *type, u64 len); 124 void *ptr, char *type, u64 len);
109int parse_events_add_pmu(struct list_head *list, int *idx, 125int parse_events_add_pmu(struct parse_events_evlist *data,
110 char *pmu , struct list_head *head_config); 126 struct list_head *list, char *name,
127 struct list_head *head_config);
111enum perf_pmu_event_symbol_type 128enum perf_pmu_event_symbol_type
112perf_pmu__parse_check(const char *name); 129perf_pmu__parse_check(const char *name);
113void parse_events__set_leader(char *name, struct list_head *list); 130void parse_events__set_leader(char *name, struct list_head *list);
114void parse_events_update_lists(struct list_head *list_event, 131void parse_events_update_lists(struct list_head *list_event,
115 struct list_head *list_all); 132 struct list_head *list_all);
116void parse_events_error(void *data, void *scanner, char const *msg); 133void parse_events_evlist_error(struct parse_events_evlist *data,
134 int idx, const char *str);
117 135
118void print_events(const char *event_glob, bool name_only); 136void print_events(const char *event_glob, bool name_only);
119 137
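
Callers that do not care can still pass NULL for the new argument; those that do allocate the struct on the stack and own the returned strings. A hedged caller fragment (evlist setup omitted; assumes only the declarations above, and that string ownership stays with the caller):

    struct parse_events_error err = { .idx = 0, .str = NULL, .help = NULL };

    if (parse_events(evlist, "cycles/bad-term/", &err)) {
            /* err.idx/err.str were filled in by the parser, if it could */
            if (err.str)
                    fprintf(stderr, "%s (column %d)\n", err.str, err.idx);
            free(err.str);
            free(err.help);
    }
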
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 8895cf3132ab..09e738fe9ea2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -3,6 +3,8 @@
3%option bison-bridge 3%option bison-bridge
4%option prefix="parse_events_" 4%option prefix="parse_events_"
5%option stack 5%option stack
6%option bison-locations
7%option yylineno
6 8
7%{ 9%{
8#include <errno.h> 10#include <errno.h>
@@ -51,6 +53,18 @@ static int str(yyscan_t scanner, int token)
51 return token; 53 return token;
52} 54}
53 55
56#define REWIND(__alloc) \
57do { \
58 YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
59 char *text = parse_events_get_text(yyscanner); \
60 \
61 if (__alloc) \
62 __yylval->str = strdup(text); \
63 \
64 yycolumn -= strlen(text); \
65 yyless(0); \
66} while (0)
67
54static int pmu_str_check(yyscan_t scanner) 68static int pmu_str_check(yyscan_t scanner)
55{ 69{
56 YYSTYPE *yylval = parse_events_get_lval(scanner); 70 YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -85,6 +99,13 @@ static int term(yyscan_t scanner, int type)
85 return PE_TERM; 99 return PE_TERM;
86} 100}
87 101
102#define YY_USER_ACTION \
103do { \
104 yylloc->last_column = yylloc->first_column; \
105 yylloc->first_column = yycolumn; \
106 yycolumn += yyleng; \
107} while (0);
108
88%} 109%}
89 110
90%x mem 111%x mem
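
YY_USER_ACTION runs before every rule's action and advances yycolumn by yyleng, while REWIND subtracts the token length again before yyless(0) pushes the text back, so a re-scanned token is not counted twice. The bookkeeping, modeled as plain C without flex:

    #include <stdio.h>
    #include <string.h>

    /* standalone model of the column bookkeeping above */
    int main(void)
    {
            int yycolumn = 0;
            const char *text = "cycles";

            yycolumn += strlen(text);       /* YY_USER_ACTION: token consumed */
            printf("after match: column %d\n", yycolumn);   /* 6 */

            yycolumn -= strlen(text);       /* REWIND: push the token back */
            printf("after rewind: column %d\n", yycolumn);  /* 0 again */
            return 0;
    }
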
@@ -119,6 +140,12 @@ modifier_bp [rwx]{1,3}
119 140
120 if (start_token) { 141 if (start_token) {
121 parse_events_set_extra(NULL, yyscanner); 142 parse_events_set_extra(NULL, yyscanner);
143 /*
144 * The flex parser does not initialize the locations
145 * variable via the scan_string interface, so we need
146 * to do the init here.
147 */
148 yycolumn = 0;
122 return start_token; 149 return start_token;
123 } 150 }
124 } 151 }
@@ -127,24 +154,30 @@ modifier_bp [rwx]{1,3}
127<event>{ 154<event>{
128 155
129{group} { 156{group} {
130 BEGIN(INITIAL); yyless(0); 157 BEGIN(INITIAL);
158 REWIND(0);
131 } 159 }
132 160
133{event_pmu} | 161{event_pmu} |
134{event} { 162{event} {
135 str(yyscanner, PE_EVENT_NAME); 163 BEGIN(INITIAL);
136 BEGIN(INITIAL); yyless(0); 164 REWIND(1);
137 return PE_EVENT_NAME; 165 return PE_EVENT_NAME;
138 } 166 }
139 167
140. | 168. |
141<<EOF>> { 169<<EOF>> {
142 BEGIN(INITIAL); yyless(0); 170 BEGIN(INITIAL);
171 REWIND(0);
143 } 172 }
144 173
145} 174}
146 175
147<config>{ 176<config>{
177 /*
178 * Please update formats_error_string any time
179 * a new static term is added.
180 */
148config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } 181config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
149config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } 182config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
150config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } 183config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 72def077dbbf..591905a02b92 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -2,6 +2,7 @@
2%parse-param {void *_data} 2%parse-param {void *_data}
3%parse-param {void *scanner} 3%parse-param {void *scanner}
4%lex-param {void* scanner} 4%lex-param {void* scanner}
5%locations
5 6
6%{ 7%{
7 8
@@ -14,8 +15,6 @@
14#include "parse-events.h" 15#include "parse-events.h"
15#include "parse-events-bison.h" 16#include "parse-events-bison.h"
16 17
17extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
18
19#define ABORT_ON(val) \ 18#define ABORT_ON(val) \
20do { \ 19do { \
21 if (val) \ 20 if (val) \
@@ -208,7 +207,7 @@ PE_NAME '/' event_config '/'
208 struct list_head *list; 207 struct list_head *list;
209 208
210 ALLOC_LIST(list); 209 ALLOC_LIST(list);
211 ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3)); 210 ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
212 parse_events__free_terms($3); 211 parse_events__free_terms($3);
213 $$ = list; 212 $$ = list;
214} 213}
@@ -219,7 +218,7 @@ PE_NAME '/' '/'
219 struct list_head *list; 218 struct list_head *list;
220 219
221 ALLOC_LIST(list); 220 ALLOC_LIST(list);
222 ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, NULL)); 221 ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
223 $$ = list; 222 $$ = list;
224} 223}
225| 224|
@@ -232,11 +231,11 @@ PE_KERNEL_PMU_EVENT sep_dc
232 231
233 ALLOC_LIST(head); 232 ALLOC_LIST(head);
234 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, 233 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
235 $1, 1)); 234 $1, 1, &@1, NULL));
236 list_add_tail(&term->list, head); 235 list_add_tail(&term->list, head);
237 236
238 ALLOC_LIST(list); 237 ALLOC_LIST(list);
239 ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); 238 ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
240 parse_events__free_terms(head); 239 parse_events__free_terms(head);
241 $$ = list; 240 $$ = list;
242} 241}
@@ -252,7 +251,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
252 251
253 ALLOC_LIST(head); 252 ALLOC_LIST(head);
254 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, 253 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
255 &pmu_name, 1)); 254 &pmu_name, 1, &@1, NULL));
256 list_add_tail(&term->list, head); 255 list_add_tail(&term->list, head);
257 256
258 ALLOC_LIST(list); 257 ALLOC_LIST(list);
@@ -275,8 +274,7 @@ value_sym '/' event_config '/'
275 int config = $1 & 255; 274 int config = $1 & 255;
276 275
277 ALLOC_LIST(list); 276 ALLOC_LIST(list);
278 ABORT_ON(parse_events_add_numeric(list, &data->idx, 277 ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
279 type, config, $3));
280 parse_events__free_terms($3); 278 parse_events__free_terms($3);
281 $$ = list; 279 $$ = list;
282} 280}
@@ -289,8 +287,7 @@ value_sym sep_slash_dc
289 int config = $1 & 255; 287 int config = $1 & 255;
290 288
291 ALLOC_LIST(list); 289 ALLOC_LIST(list);
292 ABORT_ON(parse_events_add_numeric(list, &data->idx, 290 ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL));
293 type, config, NULL));
294 $$ = list; 291 $$ = list;
295} 292}
296 293
@@ -389,7 +386,15 @@ PE_NAME ':' PE_NAME
389 struct list_head *list; 386 struct list_head *list;
390 387
391 ALLOC_LIST(list); 388 ALLOC_LIST(list);
392 ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3)); 389 if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) {
390 struct parse_events_error *error = data->error;
391
392 if (error) {
393 error->idx = @1.first_column;
394 error->str = strdup("unknown tracepoint");
395 }
396 return -1;
397 }
393 $$ = list; 398 $$ = list;
394} 399}
395 400
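
With %locations, each right-hand-side symbol carries a location (@1, @3, ...), which is what lets the rule above blame the exact token. A self-contained sketch of the same record-and-bail pattern (the struct names and lookup_tracepoint() are illustrative stubs, not the bison-generated types):

    #include <string.h>

    /* self-contained sketch; mirrors struct parse_events_error above */
    struct parse_error { int idx; char *str; };
    struct location { int first_column, last_column; };

    static int lookup_tracepoint(void) { return -1; }   /* stub: always fails */

    static int add_tracepoint_checked(struct parse_error *error,
                                      struct location *loc)
    {
            if (lookup_tracepoint() < 0) {
                    if (error) {
                            error->idx = loc->first_column;
                            error->str = strdup("unknown tracepoint");
                    }
                    return -1;
            }
            return 0;
    }
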
@@ -400,7 +405,7 @@ PE_VALUE ':' PE_VALUE
400 struct list_head *list; 405 struct list_head *list;
401 406
402 ALLOC_LIST(list); 407 ALLOC_LIST(list);
403 ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL)); 408 ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
404 $$ = list; 409 $$ = list;
405} 410}
406 411
@@ -411,8 +416,7 @@ PE_RAW
411 struct list_head *list; 416 struct list_head *list;
412 417
413 ALLOC_LIST(list); 418 ALLOC_LIST(list);
414 ABORT_ON(parse_events_add_numeric(list, &data->idx, 419 ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
415 PERF_TYPE_RAW, $1, NULL));
416 $$ = list; 420 $$ = list;
417} 421}
418 422
@@ -450,7 +454,7 @@ PE_NAME '=' PE_NAME
450 struct parse_events_term *term; 454 struct parse_events_term *term;
451 455
452 ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, 456 ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
453 $1, $3)); 457 $1, $3, &@1, &@3));
454 $$ = term; 458 $$ = term;
455} 459}
456| 460|
@@ -459,7 +463,7 @@ PE_NAME '=' PE_VALUE
459 struct parse_events_term *term; 463 struct parse_events_term *term;
460 464
461 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, 465 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
462 $1, $3)); 466 $1, $3, &@1, &@3));
463 $$ = term; 467 $$ = term;
464} 468}
465| 469|
@@ -477,7 +481,7 @@ PE_NAME
477 struct parse_events_term *term; 481 struct parse_events_term *term;
478 482
479 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, 483 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
480 $1, 1)); 484 $1, 1, &@1, NULL));
481 $$ = term; 485 $$ = term;
482} 486}
483| 487|
@@ -494,7 +498,7 @@ PE_TERM '=' PE_NAME
494{ 498{
495 struct parse_events_term *term; 499 struct parse_events_term *term;
496 500
497 ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3)); 501 ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
498 $$ = term; 502 $$ = term;
499} 503}
500| 504|
@@ -502,7 +506,7 @@ PE_TERM '=' PE_VALUE
502{ 506{
503 struct parse_events_term *term; 507 struct parse_events_term *term;
504 508
505 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3)); 509 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
506 $$ = term; 510 $$ = term;
507} 511}
508| 512|
@@ -510,7 +514,7 @@ PE_TERM
510{ 514{
511 struct parse_events_term *term; 515 struct parse_events_term *term;
512 516
513 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1)); 517 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
514 $$ = term; 518 $$ = term;
515} 519}
516 520
@@ -520,7 +524,9 @@ sep_slash_dc: '/' | ':' |
520 524
521%% 525%%
522 526
523void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused, 527void parse_events_error(YYLTYPE *loc, void *data,
528 void *scanner __maybe_unused,
524 char const *msg __maybe_unused) 529 char const *msg __maybe_unused)
525{ 530{
531 parse_events_evlist_error(data, loc->last_column, "parser error");
526} 532}
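
The signature change is dictated by bison: with %locations on a reentrant parser, the generated code passes the location as the first argument of the error callback, i.e. (sketch of the expected shape, modulo the parse_events_ prefix):

    void yyerror(YYLTYPE *loc, void *data, void *scanner, const char *msg);

which matches the new definition above; last_column is used as the best available guess for where the parse stopped.
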
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 59561fd86278..367d8b816cc7 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -123,6 +123,10 @@ struct option {
123#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } 123#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
124#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } 124#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
125#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } 125#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
126#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
127 { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
128 .value = check_vtype(v, const char **), (a), .help = (h), \
129 .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
126#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} 130#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
127#define OPT_DATE(s, l, v, h) \ 131#define OPT_DATE(s, l, v, h) \
128 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } 132 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
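
OPT_STRING_OPTARG marks the string option's argument as optional: PARSE_OPT_OPTARG lets the flag appear bare, in which case defval is stored instead. A hedged usage sketch (the option and variable names are made up):

    static const char *dump_file;
    const struct option options[] = {
            /* "--dump" alone -> dump_file = "perf.dump";
             * "--dump=FILE"  -> dump_file = FILE */
            OPT_STRING_OPTARG('d', "dump", &dump_file, "file",
                              "dump output to a file", "perf.dump"),
            OPT_END()
    };
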
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 48411674da0f..0fcc624eb767 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -112,7 +112,11 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
112 if (sret < 0) 112 if (sret < 0)
113 goto error; 113 goto error;
114 114
115 scale[sret] = '\0'; 115 if (scale[sret - 1] == '\n')
116 scale[sret - 1] = '\0';
117 else
118 scale[sret] = '\0';
119
116 /* 120 /*
117 * save current locale 121 * save current locale
118 */ 122 */
@@ -154,7 +158,10 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
154 158
155 close(fd); 159 close(fd);
156 160
157 alias->unit[sret] = '\0'; 161 if (alias->unit[sret - 1] == '\n')
162 alias->unit[sret - 1] = '\0';
163 else
164 alias->unit[sret] = '\0';
158 165
159 return 0; 166 return 0;
160error: 167error:
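
Both hunks fix the same issue: sysfs attribute reads normally end in '\n', and terminating the buffer after it kept the newline in the parsed scale/unit. The trim, as a standalone program:

    #include <stdio.h>

    /* terminate a sysfs read, dropping a single trailing newline */
    static void terminate_sysfs_value(char *buf, int len)
    {
            if (len > 0 && buf[len - 1] == '\n')
                    buf[len - 1] = '\0';
            else
                    buf[len] = '\0';
    }

    int main(void)
    {
            char v[16] = "Joule\n";
            terminate_sysfs_value(v, 6);
            printf("'%s'\n", v);    /* 'Joule' */
            return 0;
    }
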
@@ -442,6 +449,10 @@ static struct perf_pmu *pmu_lookup(const char *name)
442 LIST_HEAD(aliases); 449 LIST_HEAD(aliases);
443 __u32 type; 450 __u32 type;
444 451
452 /* No support for intel_bts or intel_pt so disallow them */
453 if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt"))
454 return NULL;
455
445 /* 456 /*
446 * The pmu data we store & need consists of the pmu 457 * The pmu data we store & need consists of the pmu
447 * type value and format definitions. Load both right 458 * type value and format definitions. Load both right
@@ -579,6 +590,38 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
579 return -1; 590 return -1;
580} 591}
581 592
593static char *formats_error_string(struct list_head *formats)
594{
595 struct perf_pmu_format *format;
596 char *err, *str;
597 static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
598 unsigned i = 0;
599
600 if (!asprintf(&str, "valid terms:"))
601 return NULL;
602
603 /* sysfs exported terms */
604 list_for_each_entry(format, formats, list) {
605 char c = i++ ? ',' : ' ';
606
607 err = str;
608 if (!asprintf(&str, "%s%c%s", err, c, format->name))
609 goto fail;
610 free(err);
611 }
612
613 /* static terms */
614 err = str;
615 if (!asprintf(&str, "%s,%s", err, static_terms))
616 goto fail;
617
618 free(err);
619 return str;
620fail:
621 free(err);
622 return NULL;
623}
624
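
formats_error_string() grows the message by repeated asprintf() calls, stashing the old pointer and freeing it after each step. When reusing the pattern, note that glibc's asprintf() returns the byte count or -1 on failure and leaves the output pointer undefined on error; a standalone version of the append loop with a `< 0` check:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* build "prefix a,b,c" by repeated asprintf(); sketch of the loop above */
    static char *join(const char **items, int n)
    {
            char *str = strdup("valid terms:");
            char *old;
            int i;

            for (i = 0; i < n && str; i++) {
                    old = str;
                    if (asprintf(&str, "%s%c%s", old,
                                 i ? ',' : ' ', items[i]) < 0)
                            str = NULL;     /* contents undefined on error */
                    free(old);
            }
            return str;
    }
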
582/* 625/*
583 * Setup one of config[12] attr members based on the 626 * Setup one of config[12] attr members based on the
584 * user input data - term parameter. 627 * user input data - term parameter.
@@ -587,7 +630,7 @@ static int pmu_config_term(struct list_head *formats,
587 struct perf_event_attr *attr, 630 struct perf_event_attr *attr,
588 struct parse_events_term *term, 631 struct parse_events_term *term,
589 struct list_head *head_terms, 632 struct list_head *head_terms,
590 bool zero) 633 bool zero, struct parse_events_error *err)
591{ 634{
592 struct perf_pmu_format *format; 635 struct perf_pmu_format *format;
593 __u64 *vp; 636 __u64 *vp;
@@ -611,6 +654,11 @@ static int pmu_config_term(struct list_head *formats,
611 if (!format) { 654 if (!format) {
612 if (verbose) 655 if (verbose)
613 printf("Invalid event/parameter '%s'\n", term->config); 656 printf("Invalid event/parameter '%s'\n", term->config);
657 if (err) {
658 err->idx = term->err_term;
659 err->str = strdup("unknown term");
660 err->help = formats_error_string(formats);
661 }
614 return -EINVAL; 662 return -EINVAL;
615 } 663 }
616 664
@@ -636,9 +684,14 @@ static int pmu_config_term(struct list_head *formats,
636 val = term->val.num; 684 val = term->val.num;
637 else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { 685 else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
638 if (strcmp(term->val.str, "?")) { 686 if (strcmp(term->val.str, "?")) {
639 if (verbose) 687 if (verbose) {
640 pr_info("Invalid sysfs entry %s=%s\n", 688 pr_info("Invalid sysfs entry %s=%s\n",
641 term->config, term->val.str); 689 term->config, term->val.str);
690 }
691 if (err) {
692 err->idx = term->err_val;
693 err->str = strdup("expected numeric value");
694 }
642 return -EINVAL; 695 return -EINVAL;
643 } 696 }
644 697
@@ -654,12 +707,13 @@ static int pmu_config_term(struct list_head *formats,
654int perf_pmu__config_terms(struct list_head *formats, 707int perf_pmu__config_terms(struct list_head *formats,
655 struct perf_event_attr *attr, 708 struct perf_event_attr *attr,
656 struct list_head *head_terms, 709 struct list_head *head_terms,
657 bool zero) 710 bool zero, struct parse_events_error *err)
658{ 711{
659 struct parse_events_term *term; 712 struct parse_events_term *term;
660 713
661 list_for_each_entry(term, head_terms, list) { 714 list_for_each_entry(term, head_terms, list) {
662 if (pmu_config_term(formats, attr, term, head_terms, zero)) 715 if (pmu_config_term(formats, attr, term, head_terms,
716 zero, err))
663 return -EINVAL; 717 return -EINVAL;
664 } 718 }
665 719
@@ -672,12 +726,14 @@ int perf_pmu__config_terms(struct list_head *formats,
672 * 2) pmu format definitions - specified by pmu parameter 726 * 2) pmu format definitions - specified by pmu parameter
673 */ 727 */
674int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, 728int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
675 struct list_head *head_terms) 729 struct list_head *head_terms,
730 struct parse_events_error *err)
676{ 731{
677 bool zero = !!pmu->default_config; 732 bool zero = !!pmu->default_config;
678 733
679 attr->type = pmu->type; 734 attr->type = pmu->type;
680 return perf_pmu__config_terms(&pmu->format, attr, head_terms, zero); 735 return perf_pmu__config_terms(&pmu->format, attr, head_terms,
736 zero, err);
681} 737}
682 738
683static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, 739static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6b1249fbdb5f..7b9c8cf8ae3e 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
4#include <linux/bitmap.h> 4#include <linux/bitmap.h>
5#include <linux/perf_event.h> 5#include <linux/perf_event.h>
6#include <stdbool.h> 6#include <stdbool.h>
7#include "parse-events.h"
7 8
8enum { 9enum {
9 PERF_PMU_FORMAT_VALUE_CONFIG, 10 PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -47,11 +48,12 @@ struct perf_pmu_alias {
47 48
48struct perf_pmu *perf_pmu__find(const char *name); 49struct perf_pmu *perf_pmu__find(const char *name);
49int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, 50int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
50 struct list_head *head_terms); 51 struct list_head *head_terms,
52 struct parse_events_error *error);
51int perf_pmu__config_terms(struct list_head *formats, 53int perf_pmu__config_terms(struct list_head *formats,
52 struct perf_event_attr *attr, 54 struct perf_event_attr *attr,
53 struct list_head *head_terms, 55 struct list_head *head_terms,
54 bool zero); 56 bool zero, struct parse_events_error *error);
55int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, 57int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
56 struct perf_pmu_info *info); 58 struct perf_pmu_info *info);
57struct list_head *perf_pmu__alias(struct perf_pmu *pmu, 59struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d05b77cf35f7..076527b639bd 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -51,6 +51,7 @@
51#define PERFPROBE_GROUP "probe" 51#define PERFPROBE_GROUP "probe"
52 52
53bool probe_event_dry_run; /* Dry run flag */ 53bool probe_event_dry_run; /* Dry run flag */
54struct probe_conf probe_conf;
54 55
55#define semantic_error(msg ...) pr_err("Semantic error :" msg) 56#define semantic_error(msg ...) pr_err("Semantic error :" msg)
56 57
@@ -161,18 +162,18 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc)
161 162
162static struct map *kernel_get_module_map(const char *module) 163static struct map *kernel_get_module_map(const char *module)
163{ 164{
164 struct rb_node *nd;
165 struct map_groups *grp = &host_machine->kmaps; 165 struct map_groups *grp = &host_machine->kmaps;
166 struct maps *maps = &grp->maps[MAP__FUNCTION];
167 struct map *pos;
166 168
167 /* A file path -- this is an offline module */ 169 /* A file path -- this is an offline module */
168 if (module && strchr(module, '/')) 170 if (module && strchr(module, '/'))
169 return machine__new_module(host_machine, 0, module); 171 return machine__findnew_module_map(host_machine, 0, module);
170 172
171 if (!module) 173 if (!module)
172 module = "kernel"; 174 module = "kernel";
173 175
174 for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { 176 for (pos = maps__first(maps); pos; pos = map__next(pos)) {
175 struct map *pos = rb_entry(nd, struct map, rb_node);
176 if (strncmp(pos->dso->short_name + 1, module, 177 if (strncmp(pos->dso->short_name + 1, module,
177 pos->dso->short_name_len - 2) == 0) { 178 pos->dso->short_name_len - 2) == 0) {
178 return pos; 179 return pos;
@@ -194,52 +195,11 @@ static void put_target_map(struct map *map, bool user)
194{ 195{
195 if (map && user) { 196 if (map && user) {
196 /* Only the user map needs to be released */ 197 /* Only the user map needs to be released */
197 dso__delete(map->dso); 198 map__put(map);
198 map__delete(map);
199 } 199 }
200} 200}
201 201
202 202
203static struct dso *kernel_get_module_dso(const char *module)
204{
205 struct dso *dso;
206 struct map *map;
207 const char *vmlinux_name;
208
209 if (module) {
210 list_for_each_entry(dso, &host_machine->kernel_dsos.head,
211 node) {
212 if (strncmp(dso->short_name + 1, module,
213 dso->short_name_len - 2) == 0)
214 goto found;
215 }
216 pr_debug("Failed to find module %s.\n", module);
217 return NULL;
218 }
219
220 map = host_machine->vmlinux_maps[MAP__FUNCTION];
221 dso = map->dso;
222
223 vmlinux_name = symbol_conf.vmlinux_name;
224 if (vmlinux_name) {
225 if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0)
226 return NULL;
227 } else {
228 if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
229 pr_debug("Failed to load kernel map.\n");
230 return NULL;
231 }
232 }
233found:
234 return dso;
235}
236
237const char *kernel_get_module_path(const char *module)
238{
239 struct dso *dso = kernel_get_module_dso(module);
240 return (dso) ? dso->long_name : NULL;
241}
242
243static int convert_exec_to_group(const char *exec, char **result) 203static int convert_exec_to_group(const char *exec, char **result)
244{ 204{
245 char *ptr1, *ptr2, *exec_copy; 205 char *ptr1, *ptr2, *exec_copy;
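
put_target_map() now drops its reference with one map__put() instead of hand-deleting the dso and map, relying on reference counting in the map code. The get/put discipline in miniature (generic types, not perf's struct map):

    #include <stdlib.h>

    struct obj { int refcnt; };

    static struct obj *obj_get(struct obj *o)
    {
            if (o)
                    o->refcnt++;
            return o;
    }

    static void obj_put(struct obj *o)
    {
            if (o && --o->refcnt == 0)
                    free(o);        /* last reference frees the object */
    }
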
@@ -286,7 +246,55 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
286 clear_probe_trace_event(tevs + i); 246 clear_probe_trace_event(tevs + i);
287} 247}
288 248
249static bool kprobe_blacklist__listed(unsigned long address);
250static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
251{
252 /* Get the address of _etext for checking non-probable text symbol */
253 if (kernel_get_symbol_address_by_name("_etext", false) < address)
254 pr_warning("%s is out of .text, skip it.\n", symbol);
255 else if (kprobe_blacklist__listed(address))
256 pr_warning("%s is blacklisted function, skip it.\n", symbol);
257 else
258 return false;
259
260 return true;
261}
262
289#ifdef HAVE_DWARF_SUPPORT 263#ifdef HAVE_DWARF_SUPPORT
264
265static int kernel_get_module_dso(const char *module, struct dso **pdso)
266{
267 struct dso *dso;
268 struct map *map;
269 const char *vmlinux_name;
270 int ret = 0;
271
272 if (module) {
273 list_for_each_entry(dso, &host_machine->dsos.head, node) {
274 if (!dso->kernel)
275 continue;
276 if (strncmp(dso->short_name + 1, module,
277 dso->short_name_len - 2) == 0)
278 goto found;
279 }
280 pr_debug("Failed to find module %s.\n", module);
281 return -ENOENT;
282 }
283
284 map = host_machine->vmlinux_maps[MAP__FUNCTION];
285 dso = map->dso;
286
287 vmlinux_name = symbol_conf.vmlinux_name;
288 dso->load_errno = 0;
289 if (vmlinux_name)
290 ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
291 else
292 ret = dso__load_vmlinux_path(dso, map, NULL);
293found:
294 *pdso = dso;
295 return ret;
296}
297
290/* 298/*
291 * Some binaries like glibc have special symbols which are on the symbol 299 * Some binaries like glibc have special symbols which are on the symbol
292 * table, but not in the debuginfo. If we can find the address of the 300 * table, but not in the debuginfo. If we can find the address of the
@@ -344,15 +352,14 @@ out:
344 352
345static int get_alternative_probe_event(struct debuginfo *dinfo, 353static int get_alternative_probe_event(struct debuginfo *dinfo,
346 struct perf_probe_event *pev, 354 struct perf_probe_event *pev,
347 struct perf_probe_point *tmp, 355 struct perf_probe_point *tmp)
348 const char *target)
349{ 356{
350 int ret; 357 int ret;
351 358
352 memcpy(tmp, &pev->point, sizeof(*tmp)); 359 memcpy(tmp, &pev->point, sizeof(*tmp));
353 memset(&pev->point, 0, sizeof(pev->point)); 360 memset(&pev->point, 0, sizeof(pev->point));
354 ret = find_alternative_probe_point(dinfo, tmp, &pev->point, 361 ret = find_alternative_probe_point(dinfo, tmp, &pev->point,
355 target, pev->uprobes); 362 pev->target, pev->uprobes);
356 if (ret < 0) 363 if (ret < 0)
357 memcpy(&pev->point, tmp, sizeof(*tmp)); 364 memcpy(&pev->point, tmp, sizeof(*tmp));
358 365
@@ -390,16 +397,25 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
390static struct debuginfo *open_debuginfo(const char *module, bool silent) 397static struct debuginfo *open_debuginfo(const char *module, bool silent)
391{ 398{
392 const char *path = module; 399 const char *path = module;
393 struct debuginfo *ret; 400 char reason[STRERR_BUFSIZE];
401 struct debuginfo *ret = NULL;
402 struct dso *dso = NULL;
403 int err;
394 404
395 if (!module || !strchr(module, '/')) { 405 if (!module || !strchr(module, '/')) {
396 path = kernel_get_module_path(module); 406 err = kernel_get_module_dso(module, &dso);
397 if (!path) { 407 if (err < 0) {
408 if (!dso || dso->load_errno == 0) {
409 if (!strerror_r(-err, reason, STRERR_BUFSIZE))
410 strcpy(reason, "(unknown)");
411 } else
412 dso__strerror_load(dso, reason, STRERR_BUFSIZE);
398 if (!silent) 413 if (!silent)
399 pr_err("Failed to find path of %s module.\n", 414 pr_err("Failed to find the path for %s: %s\n",
400 module ?: "kernel"); 415 module ?: "kernel", reason);
401 return NULL; 416 return NULL;
402 } 417 }
418 path = dso->long_name;
403 } 419 }
404 ret = debuginfo__new(path); 420 ret = debuginfo__new(path);
405 if (!ret && !silent) { 421 if (!ret && !silent) {
@@ -413,6 +429,41 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent)
413 return ret; 429 return ret;
414} 430}
415 431
432/* For caching the last debuginfo */
433static struct debuginfo *debuginfo_cache;
434static char *debuginfo_cache_path;
435
436static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
437{
438 if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) ||
439 (!debuginfo_cache_path && !module && debuginfo_cache))
440 goto out;
441
442 /* Copy module path */
443 free(debuginfo_cache_path);
444 if (module) {
445 debuginfo_cache_path = strdup(module);
446 if (!debuginfo_cache_path) {
447 debuginfo__delete(debuginfo_cache);
448 debuginfo_cache = NULL;
449 goto out;
450 }
451 }
452
453 debuginfo_cache = open_debuginfo(module, silent);
454 if (!debuginfo_cache)
455 zfree(&debuginfo_cache_path);
456out:
457 return debuginfo_cache;
458}
459
460static void debuginfo_cache__exit(void)
461{
462 debuginfo__delete(debuginfo_cache);
463 debuginfo_cache = NULL;
464 zfree(&debuginfo_cache_path);
465}
466
416 467
417static int get_text_start_address(const char *exec, unsigned long *address) 468static int get_text_start_address(const char *exec, unsigned long *address)
418{ 469{
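
debuginfo_cache__open() is a single-slot cache keyed by module path, so resolving many addresses in the same module reopens nothing; debuginfo_cache__exit() flushes it. Reduced to a standalone shape (open_fn stands in for open_debuginfo(); the real code also deletes the previously cached object and handles strdup() failure):

    #include <stdlib.h>
    #include <string.h>

    static char *cache_key;
    static void *cache_val;

    static void *cache_open(const char *key, void *(*open_fn)(const char *))
    {
            if (cache_key && key && !strcmp(cache_key, key))
                    return cache_val;               /* hit */

            free(cache_key);
            cache_key = key ? strdup(key) : NULL;   /* NULL key = "kernel" */
            cache_val = open_fn(key);
            return cache_val;
    }
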
@@ -474,12 +525,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
474 pr_debug("try to find information at %" PRIx64 " in %s\n", addr, 525 pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
475 tp->module ? : "kernel"); 526 tp->module ? : "kernel");
476 527
477 dinfo = open_debuginfo(tp->module, verbose == 0); 528 dinfo = debuginfo_cache__open(tp->module, verbose == 0);
478 if (dinfo) { 529 if (dinfo)
479 ret = debuginfo__find_probe_point(dinfo, 530 ret = debuginfo__find_probe_point(dinfo,
480 (unsigned long)addr, pp); 531 (unsigned long)addr, pp);
481 debuginfo__delete(dinfo); 532 else
482 } else
483 ret = -ENOENT; 533 ret = -ENOENT;
484 534
485 if (ret > 0) { 535 if (ret > 0) {
@@ -558,7 +608,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
558{ 608{
559 struct ref_reloc_sym *reloc_sym; 609 struct ref_reloc_sym *reloc_sym;
560 char *tmp; 610 char *tmp;
561 int i; 611 int i, skipped = 0;
562 612
563 if (uprobe) 613 if (uprobe)
564 return add_exec_to_probe_trace_events(tevs, ntevs, module); 614 return add_exec_to_probe_trace_events(tevs, ntevs, module);
@@ -574,31 +624,40 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
574 } 624 }
575 625
576 for (i = 0; i < ntevs; i++) { 626 for (i = 0; i < ntevs; i++) {
577 if (tevs[i].point.address && !tevs[i].point.retprobe) { 627 if (!tevs[i].point.address || tevs[i].point.retprobe)
628 continue;
629 /* If we found a wrong one, mark it by NULL symbol */
630 if (kprobe_warn_out_range(tevs[i].point.symbol,
631 tevs[i].point.address)) {
632 tmp = NULL;
633 skipped++;
634 } else {
578 tmp = strdup(reloc_sym->name); 635 tmp = strdup(reloc_sym->name);
579 if (!tmp) 636 if (!tmp)
580 return -ENOMEM; 637 return -ENOMEM;
581 free(tevs[i].point.symbol);
582 tevs[i].point.symbol = tmp;
583 tevs[i].point.offset = tevs[i].point.address -
584 reloc_sym->unrelocated_addr;
585 } 638 }
639 /* If we have no realname, use symbol for it */
640 if (!tevs[i].point.realname)
641 tevs[i].point.realname = tevs[i].point.symbol;
642 else
643 free(tevs[i].point.symbol);
644 tevs[i].point.symbol = tmp;
645 tevs[i].point.offset = tevs[i].point.address -
646 reloc_sym->unrelocated_addr;
586 } 647 }
587 return 0; 648 return skipped;
588} 649}
589 650
590/* Try to find perf_probe_event with debuginfo */ 651/* Try to find perf_probe_event with debuginfo */
591static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 652static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
592 struct probe_trace_event **tevs, 653 struct probe_trace_event **tevs)
593 int max_tevs, const char *target)
594{ 654{
595 bool need_dwarf = perf_probe_event_need_dwarf(pev); 655 bool need_dwarf = perf_probe_event_need_dwarf(pev);
596 struct perf_probe_point tmp; 656 struct perf_probe_point tmp;
597 struct debuginfo *dinfo; 657 struct debuginfo *dinfo;
598 int ntevs, ret = 0; 658 int ntevs, ret = 0;
599 659
600 dinfo = open_debuginfo(target, !need_dwarf); 660 dinfo = open_debuginfo(pev->target, !need_dwarf);
601
602 if (!dinfo) { 661 if (!dinfo) {
603 if (need_dwarf) 662 if (need_dwarf)
604 return -ENOENT; 663 return -ENOENT;
@@ -608,13 +667,12 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
608 667
609 pr_debug("Try to find probe point from debuginfo.\n"); 668 pr_debug("Try to find probe point from debuginfo.\n");
610 /* Searching trace events corresponding to a probe event */ 669 /* Searching trace events corresponding to a probe event */
611 ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); 670 ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
612 671
613 if (ntevs == 0) { /* Not found, retry with an alternative */ 672 if (ntevs == 0) { /* Not found, retry with an alternative */
614 ret = get_alternative_probe_event(dinfo, pev, &tmp, target); 673 ret = get_alternative_probe_event(dinfo, pev, &tmp);
615 if (!ret) { 674 if (!ret) {
616 ntevs = debuginfo__find_trace_events(dinfo, pev, 675 ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
617 tevs, max_tevs);
618 /* 676 /*
619 * Write back to the original probe_event for 677 * Write back to the original probe_event for
620 * setting appropriate (user given) event name 678 * setting appropriate (user given) event name
@@ -629,12 +687,15 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
629 if (ntevs > 0) { /* Succeeded to find trace events */ 687 if (ntevs > 0) { /* Succeeded to find trace events */
630 pr_debug("Found %d probe_trace_events.\n", ntevs); 688 pr_debug("Found %d probe_trace_events.\n", ntevs);
631 ret = post_process_probe_trace_events(*tevs, ntevs, 689 ret = post_process_probe_trace_events(*tevs, ntevs,
632 target, pev->uprobes); 690 pev->target, pev->uprobes);
633 if (ret < 0) { 691 if (ret < 0 || ret == ntevs) {
634 clear_probe_trace_events(*tevs, ntevs); 692 clear_probe_trace_events(*tevs, ntevs);
635 zfree(tevs); 693 zfree(tevs);
636 } 694 }
637 return ret < 0 ? ret : ntevs; 695 if (ret != ntevs)
696 return ret < 0 ? ret : ntevs;
697 ntevs = 0;
698 /* Fall through */
638 } 699 }
639 700
640 if (ntevs == 0) { /* No error but failed to find probe point. */ 701 if (ntevs == 0) { /* No error but failed to find probe point. */
@@ -809,8 +870,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user)
809 870
810static int show_available_vars_at(struct debuginfo *dinfo, 871static int show_available_vars_at(struct debuginfo *dinfo,
811 struct perf_probe_event *pev, 872 struct perf_probe_event *pev,
812 int max_vls, struct strfilter *_filter, 873 struct strfilter *_filter)
813 bool externs, const char *target)
814{ 874{
815 char *buf; 875 char *buf;
816 int ret, i, nvars; 876 int ret, i, nvars;
@@ -824,13 +884,12 @@ static int show_available_vars_at(struct debuginfo *dinfo,
824 return -EINVAL; 884 return -EINVAL;
825 pr_debug("Searching variables at %s\n", buf); 885 pr_debug("Searching variables at %s\n", buf);
826 886
827 ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, 887 ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
828 max_vls, externs);
829 if (!ret) { /* Not found, retry with an alternative */ 888 if (!ret) { /* Not found, retry with an alternative */
830 ret = get_alternative_probe_event(dinfo, pev, &tmp, target); 889 ret = get_alternative_probe_event(dinfo, pev, &tmp);
831 if (!ret) { 890 if (!ret) {
832 ret = debuginfo__find_available_vars_at(dinfo, pev, 891 ret = debuginfo__find_available_vars_at(dinfo, pev,
833 &vls, max_vls, externs); 892 &vls);
834 /* Release the old probe_point */ 893 /* Release the old probe_point */
835 clear_perf_probe_point(&tmp); 894 clear_perf_probe_point(&tmp);
836 } 895 }
@@ -877,8 +936,7 @@ end:
877 936
878/* Show available variables on given probe point */ 937/* Show available variables on given probe point */
879int show_available_vars(struct perf_probe_event *pevs, int npevs, 938int show_available_vars(struct perf_probe_event *pevs, int npevs,
880 int max_vls, const char *module, 939 struct strfilter *_filter)
881 struct strfilter *_filter, bool externs)
882{ 940{
883 int i, ret = 0; 941 int i, ret = 0;
884 struct debuginfo *dinfo; 942 struct debuginfo *dinfo;
@@ -887,7 +945,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
887 if (ret < 0) 945 if (ret < 0)
888 return ret; 946 return ret;
889 947
890 dinfo = open_debuginfo(module, false); 948 dinfo = open_debuginfo(pevs->target, false);
891 if (!dinfo) { 949 if (!dinfo) {
892 ret = -ENOENT; 950 ret = -ENOENT;
893 goto out; 951 goto out;
@@ -896,8 +954,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
896 setup_pager(); 954 setup_pager();
897 955
898 for (i = 0; i < npevs && ret >= 0; i++) 956 for (i = 0; i < npevs && ret >= 0; i++)
899 ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter, 957 ret = show_available_vars_at(dinfo, &pevs[i], _filter);
900 externs, module);
901 958
902 debuginfo__delete(dinfo); 959 debuginfo__delete(dinfo);
903out: 960out:
@@ -907,6 +964,10 @@ out:
907 964
908#else /* !HAVE_DWARF_SUPPORT */ 965#else /* !HAVE_DWARF_SUPPORT */
909 966
967static void debuginfo_cache__exit(void)
968{
969}
970
910static int 971static int
911find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused, 972find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
912 struct perf_probe_point *pp __maybe_unused, 973 struct perf_probe_point *pp __maybe_unused,
@@ -916,9 +977,7 @@ find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
916} 977}
917 978
918static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 979static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
919 struct probe_trace_event **tevs __maybe_unused, 980 struct probe_trace_event **tevs __maybe_unused)
920 int max_tevs __maybe_unused,
921 const char *target __maybe_unused)
922{ 981{
923 if (perf_probe_event_need_dwarf(pev)) { 982 if (perf_probe_event_need_dwarf(pev)) {
924 pr_warning("Debuginfo-analysis is not supported.\n"); 983 pr_warning("Debuginfo-analysis is not supported.\n");
@@ -937,10 +996,8 @@ int show_line_range(struct line_range *lr __maybe_unused,
937} 996}
938 997
939int show_available_vars(struct perf_probe_event *pevs __maybe_unused, 998int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
940 int npevs __maybe_unused, int max_vls __maybe_unused, 999 int npevs __maybe_unused,
941 const char *module __maybe_unused, 1000 struct strfilter *filter __maybe_unused)
942 struct strfilter *filter __maybe_unused,
943 bool externs __maybe_unused)
944{ 1001{
945 pr_warning("Debuginfo-analysis is not supported.\n"); 1002 pr_warning("Debuginfo-analysis is not supported.\n");
946 return -ENOSYS; 1003 return -ENOSYS;
@@ -980,6 +1037,18 @@ static int parse_line_num(char **ptr, int *val, const char *what)
980 return 0; 1037 return 0;
981} 1038}
982 1039
1040/* Check the name is good for event, group or function */
1041static bool is_c_func_name(const char *name)
1042{
1043 if (!isalpha(*name) && *name != '_')
1044 return false;
1045 while (*++name != '\0') {
1046 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
1047 return false;
1048 }
1049 return true;
1050}
1051
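
is_c_func_name() is the plain C identifier check (leading alpha or '_', then alphanumerics or '_'), now shared by the event-name and line-range paths. A standalone demo:

    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool is_c_func_name(const char *name)
    {
            if (!isalpha(*name) && *name != '_')
                    return false;
            while (*++name != '\0') {
                    if (!isalpha(*name) && !isdigit(*name) && *name != '_')
                            return false;
            }
            return true;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   is_c_func_name("vfs_read"),      /* 1 */
                   is_c_func_name("_do_fork"),      /* 1 */
                   is_c_func_name("8250_core"));    /* 0: leading digit */
            return 0;
    }
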
983/* 1052/*
984 * Stuff 'lr' according to the line range described by 'arg'. 1053 * Stuff 'lr' according to the line range described by 'arg'.
985 * The line range syntax is described by: 1054 * The line range syntax is described by:
@@ -1048,10 +1117,15 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
1048 goto err; 1117 goto err;
1049 } 1118 }
1050 lr->function = name; 1119 lr->function = name;
1051 } else if (strchr(name, '.')) 1120 } else if (strchr(name, '/') || strchr(name, '.'))
1052 lr->file = name; 1121 lr->file = name;
1053 else 1122 else if (is_c_func_name(name))/* We reuse it for checking funcname */
1054 lr->function = name; 1123 lr->function = name;
1124 else { /* Invalid name */
1125 semantic_error("'%s' is not a valid function name.\n", name);
1126 err = -EINVAL;
1127 goto err;
1128 }
1055 1129
1056 return 0; 1130 return 0;
1057err: 1131err:
@@ -1059,24 +1133,13 @@ err:
1059 return err; 1133 return err;
1060} 1134}
1061 1135
1062/* Check the name is good for event/group */
1063static bool check_event_name(const char *name)
1064{
1065 if (!isalpha(*name) && *name != '_')
1066 return false;
1067 while (*++name != '\0') {
1068 if (!isalpha(*name) && !isdigit(*name) && *name != '_')
1069 return false;
1070 }
1071 return true;
1072}
1073
1074/* Parse probepoint definition. */ 1136/* Parse probepoint definition. */
1075static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) 1137static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1076{ 1138{
1077 struct perf_probe_point *pp = &pev->point; 1139 struct perf_probe_point *pp = &pev->point;
1078 char *ptr, *tmp; 1140 char *ptr, *tmp;
1079 char c, nc = 0; 1141 char c, nc = 0;
1142 bool file_spec = false;
1080 /* 1143 /*
1081 * <Syntax> 1144 * <Syntax>
1082 * perf probe [EVENT=]SRC[:LN|;PTN] 1145 * perf probe [EVENT=]SRC[:LN|;PTN]
@@ -1095,7 +1158,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1095 semantic_error("Group name is not supported yet.\n"); 1158 semantic_error("Group name is not supported yet.\n");
1096 return -ENOTSUP; 1159 return -ENOTSUP;
1097 } 1160 }
1098 if (!check_event_name(arg)) { 1161 if (!is_c_func_name(arg)) {
1099 semantic_error("%s is bad for event name -it must " 1162 semantic_error("%s is bad for event name -it must "
1100 "follow C symbol-naming rule.\n", arg); 1163 "follow C symbol-naming rule.\n", arg);
1101 return -EINVAL; 1164 return -EINVAL;
@@ -1107,6 +1170,23 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1107 arg = tmp; 1170 arg = tmp;
1108 } 1171 }
1109 1172
1173 /*
1174 * Check whether arg is a function or file name and copy it.
1175 *
1176 * We consider arg to be a file spec if and only if it satisfies
1177 * all of the below criteria:
1178 * - it does not include any of "+@%",
1179 * - it includes one of ":;", and
1180 * - it has a period '.' in the name.
1181 *
1182 * Otherwise, we consider arg to be a function specification.
1183 */
1184 if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) {
1185 /* This is a file spec if it includes a '.' before ; or : */
1186 if (memchr(arg, '.', ptr - arg))
1187 file_spec = true;
1188 }
1189
1110 ptr = strpbrk(arg, ";:+@%"); 1190 ptr = strpbrk(arg, ";:+@%");
1111 if (ptr) { 1191 if (ptr) {
1112 nc = *ptr; 1192 nc = *ptr;
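
The heuristic is easy to exercise in isolation: a spec is a file if it avoids "+@%", contains ';' or ':', and has a '.' before that separator (see the criteria comment above). Standalone:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* standalone copy of the file-spec test used above */
    static bool is_file_spec(const char *arg)
    {
            const char *ptr;

            if (strpbrk(arg, "+@%"))
                    return false;
            ptr = strpbrk(arg, ";:");
            return ptr && memchr(arg, '.', ptr - arg);
    }

    int main(void)
    {
            printf("%d\n", is_file_spec("fs/super.c:10"));  /* 1: file */
            printf("%d\n", is_file_spec("vfs_read:11"));    /* 0: function */
            printf("%d\n", is_file_spec("do_sys_open+8"));  /* 0: function */
            return 0;
    }
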
@@ -1117,10 +1197,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
1117 if (tmp == NULL) 1197 if (tmp == NULL)
1118 return -ENOMEM; 1198 return -ENOMEM;
1119 1199
1120 /* Check arg is function or file and copy it */ 1200 if (file_spec)
1121 if (strchr(tmp, '.')) /* File */
1122 pp->file = tmp; 1201 pp->file = tmp;
1123 else /* Function */ 1202 else
1124 pp->function = tmp; 1203 pp->function = tmp;
1125 1204
1126 /* Parse other options */ 1205 /* Parse other options */
@@ -1762,8 +1841,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
1762 1841
1763out: 1842out:
1764 if (map && !is_kprobe) { 1843 if (map && !is_kprobe) {
1765 dso__delete(map->dso); 1844 map__put(map);
1766 map__delete(map);
1767 } 1845 }
1768 1846
1769 return ret; 1847 return ret;
@@ -1877,6 +1955,7 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
1877 free(tev->event); 1955 free(tev->event);
1878 free(tev->group); 1956 free(tev->group);
1879 free(tev->point.symbol); 1957 free(tev->point.symbol);
1958 free(tev->point.realname);
1880 free(tev->point.module); 1959 free(tev->point.module);
1881 for (i = 0; i < tev->nargs; i++) { 1960 for (i = 0; i < tev->nargs; i++) {
1882 free(tev->args[i].name); 1961 free(tev->args[i].name);
@@ -1954,7 +2033,7 @@ static int open_probe_events(const char *trace_file, bool readwrite)
1954 if (ret >= 0) { 2033 if (ret >= 0) {
1955 pr_debug("Opening %s write=%d\n", buf, readwrite); 2034 pr_debug("Opening %s write=%d\n", buf, readwrite);
1956 if (readwrite && !probe_event_dry_run) 2035 if (readwrite && !probe_event_dry_run)
1957 ret = open(buf, O_RDWR, O_APPEND); 2036 ret = open(buf, O_RDWR | O_APPEND, 0);
1958 else 2037 else
1959 ret = open(buf, O_RDONLY, 0); 2038 ret = open(buf, O_RDONLY, 0);
1960 2039
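
The open() change above fixes a classic open(2) slip: the third argument is a creation mode, consulted only when O_CREAT is given, so O_APPEND passed there was silently ignored. In miniature:

    /* wrong: O_APPEND ends up in the (ignored) mode argument */
    fd = open(buf, O_RDWR, O_APPEND);

    /* right: append is an open flag and must be OR-ed into the flags */
    fd = open(buf, O_RDWR | O_APPEND, 0);
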
@@ -2095,9 +2174,31 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist,
2095 return NULL; 2174 return NULL;
2096} 2175}
2097 2176
2098/* Show an event */ 2177static LIST_HEAD(kprobe_blacklist);
2099static int show_perf_probe_event(struct perf_probe_event *pev, 2178
2100 const char *module) 2179static void kprobe_blacklist__init(void)
2180{
2181 if (!list_empty(&kprobe_blacklist))
2182 return;
2183
2184 if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
2185 pr_debug("No kprobe blacklist support, ignored\n");
2186}
2187
2188static void kprobe_blacklist__release(void)
2189{
2190 kprobe_blacklist__delete(&kprobe_blacklist);
2191}
2192
2193static bool kprobe_blacklist__listed(unsigned long address)
2194{
2195 return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
2196}
2197
2198static int perf_probe_event__sprintf(const char *group, const char *event,
2199 struct perf_probe_event *pev,
2200 const char *module,
2201 struct strbuf *result)
2101{ 2202{
2102 int i, ret; 2203 int i, ret;
2103 char buf[128]; 2204 char buf[128];
@@ -2108,30 +2209,67 @@ static int show_perf_probe_event(struct perf_probe_event *pev,
2108 if (!place) 2209 if (!place)
2109 return -EINVAL; 2210 return -EINVAL;
2110 2211
2111 ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event); 2212 ret = e_snprintf(buf, 128, "%s:%s", group, event);
2112 if (ret < 0) 2213 if (ret < 0)
2113 return ret; 2214 goto out;
2114 2215
2115 pr_info(" %-20s (on %s", buf, place); 2216 strbuf_addf(result, " %-20s (on %s", buf, place);
2116 if (module) 2217 if (module)
2117 pr_info(" in %s", module); 2218 strbuf_addf(result, " in %s", module);
2118 2219
2119 if (pev->nargs > 0) { 2220 if (pev->nargs > 0) {
2120 pr_info(" with"); 2221 strbuf_addstr(result, " with");
2121 for (i = 0; i < pev->nargs; i++) { 2222 for (i = 0; i < pev->nargs; i++) {
2122 ret = synthesize_perf_probe_arg(&pev->args[i], 2223 ret = synthesize_perf_probe_arg(&pev->args[i],
2123 buf, 128); 2224 buf, 128);
2124 if (ret < 0) 2225 if (ret < 0)
2125 break; 2226 goto out;
2126 pr_info(" %s", buf); 2227 strbuf_addf(result, " %s", buf);
2127 } 2228 }
2128 } 2229 }
2129 pr_info(")\n"); 2230 strbuf_addch(result, ')');
2231out:
2130 free(place); 2232 free(place);
2131 return ret; 2233 return ret;
2132} 2234}
2133 2235
2134static int __show_perf_probe_events(int fd, bool is_kprobe) 2236/* Show an event */
2237static int show_perf_probe_event(const char *group, const char *event,
2238 struct perf_probe_event *pev,
2239 const char *module, bool use_stdout)
2240{
2241 struct strbuf buf = STRBUF_INIT;
2242 int ret;
2243
2244 ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
2245 if (ret >= 0) {
2246 if (use_stdout)
2247 printf("%s\n", buf.buf);
2248 else
2249 pr_info("%s\n", buf.buf);
2250 }
2251 strbuf_release(&buf);
2252
2253 return ret;
2254}
2255
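
Splitting formatting (perf_probe_event__sprintf) from output (show_perf_probe_event) lets the same rendered line go to stdout when listing and to pr_info when reporting newly added events. The render-then-choose-a-sink split in miniature (fixed buffer instead of perf's strbuf):

    #include <stdio.h>

    static int render_event(char *buf, int len,
                            const char *group, const char *event)
    {
            return snprintf(buf, len, "  %s:%s (on ...)", group, event);
    }

    static void show_event(const char *group, const char *event, int use_stdout)
    {
            char buf[256];

            render_event(buf, sizeof(buf), group, event);
            fprintf(use_stdout ? stdout : stderr, "%s\n", buf);
    }
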
2256static bool filter_probe_trace_event(struct probe_trace_event *tev,
2257 struct strfilter *filter)
2258{
2259 char tmp[128];
2260
2261 /* At first, check the event name itself */
2262 if (strfilter__compare(filter, tev->event))
2263 return true;
2264
2265 /* Next, check the combination of name and group */
2266 if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
2267 return false;
2268 return strfilter__compare(filter, tmp);
2269}
2270
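
filter_probe_trace_event() tries the bare event name first and then "group:event", so filters like "vfs_*" and "probe:*" both select events. An emulation with fnmatch(3) standing in for perf's strfilter:

    #include <fnmatch.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool event_matches(const char *pat,
                              const char *group, const char *event)
    {
            char full[128];

            if (!fnmatch(pat, event, 0))
                    return true;
            snprintf(full, sizeof(full), "%s:%s", group, event);
            return !fnmatch(pat, full, 0);
    }

    int main(void)
    {
            printf("%d\n", event_matches("vfs_*", "probe", "vfs_read"));   /* 1 */
            printf("%d\n", event_matches("probe:*", "probe", "vfs_read")); /* 1 */
            printf("%d\n", event_matches("sched:*", "probe", "vfs_read")); /* 0 */
            return 0;
    }
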
2271static int __show_perf_probe_events(int fd, bool is_kprobe,
2272 struct strfilter *filter)
2135{ 2273{
2136 int ret = 0; 2274 int ret = 0;
2137 struct probe_trace_event tev; 2275 struct probe_trace_event tev;
@@ -2149,24 +2287,31 @@ static int __show_perf_probe_events(int fd, bool is_kprobe)
2149 strlist__for_each(ent, rawlist) { 2287 strlist__for_each(ent, rawlist) {
2150 ret = parse_probe_trace_command(ent->s, &tev); 2288 ret = parse_probe_trace_command(ent->s, &tev);
2151 if (ret >= 0) { 2289 if (ret >= 0) {
2290 if (!filter_probe_trace_event(&tev, filter))
2291 goto next;
2152 ret = convert_to_perf_probe_event(&tev, &pev, 2292 ret = convert_to_perf_probe_event(&tev, &pev,
2153 is_kprobe); 2293 is_kprobe);
2154 if (ret >= 0) 2294 if (ret < 0)
2155 ret = show_perf_probe_event(&pev, 2295 goto next;
2156 tev.point.module); 2296 ret = show_perf_probe_event(pev.group, pev.event,
2297 &pev, tev.point.module,
2298 true);
2157 } 2299 }
2300next:
2158 clear_perf_probe_event(&pev); 2301 clear_perf_probe_event(&pev);
2159 clear_probe_trace_event(&tev); 2302 clear_probe_trace_event(&tev);
2160 if (ret < 0) 2303 if (ret < 0)
2161 break; 2304 break;
2162 } 2305 }
2163 strlist__delete(rawlist); 2306 strlist__delete(rawlist);
2307 /* Cleanup cached debuginfo if needed */
2308 debuginfo_cache__exit();
2164 2309
2165 return ret; 2310 return ret;
2166} 2311}
2167 2312
2168/* List up current perf-probe events */ 2313/* List up current perf-probe events */
2169int show_perf_probe_events(void) 2314int show_perf_probe_events(struct strfilter *filter)
2170{ 2315{
2171 int kp_fd, up_fd, ret; 2316 int kp_fd, up_fd, ret;
2172 2317
@@ -2178,7 +2323,7 @@ int show_perf_probe_events(void)
2178 2323
2179 kp_fd = open_kprobe_events(false); 2324 kp_fd = open_kprobe_events(false);
2180 if (kp_fd >= 0) { 2325 if (kp_fd >= 0) {
2181 ret = __show_perf_probe_events(kp_fd, true); 2326 ret = __show_perf_probe_events(kp_fd, true, filter);
2182 close(kp_fd); 2327 close(kp_fd);
2183 if (ret < 0) 2328 if (ret < 0)
2184 goto out; 2329 goto out;
@@ -2192,7 +2337,7 @@ int show_perf_probe_events(void)
2192 } 2337 }
2193 2338
2194 if (up_fd >= 0) { 2339 if (up_fd >= 0) {
2195 ret = __show_perf_probe_events(up_fd, false); 2340 ret = __show_perf_probe_events(up_fd, false, filter);
2196 close(up_fd); 2341 close(up_fd);
2197 } 2342 }
2198out: 2343out:
@@ -2266,6 +2411,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
2266 struct strlist *namelist, bool allow_suffix) 2411 struct strlist *namelist, bool allow_suffix)
2267{ 2412{
2268 int i, ret; 2413 int i, ret;
2414 char *p;
2415
2416 if (*base == '.')
2417 base++;
2269 2418
2270 /* Try no suffix */ 2419 /* Try no suffix */
2271 ret = e_snprintf(buf, len, "%s", base); 2420 ret = e_snprintf(buf, len, "%s", base);
@@ -2273,6 +2422,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
2273 pr_debug("snprintf() failed: %d\n", ret); 2422 pr_debug("snprintf() failed: %d\n", ret);
2274 return ret; 2423 return ret;
2275 } 2424 }
2425 /* Cut off the postfixes (e.g. .const, .isra)*/
2426 p = strchr(buf, '.');
2427 if (p && p != buf)
2428 *p = '\0';
2276 if (!strlist__has_entry(namelist, buf)) 2429 if (!strlist__has_entry(namelist, buf))
2277 return 0; 2430 return 0;
2278 2431
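
Compilers emit cloned symbols such as "udp_sendmsg.isra.3"; the '.' is illegal in an event name, so the new code cuts at the first dot (and skips a leading one). Standalone:

    #include <stdio.h>
    #include <string.h>

    /* cut GCC clone postfixes (".isra.N", ".constprop.N", ...) */
    static void sanitize_event_name(char *buf)
    {
            char *p = strchr(buf, '.');

            if (p && p != buf)
                    *p = '\0';
    }

    int main(void)
    {
            char name[64] = "udp_sendmsg.isra.3";
            sanitize_event_name(name);
            printf("%s\n", name);   /* udp_sendmsg */
            return 0;
    }
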
@@ -2328,10 +2481,9 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
2328 int i, fd, ret; 2481 int i, fd, ret;
2329 struct probe_trace_event *tev = NULL; 2482 struct probe_trace_event *tev = NULL;
2330 char buf[64]; 2483 char buf[64];
2331 const char *event, *group; 2484 const char *event = NULL, *group = NULL;
2332 struct strlist *namelist; 2485 struct strlist *namelist;
2333 LIST_HEAD(blacklist); 2486 bool safename;
2334 struct kprobe_blacklist_node *node;
2335 2487
2336 if (pev->uprobes) 2488 if (pev->uprobes)
2337 fd = open_uprobe_events(true); 2489 fd = open_uprobe_events(true);
@@ -2347,34 +2499,26 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
2347 namelist = get_probe_trace_event_names(fd, false); 2499 namelist = get_probe_trace_event_names(fd, false);
2348 if (!namelist) { 2500 if (!namelist) {
2349 pr_debug("Failed to get current event list.\n"); 2501 pr_debug("Failed to get current event list.\n");
2350 return -EIO; 2502 ret = -ENOMEM;
2351 } 2503 goto close_out;
2352 /* Get kprobe blacklist if exists */
2353 if (!pev->uprobes) {
2354 ret = kprobe_blacklist__load(&blacklist);
2355 if (ret < 0)
2356 pr_debug("No kprobe blacklist support, ignored\n");
2357 } 2504 }
2358 2505
2506 safename = (pev->point.function && !strisglob(pev->point.function));
2359 ret = 0; 2507 ret = 0;
2360 pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); 2508 pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
2361 for (i = 0; i < ntevs; i++) { 2509 for (i = 0; i < ntevs; i++) {
2362 tev = &tevs[i]; 2510 tev = &tevs[i];
2363 /* Ensure that the address is NOT blacklisted */ 2511 /* Skip if the symbol is out of .text or blacklisted */
2364 node = kprobe_blacklist__find_by_address(&blacklist, 2512 if (!tev->point.symbol)
2365 tev->point.address);
2366 if (node) {
2367 pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
2368 continue; 2513 continue;
2369 }
2370 2514
2371 if (pev->event) 2515 if (pev->event)
2372 event = pev->event; 2516 event = pev->event;
2373 else 2517 else
2374 if (pev->point.function) 2518 if (safename)
2375 event = pev->point.function; 2519 event = pev->point.function;
2376 else 2520 else
2377 event = tev->point.symbol; 2521 event = tev->point.realname;
2378 if (pev->group) 2522 if (pev->group)
2379 group = pev->group; 2523 group = pev->group;
2380 else 2524 else
@@ -2399,15 +2543,12 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
2399 /* Add added event name to namelist */ 2543 /* Add added event name to namelist */
2400 strlist__add(namelist, event); 2544 strlist__add(namelist, event);
2401 2545
2402 /* Trick here - save current event/group */ 2546 /* We use tev's name for showing new events */
2403 event = pev->event; 2547 show_perf_probe_event(tev->group, tev->event, pev,
2404 group = pev->group; 2548 tev->point.module, false);
2405 pev->event = tev->event; 2549 /* Save the last valid name */
2406 pev->group = tev->group; 2550 event = tev->event;
2407 show_perf_probe_event(pev, tev->point.module); 2551 group = tev->group;
2408 /* Trick here - restore current event/group */
2409 pev->event = (char *)event;
2410 pev->group = (char *)group;
2411 2552
2412 /* 2553 /*
2413 * Probes after the first probe which comes from same 2554 * Probes after the first probe which comes from same
@@ -2421,26 +2562,34 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
2421 warn_uprobe_event_compat(tev); 2562 warn_uprobe_event_compat(tev);
2422 2563
2423 /* Note that it is possible to skip all events because of blacklist */ 2564 /* Note that it is possible to skip all events because of blacklist */
2424 if (ret >= 0 && tev->event) { 2565 if (ret >= 0 && event) {
2425 /* Show how to use the event. */ 2566 /* Show how to use the event. */
2426 pr_info("\nYou can now use it in all perf tools, such as:\n\n"); 2567 pr_info("\nYou can now use it in all perf tools, such as:\n\n");
2427 pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, 2568 pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
2428 tev->event);
2429 } 2569 }
2430 2570
2431 kprobe_blacklist__delete(&blacklist);
2432 strlist__delete(namelist); 2571 strlist__delete(namelist);
2572close_out:
2433 close(fd); 2573 close(fd);
2434 return ret; 2574 return ret;
2435} 2575}
2436 2576
2437static int find_probe_functions(struct map *map, char *name) 2577static int find_probe_functions(struct map *map, char *name,
2578 struct symbol **syms)
2438{ 2579{
2439 int found = 0; 2580 int found = 0;
2440 struct symbol *sym; 2581 struct symbol *sym;
2582 struct rb_node *tmp;
2583
2584 if (map__load(map, NULL) < 0)
2585 return 0;
2441 2586
2442 map__for_each_symbol_by_name(map, name, sym) { 2587 map__for_each_symbol(map, sym, tmp) {
2443 found++; 2588 if (strglobmatch(sym->name, name)) {
2589 found++;
2590 if (syms && found < probe_conf.max_probes)
2591 syms[found - 1] = sym;
2592 }
2444 } 2593 }
2445 2594
2446 return found; 2595 return found;
@@ -2449,42 +2598,52 @@ static int find_probe_functions(struct map *map, char *name)
2449#define strdup_or_goto(str, label) \ 2598#define strdup_or_goto(str, label) \
2450 ({ char *__p = strdup(str); if (!__p) goto label; __p; }) 2599 ({ char *__p = strdup(str); if (!__p) goto label; __p; })
2451 2600
2601void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
2602 struct probe_trace_event *tev __maybe_unused,
2603 struct map *map __maybe_unused) { }
2604
2452/* 2605/*
2453 * Find probe function addresses from map. 2606 * Find probe function addresses from map.
2454 * Return an error or the number of found probe_trace_event 2607 * Return an error or the number of found probe_trace_event
2455 */ 2608 */
2456static int find_probe_trace_events_from_map(struct perf_probe_event *pev, 2609static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2457 struct probe_trace_event **tevs, 2610 struct probe_trace_event **tevs)
2458 int max_tevs, const char *target)
2459{ 2611{
2460 struct map *map = NULL; 2612 struct map *map = NULL;
2461 struct ref_reloc_sym *reloc_sym = NULL; 2613 struct ref_reloc_sym *reloc_sym = NULL;
2462 struct symbol *sym; 2614 struct symbol *sym;
2615 struct symbol **syms = NULL;
2463 struct probe_trace_event *tev; 2616 struct probe_trace_event *tev;
2464 struct perf_probe_point *pp = &pev->point; 2617 struct perf_probe_point *pp = &pev->point;
2465 struct probe_trace_point *tp; 2618 struct probe_trace_point *tp;
2466 int num_matched_functions; 2619 int num_matched_functions;
2467 int ret, i; 2620 int ret, i, j, skipped = 0;
2468 2621
2469 map = get_target_map(target, pev->uprobes); 2622 map = get_target_map(pev->target, pev->uprobes);
2470 if (!map) { 2623 if (!map) {
2471 ret = -EINVAL; 2624 ret = -EINVAL;
2472 goto out; 2625 goto out;
2473 } 2626 }
2474 2627
2628 syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
2629 if (!syms) {
2630 ret = -ENOMEM;
2631 goto out;
2632 }
2633
2475 /* 2634 /*
2476 * Load matched symbols: Since the different local symbols may have 2635 * Load matched symbols: Since the different local symbols may have
2477 * same name but different addresses, this lists all the symbols. 2636 * same name but different addresses, this lists all the symbols.
2478 */ 2637 */
2479 num_matched_functions = find_probe_functions(map, pp->function); 2638 num_matched_functions = find_probe_functions(map, pp->function, syms);
2480 if (num_matched_functions == 0) { 2639 if (num_matched_functions == 0) {
2481 pr_err("Failed to find symbol %s in %s\n", pp->function, 2640 pr_err("Failed to find symbol %s in %s\n", pp->function,
2482 target ? : "kernel"); 2641 pev->target ? : "kernel");
2483 ret = -ENOENT; 2642 ret = -ENOENT;
2484 goto out; 2643 goto out;
2485 } else if (num_matched_functions > max_tevs) { 2644 } else if (num_matched_functions > probe_conf.max_probes) {
2486 pr_err("Too many functions matched in %s\n", 2645 pr_err("Too many functions matched in %s\n",
2487 target ? : "kernel"); 2646 pev->target ? : "kernel");
2488 ret = -E2BIG; 2647 ret = -E2BIG;
2489 goto out; 2648 goto out;
2490 } 2649 }
@@ -2507,7 +2666,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2507 2666
2508 ret = 0; 2667 ret = 0;
2509 2668
2510 map__for_each_symbol_by_name(map, pp->function, sym) { 2669 for (j = 0; j < num_matched_functions; j++) {
2670 sym = syms[j];
2671
2511 tev = (*tevs) + ret; 2672 tev = (*tevs) + ret;
2512 tp = &tev->point; 2673 tp = &tev->point;
2513 if (ret == num_matched_functions) { 2674 if (ret == num_matched_functions) {
@@ -2524,16 +2685,24 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2524 } 2685 }
2525 /* Add one probe point */ 2686 /* Add one probe point */
2526 tp->address = map->unmap_ip(map, sym->start) + pp->offset; 2687 tp->address = map->unmap_ip(map, sym->start) + pp->offset;
2527 if (reloc_sym) { 2688 /* If we found a wrong one, mark it with a NULL symbol */
2689 if (!pev->uprobes &&
2690 kprobe_warn_out_range(sym->name, tp->address)) {
2691 tp->symbol = NULL; /* Skip it */
2692 skipped++;
2693 } else if (reloc_sym) {
2528 tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out); 2694 tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
2529 tp->offset = tp->address - reloc_sym->addr; 2695 tp->offset = tp->address - reloc_sym->addr;
2530 } else { 2696 } else {
2531 tp->symbol = strdup_or_goto(sym->name, nomem_out); 2697 tp->symbol = strdup_or_goto(sym->name, nomem_out);
2532 tp->offset = pp->offset; 2698 tp->offset = pp->offset;
2533 } 2699 }
2700 tp->realname = strdup_or_goto(sym->name, nomem_out);
2701
2534 tp->retprobe = pp->retprobe; 2702 tp->retprobe = pp->retprobe;
2535 if (target) 2703 if (pev->target)
2536 tev->point.module = strdup_or_goto(target, nomem_out); 2704 tev->point.module = strdup_or_goto(pev->target,
2705 nomem_out);
2537 tev->uprobes = pev->uprobes; 2706 tev->uprobes = pev->uprobes;
2538 tev->nargs = pev->nargs; 2707 tev->nargs = pev->nargs;
2539 if (tev->nargs) { 2708 if (tev->nargs) {
@@ -2555,10 +2724,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
2555 strdup_or_goto(pev->args[i].type, 2724 strdup_or_goto(pev->args[i].type,
2556 nomem_out); 2725 nomem_out);
2557 } 2726 }
2727 arch__fix_tev_from_maps(pev, tev, map);
2728 }
2729 if (ret == skipped) {
2730 ret = -ENOENT;
2731 goto err_out;
2558 } 2732 }
2559 2733
2560out: 2734out:
2561 put_target_map(map, pev->uprobes); 2735 put_target_map(map, pev->uprobes);
2736 free(syms);
2562 return ret; 2737 return ret;
2563 2738
2564nomem_out: 2739nomem_out:
@@ -2569,27 +2744,34 @@ err_out:
2569 goto out; 2744 goto out;
2570} 2745}
2571 2746
2747bool __weak arch__prefers_symtab(void) { return false; }
2748
2572static int convert_to_probe_trace_events(struct perf_probe_event *pev, 2749static int convert_to_probe_trace_events(struct perf_probe_event *pev,
2573 struct probe_trace_event **tevs, 2750 struct probe_trace_event **tevs)
2574 int max_tevs, const char *target)
2575{ 2751{
2576 int ret; 2752 int ret;
2577 2753
2578 if (pev->uprobes && !pev->group) { 2754 if (pev->uprobes && !pev->group) {
2579 /* Replace group name if not given */ 2755 /* Replace group name if not given */
2580 ret = convert_exec_to_group(target, &pev->group); 2756 ret = convert_exec_to_group(pev->target, &pev->group);
2581 if (ret != 0) { 2757 if (ret != 0) {
2582 pr_warning("Failed to make a group name.\n"); 2758 pr_warning("Failed to make a group name.\n");
2583 return ret; 2759 return ret;
2584 } 2760 }
2585 } 2761 }
2586 2762
2763 if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
2764 ret = find_probe_trace_events_from_map(pev, tevs);
2765 if (ret > 0)
2766 return ret; /* Found in symbol table */
2767 }
2768
2587 /* Convert perf_probe_event with debuginfo */ 2769 /* Convert perf_probe_event with debuginfo */
2588 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); 2770 ret = try_to_find_probe_trace_events(pev, tevs);
2589 if (ret != 0) 2771 if (ret != 0)
2590 return ret; /* Found in debuginfo or got an error */ 2772 return ret; /* Found in debuginfo or got an error */
2591 2773
2592 return find_probe_trace_events_from_map(pev, tevs, max_tevs, target); 2774 return find_probe_trace_events_from_map(pev, tevs);
2593} 2775}
2594 2776
2595struct __event_package { 2777struct __event_package {
@@ -2598,8 +2780,7 @@ struct __event_package {
2598 int ntevs; 2780 int ntevs;
2599}; 2781};
2600 2782
2601int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, 2783int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
2602 int max_tevs, bool force_add)
2603{ 2784{
2604 int i, j, ret; 2785 int i, j, ret;
2605 struct __event_package *pkgs; 2786 struct __event_package *pkgs;
@@ -2619,20 +2800,24 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
2619 /* Loop 1: convert all events */ 2800 /* Loop 1: convert all events */
2620 for (i = 0; i < npevs; i++) { 2801 for (i = 0; i < npevs; i++) {
2621 pkgs[i].pev = &pevs[i]; 2802 pkgs[i].pev = &pevs[i];
2803 /* Init kprobe blacklist if needed */
2804 if (!pkgs[i].pev->uprobes)
2805 kprobe_blacklist__init();
2622 /* Convert with or without debuginfo */ 2806 /* Convert with or without debuginfo */
2623 ret = convert_to_probe_trace_events(pkgs[i].pev, 2807 ret = convert_to_probe_trace_events(pkgs[i].pev,
2624 &pkgs[i].tevs, 2808 &pkgs[i].tevs);
2625 max_tevs,
2626 pkgs[i].pev->target);
2627 if (ret < 0) 2809 if (ret < 0)
2628 goto end; 2810 goto end;
2629 pkgs[i].ntevs = ret; 2811 pkgs[i].ntevs = ret;
2630 } 2812 }
2813 /* This just releases the blacklist if it was allocated */
2814 kprobe_blacklist__release();
2631 2815
2632 /* Loop 2: add all events */ 2816 /* Loop 2: add all events */
2633 for (i = 0; i < npevs; i++) { 2817 for (i = 0; i < npevs; i++) {
2634 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, 2818 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
2635 pkgs[i].ntevs, force_add); 2819 pkgs[i].ntevs,
2820 probe_conf.force_add);
2636 if (ret < 0) 2821 if (ret < 0)
2637 break; 2822 break;
2638 } 2823 }
@@ -2684,40 +2869,39 @@ error:
2684 return ret; 2869 return ret;
2685} 2870}
2686 2871
2687static int del_trace_probe_event(int fd, const char *buf, 2872static int del_trace_probe_events(int fd, struct strfilter *filter,
2688 struct strlist *namelist) 2873 struct strlist *namelist)
2689{ 2874{
2690 struct str_node *ent, *n; 2875 struct str_node *ent;
2691 int ret = -1; 2876 const char *p;
2877 int ret = -ENOENT;
2692 2878
2693 if (strpbrk(buf, "*?")) { /* Glob-exp */ 2879 if (!namelist)
2694 strlist__for_each_safe(ent, n, namelist) 2880 return -ENOENT;
2695 if (strglobmatch(ent->s, buf)) { 2881
2696 ret = __del_trace_probe_event(fd, ent); 2882 strlist__for_each(ent, namelist) {
2697 if (ret < 0) 2883 p = strchr(ent->s, ':');
2698 break; 2884 if ((p && strfilter__compare(filter, p + 1)) ||
2699 strlist__remove(namelist, ent); 2885 strfilter__compare(filter, ent->s)) {
2700 }
2701 } else {
2702 ent = strlist__find(namelist, buf);
2703 if (ent) {
2704 ret = __del_trace_probe_event(fd, ent); 2886 ret = __del_trace_probe_event(fd, ent);
2705 if (ret >= 0) 2887 if (ret < 0)
2706 strlist__remove(namelist, ent); 2888 break;
2707 } 2889 }
2708 } 2890 }
2709 2891
2710 return ret; 2892 return ret;
2711} 2893}
2712 2894
2713int del_perf_probe_events(struct strlist *dellist) 2895int del_perf_probe_events(struct strfilter *filter)
2714{ 2896{
2715 int ret = -1, ufd = -1, kfd = -1; 2897 int ret, ret2, ufd = -1, kfd = -1;
2716 char buf[128];
2717 const char *group, *event;
2718 char *p, *str;
2719 struct str_node *ent;
2720 struct strlist *namelist = NULL, *unamelist = NULL; 2898 struct strlist *namelist = NULL, *unamelist = NULL;
2899 char *str = strfilter__string(filter);
2900
2901 if (!str)
2902 return -EINVAL;
2903
2904 pr_debug("Delete filter: \'%s\'\n", str);
2721 2905
2722 /* Get current event names */ 2906 /* Get current event names */
2723 kfd = open_kprobe_events(true); 2907 kfd = open_kprobe_events(true);
@@ -2730,49 +2914,23 @@ int del_perf_probe_events(struct strlist *dellist)
2730 2914
2731 if (kfd < 0 && ufd < 0) { 2915 if (kfd < 0 && ufd < 0) {
2732 print_both_open_warning(kfd, ufd); 2916 print_both_open_warning(kfd, ufd);
2917 ret = kfd;
2733 goto error; 2918 goto error;
2734 } 2919 }
2735 2920
2736 if (namelist == NULL && unamelist == NULL) 2921 ret = del_trace_probe_events(kfd, filter, namelist);
2922 if (ret < 0 && ret != -ENOENT)
2737 goto error; 2923 goto error;
2738 2924
2739 strlist__for_each(ent, dellist) { 2925 ret2 = del_trace_probe_events(ufd, filter, unamelist);
2740 str = strdup(ent->s); 2926 if (ret2 < 0 && ret2 != -ENOENT) {
2741 if (str == NULL) { 2927 ret = ret2;
2742 ret = -ENOMEM; 2928 goto error;
2743 goto error;
2744 }
2745 pr_debug("Parsing: %s\n", str);
2746 p = strchr(str, ':');
2747 if (p) {
2748 group = str;
2749 *p = '\0';
2750 event = p + 1;
2751 } else {
2752 group = "*";
2753 event = str;
2754 }
2755
2756 ret = e_snprintf(buf, 128, "%s:%s", group, event);
2757 if (ret < 0) {
2758 pr_err("Failed to copy event.");
2759 free(str);
2760 goto error;
2761 }
2762
2763 pr_debug("Group: %s, Event: %s\n", group, event);
2764
2765 if (namelist)
2766 ret = del_trace_probe_event(kfd, buf, namelist);
2767
2768 if (unamelist && ret != 0)
2769 ret = del_trace_probe_event(ufd, buf, unamelist);
2770
2771 if (ret != 0)
2772 pr_info("Info: Event \"%s\" does not exist.\n", buf);
2773
2774 free(str);
2775 } 2929 }
2930 if (ret == -ENOENT && ret2 == -ENOENT)
2931 pr_debug("\"%s\" does not hit any event.\n", str);
2932 /* Note that not hitting any event is silently ignored */
2933 ret = 0;
2776 2934
2777error: 2935error:
2778 if (kfd >= 0) { 2936 if (kfd >= 0) {
@@ -2784,6 +2942,7 @@ error:
2784 strlist__delete(unamelist); 2942 strlist__delete(unamelist);
2785 close(ufd); 2943 close(ufd);
2786 } 2944 }
2945 free(str);
2787 2946
2788 return ret; 2947 return ret;
2789} 2948}
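
The rewritten del_trace_probe_events() above matches each stored "group:event" name against the filter twice: once on the event part after the colon, and once on the full name. A minimal sketch of that matching rule, using fnmatch(3) as a stand-in for strfilter__compare() (the real strfilter supports composed glob expressions, so this is only an approximation):

	#include <fnmatch.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* Stand-in for strfilter__compare(): plain glob match via fnmatch(3). */
	static bool filter_matches(const char *pattern, const char *name)
	{
		const char *colon = strchr(name, ':');

		/* Try the event part after "group:", then the full name. */
		if (colon && fnmatch(pattern, colon + 1, 0) == 0)
			return true;
		return fnmatch(pattern, name, 0) == 0;
	}

	int main(void)
	{
		const char *names[] = { "probe:vfs_read", "probe:do_sys_open" };

		for (int i = 0; i < 2; i++)
			if (filter_matches("vfs_*", names[i]))
				printf("would delete %s\n", names[i]);
		return 0;
	}
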
@@ -2837,8 +2996,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
2837 dso__fprintf_symbols_by_name(map->dso, map->type, stdout); 2996 dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
2838end: 2997end:
2839 if (user) { 2998 if (user) {
2840 dso__delete(map->dso); 2999 map__put(map);
2841 map__delete(map);
2842 } 3000 }
2843 exit_symbol_maps(); 3001 exit_symbol_maps();
2844 3002
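
find_probe_functions() now walks every symbol in the map and glob-matches the names, collecting up to probe_conf.max_probes matches while still counting the rest so the caller can report -E2BIG. A self-contained sketch of that bounded collection, with fnmatch(3) standing in for strglobmatch() and a plain string array standing in for the real map/symbol iteration:

	#include <fnmatch.h>
	#include <stddef.h>

	/*
	 * Count every glob match but keep at most max_out of them; the
	 * caller compares the count against the bound to decide on -E2BIG.
	 */
	static int collect_matches(const char *names[], size_t nsyms,
				   const char *pattern,
				   const char *out[], size_t max_out)
	{
		int found = 0;

		for (size_t i = 0; i < nsyms; i++) {
			if (fnmatch(pattern, names[i], 0) != 0)
				continue;
			found++;
			if (out && (size_t)found <= max_out)
				out[found - 1] = names[i];
		}
		return found;	/* may exceed max_out on purpose */
	}
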
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index d6b783447be9..31db6ee7db54 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -6,10 +6,20 @@
6#include "strlist.h" 6#include "strlist.h"
7#include "strfilter.h" 7#include "strfilter.h"
8 8
9/* Probe related configurations */
10struct probe_conf {
11 bool show_ext_vars;
12 bool show_location_range;
13 bool force_add;
14 bool no_inlines;
15 int max_probes;
16};
17extern struct probe_conf probe_conf;
9extern bool probe_event_dry_run; 18extern bool probe_event_dry_run;
10 19
11/* kprobe-tracer and uprobe-tracer tracing point */ 20/* kprobe-tracer and uprobe-tracer tracing point */
12struct probe_trace_point { 21struct probe_trace_point {
22 char *realname; /* function real name (if needed) */
13 char *symbol; /* Base symbol */ 23 char *symbol; /* Base symbol */
14 char *module; /* Module name */ 24 char *module; /* Module name */
15 unsigned long offset; /* Offset from symbol */ 25 unsigned long offset; /* Offset from symbol */
@@ -121,20 +131,18 @@ extern void line_range__clear(struct line_range *lr);
121/* Initialize line range */ 131/* Initialize line range */
122extern int line_range__init(struct line_range *lr); 132extern int line_range__init(struct line_range *lr);
123 133
124/* Internal use: Return kernel/module path */ 134extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
125extern const char *kernel_get_module_path(const char *module); 135extern int del_perf_probe_events(struct strfilter *filter);
126 136extern int show_perf_probe_events(struct strfilter *filter);
127extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
128 int max_probe_points, bool force_add);
129extern int del_perf_probe_events(struct strlist *dellist);
130extern int show_perf_probe_events(void);
131extern int show_line_range(struct line_range *lr, const char *module, 137extern int show_line_range(struct line_range *lr, const char *module,
132 bool user); 138 bool user);
133extern int show_available_vars(struct perf_probe_event *pevs, int npevs, 139extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
134 int max_probe_points, const char *module, 140 struct strfilter *filter);
135 struct strfilter *filter, bool externs);
136extern int show_available_funcs(const char *module, struct strfilter *filter, 141extern int show_available_funcs(const char *module, struct strfilter *filter,
137 bool user); 142 bool user);
143bool arch__prefers_symtab(void);
144void arch__fix_tev_from_maps(struct perf_probe_event *pev,
145 struct probe_trace_event *tev, struct map *map);
138 146
139/* Maximum index number of event-name postfix */ 147/* Maximum index number of event-name postfix */
140#define MAX_EVENT_INDEX 1024 148#define MAX_EVENT_INDEX 1024
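
The new probe_conf structure replaces the max_tevs/force_add/externs parameters that were previously threaded through every call chain; callers now set the options once before converting events. A hedged sketch of how a command-line front end might fill it in (the field values and the apply_cli_options() hook are illustrative, not the tool's real defaults):

	#include <stdbool.h>

	/* Mirrors the struct probe_conf declared above. */
	struct probe_conf {
		bool show_ext_vars;
		bool show_location_range;
		bool force_add;
		bool no_inlines;
		int max_probes;
	};

	struct probe_conf probe_conf = {
		.max_probes = 128,	/* e.g. MAX_PROBES from probe-finder.h */
	};

	/* Hypothetical option hook: set everything once, no extra parameters. */
	static void apply_cli_options(bool force, bool no_inlines, int max)
	{
		probe_conf.force_add = force;
		probe_conf.no_inlines = no_inlines;
		if (max > 0)
			probe_conf.max_probes = max;
	}
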
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2a76e14db732..2da65a710893 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path)
130 continue; 130 continue;
131 dinfo = __debuginfo__new(buf); 131 dinfo = __debuginfo__new(buf);
132 } 132 }
133 dso__delete(dso); 133 dso__put(dso);
134 134
135out: 135out:
136 /* if failed to open all distro debuginfo, open given binary */ 136 /* if failed to open all distro debuginfo, open given binary */
@@ -177,7 +177,7 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
177 Dwarf_Word offs = 0; 177 Dwarf_Word offs = 0;
178 bool ref = false; 178 bool ref = false;
179 const char *regs; 179 const char *regs;
180 int ret; 180 int ret, ret2 = 0;
181 181
182 if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL) 182 if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
183 goto static_var; 183 goto static_var;
@@ -187,9 +187,19 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
187 return -EINVAL; /* Broken DIE ? */ 187 return -EINVAL; /* Broken DIE ? */
188 if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) { 188 if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
189 ret = dwarf_entrypc(sp_die, &tmp); 189 ret = dwarf_entrypc(sp_die, &tmp);
190 if (ret || addr != tmp || 190 if (ret)
191 dwarf_tag(vr_die) != DW_TAG_formal_parameter || 191 return -ENOENT;
192 dwarf_highpc(sp_die, &tmp)) 192
193 if (probe_conf.show_location_range &&
194 (dwarf_tag(vr_die) == DW_TAG_variable)) {
195 ret2 = -ERANGE;
196 } else if (addr != tmp ||
197 dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
198 return -ENOENT;
199 }
200
201 ret = dwarf_highpc(sp_die, &tmp);
202 if (ret)
193 return -ENOENT; 203 return -ENOENT;
194 /* 204 /*
195 * This is fuzzed by fentry mcount. We try to find the 205 * This is fuzzed by fentry mcount. We try to find the
@@ -210,7 +220,7 @@ found:
210 if (op->atom == DW_OP_addr) { 220 if (op->atom == DW_OP_addr) {
211static_var: 221static_var:
212 if (!tvar) 222 if (!tvar)
213 return 0; 223 return ret2;
214 /* Static variables on memory (not stack), make @varname */ 224 /* Static variables on memory (not stack), make @varname */
215 ret = strlen(dwarf_diename(vr_die)); 225 ret = strlen(dwarf_diename(vr_die));
216 tvar->value = zalloc(ret + 2); 226 tvar->value = zalloc(ret + 2);
@@ -220,7 +230,7 @@ static_var:
220 tvar->ref = alloc_trace_arg_ref((long)offs); 230 tvar->ref = alloc_trace_arg_ref((long)offs);
221 if (tvar->ref == NULL) 231 if (tvar->ref == NULL)
222 return -ENOMEM; 232 return -ENOMEM;
223 return 0; 233 return ret2;
224 } 234 }
225 235
226 /* If this is based on frame buffer, set the offset */ 236 /* If this is based on frame buffer, set the offset */
@@ -250,14 +260,14 @@ static_var:
250 } 260 }
251 261
252 if (!tvar) 262 if (!tvar)
253 return 0; 263 return ret2;
254 264
255 regs = get_arch_regstr(regn); 265 regs = get_arch_regstr(regn);
256 if (!regs) { 266 if (!regs) {
257 /* This should be a bug in DWARF or this tool */ 267 /* This should be a bug in DWARF or this tool */
258 pr_warning("Mapping for the register number %u " 268 pr_warning("Mapping for the register number %u "
259 "missing on this architecture.\n", regn); 269 "missing on this architecture.\n", regn);
260 return -ERANGE; 270 return -ENOTSUP;
261 } 271 }
262 272
263 tvar->value = strdup(regs); 273 tvar->value = strdup(regs);
@@ -269,7 +279,7 @@ static_var:
269 if (tvar->ref == NULL) 279 if (tvar->ref == NULL)
270 return -ENOMEM; 280 return -ENOMEM;
271 } 281 }
272 return 0; 282 return ret2;
273} 283}
274 284
275#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long)) 285#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
@@ -517,10 +527,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
517 527
518 ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, 528 ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
519 &pf->sp_die, pf->tvar); 529 &pf->sp_die, pf->tvar);
520 if (ret == -ENOENT || ret == -EINVAL) 530 if (ret == -ENOENT || ret == -EINVAL) {
521 pr_err("Failed to find the location of %s at this address.\n" 531 pr_err("Failed to find the location of the '%s' variable at this address.\n"
522 " Perhaps, it has been optimized out.\n", pf->pvar->var); 532 " Perhaps it has been optimized out.\n"
523 else if (ret == -ENOTSUP) 533 " Use -V with the --range option to show '%s' location range.\n",
534 pf->pvar->var, pf->pvar->var);
535 } else if (ret == -ENOTSUP)
524 pr_err("Sorry, we don't support this variable location yet.\n"); 536 pr_err("Sorry, we don't support this variable location yet.\n");
525 else if (ret == 0 && pf->pvar->field) { 537 else if (ret == 0 && pf->pvar->field) {
526 ret = convert_variable_fields(vr_die, pf->pvar->var, 538 ret = convert_variable_fields(vr_die, pf->pvar->var,
@@ -662,9 +674,15 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
662 /* If not a real subprogram, find a real one */ 674 /* If not a real subprogram, find a real one */
663 if (!die_is_func_def(sc_die)) { 675 if (!die_is_func_def(sc_die)) {
664 if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { 676 if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
665 pr_warning("Failed to find probe point in any " 677 if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
666 "functions.\n"); 678 pr_warning("Ignoring tail call from %s\n",
667 return -ENOENT; 679 dwarf_diename(&pf->sp_die));
680 return 0;
681 } else {
682 pr_warning("Failed to find probe point in any "
683 "functions.\n");
684 return -ENOENT;
685 }
668 } 686 }
669 } else 687 } else
670 memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die)); 688 memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
@@ -719,7 +737,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
719 } 737 }
720 /* If the function name is given, that's what user expects */ 738 /* If the function name is given, that's what user expects */
721 if (fsp->function) { 739 if (fsp->function) {
722 if (die_compare_name(fn_die, fsp->function)) { 740 if (die_match_name(fn_die, fsp->function)) {
723 memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); 741 memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
724 fsp->found = true; 742 fsp->found = true;
725 return 1; 743 return 1;
@@ -922,13 +940,14 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
922 940
923 /* Check tag and diename */ 941 /* Check tag and diename */
924 if (!die_is_func_def(sp_die) || 942 if (!die_is_func_def(sp_die) ||
925 !die_compare_name(sp_die, pp->function)) 943 !die_match_name(sp_die, pp->function))
926 return DWARF_CB_OK; 944 return DWARF_CB_OK;
927 945
928 /* Check declared file */ 946 /* Check declared file */
929 if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) 947 if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
930 return DWARF_CB_OK; 948 return DWARF_CB_OK;
931 949
950 pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
932 pf->fname = dwarf_decl_file(sp_die); 951 pf->fname = dwarf_decl_file(sp_die);
933 if (pp->line) { /* Function relative line */ 952 if (pp->line) { /* Function relative line */
934 dwarf_decl_line(sp_die, &pf->lno); 953 dwarf_decl_line(sp_die, &pf->lno);
@@ -945,10 +964,20 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
945 /* TODO: Check the address in this function */ 964 /* TODO: Check the address in this function */
946 param->retval = call_probe_finder(sp_die, pf); 965 param->retval = call_probe_finder(sp_die, pf);
947 } 966 }
948 } else 967 } else if (!probe_conf.no_inlines) {
949 /* Inlined function: search instances */ 968 /* Inlined function: search instances */
950 param->retval = die_walk_instances(sp_die, 969 param->retval = die_walk_instances(sp_die,
951 probe_point_inline_cb, (void *)pf); 970 probe_point_inline_cb, (void *)pf);
971 /* This could be a non-existent inline definition */
972 if (param->retval == -ENOENT && strisglob(pp->function))
973 param->retval = 0;
974 }
975
976 /* We need to find other candidates */
977 if (strisglob(pp->function) && param->retval >= 0) {
978 param->retval = 0; /* We have to clear the result */
979 return DWARF_CB_OK;
980 }
952 981
953 return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ 982 return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
954} 983}
@@ -977,7 +1006,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
977 if (dwarf_tag(param->sp_die) != DW_TAG_subprogram) 1006 if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
978 return DWARF_CB_OK; 1007 return DWARF_CB_OK;
979 1008
980 if (die_compare_name(param->sp_die, param->function)) { 1009 if (die_match_name(param->sp_die, param->function)) {
981 if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die)) 1010 if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
982 return DWARF_CB_OK; 1011 return DWARF_CB_OK;
983 1012
@@ -1030,7 +1059,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
1030 return -ENOMEM; 1059 return -ENOMEM;
1031 1060
1032 /* Fastpath: lookup by function name from .debug_pubnames section */ 1061 /* Fastpath: lookup by function name from .debug_pubnames section */
1033 if (pp->function) { 1062 if (pp->function && !strisglob(pp->function)) {
1034 struct pubname_callback_param pubname_param = { 1063 struct pubname_callback_param pubname_param = {
1035 .function = pp->function, 1064 .function = pp->function,
1036 .file = pp->file, 1065 .file = pp->file,
@@ -1089,6 +1118,7 @@ found:
1089struct local_vars_finder { 1118struct local_vars_finder {
1090 struct probe_finder *pf; 1119 struct probe_finder *pf;
1091 struct perf_probe_arg *args; 1120 struct perf_probe_arg *args;
1121 bool vars;
1092 int max_args; 1122 int max_args;
1093 int nargs; 1123 int nargs;
1094 int ret; 1124 int ret;
@@ -1103,7 +1133,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
1103 1133
1104 tag = dwarf_tag(die_mem); 1134 tag = dwarf_tag(die_mem);
1105 if (tag == DW_TAG_formal_parameter || 1135 if (tag == DW_TAG_formal_parameter ||
1106 tag == DW_TAG_variable) { 1136 (tag == DW_TAG_variable && vf->vars)) {
1107 if (convert_variable_location(die_mem, vf->pf->addr, 1137 if (convert_variable_location(die_mem, vf->pf->addr,
1108 vf->pf->fb_ops, &pf->sp_die, 1138 vf->pf->fb_ops, &pf->sp_die,
1109 NULL) == 0) { 1139 NULL) == 0) {
@@ -1129,26 +1159,28 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
1129 Dwarf_Die die_mem; 1159 Dwarf_Die die_mem;
1130 int i; 1160 int i;
1131 int n = 0; 1161 int n = 0;
1132 struct local_vars_finder vf = {.pf = pf, .args = args, 1162 struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
1133 .max_args = MAX_PROBE_ARGS, .ret = 0}; 1163 .max_args = MAX_PROBE_ARGS, .ret = 0};
1134 1164
1135 for (i = 0; i < pf->pev->nargs; i++) { 1165 for (i = 0; i < pf->pev->nargs; i++) {
1136 /* var is never NULL */ 1166 /* var is never NULL */
1137 if (strcmp(pf->pev->args[i].var, "$vars") == 0) { 1167 if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
1138 pr_debug("Expanding $vars into:"); 1168 vf.vars = true;
1139 vf.nargs = n; 1169 else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
1140 /* Special local variables */
1141 die_find_child(sc_die, copy_variables_cb, (void *)&vf,
1142 &die_mem);
1143 pr_debug(" (%d)\n", vf.nargs - n);
1144 if (vf.ret < 0)
1145 return vf.ret;
1146 n = vf.nargs;
1147 } else {
1148 /* Copy normal argument */ 1170 /* Copy normal argument */
1149 args[n] = pf->pev->args[i]; 1171 args[n] = pf->pev->args[i];
1150 n++; 1172 n++;
1173 continue;
1151 } 1174 }
1175 pr_debug("Expanding %s into:", pf->pev->args[i].var);
1176 vf.nargs = n;
1177 /* Special local variables */
1178 die_find_child(sc_die, copy_variables_cb, (void *)&vf,
1179 &die_mem);
1180 pr_debug(" (%d)\n", vf.nargs - n);
1181 if (vf.ret < 0)
1182 return vf.ret;
1183 n = vf.nargs;
1152 } 1184 }
1153 return n; 1185 return n;
1154} 1186}
@@ -1176,6 +1208,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
1176 if (ret < 0) 1208 if (ret < 0)
1177 return ret; 1209 return ret;
1178 1210
1211 tev->point.realname = strdup(dwarf_diename(sc_die));
1212 if (!tev->point.realname)
1213 return -ENOMEM;
1214
1179 pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, 1215 pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
1180 tev->point.offset); 1216 tev->point.offset);
1181 1217
@@ -1213,15 +1249,15 @@ end:
1213/* Find probe_trace_events specified by perf_probe_event from debuginfo */ 1249/* Find probe_trace_events specified by perf_probe_event from debuginfo */
1214int debuginfo__find_trace_events(struct debuginfo *dbg, 1250int debuginfo__find_trace_events(struct debuginfo *dbg,
1215 struct perf_probe_event *pev, 1251 struct perf_probe_event *pev,
1216 struct probe_trace_event **tevs, int max_tevs) 1252 struct probe_trace_event **tevs)
1217{ 1253{
1218 struct trace_event_finder tf = { 1254 struct trace_event_finder tf = {
1219 .pf = {.pev = pev, .callback = add_probe_trace_event}, 1255 .pf = {.pev = pev, .callback = add_probe_trace_event},
1220 .mod = dbg->mod, .max_tevs = max_tevs}; 1256 .max_tevs = probe_conf.max_probes, .mod = dbg->mod};
1221 int ret; 1257 int ret;
1222 1258
1223 /* Allocate result tevs array */ 1259 /* Allocate result tevs array */
1224 *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); 1260 *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
1225 if (*tevs == NULL) 1261 if (*tevs == NULL)
1226 return -ENOMEM; 1262 return -ENOMEM;
1227 1263
@@ -1237,14 +1273,11 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
1237 return (ret < 0) ? ret : tf.ntevs; 1273 return (ret < 0) ? ret : tf.ntevs;
1238} 1274}
1239 1275
1240#define MAX_VAR_LEN 64
1241
1242/* Collect available variables in this scope */ 1276/* Collect available variables in this scope */
1243static int collect_variables_cb(Dwarf_Die *die_mem, void *data) 1277static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
1244{ 1278{
1245 struct available_var_finder *af = data; 1279 struct available_var_finder *af = data;
1246 struct variable_list *vl; 1280 struct variable_list *vl;
1247 char buf[MAX_VAR_LEN];
1248 int tag, ret; 1281 int tag, ret;
1249 1282
1250 vl = &af->vls[af->nvls - 1]; 1283 vl = &af->vls[af->nvls - 1];
@@ -1255,11 +1288,38 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
1255 ret = convert_variable_location(die_mem, af->pf.addr, 1288 ret = convert_variable_location(die_mem, af->pf.addr,
1256 af->pf.fb_ops, &af->pf.sp_die, 1289 af->pf.fb_ops, &af->pf.sp_die,
1257 NULL); 1290 NULL);
1258 if (ret == 0) { 1291 if (ret == 0 || ret == -ERANGE) {
1259 ret = die_get_varname(die_mem, buf, MAX_VAR_LEN); 1292 int ret2;
1260 pr_debug2("Add new var: %s\n", buf); 1293 bool externs = !af->child;
1261 if (ret > 0) 1294 struct strbuf buf;
1262 strlist__add(vl->vars, buf); 1295
1296 strbuf_init(&buf, 64);
1297
1298 if (probe_conf.show_location_range) {
1299 if (!externs) {
1300 if (ret)
1301 strbuf_addf(&buf, "[INV]\t");
1302 else
1303 strbuf_addf(&buf, "[VAL]\t");
1304 } else
1305 strbuf_addf(&buf, "[EXT]\t");
1306 }
1307
1308 ret2 = die_get_varname(die_mem, &buf);
1309
1310 if (!ret2 && probe_conf.show_location_range &&
1311 !externs) {
1312 strbuf_addf(&buf, "\t");
1313 ret2 = die_get_var_range(&af->pf.sp_die,
1314 die_mem, &buf);
1315 }
1316
1317 pr_debug("Add new var: %s\n", buf.buf);
1318 if (ret2 == 0) {
1319 strlist__add(vl->vars,
1320 strbuf_detach(&buf, NULL));
1321 }
1322 strbuf_release(&buf);
1263 } 1323 }
1264 } 1324 }
1265 1325
@@ -1302,9 +1362,9 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
1302 die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem); 1362 die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
1303 1363
1304 /* Find external variables */ 1364 /* Find external variables */
1305 if (!af->externs) 1365 if (!probe_conf.show_ext_vars)
1306 goto out; 1366 goto out;
1307 /* Don't need to search child DIE for externs. */ 1367 /* Don't need to search child DIE for external vars. */
1308 af->child = false; 1368 af->child = false;
1309 die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem); 1369 die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
1310 1370
@@ -1324,17 +1384,16 @@ out:
1324 */ 1384 */
1325int debuginfo__find_available_vars_at(struct debuginfo *dbg, 1385int debuginfo__find_available_vars_at(struct debuginfo *dbg,
1326 struct perf_probe_event *pev, 1386 struct perf_probe_event *pev,
1327 struct variable_list **vls, 1387 struct variable_list **vls)
1328 int max_vls, bool externs)
1329{ 1388{
1330 struct available_var_finder af = { 1389 struct available_var_finder af = {
1331 .pf = {.pev = pev, .callback = add_available_vars}, 1390 .pf = {.pev = pev, .callback = add_available_vars},
1332 .mod = dbg->mod, 1391 .mod = dbg->mod,
1333 .max_vls = max_vls, .externs = externs}; 1392 .max_vls = probe_conf.max_probes};
1334 int ret; 1393 int ret;
1335 1394
1336 /* Allocate result vls array */ 1395 /* Allocate result vls array */
1337 *vls = zalloc(sizeof(struct variable_list) * max_vls); 1396 *vls = zalloc(sizeof(struct variable_list) * af.max_vls);
1338 if (*vls == NULL) 1397 if (*vls == NULL)
1339 return -ENOMEM; 1398 return -ENOMEM;
1340 1399
@@ -1535,7 +1594,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1535 return DWARF_CB_OK; 1594 return DWARF_CB_OK;
1536 1595
1537 if (die_is_func_def(sp_die) && 1596 if (die_is_func_def(sp_die) &&
1538 die_compare_name(sp_die, lr->function)) { 1597 die_match_name(sp_die, lr->function)) {
1539 lf->fname = dwarf_decl_file(sp_die); 1598 lf->fname = dwarf_decl_file(sp_die);
1540 dwarf_decl_line(sp_die, &lr->offset); 1599 dwarf_decl_line(sp_die, &lr->offset);
1541 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); 1600 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
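
The collect_variables_cb() hunk above drops the fixed 64-byte MAX_VAR_LEN buffer in favor of a strbuf that grows as the "[VAL]\t", variable-name, and location-range pieces are appended, then hands ownership to the strlist via strbuf_detach(). A minimal growable-buffer sketch showing why this removes the old truncation risk (this sbuf is a stand-in, not perf's real strbuf code):

	#include <stdarg.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct sbuf { char *buf; size_t len, alloc; };

	/* Append formatted text, growing the allocation instead of truncating. */
	static void sbuf_addf(struct sbuf *sb, const char *fmt, ...)
	{
		va_list ap;
		int n;

		va_start(ap, fmt);
		n = vsnprintf(NULL, 0, fmt, ap);	/* measure the addition first */
		va_end(ap);
		if (n < 0)
			return;
		if (sb->len + n + 1 > sb->alloc) {
			size_t want = (sb->len + n + 1) * 2;
			char *p = realloc(sb->buf, want);

			if (!p)
				return;		/* keep old contents on OOM */
			sb->buf = p;
			sb->alloc = want;
		}
		va_start(ap, fmt);
		vsnprintf(sb->buf + sb->len, n + 1, fmt, ap);
		va_end(ap);
		sb->len += n;
	}
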
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index ebf8c8c81453..bed82716e1b4 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -10,6 +10,9 @@
10#define MAX_PROBES 128 10#define MAX_PROBES 128
11#define MAX_PROBE_ARGS 128 11#define MAX_PROBE_ARGS 128
12 12
13#define PROBE_ARG_VARS "$vars"
14#define PROBE_ARG_PARAMS "$params"
15
13static inline int is_c_varname(const char *name) 16static inline int is_c_varname(const char *name)
14{ 17{
15 /* TODO */ 18 /* TODO */
@@ -37,8 +40,7 @@ extern void debuginfo__delete(struct debuginfo *dbg);
37/* Find probe_trace_events specified by perf_probe_event from debuginfo */ 40/* Find probe_trace_events specified by perf_probe_event from debuginfo */
38extern int debuginfo__find_trace_events(struct debuginfo *dbg, 41extern int debuginfo__find_trace_events(struct debuginfo *dbg,
39 struct perf_probe_event *pev, 42 struct perf_probe_event *pev,
40 struct probe_trace_event **tevs, 43 struct probe_trace_event **tevs);
41 int max_tevs);
42 44
43/* Find a perf_probe_point from debuginfo */ 45/* Find a perf_probe_point from debuginfo */
44extern int debuginfo__find_probe_point(struct debuginfo *dbg, 46extern int debuginfo__find_probe_point(struct debuginfo *dbg,
@@ -52,8 +54,7 @@ extern int debuginfo__find_line_range(struct debuginfo *dbg,
52/* Find available variables */ 54/* Find available variables */
53extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, 55extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
54 struct perf_probe_event *pev, 56 struct perf_probe_event *pev,
55 struct variable_list **vls, 57 struct variable_list **vls);
56 int max_points, bool externs);
57 58
58/* Find a src file from a DWARF tag path */ 59/* Find a src file from a DWARF tag path */
59int get_real_path(const char *raw_path, const char *comp_dir, 60int get_real_path(const char *raw_path, const char *comp_dir,
@@ -96,7 +97,6 @@ struct available_var_finder {
96 struct variable_list *vls; /* Found variable lists */ 97 struct variable_list *vls; /* Found variable lists */
97 int nvls; /* Number of variable lists */ 98 int nvls; /* Number of variable lists */
98 int max_vls; /* Max no. of variable lists */ 99 int max_vls; /* Max no. of variable lists */
99 bool externs; /* Find external vars too */
100 bool child; /* Search child scopes */ 100 bool child; /* Search child scopes */
101}; 101};
102 102
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a126e6cc6e73..b234a6e3d0d4 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -74,3 +74,10 @@ void *pstack__pop(struct pstack *pstack)
74 pstack->entries[pstack->top] = NULL; 74 pstack->entries[pstack->top] = NULL;
75 return ret; 75 return ret;
76} 76}
77
78void *pstack__peek(struct pstack *pstack)
79{
80 if (pstack->top == 0)
81 return NULL;
82 return pstack->entries[pstack->top - 1];
83}
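
pstack__peek() returns the top entry without removing it, complementing pstack__pop(). A small usage sketch, assuming the pstack__new()/pstack__delete() constructors declared elsewhere in pstack.h:

	#include <assert.h>
	#include <stddef.h>
	#include "util/pstack.h"	/* include path assumed from the perf tree */

	static void peek_demo(void)
	{
		struct pstack *stack = pstack__new(8);
		int key = 42;

		pstack__push(stack, &key);
		assert(pstack__peek(stack) == &key);	/* inspect, entry stays */
		assert(pstack__pop(stack) == &key);	/* remove and return it */
		assert(pstack__peek(stack) == NULL);	/* empty stack yields NULL */
		pstack__delete(stack);
	}
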
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index c3cb6584d527..ded7f2e36624 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack);
10void pstack__remove(struct pstack *pstack, void *key); 10void pstack__remove(struct pstack *pstack, void *key);
11void pstack__push(struct pstack *pstack, void *key); 11void pstack__push(struct pstack *pstack, void *key);
12void *pstack__pop(struct pstack *pstack); 12void *pstack__pop(struct pstack *pstack);
13void *pstack__peek(struct pstack *pstack);
13 14
14#endif /* _PERF_PSTACK_ */ 15#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 4d28624a1eca..5925fec90562 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -16,6 +16,7 @@ util/util.c
16util/xyarray.c 16util/xyarray.c
17util/cgroup.c 17util/cgroup.c
18util/rblist.c 18util/rblist.c
19util/stat.c
19util/strlist.c 20util/strlist.c
20util/trace-event.c 21util/trace-event.c
21../../lib/rbtree.c 22../../lib/rbtree.c
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 8acd0df88b5c..d457c523a33d 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -20,7 +20,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
20 if (!evlist) 20 if (!evlist)
21 return -ENOMEM; 21 return -ENOMEM;
22 22
23 if (parse_events(evlist, str)) 23 if (parse_events(evlist, str, NULL))
24 goto out_delete; 24 goto out_delete;
25 25
26 evsel = perf_evlist__first(evlist); 26 evsel = perf_evlist__first(evlist);
@@ -119,7 +119,16 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
119 evsel->attr.comm_exec = 1; 119 evsel->attr.comm_exec = 1;
120 } 120 }
121 121
122 if (evlist->nr_entries > 1) { 122 if (opts->full_auxtrace) {
123 /*
124 * Need to be able to synthesize and parse selected events with
125 * arbitrary sample types, which requires always being able to
126 * match the id.
127 */
128 use_sample_identifier = perf_can_sample_identifier();
129 evlist__for_each(evlist, evsel)
130 perf_evsel__set_sample_id(evsel, use_sample_identifier);
131 } else if (evlist->nr_entries > 1) {
123 struct perf_evsel *first = perf_evlist__first(evlist); 132 struct perf_evsel *first = perf_evlist__first(evlist);
124 133
125 evlist__for_each(evlist, evsel) { 134 evlist__for_each(evlist, evsel) {
@@ -207,7 +216,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
207 if (!temp_evlist) 216 if (!temp_evlist)
208 return false; 217 return false;
209 218
210 err = parse_events(temp_evlist, str); 219 err = parse_events(temp_evlist, str, NULL);
211 if (err) 220 if (err)
212 goto out_delete; 221 goto out_delete;
213 222
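
With opts->full_auxtrace set, every event in the list gets the sample identifier so that synthesized and recorded events with different sample_type layouts can still be matched by id. A minimal sketch of what that amounts to at the attr level (assumes a kernel exposing PERF_SAMPLE_IDENTIFIER):

	#include <linux/perf_event.h>

	/* Put the sample id first in every record so readers can match
	 * events by id before knowing the rest of the sample layout. */
	static void set_sample_identifier(struct perf_event_attr *attr)
	{
		attr->sample_type |= PERF_SAMPLE_IDENTIFIER;
		attr->sample_id_all = 1;	/* id on non-sample records too */
	}
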
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0c74012575ac..aa482c10469d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,12 +15,14 @@
15#include "cpumap.h" 15#include "cpumap.h"
16#include "perf_regs.h" 16#include "perf_regs.h"
17#include "asm/bug.h" 17#include "asm/bug.h"
18#include "auxtrace.h"
19#include "thread-stack.h"
18 20
19static int machines__deliver_event(struct machines *machines, 21static int perf_session__deliver_event(struct perf_session *session,
20 struct perf_evlist *evlist, 22 union perf_event *event,
21 union perf_event *event, 23 struct perf_sample *sample,
22 struct perf_sample *sample, 24 struct perf_tool *tool,
23 struct perf_tool *tool, u64 file_offset); 25 u64 file_offset);
24 26
25static int perf_session__open(struct perf_session *session) 27static int perf_session__open(struct perf_session *session)
26{ 28{
@@ -105,8 +107,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
105 return ret; 107 return ret;
106 } 108 }
107 109
108 return machines__deliver_event(&session->machines, session->evlist, event->event, 110 return perf_session__deliver_event(session, event->event, &sample,
109 &sample, session->tool, event->file_offset); 111 session->tool, event->file_offset);
110} 112}
111 113
112struct perf_session *perf_session__new(struct perf_data_file *file, 114struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -119,6 +121,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
119 121
120 session->repipe = repipe; 122 session->repipe = repipe;
121 session->tool = tool; 123 session->tool = tool;
124 INIT_LIST_HEAD(&session->auxtrace_index);
122 machines__init(&session->machines); 125 machines__init(&session->machines);
123 ordered_events__init(&session->ordered_events, ordered_events__deliver_event); 126 ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
124 127
@@ -185,6 +188,8 @@ static void perf_session_env__delete(struct perf_session_env *env)
185 188
186void perf_session__delete(struct perf_session *session) 189void perf_session__delete(struct perf_session *session)
187{ 190{
191 auxtrace__free(session);
192 auxtrace_index__free(&session->auxtrace_index);
188 perf_session__destroy_kernel_maps(session); 193 perf_session__destroy_kernel_maps(session);
189 perf_session__delete_threads(session); 194 perf_session__delete_threads(session);
190 perf_session_env__delete(&session->header.env); 195 perf_session_env__delete(&session->header.env);
@@ -262,6 +267,49 @@ static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
262 return 0; 267 return 0;
263} 268}
264 269
270static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
271 union perf_event *event __maybe_unused,
272 struct perf_session *session __maybe_unused)
273{
274 dump_printf(": unhandled!\n");
275 return 0;
276}
277
278static int skipn(int fd, off_t n)
279{
280 char buf[4096];
281 ssize_t ret;
282
283 while (n > 0) {
284 ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
285 if (ret <= 0)
286 return ret;
287 n -= ret;
288 }
289
290 return 0;
291}
292
293static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
294 union perf_event *event,
295 struct perf_session *session
296 __maybe_unused)
297{
298 dump_printf(": unhandled!\n");
299 if (perf_data_file__is_pipe(session->file))
300 skipn(perf_data_file__fd(session->file), event->auxtrace.size);
301 return event->auxtrace.size;
302}
303
304static
305int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
306 union perf_event *event __maybe_unused,
307 struct perf_session *session __maybe_unused)
308{
309 dump_printf(": unhandled!\n");
310 return 0;
311}
312
265void perf_tool__fill_defaults(struct perf_tool *tool) 313void perf_tool__fill_defaults(struct perf_tool *tool)
266{ 314{
267 if (tool->sample == NULL) 315 if (tool->sample == NULL)
@@ -278,6 +326,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
278 tool->exit = process_event_stub; 326 tool->exit = process_event_stub;
279 if (tool->lost == NULL) 327 if (tool->lost == NULL)
280 tool->lost = perf_event__process_lost; 328 tool->lost = perf_event__process_lost;
329 if (tool->lost_samples == NULL)
330 tool->lost_samples = perf_event__process_lost_samples;
331 if (tool->aux == NULL)
332 tool->aux = perf_event__process_aux;
333 if (tool->itrace_start == NULL)
334 tool->itrace_start = perf_event__process_itrace_start;
281 if (tool->read == NULL) 335 if (tool->read == NULL)
282 tool->read = process_event_sample_stub; 336 tool->read = process_event_sample_stub;
283 if (tool->throttle == NULL) 337 if (tool->throttle == NULL)
@@ -298,6 +352,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
298 } 352 }
299 if (tool->id_index == NULL) 353 if (tool->id_index == NULL)
300 tool->id_index = process_id_index_stub; 354 tool->id_index = process_id_index_stub;
355 if (tool->auxtrace_info == NULL)
356 tool->auxtrace_info = process_event_auxtrace_info_stub;
357 if (tool->auxtrace == NULL)
358 tool->auxtrace = process_event_auxtrace_stub;
359 if (tool->auxtrace_error == NULL)
360 tool->auxtrace_error = process_event_auxtrace_error_stub;
301} 361}
302 362
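
The skipn() helper above exists because pipe input cannot seek: an unhandled PERF_RECORD_AUXTRACE payload has to be read and discarded to keep the stream aligned. A hypothetical demonstration of the failure mode it works around:

	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>

	/* On a pipe, lseek(2) fails with ESPIPE, so the payload must be read. */
	static void show_why_skipn(int fd, off_t payload)
	{
		if (lseek(fd, payload, SEEK_CUR) == (off_t)-1 && errno == ESPIPE)
			fprintf(stderr, "pipe input: read %lld bytes instead\n",
				(long long)payload);
	}
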
303static void swap_sample_id_all(union perf_event *event, void *data) 363static void swap_sample_id_all(union perf_event *event, void *data)
@@ -390,6 +450,26 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
390 swap_sample_id_all(event, &event->read + 1); 450 swap_sample_id_all(event, &event->read + 1);
391} 451}
392 452
453static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
454{
455 event->aux.aux_offset = bswap_64(event->aux.aux_offset);
456 event->aux.aux_size = bswap_64(event->aux.aux_size);
457 event->aux.flags = bswap_64(event->aux.flags);
458
459 if (sample_id_all)
460 swap_sample_id_all(event, &event->aux + 1);
461}
462
463static void perf_event__itrace_start_swap(union perf_event *event,
464 bool sample_id_all)
465{
466 event->itrace_start.pid = bswap_32(event->itrace_start.pid);
467 event->itrace_start.tid = bswap_32(event->itrace_start.tid);
468
469 if (sample_id_all)
470 swap_sample_id_all(event, &event->itrace_start + 1);
471}
472
393static void perf_event__throttle_swap(union perf_event *event, 473static void perf_event__throttle_swap(union perf_event *event,
394 bool sample_id_all) 474 bool sample_id_all)
395{ 475{
@@ -438,19 +518,42 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
438{ 518{
439 attr->type = bswap_32(attr->type); 519 attr->type = bswap_32(attr->type);
440 attr->size = bswap_32(attr->size); 520 attr->size = bswap_32(attr->size);
441 attr->config = bswap_64(attr->config);
442 attr->sample_period = bswap_64(attr->sample_period);
443 attr->sample_type = bswap_64(attr->sample_type);
444 attr->read_format = bswap_64(attr->read_format);
445 attr->wakeup_events = bswap_32(attr->wakeup_events);
446 attr->bp_type = bswap_32(attr->bp_type);
447 attr->bp_addr = bswap_64(attr->bp_addr);
448 attr->bp_len = bswap_64(attr->bp_len);
449 attr->branch_sample_type = bswap_64(attr->branch_sample_type);
450 attr->sample_regs_user = bswap_64(attr->sample_regs_user);
451 attr->sample_stack_user = bswap_32(attr->sample_stack_user);
452 521
453 swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); 522#define bswap_safe(f, n) \
523 (attr->size > (offsetof(struct perf_event_attr, f) + \
524 sizeof(attr->f) * (n)))
525#define bswap_field(f, sz) \
526do { \
527 if (bswap_safe(f, 0)) \
528 attr->f = bswap_##sz(attr->f); \
529} while(0)
530#define bswap_field_32(f) bswap_field(f, 32)
531#define bswap_field_64(f) bswap_field(f, 64)
532
533 bswap_field_64(config);
534 bswap_field_64(sample_period);
535 bswap_field_64(sample_type);
536 bswap_field_64(read_format);
537 bswap_field_32(wakeup_events);
538 bswap_field_32(bp_type);
539 bswap_field_64(bp_addr);
540 bswap_field_64(bp_len);
541 bswap_field_64(branch_sample_type);
542 bswap_field_64(sample_regs_user);
543 bswap_field_32(sample_stack_user);
544 bswap_field_32(aux_watermark);
545
546 /*
547 * After read_format are bitfields. Check read_format because
548 * we are unable to use offsetof on bitfield.
549 */
550 if (bswap_safe(read_format, 1))
551 swap_bitfield((u8 *) (&attr->read_format + 1),
552 sizeof(u64));
553#undef bswap_field_64
554#undef bswap_field_32
555#undef bswap_field
556#undef bswap_safe
454} 557}
455 558
456static void perf_event__hdr_attr_swap(union perf_event *event, 559static void perf_event__hdr_attr_swap(union perf_event *event,
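
The bswap_field()/bswap_safe() rewrite above only swaps an attr field when it lies within the size the (possibly older, shorter) producer actually wrote. A stand-alone sketch of the same guard on a trimmed-down struct; note this version checks full containment, slightly stricter than the bswap_safe() macro:

	#include <byteswap.h>
	#include <stddef.h>
	#include <stdint.h>

	struct mini_attr {
		uint32_t type;
		uint32_t size;		/* bytes the producer wrote */
		uint64_t config;
		uint64_t sample_period;	/* may be absent from older producers */
	};

	#define FIELD_PRESENT(a, f) \
		((a)->size >= offsetof(struct mini_attr, f) + sizeof((a)->f))

	static void mini_attr_swap(struct mini_attr *a)
	{
		a->type = bswap_32(a->type);
		a->size = bswap_32(a->size);
		if (FIELD_PRESENT(a, config))
			a->config = bswap_64(a->config);
		if (FIELD_PRESENT(a, sample_period))
			a->sample_period = bswap_64(a->sample_period);
	}
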
@@ -478,6 +581,40 @@ static void perf_event__tracing_data_swap(union perf_event *event,
478 event->tracing_data.size = bswap_32(event->tracing_data.size); 581 event->tracing_data.size = bswap_32(event->tracing_data.size);
479} 582}
480 583
584static void perf_event__auxtrace_info_swap(union perf_event *event,
585 bool sample_id_all __maybe_unused)
586{
587 size_t size;
588
589 event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
590
591 size = event->header.size;
592 size -= (void *)&event->auxtrace_info.priv - (void *)event;
593 mem_bswap_64(event->auxtrace_info.priv, size);
594}
595
596static void perf_event__auxtrace_swap(union perf_event *event,
597 bool sample_id_all __maybe_unused)
598{
599 event->auxtrace.size = bswap_64(event->auxtrace.size);
600 event->auxtrace.offset = bswap_64(event->auxtrace.offset);
601 event->auxtrace.reference = bswap_64(event->auxtrace.reference);
602 event->auxtrace.idx = bswap_32(event->auxtrace.idx);
603 event->auxtrace.tid = bswap_32(event->auxtrace.tid);
604 event->auxtrace.cpu = bswap_32(event->auxtrace.cpu);
605}
606
607static void perf_event__auxtrace_error_swap(union perf_event *event,
608 bool sample_id_all __maybe_unused)
609{
610 event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
611 event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
612 event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
613 event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
614 event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
615 event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
616}
617
481typedef void (*perf_event__swap_op)(union perf_event *event, 618typedef void (*perf_event__swap_op)(union perf_event *event,
482 bool sample_id_all); 619 bool sample_id_all);
483 620
@@ -492,11 +629,17 @@ static perf_event__swap_op perf_event__swap_ops[] = {
492 [PERF_RECORD_THROTTLE] = perf_event__throttle_swap, 629 [PERF_RECORD_THROTTLE] = perf_event__throttle_swap,
493 [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap, 630 [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap,
494 [PERF_RECORD_SAMPLE] = perf_event__all64_swap, 631 [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
632 [PERF_RECORD_AUX] = perf_event__aux_swap,
633 [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
634 [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
495 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, 635 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
496 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, 636 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
497 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, 637 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
498 [PERF_RECORD_HEADER_BUILD_ID] = NULL, 638 [PERF_RECORD_HEADER_BUILD_ID] = NULL,
499 [PERF_RECORD_ID_INDEX] = perf_event__all64_swap, 639 [PERF_RECORD_ID_INDEX] = perf_event__all64_swap,
640 [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
641 [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
642 [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
500 [PERF_RECORD_HEADER_MAX] = NULL, 643 [PERF_RECORD_HEADER_MAX] = NULL,
501}; 644};
502 645
@@ -921,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines,
921 case PERF_RECORD_MMAP: 1064 case PERF_RECORD_MMAP:
922 return tool->mmap(tool, event, sample, machine); 1065 return tool->mmap(tool, event, sample, machine);
923 case PERF_RECORD_MMAP2: 1066 case PERF_RECORD_MMAP2:
1067 if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
1068 ++evlist->stats.nr_proc_map_timeout;
924 return tool->mmap2(tool, event, sample, machine); 1069 return tool->mmap2(tool, event, sample, machine);
925 case PERF_RECORD_COMM: 1070 case PERF_RECORD_COMM:
926 return tool->comm(tool, event, sample, machine); 1071 return tool->comm(tool, event, sample, machine);
@@ -932,18 +1077,44 @@ static int machines__deliver_event(struct machines *machines,
932 if (tool->lost == perf_event__process_lost) 1077 if (tool->lost == perf_event__process_lost)
933 evlist->stats.total_lost += event->lost.lost; 1078 evlist->stats.total_lost += event->lost.lost;
934 return tool->lost(tool, event, sample, machine); 1079 return tool->lost(tool, event, sample, machine);
1080 case PERF_RECORD_LOST_SAMPLES:
1081 if (tool->lost_samples == perf_event__process_lost_samples)
1082 evlist->stats.total_lost_samples += event->lost_samples.lost;
1083 return tool->lost_samples(tool, event, sample, machine);
935 case PERF_RECORD_READ: 1084 case PERF_RECORD_READ:
936 return tool->read(tool, event, sample, evsel, machine); 1085 return tool->read(tool, event, sample, evsel, machine);
937 case PERF_RECORD_THROTTLE: 1086 case PERF_RECORD_THROTTLE:
938 return tool->throttle(tool, event, sample, machine); 1087 return tool->throttle(tool, event, sample, machine);
939 case PERF_RECORD_UNTHROTTLE: 1088 case PERF_RECORD_UNTHROTTLE:
940 return tool->unthrottle(tool, event, sample, machine); 1089 return tool->unthrottle(tool, event, sample, machine);
1090 case PERF_RECORD_AUX:
1091 return tool->aux(tool, event, sample, machine);
1092 case PERF_RECORD_ITRACE_START:
1093 return tool->itrace_start(tool, event, sample, machine);
941 default: 1094 default:
942 ++evlist->stats.nr_unknown_events; 1095 ++evlist->stats.nr_unknown_events;
943 return -1; 1096 return -1;
944 } 1097 }
945} 1098}
946 1099
1100static int perf_session__deliver_event(struct perf_session *session,
1101 union perf_event *event,
1102 struct perf_sample *sample,
1103 struct perf_tool *tool,
1104 u64 file_offset)
1105{
1106 int ret;
1107
1108 ret = auxtrace__process_event(session, event, sample, tool);
1109 if (ret < 0)
1110 return ret;
1111 if (ret > 0)
1112 return 0;
1113
1114 return machines__deliver_event(&session->machines, session->evlist,
1115 event, sample, tool, file_offset);
1116}
1117
947static s64 perf_session__process_user_event(struct perf_session *session, 1118static s64 perf_session__process_user_event(struct perf_session *session,
948 union perf_event *event, 1119 union perf_event *event,
949 u64 file_offset) 1120 u64 file_offset)
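
perf_session__deliver_event() above relies on a three-way return convention from auxtrace__process_event(): negative is an error, positive means the event was consumed by the AUX-trace layer, and zero means deliver it normally. A generic sketch of that dispatch shape (the names here are hypothetical):

	/* intercept() may claim the event; only unclaimed events reach the
	 * normal delivery path. */
	static int deliver(int (*intercept)(void *ev),
			   int (*deliver_rest)(void *ev), void *ev)
	{
		int ret = intercept(ev);

		if (ret < 0)
			return ret;	/* hard error, propagate */
		if (ret > 0)
			return 0;	/* consumed; skip normal delivery */
		return deliver_rest(ev);
	}
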
@@ -980,6 +1151,15 @@ static s64 perf_session__process_user_event(struct perf_session *session,
980 return tool->finished_round(tool, event, oe); 1151 return tool->finished_round(tool, event, oe);
981 case PERF_RECORD_ID_INDEX: 1152 case PERF_RECORD_ID_INDEX:
982 return tool->id_index(tool, event, session); 1153 return tool->id_index(tool, event, session);
1154 case PERF_RECORD_AUXTRACE_INFO:
1155 return tool->auxtrace_info(tool, event, session);
1156 case PERF_RECORD_AUXTRACE:
1157 /* setup for reading amidst mmap */
1158 lseek(fd, file_offset + event->header.size, SEEK_SET);
1159 return tool->auxtrace(tool, event, session);
1160 case PERF_RECORD_AUXTRACE_ERROR:
1161 perf_session__auxtrace_error_inc(session, event);
1162 return tool->auxtrace_error(tool, event, session);
983 default: 1163 default:
984 return -EINVAL; 1164 return -EINVAL;
985 } 1165 }
@@ -1034,7 +1214,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
1034 return -1; 1214 return -1;
1035 1215
1036 if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 || 1216 if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
1037 readn(fd, &buf, hdr_sz) != (ssize_t)hdr_sz) 1217 readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
1038 return -1; 1218 return -1;
1039 1219
1040 event = (union perf_event *)buf; 1220 event = (union perf_event *)buf;
@@ -1042,12 +1222,12 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
1042 if (session->header.needs_swap) 1222 if (session->header.needs_swap)
1043 perf_event_header__bswap(&event->header); 1223 perf_event_header__bswap(&event->header);
1044 1224
1045 if (event->header.size < hdr_sz) 1225 if (event->header.size < hdr_sz || event->header.size > buf_sz)
1046 return -1; 1226 return -1;
1047 1227
1048 rest = event->header.size - hdr_sz; 1228 rest = event->header.size - hdr_sz;
1049 1229
1050 if (readn(fd, &buf, rest) != (ssize_t)rest) 1230 if (readn(fd, buf, rest) != (ssize_t)rest)
1051 return -1; 1231 return -1;
1052 1232
1053 if (session->header.needs_swap) 1233 if (session->header.needs_swap)
@@ -1096,8 +1276,8 @@ static s64 perf_session__process_event(struct perf_session *session,
1096 return ret; 1276 return ret;
1097 } 1277 }
1098 1278
1099 return machines__deliver_event(&session->machines, evlist, event, 1279 return perf_session__deliver_event(session, event, &sample, tool,
1100 &sample, tool, file_offset); 1280 file_offset);
1101} 1281}
1102 1282
1103void perf_event_header__bswap(struct perf_event_header *hdr) 1283void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -1138,6 +1318,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
1138 stats->nr_events[PERF_RECORD_LOST]); 1318 stats->nr_events[PERF_RECORD_LOST]);
1139 } 1319 }
1140 1320
1321 if (session->tool->lost_samples == perf_event__process_lost_samples) {
1322 double drop_rate;
1323
1324 drop_rate = (double)stats->total_lost_samples /
1325 (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
1326 if (drop_rate > 0.05) {
1327 ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
1328 stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
1329 drop_rate * 100.0);
1330 }
1331 }
1332
1141 if (stats->nr_unknown_events != 0) { 1333 if (stats->nr_unknown_events != 0) {
1142 ui__warning("Found %u unknown events!\n\n" 1334 ui__warning("Found %u unknown events!\n\n"
1143 "Is this an older tool processing a perf.data " 1335 "Is this an older tool processing a perf.data "
@@ -1168,6 +1360,32 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
1168 1360
1169 if (oe->nr_unordered_events != 0) 1361 if (oe->nr_unordered_events != 0)
1170 ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); 1362 ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
1363
1364 events_stats__auxtrace_error_warn(stats);
1365
1366 if (stats->nr_proc_map_timeout != 0) {
1367 ui__warning("%d map information files for pre-existing threads were\n"
1368 "not processed, if there are samples for addresses they\n"
1369 "will not be resolved, you may find out which are these\n"
1370 "threads by running with -v and redirecting the output\n"
1371 "to a file.\n"
1372 "The time limit to process proc map is too short?\n"
1373 "Increase it by --proc-map-timeout\n",
1374 stats->nr_proc_map_timeout);
1375 }
1376}
1377
1378static int perf_session__flush_thread_stack(struct thread *thread,
1379 void *p __maybe_unused)
1380{
1381 return thread_stack__flush(thread);
1382}
1383
1384static int perf_session__flush_thread_stacks(struct perf_session *session)
1385{
1386 return machines__for_each_thread(&session->machines,
1387 perf_session__flush_thread_stack,
1388 NULL);
1171} 1389}
1172 1390
1173volatile int session_done; 1391volatile int session_done;
@@ -1256,10 +1474,17 @@ more:
1256done: 1474done:
1257 /* do the final flush for ordered samples */ 1475 /* do the final flush for ordered samples */
1258 err = ordered_events__flush(oe, OE_FLUSH__FINAL); 1476 err = ordered_events__flush(oe, OE_FLUSH__FINAL);
1477 if (err)
1478 goto out_err;
1479 err = auxtrace__flush_events(session, tool);
1480 if (err)
1481 goto out_err;
1482 err = perf_session__flush_thread_stacks(session);
1259out_err: 1483out_err:
1260 free(buf); 1484 free(buf);
1261 perf_session__warn_about_errors(session); 1485 perf_session__warn_about_errors(session);
1262 ordered_events__free(&session->ordered_events); 1486 ordered_events__free(&session->ordered_events);
1487 auxtrace__free_events(session);
1263 return err; 1488 return err;
1264} 1489}
1265 1490
@@ -1402,10 +1627,17 @@ more:
1402out: 1627out:
1403 /* do the final flush for ordered samples */ 1628 /* do the final flush for ordered samples */
1404 err = ordered_events__flush(oe, OE_FLUSH__FINAL); 1629 err = ordered_events__flush(oe, OE_FLUSH__FINAL);
1630 if (err)
1631 goto out_err;
1632 err = auxtrace__flush_events(session, tool);
1633 if (err)
1634 goto out_err;
1635 err = perf_session__flush_thread_stacks(session);
1405out_err: 1636out_err:
1406 ui_progress__finish(); 1637 ui_progress__finish();
1407 perf_session__warn_about_errors(session); 1638 perf_session__warn_about_errors(session);
1408 ordered_events__free(&session->ordered_events); 1639 ordered_events__free(&session->ordered_events);
1640 auxtrace__free_events(session);
1409 session->one_mmap = false; 1641 session->one_mmap = false;
1410 return err; 1642 return err;
1411} 1643}
@@ -1488,7 +1720,13 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp
1488 1720
1489size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) 1721size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
1490{ 1722{
1491 size_t ret = fprintf(fp, "Aggregated stats:\n"); 1723 size_t ret;
1724 const char *msg = "";
1725
1726 if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
1727 msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
1728
1729 ret = fprintf(fp, "Aggregated stats:%s\n", msg);
1492 1730
1493 ret += events_stats__fprintf(&session->evlist->stats, fp); 1731 ret += events_stats__fprintf(&session->evlist->stats, fp);
1494 return ret; 1732 return ret;
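
The new warning in perf_session__warn_about_errors() flags runs where more than 5% of samples were dropped; the rate is computed against the total that would have been collected (captured plus lost). Below is a standalone sketch of the same check with hypothetical counts, assuming nothing beyond the C standard library:

	#include <stdio.h>
	#include <stdint.h>

	/* Warn when more than 5% of expected samples were lost. */
	static void warn_if_dropping(uint64_t nr_samples, uint64_t nr_lost)
	{
		double drop_rate = (double)nr_lost /
				   (double)(nr_samples + nr_lost);

		if (drop_rate > 0.05)
			printf("Processed %llu samples and lost %3.2f%% of them!\n",
			       (unsigned long long)(nr_samples + nr_lost),
			       drop_rate * 100.0);
	}

	int main(void)
	{
		warn_if_dropping(9000, 1000);	/* 10% dropped -> warns */
		warn_if_dropping(9900, 100);	/* 1% dropped -> silent */
		return 0;
	}
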
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d5fa7b7916ef..b44afc75d1cc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -15,10 +15,16 @@
15struct ip_callchain; 15struct ip_callchain;
16struct thread; 16struct thread;
17 17
18struct auxtrace;
19struct itrace_synth_opts;
20
18struct perf_session { 21struct perf_session {
19 struct perf_header header; 22 struct perf_header header;
20 struct machines machines; 23 struct machines machines;
21 struct perf_evlist *evlist; 24 struct perf_evlist *evlist;
25 struct auxtrace *auxtrace;
26 struct itrace_synth_opts *itrace_synth_opts;
27 struct list_head auxtrace_index;
22 struct trace_event tevent; 28 struct trace_event tevent;
23 bool repipe; 29 bool repipe;
24 bool one_mmap; 30 bool one_mmap;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4593f36ecc4c..4c65a143a34c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -89,14 +89,14 @@ static int64_t
89sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) 89sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
90{ 90{
91 /* Compare the addr that should be unique among comm */ 91 /* Compare the addr that should be unique among comm */
92 return comm__str(right->comm) - comm__str(left->comm); 92 return strcmp(comm__str(right->comm), comm__str(left->comm));
93} 93}
94 94
95static int64_t 95static int64_t
96sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) 96sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
97{ 97{
98 /* Compare the addr that should be unique among comm */ 98 /* Compare the addr that should be unique among comm */
99 return comm__str(right->comm) - comm__str(left->comm); 99 return strcmp(comm__str(right->comm), comm__str(left->comm));
100} 100}
101 101
102static int64_t 102static int64_t
@@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
182 182
183static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) 183static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
184{ 184{
185 u64 ip_l, ip_r;
186
187 if (!sym_l || !sym_r) 185 if (!sym_l || !sym_r)
188 return cmp_null(sym_l, sym_r); 186 return cmp_null(sym_l, sym_r);
189 187
190 if (sym_l == sym_r) 188 if (sym_l == sym_r)
191 return 0; 189 return 0;
192 190
193 ip_l = sym_l->start; 191 if (sym_l->start != sym_r->start)
194 ip_r = sym_r->start; 192 return (int64_t)(sym_r->start - sym_l->start);
195 193
196 return (int64_t)(ip_r - ip_l); 194 return (int64_t)(sym_r->end - sym_l->end);
197} 195}
198 196
199static int64_t 197static int64_t
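
Both sort.c fixes above replace arithmetic on addresses with proper comparisons: subtracting two unrelated pointers (or two u64 addresses) into an int64_t can truncate or wrap and yield an inconsistent sort order. A minimal sketch of the safe pattern, using qsort() over hypothetical comm strings:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Compare by string content, never by pointer difference. */
	static int cmp_comm(const void *a, const void *b)
	{
		const char * const *l = a, * const *r = b;

		return strcmp(*l, *r);
	}

	int main(void)
	{
		const char *comms[] = { "sshd", "bash", "perf" };
		size_t i;

		qsort(comms, 3, sizeof(comms[0]), cmp_comm);
		for (i = 0; i < 3; i++)
			printf("%s\n", comms[i]);
		return 0;
	}
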
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 846036a921dc..e97cd476d336 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -58,15 +58,16 @@ struct he_stat {
58 58
59struct hist_entry_diff { 59struct hist_entry_diff {
60 bool computed; 60 bool computed;
61 union {
62 /* PERF_HPP__DELTA */
63 double period_ratio_delta;
61 64
62 /* PERF_HPP__DELTA */ 65 /* PERF_HPP__RATIO */
63 double period_ratio_delta; 66 double period_ratio;
64
65 /* PERF_HPP__RATIO */
66 double period_ratio;
67 67
68 /* HISTC_WEIGHTED_DIFF */ 68 /* HISTC_WEIGHTED_DIFF */
69 s64 wdiff; 69 s64 wdiff;
70 };
70}; 71};
71 72
72/** 73/**
@@ -92,21 +93,28 @@ struct hist_entry {
92 s32 cpu; 93 s32 cpu;
93 u8 cpumode; 94 u8 cpumode;
94 95
95 struct hist_entry_diff diff;
96
97 /* We are added by hists__add_dummy_entry. */ 96 /* We are added by hists__add_dummy_entry. */
98 bool dummy; 97 bool dummy;
99 98
100 /* XXX These two should move to some tree widget lib */
101 u16 row_offset;
102 u16 nr_rows;
103
104 bool init_have_children;
105 char level; 99 char level;
106 u8 filtered; 100 u8 filtered;
101 union {
102 /*
103 * Since perf diff only supports the stdio output, TUI
104 * fields are only accessed from perf report (or perf
105 * top). So make it an union to reduce memory usage.
106 */
107 struct hist_entry_diff diff;
108 struct /* for TUI */ {
109 u16 row_offset;
110 u16 nr_rows;
111 bool init_have_children;
112 bool unfolded;
113 bool has_children;
114 };
115 };
107 char *srcline; 116 char *srcline;
108 struct symbol *parent; 117 struct symbol *parent;
109 unsigned long position;
110 struct rb_root sorted_chain; 118 struct rb_root sorted_chain;
111 struct branch_info *branch_info; 119 struct branch_info *branch_info;
112 struct hists *hists; 120 struct hists *hists;
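
The hist_entry change above overlays the diff-only and TUI-only fields in an anonymous union, since no single run of perf touches both sets. A toy illustration of the size saving follows; the field types are made up for the demo, not the real hist_entry layout:

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>

	struct separate {
		double period_ratio_delta;	/* diff-only */
		uint16_t row_offset;		/* TUI-only */
		uint16_t nr_rows;
		bool init_have_children;
	};

	struct overlaid {
		union {
			double period_ratio_delta;	/* diff-only */
			struct {			/* TUI-only */
				uint16_t row_offset;
				uint16_t nr_rows;
				bool init_have_children;
			};
		};
	};

	int main(void)
	{
		/* The union costs max(member sizes), not their sum. */
		printf("separate: %zu, overlaid: %zu\n",
		       sizeof(struct separate), sizeof(struct overlaid));
		return 0;
	}
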
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000000..53e8bb7bc852
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,434 @@
1#include <stdio.h>
2#include "evsel.h"
3#include "stat.h"
4#include "color.h"
5
6enum {
7 CTX_BIT_USER = 1 << 0,
8 CTX_BIT_KERNEL = 1 << 1,
9 CTX_BIT_HV = 1 << 2,
10 CTX_BIT_HOST = 1 << 3,
11 CTX_BIT_IDLE = 1 << 4,
12 CTX_BIT_MAX = 1 << 5,
13};
14
15#define NUM_CTX CTX_BIT_MAX
16
17static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
18static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
19static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
20static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
21static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
22static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
23static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
24static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
25static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
26static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
27static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
28static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
29static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
30static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
31
32struct stats walltime_nsecs_stats;
33
34static int evsel_context(struct perf_evsel *evsel)
35{
36 int ctx = 0;
37
38 if (evsel->attr.exclude_kernel)
39 ctx |= CTX_BIT_KERNEL;
40 if (evsel->attr.exclude_user)
41 ctx |= CTX_BIT_USER;
42 if (evsel->attr.exclude_hv)
43 ctx |= CTX_BIT_HV;
44 if (evsel->attr.exclude_host)
45 ctx |= CTX_BIT_HOST;
46 if (evsel->attr.exclude_idle)
47 ctx |= CTX_BIT_IDLE;
48
49 return ctx;
50}
51
52void perf_stat__reset_shadow_stats(void)
53{
54 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65 memset(runtime_cycles_in_tx_stats, 0,
66 sizeof(runtime_cycles_in_tx_stats));
67 memset(runtime_transaction_stats, 0,
68 sizeof(runtime_transaction_stats));
69 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
71}
72
73/*
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
77 */
78void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
79 int cpu)
80{
81 int ctx = evsel_context(counter);
82
83 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84 update_stats(&runtime_nsecs_stats[cpu], count[0]);
85 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
89 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91 else if (perf_stat_evsel__is(counter, ELISION_START))
92 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
111}
112
113/* used for get_ratio_color() */
114enum grc_type {
115 GRC_STALLED_CYCLES_FE,
116 GRC_STALLED_CYCLES_BE,
117 GRC_CACHE_MISSES,
118 GRC_MAX_NR
119};
120
121static const char *get_ratio_color(enum grc_type type, double ratio)
122{
123 static const double grc_table[GRC_MAX_NR][3] = {
124 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
127 };
128 const char *color = PERF_COLOR_NORMAL;
129
130 if (ratio > grc_table[type][0])
131 color = PERF_COLOR_RED;
132 else if (ratio > grc_table[type][1])
133 color = PERF_COLOR_MAGENTA;
134 else if (ratio > grc_table[type][2])
135 color = PERF_COLOR_YELLOW;
136
137 return color;
138}
139
140static void print_stalled_cycles_frontend(FILE *out, int cpu,
141 struct perf_evsel *evsel
142 __maybe_unused, double avg)
143{
144 double total, ratio = 0.0;
145 const char *color;
146 int ctx = evsel_context(evsel);
147
148 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
149
150 if (total)
151 ratio = avg / total * 100.0;
152
153 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
154
155 fprintf(out, " # ");
156 color_fprintf(out, color, "%6.2f%%", ratio);
157 fprintf(out, " frontend cycles idle ");
158}
159
160static void print_stalled_cycles_backend(FILE *out, int cpu,
161 struct perf_evsel *evsel
162 __maybe_unused, double avg)
163{
164 double total, ratio = 0.0;
165 const char *color;
166 int ctx = evsel_context(evsel);
167
168 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
169
170 if (total)
171 ratio = avg / total * 100.0;
172
173 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
174
175 fprintf(out, " # ");
176 color_fprintf(out, color, "%6.2f%%", ratio);
177 fprintf(out, " backend cycles idle ");
178}
179
180static void print_branch_misses(FILE *out, int cpu,
181 struct perf_evsel *evsel __maybe_unused,
182 double avg)
183{
184 double total, ratio = 0.0;
185 const char *color;
186 int ctx = evsel_context(evsel);
187
188 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
189
190 if (total)
191 ratio = avg / total * 100.0;
192
193 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
194
195 fprintf(out, " # ");
196 color_fprintf(out, color, "%6.2f%%", ratio);
197 fprintf(out, " of all branches ");
198}
199
200static void print_l1_dcache_misses(FILE *out, int cpu,
201 struct perf_evsel *evsel __maybe_unused,
202 double avg)
203{
204 double total, ratio = 0.0;
205 const char *color;
206 int ctx = evsel_context(evsel);
207
208 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
209
210 if (total)
211 ratio = avg / total * 100.0;
212
213 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
214
215 fprintf(out, " # ");
216 color_fprintf(out, color, "%6.2f%%", ratio);
217 fprintf(out, " of all L1-dcache hits ");
218}
219
220static void print_l1_icache_misses(FILE *out, int cpu,
221 struct perf_evsel *evsel __maybe_unused,
222 double avg)
223{
224 double total, ratio = 0.0;
225 const char *color;
226 int ctx = evsel_context(evsel);
227
228 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
229
230 if (total)
231 ratio = avg / total * 100.0;
232
233 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
234
235 fprintf(out, " # ");
236 color_fprintf(out, color, "%6.2f%%", ratio);
237 fprintf(out, " of all L1-icache hits ");
238}
239
240static void print_dtlb_cache_misses(FILE *out, int cpu,
241 struct perf_evsel *evsel __maybe_unused,
242 double avg)
243{
244 double total, ratio = 0.0;
245 const char *color;
246 int ctx = evsel_context(evsel);
247
248 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
249
250 if (total)
251 ratio = avg / total * 100.0;
252
253 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
254
255 fprintf(out, " # ");
256 color_fprintf(out, color, "%6.2f%%", ratio);
257 fprintf(out, " of all dTLB cache hits ");
258}
259
260static void print_itlb_cache_misses(FILE *out, int cpu,
261 struct perf_evsel *evsel __maybe_unused,
262 double avg)
263{
264 double total, ratio = 0.0;
265 const char *color;
266 int ctx = evsel_context(evsel);
267
268 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
269
270 if (total)
271 ratio = avg / total * 100.0;
272
273 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
274
275 fprintf(out, " # ");
276 color_fprintf(out, color, "%6.2f%%", ratio);
277 fprintf(out, " of all iTLB cache hits ");
278}
279
280static void print_ll_cache_misses(FILE *out, int cpu,
281 struct perf_evsel *evsel __maybe_unused,
282 double avg)
283{
284 double total, ratio = 0.0;
285 const char *color;
286 int ctx = evsel_context(evsel);
287
288 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
289
290 if (total)
291 ratio = avg / total * 100.0;
292
293 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
294
295 fprintf(out, " # ");
296 color_fprintf(out, color, "%6.2f%%", ratio);
297 fprintf(out, " of all LL-cache hits ");
298}
299
300void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
301 double avg, int cpu, enum aggr_mode aggr)
302{
303 double total, ratio = 0.0, total2;
304 int ctx = evsel_context(evsel);
305
306 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
307 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
308 if (total) {
309 ratio = avg / total;
310 fprintf(out, " # %5.2f insns per cycle ", ratio);
311 } else {
312 fprintf(out, " ");
313 }
314 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
315 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
316
317 if (total && avg) {
318 ratio = total / avg;
319 fprintf(out, "\n");
320 if (aggr == AGGR_NONE)
321 fprintf(out, " ");
322 fprintf(out, " # %5.2f stalled cycles per insn", ratio);
323 }
324
325 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
326 runtime_branches_stats[ctx][cpu].n != 0) {
327 print_branch_misses(out, cpu, evsel, avg);
328 } else if (
329 evsel->attr.type == PERF_TYPE_HW_CACHE &&
330 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
331 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
333 runtime_l1_dcache_stats[ctx][cpu].n != 0) {
334 print_l1_dcache_misses(out, cpu, evsel, avg);
335 } else if (
336 evsel->attr.type == PERF_TYPE_HW_CACHE &&
337 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
338 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
339 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
340 runtime_l1_icache_stats[ctx][cpu].n != 0) {
341 print_l1_icache_misses(out, cpu, evsel, avg);
342 } else if (
343 evsel->attr.type == PERF_TYPE_HW_CACHE &&
344 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
345 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
346 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
347 runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
348 print_dtlb_cache_misses(out, cpu, evsel, avg);
349 } else if (
350 evsel->attr.type == PERF_TYPE_HW_CACHE &&
351 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
352 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
354 runtime_itlb_cache_stats[ctx][cpu].n != 0) {
355 print_itlb_cache_misses(out, cpu, evsel, avg);
356 } else if (
357 evsel->attr.type == PERF_TYPE_HW_CACHE &&
358 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
359 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
360 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
361 runtime_ll_cache_stats[ctx][cpu].n != 0) {
362 print_ll_cache_misses(out, cpu, evsel, avg);
363 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
364 runtime_cacherefs_stats[ctx][cpu].n != 0) {
365 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
366
367 if (total)
368 ratio = avg * 100 / total;
369
370 fprintf(out, " # %8.3f %% of all cache refs ", ratio);
371
372 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
373 print_stalled_cycles_frontend(out, cpu, evsel, avg);
374 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
375 print_stalled_cycles_backend(out, cpu, evsel, avg);
376 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
377 total = avg_stats(&runtime_nsecs_stats[cpu]);
378
379 if (total) {
380 ratio = avg / total;
381 fprintf(out, " # %8.3f GHz ", ratio);
382 } else {
383 fprintf(out, " ");
384 }
385 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
386 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
387 if (total)
388 fprintf(out,
389 " # %5.2f%% transactional cycles ",
390 100.0 * (avg / total));
391 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
392 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
393 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
394 if (total2 < avg)
395 total2 = avg;
396 if (total)
397 fprintf(out,
398 " # %5.2f%% aborted cycles ",
399 100.0 * ((total2-avg) / total));
400 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
401 avg > 0 &&
402 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
403 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
404
405 if (total)
406 ratio = total / avg;
407
408 fprintf(out, " # %8.0f cycles / transaction ", ratio);
409 } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
410 avg > 0 &&
411 runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
412 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
413
414 if (total)
415 ratio = total / avg;
416
417 fprintf(out, " # %8.0f cycles / elision ", ratio);
418 } else if (runtime_nsecs_stats[cpu].n != 0) {
419 char unit = 'M';
420
421 total = avg_stats(&runtime_nsecs_stats[cpu]);
422
423 if (total)
424 ratio = 1000.0 * avg / total;
425 if (ratio < 0.001) {
426 ratio *= 1000;
427 unit = 'K';
428 }
429
430 fprintf(out, " # %8.3f %c/sec ", ratio, unit);
431 } else {
432 fprintf(out, " ");
433 }
434}
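
stat-shadow keys its per-CPU stats arrays by a "context" index built from the event's exclude_* bits, so counters running in different privilege contexts never mix. Five bits give 32 possible contexts, hence NUM_CTX == CTX_BIT_MAX == 1 << 5. A self-contained sketch of the same encoding; the struct here is a hypothetical stand-in, not struct perf_event_attr:

	#include <stdio.h>
	#include <stdbool.h>

	enum {
		CTX_BIT_USER	= 1 << 0,
		CTX_BIT_KERNEL	= 1 << 1,
		CTX_BIT_HV	= 1 << 2,
		CTX_BIT_HOST	= 1 << 3,
		CTX_BIT_IDLE	= 1 << 4,
		CTX_BIT_MAX	= 1 << 5,
	};

	struct attr_bits {	/* hypothetical stand-in */
		bool exclude_user, exclude_kernel, exclude_hv;
		bool exclude_host, exclude_idle;
	};

	static int evsel_context(const struct attr_bits *a)
	{
		int ctx = 0;

		if (a->exclude_kernel)
			ctx |= CTX_BIT_KERNEL;
		if (a->exclude_user)
			ctx |= CTX_BIT_USER;
		if (a->exclude_hv)
			ctx |= CTX_BIT_HV;
		if (a->exclude_host)
			ctx |= CTX_BIT_HOST;
		if (a->exclude_idle)
			ctx |= CTX_BIT_IDLE;
		return ctx;	/* 0 .. CTX_BIT_MAX - 1 */
	}

	int main(void)
	{
		struct attr_bits user_only = { .exclude_kernel = true,
					       .exclude_hv = true };

		printf("ctx = %d of %d\n",
		       evsel_context(&user_only), CTX_BIT_MAX);
		return 0;
	}
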
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 6506b3dfb605..4014b709f956 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,6 +1,6 @@
1#include <math.h> 1#include <math.h>
2
3#include "stat.h" 2#include "stat.h"
3#include "evsel.h"
4 4
5void update_stats(struct stats *stats, u64 val) 5void update_stats(struct stats *stats, u64 val)
6{ 6{
@@ -61,3 +61,72 @@ double rel_stddev_stats(double stddev, double avg)
61 61
62 return pct; 62 return pct;
63} 63}
64
65bool __perf_evsel_stat__is(struct perf_evsel *evsel,
66 enum perf_stat_evsel_id id)
67{
68 struct perf_stat *ps = evsel->priv;
69
70 return ps->id == id;
71}
72
73#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
74static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
75 ID(NONE, x),
76 ID(CYCLES_IN_TX, cpu/cycles-t/),
77 ID(TRANSACTION_START, cpu/tx-start/),
78 ID(ELISION_START, cpu/el-start/),
79 ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
80};
81#undef ID
82
83void perf_stat_evsel_id_init(struct perf_evsel *evsel)
84{
85 struct perf_stat *ps = evsel->priv;
86 int i;
87
88 /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
89
90 for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
91 if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
92 ps->id = i;
93 break;
94 }
95 }
96}
97
98struct perf_counts *perf_counts__new(int ncpus)
99{
100 int size = sizeof(struct perf_counts) +
101 ncpus * sizeof(struct perf_counts_values);
102
103 return zalloc(size);
104}
105
106void perf_counts__delete(struct perf_counts *counts)
107{
108 free(counts);
109}
110
111static void perf_counts__reset(struct perf_counts *counts, int ncpus)
112{
113 memset(counts, 0, (sizeof(*counts) +
114 (ncpus * sizeof(struct perf_counts_values))));
115}
116
117void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
118{
119 perf_counts__reset(evsel->counts, ncpus);
120}
121
122int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
123{
124 evsel->counts = perf_counts__new(ncpus);
125 return evsel->counts != NULL ? 0 : -ENOMEM;
126}
127
128void perf_evsel__free_counts(struct perf_evsel *evsel)
129{
130 perf_counts__delete(evsel->counts);
131 evsel->counts = NULL;
132}
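
perf_counts__new() sizes a single allocation to hold the header plus an ncpus-long tail of values, the classic flexible-array-member pattern. A freestanding sketch with simplified types (perf_counts_values is reduced to one u64 here, purely for illustration):

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	struct counts {
		int ncpus;
		uint64_t vals[];	/* flexible array member */
	};

	static struct counts *counts_new(int ncpus)
	{
		/* One zeroed allocation: header plus ncpus values. */
		struct counts *c = calloc(1, sizeof(*c) +
					     ncpus * sizeof(c->vals[0]));

		if (c)
			c->ncpus = ncpus;
		return c;
	}

	int main(void)
	{
		struct counts *c = counts_new(4);

		if (!c)
			return 1;
		c->vals[3] = 42;
		printf("cpu3 = %llu\n", (unsigned long long)c->vals[3]);
		free(c);
		return 0;
	}
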
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 5667fc3e39cf..093dc3cb28dd 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -2,6 +2,7 @@
2#define __PERF_STATS_H 2#define __PERF_STATS_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <stdio.h>
5 6
6struct stats 7struct stats
7{ 8{
@@ -9,6 +10,27 @@ struct stats
9 u64 max, min; 10 u64 max, min;
10}; 11};
11 12
13enum perf_stat_evsel_id {
14 PERF_STAT_EVSEL_ID__NONE = 0,
15 PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
16 PERF_STAT_EVSEL_ID__TRANSACTION_START,
17 PERF_STAT_EVSEL_ID__ELISION_START,
18 PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
19 PERF_STAT_EVSEL_ID__MAX,
20};
21
22struct perf_stat {
23 struct stats res_stats[3];
24 enum perf_stat_evsel_id id;
25};
26
27enum aggr_mode {
28 AGGR_NONE,
29 AGGR_GLOBAL,
30 AGGR_SOCKET,
31 AGGR_CORE,
32};
33
12void update_stats(struct stats *stats, u64 val); 34void update_stats(struct stats *stats, u64 val);
13double avg_stats(struct stats *stats); 35double avg_stats(struct stats *stats);
14double stddev_stats(struct stats *stats); 36double stddev_stats(struct stats *stats);
@@ -22,4 +44,28 @@ static inline void init_stats(struct stats *stats)
22 stats->min = (u64) -1; 44 stats->min = (u64) -1;
23 stats->max = 0; 45 stats->max = 0;
24} 46}
47
48struct perf_evsel;
49bool __perf_evsel_stat__is(struct perf_evsel *evsel,
50 enum perf_stat_evsel_id id);
51
52#define perf_stat_evsel__is(evsel, id) \
53 __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
54
55void perf_stat_evsel_id_init(struct perf_evsel *evsel);
56
57extern struct stats walltime_nsecs_stats;
58
59void perf_stat__reset_shadow_stats(void);
60void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
61 int cpu);
62void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
63 double avg, int cpu, enum aggr_mode aggr);
64
65struct perf_counts *perf_counts__new(int ncpus);
66void perf_counts__delete(struct perf_counts *counts);
67
68void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
69int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
70void perf_evsel__free_counts(struct perf_evsel *evsel);
25#endif 71#endif
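
The perf_stat_evsel__is() wrapper pastes the short id onto the enum prefix at compile time, so call sites can write perf_stat_evsel__is(evsel, CYCLES_IN_TX) instead of spelling out PERF_STAT_EVSEL_ID__CYCLES_IN_TX. A minimal sketch of the same token-pasting idiom with a made-up enum:

	#include <stdio.h>

	enum demo_id {
		DEMO_ID__NONE = 0,
		DEMO_ID__CYCLES_IN_TX,
		DEMO_ID__MAX,
	};

	static int __demo_is(enum demo_id have, enum demo_id want)
	{
		return have == want;
	}

	/* ## pastes the caller's short name onto the enum prefix. */
	#define demo_is(have, id) __demo_is(have, DEMO_ID__ ## id)

	int main(void)
	{
		enum demo_id id = DEMO_ID__CYCLES_IN_TX;

		printf("%d\n", demo_is(id, CYCLES_IN_TX));	/* prints 1 */
		return 0;
	}
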
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index 79a757a2a15c..bcae659b6546 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -170,6 +170,46 @@ struct strfilter *strfilter__new(const char *rules, const char **err)
170 return filter; 170 return filter;
171} 171}
172 172
173static int strfilter__append(struct strfilter *filter, bool _or,
174 const char *rules, const char **err)
175{
176 struct strfilter_node *right, *root;
177 const char *ep = NULL;
178
179 if (!filter || !rules)
180 return -EINVAL;
181
182 right = strfilter_node__new(rules, &ep);
183 if (!right || *ep != '\0') {
184 if (err)
185 *err = ep;
186 goto error;
187 }
188 root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
189 if (!root) {
190 ep = NULL;
191 goto error;
192 }
193
194 filter->root = root;
195 return 0;
196
197error:
198 strfilter_node__delete(right);
199 return ep ? -EINVAL : -ENOMEM;
200}
201
202int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
203{
204 return strfilter__append(filter, true, rules, err);
205}
206
207int strfilter__and(struct strfilter *filter, const char *rules,
208 const char **err)
209{
210 return strfilter__append(filter, false, rules, err);
211}
212
173static bool strfilter_node__compare(struct strfilter_node *node, 213static bool strfilter_node__compare(struct strfilter_node *node,
174 const char *str) 214 const char *str)
175{ 215{
@@ -197,3 +237,70 @@ bool strfilter__compare(struct strfilter *filter, const char *str)
197 return false; 237 return false;
198 return strfilter_node__compare(filter->root, str); 238 return strfilter_node__compare(filter->root, str);
199} 239}
240
241static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
242
243/* sprint node in parenthesis if needed */
244static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
245{
246 int len;
247 int pt = node->r ? 2 : 0; /* don't need to check node->l */
248
249 if (buf && pt)
250 *buf++ = '(';
251 len = strfilter_node__sprint(node, buf);
252 if (len < 0)
253 return len;
254 if (buf && pt)
255 *(buf + len) = ')';
256 return len + pt;
257}
258
259static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
260{
261 int len = 0, rlen;
262
263 if (!node || !node->p)
264 return -EINVAL;
265
266 switch (*node->p) {
267 case '|':
268 case '&':
269 len = strfilter_node__sprint_pt(node->l, buf);
270 if (len < 0)
271 return len;
272 case '!':
273 if (buf) {
274 *(buf + len++) = *node->p;
275 buf += len;
276 } else
277 len++;
278 rlen = strfilter_node__sprint_pt(node->r, buf);
279 if (rlen < 0)
280 return rlen;
281 len += rlen;
282 break;
283 default:
284 len = strlen(node->p);
285 if (buf)
286 strcpy(buf, node->p);
287 }
288
289 return len;
290}
291
292char *strfilter__string(struct strfilter *filter)
293{
294 int len;
295 char *ret = NULL;
296
297 len = strfilter_node__sprint(filter->root, NULL);
298 if (len < 0)
299 return NULL;
300
301 ret = malloc(len + 1);
302 if (ret)
303 strfilter_node__sprint(filter->root, ret);
304
305 return ret;
306}
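
strfilter__string() walks the rule tree twice: once with a NULL buffer to measure the length, then again to fill a buffer of exactly that size. A compact sketch of the same two-pass measure-then-fill idiom on a plain string, independent of the strfilter types:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Emit "!(<rule>)"; with buf == NULL only count the length. */
	static int sprint_negated(const char *rule, char *buf)
	{
		int len = strlen(rule) + 3;	/* '!' '(' ')' */

		if (buf)
			snprintf(buf, len + 1, "!(%s)", rule);
		return len;
	}

	int main(void)
	{
		int len = sprint_negated("vfs_*", NULL);	/* pass 1: measure */
		char *s = malloc(len + 1);

		if (!s)
			return 1;
		sprint_negated("vfs_*", s);			/* pass 2: fill */
		printf("%s\n", s);	/* !(vfs_*) */
		free(s);
		return 0;
	}
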
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
index fe611f3c9e39..cff5eda88728 100644
--- a/tools/perf/util/strfilter.h
+++ b/tools/perf/util/strfilter.h
@@ -29,6 +29,32 @@ struct strfilter {
29struct strfilter *strfilter__new(const char *rules, const char **err); 29struct strfilter *strfilter__new(const char *rules, const char **err);
30 30
31/** 31/**
32 * strfilter__or - Append an additional rule by logical-or
33 * @filter: Original string filter
 34 * @rules: Filter rule to be appended to @filter as the right
 35 * operand of a new logical-or root.
 36 * @err: Pointer set to the location of an error detected in @rules
37 *
 38 * Parse @rules and join it to @filter using logical-or.
 39 * Return 0 on success, or an error code.
40 */
41int strfilter__or(struct strfilter *filter,
42 const char *rules, const char **err);
43
44/**
 45 * strfilter__and - Append an additional rule by logical-and
 46 * @filter: Original string filter
 47 * @rules: Filter rule to be appended to @filter as the right
 48 * operand of a new logical-and root.
 49 * @err: Pointer set to the location of an error detected in @rules
50 *
 51 * Parse @rules and join it to @filter using logical-and.
 52 * Return 0 on success, or an error code.
53 */
54int strfilter__and(struct strfilter *filter,
55 const char *rules, const char **err);
56
57/**
32 * strfilter__compare - compare given string and a string filter 58 * strfilter__compare - compare given string and a string filter
33 * @filter: String filter 59 * @filter: String filter
34 * @str: target string 60 * @str: target string
@@ -45,4 +71,13 @@ bool strfilter__compare(struct strfilter *filter, const char *str);
45 */ 71 */
46void strfilter__delete(struct strfilter *filter); 72void strfilter__delete(struct strfilter *filter);
47 73
74/**
75 * strfilter__string - Reconstruct a rule string from filter
76 * @filter: String filter to reconstruct
77 *
 78 * Reconstruct a rule string from @filter. This is useful for
 79 * debug messages. Note that the returned string must be freed afterward.
80 */
81char *strfilter__string(struct strfilter *filter);
82
48#endif 83#endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7ab6063e038..65f7e389ae09 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -630,6 +630,11 @@ void symsrc__destroy(struct symsrc *ss)
630 close(ss->fd); 630 close(ss->fd);
631} 631}
632 632
633bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
634{
635 return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
636}
637
633int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, 638int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
634 enum dso_binary_type type) 639 enum dso_binary_type type)
635{ 640{
@@ -678,6 +683,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
678 } 683 }
679 684
680 if (!dso__build_id_equal(dso, build_id)) { 685 if (!dso__build_id_equal(dso, build_id)) {
686 pr_debug("%s: build id mismatch for %s.\n", __func__, name);
681 dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; 687 dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
682 goto out_elf_end; 688 goto out_elf_end;
683 } 689 }
@@ -711,8 +717,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
711 ".gnu.prelink_undo", 717 ".gnu.prelink_undo",
712 NULL) != NULL); 718 NULL) != NULL);
713 } else { 719 } else {
714 ss->adjust_symbols = ehdr.e_type == ET_EXEC || 720 ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
715 ehdr.e_type == ET_REL;
716 } 721 }
717 722
718 ss->name = strdup(name); 723 ss->name = strdup(name);
@@ -771,6 +776,8 @@ static bool want_demangle(bool is_kernel_sym)
771 return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; 776 return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
772} 777}
773 778
779void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
780
774int dso__load_sym(struct dso *dso, struct map *map, 781int dso__load_sym(struct dso *dso, struct map *map,
775 struct symsrc *syms_ss, struct symsrc *runtime_ss, 782 struct symsrc *syms_ss, struct symsrc *runtime_ss,
776 symbol_filter_t filter, int kmodule) 783 symbol_filter_t filter, int kmodule)
@@ -935,6 +942,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
935 (sym.st_value & 1)) 942 (sym.st_value & 1))
936 --sym.st_value; 943 --sym.st_value;
937 944
945 arch__elf_sym_adjust(&sym);
946
938 if (dso->kernel || kmodule) { 947 if (dso->kernel || kmodule) {
939 char dso_name[PATH_MAX]; 948 char dso_name[PATH_MAX];
940 949
@@ -963,8 +972,10 @@ int dso__load_sym(struct dso *dso, struct map *map,
963 map->unmap_ip = map__unmap_ip; 972 map->unmap_ip = map__unmap_ip;
964 /* Ensure maps are correctly ordered */ 973 /* Ensure maps are correctly ordered */
965 if (kmaps) { 974 if (kmaps) {
975 map__get(map);
966 map_groups__remove(kmaps, map); 976 map_groups__remove(kmaps, map);
967 map_groups__insert(kmaps, map); 977 map_groups__insert(kmaps, map);
978 map__put(map);
968 } 979 }
969 } 980 }
970 981
@@ -1005,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
1005 curr_map = map__new2(start, curr_dso, 1016 curr_map = map__new2(start, curr_dso,
1006 map->type); 1017 map->type);
1007 if (curr_map == NULL) { 1018 if (curr_map == NULL) {
1008 dso__delete(curr_dso); 1019 dso__put(curr_dso);
1009 goto out_elf_end; 1020 goto out_elf_end;
1010 } 1021 }
1011 if (adjust_kernel_syms) { 1022 if (adjust_kernel_syms) {
@@ -1020,11 +1031,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
1020 } 1031 }
1021 curr_dso->symtab_type = dso->symtab_type; 1032 curr_dso->symtab_type = dso->symtab_type;
1022 map_groups__insert(kmaps, curr_map); 1033 map_groups__insert(kmaps, curr_map);
1023 /* 1034 dsos__add(&map->groups->machine->dsos, curr_dso);
1024 * The new DSO should go to the kernel DSOS
1025 */
1026 dsos__add(&map->groups->machine->kernel_dsos,
1027 curr_dso);
1028 dso__set_loaded(curr_dso, map->type); 1035 dso__set_loaded(curr_dso, map->type);
1029 } else 1036 } else
1030 curr_dso = curr_map->dso; 1037 curr_dso = curr_map->dso;
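
elf__needs_adjust_symbols() and arch__elf_sym_adjust() above are declared __weak so an architecture can ship a stronger definition that replaces the generic one at link time. A sketch of the mechanism using GCC/Clang's attribute spelling and a hypothetical hook name; normally the strong version would live in a separate arch object file:

	#include <stdio.h>

	/*
	 * Generic fallback. An architecture object file that defines a
	 * non-weak arch__adjust() would replace this at link time; with
	 * no such object linked in, the weak body below is used.
	 */
	__attribute__((weak)) int arch__adjust(int addr)
	{
		return addr;	/* generic: leave the address untouched */
	}

	int main(void)
	{
		/* Prints 0x1001: no strong override, weak default runs. */
		printf("%#x\n", (unsigned)arch__adjust(0x1001));
		return 0;
	}
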
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 201f6c4ca738..504f2d73b7ee 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -85,8 +85,17 @@ static int prefix_underscores_count(const char *str)
85 return tail - str; 85 return tail - str;
86} 86}
87 87
88#define SYMBOL_A 0 88int __weak arch__choose_best_symbol(struct symbol *syma,
89#define SYMBOL_B 1 89 struct symbol *symb __maybe_unused)
90{
91 /* Avoid "SyS" kernel syscall aliases */
92 if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
93 return SYMBOL_B;
94 if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
95 return SYMBOL_B;
96
97 return SYMBOL_A;
98}
90 99
91static int choose_best_symbol(struct symbol *syma, struct symbol *symb) 100static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
92{ 101{
@@ -134,13 +143,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
134 else if (na < nb) 143 else if (na < nb)
135 return SYMBOL_B; 144 return SYMBOL_B;
136 145
137 /* Avoid "SyS" kernel syscall aliases */ 146 return arch__choose_best_symbol(syma, symb);
138 if (na >= 3 && !strncmp(syma->name, "SyS", 3))
139 return SYMBOL_B;
140 if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
141 return SYMBOL_B;
142
143 return SYMBOL_A;
144} 147}
145 148
146void symbols__fixup_duplicate(struct rb_root *symbols) 149void symbols__fixup_duplicate(struct rb_root *symbols)
@@ -199,18 +202,18 @@ void symbols__fixup_end(struct rb_root *symbols)
199 202
200void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) 203void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
201{ 204{
202 struct map *prev, *curr; 205 struct maps *maps = &mg->maps[type];
203 struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]); 206 struct map *next, *curr;
204 207
205 if (prevnd == NULL) 208 pthread_rwlock_wrlock(&maps->lock);
206 return;
207 209
208 curr = rb_entry(prevnd, struct map, rb_node); 210 curr = maps__first(maps);
211 if (curr == NULL)
212 goto out_unlock;
209 213
210 for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { 214 for (next = map__next(curr); next; next = map__next(curr)) {
211 prev = curr; 215 curr->end = next->start;
212 curr = rb_entry(nd, struct map, rb_node); 216 curr = next;
213 prev->end = curr->start;
214 } 217 }
215 218
216 /* 219 /*
@@ -218,6 +221,9 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
218 * last map final address. 221 * last map final address.
219 */ 222 */
220 curr->end = ~0ULL; 223 curr->end = ~0ULL;
224
225out_unlock:
226 pthread_rwlock_unlock(&maps->lock);
221} 227}
222 228
223struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) 229struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -397,7 +403,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
397 const char *name) 403 const char *name)
398{ 404{
399 struct rb_node *n; 405 struct rb_node *n;
400 struct symbol_name_rb_node *s; 406 struct symbol_name_rb_node *s = NULL;
401 407
402 if (symbols == NULL) 408 if (symbols == NULL)
403 return NULL; 409 return NULL;
@@ -408,7 +414,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
408 int cmp; 414 int cmp;
409 415
410 s = rb_entry(n, struct symbol_name_rb_node, rb_node); 416 s = rb_entry(n, struct symbol_name_rb_node, rb_node);
411 cmp = strcmp(name, s->sym.name); 417 cmp = arch__compare_symbol_names(name, s->sym.name);
412 418
413 if (cmp < 0) 419 if (cmp < 0)
414 n = n->rb_left; 420 n = n->rb_left;
@@ -426,7 +432,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
426 struct symbol_name_rb_node *tmp; 432 struct symbol_name_rb_node *tmp;
427 433
428 tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); 434 tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
429 if (strcmp(tmp->sym.name, s->sym.name)) 435 if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
430 break; 436 break;
431 437
432 s = tmp; 438 s = tmp;
@@ -653,14 +659,14 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
653 curr_map = map_groups__find(kmaps, map->type, pos->start); 659 curr_map = map_groups__find(kmaps, map->type, pos->start);
654 660
655 if (!curr_map || (filter && filter(curr_map, pos))) { 661 if (!curr_map || (filter && filter(curr_map, pos))) {
656 rb_erase(&pos->rb_node, root); 662 rb_erase_init(&pos->rb_node, root);
657 symbol__delete(pos); 663 symbol__delete(pos);
658 } else { 664 } else {
659 pos->start -= curr_map->start - curr_map->pgoff; 665 pos->start -= curr_map->start - curr_map->pgoff;
660 if (pos->end) 666 if (pos->end)
661 pos->end -= curr_map->start - curr_map->pgoff; 667 pos->end -= curr_map->start - curr_map->pgoff;
662 if (curr_map != map) { 668 if (curr_map != map) {
663 rb_erase(&pos->rb_node, root); 669 rb_erase_init(&pos->rb_node, root);
664 symbols__insert( 670 symbols__insert(
665 &curr_map->dso->symbols[curr_map->type], 671 &curr_map->dso->symbols[curr_map->type],
666 pos); 672 pos);
@@ -780,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
780 786
781 curr_map = map__new2(pos->start, ndso, map->type); 787 curr_map = map__new2(pos->start, ndso, map->type);
782 if (curr_map == NULL) { 788 if (curr_map == NULL) {
783 dso__delete(ndso); 789 dso__put(ndso);
784 return -1; 790 return -1;
785 } 791 }
786 792
@@ -1167,20 +1173,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1167 /* Add new maps */ 1173 /* Add new maps */
1168 while (!list_empty(&md.maps)) { 1174 while (!list_empty(&md.maps)) {
1169 new_map = list_entry(md.maps.next, struct map, node); 1175 new_map = list_entry(md.maps.next, struct map, node);
1170 list_del(&new_map->node); 1176 list_del_init(&new_map->node);
1171 if (new_map == replacement_map) { 1177 if (new_map == replacement_map) {
1172 map->start = new_map->start; 1178 map->start = new_map->start;
1173 map->end = new_map->end; 1179 map->end = new_map->end;
1174 map->pgoff = new_map->pgoff; 1180 map->pgoff = new_map->pgoff;
1175 map->map_ip = new_map->map_ip; 1181 map->map_ip = new_map->map_ip;
1176 map->unmap_ip = new_map->unmap_ip; 1182 map->unmap_ip = new_map->unmap_ip;
1177 map__delete(new_map);
1178 /* Ensure maps are correctly ordered */ 1183 /* Ensure maps are correctly ordered */
1184 map__get(map);
1179 map_groups__remove(kmaps, map); 1185 map_groups__remove(kmaps, map);
1180 map_groups__insert(kmaps, map); 1186 map_groups__insert(kmaps, map);
1187 map__put(map);
1181 } else { 1188 } else {
1182 map_groups__insert(kmaps, new_map); 1189 map_groups__insert(kmaps, new_map);
1183 } 1190 }
1191
1192 map__put(new_map);
1184 } 1193 }
1185 1194
1186 /* 1195 /*
@@ -1205,8 +1214,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1205out_err: 1214out_err:
1206 while (!list_empty(&md.maps)) { 1215 while (!list_empty(&md.maps)) {
1207 map = list_entry(md.maps.next, struct map, node); 1216 map = list_entry(md.maps.next, struct map, node);
1208 list_del(&map->node); 1217 list_del_init(&map->node);
1209 map__delete(map); 1218 map__put(map);
1210 } 1219 }
1211 close(fd); 1220 close(fd);
1212 return -EINVAL; 1221 return -EINVAL;
@@ -1355,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
1355 case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: 1364 case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
1356 /* 1365 /*
1357 * kernel modules know their symtab type - it's set when 1366 * kernel modules know their symtab type - it's set when
1358 * creating a module dso in machine__new_module(). 1367 * creating a module dso in machine__findnew_module_map().
1359 */ 1368 */
1360 return kmod && dso->symtab_type == type; 1369 return kmod && dso->symtab_type == type;
1361 1370
@@ -1380,12 +1389,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1380 struct symsrc *syms_ss = NULL, *runtime_ss = NULL; 1389 struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
1381 bool kmod; 1390 bool kmod;
1382 1391
1383 dso__set_loaded(dso, map->type); 1392 pthread_mutex_lock(&dso->lock);
1393
1394 /* check again under the dso->lock */
1395 if (dso__loaded(dso, map->type)) {
1396 ret = 1;
1397 goto out;
1398 }
1384 1399
1385 if (dso->kernel == DSO_TYPE_KERNEL) 1400 if (dso->kernel) {
1386 return dso__load_kernel_sym(dso, map, filter); 1401 if (dso->kernel == DSO_TYPE_KERNEL)
1387 else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) 1402 ret = dso__load_kernel_sym(dso, map, filter);
1388 return dso__load_guest_kernel_sym(dso, map, filter); 1403 else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
1404 ret = dso__load_guest_kernel_sym(dso, map, filter);
1405
1406 goto out;
1407 }
1389 1408
1390 if (map->groups && map->groups->machine) 1409 if (map->groups && map->groups->machine)
1391 machine = map->groups->machine; 1410 machine = map->groups->machine;
@@ -1398,18 +1417,18 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1398 struct stat st; 1417 struct stat st;
1399 1418
1400 if (lstat(dso->name, &st) < 0) 1419 if (lstat(dso->name, &st) < 0)
1401 return -1; 1420 goto out;
1402 1421
1403 if (st.st_uid && (st.st_uid != geteuid())) { 1422 if (st.st_uid && (st.st_uid != geteuid())) {
1404 pr_warning("File %s not owned by current user or root, " 1423 pr_warning("File %s not owned by current user or root, "
1405 "ignoring it.\n", dso->name); 1424 "ignoring it.\n", dso->name);
1406 return -1; 1425 goto out;
1407 } 1426 }
1408 1427
1409 ret = dso__load_perf_map(dso, map, filter); 1428 ret = dso__load_perf_map(dso, map, filter);
1410 dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : 1429 dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
1411 DSO_BINARY_TYPE__NOT_FOUND; 1430 DSO_BINARY_TYPE__NOT_FOUND;
1412 return ret; 1431 goto out;
1413 } 1432 }
1414 1433
1415 if (machine) 1434 if (machine)
@@ -1417,7 +1436,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1417 1436
1418 name = malloc(PATH_MAX); 1437 name = malloc(PATH_MAX);
1419 if (!name) 1438 if (!name)
1420 return -1; 1439 goto out;
1421 1440
1422 kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || 1441 kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
1423 dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || 1442 dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
@@ -1498,23 +1517,32 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
1498out_free: 1517out_free:
1499 free(name); 1518 free(name);
1500 if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) 1519 if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
1501 return 0; 1520 ret = 0;
1521out:
1522 dso__set_loaded(dso, map->type);
1523 pthread_mutex_unlock(&dso->lock);
1524
1502 return ret; 1525 return ret;
1503} 1526}
1504 1527
1505struct map *map_groups__find_by_name(struct map_groups *mg, 1528struct map *map_groups__find_by_name(struct map_groups *mg,
1506 enum map_type type, const char *name) 1529 enum map_type type, const char *name)
1507{ 1530{
1508 struct rb_node *nd; 1531 struct maps *maps = &mg->maps[type];
1532 struct map *map;
1509 1533
1510 for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { 1534 pthread_rwlock_rdlock(&maps->lock);
1511 struct map *map = rb_entry(nd, struct map, rb_node);
1512 1535
1536 for (map = maps__first(maps); map; map = map__next(map)) {
1513 if (map->dso && strcmp(map->dso->short_name, name) == 0) 1537 if (map->dso && strcmp(map->dso->short_name, name) == 0)
1514 return map; 1538 goto out_unlock;
1515 } 1539 }
1516 1540
1517 return NULL; 1541 map = NULL;
1542
1543out_unlock:
1544 pthread_rwlock_unlock(&maps->lock);
1545 return map;
1518} 1546}
1519 1547
1520int dso__load_vmlinux(struct dso *dso, struct map *map, 1548int dso__load_vmlinux(struct dso *dso, struct map *map,
@@ -1802,6 +1830,7 @@ static void vmlinux_path__exit(void)
1802{ 1830{
1803 while (--vmlinux_path__nr_entries >= 0) 1831 while (--vmlinux_path__nr_entries >= 0)
1804 zfree(&vmlinux_path[vmlinux_path__nr_entries]); 1832 zfree(&vmlinux_path[vmlinux_path__nr_entries]);
1833 vmlinux_path__nr_entries = 0;
1805 1834
1806 zfree(&vmlinux_path); 1835 zfree(&vmlinux_path);
1807} 1836}
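
__map_groups__fixup_end() now walks the map list under a writer lock, closing each map at the start of its successor and leaving the last map open-ended. A reduced sketch of the same pass over a plain array of [start, end) ranges, with the locking shown (pthread rwlock, assuming a POSIX system):

	#include <stdio.h>
	#include <pthread.h>
	#include <stdint.h>

	struct range { uint64_t start, end; };

	static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

	static void fixup_end(struct range *r, int n)
	{
		int i;

		pthread_rwlock_wrlock(&lock);
		for (i = 0; i + 1 < n; i++)
			r[i].end = r[i + 1].start;	/* close at successor */
		if (n)
			r[n - 1].end = ~0ULL;		/* last one open-ended */
		pthread_rwlock_unlock(&lock);
	}

	int main(void)
	{
		struct range r[] = { {0x1000, 0}, {0x4000, 0}, {0x9000, 0} };
		int i;

		fixup_end(r, 3);
		for (i = 0; i < 3; i++)
			printf("%#llx-%#llx\n",
			       (unsigned long long)r[i].start,
			       (unsigned long long)r[i].end);
		return 0;
	}
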
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 09561500164a..bef47ead1d9b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -158,8 +158,6 @@ struct ref_reloc_sym {
158struct map_symbol { 158struct map_symbol {
159 struct map *map; 159 struct map *map;
160 struct symbol *sym; 160 struct symbol *sym;
161 bool unfolded;
162 bool has_children;
163}; 161};
164 162
165struct addr_map_symbol { 163struct addr_map_symbol {
@@ -303,4 +301,14 @@ int setup_list(struct strlist **list, const char *list_str,
303int setup_intlist(struct intlist **list, const char *list_str, 301int setup_intlist(struct intlist **list, const char *list_str,
304 const char *list_name); 302 const char *list_name);
305 303
304#ifdef HAVE_LIBELF_SUPPORT
305bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
306void arch__elf_sym_adjust(GElf_Sym *sym);
307#endif
308
309#define SYMBOL_A 0
310#define SYMBOL_B 1
311
312int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
313
306#endif /* __PERF_SYMBOL */ 314#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 9ed59a452d1f..679688e70ae7 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread,
219 return crp->process(&cr, crp->data); 219 return crp->process(&cr, crp->data);
220} 220}
221 221
222static int thread_stack__flush(struct thread *thread, struct thread_stack *ts) 222static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
223{ 223{
224 struct call_return_processor *crp = ts->crp; 224 struct call_return_processor *crp = ts->crp;
225 int err; 225 int err;
@@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
242 return 0; 242 return 0;
243} 243}
244 244
245int thread_stack__flush(struct thread *thread)
246{
247 if (thread->ts)
248 return __thread_stack__flush(thread, thread->ts);
249
250 return 0;
251}
252
245int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, 253int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
246 u64 to_ip, u16 insn_len, u64 trace_nr) 254 u64 to_ip, u16 insn_len, u64 trace_nr)
247{ 255{
@@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
264 */ 272 */
265 if (trace_nr != thread->ts->trace_nr) { 273 if (trace_nr != thread->ts->trace_nr) {
266 if (thread->ts->trace_nr) 274 if (thread->ts->trace_nr)
267 thread_stack__flush(thread, thread->ts); 275 __thread_stack__flush(thread, thread->ts);
268 thread->ts->trace_nr = trace_nr; 276 thread->ts->trace_nr = trace_nr;
269 } 277 }
270 278
@@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
297 305
298 if (trace_nr != thread->ts->trace_nr) { 306 if (trace_nr != thread->ts->trace_nr) {
299 if (thread->ts->trace_nr) 307 if (thread->ts->trace_nr)
300 thread_stack__flush(thread, thread->ts); 308 __thread_stack__flush(thread, thread->ts);
301 thread->ts->trace_nr = trace_nr; 309 thread->ts->trace_nr = trace_nr;
302 } 310 }
303} 311}
@@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
305void thread_stack__free(struct thread *thread) 313void thread_stack__free(struct thread *thread)
306{ 314{
307 if (thread->ts) { 315 if (thread->ts) {
308 thread_stack__flush(thread, thread->ts); 316 __thread_stack__flush(thread, thread->ts);
309 zfree(&thread->ts->stack); 317 zfree(&thread->ts->stack);
310 zfree(&thread->ts); 318 zfree(&thread->ts);
311 } 319 }
@@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
689 697
690 /* Flush stack on exec */ 698 /* Flush stack on exec */
691 if (ts->comm != comm && thread->pid_ == thread->tid) { 699 if (ts->comm != comm && thread->pid_ == thread->tid) {
692 err = thread_stack__flush(thread, ts); 700 err = __thread_stack__flush(thread, ts);
693 if (err) 701 if (err)
694 return err; 702 return err;
695 ts->comm = comm; 703 ts->comm = comm;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b843bbef8ba2..e1528f1374c3 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
96void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); 96void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
97void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, 97void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
98 size_t sz, u64 ip); 98 size_t sz, u64 ip);
99int thread_stack__flush(struct thread *thread);
99void thread_stack__free(struct thread *thread); 100void thread_stack__free(struct thread *thread);
100 101
101struct call_return_processor * 102struct call_return_processor *
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 1c8fbc9588c5..28c4b746baa1 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -18,7 +18,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
18 if (pid == thread->tid || pid == -1) { 18 if (pid == thread->tid || pid == -1) {
19 thread->mg = map_groups__new(machine); 19 thread->mg = map_groups__new(machine);
20 } else { 20 } else {
21 leader = machine__findnew_thread(machine, pid, pid); 21 leader = __machine__findnew_thread(machine, pid, pid);
22 if (leader) 22 if (leader)
23 thread->mg = map_groups__get(leader->mg); 23 thread->mg = map_groups__get(leader->mg);
24 } 24 }
@@ -53,7 +53,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
53 goto err_thread; 53 goto err_thread;
54 54
55 list_add(&comm->list, &thread->comm_list); 55 list_add(&comm->list, &thread->comm_list);
56 56 atomic_set(&thread->refcnt, 0);
57 RB_CLEAR_NODE(&thread->rb_node);
57 } 58 }
58 59
59 return thread; 60 return thread;
@@ -67,6 +68,8 @@ void thread__delete(struct thread *thread)
67{ 68{
68 struct comm *comm, *tmp; 69 struct comm *comm, *tmp;
69 70
71 BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
72
70 thread_stack__free(thread); 73 thread_stack__free(thread);
71 74
72 if (thread->mg) { 75 if (thread->mg) {
@@ -84,13 +87,14 @@ void thread__delete(struct thread *thread)
84 87
85struct thread *thread__get(struct thread *thread) 88struct thread *thread__get(struct thread *thread)
86{ 89{
87 ++thread->refcnt; 90 if (thread)
91 atomic_inc(&thread->refcnt);
88 return thread; 92 return thread;
89} 93}
90 94
91void thread__put(struct thread *thread) 95void thread__put(struct thread *thread)
92{ 96{
93 if (thread && --thread->refcnt == 0) { 97 if (thread && atomic_dec_and_test(&thread->refcnt)) {
94 list_del_init(&thread->node); 98 list_del_init(&thread->node);
95 thread__delete(thread); 99 thread__delete(thread);
96 } 100 }
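With refcnt now an atomic_t, thread__get()/thread__put() form the usual acquire/release pair: get can take a new reference without holding any lock, and put frees the object only when the last reference drops. A self-contained sketch of the same discipline using C11 stdatomic in place of the kernel's atomic_t (struct obj and its helpers are illustrative, not perf API):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct obj {
            atomic_int refcnt;
            /* payload ... */
    };

    static struct obj *obj__get(struct obj *o)
    {
            if (o)
                    atomic_fetch_add(&o->refcnt, 1);
            return o;
    }

    static void obj__put(struct obj *o)
    {
            /* fetch_sub returns the old value: 1 means this was the last reference */
            if (o && atomic_fetch_sub(&o->refcnt, 1) == 1)
                    free(o);
    }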
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 9b8a54dc34a8..a0ac0317affb 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -1,6 +1,7 @@
1#ifndef __PERF_THREAD_H 1#ifndef __PERF_THREAD_H
2#define __PERF_THREAD_H 2#define __PERF_THREAD_H
3 3
4#include <linux/atomic.h>
4#include <linux/rbtree.h> 5#include <linux/rbtree.h>
5#include <linux/list.h> 6#include <linux/list.h>
6#include <unistd.h> 7#include <unistd.h>
@@ -21,12 +22,12 @@ struct thread {
21 pid_t tid; 22 pid_t tid;
22 pid_t ppid; 23 pid_t ppid;
23 int cpu; 24 int cpu;
24 int refcnt; 25 atomic_t refcnt;
25 char shortname[3]; 26 char shortname[3];
26 bool comm_set; 27 bool comm_set;
28 int comm_len;
27 bool dead; /* if set thread has exited */ 29 bool dead; /* if set thread has exited */
28 struct list_head comm_list; 30 struct list_head comm_list;
29 int comm_len;
30 u64 db_id; 31 u64 db_id;
31 32
32 void *priv; 33 void *priv;
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index f93b9734735b..f4822bd03709 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -20,6 +20,15 @@ static int filter(const struct dirent *dir)
20 return 1; 20 return 1;
21} 21}
22 22
23static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
24{
25 size_t size = sizeof(*map) + sizeof(pid_t) * nr;
26
27 return realloc(map, size);
28}
29
30#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
31
23struct thread_map *thread_map__new_by_pid(pid_t pid) 32struct thread_map *thread_map__new_by_pid(pid_t pid)
24{ 33{
25 struct thread_map *threads; 34 struct thread_map *threads;
@@ -33,7 +42,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
33 if (items <= 0) 42 if (items <= 0)
34 return NULL; 43 return NULL;
35 44
36 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); 45 threads = thread_map__alloc(items);
37 if (threads != NULL) { 46 if (threads != NULL) {
38 for (i = 0; i < items; i++) 47 for (i = 0; i < items; i++)
39 threads->map[i] = atoi(namelist[i]->d_name); 48 threads->map[i] = atoi(namelist[i]->d_name);
@@ -49,7 +58,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
49 58
50struct thread_map *thread_map__new_by_tid(pid_t tid) 59struct thread_map *thread_map__new_by_tid(pid_t tid)
51{ 60{
52 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t)); 61 struct thread_map *threads = thread_map__alloc(1);
53 62
54 if (threads != NULL) { 63 if (threads != NULL) {
55 threads->map[0] = tid; 64 threads->map[0] = tid;
@@ -65,8 +74,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
65 int max_threads = 32, items, i; 74 int max_threads = 32, items, i;
66 char path[256]; 75 char path[256];
67 struct dirent dirent, *next, **namelist = NULL; 76 struct dirent dirent, *next, **namelist = NULL;
68 struct thread_map *threads = malloc(sizeof(*threads) + 77 struct thread_map *threads = thread_map__alloc(max_threads);
69 max_threads * sizeof(pid_t)); 78
70 if (threads == NULL) 79 if (threads == NULL)
71 goto out; 80 goto out;
72 81
@@ -185,8 +194,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
185 goto out_free_threads; 194 goto out_free_threads;
186 195
187 total_tasks += items; 196 total_tasks += items;
188 nt = realloc(threads, (sizeof(*threads) + 197 nt = thread_map__realloc(threads, total_tasks);
189 sizeof(pid_t) * total_tasks));
190 if (nt == NULL) 198 if (nt == NULL)
191 goto out_free_namelist; 199 goto out_free_namelist;
192 200
@@ -216,7 +224,7 @@ out_free_threads:
216 224
217struct thread_map *thread_map__new_dummy(void) 225struct thread_map *thread_map__new_dummy(void)
218{ 226{
219 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t)); 227 struct thread_map *threads = thread_map__alloc(1);
220 228
221 if (threads != NULL) { 229 if (threads != NULL) {
222 threads->map[0] = -1; 230 threads->map[0] = -1;
@@ -253,7 +261,7 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
253 continue; 261 continue;
254 262
255 ntasks++; 263 ntasks++;
256 nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); 264 nt = thread_map__realloc(threads, ntasks);
257 265
258 if (nt == NULL) 266 if (nt == NULL)
259 goto out_free_threads; 267 goto out_free_threads;
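thread_map__realloc() centralizes the sizeof(*map) + sizeof(pid_t) * nr arithmetic for the flexible array member, and thread_map__alloc() is just the NULL-map special case. A usage sketch of the grow-by-one pattern the *_str constructors above follow, assuming the usual struct thread_map layout with an nr count and a pid_t map[] flexible member:

    struct thread_map *map = NULL, *tmp;
    int nr = 0;

    /* assign to a temporary first: on failure, realloc leaves 'map' valid */
    tmp = thread_map__realloc(map, nr + 1);   /* first call == thread_map__alloc(1) */
    if (tmp == NULL)
            goto out_free;                    /* hypothetical error label */
    map = tmp;
    map->map[nr++] = getpid();
    map->nr = nr;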
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 51d9e56c0f84..c307dd438286 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -3,6 +3,8 @@
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5 5
6#include <linux/types.h>
7
6struct perf_session; 8struct perf_session;
7union perf_event; 9union perf_event;
8struct perf_evlist; 10struct perf_evlist;
@@ -29,6 +31,9 @@ typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
29typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, 31typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
30 struct ordered_events *oe); 32 struct ordered_events *oe);
31 33
34typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
35 struct perf_session *session);
36
32struct perf_tool { 37struct perf_tool {
33 event_sample sample, 38 event_sample sample,
34 read; 39 read;
@@ -38,13 +43,19 @@ struct perf_tool {
38 fork, 43 fork,
39 exit, 44 exit,
40 lost, 45 lost,
46 lost_samples,
47 aux,
48 itrace_start,
41 throttle, 49 throttle,
42 unthrottle; 50 unthrottle;
43 event_attr_op attr; 51 event_attr_op attr;
44 event_op2 tracing_data; 52 event_op2 tracing_data;
45 event_oe finished_round; 53 event_oe finished_round;
46 event_op2 build_id, 54 event_op2 build_id,
47 id_index; 55 id_index,
56 auxtrace_info,
57 auxtrace_error;
58 event_op3 auxtrace;
48 bool ordered_events; 59 bool ordered_events;
49 bool ordering_requires_timestamps; 60 bool ordering_requires_timestamps;
50}; 61};
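Unlike event_op2, the new event_op3 returns s64, which lets the auxtrace handler report how much AUX trace data it consumed (or a negative error) rather than just success or failure. A hedged sketch of wiring the new callback into a tool; the handler body here is a placeholder, not code from this series:

    static s64 process_auxtrace(struct perf_tool *tool __maybe_unused,
                                union perf_event *event __maybe_unused,
                                struct perf_session *session __maybe_unused)
    {
            /* placeholder: return bytes of AUX data consumed, or -errno */
            return 0;
    }

    static struct perf_tool tool = {
            .auxtrace       = process_auxtrace,   /* the lone event_op3 slot */
            .ordered_events = true,
    };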
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 25d6c737be3e..d4957418657e 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -173,7 +173,7 @@ void parse_ftrace_printk(struct pevent *pevent,
173 char *line; 173 char *line;
174 char *next = NULL; 174 char *next = NULL;
175 char *addr_str; 175 char *addr_str;
176 char *fmt; 176 char *fmt = NULL;
177 177
178 line = strtok_r(file, "\n", &next); 178 line = strtok_r(file, "\n", &next);
179 while (line) { 179 while (line) {
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 7b09a443a280..4c00507ee3fd 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -269,13 +269,14 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
269 u64 offset = dso->data.eh_frame_hdr_offset; 269 u64 offset = dso->data.eh_frame_hdr_offset;
270 270
271 if (offset == 0) { 271 if (offset == 0) {
272 fd = dso__data_fd(dso, machine); 272 fd = dso__data_get_fd(dso, machine);
273 if (fd < 0) 273 if (fd < 0)
274 return -EINVAL; 274 return -EINVAL;
275 275
276 /* Check the .eh_frame section for unwinding info */ 276 /* Check the .eh_frame section for unwinding info */
277 offset = elf_section_offset(fd, ".eh_frame_hdr"); 277 offset = elf_section_offset(fd, ".eh_frame_hdr");
278 dso->data.eh_frame_hdr_offset = offset; 278 dso->data.eh_frame_hdr_offset = offset;
279 dso__data_put_fd(dso);
279 } 280 }
280 281
281 if (offset) 282 if (offset)
@@ -294,13 +295,14 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
294 u64 ofs = dso->data.debug_frame_offset; 295 u64 ofs = dso->data.debug_frame_offset;
295 296
296 if (ofs == 0) { 297 if (ofs == 0) {
297 fd = dso__data_fd(dso, machine); 298 fd = dso__data_get_fd(dso, machine);
298 if (fd < 0) 299 if (fd < 0)
299 return -EINVAL; 300 return -EINVAL;
300 301
301 /* Check the .debug_frame section for unwinding info */ 302 /* Check the .debug_frame section for unwinding info */
302 ofs = elf_section_offset(fd, ".debug_frame"); 303 ofs = elf_section_offset(fd, ".debug_frame");
303 dso->data.debug_frame_offset = ofs; 304 dso->data.debug_frame_offset = ofs;
305 dso__data_put_fd(dso);
304 } 306 }
305 307
306 *offset = ofs; 308 *offset = ofs;
@@ -353,10 +355,13 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
353#ifndef NO_LIBUNWIND_DEBUG_FRAME 355#ifndef NO_LIBUNWIND_DEBUG_FRAME
354 /* Check the .debug_frame section for unwinding info */ 356 /* Check the .debug_frame section for unwinding info */
355 if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { 357 if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
356 int fd = dso__data_fd(map->dso, ui->machine); 358 int fd = dso__data_get_fd(map->dso, ui->machine);
357 int is_exec = elf_is_exec(fd, map->dso->name); 359 int is_exec = elf_is_exec(fd, map->dso->name);
358 unw_word_t base = is_exec ? 0 : map->start; 360 unw_word_t base = is_exec ? 0 : map->start;
359 361
362 if (fd >= 0)
363 dso__data_put_fd(map->dso);
364
360 memset(&di, 0, sizeof(di)); 365 memset(&di, 0, sizeof(di));
361 if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, 366 if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,
362 map->start, map->end)) 367 map->start, map->end))
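All three hunks follow the same discipline: dso__data_fd() becomes a get/put pair so the descriptor stays pinned while the unwinder reads section offsets, instead of racing with the dso data-file cache closing it. Condensed, the pattern reads:

    int fd = dso__data_get_fd(dso, machine);   /* pin the fd (takes a reference) */
    if (fd < 0)
            return -EINVAL;

    offset = elf_section_offset(fd, ".eh_frame_hdr");
    dso__data_put_fd(dso);                     /* unpin; fd may be closed after this */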
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4ee6d0d4c993..edc2d633b332 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -72,20 +72,60 @@ int mkdir_p(char *path, mode_t mode)
72 return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; 72 return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
73} 73}
74 74
75static int slow_copyfile(const char *from, const char *to, mode_t mode) 75int rm_rf(char *path)
76{
77 DIR *dir;
78 int ret = 0;
79 struct dirent *d;
80 char namebuf[PATH_MAX];
81
82 dir = opendir(path);
83 if (dir == NULL)
84 return 0;
85
86 while ((d = readdir(dir)) != NULL && !ret) {
87 struct stat statbuf;
88
89 if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
90 continue;
91
92 scnprintf(namebuf, sizeof(namebuf), "%s/%s",
93 path, d->d_name);
94
95 ret = stat(namebuf, &statbuf);
96 if (ret < 0) {
97 pr_debug("stat failed: %s\n", namebuf);
98 break;
99 }
100
101 if (S_ISREG(statbuf.st_mode))
102 ret = unlink(namebuf);
103 else if (S_ISDIR(statbuf.st_mode))
104 ret = rm_rf(namebuf);
105 else {
106 pr_debug("unknown file: %s\n", namebuf);
107 ret = -1;
108 }
109 }
110 closedir(dir);
111
112 if (ret < 0)
113 return ret;
114
115 return rmdir(path);
116}
117
118static int slow_copyfile(const char *from, const char *to)
76{ 119{
77 int err = -1; 120 int err = -1;
78 char *line = NULL; 121 char *line = NULL;
79 size_t n; 122 size_t n;
80 FILE *from_fp = fopen(from, "r"), *to_fp; 123 FILE *from_fp = fopen(from, "r"), *to_fp;
81 mode_t old_umask;
82 124
83 if (from_fp == NULL) 125 if (from_fp == NULL)
84 goto out; 126 goto out;
85 127
86 old_umask = umask(mode ^ 0777);
87 to_fp = fopen(to, "w"); 128 to_fp = fopen(to, "w");
88 umask(old_umask);
89 if (to_fp == NULL) 129 if (to_fp == NULL)
90 goto out_fclose_from; 130 goto out_fclose_from;
91 131
@@ -102,42 +142,81 @@ out:
102 return err; 142 return err;
103} 143}
104 144
145int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
146{
147 void *ptr;
148 loff_t pgoff;
149
150 pgoff = off_in & ~(page_size - 1);
151 off_in -= pgoff;
152
153 ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
154 if (ptr == MAP_FAILED)
155 return -1;
156
157 while (size) {
158 ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
159 if (ret < 0 && errno == EINTR)
160 continue;
161 if (ret <= 0)
162 break;
163
164 size -= ret;
165 off_in += ret;
 166 off_out += ret;
167 }
168 munmap(ptr, off_in + size);
169
170 return size ? -1 : 0;
171}
172
105int copyfile_mode(const char *from, const char *to, mode_t mode) 173int copyfile_mode(const char *from, const char *to, mode_t mode)
106{ 174{
107 int fromfd, tofd; 175 int fromfd, tofd;
108 struct stat st; 176 struct stat st;
109 void *addr;
110 int err = -1; 177 int err = -1;
178 char *tmp = NULL, *ptr = NULL;
111 179
112 if (stat(from, &st)) 180 if (stat(from, &st))
113 goto out; 181 goto out;
114 182
115 if (st.st_size == 0) /* /proc? do it slowly... */ 183 /* extra 'x' at the end is to reserve space for '.' */
116 return slow_copyfile(from, to, mode); 184 if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
117 185 tmp = NULL;
118 fromfd = open(from, O_RDONLY);
119 if (fromfd < 0)
120 goto out; 186 goto out;
187 }
188 ptr = strrchr(tmp, '/');
189 if (!ptr)
190 goto out;
191 ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
192 *ptr = '.';
121 193
122 tofd = creat(to, mode); 194 tofd = mkstemp(tmp);
123 if (tofd < 0) 195 if (tofd < 0)
124 goto out_close_from; 196 goto out;
197
198 if (fchmod(tofd, mode))
199 goto out_close_to;
200
201 if (st.st_size == 0) { /* /proc? do it slowly... */
202 err = slow_copyfile(from, tmp);
203 goto out_close_to;
204 }
125 205
126 addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0); 206 fromfd = open(from, O_RDONLY);
127 if (addr == MAP_FAILED) 207 if (fromfd < 0)
128 goto out_close_to; 208 goto out_close_to;
129 209
130 if (write(tofd, addr, st.st_size) == st.st_size) 210 err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
131 err = 0;
132 211
133 munmap(addr, st.st_size); 212 close(fromfd);
134out_close_to: 213out_close_to:
135 close(tofd); 214 close(tofd);
136 if (err) 215 if (!err)
137 unlink(to); 216 err = link(tmp, to);
138out_close_from: 217 unlink(tmp);
139 close(fromfd);
140out: 218out:
219 free(tmp);
141 return err; 220 return err;
142} 221}
143 222
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 1ff23e04ad27..8bce58b47a82 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -249,14 +249,20 @@ static inline int sane_case(int x, int high)
249} 249}
250 250
251int mkdir_p(char *path, mode_t mode); 251int mkdir_p(char *path, mode_t mode);
252int rm_rf(char *path);
252int copyfile(const char *from, const char *to); 253int copyfile(const char *from, const char *to);
253int copyfile_mode(const char *from, const char *to, mode_t mode); 254int copyfile_mode(const char *from, const char *to, mode_t mode);
255int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
254 256
255s64 perf_atoll(const char *str); 257s64 perf_atoll(const char *str);
256char **argv_split(const char *str, int *argcp); 258char **argv_split(const char *str, int *argcp);
257void argv_free(char **argv); 259void argv_free(char **argv);
258bool strglobmatch(const char *str, const char *pat); 260bool strglobmatch(const char *str, const char *pat);
259bool strlazymatch(const char *str, const char *pat); 261bool strlazymatch(const char *str, const char *pat);
262static inline bool strisglob(const char *str)
263{
264 return strpbrk(str, "*?[") != NULL;
265}
260int strtailcmp(const char *s1, const char *s2); 266int strtailcmp(const char *s1, const char *s2);
261char *strxfrchar(char *s, char from, char to); 267char *strxfrchar(char *s, char from, char to);
262unsigned long convert_unit(unsigned long value, char *unit); 268unsigned long convert_unit(unsigned long value, char *unit);
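strisglob() simply asks strpbrk() whether any glob metacharacter ('*', '?' or '[') occurs in the string, letting callers pick between exact-match and pattern-match paths. For example (the two match helpers are hypothetical):

    if (strisglob(name))            /* e.g. "kmalloc*" contains '*' */
            match_by_glob(name);    /* hypothetical pattern path */
    else
            match_exact(name);      /* e.g. "vfs_read": literal symbol name */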
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 5c7dd796979d..4b89118f158d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -101,7 +101,7 @@ static char *get_file(struct vdso_file *vdso_file)
101 return vdso; 101 return vdso;
102} 102}
103 103
104void vdso__exit(struct machine *machine) 104void machine__exit_vdso(struct machine *machine)
105{ 105{
106 struct vdso_info *vdso_info = machine->vdso_info; 106 struct vdso_info *vdso_info = machine->vdso_info;
107 107
@@ -120,14 +120,14 @@ void vdso__exit(struct machine *machine)
120 zfree(&machine->vdso_info); 120 zfree(&machine->vdso_info);
121} 121}
122 122
123static struct dso *vdso__new(struct machine *machine, const char *short_name, 123static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
124 const char *long_name) 124 const char *long_name)
125{ 125{
126 struct dso *dso; 126 struct dso *dso;
127 127
128 dso = dso__new(short_name); 128 dso = dso__new(short_name);
129 if (dso != NULL) { 129 if (dso != NULL) {
130 dsos__add(&machine->user_dsos, dso); 130 __dsos__add(&machine->dsos, dso);
131 dso__set_long_name(dso, long_name, false); 131 dso__set_long_name(dso, long_name, false);
132 } 132 }
133 133
@@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
230 return vdso_file->temp_file_name; 230 return vdso_file->temp_file_name;
231} 231}
232 232
233static struct dso *vdso__findnew_compat(struct machine *machine, 233static struct dso *__machine__findnew_compat(struct machine *machine,
234 struct vdso_file *vdso_file) 234 struct vdso_file *vdso_file)
235{ 235{
236 const char *file_name; 236 const char *file_name;
237 struct dso *dso; 237 struct dso *dso;
238 238
239 dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true); 239 pthread_rwlock_wrlock(&machine->dsos.lock);
240 dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
240 if (dso) 241 if (dso)
241 return dso; 242 goto out_unlock;
242 243
243 file_name = vdso__get_compat_file(vdso_file); 244 file_name = vdso__get_compat_file(vdso_file);
244 if (!file_name) 245 if (!file_name)
245 return NULL; 246 goto out_unlock;
246 247
247 return vdso__new(machine, vdso_file->dso_name, file_name); 248 dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
249out_unlock:
250 pthread_rwlock_unlock(&machine->dsos.lock);
251 return dso;
248} 252}
249 253
250static int vdso__dso_findnew_compat(struct machine *machine, 254static int __machine__findnew_vdso_compat(struct machine *machine,
251 struct thread *thread, 255 struct thread *thread,
252 struct vdso_info *vdso_info, 256 struct vdso_info *vdso_info,
253 struct dso **dso) 257 struct dso **dso)
254{ 258{
255 enum dso_type dso_type; 259 enum dso_type dso_type;
256 260
@@ -267,10 +271,10 @@ static int vdso__dso_findnew_compat(struct machine *machine,
267 271
268 switch (dso_type) { 272 switch (dso_type) {
269 case DSO__TYPE_32BIT: 273 case DSO__TYPE_32BIT:
270 *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); 274 *dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
271 return 1; 275 return 1;
272 case DSO__TYPE_X32BIT: 276 case DSO__TYPE_X32BIT:
273 *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32); 277 *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
274 return 1; 278 return 1;
275 case DSO__TYPE_UNKNOWN: 279 case DSO__TYPE_UNKNOWN:
276 case DSO__TYPE_64BIT: 280 case DSO__TYPE_64BIT:
@@ -281,35 +285,37 @@ static int vdso__dso_findnew_compat(struct machine *machine,
281 285
282#endif 286#endif
283 287
284struct dso *vdso__dso_findnew(struct machine *machine, 288struct dso *machine__findnew_vdso(struct machine *machine,
285 struct thread *thread __maybe_unused) 289 struct thread *thread __maybe_unused)
286{ 290{
287 struct vdso_info *vdso_info; 291 struct vdso_info *vdso_info;
288 struct dso *dso; 292 struct dso *dso = NULL;
289 293
294 pthread_rwlock_wrlock(&machine->dsos.lock);
290 if (!machine->vdso_info) 295 if (!machine->vdso_info)
291 machine->vdso_info = vdso_info__new(); 296 machine->vdso_info = vdso_info__new();
292 297
293 vdso_info = machine->vdso_info; 298 vdso_info = machine->vdso_info;
294 if (!vdso_info) 299 if (!vdso_info)
295 return NULL; 300 goto out_unlock;
296 301
297#if BITS_PER_LONG == 64 302#if BITS_PER_LONG == 64
298 if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso)) 303 if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
299 return dso; 304 goto out_unlock;
300#endif 305#endif
301 306
302 dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true); 307 dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
303 if (!dso) { 308 if (!dso) {
304 char *file; 309 char *file;
305 310
306 file = get_file(&vdso_info->vdso); 311 file = get_file(&vdso_info->vdso);
307 if (!file) 312 if (file)
308 return NULL; 313 dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
309
310 dso = vdso__new(machine, DSO__NAME_VDSO, file);
311 } 314 }
312 315
316out_unlock:
317 dso__get(dso);
318 pthread_rwlock_unlock(&machine->dsos.lock);
313 return dso; 319 return dso;
314} 320}
315 321
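machine__findnew_vdso() and its compat variant now hold machine->dsos.lock for writing across the whole find-or-add sequence, and take a reference (dso__get()) before unlocking, so the returned dso cannot be freed out from under the caller. The shape of that pattern, sketched with stand-in table helpers rather than the real perf ones:

    pthread_rwlock_wrlock(&table->lock);
    d = __find(table, name);            /* double-underscore variants assume the lock */
    if (!d)
            d = __addnew(table, name);  /* may still be NULL on allocation failure */
    dso__get(d);                        /* NULL-safe ref before anyone else can race */
    pthread_rwlock_unlock(&table->lock);
    return d;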
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
index d97da1616f0c..cdc4fabfc212 100644
--- a/tools/perf/util/vdso.h
+++ b/tools/perf/util/vdso.h
@@ -23,7 +23,7 @@ bool dso__is_vdso(struct dso *dso);
23struct machine; 23struct machine;
24struct thread; 24struct thread;
25 25
26struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread); 26struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
27void vdso__exit(struct machine *machine); 27void machine__exit_vdso(struct machine *machine);
28 28
29#endif /* __PERF_VDSO__ */ 29#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
index 22afbf6c536a..c10ba41ef3f6 100644
--- a/tools/perf/util/xyarray.c
+++ b/tools/perf/util/xyarray.c
@@ -9,11 +9,19 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
9 if (xy != NULL) { 9 if (xy != NULL) {
10 xy->entry_size = entry_size; 10 xy->entry_size = entry_size;
11 xy->row_size = row_size; 11 xy->row_size = row_size;
12 xy->entries = xlen * ylen;
12 } 13 }
13 14
14 return xy; 15 return xy;
15} 16}
16 17
18void xyarray__reset(struct xyarray *xy)
19{
20 size_t n = xy->entries * xy->entry_size;
21
22 memset(xy->contents, 0, n);
23}
24
17void xyarray__delete(struct xyarray *xy) 25void xyarray__delete(struct xyarray *xy)
18{ 26{
19 free(xy); 27 free(xy);
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
index c488a07275dd..7f30af371b7e 100644
--- a/tools/perf/util/xyarray.h
+++ b/tools/perf/util/xyarray.h
@@ -6,11 +6,13 @@
6struct xyarray { 6struct xyarray {
7 size_t row_size; 7 size_t row_size;
8 size_t entry_size; 8 size_t entry_size;
9 size_t entries;
9 char contents[]; 10 char contents[];
10}; 11};
11 12
12struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size); 13struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
13void xyarray__delete(struct xyarray *xy); 14void xyarray__delete(struct xyarray *xy);
15void xyarray__reset(struct xyarray *xy);
14 16
15static inline void *xyarray__entry(struct xyarray *xy, int x, int y) 17static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
16{ 18{
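Recording entries = xlen * ylen at allocation time is what lets xyarray__reset() zero exactly the populated part of the flexible contents[] array. Usage, with the API shown in these hunks:

    struct xyarray *xy = xyarray__new(nr_cpus, nr_threads, sizeof(int));

    if (xy != NULL) {
            *(int *)xyarray__entry(xy, 0, 0) = 42;   /* cell (x=0, y=0) */
            xyarray__reset(xy);     /* all nr_cpus * nr_threads cells back to zero */
            xyarray__delete(xy);
    }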
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 15f1a17ca96e..3f81a1095206 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1
66mv $builddir/.config $builddir/.config.sav 66mv $builddir/.config $builddir/.config.sav
67sh $T/upd.sh < $builddir/.config.sav > $builddir/.config 67sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
68cp $builddir/.config $builddir/.config.new 68cp $builddir/.config $builddir/.config.new
69yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1 69yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err
70 70
71# verify new config matches specification. 71# verify new config matches specification.
72configcheck.sh $builddir/.config $c 72configcheck.sh $builddir/.config $c
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 4f5b20f367a9..d86bdd6b6cc2 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -43,6 +43,10 @@ do
43 if test -f "$i/console.log" 43 if test -f "$i/console.log"
44 then 44 then
45 configcheck.sh $i/.config $i/ConfigFragment 45 configcheck.sh $i/.config $i/ConfigFragment
46 if test -r $i/Make.oldconfig.err
47 then
48 cat $i/Make.oldconfig.err
49 fi
46 parse-build.sh $i/Make.out $configfile 50 parse-build.sh $i/Make.out $configfile
47 parse-torture.sh $i/console.log $configfile 51 parse-torture.sh $i/console.log $configfile
48 parse-console.sh $i/console.log $configfile 52 parse-console.sh $i/console.log $configfile
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index dd2812ceb0ba..fbe2dbff1e21 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -55,7 +55,7 @@ usage () {
55 echo " --bootargs kernel-boot-arguments" 55 echo " --bootargs kernel-boot-arguments"
56 echo " --bootimage relative-path-to-kernel-boot-image" 56 echo " --bootimage relative-path-to-kernel-boot-image"
57 echo " --buildonly" 57 echo " --buildonly"
58 echo " --configs \"config-file list\"" 58 echo " --configs \"config-file list w/ repeat factor (3*TINY01)\""
59 echo " --cpus N" 59 echo " --cpus N"
60 echo " --datestamp string" 60 echo " --datestamp string"
61 echo " --defconfig string" 61 echo " --defconfig string"
@@ -178,13 +178,26 @@ fi
178touch $T/cfgcpu 178touch $T/cfgcpu
179for CF in $configs 179for CF in $configs
180do 180do
181 if test -f "$CONFIGFRAG/$CF" 181 case $CF in
182 [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
183 config_reps=`echo $CF | sed -e 's/\*.*$//'`
184 CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
185 ;;
186 *)
187 config_reps=1
188 CF1=$CF
189 ;;
190 esac
191 if test -f "$CONFIGFRAG/$CF1"
182 then 192 then
183 cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF` 193 cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
184 cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"` 194 cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
185 echo $CF $cpu_count >> $T/cfgcpu 195 for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
196 do
197 echo $CF1 $cpu_count >> $T/cfgcpu
198 done
186 else 199 else
187 echo "The --configs file $CF does not exist, terminating." 200 echo "The --configs file $CF1 does not exist, terminating."
188 exit 1 201 exit 1
189 fi 202 fi
190done 203done
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index 49701218dc62..f824b4c9d9d9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,3 +1,5 @@
1CONFIG_RCU_TORTURE_TEST=y 1CONFIG_RCU_TORTURE_TEST=y
2CONFIG_PRINTK_TIME=y 2CONFIG_PRINTK_TIME=y
3CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
3CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y 4CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
5CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 9fbb41b9b314..1a087c3c8bb8 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
5CONFIG_PREEMPT_NONE=y 5CONFIG_PREEMPT_NONE=y
6CONFIG_PREEMPT_VOLUNTARY=n 6CONFIG_PREEMPT_VOLUNTARY=n
7CONFIG_PREEMPT=n 7CONFIG_PREEMPT=n
8CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
index 4b6f272dba27..4837430a71c0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y
5CONFIG_PREEMPT_NONE=n 5CONFIG_PREEMPT_NONE=n
6CONFIG_PREEMPT_VOLUNTARY=n 6CONFIG_PREEMPT_VOLUNTARY=n
7CONFIG_PREEMPT=y 7CONFIG_PREEMPT=y
8#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 238bfe3bd0cc..84a7d51b7481 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1 +1 @@
rcutorture.torture_type=srcu rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 97f0a0b27ef7..2cc0e60eba6e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n
5CONFIG_PREEMPT_VOLUNTARY=n 5CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=y 6CONFIG_PREEMPT=y
7CONFIG_DEBUG_LOCK_ALLOC=y 7CONFIG_DEBUG_LOCK_ALLOC=y
8CONFIG_PROVE_RCU=y 8CONFIG_PROVE_LOCKING=n
9CONFIG_TASKS_RCU=y 9#CHECK#CONFIG_PROVE_RCU=n
10CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index 696d2ea74d13..ad2be91e5ee7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,4 +2,3 @@ CONFIG_SMP=n
2CONFIG_PREEMPT_NONE=y 2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_VOLUNTARY=n 3CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n 4CONFIG_PREEMPT=n
5CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index 9c60da5b5d1d..c70c51d5ded1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n
6CONFIG_PREEMPT_NONE=n 6CONFIG_PREEMPT_NONE=n
7CONFIG_PREEMPT_VOLUNTARY=n 7CONFIG_PREEMPT_VOLUNTARY=n
8CONFIG_PREEMPT=y 8CONFIG_PREEMPT=y
9CONFIG_TASKS_RCU=y
10CONFIG_HZ_PERIODIC=n 9CONFIG_HZ_PERIODIC=n
11CONFIG_NO_HZ_IDLE=n 10CONFIG_NO_HZ_IDLE=n
12CONFIG_NO_HZ_FULL=y 11CONFIG_NO_HZ_FULL=y
13CONFIG_NO_HZ_FULL_ALL=y 12CONFIG_NO_HZ_FULL_ALL=y
13#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index 36e41df3d27a..f1892e0371c9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n
8CONFIG_NO_HZ_FULL=n 8CONFIG_NO_HZ_FULL=n
9CONFIG_RCU_TRACE=y 9CONFIG_RCU_TRACE=y
10CONFIG_PROVE_LOCKING=y 10CONFIG_PROVE_LOCKING=y
11CONFIG_PROVE_RCU=y 11#CHECK#CONFIG_PROVE_RCU=y
12CONFIG_DEBUG_LOCK_ALLOC=y 12CONFIG_DEBUG_LOCK_ALLOC=y
13CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 13CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
14CONFIG_PREEMPT_COUNT=y 14CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
index 0f0802730014..6c1a292a65fb 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -1,2 +1,3 @@
1rcupdate.rcu_self_test=1 1rcupdate.rcu_self_test=1
2rcupdate.rcu_self_test_bh=1 2rcupdate.rcu_self_test_bh=1
3rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index f8a10a7500c6..8e9137f66831 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
16CONFIG_RCU_CPU_STALL_INFO=n 16CONFIG_RCU_CPU_STALL_INFO=n
17CONFIG_RCU_BOOST=n 17CONFIG_RCU_BOOST=n
18CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 18CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
19CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 629122fb8b4a..aeea6a204d14 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -14,10 +14,10 @@ CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3 15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_LEAF=3 16CONFIG_RCU_FANOUT_LEAF=3
17CONFIG_RCU_FANOUT_EXACT=n
18CONFIG_RCU_NOCB_CPU=n 17CONFIG_RCU_NOCB_CPU=n
19CONFIG_DEBUG_LOCK_ALLOC=y 18CONFIG_DEBUG_LOCK_ALLOC=y
20CONFIG_PROVE_LOCKING=n 19CONFIG_PROVE_LOCKING=n
21CONFIG_RCU_CPU_STALL_INFO=n 20CONFIG_RCU_CPU_STALL_INFO=n
22CONFIG_RCU_BOOST=n 21CONFIG_RCU_BOOST=n
23CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 22CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
23CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
index a25de47888a4..2ac9e68ea3d1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
@@ -14,7 +14,6 @@ CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3 15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_LEAF=3 16CONFIG_RCU_FANOUT_LEAF=3
17CONFIG_RCU_FANOUT_EXACT=n
18CONFIG_RCU_NOCB_CPU=n 17CONFIG_RCU_NOCB_CPU=n
19CONFIG_DEBUG_LOCK_ALLOC=y 18CONFIG_DEBUG_LOCK_ALLOC=y
20CONFIG_PROVE_LOCKING=n 19CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 53f24e0a0ab6..72aa7d87ea99 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -1,5 +1,5 @@
1CONFIG_SMP=y 1CONFIG_SMP=y
2CONFIG_NR_CPUS=8 2CONFIG_NR_CPUS=16
3CONFIG_PREEMPT_NONE=n 3CONFIG_PREEMPT_NONE=n
4CONFIG_PREEMPT_VOLUNTARY=n 4CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=y 5CONFIG_PREEMPT=y
@@ -9,12 +9,12 @@ CONFIG_NO_HZ_IDLE=n
9CONFIG_NO_HZ_FULL=n 9CONFIG_NO_HZ_FULL=n
10CONFIG_RCU_TRACE=y 10CONFIG_RCU_TRACE=y
11CONFIG_HOTPLUG_CPU=y 11CONFIG_HOTPLUG_CPU=y
12CONFIG_RCU_FANOUT=4 12CONFIG_RCU_FANOUT=2
13CONFIG_RCU_FANOUT_LEAF=4 13CONFIG_RCU_FANOUT_LEAF=2
14CONFIG_RCU_FANOUT_EXACT=n
15CONFIG_RCU_NOCB_CPU=n 14CONFIG_RCU_NOCB_CPU=n
16CONFIG_DEBUG_LOCK_ALLOC=n 15CONFIG_DEBUG_LOCK_ALLOC=n
17CONFIG_RCU_CPU_STALL_INFO=n 16CONFIG_RCU_CPU_STALL_INFO=n
18CONFIG_RCU_BOOST=y 17CONFIG_RCU_BOOST=y
19CONFIG_RCU_KTHREAD_PRIO=2 18CONFIG_RCU_KTHREAD_PRIO=2
20CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 19CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
20CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 000000000000..120c0c88d100
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1 @@
rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 0f84db35b36d..3f5112751cda 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -13,10 +13,10 @@ CONFIG_RCU_TRACE=y
13CONFIG_HOTPLUG_CPU=n 13CONFIG_HOTPLUG_CPU=n
14CONFIG_SUSPEND=n 14CONFIG_SUSPEND=n
15CONFIG_HIBERNATION=n 15CONFIG_HIBERNATION=n
16CONFIG_RCU_FANOUT=2 16CONFIG_RCU_FANOUT=4
17CONFIG_RCU_FANOUT_LEAF=2 17CONFIG_RCU_FANOUT_LEAF=4
18CONFIG_RCU_FANOUT_EXACT=n
19CONFIG_RCU_NOCB_CPU=n 18CONFIG_RCU_NOCB_CPU=n
20CONFIG_DEBUG_LOCK_ALLOC=n 19CONFIG_DEBUG_LOCK_ALLOC=n
21CONFIG_RCU_CPU_STALL_INFO=y 20CONFIG_RCU_CPU_STALL_INFO=n
22CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
22CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 212e3bfd2b2a..c04dfea6fd21 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -12,11 +12,11 @@ CONFIG_RCU_TRACE=n
12CONFIG_HOTPLUG_CPU=y 12CONFIG_HOTPLUG_CPU=y
13CONFIG_RCU_FANOUT=6 13CONFIG_RCU_FANOUT=6
14CONFIG_RCU_FANOUT_LEAF=6 14CONFIG_RCU_FANOUT_LEAF=6
15CONFIG_RCU_FANOUT_EXACT=n
16CONFIG_RCU_NOCB_CPU=y 15CONFIG_RCU_NOCB_CPU=y
17CONFIG_RCU_NOCB_CPU_NONE=y 16CONFIG_RCU_NOCB_CPU_NONE=y
18CONFIG_DEBUG_LOCK_ALLOC=y 17CONFIG_DEBUG_LOCK_ALLOC=y
19CONFIG_PROVE_LOCKING=y 18CONFIG_PROVE_LOCKING=y
20CONFIG_PROVE_RCU=y 19#CHECK#CONFIG_PROVE_RCU=y
21CONFIG_RCU_CPU_STALL_INFO=n 20CONFIG_RCU_CPU_STALL_INFO=n
22CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
22CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 7eee63b44218..f51d2c73a68e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -14,10 +14,10 @@ CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=6 15CONFIG_RCU_FANOUT=6
16CONFIG_RCU_FANOUT_LEAF=6 16CONFIG_RCU_FANOUT_LEAF=6
17CONFIG_RCU_FANOUT_EXACT=y
18CONFIG_RCU_NOCB_CPU=n 17CONFIG_RCU_NOCB_CPU=n
19CONFIG_DEBUG_LOCK_ALLOC=y 18CONFIG_DEBUG_LOCK_ALLOC=y
20CONFIG_PROVE_LOCKING=y 19CONFIG_PROVE_LOCKING=y
21CONFIG_PROVE_RCU=y 20#CHECK#CONFIG_PROVE_RCU=y
22CONFIG_RCU_CPU_STALL_INFO=n 21CONFIG_RCU_CPU_STALL_INFO=n
23CONFIG_DEBUG_OBJECTS_RCU_HEAD=y 22CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
23CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index da9a03a398db..dd90f28ed700 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -1,3 +1,4 @@
1rcupdate.rcu_self_test=1 1rcupdate.rcu_self_test=1
2rcupdate.rcu_self_test_bh=1 2rcupdate.rcu_self_test_bh=1
3rcupdate.rcu_self_test_sched=1 3rcupdate.rcu_self_test_sched=1
4rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 92a97fa97dec..f422af4ff5a3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -15,8 +15,8 @@ CONFIG_RCU_TRACE=y
15CONFIG_HOTPLUG_CPU=y 15CONFIG_HOTPLUG_CPU=y
16CONFIG_RCU_FANOUT=2 16CONFIG_RCU_FANOUT=2
17CONFIG_RCU_FANOUT_LEAF=2 17CONFIG_RCU_FANOUT_LEAF=2
18CONFIG_RCU_FANOUT_EXACT=n
19CONFIG_RCU_NOCB_CPU=n 18CONFIG_RCU_NOCB_CPU=n
20CONFIG_DEBUG_LOCK_ALLOC=n 19CONFIG_DEBUG_LOCK_ALLOC=n
21CONFIG_RCU_CPU_STALL_INFO=y 20CONFIG_RCU_CPU_STALL_INFO=n
22CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
22CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 5812027d6f9f..a24d2ca30646 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -1,5 +1,5 @@
1CONFIG_SMP=y 1CONFIG_SMP=y
2CONFIG_NR_CPUS=16 2CONFIG_NR_CPUS=8
3CONFIG_PREEMPT_NONE=n 3CONFIG_PREEMPT_NONE=n
4CONFIG_PREEMPT_VOLUNTARY=n 4CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=y 5CONFIG_PREEMPT=y
@@ -13,13 +13,13 @@ CONFIG_HOTPLUG_CPU=n
13CONFIG_SUSPEND=n 13CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3 15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_EXACT=y
17CONFIG_RCU_FANOUT_LEAF=2 16CONFIG_RCU_FANOUT_LEAF=2
18CONFIG_RCU_NOCB_CPU=y 17CONFIG_RCU_NOCB_CPU=y
19CONFIG_RCU_NOCB_CPU_ALL=y 18CONFIG_RCU_NOCB_CPU_ALL=y
20CONFIG_DEBUG_LOCK_ALLOC=n 19CONFIG_DEBUG_LOCK_ALLOC=n
21CONFIG_PROVE_LOCKING=y 20CONFIG_PROVE_LOCKING=y
22CONFIG_PROVE_RCU=y 21#CHECK#CONFIG_PROVE_RCU=y
23CONFIG_RCU_CPU_STALL_INFO=n 22CONFIG_RCU_CPU_STALL_INFO=n
24CONFIG_RCU_BOOST=n 23CONFIG_RCU_BOOST=n
25CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 24CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
25CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
index 3eaeccacb083..b2b8cea69dc9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
@@ -13,7 +13,6 @@ CONFIG_HOTPLUG_CPU=n
13CONFIG_SUSPEND=n 13CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3 15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_EXACT=y
17CONFIG_RCU_FANOUT_LEAF=2 16CONFIG_RCU_FANOUT_LEAF=2
18CONFIG_RCU_NOCB_CPU=y 17CONFIG_RCU_NOCB_CPU=y
19CONFIG_RCU_NOCB_CPU_ALL=y 18CONFIG_RCU_NOCB_CPU_ALL=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
new file mode 100644
index 000000000000..883149b5f2d1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot
@@ -0,0 +1 @@
rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 2561daf605ad..fb066dc82769 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,4 @@
1rcutorture.torture_type=sched 1rcutorture.torture_type=sched
2rcupdate.rcu_self_test=1 2rcupdate.rcu_self_test=1
3rcupdate.rcu_self_test_sched=1 3rcupdate.rcu_self_test_sched=1
4rcutree.rcu_fanout_exact=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 6076b36f6c0b..aa4ed08d999d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
16CONFIG_RCU_CPU_STALL_INFO=n 16CONFIG_RCU_CPU_STALL_INFO=n
17CONFIG_RCU_BOOST=n 17CONFIG_RCU_BOOST=n
18CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 18CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
19#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index ec03c883db00..b24c0004fc49 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -12,13 +12,12 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
12CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. 12CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE.
13CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. 13CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
14CONFIG_PREEMPT -- Do half. (First three and #8.) 14CONFIG_PREEMPT -- Do half. (First three and #8.)
15CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. 15CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
16CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. 16CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
17CONFIG_RCU_BOOST -- one of PREEMPT_RCU. 17CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
18CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. 18CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
19CONFIG_RCU_CPU_STALL_INFO -- Do one. 19CONFIG_RCU_CPU_STALL_INFO -- Now default, avoid at least twice.
20CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. 20CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
21CONFIG_RCU_FANOUT_EXACT -- Do one.
22CONFIG_RCU_FANOUT_LEAF -- Do one non-default. 21CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
23CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. 22CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL.
24CONFIG_RCU_NOCB_CPU -- Do three, see below. 23CONFIG_RCU_NOCB_CPU -- Do three, see below.
@@ -27,28 +26,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
27CONFIG_RCU_NOCB_CPU_ZERO -- Do one. 26CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
28CONFIG_RCU_TRACE -- Do half. 27CONFIG_RCU_TRACE -- Do half.
29CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. 28CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
29!RCU_EXPERT -- Do a few, but these have to be vanilla configurations.
30RCU-bh: Do one with PREEMPT and one with !PREEMPT. 30RCU-bh: Do one with PREEMPT and one with !PREEMPT.
31RCU-sched: Do one with PREEMPT but not BOOST. 31RCU-sched: Do one with PREEMPT but not BOOST.
32 32
33 33
34Hierarchy: 34Boot parameters:
35 35
36TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. 36nohz_full - do at least one.
37TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, 37maxcpu -- do at least one.
38 CONFIG_RCU_FANOUT_LEAF=3. 38rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
39TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, 39rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
40 CONFIG_RCU_FANOUT_LEAF=4. 40rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
41TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, 41rcutree.rcu_fanout_exact -- Do at least one.
42 CONFIG_RCU_FANOUT_LEAF=2.
43TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n
44 CONFIG_RCU_FANOUT_LEAF=6.
45TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y
46 CONFIG_RCU_FANOUT_LEAF=6.
47TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n,
48 CONFIG_RCU_FANOUT_LEAF=2.
49TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y,
50 CONFIG_RCU_FANOUT_LEAF=2.
51TREE09. CONFIG_NR_CPUS=1.
52 42
53 43
54Kconfig Parameters Ignored: 44Kconfig Parameters Ignored:
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5bdb781163d1..9b0d8baf2934 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,8 +5,10 @@ include ../lib.mk
5.PHONY: all all_32 all_64 warn_32bit_failure clean 5.PHONY: all all_32 all_64 warn_32bit_failure clean
6 6
7TARGETS_C_BOTHBITS := sigreturn single_step_syscall 7TARGETS_C_BOTHBITS := sigreturn single_step_syscall
8TARGETS_C_32BIT_ONLY := entry_from_vm86
8 9
9BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32) 10TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
11BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
10BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) 12BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
11 13
12CFLAGS := -O2 -g -std=gnu99 -pthread -Wall 14CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
@@ -32,7 +34,7 @@ all_64: $(BINARIES_64)
32clean: 34clean:
33 $(RM) $(BINARIES_32) $(BINARIES_64) 35 $(RM) $(BINARIES_32) $(BINARIES_64)
34 36
35$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c 37$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
36 $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl 38 $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
37 39
38$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c 40$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644
index 000000000000..5c38a187677b
--- /dev/null
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -0,0 +1,114 @@
1/*
2 * entry_from_vm86.c - tests kernel entries from vm86 mode
3 * Copyright (c) 2014-2015 Andrew Lutomirski
4 *
5 * This exercises a few paths that need to special-case vm86 mode.
6 *
7 * GPL v2.
8 */
9
10#define _GNU_SOURCE
11
12#include <assert.h>
13#include <stdlib.h>
14#include <sys/syscall.h>
15#include <sys/signal.h>
16#include <sys/ucontext.h>
17#include <unistd.h>
18#include <stdio.h>
19#include <string.h>
20#include <inttypes.h>
21#include <sys/mman.h>
22#include <err.h>
23#include <stddef.h>
24#include <stdbool.h>
25#include <errno.h>
26#include <sys/vm86.h>
27
28static unsigned long load_addr = 0x10000;
29static int nerrs = 0;
30
31asm (
32 ".pushsection .rodata\n\t"
33 ".type vmcode_bound, @object\n\t"
34 "vmcode:\n\t"
35 "vmcode_bound:\n\t"
36 ".code16\n\t"
37 "bound %ax, (2048)\n\t"
38 "int3\n\t"
39 "vmcode_sysenter:\n\t"
40 "sysenter\n\t"
41 ".size vmcode, . - vmcode\n\t"
42 "end_vmcode:\n\t"
43 ".code32\n\t"
44 ".popsection"
45 );
46
47extern unsigned char vmcode[], end_vmcode[];
48extern unsigned char vmcode_bound[], vmcode_sysenter[];
49
50static void do_test(struct vm86plus_struct *v86, unsigned long eip,
51 const char *text)
52{
53 long ret;
54
55 printf("[RUN]\t%s from vm86 mode\n", text);
56 v86->regs.eip = eip;
57 ret = vm86(VM86_ENTER, v86);
58
59 if (ret == -1 && errno == ENOSYS) {
60 printf("[SKIP]\tvm86 not supported\n");
61 return;
62 }
63
64 if (VM86_TYPE(ret) == VM86_INTx) {
65 char trapname[32];
66 int trapno = VM86_ARG(ret);
67 if (trapno == 13)
68 strcpy(trapname, "GP");
69 else if (trapno == 5)
70 strcpy(trapname, "BR");
71 else if (trapno == 14)
72 strcpy(trapname, "PF");
73 else
74 sprintf(trapname, "%d", trapno);
75
76 printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
77 } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
78 printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
79 } else {
80 printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
81 VM86_TYPE(ret), VM86_ARG(ret));
82 }
83}
84
85int main(void)
86{
87 struct vm86plus_struct v86;
88 unsigned char *addr = mmap((void *)load_addr, 4096,
89 PROT_READ | PROT_WRITE | PROT_EXEC,
90 MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
91 if (addr != (unsigned char *)load_addr)
92 err(1, "mmap");
93
94 memcpy(addr, vmcode, end_vmcode - vmcode);
95 addr[2048] = 2;
96 addr[2050] = 3;
97
98 memset(&v86, 0, sizeof(v86));
99
100 v86.regs.cs = load_addr / 16;
101 v86.regs.ss = load_addr / 16;
102 v86.regs.ds = load_addr / 16;
103 v86.regs.es = load_addr / 16;
104
105 assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
106
107 /* #BR -- should deliver SIG??? */
108 do_test(&v86, vmcode_bound - vmcode, "#BR");
109
110 /* SYSENTER -- should cause #GP or #UD depending on CPU */
111 do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
112
113 return (nerrs == 0 ? 0 : 1);
114}
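One thing to note about this initial version: nerrs is declared and checked in main(), but nothing ever increments it, so the test exits 0 regardless of how vm86 mode was left. A follow-up that wanted the unexpected-exit branch of do_test() to fail the test could do so along these lines (hypothetical, not part of this patch):

    } else {
            printf("[FAIL]\tExited vm86 mode due to type %ld, arg %ld\n",
                   VM86_TYPE(ret), VM86_ARG(ret));
            nerrs++;    /* counted by main()'s return (nerrs == 0 ? 0 : 1) */
    }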