-rw-r--r--  Documentation/ABI/testing/sysfs-bus-event_source-devices-events | 62
-rw-r--r--  Documentation/ABI/testing/sysfs-platform-ts5500 | 47
-rw-r--r--  Documentation/PCI/MSI-HOWTO.txt | 37
-rw-r--r--  Documentation/atomic_ops.txt | 2
-rw-r--r--  Documentation/memory-barriers.txt | 1
-rw-r--r--  Documentation/trace/ftrace.txt | 83
-rw-r--r--  Documentation/x86/boot.txt | 1
-rw-r--r--  MAINTAINERS | 23
-rw-r--r--  Makefile | 5
-rw-r--r--  arch/Kconfig | 12
-rw-r--r--  arch/alpha/Kconfig | 1
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 6
-rw-r--r--  arch/arm/Kconfig | 1
-rw-r--r--  arch/arm/include/asm/smp_scu.h | 8
-rw-r--r--  arch/arm/kernel/smp_scu.c | 2
-rw-r--r--  arch/arm/mach-highbank/highbank.c | 3
-rw-r--r--  arch/arm/mach-highbank/sysregs.h | 4
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/blackfin/Kconfig | 2
-rw-r--r--  arch/frv/Kconfig | 1
-rw-r--r--  arch/hexagon/Kconfig | 2
-rw-r--r--  arch/ia64/Kconfig | 1
-rw-r--r--  arch/ia64/include/asm/cputime.h | 92
-rw-r--r--  arch/ia64/include/asm/thread_info.h | 4
-rw-r--r--  arch/ia64/include/asm/xen/minstate.h | 2
-rw-r--r--  arch/ia64/kernel/asm-offsets.c | 2
-rw-r--r--  arch/ia64/kernel/entry.S | 16
-rw-r--r--  arch/ia64/kernel/fsys.S | 4
-rw-r--r--  arch/ia64/kernel/head.S | 4
-rw-r--r--  arch/ia64/kernel/ivt.S | 8
-rw-r--r--  arch/ia64/kernel/minstate.h | 2
-rw-r--r--  arch/ia64/kernel/time.c | 5
-rw-r--r--  arch/m68k/include/asm/processor.h | 1
-rw-r--r--  arch/mips/Kconfig | 2
-rw-r--r--  arch/parisc/Kconfig | 2
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/configs/chroma_defconfig | 2
-rw-r--r--  arch/powerpc/configs/corenet64_smp_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pasemi_defconfig | 2
-rw-r--r--  arch/powerpc/include/asm/cputime.h | 6
-rw-r--r--  arch/powerpc/include/asm/lppaca.h | 2
-rw-r--r--  arch/powerpc/include/asm/perf_event_server.h | 26
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 4
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 4
-rw-r--r--  arch/powerpc/kernel/time.c | 7
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 12
-rw-r--r--  arch/powerpc/perf/power7-pmu.c | 80
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 6
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/kernel/time.c | 3
-rw-r--r--  arch/s390/kernel/vtime.c | 6
-rw-r--r--  arch/sh/Kconfig | 4
-rw-r--r--  arch/sparc/Kconfig | 2
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 14
-rw-r--r--  arch/sparc/kernel/sbus.c | 6
-rw-r--r--  arch/sparc/mm/gup.c | 59
-rw-r--r--  arch/tile/Kconfig | 2
-rw-r--r--  arch/tile/include/asm/io.h | 6
-rw-r--r--  arch/tile/include/asm/irqflags.h | 32
-rw-r--r--  arch/tile/include/uapi/arch/interrupts_32.h | 394
-rw-r--r--  arch/tile/include/uapi/arch/interrupts_64.h | 346
-rw-r--r--  arch/tile/kernel/intvec_64.S | 4
-rw-r--r--  arch/tile/kernel/process.c | 2
-rw-r--r--  arch/tile/kernel/reboot.c | 2
-rw-r--r--  arch/tile/kernel/setup.c | 5
-rw-r--r--  arch/tile/kernel/stack.c | 3
-rw-r--r--  arch/tile/lib/cacheflush.c | 2
-rw-r--r--  arch/tile/lib/cpumask.c | 2
-rw-r--r--  arch/tile/lib/exports.c | 2
-rw-r--r--  arch/tile/mm/homecache.c | 1
-rw-r--r--  arch/x86/Kconfig | 28
-rw-r--r--  arch/x86/Makefile | 4
-rw-r--r--  arch/x86/boot/compressed/misc.c | 2
-rw-r--r--  arch/x86/boot/compressed/misc.h | 1
-rw-r--r--  arch/x86/configs/i386_defconfig | 1
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 17
-rw-r--r--  arch/x86/include/asm/bootparam_utils.h | 38
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 1
-rw-r--r--  arch/x86/include/asm/hpet.h | 5
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 13
-rw-r--r--  arch/x86/include/asm/hypervisor.h | 13
-rw-r--r--  arch/x86/include/asm/io_apic.h | 28
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 40
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 4
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 8
-rw-r--r--  arch/x86/include/asm/linkage.h | 18
-rw-r--r--  arch/x86/include/asm/mce.h | 84
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 4
-rw-r--r--  arch/x86/include/asm/pci.h | 3
-rw-r--r--  arch/x86/include/asm/perf_event.h | 13
-rw-r--r--  arch/x86/include/asm/pgtable.h | 17
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 7
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 3
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/required-features.h | 8
-rw-r--r--  arch/x86/include/asm/x86_init.h | 27
-rw-r--r--  arch/x86/include/asm/xor.h | 491
-rw-r--r--  arch/x86/include/asm/xor_32.h | 309
-rw-r--r--  arch/x86/include/asm/xor_64.h | 305
-rw-r--r--  arch/x86/include/uapi/asm/mce.h | 87
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 3
-rw-r--r--  arch/x86/kernel/apb_timer.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic.c | 28
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 457
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 21
-rw-r--r--  arch/x86/kernel/apm_32.c | 11
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c | 7
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 54
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 15
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 25
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 322
-rw-r--r--  arch/x86/kernel/cpu/vmware.c | 13
-rw-r--r--  arch/x86/kernel/entry_32.S | 9
-rw-r--r--  arch/x86/kernel/entry_64.S | 7
-rw-r--r--  arch/x86/kernel/head32.c | 3
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 93
-rw-r--r--  arch/x86/kernel/hpet.c | 2
-rw-r--r--  arch/x86/kernel/kprobes/Makefile | 7
-rw-r--r--  arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h) | 11
-rw-r--r--  arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c) | 76
-rw-r--r--  arch/x86/kernel/kprobes/ftrace.c | 93
-rw-r--r--  arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c) | 2
-rw-r--r--  arch/x86/kernel/kvm.c | 1
-rw-r--r--  arch/x86/kernel/ptrace.c | 2
-rw-r--r--  arch/x86/kernel/rtc.c | 1
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 2
-rw-r--r--  arch/x86/kernel/tsc.c | 3
-rw-r--r--  arch/x86/kernel/uprobes.c | 4
-rw-r--r--  arch/x86/kernel/x86_init.c | 24
-rw-r--r--  arch/x86/mm/fault.c | 8
-rw-r--r--  arch/x86/mm/init_64.c | 7
-rw-r--r--  arch/x86/mm/memtest.c | 10
-rw-r--r--  arch/x86/mm/srat.c | 29
-rw-r--r--  arch/x86/mm/tlb.c | 2
-rw-r--r--  arch/x86/platform/Makefile | 2
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c | 7
-rw-r--r--  arch/x86/platform/efi/efi.c | 2
-rw-r--r--  arch/x86/platform/goldfish/Makefile | 1
-rw-r--r--  arch/x86/platform/goldfish/goldfish.c | 51
-rw-r--r--  arch/x86/platform/sfi/sfi.c | 2
-rw-r--r--  arch/x86/platform/ts5500/Makefile | 1
-rw-r--r--  arch/x86/platform/ts5500/ts5500.c | 339
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 4
-rw-r--r--  arch/x86/um/fault.c | 2
-rw-r--r--  arch/x86/vdso/vclock_gettime.c | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 78
-rw-r--r--  arch/x86/xen/suspend.c | 2
-rw-r--r--  arch/x86/xen/xen-asm_32.S | 14
-rw-r--r--  arch/x86/xen/xen-ops.h | 2
-rw-r--r--  block/blk-exec.c | 1
-rw-r--r--  drivers/acpi/apei/cper.c | 19
-rw-r--r--  drivers/ata/ahci.c | 93
-rw-r--r--  drivers/ata/ahci.h | 6
-rw-r--r--  drivers/ata/libahci.c | 118
-rw-r--r--  drivers/block/sunvdc.c | 2
-rw-r--r--  drivers/edac/amd64_edac.c | 109
-rw-r--r--  drivers/edac/amd64_edac.h | 12
-rw-r--r--  drivers/gpu/drm/nouveau/core/core/falcon.c | 7
-rw-r--r--  drivers/gpu/drm/nouveau/core/core/subdev.c | 2
-rw-r--r--  drivers/gpu/drm/nouveau/core/include/core/object.h | 7
-rw-r--r--  drivers/gpu/drm/nouveau/core/subdev/fb/base.c | 4
-rw-r--r--  drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c | 5
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_bo.c | 1
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_drm.c | 3
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c | 86
-rw-r--r--  drivers/gpu/drm/radeon/r600_cs.c | 38
-rw-r--r--  drivers/gpu/drm/radeon/radeon_ttm.c | 1
-rw-r--r--  drivers/hv/Kconfig | 2
-rw-r--r--  drivers/input/input.c | 16
-rw-r--r--  drivers/input/joystick/analog.c | 2
-rw-r--r--  drivers/input/keyboard/lm8323.c | 2
-rw-r--r--  drivers/input/tablet/wacom_sys.c | 6
-rw-r--r--  drivers/iommu/amd_iommu.c | 8
-rw-r--r--  drivers/iommu/dmar.c | 2
-rw-r--r--  drivers/iommu/intel-iommu.c | 2
-rw-r--r--  drivers/iommu/intel_irq_remapping.c | 48
-rw-r--r--  drivers/iommu/irq_remapping.c | 231
-rw-r--r--  drivers/iommu/irq_remapping.h | 1
-rw-r--r--  drivers/isdn/mISDN/stack.c | 7
-rw-r--r--  drivers/media/dvb-core/dvb_frontend.c | 6
-rw-r--r--  drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 71
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 12
-rw-r--r--  drivers/net/ethernet/cadence/macb.c | 5
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 7
-rw-r--r--  drivers/net/ethernet/realtek/r8169.c | 86
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 10
-rw-r--r--  drivers/net/usb/qmi_wwan.c | 1
-rw-r--r--  drivers/net/wireless/mwl8k.c | 36
-rw-r--r--  drivers/pci/msi.c | 26
-rw-r--r--  drivers/pci/pcie/aer/aerdrv_errprint.c | 63
-rw-r--r--  drivers/pci/remove.c | 2
-rw-r--r--  drivers/rtc/Kconfig | 12
-rw-r--r--  drivers/rtc/Makefile | 1
-rw-r--r--  drivers/rtc/class.c | 7
-rw-r--r--  drivers/rtc/rtc-pl031.c | 4
-rw-r--r--  drivers/rtc/systohc.c | 44
-rw-r--r--  drivers/spi/spi.c | 2
-rw-r--r--  drivers/staging/csr/bh.c | 2
-rw-r--r--  drivers/staging/csr/unifi_sme.c | 2
-rw-r--r--  drivers/staging/iio/trigger/Kconfig | 1
-rw-r--r--  drivers/staging/omapdrm/Kconfig | 2
-rw-r--r--  drivers/tty/sysrq.c | 1
-rw-r--r--  drivers/video/omap2/dss/dss_features.c | 1
-rw-r--r--  drivers/xen/events.c | 7
-rw-r--r--  drivers/xen/pcpu.c | 3
-rw-r--r--  fs/binfmt_elf.c | 8
-rw-r--r--  fs/binfmt_elf_fdpic.c | 7
-rw-r--r--  fs/proc/array.c | 4
-rw-r--r--  fs/pstore/ram.c | 10
-rw-r--r--  fs/select.c | 1
-rw-r--r--  include/asm-generic/cputime.h | 66
-rw-r--r--  include/asm-generic/cputime_jiffies.h | 72
-rw-r--r--  include/asm-generic/cputime_nsecs.h | 104
-rw-r--r--  include/linux/aer.h | 4
-rw-r--r--  include/linux/clockchips.h | 9
-rw-r--r--  include/linux/context_tracking.h | 28
-rw-r--r--  include/linux/ftrace.h | 6
-rw-r--r--  include/linux/ftrace_event.h | 6
-rw-r--r--  include/linux/hardirq.h | 8
-rw-r--r--  include/linux/init_task.h | 12
-rw-r--r--  include/linux/irq.h | 8
-rw-r--r--  include/linux/irq_work.h | 22
-rw-r--r--  include/linux/kernel_stat.h | 2
-rw-r--r--  include/linux/kprobes.h | 12
-rw-r--r--  include/linux/kvm_host.h | 55
-rw-r--r--  include/linux/pci.h | 7
-rw-r--r--  include/linux/perf_event.h | 20
-rw-r--r--  include/linux/printk.h | 3
-rw-r--r--  include/linux/profile.h | 13
-rw-r--r--  include/linux/rcupdate.h | 15
-rw-r--r--  include/linux/ring_buffer.h | 1
-rw-r--r--  include/linux/rtc.h | 1
-rw-r--r--  include/linux/sched.h | 185
-rw-r--r--  include/linux/sched/rt.h | 58
-rw-r--r--  include/linux/sched/sysctl.h | 110
-rw-r--r--  include/linux/smpboot.h | 5
-rw-r--r--  include/linux/srcu.h | 26
-rw-r--r--  include/linux/tick.h | 17
-rw-r--r--  include/linux/time.h | 13
-rw-r--r--  include/linux/tsacct_kern.h | 3
-rw-r--r--  include/linux/uprobes.h | 23
-rw-r--r--  include/linux/vtime.h | 59
-rw-r--r--  include/trace/events/ras.h | 77
-rw-r--r--  include/trace/events/rcu.h | 31
-rw-r--r--  include/uapi/linux/auto_fs.h | 25
-rw-r--r--  include/uapi/linux/perf_event.h | 3
-rw-r--r--  init/Kconfig | 42
-rw-r--r--  init/init_task.c | 2
-rw-r--r--  kernel/acct.c | 6
-rw-r--r--  kernel/context_tracking.c | 114
-rw-r--r--  kernel/cpu.c | 6
-rw-r--r--  kernel/delayacct.c | 7
-rw-r--r--  kernel/events/core.c | 5
-rw-r--r--  kernel/events/hw_breakpoint.c | 2
-rw-r--r--  kernel/events/uprobes.c | 466
-rw-r--r--  kernel/exit.c | 10
-rw-r--r--  kernel/fork.c | 6
-rw-r--r--  kernel/futex.c | 1
-rw-r--r--  kernel/hrtimer.c | 38
-rw-r--r--  kernel/irq/chip.c | 30
-rw-r--r--  kernel/irq/manage.c | 3
-rw-r--r--  kernel/irq/spurious.c | 7
-rw-r--r--  kernel/irq_work.c | 150
-rw-r--r--  kernel/kprobes.c | 8
-rw-r--r--  kernel/mutex.c | 1
-rw-r--r--  kernel/pid.c | 2
-rw-r--r--  kernel/posix-cpu-timers.c | 51
-rw-r--r--  kernel/posix-timers.c | 2
-rw-r--r--  kernel/printk.c | 36
-rw-r--r--  kernel/profile.c | 24
-rw-r--r--  kernel/ptrace.c | 6
-rw-r--r--  kernel/rcu.h | 7
-rw-r--r--  kernel/rcupdate.c | 60
-rw-r--r--  kernel/rcutiny.c | 8
-rw-r--r--  kernel/rcutiny_plugin.h | 56
-rw-r--r--  kernel/rcutorture.c | 66
-rw-r--r--  kernel/rcutree.c | 260
-rw-r--r--  kernel/rcutree.h | 11
-rw-r--r--  kernel/rtmutex-debug.c | 1
-rw-r--r--  kernel/rtmutex-tester.c | 1
-rw-r--r--  kernel/rtmutex.c | 1
-rw-r--r--  kernel/sched/core.c | 22
-rw-r--r--  kernel/sched/cpupri.c | 2
-rw-r--r--  kernel/sched/cputime.c | 314
-rw-r--r--  kernel/sched/fair.c | 27
-rw-r--r--  kernel/sched/rt.c | 26
-rw-r--r--  kernel/sched/sched.h | 2
-rw-r--r--  kernel/signal.c | 12
-rw-r--r--  kernel/smpboot.c | 5
-rw-r--r--  kernel/softirq.c | 6
-rw-r--r--  kernel/srcu.c | 37
-rw-r--r--  kernel/stop_machine.c | 156
-rw-r--r--  kernel/sysctl.c | 8
-rw-r--r--  kernel/time.c | 8
-rw-r--r--  kernel/time/Kconfig | 9
-rw-r--r--  kernel/time/ntp.c | 22
-rw-r--r--  kernel/time/tick-broadcast.c | 38
-rw-r--r--  kernel/time/tick-sched.c | 12
-rw-r--r--  kernel/time/timekeeping.c | 45
-rw-r--r--  kernel/timeconst.pl | 6
-rw-r--r--  kernel/timer.c | 2
-rw-r--r--  kernel/trace/Kconfig | 18
-rw-r--r--  kernel/trace/blktrace.c | 2
-rw-r--r--  kernel/trace/ftrace.c | 88
-rw-r--r--  kernel/trace/ring_buffer.c | 108
-rw-r--r--  kernel/trace/trace.c | 253
-rw-r--r--  kernel/trace/trace.h | 134
-rw-r--r--  kernel/trace/trace_clock.c | 5
-rw-r--r--  kernel/trace/trace_events.c | 1
-rw-r--r--  kernel/trace/trace_functions.c | 61
-rw-r--r--  kernel/trace/trace_functions_graph.c | 68
-rw-r--r--  kernel/trace/trace_probe.h | 1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 2
-rw-r--r--  kernel/trace/trace_selftest.c | 21
-rw-r--r--  kernel/trace/trace_syscalls.c | 18
-rw-r--r--  kernel/trace/trace_uprobe.c | 217
-rw-r--r--  kernel/tsacct.c | 44
-rw-r--r--  kernel/watchdog.c | 1
-rw-r--r--  lib/Kconfig.debug | 117
-rw-r--r--  mm/memcontrol.c | 4
-rw-r--r--  mm/mlock.c | 6
-rw-r--r--  mm/mmap.c | 1
-rw-r--r--  mm/mremap.c | 1
-rw-r--r--  mm/nommu.c | 1
-rw-r--r--  mm/page-writeback.c | 1
-rw-r--r--  mm/page_alloc.c | 20
-rw-r--r--  net/batman-adv/distributed-arp-table.c | 2
-rw-r--r--  net/bridge/br_stp_bpdu.c | 2
-rw-r--r--  net/core/datagram.c | 2
-rw-r--r--  net/ipv4/arp.c | 21
-rw-r--r--  net/ipv6/netfilter/ip6t_NPT.c | 18
-rw-r--r--  net/mac80211/cfg.c | 3
-rw-r--r--  net/mac80211/mlme.c | 11
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 35
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c | 2
-rw-r--r--  net/sched/sch_htb.c | 4
-rw-r--r--  net/sctp/Kconfig | 4
-rw-r--r--  net/sctp/ipv6.c | 5
-rw-r--r--  samples/Kconfig | 6
-rw-r--r--  samples/Makefile | 2
-rw-r--r--  samples/tracepoints/Makefile | 6
-rw-r--r--  samples/tracepoints/tp-samples-trace.h | 11
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample.c | 57
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample2.c | 44
-rw-r--r--  samples/tracepoints/tracepoint-sample.c | 57
-rw-r--r--  tools/Makefile | 2
-rw-r--r--  tools/lib/traceevent/event-parse.c | 49
-rw-r--r--  tools/lib/traceevent/event-parse.h | 3
-rw-r--r--  tools/lib/traceevent/event-utils.h | 3
-rw-r--r--  tools/lib/traceevent/parse-filter.c | 3
-rw-r--r--  tools/lib/traceevent/parse-utils.c | 19
-rw-r--r--  tools/lib/traceevent/trace-seq.c | 3
-rw-r--r--  tools/perf/Documentation/Makefile | 4
-rw-r--r--  tools/perf/Documentation/perf-annotate.txt | 7
-rw-r--r--  tools/perf/Documentation/perf-buildid-cache.txt | 7
-rw-r--r--  tools/perf/Documentation/perf-diff.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-evlist.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 41
-rw-r--r--  tools/perf/Documentation/perf-script-python.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 11
-rw-r--r--  tools/perf/Documentation/perf-test.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 2
-rw-r--r--  tools/perf/Makefile | 104
-rw-r--r--  tools/perf/arch/common.c | 1
-rw-r--r--  tools/perf/bench/bench.h | 1
-rw-r--r--  tools/perf/bench/numa.c | 1731
-rw-r--r--  tools/perf/builtin-annotate.c | 30
-rw-r--r--  tools/perf/builtin-bench.c | 19
-rw-r--r--  tools/perf/builtin-buildid-cache.c | 96
-rw-r--r--  tools/perf/builtin-buildid-list.c | 21
-rw-r--r--  tools/perf/builtin-diff.c | 205
-rw-r--r--  tools/perf/builtin-evlist.c | 88
-rw-r--r--  tools/perf/builtin-kmem.c | 16
-rw-r--r--  tools/perf/builtin-kvm.c | 3
-rw-r--r--  tools/perf/builtin-record.c | 168
-rw-r--r--  tools/perf/builtin-report.c | 93
-rw-r--r--  tools/perf/builtin-sched.c | 6
-rw-r--r--  tools/perf/builtin-script.c | 17
-rw-r--r--  tools/perf/builtin-stat.c | 328
-rw-r--r--  tools/perf/builtin-top.c | 372
-rw-r--r--  tools/perf/builtin-trace.c | 2
-rw-r--r--  tools/perf/config/feature-tests.mak | 11
-rw-r--r--  tools/perf/config/utilities.mak | 6
-rw-r--r--  tools/perf/perf.c | 32
-rw-r--r--  tools/perf/perf.h | 32
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-report | 3
-rw-r--r--  tools/perf/scripts/perl/rwtop.pl | 6
-rw-r--r--  tools/perf/scripts/perl/workqueue-stats.pl | 129
-rw-r--r--  tools/perf/tests/attr.c | 9
-rw-r--r--  tools/perf/tests/attr.py | 27
-rw-r--r--  tools/perf/tests/attr/base-record | 2
-rw-r--r--  tools/perf/tests/attr/test-record-group | 2
-rw-r--r--  tools/perf/tests/attr/test-record-group1 | 4
-rw-r--r--  tools/perf/tests/builtin-test.c | 40
-rw-r--r--  tools/perf/tests/evsel-roundtrip-name.c | 4
-rw-r--r--  tools/perf/tests/hists_link.c | 500
-rw-r--r--  tools/perf/tests/mmap-basic.c | 40
-rw-r--r--  tools/perf/tests/open-syscall-all-cpus.c | 19
-rw-r--r--  tools/perf/tests/open-syscall.c | 17
-rw-r--r--  tools/perf/tests/parse-events.c | 324
-rw-r--r--  tools/perf/tests/perf-record.c | 20
-rw-r--r--  tools/perf/tests/pmu.c | 11
-rw-r--r--  tools/perf/tests/python-use.c | 23
-rw-r--r--  tools/perf/tests/tests.h | 11
-rw-r--r--  tools/perf/tests/util.c | 30
-rw-r--r--  tools/perf/tests/vmlinux-kallsyms.c | 7
-rw-r--r--  tools/perf/ui/browser.c | 6
-rw-r--r--  tools/perf/ui/browsers/annotate.c | 33
-rw-r--r--  tools/perf/ui/browsers/hists.c | 341
-rw-r--r--  tools/perf/ui/gtk/annotate.c | 229
-rw-r--r--  tools/perf/ui/gtk/browser.c | 235
-rw-r--r--  tools/perf/ui/gtk/gtk.h | 10
-rw-r--r--  tools/perf/ui/gtk/helpline.c | 23
-rw-r--r--  tools/perf/ui/gtk/hists.c | 312
-rw-r--r--  tools/perf/ui/helpline.c | 12
-rw-r--r--  tools/perf/ui/helpline.h | 22
-rw-r--r--  tools/perf/ui/hist.c | 481
-rw-r--r--  tools/perf/ui/keysyms.h | 1
-rw-r--r--  tools/perf/ui/setup.c | 3
-rw-r--r--  tools/perf/ui/stdio/hist.c | 25
-rw-r--r--  tools/perf/ui/tui/helpline.c | 29
-rw-r--r--  tools/perf/ui/util.c | 1
-rwxr-xr-x  tools/perf/util/PERF-VERSION-GEN | 4
-rw-r--r--  tools/perf/util/annotate.c | 2
-rw-r--r--  tools/perf/util/annotate.h | 24
-rw-r--r--  tools/perf/util/callchain.c | 2
-rw-r--r--  tools/perf/util/callchain.h | 5
-rw-r--r--  tools/perf/util/cpumap.c | 54
-rw-r--r--  tools/perf/util/cpumap.h | 9
-rw-r--r--  tools/perf/util/debug.c | 28
-rw-r--r--  tools/perf/util/debug.h | 34
-rw-r--r--  tools/perf/util/dso.c | 6
-rw-r--r--  tools/perf/util/dso.h | 2
-rw-r--r--  tools/perf/util/event.c | 4
-rw-r--r--  tools/perf/util/evlist.c | 31
-rw-r--r--  tools/perf/util/evlist.h | 34
-rw-r--r--  tools/perf/util/evsel.c | 370
-rw-r--r--  tools/perf/util/evsel.h | 50
-rw-r--r--  tools/perf/util/header.c | 266
-rw-r--r--  tools/perf/util/header.h | 2
-rw-r--r--  tools/perf/util/hist.c | 142
-rw-r--r--  tools/perf/util/hist.h | 26
-rw-r--r--  tools/perf/util/include/linux/bitops.h | 1
-rw-r--r--  tools/perf/util/intlist.c | 36
-rw-r--r--  tools/perf/util/intlist.h | 2
-rw-r--r--  tools/perf/util/machine.c | 784
-rw-r--r--  tools/perf/util/machine.h | 41
-rw-r--r--  tools/perf/util/map.c | 121
-rw-r--r--  tools/perf/util/map.h | 24
-rw-r--r--  tools/perf/util/parse-events.c | 96
-rw-r--r--  tools/perf/util/parse-events.h | 22
-rw-r--r--  tools/perf/util/parse-events.y | 75
-rw-r--r--  tools/perf/util/pmu.c | 46
-rw-r--r--  tools/perf/util/pmu.h | 15
-rw-r--r--  tools/perf/util/pmu.y | 1
-rw-r--r--  tools/perf/util/probe-finder.c | 10
-rw-r--r--  tools/perf/util/python-ext-sources | 1
-rw-r--r--  tools/perf/util/python.c | 9
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 1
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 1
-rw-r--r--  tools/perf/util/session.c | 325
-rw-r--r--  tools/perf/util/session.h | 35
-rw-r--r--  tools/perf/util/sort.c | 245
-rw-r--r--  tools/perf/util/sort.h | 15
-rw-r--r--  tools/perf/util/string.c | 18
-rw-r--r--  tools/perf/util/strlist.c | 54
-rw-r--r--  tools/perf/util/strlist.h | 42
-rw-r--r--  tools/perf/util/symbol-elf.c | 14
-rw-r--r--  tools/perf/util/symbol-minimal.c | 1
-rw-r--r--  tools/perf/util/symbol.c | 536
-rw-r--r--  tools/perf/util/symbol.h | 9
-rw-r--r--  tools/perf/util/sysfs.c | 2
-rw-r--r--  tools/perf/util/thread.c | 20
-rw-r--r--  tools/perf/util/thread.h | 1
-rw-r--r--  tools/perf/util/top.c | 22
-rw-r--r--  tools/perf/util/top.h | 10
-rw-r--r--  tools/perf/util/util.c | 24
-rw-r--r--  tools/perf/util/util.h | 4
489 files changed, 14315 insertions, 7098 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
new file mode 100644
index 000000000000..0adeb524c0d4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -0,0 +1,62 @@
+What:		/sys/devices/cpu/events/
+		/sys/devices/cpu/events/branch-misses
+		/sys/devices/cpu/events/cache-references
+		/sys/devices/cpu/events/cache-misses
+		/sys/devices/cpu/events/stalled-cycles-frontend
+		/sys/devices/cpu/events/branch-instructions
+		/sys/devices/cpu/events/stalled-cycles-backend
+		/sys/devices/cpu/events/instructions
+		/sys/devices/cpu/events/cpu-cycles
+
+Date:		2013/01/08
+
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Description:	Generic performance monitoring events
+
+		A collection of performance monitoring events that may be
+		supported by many/most CPUs. These events can be monitored
+		using the 'perf(1)' tool.
+
+		The contents of each file would look like:
+
+			event=0xNNNN
+
+		where 'N' is a hex digit and the number '0xNNNN' shows the
+		"raw code" for the perf event identified by the file's
+		"basename".
+
+
+What:		/sys/devices/cpu/events/PM_LD_MISS_L1
+		/sys/devices/cpu/events/PM_LD_REF_L1
+		/sys/devices/cpu/events/PM_CYC
+		/sys/devices/cpu/events/PM_BRU_FIN
+		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
+		/sys/devices/cpu/events/PM_BRU_MPRED
+		/sys/devices/cpu/events/PM_INST_CMPL
+		/sys/devices/cpu/events/PM_CMPLU_STALL
+
+Date:		2013/01/08
+
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+		Linux Powerpc mailing list <linuxppc-dev@ozlabs.org>
+
+Description:	POWER-systems specific performance monitoring events
+
+		A collection of performance monitoring events that may be
+		supported by the POWER CPU. These events can be monitored
+		using the 'perf(1)' tool.
+
+		These events may not be supported by other CPUs.
+
+		The contents of each file would look like:
+
+			event=0xNNNN
+
+		where 'N' is a hex digit and the number '0xNNNN' shows the
+		"raw code" for the perf event identified by the file's
+		"basename".
+
+		Further, multiple terms like 'event=0xNNNN' can be specified
+		and separated with comma. All available terms are defined in
+		the /sys/bus/event_source/devices/<dev>/format file.
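[Illustrative sketch, not part of the patch: one way user space can consume such a file, assuming it holds a single 'event=0xNNNN' term and that PERF_TYPE_RAW matches the core PMU's dynamic type. open_sysfs_event() is a hypothetical helper.]

/* Read a generic event's raw code and open it via perf_event_open(2). */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int open_sysfs_event(const char *name)
{
	char path[256];
	unsigned long long code;
	struct perf_event_attr attr;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/devices/cpu/events/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "event=%llx", &code) != 1) {	/* %x accepts "0x" */
		fclose(f);
		return -1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;	/* assumption: raw code goes in config */
	attr.config = code;

	/* count this event for the calling task on any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}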
diff --git a/Documentation/ABI/testing/sysfs-platform-ts5500 b/Documentation/ABI/testing/sysfs-platform-ts5500
new file mode 100644
index 000000000000..c88375a537a1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-ts5500
@@ -0,0 +1,47 @@
+What:		/sys/devices/platform/ts5500/adc
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Indicates the presence of an A/D Converter. If it is present,
+		it will display "1", otherwise "0".
+
+What:		/sys/devices/platform/ts5500/ereset
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Indicates the presence of an external reset. If it is present,
+		it will display "1", otherwise "0".
+
+What:		/sys/devices/platform/ts5500/id
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Product ID of the TS board. TS-5500 ID is 0x60.
+
+What:		/sys/devices/platform/ts5500/jumpers
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Bitfield showing the jumpers' state. If a jumper is present,
+		the corresponding bit is set. For instance, 0x0e means jumpers
+		2, 3 and 4 are set.
+
+What:		/sys/devices/platform/ts5500/rs485
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Indicates the presence of the RS485 option. If it is present,
+		it will display "1", otherwise "0".
+
+What:		/sys/devices/platform/ts5500/sram
+Date:		January 2013
+KernelVersion:	3.7
+Contact:	"Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+		Indicates the presence of the SRAM option. If it is present,
+		it will display "1", otherwise "0".
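[Illustrative sketch, not from the patch: decoding the "jumpers" bitfield documented above. The jumper-to-bit mapping is inferred from the 0x0e example (jumper N <-> bit N-1) and the hex parsing is an assumption.]

/* Print which TS-5500 jumpers are set, per the sysfs bitfield. */
#include <stdio.h>

int main(void)
{
	unsigned int mask;
	FILE *f = fopen("/sys/devices/platform/ts5500/jumpers", "r");

	if (!f || fscanf(f, "%x", &mask) != 1)
		return 1;
	fclose(f);

	/* e.g. mask 0x0e (bits 1-3) reports jumpers 2, 3 and 4 */
	for (int n = 1; n <= 8; n++)
		if (mask & (1u << (n - 1)))
			printf("jumper %d is set\n", n);
	return 0;
}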
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 53e6fca146d7..a09178086c30 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block()
 returns as soon as it finds any constraint that doesn't allow the
 call to succeed.
 
-4.2.3 pci_disable_msi
+4.2.3 pci_enable_msi_block_auto
+
+int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
+
+This variation on pci_enable_msi() call allows a device driver to request
+the maximum possible number of MSIs. The MSI specification only allows
+interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
+
+If this function returns a positive number, it indicates that it has
+succeeded and the returned value is the number of allocated interrupts. In
+this case, the function enables MSI on this device and updates dev->irq to
+be the lowest of the new interrupts assigned to it. The other interrupts
+assigned to the device are in the range dev->irq to dev->irq + returned
+value - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+If the device driver needs to know the number of interrupts the device
+supports it can pass the pointer count where that number is stored. The
+device driver must decide what action to take if pci_enable_msi_block_auto()
+succeeds, but returns a value less than the number of interrupts supported.
+If the device driver does not need to know the number of interrupts
+supported, it can set the pointer count to NULL.
+
+4.2.4 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
 This function should be used to undo the effect of pci_enable_msi() or
-pci_enable_msi_block(). Calling it restores dev->irq to the pin-based
-interrupt number and frees the previously allocated message signaled
-interrupt(s). The interrupt may subsequently be assigned to another
-device, so drivers should not cache the value of dev->irq.
+pci_enable_msi_block() or pci_enable_msi_block_auto(). Calling it restores
+dev->irq to the pin-based interrupt number and frees the previously
+allocated message signaled interrupt(s). The interrupt may subsequently be
+assigned to another device, so drivers should not cache the value of
+dev->irq.
 
 Before calling this function, a device driver must always call free_irq()
 on any interrupt for which it previously called request_irq().
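[Hedged sketch of the calling convention documented above; foo_enable_irqs() and its fallback policy are hypothetical driver code, not part of the patch.]

/* Request the maximum number of MSIs the device supports. */
#include <linux/pci.h>
#include <linux/interrupt.h>

static int foo_enable_irqs(struct pci_dev *pdev)
{
	unsigned int supported;	/* how many MSIs the device advertises */
	int allocated;

	allocated = pci_enable_msi_block_auto(pdev, &supported);
	if (allocated < 0)
		return allocated;	/* error: fall back to pin-based IRQ */

	/* Success: vectors dev->irq .. dev->irq + allocated - 1 are ours.
	 * 'allocated' may still be less than 'supported'; the driver must
	 * decide whether that is acceptable.
	 */
	return allocated;
}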
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 27f2b21a9d5c..d9ca5be9b471 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -253,6 +253,8 @@ This performs an atomic exchange operation on the atomic variable v, setting
 the given new value. It returns the old value that the atomic variable v had
 just before the operation.
 
+atomic_xchg requires explicit memory barriers around the operation.
+
 	int atomic_cmpxchg(atomic_t *v, int old, int new);
 
 This performs an atomic compare exchange operation on the atomic value v,
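[Illustrative sketch, not from the patch: the doc sentence above is addressed at arch implementers, so callers may rely on atomic_xchg() acting as a full barrier, as in this hypothetical publish/consume pattern.]

#include <linux/atomic.h>

struct data;

static struct data *shared_buf;
static atomic_t buf_ready = ATOMIC_INIT(0);

void publish(struct data *d)
{
	shared_buf = d;			/* ordered before the exchange */
	atomic_xchg(&buf_ready, 1);	/* implies a full memory barrier */
}

struct data *consume(void)
{
	if (atomic_xchg(&buf_ready, 0) == 0)
		return NULL;
	return shared_buf;		/* ordered after the exchange */
}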
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3c4e1b3b80a1..fa5d8a9ae205 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1685,6 +1685,7 @@ explicit lock operations, described later). These include:
 
 	xchg();
 	cmpxchg();
+	atomic_xchg();
 	atomic_cmpxchg();
 	atomic_inc_return();
 	atomic_dec_return();
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 6f51fed45f2d..53d6a3c51d87 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1842,6 +1842,89 @@ an error.
  # cat buffer_size_kb
 85
 
+Snapshot
+--------
+CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
+available to all non latency tracers. (Latency tracers which
+record max latency, such as "irqsoff" or "wakeup", can't use
+this feature, since those are already using the snapshot
+mechanism internally.)
+
+Snapshot preserves a current trace buffer at a particular point
+in time without stopping tracing. Ftrace swaps the current
+buffer with a spare buffer, and tracing continues in the new
+current (=previous spare) buffer.
+
+The following debugfs files in "tracing" are related to this
+feature:
+
+  snapshot:
+
+	This is used to take a snapshot and to read the output
+	of the snapshot. Echo 1 into this file to allocate a
+	spare buffer and to take a snapshot (swap), then read
+	the snapshot from this file in the same format as
+	"trace" (described above in the section "The File
+	System"). Both reads snapshot and tracing are executable
+	in parallel. When the spare buffer is allocated, echoing
+	0 frees it, and echoing else (positive) values clear the
+	snapshot contents.
+	More details are shown in the table below.
+
+	status\input  |     0      |     1      |    else    |
+	--------------+------------+------------+------------+
+	not allocated |(do nothing)| alloc+swap |   EINVAL   |
+	--------------+------------+------------+------------+
+	allocated     |    free    |    swap    |   clear    |
+	--------------+------------+------------+------------+
+
+Here is an example of using the snapshot feature.
+
+ # echo 1 > events/sched/enable
+ # echo 1 > snapshot
+ # cat snapshot
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 71/71   #P:8
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+          <idle>-0     [005] d...  2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
+           sleep-2242  [005] d...  2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
+[...]
+          <idle>-0     [002] d...  2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
+
+ # cat trace
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 77/77   #P:8
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+          <idle>-0     [007] d...  2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
+ snapshot-test-2-2229  [002] d...  2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
+[...]
+
+
+If you try to use this snapshot feature when current tracer is
+one of the latency tracers, you will get the following results.
+
+ # echo wakeup > current_tracer
+ # echo 1 > snapshot
+bash: echo: write error: Device or resource busy
+ # cat snapshot
+cat: snapshot: Device or resource busy
+
 -----------
 
 More details can be found in the source code, in the
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index e540fd67f767..b443f1de0e5a 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -390,6 +390,7 @@ Protocol: 2.00+
   F  Special		(0xFF = undefined)
  10  Reserved
  11  Minimal Linux Bootloader <http://sebastian-plotz.blogspot.de>
+ 12  OVMF UEFI virtualization stack
 
  Please contact <hpa@zytor.com> if you need a bootloader ID
  value assigned.
diff --git a/MAINTAINERS b/MAINTAINERS
index 35a56bcd5e75..526fb85f2f7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1303,7 +1303,7 @@ F:	include/linux/dmaengine.h
 F:	include/linux/async_tx.h
 
 AT24 EEPROM DRIVER
-M:	Wolfram Sang <w.sang@pengutronix.de>
+M:	Wolfram Sang <wsa@the-dreams.de>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/misc/eeprom/at24.c
@@ -3757,12 +3757,11 @@ S:	Maintained
 F:	drivers/i2c/i2c-stub.c
 
 I2C SUBSYSTEM
-M:	Wolfram Sang <w.sang@pengutronix.de>
+M:	Wolfram Sang <wsa@the-dreams.de>
 M:	"Ben Dooks (embedded platforms)" <ben-linux@fluff.org>
 L:	linux-i2c@vger.kernel.org
 W:	http://i2c.wiki.kernel.org/
-T:	quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-i2c/
-T:	git git://git.pengutronix.de/git/wsa/linux.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
 S:	Maintained
 F:	Documentation/i2c/
 F:	drivers/i2c/
@@ -5778,15 +5777,6 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/muxes/i2c-mux-pca9541.c
 
-PCA9564/PCA9665 I2C BUS DRIVER
-M:	Wolfram Sang <w.sang@pengutronix.de>
-L:	linux-i2c@vger.kernel.org
-S:	Maintained
-F:	drivers/i2c/algos/i2c-algo-pca.c
-F:	drivers/i2c/busses/i2c-pca-*
-F:	include/linux/i2c-algo-pca.h
-F:	include/linux/i2c-pca-platform.h
-
 PCDP - PRIMARY CONSOLE AND DEBUG PORT
 M:	Khalid Aziz <khalid@gonehiking.org>
 S:	Maintained
@@ -6598,7 +6588,7 @@ F:	drivers/dma/dw_dmac_regs.h
 F:	drivers/dma/dw_dmac.c
 
 TIMEKEEPING, NTP
-M:	John Stultz <johnstul@us.ibm.com>
+M:	John Stultz <john.stultz@linaro.org>
 M:	Thomas Gleixner <tglx@linutronix.de>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
 S:	Supported
@@ -7543,6 +7533,11 @@ F:	drivers/net/team/
 F:	include/linux/if_team.h
 F:	include/uapi/linux/if_team.h
 
+TECHNOLOGIC SYSTEMS TS-5500 PLATFORM SUPPORT
+M:	Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>
+S:	Maintained
+F:	arch/x86/platform/ts5500/
+
 TECHNOTREND USB IR RECEIVER
 M:	Sean Young <sean@mess.org>
 L:	linux-media@vger.kernel.org
diff --git a/Makefile b/Makefile
index 08ef9bdb80c7..6fccf6531770 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Unicycling Gorilla
 
 # *DOCUMENTATION*
@@ -165,7 +165,8 @@ export srctree objtree VPATH
 # then ARCH is assigned, getting whatever value it gets normally, and
 # SUBARCH is subsequently ignored.
 
-SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
+SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
+				  -e s/sun4u/sparc64/ \
 				  -e s/arm.*/arm/ -e s/sa110/arm/ \
 				  -e s/s390x/s390/ -e s/parisc64/parisc/ \
 				  -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f8f281f2585..97fb7d0365d1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
 
+config KPROBES_ON_FTRACE
+	def_bool y
+	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	help
+	  If function tracer is enabled and the arch supports full
+	  passing of pt_regs to function tracing, then kprobes can
+	  optimize on top of function tracing.
+
 config UPROBES
 	bool "Transparent user-space probes (EXPERIMENTAL)"
 	depends on UPROBE_EVENT && PERF_EVENTS
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
 config HAVE_OPTPROBES
 	bool
 
+config HAVE_KPROBES_ON_FTRACE
+	bool
+
 config HAVE_NMI_WATCHDOG
 	bool
 #
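[Illustrative sketch, not from the patch: the kprobes API is unchanged by KPROBES_ON_FTRACE; with it enabled, a probe on a function entry can be serviced through the ftrace handler instead of a breakpoint. The target symbol and handler below are hypothetical module code.]

#include <linux/module.h>
#include <linux/kprobes.h>

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("pre_handler: hit %s\n", p->symbol_name);
	return 0;
}

static struct kprobe kp = {
	.symbol_name = "do_fork",	/* example target; any probed symbol */
	.pre_handler = handler_pre,
};

static int __init kprobe_example_init(void)
{
	return register_kprobe(&kp);
}

static void __exit kprobe_example_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_example_init);
module_exit(kprobe_example_exit);
MODULE_LICENSE("GPL");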
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9d5904cc7712..9b504af2e966 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -5,7 +5,6 @@ config ALPHA
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS
-	select HAVE_IRQ_WORK
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_DMA_ATTRS
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 14db93e4c8a8..dbc1760f418b 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1139,6 +1139,7 @@ struct rusage32 {
 SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 {
 	struct rusage32 r;
+	cputime_t utime, stime;
 
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 		return -EINVAL;
@@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 	memset(&r, 0, sizeof(r));
 	switch (who) {
 	case RUSAGE_SELF:
-		jiffies_to_timeval32(current->utime, &r.ru_utime);
-		jiffies_to_timeval32(current->stime, &r.ru_stime);
+		task_cputime(current, &utime, &stime);
+		jiffies_to_timeval32(utime, &r.ru_utime);
+		jiffies_to_timeval32(stime, &r.ru_stime);
 		r.ru_minflt = current->min_flt;
 		r.ru_majflt = current->maj_flt;
 		break;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 67874b82a4ed..9bbe760f2352 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -36,7 +36,6 @@ config ARM
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
 	select HAVE_IDE if PCI || ISA || PCMCIA
-	select HAVE_IRQ_WORK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 4eb6d005ffaa..86dff32a0737 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -7,8 +7,14 @@
 
 #ifndef __ASSEMBLER__
 unsigned int scu_get_core_count(void __iomem *);
-void scu_enable(void __iomem *);
 int scu_power_mode(void __iomem *, unsigned int);
+
+#ifdef CONFIG_SMP
+void scu_enable(void __iomem *scu_base);
+#else
+static inline void scu_enable(void __iomem *scu_base) {}
+#endif
+
 #endif
 
 #endif
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index b9f015e843d8..45eac87ed66a 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -75,7 +75,7 @@ void scu_enable(void __iomem *scu_base)
 int scu_power_mode(void __iomem *scu_base, unsigned int mode)
 {
 	unsigned int val;
-	int cpu = cpu_logical_map(smp_processor_id());
+	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
 
 	if (mode > 3 || mode == 1 || cpu > 3)
 		return -EINVAL;
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 981dc1e1da51..e6c061282939 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -28,6 +28,7 @@
 
 #include <asm/arch_timer.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 #include <asm/hardware/arm_timer.h>
@@ -59,7 +60,7 @@ static void __init highbank_scu_map_io(void)
 
 void highbank_set_cpu_jump(int cpu, void *jump_addr)
 {
-	cpu = cpu_logical_map(cpu);
+	cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 0);
 	writel(virt_to_phys(jump_addr), HB_JUMP_TABLE_VIRT(cpu));
 	__cpuc_flush_dcache_area(HB_JUMP_TABLE_VIRT(cpu), 16);
 	outer_clean_range(HB_JUMP_TABLE_PHYS(cpu),
diff --git a/arch/arm/mach-highbank/sysregs.h b/arch/arm/mach-highbank/sysregs.h
index 70af9d13fcef..5995df7f2622 100644
--- a/arch/arm/mach-highbank/sysregs.h
+++ b/arch/arm/mach-highbank/sysregs.h
@@ -37,7 +37,7 @@ extern void __iomem *sregs_base;
 
 static inline void highbank_set_core_pwr(void)
 {
-	int cpu = cpu_logical_map(smp_processor_id());
+	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
 	if (scu_base_addr)
 		scu_power_mode(scu_base_addr, SCU_PM_POWEROFF);
 	else
@@ -46,7 +46,7 @@ static inline void highbank_set_core_pwr(void)
 
 static inline void highbank_clear_core_pwr(void)
 {
-	int cpu = cpu_logical_map(smp_processor_id());
+	int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0);
 	if (scu_base_addr)
 		scu_power_mode(scu_base_addr, SCU_PM_NORMAL);
 	else
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f8f362aafee9..75e915b72471 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,7 +21,6 @@ config ARM64
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
-	select HAVE_IRQ_WORK
 	select HAVE_MEMBLOCK
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index b6f3ad5441c5..67e4aaad78f5 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -24,7 +24,6 @@ config BLACKFIN
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_IDE
-	select HAVE_IRQ_WORK
 	select HAVE_KERNEL_GZIP if RAMKERNEL
 	select HAVE_KERNEL_BZIP2 if RAMKERNEL
 	select HAVE_KERNEL_LZMA if RAMKERNEL
@@ -38,7 +37,6 @@ config BLACKFIN
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_PROBE
-	select IRQ_PER_CPU if SMP
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 9d262645f667..17df48fc8f44 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -3,7 +3,6 @@ config FRV
 	default y
 	select HAVE_IDE
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_UID16
 	select HAVE_GENERIC_HARDIRQS
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 0744f7d7b1fd..e4decc6b8947 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -12,9 +12,7 @@ config HEXAGON
 	# select ARCH_WANT_OPTIONAL_GPIOLIB
 	# select ARCH_REQUIRE_GPIOLIB
 	# select HAVE_CLK
-	# select IRQ_PER_CPU
 	# select GENERIC_PENDING_IRQ if SMP
-	select HAVE_IRQ_WORK
 	select GENERIC_ATOMIC64
 	select HAVE_PERF_EVENTS
 	select HAVE_GENERIC_HARDIRQS
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3279646120e3..00c2e88f7755 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -29,7 +29,6 @@ config IA64
 	select ARCH_DISCARD_MEMBLOCK
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
-	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 7fcf7f08ab06..e2d3f5baf265 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -11,99 +11,19 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-#include <asm-generic/cputime.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+# include <asm-generic/cputime.h>
 #else
-
-#include <linux/time.h>
-#include <linux/jiffies.h>
-#include <asm/processor.h>
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cputime_one_jiffy	jiffies_to_cputime(1)
-
-/*
- * Convert cputime <-> jiffies (HZ)
- */
-#define cputime_to_jiffies(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)	\
-	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)	\
-	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
-
-/*
- * Convert cputime <-> microseconds
- */
-#define cputime_to_usecs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)	\
-	(__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)	\
-	(__force cputime64_t)((__usecs) * NSEC_PER_USEC)
-
-/*
- * Convert cputime <-> seconds
- */
-#define cputime_to_secs(__ct)		\
-	((__force u64)(__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)		\
-	(__force cputime_t)((__secs) * NSEC_PER_SEC)
-
-/*
- * Convert cputime <-> timespec (nsec)
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
-{
-	val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
-}
-
-/*
- * Convert cputime <-> timeval (msec)
- */
-static inline cputime_t timeval_to_cputime(struct timeval *val)
-{
-	u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
-	return (__force cputime_t) ret;
-}
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-{
-	val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
-	val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
-}
-
-/*
- * Convert cputime <-> clock (USER_HZ)
- */
-#define cputime_to_clock_t(__ct)	\
-	((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)		\
-	(__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct)	\
-	cputime_to_clock_t((__force cputime_t)__ct)
-
+# include <asm/processor.h>
+# include <asm-generic/cputime_nsecs.h>
 extern void arch_vtime_task_switch(struct task_struct *tsk);
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index ff2ae4136584..020d655ed082 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -31,7 +31,7 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	__u64 ac_stamp;
 	__u64 ac_leave;
 	__u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define setup_thread_stack(p, org)			\
 	*task_thread_info(p) = *task_thread_info(org);	\
 	task_thread_info(p)->ac_stime = 0;		\
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index c57fa910f2c9..00cf03e0cb82 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP		\
 	MOV_FROM_ITC(pUStk, p6, r20, r2);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index a48bd9a9927b..46c9e3007315 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -41,7 +41,7 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
 	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
 	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6bfd8429ee0f..7a53530f22c2 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	adds r2=PT(LOADRS)+16,r12
 	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) st1 [r15]=r17				// M2|3
 #else
 (pUStk) st1 [r14]=r17				// M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
 	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
 	COVER				// B add current frame into dirty partition & set cr.ifs
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	mov r19=ar.bsp			// M2 get new backing store pointer
 	st8 [r14]=r22			// M	save time at leave
 	mov f10=f0			// F	clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
 	;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
 	//  mib  :  mov add br        ->  mib  : ld8 add br
 	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e662f178b990..c4cd45d97749 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0		// M2 set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p6, r30, r23)	// M get cycle for accounting
 #else
 	nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0	// A set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 4738ff7bd66a..9be4e497f3d3 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
 	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index fa25689fc453..689ffcaa284e 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28			// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8			// A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	;;
 	mov b6=r30			// I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0			// M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p14, r30, r18)	// M    get cycle for accounting
 #else
 	mov b6=r30			// I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0		// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index d56753a11636..cc82a7d744c9 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP \
 (pUStk) mov.m r20=ar.itc;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 88a794536bc0..fbaac1afb844 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 
 #include <linux/kernel_stat.h>
 
@@ -136,13 +136,14 @@ void vtime_account_system(struct task_struct *tsk)
 
 	account_system_time(tsk, 0, delta, delta);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
 	account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index ae700f49e51d..b0768a657920 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,7 +130,6 @@ extern int handle_kernel_fault(struct pt_regs *regs);
 #define start_thread(_regs, _pc, _usp)			\
 do {							\
 	(_regs)->pc = (_pc);				\
-	((struct switch_stack *)(_regs))[-1].a6 = 0;	\
 	setframeformat(_regs);				\
 	if (current->mm)				\
 		(_regs)->d5 = current->mm->start_data;	\
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2ac626ab9d43..9becc44d9d7a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,7 +4,6 @@ config MIPS
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select HAVE_ARCH_KGDB
@@ -2161,7 +2160,6 @@ source "mm/Kconfig"
 config SMP
 	bool "Multi-Processing support"
 	depends on SYS_SUPPORTS_SMP
-	select IRQ_PER_CPU
 	select USE_GENERIC_SMP_HELPERS
 	help
 	  This enables support for systems with more than one CPU. If you have
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b77feffbadea..a32e34ecda9e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -9,14 +9,12 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select HAVE_GENERIC_HARDIRQS
 	select BROKEN_RODATA
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PCI_IOMAP
-	select IRQ_PER_CPU
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 17903f1f356b..561ccca7b1a7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -118,14 +118,12 @@ config PPC
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
 	select HAVE_GENERIC_HARDIRQS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select SPARSE_IRQ
-	select IRQ_PER_CPU
 	select IRQ_DOMAIN
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index 29bb11ec6c64..4f35fc462385 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 88fa5c46f66f..f7df8362911f 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 840a2c2d0430..bcedeea0df89 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 483733bd06d4..607559ab271f 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 531fe0c3108f..b1e7f2af1016 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
  * reading from the dispatch trace log. If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 9710be3a2d17..136bba62efa4 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <asm/hw_irq.h>
+#include <linux/device.h>
 
 #define MAX_HWEVENTS		8
 #define MAX_EVENT_ALTERNATIVES	8
@@ -35,6 +36,7 @@ struct power_pmu {
 	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
 	int		(*limited_pmc_event)(u64 event_id);
 	u32		flags;
+	const struct attribute_group	**attr_groups;
 	int		n_generic;
 	int		*generic_events;
 	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
@@ -109,3 +111,27 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
  * If an event_id is not subject to the constraint expressed by a particular
  * field, then it will have 0 in both the mask and value for that field.
  */
+
+extern ssize_t power_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page);
+
+/*
+ * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix.
+ *
+ * Having a suffix allows us to have aliases in sysfs - eg: the generic
+ * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
+ * 'PM_CYC' where the latter is the name by which the event is known in
+ * POWER CPU specification.
+ */
+#define EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
+#define EVENT_PTR(_id, _suffix)		&EVENT_VAR(_id, _suffix).attr.attr
+
+#define EVENT_ATTR(_name, _id, _suffix)					\
+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,	\
+			power_events_sysfs_show)
+
+#define GENERIC_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _g)
+#define GENERIC_EVENT_PTR(_id)		EVENT_PTR(_id, _g)
+
+#define POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(PM_##_name, _id, _p)
+#define POWER_EVENT_PTR(_id)		EVENT_PTR(_id, _p)
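
For illustration (not part of the patch): assuming the PMU_EVENT_ATTR() helper from <linux/perf_event.h> of this era, a line such as GENERIC_EVENT_ATTR(cpu-cycles, CYC) expands to roughly the following, which is what EVENT_PTR() later points at:

/* Approximate expansion sketch; the perf_pmu_events_attr layout is an
 * assumption, not quoted from this patch. */
static struct perf_pmu_events_attr event_attr_CYC_g = {
	.attr = __ATTR(cpu-cycles, 0444, power_events_sysfs_show, NULL),
	.id   = PME_PM_CYC,
};
/* EVENT_PTR(CYC, _g) then yields &event_attr_CYC_g.attr.attr for the
 * attribute array. */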
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ea2a86e8ff95..2d0e1f5d8339 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3d990d3bd8ba..ac057013f9fd 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -94,7 +94,7 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
 BEGIN_FW_FTR_SECTION
 	beq	33f
 	/* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
 
 	/*
 	 * A syscall should always be called with interrupts enabled
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 127361e093f4..f77fa22754bc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (stolen)
 		account_steal_time(stolen);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -377,7 +378,7 @@ void vtime_account_user(struct task_struct *tsk)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
 
@@ -668,7 +669,7 @@ int update_persistent_clock(struct timespec now)
 	struct rtc_time tm;
 
 	if (!ppc_md.set_rtc_time)
-		return 0;
+		return -ENODEV;
 
 	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 	tm.tm_year -= 1900;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index aa2465e21f1a..fa476d50791f 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1305,6 +1305,16 @@ static int power_pmu_event_idx(struct perf_event *event)
 	return event->hw.idx;
 }
 
+ssize_t power_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
 struct pmu power_pmu = {
 	.pmu_enable	= power_pmu_enable,
 	.pmu_disable	= power_pmu_disable,
@@ -1537,6 +1547,8 @@ int __cpuinit register_power_pmu(struct power_pmu *pmu)
 	pr_info("%s performance monitor hardware support registered\n",
 		pmu->name);
 
+	power_pmu.attr_groups = ppmu->attr_groups;
+
 #ifdef MSR_HV
 	/*
 	 * Use FCHV to ignore kernel events if MSR.HV is set.
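
For illustration (not part of the patch): once the PMU's attr_groups are registered, each named event appears under /sys/bus/event_source/devices/<pmu>/events/, and reading it returns the string built by power_events_sysfs_show(). A minimal userspace sketch, with an assumed path:

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/bus/event_source/devices/cpu/events/PM_CYC", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("%s", buf);	/* expect something like "event=0x1e" */
	if (f)
		fclose(f);
	return 0;
}

The same name can then be used symbolically, e.g. perf stat -e cpu/PM_CYC/.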
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 2ee01e38d5e2..b554879bd31e 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -51,6 +51,18 @@
 #define MMCR1_PMCSEL_MSK	0xff
 
 /*
+ * Power7 event codes.
+ */
+#define PME_PM_CYC		0x1e
+#define PME_PM_GCT_NOSLOT_CYC	0x100f8
+#define PME_PM_CMPLU_STALL	0x4000a
+#define PME_PM_INST_CMPL	0x2
+#define PME_PM_LD_REF_L1	0xc880
+#define PME_PM_LD_MISS_L1	0x400f0
+#define PME_PM_BRU_FIN		0x10068
+#define PME_PM_BRU_MPRED	0x400f6
+
+/*
  * Layout of constraint bits:
  * 6666555555555544444444443333333333222222222211111111110000000000
  * 3210987654321098765432109876543210987654321098765432109876543210
@@ -307,14 +319,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 }
 
 static int power7_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES] =			0x1e,
-	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	0x100f8, /* GCT_NOSLOT_CYC */
-	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	0x4000a, /* CMPLU_STALL */
-	[PERF_COUNT_HW_INSTRUCTIONS] =			2,
-	[PERF_COUNT_HW_CACHE_REFERENCES] =		0xc880, /* LD_REF_L1_LSU*/
-	[PERF_COUNT_HW_CACHE_MISSES] =			0x400f0, /* LD_MISS_L1 */
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		0x10068, /* BRU_FIN */
-	[PERF_COUNT_HW_BRANCH_MISSES] =			0x400f6, /* BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES] =			PME_PM_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =	PME_PM_GCT_NOSLOT_CYC,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =	PME_PM_CMPLU_STALL,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PME_PM_INST_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PME_PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PME_PM_LD_MISS_L1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PME_PM_BRU_FIN,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PME_PM_BRU_MPRED,
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
@@ -362,6 +374,57 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
+
+GENERIC_EVENT_ATTR(cpu-cycles,			CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend,	CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions,		INST_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		LD_MISS_L1);
+GENERIC_EVENT_ATTR(branch-instructions,		BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses,		BRU_MPRED);
+
+POWER_EVENT_ATTR(CYC,				CYC);
+POWER_EVENT_ATTR(GCT_NOSLOT_CYC,		GCT_NOSLOT_CYC);
+POWER_EVENT_ATTR(CMPLU_STALL,			CMPLU_STALL);
+POWER_EVENT_ATTR(INST_CMPL,			INST_CMPL);
+POWER_EVENT_ATTR(LD_REF_L1,			LD_REF_L1);
+POWER_EVENT_ATTR(LD_MISS_L1,			LD_MISS_L1);
+POWER_EVENT_ATTR(BRU_FIN,			BRU_FIN);
+POWER_EVENT_ATTR(BRU_MPRED,			BRU_MPRED);
+
+static struct attribute *power7_events_attr[] = {
+	GENERIC_EVENT_PTR(CYC),
+	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
+	GENERIC_EVENT_PTR(CMPLU_STALL),
+	GENERIC_EVENT_PTR(INST_CMPL),
+	GENERIC_EVENT_PTR(LD_REF_L1),
+	GENERIC_EVENT_PTR(LD_MISS_L1),
+	GENERIC_EVENT_PTR(BRU_FIN),
+	GENERIC_EVENT_PTR(BRU_MPRED),
+
+	POWER_EVENT_PTR(CYC),
+	POWER_EVENT_PTR(GCT_NOSLOT_CYC),
+	POWER_EVENT_PTR(CMPLU_STALL),
+	POWER_EVENT_PTR(INST_CMPL),
+	POWER_EVENT_PTR(LD_REF_L1),
+	POWER_EVENT_PTR(LD_MISS_L1),
+	POWER_EVENT_PTR(BRU_FIN),
+	POWER_EVENT_PTR(BRU_MPRED),
+	NULL
+};
+
+
+static struct attribute_group power7_pmu_events_group = {
+	.name = "events",
+	.attrs = power7_events_attr,
+};
+
+static const struct attribute_group *power7_pmu_attr_groups[] = {
+	&power7_pmu_events_group,
+	NULL,
+};
+
 static struct power_pmu power7_pmu = {
 	.name			= "POWER7",
 	.n_counter		= 6,
@@ -373,6 +436,7 @@ static struct power_pmu power7_pmu = {
 	.get_alternatives	= power7_get_alternatives,
 	.disable_pmc		= power7_disable_pmc,
 	.flags			= PPMU_ALT_SIPR,
+	.attr_groups		= power7_pmu_attr_groups,
 	.n_generic		= ARRAY_SIZE(power7_generic_events),
 	.generic_events		= power7_generic_events,
 	.cache_events		= &power7_cache_events,
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 25db92a8e1cf..49318385d4fa 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -24,6 +24,7 @@
 
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index a7648543c59e..0cc0ac07a55d 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 	return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
 	return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_enable(struct dtl *dtl)
 {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ca55882465d6..527e12c9573b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline int alloc_dispatch_logs(void)
 {
 	return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b5ea38c25647..c15ba7d1be64 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -78,7 +78,6 @@ config S390
 	select HAVE_KVM if 64BIT
 	select HAVE_ARCH_TRACEHOOK
 	select INIT_ALL_POSSIBLE
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index a5f4f5a1d24b..0aa98db8a80d 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -120,6 +120,9 @@ static int s390_next_ktime(ktime_t expires,
 	nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires));
 	do_div(nsecs, 125);
 	S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9);
+	/* Program the maximum value if we have an overflow (== year 2042) */
+	if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc))
+		S390_lowcore.clock_comparator = -1ULL;
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	return 0;
 }
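
The added check relies on unsigned wraparound: if base + delta overflows 64 bits, the sum compares less than the base, so the comparator is clamped to the maximum programmable value. A standalone sketch of the same idiom:

#include <stdint.h>
#include <stdio.h>

/* Clamp an absolute deadline when base + delta wraps around 64 bits;
 * mirrors the clock-comparator fix above. */
static uint64_t clamp_deadline(uint64_t base, uint64_t delta)
{
	uint64_t deadline = base + delta;

	if (deadline < base)		/* unsigned wrap => overflow */
		deadline = UINT64_MAX;	/* the -1ULL case */
	return deadline;
}

int main(void)
{
	printf("%llx\n", (unsigned long long)clamp_deadline(~0ULL - 5, 10));
	return 0;
}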
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e84b8b68444a..ce9cc5aa2033 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
@@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
 
 	virt_timer_forward(system);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
 void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account")));
+__attribute__((alias("vtime_account_irq_enter")));
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index babc2b826c5c..9c833c585871 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,7 +11,6 @@ config SUPERH
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
-	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_CUSTOM_GPIO_H
@@ -91,9 +90,6 @@ config GENERIC_CSUM
 config GENERIC_HWEIGHT
 	def_bool y
 
-config IRQ_PER_CPU
-	def_bool y
-
 config GENERIC_GPIO
 	def_bool n
 
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 9f2edb5c5551..9bff3db17c8c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -23,7 +23,6 @@ config SPARC
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select RTC_CLASS
 	select RTC_DRV_M48T59
-	select HAVE_IRQ_WORK
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select HAVE_ARCH_JUMP_LABEL
@@ -61,6 +60,7 @@ config SPARC64
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_SYSCALL_WRAPPERS
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 7870be0f5adc..08fcce90316b 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -71,7 +71,6 @@
 #define PMD_PADDR	_AC(0xfffffffe,UL)
 #define PMD_PADDR_SHIFT	_AC(11,UL)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define PMD_ISHUGE	_AC(0x00000001,UL)
 
 /* This is the PMD layout when PMD_ISHUGE is set.  With 4MB huge
@@ -86,7 +85,6 @@
 #define PMD_HUGE_ACCESSED	_AC(0x00000080,UL)
 #define PMD_HUGE_EXEC		_AC(0x00000040,UL)
 #define PMD_HUGE_SPLITTING	_AC(0x00000020,UL)
-#endif
 
 /* PGDs point to PMD tables which are 8K aligned.  */
 #define PGD_PADDR	_AC(0xfffffffc,UL)
@@ -628,6 +626,12 @@ static inline unsigned long pte_special(pte_t pte)
 	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+		(PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_young(pmd_t pmd)
 {
@@ -646,12 +650,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
 }
 
-static inline int pmd_large(pmd_t pmd)
-{
-	return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
-		(PMD_ISHUGE | PMD_HUGE_PRESENT);
-}
-
 static inline int pmd_trans_splitting(pmd_t pmd)
 {
 	return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c
index 1271b3a27d4e..be5bdf93c767 100644
--- a/arch/sparc/kernel/sbus.c
+++ b/arch/sparc/kernel/sbus.c
@@ -554,10 +554,8 @@ static void __init sbus_iommu_init(struct platform_device *op)
 	regs = pr->phys_addr;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC);
-	if (!iommu)
-		goto fatal_memory_error;
 	strbuf = kzalloc(sizeof(*strbuf), GFP_ATOMIC);
-	if (!strbuf)
+	if (!iommu || !strbuf)
 		goto fatal_memory_error;
 
 	op->dev.archdata.iommu = iommu;
@@ -656,6 +654,8 @@ static void __init sbus_iommu_init(struct platform_device *op)
 	return;
 
 fatal_memory_error:
+	kfree(iommu);
+	kfree(strbuf);
 	prom_printf("sbus_iommu_init: Fatal memory allocation error.\n");
 }
 
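
The consolidation above works because kfree(NULL) is defined to be a no-op, so a single error label can free both pointers no matter which allocation failed. A sketch of the idiom with hypothetical types:

/* Hypothetical kernel-style example: allocate everything, test once,
 * free unconditionally on the error path. */
struct foo { int x; };
struct bar { int y; };

static int setup_pair(struct foo **fp, struct bar **bp)
{
	struct foo *a = kzalloc(sizeof(*a), GFP_KERNEL);
	struct bar *b = kzalloc(sizeof(*b), GFP_KERNEL);

	if (!a || !b)
		goto err;
	*fp = a;
	*bp = b;
	return 0;
err:
	kfree(a);	/* safe even when a == NULL */
	kfree(b);
	return -ENOMEM;
}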
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 42c55df3aec3..01ee23dd724d 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -66,6 +66,56 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 	return 1;
 }
 
+static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+			unsigned long end, int write, struct page **pages,
+			int *nr)
+{
+	struct page *head, *page, *tail;
+	u32 mask;
+	int refs;
+
+	mask = PMD_HUGE_PRESENT;
+	if (write)
+		mask |= PMD_HUGE_WRITE;
+	if ((pmd_val(pmd) & mask) != mask)
+		return 0;
+
+	refs = 0;
+	head = pmd_page(pmd);
+	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	tail = page;
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	/* Any tail pages need their mapcount reference taken before we
+	 * return.
+	 */
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
+	}
+
+	return 1;
+}
+
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		int write, struct page **pages, int *nr)
 {
@@ -77,9 +127,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 		pmd_t pmd = *pmdp;
 
 		next = pmd_addr_end(addr, end);
-		if (pmd_none(pmd))
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
-		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+		if (unlikely(pmd_large(pmd))) {
+			if (!gup_huge_pmd(pmdp, pmd, addr, next,
+					  write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmd, addr, next, write,
+					  pages, nr))
 			return 0;
 	} while (pmdp++, addr = next, addr != end);
 
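
For context (not part of the patch): gup_huge_pmd() is reached through get_user_pages_fast(), which walks the page tables without taking mmap_sem; the speculative refcount plus the pmd re-check above is what makes that safe. A hypothetical caller, assuming the signature of kernels of this vintage:

#include <linux/mm.h>

/* Sketch only: pin npages of a user buffer, e.g. for DMA-like access. */
static int pin_user_buffer(unsigned long uaddr, int npages,
			   struct page **pages)
{
	int got = get_user_pages_fast(uaddr, npages, 1 /* write */, pages);

	/* got may be short of npages; callers must put_page() each
	 * page that was actually pinned. */
	return got;
}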
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 875d008828b8..1bb7ad4aeff4 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -140,6 +140,8 @@ config ARCH_DEFCONFIG
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 menu "Tilera-specific configuration"
 
 config NR_CPUS
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index 2a9b293fece6..31672918064c 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -250,7 +250,9 @@ static inline void writeq(u64 val, unsigned long addr)
 #define iowrite32 writel
 #define iowrite64 writeq
 
-static inline void memset_io(void *dst, int val, size_t len)
+#if CHIP_HAS_MMIO() || defined(CONFIG_PCI)
+
+static inline void memset_io(volatile void *dst, int val, size_t len)
 {
 	int x;
 	BUG_ON((unsigned long)dst & 0x3);
@@ -277,6 +279,8 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
 		writel(*(u32 *)(src + x), dst + x);
 }
 
+#endif
+
 /*
  * The Tile architecture does not support IOPORT, even with PCI.
  * Unfortunately we can't yet simply not declare these methods,
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index b4e96fef2cf8..241c0bb60b12 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,32 +18,20 @@
 #include <arch/interrupts.h>
 #include <arch/chip.h>
 
-#if !defined(__tilegx__) && defined(__ASSEMBLY__)
-
 /*
  * The set of interrupts we want to allow when interrupts are nominally
  * disabled.  The remainder are effectively "NMI" interrupts from
  * the point of view of the generic Linux code.  Note that synchronous
  * interrupts (aka "non-queued") are not blocked by the mask in any case.
  */
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS_HI \
-	(~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
-#else
-#define LINUX_MASKABLE_INTERRUPTS_HI \
-	(~(INT_MASK_HI(INT_PERF_COUNT)))
-#endif
-
-#else
-
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS \
-	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
-#else
 #define LINUX_MASKABLE_INTERRUPTS \
-	(~(INT_MASK(INT_PERF_COUNT)))
-#endif
+	(~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT)))
 
+#if CHIP_HAS_SPLIT_INTR_MASK()
+/* The same macro, but for the two 32-bit SPRs separately. */
+#define LINUX_MASKABLE_INTERRUPTS_LO (-1)
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32))))
 #endif
 
 #ifndef __ASSEMBLY__
@@ -126,7 +114,7 @@
  * to know our current state.
  */
 DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
-#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
 
 /* Disable interrupts. */
 #define arch_local_irq_disable() \
@@ -165,7 +153,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Prevent the given interrupt from being enabled next time we enable irqs. */
 #define arch_local_irq_mask(interrupt) \
-	(__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+	(__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt)))
 
 /* Prevent the given interrupt from being enabled immediately. */
 #define arch_local_irq_mask_now(interrupt) do { \
@@ -175,7 +163,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Allow the given interrupt to be enabled next time we enable irqs. */
 #define arch_local_irq_unmask(interrupt) \
-	(__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+	(__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt)))
 
 /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
 #define arch_local_irq_unmask_now(interrupt) do { \
@@ -250,7 +238,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 /* Disable interrupts. */
 #define IRQ_DISABLE(tmp0, tmp1) \
 	{ \
-	 movei  tmp0, -1; \
+	 movei  tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \
 	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
 	}; \
 	{ \
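
A standalone illustration of the new mask arithmetic and its 32-bit split; the interrupt numbers here are invented, not the real tile bit positions:

#include <stdint.h>
#include <stdio.h>

#define INT_PERF_COUNT		33	/* hypothetical */
#define INT_AUX_PERF_COUNT	34	/* hypothetical */

#define LINUX_MASKABLE_INTERRUPTS \
	(~((1ULL << INT_PERF_COUNT) | (1ULL << INT_AUX_PERF_COUNT)))
#define MASK_LO ((uint32_t)LINUX_MASKABLE_INTERRUPTS)		/* low SPR */
#define MASK_HI ((uint32_t)(LINUX_MASKABLE_INTERRUPTS >> 32))	/* high SPR */

int main(void)
{
	/* With both bits >= 32, the low half is all ones (-1), matching
	 * LINUX_MASKABLE_INTERRUPTS_LO in the patch. */
	printf("mask=%016llx lo=%08x hi=%08x\n",
	       (unsigned long long)LINUX_MASKABLE_INTERRUPTS,
	       MASK_LO, MASK_HI);
	return 0;
}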
diff --git a/arch/tile/include/uapi/arch/interrupts_32.h b/arch/tile/include/uapi/arch/interrupts_32.h
index 96b5710505b6..2efe3f68b2d6 100644
--- a/arch/tile/include/uapi/arch/interrupts_32.h
+++ b/arch/tile/include/uapi/arch/interrupts_32.h
@@ -15,6 +15,7 @@
 #ifndef __ARCH_INTERRUPTS_H__
 #define __ARCH_INTERRUPTS_H__
 
+#ifndef __KERNEL__
 /** Mask for an interrupt. */
 /* Note: must handle breaking interrupts into high and low words manually. */
 #define INT_MASK_LO(intno) (1 << (intno))
@@ -23,6 +24,7 @@
 #ifndef __ASSEMBLER__
 #define INT_MASK(intno) (1ULL << (intno))
 #endif
+#endif
 
 
 /** Where a given interrupt executes */
@@ -92,216 +94,216 @@
 
 #ifndef __ASSEMBLER__
 #define QUEUED_INTERRUPTS ( \
-	INT_MASK(INT_MEM_ERROR) | \
-	INT_MASK(INT_DMATLB_MISS) | \
-	INT_MASK(INT_DMATLB_ACCESS) | \
-	INT_MASK(INT_SNITLB_MISS) | \
-	INT_MASK(INT_SN_NOTIFY) | \
-	INT_MASK(INT_SN_FIREWALL) | \
-	INT_MASK(INT_IDN_FIREWALL) | \
-	INT_MASK(INT_UDN_FIREWALL) | \
-	INT_MASK(INT_TILE_TIMER) | \
-	INT_MASK(INT_IDN_TIMER) | \
-	INT_MASK(INT_UDN_TIMER) | \
-	INT_MASK(INT_DMA_NOTIFY) | \
-	INT_MASK(INT_IDN_CA) | \
-	INT_MASK(INT_UDN_CA) | \
-	INT_MASK(INT_IDN_AVAIL) | \
-	INT_MASK(INT_UDN_AVAIL) | \
-	INT_MASK(INT_PERF_COUNT) | \
-	INT_MASK(INT_INTCTRL_3) | \
-	INT_MASK(INT_INTCTRL_2) | \
-	INT_MASK(INT_INTCTRL_1) | \
-	INT_MASK(INT_INTCTRL_0) | \
-	INT_MASK(INT_BOOT_ACCESS) | \
-	INT_MASK(INT_WORLD_ACCESS) | \
-	INT_MASK(INT_I_ASID) | \
-	INT_MASK(INT_D_ASID) | \
-	INT_MASK(INT_DMA_ASID) | \
-	INT_MASK(INT_SNI_ASID) | \
-	INT_MASK(INT_DMA_CPL) | \
-	INT_MASK(INT_SN_CPL) | \
-	INT_MASK(INT_DOUBLE_FAULT) | \
-	INT_MASK(INT_AUX_PERF_COUNT) | \
+	(1ULL << INT_MEM_ERROR) | \
+	(1ULL << INT_DMATLB_MISS) | \
+	(1ULL << INT_DMATLB_ACCESS) | \
+	(1ULL << INT_SNITLB_MISS) | \
+	(1ULL << INT_SN_NOTIFY) | \
+	(1ULL << INT_SN_FIREWALL) | \
+	(1ULL << INT_IDN_FIREWALL) | \
+	(1ULL << INT_UDN_FIREWALL) | \
+	(1ULL << INT_TILE_TIMER) | \
+	(1ULL << INT_IDN_TIMER) | \
+	(1ULL << INT_UDN_TIMER) | \
+	(1ULL << INT_DMA_NOTIFY) | \
+	(1ULL << INT_IDN_CA) | \
+	(1ULL << INT_UDN_CA) | \
+	(1ULL << INT_IDN_AVAIL) | \
+	(1ULL << INT_UDN_AVAIL) | \
+	(1ULL << INT_PERF_COUNT) | \
+	(1ULL << INT_INTCTRL_3) | \
+	(1ULL << INT_INTCTRL_2) | \
+	(1ULL << INT_INTCTRL_1) | \
+	(1ULL << INT_INTCTRL_0) | \
+	(1ULL << INT_BOOT_ACCESS) | \
+	(1ULL << INT_WORLD_ACCESS) | \
+	(1ULL << INT_I_ASID) | \
+	(1ULL << INT_D_ASID) | \
+	(1ULL << INT_DMA_ASID) | \
+	(1ULL << INT_SNI_ASID) | \
+	(1ULL << INT_DMA_CPL) | \
+	(1ULL << INT_SN_CPL) | \
+	(1ULL << INT_DOUBLE_FAULT) | \
+	(1ULL << INT_AUX_PERF_COUNT) | \
 	0)
 #define NONQUEUED_INTERRUPTS ( \
-	INT_MASK(INT_ITLB_MISS) | \
-	INT_MASK(INT_ILL) | \
-	INT_MASK(INT_GPV) | \
-	INT_MASK(INT_SN_ACCESS) | \
-	INT_MASK(INT_IDN_ACCESS) | \
-	INT_MASK(INT_UDN_ACCESS) | \
-	INT_MASK(INT_IDN_REFILL) | \
-	INT_MASK(INT_UDN_REFILL) | \
-	INT_MASK(INT_IDN_COMPLETE) | \
-	INT_MASK(INT_UDN_COMPLETE) | \
-	INT_MASK(INT_SWINT_3) | \
-	INT_MASK(INT_SWINT_2) | \
-	INT_MASK(INT_SWINT_1) | \
-	INT_MASK(INT_SWINT_0) | \
-	INT_MASK(INT_UNALIGN_DATA) | \
-	INT_MASK(INT_DTLB_MISS) | \
-	INT_MASK(INT_DTLB_ACCESS) | \
-	INT_MASK(INT_SN_STATIC_ACCESS) | \
+	(1ULL << INT_ITLB_MISS) | \
+	(1ULL << INT_ILL) | \
+	(1ULL << INT_GPV) | \
+	(1ULL << INT_SN_ACCESS) | \
+	(1ULL << INT_IDN_ACCESS) | \
+	(1ULL << INT_UDN_ACCESS) | \
+	(1ULL << INT_IDN_REFILL) | \
+	(1ULL << INT_UDN_REFILL) | \
+	(1ULL << INT_IDN_COMPLETE) | \
+	(1ULL << INT_UDN_COMPLETE) | \
+	(1ULL << INT_SWINT_3) | \
+	(1ULL << INT_SWINT_2) | \
+	(1ULL << INT_SWINT_1) | \
+	(1ULL << INT_SWINT_0) | \
+	(1ULL << INT_UNALIGN_DATA) | \
+	(1ULL << INT_DTLB_MISS) | \
+	(1ULL << INT_DTLB_ACCESS) | \
+	(1ULL << INT_SN_STATIC_ACCESS) | \
 	0)
 #define CRITICAL_MASKED_INTERRUPTS ( \
-	INT_MASK(INT_MEM_ERROR) | \
-	INT_MASK(INT_DMATLB_MISS) | \
-	INT_MASK(INT_DMATLB_ACCESS) | \
-	INT_MASK(INT_SNITLB_MISS) | \
-	INT_MASK(INT_SN_NOTIFY) | \
-	INT_MASK(INT_SN_FIREWALL) | \
-	INT_MASK(INT_IDN_FIREWALL) | \
-	INT_MASK(INT_UDN_FIREWALL) | \
-	INT_MASK(INT_TILE_TIMER) | \
-	INT_MASK(INT_IDN_TIMER) | \
-	INT_MASK(INT_UDN_TIMER) | \
-	INT_MASK(INT_DMA_NOTIFY) | \
-	INT_MASK(INT_IDN_CA) | \
-	INT_MASK(INT_UDN_CA) | \
-	INT_MASK(INT_IDN_AVAIL) | \
-	INT_MASK(INT_UDN_AVAIL) | \
-	INT_MASK(INT_PERF_COUNT) | \
-	INT_MASK(INT_INTCTRL_3) | \
-	INT_MASK(INT_INTCTRL_2) | \
-	INT_MASK(INT_INTCTRL_1) | \
-	INT_MASK(INT_INTCTRL_0) | \
-	INT_MASK(INT_AUX_PERF_COUNT) | \
+	(1ULL << INT_MEM_ERROR) | \
+	(1ULL << INT_DMATLB_MISS) | \
+	(1ULL << INT_DMATLB_ACCESS) | \
+	(1ULL << INT_SNITLB_MISS) | \
+	(1ULL << INT_SN_NOTIFY) | \
+	(1ULL << INT_SN_FIREWALL) | \
+	(1ULL << INT_IDN_FIREWALL) | \
+	(1ULL << INT_UDN_FIREWALL) | \
+	(1ULL << INT_TILE_TIMER) | \
+	(1ULL << INT_IDN_TIMER) | \
+	(1ULL << INT_UDN_TIMER) | \
+	(1ULL << INT_DMA_NOTIFY) | \
+	(1ULL << INT_IDN_CA) | \
+	(1ULL << INT_UDN_CA) | \
+	(1ULL << INT_IDN_AVAIL) | \
+	(1ULL << INT_UDN_AVAIL) | \
+	(1ULL << INT_PERF_COUNT) | \
+	(1ULL << INT_INTCTRL_3) | \
+	(1ULL << INT_INTCTRL_2) | \
+	(1ULL << INT_INTCTRL_1) | \
+	(1ULL << INT_INTCTRL_0) | \
+	(1ULL << INT_AUX_PERF_COUNT) | \
 	0)
 #define CRITICAL_UNMASKED_INTERRUPTS ( \
-	INT_MASK(INT_ITLB_MISS) | \
-	INT_MASK(INT_ILL) | \
-	INT_MASK(INT_GPV) | \
-	INT_MASK(INT_SN_ACCESS) | \
-	INT_MASK(INT_IDN_ACCESS) | \
-	INT_MASK(INT_UDN_ACCESS) | \
-	INT_MASK(INT_IDN_REFILL) | \
-	INT_MASK(INT_UDN_REFILL) | \
-	INT_MASK(INT_IDN_COMPLETE) | \
-	INT_MASK(INT_UDN_COMPLETE) | \
-	INT_MASK(INT_SWINT_3) | \
-	INT_MASK(INT_SWINT_2) | \
-	INT_MASK(INT_SWINT_1) | \
-	INT_MASK(INT_SWINT_0) | \
-	INT_MASK(INT_UNALIGN_DATA) | \
-	INT_MASK(INT_DTLB_MISS) | \
-	INT_MASK(INT_DTLB_ACCESS) | \
-	INT_MASK(INT_BOOT_ACCESS) | \
-	INT_MASK(INT_WORLD_ACCESS) | \
-	INT_MASK(INT_I_ASID) | \
-	INT_MASK(INT_D_ASID) | \
-	INT_MASK(INT_DMA_ASID) | \
-	INT_MASK(INT_SNI_ASID) | \
-	INT_MASK(INT_DMA_CPL) | \
-	INT_MASK(INT_SN_CPL) | \
-	INT_MASK(INT_DOUBLE_FAULT) | \
-	INT_MASK(INT_SN_STATIC_ACCESS) | \
+	(1ULL << INT_ITLB_MISS) | \
+	(1ULL << INT_ILL) | \
+	(1ULL << INT_GPV) | \
+	(1ULL << INT_SN_ACCESS) | \
+	(1ULL << INT_IDN_ACCESS) | \
+	(1ULL << INT_UDN_ACCESS) | \
+	(1ULL << INT_IDN_REFILL) | \
+	(1ULL << INT_UDN_REFILL) | \
+	(1ULL << INT_IDN_COMPLETE) | \
+	(1ULL << INT_UDN_COMPLETE) | \
+	(1ULL << INT_SWINT_3) | \
+	(1ULL << INT_SWINT_2) | \
+	(1ULL << INT_SWINT_1) | \
+	(1ULL << INT_SWINT_0) | \
+	(1ULL << INT_UNALIGN_DATA) | \
+	(1ULL << INT_DTLB_MISS) | \
+	(1ULL << INT_DTLB_ACCESS) | \
+	(1ULL << INT_BOOT_ACCESS) | \
+	(1ULL << INT_WORLD_ACCESS) | \
+	(1ULL << INT_I_ASID) | \
+	(1ULL << INT_D_ASID) | \
+	(1ULL << INT_DMA_ASID) | \
+	(1ULL << INT_SNI_ASID) | \
+	(1ULL << INT_DMA_CPL) | \
+	(1ULL << INT_SN_CPL) | \
+	(1ULL << INT_DOUBLE_FAULT) | \
+	(1ULL << INT_SN_STATIC_ACCESS) | \
 	0)
 #define MASKABLE_INTERRUPTS ( \
-	INT_MASK(INT_MEM_ERROR) | \
-	INT_MASK(INT_IDN_REFILL) | \
-	INT_MASK(INT_UDN_REFILL) | \
-	INT_MASK(INT_IDN_COMPLETE) | \
-	INT_MASK(INT_UDN_COMPLETE) | \
-	INT_MASK(INT_DMATLB_MISS) | \
-	INT_MASK(INT_DMATLB_ACCESS) | \
-	INT_MASK(INT_SNITLB_MISS) | \
-	INT_MASK(INT_SN_NOTIFY) | \
-	INT_MASK(INT_SN_FIREWALL) | \
-	INT_MASK(INT_IDN_FIREWALL) | \
-	INT_MASK(INT_UDN_FIREWALL) | \
-	INT_MASK(INT_TILE_TIMER) | \
-	INT_MASK(INT_IDN_TIMER) | \
-	INT_MASK(INT_UDN_TIMER) | \
-	INT_MASK(INT_DMA_NOTIFY) | \
-	INT_MASK(INT_IDN_CA) | \
-	INT_MASK(INT_UDN_CA) | \
-	INT_MASK(INT_IDN_AVAIL) | \
-	INT_MASK(INT_UDN_AVAIL) | \
-	INT_MASK(INT_PERF_COUNT) | \
-	INT_MASK(INT_INTCTRL_3) | \
-	INT_MASK(INT_INTCTRL_2) | \
-	INT_MASK(INT_INTCTRL_1) | \
-	INT_MASK(INT_INTCTRL_0) | \
-	INT_MASK(INT_AUX_PERF_COUNT) | \
+	(1ULL << INT_MEM_ERROR) | \
+	(1ULL << INT_IDN_REFILL) | \
+	(1ULL << INT_UDN_REFILL) | \
+	(1ULL << INT_IDN_COMPLETE) | \
+	(1ULL << INT_UDN_COMPLETE) | \
+	(1ULL << INT_DMATLB_MISS) | \
+	(1ULL << INT_DMATLB_ACCESS) | \
+	(1ULL << INT_SNITLB_MISS) | \
+	(1ULL << INT_SN_NOTIFY) | \
+	(1ULL << INT_SN_FIREWALL) | \
+	(1ULL << INT_IDN_FIREWALL) | \
+	(1ULL << INT_UDN_FIREWALL) | \
+	(1ULL << INT_TILE_TIMER) | \
+	(1ULL << INT_IDN_TIMER) | \
+	(1ULL << INT_UDN_TIMER) | \
+	(1ULL << INT_DMA_NOTIFY) | \
+	(1ULL << INT_IDN_CA) | \
+	(1ULL << INT_UDN_CA) | \
+	(1ULL << INT_IDN_AVAIL) | \
+	(1ULL << INT_UDN_AVAIL) | \
+	(1ULL << INT_PERF_COUNT) | \
+	(1ULL << INT_INTCTRL_3) | \
+	(1ULL << INT_INTCTRL_2) | \
+	(1ULL << INT_INTCTRL_1) | \
+	(1ULL << INT_INTCTRL_0) | \
+	(1ULL << INT_AUX_PERF_COUNT) | \
 	0)
 #define UNMASKABLE_INTERRUPTS ( \
-	INT_MASK(INT_ITLB_MISS) | \
-	INT_MASK(INT_ILL) | \
-	INT_MASK(INT_GPV) | \
-	INT_MASK(INT_SN_ACCESS) | \
-	INT_MASK(INT_IDN_ACCESS) | \
-	INT_MASK(INT_UDN_ACCESS) | \
-	INT_MASK(INT_SWINT_3) | \
-	INT_MASK(INT_SWINT_2) | \
-	INT_MASK(INT_SWINT_1) | \
-	INT_MASK(INT_SWINT_0) | \
-	INT_MASK(INT_UNALIGN_DATA) | \
-	INT_MASK(INT_DTLB_MISS) | \
-	INT_MASK(INT_DTLB_ACCESS) | \
-	INT_MASK(INT_BOOT_ACCESS) | \
-	INT_MASK(INT_WORLD_ACCESS) | \
-	INT_MASK(INT_I_ASID) | \
-	INT_MASK(INT_D_ASID) | \
-	INT_MASK(INT_DMA_ASID) | \
-	INT_MASK(INT_SNI_ASID) | \
-	INT_MASK(INT_DMA_CPL) | \
-	INT_MASK(INT_SN_CPL) | \
-	INT_MASK(INT_DOUBLE_FAULT) | \
-	INT_MASK(INT_SN_STATIC_ACCESS) | \
+	(1ULL << INT_ITLB_MISS) | \
+	(1ULL << INT_ILL) | \
+	(1ULL << INT_GPV) | \
+	(1ULL << INT_SN_ACCESS) | \
+	(1ULL << INT_IDN_ACCESS) | \
+	(1ULL << INT_UDN_ACCESS) | \
+	(1ULL << INT_SWINT_3) | \
+	(1ULL << INT_SWINT_2) | \
+	(1ULL << INT_SWINT_1) | \
+	(1ULL << INT_SWINT_0) | \
+	(1ULL << INT_UNALIGN_DATA) | \
+	(1ULL << INT_DTLB_MISS) | \
+	(1ULL << INT_DTLB_ACCESS) | \
+	(1ULL << INT_BOOT_ACCESS) | \
+	(1ULL << INT_WORLD_ACCESS) | \
+	(1ULL << INT_I_ASID) | \
+	(1ULL << INT_D_ASID) | \
+	(1ULL << INT_DMA_ASID) | \
+	(1ULL << INT_SNI_ASID) | \
+	(1ULL << INT_DMA_CPL) | \
+	(1ULL << INT_SN_CPL) | \
+	(1ULL << INT_DOUBLE_FAULT) | \
+	(1ULL << INT_SN_STATIC_ACCESS) | \
 	0)
 #define SYNC_INTERRUPTS ( \
-	INT_MASK(INT_ITLB_MISS) | \
-	INT_MASK(INT_ILL) | \
-	INT_MASK(INT_GPV) | \
-	INT_MASK(INT_SN_ACCESS) | \
-	INT_MASK(INT_IDN_ACCESS) | \
-	INT_MASK(INT_UDN_ACCESS) | \
-	INT_MASK(INT_IDN_REFILL) | \
-	INT_MASK(INT_UDN_REFILL) | \
-	INT_MASK(INT_IDN_COMPLETE) | \
-	INT_MASK(INT_UDN_COMPLETE) | \
-	INT_MASK(INT_SWINT_3) | \
-	INT_MASK(INT_SWINT_2) | \
-	INT_MASK(INT_SWINT_1) | \
-	INT_MASK(INT_SWINT_0) | \
-	INT_MASK(INT_UNALIGN_DATA) | \
-	INT_MASK(INT_DTLB_MISS) | \
-	INT_MASK(INT_DTLB_ACCESS) | \
-	INT_MASK(INT_SN_STATIC_ACCESS) | \
+	(1ULL << INT_ITLB_MISS) | \
+	(1ULL << INT_ILL) | \
+	(1ULL << INT_GPV) | \
+	(1ULL << INT_SN_ACCESS) | \
+	(1ULL << INT_IDN_ACCESS) | \
+	(1ULL << INT_UDN_ACCESS) | \
+	(1ULL << INT_IDN_REFILL) | \
+	(1ULL << INT_UDN_REFILL) | \
+	(1ULL << INT_IDN_COMPLETE) | \
+	(1ULL << INT_UDN_COMPLETE) | \
+	(1ULL << INT_SWINT_3) | \
+	(1ULL << INT_SWINT_2) | \
+	(1ULL << INT_SWINT_1) | \
+	(1ULL << INT_SWINT_0) | \
+	(1ULL << INT_UNALIGN_DATA) | \
+	(1ULL << INT_DTLB_MISS) | \
+	(1ULL << INT_DTLB_ACCESS) | \
+	(1ULL << INT_SN_STATIC_ACCESS) | \
 	0)
 #define NON_SYNC_INTERRUPTS ( \
274 INT_MASK(INT_MEM_ERROR) | \ 276 (1ULL << INT_MEM_ERROR) | \
275 INT_MASK(INT_DMATLB_MISS) | \ 277 (1ULL << INT_DMATLB_MISS) | \
276 INT_MASK(INT_DMATLB_ACCESS) | \ 278 (1ULL << INT_DMATLB_ACCESS) | \
277 INT_MASK(INT_SNITLB_MISS) | \ 279 (1ULL << INT_SNITLB_MISS) | \
278 INT_MASK(INT_SN_NOTIFY) | \ 280 (1ULL << INT_SN_NOTIFY) | \
279 INT_MASK(INT_SN_FIREWALL) | \ 281 (1ULL << INT_SN_FIREWALL) | \
280 INT_MASK(INT_IDN_FIREWALL) | \ 282 (1ULL << INT_IDN_FIREWALL) | \
281 INT_MASK(INT_UDN_FIREWALL) | \ 283 (1ULL << INT_UDN_FIREWALL) | \
282 INT_MASK(INT_TILE_TIMER) | \ 284 (1ULL << INT_TILE_TIMER) | \
283 INT_MASK(INT_IDN_TIMER) | \ 285 (1ULL << INT_IDN_TIMER) | \
284 INT_MASK(INT_UDN_TIMER) | \ 286 (1ULL << INT_UDN_TIMER) | \
285 INT_MASK(INT_DMA_NOTIFY) | \ 287 (1ULL << INT_DMA_NOTIFY) | \
286 INT_MASK(INT_IDN_CA) | \ 288 (1ULL << INT_IDN_CA) | \
287 INT_MASK(INT_UDN_CA) | \ 289 (1ULL << INT_UDN_CA) | \
288 INT_MASK(INT_IDN_AVAIL) | \ 290 (1ULL << INT_IDN_AVAIL) | \
289 INT_MASK(INT_UDN_AVAIL) | \ 291 (1ULL << INT_UDN_AVAIL) | \
290 INT_MASK(INT_PERF_COUNT) | \ 292 (1ULL << INT_PERF_COUNT) | \
291 INT_MASK(INT_INTCTRL_3) | \ 293 (1ULL << INT_INTCTRL_3) | \
292 INT_MASK(INT_INTCTRL_2) | \ 294 (1ULL << INT_INTCTRL_2) | \
293 INT_MASK(INT_INTCTRL_1) | \ 295 (1ULL << INT_INTCTRL_1) | \
294 INT_MASK(INT_INTCTRL_0) | \ 296 (1ULL << INT_INTCTRL_0) | \
295 INT_MASK(INT_BOOT_ACCESS) | \ 297 (1ULL << INT_BOOT_ACCESS) | \
296 INT_MASK(INT_WORLD_ACCESS) | \ 298 (1ULL << INT_WORLD_ACCESS) | \
297 INT_MASK(INT_I_ASID) | \ 299 (1ULL << INT_I_ASID) | \
298 INT_MASK(INT_D_ASID) | \ 300 (1ULL << INT_D_ASID) | \
299 INT_MASK(INT_DMA_ASID) | \ 301 (1ULL << INT_DMA_ASID) | \
300 INT_MASK(INT_SNI_ASID) | \ 302 (1ULL << INT_SNI_ASID) | \
301 INT_MASK(INT_DMA_CPL) | \ 303 (1ULL << INT_DMA_CPL) | \
302 INT_MASK(INT_SN_CPL) | \ 304 (1ULL << INT_SN_CPL) | \
303 INT_MASK(INT_DOUBLE_FAULT) | \ 305 (1ULL << INT_DOUBLE_FAULT) | \
304 INT_MASK(INT_AUX_PERF_COUNT) | \ 306 (1ULL << INT_AUX_PERF_COUNT) | \
305 0) 307 0)
306#endif /* !__ASSEMBLER__ */ 308#endif /* !__ASSEMBLER__ */
307#endif /* !__ARCH_INTERRUPTS_H__ */ 309#endif /* !__ARCH_INTERRUPTS_H__ */
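
The INT_MASK() rewrites above are mechanical: every kernel-visible mask definition now open-codes (1ULL << intno), presumably so that the very generic INT_MASK name no longer has to exist in kernel builds, where it can collide with unrelated driver macros of the same name. An illustrative sketch of that kind of preprocessor clash (both definitions hypothetical):

        /* illustrative only: two headers claiming the same generic name */
        #define INT_MASK(intno) (1ULL << (intno))   /* old uapi-style definition */
        #define INT_MASK 0x000000ff                 /* unrelated driver macro:
                                                        "INT_MASK" redefined */

A translation unit pulling in both headers gets a redefinition warning, which becomes a hard failure under -Werror; the open-coded shifts sidestep the problem entirely.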
diff --git a/arch/tile/include/uapi/arch/interrupts_64.h b/arch/tile/include/uapi/arch/interrupts_64.h
index 5bb58b2e4e6f..13c9f9182348 100644
--- a/arch/tile/include/uapi/arch/interrupts_64.h
+++ b/arch/tile/include/uapi/arch/interrupts_64.h
@@ -15,6 +15,7 @@
15#ifndef __ARCH_INTERRUPTS_H__ 15#ifndef __ARCH_INTERRUPTS_H__
16#define __ARCH_INTERRUPTS_H__ 16#define __ARCH_INTERRUPTS_H__
17 17
18#ifndef __KERNEL__
18/** Mask for an interrupt. */ 19/** Mask for an interrupt. */
19#ifdef __ASSEMBLER__ 20#ifdef __ASSEMBLER__
20/* Note: must handle breaking interrupts into high and low words manually. */ 21/* Note: must handle breaking interrupts into high and low words manually. */
@@ -22,6 +23,7 @@
22#else 23#else
23#define INT_MASK(intno) (1ULL << (intno)) 24#define INT_MASK(intno) (1ULL << (intno))
24#endif 25#endif
26#endif
25 27
26 28
27/** Where a given interrupt executes */ 29/** Where a given interrupt executes */
@@ -85,192 +87,192 @@
85 87
86#ifndef __ASSEMBLER__ 88#ifndef __ASSEMBLER__
87#define QUEUED_INTERRUPTS ( \ 89#define QUEUED_INTERRUPTS ( \
88 INT_MASK(INT_MEM_ERROR) | \ 90 (1ULL << INT_MEM_ERROR) | \
89 INT_MASK(INT_IDN_COMPLETE) | \ 91 (1ULL << INT_IDN_COMPLETE) | \
90 INT_MASK(INT_UDN_COMPLETE) | \ 92 (1ULL << INT_UDN_COMPLETE) | \
91 INT_MASK(INT_IDN_FIREWALL) | \ 93 (1ULL << INT_IDN_FIREWALL) | \
92 INT_MASK(INT_UDN_FIREWALL) | \ 94 (1ULL << INT_UDN_FIREWALL) | \
93 INT_MASK(INT_TILE_TIMER) | \ 95 (1ULL << INT_TILE_TIMER) | \
94 INT_MASK(INT_AUX_TILE_TIMER) | \ 96 (1ULL << INT_AUX_TILE_TIMER) | \
95 INT_MASK(INT_IDN_TIMER) | \ 97 (1ULL << INT_IDN_TIMER) | \
96 INT_MASK(INT_UDN_TIMER) | \ 98 (1ULL << INT_UDN_TIMER) | \
97 INT_MASK(INT_IDN_AVAIL) | \ 99 (1ULL << INT_IDN_AVAIL) | \
98 INT_MASK(INT_UDN_AVAIL) | \ 100 (1ULL << INT_UDN_AVAIL) | \
99 INT_MASK(INT_IPI_3) | \ 101 (1ULL << INT_IPI_3) | \
100 INT_MASK(INT_IPI_2) | \ 102 (1ULL << INT_IPI_2) | \
101 INT_MASK(INT_IPI_1) | \ 103 (1ULL << INT_IPI_1) | \
102 INT_MASK(INT_IPI_0) | \ 104 (1ULL << INT_IPI_0) | \
103 INT_MASK(INT_PERF_COUNT) | \ 105 (1ULL << INT_PERF_COUNT) | \
104 INT_MASK(INT_AUX_PERF_COUNT) | \ 106 (1ULL << INT_AUX_PERF_COUNT) | \
105 INT_MASK(INT_INTCTRL_3) | \ 107 (1ULL << INT_INTCTRL_3) | \
106 INT_MASK(INT_INTCTRL_2) | \ 108 (1ULL << INT_INTCTRL_2) | \
107 INT_MASK(INT_INTCTRL_1) | \ 109 (1ULL << INT_INTCTRL_1) | \
108 INT_MASK(INT_INTCTRL_0) | \ 110 (1ULL << INT_INTCTRL_0) | \
109 INT_MASK(INT_BOOT_ACCESS) | \ 111 (1ULL << INT_BOOT_ACCESS) | \
110 INT_MASK(INT_WORLD_ACCESS) | \ 112 (1ULL << INT_WORLD_ACCESS) | \
111 INT_MASK(INT_I_ASID) | \ 113 (1ULL << INT_I_ASID) | \
112 INT_MASK(INT_D_ASID) | \ 114 (1ULL << INT_D_ASID) | \
113 INT_MASK(INT_DOUBLE_FAULT) | \ 115 (1ULL << INT_DOUBLE_FAULT) | \
114 0) 116 0)
115#define NONQUEUED_INTERRUPTS ( \ 117#define NONQUEUED_INTERRUPTS ( \
116 INT_MASK(INT_SINGLE_STEP_3) | \ 118 (1ULL << INT_SINGLE_STEP_3) | \
117 INT_MASK(INT_SINGLE_STEP_2) | \ 119 (1ULL << INT_SINGLE_STEP_2) | \
118 INT_MASK(INT_SINGLE_STEP_1) | \ 120 (1ULL << INT_SINGLE_STEP_1) | \
119 INT_MASK(INT_SINGLE_STEP_0) | \ 121 (1ULL << INT_SINGLE_STEP_0) | \
120 INT_MASK(INT_ITLB_MISS) | \ 122 (1ULL << INT_ITLB_MISS) | \
121 INT_MASK(INT_ILL) | \ 123 (1ULL << INT_ILL) | \
122 INT_MASK(INT_GPV) | \ 124 (1ULL << INT_GPV) | \
123 INT_MASK(INT_IDN_ACCESS) | \ 125 (1ULL << INT_IDN_ACCESS) | \
124 INT_MASK(INT_UDN_ACCESS) | \ 126 (1ULL << INT_UDN_ACCESS) | \
125 INT_MASK(INT_SWINT_3) | \ 127 (1ULL << INT_SWINT_3) | \
126 INT_MASK(INT_SWINT_2) | \ 128 (1ULL << INT_SWINT_2) | \
127 INT_MASK(INT_SWINT_1) | \ 129 (1ULL << INT_SWINT_1) | \
128 INT_MASK(INT_SWINT_0) | \ 130 (1ULL << INT_SWINT_0) | \
129 INT_MASK(INT_ILL_TRANS) | \ 131 (1ULL << INT_ILL_TRANS) | \
130 INT_MASK(INT_UNALIGN_DATA) | \ 132 (1ULL << INT_UNALIGN_DATA) | \
131 INT_MASK(INT_DTLB_MISS) | \ 133 (1ULL << INT_DTLB_MISS) | \
132 INT_MASK(INT_DTLB_ACCESS) | \ 134 (1ULL << INT_DTLB_ACCESS) | \
133 0) 135 0)
134#define CRITICAL_MASKED_INTERRUPTS ( \ 136#define CRITICAL_MASKED_INTERRUPTS ( \
135 INT_MASK(INT_MEM_ERROR) | \ 137 (1ULL << INT_MEM_ERROR) | \
136 INT_MASK(INT_SINGLE_STEP_3) | \ 138 (1ULL << INT_SINGLE_STEP_3) | \
137 INT_MASK(INT_SINGLE_STEP_2) | \ 139 (1ULL << INT_SINGLE_STEP_2) | \
138 INT_MASK(INT_SINGLE_STEP_1) | \ 140 (1ULL << INT_SINGLE_STEP_1) | \
139 INT_MASK(INT_SINGLE_STEP_0) | \ 141 (1ULL << INT_SINGLE_STEP_0) | \
140 INT_MASK(INT_IDN_COMPLETE) | \ 142 (1ULL << INT_IDN_COMPLETE) | \
141 INT_MASK(INT_UDN_COMPLETE) | \ 143 (1ULL << INT_UDN_COMPLETE) | \
142 INT_MASK(INT_IDN_FIREWALL) | \ 144 (1ULL << INT_IDN_FIREWALL) | \
143 INT_MASK(INT_UDN_FIREWALL) | \ 145 (1ULL << INT_UDN_FIREWALL) | \
144 INT_MASK(INT_TILE_TIMER) | \ 146 (1ULL << INT_TILE_TIMER) | \
145 INT_MASK(INT_AUX_TILE_TIMER) | \ 147 (1ULL << INT_AUX_TILE_TIMER) | \
146 INT_MASK(INT_IDN_TIMER) | \ 148 (1ULL << INT_IDN_TIMER) | \
147 INT_MASK(INT_UDN_TIMER) | \ 149 (1ULL << INT_UDN_TIMER) | \
148 INT_MASK(INT_IDN_AVAIL) | \ 150 (1ULL << INT_IDN_AVAIL) | \
149 INT_MASK(INT_UDN_AVAIL) | \ 151 (1ULL << INT_UDN_AVAIL) | \
150 INT_MASK(INT_IPI_3) | \ 152 (1ULL << INT_IPI_3) | \
151 INT_MASK(INT_IPI_2) | \ 153 (1ULL << INT_IPI_2) | \
152 INT_MASK(INT_IPI_1) | \ 154 (1ULL << INT_IPI_1) | \
153 INT_MASK(INT_IPI_0) | \ 155 (1ULL << INT_IPI_0) | \
154 INT_MASK(INT_PERF_COUNT) | \ 156 (1ULL << INT_PERF_COUNT) | \
155 INT_MASK(INT_AUX_PERF_COUNT) | \ 157 (1ULL << INT_AUX_PERF_COUNT) | \
156 INT_MASK(INT_INTCTRL_3) | \ 158 (1ULL << INT_INTCTRL_3) | \
157 INT_MASK(INT_INTCTRL_2) | \ 159 (1ULL << INT_INTCTRL_2) | \
158 INT_MASK(INT_INTCTRL_1) | \ 160 (1ULL << INT_INTCTRL_1) | \
159 INT_MASK(INT_INTCTRL_0) | \ 161 (1ULL << INT_INTCTRL_0) | \
160 0) 162 0)
161#define CRITICAL_UNMASKED_INTERRUPTS ( \ 163#define CRITICAL_UNMASKED_INTERRUPTS ( \
162 INT_MASK(INT_ITLB_MISS) | \ 164 (1ULL << INT_ITLB_MISS) | \
163 INT_MASK(INT_ILL) | \ 165 (1ULL << INT_ILL) | \
164 INT_MASK(INT_GPV) | \ 166 (1ULL << INT_GPV) | \
165 INT_MASK(INT_IDN_ACCESS) | \ 167 (1ULL << INT_IDN_ACCESS) | \
166 INT_MASK(INT_UDN_ACCESS) | \ 168 (1ULL << INT_UDN_ACCESS) | \
167 INT_MASK(INT_SWINT_3) | \ 169 (1ULL << INT_SWINT_3) | \
168 INT_MASK(INT_SWINT_2) | \ 170 (1ULL << INT_SWINT_2) | \
169 INT_MASK(INT_SWINT_1) | \ 171 (1ULL << INT_SWINT_1) | \
170 INT_MASK(INT_SWINT_0) | \ 172 (1ULL << INT_SWINT_0) | \
171 INT_MASK(INT_ILL_TRANS) | \ 173 (1ULL << INT_ILL_TRANS) | \
172 INT_MASK(INT_UNALIGN_DATA) | \ 174 (1ULL << INT_UNALIGN_DATA) | \
173 INT_MASK(INT_DTLB_MISS) | \ 175 (1ULL << INT_DTLB_MISS) | \
174 INT_MASK(INT_DTLB_ACCESS) | \ 176 (1ULL << INT_DTLB_ACCESS) | \
175 INT_MASK(INT_BOOT_ACCESS) | \ 177 (1ULL << INT_BOOT_ACCESS) | \
176 INT_MASK(INT_WORLD_ACCESS) | \ 178 (1ULL << INT_WORLD_ACCESS) | \
177 INT_MASK(INT_I_ASID) | \ 179 (1ULL << INT_I_ASID) | \
178 INT_MASK(INT_D_ASID) | \ 180 (1ULL << INT_D_ASID) | \
179 INT_MASK(INT_DOUBLE_FAULT) | \ 181 (1ULL << INT_DOUBLE_FAULT) | \
180 0) 182 0)
181#define MASKABLE_INTERRUPTS ( \ 183#define MASKABLE_INTERRUPTS ( \
182 INT_MASK(INT_MEM_ERROR) | \ 184 (1ULL << INT_MEM_ERROR) | \
183 INT_MASK(INT_SINGLE_STEP_3) | \ 185 (1ULL << INT_SINGLE_STEP_3) | \
184 INT_MASK(INT_SINGLE_STEP_2) | \ 186 (1ULL << INT_SINGLE_STEP_2) | \
185 INT_MASK(INT_SINGLE_STEP_1) | \ 187 (1ULL << INT_SINGLE_STEP_1) | \
186 INT_MASK(INT_SINGLE_STEP_0) | \ 188 (1ULL << INT_SINGLE_STEP_0) | \
187 INT_MASK(INT_IDN_COMPLETE) | \ 189 (1ULL << INT_IDN_COMPLETE) | \
188 INT_MASK(INT_UDN_COMPLETE) | \ 190 (1ULL << INT_UDN_COMPLETE) | \
189 INT_MASK(INT_IDN_FIREWALL) | \ 191 (1ULL << INT_IDN_FIREWALL) | \
190 INT_MASK(INT_UDN_FIREWALL) | \ 192 (1ULL << INT_UDN_FIREWALL) | \
191 INT_MASK(INT_TILE_TIMER) | \ 193 (1ULL << INT_TILE_TIMER) | \
192 INT_MASK(INT_AUX_TILE_TIMER) | \ 194 (1ULL << INT_AUX_TILE_TIMER) | \
193 INT_MASK(INT_IDN_TIMER) | \ 195 (1ULL << INT_IDN_TIMER) | \
194 INT_MASK(INT_UDN_TIMER) | \ 196 (1ULL << INT_UDN_TIMER) | \
195 INT_MASK(INT_IDN_AVAIL) | \ 197 (1ULL << INT_IDN_AVAIL) | \
196 INT_MASK(INT_UDN_AVAIL) | \ 198 (1ULL << INT_UDN_AVAIL) | \
197 INT_MASK(INT_IPI_3) | \ 199 (1ULL << INT_IPI_3) | \
198 INT_MASK(INT_IPI_2) | \ 200 (1ULL << INT_IPI_2) | \
199 INT_MASK(INT_IPI_1) | \ 201 (1ULL << INT_IPI_1) | \
200 INT_MASK(INT_IPI_0) | \ 202 (1ULL << INT_IPI_0) | \
201 INT_MASK(INT_PERF_COUNT) | \ 203 (1ULL << INT_PERF_COUNT) | \
202 INT_MASK(INT_AUX_PERF_COUNT) | \ 204 (1ULL << INT_AUX_PERF_COUNT) | \
203 INT_MASK(INT_INTCTRL_3) | \ 205 (1ULL << INT_INTCTRL_3) | \
204 INT_MASK(INT_INTCTRL_2) | \ 206 (1ULL << INT_INTCTRL_2) | \
205 INT_MASK(INT_INTCTRL_1) | \ 207 (1ULL << INT_INTCTRL_1) | \
206 INT_MASK(INT_INTCTRL_0) | \ 208 (1ULL << INT_INTCTRL_0) | \
207 0) 209 0)
208#define UNMASKABLE_INTERRUPTS ( \ 210#define UNMASKABLE_INTERRUPTS ( \
209 INT_MASK(INT_ITLB_MISS) | \ 211 (1ULL << INT_ITLB_MISS) | \
210 INT_MASK(INT_ILL) | \ 212 (1ULL << INT_ILL) | \
211 INT_MASK(INT_GPV) | \ 213 (1ULL << INT_GPV) | \
212 INT_MASK(INT_IDN_ACCESS) | \ 214 (1ULL << INT_IDN_ACCESS) | \
213 INT_MASK(INT_UDN_ACCESS) | \ 215 (1ULL << INT_UDN_ACCESS) | \
214 INT_MASK(INT_SWINT_3) | \ 216 (1ULL << INT_SWINT_3) | \
215 INT_MASK(INT_SWINT_2) | \ 217 (1ULL << INT_SWINT_2) | \
216 INT_MASK(INT_SWINT_1) | \ 218 (1ULL << INT_SWINT_1) | \
217 INT_MASK(INT_SWINT_0) | \ 219 (1ULL << INT_SWINT_0) | \
218 INT_MASK(INT_ILL_TRANS) | \ 220 (1ULL << INT_ILL_TRANS) | \
219 INT_MASK(INT_UNALIGN_DATA) | \ 221 (1ULL << INT_UNALIGN_DATA) | \
220 INT_MASK(INT_DTLB_MISS) | \ 222 (1ULL << INT_DTLB_MISS) | \
221 INT_MASK(INT_DTLB_ACCESS) | \ 223 (1ULL << INT_DTLB_ACCESS) | \
222 INT_MASK(INT_BOOT_ACCESS) | \ 224 (1ULL << INT_BOOT_ACCESS) | \
223 INT_MASK(INT_WORLD_ACCESS) | \ 225 (1ULL << INT_WORLD_ACCESS) | \
224 INT_MASK(INT_I_ASID) | \ 226 (1ULL << INT_I_ASID) | \
225 INT_MASK(INT_D_ASID) | \ 227 (1ULL << INT_D_ASID) | \
226 INT_MASK(INT_DOUBLE_FAULT) | \ 228 (1ULL << INT_DOUBLE_FAULT) | \
227 0) 229 0)
228#define SYNC_INTERRUPTS ( \ 230#define SYNC_INTERRUPTS ( \
229 INT_MASK(INT_SINGLE_STEP_3) | \ 231 (1ULL << INT_SINGLE_STEP_3) | \
230 INT_MASK(INT_SINGLE_STEP_2) | \ 232 (1ULL << INT_SINGLE_STEP_2) | \
231 INT_MASK(INT_SINGLE_STEP_1) | \ 233 (1ULL << INT_SINGLE_STEP_1) | \
232 INT_MASK(INT_SINGLE_STEP_0) | \ 234 (1ULL << INT_SINGLE_STEP_0) | \
233 INT_MASK(INT_IDN_COMPLETE) | \ 235 (1ULL << INT_IDN_COMPLETE) | \
234 INT_MASK(INT_UDN_COMPLETE) | \ 236 (1ULL << INT_UDN_COMPLETE) | \
235 INT_MASK(INT_ITLB_MISS) | \ 237 (1ULL << INT_ITLB_MISS) | \
236 INT_MASK(INT_ILL) | \ 238 (1ULL << INT_ILL) | \
237 INT_MASK(INT_GPV) | \ 239 (1ULL << INT_GPV) | \
238 INT_MASK(INT_IDN_ACCESS) | \ 240 (1ULL << INT_IDN_ACCESS) | \
239 INT_MASK(INT_UDN_ACCESS) | \ 241 (1ULL << INT_UDN_ACCESS) | \
240 INT_MASK(INT_SWINT_3) | \ 242 (1ULL << INT_SWINT_3) | \
241 INT_MASK(INT_SWINT_2) | \ 243 (1ULL << INT_SWINT_2) | \
242 INT_MASK(INT_SWINT_1) | \ 244 (1ULL << INT_SWINT_1) | \
243 INT_MASK(INT_SWINT_0) | \ 245 (1ULL << INT_SWINT_0) | \
244 INT_MASK(INT_ILL_TRANS) | \ 246 (1ULL << INT_ILL_TRANS) | \
245 INT_MASK(INT_UNALIGN_DATA) | \ 247 (1ULL << INT_UNALIGN_DATA) | \
246 INT_MASK(INT_DTLB_MISS) | \ 248 (1ULL << INT_DTLB_MISS) | \
247 INT_MASK(INT_DTLB_ACCESS) | \ 249 (1ULL << INT_DTLB_ACCESS) | \
248 0) 250 0)
249#define NON_SYNC_INTERRUPTS ( \ 251#define NON_SYNC_INTERRUPTS ( \
250 INT_MASK(INT_MEM_ERROR) | \ 252 (1ULL << INT_MEM_ERROR) | \
251 INT_MASK(INT_IDN_FIREWALL) | \ 253 (1ULL << INT_IDN_FIREWALL) | \
252 INT_MASK(INT_UDN_FIREWALL) | \ 254 (1ULL << INT_UDN_FIREWALL) | \
253 INT_MASK(INT_TILE_TIMER) | \ 255 (1ULL << INT_TILE_TIMER) | \
254 INT_MASK(INT_AUX_TILE_TIMER) | \ 256 (1ULL << INT_AUX_TILE_TIMER) | \
255 INT_MASK(INT_IDN_TIMER) | \ 257 (1ULL << INT_IDN_TIMER) | \
256 INT_MASK(INT_UDN_TIMER) | \ 258 (1ULL << INT_UDN_TIMER) | \
257 INT_MASK(INT_IDN_AVAIL) | \ 259 (1ULL << INT_IDN_AVAIL) | \
258 INT_MASK(INT_UDN_AVAIL) | \ 260 (1ULL << INT_UDN_AVAIL) | \
259 INT_MASK(INT_IPI_3) | \ 261 (1ULL << INT_IPI_3) | \
260 INT_MASK(INT_IPI_2) | \ 262 (1ULL << INT_IPI_2) | \
261 INT_MASK(INT_IPI_1) | \ 263 (1ULL << INT_IPI_1) | \
262 INT_MASK(INT_IPI_0) | \ 264 (1ULL << INT_IPI_0) | \
263 INT_MASK(INT_PERF_COUNT) | \ 265 (1ULL << INT_PERF_COUNT) | \
264 INT_MASK(INT_AUX_PERF_COUNT) | \ 266 (1ULL << INT_AUX_PERF_COUNT) | \
265 INT_MASK(INT_INTCTRL_3) | \ 267 (1ULL << INT_INTCTRL_3) | \
266 INT_MASK(INT_INTCTRL_2) | \ 268 (1ULL << INT_INTCTRL_2) | \
267 INT_MASK(INT_INTCTRL_1) | \ 269 (1ULL << INT_INTCTRL_1) | \
268 INT_MASK(INT_INTCTRL_0) | \ 270 (1ULL << INT_INTCTRL_0) | \
269 INT_MASK(INT_BOOT_ACCESS) | \ 271 (1ULL << INT_BOOT_ACCESS) | \
270 INT_MASK(INT_WORLD_ACCESS) | \ 272 (1ULL << INT_WORLD_ACCESS) | \
271 INT_MASK(INT_I_ASID) | \ 273 (1ULL << INT_I_ASID) | \
272 INT_MASK(INT_D_ASID) | \ 274 (1ULL << INT_D_ASID) | \
273 INT_MASK(INT_DOUBLE_FAULT) | \ 275 (1ULL << INT_DOUBLE_FAULT) | \
274 0) 276 0)
275#endif /* !__ASSEMBLER__ */ 277#endif /* !__ASSEMBLER__ */
276#endif /* !__ARCH_INTERRUPTS_H__ */ 278#endif /* !__ARCH_INTERRUPTS_H__ */
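
The companion change at the top of this header wraps the INT_MASK() definition in #ifndef __KERNEL__: userspace keeps the convenience macro for source compatibility, while kernel code sees only the raw interrupt numbers and spells masks out by hand. A consumer-side sketch, with INT_FOO as a hypothetical interrupt number:

        #define INT_FOO 7                       /* hypothetical */

        #ifdef __KERNEL__
        # define FOO_MASK (1ULL << INT_FOO)     /* macro is hidden in-kernel */
        #else
        # define FOO_MASK INT_MASK(INT_FOO)     /* userspace still has INT_MASK */
        #endif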
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 54bc9a6678e8..4ea080902654 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1035,7 +1035,9 @@ handle_syscall:
1035 /* Ensure that the syscall number is within the legal range. */ 1035 /* Ensure that the syscall number is within the legal range. */
1036 { 1036 {
1037 moveli r20, hw2(sys_call_table) 1037 moveli r20, hw2(sys_call_table)
1038#ifdef CONFIG_COMPAT
1038 blbs r30, .Lcompat_syscall 1039 blbs r30, .Lcompat_syscall
1040#endif
1039 } 1041 }
1040 { 1042 {
1041 cmpltu r21, TREG_SYSCALL_NR_NAME, r21 1043 cmpltu r21, TREG_SYSCALL_NR_NAME, r21
@@ -1093,6 +1095,7 @@ handle_syscall:
1093 j .Lresume_userspace /* jump into middle of interrupt_return */ 1095 j .Lresume_userspace /* jump into middle of interrupt_return */
1094 } 1096 }
1095 1097
1098#ifdef CONFIG_COMPAT
1096.Lcompat_syscall: 1099.Lcompat_syscall:
1097 /* 1100 /*
1098 * Load the base of the compat syscall table in r20, and 1101 * Load the base of the compat syscall table in r20, and
@@ -1117,6 +1120,7 @@ handle_syscall:
1117 { move r15, r4; addxi r4, r4, 0 } 1120 { move r15, r4; addxi r4, r4, 0 }
1118 { move r16, r5; addxi r5, r5, 0 } 1121 { move r16, r5; addxi r5, r5, 0 }
1119 j .Lload_syscall_pointer 1122 j .Lload_syscall_pointer
1123#endif
1120 1124
1121.Linvalid_syscall: 1125.Linvalid_syscall:
1122 /* Report an invalid syscall back to the user program */ 1126 /* Report an invalid syscall back to the user program */
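
The new CONFIG_COMPAT guards keep the syscall fast path from referencing .Lcompat_syscall in kernels built without compat support, where the label (and the compat syscall table behind it) simply does not exist and assembly would fail. The same rule expressed in C, with hypothetical helpers standing in for the assembly paths:

        extern long do_native_syscall(unsigned long nr);
        #ifdef CONFIG_COMPAT
        extern long do_compat_syscall(unsigned long nr);  /* compat-only symbol */
        #endif

        long dispatch(unsigned long nr, int is_compat)
        {
        #ifdef CONFIG_COMPAT
                if (is_compat)
                        return do_compat_syscall(nr);
        #endif
                return do_native_syscall(nr);
        }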
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 0e5661e7d00d..caf93ae11793 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -159,7 +159,7 @@ static void save_arch_state(struct thread_struct *t);
159int copy_thread(unsigned long clone_flags, unsigned long sp, 159int copy_thread(unsigned long clone_flags, unsigned long sp,
160 unsigned long arg, struct task_struct *p) 160 unsigned long arg, struct task_struct *p)
161{ 161{
162 struct pt_regs *childregs = task_pt_regs(p), *regs = current_pt_regs(); 162 struct pt_regs *childregs = task_pt_regs(p);
163 unsigned long ksp; 163 unsigned long ksp;
164 unsigned long *callee_regs; 164 unsigned long *callee_regs;
165 165
diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c
index baa3d905fee2..d1b5c913ae72 100644
--- a/arch/tile/kernel/reboot.c
+++ b/arch/tile/kernel/reboot.c
@@ -16,6 +16,7 @@
16#include <linux/reboot.h> 16#include <linux/reboot.h>
17#include <linux/smp.h> 17#include <linux/smp.h>
18#include <linux/pm.h> 18#include <linux/pm.h>
19#include <linux/export.h>
19#include <asm/page.h> 20#include <asm/page.h>
20#include <asm/setup.h> 21#include <asm/setup.h>
21#include <hv/hypervisor.h> 22#include <hv/hypervisor.h>
@@ -49,3 +50,4 @@ void machine_restart(char *cmd)
49 50
50/* No interesting distinction to be made here. */ 51/* No interesting distinction to be made here. */
51void (*pm_power_off)(void) = NULL; 52void (*pm_power_off)(void) = NULL;
53EXPORT_SYMBOL(pm_power_off);
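
Exporting pm_power_off lets modular platform drivers install a power-off handler, which an unexported symbol only allows for built-in code. A minimal sketch of such a consumer (my_board_power_off is hypothetical):

        #include <linux/module.h>
        #include <linux/pm.h>

        static void my_board_power_off(void)
        {
                /* poke board-specific power-controller registers here */
        }

        static int __init my_init(void)
        {
                pm_power_off = my_board_power_off;  /* needs the export above */
                return 0;
        }

        static void __exit my_exit(void)
        {
                pm_power_off = NULL;
        }

        module_init(my_init);
        module_exit(my_exit);
        MODULE_LICENSE("GPL");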
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 6a649a4462d3..d1e15f7b59c6 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -31,6 +31,7 @@
31#include <linux/timex.h> 31#include <linux/timex.h>
32#include <linux/hugetlb.h> 32#include <linux/hugetlb.h>
33#include <linux/start_kernel.h> 33#include <linux/start_kernel.h>
34#include <linux/screen_info.h>
34#include <asm/setup.h> 35#include <asm/setup.h>
35#include <asm/sections.h> 36#include <asm/sections.h>
36#include <asm/cacheflush.h> 37#include <asm/cacheflush.h>
@@ -49,6 +50,10 @@ static inline int ABS(int x) { return x >= 0 ? x : -x; }
49/* Chip information */ 50/* Chip information */
50char chip_model[64] __write_once; 51char chip_model[64] __write_once;
51 52
53#ifdef CONFIG_VT
54struct screen_info screen_info;
55#endif
56
52struct pglist_data node_data[MAX_NUMNODES] __read_mostly; 57struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
53EXPORT_SYMBOL(node_data); 58EXPORT_SYMBOL(node_data);
54 59
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index b2f44c28dda6..ed258b8ae320 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -112,7 +112,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
112 p->pc, p->sp, p->ex1); 112 p->pc, p->sp, p->ex1);
113 p = NULL; 113 p = NULL;
114 } 114 }
115 if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) 115 if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0)
116 return p; 116 return p;
117 return NULL; 117 return NULL;
118} 118}
@@ -484,6 +484,7 @@ void save_stack_trace(struct stack_trace *trace)
484{ 484{
485 save_stack_trace_tsk(NULL, trace); 485 save_stack_trace_tsk(NULL, trace);
486} 486}
487EXPORT_SYMBOL_GPL(save_stack_trace);
487 488
488#endif 489#endif
489 490
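
With save_stack_trace() exported (GPL-only), modules can capture kernel backtraces. A hedged usage sketch:

        #include <linux/stacktrace.h>

        static unsigned long entries[16];

        static void capture_current_stack(void)
        {
                struct stack_trace trace = {
                        .max_entries = 16,
                        .entries     = entries,
                };

                save_stack_trace(&trace);
                /* trace.nr_entries frames are now stored in entries[] */
        }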
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index db4fb89e12d8..8f8ad814b139 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -12,6 +12,7 @@
12 * more details. 12 * more details.
13 */ 13 */
14 14
15#include <linux/export.h>
15#include <asm/page.h> 16#include <asm/page.h>
16#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
17#include <arch/icache.h> 18#include <arch/icache.h>
@@ -165,3 +166,4 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
165 __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf); 166 __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf);
166#endif 167#endif
167} 168}
169EXPORT_SYMBOL_GPL(finv_buffer_remote);
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
index fdc403614d12..75947edccb26 100644
--- a/arch/tile/lib/cpumask.c
+++ b/arch/tile/lib/cpumask.c
@@ -16,6 +16,7 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/export.h>
19 20
20/* 21/*
21 * Allow cropping out bits beyond the end of the array. 22 * Allow cropping out bits beyond the end of the array.
@@ -50,3 +51,4 @@ int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
50 } while (*bp != '\0' && *bp != '\n'); 51 } while (*bp != '\0' && *bp != '\n');
51 return 0; 52 return 0;
52} 53}
54EXPORT_SYMBOL(bitmap_parselist_crop);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index dd5f0a33fdaf..4385cb6fa00a 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -55,6 +55,8 @@ EXPORT_SYMBOL(hv_dev_poll_cancel);
55EXPORT_SYMBOL(hv_dev_close); 55EXPORT_SYMBOL(hv_dev_close);
56EXPORT_SYMBOL(hv_sysconf); 56EXPORT_SYMBOL(hv_sysconf);
57EXPORT_SYMBOL(hv_confstr); 57EXPORT_SYMBOL(hv_confstr);
58EXPORT_SYMBOL(hv_get_rtc);
59EXPORT_SYMBOL(hv_set_rtc);
58 60
59/* libgcc.a */ 61/* libgcc.a */
60uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); 62uint32_t __udivsi3(uint32_t dividend, uint32_t divisor);
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 5f7868dcd6d4..1ae911939a18 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -408,6 +408,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
408 __set_pte(ptep, pte_set_home(pteval, home)); 408 __set_pte(ptep, pte_set_home(pteval, home));
409 } 409 }
410} 410}
411EXPORT_SYMBOL(homecache_change_page_home);
411 412
412struct page *homecache_alloc_pages(gfp_t gfp_mask, 413struct page *homecache_alloc_pages(gfp_t gfp_mask,
413 unsigned int order, int home) 414 unsigned int order, int home)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 225543bf45a5..260857a53b87 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,7 +1,7 @@
1# Select 32 or 64 bit 1# Select 32 or 64 bit
2config 64BIT 2config 64BIT
3 bool "64-bit kernel" if ARCH = "x86" 3 bool "64-bit kernel" if ARCH = "x86"
4 default ARCH = "x86_64" 4 default ARCH != "i386"
5 ---help--- 5 ---help---
6 Say yes to build a 64-bit kernel - formerly known as x86_64 6 Say yes to build a 64-bit kernel - formerly known as x86_64
7 Say no to build a 32-bit kernel - formerly known as i386 7 Say no to build a 32-bit kernel - formerly known as i386
@@ -28,7 +28,6 @@ config X86
28 select HAVE_OPROFILE 28 select HAVE_OPROFILE
29 select HAVE_PCSPKR_PLATFORM 29 select HAVE_PCSPKR_PLATFORM
30 select HAVE_PERF_EVENTS 30 select HAVE_PERF_EVENTS
31 select HAVE_IRQ_WORK
32 select HAVE_IOREMAP_PROT 31 select HAVE_IOREMAP_PROT
33 select HAVE_KPROBES 32 select HAVE_KPROBES
34 select HAVE_MEMBLOCK 33 select HAVE_MEMBLOCK
@@ -40,10 +39,12 @@ config X86
40 select HAVE_DMA_CONTIGUOUS if !SWIOTLB 39 select HAVE_DMA_CONTIGUOUS if !SWIOTLB
41 select HAVE_KRETPROBES 40 select HAVE_KRETPROBES
42 select HAVE_OPTPROBES 41 select HAVE_OPTPROBES
42 select HAVE_KPROBES_ON_FTRACE
43 select HAVE_FTRACE_MCOUNT_RECORD 43 select HAVE_FTRACE_MCOUNT_RECORD
44 select HAVE_FENTRY if X86_64 44 select HAVE_FENTRY if X86_64
45 select HAVE_C_RECORDMCOUNT 45 select HAVE_C_RECORDMCOUNT
46 select HAVE_DYNAMIC_FTRACE 46 select HAVE_DYNAMIC_FTRACE
47 select HAVE_DYNAMIC_FTRACE_WITH_REGS
47 select HAVE_FUNCTION_TRACER 48 select HAVE_FUNCTION_TRACER
48 select HAVE_FUNCTION_GRAPH_TRACER 49 select HAVE_FUNCTION_GRAPH_TRACER
49 select HAVE_FUNCTION_GRAPH_FP_TEST 50 select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -106,6 +107,7 @@ config X86
106 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) 107 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
107 select GENERIC_TIME_VSYSCALL if X86_64 108 select GENERIC_TIME_VSYSCALL if X86_64
108 select KTIME_SCALAR if X86_32 109 select KTIME_SCALAR if X86_32
110 select ALWAYS_USE_PERSISTENT_CLOCK
109 select GENERIC_STRNCPY_FROM_USER 111 select GENERIC_STRNCPY_FROM_USER
110 select GENERIC_STRNLEN_USER 112 select GENERIC_STRNLEN_USER
111 select HAVE_CONTEXT_TRACKING if X86_64 113 select HAVE_CONTEXT_TRACKING if X86_64
@@ -114,6 +116,7 @@ config X86
114 select MODULES_USE_ELF_RELA if X86_64 116 select MODULES_USE_ELF_RELA if X86_64
115 select CLONE_BACKWARDS if X86_32 117 select CLONE_BACKWARDS if X86_32
116 select GENERIC_SIGALTSTACK 118 select GENERIC_SIGALTSTACK
119 select ARCH_USE_BUILTIN_BSWAP
117 120
118config INSTRUCTION_DECODER 121config INSTRUCTION_DECODER
119 def_bool y 122 def_bool y
@@ -320,6 +323,10 @@ config X86_BIGSMP
320 ---help--- 323 ---help---
321 This option is needed for the systems that have more than 8 CPUs 324 This option is needed for the systems that have more than 8 CPUs
322 325
326config GOLDFISH
327 def_bool y
328 depends on X86_GOLDFISH
329
323if X86_32 330if X86_32
324config X86_EXTENDED_PLATFORM 331config X86_EXTENDED_PLATFORM
325 bool "Support for extended (non-PC) x86 platforms" 332 bool "Support for extended (non-PC) x86 platforms"
@@ -402,6 +409,14 @@ config X86_UV
402# Following is an alphabetically sorted list of 32 bit extended platforms 409# Following is an alphabetically sorted list of 32 bit extended platforms
403# Please maintain the alphabetic order if and when there are additions 410# Please maintain the alphabetic order if and when there are additions
404 411
412config X86_GOLDFISH
413 bool "Goldfish (Virtual Platform)"
414 depends on X86_32
415 ---help---
416 Enable support for the Goldfish virtual platform used primarily
417 for Android development. Unless you are building for the Android
418 Goldfish emulator say N here.
419
405config X86_INTEL_CE 420config X86_INTEL_CE
406 bool "CE4100 TV platform" 421 bool "CE4100 TV platform"
407 depends on PCI 422 depends on PCI
@@ -2188,6 +2203,15 @@ config GEOS
2188 ---help--- 2203 ---help---
2189 This option enables system support for the Traverse Technologies GEOS. 2204 This option enables system support for the Traverse Technologies GEOS.
2190 2205
2206config TS5500
2207 bool "Technologic Systems TS-5500 platform support"
2208 depends on MELAN
2209 select CHECK_SIGNATURE
2210 select NEW_LEDS
2211 select LEDS_CLASS
2212 ---help---
2213 This option enables system support for the Technologic Systems TS-5500.
2214
2191endif # X86_32 2215endif # X86_32
2192 2216
2193config AMD_NB 2217config AMD_NB
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e71fc4279aab..5c477260294f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -2,7 +2,11 @@
2 2
3# select defconfig based on actual architecture 3# select defconfig based on actual architecture
4ifeq ($(ARCH),x86) 4ifeq ($(ARCH),x86)
5 ifeq ($(shell uname -m),x86_64)
6 KBUILD_DEFCONFIG := x86_64_defconfig
7 else
5 KBUILD_DEFCONFIG := i386_defconfig 8 KBUILD_DEFCONFIG := i386_defconfig
9 endif
6else 10else
7 KBUILD_DEFCONFIG := $(ARCH)_defconfig 11 KBUILD_DEFCONFIG := $(ARCH)_defconfig
8endif 12endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da404..7cb56c6ca351 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
325{ 325{
326 real_mode = rmode; 326 real_mode = rmode;
327 327
328 sanitize_boot_params(real_mode);
329
328 if (real_mode->screen_info.orig_video_mode == 7) { 330 if (real_mode->screen_info.orig_video_mode == 7) {
329 vidmem = (char *) 0xb0000; 331 vidmem = (char *) 0xb0000;
330 vidport = 0x3b4; 332 vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0eea..674019d8e235 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/boot.h> 19#include <asm/boot.h>
20#include <asm/bootparam.h> 20#include <asm/bootparam.h>
21#include <asm/bootparam_utils.h>
21 22
22#define BOOT_BOOT_H 23#define BOOT_BOOT_H
23#include "../ctype.h" 24#include "../ctype.h"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 5598547281a7..94447086e551 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,3 +1,4 @@
1# CONFIG_64BIT is not set
1CONFIG_EXPERIMENTAL=y 2CONFIG_EXPERIMENTAL=y
2# CONFIG_LOCALVERSION_AUTO is not set 3# CONFIG_LOCALVERSION_AUTO is not set
3CONFIG_SYSVIPC=y 4CONFIG_SYSVIPC=y
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9cd8fd..a54ee1d054d9 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; 81 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
82} 82}
83 83
84static inline u16 amd_get_node_id(struct pci_dev *pdev)
85{
86 struct pci_dev *misc;
87 int i;
88
89 for (i = 0; i != amd_nb_num(); i++) {
90 misc = node_to_amd_nb(i)->misc;
91
92 if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
93 PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
94 return i;
95 }
96
97 WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
98 return 0;
99}
100
84#else 101#else
85 102
86#define amd_nb_num(x) 0 103#define amd_nb_num(x) 0
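
amd_get_node_id() walks the cached northbridge list and matches a device to its node by PCI domain and slot. An illustrative caller that then grabs that node's misc PCI function:

        static struct pci_dev *nb_misc_for(struct pci_dev *pdev)
        {
                u16 node = amd_get_node_id(pdev);

                return node_to_amd_nb(node)->misc;
        }

Note the fallback behavior: on a lookup miss it emits a WARN and returns node 0 rather than an error code, so callers always get a valid index.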
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 000000000000..5b5e9cb774b5
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_X86_BOOTPARAM_UTILS_H
2#define _ASM_X86_BOOTPARAM_UTILS_H
3
4#include <asm/bootparam.h>
5
6/*
7 * This file is included from multiple environments. Do not
8 * add completing #includes to make it standalone.
9 */

10
11/*
12 * Deal with bootloaders which fail to initialize unknown fields in
13 * boot_params to zero. The fields in this list are taken from
14 * analysis of kexec-tools; if other broken bootloaders initialize a
15 * different set of fields we will need to figure out how to disambiguate.
16 *
17 */
18static void sanitize_boot_params(struct boot_params *boot_params)
19{
20 if (boot_params->sentinel) {
21 /* fields in boot_params are not valid, clear them */
22 memset(&boot_params->olpc_ofw_header, 0,
23 (char *)&boot_params->alt_mem_k -
24 (char *)&boot_params->olpc_ofw_header);
25 memset(&boot_params->kbd_status, 0,
26 (char *)&boot_params->hdr -
27 (char *)&boot_params->kbd_status);
28 memset(&boot_params->_pad7[0], 0,
29 (char *)&boot_params->edd_mbr_sig_buffer[0] -
30 (char *)&boot_params->_pad7[0]);
31 memset(&boot_params->_pad8[0], 0,
32 (char *)&boot_params->eddbuf[0] -
33 (char *)&boot_params->_pad8[0]);
34 memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
35 }
36}
37
38#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
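
The memset() bounds in sanitize_boot_params() all follow one idiom: clear everything from a first stale field up to, but not including, the next field known to be valid, using member addresses rather than hand-maintained offsets. The idiom in isolation (struct demo is hypothetical):

        #include <string.h>

        struct demo {
                int  keep_a;
                char stale[32];
                int  keep_b;
        };

        static void clear_span(struct demo *d)
        {
                /* zero [&stale, &keep_b): the length tracks layout changes */
                memset(&d->stale, 0, (char *)&d->keep_b - (char *)&d->stale);
        }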
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e863a0..93fe929d1cee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 167#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 168#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ 169#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
170#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
170 171
171/* 172/*
172 * Auxiliary flags: Linux defined - For features scattered in various 173 * Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
309#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 310#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
310#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 311#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
311#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) 312#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
313#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
312#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) 314#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
313#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) 315#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
314#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) 316#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..86cb51e1ca96 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
44 44
45#ifdef CONFIG_DYNAMIC_FTRACE 45#ifdef CONFIG_DYNAMIC_FTRACE
46#define ARCH_SUPPORTS_FTRACE_OPS 1 46#define ARCH_SUPPORTS_FTRACE_OPS 1
47#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
48#endif 47#endif
49 48
50#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc87..b18df579c0e9 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); 80extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
81 81
82#ifdef CONFIG_PCI_MSI 82#ifdef CONFIG_PCI_MSI
83extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); 83extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
84#else 84#else
85static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 85static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
86{ 86{
87 return -EINVAL; 87 return -EINVAL;
88} 88}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
111static inline int hpet_enable(void) { return 0; } 111static inline int hpet_enable(void) { return 0; }
112static inline int is_hpet_enabled(void) { return 0; } 112static inline int is_hpet_enabled(void) { return 0; }
113#define hpet_readl(a) 0 113#define hpet_readl(a) 0
114#define default_setup_hpet_msi NULL
114 115
115#endif 116#endif
116#endif /* _ASM_X86_HPET_H */ 117#endif /* _ASM_X86_HPET_H */
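
The rename to default_setup_hpet_msi follows the x86 convention for overridable defaults, and the added "#define default_setup_hpet_msi NULL" lets an ops table name the symbol unconditionally so HPET-less builds still compile. A sketch of the pattern (struct name hypothetical):

        struct hpet_msi_ops_sketch {
                int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
        };

        static struct hpet_msi_ops_sketch ops = {
                .setup_hpet_msi = default_setup_hpet_msi, /* NULL when HPET is off */
        };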
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be7..10a78c3d3d5a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
101 irq_attr->polarity = polarity; 101 irq_attr->polarity = polarity;
102} 102}
103 103
104/* Intel specific interrupt remapping information */
104struct irq_2_iommu { 105struct irq_2_iommu {
105 struct intel_iommu *iommu; 106 struct intel_iommu *iommu;
106 u16 irte_index; 107 u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
108 u8 irte_mask; 109 u8 irte_mask;
109}; 110};
110 111
112/* AMD specific interrupt remapping information */
113struct irq_2_irte {
114 u16 devid; /* Device ID for IRTE table */
114 u16 index; /* Index into IRTE table */
116};
117
111/* 118/*
112 * This is performance-critical, we want to do it O(1) 119 * This is performance-critical, we want to do it O(1)
113 * 120 *
@@ -120,7 +127,11 @@ struct irq_cfg {
120 u8 vector; 127 u8 vector;
121 u8 move_in_progress : 1; 128 u8 move_in_progress : 1;
122#ifdef CONFIG_IRQ_REMAP 129#ifdef CONFIG_IRQ_REMAP
123 struct irq_2_iommu irq_2_iommu; 130 u8 remapped : 1;
131 union {
132 struct irq_2_iommu irq_2_iommu;
133 struct irq_2_irte irq_2_irte;
134 };
124#endif 135#endif
125}; 136};
126 137
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c7509933..86095ed14135 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
25 25
26extern void init_hypervisor(struct cpuinfo_x86 *c); 26extern void init_hypervisor(struct cpuinfo_x86 *c);
27extern void init_hypervisor_platform(void); 27extern void init_hypervisor_platform(void);
28extern bool hypervisor_x2apic_available(void);
28 29
29/* 30/*
30 * x86 hypervisor information 31 * x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
41 42
42 /* Platform setup (run once per boot) */ 43 /* Platform setup (run once per boot) */
43 void (*init_platform)(void); 44 void (*init_platform)(void);
45
46 /* X2APIC detection (run once per boot) */
47 bool (*x2apic_available)(void);
44}; 48};
45 49
46extern const struct hypervisor_x86 *x86_hyper; 50extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
51extern const struct hypervisor_x86 x86_hyper_xen_hvm; 55extern const struct hypervisor_x86 x86_hyper_xen_hvm;
52extern const struct hypervisor_x86 x86_hyper_kvm; 56extern const struct hypervisor_x86 x86_hyper_kvm;
53 57
54static inline bool hypervisor_x2apic_available(void)
55{
56 if (kvm_para_available())
57 return true;
58 if (xen_x2apic_para_available())
59 return true;
60 return false;
61}
62
63#endif 58#endif
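
Replacing the hardcoded KVM/Xen checks with a per-hypervisor x2apic_available callback keeps this header free of guest-specific includes and lets each guest implementation answer for itself. A hedged sketch of one such implementation:

        static bool kvm_x2apic_checker(void)
        {
                return kvm_para_available();
        }

        static const struct hypervisor_x86 hyper_sketch = {
                .name             = "KVM (sketch)",
                .x2apic_available = kvm_x2apic_checker,
        };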
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea9..459e50a424d1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) 144 (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
145 145
146struct io_apic_irq_attr; 146struct io_apic_irq_attr;
147struct irq_cfg;
147extern int io_apic_set_pci_routing(struct device *dev, int irq, 148extern int io_apic_set_pci_routing(struct device *dev, int irq,
148 struct io_apic_irq_attr *irq_attr); 149 struct io_apic_irq_attr *irq_attr);
149void setup_IO_APIC_irq_extra(u32 gsi); 150void setup_IO_APIC_irq_extra(u32 gsi);
150extern void ioapic_insert_resources(void); 151extern void ioapic_insert_resources(void);
151 152
153extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
154 unsigned int, int,
155 struct io_apic_irq_attr *);
159extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
160
161extern void native_compose_msi_msg(struct pci_dev *pdev,
162 unsigned int irq, unsigned int dest,
163 struct msi_msg *msg, u8 hpet_id);
164extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
152int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); 165int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
153 166
154extern int save_ioapic_entries(void); 167extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
179extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); 192extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
180extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); 193extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
181extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); 194extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
195extern void native_disable_io_apic(void);
196extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
197extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
198extern int native_ioapic_set_affinity(struct irq_data *,
199 const struct cpumask *,
200 bool);
182 201
183static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 202static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
184{ 203{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
193{ 212{
194 x86_io_apic_ops.modify(apic, reg, value); 213 x86_io_apic_ops.modify(apic, reg, value);
195} 214}
215
216extern void io_apic_eoi(unsigned int apic, unsigned int vector);
217
196#else /* !CONFIG_X86_IO_APIC */ 218#else /* !CONFIG_X86_IO_APIC */
197 219
198#define io_apic_assign_pci_irqs 0 220#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
223#define native_io_apic_read NULL 245#define native_io_apic_read NULL
224#define native_io_apic_write NULL 246#define native_io_apic_write NULL
225#define native_io_apic_modify NULL 247#define native_io_apic_modify NULL
248#define native_disable_io_apic NULL
249#define native_io_apic_print_entries NULL
250#define native_ioapic_set_affinity NULL
251#define native_setup_ioapic_entry NULL
252#define native_compose_msi_msg NULL
253#define native_eoi_ioapic_pin NULL
226#endif 254#endif
227 255
228#endif /* _ASM_X86_IO_APIC_H */ 256#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..95fd3527f632 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
26 26
27#ifdef CONFIG_IRQ_REMAP 27#ifdef CONFIG_IRQ_REMAP
28 28
29extern int irq_remapping_enabled;
30
31extern void setup_irq_remapping_ops(void); 29extern void setup_irq_remapping_ops(void);
32extern int irq_remapping_supported(void); 30extern int irq_remapping_supported(void);
33extern int irq_remapping_prepare(void); 31extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
40 unsigned int destination, 38 unsigned int destination,
41 int vector, 39 int vector,
42 struct io_apic_irq_attr *attr); 40 struct io_apic_irq_attr *attr);
43extern int set_remapped_irq_affinity(struct irq_data *data,
44 const struct cpumask *mask,
45 bool force);
46extern void free_remapped_irq(int irq); 41extern void free_remapped_irq(int irq);
47extern void compose_remapped_msi_msg(struct pci_dev *pdev, 42extern void compose_remapped_msi_msg(struct pci_dev *pdev,
48 unsigned int irq, unsigned int dest, 43 unsigned int irq, unsigned int dest,
49 struct msi_msg *msg, u8 hpet_id); 44 struct msi_msg *msg, u8 hpet_id);
50extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
51extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
52 int index, int sub_handle);
53extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); 45extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
46extern void panic_if_irq_remap(const char *msg);
47extern bool setup_remapped_irq(int irq,
48 struct irq_cfg *cfg,
49 struct irq_chip *chip);
54 50
55#else /* CONFIG_IRQ_REMAP */ 51void irq_remap_modify_chip_defaults(struct irq_chip *chip);
56 52
57#define irq_remapping_enabled 0 53#else /* CONFIG_IRQ_REMAP */
58 54
59static inline void setup_irq_remapping_ops(void) { } 55static inline void setup_irq_remapping_ops(void) { }
60static inline int irq_remapping_supported(void) { return 0; } 56static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
71{ 67{
72 return -ENODEV; 68 return -ENODEV;
73} 69}
74static inline int set_remapped_irq_affinity(struct irq_data *data,
75 const struct cpumask *mask,
76 bool force)
77{
78 return 0;
79}
80static inline void free_remapped_irq(int irq) { } 70static inline void free_remapped_irq(int irq) { }
81static inline void compose_remapped_msi_msg(struct pci_dev *pdev, 71static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
82 unsigned int irq, unsigned int dest, 72 unsigned int irq, unsigned int dest,
83 struct msi_msg *msg, u8 hpet_id) 73 struct msi_msg *msg, u8 hpet_id)
84{ 74{
85} 75}
86static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 76static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
87{ 77{
88 return -ENODEV; 78 return -ENODEV;
89} 79}
90static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 80
91 int index, int sub_handle) 81static inline void panic_if_irq_remap(const char *msg)
82{
83}
84
85static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
92{ 86{
93 return -ENODEV;
94} 87}
95static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) 88
89static inline bool setup_remapped_irq(int irq,
90 struct irq_cfg *cfg,
91 struct irq_chip *chip)
96{ 92{
97 return -ENODEV; 93 return false;
98} 94}
99#endif /* CONFIG_IRQ_REMAP */ 95#endif /* CONFIG_IRQ_REMAP */
100 96
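
The reshaped API pushes the remapping decision behind two calls: setup_remapped_irq() reports whether the IRQ was taken over (adjusting the irq_chip defaults when it was), and panic_if_irq_remap() marks paths that must never run remapped. The intended caller pattern, sketched under that assumption:

        static void finish_irq_setup(int irq, struct irq_cfg *cfg,
                                     struct irq_chip *chip)
        {
                if (setup_remapped_irq(irq, cfg, chip))
                        return;         /* chip already retargeted for remapping */

                /* plain, non-remapped programming of the chip goes here */
        }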
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e518c7e3..aac5fa62a86c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,8 +109,8 @@
109 109
110#define UV_BAU_MESSAGE 0xf5 110#define UV_BAU_MESSAGE 0xf5
111 111
112/* Xen vector callback to receive events in a HVM domain */ 112/* Vector on which hypervisor callbacks will be delivered */
113#define XEN_HVM_EVTCHN_CALLBACK 0xf3 113#define HYPERVISOR_CALLBACK_VECTOR 0xf3
114 114
115/* 115/*
116 * Local APIC timer IRQ vector is on a different priority level, 116 * Local APIC timer IRQ vector is on a different priority level,
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187be..65231e173baf 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
85 return ret; 85 return ret;
86} 86}
87 87
88static inline int kvm_para_available(void) 88static inline bool kvm_para_available(void)
89{ 89{
90 unsigned int eax, ebx, ecx, edx; 90 unsigned int eax, ebx, ecx, edx;
91 char signature[13]; 91 char signature[13];
92 92
93 if (boot_cpu_data.cpuid_level < 0) 93 if (boot_cpu_data.cpuid_level < 0)
94 return 0; /* So we don't blow up on old processors */ 94 return false; /* So we don't blow up on old processors */
95 95
96 if (cpu_has_hypervisor) { 96 if (cpu_has_hypervisor) {
97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); 97 cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
101 signature[12] = 0; 101 signature[12] = 0;
102 102
103 if (strcmp(signature, "KVMKVMKVM") == 0) 103 if (strcmp(signature, "KVMKVMKVM") == 0)
104 return 1; 104 return true;
105 } 105 }
106 106
107 return 0; 107 return false;
108} 108}
109 109
110static inline unsigned int kvm_arch_para_features(void) 110static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d..79327e9483a3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
27#define __asmlinkage_protect0(ret) \ 27#define __asmlinkage_protect0(ret) \
28 __asmlinkage_protect_n(ret) 28 __asmlinkage_protect_n(ret)
29#define __asmlinkage_protect1(ret, arg1) \ 29#define __asmlinkage_protect1(ret, arg1) \
30 __asmlinkage_protect_n(ret, "g" (arg1)) 30 __asmlinkage_protect_n(ret, "m" (arg1))
31#define __asmlinkage_protect2(ret, arg1, arg2) \ 31#define __asmlinkage_protect2(ret, arg1, arg2) \
32 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) 32 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ 33#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
34 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) 34 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ 35#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
36 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 36 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
37 "g" (arg4)) 37 "m" (arg4))
38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ 38#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
39 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 39 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
40 "g" (arg4), "g" (arg5)) 40 "m" (arg4), "m" (arg5))
41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ 41#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
42 __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ 42 __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
43 "g" (arg4), "g" (arg5), "g" (arg6)) 43 "m" (arg4), "m" (arg5), "m" (arg6))
44 44
45#endif /* CONFIG_X86_32 */ 45#endif /* CONFIG_X86_32 */
46 46
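
The switch from "g" to "m" constraints tightens asmlinkage_protect(): "g" lets the compiler satisfy the fake dependency with a register or immediate, which would not force the argument's stack slot to stay live, while "m" demands a memory operand. The effect in isolation:

        /* illustrative: pin a variable to its memory slot across the asm */
        static inline void keep_in_memory(int *p)
        {
                asm volatile("" : : "m" (*p));
        }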
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecdfee60ee4a..f4076af1f4ed 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -3,6 +3,90 @@
3 3
4#include <uapi/asm/mce.h> 4#include <uapi/asm/mce.h>
5 5
6/*
7 * Machine Check support for x86
8 */
9
10/* MCG_CAP register defines */
11#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
12#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
13#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
14#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
15#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
16#define MCG_EXT_CNT_SHIFT 16
17#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
18#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
19
20/* MCG_STATUS register defines */
21#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
22#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
23#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
24
25/* MCi_STATUS register defines */
26#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
27#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
28#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
29#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
30#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
31#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
32#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
33#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
34#define MCI_STATUS_AR (1ULL<<55) /* Action required */
35#define MCACOD 0xffff /* MCA Error Code */
36
37/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
38#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
39#define MCACOD_SCRUBMSK 0xfff0
40#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
41#define MCACOD_DATA 0x0134 /* Data Load */
42#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
43
44/* MCi_MISC register defines */
45#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
46#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
47#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
48#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
49#define MCI_MISC_ADDR_PHYS 2 /* physical address */
50#define MCI_MISC_ADDR_MEM 3 /* memory address */
51#define MCI_MISC_ADDR_GENERIC 7 /* generic */
52
53/* CTL2 register defines */
54#define MCI_CTL2_CMCI_EN (1ULL << 30)
55#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
56
57#define MCJ_CTX_MASK 3
58#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
59#define MCJ_CTX_RANDOM 0 /* inject context: random */
60#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
61#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
62#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
63#define MCJ_EXCEPTION 0x8 /* raise as exception */
64#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
65
66#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
67
68/* Software defined banks */
69#define MCE_EXTENDED_BANK 128
70#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
71#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
72
73#define MCE_LOG_LEN 32
74#define MCE_LOG_SIGNATURE "MACHINECHECK"
75
76/*
77 * This structure contains all data related to the MCE log. Also
78 * carries a signature to make it easier to find from external
79 * debugging tools. Each entry is only valid when its finished flag
80 * is set.
81 */
82struct mce_log {
83 char signature[12]; /* "MACHINECHECK" */
84 unsigned len; /* = MCE_LOG_LEN */
85 unsigned next;
86 unsigned flags;
87 unsigned recordlen; /* length of struct mce */
88 struct mce entry[MCE_LOG_LEN];
89};
6 90
7struct mca_config { 91struct mca_config {
8 bool dont_log_ce; 92 bool dont_log_ce;
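
The MCG_CAP helpers moved in above are plain bit-field accessors; a minimal standalone sketch of decoding a capability value (the 0x0c09 reading is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define MCG_BANKCNT_MASK        0xff
#define MCG_CMCI_P              (1ULL<<10)
#define MCG_EXT_CNT_MASK        0xff0000
#define MCG_EXT_CNT_SHIFT       16
#define MCG_EXT_CNT(c)          (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)

int main(void)
{
        uint64_t cap = 0x0c09;  /* hypothetical MCG_CAP value */

        printf("banks: %llu\n", (unsigned long long)(cap & MCG_BANKCNT_MASK));
        printf("CMCI:  %s\n", (cap & MCG_CMCI_P) ? "yes" : "no");
        printf("ext regs: %llu\n", (unsigned long long)MCG_EXT_CNT(cap));
        return 0;
}
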
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 79ce5685ab64..c2934be2446a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,4 +11,8 @@ struct ms_hyperv_info {
11 11
12extern struct ms_hyperv_info ms_hyperv; 12extern struct ms_hyperv_info ms_hyperv;
13 13
14void hyperv_callback_vector(void);
15void hyperv_vector_handler(struct pt_regs *regs);
16void hv_register_vmbus_handler(int irq, irq_handler_t handler);
17
14#endif 18#endif
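
hv_register_vmbus_handler() takes a standard irq_handler_t; a hedged sketch of the VMBus-side usage (the handler body and VMBUS_IRQ are placeholders, not part of this patch):

#include <linux/interrupt.h>
#include <asm/mshyperv.h>

static irqreturn_t vmbus_isr(int irq, void *dev_id)
{
        /* acknowledge the message page and schedule channel work */
        return IRQ_HANDLED;
}

static int __init vmbus_irq_init(void)
{
        hv_register_vmbus_handler(VMBUS_IRQ, vmbus_isr);
        return 0;
}
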
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..c28fd02f4bf7 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
121#define arch_teardown_msi_irq x86_teardown_msi_irq 121#define arch_teardown_msi_irq x86_teardown_msi_irq
122#define arch_restore_msi_irqs x86_restore_msi_irqs 122#define arch_restore_msi_irqs x86_restore_msi_irqs
123/* implemented in arch/x86/kernel/apic/io_apic. */ 123/* implemented in arch/x86/kernel/apic/io_apic. */
124struct msi_desc;
124int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); 125int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
125void native_teardown_msi_irq(unsigned int irq); 126void native_teardown_msi_irq(unsigned int irq);
126void native_restore_msi_irqs(struct pci_dev *dev, int irq); 127void native_restore_msi_irqs(struct pci_dev *dev, int irq);
128int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
129 unsigned int irq_base, unsigned int irq_offset);
127/* default to the implementation in drivers/lib/msi.c */ 130/* default to the implementation in drivers/lib/msi.c */
128#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS 131#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
129#define HAVE_DEFAULT_MSI_RESTORE_IRQS 132#define HAVE_DEFAULT_MSI_RESTORE_IRQS
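
The new irq_base/irq_offset pair lets one msi_desc back a block of sub-irqs; a sketch of the multi-MSI pattern this enables (illustrative caller, assuming only the declaration above):

static int setup_msi_block(struct pci_dev *dev, struct msi_desc *desc,
                           unsigned int base, unsigned int nvec)
{
        unsigned int i;
        int ret;

        for (i = 0; i < nvec; i++) {
                /* program sub-irq base+i from the same descriptor */
                ret = setup_msi_irq(dev, desc, base, i);
                if (ret < 0)
                        return ret;
        }
        return 0;
}
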
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa7..57cb63402213 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) 32#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
33#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) 33#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
34#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
35
36#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
37#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
38 (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
34 39
35#define AMD64_EVENTSEL_EVENT \ 40#define AMD64_EVENTSEL_EVENT \
36 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) 41 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
46#define AMD64_RAW_EVENT_MASK \ 51#define AMD64_RAW_EVENT_MASK \
47 (X86_RAW_EVENT_MASK | \ 52 (X86_RAW_EVENT_MASK | \
48 AMD64_EVENTSEL_EVENT) 53 AMD64_EVENTSEL_EVENT)
54#define AMD64_RAW_EVENT_MASK_NB \
55 (AMD64_EVENTSEL_EVENT | \
56 ARCH_PERFMON_EVENTSEL_UMASK)
49#define AMD64_NUM_COUNTERS 4 57#define AMD64_NUM_COUNTERS 4
50#define AMD64_NUM_COUNTERS_CORE 6 58#define AMD64_NUM_COUNTERS_CORE 6
59#define AMD64_NUM_COUNTERS_NB 4
51 60
52#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 61#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
53#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 62#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
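
The AMD64_EVENTSEL_INT_CORE_* additions describe the Fam15h northbridge event-select layout: bit 36 enables delivery of the NB counter interrupt to a core, bits 37-40 select which core of the node receives it. A standalone sketch of composing such a selector (the event code 0xe0 is purely illustrative):

#include <stdint.h>
#include <stdio.h>

#define AMD64_EVENTSEL_INT_CORE_ENABLE          (1ULL << 36)
#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT       37
#define AMD64_EVENTSEL_INT_CORE_SEL_MASK        (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)

int main(void)
{
        uint64_t evsel = 0xe0;  /* illustrative NB event code */

        /* deliver the counter interrupt to core 2 of the node */
        evsel |= AMD64_EVENTSEL_INT_CORE_ENABLE;
        evsel |= (2ULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) &
                 AMD64_EVENTSEL_INT_CORE_SEL_MASK;

        printf("event select: %#llx\n", (unsigned long long)evsel);
        return 0;
}
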
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d3..fc304279b559 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; 142 return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
143} 143}
144 144
145static inline unsigned long pud_pfn(pud_t pud)
146{
147 return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
148}
149
145#define pte_page(pte) pfn_to_page(pte_pfn(pte)) 150#define pte_page(pte) pfn_to_page(pte_pfn(pte))
146 151
147static inline int pmd_large(pmd_t pte) 152static inline int pmd_large(pmd_t pte)
@@ -781,6 +786,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
781 memcpy(dst, src, count * sizeof(pgd_t)); 786 memcpy(dst, src, count * sizeof(pgd_t));
782} 787}
783 788
789/*
790 * The x86 doesn't have any external MMU info: the kernel page
791 * tables contain all the necessary information.
792 */
793static inline void update_mmu_cache(struct vm_area_struct *vma,
794 unsigned long addr, pte_t *ptep)
795{
796}
797static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
798 unsigned long addr, pmd_t *pmd)
799{
800}
784 801
785#include <asm-generic/pgtable.h> 802#include <asm-generic/pgtable.h>
786#endif /* __ASSEMBLY__ */ 803#endif /* __ASSEMBLY__ */
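
pud_pfn() completes the pte/pmd/pud accessor trio needed by the huge-page paths; the arithmetic is pmd_pfn() one level up. Spelled out standalone (the 52-bit mask is illustrative; the kernel derives PTE_PFN_MASK from __PHYSICAL_MASK_SHIFT):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PTE_PFN_MASK    0x000ffffffffff000ULL   /* illustrative value */

static uint64_t pud_pfn(uint64_t pud_val)
{
        return (pud_val & PTE_PFN_MASK) >> PAGE_SHIFT;
}

int main(void)
{
        uint64_t pud = 0x40000067ULL;   /* hypothetical 1GiB-page PUD entry */

        printf("pfn: %#llx\n", (unsigned long long)pud_pfn(pud));
        return 0;
}
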
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e..9ee322103c6d 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
66 __flush_tlb_one((vaddr)); \ 66 __flush_tlb_one((vaddr)); \
67} while (0) 67} while (0)
68 68
69/*
70 * The i386 doesn't have any external MMU info: the kernel page
71 * tables contain all the necessary information.
72 */
73#define update_mmu_cache(vma, address, ptep) do { } while (0)
74#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
75
76#endif /* !__ASSEMBLY__ */ 69#endif /* !__ASSEMBLY__ */
77 70
78/* 71/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e..615b0c78449f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) 142#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
143#define pte_unmap(pte) ((void)(pte))/* NOP */ 143#define pte_unmap(pte) ((void)(pte))/* NOP */
144 144
145#define update_mmu_cache(vma, address, ptep) do { } while (0)
146#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
147
148/* Encode and de-code a swap entry */ 145/* Encode and de-code a swap entry */
149#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 146#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
150#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 147#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..cf500543f6ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -943,7 +943,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
943extern int get_tsc_mode(unsigned long adr); 943extern int get_tsc_mode(unsigned long adr);
944extern int set_tsc_mode(unsigned int val); 944extern int set_tsc_mode(unsigned int val);
945 945
946extern int amd_get_nb_id(int cpu); 946extern u16 amd_get_nb_id(int cpu);
947 947
948struct aperfmperf { 948struct aperfmperf {
949 u64 aperf, mperf; 949 u64 aperf, mperf;
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34..5c6e4fb370f5 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
47# define NEED_NOPL 0 47# define NEED_NOPL 0
48#endif 48#endif
49 49
50#ifdef CONFIG_MATOM
51# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
52#else
53# define NEED_MOVBE 0
54#endif
55
50#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
51#ifdef CONFIG_PARAVIRT 57#ifdef CONFIG_PARAVIRT
52/* Paravirtualized systems may not have PSE or PGE available */ 58/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
80 86
81#define REQUIRED_MASK2 0 87#define REQUIRED_MASK2 0
82#define REQUIRED_MASK3 (NEED_NOPL) 88#define REQUIRED_MASK3 (NEED_NOPL)
83#define REQUIRED_MASK4 0 89#define REQUIRED_MASK4 (NEED_MOVBE)
84#define REQUIRED_MASK5 0 90#define REQUIRED_MASK5 0
85#define REQUIRED_MASK6 0 91#define REQUIRED_MASK6 0
86#define REQUIRED_MASK7 0 92#define REQUIRED_MASK7 0
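
The NEED_MOVBE arithmetic follows the cpufeature encoding, word*32 + bit: X86_FEATURE_MOVBE sits in word 4 (CPUID leaf 1 ECX), so masking with 31 yields the bit within that word, which is why the mask lands in REQUIRED_MASK4. Worked standalone:

#include <stdio.h>

/* cpufeature bits are encoded as word*32 + bit */
#define X86_FEATURE_MOVBE       (4*32 + 22)     /* word 4, bit 22 */

int main(void)
{
        printf("word %d, bit %d, mask %#x\n",
               X86_FEATURE_MOVBE >> 5,
               X86_FEATURE_MOVBE & 31,
               1 << (X86_FEATURE_MOVBE & 31));  /* word 4, bit 22, mask 0x400000 */
        return 0;
}
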
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..7669941cc9d2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,19 +181,38 @@ struct x86_platform_ops {
181}; 181};
182 182
183struct pci_dev; 183struct pci_dev;
184struct msi_msg;
184 185
185struct x86_msi_ops { 186struct x86_msi_ops {
186 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); 187 int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
188 void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
189 unsigned int dest, struct msi_msg *msg,
190 u8 hpet_id);
187 void (*teardown_msi_irq)(unsigned int irq); 191 void (*teardown_msi_irq)(unsigned int irq);
188 void (*teardown_msi_irqs)(struct pci_dev *dev); 192 void (*teardown_msi_irqs)(struct pci_dev *dev);
189 void (*restore_msi_irqs)(struct pci_dev *dev, int irq); 193 void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
194 int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
190}; 195};
191 196
197struct IO_APIC_route_entry;
198struct io_apic_irq_attr;
199struct irq_data;
200struct cpumask;
201
192struct x86_io_apic_ops { 202struct x86_io_apic_ops {
193 void (*init) (void); 203 void (*init) (void);
194 unsigned int (*read) (unsigned int apic, unsigned int reg); 204 unsigned int (*read) (unsigned int apic, unsigned int reg);
195 void (*write) (unsigned int apic, unsigned int reg, unsigned int value); 205 void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
196 void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); 206 void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
207 void (*disable)(void);
208 void (*print_entries)(unsigned int apic, unsigned int nr_entries);
209 int (*set_affinity)(struct irq_data *data,
210 const struct cpumask *mask,
211 bool force);
212 int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
213 unsigned int destination, int vector,
214 struct io_apic_irq_attr *attr);
215 void (*eoi_ioapic_pin)(int apic, int pin, int vector);
197}; 216};
198 217
199extern struct x86_init_ops x86_init; 218extern struct x86_init_ops x86_init;
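
These ops structures are the indirection that lets the io_apic.c hunks below drop their irq_remapping_enabled branches: the members default to the native_* implementations, and an alternative backend overrides individual hooks at boot. A minimal sketch of the override pattern (my_eoi_ioapic_pin and my_backend_setup are illustrative names):

static void my_eoi_ioapic_pin(int apic, int pin, int vector)
{
        /* remapping-aware EOI: the RTE carries the pin as virtual vector */
}

static void __init my_backend_setup(void)
{
        x86_io_apic_ops.eoi_ioapic_pin = my_eoi_ioapic_pin;
        x86_io_apic_ops.print_entries  = intel_ir_io_apic_print_entries;
}
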
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e..d8829751b3f8 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
1#ifdef CONFIG_KMEMCHECK 1#ifdef CONFIG_KMEMCHECK
2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ 2/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
3# include <asm-generic/xor.h> 3# include <asm-generic/xor.h>
4#elif !defined(_ASM_X86_XOR_H)
5#define _ASM_X86_XOR_H
6
7/*
8 * Optimized RAID-5 checksumming functions for SSE.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2, or (at your option)
13 * any later version.
14 *
15 * You should have received a copy of the GNU General Public License
16 * (for example /usr/src/linux/COPYING); if not, write to the Free
17 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20/*
21 * Cache avoiding checksumming functions utilizing KNI instructions
22 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
23 */
24
25/*
26 * Based on
27 * High-speed RAID5 checksumming functions utilizing SSE instructions.
28 * Copyright (C) 1998 Ingo Molnar.
29 */
30
31/*
32 * x86-64 changes / gcc fixes from Andi Kleen.
33 * Copyright 2002 Andi Kleen, SuSE Labs.
34 *
 35 * This hasn't been optimized for the hammer yet, but there are likely
 36 * no advantages to be gained from x86-64 here anyway.
37 */
38
39#include <asm/i387.h>
40
41#ifdef CONFIG_X86_32
42/* reduce register pressure */
43# define XOR_CONSTANT_CONSTRAINT "i"
4#else 44#else
45# define XOR_CONSTANT_CONSTRAINT "re"
46#endif
47
48#define OFFS(x) "16*("#x")"
49#define PF_OFFS(x) "256+16*("#x")"
50#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
51#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
52#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
53#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
54#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
55#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
56#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
57#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
58#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
59#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
60#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
61#define NOP(x)
62
63#define BLK64(pf, op, i) \
64 pf(i) \
65 op(i, 0) \
66 op(i + 1, 1) \
67 op(i + 2, 2) \
68 op(i + 3, 3)
69
70static void
71xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
72{
73 unsigned long lines = bytes >> 8;
74
75 kernel_fpu_begin();
76
77 asm volatile(
78#undef BLOCK
79#define BLOCK(i) \
80 LD(i, 0) \
81 LD(i + 1, 1) \
82 PF1(i) \
83 PF1(i + 2) \
84 LD(i + 2, 2) \
85 LD(i + 3, 3) \
86 PF0(i + 4) \
87 PF0(i + 6) \
88 XO1(i, 0) \
89 XO1(i + 1, 1) \
90 XO1(i + 2, 2) \
91 XO1(i + 3, 3) \
92 ST(i, 0) \
93 ST(i + 1, 1) \
94 ST(i + 2, 2) \
95 ST(i + 3, 3) \
96
97
98 PF0(0)
99 PF0(2)
100
101 " .align 32 ;\n"
102 " 1: ;\n"
103
104 BLOCK(0)
105 BLOCK(4)
106 BLOCK(8)
107 BLOCK(12)
108
109 " add %[inc], %[p1] ;\n"
110 " add %[inc], %[p2] ;\n"
111 " dec %[cnt] ;\n"
112 " jnz 1b ;\n"
113 : [cnt] "+r" (lines),
114 [p1] "+r" (p1), [p2] "+r" (p2)
115 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
116 : "memory");
117
118 kernel_fpu_end();
119}
120
121static void
122xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
123{
124 unsigned long lines = bytes >> 8;
125
126 kernel_fpu_begin();
127
128 asm volatile(
129#undef BLOCK
130#define BLOCK(i) \
131 BLK64(PF0, LD, i) \
132 BLK64(PF1, XO1, i) \
133 BLK64(NOP, ST, i) \
134
135 " .align 32 ;\n"
136 " 1: ;\n"
137
138 BLOCK(0)
139 BLOCK(4)
140 BLOCK(8)
141 BLOCK(12)
142
143 " add %[inc], %[p1] ;\n"
144 " add %[inc], %[p2] ;\n"
145 " dec %[cnt] ;\n"
146 " jnz 1b ;\n"
147 : [cnt] "+r" (lines),
148 [p1] "+r" (p1), [p2] "+r" (p2)
149 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
150 : "memory");
151
152 kernel_fpu_end();
153}
154
155static void
156xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
157 unsigned long *p3)
158{
159 unsigned long lines = bytes >> 8;
160
161 kernel_fpu_begin();
162
163 asm volatile(
164#undef BLOCK
165#define BLOCK(i) \
166 PF1(i) \
167 PF1(i + 2) \
168 LD(i, 0) \
169 LD(i + 1, 1) \
170 LD(i + 2, 2) \
171 LD(i + 3, 3) \
172 PF2(i) \
173 PF2(i + 2) \
174 PF0(i + 4) \
175 PF0(i + 6) \
176 XO1(i, 0) \
177 XO1(i + 1, 1) \
178 XO1(i + 2, 2) \
179 XO1(i + 3, 3) \
180 XO2(i, 0) \
181 XO2(i + 1, 1) \
182 XO2(i + 2, 2) \
183 XO2(i + 3, 3) \
184 ST(i, 0) \
185 ST(i + 1, 1) \
186 ST(i + 2, 2) \
187 ST(i + 3, 3) \
188
189
190 PF0(0)
191 PF0(2)
192
193 " .align 32 ;\n"
194 " 1: ;\n"
195
196 BLOCK(0)
197 BLOCK(4)
198 BLOCK(8)
199 BLOCK(12)
200
201 " add %[inc], %[p1] ;\n"
202 " add %[inc], %[p2] ;\n"
203 " add %[inc], %[p3] ;\n"
204 " dec %[cnt] ;\n"
205 " jnz 1b ;\n"
206 : [cnt] "+r" (lines),
207 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
208 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
209 : "memory");
210
211 kernel_fpu_end();
212}
213
214static void
215xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
216 unsigned long *p3)
217{
218 unsigned long lines = bytes >> 8;
219
220 kernel_fpu_begin();
221
222 asm volatile(
223#undef BLOCK
224#define BLOCK(i) \
225 BLK64(PF0, LD, i) \
226 BLK64(PF1, XO1, i) \
227 BLK64(PF2, XO2, i) \
228 BLK64(NOP, ST, i) \
229
230 " .align 32 ;\n"
231 " 1: ;\n"
232
233 BLOCK(0)
234 BLOCK(4)
235 BLOCK(8)
236 BLOCK(12)
237
238 " add %[inc], %[p1] ;\n"
239 " add %[inc], %[p2] ;\n"
240 " add %[inc], %[p3] ;\n"
241 " dec %[cnt] ;\n"
242 " jnz 1b ;\n"
243 : [cnt] "+r" (lines),
244 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
245 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
246 : "memory");
247
248 kernel_fpu_end();
249}
250
251static void
252xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
253 unsigned long *p3, unsigned long *p4)
254{
255 unsigned long lines = bytes >> 8;
256
257 kernel_fpu_begin();
258
259 asm volatile(
260#undef BLOCK
261#define BLOCK(i) \
262 PF1(i) \
263 PF1(i + 2) \
264 LD(i, 0) \
265 LD(i + 1, 1) \
266 LD(i + 2, 2) \
267 LD(i + 3, 3) \
268 PF2(i) \
269 PF2(i + 2) \
270 XO1(i, 0) \
271 XO1(i + 1, 1) \
272 XO1(i + 2, 2) \
273 XO1(i + 3, 3) \
274 PF3(i) \
275 PF3(i + 2) \
276 PF0(i + 4) \
277 PF0(i + 6) \
278 XO2(i, 0) \
279 XO2(i + 1, 1) \
280 XO2(i + 2, 2) \
281 XO2(i + 3, 3) \
282 XO3(i, 0) \
283 XO3(i + 1, 1) \
284 XO3(i + 2, 2) \
285 XO3(i + 3, 3) \
286 ST(i, 0) \
287 ST(i + 1, 1) \
288 ST(i + 2, 2) \
289 ST(i + 3, 3) \
290
291
292 PF0(0)
293 PF0(2)
294
295 " .align 32 ;\n"
296 " 1: ;\n"
297
298 BLOCK(0)
299 BLOCK(4)
300 BLOCK(8)
301 BLOCK(12)
302
303 " add %[inc], %[p1] ;\n"
304 " add %[inc], %[p2] ;\n"
305 " add %[inc], %[p3] ;\n"
306 " add %[inc], %[p4] ;\n"
307 " dec %[cnt] ;\n"
308 " jnz 1b ;\n"
309 : [cnt] "+r" (lines), [p1] "+r" (p1),
310 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
311 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
312 : "memory");
313
314 kernel_fpu_end();
315}
316
317static void
318xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
319 unsigned long *p3, unsigned long *p4)
320{
321 unsigned long lines = bytes >> 8;
322
323 kernel_fpu_begin();
324
325 asm volatile(
326#undef BLOCK
327#define BLOCK(i) \
328 BLK64(PF0, LD, i) \
329 BLK64(PF1, XO1, i) \
330 BLK64(PF2, XO2, i) \
331 BLK64(PF3, XO3, i) \
332 BLK64(NOP, ST, i) \
333
334 " .align 32 ;\n"
335 " 1: ;\n"
336
337 BLOCK(0)
338 BLOCK(4)
339 BLOCK(8)
340 BLOCK(12)
341
342 " add %[inc], %[p1] ;\n"
343 " add %[inc], %[p2] ;\n"
344 " add %[inc], %[p3] ;\n"
345 " add %[inc], %[p4] ;\n"
346 " dec %[cnt] ;\n"
347 " jnz 1b ;\n"
348 : [cnt] "+r" (lines), [p1] "+r" (p1),
349 [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
350 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
351 : "memory");
352
353 kernel_fpu_end();
354}
355
356static void
357xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
358 unsigned long *p3, unsigned long *p4, unsigned long *p5)
359{
360 unsigned long lines = bytes >> 8;
361
362 kernel_fpu_begin();
363
364 asm volatile(
365#undef BLOCK
366#define BLOCK(i) \
367 PF1(i) \
368 PF1(i + 2) \
369 LD(i, 0) \
370 LD(i + 1, 1) \
371 LD(i + 2, 2) \
372 LD(i + 3, 3) \
373 PF2(i) \
374 PF2(i + 2) \
375 XO1(i, 0) \
376 XO1(i + 1, 1) \
377 XO1(i + 2, 2) \
378 XO1(i + 3, 3) \
379 PF3(i) \
380 PF3(i + 2) \
381 XO2(i, 0) \
382 XO2(i + 1, 1) \
383 XO2(i + 2, 2) \
384 XO2(i + 3, 3) \
385 PF4(i) \
386 PF4(i + 2) \
387 PF0(i + 4) \
388 PF0(i + 6) \
389 XO3(i, 0) \
390 XO3(i + 1, 1) \
391 XO3(i + 2, 2) \
392 XO3(i + 3, 3) \
393 XO4(i, 0) \
394 XO4(i + 1, 1) \
395 XO4(i + 2, 2) \
396 XO4(i + 3, 3) \
397 ST(i, 0) \
398 ST(i + 1, 1) \
399 ST(i + 2, 2) \
400 ST(i + 3, 3) \
401
402
403 PF0(0)
404 PF0(2)
405
406 " .align 32 ;\n"
407 " 1: ;\n"
408
409 BLOCK(0)
410 BLOCK(4)
411 BLOCK(8)
412 BLOCK(12)
413
414 " add %[inc], %[p1] ;\n"
415 " add %[inc], %[p2] ;\n"
416 " add %[inc], %[p3] ;\n"
417 " add %[inc], %[p4] ;\n"
418 " add %[inc], %[p5] ;\n"
419 " dec %[cnt] ;\n"
420 " jnz 1b ;\n"
421 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
422 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
423 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
424 : "memory");
425
426 kernel_fpu_end();
427}
428
429static void
430xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
431 unsigned long *p3, unsigned long *p4, unsigned long *p5)
432{
433 unsigned long lines = bytes >> 8;
434
435 kernel_fpu_begin();
436
437 asm volatile(
438#undef BLOCK
439#define BLOCK(i) \
440 BLK64(PF0, LD, i) \
441 BLK64(PF1, XO1, i) \
442 BLK64(PF2, XO2, i) \
443 BLK64(PF3, XO3, i) \
444 BLK64(PF4, XO4, i) \
445 BLK64(NOP, ST, i) \
446
447 " .align 32 ;\n"
448 " 1: ;\n"
449
450 BLOCK(0)
451 BLOCK(4)
452 BLOCK(8)
453 BLOCK(12)
454
455 " add %[inc], %[p1] ;\n"
456 " add %[inc], %[p2] ;\n"
457 " add %[inc], %[p3] ;\n"
458 " add %[inc], %[p4] ;\n"
459 " add %[inc], %[p5] ;\n"
460 " dec %[cnt] ;\n"
461 " jnz 1b ;\n"
462 : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
463 [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
464 : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
465 : "memory");
466
467 kernel_fpu_end();
468}
469
470static struct xor_block_template xor_block_sse_pf64 = {
471 .name = "prefetch64-sse",
472 .do_2 = xor_sse_2_pf64,
473 .do_3 = xor_sse_3_pf64,
474 .do_4 = xor_sse_4_pf64,
475 .do_5 = xor_sse_5_pf64,
476};
477
478#undef LD
479#undef XO1
480#undef XO2
481#undef XO3
482#undef XO4
483#undef ST
484#undef NOP
485#undef BLK64
486#undef BLOCK
487
488#undef XOR_CONSTANT_CONSTRAINT
489
5#ifdef CONFIG_X86_32 490#ifdef CONFIG_X86_32
6# include <asm/xor_32.h> 491# include <asm/xor_32.h>
7#else 492#else
8# include <asm/xor_64.h> 493# include <asm/xor_64.h>
9#endif 494#endif
10#endif 495
496#define XOR_SELECT_TEMPLATE(FASTEST) \
497 AVX_SELECT(FASTEST)
498
499#endif /* _ASM_X86_XOR_H */
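
The pf64 variants above issue one prefetchnta per 64-byte group instead of interleaving prefetch and compute per line; expanding the macros by hand, BLK64(PF0, LD, 0) becomes:

        PF0(0)          /* prefetchnta 256+16*0(%[p1]) */
        LD(0, 0)        /* movaps 16*0(%[p1]), %xmm0 */
        LD(1, 1)        /* movaps 16*1(%[p1]), %xmm1 */
        LD(2, 2)        /* movaps 16*2(%[p1]), %xmm2 */
        LD(3, 3)        /* movaps 16*3(%[p1]), %xmm3 */

i.e. four 16-byte movaps loads behind a single prefetch, 64 bytes per group, hence the template name "prefetch64-sse"; the boot-time xor_speed() calibration then picks whichever scheme measures faster.
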
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06..ce05722e3c68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
2#define _ASM_X86_XOR_32_H 2#define _ASM_X86_XOR_32_H
3 3
4/* 4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE. 5 * Optimized RAID-5 checksumming functions for MMX.
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 8 * it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
529 .do_5 = xor_p5_mmx_5, 529 .do_5 = xor_p5_mmx_5,
530}; 530};
531 531
532/*
533 * Cache avoiding checksumming functions utilizing KNI instructions
534 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
535 */
536
537#define OFFS(x) "16*("#x")"
538#define PF_OFFS(x) "256+16*("#x")"
539#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
540#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
541#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
542#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
543#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
544#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
545#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
546#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
547#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
548#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
549#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
550#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
551#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
552
553
554static void
555xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
556{
557 unsigned long lines = bytes >> 8;
558
559 kernel_fpu_begin();
560
561 asm volatile(
562#undef BLOCK
563#define BLOCK(i) \
564 LD(i, 0) \
565 LD(i + 1, 1) \
566 PF1(i) \
567 PF1(i + 2) \
568 LD(i + 2, 2) \
569 LD(i + 3, 3) \
570 PF0(i + 4) \
571 PF0(i + 6) \
572 XO1(i, 0) \
573 XO1(i + 1, 1) \
574 XO1(i + 2, 2) \
575 XO1(i + 3, 3) \
576 ST(i, 0) \
577 ST(i + 1, 1) \
578 ST(i + 2, 2) \
579 ST(i + 3, 3) \
580
581
582 PF0(0)
583 PF0(2)
584
585 " .align 32 ;\n"
586 " 1: ;\n"
587
588 BLOCK(0)
589 BLOCK(4)
590 BLOCK(8)
591 BLOCK(12)
592
593 " addl $256, %1 ;\n"
594 " addl $256, %2 ;\n"
595 " decl %0 ;\n"
596 " jnz 1b ;\n"
597 : "+r" (lines),
598 "+r" (p1), "+r" (p2)
599 :
600 : "memory");
601
602 kernel_fpu_end();
603}
604
605static void
606xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
607 unsigned long *p3)
608{
609 unsigned long lines = bytes >> 8;
610
611 kernel_fpu_begin();
612
613 asm volatile(
614#undef BLOCK
615#define BLOCK(i) \
616 PF1(i) \
617 PF1(i + 2) \
618 LD(i,0) \
619 LD(i + 1, 1) \
620 LD(i + 2, 2) \
621 LD(i + 3, 3) \
622 PF2(i) \
623 PF2(i + 2) \
624 PF0(i + 4) \
625 PF0(i + 6) \
626 XO1(i,0) \
627 XO1(i + 1, 1) \
628 XO1(i + 2, 2) \
629 XO1(i + 3, 3) \
630 XO2(i,0) \
631 XO2(i + 1, 1) \
632 XO2(i + 2, 2) \
633 XO2(i + 3, 3) \
634 ST(i,0) \
635 ST(i + 1, 1) \
636 ST(i + 2, 2) \
637 ST(i + 3, 3) \
638
639
640 PF0(0)
641 PF0(2)
642
643 " .align 32 ;\n"
644 " 1: ;\n"
645
646 BLOCK(0)
647 BLOCK(4)
648 BLOCK(8)
649 BLOCK(12)
650
651 " addl $256, %1 ;\n"
652 " addl $256, %2 ;\n"
653 " addl $256, %3 ;\n"
654 " decl %0 ;\n"
655 " jnz 1b ;\n"
656 : "+r" (lines),
657 "+r" (p1), "+r"(p2), "+r"(p3)
658 :
659 : "memory" );
660
661 kernel_fpu_end();
662}
663
664static void
665xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
666 unsigned long *p3, unsigned long *p4)
667{
668 unsigned long lines = bytes >> 8;
669
670 kernel_fpu_begin();
671
672 asm volatile(
673#undef BLOCK
674#define BLOCK(i) \
675 PF1(i) \
676 PF1(i + 2) \
677 LD(i,0) \
678 LD(i + 1, 1) \
679 LD(i + 2, 2) \
680 LD(i + 3, 3) \
681 PF2(i) \
682 PF2(i + 2) \
683 XO1(i,0) \
684 XO1(i + 1, 1) \
685 XO1(i + 2, 2) \
686 XO1(i + 3, 3) \
687 PF3(i) \
688 PF3(i + 2) \
689 PF0(i + 4) \
690 PF0(i + 6) \
691 XO2(i,0) \
692 XO2(i + 1, 1) \
693 XO2(i + 2, 2) \
694 XO2(i + 3, 3) \
695 XO3(i,0) \
696 XO3(i + 1, 1) \
697 XO3(i + 2, 2) \
698 XO3(i + 3, 3) \
699 ST(i,0) \
700 ST(i + 1, 1) \
701 ST(i + 2, 2) \
702 ST(i + 3, 3) \
703
704
705 PF0(0)
706 PF0(2)
707
708 " .align 32 ;\n"
709 " 1: ;\n"
710
711 BLOCK(0)
712 BLOCK(4)
713 BLOCK(8)
714 BLOCK(12)
715
716 " addl $256, %1 ;\n"
717 " addl $256, %2 ;\n"
718 " addl $256, %3 ;\n"
719 " addl $256, %4 ;\n"
720 " decl %0 ;\n"
721 " jnz 1b ;\n"
722 : "+r" (lines),
723 "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
724 :
725 : "memory" );
726
727 kernel_fpu_end();
728}
729
730static void
731xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
732 unsigned long *p3, unsigned long *p4, unsigned long *p5)
733{
734 unsigned long lines = bytes >> 8;
735
736 kernel_fpu_begin();
737
738 /* Make sure GCC forgets anything it knows about p4 or p5,
739 such that it won't pass to the asm volatile below a
740 register that is shared with any other variable. That's
741 because we modify p4 and p5 there, but we can't mark them
742 as read/write, otherwise we'd overflow the 10-asm-operands
743 limit of GCC < 3.1. */
744 asm("" : "+r" (p4), "+r" (p5));
745
746 asm volatile(
747#undef BLOCK
748#define BLOCK(i) \
749 PF1(i) \
750 PF1(i + 2) \
751 LD(i,0) \
752 LD(i + 1, 1) \
753 LD(i + 2, 2) \
754 LD(i + 3, 3) \
755 PF2(i) \
756 PF2(i + 2) \
757 XO1(i,0) \
758 XO1(i + 1, 1) \
759 XO1(i + 2, 2) \
760 XO1(i + 3, 3) \
761 PF3(i) \
762 PF3(i + 2) \
763 XO2(i,0) \
764 XO2(i + 1, 1) \
765 XO2(i + 2, 2) \
766 XO2(i + 3, 3) \
767 PF4(i) \
768 PF4(i + 2) \
769 PF0(i + 4) \
770 PF0(i + 6) \
771 XO3(i,0) \
772 XO3(i + 1, 1) \
773 XO3(i + 2, 2) \
774 XO3(i + 3, 3) \
775 XO4(i,0) \
776 XO4(i + 1, 1) \
777 XO4(i + 2, 2) \
778 XO4(i + 3, 3) \
779 ST(i,0) \
780 ST(i + 1, 1) \
781 ST(i + 2, 2) \
782 ST(i + 3, 3) \
783
784
785 PF0(0)
786 PF0(2)
787
788 " .align 32 ;\n"
789 " 1: ;\n"
790
791 BLOCK(0)
792 BLOCK(4)
793 BLOCK(8)
794 BLOCK(12)
795
796 " addl $256, %1 ;\n"
797 " addl $256, %2 ;\n"
798 " addl $256, %3 ;\n"
799 " addl $256, %4 ;\n"
800 " addl $256, %5 ;\n"
801 " decl %0 ;\n"
802 " jnz 1b ;\n"
803 : "+r" (lines),
804 "+r" (p1), "+r" (p2), "+r" (p3)
805 : "r" (p4), "r" (p5)
806 : "memory");
807
808 /* p4 and p5 were modified, and now the variables are dead.
809 Clobber them just to be sure nobody does something stupid
810 like assuming they have some legal value. */
811 asm("" : "=r" (p4), "=r" (p5));
812
813 kernel_fpu_end();
814}
815
816static struct xor_block_template xor_block_pIII_sse = { 532static struct xor_block_template xor_block_pIII_sse = {
817 .name = "pIII_sse", 533 .name = "pIII_sse",
818 .do_2 = xor_sse_2, 534 .do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
827/* Also try the generic routines. */ 543/* Also try the generic routines. */
828#include <asm-generic/xor.h> 544#include <asm-generic/xor.h>
829 545
546/* We force the use of the SSE xor block because it can write around L2.
547 We may also be able to load into the L1 only depending on how the cpu
548 deals with a load to a line that is being prefetched. */
830#undef XOR_TRY_TEMPLATES 549#undef XOR_TRY_TEMPLATES
831#define XOR_TRY_TEMPLATES \ 550#define XOR_TRY_TEMPLATES \
832do { \ 551do { \
833 xor_speed(&xor_block_8regs); \
834 xor_speed(&xor_block_8regs_p); \
835 xor_speed(&xor_block_32regs); \
836 xor_speed(&xor_block_32regs_p); \
837 AVX_XOR_SPEED; \ 552 AVX_XOR_SPEED; \
838 if (cpu_has_xmm) \ 553 if (cpu_has_xmm) { \
839 xor_speed(&xor_block_pIII_sse); \ 554 xor_speed(&xor_block_pIII_sse); \
840 if (cpu_has_mmx) { \ 555 xor_speed(&xor_block_sse_pf64); \
556 } else if (cpu_has_mmx) { \
841 xor_speed(&xor_block_pII_mmx); \ 557 xor_speed(&xor_block_pII_mmx); \
842 xor_speed(&xor_block_p5_mmx); \ 558 xor_speed(&xor_block_p5_mmx); \
559 } else { \
560 xor_speed(&xor_block_8regs); \
561 xor_speed(&xor_block_8regs_p); \
562 xor_speed(&xor_block_32regs); \
563 xor_speed(&xor_block_32regs_p); \
843 } \ 564 } \
844} while (0) 565} while (0)
845 566
846/* We force the use of the SSE xor block because it can write around L2.
847 We may also be able to load into the L1 only depending on how the cpu
848 deals with a load to a line that is being prefetched. */
849#define XOR_SELECT_TEMPLATE(FASTEST) \
850 AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
851
852#endif /* _ASM_X86_XOR_32_H */ 567#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af5..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
1#ifndef _ASM_X86_XOR_64_H 1#ifndef _ASM_X86_XOR_64_H
2#define _ASM_X86_XOR_64_H 2#define _ASM_X86_XOR_64_H
3 3
4/*
5 * Optimized RAID-5 checksumming functions for MMX and SSE.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * You should have received a copy of the GNU General Public License
13 * (for example /usr/src/linux/COPYING); if not, write to the Free
14 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17
18/*
19 * Cache avoiding checksumming functions utilizing KNI instructions
20 * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
21 */
22
23/*
24 * Based on
25 * High-speed RAID5 checksumming functions utilizing SSE instructions.
26 * Copyright (C) 1998 Ingo Molnar.
27 */
28
29/*
30 * x86-64 changes / gcc fixes from Andi Kleen.
31 * Copyright 2002 Andi Kleen, SuSE Labs.
32 *
33 * This hasn't been optimized for the hammer yet, but there are likely
34 * no advantages to be gotten from x86-64 here anyways.
35 */
36
37#include <asm/i387.h>
38
39#define OFFS(x) "16*("#x")"
40#define PF_OFFS(x) "256+16*("#x")"
41#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
42#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
43#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
44#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
45#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
46#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
47#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
48#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
49#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
50#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
51#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
52#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
53#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
54
55
56static void
57xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
58{
59 unsigned int lines = bytes >> 8;
60
61 kernel_fpu_begin();
62
63 asm volatile(
64#undef BLOCK
65#define BLOCK(i) \
66 LD(i, 0) \
67 LD(i + 1, 1) \
68 PF1(i) \
69 PF1(i + 2) \
70 LD(i + 2, 2) \
71 LD(i + 3, 3) \
72 PF0(i + 4) \
73 PF0(i + 6) \
74 XO1(i, 0) \
75 XO1(i + 1, 1) \
76 XO1(i + 2, 2) \
77 XO1(i + 3, 3) \
78 ST(i, 0) \
79 ST(i + 1, 1) \
80 ST(i + 2, 2) \
81 ST(i + 3, 3) \
82
83
84 PF0(0)
85 PF0(2)
86
87 " .align 32 ;\n"
88 " 1: ;\n"
89
90 BLOCK(0)
91 BLOCK(4)
92 BLOCK(8)
93 BLOCK(12)
94
95 " addq %[inc], %[p1] ;\n"
96 " addq %[inc], %[p2] ;\n"
97 " decl %[cnt] ; jnz 1b"
98 : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
99 : [inc] "r" (256UL)
100 : "memory");
101
102 kernel_fpu_end();
103}
104
105static void
106xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
107 unsigned long *p3)
108{
109 unsigned int lines = bytes >> 8;
110
111 kernel_fpu_begin();
112 asm volatile(
113#undef BLOCK
114#define BLOCK(i) \
115 PF1(i) \
116 PF1(i + 2) \
117 LD(i, 0) \
118 LD(i + 1, 1) \
119 LD(i + 2, 2) \
120 LD(i + 3, 3) \
121 PF2(i) \
122 PF2(i + 2) \
123 PF0(i + 4) \
124 PF0(i + 6) \
125 XO1(i, 0) \
126 XO1(i + 1, 1) \
127 XO1(i + 2, 2) \
128 XO1(i + 3, 3) \
129 XO2(i, 0) \
130 XO2(i + 1, 1) \
131 XO2(i + 2, 2) \
132 XO2(i + 3, 3) \
133 ST(i, 0) \
134 ST(i + 1, 1) \
135 ST(i + 2, 2) \
136 ST(i + 3, 3) \
137
138
139 PF0(0)
140 PF0(2)
141
142 " .align 32 ;\n"
143 " 1: ;\n"
144
145 BLOCK(0)
146 BLOCK(4)
147 BLOCK(8)
148 BLOCK(12)
149
150 " addq %[inc], %[p1] ;\n"
151 " addq %[inc], %[p2] ;\n"
152 " addq %[inc], %[p3] ;\n"
153 " decl %[cnt] ; jnz 1b"
154 : [cnt] "+r" (lines),
155 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
156 : [inc] "r" (256UL)
157 : "memory");
158 kernel_fpu_end();
159}
160
161static void
162xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
163 unsigned long *p3, unsigned long *p4)
164{
165 unsigned int lines = bytes >> 8;
166
167 kernel_fpu_begin();
168
169 asm volatile(
170#undef BLOCK
171#define BLOCK(i) \
172 PF1(i) \
173 PF1(i + 2) \
174 LD(i, 0) \
175 LD(i + 1, 1) \
176 LD(i + 2, 2) \
177 LD(i + 3, 3) \
178 PF2(i) \
179 PF2(i + 2) \
180 XO1(i, 0) \
181 XO1(i + 1, 1) \
182 XO1(i + 2, 2) \
183 XO1(i + 3, 3) \
184 PF3(i) \
185 PF3(i + 2) \
186 PF0(i + 4) \
187 PF0(i + 6) \
188 XO2(i, 0) \
189 XO2(i + 1, 1) \
190 XO2(i + 2, 2) \
191 XO2(i + 3, 3) \
192 XO3(i, 0) \
193 XO3(i + 1, 1) \
194 XO3(i + 2, 2) \
195 XO3(i + 3, 3) \
196 ST(i, 0) \
197 ST(i + 1, 1) \
198 ST(i + 2, 2) \
199 ST(i + 3, 3) \
200
201
202 PF0(0)
203 PF0(2)
204
205 " .align 32 ;\n"
206 " 1: ;\n"
207
208 BLOCK(0)
209 BLOCK(4)
210 BLOCK(8)
211 BLOCK(12)
212
213 " addq %[inc], %[p1] ;\n"
214 " addq %[inc], %[p2] ;\n"
215 " addq %[inc], %[p3] ;\n"
216 " addq %[inc], %[p4] ;\n"
217 " decl %[cnt] ; jnz 1b"
218 : [cnt] "+c" (lines),
219 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
220 : [inc] "r" (256UL)
221 : "memory" );
222
223 kernel_fpu_end();
224}
225
226static void
227xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
228 unsigned long *p3, unsigned long *p4, unsigned long *p5)
229{
230 unsigned int lines = bytes >> 8;
231
232 kernel_fpu_begin();
233
234 asm volatile(
235#undef BLOCK
236#define BLOCK(i) \
237 PF1(i) \
238 PF1(i + 2) \
239 LD(i, 0) \
240 LD(i + 1, 1) \
241 LD(i + 2, 2) \
242 LD(i + 3, 3) \
243 PF2(i) \
244 PF2(i + 2) \
245 XO1(i, 0) \
246 XO1(i + 1, 1) \
247 XO1(i + 2, 2) \
248 XO1(i + 3, 3) \
249 PF3(i) \
250 PF3(i + 2) \
251 XO2(i, 0) \
252 XO2(i + 1, 1) \
253 XO2(i + 2, 2) \
254 XO2(i + 3, 3) \
255 PF4(i) \
256 PF4(i + 2) \
257 PF0(i + 4) \
258 PF0(i + 6) \
259 XO3(i, 0) \
260 XO3(i + 1, 1) \
261 XO3(i + 2, 2) \
262 XO3(i + 3, 3) \
263 XO4(i, 0) \
264 XO4(i + 1, 1) \
265 XO4(i + 2, 2) \
266 XO4(i + 3, 3) \
267 ST(i, 0) \
268 ST(i + 1, 1) \
269 ST(i + 2, 2) \
270 ST(i + 3, 3) \
271
272
273 PF0(0)
274 PF0(2)
275
276 " .align 32 ;\n"
277 " 1: ;\n"
278
279 BLOCK(0)
280 BLOCK(4)
281 BLOCK(8)
282 BLOCK(12)
283
284 " addq %[inc], %[p1] ;\n"
285 " addq %[inc], %[p2] ;\n"
286 " addq %[inc], %[p3] ;\n"
287 " addq %[inc], %[p4] ;\n"
288 " addq %[inc], %[p5] ;\n"
289 " decl %[cnt] ; jnz 1b"
290 : [cnt] "+c" (lines),
291 [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
292 [p5] "+r" (p5)
293 : [inc] "r" (256UL)
294 : "memory");
295
296 kernel_fpu_end();
297}
298
299static struct xor_block_template xor_block_sse = { 4static struct xor_block_template xor_block_sse = {
300 .name = "generic_sse", 5 .name = "generic_sse",
301 .do_2 = xor_sse_2, 6 .do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
308/* Also try the AVX routines */ 13/* Also try the AVX routines */
309#include <asm/xor_avx.h> 14#include <asm/xor_avx.h>
310 15
16/* We force the use of the SSE xor block because it can write around L2.
17 We may also be able to load into the L1 only depending on how the cpu
18 deals with a load to a line that is being prefetched. */
311#undef XOR_TRY_TEMPLATES 19#undef XOR_TRY_TEMPLATES
312#define XOR_TRY_TEMPLATES \ 20#define XOR_TRY_TEMPLATES \
313do { \ 21do { \
314 AVX_XOR_SPEED; \ 22 AVX_XOR_SPEED; \
23 xor_speed(&xor_block_sse_pf64); \
315 xor_speed(&xor_block_sse); \ 24 xor_speed(&xor_block_sse); \
316} while (0) 25} while (0)
317 26
318/* We force the use of the SSE xor block because it can write around L2.
319 We may also be able to load into the L1 only depending on how the cpu
320 deals with a load to a line that is being prefetched. */
321#define XOR_SELECT_TEMPLATE(FASTEST) \
322 AVX_SELECT(&xor_block_sse)
323
324#endif /* _ASM_X86_XOR_64_H */ 27#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 58c829871c31..a0eab85ce7b8 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -4,66 +4,6 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/ioctls.h> 5#include <asm/ioctls.h>
6 6
7/*
8 * Machine Check support for x86
9 */
10
11/* MCG_CAP register defines */
12#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
13#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
14#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
15#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
16#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
17#define MCG_EXT_CNT_SHIFT 16
18#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
19#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
20
21/* MCG_STATUS register defines */
22#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
23#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
24#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
25
26/* MCi_STATUS register defines */
27#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
28#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
29#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
30#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
31#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
32#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
33#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
34#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
35#define MCI_STATUS_AR (1ULL<<55) /* Action required */
36#define MCACOD 0xffff /* MCA Error Code */
37
38/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
39#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
40#define MCACOD_SCRUBMSK 0xfff0
41#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
42#define MCACOD_DATA 0x0134 /* Data Load */
43#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
44
45/* MCi_MISC register defines */
46#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
47#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
48#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
49#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
50#define MCI_MISC_ADDR_PHYS 2 /* physical address */
51#define MCI_MISC_ADDR_MEM 3 /* memory address */
52#define MCI_MISC_ADDR_GENERIC 7 /* generic */
53
54/* CTL2 register defines */
55#define MCI_CTL2_CMCI_EN (1ULL << 30)
56#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
57
58#define MCJ_CTX_MASK 3
59#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
60#define MCJ_CTX_RANDOM 0 /* inject context: random */
61#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
62#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
63#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
64#define MCJ_EXCEPTION 0x8 /* raise as exception */
65#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
66
67/* Fields are zero when not available */ 7/* Fields are zero when not available */
68struct mce { 8struct mce {
69 __u64 status; 9 __u64 status;
@@ -87,35 +27,8 @@ struct mce {
87 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ 27 __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
88}; 28};
89 29
90/*
91 * This structure contains all data related to the MCE log. Also
92 * carries a signature to make it easier to find from external
93 * debugging tools. Each entry is only valid when its finished flag
94 * is set.
95 */
96
97#define MCE_LOG_LEN 32
98
99struct mce_log {
100 char signature[12]; /* "MACHINECHECK" */
101 unsigned len; /* = MCE_LOG_LEN */
102 unsigned next;
103 unsigned flags;
104 unsigned recordlen; /* length of struct mce */
105 struct mce entry[MCE_LOG_LEN];
106};
107
108#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
109
110#define MCE_LOG_SIGNATURE "MACHINECHECK"
111
112#define MCE_GET_RECORD_LEN _IOR('M', 1, int) 30#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
113#define MCE_GET_LOG_LEN _IOR('M', 2, int) 31#define MCE_GET_LOG_LEN _IOR('M', 2, int)
114#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) 32#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
115 33
116/* Software defined banks */
117#define MCE_EXTENDED_BANK 128
118#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
119#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
120
121#endif /* _UAPI_ASM_X86_MCE_H */ 34#endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59fb1a74..075a40255591 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -194,6 +194,8 @@
194/* Fam 15h MSRs */ 194/* Fam 15h MSRs */
195#define MSR_F15H_PERF_CTL 0xc0010200 195#define MSR_F15H_PERF_CTL 0xc0010200
196#define MSR_F15H_PERF_CTR 0xc0010201 196#define MSR_F15H_PERF_CTR 0xc0010201
197#define MSR_F15H_NB_PERF_CTL 0xc0010240
198#define MSR_F15H_NB_PERF_CTR 0xc0010241
197 199
198/* Fam 10h MSRs */ 200/* Fam 10h MSRs */
199#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 201#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
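
The NB control/counter pair sits at 0xc0010240/0xc0010241; a hedged in-kernel sketch of reading NB counter i, assuming the same stride-2 interleaving as the core Fam15h counters (an assumption worth checking against the BKDG):

#include <asm/msr.h>

static u64 read_nb_counter(unsigned int i)
{
        u64 val;

        rdmsrl(MSR_F15H_NB_PERF_CTR + 2 * i, val);      /* 0xc0010241, 0xc0010243, ... */
        return val;
}
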
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..ac3b3d002833 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o
65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 65obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 66obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 67obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
68obj-$(CONFIG_KPROBES) += kprobes.o 68obj-y += kprobes/
69obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
70obj-$(CONFIG_MODULES) += module.o 69obj-$(CONFIG_MODULES) += module.o
71obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o 70obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
72obj-$(CONFIG_KGDB) += kgdb.o 71obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index afdc3f756dea..c9876efecafb 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
240 dw_apb_clockevent_pause(adev->timer); 240 dw_apb_clockevent_pause(adev->timer);
241 if (system_state == SYSTEM_RUNNING) { 241 if (system_state == SYSTEM_RUNNING) {
242 pr_debug("skipping APBT CPU %lu offline\n", cpu); 242 pr_debug("skipping APBT CPU %lu offline\n", cpu);
243 } else if (adev) { 243 } else {
244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu); 244 pr_debug("APBT clockevent for cpu %lu offline\n", cpu);
245 dw_apb_clockevent_stop(adev->timer); 245 dw_apb_clockevent_stop(adev->timer);
246 } 246 }
@@ -311,7 +311,6 @@ void __init apbt_time_init(void)
311#ifdef CONFIG_SMP 311#ifdef CONFIG_SMP
312 int i; 312 int i;
313 struct sfi_timer_table_entry *p_mtmr; 313 struct sfi_timer_table_entry *p_mtmr;
314 unsigned int percpu_timer;
315 struct apbt_dev *adev; 314 struct apbt_dev *adev;
316#endif 315#endif
317 316
@@ -346,13 +345,10 @@ void __init apbt_time_init(void)
346 return; 345 return;
347 } 346 }
348 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); 347 pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
349 if (num_possible_cpus() <= sfi_mtimer_num) { 348 if (num_possible_cpus() <= sfi_mtimer_num)
350 percpu_timer = 1;
351 apbt_num_timers_used = num_possible_cpus(); 349 apbt_num_timers_used = num_possible_cpus();
352 } else { 350 else
353 percpu_timer = 0;
354 apbt_num_timers_used = 1; 351 apbt_num_timers_used = 1;
355 }
356 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); 352 pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
357 353
358 /* here we set up per CPU timer data structure */ 354 /* here we set up per CPU timer data structure */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b994cc84aa7e..a5b4dce1b7ac 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
1477 * Now that local APIC setup is completed for BP, configure the fault 1477 * Now that local APIC setup is completed for BP, configure the fault
1478 * handling for interrupt remapping. 1478 * handling for interrupt remapping.
1479 */ 1479 */
1480 if (irq_remapping_enabled) 1480 irq_remap_enable_fault_handling();
1481 irq_remap_enable_fault_handling();
1482 1481
1483} 1482}
1484 1483
@@ -2251,8 +2250,7 @@ static int lapic_suspend(void)
2251 local_irq_save(flags); 2250 local_irq_save(flags);
2252 disable_local_APIC(); 2251 disable_local_APIC();
2253 2252
2254 if (irq_remapping_enabled) 2253 irq_remapping_disable();
2255 irq_remapping_disable();
2256 2254
2257 local_irq_restore(flags); 2255 local_irq_restore(flags);
2258 return 0; 2256 return 0;
@@ -2268,16 +2266,15 @@ static void lapic_resume(void)
2268 return; 2266 return;
2269 2267
2270 local_irq_save(flags); 2268 local_irq_save(flags);
2271 if (irq_remapping_enabled) { 2269
2272 /* 2270 /*
2273 * IO-APIC and PIC have their own resume routines. 2271 * IO-APIC and PIC have their own resume routines.
2274 * We just mask them here to make sure the interrupt 2272 * We just mask them here to make sure the interrupt
2275 * subsystem is completely quiet while we enable x2apic 2273 * subsystem is completely quiet while we enable x2apic
2276 * and interrupt-remapping. 2274 * and interrupt-remapping.
2277 */ 2275 */
2278 mask_ioapic_entries(); 2276 mask_ioapic_entries();
2279 legacy_pic->mask_all(); 2277 legacy_pic->mask_all();
2280 }
2281 2278
2282 if (x2apic_mode) 2279 if (x2apic_mode)
2283 enable_x2apic(); 2280 enable_x2apic();
@@ -2320,8 +2317,7 @@ static void lapic_resume(void)
2320 apic_write(APIC_ESR, 0); 2317 apic_write(APIC_ESR, 0);
2321 apic_read(APIC_ESR); 2318 apic_read(APIC_ESR);
2322 2319
2323 if (irq_remapping_enabled) 2320 irq_remapping_reenable(x2apic_mode);
2324 irq_remapping_reenable(x2apic_mode);
2325 2321
2326 local_irq_restore(flags); 2322 local_irq_restore(flags);
2327} 2323}
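
Dropping the caller-side irq_remapping_enabled checks is safe because the remapping entry points now guard themselves; the pattern being relied on is roughly (illustrative, the real guards live in the IRQ-remapping code):

void irq_remapping_disable(void)
{
        if (!irq_remapping_enabled)     /* no-op when remapping is off */
                return;

        /* ... actual disable sequence ... */
}
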
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..9ed796ccc32c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
68#define for_each_irq_pin(entry, head) \ 68#define for_each_irq_pin(entry, head) \
69 for (entry = head; entry; entry = entry->next) 69 for (entry = head; entry; entry = entry->next)
70 70
71#ifdef CONFIG_IRQ_REMAP
72static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
73static inline bool irq_remapped(struct irq_cfg *cfg)
74{
75 return cfg->irq_2_iommu.iommu != NULL;
76}
77#else
78static inline bool irq_remapped(struct irq_cfg *cfg)
79{
80 return false;
81}
82static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
83{
84}
85#endif
86
87/* 71/*
88 * Is the SiS APIC rmw bug present ? 72 * Is the SiS APIC rmw bug present ?
89 * -1 = don't know, 0 = no, 1 = yes 73 * -1 = don't know, 0 = no, 1 = yes
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
300 return cfg; 284 return cfg;
301} 285}
302 286
303static int alloc_irq_from(unsigned int from, int node) 287static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
304{ 288{
305 return irq_alloc_desc_from(from, node); 289 return irq_alloc_descs_from(from, count, node);
306} 290}
307 291
308static void free_irq_at(unsigned int at, struct irq_cfg *cfg) 292static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
326 + (mpc_ioapic_addr(idx) & ~PAGE_MASK); 310 + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
327} 311}
328 312
329static inline void io_apic_eoi(unsigned int apic, unsigned int vector) 313void io_apic_eoi(unsigned int apic, unsigned int vector)
330{ 314{
331 struct io_apic __iomem *io_apic = io_apic_base(apic); 315 struct io_apic __iomem *io_apic = io_apic_base(apic);
332 writel(vector, &io_apic->eoi); 316 writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
573 * Otherwise, we simulate the EOI message manually by changing the trigger 557 * Otherwise, we simulate the EOI message manually by changing the trigger
574 * mode to edge and then back to level, with RTE being masked during this. 558 * mode to edge and then back to level, with RTE being masked during this.
575 */ 559 */
576static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) 560void native_eoi_ioapic_pin(int apic, int pin, int vector)
577{ 561{
578 if (mpc_ioapic_ver(apic) >= 0x20) { 562 if (mpc_ioapic_ver(apic) >= 0x20) {
579 /* 563 io_apic_eoi(apic, vector);
580 * Intr-remapping uses pin number as the virtual vector
581 * in the RTE. Actual vector is programmed in
582 * intr-remapping table entry. Hence for the io-apic
583 * EOI we use the pin number.
584 */
585 if (cfg && irq_remapped(cfg))
586 io_apic_eoi(apic, pin);
587 else
588 io_apic_eoi(apic, vector);
589 } else { 564 } else {
590 struct IO_APIC_route_entry entry, entry1; 565 struct IO_APIC_route_entry entry, entry1;
591 566
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
606 } 581 }
607} 582}
608 583
609static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 584void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
610{ 585{
611 struct irq_pin_list *entry; 586 struct irq_pin_list *entry;
612 unsigned long flags; 587 unsigned long flags;
613 588
614 raw_spin_lock_irqsave(&ioapic_lock, flags); 589 raw_spin_lock_irqsave(&ioapic_lock, flags);
615 for_each_irq_pin(entry, cfg->irq_2_pin) 590 for_each_irq_pin(entry, cfg->irq_2_pin)
616 __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); 591 x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
592 cfg->vector);
617 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 593 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
618} 594}
619 595
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
650 } 626 }
651 627
652 raw_spin_lock_irqsave(&ioapic_lock, flags); 628 raw_spin_lock_irqsave(&ioapic_lock, flags);
653 __eoi_ioapic_pin(apic, pin, entry.vector, NULL); 629 x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
654 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 630 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
655 } 631 }
656 632
@@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1304 fasteoi = false; 1280 fasteoi = false;
1305 } 1281 }
1306 1282
1307 if (irq_remapped(cfg)) { 1283 if (setup_remapped_irq(irq, cfg, chip))
1308 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1309 irq_remap_modify_chip_defaults(chip);
1310 fasteoi = trigger != 0; 1284 fasteoi = trigger != 0;
1311 }
1312 1285
1313 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; 1286 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1314 irq_set_chip_and_handler_name(irq, chip, hdl, 1287 irq_set_chip_and_handler_name(irq, chip, hdl,
1315 fasteoi ? "fasteoi" : "edge"); 1288 fasteoi ? "fasteoi" : "edge");
1316} 1289}
1317 1290
1318static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, 1291int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1319 unsigned int destination, int vector, 1292 unsigned int destination, int vector,
1320 struct io_apic_irq_attr *attr) 1293 struct io_apic_irq_attr *attr)
1321{ 1294{
1322 if (irq_remapping_enabled)
1323 return setup_ioapic_remapped_entry(irq, entry, destination,
1324 vector, attr);
1325
1326 memset(entry, 0, sizeof(*entry)); 1295 memset(entry, 0, sizeof(*entry));
1327 1296
1328 entry->delivery_mode = apic->irq_delivery_mode; 1297 entry->delivery_mode = apic->irq_delivery_mode;
@@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1370 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, 1339 attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
1371 cfg->vector, irq, attr->trigger, attr->polarity, dest); 1340 cfg->vector, irq, attr->trigger, attr->polarity, dest);
1372 1341
1373 if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { 1342 if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
1374 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1343 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1375 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); 1344 mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1376 __clear_irq_vector(irq, cfg); 1345 __clear_irq_vector(irq, cfg);
1377 1346
@@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1479 struct IO_APIC_route_entry entry; 1448 struct IO_APIC_route_entry entry;
1480 unsigned int dest; 1449 unsigned int dest;
1481 1450
1482 if (irq_remapping_enabled)
1483 return;
1484
1485 memset(&entry, 0, sizeof(entry)); 1451 memset(&entry, 0, sizeof(entry));
1486 1452
1487 /* 1453 /*
@@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1513 ioapic_write_entry(ioapic_idx, pin, entry); 1479 ioapic_write_entry(ioapic_idx, pin, entry);
1514} 1480}
1515 1481
1516__apicdebuginit(void) print_IO_APIC(int ioapic_idx) 1482void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1517{ 1483{
1518 int i; 1484 int i;
1485
1486 pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
1487
1488 for (i = 0; i <= nr_entries; i++) {
1489 struct IO_APIC_route_entry entry;
1490
1491 entry = ioapic_read_entry(apic, i);
1492
1493 pr_debug(" %02x %02X ", i, entry.dest);
1494 pr_cont("%1d %1d %1d %1d %1d "
1495 "%1d %1d %02X\n",
1496 entry.mask,
1497 entry.trigger,
1498 entry.irr,
1499 entry.polarity,
1500 entry.delivery_status,
1501 entry.dest_mode,
1502 entry.delivery_mode,
1503 entry.vector);
1504 }
1505}
1506
1507void intel_ir_io_apic_print_entries(unsigned int apic,
1508 unsigned int nr_entries)
1509{
1510 int i;
1511
1512 pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
1513
1514 for (i = 0; i <= nr_entries; i++) {
1515 struct IR_IO_APIC_route_entry *ir_entry;
1516 struct IO_APIC_route_entry entry;
1517
1518 entry = ioapic_read_entry(apic, i);
1519
1520 ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
1521
1522 pr_debug(" %02x %04X ", i, ir_entry->index);
1523 pr_cont("%1d %1d %1d %1d %1d "
1524 "%1d %1d %X %02X\n",
1525 ir_entry->format,
1526 ir_entry->mask,
1527 ir_entry->trigger,
1528 ir_entry->irr,
1529 ir_entry->polarity,
1530 ir_entry->delivery_status,
1531 ir_entry->index2,
1532 ir_entry->zero,
1533 ir_entry->vector);
1534 }
1535}
1536
1537__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1538{
1519 union IO_APIC_reg_00 reg_00; 1539 union IO_APIC_reg_00 reg_00;
1520 union IO_APIC_reg_01 reg_01; 1540 union IO_APIC_reg_01 reg_01;
1521 union IO_APIC_reg_02 reg_02; 1541 union IO_APIC_reg_02 reg_02;
@@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
1568 1588
1569 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1589 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1570 1590
1571 if (irq_remapping_enabled) { 1591 x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
1572 printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
1573 " Pol Stat Indx2 Zero Vect:\n");
1574 } else {
1575 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1576 " Stat Dmod Deli Vect:\n");
1577 }
1578
1579 for (i = 0; i <= reg_01.bits.entries; i++) {
1580 if (irq_remapping_enabled) {
1581 struct IO_APIC_route_entry entry;
1582 struct IR_IO_APIC_route_entry *ir_entry;
1583
1584 entry = ioapic_read_entry(ioapic_idx, i);
1585 ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
1586 printk(KERN_DEBUG " %02x %04X ",
1587 i,
1588 ir_entry->index
1589 );
1590 pr_cont("%1d %1d %1d %1d %1d "
1591 "%1d %1d %X %02X\n",
1592 ir_entry->format,
1593 ir_entry->mask,
1594 ir_entry->trigger,
1595 ir_entry->irr,
1596 ir_entry->polarity,
1597 ir_entry->delivery_status,
1598 ir_entry->index2,
1599 ir_entry->zero,
1600 ir_entry->vector
1601 );
1602 } else {
1603 struct IO_APIC_route_entry entry;
1604
1605 entry = ioapic_read_entry(ioapic_idx, i);
1606 printk(KERN_DEBUG " %02x %02X ",
1607 i,
1608 entry.dest
1609 );
1610 pr_cont("%1d %1d %1d %1d %1d "
1611 "%1d %1d %02X\n",
1612 entry.mask,
1613 entry.trigger,
1614 entry.irr,
1615 entry.polarity,
1616 entry.delivery_status,
1617 entry.dest_mode,
1618 entry.delivery_mode,
1619 entry.vector
1620 );
1621 }
1622 }
1623} 1592}
1624 1593
1625__apicdebuginit(void) print_IO_APICs(void) 1594__apicdebuginit(void) print_IO_APICs(void)
@@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void)
1921 clear_IO_APIC(); 1890 clear_IO_APIC();
1922} 1891}
1923 1892
1924/* 1893void native_disable_io_apic(void)
1925 * Not an __init, needed by the reboot code
1926 */
1927void disable_IO_APIC(void)
1928{ 1894{
1929 /* 1895 /*
1930 * Clear the IO-APIC before rebooting:
1931 */
1932 clear_IO_APIC();
1933
1934 if (!legacy_pic->nr_legacy_irqs)
1935 return;
1936
1937 /*
1938 * If the i8259 is routed through an IOAPIC 1896 * If the i8259 is routed through an IOAPIC
1939 * Put that IOAPIC in virtual wire mode 1897 * Put that IOAPIC in virtual wire mode
1940 * so legacy interrupts can be delivered. 1898 * so legacy interrupts can be delivered.
1941 *
1942 * With interrupt-remapping, for now we will use virtual wire A mode,
1943 * as virtual wire B is a little more complex (we would need to configure
1944 * both the IOAPIC RTE and the interrupt-remapping table entry).
1945 * As this gets called during crash dump, keep this simple for now.
1946 */ 1899 */
1947 if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { 1900 if (ioapic_i8259.pin != -1) {
1948 struct IO_APIC_route_entry entry; 1901 struct IO_APIC_route_entry entry;
1949 1902
1950 memset(&entry, 0, sizeof(entry)); 1903 memset(&entry, 0, sizeof(entry));
@@ -1964,12 +1917,25 @@ void disable_IO_APIC(void)
1964 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 1917 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1965 } 1918 }
1966 1919
1920 if (cpu_has_apic || apic_from_smp_config())
1921 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1922
1923}
1924
1925/*
1926 * Not an __init, needed by the reboot code
1927 */
1928void disable_IO_APIC(void)
1929{
1967 /* 1930 /*
1968 * Use virtual wire A mode when interrupt remapping is enabled. 1931 * Clear the IO-APIC before rebooting:
1969 */ 1932 */
1970 if (cpu_has_apic || apic_from_smp_config()) 1933 clear_IO_APIC();
1971 disconnect_bsp_APIC(!irq_remapping_enabled && 1934
1972 ioapic_i8259.pin != -1); 1935 if (!legacy_pic->nr_legacy_irqs)
1936 return;
1937
1938 x86_io_apic_ops.disable();
1973} 1939}
1974 1940
1975#ifdef CONFIG_X86_32 1941#ifdef CONFIG_X86_32
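The virtual-wire comment in native_disable_io_apic() above is easiest to see with a toy route entry: the pin fed by the i8259 is left unmasked and programmed for ExtINT delivery, so legacy interrupts pass straight through to the CPU. A minimal standalone sketch; the struct below is illustrative, not the kernel's IO_APIC_route_entry layout (ExtINT's architectural delivery-mode encoding is 7):

    #include <stdio.h>
    #include <string.h>

    struct route_entry {
            unsigned int delivery_mode;     /* 7 == ExtINT on real hardware */
            unsigned int mask;              /* 0 == unmasked */
            unsigned int trigger;           /* 0 == edge-triggered */
    };

    int main(void)
    {
            struct route_entry entry;

            memset(&entry, 0, sizeof(entry));       /* mask=0, trigger=0 */
            entry.delivery_mode = 7;                /* ExtINT passthrough */

            printf("virtual-wire entry: dmod=%u mask=%u trig=%u\n",
                   entry.delivery_mode, entry.mask, entry.trigger);
            return 0;
    }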
@@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322 2288
2323 apic = entry->apic; 2289 apic = entry->apic;
2324 pin = entry->pin; 2290 pin = entry->pin;
2325 /* 2291
2326 * With interrupt-remapping, destination information comes 2292 io_apic_write(apic, 0x11 + pin*2, dest);
2327 * from interrupt-remapping table entry.
2328 */
2329 if (!irq_remapped(cfg))
2330 io_apic_write(apic, 0x11 + pin*2, dest);
2331 reg = io_apic_read(apic, 0x10 + pin*2); 2293 reg = io_apic_read(apic, 0x10 + pin*2);
2332 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2294 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2333 reg |= vector; 2295 reg |= vector;
@@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2369 return 0; 2331 return 0;
2370} 2332}
2371 2333
2372static int 2334
2373ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, 2335int native_ioapic_set_affinity(struct irq_data *data,
2374 bool force) 2336 const struct cpumask *mask,
2337 bool force)
2375{ 2338{
2376 unsigned int dest, irq = data->irq; 2339 unsigned int dest, irq = data->irq;
2377 unsigned long flags; 2340 unsigned long flags;
@@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
2548 ioapic_irqd_unmask(data, cfg, masked); 2511 ioapic_irqd_unmask(data, cfg, masked);
2549} 2512}
2550 2513
2551#ifdef CONFIG_IRQ_REMAP
2552static void ir_ack_apic_edge(struct irq_data *data)
2553{
2554 ack_APIC_irq();
2555}
2556
2557static void ir_ack_apic_level(struct irq_data *data)
2558{
2559 ack_APIC_irq();
2560 eoi_ioapic_irq(data->irq, data->chip_data);
2561}
2562
2563static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2564{
2565 seq_printf(p, " IR-%s", data->chip->name);
2566}
2567
2568static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2569{
2570 chip->irq_print_chip = ir_print_prefix;
2571 chip->irq_ack = ir_ack_apic_edge;
2572 chip->irq_eoi = ir_ack_apic_level;
2573
2574 chip->irq_set_affinity = set_remapped_irq_affinity;
2575}
2576#endif /* CONFIG_IRQ_REMAP */
2577
2578static struct irq_chip ioapic_chip __read_mostly = { 2514static struct irq_chip ioapic_chip __read_mostly = {
2579 .name = "IO-APIC", 2515 .name = "IO-APIC",
2580 .irq_startup = startup_ioapic_irq, 2516 .irq_startup = startup_ioapic_irq,
@@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
2582 .irq_unmask = unmask_ioapic_irq, 2518 .irq_unmask = unmask_ioapic_irq,
2583 .irq_ack = ack_apic_edge, 2519 .irq_ack = ack_apic_edge,
2584 .irq_eoi = ack_apic_level, 2520 .irq_eoi = ack_apic_level,
2585 .irq_set_affinity = ioapic_set_affinity, 2521 .irq_set_affinity = native_ioapic_set_affinity,
2586 .irq_retrigger = ioapic_retrigger_irq, 2522 .irq_retrigger = ioapic_retrigger_irq,
2587}; 2523};
2588 2524
@@ -2781,8 +2717,7 @@ static inline void __init check_timer(void)
2781 * 8259A. 2717 * 8259A.
2782 */ 2718 */
2783 if (pin1 == -1) { 2719 if (pin1 == -1) {
2784 if (irq_remapping_enabled) 2720 panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2785 panic("BIOS bug: timer not connected to IO-APIC");
2786 pin1 = pin2; 2721 pin1 = pin2;
2787 apic1 = apic2; 2722 apic1 = apic2;
2788 no_pin1 = 1; 2723 no_pin1 = 1;
@@ -2814,8 +2749,7 @@ static inline void __init check_timer(void)
2814 clear_IO_APIC_pin(0, pin1); 2749 clear_IO_APIC_pin(0, pin1);
2815 goto out; 2750 goto out;
2816 } 2751 }
2817 if (irq_remapping_enabled) 2752 panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2818 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2819 local_irq_disable(); 2753 local_irq_disable();
2820 clear_IO_APIC_pin(apic1, pin1); 2754 clear_IO_APIC_pin(apic1, pin1);
2821 if (!no_pin1) 2755 if (!no_pin1)
@@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops);
2982/* 2916/*
2983 * Dynamic irq allocate and deallocation 2917 * Dynamic irq allocate and deallocation
2984 */ 2918 */
2985unsigned int create_irq_nr(unsigned int from, int node) 2919unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
2986{ 2920{
2987 struct irq_cfg *cfg; 2921 struct irq_cfg **cfg;
2988 unsigned long flags; 2922 unsigned long flags;
2989 unsigned int ret = 0; 2923 int irq, i;
2990 int irq;
2991 2924
2992 if (from < nr_irqs_gsi) 2925 if (from < nr_irqs_gsi)
2993 from = nr_irqs_gsi; 2926 from = nr_irqs_gsi;
2994 2927
2995 irq = alloc_irq_from(from, node); 2928 cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
2996 if (irq < 0) 2929 if (!cfg)
2997 return 0;
2998 cfg = alloc_irq_cfg(irq, node);
2999 if (!cfg) {
3000 free_irq_at(irq, NULL);
3001 return 0; 2930 return 0;
2931
2932 irq = alloc_irqs_from(from, count, node);
2933 if (irq < 0)
2934 goto out_cfgs;
2935
2936 for (i = 0; i < count; i++) {
2937 cfg[i] = alloc_irq_cfg(irq + i, node);
2938 if (!cfg[i])
2939 goto out_irqs;
3002 } 2940 }
3003 2941
3004 raw_spin_lock_irqsave(&vector_lock, flags); 2942 raw_spin_lock_irqsave(&vector_lock, flags);
3005 if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) 2943 for (i = 0; i < count; i++)
3006 ret = irq; 2944 if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
2945 goto out_vecs;
3007 raw_spin_unlock_irqrestore(&vector_lock, flags); 2946 raw_spin_unlock_irqrestore(&vector_lock, flags);
3008 2947
3009 if (ret) { 2948 for (i = 0; i < count; i++) {
3010 irq_set_chip_data(irq, cfg); 2949 irq_set_chip_data(irq + i, cfg[i]);
3011 irq_clear_status_flags(irq, IRQ_NOREQUEST); 2950 irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
3012 } else {
3013 free_irq_at(irq, cfg);
3014 } 2951 }
3015 return ret; 2952
2953 kfree(cfg);
2954 return irq;
2955
2956out_vecs:
2957 for (i--; i >= 0; i--)
2958 __clear_irq_vector(irq + i, cfg[i]);
2959 raw_spin_unlock_irqrestore(&vector_lock, flags);
2960out_irqs:
2961 for (i = 0; i < count; i++)
2962 free_irq_at(irq + i, cfg[i]);
2963out_cfgs:
2964 kfree(cfg);
2965 return 0;
2966}
2967
2968unsigned int create_irq_nr(unsigned int from, int node)
2969{
2970 return __create_irqs(from, 1, node);
3016} 2971}
3017 2972
3018int create_irq(void) 2973int create_irq(void)
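__create_irqs() above follows the usual kernel goto-unwind idiom: allocate everything first, and on any failure release what was acquired so far, in reverse, through stacked error labels. A self-contained userspace sketch of the same pattern; create_block() and its helpers are hypothetical stand-ins, not kernel APIs:

    #include <stdlib.h>

    /* Allocate 'count' per-IRQ cfg objects; on any failure undo everything. */
    static int create_block(unsigned int count)
    {
            int **cfg;
            unsigned int i;

            cfg = calloc(count, sizeof(cfg[0]));
            if (!cfg)
                    return -1;

            for (i = 0; i < count; i++) {
                    cfg[i] = malloc(sizeof(int));   /* stands in for alloc_irq_cfg() */
                    if (!cfg[i])
                            goto out_free;          /* unwind slots 0..i-1 */
            }

            /* commit phase (vector assignment etc.) would go here */

            free(cfg);
            return 0;

    out_free:
            while (i--)
                    free(cfg[i]);
            free(cfg);
            return -1;
    }

    int main(void)
    {
            return create_block(4) ? 1 : 0;
    }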
@@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq)
3037 2992
3038 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 2993 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3039 2994
3040 if (irq_remapped(cfg)) 2995 free_remapped_irq(irq);
3041 free_remapped_irq(irq); 2996
3042 raw_spin_lock_irqsave(&vector_lock, flags); 2997 raw_spin_lock_irqsave(&vector_lock, flags);
3043 __clear_irq_vector(irq, cfg); 2998 __clear_irq_vector(irq, cfg);
3044 raw_spin_unlock_irqrestore(&vector_lock, flags); 2999 raw_spin_unlock_irqrestore(&vector_lock, flags);
3045 free_irq_at(irq, cfg); 3000 free_irq_at(irq, cfg);
3046} 3001}
3047 3002
3003void destroy_irqs(unsigned int irq, unsigned int count)
3004{
3005 unsigned int i;
3006
3007 for (i = 0; i < count; i++)
3008 destroy_irq(irq + i);
3009}
3010
3048/* 3011/*
3049 * MSI message composition 3012 * MSI message composition
3050 */ 3013 */
3051#ifdef CONFIG_PCI_MSI 3014void native_compose_msi_msg(struct pci_dev *pdev,
3052static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 3015 unsigned int irq, unsigned int dest,
3053 struct msi_msg *msg, u8 hpet_id) 3016 struct msi_msg *msg, u8 hpet_id)
3054{ 3017{
3055 struct irq_cfg *cfg; 3018 struct irq_cfg *cfg = irq_cfg(irq);
3056 int err;
3057 unsigned dest;
3058
3059 if (disable_apic)
3060 return -ENXIO;
3061
3062 cfg = irq_cfg(irq);
3063 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3064 if (err)
3065 return err;
3066 3019
3067 err = apic->cpu_mask_to_apicid_and(cfg->domain, 3020 msg->address_hi = MSI_ADDR_BASE_HI;
3068 apic->target_cpus(), &dest);
3069 if (err)
3070 return err;
3071
3072 if (irq_remapped(cfg)) {
3073 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
3074 return err;
3075 }
3076 3021
3077 if (x2apic_enabled()) 3022 if (x2apic_enabled())
3078 msg->address_hi = MSI_ADDR_BASE_HI | 3023 msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
3079 MSI_ADDR_EXT_DEST_ID(dest);
3080 else
3081 msg->address_hi = MSI_ADDR_BASE_HI;
3082 3024
3083 msg->address_lo = 3025 msg->address_lo =
3084 MSI_ADDR_BASE_LO | 3026 MSI_ADDR_BASE_LO |
@@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3097 MSI_DATA_DELIVERY_FIXED: 3039 MSI_DATA_DELIVERY_FIXED:
3098 MSI_DATA_DELIVERY_LOWPRI) | 3040 MSI_DATA_DELIVERY_LOWPRI) |
3099 MSI_DATA_VECTOR(cfg->vector); 3041 MSI_DATA_VECTOR(cfg->vector);
3042}
3100 3043
3101 return err; 3044#ifdef CONFIG_PCI_MSI
3045static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3046 struct msi_msg *msg, u8 hpet_id)
3047{
3048 struct irq_cfg *cfg;
3049 int err;
3050 unsigned dest;
3051
3052 if (disable_apic)
3053 return -ENXIO;
3054
3055 cfg = irq_cfg(irq);
3056 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3057 if (err)
3058 return err;
3059
3060 err = apic->cpu_mask_to_apicid_and(cfg->domain,
3061 apic->target_cpus(), &dest);
3062 if (err)
3063 return err;
3064
3065 x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
3066
3067 return 0;
3102} 3068}
3103 3069
3104static int 3070static int
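For reference, the message native_compose_msi_msg() builds follows the architectural x86 MSI layout: address base 0xfee00000 with the destination APIC ID in bits 19:12, and the vector in the low byte of the data word. A simplified worked example of just that arithmetic; delivery mode, the redirection hint, and the x2apic extended-destination bits are deliberately left out of this sketch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint8_t dest = 3, vector = 0x41;
            unsigned int address_lo = 0xfee00000u | ((unsigned int)dest << 12);
            unsigned int data = vector;

            printf("MSI address_lo=%#x data=%#x\n", address_lo, data);
            return 0;
    }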
@@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = {
3136 .irq_retrigger = ioapic_retrigger_irq, 3102 .irq_retrigger = ioapic_retrigger_irq,
3137}; 3103};
3138 3104
3139static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3105int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
3106 unsigned int irq_base, unsigned int irq_offset)
3140{ 3107{
3141 struct irq_chip *chip = &msi_chip; 3108 struct irq_chip *chip = &msi_chip;
3142 struct msi_msg msg; 3109 struct msi_msg msg;
3110 unsigned int irq = irq_base + irq_offset;
3143 int ret; 3111 int ret;
3144 3112
3145 ret = msi_compose_msg(dev, irq, &msg, -1); 3113 ret = msi_compose_msg(dev, irq, &msg, -1);
3146 if (ret < 0) 3114 if (ret < 0)
3147 return ret; 3115 return ret;
3148 3116
3149 irq_set_msi_desc(irq, msidesc); 3117 irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
3150 write_msi_msg(irq, &msg);
3151 3118
3152 if (irq_remapped(irq_get_chip_data(irq))) { 3119 /*
3153 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3120 * An MSI-X message is written per IRQ, so the offset is always 0.
3154 irq_remap_modify_chip_defaults(chip); 3121 * An MSI message covers a contiguous group of IRQs and is written for the 0th IRQ only.
3155 } 3122 */
3123 if (!irq_offset)
3124 write_msi_msg(irq, &msg);
3125
3126 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3156 3127
3157 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3128 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3158 3129
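The comment in setup_msi_irq() is the key behavioural point: with multi-MSI a single message covers a contiguous IRQ block, so only offset 0 writes it, while MSI-X writes one message per vector. A toy illustration of that write policy; setup_vector() is a hypothetical helper:

    #include <stdbool.h>
    #include <stdio.h>

    /* Decide whether this sub-IRQ writes its own message. */
    static void setup_vector(unsigned int irq_base, unsigned int irq_offset,
                             bool msix)
    {
            if (msix || irq_offset == 0)
                    printf("irq %u: write message\n", irq_base + irq_offset);
            else
                    printf("irq %u: no write, covered by irq %u\n",
                           irq_base + irq_offset, irq_base);
    }

    int main(void)
    {
            unsigned int off;

            for (off = 0; off < 4; off++)   /* one multi-MSI block of four */
                    setup_vector(32, off, false);
            for (off = 0; off < 2; off++)   /* two independent MSI-X vectors */
                    setup_vector(48, off, true);
            return 0;
    }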
@@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3163 3134
3164int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3135int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3165{ 3136{
3166 int node, ret, sub_handle, index = 0;
3167 unsigned int irq, irq_want; 3137 unsigned int irq, irq_want;
3168 struct msi_desc *msidesc; 3138 struct msi_desc *msidesc;
3139 int node, ret;
3169 3140
3170 /* x86 doesn't support multiple MSI yet */ 3141 /* Multiple MSI vectors are only supported with interrupt remapping */
3171 if (type == PCI_CAP_ID_MSI && nvec > 1) 3142 if (type == PCI_CAP_ID_MSI && nvec > 1)
3172 return 1; 3143 return 1;
3173 3144
3174 node = dev_to_node(&dev->dev); 3145 node = dev_to_node(&dev->dev);
3175 irq_want = nr_irqs_gsi; 3146 irq_want = nr_irqs_gsi;
3176 sub_handle = 0;
3177 list_for_each_entry(msidesc, &dev->msi_list, list) { 3147 list_for_each_entry(msidesc, &dev->msi_list, list) {
3178 irq = create_irq_nr(irq_want, node); 3148 irq = create_irq_nr(irq_want, node);
3179 if (irq == 0) 3149 if (irq == 0)
3180 return -1; 3150 return -ENOSPC;
3151
3181 irq_want = irq + 1; 3152 irq_want = irq + 1;
3182 if (!irq_remapping_enabled)
3183 goto no_ir;
3184 3153
3185 if (!sub_handle) { 3154 ret = setup_msi_irq(dev, msidesc, irq, 0);
3186 /*
3187 * allocate the consecutive block of IRTE's
3188 * for 'nvec'
3189 */
3190 index = msi_alloc_remapped_irq(dev, irq, nvec);
3191 if (index < 0) {
3192 ret = index;
3193 goto error;
3194 }
3195 } else {
3196 ret = msi_setup_remapped_irq(dev, irq, index,
3197 sub_handle);
3198 if (ret < 0)
3199 goto error;
3200 }
3201no_ir:
3202 ret = setup_msi_irq(dev, msidesc, irq);
3203 if (ret < 0) 3155 if (ret < 0)
3204 goto error; 3156 goto error;
3205 sub_handle++;
3206 } 3157 }
3207 return 0; 3158 return 0;
3208 3159
@@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = {
3298 .irq_retrigger = ioapic_retrigger_irq, 3249 .irq_retrigger = ioapic_retrigger_irq,
3299}; 3250};
3300 3251
3301int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3252int default_setup_hpet_msi(unsigned int irq, unsigned int id)
3302{ 3253{
3303 struct irq_chip *chip = &hpet_msi_type; 3254 struct irq_chip *chip = &hpet_msi_type;
3304 struct msi_msg msg; 3255 struct msi_msg msg;
3305 int ret; 3256 int ret;
3306 3257
3307 if (irq_remapping_enabled) {
3308 ret = setup_hpet_msi_remapped(irq, id);
3309 if (ret)
3310 return ret;
3311 }
3312
3313 ret = msi_compose_msg(NULL, irq, &msg, id); 3258 ret = msi_compose_msg(NULL, irq, &msg, id);
3314 if (ret < 0) 3259 if (ret < 0)
3315 return ret; 3260 return ret;
3316 3261
3317 hpet_msi_write(irq_get_handler_data(irq), &msg); 3262 hpet_msi_write(irq_get_handler_data(irq), &msg);
3318 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3263 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3319 if (irq_remapped(irq_get_chip_data(irq))) 3264 setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
3320 irq_remap_modify_chip_defaults(chip);
3321 3265
3322 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3266 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3323 return 0; 3267 return 0;
@@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void)
3683 else 3627 else
3684 mask = apic->target_cpus(); 3628 mask = apic->target_cpus();
3685 3629
3686 if (irq_remapping_enabled) 3630 x86_io_apic_ops.set_affinity(idata, mask, false);
3687 set_remapped_irq_affinity(idata, mask, false);
3688 else
3689 ioapic_set_affinity(idata, mask, false);
3690 } 3631 }
3691 3632
3692} 3633}
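The thread running through this whole series is visible here: scattered "if (irq_remapping_enabled)" branches are replaced by an ops table (x86_io_apic_ops) whose function pointers are selected once at boot, so callers no longer branch at all. A minimal sketch of that indirection pattern, with illustrative names rather than the kernel's actual struct layout:

    #include <stdio.h>

    struct io_apic_ops {
            void (*set_affinity)(int irq);
    };

    static void native_set_affinity(int irq)
    {
            printf("native affinity for irq %d\n", irq);
    }

    static void remapped_set_affinity(int irq)
    {
            printf("remapped affinity for irq %d\n", irq);
    }

    static struct io_apic_ops ops = { native_set_affinity };

    int main(void)
    {
            int remapping_enabled = 1;      /* decided once at boot */

            if (remapping_enabled)
                    ops.set_affinity = remapped_set_affinity;

            ops.set_affinity(5);            /* callers no longer branch */
            return 0;
    }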
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf26676..7434d8556d09 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
106 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags; 107 unsigned long flags;
108 108
109 if (WARN_ONCE(!mask, "empty IPI mask")) 109 if (!mask)
110 return; 110 return;
111 111
112 local_irq_save(flags); 112 local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e81..562a76d433c8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
20} 20}
21early_param("x2apic_phys", set_x2apic_phys_mode); 21early_param("x2apic_phys", set_x2apic_phys_mode);
22 22
23static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 23static bool x2apic_fadt_phys(void)
24{ 24{
25 if (x2apic_phys) 25 if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
26 return x2apic_enabled(); 26 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
27 else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
28 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
29 x2apic_enabled()) {
30 printk(KERN_DEBUG "System requires x2apic physical mode\n"); 27 printk(KERN_DEBUG "System requires x2apic physical mode\n");
31 return 1; 28 return true;
32 } 29 }
33 else 30 return false;
34 return 0; 31}
32
33static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
35} 36}
36 37
37static void 38static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
82 83
83static int x2apic_phys_probe(void) 84static int x2apic_phys_probe(void)
84{ 85{
85 if (x2apic_mode && x2apic_phys) 86 if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
86 return 1; 87 return 1;
87 88
88 return apic == &apic_x2apic_phys; 89 return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..8d7012b7f402 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
899 static int use_apm_idle; /* = 0 */ 899 static int use_apm_idle; /* = 0 */
900 static unsigned int last_jiffies; /* = 0 */ 900 static unsigned int last_jiffies; /* = 0 */
901 static unsigned int last_stime; /* = 0 */ 901 static unsigned int last_stime; /* = 0 */
902 cputime_t stime;
902 903
903 int apm_idle_done = 0; 904 int apm_idle_done = 0;
904 unsigned int jiffies_since_last_check = jiffies - last_jiffies; 905 unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
906 907
907 WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); 908 WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
908recalc: 909recalc:
910 task_cputime(current, NULL, &stime);
909 if (jiffies_since_last_check > IDLE_CALC_LIMIT) { 911 if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
910 use_apm_idle = 0; 912 use_apm_idle = 0;
911 last_jiffies = jiffies;
912 last_stime = current->stime;
913 } else if (jiffies_since_last_check > idle_period) { 913 } else if (jiffies_since_last_check > idle_period) {
914 unsigned int idle_percentage; 914 unsigned int idle_percentage;
915 915
916 idle_percentage = current->stime - last_stime; 916 idle_percentage = stime - last_stime;
917 idle_percentage *= 100; 917 idle_percentage *= 100;
918 idle_percentage /= jiffies_since_last_check; 918 idle_percentage /= jiffies_since_last_check;
919 use_apm_idle = (idle_percentage > idle_threshold); 919 use_apm_idle = (idle_percentage > idle_threshold);
920 if (apm_info.forbid_idle) 920 if (apm_info.forbid_idle)
921 use_apm_idle = 0; 921 use_apm_idle = 0;
922 last_jiffies = jiffies;
923 last_stime = current->stime;
924 } 922 }
925 923
924 last_jiffies = jiffies;
925 last_stime = stime;
926
926 bucket = IDLE_LEAKY_MAX; 927 bucket = IDLE_LEAKY_MAX;
927 928
928 while (!need_resched()) { 929 while (!need_resched()) {
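The heuristic above estimates how worthwhile APM idle is by comparing the growth of system time against elapsed jiffies since the last check; the result is compared against idle_threshold. The arithmetic in isolation, with made-up sample values standing in for the kernel's cputime accounting:

    #include <stdio.h>

    int main(void)
    {
            unsigned int last_stime = 1000, stime = 1080;   /* in jiffies */
            unsigned int jiffies_since_last_check = 100;
            unsigned int idle_percentage;

            idle_percentage = stime - last_stime;   /* busy jiffies since check */
            idle_percentage *= 100;
            idle_percentage /= jiffies_since_last_check;

            printf("idle_percentage = %u\n", idle_percentage);
            return 0;
    }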
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6fe..782c456eaa01 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
364#endif 364#endif
365} 365}
366 366
367int amd_get_nb_id(int cpu) 367u16 amd_get_nb_id(int cpu)
368{ 368{
369 int id = 0; 369 u16 id = 0;
370#ifdef CONFIG_SMP 370#ifdef CONFIG_SMP
371 id = per_cpu(cpu_llc_id, cpu); 371 id = per_cpu(cpu_llc_id, cpu);
372#endif 372#endif
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d6..1e7e84a02eba 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
79 if (x86_hyper->init_platform) 79 if (x86_hyper->init_platform)
80 x86_hyper->init_platform(); 80 x86_hyper->init_platform();
81} 81}
82
83bool __init hypervisor_x2apic_available(void)
84{
85 return x86_hyper &&
86 x86_hyper->x2apic_available &&
87 x86_hyper->x2apic_available();
88}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 84c1309c4c0c..7c6f7d548c0f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1226,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1226 .notifier_call = cacheinfo_cpu_callback, 1226 .notifier_call = cacheinfo_cpu_callback,
1227}; 1227};
1228 1228
1229static int __cpuinit cache_sysfs_init(void) 1229static int __init cache_sysfs_init(void)
1230{ 1230{
1231 int i; 1231 int i;
1232 1232
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 0a630dd4b620..a7d26d83fb70 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -14,10 +14,15 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/clocksource.h> 15#include <linux/clocksource.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/hardirq.h>
18#include <linux/interrupt.h>
17#include <asm/processor.h> 19#include <asm/processor.h>
18#include <asm/hypervisor.h> 20#include <asm/hypervisor.h>
19#include <asm/hyperv.h> 21#include <asm/hyperv.h>
20#include <asm/mshyperv.h> 22#include <asm/mshyperv.h>
23#include <asm/desc.h>
24#include <asm/idle.h>
25#include <asm/irq_regs.h>
21 26
22struct ms_hyperv_info ms_hyperv; 27struct ms_hyperv_info ms_hyperv;
23EXPORT_SYMBOL_GPL(ms_hyperv); 28EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void)
30 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) 35 if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
31 return false; 36 return false;
32 37
38 /*
39 * Xen emulates Hyper-V to support enlightened Windows.
40 * Check first whether we are running on a Xen hypervisor.
41 */
42 if (xen_cpuid_base())
43 return false;
44
33 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, 45 cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
34 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); 46 &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
35 47
@@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void)
68 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", 80 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
69 ms_hyperv.features, ms_hyperv.hints); 81 ms_hyperv.features, ms_hyperv.hints);
70 82
71 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); 83 if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
84 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
85#if IS_ENABLED(CONFIG_HYPERV)
86 /*
87 * Set up the IDT entry for the hypervisor callback.
88 */
89 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
90#endif
72} 91}
73 92
74const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { 93const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
77 .init_platform = ms_hyperv_init_platform, 96 .init_platform = ms_hyperv_init_platform,
78}; 97};
79EXPORT_SYMBOL(x86_hyper_ms_hyperv); 98EXPORT_SYMBOL(x86_hyper_ms_hyperv);
99
100#if IS_ENABLED(CONFIG_HYPERV)
101static int vmbus_irq = -1;
102static irq_handler_t vmbus_isr;
103
104void hv_register_vmbus_handler(int irq, irq_handler_t handler)
105{
106 vmbus_irq = irq;
107 vmbus_isr = handler;
108}
109
110void hyperv_vector_handler(struct pt_regs *regs)
111{
112 struct pt_regs *old_regs = set_irq_regs(regs);
113 struct irq_desc *desc;
114
115 irq_enter();
116 exit_idle();
117
118 desc = irq_to_desc(vmbus_irq);
119
120 if (desc)
121 generic_handle_irq_desc(vmbus_irq, desc);
122
123 irq_exit();
124 set_irq_regs(old_regs);
125}
126#else
127void hv_register_vmbus_handler(int irq, irq_handler_t handler)
128{
129}
130#endif
131EXPORT_SYMBOL_GPL(hv_register_vmbus_handler);
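hv_register_vmbus_handler() and hyperv_vector_handler() above form a simple register-then-dispatch pair: the VMbus driver registers its IRQ and ISR once, and the vector handler forwards each callback interrupt to it. A standalone sketch of that pattern with illustrative types (isr_t stands in for irq_handler_t):

    #include <stdio.h>

    typedef void (*isr_t)(void);

    static int vmbus_irq = -1;
    static isr_t vmbus_isr;

    /* Driver side: remember which IRQ/ISR services VMbus interrupts. */
    static void register_vmbus_handler(int irq, isr_t handler)
    {
            vmbus_irq = irq;
            vmbus_isr = handler;
    }

    /* Vector side: forward the callback interrupt to the registered ISR. */
    static void vector_handler(void)
    {
            if (vmbus_irq >= 0 && vmbus_isr)
                    vmbus_isr();
    }

    static void my_isr(void)
    {
            printf("vmbus irq %d handled\n", vmbus_irq);
    }

    int main(void)
    {
            register_vmbus_handler(7, my_isr);
            vector_handler();
            return 0;
    }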
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a5576..bf0f01aea994 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
829 } else { 829 } else {
830 hwc->config_base = x86_pmu_config_addr(hwc->idx); 830 hwc->config_base = x86_pmu_config_addr(hwc->idx);
831 hwc->event_base = x86_pmu_event_addr(hwc->idx); 831 hwc->event_base = x86_pmu_event_addr(hwc->idx);
832 hwc->event_base_rdpmc = hwc->idx; 832 hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
833 } 833 }
834} 834}
835 835
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
1310 .attrs = NULL, 1310 .attrs = NULL,
1311}; 1311};
1312 1312
1313struct perf_pmu_events_attr {
1314 struct device_attribute attr;
1315 u64 id;
1316};
1317
1318/* 1313/*
1319 * Remove all undefined events (x86_pmu.event_map(id) == 0) 1314 * Remove all undefined events (x86_pmu.event_map(id) == 0)
1320 * out of events_attr attributes. 1315 * out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
1348#define EVENT_VAR(_id) event_attr_##_id 1343#define EVENT_VAR(_id) event_attr_##_id
1349#define EVENT_PTR(_id) &event_attr_##_id.attr.attr 1344#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
1350 1345
1351#define EVENT_ATTR(_name, _id) \ 1346#define EVENT_ATTR(_name, _id) \
1352static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ 1347 PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
1353 .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ 1348 events_sysfs_show)
1354 .id = PERF_COUNT_HW_##_id, \
1355};
1356 1349
1357EVENT_ATTR(cpu-cycles, CPU_CYCLES ); 1350EVENT_ATTR(cpu-cycles, CPU_CYCLES );
1358EVENT_ATTR(instructions, INSTRUCTIONS ); 1351EVENT_ATTR(instructions, INSTRUCTIONS );
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 115c1ea97746..7f5c75c2afdd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); 325 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
326 unsigned eventsel; 326 unsigned eventsel;
327 unsigned perfctr; 327 unsigned perfctr;
328 int (*addr_offset)(int index, bool eventsel);
329 int (*rdpmc_index)(int index);
328 u64 (*event_map)(int); 330 u64 (*event_map)(int);
329 int max_events; 331 int max_events;
330 int num_counters; 332 int num_counters;
@@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
446 448
447u64 x86_perf_event_update(struct perf_event *event); 449u64 x86_perf_event_update(struct perf_event *event);
448 450
449static inline int x86_pmu_addr_offset(int index) 451static inline unsigned int x86_pmu_config_addr(int index)
450{ 452{
451 int offset; 453 return x86_pmu.eventsel + (x86_pmu.addr_offset ?
452 454 x86_pmu.addr_offset(index, true) : index);
453 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
454 alternative_io(ASM_NOP2,
455 "shll $1, %%eax",
456 X86_FEATURE_PERFCTR_CORE,
457 "=a" (offset),
458 "a" (index));
459
460 return offset;
461} 455}
462 456
463static inline unsigned int x86_pmu_config_addr(int index) 457static inline unsigned int x86_pmu_event_addr(int index)
464{ 458{
465 return x86_pmu.eventsel + x86_pmu_addr_offset(index); 459 return x86_pmu.perfctr + (x86_pmu.addr_offset ?
460 x86_pmu.addr_offset(index, false) : index);
466} 461}
467 462
468static inline unsigned int x86_pmu_event_addr(int index) 463static inline int x86_pmu_rdpmc_index(int index)
469{ 464{
470 return x86_pmu.perfctr + x86_pmu_addr_offset(index); 465 return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
471} 466}
472 467
473int x86_setup_perfctr(struct perf_event *event); 468int x86_setup_perfctr(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c93bc4e813a0..dfdab42aed27 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
132 return amd_perfmon_event_map[hw_event]; 132 return amd_perfmon_event_map[hw_event];
133} 133}
134 134
135static int amd_pmu_hw_config(struct perf_event *event) 135static struct event_constraint *amd_nb_event_constraint;
136
137/*
138 * Previously calculated offsets
139 */
140static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
141static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
142static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
143
144/*
145 * Legacy CPUs:
146 * 4 counters starting at 0xc0010000 each offset by 1
147 *
148 * CPUs with core performance counter extensions:
149 * 6 counters starting at 0xc0010200 each offset by 2
150 *
151 * CPUs with north bridge performance counter extensions:
152 * 4 additional counters starting at 0xc0010240 each offset by 2
153 * (indexed right above either one of the above core counters)
154 */
155static inline int amd_pmu_addr_offset(int index, bool eventsel)
136{ 156{
137 int ret; 157 int offset, first, base;
138 158
139 /* pass precise event sampling to ibs: */ 159 if (!index)
140 if (event->attr.precise_ip && get_ibs_caps()) 160 return index;
141 return -ENOENT; 161
162 if (eventsel)
163 offset = event_offsets[index];
164 else
165 offset = count_offsets[index];
166
167 if (offset)
168 return offset;
169
170 if (amd_nb_event_constraint &&
171 test_bit(index, amd_nb_event_constraint->idxmsk)) {
172 /*
173 * calculate the offset of NB counters with respect to
174 * base eventsel or perfctr
175 */
176
177 first = find_first_bit(amd_nb_event_constraint->idxmsk,
178 X86_PMC_IDX_MAX);
179
180 if (eventsel)
181 base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
182 else
183 base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
184
185 offset = base + ((index - first) << 1);
186 } else if (!cpu_has_perfctr_core)
187 offset = index;
188 else
189 offset = index << 1;
190
191 if (eventsel)
192 event_offsets[index] = offset;
193 else
194 count_offsets[index] = offset;
195
196 return offset;
197}
198
199static inline int amd_pmu_rdpmc_index(int index)
200{
201 int ret, first;
202
203 if (!index)
204 return index;
205
206 ret = rdpmc_indexes[index];
142 207
143 ret = x86_pmu_hw_config(event);
144 if (ret) 208 if (ret)
145 return ret; 209 return ret;
146 210
147 if (has_branch_stack(event)) 211 if (amd_nb_event_constraint &&
148 return -EOPNOTSUPP; 212 test_bit(index, amd_nb_event_constraint->idxmsk)) {
213 /*
214 * according to the mnual, ECX value of the NB counters is
215 * the index of the NB counter (0, 1, 2 or 3) plus 6
216 */
217
218 first = find_first_bit(amd_nb_event_constraint->idxmsk,
219 X86_PMC_IDX_MAX);
220 ret = index - first + 6;
221 } else
222 ret = index;
223
224 rdpmc_indexes[index] = ret;
225
226 return ret;
227}
149 228
229static int amd_core_hw_config(struct perf_event *event)
230{
150 if (event->attr.exclude_host && event->attr.exclude_guest) 231 if (event->attr.exclude_host && event->attr.exclude_guest)
151 /* 232 /*
152 * When HO == GO == 1 the hardware treats that as GO == HO == 0 233 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
156 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | 237 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
157 ARCH_PERFMON_EVENTSEL_OS); 238 ARCH_PERFMON_EVENTSEL_OS);
158 else if (event->attr.exclude_host) 239 else if (event->attr.exclude_host)
159 event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; 240 event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
160 else if (event->attr.exclude_guest) 241 else if (event->attr.exclude_guest)
161 event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; 242 event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
243
244 return 0;
245}
246
247/*
248 * NB counters do not support the following event select bits:
249 * Host/Guest only
250 * Counter mask
251 * Invert counter mask
252 * Edge detect
253 * OS/User mode
254 */
255static int amd_nb_hw_config(struct perf_event *event)
256{
257 /* for NB, we only allow system-wide counting mode */
258 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
259 return -EINVAL;
260
261 if (event->attr.exclude_user || event->attr.exclude_kernel ||
262 event->attr.exclude_host || event->attr.exclude_guest)
263 return -EINVAL;
162 264
163 if (event->attr.type != PERF_TYPE_RAW) 265 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
164 return 0; 266 ARCH_PERFMON_EVENTSEL_OS);
165 267
166 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; 268 if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
269 ARCH_PERFMON_EVENTSEL_INT))
270 return -EINVAL;
167 271
168 return 0; 272 return 0;
169} 273}
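The three counter layouts documented above amd_pmu_addr_offset() can be checked with plain arithmetic: legacy counters stride by 1 from 0xc0010000, core-extension counters by 2 from 0xc0010200, and NB counters by 2 from 0xc0010240. A small worked example printing the resulting eventsel MSR addresses (numbers taken from the comment above, nothing else assumed):

    #include <stdio.h>

    int main(void)
    {
            unsigned int legacy_base = 0xc0010000;  /* stride 1, 4 counters */
            unsigned int core_base   = 0xc0010200;  /* stride 2, 6 counters */
            unsigned int nb_base     = 0xc0010240;  /* stride 2, 4 counters */
            int i;

            for (i = 0; i < 4; i++)
                    printf("legacy %d: eventsel %#x\n", i, legacy_base + i);
            for (i = 0; i < 6; i++)
                    printf("core   %d: eventsel %#x\n", i, core_base + (i << 1));
            for (i = 0; i < 4; i++)
                    printf("NB     %d: eventsel %#x\n", i, nb_base + (i << 1));
            return 0;
    }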
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
181 return (hwc->config & 0xe0) == 0xe0; 285 return (hwc->config & 0xe0) == 0xe0;
182} 286}
183 287
288static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
289{
290 return amd_nb_event_constraint && amd_is_nb_event(hwc);
291}
292
184static inline int amd_has_nb(struct cpu_hw_events *cpuc) 293static inline int amd_has_nb(struct cpu_hw_events *cpuc)
185{ 294{
186 struct amd_nb *nb = cpuc->amd_nb; 295 struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
188 return nb && nb->nb_id != -1; 297 return nb && nb->nb_id != -1;
189} 298}
190 299
191static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 300static int amd_pmu_hw_config(struct perf_event *event)
192 struct perf_event *event) 301{
302 int ret;
303
304 /* pass precise event sampling to ibs: */
305 if (event->attr.precise_ip && get_ibs_caps())
306 return -ENOENT;
307
308 if (has_branch_stack(event))
309 return -EOPNOTSUPP;
310
311 ret = x86_pmu_hw_config(event);
312 if (ret)
313 return ret;
314
315 if (event->attr.type == PERF_TYPE_RAW)
316 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
317
318 if (amd_is_perfctr_nb_event(&event->hw))
319 return amd_nb_hw_config(event);
320
321 return amd_core_hw_config(event);
322}
323
324static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
325 struct perf_event *event)
193{ 326{
194 struct hw_perf_event *hwc = &event->hw;
195 struct amd_nb *nb = cpuc->amd_nb; 327 struct amd_nb *nb = cpuc->amd_nb;
196 int i; 328 int i;
197 329
198 /* 330 /*
199 * only care about NB events
200 */
201 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
202 return;
203
204 /*
205 * need to scan whole list because event may not have 331 * need to scan whole list because event may not have
206 * been assigned during scheduling 332 * been assigned during scheduling
207 * 333 *
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
215 } 341 }
216} 342}
217 343
344static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
345{
346 int core_id = cpu_data(smp_processor_id()).cpu_core_id;
347
348 /* deliver interrupts only to this core */
349 if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
350 hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
351 hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
352 hwc->config |= (u64)(core_id) <<
353 AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
354 }
355}
356
218 /* 357 /*
219 * AMD64 NorthBridge events need special treatment because 358 * AMD64 NorthBridge events need special treatment because
220 * counter access needs to be synchronized across all cores 359 * counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
247 * 386 *
248 * Given that resources are allocated (cmpxchg), they must be 387 * Given that resources are allocated (cmpxchg), they must be
249 * eventually freed for others to use. This is accomplished by 388 * eventually freed for others to use. This is accomplished by
250 * calling amd_put_event_constraints(). 389 * calling __amd_put_nb_event_constraints()
251 * 390 *
252 * Non-NB events are not impacted by this restriction. 391 * Non-NB events are not impacted by this restriction.
253 */ 392 */
254static struct event_constraint * 393static struct event_constraint *
255amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 394__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
395 struct event_constraint *c)
256{ 396{
257 struct hw_perf_event *hwc = &event->hw; 397 struct hw_perf_event *hwc = &event->hw;
258 struct amd_nb *nb = cpuc->amd_nb; 398 struct amd_nb *nb = cpuc->amd_nb;
259 struct perf_event *old = NULL; 399 struct perf_event *old;
260 int max = x86_pmu.num_counters; 400 int idx, new = -1;
261 int i, j, k = -1;
262 401
263 /* 402 if (!c)
264 * if not NB event or no NB, then no constraints 403 c = &unconstrained;
265 */ 404
266 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) 405 if (cpuc->is_fake)
267 return &unconstrained; 406 return c;
268 407
269 /* 408 /*
270 * detect if already present, if so reuse 409 * detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
276 * because of successive calls to x86_schedule_events() from 415 * because of successive calls to x86_schedule_events() from
277 * hw_perf_group_sched_in() without hw_perf_enable() 416 * hw_perf_group_sched_in() without hw_perf_enable()
278 */ 417 */
279 for (i = 0; i < max; i++) { 418 for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
280 /* 419 if (new == -1 || hwc->idx == idx)
281 * keep track of first free slot 420 /* assign free slot, prefer hwc->idx */
282 */ 421 old = cmpxchg(nb->owners + idx, NULL, event);
283 if (k == -1 && !nb->owners[i]) 422 else if (nb->owners[idx] == event)
284 k = i; 423 /* event already present */
424 old = event;
425 else
426 continue;
427
428 if (old && old != event)
429 continue;
430
431 /* reassign to this slot */
432 if (new != -1)
433 cmpxchg(nb->owners + new, event, NULL);
434 new = idx;
285 435
286 /* already present, reuse */ 436 /* already present, reuse */
287 if (nb->owners[i] == event) 437 if (old == event)
288 goto done;
289 }
290 /*
291 * not present, so grab a new slot
292 * starting either at:
293 */
294 if (hwc->idx != -1) {
295 /* previous assignment */
296 i = hwc->idx;
297 } else if (k != -1) {
298 /* start from free slot found */
299 i = k;
300 } else {
301 /*
302 * event not found, no slot found in
303 * first pass, try again from the
304 * beginning
305 */
306 i = 0;
307 }
308 j = i;
309 do {
310 old = cmpxchg(nb->owners+i, NULL, event);
311 if (!old)
312 break; 438 break;
313 if (++i == max) 439 }
314 i = 0; 440
315 } while (i != j); 441 if (new == -1)
316done: 442 return &emptyconstraint;
317 if (!old) 443
318 return &nb->event_constraints[i]; 444 if (amd_is_perfctr_nb_event(hwc))
319 445 amd_nb_interrupt_hw_config(hwc);
320 return &emptyconstraint; 446
447 return &nb->event_constraints[new];
321} 448}
322 449
323static struct amd_nb *amd_alloc_nb(int cpu) 450static struct amd_nb *amd_alloc_nb(int cpu)
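__amd_get_nb_event_constraints() above serializes access to the shared NB counters by claiming slots in the owners[] table with cmpxchg, preferring the event's previous index and reusing a slot the event already owns. A userspace sketch of that lock-free claim protocol using C11 atomics; all names here are illustrative:

    #include <stdatomic.h>
    #include <stdio.h>

    #define NSLOTS 4

    static _Atomic(void *) owners[NSLOTS];  /* one entry per shared NB counter */

    /* Claim a slot for 'ev', starting at 'hint'; return -1 if all are taken. */
    static int claim_slot(void *ev, int hint)
    {
            for (int k = 0; k < NSLOTS; k++) {
                    int idx = (hint + k) % NSLOTS;
                    void *expected = NULL;

                    /* like cmpxchg(nb->owners + idx, NULL, event) */
                    if (atomic_compare_exchange_strong(&owners[idx],
                                                       &expected, ev))
                            return idx;     /* grabbed a free slot */
                    if (expected == ev)
                            return idx;     /* already ours: reuse */
            }
            return -1;
    }

    int main(void)
    {
            int ev1, ev2;

            printf("ev1 got slot %d\n", claim_slot(&ev1, 2));
            printf("ev2 got slot %d\n", claim_slot(&ev2, 2)); /* 2 taken -> 3 */
            return 0;
    }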
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
364 struct amd_nb *nb; 491 struct amd_nb *nb;
365 int i, nb_id; 492 int i, nb_id;
366 493
367 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 494 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
368 495
369 if (boot_cpu_data.x86_max_cores < 2) 496 if (boot_cpu_data.x86_max_cores < 2)
370 return; 497 return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
407 } 534 }
408} 535}
409 536
537static struct event_constraint *
538amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
539{
540 /*
541 * if not NB event or no NB, then no constraints
542 */
543 if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
544 return &unconstrained;
545
546 return __amd_get_nb_event_constraints(cpuc, event,
547 amd_nb_event_constraint);
548}
549
550static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
551 struct perf_event *event)
552{
553 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
554 __amd_put_nb_event_constraints(cpuc, event);
555}
556
410PMU_FORMAT_ATTR(event, "config:0-7,32-35"); 557PMU_FORMAT_ATTR(event, "config:0-7,32-35");
411PMU_FORMAT_ATTR(umask, "config:8-15" ); 558PMU_FORMAT_ATTR(umask, "config:8-15" );
412PMU_FORMAT_ATTR(edge, "config:18" ); 559PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 643static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 644static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 645
646static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
647static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
648
499static struct event_constraint * 649static struct event_constraint *
500amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) 650amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
501{ 651{
@@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
561 return &amd_f15_PMC20; 711 return &amd_f15_PMC20;
562 } 712 }
563 case AMD_EVENT_NB: 713 case AMD_EVENT_NB:
564 /* not yet implemented */ 714 return __amd_get_nb_event_constraints(cpuc, event,
565 return &emptyconstraint; 715 amd_nb_event_constraint);
566 default: 716 default:
567 return &emptyconstraint; 717 return &emptyconstraint;
568 } 718 }
@@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
587 .schedule_events = x86_schedule_events, 737 .schedule_events = x86_schedule_events,
588 .eventsel = MSR_K7_EVNTSEL0, 738 .eventsel = MSR_K7_EVNTSEL0,
589 .perfctr = MSR_K7_PERFCTR0, 739 .perfctr = MSR_K7_PERFCTR0,
740 .addr_offset = amd_pmu_addr_offset,
741 .rdpmc_index = amd_pmu_rdpmc_index,
590 .event_map = amd_pmu_event_map, 742 .event_map = amd_pmu_event_map,
591 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 743 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
592 .num_counters = AMD64_NUM_COUNTERS, 744 .num_counters = AMD64_NUM_COUNTERS,
@@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
608 760
609static int setup_event_constraints(void) 761static int setup_event_constraints(void)
610{ 762{
611 if (boot_cpu_data.x86 >= 0x15) 763 if (boot_cpu_data.x86 == 0x15)
612 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 764 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
613 return 0; 765 return 0;
614} 766}
@@ -638,6 +790,23 @@ static int setup_perfctr_core(void)
638 return 0; 790 return 0;
639} 791}
640 792
793static int setup_perfctr_nb(void)
794{
795 if (!cpu_has_perfctr_nb)
796 return -ENODEV;
797
798 x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
799
800 if (cpu_has_perfctr_core)
801 amd_nb_event_constraint = &amd_NBPMC96;
802 else
803 amd_nb_event_constraint = &amd_NBPMC74;
804
805 printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
806
807 return 0;
808}
809
641__init int amd_pmu_init(void) 810__init int amd_pmu_init(void)
642{ 811{
643 /* Performance-monitoring supported from K7 and later: */ 812 /* Performance-monitoring supported from K7 and later: */
@@ -648,6 +817,7 @@ __init int amd_pmu_init(void)
648 817
649 setup_event_constraints(); 818 setup_event_constraints();
650 setup_perfctr_core(); 819 setup_perfctr_core();
820 setup_perfctr_nb();
651 821
652 /* Events are common for all AMDs */ 822 /* Events are common for all AMDs */
653 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 823 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void)
678 * SVM is disabled the Guest-only bits still gets set and the counter 848 * SVM is disabled the Guest-only bits still gets set and the counter
679 * will not count anything. 849 * will not count anything.
680 */ 850 */
681 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 851 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
682 852
683 /* Reload all events */ 853 /* Reload all events */
684 x86_pmu_disable_all(); 854 x86_pmu_disable_all();
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcfd..03a36321ec54 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
33 33
34#define VMWARE_PORT_CMD_GETVERSION 10 34#define VMWARE_PORT_CMD_GETVERSION 10
35#define VMWARE_PORT_CMD_GETHZ 45 35#define VMWARE_PORT_CMD_GETHZ 45
36#define VMWARE_PORT_CMD_GETVCPU_INFO 68
37#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
38#define VMWARE_PORT_CMD_VCPU_RESERVED 31
36 39
37#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ 40#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
38 __asm__("inl (%%dx)" : \ 41 __asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
125 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 128 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
126} 129}
127 130
131/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
132static bool __init vmware_legacy_x2apic_available(void)
133{
134 uint32_t eax, ebx, ecx, edx;
135 VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
136 return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
137 (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
138}
139
128const __refconst struct hypervisor_x86 x86_hyper_vmware = { 140const __refconst struct hypervisor_x86 x86_hyper_vmware = {
129 .name = "VMware", 141 .name = "VMware",
130 .detect = vmware_platform, 142 .detect = vmware_platform,
131 .set_cpu_features = vmware_set_cpu_features, 143 .set_cpu_features = vmware_set_cpu_features,
132 .init_platform = vmware_platform_setup, 144 .init_platform = vmware_platform_setup,
145 .x2apic_available = vmware_legacy_x2apic_available,
133}; 146};
134EXPORT_SYMBOL(x86_hyper_vmware); 147EXPORT_SYMBOL(x86_hyper_vmware);
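vmware_legacy_x2apic_available() reduces to two bit tests on the capability word returned through the backdoor port: the reserved bit must be clear and the legacy-x2apic bit set. The same test in isolation, with a made-up capability value in place of the real hypercall reply:

    #include <stdio.h>

    #define LEGACY_X2APIC_BIT 3
    #define VCPU_RESERVED_BIT 31

    int main(void)
    {
            unsigned int eax = 1u << LEGACY_X2APIC_BIT;     /* sample reply */
            int available = (eax & (1u << VCPU_RESERVED_BIT)) == 0 &&
                            (eax & (1u << LEGACY_X2APIC_BIT)) != 0;

            printf("legacy x2apic available: %s\n", available ? "yes" : "no");
            return 0;
    }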
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 6ed91d9980e2..8831176aa5ef 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1091,11 +1091,18 @@ ENTRY(xen_failsafe_callback)
1091 _ASM_EXTABLE(4b,9b) 1091 _ASM_EXTABLE(4b,9b)
1092ENDPROC(xen_failsafe_callback) 1092ENDPROC(xen_failsafe_callback)
1093 1093
1094BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1094BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1095 xen_evtchn_do_upcall) 1095 xen_evtchn_do_upcall)
1096 1096
1097#endif /* CONFIG_XEN */ 1097#endif /* CONFIG_XEN */
1098 1098
1099#if IS_ENABLED(CONFIG_HYPERV)
1100
1101BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
1102 hyperv_vector_handler)
1103
1104#endif /* CONFIG_HYPERV */
1105
1099#ifdef CONFIG_FUNCTION_TRACER 1106#ifdef CONFIG_FUNCTION_TRACER
1100#ifdef CONFIG_DYNAMIC_FTRACE 1107#ifdef CONFIG_DYNAMIC_FTRACE
1101 1108
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cb3c591339aa..048f2240f8e6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1454,11 +1454,16 @@ ENTRY(xen_failsafe_callback)
1454 CFI_ENDPROC 1454 CFI_ENDPROC
1455END(xen_failsafe_callback) 1455END(xen_failsafe_callback)
1456 1456
1457apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ 1457apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1458 xen_hvm_callback_vector xen_evtchn_do_upcall 1458 xen_hvm_callback_vector xen_evtchn_do_upcall
1459 1459
1460#endif /* CONFIG_XEN */ 1460#endif /* CONFIG_XEN */
1461 1461
1462#if IS_ENABLED(CONFIG_HYPERV)
1463apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
1464 hyperv_callback_vector hyperv_vector_handler
1465#endif /* CONFIG_HYPERV */
1466
1462/* 1467/*
1463 * Some functions should be protected against kprobes 1468 * Some functions should be protected against kprobes
1464 */ 1469 */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d10101..6773c918b8cc 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
18#include <asm/io_apic.h> 18#include <asm/io_apic.h>
19#include <asm/bios_ebda.h> 19#include <asm/bios_ebda.h>
20#include <asm/tlbflush.h> 20#include <asm/tlbflush.h>
21#include <asm/bootparam_utils.h>
21 22
22static void __init i386_default_early_setup(void) 23static void __init i386_default_early_setup(void)
23{ 24{
@@ -30,6 +31,8 @@ static void __init i386_default_early_setup(void)
30 31
31void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
32{ 33{
34 sanitize_boot_params(&boot_params);
35
33 memblock_reserve(__pa_symbol(&_text), 36 memblock_reserve(__pa_symbol(&_text),
34 __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); 37 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
35 38
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99ac..849fc9e63c2f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,7 @@
25#include <asm/kdebug.h> 25#include <asm/kdebug.h>
26#include <asm/e820.h> 26#include <asm/e820.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/bootparam_utils.h>
28 29
29static void __init zap_identity_mappings(void) 30static void __init zap_identity_mappings(void)
30{ 31{
@@ -46,6 +47,7 @@ static void __init copy_bootdata(char *real_mode_data)
46 char * command_line; 47 char * command_line;
47 48
48 memcpy(&boot_params, real_mode_data, sizeof boot_params); 49 memcpy(&boot_params, real_mode_data, sizeof boot_params);
50 sanitize_boot_params(&boot_params);
49 if (boot_params.hdr.cmd_line_ptr) { 51 if (boot_params.hdr.cmd_line_ptr) {
50 command_line = __va(boot_params.hdr.cmd_line_ptr); 52 command_line = __va(boot_params.hdr.cmd_line_ptr);
51 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); 53 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c8932c79e78b..3c3f58a0808f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -307,36 +307,45 @@ default_entry:
307 movl %eax,%cr0 307 movl %eax,%cr0
308 308
309/* 309/*
310 * New page tables may be in 4Mbyte page mode and may 310 * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
311 * be using the global pages. 311 * bits like NT set. This would confuse the debugger if this code is traced. So
312 * initialize them properly now before switching to protected mode. That means
313 * DF in particular (even though we have cleared it earlier after copying the
314 * command line) because GCC expects it.
315 */
316 pushl $0
317 popfl
318
319/*
320 * New page tables may be in 4Mbyte page mode and may be using the global pages.
312 * 321 *
313 * NOTE! If we are on a 486 we may have no cr4 at all! 322 * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
314 * Specifically, cr4 exists if and only if CPUID exists 323 * if and only if CPUID exists and has flags other than the FPU flag set.
315 * and has flags other than the FPU flag set.
316 */ 324 */
325 movl $-1,pa(X86_CPUID) # preset CPUID level
317 movl $X86_EFLAGS_ID,%ecx 326 movl $X86_EFLAGS_ID,%ecx
318 pushl %ecx 327 pushl %ecx
319 popfl 328 popfl # set EFLAGS=ID
320 pushfl 329 pushfl
321 popl %eax 330 popl %eax # get EFLAGS
322 pushl $0 331 testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set?
323 popfl 332 jz enable_paging # hw disallowed setting of ID bit
324 pushfl 333 # which means no CPUID and no CR4
325 popl %edx 334
326 xorl %edx,%eax 335 xorl %eax,%eax
327 testl %ecx,%eax 336 cpuid
328 jz 6f # No ID flag = no CPUID = no CR4 337 movl %eax,pa(X86_CPUID) # save largest std CPUID function
329 338
330 movl $1,%eax 339 movl $1,%eax
331 cpuid 340 cpuid
332 andl $~1,%edx # Ignore CPUID.FPU 341 andl $~1,%edx # Ignore CPUID.FPU
333 jz 6f # No flags or only CPUID.FPU = no CR4 342 jz enable_paging # No flags or only CPUID.FPU = no CR4
334 343
335 movl pa(mmu_cr4_features),%eax 344 movl pa(mmu_cr4_features),%eax
336 movl %eax,%cr4 345 movl %eax,%cr4
337 346
338 testb $X86_CR4_PAE, %al # check if PAE is enabled 347 testb $X86_CR4_PAE, %al # check if PAE is enabled
339 jz 6f 348 jz enable_paging
340 349
341 /* Check if extended functions are implemented */ 350 /* Check if extended functions are implemented */
342 movl $0x80000000, %eax 351 movl $0x80000000, %eax
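The rewritten sequence above is the classic EFLAGS.ID probe: if software can flip bit 21 of EFLAGS, CPUID exists (and hence CR4 may exist). The same probe expressed as a C helper with inline assembly mirroring the pushfl/popfl dance; this sketch is for 32-bit builds only and is not the kernel's code:

    #include <stdio.h>

    #define X86_EFLAGS_ID (1u << 21)

    /* Toggle EFLAGS.ID; if the change sticks, the CPU supports CPUID. */
    static int have_cpuid(void)
    {
            unsigned int before, after;

            __asm__ volatile(
                    "pushfl\n\t"            /* save caller's EFLAGS     */
                    "pushfl\n\t"
                    "popl %0\n\t"           /* read EFLAGS              */
                    "movl %0, %1\n\t"       /* before = original        */
                    "xorl %2, %0\n\t"       /* flip the ID bit          */
                    "pushl %0\n\t"
                    "popfl\n\t"             /* try to write it back     */
                    "pushfl\n\t"
                    "popl %0\n\t"           /* read what actually stuck */
                    "popfl"                 /* restore caller's EFLAGS  */
                    : "=&r" (after), "=&r" (before)
                    : "i" (X86_EFLAGS_ID));

            return (before ^ after) & X86_EFLAGS_ID;
    }

    int main(void)
    {
            printf("CPUID %s\n", have_cpuid() ? "supported" : "not supported");
            return 0;
    }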
@@ -344,7 +353,7 @@ default_entry:
344 /* Value must be in the range 0x80000001 to 0x8000ffff */ 353 /* Value must be in the range 0x80000001 to 0x8000ffff */
345 subl $0x80000001, %eax 354 subl $0x80000001, %eax
346 cmpl $(0x8000ffff-0x80000001), %eax 355 cmpl $(0x8000ffff-0x80000001), %eax
347 ja 6f 356 ja enable_paging
348 357
349 /* Clear bogus XD_DISABLE bits */ 358 /* Clear bogus XD_DISABLE bits */
350 call verify_cpu 359 call verify_cpu
@@ -353,7 +362,7 @@ default_entry:
353 cpuid 362 cpuid
354 /* Execute Disable bit supported? */ 363 /* Execute Disable bit supported? */
355 btl $(X86_FEATURE_NX & 31), %edx 364 btl $(X86_FEATURE_NX & 31), %edx
356 jnc 6f 365 jnc enable_paging
357 366
358 /* Setup EFER (Extended Feature Enable Register) */ 367 /* Setup EFER (Extended Feature Enable Register) */
359 movl $MSR_EFER, %ecx 368 movl $MSR_EFER, %ecx
@@ -363,7 +372,7 @@ default_entry:
363 /* Make changes effective */ 372 /* Make changes effective */
364 wrmsr 373 wrmsr
365 374
3666: 375enable_paging:
367 376
368/* 377/*
369 * Enable paging 378 * Enable paging
@@ -378,14 +387,6 @@ default_entry:
378 addl $__PAGE_OFFSET, %esp 387 addl $__PAGE_OFFSET, %esp
379 388
380/* 389/*
381 * Initialize eflags. Some BIOS's leave bits like NT set. This would
382 * confuse the debugger if this code is traced.
383 * XXX - best to initialize before switching to protected mode.
384 */
385 pushl $0
386 popfl
387
388/*
389 * start system 32-bit setup. We need to re-do some of the things done 390 * start system 32-bit setup. We need to re-do some of the things done
390 * in 16-bit mode for the "real" operations. 391 * in 16-bit mode for the "real" operations.
391 */ 392 */
@@ -394,31 +395,11 @@ default_entry:
394 jz 1f # Did we do this already? 395 jz 1f # Did we do this already?
395 call *%eax 396 call *%eax
3961: 3971:
397 398
398/* check if it is 486 or 386. */
399/* 399/*
400 * XXX - this does a lot of unnecessary setup. Alignment checks don't 400 * Check if it is 486
401 * apply at our cpl of 0 and the stack ought to be aligned already, and
402 * we don't need to preserve eflags.
403 */ 401 */
404 movl $-1,X86_CPUID # -1 for no CPUID initially 402 cmpl $-1,X86_CPUID
405 movb $3,X86 # at least 386
406 pushfl # push EFLAGS
407 popl %eax # get EFLAGS
408 movl %eax,%ecx # save original EFLAGS
409 xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
410 pushl %eax # copy to EFLAGS
411 popfl # set EFLAGS
412 pushfl # get new EFLAGS
413 popl %eax # put it in eax
414 xorl %ecx,%eax # change in flags
415 pushl %ecx # restore original EFLAGS
416 popfl
417 testl $0x40000,%eax # check if AC bit changed
418 je is386
419
420 movb $4,X86 # at least 486
421 testl $0x200000,%eax # check if ID bit changed
422 je is486 403 je is486
423 404
424 /* get vendor info */ 405 /* get vendor info */
@@ -444,11 +425,10 @@ default_entry:
444 movb %cl,X86_MASK 425 movb %cl,X86_MASK
445 movl %edx,X86_CAPABILITY 426 movl %edx,X86_CAPABILITY
446 427
447is486: movl $0x50022,%ecx # set AM, WP, NE and MP 428is486:
448 jmp 2f 429 movb $4,X86
449 430 movl $0x50022,%ecx # set AM, WP, NE and MP
450is386: movl $2,%ecx # set MP 431 movl %cr0,%eax
4512: movl %cr0,%eax
452 andl $0x80000011,%eax # Save PG,PE,ET 432 andl $0x80000011,%eax # Save PG,PE,ET
453 orl %ecx,%eax 433 orl %ecx,%eax
454 movl %eax,%cr0 434 movl %eax,%cr0
@@ -473,7 +453,6 @@ is386: movl $2,%ecx # set MP
473 xorl %eax,%eax # Clear LDT 453 xorl %eax,%eax # Clear LDT
474 lldt %ax 454 lldt %ax
475 455
476 cld # gcc2 wants the direction flag cleared at all times
477 pushl $0 # fake return address for unwinder 456 pushl $0 # fake return address for unwinder
478 jmp *(initial_code) 457 jmp *(initial_code)
479 458
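The reworked default_entry flow above relies on the classic EFLAGS.ID probe: CPUID exists exactly when software can toggle bit 21 (ID) of EFLAGS. A minimal user-space sketch of the same probe, assuming 32-bit x86 and GCC extended asm (illustrative only, not part of the patch):

	#include <stdio.h>

	static int cpuid_supported(void)
	{
		unsigned long before, after;

		asm volatile("pushfl\n\t"             /* save EFLAGS          */
			     "popl %0\n\t"
			     "movl %0, %1\n\t"
			     "xorl $0x200000, %1\n\t" /* flip the ID bit      */
			     "pushl %1\n\t"
			     "popfl\n\t"              /* try to write it back */
			     "pushfl\n\t"
			     "popl %1"                /* read the result      */
			     : "=r" (before), "=r" (after) : : "cc");
		return (before ^ after) & 0x200000;   /* stuck bit => no CPUID */
	}

	int main(void)
	{
		printf("CPUID %s\n", cpuid_supported() ? "available" : "absent");
		return 0;
	}

The kernel variant additionally presets X86_CPUID to -1, so the later "cmpl $-1,X86_CPUID" can tell "no CPUID at all" apart from a stored CPUID level.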
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index e28670f9a589..da85a8e830a1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
478 478
479static int hpet_setup_msi_irq(unsigned int irq) 479static int hpet_setup_msi_irq(unsigned int irq)
480{ 480{
481 if (arch_setup_hpet_msi(irq, hpet_blockid)) { 481 if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
482 destroy_irq(irq); 482 destroy_irq(irq);
483 return -EINVAL; 483 return -EINVAL;
484 } 484 }
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 000000000000..0d33169cc1a2
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for kernel probes
3#
4
5obj-$(CONFIG_KPROBES) += core.o
6obj-$(CONFIG_OPTPROBES) += opt.o
7obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
99 return addr; 99 return addr;
100} 100}
101#endif 101#endif
102
103#ifdef CONFIG_KPROBES_ON_FTRACE
104extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
105 struct kprobe_ctlblk *kcb);
106#else
107static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
108 struct kprobe_ctlblk *kcb)
109{
110 return 0;
111}
112#endif
102#endif 113#endif
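The pattern added to common.h here is worth noting: when KPROBES_ON_FTRACE is compiled out, skip_singlestep() degrades to a static inline returning 0, so core.c can call it unconditionally. A small self-contained sketch of the same idiom (macro and function names are illustrative):

	#include <stdio.h>

	#ifdef HAVE_FAST_PATH
	extern int try_fast_path(int arg);
	#else
	/* feature compiled out: callers still link, and always take the slow path */
	static inline int try_fast_path(int arg)
	{
		(void)arg;
		return 0;
	}
	#endif

	int main(void)
	{
		if (!try_fast_path(42))
			printf("slow path\n");
		return 0;
	}

This is what lets the #ifdef KPROBES_CAN_USE_FTRACE blocks disappear from kprobe_handler() in the hunks below.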
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf6..e124554598ee 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
58#include <asm/insn.h> 58#include <asm/insn.h>
59#include <asm/debugreg.h> 59#include <asm/debugreg.h>
60 60
61#include "kprobes-common.h" 61#include "common.h"
62 62
63void jprobe_return_end(void); 63void jprobe_return_end(void);
64 64
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
78 * Groups, and some special opcodes can not boost. 78 * Groups, and some special opcodes can not boost.
79 * This is non-const and volatile to keep gcc from statically 79 * This is non-const and volatile to keep gcc from statically
80 * optimizing it out, as variable_test_bit makes gcc think only 80 * optimizing it out, as variable_test_bit makes gcc think only
81 * *(unsigned long*) is used. 81 * *(unsigned long*) is used.
82 */ 82 */
83static volatile u32 twobyte_is_boostable[256 / 32] = { 83static volatile u32 twobyte_is_boostable[256 / 32] = {
84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 84 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
117 struct __arch_relative_insn { 117 struct __arch_relative_insn {
118 u8 op; 118 u8 op;
119 s32 raddr; 119 s32 raddr;
120 } __attribute__((packed)) *insn; 120 } __packed *insn;
121 121
122 insn = (struct __arch_relative_insn *)from; 122 insn = (struct __arch_relative_insn *)from;
123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); 123 insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
541 return 1; 541 return 1;
542} 542}
543 543
544#ifdef KPROBES_CAN_USE_FTRACE
545static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
546 struct kprobe_ctlblk *kcb)
547{
548 /*
549 * Emulate singlestep (and also recover regs->ip)
550 * as if there is a 5byte nop
551 */
552 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
553 if (unlikely(p->post_handler)) {
554 kcb->kprobe_status = KPROBE_HIT_SSDONE;
555 p->post_handler(p, regs, 0);
556 }
557 __this_cpu_write(current_kprobe, NULL);
558}
559#endif
560
561/* 544/*
562 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 545 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
563 * remain disabled throughout this function. 546 * remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
616 } else if (kprobe_running()) { 599 } else if (kprobe_running()) {
617 p = __this_cpu_read(current_kprobe); 600 p = __this_cpu_read(current_kprobe);
618 if (p->break_handler && p->break_handler(p, regs)) { 601 if (p->break_handler && p->break_handler(p, regs)) {
619#ifdef KPROBES_CAN_USE_FTRACE 602 if (!skip_singlestep(p, regs, kcb))
620 if (kprobe_ftrace(p)) { 603 setup_singlestep(p, regs, kcb, 0);
621 skip_singlestep(p, regs, kcb);
622 return 1;
623 }
624#endif
625 setup_singlestep(p, regs, kcb, 0);
626 return 1; 604 return 1;
627 } 605 }
628 } /* else: not a kprobe fault; let the kernel handle it */ 606 } /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1075 return 0; 1053 return 0;
1076} 1054}
1077 1055
1078#ifdef KPROBES_CAN_USE_FTRACE
1079/* Ftrace callback handler for kprobes */
1080void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
1081 struct ftrace_ops *ops, struct pt_regs *regs)
1082{
1083 struct kprobe *p;
1084 struct kprobe_ctlblk *kcb;
1085 unsigned long flags;
1086
1087 /* Disable irq for emulating a breakpoint and avoiding preempt */
1088 local_irq_save(flags);
1089
1090 p = get_kprobe((kprobe_opcode_t *)ip);
1091 if (unlikely(!p) || kprobe_disabled(p))
1092 goto end;
1093
1094 kcb = get_kprobe_ctlblk();
1095 if (kprobe_running()) {
1096 kprobes_inc_nmissed_count(p);
1097 } else {
1098 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
1099 regs->ip = ip + sizeof(kprobe_opcode_t);
1100
1101 __this_cpu_write(current_kprobe, p);
1102 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1103 if (!p->pre_handler || !p->pre_handler(p, regs))
1104 skip_singlestep(p, regs, kcb);
1105 /*
1106 * If pre_handler returns !0, it sets regs->ip and
1107 * resets current kprobe.
1108 */
1109 }
1110end:
1111 local_irq_restore(flags);
1112}
1113
1114int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
1115{
1116 p->ainsn.insn = NULL;
1117 p->ainsn.boostable = -1;
1118 return 0;
1119}
1120#endif
1121
1122int __init arch_init_kprobes(void) 1056int __init arch_init_kprobes(void)
1123{ 1057{
1124 return arch_init_optprobes(); 1058 return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 000000000000..23ef5c556f06
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
1/*
2 * Dynamic Ftrace based Kprobes Optimization
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) Hitachi Ltd., 2012
19 */
20#include <linux/kprobes.h>
21#include <linux/ptrace.h>
22#include <linux/hardirq.h>
23#include <linux/preempt.h>
24#include <linux/ftrace.h>
25
26#include "common.h"
27
28static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
29 struct kprobe_ctlblk *kcb)
30{
31 /*
32 * Emulate singlestep (and also recover regs->ip)
33 * as if there is a 5byte nop
34 */
35 regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
36 if (unlikely(p->post_handler)) {
37 kcb->kprobe_status = KPROBE_HIT_SSDONE;
38 p->post_handler(p, regs, 0);
39 }
40 __this_cpu_write(current_kprobe, NULL);
41 return 1;
42}
43
44int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
45 struct kprobe_ctlblk *kcb)
46{
47 if (kprobe_ftrace(p))
48 return __skip_singlestep(p, regs, kcb);
49 else
50 return 0;
51}
52
53/* Ftrace callback handler for kprobes */
54void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
55 struct ftrace_ops *ops, struct pt_regs *regs)
56{
57 struct kprobe *p;
58 struct kprobe_ctlblk *kcb;
59 unsigned long flags;
60
61 /* Disable irq for emulating a breakpoint and avoiding preempt */
62 local_irq_save(flags);
63
64 p = get_kprobe((kprobe_opcode_t *)ip);
65 if (unlikely(!p) || kprobe_disabled(p))
66 goto end;
67
68 kcb = get_kprobe_ctlblk();
69 if (kprobe_running()) {
70 kprobes_inc_nmissed_count(p);
71 } else {
72 /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
73 regs->ip = ip + sizeof(kprobe_opcode_t);
74
75 __this_cpu_write(current_kprobe, p);
76 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
77 if (!p->pre_handler || !p->pre_handler(p, regs))
78 __skip_singlestep(p, regs, kcb);
79 /*
80 * If pre_handler returns !0, it sets regs->ip and
81 * resets current kprobe.
82 */
83 }
84end:
85 local_irq_restore(flags);
86}
87
88int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
89{
90 p->ainsn.insn = NULL;
91 p->ainsn.boostable = -1;
92 return 0;
93}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed403..76dc6f095724 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
37#include <asm/insn.h> 37#include <asm/insn.h>
38#include <asm/debugreg.h> 38#include <asm/debugreg.h>
39 39
40#include "kprobes-common.h" 40#include "common.h"
41 41
42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 42unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
43{ 43{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9c2bd8bd4b4c..2b44ea5f269d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -505,6 +505,7 @@ static bool __init kvm_detect(void)
505const struct hypervisor_x86 x86_hyper_kvm __refconst = { 505const struct hypervisor_x86 x86_hyper_kvm __refconst = {
506 .name = "KVM", 506 .name = "KVM",
507 .detect = kvm_detect, 507 .detect = kvm_detect,
508 .x2apic_available = kvm_para_available,
508}; 509};
509EXPORT_SYMBOL_GPL(x86_hyper_kvm); 510EXPORT_SYMBOL_GPL(x86_hyper_kvm);
510 511
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b629bbe0d9bd..29a8120e6fe8 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,7 +22,7 @@
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
24#include <linux/rcupdate.h> 24#include <linux/rcupdate.h>
25#include <linux/module.h> 25#include <linux/export.h>
26#include <linux/context_tracking.h> 26#include <linux/context_tracking.h>
27 27
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 801602b5d745..2e8f3d3b5641 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
149 if (century) { 149 if (century) {
150 century = bcd2bin(century); 150 century = bcd2bin(century);
151 year += century * 100; 151 year += century * 100;
152 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
153 } else 152 } else
154 year += CMOS_YEARS_OFFS; 153 year += CMOS_YEARS_OFFS;
155 154
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f..dbded5aedb81 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
157 if (flags & MAP_FIXED) 157 if (flags & MAP_FIXED)
158 return addr; 158 return addr;
159 159
160 /* for MAP_32BIT mappings we force the legact mmap base */ 160 /* for MAP_32BIT mappings we force the legacy mmap base */
161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) 161 if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
162 goto bottomup; 162 goto bottomup;
163 163
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 06ccb5073a3f..4b9ea101fe3b 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
623 ns_now = __cycles_2_ns(tsc_now); 623 ns_now = __cycles_2_ns(tsc_now);
624 624
625 if (cpu_khz) { 625 if (cpu_khz) {
626 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; 626 *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
627 cpu_khz / 2) / cpu_khz;
627 *offset = ns_now - mult_frac(tsc_now, *scale, 628 *offset = ns_now - mult_frac(tsc_now, *scale,
628 (1UL << CYC2NS_SCALE_FACTOR)); 629 (1UL << CYC2NS_SCALE_FACTOR));
629 } 630 }
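The tsc.c change above swaps truncating division for round-to-nearest by adding half the divisor before dividing, shaving up to one unit of error off *scale. A small demonstration of the idiom, with a made-up cpu_khz value (the scale factor of 10 mirrors the kernel's definition at the time):

	#include <stdio.h>

	#define CYC2NS_SCALE_FACTOR 10
	#define NSEC_PER_MSEC       1000000UL

	int main(void)
	{
		unsigned long cpu_khz = 1862000;   /* hypothetical 1.862 GHz part */
		unsigned long truncated = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
		unsigned long rounded   = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
					   cpu_khz / 2) / cpu_khz;

		/* prints truncated=549 rounded=550; the exact ratio is ~549.95 */
		printf("truncated=%lu rounded=%lu\n", truncated, rounded);
		return 0;
	}

Because cyc2ns multiplies this scale by billions of cycles, the sub-ULP improvement is visible as reduced clock drift.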
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c71025b67462..0ba4cfb4f412 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
680 if (auprobe->insn[i] == 0x66) 680 if (auprobe->insn[i] == 0x66)
681 continue; 681 continue;
682 682
683 if (auprobe->insn[i] == 0x90) 683 if (auprobe->insn[i] == 0x90) {
684 regs->ip += i + 1;
684 return true; 685 return true;
686 }
685 687
686 break; 688 break;
687 } 689 }
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814a..d065d67c2672 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h> 21#include <asm/io_apic.h>
22#include <asm/hpet.h>
22#include <asm/pat.h> 23#include <asm/pat.h>
23#include <asm/tsc.h> 24#include <asm/tsc.h>
24#include <asm/iommu.h> 25#include <asm/iommu.h>
@@ -111,15 +112,22 @@ struct x86_platform_ops x86_platform = {
111 112
112EXPORT_SYMBOL_GPL(x86_platform); 113EXPORT_SYMBOL_GPL(x86_platform);
113struct x86_msi_ops x86_msi = { 114struct x86_msi_ops x86_msi = {
114 .setup_msi_irqs = native_setup_msi_irqs, 115 .setup_msi_irqs = native_setup_msi_irqs,
115 .teardown_msi_irq = native_teardown_msi_irq, 116 .compose_msi_msg = native_compose_msi_msg,
116 .teardown_msi_irqs = default_teardown_msi_irqs, 117 .teardown_msi_irq = native_teardown_msi_irq,
117 .restore_msi_irqs = default_restore_msi_irqs, 118 .teardown_msi_irqs = default_teardown_msi_irqs,
119 .restore_msi_irqs = default_restore_msi_irqs,
120 .setup_hpet_msi = default_setup_hpet_msi,
118}; 121};
119 122
120struct x86_io_apic_ops x86_io_apic_ops = { 123struct x86_io_apic_ops x86_io_apic_ops = {
121 .init = native_io_apic_init_mappings, 124 .init = native_io_apic_init_mappings,
122 .read = native_io_apic_read, 125 .read = native_io_apic_read,
123 .write = native_io_apic_write, 126 .write = native_io_apic_write,
124 .modify = native_io_apic_modify, 127 .modify = native_io_apic_modify,
128 .disable = native_disable_io_apic,
129 .print_entries = native_io_apic_print_entries,
130 .set_affinity = native_ioapic_set_affinity,
131 .setup_entry = native_setup_ioapic_entry,
132 .eoi_ioapic_pin = native_eoi_ioapic_pin,
125}; 133};
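x86_msi and x86_io_apic_ops above follow the kernel's usual ops-table pattern: native defaults are filled in statically, and interested parties (hypervisors, IRQ remapping) override individual hooks at runtime. A self-contained sketch of the pattern, with illustrative names:

	#include <stdio.h>

	struct msi_ops {
		int  (*setup_hpet_msi)(unsigned int irq, unsigned int id);
		void (*teardown_msi_irq)(unsigned int irq);
	};

	static int native_setup_hpet_msi(unsigned int irq, unsigned int id)
	{
		printf("native setup: irq=%u blockid=%u\n", irq, id);
		return 0;
	}

	static void native_teardown_msi_irq(unsigned int irq)
	{
		printf("native teardown: irq=%u\n", irq);
	}

	/* defaults bound at build time, overridable before callers run */
	static struct msi_ops msi_ops = {
		.setup_hpet_msi   = native_setup_hpet_msi,
		.teardown_msi_irq = native_teardown_msi_irq,
	};

	int main(void)
	{
		/* callers always go through the table, which is why hpet.c now
		 * uses x86_msi.setup_hpet_msi() instead of a direct call */
		int rc = msi_ops.setup_hpet_msi(24, 0);
		msi_ops.teardown_msi_irq(24);
		return rc;
	}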
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 027088f2f7dd..fb674fd3fc22 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
748 return; 748 return;
749 } 749 }
750#endif 750#endif
751 /* Kernel addresses are always protection faults: */
752 if (address >= TASK_SIZE)
753 error_code |= PF_PROT;
751 754
752 if (unlikely(show_unhandled_signals)) 755 if (likely(show_unhandled_signals))
753 show_signal_msg(regs, error_code, address, tsk); 756 show_signal_msg(regs, error_code, address, tsk);
754 757
755 /* Kernel addresses are always protection faults: */
756 tsk->thread.cr2 = address; 758 tsk->thread.cr2 = address;
757 tsk->thread.error_code = error_code | (address >= TASK_SIZE); 759 tsk->thread.error_code = error_code;
758 tsk->thread.trap_nr = X86_TRAP_PF; 760 tsk->thread.trap_nr = X86_TRAP_PF;
759 761
760 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); 762 force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2ead3c8a4c84..d6eeead43758 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start,
605 } 605 }
606 606
607 if (pgd_changed) 607 if (pgd_changed)
608 sync_global_pgds(addr, end); 608 sync_global_pgds(addr, end - 1);
609 609
610 __flush_tlb_all(); 610 __flush_tlb_all();
611 611
@@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr)
831 if (pud_none(*pud)) 831 if (pud_none(*pud))
832 return 0; 832 return 0;
833 833
834 if (pud_large(*pud))
835 return pfn_valid(pud_pfn(*pud));
836
834 pmd = pmd_offset(pud, addr); 837 pmd = pmd_offset(pud, addr);
835 if (pmd_none(*pmd)) 838 if (pmd_none(*pmd))
836 return 0; 839 return 0;
@@ -981,7 +984,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
981 } 984 }
982 985
983 } 986 }
984 sync_global_pgds((unsigned long)start_page, end); 987 sync_global_pgds((unsigned long)start_page, end - 1);
985 return 0; 988 return 0;
986} 989}
987 990
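Both sync_global_pgds() call sites above now pass end - 1, because the function treats its second argument as an inclusive last address. A toy model of why the exclusive bound walked one entry too many (the shift value is illustrative, not the kernel's):

	#include <stdio.h>

	#define PGDIR_SHIFT_DEMO 22   /* hypothetical: 4 MiB per top-level entry */

	static void sync_range(unsigned long start, unsigned long last) /* inclusive */
	{
		unsigned long addr;

		for (addr = start; addr <= last; addr += 1UL << PGDIR_SHIFT_DEMO)
			printf("sync entry %lu\n", addr >> PGDIR_SHIFT_DEMO);
	}

	int main(void)
	{
		unsigned long start = 0;
		unsigned long end = 2UL << PGDIR_SHIFT_DEMO;  /* range is [start, end) */

		sync_range(start, end - 1);   /* entries 0 and 1, as intended */
		sync_range(start, end);       /* would also touch entry 2     */
		return 0;
	}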
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index c80b9fb95734..8dabbed409ee 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,6 +9,7 @@
9#include <linux/memblock.h> 9#include <linux/memblock.h>
10 10
11static u64 patterns[] __initdata = { 11static u64 patterns[] __initdata = {
12 /* The first entry has to be 0 to leave memtest with zeroed memory */
12 0, 13 0,
13 0xffffffffffffffffULL, 14 0xffffffffffffffffULL,
14 0x5555555555555555ULL, 15 0x5555555555555555ULL,
@@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end)
110 return; 111 return;
111 112
112 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); 113 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
113 for (i = 0; i < memtest_pattern; i++) { 114 for (i = memtest_pattern-1; i < UINT_MAX; --i) {
114 idx = i % ARRAY_SIZE(patterns); 115 idx = i % ARRAY_SIZE(patterns);
115 do_one_pass(patterns[idx], start, end); 116 do_one_pass(patterns[idx], start, end);
116 } 117 }
117
118 if (idx > 0) {
119 printk(KERN_INFO "early_memtest: wipe out "
120 "test pattern from memory\n");
121 /* additional test with pattern 0 will do this */
122 do_one_pass(0, start, end);
123 }
124} 118}
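The rewritten memtest loop counts down so the final pass always runs patterns[0] == 0, which is what lets the explicit wipe-out pass at the end be deleted. With an unsigned index, "i < UINT_MAX" is the stop condition that fires once the decrement wraps past zero. A compact sketch:

	#include <stdio.h>
	#include <limits.h>

	static const unsigned long long patterns[] = {
		0,                   /* must stay first: the last pass zeroes memory */
		0xffffffffffffffffULL,
		0x5555555555555555ULL,
	};

	int main(void)
	{
		unsigned int memtest_pattern = 4;   /* hypothetical test count */
		unsigned int i;

		for (i = memtest_pattern - 1; i < UINT_MAX; --i)
			printf("pass %u: pattern 0x%llx\n", i,
			       patterns[i % (sizeof(patterns) / sizeof(patterns[0]))]);
		return 0;
	}

The loop runs passes 3, 2, 1, 0 and ends on pattern 0, matching the comment added to the patterns[] array.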
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 4ddf497ca65b..cdd0da9dd530 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -149,39 +149,40 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
149 int node, pxm; 149 int node, pxm;
150 150
151 if (srat_disabled()) 151 if (srat_disabled())
152 return -1; 152 goto out_err;
153 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { 153 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
154 bad_srat(); 154 goto out_err_bad_srat;
155 return -1;
156 }
157 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) 155 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
158 return -1; 156 goto out_err;
159
160 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) 157 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
161 return -1; 158 goto out_err;
159
162 start = ma->base_address; 160 start = ma->base_address;
163 end = start + ma->length; 161 end = start + ma->length;
164 pxm = ma->proximity_domain; 162 pxm = ma->proximity_domain;
165 if (acpi_srat_revision <= 1) 163 if (acpi_srat_revision <= 1)
166 pxm &= 0xff; 164 pxm &= 0xff;
165
167 node = setup_node(pxm); 166 node = setup_node(pxm);
168 if (node < 0) { 167 if (node < 0) {
169 printk(KERN_ERR "SRAT: Too many proximity domains.\n"); 168 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
170 bad_srat(); 169 goto out_err_bad_srat;
171 return -1;
172 } 170 }
173 171
174 if (numa_add_memblk(node, start, end) < 0) { 172 if (numa_add_memblk(node, start, end) < 0)
175 bad_srat(); 173 goto out_err_bad_srat;
176 return -1;
177 }
178 174
179 node_set(node, numa_nodes_parsed); 175 node_set(node, numa_nodes_parsed);
180 176
181 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", 177 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
182 node, pxm, 178 node, pxm,
183 (unsigned long long) start, (unsigned long long) end - 1); 179 (unsigned long long) start, (unsigned long long) end - 1);
180
184 return 0; 181 return 0;
182out_err_bad_srat:
183 bad_srat();
184out_err:
185 return -1;
185} 186}
186 187
187void __init acpi_numa_arch_fixup(void) {} 188void __init acpi_numa_arch_fixup(void) {}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 13a6b29e2e5d..282375f13c7e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = {
335 .llseek = default_llseek, 335 .llseek = default_llseek,
336}; 336};
337 337
338static int __cpuinit create_tlb_flushall_shift(void) 338static int __init create_tlb_flushall_shift(void)
339{ 339{
340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, 340 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
341 arch_debugfs_dir, NULL, &fops_tlbflush); 341 arch_debugfs_dir, NULL, &fops_tlbflush);
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 8d874396cb29..01e0231a113e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -2,10 +2,12 @@
2obj-y += ce4100/ 2obj-y += ce4100/
3obj-y += efi/ 3obj-y += efi/
4obj-y += geode/ 4obj-y += geode/
5obj-y += goldfish/
5obj-y += iris/ 6obj-y += iris/
6obj-y += mrst/ 7obj-y += mrst/
7obj-y += olpc/ 8obj-y += olpc/
8obj-y += scx200/ 9obj-y += scx200/
9obj-y += sfi/ 10obj-y += sfi/
11obj-y += ts5500/
10obj-y += visws/ 12obj-y += visws/
11obj-y += uv/ 13obj-y += uv/
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d9c1b95af17c..7145ec63c520 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -11,20 +11,21 @@
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/init.h>
14#include <linux/acpi.h> 15#include <linux/acpi.h>
15#include <linux/efi.h> 16#include <linux/efi.h>
16#include <linux/efi-bgrt.h> 17#include <linux/efi-bgrt.h>
17 18
18struct acpi_table_bgrt *bgrt_tab; 19struct acpi_table_bgrt *bgrt_tab;
19void *bgrt_image; 20void *__initdata bgrt_image;
20size_t bgrt_image_size; 21size_t __initdata bgrt_image_size;
21 22
22struct bmp_header { 23struct bmp_header {
23 u16 id; 24 u16 id;
24 u32 size; 25 u32 size;
25} __packed; 26} __packed;
26 27
27void efi_bgrt_init(void) 28void __init efi_bgrt_init(void)
28{ 29{
29 acpi_status status; 30 acpi_status status;
30 void __iomem *image; 31 void __iomem *image;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 77cf0090c0a3..928bf837040a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(efi_enabled);
87 87
88static int __init setup_noefi(char *arg) 88static int __init setup_noefi(char *arg)
89{ 89{
90 clear_bit(EFI_BOOT, &x86_efi_facility); 90 clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
91 return 0; 91 return 0;
92} 92}
93early_param("noefi", setup_noefi); 93early_param("noefi", setup_noefi);
diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile
new file mode 100644
index 000000000000..f030b532fdf3
--- /dev/null
+++ b/arch/x86/platform/goldfish/Makefile
@@ -0,0 +1 @@
1obj-$(CONFIG_GOLDFISH) += goldfish.o
diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c
new file mode 100644
index 000000000000..1693107a518e
--- /dev/null
+++ b/arch/x86/platform/goldfish/goldfish.c
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2007 Google, Inc.
3 * Copyright (C) 2011 Intel, Inc.
4 * Copyright (C) 2013 Intel, Inc.
5 *
6 * This software is licensed under the terms of the GNU General Public
7 * License version 2, as published by the Free Software Foundation, and
8 * may be copied, distributed, and modified under those terms.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/kernel.h>
18#include <linux/irq.h>
19#include <linux/platform_device.h>
20
21/*
 22 * Where the IO devices (timers, system controllers and so on) live in
 23 * virtual device memory.
24 */
25
26#define GOLDFISH_PDEV_BUS_BASE (0xff001000)
27#define GOLDFISH_PDEV_BUS_END (0xff7fffff)
28#define GOLDFISH_PDEV_BUS_IRQ (4)
29
30#define GOLDFISH_TTY_BASE (0x2000)
31
32static struct resource goldfish_pdev_bus_resources[] = {
33 {
34 .start = GOLDFISH_PDEV_BUS_BASE,
35 .end = GOLDFISH_PDEV_BUS_END,
36 .flags = IORESOURCE_MEM,
37 },
38 {
39 .start = GOLDFISH_PDEV_BUS_IRQ,
40 .end = GOLDFISH_PDEV_BUS_IRQ,
41 .flags = IORESOURCE_IRQ,
42 }
43};
44
45static int __init goldfish_init(void)
46{
47 platform_device_register_simple("goldfish_pdev_bus", -1,
48 goldfish_pdev_bus_resources, 2);
49 return 0;
50}
51device_initcall(goldfish_init);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index 7785b72ecc3a..bcd1a703e3e6 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -35,7 +35,7 @@
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37/* All CPUs enumerated by SFI must be present and enabled */ 37/* All CPUs enumerated by SFI must be present and enabled */
38static void __cpuinit mp_sfi_register_lapic(u8 id) 38static void __init mp_sfi_register_lapic(u8 id)
39{ 39{
40 if (MAX_LOCAL_APIC - id <= 0) { 40 if (MAX_LOCAL_APIC - id <= 0) {
41 pr_warning("Processor #%d invalid (max %d)\n", 41 pr_warning("Processor #%d invalid (max %d)\n",
diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile
new file mode 100644
index 000000000000..c54e348c96a7
--- /dev/null
+++ b/arch/x86/platform/ts5500/Makefile
@@ -0,0 +1 @@
1obj-$(CONFIG_TS5500) += ts5500.o
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
new file mode 100644
index 000000000000..39febb214e8c
--- /dev/null
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -0,0 +1,339 @@
1/*
2 * Technologic Systems TS-5500 Single Board Computer support
3 *
4 * Copyright (C) 2013 Savoir-faire Linux Inc.
5 * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option) any later
10 * version.
11 *
12 *
13 * This driver registers the Technologic Systems TS-5500 Single Board Computer
14 * (SBC) and its devices, and exposes information to userspace such as jumpers'
15 * state or available options. For further information about sysfs entries, see
16 * Documentation/ABI/testing/sysfs-platform-ts5500.
17 *
18 * This code actually supports the TS-5500 platform, but it may be extended to
19 * support similar Technologic Systems x86-based platforms, such as the TS-5600.
20 */
21
22#include <linux/delay.h>
23#include <linux/io.h>
24#include <linux/kernel.h>
25#include <linux/leds.h>
26#include <linux/module.h>
27#include <linux/platform_data/gpio-ts5500.h>
28#include <linux/platform_data/max197.h>
29#include <linux/platform_device.h>
30#include <linux/slab.h>
31
32/* Product code register */
33#define TS5500_PRODUCT_CODE_ADDR 0x74
34#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */
35
36/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */
37#define TS5500_SRAM_RS485_ADC_ADDR 0x75
38#define TS5500_SRAM BIT(0) /* SRAM option */
39#define TS5500_RS485 BIT(1) /* RS-485 option */
40#define TS5500_ADC BIT(2) /* A/D converter option */
41#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */
42#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */
43
44/* External Reset/Industrial Temperature Range options register */
45#define TS5500_ERESET_ITR_ADDR 0x76
46#define TS5500_ERESET BIT(0) /* External Reset option */
47#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */
48
49/* LED/Jumpers register */
50#define TS5500_LED_JP_ADDR 0x77
51#define TS5500_LED BIT(0) /* LED flag */
52#define TS5500_JP1 BIT(1) /* Automatic CMOS */
53#define TS5500_JP2 BIT(2) /* Enable Serial Console */
54#define TS5500_JP3 BIT(3) /* Write Enable Drive A */
55#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */
56#define TS5500_JP5 BIT(5) /* User Jumper */
57#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */
58#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */
59
60/* A/D Converter registers */
61#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */
62#define TS5500_ADC_CONV_BUSY BIT(0)
63#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */
64#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */
65#define TS5500_ADC_CONV_DELAY 12 /* usec */
66
67/**
68 * struct ts5500_sbc - TS-5500 board description
69 * @id: Board product ID.
70 * @sram: Flag for SRAM option.
71 * @rs485: Flag for RS-485 option.
72 * @adc: Flag for Analog/Digital converter option.
73 * @ereset: Flag for External Reset option.
74 * @itr: Flag for Industrial Temperature Range option.
75 * @jumpers: Bitfield for jumpers' state.
76 */
77struct ts5500_sbc {
78 int id;
79 bool sram;
80 bool rs485;
81 bool adc;
82 bool ereset;
83 bool itr;
84 u8 jumpers;
85};
86
87/* Board signatures in BIOS shadow RAM */
88static const struct {
89 const char * const string;
90 const ssize_t offset;
91} ts5500_signatures[] __initdata = {
92 { "TS-5x00 AMD Elan", 0xb14 },
93};
94
95static int __init ts5500_check_signature(void)
96{
97 void __iomem *bios;
98 int i, ret = -ENODEV;
99
100 bios = ioremap(0xf0000, 0x10000);
101 if (!bios)
102 return -ENOMEM;
103
104 for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) {
105 if (check_signature(bios + ts5500_signatures[i].offset,
106 ts5500_signatures[i].string,
107 strlen(ts5500_signatures[i].string))) {
108 ret = 0;
109 break;
110 }
111 }
112
113 iounmap(bios);
114 return ret;
115}
116
117static int __init ts5500_detect_config(struct ts5500_sbc *sbc)
118{
119 u8 tmp;
120 int ret = 0;
121
122 if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500"))
123 return -EBUSY;
124
125 tmp = inb(TS5500_PRODUCT_CODE_ADDR);
126 if (tmp != TS5500_PRODUCT_CODE) {
127 pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp);
128 ret = -ENODEV;
129 goto cleanup;
130 }
131 sbc->id = tmp;
132
133 tmp = inb(TS5500_SRAM_RS485_ADC_ADDR);
134 sbc->sram = tmp & TS5500_SRAM;
135 sbc->rs485 = tmp & TS5500_RS485;
136 sbc->adc = tmp & TS5500_ADC;
137
138 tmp = inb(TS5500_ERESET_ITR_ADDR);
139 sbc->ereset = tmp & TS5500_ERESET;
140 sbc->itr = tmp & TS5500_ITR;
141
142 tmp = inb(TS5500_LED_JP_ADDR);
143 sbc->jumpers = tmp & ~TS5500_LED;
144
145cleanup:
146 release_region(TS5500_PRODUCT_CODE_ADDR, 4);
147 return ret;
148}
149
150static ssize_t ts5500_show_id(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
154
155 return sprintf(buf, "0x%.2x\n", sbc->id);
156}
157
158static ssize_t ts5500_show_jumpers(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct ts5500_sbc *sbc = dev_get_drvdata(dev);
163
164 return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1);
165}
166
167#define TS5500_SHOW(field) \
168 static ssize_t ts5500_show_##field(struct device *dev, \
169 struct device_attribute *attr, \
170 char *buf) \
171 { \
172 struct ts5500_sbc *sbc = dev_get_drvdata(dev); \
173 return sprintf(buf, "%d\n", sbc->field); \
174 }
175
176TS5500_SHOW(sram)
177TS5500_SHOW(rs485)
178TS5500_SHOW(adc)
179TS5500_SHOW(ereset)
180TS5500_SHOW(itr)
181
182static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL);
183static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL);
184static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL);
185static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL);
186static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL);
187static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL);
188static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL);
189
190static struct attribute *ts5500_attributes[] = {
191 &dev_attr_id.attr,
192 &dev_attr_jumpers.attr,
193 &dev_attr_sram.attr,
194 &dev_attr_rs485.attr,
195 &dev_attr_adc.attr,
196 &dev_attr_ereset.attr,
197 &dev_attr_itr.attr,
198 NULL
199};
200
201static const struct attribute_group ts5500_attr_group = {
202 .attrs = ts5500_attributes,
203};
204
205static struct resource ts5500_dio1_resource[] = {
206 DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"),
207};
208
209static struct platform_device ts5500_dio1_pdev = {
210 .name = "ts5500-dio1",
211 .id = -1,
212 .resource = ts5500_dio1_resource,
213 .num_resources = 1,
214};
215
216static struct resource ts5500_dio2_resource[] = {
217 DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"),
218};
219
220static struct platform_device ts5500_dio2_pdev = {
221 .name = "ts5500-dio2",
222 .id = -1,
223 .resource = ts5500_dio2_resource,
224 .num_resources = 1,
225};
226
227static void ts5500_led_set(struct led_classdev *led_cdev,
228 enum led_brightness brightness)
229{
230 outb(!!brightness, TS5500_LED_JP_ADDR);
231}
232
233static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev)
234{
235 return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF;
236}
237
238static struct led_classdev ts5500_led_cdev = {
239 .name = "ts5500:green:",
240 .brightness_set = ts5500_led_set,
241 .brightness_get = ts5500_led_get,
242};
243
244static int ts5500_adc_convert(u8 ctrl)
245{
246 u8 lsb, msb;
247
248 /* Start conversion (ensure the 3 MSB are set to 0) */
249 outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR);
250
251 /*
252 * The platform has CPLD logic driving the A/D converter.
253 * The conversion must complete within 11 microseconds,
254 * otherwise we have to re-initiate a conversion.
255 */
256 udelay(TS5500_ADC_CONV_DELAY);
257 if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY)
258 return -EBUSY;
259
260 /* Read the raw data */
261 lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR);
262 msb = inb(TS5500_ADC_CONV_MSB_ADDR);
263
264 return (msb << 8) | lsb;
265}
266
267static struct max197_platform_data ts5500_adc_pdata = {
268 .convert = ts5500_adc_convert,
269};
270
271static struct platform_device ts5500_adc_pdev = {
272 .name = "max197",
273 .id = -1,
274 .dev = {
275 .platform_data = &ts5500_adc_pdata,
276 },
277};
278
279static int __init ts5500_init(void)
280{
281 struct platform_device *pdev;
282 struct ts5500_sbc *sbc;
283 int err;
284
285 /*
 286	 * Neither DMI nor PCI bridge subvendor info is available;
 287	 * only the BIOS provides a 16-bit identification call.
288 * It is safer to find a signature in the BIOS shadow RAM.
289 */
290 err = ts5500_check_signature();
291 if (err)
292 return err;
293
294 pdev = platform_device_register_simple("ts5500", -1, NULL, 0);
295 if (IS_ERR(pdev))
296 return PTR_ERR(pdev);
297
298 sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL);
299 if (!sbc) {
300 err = -ENOMEM;
301 goto error;
302 }
303
304 err = ts5500_detect_config(sbc);
305 if (err)
306 goto error;
307
308 platform_set_drvdata(pdev, sbc);
309
310 err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group);
311 if (err)
312 goto error;
313
314 ts5500_dio1_pdev.dev.parent = &pdev->dev;
315 if (platform_device_register(&ts5500_dio1_pdev))
316 dev_warn(&pdev->dev, "DIO1 block registration failed\n");
317 ts5500_dio2_pdev.dev.parent = &pdev->dev;
318 if (platform_device_register(&ts5500_dio2_pdev))
319 dev_warn(&pdev->dev, "DIO2 block registration failed\n");
320
321 if (led_classdev_register(&pdev->dev, &ts5500_led_cdev))
322 dev_warn(&pdev->dev, "LED registration failed\n");
323
324 if (sbc->adc) {
325 ts5500_adc_pdev.dev.parent = &pdev->dev;
326 if (platform_device_register(&ts5500_adc_pdev))
327 dev_warn(&pdev->dev, "ADC registration failed\n");
328 }
329
330 return 0;
331error:
332 platform_device_unregister(pdev);
333 return err;
334}
335device_initcall(ts5500_init);
336
337MODULE_LICENSE("GPL");
338MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>");
339MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver");
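For reference, this is how the jumper state exposed by ts5500_show_jumpers() decodes: the LED flag shares the register, so it is masked out and the result shifted down by one. A user-space sketch with a made-up register value:

	#include <stdio.h>

	#define TS5500_LED (1 << 0)
	#define TS5500_JP2 (1 << 2)   /* Enable Serial Console */

	int main(void)
	{
		unsigned char reg = 0x25;   /* hypothetical inb(TS5500_LED_JP_ADDR) */
		unsigned char jumpers = reg & ~TS5500_LED;

		/* sysfs "jumpers" drops the LED bit and shifts, as in the driver */
		printf("jumpers=0x%.2x serial_console=%s\n", jumpers >> 1,
		       (reg & TS5500_JP2) ? "on" : "off");
		return 0;
	}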
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index dbbdca5f508c..0f92173a12b6 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1467,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
1467 } 1467 }
1468 1468
1469 if (input_arg == 0) { 1469 if (input_arg == 0) {
1470 elements = sizeof(stat_description)/sizeof(*stat_description); 1470 elements = ARRAY_SIZE(stat_description);
1471 printk(KERN_DEBUG "# cpu: cpu number\n"); 1471 printk(KERN_DEBUG "# cpu: cpu number\n");
1472 printk(KERN_DEBUG "Sender statistics:\n"); 1472 printk(KERN_DEBUG "Sender statistics:\n");
1473 for (i = 0; i < elements; i++) 1473 for (i = 0; i < elements; i++)
@@ -1508,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr,
1508 char *q; 1508 char *q;
1509 int cnt = 0; 1509 int cnt = 0;
1510 int val; 1510 int val;
1511 int e = sizeof(tunables) / sizeof(*tunables); 1511 int e = ARRAY_SIZE(tunables);
1512 1512
1513 p = instr + strspn(instr, WHITESPACE); 1513 p = instr + strspn(instr, WHITESPACE);
1514 q = p; 1514 q = p;
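Both hunks above replace the open-coded sizeof division with ARRAY_SIZE(). The macro is equivalent but survives element-type changes and reads better; a user-space rendering:

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	static const char *stat_description[] = { "sent", "retried", "timed out" };

	int main(void)
	{
		/* 3, and it stays correct if entries are added or the type changes */
		printf("%zu\n", ARRAY_SIZE(stat_description));
		return 0;
	}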
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index 8784ab30d91b..84ac7f7b0257 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
20 const struct exception_table_entry *fixup; 20 const struct exception_table_entry *fixup;
21 21
22 fixup = search_exception_tables(address); 22 fixup = search_exception_tables(address);
23 if (fixup != 0) { 23 if (fixup) {
24 UPT_IP(regs) = fixup->fixup; 24 UPT_IP(regs) = fixup->fixup;
25 return 1; 25 return 1;
26 } 26 }
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 205ad328aa52..c74436e687bf 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void)
60 60
61static notrace cycle_t vread_hpet(void) 61static notrace cycle_t vread_hpet(void)
62{ 62{
63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); 63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
64} 64}
65 65
66#ifdef CONFIG_PARAVIRT_CLOCK 66#ifdef CONFIG_PARAVIRT_CLOCK
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 138e5667409a..39928d16be3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void)
1517#endif 1517#endif
1518} 1518}
1519 1519
1520#ifdef CONFIG_XEN_PVHVM 1520void __ref xen_hvm_init_shared_info(void)
1521#define HVM_SHARED_INFO_ADDR 0xFE700000UL
1522static struct shared_info *xen_hvm_shared_info;
1523static unsigned long xen_hvm_sip_phys;
1524static int xen_major, xen_minor;
1525
1526static void xen_hvm_connect_shared_info(unsigned long pfn)
1527{ 1521{
1522 int cpu;
1528 struct xen_add_to_physmap xatp; 1523 struct xen_add_to_physmap xatp;
1524 static struct shared_info *shared_info_page = 0;
1529 1525
1526 if (!shared_info_page)
1527 shared_info_page = (struct shared_info *)
1528 extend_brk(PAGE_SIZE, PAGE_SIZE);
1530 xatp.domid = DOMID_SELF; 1529 xatp.domid = DOMID_SELF;
1531 xatp.idx = 0; 1530 xatp.idx = 0;
1532 xatp.space = XENMAPSPACE_shared_info; 1531 xatp.space = XENMAPSPACE_shared_info;
1533 xatp.gpfn = pfn; 1532 xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
1534 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1533 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1535 BUG(); 1534 BUG();
1536 1535
1537} 1536 HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
1538static void __init xen_hvm_set_shared_info(struct shared_info *sip)
1539{
1540 int cpu;
1541
1542 HYPERVISOR_shared_info = sip;
1543 1537
1544 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1538 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1545 * page, we use it in the event channel upcall and in some pvclock 1539 * page, we use it in the event channel upcall and in some pvclock
1546 * related functions. We don't need the vcpu_info placement 1540 * related functions. We don't need the vcpu_info placement
1547 * optimizations because we don't use any pv_mmu or pv_irq op on 1541 * optimizations because we don't use any pv_mmu or pv_irq op on
1548 * HVM. */ 1542 * HVM.
1549 for_each_online_cpu(cpu) 1543 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
1544 * online but xen_hvm_init_shared_info is run at resume time too and
1545 * in that case multiple vcpus might be online. */
1546 for_each_online_cpu(cpu) {
1550 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1547 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1551}
1552
1553/* Reconnect the shared_info pfn to a (new) mfn */
1554void xen_hvm_resume_shared_info(void)
1555{
1556 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1557}
1558
1559/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage.
1560 * On these old tools the shared info page will be placed in E820_Ram.
1561 * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects
1562 * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
1563 * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used
1564 * here for the shared info page. */
1565static void __init xen_hvm_init_shared_info(void)
1566{
1567 if (xen_major < 4) {
1568 xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
1569 xen_hvm_sip_phys = __pa(xen_hvm_shared_info);
1570 } else {
1571 xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR;
1572 set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys);
1573 xen_hvm_shared_info =
1574 (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1575 } 1548 }
1576 xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT);
1577 xen_hvm_set_shared_info(xen_hvm_shared_info);
1578} 1549}
1579 1550
1551#ifdef CONFIG_XEN_PVHVM
1580static void __init init_hvm_pv_info(void) 1552static void __init init_hvm_pv_info(void)
1581{ 1553{
1582 uint32_t ecx, edx, pages, msr, base; 1554 int major, minor;
1555 uint32_t eax, ebx, ecx, edx, pages, msr, base;
1583 u64 pfn; 1556 u64 pfn;
1584 1557
1585 base = xen_cpuid_base(); 1558 base = xen_cpuid_base();
1559 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1560
1561 major = eax >> 16;
1562 minor = eax & 0xffff;
1563 printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1564
1586 cpuid(base + 2, &pages, &msr, &ecx, &edx); 1565 cpuid(base + 2, &pages, &msr, &ecx, &edx);
1587 1566
1588 pfn = __pa(hypercall_page); 1567 pfn = __pa(hypercall_page);
@@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void)
1633 1612
1634static bool __init xen_hvm_platform(void) 1613static bool __init xen_hvm_platform(void)
1635{ 1614{
1636 uint32_t eax, ebx, ecx, edx, base;
1637
1638 if (xen_pv_domain()) 1615 if (xen_pv_domain())
1639 return false; 1616 return false;
1640 1617
1641 base = xen_cpuid_base(); 1618 if (!xen_cpuid_base())
1642 if (!base)
1643 return false; 1619 return false;
1644 1620
1645 cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1646
1647 xen_major = eax >> 16;
1648 xen_minor = eax & 0xffff;
1649
1650 printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor);
1651
1652 return true; 1621 return true;
1653} 1622}
1654 1623
@@ -1668,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1668 .name = "Xen HVM", 1637 .name = "Xen HVM",
1669 .detect = xen_hvm_platform, 1638 .detect = xen_hvm_platform,
1670 .init_platform = xen_hvm_guest_init, 1639 .init_platform = xen_hvm_guest_init,
1640 .x2apic_available = xen_x2apic_para_available,
1671}; 1641};
1672EXPORT_SYMBOL(x86_hyper_xen_hvm); 1642EXPORT_SYMBOL(x86_hyper_xen_hvm);
1673#endif 1643#endif
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index ae8a00c39de4..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM 31#ifdef CONFIG_XEN_PVHVM
32 int cpu; 32 int cpu;
33 xen_hvm_resume_shared_info(); 33 xen_hvm_init_shared_info();
34 xen_callback_vector(); 34 xen_callback_vector();
35 xen_unplug_emulated_devices(); 35 xen_unplug_emulated_devices();
36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) { 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index f9643fc50de5..33ca6e42a4ca 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
89 */ 89 */
90#ifdef CONFIG_SMP 90#ifdef CONFIG_SMP
91 GET_THREAD_INFO(%eax) 91 GET_THREAD_INFO(%eax)
92 movl TI_cpu(%eax), %eax 92 movl %ss:TI_cpu(%eax), %eax
93 movl __per_cpu_offset(,%eax,4), %eax 93 movl %ss:__per_cpu_offset(,%eax,4), %eax
94 mov xen_vcpu(%eax), %eax 94 mov %ss:xen_vcpu(%eax), %eax
95#else 95#else
96 movl xen_vcpu, %eax 96 movl %ss:xen_vcpu, %eax
97#endif 97#endif
98 98
99 /* check IF state we're restoring */ 99 /* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
106 * resuming the code, so we don't have to be worried about 106 * resuming the code, so we don't have to be worried about
107 * being preempted to another CPU. 107 * being preempted to another CPU.
108 */ 108 */
109 setz XEN_vcpu_info_mask(%eax) 109 setz %ss:XEN_vcpu_info_mask(%eax)
110xen_iret_start_crit: 110xen_iret_start_crit:
111 111
112 /* check for unmasked and pending */ 112 /* check for unmasked and pending */
113 cmpw $0x0001, XEN_vcpu_info_pending(%eax) 113 cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
114 114
115 /* 115 /*
116 * If there's something pending, mask events again so we can 116 * If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
118 * touch XEN_vcpu_info_mask. 118 * touch XEN_vcpu_info_mask.
119 */ 119 */
120 jne 1f 120 jne 1f
121 movb $1, XEN_vcpu_info_mask(%eax) 121 movb $1, %ss:XEN_vcpu_info_mask(%eax)
122 122
1231: popl %eax 1231: popl %eax
124 124
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d2e73d19d366..a95b41744ad0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -40,7 +40,7 @@ void xen_enable_syscall(void);
40void xen_vcpu_restore(void); 40void xen_vcpu_restore(void);
41 41
42void xen_callback_vector(void); 42void xen_callback_vector(void);
43void xen_hvm_resume_shared_info(void); 43void xen_hvm_init_shared_info(void);
44void xen_unplug_emulated_devices(void); 44void xen_unplug_emulated_devices(void);
45 45
46void __init xen_build_dynamic_phys_to_machine(void); 46void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 74638ec234c8..c88202f973d9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -5,6 +5,7 @@
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/bio.h> 6#include <linux/bio.h>
7#include <linux/blkdev.h> 7#include <linux/blkdev.h>
8#include <linux/sched/sysctl.h>
8 9
9#include "blk.h" 10#include "blk.h"
10 11
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd86b424..1e5d8a40101e 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
29#include <linux/time.h> 29#include <linux/time.h>
30#include <linux/cper.h> 30#include <linux/cper.h>
31#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/pci.h>
32#include <linux/aer.h> 33#include <linux/aer.h>
33 34
34/* 35/*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
249static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 250static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250 const struct acpi_hest_generic_data *gdata) 251 const struct acpi_hest_generic_data *gdata)
251{ 252{
253#ifdef CONFIG_ACPI_APEI_PCIEAER
254 struct pci_dev *dev;
255#endif
256
252 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 257 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
253 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 258 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
254 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? 259 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
281 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 286 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
282 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 287 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283#ifdef CONFIG_ACPI_APEI_PCIEAER 288#ifdef CONFIG_ACPI_APEI_PCIEAER
284 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { 289 dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
285 struct aer_capability_regs *aer_regs = (void *)pcie->aer_info; 290 pcie->device_id.bus, pcie->device_id.function);
286 cper_print_aer(pfx, gdata->error_severity, aer_regs); 291 if (!dev) {
292 pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
293 pcie->device_id.segment, pcie->device_id.bus,
294 pcie->device_id.slot, pcie->device_id.function);
295 return;
287 } 296 }
297 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
298 cper_print_aer(pfx, dev, gdata->error_severity,
299 (struct aer_capability_regs *) pcie->aer_info);
300 pci_dev_put(dev);
288#endif 301#endif
289} 302}
290 303
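The cper.c change above also introduces a reference-counting obligation: pci_get_domain_bus_and_slot() returns a referenced device, and every path that obtained one must end in pci_dev_put(). A self-contained model of that get/put discipline (the struct and helpers are illustrative, not kernel API):

	#include <stdio.h>
	#include <stdlib.h>

	struct dev { int refcnt; int id; };

	static struct dev *dev_get(int id)
	{
		struct dev *d = malloc(sizeof(*d));

		if (d) { d->refcnt = 1; d->id = id; }
		return d;                       /* caller owns one reference */
	}

	static void dev_put(struct dev *d)
	{
		if (d && --d->refcnt == 0)
			free(d);
	}

	int main(void)
	{
		struct dev *d = dev_get(7);

		if (!d) {
			fprintf(stderr, "cannot get device\n");  /* nothing to put */
			return 1;
		}
		printf("using device %d\n", d->id);
		dev_put(d);                     /* mirrors pci_dev_put() above */
		return 0;
	}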
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 497912732566..495aeed26779 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1061,6 +1061,86 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
1061{} 1061{}
1062#endif 1062#endif
1063 1063
1064int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv)
1065{
1066 int rc;
1067 unsigned int maxvec;
1068
1069 if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) {
1070 rc = pci_enable_msi_block_auto(pdev, &maxvec);
1071 if (rc > 0) {
1072 if ((rc == maxvec) || (rc == 1))
1073 return rc;
1074 /*
 1075	 * Assume that the advantage of multiple MSIs is negated,
 1076	 * so fall back to single MSI mode to save resources
1077 */
1078 pci_disable_msi(pdev);
1079 if (!pci_enable_msi(pdev))
1080 return 1;
1081 }
1082 }
1083
1084 pci_intx(pdev, 1);
1085 return 0;
1086}
1087
1088/**
1089 * ahci_host_activate - start AHCI host, request IRQs and register it
1090 * @host: target ATA host
1091 * @irq: base IRQ number to request
1092 * @n_msis: number of MSIs allocated for this host
1093 * @irq_handler: irq_handler used when requesting IRQs
1094 * @irq_flags: irq_flags used when requesting IRQs
1095 *
1096 * Similar to ata_host_activate, but requests IRQs according to AHCI-1.1
1097 * when multiple MSIs were allocated. That is one MSI per port, starting
1098 * from @irq.
1099 *
1100 * LOCKING:
1101 * Inherited from calling layer (may sleep).
1102 *
1103 * RETURNS:
1104 * 0 on success, -errno otherwise.
1105 */
1106int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis)
1107{
1108 int i, rc;
1109
1110 /* Sharing Last Message among several ports is not supported */
1111 if (n_msis < host->n_ports)
1112 return -EINVAL;
1113
1114 rc = ata_host_start(host);
1115 if (rc)
1116 return rc;
1117
1118 for (i = 0; i < host->n_ports; i++) {
1119 rc = devm_request_threaded_irq(host->dev,
1120 irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
1121 dev_driver_string(host->dev), host->ports[i]);
1122 if (rc)
1123 goto out_free_irqs;
1124 }
1125
1126 for (i = 0; i < host->n_ports; i++)
1127 ata_port_desc(host->ports[i], "irq %d", irq + i);
1128
1129 rc = ata_host_register(host, &ahci_sht);
1130 if (rc)
1131 goto out_free_all_irqs;
1132
1133 return 0;
1134
1135out_free_all_irqs:
1136 i = host->n_ports;
1137out_free_irqs:
1138 for (i--; i >= 0; i--)
1139 devm_free_irq(host->dev, irq + i, host->ports[i]);
1140
1141 return rc;
1142}
1143
1064static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 1144static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1065{ 1145{
1066 unsigned int board_id = ent->driver_data; 1146 unsigned int board_id = ent->driver_data;
@@ -1069,7 +1149,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1069 struct device *dev = &pdev->dev; 1149 struct device *dev = &pdev->dev;
1070 struct ahci_host_priv *hpriv; 1150 struct ahci_host_priv *hpriv;
1071 struct ata_host *host; 1151 struct ata_host *host;
1072 int n_ports, i, rc; 1152 int n_ports, n_msis, i, rc;
1073 int ahci_pci_bar = AHCI_PCI_BAR_STANDARD; 1153 int ahci_pci_bar = AHCI_PCI_BAR_STANDARD;
1074 1154
1075 VPRINTK("ENTER\n"); 1155 VPRINTK("ENTER\n");
@@ -1156,11 +1236,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1156 if (ahci_sb600_enable_64bit(pdev)) 1236 if (ahci_sb600_enable_64bit(pdev))
1157 hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY; 1237 hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY;
1158 1238
1159 if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev))
1160 pci_intx(pdev, 1);
1161
1162 hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; 1239 hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
1163 1240
1241 n_msis = ahci_init_interrupts(pdev, hpriv);
1242 if (n_msis > 1)
1243 hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
1244
1164 /* save initial config */ 1245 /* save initial config */
1165 ahci_pci_save_initial_config(pdev, hpriv); 1246 ahci_pci_save_initial_config(pdev, hpriv);
1166 1247
@@ -1256,6 +1337,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1256 ahci_pci_print_info(host); 1337 ahci_pci_print_info(host);
1257 1338
1258 pci_set_master(pdev); 1339 pci_set_master(pdev);
1340
1341 if (hpriv->flags & AHCI_HFLAG_MULTI_MSI)
1342 return ahci_host_activate(host, pdev->irq, n_msis);
1343
1259 return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED, 1344 return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED,
1260 &ahci_sht); 1345 &ahci_sht);
1261} 1346}
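
Taken together, the ahci.c hunks implement a three-step interrupt fallback ladder: one MSI per port, else a single MSI, else legacy INTx, and the return value of ahci_init_interrupts() (per-port count, 1, or 0) is what later selects the activation path. A driver-agnostic restatement of the ladder, as a sketch hedged on the 3.9-era pci_enable_msi_block_auto() semantics (positive return = number of vectors actually enabled, *maxvec = device maximum); foo_setup_irqs() is a hypothetical name:

	static int foo_setup_irqs(struct pci_dev *pdev)	/* hypothetical */
	{
		unsigned int maxvec;
		int rc;

		rc = pci_enable_msi_block_auto(pdev, &maxvec);
		if (rc > 0) {
			if (rc == maxvec || rc == 1)
				return rc;	/* full set, or a hardware max of one */
			/* a partial grab buys nothing over one vector here */
			pci_disable_msi(pdev);
			if (!pci_enable_msi(pdev))
				return 1;
		}
		pci_intx(pdev, 1);		/* last resort: legacy INTx */
		return 0;
	}

Discarding a partial allocation is deliberate: the per-port scheme needs exactly one vector per port, so anything between 1 and maxvec is no better than a single, cheaper MSI.
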
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 9be471200a07..b830e6c9fe49 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -231,6 +231,7 @@ enum {
231 AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on 231 AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on
232 port start (wait until 232 port start (wait until
233 error-handling stage) */ 233 error-handling stage) */
234 AHCI_HFLAG_MULTI_MSI = (1 << 16), /* multiple PCI MSIs */
234 235
235 /* ap->flags bits */ 236 /* ap->flags bits */
236 237
@@ -297,6 +298,8 @@ struct ahci_port_priv {
297 unsigned int ncq_saw_d2h:1; 298 unsigned int ncq_saw_d2h:1;
298 unsigned int ncq_saw_dmas:1; 299 unsigned int ncq_saw_dmas:1;
299 unsigned int ncq_saw_sdb:1; 300 unsigned int ncq_saw_sdb:1;
301 u32 intr_status; /* interrupts to handle */
302 spinlock_t lock; /* protects parent ata_port */
300 u32 intr_mask; /* interrupts to enable */ 303 u32 intr_mask; /* interrupts to enable */
301 bool fbs_supported; /* set iff FBS is supported */ 304 bool fbs_supported; /* set iff FBS is supported */
302 bool fbs_enabled; /* set iff FBS is enabled */ 305 bool fbs_enabled; /* set iff FBS is enabled */
@@ -359,7 +362,10 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv,
359 struct ata_port_info *pi); 362 struct ata_port_info *pi);
360int ahci_reset_em(struct ata_host *host); 363int ahci_reset_em(struct ata_host *host);
361irqreturn_t ahci_interrupt(int irq, void *dev_instance); 364irqreturn_t ahci_interrupt(int irq, void *dev_instance);
365irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance);
366irqreturn_t ahci_thread_fn(int irq, void *dev_instance);
362void ahci_print_info(struct ata_host *host, const char *scc_s); 367void ahci_print_info(struct ata_host *host, const char *scc_s);
368int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis);
363 369
364static inline void __iomem *__ahci_port_base(struct ata_host *host, 370static inline void __iomem *__ahci_port_base(struct ata_host *host,
365 unsigned int port_no) 371 unsigned int port_no)
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 6cd7805e47ca..34c82167b962 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1655,19 +1655,16 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
1655 ata_port_abort(ap); 1655 ata_port_abort(ap);
1656} 1656}
1657 1657
1658static void ahci_port_intr(struct ata_port *ap) 1658static void ahci_handle_port_interrupt(struct ata_port *ap,
1659 void __iomem *port_mmio, u32 status)
1659{ 1660{
1660 void __iomem *port_mmio = ahci_port_base(ap);
1661 struct ata_eh_info *ehi = &ap->link.eh_info; 1661 struct ata_eh_info *ehi = &ap->link.eh_info;
1662 struct ahci_port_priv *pp = ap->private_data; 1662 struct ahci_port_priv *pp = ap->private_data;
1663 struct ahci_host_priv *hpriv = ap->host->private_data; 1663 struct ahci_host_priv *hpriv = ap->host->private_data;
1664 int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING); 1664 int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING);
1665 u32 status, qc_active = 0; 1665 u32 qc_active = 0;
1666 int rc; 1666 int rc;
1667 1667
1668 status = readl(port_mmio + PORT_IRQ_STAT);
1669 writel(status, port_mmio + PORT_IRQ_STAT);
1670
1671 /* ignore BAD_PMP while resetting */ 1668 /* ignore BAD_PMP while resetting */
1672 if (unlikely(resetting)) 1669 if (unlikely(resetting))
1673 status &= ~PORT_IRQ_BAD_PMP; 1670 status &= ~PORT_IRQ_BAD_PMP;
@@ -1743,6 +1740,107 @@ static void ahci_port_intr(struct ata_port *ap)
1743 } 1740 }
1744} 1741}
1745 1742
1743void ahci_port_intr(struct ata_port *ap)
1744{
1745 void __iomem *port_mmio = ahci_port_base(ap);
1746 u32 status;
1747
1748 status = readl(port_mmio + PORT_IRQ_STAT);
1749 writel(status, port_mmio + PORT_IRQ_STAT);
1750
1751 ahci_handle_port_interrupt(ap, port_mmio, status);
1752}
1753
1754irqreturn_t ahci_thread_fn(int irq, void *dev_instance)
1755{
1756 struct ata_port *ap = dev_instance;
1757 struct ahci_port_priv *pp = ap->private_data;
1758 void __iomem *port_mmio = ahci_port_base(ap);
1759 unsigned long flags;
1760 u32 status;
1761
1762 spin_lock_irqsave(&ap->host->lock, flags);
1763 status = pp->intr_status;
1764 if (status)
1765 pp->intr_status = 0;
1766 spin_unlock_irqrestore(&ap->host->lock, flags);
1767
1768 spin_lock_bh(ap->lock);
1769 ahci_handle_port_interrupt(ap, port_mmio, status);
1770 spin_unlock_bh(ap->lock);
1771
1772 return IRQ_HANDLED;
1773}
1774EXPORT_SYMBOL_GPL(ahci_thread_fn);
1775
1776void ahci_hw_port_interrupt(struct ata_port *ap)
1777{
1778 void __iomem *port_mmio = ahci_port_base(ap);
1779 struct ahci_port_priv *pp = ap->private_data;
1780 u32 status;
1781
1782 status = readl(port_mmio + PORT_IRQ_STAT);
1783 writel(status, port_mmio + PORT_IRQ_STAT);
1784
1785 pp->intr_status |= status;
1786}
1787
1788irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance)
1789{
1790 struct ata_port *ap_this = dev_instance;
1791 struct ahci_port_priv *pp = ap_this->private_data;
1792 struct ata_host *host = ap_this->host;
1793 struct ahci_host_priv *hpriv = host->private_data;
1794 void __iomem *mmio = hpriv->mmio;
1795 unsigned int i;
1796 u32 irq_stat, irq_masked;
1797
1798 VPRINTK("ENTER\n");
1799
1800 spin_lock(&host->lock);
1801
1802 irq_stat = readl(mmio + HOST_IRQ_STAT);
1803
1804 if (!irq_stat) {
1805 u32 status = pp->intr_status;
1806
1807 spin_unlock(&host->lock);
1808
1809 VPRINTK("EXIT\n");
1810
1811 return status ? IRQ_WAKE_THREAD : IRQ_NONE;
1812 }
1813
1814 irq_masked = irq_stat & hpriv->port_map;
1815
1816 for (i = 0; i < host->n_ports; i++) {
1817 struct ata_port *ap;
1818
1819 if (!(irq_masked & (1 << i)))
1820 continue;
1821
1822 ap = host->ports[i];
1823 if (ap) {
1824 ahci_hw_port_interrupt(ap);
1825 VPRINTK("port %u\n", i);
1826 } else {
1827 VPRINTK("port %u (no irq)\n", i);
1828 if (ata_ratelimit())
1829 dev_warn(host->dev,
1830 "interrupt on disabled port %u\n", i);
1831 }
1832 }
1833
1834 writel(irq_stat, mmio + HOST_IRQ_STAT);
1835
1836 spin_unlock(&host->lock);
1837
1838 VPRINTK("EXIT\n");
1839
1840 return IRQ_WAKE_THREAD;
1841}
1842EXPORT_SYMBOL_GPL(ahci_hw_interrupt);
1843
1746irqreturn_t ahci_interrupt(int irq, void *dev_instance) 1844irqreturn_t ahci_interrupt(int irq, void *dev_instance)
1747{ 1845{
1748 struct ata_host *host = dev_instance; 1846 struct ata_host *host = dev_instance;
@@ -2196,6 +2294,14 @@ static int ahci_port_start(struct ata_port *ap)
2196 */ 2294 */
2197 pp->intr_mask = DEF_PORT_IRQ; 2295 pp->intr_mask = DEF_PORT_IRQ;
2198 2296
2297 /*
2298	 * Switch to per-port locking when each port has its own MSI vector.
2299 */
2300	if (hpriv->flags & AHCI_HFLAG_MULTI_MSI) {
2301 spin_lock_init(&pp->lock);
2302 ap->lock = &pp->lock;
2303 }
2304
2199 ap->private_data = pp; 2305 ap->private_data = pp;
2200 2306
2201 /* engage engines, captain */ 2307 /* engage engines, captain */
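
The libahci rework above is the standard hardirq/threaded-irq split: the hard handler only acknowledges hardware and latches status, returning IRQ_WAKE_THREAD, while the thread does the completion work under the (now per-port) lock. A generic sketch of the pattern with invented foo_* names and registers; the request/handler APIs are the real kernel ones:

	static irqreturn_t foo_hardirq(int irq, void *dev_id)
	{
		struct foo_port *port = dev_id;
		u32 status = readl(port->mmio + FOO_IRQ_STAT);	/* FOO_* invented */

		if (!status)
			return IRQ_NONE;			/* not ours */
		writel(status, port->mmio + FOO_IRQ_STAT);	/* ack in hardirq */
		spin_lock(&port->lock);
		port->latched |= status;			/* stash for the thread */
		spin_unlock(&port->lock);
		return IRQ_WAKE_THREAD;
	}

	static irqreturn_t foo_threadfn(int irq, void *dev_id)
	{
		struct foo_port *port = dev_id;
		unsigned long flags;
		u32 status;

		spin_lock_irqsave(&port->lock, flags);
		status = port->latched;
		port->latched = 0;
		spin_unlock_irqrestore(&port->lock, flags);

		foo_handle_events(port, status);		/* the slow work */
		return IRQ_HANDLED;
	}

	/* requested once per vector, mirroring ahci_host_activate():
	 * devm_request_threaded_irq(dev, irq + i, foo_hardirq, foo_threadfn,
	 *			     0, dev_name(dev), port);
	 */
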
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 564156a8e572..5814deb6963d 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -461,7 +461,7 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
461 int op_len, err; 461 int op_len, err;
462 void *req_buf; 462 void *req_buf;
463 463
464 if (!(((u64)1 << ((u64)op - 1)) & port->operations)) 464 if (!(((u64)1 << (u64)op) & port->operations))
465 return -EOPNOTSUPP; 465 return -EOPNOTSUPP;
466 466
467 switch (op) { 467 switch (op) {
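
The sunvdc change is a one-bit-off bitmap test: the VIO operations mask is indexed directly by opcode, not by opcode minus one. A short illustration with invented values:

	/* Suppose the server's mask has bits 4 and 5 set. */
	u64 operations = (1ULL << 4) | (1ULL << 5);
	u8 op = 5;

	bool ok  = operations & ((u64)1 << op);		/* true: bit == opcode */
	bool bug = operations & ((u64)1 << (op - 1));	/* tests opcode 4's bit */
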
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index ad8bf2aa629d..2d3f8825e8b8 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs;
31 * 31 *
32 *FIXME: Produce a better mapping/linearisation. 32 *FIXME: Produce a better mapping/linearisation.
33 */ 33 */
34struct scrubrate { 34static const struct scrubrate {
35 u32 scrubval; /* bit pattern for scrub rate */ 35 u32 scrubval; /* bit pattern for scrub rate */
36 u32 bandwidth; /* bandwidth consumed (bytes/sec) */ 36 u32 bandwidth; /* bandwidth consumed (bytes/sec) */
37} scrubrates[] = { 37} scrubrates[] = {
@@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
239 * DRAM base/limit associated with node_id 239 * DRAM base/limit associated with node_id
240 */ 240 */
241static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, 241static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr,
242 unsigned nid) 242 u8 nid)
243{ 243{
244 u64 addr; 244 u64 addr;
245 245
@@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
265 u64 sys_addr) 265 u64 sys_addr)
266{ 266{
267 struct amd64_pvt *pvt; 267 struct amd64_pvt *pvt;
268 unsigned node_id; 268 u8 node_id;
269 u32 intlv_en, bits; 269 u32 intlv_en, bits;
270 270
271 /* 271 /*
@@ -939,7 +939,8 @@ static u64 get_error_address(struct mce *m)
939 struct amd64_pvt *pvt; 939 struct amd64_pvt *pvt;
940 u64 cc6_base, tmp_addr; 940 u64 cc6_base, tmp_addr;
941 u32 tmp; 941 u32 tmp;
942 u8 mce_nid, intlv_en; 942 u16 mce_nid;
943 u8 intlv_en;
943 944
944 if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) 945 if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7)
945 return addr; 946 return addr;
@@ -979,10 +980,29 @@ static u64 get_error_address(struct mce *m)
979 return addr; 980 return addr;
980} 981}
981 982
983static struct pci_dev *pci_get_related_function(unsigned int vendor,
984 unsigned int device,
985 struct pci_dev *related)
986{
987 struct pci_dev *dev = NULL;
988
989 while ((dev = pci_get_device(vendor, device, dev))) {
990 if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) &&
991 (dev->bus->number == related->bus->number) &&
992 (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
993 break;
994 }
995
996 return dev;
997}
998
982static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) 999static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
983{ 1000{
1001 struct amd_northbridge *nb;
1002 struct pci_dev *misc, *f1 = NULL;
984 struct cpuinfo_x86 *c = &boot_cpu_data; 1003 struct cpuinfo_x86 *c = &boot_cpu_data;
985 int off = range << 3; 1004 int off = range << 3;
1005 u32 llim;
986 1006
987 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); 1007 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo);
988 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); 1008 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo);
@@ -996,30 +1016,32 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
996 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); 1016 amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi);
997 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); 1017 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi);
998 1018
999 /* Factor in CC6 save area by reading dst node's limit reg */ 1019 /* F15h: factor in CC6 save area by reading dst node's limit reg */
1000 if (c->x86 == 0x15) { 1020 if (c->x86 != 0x15)
1001 struct pci_dev *f1 = NULL; 1021 return;
1002 u8 nid = dram_dst_node(pvt, range);
1003 u32 llim;
1004 1022
1005 f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); 1023 nb = node_to_amd_nb(dram_dst_node(pvt, range));
1006 if (WARN_ON(!f1)) 1024 if (WARN_ON(!nb))
1007 return; 1025 return;
1008 1026
1009 amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); 1027 misc = nb->misc;
1028 f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
1029 if (WARN_ON(!f1))
1030 return;
1010 1031
1011 pvt->ranges[range].lim.lo &= GENMASK(0, 15); 1032 amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
1012 1033
1013 /* {[39:27],111b} */ 1034 pvt->ranges[range].lim.lo &= GENMASK(0, 15);
1014 pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1015 1035
1016 pvt->ranges[range].lim.hi &= GENMASK(0, 7); 1036 /* {[39:27],111b} */
1037 pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16;
1017 1038
1018 /* [47:40] */ 1039 pvt->ranges[range].lim.hi &= GENMASK(0, 7);
1019 pvt->ranges[range].lim.hi |= llim >> 13;
1020 1040
1021 pci_dev_put(f1); 1041 /* [47:40] */
1022 } 1042 pvt->ranges[range].lim.hi |= llim >> 13;
1043
1044 pci_dev_put(f1);
1023} 1045}
1024 1046
1025static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, 1047static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
@@ -1305,7 +1327,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr,
1305} 1327}
1306 1328
1307/* Convert the sys_addr to the normalized DCT address */ 1329/* Convert the sys_addr to the normalized DCT address */
1308static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, 1330static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range,
1309 u64 sys_addr, bool hi_rng, 1331 u64 sys_addr, bool hi_rng,
1310 u32 dct_sel_base_addr) 1332 u32 dct_sel_base_addr)
1311{ 1333{
@@ -1381,7 +1403,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow)
1381 * -EINVAL: NOT FOUND 1403 * -EINVAL: NOT FOUND
1382 * 0..csrow = Chip-Select Row 1404 * 0..csrow = Chip-Select Row
1383 */ 1405 */
1384static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) 1406static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
1385{ 1407{
1386 struct mem_ctl_info *mci; 1408 struct mem_ctl_info *mci;
1387 struct amd64_pvt *pvt; 1409 struct amd64_pvt *pvt;
@@ -1672,23 +1694,6 @@ static struct amd64_family_type amd64_family_types[] = {
1672 }, 1694 },
1673}; 1695};
1674 1696
1675static struct pci_dev *pci_get_related_function(unsigned int vendor,
1676 unsigned int device,
1677 struct pci_dev *related)
1678{
1679 struct pci_dev *dev = NULL;
1680
1681 dev = pci_get_device(vendor, device, dev);
1682 while (dev) {
1683 if ((dev->bus->number == related->bus->number) &&
1684 (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn)))
1685 break;
1686 dev = pci_get_device(vendor, device, dev);
1687 }
1688
1689 return dev;
1690}
1691
1692/* 1697/*
1693 * These are tables of eigenvectors (one per line) which can be used for the 1698 * These are tables of eigenvectors (one per line) which can be used for the
1694 * construction of the syndrome tables. The modified syndrome search algorithm 1699 * construction of the syndrome tables. The modified syndrome search algorithm
@@ -1696,7 +1701,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor,
1696 * 1701 *
1697 * Algorithm courtesy of Ross LaFetra from AMD. 1702 * Algorithm courtesy of Ross LaFetra from AMD.
1698 */ 1703 */
1699static u16 x4_vectors[] = { 1704static const u16 x4_vectors[] = {
1700 0x2f57, 0x1afe, 0x66cc, 0xdd88, 1705 0x2f57, 0x1afe, 0x66cc, 0xdd88,
1701 0x11eb, 0x3396, 0x7f4c, 0xeac8, 1706 0x11eb, 0x3396, 0x7f4c, 0xeac8,
1702 0x0001, 0x0002, 0x0004, 0x0008, 1707 0x0001, 0x0002, 0x0004, 0x0008,
@@ -1735,7 +1740,7 @@ static u16 x4_vectors[] = {
1735 0x19a9, 0x2efe, 0xb5cc, 0x6f88, 1740 0x19a9, 0x2efe, 0xb5cc, 0x6f88,
1736}; 1741};
1737 1742
1738static u16 x8_vectors[] = { 1743static const u16 x8_vectors[] = {
1739 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 1744 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480,
1740 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 1745 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80,
1741 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, 1746 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80,
@@ -1757,7 +1762,7 @@ static u16 x8_vectors[] = {
1757 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, 1762 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000,
1758}; 1763};
1759 1764
1760static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, 1765static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs,
1761 unsigned v_dim) 1766 unsigned v_dim)
1762{ 1767{
1763 unsigned int i, err_sym; 1768 unsigned int i, err_sym;
@@ -2181,7 +2186,7 @@ static int init_csrows(struct mem_ctl_info *mci)
2181} 2186}
2182 2187
2183/* get all cores on this DCT */ 2188/* get all cores on this DCT */
2184static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) 2189static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid)
2185{ 2190{
2186 int cpu; 2191 int cpu;
2187 2192
@@ -2191,7 +2196,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid)
2191} 2196}
2192 2197
2193/* check MCG_CTL on all the cpus on this node */ 2198/* check MCG_CTL on all the cpus on this node */
2194static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) 2199static bool amd64_nb_mce_bank_enabled_on_node(u16 nid)
2195{ 2200{
2196 cpumask_var_t mask; 2201 cpumask_var_t mask;
2197 int cpu, nbe; 2202 int cpu, nbe;
@@ -2224,7 +2229,7 @@ out:
2224 return ret; 2229 return ret;
2225} 2230}
2226 2231
2227static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) 2232static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on)
2228{ 2233{
2229 cpumask_var_t cmask; 2234 cpumask_var_t cmask;
2230 int cpu; 2235 int cpu;
@@ -2262,7 +2267,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
2262 return 0; 2267 return 0;
2263} 2268}
2264 2269
2265static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, 2270static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2266 struct pci_dev *F3) 2271 struct pci_dev *F3)
2267{ 2272{
2268 bool ret = true; 2273 bool ret = true;
@@ -2314,7 +2319,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2314 return ret; 2319 return ret;
2315} 2320}
2316 2321
2317static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, 2322static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
2318 struct pci_dev *F3) 2323 struct pci_dev *F3)
2319{ 2324{
2320 u32 value, mask = 0x3; /* UECC/CECC enable */ 2325 u32 value, mask = 0x3; /* UECC/CECC enable */
@@ -2353,7 +2358,7 @@ static const char *ecc_msg =
2353 "'ecc_enable_override'.\n" 2358 "'ecc_enable_override'.\n"
2354 " (Note that use of the override may cause unknown side effects.)\n"; 2359 " (Note that use of the override may cause unknown side effects.)\n";
2355 2360
2356static bool ecc_enabled(struct pci_dev *F3, u8 nid) 2361static bool ecc_enabled(struct pci_dev *F3, u16 nid)
2357{ 2362{
2358 u32 value; 2363 u32 value;
2359 u8 ecc_en = 0; 2364 u8 ecc_en = 0;
@@ -2474,7 +2479,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
2474 struct mem_ctl_info *mci = NULL; 2479 struct mem_ctl_info *mci = NULL;
2475 struct edac_mc_layer layers[2]; 2480 struct edac_mc_layer layers[2];
2476 int err = 0, ret; 2481 int err = 0, ret;
2477 u8 nid = get_node_id(F2); 2482 u16 nid = amd_get_node_id(F2);
2478 2483
2479 ret = -ENOMEM; 2484 ret = -ENOMEM;
2480 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); 2485 pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2566,7 +2571,7 @@ err_ret:
2566static int amd64_probe_one_instance(struct pci_dev *pdev, 2571static int amd64_probe_one_instance(struct pci_dev *pdev,
2567 const struct pci_device_id *mc_type) 2572 const struct pci_device_id *mc_type)
2568{ 2573{
2569 u8 nid = get_node_id(pdev); 2574 u16 nid = amd_get_node_id(pdev);
2570 struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 2575 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2571 struct ecc_settings *s; 2576 struct ecc_settings *s;
2572 int ret = 0; 2577 int ret = 0;
@@ -2616,7 +2621,7 @@ static void amd64_remove_one_instance(struct pci_dev *pdev)
2616{ 2621{
2617 struct mem_ctl_info *mci; 2622 struct mem_ctl_info *mci;
2618 struct amd64_pvt *pvt; 2623 struct amd64_pvt *pvt;
2619 u8 nid = get_node_id(pdev); 2624 u16 nid = amd_get_node_id(pdev);
2620 struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 2625 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2621 struct ecc_settings *s = ecc_stngs[nid]; 2626 struct ecc_settings *s = ecc_stngs[nid];
2622 2627
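
pci_get_related_function(), moved ahead of its new caller and made PCI-domain-aware above, walks pci_get_device() matches until it finds the sibling function in the same domain/bus/slot as a known device; that is how the driver hops from a node's F3 "misc" device to its F1 address-map function. A usage sketch assembled from the hunks above (constants and helpers as in the driver):

	struct pci_dev *misc = node_to_amd_nb(nid)->misc;	/* known F3 */
	struct pci_dev *f1;
	u32 llim;

	f1 = pci_get_related_function(misc->vendor,
				      PCI_DEVICE_ID_AMD_15H_NB_F1, misc);
	if (f1) {
		amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim);
		pci_dev_put(f1);	/* drop the ref pci_get_device() took */
	}

The loop form in the new helper is reference-safe by construction: pci_get_device(vendor, device, from) drops the reference held on from and takes one on the device it returns, so no explicit puts are needed inside the while loop.
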
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index e864f407806c..35637d83f235 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -292,12 +292,6 @@
292/* MSRs */ 292/* MSRs */
293#define MSR_MCGCTL_NBE BIT(4) 293#define MSR_MCGCTL_NBE BIT(4)
294 294
295/* AMD sets the first MC device at device ID 0x18. */
296static inline u8 get_node_id(struct pci_dev *pdev)
297{
298 return PCI_SLOT(pdev->devfn) - 0x18;
299}
300
301enum amd_families { 295enum amd_families {
302 K8_CPUS = 0, 296 K8_CPUS = 0,
303 F10_CPUS, 297 F10_CPUS,
@@ -340,7 +334,7 @@ struct amd64_pvt {
340 /* pci_device handles which we utilize */ 334 /* pci_device handles which we utilize */
341 struct pci_dev *F1, *F2, *F3; 335 struct pci_dev *F1, *F2, *F3;
342 336
343 unsigned mc_node_id; /* MC index of this MC node */ 337 u16 mc_node_id; /* MC index of this MC node */
344 int ext_model; /* extended model value of this node */ 338 int ext_model; /* extended model value of this node */
345 int channel_count; 339 int channel_count;
346 340
@@ -393,7 +387,7 @@ struct err_info {
393 u32 offset; 387 u32 offset;
394}; 388};
395 389
396static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) 390static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i)
397{ 391{
398 u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; 392 u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
399 393
@@ -403,7 +397,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
403 return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr; 397 return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr;
404} 398}
405 399
406static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i) 400static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i)
407{ 401{
408 u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff; 402 u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff;
409 403
diff --git a/drivers/gpu/drm/nouveau/core/core/falcon.c b/drivers/gpu/drm/nouveau/core/core/falcon.c
index 6b0843c33877..e05c15777588 100644
--- a/drivers/gpu/drm/nouveau/core/core/falcon.c
+++ b/drivers/gpu/drm/nouveau/core/core/falcon.c
@@ -73,8 +73,11 @@ _nouveau_falcon_init(struct nouveau_object *object)
73 nv_debug(falcon, "data limit: %d\n", falcon->data.limit); 73 nv_debug(falcon, "data limit: %d\n", falcon->data.limit);
74 74
75 /* wait for 'uc halted' to be signalled before continuing */ 75 /* wait for 'uc halted' to be signalled before continuing */
76 if (falcon->secret) { 76 if (falcon->secret && falcon->version < 4) {
77 nv_wait(falcon, 0x008, 0x00000010, 0x00000010); 77 if (!falcon->version)
78 nv_wait(falcon, 0x008, 0x00000010, 0x00000010);
79 else
80 nv_wait(falcon, 0x180, 0x80000000, 0);
78 nv_wo32(falcon, 0x004, 0x00000010); 81 nv_wo32(falcon, 0x004, 0x00000010);
79 } 82 }
80 83
diff --git a/drivers/gpu/drm/nouveau/core/core/subdev.c b/drivers/gpu/drm/nouveau/core/core/subdev.c
index f74c30aa33a0..48f06378d3f9 100644
--- a/drivers/gpu/drm/nouveau/core/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/core/core/subdev.c
@@ -99,7 +99,7 @@ nouveau_subdev_create_(struct nouveau_object *parent,
99 if (ret) 99 if (ret)
100 return ret; 100 return ret;
101 101
102 mutex_init(&subdev->mutex); 102 __mutex_init(&subdev->mutex, subname, &oclass->lock_class_key);
103 subdev->name = subname; 103 subdev->name = subname;
104 104
105 if (parent) { 105 if (parent) {
diff --git a/drivers/gpu/drm/nouveau/core/include/core/object.h b/drivers/gpu/drm/nouveau/core/include/core/object.h
index 5982935ee23a..106bb19fdd9a 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/object.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/object.h
@@ -50,10 +50,13 @@ int nouveau_object_fini(struct nouveau_object *, bool suspend);
50 50
51extern struct nouveau_ofuncs nouveau_object_ofuncs; 51extern struct nouveau_ofuncs nouveau_object_ofuncs;
52 52
53/* Don't allocate dynamically, because lockdep needs lock_class_keys to be in
54 * ".data". */
53struct nouveau_oclass { 55struct nouveau_oclass {
54 u32 handle; 56 u32 handle;
55 struct nouveau_ofuncs *ofuncs; 57 struct nouveau_ofuncs * const ofuncs;
56 struct nouveau_omthds *omthds; 58 struct nouveau_omthds * const omthds;
59 struct lock_class_key lock_class_key;
57}; 60};
58 61
59#define nv_oclass(o) nv_object(o)->oclass 62#define nv_oclass(o) nv_object(o)->oclass
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
index d6d16007ec1a..d62045f454b2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c
@@ -86,8 +86,8 @@ nouveau_fb_preinit(struct nouveau_fb *pfb)
86 return ret; 86 return ret;
87 } 87 }
88 88
89 if (!nouveau_mm_initialised(&pfb->tags) && tags) { 89 if (!nouveau_mm_initialised(&pfb->tags)) {
90 ret = nouveau_mm_init(&pfb->tags, 0, ++tags, 1); 90 ret = nouveau_mm_init(&pfb->tags, 0, tags ? ++tags : 0, 1);
91 if (ret) 91 if (ret)
92 return ret; 92 return ret;
93 } 93 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
index 487cb8c6c204..eac236ed19b2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c
@@ -99,7 +99,7 @@ nv50_fb_vram_init(struct nouveau_fb *pfb)
99 struct nouveau_bios *bios = nouveau_bios(device); 99 struct nouveau_bios *bios = nouveau_bios(device);
100 const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ 100 const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
101 const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ 101 const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
102 u32 size; 102 u32 size, tags = 0;
103 int ret; 103 int ret;
104 104
105 pfb->ram.size = nv_rd32(pfb, 0x10020c); 105 pfb->ram.size = nv_rd32(pfb, 0x10020c);
@@ -140,10 +140,11 @@ nv50_fb_vram_init(struct nouveau_fb *pfb)
140 return ret; 140 return ret;
141 141
142 pfb->ram.ranks = (nv_rd32(pfb, 0x100200) & 0x4) ? 2 : 1; 142 pfb->ram.ranks = (nv_rd32(pfb, 0x100200) & 0x4) ? 2 : 1;
143 tags = nv_rd32(pfb, 0x100320);
143 break; 144 break;
144 } 145 }
145 146
146 return nv_rd32(pfb, 0x100320); 147 return tags;
147} 148}
148 149
149static int 150static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 69d7b1d0b9d6..1699a9083a2f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -28,6 +28,7 @@
28 */ 28 */
29 29
30#include <core/engine.h> 30#include <core/engine.h>
31#include <linux/swiotlb.h>
31 32
32#include <subdev/fb.h> 33#include <subdev/fb.h>
33#include <subdev/vm.h> 34#include <subdev/vm.h>
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8b090f1eb51d..5e7aef23825a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -245,6 +245,8 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
245 return 0; 245 return 0;
246} 246}
247 247
248static struct lock_class_key drm_client_lock_class_key;
249
248static int 250static int
249nouveau_drm_load(struct drm_device *dev, unsigned long flags) 251nouveau_drm_load(struct drm_device *dev, unsigned long flags)
250{ 252{
@@ -256,6 +258,7 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
256 ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm); 258 ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm);
257 if (ret) 259 if (ret)
258 return ret; 260 return ret;
261 lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key);
259 262
260 dev->dev_private = drm; 263 dev->dev_private = drm;
261 drm->dev = dev; 264 drm->dev = dev;
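
The nouveau lockdep hunks (subdev.c, object.h, nouveau_drm.c) all address the same problem: every mutex initialised from a single mutex_init() call site shares one lockdep class, so nesting different subdev mutexes looks like self-recursion. Giving each user its own static struct lock_class_key, either at init time via __mutex_init() or afterwards via lockdep_set_class(), separates the classes. A minimal sketch with a hypothetical name; both APIs are the real ones:

	static struct lock_class_key foo_lock_key;	/* static: lockdep wants
							 * keys in .data */
	struct mutex m;

	__mutex_init(&m, "foo_lock", &foo_lock_key);	/* init with its own class */

	/* ...or reclassify a mutex initialised the usual way: */
	mutex_init(&m);
	lockdep_set_class(&m, &foo_lock_key);
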
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 7a445666e71f..ee4cff534f10 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2909,14 +2909,14 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2909 return -EINVAL; 2909 return -EINVAL;
2910 } 2910 }
2911 if (tiled) { 2911 if (tiled) {
2912 dst_offset = ib[idx+1]; 2912 dst_offset = radeon_get_ib_value(p, idx+1);
2913 dst_offset <<= 8; 2913 dst_offset <<= 8;
2914 2914
2915 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2915 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2916 p->idx += count + 7; 2916 p->idx += count + 7;
2917 } else { 2917 } else {
2918 dst_offset = ib[idx+1]; 2918 dst_offset = radeon_get_ib_value(p, idx+1);
2919 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; 2919 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2920 2920
2921 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2921 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2922 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2922 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
@@ -2954,12 +2954,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2954 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); 2954 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2955 return -EINVAL; 2955 return -EINVAL;
2956 } 2956 }
2957 dst_offset = ib[idx+1]; 2957 dst_offset = radeon_get_ib_value(p, idx+1);
2958 dst_offset <<= 8; 2958 dst_offset <<= 8;
2959 dst2_offset = ib[idx+2]; 2959 dst2_offset = radeon_get_ib_value(p, idx+2);
2960 dst2_offset <<= 8; 2960 dst2_offset <<= 8;
2961 src_offset = ib[idx+8]; 2961 src_offset = radeon_get_ib_value(p, idx+8);
2962 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; 2962 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2963 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2963 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2964 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", 2964 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2965 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2965 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3014,12 +3014,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3014 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3014 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3015 return -EINVAL; 3015 return -EINVAL;
3016 } 3016 }
3017 dst_offset = ib[idx+1]; 3017 dst_offset = radeon_get_ib_value(p, idx+1);
3018 dst_offset <<= 8; 3018 dst_offset <<= 8;
3019 dst2_offset = ib[idx+2]; 3019 dst2_offset = radeon_get_ib_value(p, idx+2);
3020 dst2_offset <<= 8; 3020 dst2_offset <<= 8;
3021 src_offset = ib[idx+8]; 3021 src_offset = radeon_get_ib_value(p, idx+8);
3022 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; 3022 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3023 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3023 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3024 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", 3024 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3025 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3025 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3046,22 +3046,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3046 /* detile bit */ 3046 /* detile bit */
3047 if (idx_value & (1 << 31)) { 3047 if (idx_value & (1 << 31)) {
3048 /* tiled src, linear dst */ 3048 /* tiled src, linear dst */
3049 src_offset = ib[idx+1]; 3049 src_offset = radeon_get_ib_value(p, idx+1);
3050 src_offset <<= 8; 3050 src_offset <<= 8;
3051 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 3051 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3052 3052
3053 dst_offset = ib[idx+7]; 3053 dst_offset = radeon_get_ib_value(p, idx+7);
3054 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; 3054 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3055 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 3055 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3056 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 3056 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3057 } else { 3057 } else {
3058 /* linear src, tiled dst */ 3058 /* linear src, tiled dst */
3059 src_offset = ib[idx+7]; 3059 src_offset = radeon_get_ib_value(p, idx+7);
3060 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; 3060 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3061 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 3061 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3062 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 3062 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3063 3063
3064 dst_offset = ib[idx+1]; 3064 dst_offset = radeon_get_ib_value(p, idx+1);
3065 dst_offset <<= 8; 3065 dst_offset <<= 8;
3066 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 3066 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3067 } 3067 }
@@ -3098,12 +3098,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3098 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3098 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3099 return -EINVAL; 3099 return -EINVAL;
3100 } 3100 }
3101 dst_offset = ib[idx+1]; 3101 dst_offset = radeon_get_ib_value(p, idx+1);
3102 dst_offset <<= 8; 3102 dst_offset <<= 8;
3103 dst2_offset = ib[idx+2]; 3103 dst2_offset = radeon_get_ib_value(p, idx+2);
3104 dst2_offset <<= 8; 3104 dst2_offset <<= 8;
3105 src_offset = ib[idx+8]; 3105 src_offset = radeon_get_ib_value(p, idx+8);
3106 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; 3106 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3107 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3107 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3108 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", 3108 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3109 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3109 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3135,22 +3135,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3135 /* detile bit */ 3135 /* detile bit */
3136 if (idx_value & (1 << 31)) { 3136 if (idx_value & (1 << 31)) {
3137 /* tiled src, linear dst */ 3137 /* tiled src, linear dst */
3138 src_offset = ib[idx+1]; 3138 src_offset = radeon_get_ib_value(p, idx+1);
3139 src_offset <<= 8; 3139 src_offset <<= 8;
3140 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 3140 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3141 3141
3142 dst_offset = ib[idx+7]; 3142 dst_offset = radeon_get_ib_value(p, idx+7);
3143 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; 3143 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3144 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 3144 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3145 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 3145 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3146 } else { 3146 } else {
3147 /* linear src, tiled dst */ 3147 /* linear src, tiled dst */
3148 src_offset = ib[idx+7]; 3148 src_offset = radeon_get_ib_value(p, idx+7);
3149 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; 3149 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3150 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 3150 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3151 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 3151 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3152 3152
3153 dst_offset = ib[idx+1]; 3153 dst_offset = radeon_get_ib_value(p, idx+1);
3154 dst_offset <<= 8; 3154 dst_offset <<= 8;
3155 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 3155 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3156 } 3156 }
@@ -3176,10 +3176,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3176 switch (misc) { 3176 switch (misc) {
3177 case 0: 3177 case 0:
3178 /* L2L, byte */ 3178 /* L2L, byte */
3179 src_offset = ib[idx+2]; 3179 src_offset = radeon_get_ib_value(p, idx+2);
3180 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; 3180 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3181 dst_offset = ib[idx+1]; 3181 dst_offset = radeon_get_ib_value(p, idx+1);
3182 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; 3182 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3183 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { 3183 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3184 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", 3184 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3185 src_offset + count, radeon_bo_size(src_reloc->robj)); 3185 src_offset + count, radeon_bo_size(src_reloc->robj));
@@ -3216,12 +3216,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3216 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); 3216 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3217 return -EINVAL; 3217 return -EINVAL;
3218 } 3218 }
3219 dst_offset = ib[idx+1]; 3219 dst_offset = radeon_get_ib_value(p, idx+1);
3220 dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; 3220 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3221 dst2_offset = ib[idx+2]; 3221 dst2_offset = radeon_get_ib_value(p, idx+2);
3222 dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32; 3222 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
3223 src_offset = ib[idx+3]; 3223 src_offset = radeon_get_ib_value(p, idx+3);
3224 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; 3224 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
3225 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3225 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3226 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", 3226 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3227 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3227 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3251,10 +3251,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3251 } 3251 }
3252 } else { 3252 } else {
3253 /* L2L, dw */ 3253 /* L2L, dw */
3254 src_offset = ib[idx+2]; 3254 src_offset = radeon_get_ib_value(p, idx+2);
3255 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; 3255 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
3256 dst_offset = ib[idx+1]; 3256 dst_offset = radeon_get_ib_value(p, idx+1);
3257 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; 3257 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
3258 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3258 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3259 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", 3259 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3260 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3260 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
@@ -3279,8 +3279,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
3279 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); 3279 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3280 return -EINVAL; 3280 return -EINVAL;
3281 } 3281 }
3282 dst_offset = ib[idx+1]; 3282 dst_offset = radeon_get_ib_value(p, idx+1);
3283 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; 3283 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3284 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3284 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3285 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", 3285 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3286 dst_offset, radeon_bo_size(dst_reloc->robj)); 3286 dst_offset, radeon_bo_size(dst_reloc->robj));
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 69ec24ab8d63..9b2512bf1a46 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2623,14 +2623,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
2623 return -EINVAL; 2623 return -EINVAL;
2624 } 2624 }
2625 if (tiled) { 2625 if (tiled) {
2626 dst_offset = ib[idx+1]; 2626 dst_offset = radeon_get_ib_value(p, idx+1);
2627 dst_offset <<= 8; 2627 dst_offset <<= 8;
2628 2628
2629 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2629 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2630 p->idx += count + 5; 2630 p->idx += count + 5;
2631 } else { 2631 } else {
2632 dst_offset = ib[idx+1]; 2632 dst_offset = radeon_get_ib_value(p, idx+1);
2633 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; 2633 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2634 2634
2635 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2635 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2636 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2636 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
@@ -2658,32 +2658,32 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
2658 /* detile bit */ 2658 /* detile bit */
2659 if (idx_value & (1 << 31)) { 2659 if (idx_value & (1 << 31)) {
2660 /* tiled src, linear dst */ 2660 /* tiled src, linear dst */
2661 src_offset = ib[idx+1]; 2661 src_offset = radeon_get_ib_value(p, idx+1);
2662 src_offset <<= 8; 2662 src_offset <<= 8;
2663 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); 2663 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2664 2664
2665 dst_offset = ib[idx+5]; 2665 dst_offset = radeon_get_ib_value(p, idx+5);
2666 dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; 2666 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2667 ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2667 ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2668 ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; 2668 ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2669 } else { 2669 } else {
2670 /* linear src, tiled dst */ 2670 /* linear src, tiled dst */
2671 src_offset = ib[idx+5]; 2671 src_offset = radeon_get_ib_value(p, idx+5);
2672 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; 2672 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2673 ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2673 ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2674 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2674 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2675 2675
2676 dst_offset = ib[idx+1]; 2676 dst_offset = radeon_get_ib_value(p, idx+1);
2677 dst_offset <<= 8; 2677 dst_offset <<= 8;
2678 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); 2678 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2679 } 2679 }
2680 p->idx += 7; 2680 p->idx += 7;
2681 } else { 2681 } else {
2682 if (p->family >= CHIP_RV770) { 2682 if (p->family >= CHIP_RV770) {
2683 src_offset = ib[idx+2]; 2683 src_offset = radeon_get_ib_value(p, idx+2);
2684 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; 2684 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2685 dst_offset = ib[idx+1]; 2685 dst_offset = radeon_get_ib_value(p, idx+1);
2686 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; 2686 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2687 2687
2688 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2688 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2689 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2689 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
@@ -2691,10 +2691,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
2691 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; 2691 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2692 p->idx += 5; 2692 p->idx += 5;
2693 } else { 2693 } else {
2694 src_offset = ib[idx+2]; 2694 src_offset = radeon_get_ib_value(p, idx+2);
2695 src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; 2695 src_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2696 dst_offset = ib[idx+1]; 2696 dst_offset = radeon_get_ib_value(p, idx+1);
2697 dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16; 2697 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;
2698 2698
2699 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); 2699 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2700 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); 2700 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
@@ -2724,8 +2724,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
2724 DRM_ERROR("bad DMA_PACKET_WRITE\n"); 2724 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2725 return -EINVAL; 2725 return -EINVAL;
2726 } 2726 }
2727 dst_offset = ib[idx+1]; 2727 dst_offset = radeon_get_ib_value(p, idx+1);
2728 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; 2728 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
2729 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2729 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2730 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", 2730 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
2731 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2731 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
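
The repeated evergreen/r600 substitution follows one rule: the DMA CS parsers must read command-stream dwords through radeon_get_ib_value(), which goes via the parser's CPU-side view of the user chunk (paged in on demand), rather than reading ib[] back directly; only the relocation fix-ups still write through ib[]. The before/after shape, lifted from the hunks above:

	/* before: CPU readback straight from the indirect buffer */
	dst_offset = ib[idx+1];

	/* after: read via the parser state, write the relocation via ib[] */
	dst_offset = radeon_get_ib_value(p, idx+1);
	ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
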
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1d8ff2f850ba..93f760e27a92 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -38,6 +38,7 @@
38#include <drm/radeon_drm.h> 38#include <drm/radeon_drm.h>
39#include <linux/seq_file.h> 39#include <linux/seq_file.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/swiotlb.h>
41#include "radeon_reg.h" 42#include "radeon_reg.h"
42#include "radeon.h" 43#include "radeon.h"
43 44
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index b38ef6d8d049..64630f15f181 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -2,7 +2,7 @@ menu "Microsoft Hyper-V guest support"
2 2
3config HYPERV 3config HYPERV
4 tristate "Microsoft Hyper-V client drivers" 4 tristate "Microsoft Hyper-V client drivers"
5 depends on X86 && ACPI && PCI 5 depends on X86 && ACPI && PCI && X86_LOCAL_APIC
6 help 6 help
7 Select this option to run Linux as a Hyper-V client operating 7 Select this option to run Linux as a Hyper-V client operating
8 system. 8 system.
diff --git a/drivers/input/input.c b/drivers/input/input.c
index ce01332f7b3a..c04469928925 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1785,12 +1785,13 @@ static void devm_input_device_release(struct device *dev, void *res)
1785 * its driver (or binding fails). Once managed input device is allocated, 1785 * its driver (or binding fails). Once managed input device is allocated,
1786 * it is ready to be set up and registered in the same fashion as regular 1786 * it is ready to be set up and registered in the same fashion as regular
1787 * input device. There are no special devm_input_device_[un]register() 1787 * input device. There are no special devm_input_device_[un]register()
1788 * variants, regular ones work with both managed and unmanaged devices. 1788 * variants, regular ones work with both managed and unmanaged devices,
1789 * should you need them. In most cases, however, a managed input device
1790 * need not be explicitly unregistered or freed.
1789 * 1791 *
1790 * NOTE: the owner device is set up as parent of input device and users 1792 * NOTE: the owner device is set up as parent of input device and users
1791 * should not override it. 1793 * should not override it.
1792 */ 1794 */
1793
1794struct input_dev *devm_input_allocate_device(struct device *dev) 1795struct input_dev *devm_input_allocate_device(struct device *dev)
1795{ 1796{
1796 struct input_dev *input; 1797 struct input_dev *input;
@@ -2004,6 +2005,17 @@ static void devm_input_device_unregister(struct device *dev, void *res)
2004 * Once device has been successfully registered it can be unregistered 2005 * Once device has been successfully registered it can be unregistered
2005 * with input_unregister_device(); input_free_device() should not be 2006 * with input_unregister_device(); input_free_device() should not be
2006 * called in this case. 2007 * called in this case.
2008 *
2009 * Note that this function is also used to register managed input devices
2010 * (ones allocated with devm_input_allocate_device()). Such managed input
2011 * devices need not be explicitly unregistered or freed; their teardown
2012 * is controlled by the devres infrastructure. It is also worth noting
2013 * that teardown of managed input devices is internally a 2-step process:
2014 * the registered managed input device is first unregistered, but stays in
2015 * memory and can still handle input_event() calls (although events will
2016 * not be delivered anywhere). The freeing of the managed input device
2017 * happens later, when the devres stack is unwound to the point where the
2018 * device allocation was made.
2007 */ 2019 */
2008int input_register_device(struct input_dev *dev) 2020int input_register_device(struct input_dev *dev)
2009{ 2021{
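
The documentation added above is the whole contract of managed input devices; in code it reduces to this probe-time pattern (driver name and capabilities invented, the input APIs real):

	static int foo_probe(struct platform_device *pdev)
	{
		struct input_dev *input;
		int error;

		input = devm_input_allocate_device(&pdev->dev);
		if (!input)
			return -ENOMEM;

		input->name = "foo-buttons";
		input_set_capability(input, EV_KEY, KEY_POWER);

		error = input_register_device(input);	/* same call as unmanaged */
		if (error)
			return error;	/* no input_free_device(): devres owns it */

		return 0;
		/* no remove() teardown either: unregister and free happen as the
		 * devres stack unwinds */
	}
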
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 358cd7ee905b..7cd74e29cbc8 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -162,7 +162,7 @@ static unsigned int get_time_pit(void)
162#define GET_TIME(x) do { x = get_cycles(); } while (0) 162#define GET_TIME(x) do { x = get_cycles(); } while (0)
163#define DELTA(x,y) ((y)-(x)) 163#define DELTA(x,y) ((y)-(x))
164#define TIME_NAME "PCC" 164#define TIME_NAME "PCC"
165#elif defined(CONFIG_MN10300) 165#elif defined(CONFIG_MN10300) || defined(CONFIG_TILE)
166#define GET_TIME(x) do { x = get_cycles(); } while (0) 166#define GET_TIME(x) do { x = get_cycles(); } while (0)
167#define DELTA(x, y) ((x) - (y)) 167#define DELTA(x, y) ((x) - (y))
168#define TIME_NAME "TSC" 168#define TIME_NAME "TSC"
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index 93c812662134..0de23f41b2d3 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -398,7 +398,7 @@ static irqreturn_t lm8323_irq(int irq, void *_lm)
398 lm8323_configure(lm); 398 lm8323_configure(lm);
399 } 399 }
400 for (i = 0; i < LM8323_NUM_PWMS; i++) { 400 for (i = 0; i < LM8323_NUM_PWMS; i++) {
401 if (ints & (1 << (INT_PWM1 + i))) { 401 if (ints & (INT_PWM1 << i)) {
402 dev_vdbg(&lm->client->dev, 402 dev_vdbg(&lm->client->dev,
403 "pwm%d engine completed\n", i); 403 "pwm%d engine completed\n", i);
404 pwm_done(&lm->pwm[i]); 404 pwm_done(&lm->pwm[i]);
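
The lm8323 fix hinges on whether INT_PWM1 is a bit mask or a bit index: 1 << (INT_PWM1 + i) treats it as an index, while INT_PWM1 << i shifts the mask itself. With an invented mask value, only the second form tests the intended bits:

	#define INT_PWM1 0x08			/* invented: bit 3, as a mask */
	unsigned int ints = 0x20, i;		/* bit 5 pending -> pwm engine 2 */

	for (i = 0; i < 3; i++) {
		/* buggy form (1 << (INT_PWM1 + i)) would test bits 8..10 */
		if (ints & (INT_PWM1 << i))	/* fixed: tests bits 3..5 */
			handle_pwm(i);		/* hypothetical handler */
	}
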
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index f92d34f45a1c..aaf23aeae2ea 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -553,10 +553,10 @@ static int wacom_set_device_mode(struct usb_interface *intf, int report_id, int
553 if (!rep_data) 553 if (!rep_data)
554 return error; 554 return error;
555 555
556 rep_data[0] = report_id;
557 rep_data[1] = mode;
558
559 do { 556 do {
557 rep_data[0] = report_id;
558 rep_data[1] = mode;
559
560 error = wacom_set_report(intf, WAC_HID_FEATURE_REPORT, 560 error = wacom_set_report(intf, WAC_HID_FEATURE_REPORT,
561 report_id, rep_data, length, 1); 561 report_id, rep_data, length, 1);
562 if (error >= 0) 562 if (error >= 0)
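
The wacom hunk fixes a common retry-loop hazard: the transfer may scribble over the request buffer, so a retry must rebuild it on every iteration rather than once up front. The general shape, sketched with invented names:

	do {
		buf[0] = report_id;	/* rebuild each attempt: the transfer */
		buf[1] = mode;		/* below may clobber buf */
		error = send_report(dev, buf, len);	/* invented helper */
	} while (error < 0 && --retries);
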
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c1c74e030a58..d33eaaf783ad 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
4017 4017
4018 index -= count - 1; 4018 index -= count - 1;
4019 4019
4020 cfg->remapped = 1;
4020 irte_info = &cfg->irq_2_iommu; 4021 irte_info = &cfg->irq_2_iommu;
4021 irte_info->sub_handle = devid; 4022 irte_info->sub_handle = devid;
4022 irte_info->irte_index = index; 4023 irte_info->irte_index = index;
4023 irte_info->iommu = (void *)cfg;
4024 4024
4025 goto out; 4025 goto out;
4026 } 4026 }
@@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
4127 index = attr->ioapic_pin; 4127 index = attr->ioapic_pin;
4128 4128
4129 /* Setup IRQ remapping info */ 4129 /* Setup IRQ remapping info */
4130 cfg->remapped = 1;
4130 irte_info->sub_handle = devid; 4131 irte_info->sub_handle = devid;
4131 irte_info->irte_index = index; 4132 irte_info->irte_index = index;
4132 irte_info->iommu = (void *)cfg;
4133 4133
4134 /* Setup IRTE for IOMMU */ 4134 /* Setup IRTE for IOMMU */
4135 irte.val = 0; 4135 irte.val = 0;
@@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
4288 devid = get_device_id(&pdev->dev); 4288 devid = get_device_id(&pdev->dev);
4289 irte_info = &cfg->irq_2_iommu; 4289 irte_info = &cfg->irq_2_iommu;
4290 4290
4291 cfg->remapped = 1;
4291 irte_info->sub_handle = devid; 4292 irte_info->sub_handle = devid;
4292 irte_info->irte_index = index + offset; 4293 irte_info->irte_index = index + offset;
4293 irte_info->iommu = (void *)cfg;
4294 4294
4295 return 0; 4295 return 0;
4296} 4296}
@@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id)
4314 if (index < 0) 4314 if (index < 0)
4315 return index; 4315 return index;
4316 4316
4317 cfg->remapped = 1;
4317 irte_info->sub_handle = devid; 4318 irte_info->sub_handle = devid;
4318 irte_info->irte_index = index; 4319 irte_info->irte_index = index;
4319 irte_info->iommu = (void *)cfg;
4320 4320
4321 return 0; 4321 return 0;
4322} 4322}
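
The repeated one-liner in amd_iommu.c (mirrored in intel_irq_remapping.c below) records remapping status in the generic irq_cfg instead of overloading irte_info->iommu as a boolean; that is also why dmar.c and intel-iommu.c now include the shared irq_remapping.h, which carries the accessor. A sketch of the consumer side, consistent with the helper this series introduces (quoted from memory, so treat the exact shape as approximate):

	/* drivers/iommu/irq_remapping.h */
	static inline bool irq_remapped(struct irq_cfg *cfg)
	{
		return cfg->remapped;
	}

	/* setup paths set the flag once an IRTE slot is bound to the IRQ: */
	cfg->remapped = 1;
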
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 86e2f4a62b9a..174bb654453d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -41,6 +41,8 @@
41#include <asm/irq_remapping.h> 41#include <asm/irq_remapping.h>
42#include <asm/iommu_table.h> 42#include <asm/iommu_table.h>
43 43
44#include "irq_remapping.h"
45
44/* No locks are needed as DMA remapping hardware unit 46/* No locks are needed as DMA remapping hardware unit
45 * list is constructed at boot time and hotplug of 47 * list is constructed at boot time and hotplug of
46 * these units are not supported by the architecture. 48 * these units are not supported by the architecture.
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index eca28014ef3e..43d5c8b8e7ad 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -46,6 +46,8 @@
46#include <asm/cacheflush.h> 46#include <asm/cacheflush.h>
47#include <asm/iommu.h> 47#include <asm/iommu.h>
48 48
49#include "irq_remapping.h"
50
49#define ROOT_SIZE VTD_PAGE_SIZE 51#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE 52#define CONTEXT_SIZE VTD_PAGE_SIZE
51 53
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index af8904de1d44..f3b8f23b5d8f 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -68,6 +68,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
68{ 68{
69 struct ir_table *table = iommu->ir_table; 69 struct ir_table *table = iommu->ir_table;
70 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); 70 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
71 struct irq_cfg *cfg = irq_get_chip_data(irq);
71 u16 index, start_index; 72 u16 index, start_index;
72 unsigned int mask = 0; 73 unsigned int mask = 0;
73 unsigned long flags; 74 unsigned long flags;
@@ -115,6 +116,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
115 for (i = index; i < index + count; i++) 116 for (i = index; i < index + count; i++)
116 table->base[i].present = 1; 117 table->base[i].present = 1;
117 118
119 cfg->remapped = 1;
118 irq_iommu->iommu = iommu; 120 irq_iommu->iommu = iommu;
119 irq_iommu->irte_index = index; 121 irq_iommu->irte_index = index;
120 irq_iommu->sub_handle = 0; 122 irq_iommu->sub_handle = 0;
@@ -155,6 +157,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
155static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) 157static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
156{ 158{
157 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); 159 struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
160 struct irq_cfg *cfg = irq_get_chip_data(irq);
158 unsigned long flags; 161 unsigned long flags;
159 162
160 if (!irq_iommu) 163 if (!irq_iommu)
@@ -162,6 +165,7 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha
162 165
163 raw_spin_lock_irqsave(&irq_2_ir_lock, flags); 166 raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
164 167
168 cfg->remapped = 1;
165 irq_iommu->iommu = iommu; 169 irq_iommu->iommu = iommu;
166 irq_iommu->irte_index = index; 170 irq_iommu->irte_index = index;
167 irq_iommu->sub_handle = subhandle; 171 irq_iommu->sub_handle = subhandle;
@@ -425,11 +429,22 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
425 429
426 /* Enable interrupt-remapping */ 430 /* Enable interrupt-remapping */
427 iommu->gcmd |= DMA_GCMD_IRE; 431 iommu->gcmd |= DMA_GCMD_IRE;
432 iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */
428 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 433 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
429 434
430 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 435 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
431 readl, (sts & DMA_GSTS_IRES), sts); 436 readl, (sts & DMA_GSTS_IRES), sts);
432 437
438 /*
439 * With CFI clear in the Global Command register, we should be
440 * protected from dangerous (i.e. compatibility) interrupts
441 * regardless of x2apic status. Check just to be sure.
442 */
443 if (sts & DMA_GSTS_CFIS)
444 WARN(1, KERN_WARNING
445 "Compatibility-format IRQs enabled despite intr remapping;\n"
446 "you are vulnerable to IRQ injection.\n");
447
433 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 448 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
434} 449}
435 450
@@ -526,20 +541,24 @@ static int __init intel_irq_remapping_supported(void)
526static int __init intel_enable_irq_remapping(void) 541static int __init intel_enable_irq_remapping(void)
527{ 542{
528 struct dmar_drhd_unit *drhd; 543 struct dmar_drhd_unit *drhd;
544 bool x2apic_present;
529 int setup = 0; 545 int setup = 0;
530 int eim = 0; 546 int eim = 0;
531 547
548 x2apic_present = x2apic_supported();
549
532 if (parse_ioapics_under_ir() != 1) { 550 if (parse_ioapics_under_ir() != 1) {
533 printk(KERN_INFO "Cannot enable interrupt remapping\n"); 551 printk(KERN_INFO "Cannot enable interrupt remapping\n");
534 return -1; 552 goto error;
535 } 553 }
536 554
537 if (x2apic_supported()) { 555 if (x2apic_present) {
538 eim = !dmar_x2apic_optout(); 556 eim = !dmar_x2apic_optout();
539 WARN(!eim, KERN_WARNING 557 if (!eim)
540 "Your BIOS is broken and requested that x2apic be disabled\n" 558 printk(KERN_WARNING
541 "This will leave your machine vulnerable to irq-injection attacks\n" 559 "Your BIOS is broken and requested that x2apic be disabled.\n"
542 "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); 560 "This will slightly decrease performance.\n"
561 "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
543 } 562 }
544 563
545 for_each_drhd_unit(drhd) { 564 for_each_drhd_unit(drhd) {
@@ -578,7 +597,7 @@ static int __init intel_enable_irq_remapping(void)
578 if (eim && !ecap_eim_support(iommu->ecap)) { 597 if (eim && !ecap_eim_support(iommu->ecap)) {
579 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " 598 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
580 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); 599 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
581 return -1; 600 goto error;
582 } 601 }
583 } 602 }
584 603
@@ -594,7 +613,7 @@ static int __init intel_enable_irq_remapping(void)
594 printk(KERN_ERR "DRHD %Lx: failed to enable queued, " 613 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
595 " invalidation, ecap %Lx, ret %d\n", 614 " invalidation, ecap %Lx, ret %d\n",
596 drhd->reg_base_addr, iommu->ecap, ret); 615 drhd->reg_base_addr, iommu->ecap, ret);
597 return -1; 616 goto error;
598 } 617 }
599 } 618 }
600 619
@@ -617,6 +636,14 @@ static int __init intel_enable_irq_remapping(void)
617 goto error; 636 goto error;
618 637
619 irq_remapping_enabled = 1; 638 irq_remapping_enabled = 1;
639
640 /*
641 * VT-d has a different layout for IO-APIC entries when
642 * interrupt remapping is enabled. So it needs a special routine
643 * to print IO-APIC entries for debugging purposes too.
644 */
645 x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
646
620 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); 647 pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
621 648
622 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; 649 return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
@@ -625,6 +652,11 @@ error:
625 /* 652 /*
626 * handle error condition gracefully here! 653 * handle error condition gracefully here!
627 */ 654 */
655
656 if (x2apic_present)
657 WARN(1, KERN_WARNING
658 "Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
659
628 return -1; 660 return -1;
629} 661}
630 662
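
Two hardening changes stand out in this file: the enable path now clears DMA_GCMD_CFI in the same write that sets DMA_GCMD_IRE, then reads the status register back to confirm compatibility-format interrupts are really off; and the error path warns loudly when x2apic is present but remapping could not be enabled. A hedged sketch of the enable sequence ("regs" is an assumed ioremap()ed MMIO base, "gcmd" the driver's shadow of the Global Command register):

    gcmd |= DMA_GCMD_IRE;        /* enable interrupt remapping */
    gcmd &= ~DMA_GCMD_CFI;       /* block compatibility-format MSIs */
    writel(gcmd, regs + DMAR_GCMD_REG);

    if (readl(regs + DMAR_GSTS_REG) & DMA_GSTS_CFIS)
            pr_warn("compatibility-format IRQs still enabled\n");
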
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index faf85d6e33fe..d56f8c17c5fe 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,11 +1,18 @@
1#include <linux/seq_file.h>
2#include <linux/cpumask.h>
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2#include <linux/string.h> 4#include <linux/string.h>
3#include <linux/cpumask.h> 5#include <linux/cpumask.h>
4#include <linux/errno.h> 6#include <linux/errno.h>
5#include <linux/msi.h> 7#include <linux/msi.h>
8#include <linux/irq.h>
9#include <linux/pci.h>
6 10
7#include <asm/hw_irq.h> 11#include <asm/hw_irq.h>
8#include <asm/irq_remapping.h> 12#include <asm/irq_remapping.h>
13#include <asm/processor.h>
14#include <asm/x86_init.h>
15#include <asm/apic.h>
9 16
10#include "irq_remapping.h" 17#include "irq_remapping.h"
11 18
@@ -17,6 +24,152 @@ int no_x2apic_optout;
17 24
18static struct irq_remap_ops *remap_ops; 25static struct irq_remap_ops *remap_ops;
19 26
27static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
28static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
29 int index, int sub_handle);
30static int set_remapped_irq_affinity(struct irq_data *data,
31 const struct cpumask *mask,
32 bool force);
33
34static bool irq_remapped(struct irq_cfg *cfg)
35{
36 return (cfg->remapped == 1);
37}
38
39static void irq_remapping_disable_io_apic(void)
40{
41 /*
42 * With interrupt-remapping, for now we will use virtual wire A
43 * mode, as virtual wire B is a little more complex (we need to configure
44 * both IOAPIC RTE as well as interrupt-remapping table entry).
45 * As this gets called during crash dump, keep this simple for
46 * now.
47 */
48 if (cpu_has_apic || apic_from_smp_config())
49 disconnect_bsp_APIC(0);
50}
51
52static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
53{
54 int node, ret, sub_handle, index = 0;
55 unsigned int irq;
56 struct msi_desc *msidesc;
57
58 nvec = __roundup_pow_of_two(nvec);
59
60 WARN_ON(!list_is_singular(&dev->msi_list));
61 msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
62 WARN_ON(msidesc->irq);
63 WARN_ON(msidesc->msi_attrib.multiple);
64
65 node = dev_to_node(&dev->dev);
66 irq = __create_irqs(get_nr_irqs_gsi(), nvec, node);
67 if (irq == 0)
68 return -ENOSPC;
69
70 msidesc->msi_attrib.multiple = ilog2(nvec);
71 for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
72 if (!sub_handle) {
73 index = msi_alloc_remapped_irq(dev, irq, nvec);
74 if (index < 0) {
75 ret = index;
76 goto error;
77 }
78 } else {
79 ret = msi_setup_remapped_irq(dev, irq + sub_handle,
80 index, sub_handle);
81 if (ret < 0)
82 goto error;
83 }
84 ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
85 if (ret < 0)
86 goto error;
87 }
88 return 0;
89
90error:
91 destroy_irqs(irq, nvec);
92
93 /*
94 * Restore altered MSI descriptor fields and prevent the just-destroyed
95 * IRQs from being torn down again in default_teardown_msi_irqs()
96 */
97 msidesc->irq = 0;
98 msidesc->msi_attrib.multiple = 0;
99
100 return ret;
101}
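
The nvec = __roundup_pow_of_two(nvec) step above follows from the MSI spec: a function grants multiple messages only in power-of-two blocks, and the Multiple Message Enable field (stored in msi_attrib.multiple) is log2-encoded. A standalone illustration of that arithmetic:

    #include <stdio.h>

    static unsigned int roundup_pow_of_two(unsigned int n)
    {
            unsigned int p = 1;

            while (p < n)
                    p <<= 1;
            return p;
    }

    int main(void)
    {
            unsigned int nvec = 5;                    /* requested */
            unsigned int granted = roundup_pow_of_two(nvec);
            unsigned int multiple = 0;

            while ((1u << multiple) < granted)        /* ilog2(granted) */
                    multiple++;
            printf("request %u -> grant %u, multiple=%u\n",
                   nvec, granted, multiple);
            return 0;                                 /* 5 -> 8, multiple=3 */
    }
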
102
103static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
104{
105 int node, ret, sub_handle, index = 0;
106 struct msi_desc *msidesc;
107 unsigned int irq;
108
109 node = dev_to_node(&dev->dev);
110 irq = get_nr_irqs_gsi();
111 sub_handle = 0;
112
113 list_for_each_entry(msidesc, &dev->msi_list, list) {
114
115 irq = create_irq_nr(irq, node);
116 if (irq == 0)
117 return -1;
118
119 if (sub_handle == 0)
120 ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
121 else
122 ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
123
124 if (ret < 0)
125 goto error;
126
127 ret = setup_msi_irq(dev, msidesc, irq, 0);
128 if (ret < 0)
129 goto error;
130
131 sub_handle += 1;
132 irq += 1;
133 }
134
135 return 0;
136
137error:
138 destroy_irq(irq);
139 return ret;
140}
141
142static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
143 int nvec, int type)
144{
145 if (type == PCI_CAP_ID_MSI)
146 return do_setup_msi_irqs(dev, nvec);
147 else
148 return do_setup_msix_irqs(dev, nvec);
149}
150
151void eoi_ioapic_pin_remapped(int apic, int pin, int vector)
152{
153 /*
154 * Intr-remapping uses pin number as the virtual vector
155 * in the RTE. Actual vector is programmed in
156 * intr-remapping table entry. Hence for the io-apic
157 * EOI we use the pin number.
158 */
159 io_apic_eoi(apic, pin);
160}
161
162static void __init irq_remapping_modify_x86_ops(void)
163{
164 x86_io_apic_ops.disable = irq_remapping_disable_io_apic;
165 x86_io_apic_ops.set_affinity = set_remapped_irq_affinity;
166 x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
167 x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped;
168 x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs;
169 x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
170 x86_msi.compose_msi_msg = compose_remapped_msi_msg;
171}
172
20static __init int setup_nointremap(char *str) 173static __init int setup_nointremap(char *str)
21{ 174{
22 disable_irq_remap = 1; 175 disable_irq_remap = 1;
@@ -79,15 +232,24 @@ int __init irq_remapping_prepare(void)
79 232
80int __init irq_remapping_enable(void) 233int __init irq_remapping_enable(void)
81{ 234{
235 int ret;
236
82 if (!remap_ops || !remap_ops->enable) 237 if (!remap_ops || !remap_ops->enable)
83 return -ENODEV; 238 return -ENODEV;
84 239
85 return remap_ops->enable(); 240 ret = remap_ops->enable();
241
242 if (irq_remapping_enabled)
243 irq_remapping_modify_x86_ops();
244
245 return ret;
86} 246}
87 247
88void irq_remapping_disable(void) 248void irq_remapping_disable(void)
89{ 249{
90 if (!remap_ops || !remap_ops->disable) 250 if (!irq_remapping_enabled ||
251 !remap_ops ||
252 !remap_ops->disable)
91 return; 253 return;
92 254
93 remap_ops->disable(); 255 remap_ops->disable();
@@ -95,7 +257,9 @@ void irq_remapping_disable(void)
95 257
96int irq_remapping_reenable(int mode) 258int irq_remapping_reenable(int mode)
97{ 259{
98 if (!remap_ops || !remap_ops->reenable) 260 if (!irq_remapping_enabled ||
261 !remap_ops ||
262 !remap_ops->reenable)
99 return 0; 263 return 0;
100 264
101 return remap_ops->reenable(mode); 265 return remap_ops->reenable(mode);
@@ -103,6 +267,9 @@ int irq_remapping_reenable(int mode)
103 267
104int __init irq_remap_enable_fault_handling(void) 268int __init irq_remap_enable_fault_handling(void)
105{ 269{
270 if (!irq_remapping_enabled)
271 return 0;
272
106 if (!remap_ops || !remap_ops->enable_faulting) 273 if (!remap_ops || !remap_ops->enable_faulting)
107 return -ENODEV; 274 return -ENODEV;
108 275
@@ -133,23 +300,28 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask,
133 300
134void free_remapped_irq(int irq) 301void free_remapped_irq(int irq)
135{ 302{
303 struct irq_cfg *cfg = irq_get_chip_data(irq);
304
136 if (!remap_ops || !remap_ops->free_irq) 305 if (!remap_ops || !remap_ops->free_irq)
137 return; 306 return;
138 307
139 remap_ops->free_irq(irq); 308 if (irq_remapped(cfg))
309 remap_ops->free_irq(irq);
140} 310}
141 311
142void compose_remapped_msi_msg(struct pci_dev *pdev, 312void compose_remapped_msi_msg(struct pci_dev *pdev,
143 unsigned int irq, unsigned int dest, 313 unsigned int irq, unsigned int dest,
144 struct msi_msg *msg, u8 hpet_id) 314 struct msi_msg *msg, u8 hpet_id)
145{ 315{
146 if (!remap_ops || !remap_ops->compose_msi_msg) 316 struct irq_cfg *cfg = irq_get_chip_data(irq);
147 return;
148 317
149 remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); 318 if (!irq_remapped(cfg))
319 native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
320 else if (remap_ops && remap_ops->compose_msi_msg)
321 remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
150} 322}
151 323
152int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) 324static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
153{ 325{
154 if (!remap_ops || !remap_ops->msi_alloc_irq) 326 if (!remap_ops || !remap_ops->msi_alloc_irq)
155 return -ENODEV; 327 return -ENODEV;
@@ -157,8 +329,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
157 return remap_ops->msi_alloc_irq(pdev, irq, nvec); 329 return remap_ops->msi_alloc_irq(pdev, irq, nvec);
158} 330}
159 331
160int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, 332static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
161 int index, int sub_handle) 333 int index, int sub_handle)
162{ 334{
163 if (!remap_ops || !remap_ops->msi_setup_irq) 335 if (!remap_ops || !remap_ops->msi_setup_irq)
164 return -ENODEV; 336 return -ENODEV;
@@ -173,3 +345,42 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
173 345
174 return remap_ops->setup_hpet_msi(irq, id); 346 return remap_ops->setup_hpet_msi(irq, id);
175} 347}
348
349void panic_if_irq_remap(const char *msg)
350{
351 if (irq_remapping_enabled)
352 panic(msg);
353}
354
355static void ir_ack_apic_edge(struct irq_data *data)
356{
357 ack_APIC_irq();
358}
359
360static void ir_ack_apic_level(struct irq_data *data)
361{
362 ack_APIC_irq();
363 eoi_ioapic_irq(data->irq, data->chip_data);
364}
365
366static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
367{
368 seq_printf(p, " IR-%s", data->chip->name);
369}
370
371void irq_remap_modify_chip_defaults(struct irq_chip *chip)
372{
373 chip->irq_print_chip = ir_print_prefix;
374 chip->irq_ack = ir_ack_apic_edge;
375 chip->irq_eoi = ir_ack_apic_level;
376 chip->irq_set_affinity = x86_io_apic_ops.set_affinity;
377}
378
379bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
380{
381 if (!irq_remapped(cfg))
382 return false;
383 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
384 irq_remap_modify_chip_defaults(chip);
385 return true;
386}
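
The tail of this file is the glue other x86 code calls into: setup_remapped_irq() checks the per-IRQ flag and only then marks the IRQ movable in process context and swaps the chip callbacks (ack via the local APIC, an "IR-" name prefix). A hedged caller sketch; "apic_chip" is an illustrative name, not from the diff:

    if (setup_remapped_irq(irq, cfg, &apic_chip)) {
            /* remapped: acks go to the local APIC, name gets "IR-" */
    } else {
            /* native IRQ: the chip's default callbacks stay in place */
    }
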
diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h
index 95363acb583f..ecb637670405 100644
--- a/drivers/iommu/irq_remapping.h
+++ b/drivers/iommu/irq_remapping.h
@@ -34,6 +34,7 @@ struct msi_msg;
34extern int disable_irq_remap; 34extern int disable_irq_remap;
35extern int disable_sourceid_checking; 35extern int disable_sourceid_checking;
36extern int no_x2apic_optout; 36extern int no_x2apic_optout;
37extern int irq_remapping_enabled;
37 38
38struct irq_remap_ops { 39struct irq_remap_ops {
39 /* Check whether Interrupt Remapping is supported */ 40 /* Check whether Interrupt Remapping is supported */
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 5f21f629b7ae..deda591f70b9 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/mISDNif.h> 19#include <linux/mISDNif.h>
20#include <linux/kthread.h> 20#include <linux/kthread.h>
21#include <linux/sched.h>
21#include "core.h" 22#include "core.h"
22 23
23static u_int *debug; 24static u_int *debug;
@@ -202,6 +203,9 @@ static int
202mISDNStackd(void *data) 203mISDNStackd(void *data)
203{ 204{
204 struct mISDNstack *st = data; 205 struct mISDNstack *st = data;
206#ifdef MISDN_MSG_STATS
207 cputime_t utime, stime;
208#endif
205 int err = 0; 209 int err = 0;
206 210
207 sigfillset(&current->blocked); 211 sigfillset(&current->blocked);
@@ -303,9 +307,10 @@ mISDNStackd(void *data)
303 "msg %d sleep %d stopped\n", 307 "msg %d sleep %d stopped\n",
304 dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, 308 dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
305 st->stopped_cnt); 309 st->stopped_cnt);
310 task_cputime(st->thread, &utime, &stime);
306 printk(KERN_DEBUG 311 printk(KERN_DEBUG
307 "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n", 312 "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
308 dev_name(&st->dev->dev), st->thread->utime, st->thread->stime); 313 dev_name(&st->dev->dev), utime, stime);
309 printk(KERN_DEBUG 314 printk(KERN_DEBUG
310 "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", 315 "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
311 dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw); 316 dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
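
The mISDN change is one instance of a tree-wide conversion: with dynamic-tick cputime accounting the raw task_struct->utime/stime fields can lag, so readers go through the task_cputime() accessor, which folds in any not-yet-accounted delta. A sketch of the pattern:

    #include <linux/sched.h>

    /* Read a thread's CPU times through the accessor, never the
     * raw fields. */
    static void report_thread_times(struct task_struct *tsk)
    {
            cputime_t utime, stime;

            task_cputime(tsk, &utime, &stime);
            printk(KERN_DEBUG "utime(%ld) stime(%ld)\n",
                   (long)utime, (long)stime);
    }
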
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 49d95040096a..0223ad255cb4 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -1820,7 +1820,7 @@ static int dvb_frontend_ioctl(struct file *file,
1820 struct dvb_frontend *fe = dvbdev->priv; 1820 struct dvb_frontend *fe = dvbdev->priv;
1821 struct dtv_frontend_properties *c = &fe->dtv_property_cache; 1821 struct dtv_frontend_properties *c = &fe->dtv_property_cache;
1822 struct dvb_frontend_private *fepriv = fe->frontend_priv; 1822 struct dvb_frontend_private *fepriv = fe->frontend_priv;
1823 int err = -ENOTTY; 1823 int err = -EOPNOTSUPP;
1824 1824
1825 dev_dbg(fe->dvb->device, "%s: (%d)\n", __func__, _IOC_NR(cmd)); 1825 dev_dbg(fe->dvb->device, "%s: (%d)\n", __func__, _IOC_NR(cmd));
1826 if (fepriv->exit != DVB_FE_NO_EXIT) 1826 if (fepriv->exit != DVB_FE_NO_EXIT)
@@ -1938,7 +1938,7 @@ static int dvb_frontend_ioctl_properties(struct file *file,
1938 } 1938 }
1939 1939
1940 } else 1940 } else
1941 err = -ENOTTY; 1941 err = -EOPNOTSUPP;
1942 1942
1943out: 1943out:
1944 kfree(tvp); 1944 kfree(tvp);
@@ -2071,7 +2071,7 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
2071 struct dvb_frontend *fe = dvbdev->priv; 2071 struct dvb_frontend *fe = dvbdev->priv;
2072 struct dvb_frontend_private *fepriv = fe->frontend_priv; 2072 struct dvb_frontend_private *fepriv = fe->frontend_priv;
2073 struct dtv_frontend_properties *c = &fe->dtv_property_cache; 2073 struct dtv_frontend_properties *c = &fe->dtv_property_cache;
2074 int err = -ENOTTY; 2074 int err = -EOPNOTSUPP;
2075 2075
2076 switch (cmd) { 2076 switch (cmd) {
2077 case FE_GET_INFO: { 2077 case FE_GET_INFO: {
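
The errno change here is a revert for ABI reasons: existing DVB applications distinguish "frontend lacks this feature" by testing for EOPNOTSUPP, even though ENOTTY is the conventional errno for an unknown ioctl. A hedged userspace sketch of the probing pattern the revert preserves; the command and handling shown are illustrative:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/dvb/frontend.h>

    static int probe_cmd(int fd, unsigned long cmd, void *arg)
    {
            if (ioctl(fd, cmd, arg) == -1 && errno == EOPNOTSUPP) {
                    fprintf(stderr, "frontend: command unsupported\n");
                    return -1;
            }
            return 0;
    }
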
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 56d3f697e0c7..0035c01660b6 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -21,7 +21,7 @@
21 21
22#include "atl1c.h" 22#include "atl1c.h"
23 23
24#define ATL1C_DRV_VERSION "1.0.1.0-NAPI" 24#define ATL1C_DRV_VERSION "1.0.1.1-NAPI"
25char atl1c_driver_name[] = "atl1c"; 25char atl1c_driver_name[] = "atl1c";
26char atl1c_driver_version[] = ATL1C_DRV_VERSION; 26char atl1c_driver_version[] = ATL1C_DRV_VERSION;
27 27
@@ -1652,6 +1652,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
1652 u16 num_alloc = 0; 1652 u16 num_alloc = 0;
1653 u16 rfd_next_to_use, next_next; 1653 u16 rfd_next_to_use, next_next;
1654 struct atl1c_rx_free_desc *rfd_desc; 1654 struct atl1c_rx_free_desc *rfd_desc;
1655 dma_addr_t mapping;
1655 1656
1656 next_next = rfd_next_to_use = rfd_ring->next_to_use; 1657 next_next = rfd_next_to_use = rfd_ring->next_to_use;
1657 if (++next_next == rfd_ring->count) 1658 if (++next_next == rfd_ring->count)
@@ -1678,9 +1679,18 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
1678 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); 1679 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
1679 buffer_info->skb = skb; 1680 buffer_info->skb = skb;
1680 buffer_info->length = adapter->rx_buffer_len; 1681 buffer_info->length = adapter->rx_buffer_len;
1681 buffer_info->dma = pci_map_single(pdev, vir_addr, 1682 mapping = pci_map_single(pdev, vir_addr,
1682 buffer_info->length, 1683 buffer_info->length,
1683 PCI_DMA_FROMDEVICE); 1684 PCI_DMA_FROMDEVICE);
1685 if (unlikely(pci_dma_mapping_error(pdev, mapping))) {
1686 dev_kfree_skb(skb);
1687 buffer_info->skb = NULL;
1688 buffer_info->length = 0;
1689 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_FREE);
1690 netif_warn(adapter, rx_err, adapter->netdev, "RX pci_map_single failed");
1691 break;
1692 }
1693 buffer_info->dma = mapping;
1684 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, 1694 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
1685 ATL1C_PCIMAP_FROMDEVICE); 1695 ATL1C_PCIMAP_FROMDEVICE);
1686 rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); 1696 rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
@@ -2015,7 +2025,29 @@ check_sum:
2015 return 0; 2025 return 0;
2016} 2026}
2017 2027
2018static void atl1c_tx_map(struct atl1c_adapter *adapter, 2028static void atl1c_tx_rollback(struct atl1c_adapter *adpt,
2029 struct atl1c_tpd_desc *first_tpd,
2030 enum atl1c_trans_queue type)
2031{
2032 struct atl1c_tpd_ring *tpd_ring = &adpt->tpd_ring[type];
2033 struct atl1c_buffer *buffer_info;
2034 struct atl1c_tpd_desc *tpd;
2035 u16 first_index, index;
2036
2037 first_index = first_tpd - (struct atl1c_tpd_desc *)tpd_ring->desc;
2038 index = first_index;
2039 while (index != tpd_ring->next_to_use) {
2040 tpd = ATL1C_TPD_DESC(tpd_ring, index);
2041 buffer_info = &tpd_ring->buffer_info[index];
2042 atl1c_clean_buffer(adpt->pdev, buffer_info, 0);
2043 memset(tpd, 0, sizeof(struct atl1c_tpd_desc));
2044 if (++index == tpd_ring->count)
2045 index = 0;
2046 }
2047 tpd_ring->next_to_use = first_index;
2048}
2049
2050static int atl1c_tx_map(struct atl1c_adapter *adapter,
2019 struct sk_buff *skb, struct atl1c_tpd_desc *tpd, 2051 struct sk_buff *skb, struct atl1c_tpd_desc *tpd,
2020 enum atl1c_trans_queue type) 2052 enum atl1c_trans_queue type)
2021{ 2053{
@@ -2040,7 +2072,10 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
2040 buffer_info->length = map_len; 2072 buffer_info->length = map_len;
2041 buffer_info->dma = pci_map_single(adapter->pdev, 2073 buffer_info->dma = pci_map_single(adapter->pdev,
2042 skb->data, hdr_len, PCI_DMA_TODEVICE); 2074 skb->data, hdr_len, PCI_DMA_TODEVICE);
2043 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); 2075 if (unlikely(pci_dma_mapping_error(adapter->pdev,
2076 buffer_info->dma)))
2077 goto err_dma;
2078
2044 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, 2079 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
2045 ATL1C_PCIMAP_TODEVICE); 2080 ATL1C_PCIMAP_TODEVICE);
2046 mapped_len += map_len; 2081 mapped_len += map_len;
@@ -2062,6 +2097,10 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
2062 buffer_info->dma = 2097 buffer_info->dma =
2063 pci_map_single(adapter->pdev, skb->data + mapped_len, 2098 pci_map_single(adapter->pdev, skb->data + mapped_len,
2064 buffer_info->length, PCI_DMA_TODEVICE); 2099 buffer_info->length, PCI_DMA_TODEVICE);
2100 if (unlikely(pci_dma_mapping_error(adapter->pdev,
2101 buffer_info->dma)))
2102 goto err_dma;
2103
2065 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); 2104 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
2066 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, 2105 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE,
2067 ATL1C_PCIMAP_TODEVICE); 2106 ATL1C_PCIMAP_TODEVICE);
@@ -2083,6 +2122,9 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
2083 frag, 0, 2122 frag, 0,
2084 buffer_info->length, 2123 buffer_info->length,
2085 DMA_TO_DEVICE); 2124 DMA_TO_DEVICE);
2125 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma))
2126 goto err_dma;
2127
2086 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); 2128 ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY);
2087 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_PAGE, 2129 ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_PAGE,
2088 ATL1C_PCIMAP_TODEVICE); 2130 ATL1C_PCIMAP_TODEVICE);
@@ -2095,6 +2137,13 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
2095 /* The last buffer info contains the skb address, 2137 /* The last buffer info contains the skb address,
2096 so it will be freed after unmap */ 2138 so it will be freed after unmap */
2097 buffer_info->skb = skb; 2139 buffer_info->skb = skb;
2140
2141 return 0;
2142
2143err_dma:
2144 buffer_info->dma = 0;
2145 buffer_info->length = 0;
2146 return -1;
2098} 2147}
2099 2148
2100static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb, 2149static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb,
@@ -2157,10 +2206,18 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
2157 if (skb_network_offset(skb) != ETH_HLEN) 2206 if (skb_network_offset(skb) != ETH_HLEN)
2158 tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */ 2207 tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */
2159 2208
2160 atl1c_tx_map(adapter, skb, tpd, type); 2209 if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
2161 atl1c_tx_queue(adapter, skb, tpd, type); 2210 netif_info(adapter, tx_done, adapter->netdev,
2211 "tx-skb droppted due to dma error\n");
2212 /* roll back tpd/buffer */
2213 atl1c_tx_rollback(adapter, tpd, type);
2214 spin_unlock_irqrestore(&adapter->tx_lock, flags);
2215 dev_kfree_skb(skb);
2216 } else {
2217 atl1c_tx_queue(adapter, skb, tpd, type);
2218 spin_unlock_irqrestore(&adapter->tx_lock, flags);
2219 }
2162 2220
2163 spin_unlock_irqrestore(&adapter->tx_lock, flags);
2164 return NETDEV_TX_OK; 2221 return NETDEV_TX_OK;
2165} 2222}
2166 2223
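
The common thread in the atl1c hunks: every pci_map_single()/skb_frag_dma_map() result is now validated before it reaches a descriptor, and failures unwind (free the skb on RX, roll the TPD ring back on TX) instead of handing the NIC a bogus bus address. A sketch of the per-buffer pattern, under the PCI DMA API of this era:

    #include <linux/pci.h>

    /* Map one buffer for device access; on failure the caller is
     * expected to roll back any mappings made so far. */
    static int map_one_buffer(struct pci_dev *pdev, void *data,
                              size_t len, dma_addr_t *dma)
    {
            *dma = pci_map_single(pdev, data, len, PCI_DMA_TODEVICE);
            if (pci_dma_mapping_error(pdev, *dma)) {
                    *dma = 0;
                    return -ENOMEM;
            }
            return 0;
    }
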
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index f771ddfba646..a5edac8df67b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -504,13 +504,11 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
504 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp, 504 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
505 tpa_info->parsing_flags, len_on_bd); 505 tpa_info->parsing_flags, len_on_bd);
506 506
507 /* set for GRO */ 507 skb_shinfo(skb)->gso_type =
508 if (fp->mode == TPA_MODE_GRO) 508 (GET_FLAG(tpa_info->parsing_flags,
509 skb_shinfo(skb)->gso_type = 509 PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
510 (GET_FLAG(tpa_info->parsing_flags, 510 PRS_FLAG_OVERETH_IPV6) ?
511 PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == 511 SKB_GSO_TCPV6 : SKB_GSO_TCPV4;
512 PRS_FLAG_OVERETH_IPV6) ?
513 SKB_GSO_TCPV6 : SKB_GSO_TCPV4;
514 } 512 }
515 513
516 514
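
Distilled, the rule this hunk (and the ixgbe and qlcnic hunks below) enforces: any receive path that sets gso_size on an aggregated skb must also set gso_type, otherwise the stack cannot re-segment the packet later, e.g. when forwarding it. A minimal sketch:

    #include <linux/skbuff.h>

    /* Tag an LRO/TPA-aggregated skb so GSO can split it again. */
    static void mark_aggregated(struct sk_buff *skb, u16 mss, bool ipv6)
    {
            skb_shinfo(skb)->gso_size = mss;
            skb_shinfo(skb)->gso_type = ipv6 ? SKB_GSO_TCPV6
                                             : SKB_GSO_TCPV4;
    }
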
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index a9b0830fb39d..b9d4bb9530e5 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -693,6 +693,11 @@ static int macb_poll(struct napi_struct *napi, int budget)
693 * get notified when new packets arrive. 693 * get notified when new packets arrive.
694 */ 694 */
695 macb_writel(bp, IER, MACB_RX_INT_FLAGS); 695 macb_writel(bp, IER, MACB_RX_INT_FLAGS);
696
697 /* Packets received while interrupts were disabled */
698 status = macb_readl(bp, RSR);
699 if (unlikely(status))
700 napi_reschedule(napi);
696 } 701 }
697 702
698 /* TODO: Handle errors */ 703 /* TODO: Handle errors */
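
A hedged sketch of the race the macb hunk closes: frames that arrive between the final descriptor scan and the IER write would otherwise sit unprocessed until the next interrupt. Re-reading RSR after re-enabling, and rescheduling NAPI if it is non-zero, covers that window:

    napi_complete(napi);
    macb_writel(bp, IER, MACB_RX_INT_FLAGS);  /* re-enable RX interrupts */

    if (macb_readl(bp, RSR))                  /* late arrivals? */
            napi_reschedule(napi);            /* poll once more */
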
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 20a5af6d87d0..b3e3294cfe53 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1401,6 +1401,7 @@ static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
1401 /* set gso_size to avoid messing up TCP MSS */ 1401 /* set gso_size to avoid messing up TCP MSS */
1402 skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), 1402 skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
1403 IXGBE_CB(skb)->append_cnt); 1403 IXGBE_CB(skb)->append_cnt);
1404 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1404} 1405}
1405 1406
1406static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring, 1407static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 6f82812d0fab..09aa310b6194 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -986,8 +986,13 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter,
986 th->seq = htonl(seq_number); 986 th->seq = htonl(seq_number);
987 length = skb->len; 987 length = skb->len;
988 988
989 if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP) 989 if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP) {
990 skb_shinfo(skb)->gso_size = qlcnic_get_lro_sts_mss(sts_data1); 990 skb_shinfo(skb)->gso_size = qlcnic_get_lro_sts_mss(sts_data1);
991 if (skb->protocol == htons(ETH_P_IPV6))
992 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
993 else
994 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
995 }
991 996
992 if (vid != 0xffff) 997 if (vid != 0xffff)
993 __vlan_hwaccel_put_tag(skb, vid); 998 __vlan_hwaccel_put_tag(skb, vid);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 11702324a071..998974f78742 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -450,7 +450,6 @@ enum rtl8168_registers {
450#define PWM_EN (1 << 22) 450#define PWM_EN (1 << 22)
451#define RXDV_GATED_EN (1 << 19) 451#define RXDV_GATED_EN (1 << 19)
452#define EARLY_TALLY_EN (1 << 16) 452#define EARLY_TALLY_EN (1 << 16)
453#define FORCE_CLK (1 << 15) /* force clock request */
454}; 453};
455 454
456enum rtl_register_content { 455enum rtl_register_content {
@@ -514,7 +513,6 @@ enum rtl_register_content {
514 PMEnable = (1 << 0), /* Power Management Enable */ 513 PMEnable = (1 << 0), /* Power Management Enable */
515 514
516 /* Config2 register p. 25 */ 515 /* Config2 register p. 25 */
517 ClkReqEn = (1 << 7), /* Clock Request Enable */
518 MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */ 516 MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */
519 PCI_Clock_66MHz = 0x01, 517 PCI_Clock_66MHz = 0x01,
520 PCI_Clock_33MHz = 0x00, 518 PCI_Clock_33MHz = 0x00,
@@ -535,7 +533,6 @@ enum rtl_register_content {
535 Spi_en = (1 << 3), 533 Spi_en = (1 << 3),
536 LanWake = (1 << 1), /* LanWake enable/disable */ 534 LanWake = (1 << 1), /* LanWake enable/disable */
537 PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */ 535 PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */
538 ASPM_en = (1 << 0), /* ASPM enable */
539 536
540 /* TBICSR p.28 */ 537 /* TBICSR p.28 */
541 TBIReset = 0x80000000, 538 TBIReset = 0x80000000,
@@ -684,7 +681,6 @@ enum features {
684 RTL_FEATURE_WOL = (1 << 0), 681 RTL_FEATURE_WOL = (1 << 0),
685 RTL_FEATURE_MSI = (1 << 1), 682 RTL_FEATURE_MSI = (1 << 1),
686 RTL_FEATURE_GMII = (1 << 2), 683 RTL_FEATURE_GMII = (1 << 2),
687 RTL_FEATURE_FW_LOADED = (1 << 3),
688}; 684};
689 685
690struct rtl8169_counters { 686struct rtl8169_counters {
@@ -2389,10 +2385,8 @@ static void rtl_apply_firmware(struct rtl8169_private *tp)
2389 struct rtl_fw *rtl_fw = tp->rtl_fw; 2385 struct rtl_fw *rtl_fw = tp->rtl_fw;
2390 2386
2391 /* TODO: release firmware once rtl_phy_write_fw signals failures. */ 2387 /* TODO: release firmware once rtl_phy_write_fw signals failures. */
2392 if (!IS_ERR_OR_NULL(rtl_fw)) { 2388 if (!IS_ERR_OR_NULL(rtl_fw))
2393 rtl_phy_write_fw(tp, rtl_fw); 2389 rtl_phy_write_fw(tp, rtl_fw);
2394 tp->features |= RTL_FEATURE_FW_LOADED;
2395 }
2396} 2390}
2397 2391
2398static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) 2392static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -2403,31 +2397,6 @@ static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
2403 rtl_apply_firmware(tp); 2397 rtl_apply_firmware(tp);
2404} 2398}
2405 2399
2406static void r810x_aldps_disable(struct rtl8169_private *tp)
2407{
2408 rtl_writephy(tp, 0x1f, 0x0000);
2409 rtl_writephy(tp, 0x18, 0x0310);
2410 msleep(100);
2411}
2412
2413static void r810x_aldps_enable(struct rtl8169_private *tp)
2414{
2415 if (!(tp->features & RTL_FEATURE_FW_LOADED))
2416 return;
2417
2418 rtl_writephy(tp, 0x1f, 0x0000);
2419 rtl_writephy(tp, 0x18, 0x8310);
2420}
2421
2422static void r8168_aldps_enable_1(struct rtl8169_private *tp)
2423{
2424 if (!(tp->features & RTL_FEATURE_FW_LOADED))
2425 return;
2426
2427 rtl_writephy(tp, 0x1f, 0x0000);
2428 rtl_w1w0_phy(tp, 0x15, 0x1000, 0x0000);
2429}
2430
2431static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) 2400static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
2432{ 2401{
2433 static const struct phy_reg phy_reg_init[] = { 2402 static const struct phy_reg phy_reg_init[] = {
@@ -3218,8 +3187,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
3218 rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); 3187 rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400);
3219 rtl_writephy(tp, 0x1f, 0x0000); 3188 rtl_writephy(tp, 0x1f, 0x0000);
3220 3189
3221 r8168_aldps_enable_1(tp);
3222
3223 /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */ 3190 /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
3224 rtl_rar_exgmac_set(tp, tp->dev->dev_addr); 3191 rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
3225} 3192}
@@ -3294,8 +3261,6 @@ static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp)
3294 rtl_writephy(tp, 0x05, 0x8b85); 3261 rtl_writephy(tp, 0x05, 0x8b85);
3295 rtl_w1w0_phy(tp, 0x06, 0x4000, 0x0000); 3262 rtl_w1w0_phy(tp, 0x06, 0x4000, 0x0000);
3296 rtl_writephy(tp, 0x1f, 0x0000); 3263 rtl_writephy(tp, 0x1f, 0x0000);
3297
3298 r8168_aldps_enable_1(tp);
3299} 3264}
3300 3265
3301static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) 3266static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp)
@@ -3303,8 +3268,6 @@ static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp)
3303 rtl_apply_firmware(tp); 3268 rtl_apply_firmware(tp);
3304 3269
3305 rtl8168f_hw_phy_config(tp); 3270 rtl8168f_hw_phy_config(tp);
3306
3307 r8168_aldps_enable_1(tp);
3308} 3271}
3309 3272
3310static void rtl8411_hw_phy_config(struct rtl8169_private *tp) 3273static void rtl8411_hw_phy_config(struct rtl8169_private *tp)
@@ -3402,8 +3365,6 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp)
3402 rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001); 3365 rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001);
3403 rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); 3366 rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400);
3404 rtl_writephy(tp, 0x1f, 0x0000); 3367 rtl_writephy(tp, 0x1f, 0x0000);
3405
3406 r8168_aldps_enable_1(tp);
3407} 3368}
3408 3369
3409static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) 3370static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
@@ -3489,19 +3450,21 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
3489 }; 3450 };
3490 3451
3491 /* Disable ALDPS before ram code */ 3452 /* Disable ALDPS before ram code */
3492 r810x_aldps_disable(tp); 3453 rtl_writephy(tp, 0x1f, 0x0000);
3454 rtl_writephy(tp, 0x18, 0x0310);
3455 msleep(100);
3493 3456
3494 rtl_apply_firmware(tp); 3457 rtl_apply_firmware(tp);
3495 3458
3496 rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); 3459 rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
3497
3498 r810x_aldps_enable(tp);
3499} 3460}
3500 3461
3501static void rtl8402_hw_phy_config(struct rtl8169_private *tp) 3462static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
3502{ 3463{
3503 /* Disable ALDPS before setting firmware */ 3464 /* Disable ALDPS before setting firmware */
3504 r810x_aldps_disable(tp); 3465 rtl_writephy(tp, 0x1f, 0x0000);
3466 rtl_writephy(tp, 0x18, 0x0310);
3467 msleep(20);
3505 3468
3506 rtl_apply_firmware(tp); 3469 rtl_apply_firmware(tp);
3507 3470
@@ -3511,8 +3474,6 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
3511 rtl_writephy(tp, 0x10, 0x401f); 3474 rtl_writephy(tp, 0x10, 0x401f);
3512 rtl_writephy(tp, 0x19, 0x7030); 3475 rtl_writephy(tp, 0x19, 0x7030);
3513 rtl_writephy(tp, 0x1f, 0x0000); 3476 rtl_writephy(tp, 0x1f, 0x0000);
3514
3515 r810x_aldps_enable(tp);
3516} 3477}
3517 3478
3518static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) 3479static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
@@ -3525,7 +3486,9 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
3525 }; 3486 };
3526 3487
3527 /* Disable ALDPS before ram code */ 3488 /* Disable ALDPS before ram code */
3528 r810x_aldps_disable(tp); 3489 rtl_writephy(tp, 0x1f, 0x0000);
3490 rtl_writephy(tp, 0x18, 0x0310);
3491 msleep(100);
3529 3492
3530 rtl_apply_firmware(tp); 3493 rtl_apply_firmware(tp);
3531 3494
@@ -3533,8 +3496,6 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
3533 rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); 3496 rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
3534 3497
3535 rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); 3498 rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
3536
3537 r810x_aldps_enable(tp);
3538} 3499}
3539 3500
3540static void rtl_hw_phy_config(struct net_device *dev) 3501static void rtl_hw_phy_config(struct net_device *dev)
@@ -5051,6 +5012,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
5051 5012
5052 RTL_W8(MaxTxPacketSize, EarlySize); 5013 RTL_W8(MaxTxPacketSize, EarlySize);
5053 5014
5015 rtl_disable_clock_request(pdev);
5016
5054 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); 5017 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
5055 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); 5018 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
5056 5019
@@ -5059,8 +5022,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
5059 5022
5060 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); 5023 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
5061 RTL_W32(MISC, RTL_R32(MISC) | PWM_EN); 5024 RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
5062 RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); 5025 RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
5063 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5064} 5026}
5065 5027
5066static void rtl_hw_start_8168f(struct rtl8169_private *tp) 5028static void rtl_hw_start_8168f(struct rtl8169_private *tp)
@@ -5085,12 +5047,13 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
5085 5047
5086 RTL_W8(MaxTxPacketSize, EarlySize); 5048 RTL_W8(MaxTxPacketSize, EarlySize);
5087 5049
5050 rtl_disable_clock_request(pdev);
5051
5088 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); 5052 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
5089 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); 5053 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
5090 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); 5054 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
5091 RTL_W32(MISC, RTL_R32(MISC) | PWM_EN | FORCE_CLK); 5055 RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
5092 RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); 5056 RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
5093 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5094} 5057}
5095 5058
5096static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) 5059static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
@@ -5147,10 +5110,8 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
5147 rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); 5110 rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
5148 5111
5149 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); 5112 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
5150 RTL_W32(MISC, (RTL_R32(MISC) | FORCE_CLK) & ~RXDV_GATED_EN); 5113 RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
5151 RTL_W8(MaxTxPacketSize, EarlySize); 5114 RTL_W8(MaxTxPacketSize, EarlySize);
5152 RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
5153 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5154 5115
5155 rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); 5116 rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
5156 rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); 5117 rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -5366,9 +5327,6 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
5366 5327
5367 RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); 5328 RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
5368 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); 5329 RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
5369 RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
5370 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5371 RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK);
5372 5330
5373 rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); 5331 rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
5374} 5332}
@@ -5394,9 +5352,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
5394 5352
5395 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); 5353 RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
5396 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); 5354 RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
5397 RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
5398 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5399 RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK);
5400 5355
5401 rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); 5356 rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
5402 5357
@@ -5418,10 +5373,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
5418 /* Force LAN exit from ASPM if Rx/Tx are not idle */ 5373 /* Force LAN exit from ASPM if Rx/Tx are not idle */
5419 RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800); 5374 RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
5420 5375
5421 RTL_W32(MISC, 5376 RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
5422 (RTL_R32(MISC) | DISABLE_LAN_EN | FORCE_CLK) & ~EARLY_TALLY_EN);
5423 RTL_W8(Config5, RTL_R8(Config5) | ASPM_en);
5424 RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn);
5425 RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); 5377 RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
5426 RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN); 5378 RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
5427} 5379}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f07c0612abf6..b75f4b286895 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -69,7 +69,7 @@
69 69
70#undef STMMAC_XMIT_DEBUG 70#undef STMMAC_XMIT_DEBUG
71/*#define STMMAC_XMIT_DEBUG*/ 71/*#define STMMAC_XMIT_DEBUG*/
72#ifdef STMMAC_TX_DEBUG 72#ifdef STMMAC_XMIT_DEBUG
73#define TX_DBG(fmt, args...) printk(fmt, ## args) 73#define TX_DBG(fmt, args...) printk(fmt, ## args)
74#else 74#else
75#define TX_DBG(fmt, args...) do { } while (0) 75#define TX_DBG(fmt, args...) do { } while (0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0376a5e6b2bf..0b9829fe3eea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -188,8 +188,6 @@ int stmmac_mdio_register(struct net_device *ndev)
188 goto bus_register_fail; 188 goto bus_register_fail;
189 } 189 }
190 190
191 priv->mii = new_bus;
192
193 found = 0; 191 found = 0;
194 for (addr = 0; addr < PHY_MAX_ADDR; addr++) { 192 for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
195 struct phy_device *phydev = new_bus->phy_map[addr]; 193 struct phy_device *phydev = new_bus->phy_map[addr];
@@ -237,8 +235,14 @@ int stmmac_mdio_register(struct net_device *ndev)
237 } 235 }
238 } 236 }
239 237
240 if (!found) 238 if (!found) {
241 pr_warning("%s: No PHY found\n", ndev->name); 239 pr_warning("%s: No PHY found\n", ndev->name);
240 mdiobus_unregister(new_bus);
241 mdiobus_free(new_bus);
242 return -ENODEV;
243 }
244
245 priv->mii = new_bus;
242 246
243 return 0; 247 return 0;
244 248
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index c8e05e27f38c..19d903598b0d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -411,6 +411,7 @@ static const struct usb_device_id products[] = {
411 }, 411 },
412 412
413 /* 3. Combined interface devices matching on interface number */ 413 /* 3. Combined interface devices matching on interface number */
414 {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */
414 {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ 415 {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
415 {QMI_FIXED_INTF(0x19d2, 0x0002, 1)}, 416 {QMI_FIXED_INTF(0x19d2, 0x0002, 1)},
416 {QMI_FIXED_INTF(0x19d2, 0x0012, 1)}, 417 {QMI_FIXED_INTF(0x19d2, 0x0012, 1)},
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 83564d36e801..a00a03ea4ec9 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -318,20 +318,20 @@ struct mwl8k_sta {
318#define MWL8K_STA(_sta) ((struct mwl8k_sta *)&((_sta)->drv_priv)) 318#define MWL8K_STA(_sta) ((struct mwl8k_sta *)&((_sta)->drv_priv))
319 319
320static const struct ieee80211_channel mwl8k_channels_24[] = { 320static const struct ieee80211_channel mwl8k_channels_24[] = {
321 { .center_freq = 2412, .hw_value = 1, }, 321 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2412, .hw_value = 1, },
322 { .center_freq = 2417, .hw_value = 2, }, 322 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2417, .hw_value = 2, },
323 { .center_freq = 2422, .hw_value = 3, }, 323 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2422, .hw_value = 3, },
324 { .center_freq = 2427, .hw_value = 4, }, 324 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2427, .hw_value = 4, },
325 { .center_freq = 2432, .hw_value = 5, }, 325 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2432, .hw_value = 5, },
326 { .center_freq = 2437, .hw_value = 6, }, 326 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2437, .hw_value = 6, },
327 { .center_freq = 2442, .hw_value = 7, }, 327 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2442, .hw_value = 7, },
328 { .center_freq = 2447, .hw_value = 8, }, 328 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2447, .hw_value = 8, },
329 { .center_freq = 2452, .hw_value = 9, }, 329 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2452, .hw_value = 9, },
330 { .center_freq = 2457, .hw_value = 10, }, 330 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2457, .hw_value = 10, },
331 { .center_freq = 2462, .hw_value = 11, }, 331 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2462, .hw_value = 11, },
332 { .center_freq = 2467, .hw_value = 12, }, 332 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2467, .hw_value = 12, },
333 { .center_freq = 2472, .hw_value = 13, }, 333 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2472, .hw_value = 13, },
334 { .center_freq = 2484, .hw_value = 14, }, 334 { .band = IEEE80211_BAND_2GHZ, .center_freq = 2484, .hw_value = 14, },
335}; 335};
336 336
337static const struct ieee80211_rate mwl8k_rates_24[] = { 337static const struct ieee80211_rate mwl8k_rates_24[] = {
@@ -352,10 +352,10 @@ static const struct ieee80211_rate mwl8k_rates_24[] = {
352}; 352};
353 353
354static const struct ieee80211_channel mwl8k_channels_50[] = { 354static const struct ieee80211_channel mwl8k_channels_50[] = {
355 { .center_freq = 5180, .hw_value = 36, }, 355 { .band = IEEE80211_BAND_5GHZ, .center_freq = 5180, .hw_value = 36, },
356 { .center_freq = 5200, .hw_value = 40, }, 356 { .band = IEEE80211_BAND_5GHZ, .center_freq = 5200, .hw_value = 40, },
357 { .center_freq = 5220, .hw_value = 44, }, 357 { .band = IEEE80211_BAND_5GHZ, .center_freq = 5220, .hw_value = 44, },
358 { .center_freq = 5240, .hw_value = 48, }, 358 { .band = IEEE80211_BAND_5GHZ, .center_freq = 5240, .hw_value = 48, },
359}; 359};
360 360
361static const struct ieee80211_rate mwl8k_rates_50[] = { 361static const struct ieee80211_rate mwl8k_rates_50[] = {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5099636a6e5f..00cc78c7aa04 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
845} 845}
846EXPORT_SYMBOL(pci_enable_msi_block); 846EXPORT_SYMBOL(pci_enable_msi_block);
847 847
848int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
849{
850 int ret, pos, nvec;
851 u16 msgctl;
852
853 pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
854 if (!pos)
855 return -EINVAL;
856
857 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
858 ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
859
860 if (maxvec)
861 *maxvec = ret;
862
863 do {
864 nvec = ret;
865 ret = pci_enable_msi_block(dev, nvec);
866 } while (ret > 0);
867
868 if (ret < 0)
869 return ret;
870 return nvec;
871}
872EXPORT_SYMBOL(pci_enable_msi_block_auto);
873
848void pci_msi_shutdown(struct pci_dev *dev) 874void pci_msi_shutdown(struct pci_dev *dev)
849{ 875{
850 struct msi_desc *desc; 876 struct msi_desc *desc;
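
A usage sketch for the new export: ask for the device's full advertised MSI block and accept whatever power-of-two count the allocator can actually deliver, which is exactly the retry loop the helper wraps. probe_irqs() is an illustrative caller:

    #include <linux/pci.h>

    static int probe_irqs(struct pci_dev *pdev)
    {
            unsigned int maxvec;
            int nvec;

            nvec = pci_enable_msi_block_auto(pdev, &maxvec);
            if (nvec < 0)
                    return nvec;  /* no MSI capability or allocation failed */

            dev_info(&pdev->dev, "using %d of up to %u MSI vectors\n",
                     nvec, maxvec);
            return nvec;
    }
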
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea51736f18d..5ab14251839d 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
23 23
24#include "aerdrv.h" 24#include "aerdrv.h"
25 25
26#define CREATE_TRACE_POINTS
27#include <trace/events/ras.h>
28
26#define AER_AGENT_RECEIVER 0 29#define AER_AGENT_RECEIVER 0
27#define AER_AGENT_REQUESTER 1 30#define AER_AGENT_REQUESTER 1
28#define AER_AGENT_COMPLETER 2 31#define AER_AGENT_COMPLETER 2
@@ -121,12 +124,11 @@ static const char *aer_agent_string[] = {
121 "Transmitter ID" 124 "Transmitter ID"
122}; 125};
123 126
124static void __aer_print_error(const char *prefix, 127static void __aer_print_error(struct pci_dev *dev,
125 struct aer_err_info *info) 128 struct aer_err_info *info)
126{ 129{
127 int i, status; 130 int i, status;
128 const char *errmsg = NULL; 131 const char *errmsg = NULL;
129
130 status = (info->status & ~info->mask); 132 status = (info->status & ~info->mask);
131 133
132 for (i = 0; i < 32; i++) { 134 for (i = 0; i < 32; i++) {
@@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix,
141 aer_uncorrectable_error_string[i] : NULL; 143 aer_uncorrectable_error_string[i] : NULL;
142 144
143 if (errmsg) 145 if (errmsg)
144 printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg, 146 dev_err(&dev->dev, " [%2d] %-22s%s\n", i, errmsg,
145 info->first_error == i ? " (First)" : ""); 147 info->first_error == i ? " (First)" : "");
146 else 148 else
147 printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i, 149 dev_err(&dev->dev, " [%2d] Unknown Error Bit%s\n",
148 info->first_error == i ? " (First)" : ""); 150 i, info->first_error == i ? " (First)" : "");
149 } 151 }
150} 152}
151 153
152void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 154void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
153{ 155{
154 int id = ((dev->bus->number << 8) | dev->devfn); 156 int id = ((dev->bus->number << 8) | dev->devfn);
155 char prefix[44];
156
157 snprintf(prefix, sizeof(prefix), "%s%s %s: ",
158 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
159 dev_driver_string(&dev->dev), dev_name(&dev->dev));
160 157
161 if (info->status == 0) { 158 if (info->status == 0) {
162 printk("%s""PCIe Bus Error: severity=%s, type=Inaccessible, " 159 dev_err(&dev->dev,
163 "id=%04x(Unregistered Agent ID)\n", prefix, 160 "PCIe Bus Error: severity=%s, type=Inaccessible, "
161 "id=%04x(Unregistered Agent ID)\n",
164 aer_error_severity_string[info->severity], id); 162 aer_error_severity_string[info->severity], id);
165 } else { 163 } else {
166 int layer, agent; 164 int layer, agent;
@@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
168 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 166 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
169 agent = AER_GET_AGENT(info->severity, info->status); 167 agent = AER_GET_AGENT(info->severity, info->status);
170 168
171 printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", 169 dev_err(&dev->dev,
172 prefix, aer_error_severity_string[info->severity], 170 "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
171 aer_error_severity_string[info->severity],
173 aer_error_layer[layer], id, aer_agent_string[agent]); 172 aer_error_layer[layer], id, aer_agent_string[agent]);
174 173
175 printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n", 174 dev_err(&dev->dev,
176 prefix, dev->vendor, dev->device, 175 " device [%04x:%04x] error status/mask=%08x/%08x\n",
176 dev->vendor, dev->device,
177 info->status, info->mask); 177 info->status, info->mask);
178 178
179 __aer_print_error(prefix, info); 179 __aer_print_error(dev, info);
180 180
181 if (info->tlp_header_valid) { 181 if (info->tlp_header_valid) {
182 unsigned char *tlp = (unsigned char *) &info->tlp; 182 unsigned char *tlp = (unsigned char *) &info->tlp;
183 printk("%s"" TLP Header:" 183 dev_err(&dev->dev, " TLP Header:"
184 " %02x%02x%02x%02x %02x%02x%02x%02x" 184 " %02x%02x%02x%02x %02x%02x%02x%02x"
185 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 185 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
186 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 186 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
188 *(tlp + 11), *(tlp + 10), *(tlp + 9), 188 *(tlp + 11), *(tlp + 10), *(tlp + 9),
189 *(tlp + 8), *(tlp + 15), *(tlp + 14), 189 *(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
192 } 192 }
193 193
194 if (info->id && info->error_dev_num > 1 && info->id == id) 194 if (info->id && info->error_dev_num > 1 && info->id == id)
195 printk("%s"" Error of this Agent(%04x) is reported first\n", 195 dev_err(&dev->dev,
196 prefix, id); 196 " Error of this Agent(%04x) is reported first\n",
197 id);
198 trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
199 info->severity);
197} 200}
198 201
199void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) 202void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity)
217} 220}
218EXPORT_SYMBOL_GPL(cper_severity_to_aer); 221EXPORT_SYMBOL_GPL(cper_severity_to_aer);
219 222
220void cper_print_aer(const char *prefix, int cper_severity, 223void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
221 struct aer_capability_regs *aer) 224 struct aer_capability_regs *aer)
222{ 225{
223 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; 226 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity,
239 } 242 }
240 layer = AER_GET_LAYER_ERROR(aer_severity, status); 243 layer = AER_GET_LAYER_ERROR(aer_severity, status);
241 agent = AER_GET_AGENT(aer_severity, status); 244 agent = AER_GET_AGENT(aer_severity, status);
242 printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n", 245 dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
243 prefix, status, mask); 246 status, mask);
244 cper_print_bits(prefix, status, status_strs, status_strs_size); 247 cper_print_bits(prefix, status, status_strs, status_strs_size);
245 printk("%s""aer_layer=%s, aer_agent=%s\n", prefix, 248 dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n",
246 aer_error_layer[layer], aer_agent_string[agent]); 249 aer_error_layer[layer], aer_agent_string[agent]);
247 if (aer_severity != AER_CORRECTABLE) 250 if (aer_severity != AER_CORRECTABLE)
248 printk("%s""aer_uncor_severity: 0x%08x\n", 251 dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n",
249 prefix, aer->uncor_severity); 252 aer->uncor_severity);
250 if (tlp_header_valid) { 253 if (tlp_header_valid) {
251 const unsigned char *tlp; 254 const unsigned char *tlp;
252 tlp = (const unsigned char *)&aer->header_log; 255 tlp = (const unsigned char *)&aer->header_log;
253 printk("%s""aer_tlp_header:" 256 dev_err(&dev->dev, "aer_tlp_header:"
254 " %02x%02x%02x%02x %02x%02x%02x%02x" 257 " %02x%02x%02x%02x %02x%02x%02x%02x"
255 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 258 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
256 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 259 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
257 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 260 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
258 *(tlp + 11), *(tlp + 10), *(tlp + 9), 261 *(tlp + 11), *(tlp + 10), *(tlp + 9),
259 *(tlp + 8), *(tlp + 15), *(tlp + 14), 262 *(tlp + 8), *(tlp + 15), *(tlp + 14),
260 *(tlp + 13), *(tlp + 12)); 263 *(tlp + 13), *(tlp + 12));
261 } 264 }
265 trace_aer_event(dev_name(&dev->dev), (status & ~mask),
266 aer_severity);
262} 267}
263#endif 268#endif
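
The hunks above swap bare printk("%s" ...) calls for dev_err(), which stamps each AER line with the failing device's name instead of a hand-threaded prefix string. A minimal sketch of the same conversion, under the assumption of a hypothetical reporting helper (example_report() and its message are illustrative, not part of this patch):

#include <linux/device.h>
#include <linux/pci.h>

static void example_report(struct pci_dev *dev)
{
	/*
	 * dev_err() prefixes the message with the driver and device,
	 * e.g. "pcieport 0000:00:1c.0: link retrain failed", so no
	 * caller-supplied prefix string is needed.
	 */
	dev_err(&dev->dev, "link retrain failed\n");
}
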
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 7c0fd9252e6f..84954a726a94 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,6 +19,8 @@ static void pci_free_resources(struct pci_dev *dev)
19 19
20static void pci_stop_dev(struct pci_dev *dev) 20static void pci_stop_dev(struct pci_dev *dev)
21{ 21{
22 pci_pme_active(dev, false);
23
22 if (dev->is_added) { 24 if (dev->is_added) {
23 pci_proc_detach_device(dev); 25 pci_proc_detach_device(dev);
24 pci_remove_sysfs_dev_files(dev); 26 pci_remove_sysfs_dev_files(dev);
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 923a9da9c829..5e44eaabf457 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,14 +20,24 @@ if RTC_CLASS
20config RTC_HCTOSYS 20config RTC_HCTOSYS
21 bool "Set system time from RTC on startup and resume" 21 bool "Set system time from RTC on startup and resume"
22 default y 22 default y
23 depends on !ALWAYS_USE_PERSISTENT_CLOCK
23 help 24 help
24 If you say yes here, the system time (wall clock) will be set using 25 If you say yes here, the system time (wall clock) will be set using
25 the value read from a specified RTC device. This is useful to avoid 26 the value read from a specified RTC device. This is useful to avoid
26 unnecessary fsck runs at boot time, and to keep the clocks of 27 unnecessary fsck runs at boot time, and to keep the clocks of networked machines in agreement.
27 28
29config RTC_SYSTOHC
30 bool "Set the RTC time based on NTP synchronization"
31 default y
32 depends on !ALWAYS_USE_PERSISTENT_CLOCK
33 help
34 If you say yes here, the system time (wall clock) will be stored
35 in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
36 minutes if userspace reports synchronized NTP status.
37
28config RTC_HCTOSYS_DEVICE 38config RTC_HCTOSYS_DEVICE
29 string "RTC used to set the system time" 39 string "RTC used to set the system time"
30 depends on RTC_HCTOSYS = y 40 depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y
31 default "rtc0" 41 default "rtc0"
32 help 42 help
33 The RTC device that will be used to (re)initialize the system 43 The RTC device that will be used to (re)initialize the system
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 4418ef3f9ecc..ec2988b00a44 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -6,6 +6,7 @@ ccflags-$(CONFIG_RTC_DEBUG) := -DDEBUG
6 6
7obj-$(CONFIG_RTC_LIB) += rtc-lib.o 7obj-$(CONFIG_RTC_LIB) += rtc-lib.o
8obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o 8obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o
9obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
9obj-$(CONFIG_RTC_CLASS) += rtc-core.o 10obj-$(CONFIG_RTC_CLASS) += rtc-core.o
10rtc-core-y := class.o interface.o 11rtc-core-y := class.o interface.o
11 12
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 5143629dedbd..26388f182594 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -50,6 +50,10 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg)
50 struct rtc_device *rtc = to_rtc_device(dev); 50 struct rtc_device *rtc = to_rtc_device(dev);
51 struct rtc_time tm; 51 struct rtc_time tm;
52 struct timespec delta, delta_delta; 52 struct timespec delta, delta_delta;
53
54 if (has_persistent_clock())
55 return 0;
56
53 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) 57 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
54 return 0; 58 return 0;
55 59
@@ -88,6 +92,9 @@ static int rtc_resume(struct device *dev)
88 struct timespec new_system, new_rtc; 92 struct timespec new_system, new_rtc;
89 struct timespec sleep_time; 93 struct timespec sleep_time;
90 94
95 if (has_persistent_clock())
96 return 0;
97
91 rtc_hctosys_ret = -ENODEV; 98 rtc_hctosys_ret = -ENODEV;
92 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) 99 if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
93 return 0; 100 return 0;
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index 10c1a3454e48..81c5077feff3 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -350,7 +350,9 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
350 /* Enable the clockwatch on ST Variants */ 350 /* Enable the clockwatch on ST Variants */
351 if (vendor->clockwatch) 351 if (vendor->clockwatch)
352 data |= RTC_CR_CWEN; 352 data |= RTC_CR_CWEN;
353 writel(data | RTC_CR_EN, ldata->base + RTC_CR); 353 else
354 data |= RTC_CR_EN;
355 writel(data, ldata->base + RTC_CR);
354 356
355 /* 357 /*
356 * On ST PL031 variants, the RTC reset value does not provide correct 358 * On ST PL031 variants, the RTC reset value does not provide correct
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
new file mode 100644
index 000000000000..bf3e242ccc5c
--- /dev/null
+++ b/drivers/rtc/systohc.c
@@ -0,0 +1,44 @@
1/*
2 * This program is free software; you can redistribute it and/or modify it
3 * under the terms of the GNU General Public License version 2 as published by
4 * the Free Software Foundation.
5 *
6 */
7#include <linux/rtc.h>
8#include <linux/time.h>
9
10/**
11 * rtc_set_ntp_time - Save NTP synchronized time to the RTC
12 * @now: Current time of day
13 *
14 * Replacement for the NTP platform function update_persistent_clock
15 * that stores time for later retrieval by rtc_hctosys.
16 *
17 * Returns 0 on a successful RTC update, -ENODEV if an RTC update is not
18 * possible at all, and various other -errno for specific temporary failure
19 * cases.
20 *
21 * If a temporary failure is indicated, the caller should try again 'soon'.
22 */
23int rtc_set_ntp_time(struct timespec now)
24{
25 struct rtc_device *rtc;
26 struct rtc_time tm;
27 int err = -ENODEV;
28
29 if (now.tv_nsec < (NSEC_PER_SEC >> 1))
30 rtc_time_to_tm(now.tv_sec, &tm);
31 else
32 rtc_time_to_tm(now.tv_sec + 1, &tm);
33
34 rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
35 if (rtc) {
36 /* rtc_hctosys exclusively uses UTC, so we call set_time here,
37 * not set_mmss. */
38 if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
39 err = rtc_set_time(rtc, &tm);
40 rtc_class_close(rtc);
41 }
42
43 return err;
44}
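
A sketch of how an NTP-side caller might drive the new helper; sync_example() and its log messages are assumptions for illustration, only rtc_set_ntp_time() comes from this patch:

#include <linux/kernel.h>
#include <linux/rtc.h>
#include <linux/time.h>

static void sync_example(void)
{
	struct timespec now;
	int err;

	getnstimeofday(&now);
	err = rtc_set_ntp_time(now);
	if (err == -ENODEV)
		pr_warn("no usable RTC for NTP sync\n");	/* permanent failure */
	else if (err)
		pr_debug("RTC update failed (%d), retry soon\n", err);	/* temporary */
}
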
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..3a6083b386a1 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -33,7 +33,7 @@
33#include <linux/of_gpio.h> 33#include <linux/of_gpio.h>
34#include <linux/pm_runtime.h> 34#include <linux/pm_runtime.h>
35#include <linux/export.h> 35#include <linux/export.h>
36#include <linux/sched.h> 36#include <linux/sched/rt.h>
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/kthread.h> 38#include <linux/kthread.h>
39#include <linux/ioport.h> 39#include <linux/ioport.h>
diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c
index 1a1f5c79822a..7b133597e923 100644
--- a/drivers/staging/csr/bh.c
+++ b/drivers/staging/csr/bh.c
@@ -15,7 +15,7 @@
15 */ 15 */
16#include "csr_wifi_hip_unifi.h" 16#include "csr_wifi_hip_unifi.h"
17#include "unifi_priv.h" 17#include "unifi_priv.h"
18 18#include <linux/sched/rt.h>
19 19
20/* 20/*
21 * --------------------------------------------------------------------------- 21 * ---------------------------------------------------------------------------
diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c
index 7c6c4138fc76..49395da34b7f 100644
--- a/drivers/staging/csr/unifi_sme.c
+++ b/drivers/staging/csr/unifi_sme.c
@@ -15,7 +15,7 @@
15#include "unifi_priv.h" 15#include "unifi_priv.h"
16#include "csr_wifi_hip_unifi.h" 16#include "csr_wifi_hip_unifi.h"
17#include "csr_wifi_hip_conversions.h" 17#include "csr_wifi_hip_conversions.h"
18 18#include <linux/sched/rt.h>
19 19
20 20
21 21
diff --git a/drivers/staging/iio/trigger/Kconfig b/drivers/staging/iio/trigger/Kconfig
index 7d3207559265..d44d3ad26fa5 100644
--- a/drivers/staging/iio/trigger/Kconfig
+++ b/drivers/staging/iio/trigger/Kconfig
@@ -21,7 +21,6 @@ config IIO_GPIO_TRIGGER
21config IIO_SYSFS_TRIGGER 21config IIO_SYSFS_TRIGGER
22 tristate "SYSFS trigger" 22 tristate "SYSFS trigger"
23 depends on SYSFS 23 depends on SYSFS
24 depends on HAVE_IRQ_WORK
25 select IRQ_WORK 24 select IRQ_WORK
26 help 25 help
27 Provides support for using SYSFS entry as IIO triggers. 26 Provides support for using SYSFS entry as IIO triggers.
diff --git a/drivers/staging/omapdrm/Kconfig b/drivers/staging/omapdrm/Kconfig
index b724a4131435..09f65dc3d2c8 100644
--- a/drivers/staging/omapdrm/Kconfig
+++ b/drivers/staging/omapdrm/Kconfig
@@ -3,8 +3,8 @@ config DRM_OMAP
3 tristate "OMAP DRM" 3 tristate "OMAP DRM"
4 depends on DRM && !CONFIG_FB_OMAP2 4 depends on DRM && !CONFIG_FB_OMAP2
5 depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM 5 depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
6 depends on OMAP2_DSS
6 select DRM_KMS_HELPER 7 select DRM_KMS_HELPER
7 select OMAP2_DSS
8 select FB_SYS_FILLRECT 8 select FB_SYS_FILLRECT
9 select FB_SYS_COPYAREA 9 select FB_SYS_COPYAREA
10 select FB_SYS_IMAGEBLIT 10 select FB_SYS_IMAGEBLIT
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index b3c4a250ff86..40e5b3919e27 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -15,6 +15,7 @@
15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 16
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/sched/rt.h>
18#include <linux/interrupt.h> 19#include <linux/interrupt.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c
index 18688c12e30d..d7d66ef5cb58 100644
--- a/drivers/video/omap2/dss/dss_features.c
+++ b/drivers/video/omap2/dss/dss_features.c
@@ -538,6 +538,7 @@ static const enum dss_feat_id omap3630_dss_feat_list[] = {
538 FEAT_ALPHA_FIXED_ZORDER, 538 FEAT_ALPHA_FIXED_ZORDER,
539 FEAT_FIFO_MERGE, 539 FEAT_FIFO_MERGE,
540 FEAT_OMAP3_DSI_FIFO_BUG, 540 FEAT_OMAP3_DSI_FIFO_BUG,
541 FEAT_DPI_USES_VDDS_DSI,
541}; 542};
542 543
543static const enum dss_feat_id omap4430_es1_0_dss_feat_list[] = { 544static const enum dss_feat_id omap4430_es1_0_dss_feat_list[] = {
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74d77dfa5f63..22f77c5f6012 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1787,7 +1787,7 @@ void xen_callback_vector(void)
1787 int rc; 1787 int rc;
1788 uint64_t callback_via; 1788 uint64_t callback_via;
1789 if (xen_have_vector_callback) { 1789 if (xen_have_vector_callback) {
1790 callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK); 1790 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
1791 rc = xen_set_callback_via(callback_via); 1791 rc = xen_set_callback_via(callback_via);
1792 if (rc) { 1792 if (rc) {
1793 printk(KERN_ERR "Request for Xen HVM callback vector" 1793 printk(KERN_ERR "Request for Xen HVM callback vector"
@@ -1798,8 +1798,9 @@ void xen_callback_vector(void)
1798 printk(KERN_INFO "Xen HVM callback vector for event delivery is " 1798 printk(KERN_INFO "Xen HVM callback vector for event delivery is "
1799 "enabled\n"); 1799 "enabled\n");
1800 /* in the restore case the vector has already been allocated */ 1800 /* in the restore case the vector has already been allocated */
1801 if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors)) 1801 if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
1802 alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector); 1802 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
1803 xen_hvm_callback_vector);
1803 } 1804 }
1804} 1805}
1805#else 1806#else
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
index 067fcfa1723e..5a27a4599a4a 100644
--- a/drivers/xen/pcpu.c
+++ b/drivers/xen/pcpu.c
@@ -278,8 +278,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
278 * Only CPUs in the cpu present map have a sysfs interface. 278 * Only CPUs in the cpu present map have a sysfs interface.
279 */ 279 */
280 if (info->flags & XEN_PCPU_FLAGS_INVALID) { 280 if (info->flags & XEN_PCPU_FLAGS_INVALID) {
281 if (pcpu) 281 unregister_and_remove_pcpu(pcpu);
282 unregister_and_remove_pcpu(pcpu);
283 return 0; 282 return 0;
284 } 283 }
285 284
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..49d0b43458b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
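
Both coredump paths above now read utime/stime through the task_cputime() accessor rather than dereferencing the task fields directly, which lets full dynticks accounting compute the values on demand. A minimal sketch of the pattern (example_show_cputime() is illustrative):

#include <linux/kernel.h>
#include <linux/sched.h>

static void example_show_cputime(struct task_struct *p)
{
	cputime_t utime, stime;

	task_cputime(p, &utime, &stime);	/* never read p->utime directly */
	pr_info("%s: utime=%lu stime=%lu jiffies\n", p->comm,
		cputime_to_jiffies(utime), cputime_to_jiffies(stime));
}
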
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 7003e5266f25..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 9a62937c56ca..51969436b8b8 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,66 +4,12 @@
4#include <linux/time.h> 4#include <linux/time.h>
5#include <linux/jiffies.h> 5#include <linux/jiffies.h>
6 6
7typedef unsigned long __nocast cputime_t; 7#ifndef CONFIG_VIRT_CPU_ACCOUNTING
8 8# include <asm-generic/cputime_jiffies.h>
9#define cputime_one_jiffy jiffies_to_cputime(1) 9#endif
10#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
11#define cputime_to_scaled(__ct) (__ct)
12#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
13
14typedef u64 __nocast cputime64_t;
15
16#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
17#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
18
19#define nsecs_to_cputime64(__ct) \
20 jiffies64_to_cputime64(nsecs_to_jiffies64(__ct))
21
22
23/*
24 * Convert cputime to microseconds and back.
25 */
26#define cputime_to_usecs(__ct) \
27 jiffies_to_usecs(cputime_to_jiffies(__ct))
28#define usecs_to_cputime(__usec) \
29 jiffies_to_cputime(usecs_to_jiffies(__usec))
30#define usecs_to_cputime64(__usec) \
31 jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
32
33/*
34 * Convert cputime to seconds and back.
35 */
36#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
37#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
38
39/*
40 * Convert cputime to timespec and back.
41 */
42#define timespec_to_cputime(__val) \
43 jiffies_to_cputime(timespec_to_jiffies(__val))
44#define cputime_to_timespec(__ct,__val) \
45 jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
46
47/*
48 * Convert cputime to timeval and back.
49 */
50#define timeval_to_cputime(__val) \
51 jiffies_to_cputime(timeval_to_jiffies(__val))
52#define cputime_to_timeval(__ct,__val) \
53 jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
54
55/*
56 * Convert cputime to clock and back.
57 */
58#define cputime_to_clock_t(__ct) \
59 jiffies_to_clock_t(cputime_to_jiffies(__ct))
60#define clock_t_to_cputime(__x) \
61 jiffies_to_cputime(clock_t_to_jiffies(__x))
62 10
63/* 11#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
64 * Convert cputime64 to clock. 12# include <asm-generic/cputime_nsecs.h>
65 */ 13#endif
66#define cputime64_to_clock_t(__ct) \
67 jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
68 14
69#endif 15#endif
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h
new file mode 100644
index 000000000000..272ecba9f588
--- /dev/null
+++ b/include/asm-generic/cputime_jiffies.h
@@ -0,0 +1,72 @@
1#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
2#define _ASM_GENERIC_CPUTIME_JIFFIES_H
3
4typedef unsigned long __nocast cputime_t;
5
6#define cputime_one_jiffy jiffies_to_cputime(1)
7#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
8#define cputime_to_scaled(__ct) (__ct)
9#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
10
11typedef u64 __nocast cputime64_t;
12
13#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
14#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
15
16
17/*
18 * Convert nanoseconds to cputime
19 */
20#define nsecs_to_cputime64(__nsec) \
21 jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
22#define nsecs_to_cputime(__nsec) \
23 jiffies_to_cputime(nsecs_to_jiffies(__nsec))
24
25
26/*
27 * Convert cputime to microseconds and back.
28 */
29#define cputime_to_usecs(__ct) \
30 jiffies_to_usecs(cputime_to_jiffies(__ct))
31#define usecs_to_cputime(__usec) \
32 jiffies_to_cputime(usecs_to_jiffies(__usec))
33#define usecs_to_cputime64(__usec) \
34 jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
35
36/*
37 * Convert cputime to seconds and back.
38 */
39#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
40#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
41
42/*
43 * Convert cputime to timespec and back.
44 */
45#define timespec_to_cputime(__val) \
46 jiffies_to_cputime(timespec_to_jiffies(__val))
47#define cputime_to_timespec(__ct,__val) \
48 jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
49
50/*
51 * Convert cputime to timeval and back.
52 */
53#define timeval_to_cputime(__val) \
54 jiffies_to_cputime(timeval_to_jiffies(__val))
55#define cputime_to_timeval(__ct,__val) \
56 jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
57
58/*
59 * Convert cputime to clock and back.
60 */
61#define cputime_to_clock_t(__ct) \
62 jiffies_to_clock_t(cputime_to_jiffies(__ct))
63#define clock_t_to_cputime(__x) \
64 jiffies_to_cputime(clock_t_to_jiffies(__x))
65
66/*
67 * Convert cputime64 to clock.
68 */
69#define cputime64_to_clock_t(__ct) \
70 jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
71
72#endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
new file mode 100644
index 000000000000..b6485cafb7bd
--- /dev/null
+++ b/include/asm-generic/cputime_nsecs.h
@@ -0,0 +1,104 @@
1/*
2 * Definitions for measuring cputime in nsecs resolution.
3 *
4 * Based on <arch/ia64/include/asm/cputime.h>
5 *
6 * Copyright (C) 2007 FUJITSU LIMITED
7 * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 *
14 */
15
16#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
17#define _ASM_GENERIC_CPUTIME_NSECS_H
18
19typedef u64 __nocast cputime_t;
20typedef u64 __nocast cputime64_t;
21
22#define cputime_one_jiffy jiffies_to_cputime(1)
23
24/*
25 * Convert cputime <-> jiffies (HZ)
26 */
27#define cputime_to_jiffies(__ct) \
28 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
29#define cputime_to_scaled(__ct) (__ct)
30#define jiffies_to_cputime(__jif) \
31 (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
32#define cputime64_to_jiffies64(__ct) \
33 ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
34#define jiffies64_to_cputime64(__jif) \
35 (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
36
37
38/*
39 * Convert cputime <-> nanoseconds
40 */
41#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs))
42
43
44/*
45 * Convert cputime <-> microseconds
46 */
47#define cputime_to_usecs(__ct) \
48 ((__force u64)(__ct) / NSEC_PER_USEC)
49#define usecs_to_cputime(__usecs) \
50 (__force cputime_t)((__usecs) * NSEC_PER_USEC)
51#define usecs_to_cputime64(__usecs) \
52 (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
53
54/*
55 * Convert cputime <-> seconds
56 */
57#define cputime_to_secs(__ct) \
58 ((__force u64)(__ct) / NSEC_PER_SEC)
59#define secs_to_cputime(__secs) \
60 (__force cputime_t)((__secs) * NSEC_PER_SEC)
61
62/*
63 * Convert cputime <-> timespec (nsec)
64 */
65static inline cputime_t timespec_to_cputime(const struct timespec *val)
66{
67 u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
68 return (__force cputime_t) ret;
69}
70static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
71{
72 val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
73 val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
74}
75
76/*
77 * Convert cputime <-> timeval (msec)
78 */
79static inline cputime_t timeval_to_cputime(struct timeval *val)
80{
81 u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
82 return (__force cputime_t) ret;
83}
84static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
85{
86 val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
87 val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
88}
89
90/*
91 * Convert cputime <-> clock (USER_HZ)
92 */
93#define cputime_to_clock_t(__ct) \
94 ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
95#define clock_t_to_cputime(__x) \
96 (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
97
98/*
99 * Convert cputime64 to clock.
100 */
101#define cputime64_to_clock_t(__ct) \
102 cputime_to_clock_t((__force cputime_t)__ct)
103
104#endif
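
With nanosecond-granular cputime, the conversions above reduce to plain integer arithmetic. A quick runtime sanity sketch, assuming this header is the one in effect:

#include <linux/bug.h>
#include <linux/jiffies.h>
#include <linux/time.h>

static void cputime_nsecs_example(void)
{
	cputime_t ct = secs_to_cputime(2);	/* stored as 2 * NSEC_PER_SEC */

	BUG_ON(cputime_to_usecs(ct) != 2 * USEC_PER_SEC);
	BUG_ON(cputime_to_jiffies(ct) != 2 * HZ);
	BUG_ON(cputime_to_clock_t(ct) != 2 * USER_HZ);
}
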
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb2238c..ec10e1b24c1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
49} 49}
50#endif 50#endif
51 51
52extern void cper_print_aer(const char *prefix, int cper_severity, 52extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
53 struct aer_capability_regs *aer); 53 int cper_severity, struct aer_capability_regs *aer);
54extern int cper_severity_to_aer(int cper_severity); 54extern int cper_severity_to_aer(int cper_severity);
55extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, 55extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
56 int severity); 56 int severity);
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8a7096fcb01e..66346521cb65 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -161,6 +161,15 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
161extern void clockevents_suspend(void); 161extern void clockevents_suspend(void);
162extern void clockevents_resume(void); 162extern void clockevents_resume(void);
163 163
164#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
165#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
166extern void tick_broadcast(const struct cpumask *mask);
167#else
168#define tick_broadcast NULL
169#endif
170extern int tick_receive_broadcast(void);
171#endif
172
164#ifdef CONFIG_GENERIC_CLOCKEVENTS 173#ifdef CONFIG_GENERIC_CLOCKEVENTS
165extern void clockevents_notify(unsigned long reason, void *arg); 174extern void clockevents_notify(unsigned long reason, void *arg);
166#else 175#else
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index e24339ccb7f0..b28d161c1091 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,12 +3,40 @@
3 3
4#ifdef CONFIG_CONTEXT_TRACKING 4#ifdef CONFIG_CONTEXT_TRACKING
5#include <linux/sched.h> 5#include <linux/sched.h>
6#include <linux/percpu.h>
7
8struct context_tracking {
9 /*
10 * When active is false, probes are unset in order
11 * to minimize overhead: TIF flags are cleared
12 * and calls to user_enter/exit are ignored. This
13 * may be further optimized using static keys.
14 */
15 bool active;
16 enum {
17 IN_KERNEL = 0,
18 IN_USER,
19 } state;
20};
21
22DECLARE_PER_CPU(struct context_tracking, context_tracking);
23
24static inline bool context_tracking_in_user(void)
25{
26 return __this_cpu_read(context_tracking.state) == IN_USER;
27}
28
29static inline bool context_tracking_active(void)
30{
31 return __this_cpu_read(context_tracking.active);
32}
6 33
7extern void user_enter(void); 34extern void user_enter(void);
8extern void user_exit(void); 35extern void user_exit(void);
9extern void context_tracking_task_switch(struct task_struct *prev, 36extern void context_tracking_task_switch(struct task_struct *prev,
10 struct task_struct *next); 37 struct task_struct *next);
11#else 38#else
39static inline bool context_tracking_in_user(void) { return false; }
12static inline void user_enter(void) { } 40static inline void user_enter(void) { }
13static inline void user_exit(void) { } 41static inline void user_exit(void) { }
14static inline void context_tracking_task_switch(struct task_struct *prev, 42static inline void context_tracking_task_switch(struct task_struct *prev,
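
These per-CPU flags are what the later full-dynticks patches key off when deciding how to account a tick. A hedged sketch of a consumer (example_tick_account() is illustrative; the real hooks live in the cputime accounting code):

#include <linux/context_tracking.h>
#include <linux/hardirq.h>
#include <linux/kernel_stat.h>

static void example_tick_account(struct task_struct *p)
{
	if (context_tracking_in_user())
		account_user_time(p, cputime_one_jiffy, cputime_one_jiffy);
	else
		account_system_time(p, HARDIRQ_OFFSET,
				    cputime_one_jiffy, cputime_one_jiffy);
}
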
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 92691d85c320..e5ca8ef50e9b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
74 * SAVE_REGS - The ftrace_ops wants regs saved at each function called 74 * SAVE_REGS - The ftrace_ops wants regs saved at each function called
75 * and passed to the callback. If this flag is set, but the 75 * and passed to the callback. If this flag is set, but the
76 * architecture does not support passing regs 76 * architecture does not support passing regs
77 * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the 77 * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
78 * ftrace_ops will fail to register, unless the next flag 78 * ftrace_ops will fail to register, unless the next flag
79 * is set. 79 * is set.
80 * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the 80 * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
418#endif 418#endif
419 419
420#ifndef FTRACE_REGS_ADDR 420#ifndef FTRACE_REGS_ADDR
421#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS 421#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
422# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) 422# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
423#else 423#else
424# define FTRACE_REGS_ADDR FTRACE_ADDR 424# define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
480 */ 480 */
481extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); 481extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
482 482
483#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS 483#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
484/** 484/**
485 * ftrace_modify_call - convert from one addr to another (no nop) 485 * ftrace_modify_call - convert from one addr to another (no nop)
486 * @rec: the mcount call site record 486 * @rec: the mcount call site record
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index a3d489531d83..13a54d0bdfa8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
49 unsigned char flags; 49 unsigned char flags;
50 unsigned char preempt_count; 50 unsigned char preempt_count;
51 int pid; 51 int pid;
52 int padding;
53}; 52};
54 53
55#define FTRACE_MAX_EVENT \ 54#define FTRACE_MAX_EVENT \
@@ -84,6 +83,9 @@ struct trace_iterator {
84 long idx; 83 long idx;
85 84
86 cpumask_var_t started; 85 cpumask_var_t started;
86
87 /* it's true when current open file is snapshot */
88 bool snapshot;
87}; 89};
88 90
89enum trace_iter_flags { 91enum trace_iter_flags {
@@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
272extern int trace_add_event_call(struct ftrace_event_call *call); 274extern int trace_add_event_call(struct ftrace_event_call *call);
273extern void trace_remove_event_call(struct ftrace_event_call *call); 275extern void trace_remove_event_call(struct ftrace_event_call *call);
274 276
275#define is_signed_type(type) (((type)(-1)) < 0) 277#define is_signed_type(type) (((type)(-1)) < (type)0)
276 278
277int trace_set_clr_event(const char *system, const char *event, int set); 279int trace_set_clr_event(const char *system, const char *event, int set);
278 280
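
The is_signed_type() change matters once the macro is applied to pointer members: (type)(-1) < 0 does not compile for pointer types (ordered comparison of pointer with integer), while (type)0 keeps both operands the same type. A small compile-time sketch using the patched definition:

#include <linux/bug.h>

#define is_signed_type(type)	(((type)(-1)) < (type)0)

static inline void is_signed_type_example(void)
{
	BUILD_BUG_ON(!is_signed_type(long));		/* signed */
	BUILD_BUG_ON(is_signed_type(unsigned int));	/* unsigned */
	BUILD_BUG_ON(is_signed_type(void *));		/* legal only with (type)0 */
}
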
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..29eb805ea4a6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
153 */ 153 */
154#define __irq_enter() \ 154#define __irq_enter() \
155 do { \ 155 do { \
156 vtime_account_irq_enter(current); \ 156 account_irq_enter_time(current); \
157 add_preempt_count(HARDIRQ_OFFSET); \ 157 add_preempt_count(HARDIRQ_OFFSET); \
158 trace_hardirq_enter(); \ 158 trace_hardirq_enter(); \
159 } while (0) 159 } while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
169#define __irq_exit() \ 169#define __irq_exit() \
170 do { \ 170 do { \
171 trace_hardirq_exit(); \ 171 trace_hardirq_exit(); \
172 vtime_account_irq_exit(current); \ 172 account_irq_exit_time(current); \
173 sub_preempt_count(HARDIRQ_OFFSET); \ 173 sub_preempt_count(HARDIRQ_OFFSET); \
174 } while (0) 174 } while (0)
175 175
@@ -180,10 +180,10 @@ extern void irq_exit(void);
180 180
181#define nmi_enter() \ 181#define nmi_enter() \
182 do { \ 182 do { \
183 lockdep_off(); \
183 ftrace_nmi_enter(); \ 184 ftrace_nmi_enter(); \
184 BUG_ON(in_nmi()); \ 185 BUG_ON(in_nmi()); \
185 add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ 186 add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
186 lockdep_off(); \
187 rcu_nmi_enter(); \ 187 rcu_nmi_enter(); \
188 trace_hardirq_enter(); \ 188 trace_hardirq_enter(); \
189 } while (0) 189 } while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
192 do { \ 192 do { \
193 trace_hardirq_exit(); \ 193 trace_hardirq_exit(); \
194 rcu_nmi_exit(); \ 194 rcu_nmi_exit(); \
195 lockdep_on(); \
196 BUG_ON(!in_nmi()); \ 195 BUG_ON(!in_nmi()); \
197 sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ 196 sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
198 ftrace_nmi_exit(); \ 197 ftrace_nmi_exit(); \
198 lockdep_on(); \
199 } while (0) 199 } while (0)
200 200
201#endif /* LINUX_HARDIRQ_H */ 201#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5f57f7..5cd0f0949927 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,7 +10,9 @@
10#include <linux/pid_namespace.h> 10#include <linux/pid_namespace.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/securebits.h> 12#include <linux/securebits.h>
13#include <linux/seqlock.h>
13#include <net/net_namespace.h> 14#include <net/net_namespace.h>
15#include <linux/sched/rt.h>
14 16
15#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
16# define INIT_PUSHABLE_TASKS(tsk) \ 18# define INIT_PUSHABLE_TASKS(tsk) \
@@ -141,6 +143,15 @@ extern struct task_group root_task_group;
141# define INIT_PERF_EVENTS(tsk) 143# define INIT_PERF_EVENTS(tsk)
142#endif 144#endif
143 145
146#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
147# define INIT_VTIME(tsk) \
148 .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
149 .vtime_snap = 0, \
150 .vtime_snap_whence = VTIME_SYS,
151#else
152# define INIT_VTIME(tsk)
153#endif
154
144#define INIT_TASK_COMM "swapper" 155#define INIT_TASK_COMM "swapper"
145 156
146/* 157/*
@@ -210,6 +221,7 @@ extern struct task_group root_task_group;
210 INIT_TRACE_RECURSION \ 221 INIT_TRACE_RECURSION \
211 INIT_TASK_RCU_PREEMPT(tsk) \ 222 INIT_TASK_RCU_PREEMPT(tsk) \
212 INIT_CPUSET_SEQ \ 223 INIT_CPUSET_SEQ \
224 INIT_VTIME(tsk) \
213} 225}
214 226
215 227
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fdf2c4a238cc..bc4e06611958 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
509 509
510/* Handle dynamic irq creation and destruction */ 510/* Handle dynamic irq creation and destruction */
511extern unsigned int create_irq_nr(unsigned int irq_want, int node); 511extern unsigned int create_irq_nr(unsigned int irq_want, int node);
512extern unsigned int __create_irqs(unsigned int from, unsigned int count,
513 int node);
512extern int create_irq(void); 514extern int create_irq(void);
513extern void destroy_irq(unsigned int irq); 515extern void destroy_irq(unsigned int irq);
516extern void destroy_irqs(unsigned int irq, unsigned int count);
514 517
515/* 518/*
516 * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and 519 * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
@@ -528,6 +531,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
528extern int irq_set_chip_data(unsigned int irq, void *data); 531extern int irq_set_chip_data(unsigned int irq, void *data);
529extern int irq_set_irq_type(unsigned int irq, unsigned int type); 532extern int irq_set_irq_type(unsigned int irq, unsigned int type);
530extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); 533extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
534extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
535 struct msi_desc *entry);
531extern struct irq_data *irq_get_irq_data(unsigned int irq); 536extern struct irq_data *irq_get_irq_data(unsigned int irq);
532 537
533static inline struct irq_chip *irq_get_chip(unsigned int irq) 538static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -590,6 +595,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
590#define irq_alloc_desc_from(from, node) \ 595#define irq_alloc_desc_from(from, node) \
591 irq_alloc_descs(-1, from, 1, node) 596 irq_alloc_descs(-1, from, 1, node)
592 597
598#define irq_alloc_descs_from(from, cnt, node) \
599 irq_alloc_descs(-1, from, cnt, node)
600
593void irq_free_descs(unsigned int irq, unsigned int cnt); 601void irq_free_descs(unsigned int irq, unsigned int cnt);
594int irq_reserve_irqs(unsigned int from, unsigned int cnt); 602int irq_reserve_irqs(unsigned int from, unsigned int cnt);
595 603
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..f5dbce50466e 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,20 @@
3 3
4#include <linux/llist.h> 4#include <linux/llist.h>
5 5
6/*
7 * An entry can be in one of four states:
8 *
9 * free NULL, 0 -> {claimed} : free to be used
10 * claimed NULL, 3 -> {pending} : claimed to be enqueued
11 * pending next, 3 -> {busy} : queued, pending callback
12 * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
13 */
14
15#define IRQ_WORK_PENDING 1UL
16#define IRQ_WORK_BUSY 2UL
17#define IRQ_WORK_FLAGS 3UL
18#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */
19
6struct irq_work { 20struct irq_work {
7 unsigned long flags; 21 unsigned long flags;
8 struct llist_node llnode; 22 struct llist_node llnode;
@@ -16,8 +30,14 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
16 work->func = func; 30 work->func = func;
17} 31}
18 32
19bool irq_work_queue(struct irq_work *work); 33void irq_work_queue(struct irq_work *work);
20void irq_work_run(void); 34void irq_work_run(void);
21void irq_work_sync(struct irq_work *work); 35void irq_work_sync(struct irq_work *work);
22 36
37#ifdef CONFIG_IRQ_WORK
38bool irq_work_needs_cpu(void);
39#else
40static inline bool irq_work_needs_cpu(void) { return false; }
41#endif
42
23#endif /* _LINUX_IRQ_WORK_H */ 43#endif /* _LINUX_IRQ_WORK_H */
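
IRQ_WORK_LAZY lets an item ride the next timer tick instead of raising a self-IPI; the printk wakeup is the in-tree user (see the printk.h hunk below, which drops printk_tick()/printk_needs_cpu()). A hedged usage sketch (my_work and my_work_fn are illustrative):

#include <linux/irq_work.h>
#include <linux/printk.h>

static void my_work_fn(struct irq_work *work)
{
	pr_info("deferred work ran from the tick, no IPI\n");
}

static struct irq_work my_work = {
	.flags = IRQ_WORK_LAZY,	/* coalesce into the next tick */
	.func  = my_work_fn,
};

/* Safe to call from NMI or hard-IRQ context: */
static void kick_example(void)
{
	irq_work_queue(&my_work);
}
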
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910..ed5f6ed6eb77 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
127extern void account_steal_time(cputime_t); 127extern void account_steal_time(cputime_t);
128extern void account_idle_time(cputime_t); 128extern void account_idle_time(cputime_t);
129 129
130#ifdef CONFIG_VIRT_CPU_ACCOUNTING 130#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
131static inline void account_process_tick(struct task_struct *tsk, int user) 131static inline void account_process_tick(struct task_struct *tsk, int user)
132{ 132{
133 vtime_account_user(tsk); 133 vtime_account_user(tsk);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23755ba42abc..4b6ef4d33cc2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
49#define KPROBE_REENTER 0x00000004 49#define KPROBE_REENTER 0x00000004
50#define KPROBE_HIT_SSDONE 0x00000008 50#define KPROBE_HIT_SSDONE 0x00000008
51 51
52/*
53 * If function tracer is enabled and the arch supports full
54 * passing of pt_regs to function tracing, then kprobes can
55 * optimize on top of function tracing.
56 */
57#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
58 && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
59# define KPROBES_CAN_USE_FTRACE
60#endif
61
62/* Attach to insert probes on any functions which should be ignored*/ 52/* Attach to insert probes on any functions which should be ignored*/
63#define __kprobes __attribute__((__section__(".kprobes.text"))) 53#define __kprobes __attribute__((__section__(".kprobes.text")))
64 54
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
316#endif 306#endif
317 307
318#endif /* CONFIG_OPTPROBES */ 308#endif /* CONFIG_OPTPROBES */
319#ifdef KPROBES_CAN_USE_FTRACE 309#ifdef CONFIG_KPROBES_ON_FTRACE
320extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, 310extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
321 struct ftrace_ops *ops, struct pt_regs *regs); 311 struct ftrace_ops *ops, struct pt_regs *regs);
322extern int arch_prepare_kprobe_ftrace(struct kprobe *p); 312extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c497ab0d03d..b7996a768eb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -22,6 +22,7 @@
22#include <linux/rcupdate.h> 22#include <linux/rcupdate.h>
23#include <linux/ratelimit.h> 23#include <linux/ratelimit.h>
24#include <linux/err.h> 24#include <linux/err.h>
25#include <linux/irqflags.h>
25#include <asm/signal.h> 26#include <asm/signal.h>
26 27
27#include <linux/kvm.h> 28#include <linux/kvm.h>
@@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm,
740} 741}
741#endif /* CONFIG_IOMMU_API */ 742#endif /* CONFIG_IOMMU_API */
742 743
743static inline void kvm_guest_enter(void) 744static inline void __guest_enter(void)
744{ 745{
745 BUG_ON(preemptible());
746 /* 746 /*
747 * This is running in ioctl context so we can avoid 747 * This is running in ioctl context so we can avoid
748 * the call to vtime_account() with its unnecessary idle check. 748 * the call to vtime_account() with its unnecessary idle check.
749 */ 749 */
750 vtime_account_system_irqsafe(current); 750 vtime_account_system(current);
751 current->flags |= PF_VCPU; 751 current->flags |= PF_VCPU;
752}
753
754static inline void __guest_exit(void)
755{
756 /*
757 * This is running in ioctl context so we can avoid
758 * the call to vtime_account() with its unnecessary idle check.
759 */
760 vtime_account_system(current);
761 current->flags &= ~PF_VCPU;
762}
763
764#ifdef CONFIG_CONTEXT_TRACKING
765extern void guest_enter(void);
766extern void guest_exit(void);
767
768#else /* !CONFIG_CONTEXT_TRACKING */
769static inline void guest_enter(void)
770{
771 __guest_enter();
772}
773
774static inline void guest_exit(void)
775{
776 __guest_exit();
777}
778#endif /* !CONFIG_CONTEXT_TRACKING */
779
780static inline void kvm_guest_enter(void)
781{
782 unsigned long flags;
783
784 BUG_ON(preemptible());
785
786 local_irq_save(flags);
787 guest_enter();
788 local_irq_restore(flags);
789
752 /* KVM does not hold any references to rcu protected data when it 790 /* KVM does not hold any references to rcu protected data when it
753 * switches CPU into a guest mode. In fact switching to a guest mode 791 * switches CPU into a guest mode. In fact switching to a guest mode
754 * is very similar to exiting to userspace from an RCU point of view. In 792 * is very similar to exiting to userspace from an RCU point of view. In
@@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void)
761 799
762static inline void kvm_guest_exit(void) 800static inline void kvm_guest_exit(void)
763{ 801{
764 /* 802 unsigned long flags;
765 * This is running in ioctl context so we can avoid 803
766 * the call to vtime_account() with its unnecessary idle check. 804 local_irq_save(flags);
767 */ 805 guest_exit();
768 vtime_account_system_irqsafe(current); 806 local_irq_restore(flags);
769 current->flags &= ~PF_VCPU;
770} 807}
771 808
772/* 809/*
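
kvm_guest_enter()/kvm_guest_exit() now wrap the context-tracking-aware guest_enter()/guest_exit() in local_irq_save(), so arch code keeps the same call shape. A hedged sketch of the calling convention (example_vcpu_run() is illustrative):

#include <linux/kvm_host.h>

static void example_vcpu_run(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_guest_enter();	/* guest_enter() under local_irq_save() */

	/* ... arch-specific world switch into and out of the guest ... */

	kvm_guest_exit();
	preempt_enable();
}
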
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..6fa4dd2a3b9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
1101 return -1; 1101 return -1;
1102} 1102}
1103 1103
1104static inline int
1105pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
1106{
1107 return -1;
1108}
1109
1104static inline void pci_msi_shutdown(struct pci_dev *dev) 1110static inline void pci_msi_shutdown(struct pci_dev *dev)
1105{ } 1111{ }
1106static inline void pci_disable_msi(struct pci_dev *dev) 1112static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void)
1132} 1138}
1133#else 1139#else
1134extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); 1140extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
1141extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
1135extern void pci_msi_shutdown(struct pci_dev *dev); 1142extern void pci_msi_shutdown(struct pci_dev *dev);
1136extern void pci_disable_msi(struct pci_dev *dev); 1143extern void pci_disable_msi(struct pci_dev *dev);
1137extern int pci_msix_table_size(struct pci_dev *dev); 1144extern int pci_msix_table_size(struct pci_dev *dev);
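
pci_enable_msi_block_auto() asks the function how many vectors it can take and allocates up to that many in one call. A hedged driver-side sketch (example_setup_msi() is illustrative; on this reading the call returns the number of vectors actually allocated, or a negative errno):

#include <linux/pci.h>

static int example_setup_msi(struct pci_dev *pdev)
{
	unsigned int maxvec;
	int nvec;

	nvec = pci_enable_msi_block_auto(pdev, &maxvec);
	if (nvec < 0)
		return nvec;	/* fall back to legacy INTx */

	dev_info(&pdev->dev, "using %d of %u supported MSI vectors\n",
		 nvec, maxvec);
	return nvec;
}
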
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa0b19..e47ee462c2f2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
135 struct { /* software */ 135 struct { /* software */
136 struct hrtimer hrtimer; 136 struct hrtimer hrtimer;
137 }; 137 };
138 struct { /* tracepoint */
139 struct task_struct *tp_target;
140 /* for tp_event->class */
141 struct list_head tp_list;
142 };
138#ifdef CONFIG_HAVE_HW_BREAKPOINT 143#ifdef CONFIG_HAVE_HW_BREAKPOINT
139 struct { /* breakpoint */ 144 struct { /* breakpoint */
140 struct arch_hw_breakpoint info;
141 struct list_head bp_list;
142 /* 145 /*
143 * Crufty hack to avoid the chicken and egg 146 * Crufty hack to avoid the chicken and egg
144 * problem hw_breakpoint has with context 147 * problem hw_breakpoint has with context
145 * creation and event initialization. 148 * creation and event initialization.
146 */ 149 */
147 struct task_struct *bp_target; 150 struct task_struct *bp_target;
151 struct arch_hw_breakpoint info;
152 struct list_head bp_list;
148 }; 153 };
149#endif 154#endif
150 }; 155 };
@@ -817,6 +822,17 @@ do { \
817} while (0) 822} while (0)
818 823
819 824
825struct perf_pmu_events_attr {
826 struct device_attribute attr;
827 u64 id;
828};
829
830#define PMU_EVENT_ATTR(_name, _var, _id, _show) \
831static struct perf_pmu_events_attr _var = { \
832 .attr = __ATTR(_name, 0444, _show, NULL), \
833 .id = _id, \
834};
835
820#define PMU_FORMAT_ATTR(_name, _format) \ 836#define PMU_FORMAT_ATTR(_name, _format) \
821static ssize_t \ 837static ssize_t \
822_name##_show(struct device *dev, \ 838_name##_show(struct device *dev, \
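
PMU_EVENT_ATTR() bundles a sysfs device attribute with a raw event id, letting PMU drivers publish named events in sysfs. A sketch of defining one (the show routine and the 0x1e id are illustrative assumptions):

#include <linux/device.h>
#include <linux/perf_event.h>

static ssize_t example_events_show(struct device *dev,
				   struct device_attribute *attr, char *page)
{
	struct perf_pmu_events_attr *pmu_attr =
		container_of(attr, struct perf_pmu_events_attr, attr);

	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

PMU_EVENT_ATTR(cycles, example_attr_cycles, 0x1e, example_events_show);
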
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..86c4b6294713 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...)
98extern asmlinkage __printf(1, 2) 98extern asmlinkage __printf(1, 2)
99void early_printk(const char *fmt, ...); 99void early_printk(const char *fmt, ...);
100 100
101extern int printk_needs_cpu(int cpu);
102extern void printk_tick(void);
103
104#ifdef CONFIG_PRINTK 101#ifdef CONFIG_PRINTK
105asmlinkage __printf(5, 0) 102asmlinkage __printf(5, 0)
106int vprintk_emit(int facility, int level, 103int vprintk_emit(int facility, int level,
diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc32279fc0..21123902366d 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
82int profile_event_register(enum profile_type, struct notifier_block * n); 82int profile_event_register(enum profile_type, struct notifier_block * n);
83int profile_event_unregister(enum profile_type, struct notifier_block * n); 83int profile_event_unregister(enum profile_type, struct notifier_block * n);
84 84
85int register_timer_hook(int (*hook)(struct pt_regs *));
86void unregister_timer_hook(int (*hook)(struct pt_regs *));
87
88struct pt_regs; 85struct pt_regs;
89 86
90#else 87#else
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
135#define profile_handoff_task(a) (0) 132#define profile_handoff_task(a) (0)
136#define profile_munmap(a) do { } while (0) 133#define profile_munmap(a) do { } while (0)
137 134
138static inline int register_timer_hook(int (*hook)(struct pt_regs *))
139{
140 return -ENOSYS;
141}
142
143static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
144{
145 return;
146}
147
148#endif /* CONFIG_PROFILING */ 135#endif /* CONFIG_PROFILING */
149 136
150#endif /* _LINUX_PROFILE_H */ 137#endif /* _LINUX_PROFILE_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..b758ce17b309 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
53extern void rcutorture_record_test_transition(void); 53extern void rcutorture_record_test_transition(void);
54extern void rcutorture_record_progress(unsigned long vernum); 54extern void rcutorture_record_progress(unsigned long vernum);
55extern void do_trace_rcu_torture_read(char *rcutorturename, 55extern void do_trace_rcu_torture_read(char *rcutorturename,
56 struct rcu_head *rhp); 56 struct rcu_head *rhp,
57 unsigned long secs,
58 unsigned long c_old,
59 unsigned long c);
57#else 60#else
58static inline void rcutorture_record_test_transition(void) 61static inline void rcutorture_record_test_transition(void)
59{ 62{
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum)
63} 66}
64#ifdef CONFIG_RCU_TRACE 67#ifdef CONFIG_RCU_TRACE
65extern void do_trace_rcu_torture_read(char *rcutorturename, 68extern void do_trace_rcu_torture_read(char *rcutorturename,
66 struct rcu_head *rhp); 69 struct rcu_head *rhp,
70 unsigned long secs,
71 unsigned long c_old,
72 unsigned long c);
67#else 73#else
68#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 74#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
75 do { } while (0)
69#endif 76#endif
70#endif 77#endif
71 78
@@ -749,7 +756,7 @@ static inline void rcu_preempt_sleep_check(void)
749 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) 756 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
750 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may 757 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
751 * be preempted, but explicit blocking is illegal. Finally, in preemptible 758 * be preempted, but explicit blocking is illegal. Finally, in preemptible
752 * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, 759 * RCU implementations in real-time (with -rt patchset) kernel builds,
753 * RCU read-side critical sections may be preempted and they may also 760 * RCU read-side critical sections may be preempted and they may also
754 * block, but only when acquiring spinlocks that are subject to priority 761 * block, but only when acquiring spinlocks that are subject to priority
755 * inheritance. 762 * inheritance.
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 519777e3fa01..1342e69542f3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
167unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); 167unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
168unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); 168unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
169unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); 169unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
170unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
170 171
171u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); 172u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
172void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, 173void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 9531845c419f..11d05f9fe8b6 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
138extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); 138extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
139extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); 139extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
140extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); 140extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
141extern int rtc_set_ntp_time(struct timespec now);
141int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); 142int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
142extern int rtc_read_alarm(struct rtc_device *rtc, 143extern int rtc_read_alarm(struct rtc_device *rtc,
143 struct rtc_wkalrm *alrm); 144 struct rtc_wkalrm *alrm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..33cc42130371 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
304} 304}
305#endif 305#endif
306 306
307#ifdef CONFIG_DETECT_HUNG_TASK
308extern unsigned int sysctl_hung_task_panic;
309extern unsigned long sysctl_hung_task_check_count;
310extern unsigned long sysctl_hung_task_timeout_secs;
311extern unsigned long sysctl_hung_task_warnings;
312extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
313 void __user *buffer,
314 size_t *lenp, loff_t *ppos);
315#else
316/* Avoid need for ifdefs elsewhere in the code */
317enum { sysctl_hung_task_timeout_secs = 0 };
318#endif
319
320/* Attach to any functions which should be ignored in wchan output. */ 307/* Attach to any functions which should be ignored in wchan output. */
321#define __sched __attribute__((__section__(".sched.text"))) 308#define __sched __attribute__((__section__(".sched.text")))
322 309
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
338struct nsproxy; 325struct nsproxy;
339struct user_namespace; 326struct user_namespace;
340 327
341/*
342 * Default maximum number of active map areas; this limits the number of vmas
343 * per mm struct. Users can override this number via sysctl, but there is a
344 * problem.
345 *
346 * When a program's coredump is generated in ELF format, a section is created
347 * per vma. In ELF, the number of sections is represented as an unsigned short,
348 * so the number of sections must stay below 65535 at coredump time.
349 * Because the kernel adds some informative sections to the program image when
350 * generating a coredump, we need some margin. The number of extra sections is
351 * currently 1-3, depending on the arch, so we use "5" as a safe margin here.
352 */
353#define MAPCOUNT_ELF_CORE_MARGIN (5)
354#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
355
356extern int sysctl_max_map_count;
357
358#include <linux/aio.h> 328#include <linux/aio.h>
359 329
360#ifdef CONFIG_MMU 330#ifdef CONFIG_MMU
@@ -1194,6 +1164,7 @@ struct sched_entity {
1194 /* rq "owned" by this entity/group: */ 1164 /* rq "owned" by this entity/group: */
1195 struct cfs_rq *my_q; 1165 struct cfs_rq *my_q;
1196#endif 1166#endif
1167
1197/* 1168/*
1198 * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be 1169 * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
1199 * removed when useful for applications beyond shares distribution (e.g. 1170 * removed when useful for applications beyond shares distribution (e.g.
@@ -1208,6 +1179,7 @@ struct sched_entity {
1208struct sched_rt_entity { 1179struct sched_rt_entity {
1209 struct list_head run_list; 1180 struct list_head run_list;
1210 unsigned long timeout; 1181 unsigned long timeout;
1182 unsigned long watchdog_stamp;
1211 unsigned int time_slice; 1183 unsigned int time_slice;
1212 1184
1213 struct sched_rt_entity *back; 1185 struct sched_rt_entity *back;
@@ -1220,11 +1192,6 @@ struct sched_rt_entity {
1220#endif 1192#endif
1221}; 1193};
1222 1194
1223/*
1224 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
1225 * Timeslices get refilled after they expire.
1226 */
1227#define RR_TIMESLICE (100 * HZ / 1000)
1228 1195
1229struct rcu_node; 1196struct rcu_node;
1230 1197
@@ -1368,6 +1335,15 @@ struct task_struct {
1368#ifndef CONFIG_VIRT_CPU_ACCOUNTING 1335#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1369 struct cputime prev_cputime; 1336 struct cputime prev_cputime;
1370#endif 1337#endif
1338#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1339 seqlock_t vtime_seqlock;
1340 unsigned long long vtime_snap;
1341 enum {
1342 VTIME_SLEEPING = 0,
1343 VTIME_USER,
1344 VTIME_SYS,
1345 } vtime_snap_whence;
1346#endif
1371 unsigned long nvcsw, nivcsw; /* context switch counts */ 1347 unsigned long nvcsw, nivcsw; /* context switch counts */
1372 struct timespec start_time; /* monotonic time */ 1348 struct timespec start_time; /* monotonic time */
1373 struct timespec real_start_time; /* boot based time */ 1349 struct timespec real_start_time; /* boot based time */
@@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
1622} 1598}
1623#endif 1599#endif
1624 1600
1625/*
1626 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
1627 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
1628 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
1629 * values are inverted: lower p->prio value means higher priority.
1630 *
1631 * The MAX_USER_RT_PRIO value allows the actual maximum
1632 * RT priority to be separate from the value exported to
1633 * user-space. This allows kernel threads to set their
1634 * priority to a value higher than any user task. Note:
1635 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
1636 */
1637
1638#define MAX_USER_RT_PRIO 100
1639#define MAX_RT_PRIO MAX_USER_RT_PRIO
1640
1641#define MAX_PRIO (MAX_RT_PRIO + 40)
1642#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
1643
1644static inline int rt_prio(int prio)
1645{
1646 if (unlikely(prio < MAX_RT_PRIO))
1647 return 1;
1648 return 0;
1649}
1650
1651static inline int rt_task(struct task_struct *p)
1652{
1653 return rt_prio(p->prio);
1654}
1655
1656static inline struct pid *task_pid(struct task_struct *task) 1601static inline struct pid *task_pid(struct task_struct *task)
1657{ 1602{
1658 return task->pids[PIDTYPE_PID].pid; 1603 return task->pids[PIDTYPE_PID].pid;
@@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t)
1792 __put_task_struct(t); 1737 __put_task_struct(t);
1793} 1738}
1794 1739
1740#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1741extern void task_cputime(struct task_struct *t,
1742 cputime_t *utime, cputime_t *stime);
1743extern void task_cputime_scaled(struct task_struct *t,
1744 cputime_t *utimescaled, cputime_t *stimescaled);
1745extern cputime_t task_gtime(struct task_struct *t);
1746#else
1747static inline void task_cputime(struct task_struct *t,
1748 cputime_t *utime, cputime_t *stime)
1749{
1750 if (utime)
1751 *utime = t->utime;
1752 if (stime)
1753 *stime = t->stime;
1754}
1755
1756static inline void task_cputime_scaled(struct task_struct *t,
1757 cputime_t *utimescaled,
1758 cputime_t *stimescaled)
1759{
1760 if (utimescaled)
1761 *utimescaled = t->utimescaled;
1762 if (stimescaled)
1763 *stimescaled = t->stimescaled;
1764}
1765
1766static inline cputime_t task_gtime(struct task_struct *t)
1767{
1768 return t->gtime;
1769}
1770#endif
1795extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); 1771extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
1796extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); 1772extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
1797 1773
@@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu);
2033static inline void wake_up_idle_cpu(int cpu) { } 2009static inline void wake_up_idle_cpu(int cpu) { }
2034#endif 2010#endif
2035 2011
2036extern unsigned int sysctl_sched_latency;
2037extern unsigned int sysctl_sched_min_granularity;
2038extern unsigned int sysctl_sched_wakeup_granularity;
2039extern unsigned int sysctl_sched_child_runs_first;
2040
2041enum sched_tunable_scaling {
2042 SCHED_TUNABLESCALING_NONE,
2043 SCHED_TUNABLESCALING_LOG,
2044 SCHED_TUNABLESCALING_LINEAR,
2045 SCHED_TUNABLESCALING_END,
2046};
2047extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
2048
2049extern unsigned int sysctl_numa_balancing_scan_delay;
2050extern unsigned int sysctl_numa_balancing_scan_period_min;
2051extern unsigned int sysctl_numa_balancing_scan_period_max;
2052extern unsigned int sysctl_numa_balancing_scan_period_reset;
2053extern unsigned int sysctl_numa_balancing_scan_size;
2054extern unsigned int sysctl_numa_balancing_settle_count;
2055
2056#ifdef CONFIG_SCHED_DEBUG
2057extern unsigned int sysctl_sched_migration_cost;
2058extern unsigned int sysctl_sched_nr_migrate;
2059extern unsigned int sysctl_sched_time_avg;
2060extern unsigned int sysctl_timer_migration;
2061extern unsigned int sysctl_sched_shares_window;
2062
2063int sched_proc_update_handler(struct ctl_table *table, int write,
2064 void __user *buffer, size_t *length,
2065 loff_t *ppos);
2066#endif
2067#ifdef CONFIG_SCHED_DEBUG
2068static inline unsigned int get_sysctl_timer_migration(void)
2069{
2070 return sysctl_timer_migration;
2071}
2072#else
2073static inline unsigned int get_sysctl_timer_migration(void)
2074{
2075 return 1;
2076}
2077#endif
2078extern unsigned int sysctl_sched_rt_period;
2079extern int sysctl_sched_rt_runtime;
2080
2081int sched_rt_handler(struct ctl_table *table, int write,
2082 void __user *buffer, size_t *lenp,
2083 loff_t *ppos);
2084
2085#ifdef CONFIG_SCHED_AUTOGROUP 2012#ifdef CONFIG_SCHED_AUTOGROUP
2086extern unsigned int sysctl_sched_autogroup_enabled;
2087
2088extern void sched_autogroup_create_attach(struct task_struct *p); 2013extern void sched_autogroup_create_attach(struct task_struct *p);
2089extern void sched_autogroup_detach(struct task_struct *p); 2014extern void sched_autogroup_detach(struct task_struct *p);
2090extern void sched_autogroup_fork(struct signal_struct *sig); 2015extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
2100static inline void sched_autogroup_exit(struct signal_struct *sig) { } 2025static inline void sched_autogroup_exit(struct signal_struct *sig) { }
2101#endif 2026#endif
2102 2027
2103#ifdef CONFIG_CFS_BANDWIDTH
2104extern unsigned int sysctl_sched_cfs_bandwidth_slice;
2105#endif
2106
2107#ifdef CONFIG_RT_MUTEXES
2108extern int rt_mutex_getprio(struct task_struct *p);
2109extern void rt_mutex_setprio(struct task_struct *p, int prio);
2110extern void rt_mutex_adjust_pi(struct task_struct *p);
2111static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2112{
2113 return tsk->pi_blocked_on != NULL;
2114}
2115#else
2116static inline int rt_mutex_getprio(struct task_struct *p)
2117{
2118 return p->normal_prio;
2119}
2120# define rt_mutex_adjust_pi(p) do { } while (0)
2121static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
2122{
2123 return false;
2124}
2125#endif
2126
2127extern bool yield_to(struct task_struct *p, bool preempt); 2028extern bool yield_to(struct task_struct *p, bool preempt);
2128extern void set_user_nice(struct task_struct *p, long nice); 2029extern void set_user_nice(struct task_struct *p, long nice);
2129extern int task_prio(const struct task_struct *p); 2030extern int task_prio(const struct task_struct *p);
@@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
2753extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); 2654extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2754extern long sched_getaffinity(pid_t pid, struct cpumask *mask); 2655extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2755 2656
2756extern void normalize_rt_tasks(void);
2757
2758#ifdef CONFIG_CGROUP_SCHED 2657#ifdef CONFIG_CGROUP_SCHED
2759 2658
2760extern struct task_group root_task_group; 2659extern struct task_group root_task_group;
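The task_cputime()/task_cputime_scaled() accessors added above exist so callers stop reading t->utime/t->stime directly; under CONFIG_VIRT_CPU_ACCOUNTING_GEN those fields must be sampled consistently against vtime_seqlock. A minimal sketch of a converted caller (the reporting helper is hypothetical):

#include <linux/sched.h>
#include <linux/printk.h>

/* Hypothetical helper: report a task's CPU time. task_cputime() does
 * the right thing whether the kernel uses tick-based accounting or
 * full dynticks (CONFIG_VIRT_CPU_ACCOUNTING_GEN) accounting. */
static void report_task_times(struct task_struct *t)
{
	cputime_t utime, stime;

	task_cputime(t, &utime, &stime);	/* never t->utime/t->stime */
	pr_info("%s: utime=%lu stime=%lu\n",
		t->comm, (unsigned long)utime, (unsigned long)stime);
}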
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644
index 000000000000..94e19ea28fc3
--- /dev/null
+++ b/include/linux/sched/rt.h
@@ -0,0 +1,58 @@
1#ifndef _SCHED_RT_H
2#define _SCHED_RT_H
3
4/*
5 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
6 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
7 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
8 * values are inverted: lower p->prio value means higher priority.
9 *
10 * The MAX_USER_RT_PRIO value allows the actual maximum
11 * RT priority to be separate from the value exported to
12 * user-space. This allows kernel threads to set their
13 * priority to a value higher than any user task. Note:
14 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
15 */
16
17#define MAX_USER_RT_PRIO 100
18#define MAX_RT_PRIO MAX_USER_RT_PRIO
19
20#define MAX_PRIO (MAX_RT_PRIO + 40)
21#define DEFAULT_PRIO (MAX_RT_PRIO + 20)
22
23static inline int rt_prio(int prio)
24{
25 if (unlikely(prio < MAX_RT_PRIO))
26 return 1;
27 return 0;
28}
29
30static inline int rt_task(struct task_struct *p)
31{
32 return rt_prio(p->prio);
33}
34
35#ifdef CONFIG_RT_MUTEXES
36extern int rt_mutex_getprio(struct task_struct *p);
37extern void rt_mutex_setprio(struct task_struct *p, int prio);
38extern void rt_mutex_adjust_pi(struct task_struct *p);
39static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
40{
41 return tsk->pi_blocked_on != NULL;
42}
43#else
44static inline int rt_mutex_getprio(struct task_struct *p)
45{
46 return p->normal_prio;
47}
48# define rt_mutex_adjust_pi(p) do { } while (0)
49static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
50{
51 return false;
52}
53#endif
54
55extern void normalize_rt_tasks(void);
56
57
58#endif /* _SCHED_RT_H */
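As a usage sketch (the caller below is hypothetical), code that only needs the RT priority helpers can now include the split header explicitly rather than relying on <linux/sched.h> to provide them:

#include <linux/sched.h>
#include <linux/sched/rt.h>

/* Hypothetical policy check: rt_task() is true for priorities
 * 0..MAX_RT_PRIO-1 (i.e. 0..99); lower p->prio means higher priority. */
static bool is_high_rt(struct task_struct *p)
{
	return rt_task(p) && p->prio < MAX_USER_RT_PRIO / 2;
}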
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
new file mode 100644
index 000000000000..d2bb0ae979d0
--- /dev/null
+++ b/include/linux/sched/sysctl.h
@@ -0,0 +1,110 @@
1#ifndef _SCHED_SYSCTL_H
2#define _SCHED_SYSCTL_H
3
4#ifdef CONFIG_DETECT_HUNG_TASK
5extern unsigned int sysctl_hung_task_panic;
6extern unsigned long sysctl_hung_task_check_count;
7extern unsigned long sysctl_hung_task_timeout_secs;
8extern unsigned long sysctl_hung_task_warnings;
9extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
10 void __user *buffer,
11 size_t *lenp, loff_t *ppos);
12#else
13/* Avoid need for ifdefs elsewhere in the code */
14enum { sysctl_hung_task_timeout_secs = 0 };
15#endif
16
17/*
 18 * Default maximum number of active map areas; this limits the number of vmas
 19 * per mm struct. Users can override this number via sysctl, but there is a
 20 * problem.
 21 *
 22 * When a program's coredump is generated in ELF format, one section is created
 23 * per vma. In ELF, the number of sections is stored as an unsigned short,
 24 * so at coredump time the section count must stay below 65535.
 25 * Because the kernel adds some informative sections to the program image
 26 * when generating a coredump, we need some margin. The number of extra
 27 * sections is currently 1-3, depending on the arch, so we use "5" as a safe margin here.
28 */
29#define MAPCOUNT_ELF_CORE_MARGIN (5)
30#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
31
32extern int sysctl_max_map_count;
33
34extern unsigned int sysctl_sched_latency;
35extern unsigned int sysctl_sched_min_granularity;
36extern unsigned int sysctl_sched_wakeup_granularity;
37extern unsigned int sysctl_sched_child_runs_first;
38
39enum sched_tunable_scaling {
40 SCHED_TUNABLESCALING_NONE,
41 SCHED_TUNABLESCALING_LOG,
42 SCHED_TUNABLESCALING_LINEAR,
43 SCHED_TUNABLESCALING_END,
44};
45extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
46
47extern unsigned int sysctl_numa_balancing_scan_delay;
48extern unsigned int sysctl_numa_balancing_scan_period_min;
49extern unsigned int sysctl_numa_balancing_scan_period_max;
50extern unsigned int sysctl_numa_balancing_scan_period_reset;
51extern unsigned int sysctl_numa_balancing_scan_size;
52extern unsigned int sysctl_numa_balancing_settle_count;
53
54#ifdef CONFIG_SCHED_DEBUG
55extern unsigned int sysctl_sched_migration_cost;
56extern unsigned int sysctl_sched_nr_migrate;
57extern unsigned int sysctl_sched_time_avg;
58extern unsigned int sysctl_timer_migration;
59extern unsigned int sysctl_sched_shares_window;
60
61int sched_proc_update_handler(struct ctl_table *table, int write,
62 void __user *buffer, size_t *length,
63 loff_t *ppos);
64#endif
65#ifdef CONFIG_SCHED_DEBUG
66static inline unsigned int get_sysctl_timer_migration(void)
67{
68 return sysctl_timer_migration;
69}
70#else
71static inline unsigned int get_sysctl_timer_migration(void)
72{
73 return 1;
74}
75#endif
76
77/*
78 * control realtime throttling:
79 *
80 * /proc/sys/kernel/sched_rt_period_us
81 * /proc/sys/kernel/sched_rt_runtime_us
82 */
83extern unsigned int sysctl_sched_rt_period;
84extern int sysctl_sched_rt_runtime;
85
86#ifdef CONFIG_CFS_BANDWIDTH
87extern unsigned int sysctl_sched_cfs_bandwidth_slice;
88#endif
89
90#ifdef CONFIG_SCHED_AUTOGROUP
91extern unsigned int sysctl_sched_autogroup_enabled;
92#endif
93
94/*
95 * default timeslice is 100 msecs (used only for SCHED_RR tasks).
96 * Timeslices get refilled after they expire.
97 */
98#define RR_TIMESLICE (100 * HZ / 1000)
99
100extern int sched_rr_timeslice;
101
102extern int sched_rr_handler(struct ctl_table *table, int write,
103 void __user *buffer, size_t *lenp,
104 loff_t *ppos);
105
106extern int sched_rt_handler(struct ctl_table *table, int write,
107 void __user *buffer, size_t *lenp,
108 loff_t *ppos);
109
110#endif /* _SCHED_SYSCTL_H */
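Since RR_TIMESLICE is expressed in jiffies, its numeric value depends on HZ while always describing the same 100 msecs; a quick sanity sketch (the init function is hypothetical):

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/sched/sysctl.h>

/* RR_TIMESLICE = 100 * HZ / 1000 jiffies:
 * HZ=1000 -> 100, HZ=250 -> 25, HZ=100 -> 10; all equal 100 msecs. */
static int __init check_rr_timeslice(void)
{
	WARN_ON(jiffies_to_msecs(RR_TIMESLICE) != 100);
	return 0;
}
/* e.g. core_initcall(check_rr_timeslice); */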
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index e0106d8581d3..c65dee059913 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -14,6 +14,8 @@ struct smpboot_thread_data;
14 * @thread_should_run: Check whether the thread should run or not. Called with 14 * @thread_should_run: Check whether the thread should run or not. Called with
15 * preemption disabled. 15 * preemption disabled.
16 * @thread_fn: The associated thread function 16 * @thread_fn: The associated thread function
17 * @create: Optional setup function, called when the thread gets
 18 * created (not called from the thread's context) 
17 * @setup: Optional setup function, called when the thread gets 19 * @setup: Optional setup function, called when the thread gets
18 * operational the first time 20 * operational the first time
19 * @cleanup: Optional cleanup function, called when the thread 21 * @cleanup: Optional cleanup function, called when the thread
@@ -22,6 +24,7 @@ struct smpboot_thread_data;
22 * parked (cpu offline) 24 * parked (cpu offline)
23 * @unpark: Optional unpark function, called when the thread is 25 * @unpark: Optional unpark function, called when the thread is
24 * unparked (cpu online) 26 * unparked (cpu online)
27 * @selfparking: Thread is not parked by the park function.
25 * @thread_comm: The base name of the thread 28 * @thread_comm: The base name of the thread
26 */ 29 */
27struct smp_hotplug_thread { 30struct smp_hotplug_thread {
@@ -29,10 +32,12 @@ struct smp_hotplug_thread {
29 struct list_head list; 32 struct list_head list;
30 int (*thread_should_run)(unsigned int cpu); 33 int (*thread_should_run)(unsigned int cpu);
31 void (*thread_fn)(unsigned int cpu); 34 void (*thread_fn)(unsigned int cpu);
35 void (*create)(unsigned int cpu);
32 void (*setup)(unsigned int cpu); 36 void (*setup)(unsigned int cpu);
33 void (*cleanup)(unsigned int cpu, bool online); 37 void (*cleanup)(unsigned int cpu, bool online);
34 void (*park)(unsigned int cpu); 38 void (*park)(unsigned int cpu);
35 void (*unpark)(unsigned int cpu); 39 void (*unpark)(unsigned int cpu);
40 bool selfparking;
36 const char *thread_comm; 41 const char *thread_comm;
37}; 42};
38 43
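A registration sketch using the new @create hook and @selfparking flag (all demo_* names are hypothetical; the shape mirrors how per-cpu kthreads such as ksoftirqd use this API):

#include <linux/smpboot.h>
#include <linux/percpu.h>
#include <linux/printk.h>

static DEFINE_PER_CPU(struct task_struct *, demo_task);

static void demo_create(unsigned int cpu)
{
	/* Runs when the per-cpu thread is created, NOT in thread context */
	pr_info("demo thread created for cpu %u\n", cpu);
}

static int demo_should_run(unsigned int cpu)  { return 0; }
static void demo_thread_fn(unsigned int cpu)  { }

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_task,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.create			= demo_create,
	.selfparking		= false,	/* let smpboot park it */
	.thread_comm		= "demo/%u",
};

/* somewhere in init code: smpboot_register_percpu_thread(&demo_threads); */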
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 6eb691b08358..04f4121a23ae 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -151,30 +151,14 @@ void srcu_barrier(struct srcu_struct *sp);
151 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot 151 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
152 * and while lockdep is disabled. 152 * and while lockdep is disabled.
153 * 153 *
 154 * Note that if the CPU is in the idle loop from an RCU point of view 154 * Note that SRCU is based on its own state machine and does not
 155 * (ie: that we are in the section between rcu_idle_enter() and 155 * rely on normal RCU; it can be called from a CPU which
 156 * rcu_idle_exit()) then srcu_read_lock_held() returns false even if 156 * is in the idle loop from an RCU point of view, or offline.
157 * the CPU did an srcu_read_lock(). The reason for this is that RCU
158 * ignores CPUs that are in such a section, considering these as in
159 * extended quiescent state, so such a CPU is effectively never in an
160 * RCU read-side critical section regardless of what RCU primitives it
161 * invokes. This state of affairs is required --- we need to keep an
162 * RCU-free window in idle where the CPU may possibly enter into low
163 * power mode. This way we can notice an extended quiescent state to
164 * other CPUs that started a grace period. Otherwise we would delay any
165 * grace period as long as we run in the idle task.
166 *
167 * Similarly, we avoid claiming an SRCU read lock held if the current
168 * CPU is offline.
169 */ 157 */
170static inline int srcu_read_lock_held(struct srcu_struct *sp) 158static inline int srcu_read_lock_held(struct srcu_struct *sp)
171{ 159{
172 if (!debug_lockdep_rcu_enabled()) 160 if (!debug_lockdep_rcu_enabled())
173 return 1; 161 return 1;
174 if (rcu_is_cpu_idle())
175 return 0;
176 if (!rcu_lockdep_current_cpu_online())
177 return 0;
178 return lock_is_held(&sp->dep_map); 162 return lock_is_held(&sp->dep_map);
179} 163}
180 164
@@ -236,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
236 int retval = __srcu_read_lock(sp); 220 int retval = __srcu_read_lock(sp);
237 221
238 rcu_lock_acquire(&(sp)->dep_map); 222 rcu_lock_acquire(&(sp)->dep_map);
239 rcu_lockdep_assert(!rcu_is_cpu_idle(),
240 "srcu_read_lock() used illegally while idle");
241 return retval; 223 return retval;
242} 224}
243 225
@@ -251,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
251static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) 233static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
252 __releases(sp) 234 __releases(sp)
253{ 235{
254 rcu_lockdep_assert(!rcu_is_cpu_idle(),
255 "srcu_read_unlock() used illegally while idle");
256 rcu_lock_release(&(sp)->dep_map); 236 rcu_lock_release(&(sp)->dep_map);
257 __srcu_read_unlock(sp, idx); 237 __srcu_read_unlock(sp, idx);
258} 238}
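With the idle/offline checks dropped, an SRCU read side is now legal from the idle loop or from an offline CPU; a minimal reader sketch (demo_* names are hypothetical):

#include <linux/srcu.h>

static struct srcu_struct demo_srcu;	/* init_srcu_struct() at setup */

static void demo_reader(void)
{
	int idx;

	/* After this change no rcu_lockdep_assert(!rcu_is_cpu_idle())
	 * fires here: SRCU runs its own state machine and does not
	 * depend on the CPU being non-idle from RCU's point of view. */
	idx = srcu_read_lock(&demo_srcu);
	/* ... access SRCU-protected data ... */
	srcu_read_unlock(&demo_srcu, idx);
}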
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 1a6567b48492..553272e6af55 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,8 @@
8 8
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <linux/irqflags.h> 10#include <linux/irqflags.h>
11#include <linux/percpu.h>
12#include <linux/hrtimer.h>
11 13
12#ifdef CONFIG_GENERIC_CLOCKEVENTS 14#ifdef CONFIG_GENERIC_CLOCKEVENTS
13 15
@@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
122#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ 124#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
123 125
124# ifdef CONFIG_NO_HZ 126# ifdef CONFIG_NO_HZ
127DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
128
129static inline int tick_nohz_tick_stopped(void)
130{
131 return __this_cpu_read(tick_cpu_sched.tick_stopped);
132}
133
125extern void tick_nohz_idle_enter(void); 134extern void tick_nohz_idle_enter(void);
126extern void tick_nohz_idle_exit(void); 135extern void tick_nohz_idle_exit(void);
127extern void tick_nohz_irq_exit(void); 136extern void tick_nohz_irq_exit(void);
128extern ktime_t tick_nohz_get_sleep_length(void); 137extern ktime_t tick_nohz_get_sleep_length(void);
129extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 138extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
130extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 139extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
131# else 140
141# else /* !CONFIG_NO_HZ */
142static inline int tick_nohz_tick_stopped(void)
143{
144 return 0;
145}
146
132static inline void tick_nohz_idle_enter(void) { } 147static inline void tick_nohz_idle_enter(void) { }
133static inline void tick_nohz_idle_exit(void) { } 148static inline void tick_nohz_idle_exit(void) { }
134 149
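tick_nohz_tick_stopped() lets subsystems defer non-urgent work rather than forcing the tick back on; a hedged sketch of the intended call shape (the policy helper is hypothetical):

#include <linux/tick.h>

/* Hypothetical policy helper: if this CPU has stopped its tick,
 * prefer deferring the work (e.g. via irq_work) so the CPU can stay
 * in its dynticks state instead of being woken for bookkeeping. */
static bool demo_should_defer(void)
{
	return tick_nohz_tick_stopped();
}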
diff --git a/include/linux/time.h b/include/linux/time.h
index 4d358e9d10f1..a3ab6a814a9c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -115,8 +115,20 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
115 return true; 115 return true;
116} 116}
117 117
118extern bool persistent_clock_exist;
119
120#ifdef ALWAYS_USE_PERSISTENT_CLOCK
121#define has_persistent_clock() true
122#else
123static inline bool has_persistent_clock(void)
124{
125 return persistent_clock_exist;
126}
127#endif
128
118extern void read_persistent_clock(struct timespec *ts); 129extern void read_persistent_clock(struct timespec *ts);
119extern void read_boot_clock(struct timespec *ts); 130extern void read_boot_clock(struct timespec *ts);
131extern int persistent_clock_is_local;
120extern int update_persistent_clock(struct timespec now); 132extern int update_persistent_clock(struct timespec now);
121void timekeeping_init(void); 133void timekeeping_init(void);
122extern int timekeeping_suspended; 134extern int timekeeping_suspended;
@@ -158,6 +170,7 @@ extern int do_setitimer(int which, struct itimerval *value,
158 struct itimerval *ovalue); 170 struct itimerval *ovalue);
159extern unsigned int alarm_setitimer(unsigned int seconds); 171extern unsigned int alarm_setitimer(unsigned int seconds);
160extern int do_getitimer(int which, struct itimerval *value); 172extern int do_getitimer(int which, struct itimerval *value);
173extern int __getnstimeofday(struct timespec *tv);
161extern void getnstimeofday(struct timespec *tv); 174extern void getnstimeofday(struct timespec *tv);
162extern void getrawmonotonic(struct timespec *ts); 175extern void getrawmonotonic(struct timespec *ts);
163extern void getnstime_raw_and_real(struct timespec *ts_raw, 176extern void getnstime_raw_and_real(struct timespec *ts_raw,
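__getnstimeofday() is the non-WARNing variant for callers that may run while timekeeping is suspended; a small sketch of the intended error handling (the wrapper is hypothetical):

#include <linux/time.h>

static void demo_stamp(struct timespec *ts)
{
	/* Unlike getnstimeofday(), __getnstimeofday() returns an error
	 * (-EAGAIN) while timekeeping is suspended instead of WARNing,
	 * so suspend-safe callers can fall back gracefully. */
	if (__getnstimeofday(ts)) {
		ts->tv_sec = 0;
		ts->tv_nsec = 0;
	}
}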
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 44893e5ec8f7..3251965bf4cc 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
23#ifdef CONFIG_TASK_XACCT 23#ifdef CONFIG_TASK_XACCT
24extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); 24extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
25extern void acct_update_integrals(struct task_struct *tsk); 25extern void acct_update_integrals(struct task_struct *tsk);
26extern void acct_account_cputime(struct task_struct *tsk);
26extern void acct_clear_integrals(struct task_struct *tsk); 27extern void acct_clear_integrals(struct task_struct *tsk);
27#else 28#else
28static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) 29static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
29{} 30{}
30static inline void acct_update_integrals(struct task_struct *tsk) 31static inline void acct_update_integrals(struct task_struct *tsk)
31{} 32{}
33static inline void acct_account_cputime(struct task_struct *tsk)
34{}
32static inline void acct_clear_integrals(struct task_struct *tsk) 35static inline void acct_clear_integrals(struct task_struct *tsk)
33{} 36{}
34#endif /* CONFIG_TASK_XACCT */ 37#endif /* CONFIG_TASK_XACCT */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6fc5b4..02b83db8e2c5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,13 +35,20 @@ struct inode;
35# include <asm/uprobes.h> 35# include <asm/uprobes.h>
36#endif 36#endif
37 37
38#define UPROBE_HANDLER_REMOVE 1
39#define UPROBE_HANDLER_MASK 1
40
41enum uprobe_filter_ctx {
42 UPROBE_FILTER_REGISTER,
43 UPROBE_FILTER_UNREGISTER,
44 UPROBE_FILTER_MMAP,
45};
46
38struct uprobe_consumer { 47struct uprobe_consumer {
39 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); 48 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
40 /* 49 bool (*filter)(struct uprobe_consumer *self,
41 * filter is optional; If a filter exists, handler is run 50 enum uprobe_filter_ctx ctx,
42 * if and only if filter returns true. 51 struct mm_struct *mm);
43 */
44 bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
45 52
46 struct uprobe_consumer *next; 53 struct uprobe_consumer *next;
47}; 54};
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
94extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); 101extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
95extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); 102extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
96extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 103extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
104extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
97extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 105extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
98extern int uprobe_mmap(struct vm_area_struct *vma); 106extern int uprobe_mmap(struct vm_area_struct *vma);
99extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); 107extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
117{ 125{
118 return -ENOSYS; 126 return -ENOSYS;
119} 127}
128static inline int
129uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
130{
131 return -ENOSYS;
132}
120static inline void 133static inline void
121uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) 134uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
122{ 135{
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431a..71a5782d8c59 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,15 +6,46 @@ struct task_struct;
6#ifdef CONFIG_VIRT_CPU_ACCOUNTING 6#ifdef CONFIG_VIRT_CPU_ACCOUNTING
7extern void vtime_task_switch(struct task_struct *prev); 7extern void vtime_task_switch(struct task_struct *prev);
8extern void vtime_account_system(struct task_struct *tsk); 8extern void vtime_account_system(struct task_struct *tsk);
9extern void vtime_account_system_irqsafe(struct task_struct *tsk);
10extern void vtime_account_idle(struct task_struct *tsk); 9extern void vtime_account_idle(struct task_struct *tsk);
11extern void vtime_account_user(struct task_struct *tsk); 10extern void vtime_account_user(struct task_struct *tsk);
12extern void vtime_account(struct task_struct *tsk); 11extern void vtime_account_irq_enter(struct task_struct *tsk);
13#else 12
13#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
14static inline bool vtime_accounting_enabled(void) { return true; }
15#endif
16
17#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
18
14static inline void vtime_task_switch(struct task_struct *prev) { } 19static inline void vtime_task_switch(struct task_struct *prev) { }
15static inline void vtime_account_system(struct task_struct *tsk) { } 20static inline void vtime_account_system(struct task_struct *tsk) { }
16static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } 21static inline void vtime_account_user(struct task_struct *tsk) { }
17static inline void vtime_account(struct task_struct *tsk) { } 22static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
23static inline bool vtime_accounting_enabled(void) { return false; }
24#endif
25
26#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
27extern void arch_vtime_task_switch(struct task_struct *tsk);
28extern void vtime_account_irq_exit(struct task_struct *tsk);
29extern bool vtime_accounting_enabled(void);
30extern void vtime_user_enter(struct task_struct *tsk);
31static inline void vtime_user_exit(struct task_struct *tsk)
32{
33 vtime_account_user(tsk);
34}
35extern void vtime_guest_enter(struct task_struct *tsk);
36extern void vtime_guest_exit(struct task_struct *tsk);
37extern void vtime_init_idle(struct task_struct *tsk);
38#else
39static inline void vtime_account_irq_exit(struct task_struct *tsk)
40{
41 /* On hard|softirq exit we always account to hard|softirq cputime */
42 vtime_account_system(tsk);
43}
44static inline void vtime_user_enter(struct task_struct *tsk) { }
45static inline void vtime_user_exit(struct task_struct *tsk) { }
46static inline void vtime_guest_enter(struct task_struct *tsk) { }
47static inline void vtime_guest_exit(struct task_struct *tsk) { }
48static inline void vtime_init_idle(struct task_struct *tsk) { }
18#endif 49#endif
19 50
20#ifdef CONFIG_IRQ_TIME_ACCOUNTING 51#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
23static inline void irqtime_account_irq(struct task_struct *tsk) { } 54static inline void irqtime_account_irq(struct task_struct *tsk) { }
24#endif 55#endif
25 56
26static inline void vtime_account_irq_enter(struct task_struct *tsk) 57static inline void account_irq_enter_time(struct task_struct *tsk)
27{ 58{
28 /* 59 vtime_account_irq_enter(tsk);
29 * Hardirq can interrupt idle task anytime. So we need vtime_account()
30 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
31 * Softirq can also interrupt idle task directly if it calls
32 * local_bh_enable(). Such case probably don't exist but we never know.
33 * Ksoftirqd is not concerned because idle time is flushed on context
34 * switch. Softirqs in the end of hardirqs are also not a problem because
35 * the idle time is flushed on hardirq time already.
36 */
37 vtime_account(tsk);
38 irqtime_account_irq(tsk); 60 irqtime_account_irq(tsk);
39} 61}
40 62
41static inline void vtime_account_irq_exit(struct task_struct *tsk) 63static inline void account_irq_exit_time(struct task_struct *tsk)
42{ 64{
43 /* On hard|softirq exit we always account to hard|softirq cputime */ 65 vtime_account_irq_exit(tsk);
44 vtime_account_system(tsk);
45 irqtime_account_irq(tsk); 66 irqtime_account_irq(tsk);
46} 67}
47 68
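The renamed account_irq_{enter,exit}_time() helpers are meant to be the single call the IRQ path makes; a simplified sketch of the caller shape (compare the real call sites in kernel/softirq.c):

#include <linux/sched.h>
#include <linux/vtime.h>

static void demo_irq_enter_accounting(void)
{
	/* One call now covers both vtime and IRQ-time accounting. */
	account_irq_enter_time(current);
}

static void demo_irq_exit_accounting(void)
{
	account_irq_exit_time(current);
}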
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
new file mode 100644
index 000000000000..88b878383797
--- /dev/null
+++ b/include/trace/events/ras.h
@@ -0,0 +1,77 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ras
3
4#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_AER_H
6
7#include <linux/tracepoint.h>
8#include <linux/edac.h>
9
10
11/*
12 * PCIe AER Trace event
13 *
14 * These events are generated when hardware detects a corrected or
15 * uncorrected event on a PCIe device. The event report has
16 * the following structure:
17 *
18 * char * dev_name - The name of the slot where the device resides
19 * ([domain:]bus:device.function).
20 * u32 status - Either the correctable or uncorrectable register
21 * indicating what error or errors have been seen
22 * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
23 */
24
25#define aer_correctable_errors \
26 {BIT(0), "Receiver Error"}, \
27 {BIT(6), "Bad TLP"}, \
28 {BIT(7), "Bad DLLP"}, \
29 {BIT(8), "RELAY_NUM Rollover"}, \
30 {BIT(12), "Replay Timer Timeout"}, \
31 {BIT(13), "Advisory Non-Fatal"}
32
33#define aer_uncorrectable_errors \
34 {BIT(4), "Data Link Protocol"}, \
35 {BIT(12), "Poisoned TLP"}, \
36 {BIT(13), "Flow Control Protocol"}, \
37 {BIT(14), "Completion Timeout"}, \
38 {BIT(15), "Completer Abort"}, \
39 {BIT(16), "Unexpected Completion"}, \
40 {BIT(17), "Receiver Overflow"}, \
41 {BIT(18), "Malformed TLP"}, \
42 {BIT(19), "ECRC"}, \
43 {BIT(20), "Unsupported Request"}
44
45TRACE_EVENT(aer_event,
46 TP_PROTO(const char *dev_name,
47 const u32 status,
48 const u8 severity),
49
50 TP_ARGS(dev_name, status, severity),
51
52 TP_STRUCT__entry(
53 __string( dev_name, dev_name )
54 __field( u32, status )
55 __field( u8, severity )
56 ),
57
58 TP_fast_assign(
59 __assign_str(dev_name, dev_name);
60 __entry->status = status;
61 __entry->severity = severity;
62 ),
63
64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
65 __get_str(dev_name),
66 __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" :
67 __entry->severity == HW_EVENT_ERR_FATAL ?
68 "Fatal" : "Uncorrected",
69 __entry->severity == HW_EVENT_ERR_CORRECTED ?
70 __print_flags(__entry->status, "|", aer_correctable_errors) :
71 __print_flags(__entry->status, "|", aer_uncorrectable_errors))
72);
73
74#endif /* _TRACE_AER_H */
75
76/* This part must be outside protection */
77#include <trace/define_trace.h>
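To emit the new tracepoint, exactly one compilation unit must define CREATE_TRACE_POINTS before including the header; a hedged call-site sketch (the reporting function and its pdev argument are hypothetical):

#include <linux/pci.h>
#include <linux/edac.h>

#define CREATE_TRACE_POINTS
#include <trace/events/ras.h>

static void demo_report_aer(struct pci_dev *pdev, u32 status, u8 severity)
{
	/* severity is one of HW_EVENT_ERR_CORRECTED / HW_EVENT_ERR_FATAL /
	 * HW_EVENT_ERR_UNCORRECTED from <linux/edac.h>. */
	trace_aer_event(dev_name(&pdev->dev), status, severity);
}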
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b1ec34..1918e832da4f 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization,
44 * of a new grace period or the end of an old grace period ("cpustart" 44 * of a new grace period or the end of an old grace period ("cpustart"
45 * and "cpuend", respectively), a CPU passing through a quiescent 45 * and "cpuend", respectively), a CPU passing through a quiescent
46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" 46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
47 * and "cpuofl", respectively), and a CPU being kicked for being too 47 * and "cpuofl", respectively), a CPU being kicked for being too
48 * long in dyntick-idle mode ("kick"). 48 * long in dyntick-idle mode ("kick"), a CPU accelerating its new
49 * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
50 * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
49 */ 51 */
50TRACE_EVENT(rcu_grace_period, 52TRACE_EVENT(rcu_grace_period,
51 53
@@ -393,7 +395,7 @@ TRACE_EVENT(rcu_kfree_callback,
393 */ 395 */
394TRACE_EVENT(rcu_batch_start, 396TRACE_EVENT(rcu_batch_start,
395 397
396 TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), 398 TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
397 399
398 TP_ARGS(rcuname, qlen_lazy, qlen, blimit), 400 TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
399 401
@@ -401,7 +403,7 @@ TRACE_EVENT(rcu_batch_start,
401 __field(char *, rcuname) 403 __field(char *, rcuname)
402 __field(long, qlen_lazy) 404 __field(long, qlen_lazy)
403 __field(long, qlen) 405 __field(long, qlen)
404 __field(int, blimit) 406 __field(long, blimit)
405 ), 407 ),
406 408
407 TP_fast_assign( 409 TP_fast_assign(
@@ -411,7 +413,7 @@ TRACE_EVENT(rcu_batch_start,
411 __entry->blimit = blimit; 413 __entry->blimit = blimit;
412 ), 414 ),
413 415
414 TP_printk("%s CBs=%ld/%ld bl=%d", 416 TP_printk("%s CBs=%ld/%ld bl=%ld",
415 __entry->rcuname, __entry->qlen_lazy, __entry->qlen, 417 __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
416 __entry->blimit) 418 __entry->blimit)
417); 419);
@@ -523,22 +525,30 @@ TRACE_EVENT(rcu_batch_end,
523 */ 525 */
524TRACE_EVENT(rcu_torture_read, 526TRACE_EVENT(rcu_torture_read,
525 527
526 TP_PROTO(char *rcutorturename, struct rcu_head *rhp), 528 TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
529 unsigned long secs, unsigned long c_old, unsigned long c),
527 530
528 TP_ARGS(rcutorturename, rhp), 531 TP_ARGS(rcutorturename, rhp, secs, c_old, c),
529 532
530 TP_STRUCT__entry( 533 TP_STRUCT__entry(
531 __field(char *, rcutorturename) 534 __field(char *, rcutorturename)
532 __field(struct rcu_head *, rhp) 535 __field(struct rcu_head *, rhp)
536 __field(unsigned long, secs)
537 __field(unsigned long, c_old)
538 __field(unsigned long, c)
533 ), 539 ),
534 540
535 TP_fast_assign( 541 TP_fast_assign(
536 __entry->rcutorturename = rcutorturename; 542 __entry->rcutorturename = rcutorturename;
537 __entry->rhp = rhp; 543 __entry->rhp = rhp;
544 __entry->secs = secs;
545 __entry->c_old = c_old;
546 __entry->c = c;
538 ), 547 ),
539 548
540 TP_printk("%s torture read %p", 549 TP_printk("%s torture read %p %luus c: %lu %lu",
541 __entry->rcutorturename, __entry->rhp) 550 __entry->rcutorturename, __entry->rhp,
551 __entry->secs, __entry->c_old, __entry->c)
542); 552);
543 553
544/* 554/*
@@ -608,7 +618,8 @@ TRACE_EVENT(rcu_barrier,
608#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) 618#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
609#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ 619#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
610 do { } while (0) 620 do { } while (0)
611#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 621#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
622 do { } while (0)
612#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) 623#define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
613 624
614#endif /* #else #ifdef CONFIG_RCU_TRACE */ 625#endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 77cdba9df274..bb991dfe134f 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -28,25 +28,16 @@
28#define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION 28#define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION
29 29
30/* 30/*
31 * Architectures where both 32- and 64-bit binaries can be executed 31 * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
32 * on 64-bit kernels need this. This keeps the structure format 32 * back to the kernel via ioctl from userspace. On architectures where 32- and
33 * uniform, and makes sure the wait_queue_token isn't too big to be 33 * 64-bit userspace binaries can be executed it's important that the size of
34 * passed back down to the kernel. 34 * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
35 * 35 * do not break the binary ABI interface by changing the structure size.
36 * This assumes that on these architectures:
37 * mode 32 bit 64 bit
38 * -------------------------
39 * int 32 bit 32 bit
40 * long 32 bit 64 bit
41 *
42 * If so, 32-bit user-space code should be backwards compatible.
43 */ 36 */
44 37#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */
45#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \
46 || defined(__powerpc__) || defined(__s390__)
47typedef unsigned int autofs_wqt_t;
48#else
49typedef unsigned long autofs_wqt_t; 38typedef unsigned long autofs_wqt_t;
39#else
40typedef unsigned int autofs_wqt_t;
50#endif 41#endif
51 42
52/* Packet types */ 43/* Packet types */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4f63c05d27c9..9fa9c622a7f4 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -579,7 +579,8 @@ enum perf_event_type {
579 * { u32 size; 579 * { u32 size;
580 * char data[size];}&& PERF_SAMPLE_RAW 580 * char data[size];}&& PERF_SAMPLE_RAW
581 * 581 *
582 * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK 582 * { u64 nr;
583 * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
583 * 584 *
584 * { u64 abi; # enum perf_sample_regs_abi 585 * { u64 abi; # enum perf_sample_regs_abi
585 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER 586 * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/init/Kconfig b/init/Kconfig
index be8b7f55312d..7000d9657402 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -20,12 +20,8 @@ config CONSTRUCTORS
20 bool 20 bool
21 depends on !UML 21 depends on !UML
22 22
23config HAVE_IRQ_WORK
24 bool
25
26config IRQ_WORK 23config IRQ_WORK
27 bool 24 bool
28 depends on HAVE_IRQ_WORK
29 25
30config BUILDTIME_EXTABLE_SORT 26config BUILDTIME_EXTABLE_SORT
31 bool 27 bool
@@ -326,10 +322,13 @@ source "kernel/time/Kconfig"
326 322
327menu "CPU/Task time and stats accounting" 323menu "CPU/Task time and stats accounting"
328 324
325config VIRT_CPU_ACCOUNTING
326 bool
327
329choice 328choice
330 prompt "Cputime accounting" 329 prompt "Cputime accounting"
331 default TICK_CPU_ACCOUNTING if !PPC64 330 default TICK_CPU_ACCOUNTING if !PPC64
332 default VIRT_CPU_ACCOUNTING if PPC64 331 default VIRT_CPU_ACCOUNTING_NATIVE if PPC64
333 332
334# Kind of a stub config for the pure tick based cputime accounting 333# Kind of a stub config for the pure tick based cputime accounting
335config TICK_CPU_ACCOUNTING 334config TICK_CPU_ACCOUNTING
@@ -342,9 +341,10 @@ config TICK_CPU_ACCOUNTING
342 341
343 If unsure, say Y. 342 If unsure, say Y.
344 343
345config VIRT_CPU_ACCOUNTING 344config VIRT_CPU_ACCOUNTING_NATIVE
346 bool "Deterministic task and CPU time accounting" 345 bool "Deterministic task and CPU time accounting"
347 depends on HAVE_VIRT_CPU_ACCOUNTING 346 depends on HAVE_VIRT_CPU_ACCOUNTING
347 select VIRT_CPU_ACCOUNTING
348 help 348 help
349 Select this option to enable more accurate task and CPU time 349 Select this option to enable more accurate task and CPU time
350 accounting. This is done by reading a CPU counter on each 350 accounting. This is done by reading a CPU counter on each
@@ -354,6 +354,23 @@ config VIRT_CPU_ACCOUNTING
354 this also enables accounting of stolen time on logically-partitioned 354 this also enables accounting of stolen time on logically-partitioned
355 systems. 355 systems.
356 356
357config VIRT_CPU_ACCOUNTING_GEN
358 bool "Full dynticks CPU time accounting"
359 depends on HAVE_CONTEXT_TRACKING && 64BIT
360 select VIRT_CPU_ACCOUNTING
361 select CONTEXT_TRACKING
362 help
363 Select this option to enable task and CPU time accounting on full
364 dynticks systems. This accounting is implemented by watching every
365 kernel-user boundaries using the context tracking subsystem.
366 The accounting is thus performed at the expense of some significant
367 overhead.
368
 369 For now this is only useful if you are working on the
 370 development of the full dynticks subsystem.
371
372 If unsure, say N.
373
357config IRQ_TIME_ACCOUNTING 374config IRQ_TIME_ACCOUNTING
358 bool "Fine granularity task level IRQ time accounting" 375 bool "Fine granularity task level IRQ time accounting"
359 depends on HAVE_IRQ_TIME_ACCOUNTING 376 depends on HAVE_IRQ_TIME_ACCOUNTING
@@ -453,7 +470,7 @@ config TREE_RCU
453 470
454config TREE_PREEMPT_RCU 471config TREE_PREEMPT_RCU
455 bool "Preemptible tree-based hierarchical RCU" 472 bool "Preemptible tree-based hierarchical RCU"
456 depends on PREEMPT && SMP 473 depends on PREEMPT
457 help 474 help
458 This option selects the RCU implementation that is 475 This option selects the RCU implementation that is
459 designed for very large SMP systems with hundreds or 476 designed for very large SMP systems with hundreds or
@@ -461,6 +478,8 @@ config TREE_PREEMPT_RCU
461 is also required. It also scales down nicely to 478 is also required. It also scales down nicely to
462 smaller systems. 479 smaller systems.
463 480
481 Select this option if you are unsure.
482
464config TINY_RCU 483config TINY_RCU
465 bool "UP-only small-memory-footprint RCU" 484 bool "UP-only small-memory-footprint RCU"
466 depends on !PREEMPT && !SMP 485 depends on !PREEMPT && !SMP
@@ -486,6 +505,14 @@ config PREEMPT_RCU
486 This option enables preemptible-RCU code that is common between 505 This option enables preemptible-RCU code that is common between
487 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. 506 the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
488 507
508config RCU_STALL_COMMON
509 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
510 help
511 This option enables RCU CPU stall code that is common between
512 the TINY and TREE variants of RCU. The purpose is to allow
513 the tiny variants to disable RCU CPU stall warnings, while
514 making these warnings mandatory for the tree variants.
515
489config CONTEXT_TRACKING 516config CONTEXT_TRACKING
490 bool 517 bool
491 518
@@ -1263,6 +1290,7 @@ config HOTPLUG
1263config PRINTK 1290config PRINTK
1264 default y 1291 default y
1265 bool "Enable support for printk" if EXPERT 1292 bool "Enable support for printk" if EXPERT
1293 select IRQ_WORK
1266 help 1294 help
1267 This option enables normal printk support. Removing it 1295 This option enables normal printk support. Removing it
1268 eliminates most of the message strings from the kernel image 1296 eliminates most of the message strings from the kernel image
diff --git a/init/init_task.c b/init/init_task.c
index 8b2f3996b035..ba0a7f362d9e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -2,6 +2,8 @@
2#include <linux/export.h> 2#include <linux/export.h>
3#include <linux/mqueue.h> 3#include <linux/mqueue.h>
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/sched/sysctl.h>
6#include <linux/sched/rt.h>
5#include <linux/init.h> 7#include <linux/init.h>
6#include <linux/fs.h> 8#include <linux/fs.h>
7#include <linux/mm.h> 9#include <linux/mm.h>
diff --git a/kernel/acct.c b/kernel/acct.c
index 051e071a06e7..e8b1627ab9c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -566,6 +566,7 @@ out:
566void acct_collect(long exitcode, int group_dead) 566void acct_collect(long exitcode, int group_dead)
567{ 567{
568 struct pacct_struct *pacct = &current->signal->pacct; 568 struct pacct_struct *pacct = &current->signal->pacct;
569 cputime_t utime, stime;
569 unsigned long vsize = 0; 570 unsigned long vsize = 0;
570 571
571 if (group_dead && current->mm) { 572 if (group_dead && current->mm) {
@@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
593 pacct->ac_flag |= ACORE; 594 pacct->ac_flag |= ACORE;
594 if (current->flags & PF_SIGNALED) 595 if (current->flags & PF_SIGNALED)
595 pacct->ac_flag |= AXSIG; 596 pacct->ac_flag |= AXSIG;
596 pacct->ac_utime += current->utime; 597 task_cputime(current, &utime, &stime);
597 pacct->ac_stime += current->stime; 598 pacct->ac_utime += utime;
599 pacct->ac_stime += stime;
598 pacct->ac_minflt += current->min_flt; 600 pacct->ac_minflt += current->min_flt;
599 pacct->ac_majflt += current->maj_flt; 601 pacct->ac_majflt += current->maj_flt;
600 spin_unlock_irq(&current->sighand->siglock); 602 spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..65349f07b878 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,29 +1,41 @@
1/*
2 * Context tracking: Probe on high level context boundaries such as kernel
 3 * and userspace. This includes syscall and exception entry/exit.
4 *
5 * This is used by RCU to remove its dependency on the timer tick while a CPU
6 * runs in userspace.
7 *
8 * Started by Frederic Weisbecker:
9 *
10 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
11 *
12 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
13 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
14 *
15 */
16
1#include <linux/context_tracking.h> 17#include <linux/context_tracking.h>
18#include <linux/kvm_host.h>
2#include <linux/rcupdate.h> 19#include <linux/rcupdate.h>
3#include <linux/sched.h> 20#include <linux/sched.h>
4#include <linux/percpu.h>
5#include <linux/hardirq.h> 21#include <linux/hardirq.h>
22#include <linux/export.h>
6 23
7struct context_tracking { 24DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
8 /*
9 * When active is false, hooks are not set to
10 * minimize overhead: TIF flags are cleared
11 * and calls to user_enter/exit are ignored. This
12 * may be further optimized using static keys.
13 */
14 bool active;
15 enum {
16 IN_KERNEL = 0,
17 IN_USER,
18 } state;
19};
20
21static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
22#ifdef CONFIG_CONTEXT_TRACKING_FORCE 25#ifdef CONFIG_CONTEXT_TRACKING_FORCE
23 .active = true, 26 .active = true,
24#endif 27#endif
25}; 28};
26 29
30/**
31 * user_enter - Inform the context tracking that the CPU is going to
32 * enter userspace mode.
33 *
34 * This function must be called right before we switch from the kernel
 35 * to userspace, when it is guaranteed that the remaining kernel instructions
 36 * to execute won't use any RCU read side critical section, because this
 37 * function puts RCU into an extended quiescent state.
38 */
27void user_enter(void) 39void user_enter(void)
28{ 40{
29 unsigned long flags; 41 unsigned long flags;
@@ -39,40 +51,90 @@ void user_enter(void)
39 if (in_interrupt()) 51 if (in_interrupt())
40 return; 52 return;
41 53
54 /* Kernel threads aren't supposed to go to userspace */
42 WARN_ON_ONCE(!current->mm); 55 WARN_ON_ONCE(!current->mm);
43 56
44 local_irq_save(flags); 57 local_irq_save(flags);
45 if (__this_cpu_read(context_tracking.active) && 58 if (__this_cpu_read(context_tracking.active) &&
46 __this_cpu_read(context_tracking.state) != IN_USER) { 59 __this_cpu_read(context_tracking.state) != IN_USER) {
47 __this_cpu_write(context_tracking.state, IN_USER); 60 /*
61 * At this stage, only low level arch entry code remains and
62 * then we'll run in userspace. We can assume there won't be
63 * any RCU read-side critical section until the next call to
64 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
65 * on the tick.
66 */
67 vtime_user_enter(current);
48 rcu_user_enter(); 68 rcu_user_enter();
69 __this_cpu_write(context_tracking.state, IN_USER);
49 } 70 }
50 local_irq_restore(flags); 71 local_irq_restore(flags);
51} 72}
52 73
74
75/**
76 * user_exit - Inform the context tracking that the CPU is
77 * exiting userspace mode and entering the kernel.
78 *
79 * This function must be called after we entered the kernel from userspace
 80 * before any use of an RCU read side critical section. This potentially includes
 81 * any high level kernel code such as syscalls, exceptions, signal handling, etc.
82 *
83 * This call supports re-entrancy. This way it can be called from any exception
84 * handler without needing to know if we came from userspace or not.
85 */
53void user_exit(void) 86void user_exit(void)
54{ 87{
55 unsigned long flags; 88 unsigned long flags;
56 89
57 /*
 58 * Some contexts may involve an exception occurring in an irq,
59 * leading to that nesting:
60 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
61 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
62 * helpers are enough to protect RCU uses inside the exception. So
63 * just return immediately if we detect we are in an IRQ.
64 */
65 if (in_interrupt()) 90 if (in_interrupt())
66 return; 91 return;
67 92
68 local_irq_save(flags); 93 local_irq_save(flags);
69 if (__this_cpu_read(context_tracking.state) == IN_USER) { 94 if (__this_cpu_read(context_tracking.state) == IN_USER) {
70 __this_cpu_write(context_tracking.state, IN_KERNEL); 95 /*
96 * We are going to run code that may use RCU. Inform
97 * RCU core about that (ie: we may need the tick again).
98 */
71 rcu_user_exit(); 99 rcu_user_exit();
100 vtime_user_exit(current);
101 __this_cpu_write(context_tracking.state, IN_KERNEL);
72 } 102 }
73 local_irq_restore(flags); 103 local_irq_restore(flags);
74} 104}
75 105
106void guest_enter(void)
107{
108 if (vtime_accounting_enabled())
109 vtime_guest_enter(current);
110 else
111 __guest_enter();
112}
113EXPORT_SYMBOL_GPL(guest_enter);
114
115void guest_exit(void)
116{
117 if (vtime_accounting_enabled())
118 vtime_guest_exit(current);
119 else
120 __guest_exit();
121}
122EXPORT_SYMBOL_GPL(guest_exit);
123
124
125/**
126 * context_tracking_task_switch - context switch the syscall callbacks
127 * @prev: the task that is being switched out
128 * @next: the task that is being switched in
129 *
130 * The context tracking uses the syscall slow path to implement its user-kernel
 131 * boundary probes on syscalls. This way it doesn't impact the syscall fast
132 * path on CPUs that don't do context tracking.
133 *
134 * But we need to clear the flag on the previous task because it may later
135 * migrate to some CPU that doesn't do the context tracking. As such the TIF
136 * flag may not be desired there.
137 */
76void context_tracking_task_switch(struct task_struct *prev, 138void context_tracking_task_switch(struct task_struct *prev,
77 struct task_struct *next) 139 struct task_struct *next)
78{ 140{
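The documented pairing for arch code is: user_exit() as early as possible on kernel entry, user_enter() as late as possible before returning. A hypothetical handler shape:

#include <linux/context_tracking.h>
#include <linux/ptrace.h>

/* Hypothetical arch exception handler: both calls are safe to make
 * without knowing whether we came from userspace, and both no-op when
 * invoked from interrupt context. */
static void demo_exception_handler(struct pt_regs *regs)
{
	user_exit();	/* back in the kernel: RCU may be used again  */
	/* ... handle the exception ... */
	user_enter();	/* about to return: no RCU use past this point */
}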
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3046a503242c..b5e4ab2d427e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
224static inline void check_for_tasks(int cpu) 224static inline void check_for_tasks(int cpu)
225{ 225{
226 struct task_struct *p; 226 struct task_struct *p;
227 cputime_t utime, stime;
227 228
228 write_lock_irq(&tasklist_lock); 229 write_lock_irq(&tasklist_lock);
229 for_each_process(p) { 230 for_each_process(p) {
231 task_cputime(p, &utime, &stime);
230 if (task_cpu(p) == cpu && p->state == TASK_RUNNING && 232 if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
231 (p->utime || p->stime)) 233 (utime || stime))
232 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " 234 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
233 "(state = %ld, flags = %x)\n", 235 "(state = %ld, flags = %x)\n",
234 p->comm, task_pid_nr(p), cpu, 236 p->comm, task_pid_nr(p), cpu,
@@ -254,6 +256,8 @@ static int __ref take_cpu_down(void *_param)
254 return err; 256 return err;
255 257
256 cpu_notify(CPU_DYING | param->mod, param->hcpu); 258 cpu_notify(CPU_DYING | param->mod, param->hcpu);
259 /* Park the stopper thread */
260 kthread_park(current);
257 return 0; 261 return 0;
258} 262}
259 263
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 418b3f7053aa..d473988c1d0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
106 unsigned long long t2, t3; 106 unsigned long long t2, t3;
107 unsigned long flags; 107 unsigned long flags;
108 struct timespec ts; 108 struct timespec ts;
109 cputime_t utime, stime, stimescaled, utimescaled;
109 110
110 /* Though tsk->delays accessed later, early exit avoids 111 /* Though tsk->delays accessed later, early exit avoids
111 * unnecessary returning of other data 112 * unnecessary returning of other data
@@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
114 goto done; 115 goto done;
115 116
116 tmp = (s64)d->cpu_run_real_total; 117 tmp = (s64)d->cpu_run_real_total;
117 cputime_to_timespec(tsk->utime + tsk->stime, &ts); 118 task_cputime(tsk, &utime, &stime);
119 cputime_to_timespec(utime + stime, &ts);
118 tmp += timespec_to_ns(&ts); 120 tmp += timespec_to_ns(&ts);
119 d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; 121 d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
120 122
121 tmp = (s64)d->cpu_scaled_run_real_total; 123 tmp = (s64)d->cpu_scaled_run_real_total;
122 cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts); 124 task_cputime_scaled(tsk, &utimescaled, &stimescaled);
125 cputime_to_timespec(utimescaled + stimescaled, &ts);
123 tmp += timespec_to_ns(&ts); 126 tmp += timespec_to_ns(&ts);
124 d->cpu_scaled_run_real_total = 127 d->cpu_scaled_run_real_total =
125 (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; 128 (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
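Both conversions above follow the accessor pattern this series introduces everywhere: read cputime via task_cputime()/task_cputime_scaled() instead of dereferencing tsk->utime directly, so full dynticks accounting can hook the read. Isolated, the chain looks like this (a sketch, not part of the patch):

        cputime_t utime, stime;
        struct timespec ts;
        s64 ns;

        task_cputime(tsk, &utime, &stime);      /* accessor, not tsk->utime */
        cputime_to_timespec(utime + stime, &ts);
        ns = timespec_to_ns(&ts);               /* total CPU time in ns */
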
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7b6646a8c067..5c75791d7269 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6171,11 +6171,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6171 6171
6172 if (task) { 6172 if (task) {
6173 event->attach_state = PERF_ATTACH_TASK; 6173 event->attach_state = PERF_ATTACH_TASK;
6174
6175 if (attr->type == PERF_TYPE_TRACEPOINT)
6176 event->hw.tp_target = task;
6174#ifdef CONFIG_HAVE_HW_BREAKPOINT 6177#ifdef CONFIG_HAVE_HW_BREAKPOINT
6175 /* 6178 /*
6176 * hw_breakpoint is a bit difficult here.. 6179 * hw_breakpoint is a bit difficult here..
6177 */ 6180 */
6178 if (attr->type == PERF_TYPE_BREAKPOINT) 6181 else if (attr->type == PERF_TYPE_BREAKPOINT)
6179 event->hw.bp_target = task; 6182 event->hw.bp_target = task;
6180#endif 6183#endif
6181 } 6184 }
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index fe8a916507ed..a64f8aeb5c1f 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void)
676 err_alloc: 676 err_alloc:
677 for_each_possible_cpu(err_cpu) { 677 for_each_possible_cpu(err_cpu) {
678 for (i = 0; i < TYPE_MAX; i++) 678 for (i = 0; i < TYPE_MAX; i++)
679 kfree(per_cpu(nr_task_bp_pinned[i], cpu)); 679 kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
680 if (err_cpu == cpu) 680 if (err_cpu == cpu)
681 break; 681 break;
682 } 682 }
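The one-character fix above addresses the classic partial-unwind bug: the error path must free what was allocated for each CPU already visited (err_cpu), not repeatedly free the failing CPU's slot. The intended shape, as a self-contained userspace analogue:

#include <stdlib.h>

int init_slots(void **slot, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                slot[i] = malloc(64);
                if (!slot[i])
                        goto err;
        }
        return 0;
err:
        for (j = 0; j < i; j++)         /* free slot[j], not slot[i] */
                free(slot[j]);
        return -1;
}
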
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index dea7acfbb071..a567c8c7ef31 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -27,6 +27,7 @@
27#include <linux/pagemap.h> /* read_mapping_page */ 27#include <linux/pagemap.h> /* read_mapping_page */
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/export.h>
30#include <linux/rmap.h> /* anon_vma_prepare */ 31#include <linux/rmap.h> /* anon_vma_prepare */
31#include <linux/mmu_notifier.h> /* set_pte_at_notify */ 32#include <linux/mmu_notifier.h> /* set_pte_at_notify */
32#include <linux/swap.h> /* try_to_free_swap */ 33#include <linux/swap.h> /* try_to_free_swap */
@@ -41,58 +42,31 @@
41#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE 42#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE
42 43
43static struct rb_root uprobes_tree = RB_ROOT; 44static struct rb_root uprobes_tree = RB_ROOT;
44
45static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
46
47#define UPROBES_HASH_SZ 13
48
49/* 45/*
50 * We need separate register/unregister and mmap/munmap lock hashes because 46 * allows us to skip the uprobe_mmap if there are no uprobe events active
51 * of mmap_sem nesting. 47 * at this time. Probably a fine-grained per-inode count is better?
52 *
53 * uprobe_register() needs to install probes on (potentially) all processes
54 * and thus needs to acquire multiple mmap_sems (consequtively, not
55 * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
56 * for the particular process doing the mmap.
57 *
58 * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
59 * because of lock order against i_mmap_mutex. This means there's a hole in
60 * the register vma iteration where a mmap() can happen.
61 *
62 * Thus uprobe_register() can race with uprobe_mmap() and we can try and
63 * install a probe where one is already installed.
64 */ 48 */
49#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
65 50
66/* serialize (un)register */ 51static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
67static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
68
69#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
70 52
53#define UPROBES_HASH_SZ 13
71/* serialize uprobe->pending_list */ 54/* serialize uprobe->pending_list */
72static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; 55static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
73#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) 56#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
74 57
75static struct percpu_rw_semaphore dup_mmap_sem; 58static struct percpu_rw_semaphore dup_mmap_sem;
76 59
77/*
78 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
79 * events active at this time. Probably a fine grained per inode count is
80 * better?
81 */
82static atomic_t uprobe_events = ATOMIC_INIT(0);
83
84/* Have a copy of original instruction */ 60/* Have a copy of original instruction */
85#define UPROBE_COPY_INSN 0 61#define UPROBE_COPY_INSN 0
86/* Dont run handlers when first register/ last unregister in progress*/
87#define UPROBE_RUN_HANDLER 1
88/* Can skip singlestep */ 62/* Can skip singlestep */
89#define UPROBE_SKIP_SSTEP 2 63#define UPROBE_SKIP_SSTEP 1
90 64
91struct uprobe { 65struct uprobe {
92 struct rb_node rb_node; /* node in the rb tree */ 66 struct rb_node rb_node; /* node in the rb tree */
93 atomic_t ref; 67 atomic_t ref;
68 struct rw_semaphore register_rwsem;
94 struct rw_semaphore consumer_rwsem; 69 struct rw_semaphore consumer_rwsem;
95 struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */
96 struct list_head pending_list; 70 struct list_head pending_list;
97 struct uprobe_consumer *consumers; 71 struct uprobe_consumer *consumers;
98 struct inode *inode; /* Also hold a ref to inode */ 72 struct inode *inode; /* Also hold a ref to inode */
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
430 u = __insert_uprobe(uprobe); 404 u = __insert_uprobe(uprobe);
431 spin_unlock(&uprobes_treelock); 405 spin_unlock(&uprobes_treelock);
432 406
433 /* For now assume that the instruction need not be single-stepped */
434 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
435
436 return u; 407 return u;
437} 408}
438 409
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
452 423
453 uprobe->inode = igrab(inode); 424 uprobe->inode = igrab(inode);
454 uprobe->offset = offset; 425 uprobe->offset = offset;
426 init_rwsem(&uprobe->register_rwsem);
455 init_rwsem(&uprobe->consumer_rwsem); 427 init_rwsem(&uprobe->consumer_rwsem);
456 mutex_init(&uprobe->copy_mutex); 428 /* For now assume that the instruction need not be single-stepped */
429 __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
457 430
458 /* add to uprobes_tree, sorted on inode:offset */ 431 /* add to uprobes_tree, sorted on inode:offset */
459 cur_uprobe = insert_uprobe(uprobe); 432 cur_uprobe = insert_uprobe(uprobe);
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
463 kfree(uprobe); 436 kfree(uprobe);
464 uprobe = cur_uprobe; 437 uprobe = cur_uprobe;
465 iput(inode); 438 iput(inode);
466 } else {
467 atomic_inc(&uprobe_events);
468 } 439 }
469 440
470 return uprobe; 441 return uprobe;
471} 442}
472 443
473static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) 444static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
474{
475 struct uprobe_consumer *uc;
476
477 if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
478 return;
479
480 down_read(&uprobe->consumer_rwsem);
481 for (uc = uprobe->consumers; uc; uc = uc->next) {
482 if (!uc->filter || uc->filter(uc, current))
483 uc->handler(uc, regs);
484 }
485 up_read(&uprobe->consumer_rwsem);
486}
487
488/* Returns the previous consumer */
489static struct uprobe_consumer *
490consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
491{ 445{
492 down_write(&uprobe->consumer_rwsem); 446 down_write(&uprobe->consumer_rwsem);
493 uc->next = uprobe->consumers; 447 uc->next = uprobe->consumers;
494 uprobe->consumers = uc; 448 uprobe->consumers = uc;
495 up_write(&uprobe->consumer_rwsem); 449 up_write(&uprobe->consumer_rwsem);
496
497 return uc->next;
498} 450}
499 451
500/* 452/*
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
588 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) 540 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
589 return ret; 541 return ret;
590 542
591 mutex_lock(&uprobe->copy_mutex); 543 /* TODO: move this into _register, until then we abuse this sem. */
544 down_write(&uprobe->consumer_rwsem);
592 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) 545 if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
593 goto out; 546 goto out;
594 547
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
612 set_bit(UPROBE_COPY_INSN, &uprobe->flags); 565 set_bit(UPROBE_COPY_INSN, &uprobe->flags);
613 566
614 out: 567 out:
615 mutex_unlock(&uprobe->copy_mutex); 568 up_write(&uprobe->consumer_rwsem);
569
570 return ret;
571}
572
573static inline bool consumer_filter(struct uprobe_consumer *uc,
574 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
575{
576 return !uc->filter || uc->filter(uc, ctx, mm);
577}
578
579static bool filter_chain(struct uprobe *uprobe,
580 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
581{
582 struct uprobe_consumer *uc;
583 bool ret = false;
584
585 down_read(&uprobe->consumer_rwsem);
586 for (uc = uprobe->consumers; uc; uc = uc->next) {
587 ret = consumer_filter(uc, ctx, mm);
588 if (ret)
589 break;
590 }
591 up_read(&uprobe->consumer_rwsem);
616 592
617 return ret; 593 return ret;
618} 594}
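consumer_filter() and filter_chain() let each consumer vote per-mm on whether it wants the breakpoint. A hypothetical consumer (all names here are invented for illustration) would plug into this interface as:

static bool my_filter(struct uprobe_consumer *self,
                      enum uprobe_filter_ctx ctx, struct mm_struct *mm)
{
        return mm == my_target_mm;      /* trace a single process only */
}

static int my_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
        pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
        return 0;                       /* 0: keep the breakpoint */
}

static struct uprobe_consumer my_consumer = {
        .handler = my_handler,
        .filter  = my_filter,
};
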
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
624 bool first_uprobe; 600 bool first_uprobe;
625 int ret; 601 int ret;
626 602
627 /*
628 * If probe is being deleted, unregister thread could be done with
629 * the vma-rmap-walk through. Adding a probe now can be fatal since
630 * nobody will be able to cleanup. Also we could be from fork or
631 * mremap path, where the probe might have already been inserted.
632 * Hence behave as if probe already existed.
633 */
634 if (!uprobe->consumers)
635 return 0;
636
637 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); 603 ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
638 if (ret) 604 if (ret)
639 return ret; 605 return ret;
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
658static int 624static int
659remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) 625remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
660{ 626{
661 /* can happen if uprobe_register() fails */
662 if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
663 return 0;
664
665 set_bit(MMF_RECALC_UPROBES, &mm->flags); 627 set_bit(MMF_RECALC_UPROBES, &mm->flags);
666 return set_orig_insn(&uprobe->arch, mm, vaddr); 628 return set_orig_insn(&uprobe->arch, mm, vaddr);
667} 629}
668 630
631static inline bool uprobe_is_active(struct uprobe *uprobe)
632{
633 return !RB_EMPTY_NODE(&uprobe->rb_node);
634}
669/* 635/*
670 * There could be threads that have already hit the breakpoint. They 636 * There could be threads that have already hit the breakpoint. They
671 * will recheck the current insn and restart if find_uprobe() fails. 637 * will recheck the current insn and restart if find_uprobe() fails.
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
673 */ 639 */
674static void delete_uprobe(struct uprobe *uprobe) 640static void delete_uprobe(struct uprobe *uprobe)
675{ 641{
642 if (WARN_ON(!uprobe_is_active(uprobe)))
643 return;
644
676 spin_lock(&uprobes_treelock); 645 spin_lock(&uprobes_treelock);
677 rb_erase(&uprobe->rb_node, &uprobes_tree); 646 rb_erase(&uprobe->rb_node, &uprobes_tree);
678 spin_unlock(&uprobes_treelock); 647 spin_unlock(&uprobes_treelock);
648 RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
679 iput(uprobe->inode); 649 iput(uprobe->inode);
680 put_uprobe(uprobe); 650 put_uprobe(uprobe);
681 atomic_dec(&uprobe_events);
682} 651}
683 652
684struct map_info { 653struct map_info {
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
764 return curr; 733 return curr;
765} 734}
766 735
767static int register_for_each_vma(struct uprobe *uprobe, bool is_register) 736static int
737register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
768{ 738{
739 bool is_register = !!new;
769 struct map_info *info; 740 struct map_info *info;
770 int err = 0; 741 int err = 0;
771 742
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
794 vaddr_to_offset(vma, info->vaddr) != uprobe->offset) 765 vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
795 goto unlock; 766 goto unlock;
796 767
797 if (is_register) 768 if (is_register) {
798 err = install_breakpoint(uprobe, mm, vma, info->vaddr); 769 /* consult only the "caller", new consumer. */
799 else 770 if (consumer_filter(new,
800 err |= remove_breakpoint(uprobe, mm, info->vaddr); 771 UPROBE_FILTER_REGISTER, mm))
772 err = install_breakpoint(uprobe, mm, vma, info->vaddr);
773 } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
774 if (!filter_chain(uprobe,
775 UPROBE_FILTER_UNREGISTER, mm))
776 err |= remove_breakpoint(uprobe, mm, info->vaddr);
777 }
801 778
802 unlock: 779 unlock:
803 up_write(&mm->mmap_sem); 780 up_write(&mm->mmap_sem);
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
810 return err; 787 return err;
811} 788}
812 789
813static int __uprobe_register(struct uprobe *uprobe) 790static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
814{ 791{
815 return register_for_each_vma(uprobe, true); 792 consumer_add(uprobe, uc);
793 return register_for_each_vma(uprobe, uc);
816} 794}
817 795
818static void __uprobe_unregister(struct uprobe *uprobe) 796static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
819{ 797{
820 if (!register_for_each_vma(uprobe, false)) 798 int err;
821 delete_uprobe(uprobe); 799
800 if (!consumer_del(uprobe, uc)) /* WARN? */
801 return;
822 802
803 err = register_for_each_vma(uprobe, NULL);
823 /* TODO : cant unregister? schedule a worker thread */ 804 /* TODO : cant unregister? schedule a worker thread */
805 if (!uprobe->consumers && !err)
806 delete_uprobe(uprobe);
824} 807}
825 808
826/* 809/*
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
845 struct uprobe *uprobe; 828 struct uprobe *uprobe;
846 int ret; 829 int ret;
847 830
848 if (!inode || !uc || uc->next) 831 /* Racy, just to catch the obvious mistakes */
849 return -EINVAL;
850
851 if (offset > i_size_read(inode)) 832 if (offset > i_size_read(inode))
852 return -EINVAL; 833 return -EINVAL;
853 834
854 ret = 0; 835 retry:
855 mutex_lock(uprobes_hash(inode));
856 uprobe = alloc_uprobe(inode, offset); 836 uprobe = alloc_uprobe(inode, offset);
857 837 if (!uprobe)
858 if (!uprobe) { 838 return -ENOMEM;
859 ret = -ENOMEM; 839 /*
860 } else if (!consumer_add(uprobe, uc)) { 840 * We can race with uprobe_unregister()->delete_uprobe().
861 ret = __uprobe_register(uprobe); 841 * Check uprobe_is_active() and retry if it is false.
862 if (ret) { 842 */
863 uprobe->consumers = NULL; 843 down_write(&uprobe->register_rwsem);
864 __uprobe_unregister(uprobe); 844 ret = -EAGAIN;
865 } else { 845 if (likely(uprobe_is_active(uprobe))) {
866 set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); 846 ret = __uprobe_register(uprobe, uc);
867 } 847 if (ret)
848 __uprobe_unregister(uprobe, uc);
868 } 849 }
850 up_write(&uprobe->register_rwsem);
851 put_uprobe(uprobe);
869 852
870 mutex_unlock(uprobes_hash(inode)); 853 if (unlikely(ret == -EAGAIN))
871 if (uprobe) 854 goto retry;
872 put_uprobe(uprobe); 855 return ret;
856}
857EXPORT_SYMBOL_GPL(uprobe_register);
858
859/*
860 * uprobe_apply - unregister a already registered probe.
861 * @inode: the file in which the probe has to be removed.
862 * @offset: offset from the start of the file.
863 * @uc: consumer which wants to add more or remove some breakpoints
864 * @add: add or remove the breakpoints
865 */
866int uprobe_apply(struct inode *inode, loff_t offset,
867 struct uprobe_consumer *uc, bool add)
868{
869 struct uprobe *uprobe;
870 struct uprobe_consumer *con;
871 int ret = -ENOENT;
872
873 uprobe = find_uprobe(inode, offset);
874 if (!uprobe)
875 return ret;
876
877 down_write(&uprobe->register_rwsem);
878 for (con = uprobe->consumers; con && con != uc ; con = con->next)
879 ;
880 if (con)
881 ret = register_for_each_vma(uprobe, add ? uc : NULL);
882 up_write(&uprobe->register_rwsem);
883 put_uprobe(uprobe);
873 884
874 return ret; 885 return ret;
875} 886}
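With the -EAGAIN retry handled inside uprobe_register() itself, registering the consumer sketched earlier reduces to (illustrative only):

        ret = uprobe_register(inode, offset, &my_consumer);
        if (ret)
                pr_err("uprobe_register: %d\n", ret);
        /* ... probe is live ... */
        uprobe_unregister(inode, offset, &my_consumer);
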
@@ -884,25 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
884{ 895{
885 struct uprobe *uprobe; 896 struct uprobe *uprobe;
886 897
887 if (!inode || !uc)
888 return;
889
890 uprobe = find_uprobe(inode, offset); 898 uprobe = find_uprobe(inode, offset);
891 if (!uprobe) 899 if (!uprobe)
892 return; 900 return;
893 901
894 mutex_lock(uprobes_hash(inode)); 902 down_write(&uprobe->register_rwsem);
903 __uprobe_unregister(uprobe, uc);
904 up_write(&uprobe->register_rwsem);
905 put_uprobe(uprobe);
906}
907EXPORT_SYMBOL_GPL(uprobe_unregister);
895 908
896 if (consumer_del(uprobe, uc)) { 909static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
897 if (!uprobe->consumers) { 910{
898 __uprobe_unregister(uprobe); 911 struct vm_area_struct *vma;
899 clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); 912 int err = 0;
900 } 913
914 down_read(&mm->mmap_sem);
915 for (vma = mm->mmap; vma; vma = vma->vm_next) {
916 unsigned long vaddr;
917 loff_t offset;
918
919 if (!valid_vma(vma, false) ||
920 vma->vm_file->f_mapping->host != uprobe->inode)
921 continue;
922
923 offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
924 if (uprobe->offset < offset ||
925 uprobe->offset >= offset + vma->vm_end - vma->vm_start)
926 continue;
927
928 vaddr = offset_to_vaddr(vma, uprobe->offset);
929 err |= remove_breakpoint(uprobe, mm, vaddr);
901 } 930 }
931 up_read(&mm->mmap_sem);
902 932
903 mutex_unlock(uprobes_hash(inode)); 933 return err;
904 if (uprobe)
905 put_uprobe(uprobe);
906} 934}
907 935
908static struct rb_node * 936static struct rb_node *
@@ -979,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
979 struct uprobe *uprobe, *u; 1007 struct uprobe *uprobe, *u;
980 struct inode *inode; 1008 struct inode *inode;
981 1009
982 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) 1010 if (no_uprobe_events() || !valid_vma(vma, true))
983 return 0; 1011 return 0;
984 1012
985 inode = vma->vm_file->f_mapping->host; 1013 inode = vma->vm_file->f_mapping->host;
@@ -988,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma)
988 1016
989 mutex_lock(uprobes_mmap_hash(inode)); 1017 mutex_lock(uprobes_mmap_hash(inode));
990 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); 1018 build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
991 1019 /*
1020 * We can race with uprobe_unregister(), this uprobe can be already
1021 * removed. But in this case filter_chain() must return false, all
1022 * consumers have gone away.
1023 */
992 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { 1024 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
993 if (!fatal_signal_pending(current)) { 1025 if (!fatal_signal_pending(current) &&
1026 filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
994 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); 1027 unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
995 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); 1028 install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
996 } 1029 }
@@ -1025,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
1025 */ 1058 */
1026void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) 1059void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1027{ 1060{
1028 if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) 1061 if (no_uprobe_events() || !valid_vma(vma, false))
1029 return; 1062 return;
1030 1063
1031 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ 1064 if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
@@ -1042,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
1042/* Slot allocation for XOL */ 1075/* Slot allocation for XOL */
1043static int xol_add_vma(struct xol_area *area) 1076static int xol_add_vma(struct xol_area *area)
1044{ 1077{
1045 struct mm_struct *mm; 1078 struct mm_struct *mm = current->mm;
1046 int ret; 1079 int ret = -EALREADY;
1047
1048 area->page = alloc_page(GFP_HIGHUSER);
1049 if (!area->page)
1050 return -ENOMEM;
1051
1052 ret = -EALREADY;
1053 mm = current->mm;
1054 1080
1055 down_write(&mm->mmap_sem); 1081 down_write(&mm->mmap_sem);
1056 if (mm->uprobes_state.xol_area) 1082 if (mm->uprobes_state.xol_area)
1057 goto fail; 1083 goto fail;
1058 1084
1059 ret = -ENOMEM; 1085 ret = -ENOMEM;
1060
1061 /* Try to map as high as possible, this is only a hint. */ 1086 /* Try to map as high as possible, this is only a hint. */
1062 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); 1087 area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
1063 if (area->vaddr & ~PAGE_MASK) { 1088 if (area->vaddr & ~PAGE_MASK) {
@@ -1073,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area)
1073 smp_wmb(); /* pairs with get_xol_area() */ 1098 smp_wmb(); /* pairs with get_xol_area() */
1074 mm->uprobes_state.xol_area = area; 1099 mm->uprobes_state.xol_area = area;
1075 ret = 0; 1100 ret = 0;
1076 1101 fail:
1077fail:
1078 up_write(&mm->mmap_sem); 1102 up_write(&mm->mmap_sem);
1079 if (ret)
1080 __free_page(area->page);
1081 1103
1082 return ret; 1104 return ret;
1083} 1105}
1084 1106
1085static struct xol_area *get_xol_area(struct mm_struct *mm)
1086{
1087 struct xol_area *area;
1088
1089 area = mm->uprobes_state.xol_area;
1090 smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
1091
1092 return area;
1093}
1094
1095/* 1107/*
1096 * xol_alloc_area - Allocate process's xol_area. 1108 * get_xol_area - Allocate process's xol_area if necessary.
1097 * This area will be used for storing instructions for execution out of 1109 * This area will be used for storing instructions for execution out of line.
1098 * line.
1099 * 1110 *
1100 * Returns the allocated area or NULL. 1111 * Returns the allocated area or NULL.
1101 */ 1112 */
1102static struct xol_area *xol_alloc_area(void) 1113static struct xol_area *get_xol_area(void)
1103{ 1114{
1115 struct mm_struct *mm = current->mm;
1104 struct xol_area *area; 1116 struct xol_area *area;
1105 1117
1118 area = mm->uprobes_state.xol_area;
1119 if (area)
1120 goto ret;
1121
1106 area = kzalloc(sizeof(*area), GFP_KERNEL); 1122 area = kzalloc(sizeof(*area), GFP_KERNEL);
1107 if (unlikely(!area)) 1123 if (unlikely(!area))
1108 return NULL; 1124 goto out;
1109 1125
1110 area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); 1126 area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
1111
1112 if (!area->bitmap) 1127 if (!area->bitmap)
1113 goto fail; 1128 goto free_area;
1129
1130 area->page = alloc_page(GFP_HIGHUSER);
1131 if (!area->page)
1132 goto free_bitmap;
1114 1133
1115 init_waitqueue_head(&area->wq); 1134 init_waitqueue_head(&area->wq);
1116 if (!xol_add_vma(area)) 1135 if (!xol_add_vma(area))
1117 return area; 1136 return area;
1118 1137
1119fail: 1138 __free_page(area->page);
1139 free_bitmap:
1120 kfree(area->bitmap); 1140 kfree(area->bitmap);
1141 free_area:
1121 kfree(area); 1142 kfree(area);
1122 1143 out:
1123 return get_xol_area(current->mm); 1144 area = mm->uprobes_state.xol_area;
1145 ret:
1146 smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */
1147 return area;
1124} 1148}
1125 1149
1126/* 1150/*
@@ -1186,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
1186} 1210}
1187 1211
1188/* 1212/*
1189 * xol_get_insn_slot - If was not allocated a slot, then 1213 * xol_get_insn_slot - allocate a slot for xol.
1190 * allocate a slot.
1191 * Returns the allocated slot address or 0. 1214 * Returns the allocated slot address or 0.
1192 */ 1215 */
1193static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) 1216static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
1194{ 1217{
1195 struct xol_area *area; 1218 struct xol_area *area;
1196 unsigned long offset; 1219 unsigned long offset;
1220 unsigned long xol_vaddr;
1197 void *vaddr; 1221 void *vaddr;
1198 1222
1199 area = get_xol_area(current->mm); 1223 area = get_xol_area();
1200 if (!area) { 1224 if (!area)
1201 area = xol_alloc_area(); 1225 return 0;
1202 if (!area)
1203 return 0;
1204 }
1205 current->utask->xol_vaddr = xol_take_insn_slot(area);
1206 1226
1207 /* 1227 xol_vaddr = xol_take_insn_slot(area);
1208 * Initialize the slot if xol_vaddr points to valid 1228 if (unlikely(!xol_vaddr))
1209 * instruction slot.
1210 */
1211 if (unlikely(!current->utask->xol_vaddr))
1212 return 0; 1229 return 0;
1213 1230
1214 current->utask->vaddr = slot_addr; 1231 /* Initialize the slot */
1215 offset = current->utask->xol_vaddr & ~PAGE_MASK; 1232 offset = xol_vaddr & ~PAGE_MASK;
1216 vaddr = kmap_atomic(area->page); 1233 vaddr = kmap_atomic(area->page);
1217 memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); 1234 memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
1218 kunmap_atomic(vaddr); 1235 kunmap_atomic(vaddr);
@@ -1222,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot
1222 */ 1239 */
1223 flush_dcache_page(area->page); 1240 flush_dcache_page(area->page);
1224 1241
1225 return current->utask->xol_vaddr; 1242 return xol_vaddr;
1226} 1243}
1227 1244
1228/* 1245/*
@@ -1240,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
1240 return; 1257 return;
1241 1258
1242 slot_addr = tsk->utask->xol_vaddr; 1259 slot_addr = tsk->utask->xol_vaddr;
1243 1260 if (unlikely(!slot_addr))
1244 if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
1245 return; 1261 return;
1246 1262
1247 area = tsk->mm->uprobes_state.xol_area; 1263 area = tsk->mm->uprobes_state.xol_area;
@@ -1303,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t)
1303} 1319}
1304 1320
1305/* 1321/*
1306 * Allocate a uprobe_task object for the task. 1322 * Allocate a uprobe_task object for the task if necessary.
1307 * Called when the thread hits a breakpoint for the first time. 1323 * Called when the thread hits a breakpoint.
1308 * 1324 *
1309 * Returns: 1325 * Returns:
1310 * - pointer to new uprobe_task on success 1326 * - pointer to new uprobe_task on success
1311 * - NULL otherwise 1327 * - NULL otherwise
1312 */ 1328 */
1313static struct uprobe_task *add_utask(void) 1329static struct uprobe_task *get_utask(void)
1314{ 1330{
1315 struct uprobe_task *utask; 1331 if (!current->utask)
1316 1332 current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
1317 utask = kzalloc(sizeof *utask, GFP_KERNEL); 1333 return current->utask;
1318 if (unlikely(!utask))
1319 return NULL;
1320
1321 current->utask = utask;
1322 return utask;
1323} 1334}
1324 1335
1325/* Prepare to single-step probed instruction out of line. */ 1336/* Prepare to single-step probed instruction out of line. */
1326static int 1337static int
1327pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) 1338pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
1328{ 1339{
1329 if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) 1340 struct uprobe_task *utask;
1330 return 0; 1341 unsigned long xol_vaddr;
1342 int err;
1343
1344 utask = get_utask();
1345 if (!utask)
1346 return -ENOMEM;
1347
1348 xol_vaddr = xol_get_insn_slot(uprobe);
1349 if (!xol_vaddr)
1350 return -ENOMEM;
1351
1352 utask->xol_vaddr = xol_vaddr;
1353 utask->vaddr = bp_vaddr;
1354
1355 err = arch_uprobe_pre_xol(&uprobe->arch, regs);
1356 if (unlikely(err)) {
1357 xol_free_insn_slot(current);
1358 return err;
1359 }
1331 1360
1332 return -EFAULT; 1361 utask->active_uprobe = uprobe;
1362 utask->state = UTASK_SSTEP;
1363 return 0;
1333} 1364}
1334 1365
1335/* 1366/*
@@ -1391,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
1391 * This is not strictly accurate, we can race with 1422 * This is not strictly accurate, we can race with
1392 * uprobe_unregister() and see the already removed 1423 * uprobe_unregister() and see the already removed
1393 * uprobe if delete_uprobe() was not yet called. 1424 * uprobe if delete_uprobe() was not yet called.
1425 * Or this uprobe can be filtered out.
1394 */ 1426 */
1395 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) 1427 if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
1396 return; 1428 return;
@@ -1452,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1452 return uprobe; 1484 return uprobe;
1453} 1485}
1454 1486
1487static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
1488{
1489 struct uprobe_consumer *uc;
1490 int remove = UPROBE_HANDLER_REMOVE;
1491
1492 down_read(&uprobe->register_rwsem);
1493 for (uc = uprobe->consumers; uc; uc = uc->next) {
1494 int rc = uc->handler(uc, regs);
1495
1496 WARN(rc & ~UPROBE_HANDLER_MASK,
1497 "bad rc=0x%x from %pf()\n", rc, uc->handler);
1498 remove &= rc;
1499 }
1500
1501 if (remove && uprobe->consumers) {
1502 WARN_ON(!uprobe_is_active(uprobe));
1503 unapply_uprobe(uprobe, current->mm);
1504 }
1505 up_read(&uprobe->register_rwsem);
1506}
1507
1455/* 1508/*
1456 * Run handler and ask thread to singlestep. 1509 * Run handler and ask thread to singlestep.
1457 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. 1510 * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
1458 */ 1511 */
1459static void handle_swbp(struct pt_regs *regs) 1512static void handle_swbp(struct pt_regs *regs)
1460{ 1513{
1461 struct uprobe_task *utask;
1462 struct uprobe *uprobe; 1514 struct uprobe *uprobe;
1463 unsigned long bp_vaddr; 1515 unsigned long bp_vaddr;
1464 int uninitialized_var(is_swbp); 1516 int uninitialized_var(is_swbp);
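handler_chain() now ANDs the consumers' return codes: the breakpoint is unapplied from the current mm only if every handler asks for it. A consumer that wants to self-remove after a single hit would return (a sketch, in the same invented-consumer style as above):

static int once_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
        pr_info("first and only hit\n");
        return UPROBE_HANDLER_REMOVE;   /* honoured iff all consumers agree */
}
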
@@ -1483,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs)
1483 } 1535 }
1484 return; 1536 return;
1485 } 1537 }
1538
1539 /* change it in advance for ->handler() and restart */
1540 instruction_pointer_set(regs, bp_vaddr);
1541
1486 /* 1542 /*
1487 * TODO: move copy_insn/etc into _register and remove this hack. 1543 * TODO: move copy_insn/etc into _register and remove this hack.
1488 * After we hit the bp, _unregister + _register can install the 1544 * After we hit the bp, _unregister + _register can install the
@@ -1490,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs)
1490 */ 1546 */
1491 smp_rmb(); /* pairs with wmb() in install_breakpoint() */ 1547 smp_rmb(); /* pairs with wmb() in install_breakpoint() */
1492 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) 1548 if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
1493 goto restart; 1549 goto out;
1494
1495 utask = current->utask;
1496 if (!utask) {
1497 utask = add_utask();
1498 /* Cannot allocate; re-execute the instruction. */
1499 if (!utask)
1500 goto restart;
1501 }
1502 1550
1503 handler_chain(uprobe, regs); 1551 handler_chain(uprobe, regs);
1504 if (can_skip_sstep(uprobe, regs)) 1552 if (can_skip_sstep(uprobe, regs))
1505 goto out; 1553 goto out;
1506 1554
1507 if (!pre_ssout(uprobe, regs, bp_vaddr)) { 1555 if (!pre_ssout(uprobe, regs, bp_vaddr))
1508 utask->active_uprobe = uprobe;
1509 utask->state = UTASK_SSTEP;
1510 return; 1556 return;
1511 }
1512 1557
1513restart: 1558 /* can_skip_sstep() succeeded, or restart if can't singlestep */
1514 /*
1515 * cannot singlestep; cannot skip instruction;
1516 * re-execute the instruction.
1517 */
1518 instruction_pointer_set(regs, bp_vaddr);
1519out: 1559out:
1520 put_uprobe(uprobe); 1560 put_uprobe(uprobe);
1521} 1561}
@@ -1609,10 +1649,8 @@ static int __init init_uprobes(void)
1609{ 1649{
1610 int i; 1650 int i;
1611 1651
1612 for (i = 0; i < UPROBES_HASH_SZ; i++) { 1652 for (i = 0; i < UPROBES_HASH_SZ; i++)
1613 mutex_init(&uprobes_mutex[i]);
1614 mutex_init(&uprobes_mmap_mutex[i]); 1653 mutex_init(&uprobes_mmap_mutex[i]);
1615 }
1616 1654
1617 if (percpu_init_rwsem(&dup_mmap_sem)) 1655 if (percpu_init_rwsem(&dup_mmap_sem))
1618 return -ENOMEM; 1656 return -ENOMEM;
diff --git a/kernel/exit.c b/kernel/exit.c
index b4df21937216..7dd20408707c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
85 bool group_dead = thread_group_leader(tsk); 85 bool group_dead = thread_group_leader(tsk);
86 struct sighand_struct *sighand; 86 struct sighand_struct *sighand;
87 struct tty_struct *uninitialized_var(tty); 87 struct tty_struct *uninitialized_var(tty);
88 cputime_t utime, stime;
88 89
89 sighand = rcu_dereference_check(tsk->sighand, 90 sighand = rcu_dereference_check(tsk->sighand,
90 lockdep_tasklist_lock_is_held()); 91 lockdep_tasklist_lock_is_held());
@@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
123 * We won't ever get here for the group leader, since it 124 * We won't ever get here for the group leader, since it
124 * will have been the last reference on the signal_struct. 125 * will have been the last reference on the signal_struct.
125 */ 126 */
126 sig->utime += tsk->utime; 127 task_cputime(tsk, &utime, &stime);
127 sig->stime += tsk->stime; 128 sig->utime += utime;
128 sig->gtime += tsk->gtime; 129 sig->stime += stime;
130 sig->gtime += task_gtime(tsk);
129 sig->min_flt += tsk->min_flt; 131 sig->min_flt += tsk->min_flt;
130 sig->maj_flt += tsk->maj_flt; 132 sig->maj_flt += tsk->maj_flt;
131 sig->nvcsw += tsk->nvcsw; 133 sig->nvcsw += tsk->nvcsw;
@@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1092 sig = p->signal; 1094 sig = p->signal;
1093 psig->cutime += tgutime + sig->cutime; 1095 psig->cutime += tgutime + sig->cutime;
1094 psig->cstime += tgstime + sig->cstime; 1096 psig->cstime += tgstime + sig->cstime;
1095 psig->cgtime += p->gtime + sig->gtime + sig->cgtime; 1097 psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
1096 psig->cmin_flt += 1098 psig->cmin_flt +=
1097 p->min_flt + sig->min_flt + sig->cmin_flt; 1099 p->min_flt + sig->min_flt + sig->cmin_flt;
1098 psig->cmaj_flt += 1100 psig->cmaj_flt +=
diff --git a/kernel/fork.c b/kernel/fork.c
index c535f33bbb9c..4133876d8cd2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1233#ifndef CONFIG_VIRT_CPU_ACCOUNTING 1233#ifndef CONFIG_VIRT_CPU_ACCOUNTING
1234 p->prev_cputime.utime = p->prev_cputime.stime = 0; 1234 p->prev_cputime.utime = p->prev_cputime.stime = 0;
1235#endif 1235#endif
1236#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1237 seqlock_init(&p->vtime_seqlock);
1238 p->vtime_snap = 0;
1239 p->vtime_snap_whence = VTIME_SLEEPING;
1240#endif
1241
1236#if defined(SPLIT_RSS_COUNTING) 1242#if defined(SPLIT_RSS_COUNTING)
1237 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); 1243 memset(&p->rss_stat, 0, sizeof(p->rss_stat));
1238#endif 1244#endif
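The seqlock initialised here is what lets task_cputime() sample a consistent utime/stime snapshot while the tick-less writer updates vtime_snap. The read side follows the standard seqlock pattern, roughly (a sketch; the delta computation is elided):

        unsigned int seq;

        do {
                seq = read_seqbegin(&t->vtime_seqlock);
                utime = t->utime;       /* plus the delta since vtime_snap */
        } while (read_seqretry(&t->vtime_seqlock, seq));
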
diff --git a/kernel/futex.c b/kernel/futex.c
index 19eb089ca003..9618b6e9fb36 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,7 @@
60#include <linux/pid.h> 60#include <linux/pid.h>
61#include <linux/nsproxy.h> 61#include <linux/nsproxy.h>
62#include <linux/ptrace.h> 62#include <linux/ptrace.h>
63#include <linux/sched/rt.h>
63 64
64#include <asm/futex.h> 65#include <asm/futex.h>
65 66
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6db7a5ed52b5..cc47812d3feb 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -44,6 +44,8 @@
44#include <linux/err.h> 44#include <linux/err.h>
45#include <linux/debugobjects.h> 45#include <linux/debugobjects.h>
46#include <linux/sched.h> 46#include <linux/sched.h>
47#include <linux/sched/sysctl.h>
48#include <linux/sched/rt.h>
47#include <linux/timer.h> 49#include <linux/timer.h>
48 50
49#include <asm/uaccess.h> 51#include <asm/uaccess.h>
@@ -640,21 +642,9 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
640 * and expiry check is done in the hrtimer_interrupt or in the softirq. 642 * and expiry check is done in the hrtimer_interrupt or in the softirq.
641 */ 643 */
642static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, 644static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
643 struct hrtimer_clock_base *base, 645 struct hrtimer_clock_base *base)
644 int wakeup)
645{ 646{
646 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { 647 return base->cpu_base->hres_active && hrtimer_reprogram(timer, base);
647 if (wakeup) {
648 raw_spin_unlock(&base->cpu_base->lock);
649 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
650 raw_spin_lock(&base->cpu_base->lock);
651 } else
652 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
653
654 return 1;
655 }
656
657 return 0;
658} 648}
659 649
660static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 650static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
@@ -735,8 +725,7 @@ static inline int hrtimer_switch_to_hres(void) { return 0; }
735static inline void 725static inline void
736hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } 726hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
737static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, 727static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
738 struct hrtimer_clock_base *base, 728 struct hrtimer_clock_base *base)
739 int wakeup)
740{ 729{
741 return 0; 730 return 0;
742} 731}
@@ -995,8 +984,21 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
995 * 984 *
996 * XXX send_remote_softirq() ? 985 * XXX send_remote_softirq() ?
997 */ 986 */
998 if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) 987 if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
999 hrtimer_enqueue_reprogram(timer, new_base, wakeup); 988 && hrtimer_enqueue_reprogram(timer, new_base)) {
989 if (wakeup) {
990 /*
991 * We need to drop cpu_base->lock to avoid a
992 * lock ordering issue vs. rq->lock.
993 */
994 raw_spin_unlock(&new_base->cpu_base->lock);
995 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
996 local_irq_restore(flags);
997 return ret;
998 } else {
999 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1000 }
1001 }
1000 1002
1001 unlock_hrtimer_base(timer, &flags); 1003 unlock_hrtimer_base(timer, &flags);
1002 1004
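Pulling the softirq raise out of hrtimer_enqueue_reprogram() exists so this caller can drop cpu_base->lock first: waking ksoftirqd may take rq->lock, which must never nest inside cpu_base->lock. The ordering rule, reduced to a shape (names mirror the hunk above):

        raw_spin_lock(&cpu_base->lock);
        /* ... enqueue the timer ... */
        if (needs_wakeup) {
                raw_spin_unlock(&cpu_base->lock);       /* drop before wakeup */
                raise_softirq_irqoff(HRTIMER_SOFTIRQ);  /* may take rq->lock */
                local_irq_restore(flags);
                return ret;
        }
        raw_spin_unlock(&cpu_base->lock);
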
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3aca9f29d30e..cbd97ce0b000 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -90,27 +90,41 @@ int irq_set_handler_data(unsigned int irq, void *data)
90EXPORT_SYMBOL(irq_set_handler_data); 90EXPORT_SYMBOL(irq_set_handler_data);
91 91
92/** 92/**
93 * irq_set_msi_desc - set MSI descriptor data for an irq 93 * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
94 * @irq: Interrupt number 94 * @irq_base: Interrupt number base
95 * @entry: Pointer to MSI descriptor data 95 * @irq_offset: Interrupt number offset
96 * @entry: Pointer to MSI descriptor data
96 * 97 *
97 * Set the MSI descriptor entry for an irq 98 * Set the MSI descriptor entry for an irq at offset
98 */ 99 */
99int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) 100int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
101 struct msi_desc *entry)
100{ 102{
101 unsigned long flags; 103 unsigned long flags;
102 struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); 104 struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
103 105
104 if (!desc) 106 if (!desc)
105 return -EINVAL; 107 return -EINVAL;
106 desc->irq_data.msi_desc = entry; 108 desc->irq_data.msi_desc = entry;
107 if (entry) 109 if (entry && !irq_offset)
108 entry->irq = irq; 110 entry->irq = irq_base;
109 irq_put_desc_unlock(desc, flags); 111 irq_put_desc_unlock(desc, flags);
110 return 0; 112 return 0;
111} 113}
112 114
113/** 115/**
116 * irq_set_msi_desc - set MSI descriptor data for an irq
117 * @irq: Interrupt number
118 * @entry: Pointer to MSI descriptor data
119 *
120 * Set the MSI descriptor entry for an irq
121 */
122int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
123{
124 return irq_set_msi_desc_off(irq, 0, entry);
125}
126
127/**
114 * irq_set_chip_data - set irq chip data for an irq 128 * irq_set_chip_data - set irq chip data for an irq
115 * @irq: Interrupt number 129 * @irq: Interrupt number
116 * @data: Pointer to chip specific data 130 * @data: Pointer to chip specific data
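The _off variant exists for multi-MSI, where one msi_desc backs a block of consecutive irqs but only the base irq is written back into the descriptor. A caller wiring an entry to nvec vectors might do (hypothetical):

        for (i = 0; i < nvec; i++)
                irq_set_msi_desc_off(irq_base, i, entry);
        /* entry->irq == irq_base; offsets > 0 leave entry->irq untouched */
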
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e49a288fa479..fa17855ca65a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/sched/rt.h>
19#include <linux/task_work.h> 20#include <linux/task_work.h>
20 21
21#include "internals.h" 22#include "internals.h"
@@ -1524,6 +1525,7 @@ void enable_percpu_irq(unsigned int irq, unsigned int type)
1524out: 1525out:
1525 irq_put_desc_unlock(desc, flags); 1526 irq_put_desc_unlock(desc, flags);
1526} 1527}
1528EXPORT_SYMBOL_GPL(enable_percpu_irq);
1527 1529
1528void disable_percpu_irq(unsigned int irq) 1530void disable_percpu_irq(unsigned int irq)
1529{ 1531{
@@ -1537,6 +1539,7 @@ void disable_percpu_irq(unsigned int irq)
1537 irq_percpu_disable(desc, cpu); 1539 irq_percpu_disable(desc, cpu);
1538 irq_put_desc_unlock(desc, flags); 1540 irq_put_desc_unlock(desc, flags);
1539} 1541}
1542EXPORT_SYMBOL_GPL(disable_percpu_irq);
1540 1543
1541/* 1544/*
1542 * Internal function to unregister a percpu irqaction. 1545 * Internal function to unregister a percpu irqaction.
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 611cd6003c45..7b5f012bde9d 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -80,13 +80,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
80 80
81 /* 81 /*
82 * All handlers must agree on IRQF_SHARED, so we test just the 82 * All handlers must agree on IRQF_SHARED, so we test just the
83 * first. Check for action->next as well. 83 * first.
84 */ 84 */
85 action = desc->action; 85 action = desc->action;
86 if (!action || !(action->flags & IRQF_SHARED) || 86 if (!action || !(action->flags & IRQF_SHARED) ||
87 (action->flags & __IRQF_TIMER) || 87 (action->flags & __IRQF_TIMER))
88 (action->handler(irq, action->dev_id) == IRQ_HANDLED) ||
89 !action->next)
90 goto out; 88 goto out;
91 89
92 /* Already running on another processor */ 90 /* Already running on another processor */
@@ -104,6 +102,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
104 do { 102 do {
105 if (handle_irq_event(desc) == IRQ_HANDLED) 103 if (handle_irq_event(desc) == IRQ_HANDLED)
106 ret = IRQ_HANDLED; 104 ret = IRQ_HANDLED;
105 /* Make sure that there is still a valid action */
107 action = desc->action; 106 action = desc->action;
108 } while ((desc->istate & IRQS_PENDING) && action); 107 } while ((desc->istate & IRQS_PENDING) && action);
109 desc->istate &= ~IRQS_POLL_INPROGRESS; 108 desc->istate &= ~IRQS_POLL_INPROGRESS;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 1588e3b2871b..55fcce6065cf 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -12,37 +12,36 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/hardirq.h> 13#include <linux/hardirq.h>
14#include <linux/irqflags.h> 14#include <linux/irqflags.h>
15#include <linux/sched.h>
16#include <linux/tick.h>
17#include <linux/cpu.h>
18#include <linux/notifier.h>
15#include <asm/processor.h> 19#include <asm/processor.h>
16 20
17/*
18 * An entry can be in one of four states:
19 *
20 * free NULL, 0 -> {claimed} : free to be used
21 * claimed NULL, 3 -> {pending} : claimed to be enqueued
22 * pending next, 3 -> {busy} : queued, pending callback
23 * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
24 */
25
26#define IRQ_WORK_PENDING 1UL
27#define IRQ_WORK_BUSY 2UL
28#define IRQ_WORK_FLAGS 3UL
29 21
30static DEFINE_PER_CPU(struct llist_head, irq_work_list); 22static DEFINE_PER_CPU(struct llist_head, irq_work_list);
23static DEFINE_PER_CPU(int, irq_work_raised);
31 24
32/* 25/*
33 * Claim the entry so that no one else will poke at it. 26 * Claim the entry so that no one else will poke at it.
34 */ 27 */
35static bool irq_work_claim(struct irq_work *work) 28static bool irq_work_claim(struct irq_work *work)
36{ 29{
37 unsigned long flags, nflags; 30 unsigned long flags, oflags, nflags;
38 31
32 /*
33 * Start with our best wish as a premise but only trust any
34 * flag value after cmpxchg() result.
35 */
36 flags = work->flags & ~IRQ_WORK_PENDING;
39 for (;;) { 37 for (;;) {
40 flags = work->flags;
41 if (flags & IRQ_WORK_PENDING)
42 return false;
43 nflags = flags | IRQ_WORK_FLAGS; 38 nflags = flags | IRQ_WORK_FLAGS;
44 if (cmpxchg(&work->flags, flags, nflags) == flags) 39 oflags = cmpxchg(&work->flags, flags, nflags);
40 if (oflags == flags)
45 break; 41 break;
42 if (oflags & IRQ_WORK_PENDING)
43 return false;
44 flags = oflags;
46 cpu_relax(); 45 cpu_relax();
47 } 46 }
48 47
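The rewritten claim is an optimistic cmpxchg loop: start from a guessed flags value and trust only what the cmpxchg actually observed. The same shape in portable C11 atomics (an illustrative analogue, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>

#define PENDING 1UL
#define BUSY    2UL

static bool claim(atomic_ulong *flags)
{
        /* premise: assume PENDING is clear; verify via the cmpxchg result */
        unsigned long old = atomic_load(flags) & ~PENDING;

        for (;;) {
                /* on failure, 'old' is updated to the observed value */
                if (atomic_compare_exchange_weak(flags, &old,
                                                 old | PENDING | BUSY))
                        return true;            /* claimed */
                if (old & PENDING)
                        return false;           /* someone else queued it */
        }
}
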
@@ -57,57 +56,69 @@ void __weak arch_irq_work_raise(void)
57} 56}
58 57
59/* 58/*
60 * Queue the entry and raise the IPI if needed. 59 * Enqueue the irq_work @entry unless it's already pending
60 * somewhere.
61 *
62 * Can be re-enqueued while the callback is still in progress.
61 */ 63 */
62static void __irq_work_queue(struct irq_work *work) 64void irq_work_queue(struct irq_work *work)
63{ 65{
64 bool empty; 66 /* Only queue if not already pending */
67 if (!irq_work_claim(work))
68 return;
65 69
70 /* Queue the entry and raise the IPI if needed. */
66 preempt_disable(); 71 preempt_disable();
67 72
68 empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); 73 llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
69 /* The list was empty, raise self-interrupt to start processing. */ 74
70 if (empty) 75 /*
71 arch_irq_work_raise(); 76 * If the work is not "lazy" or the tick is stopped, raise the irq
77 * work interrupt (if supported by the arch), otherwise, just wait
78 * for the next tick.
79 */
80 if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
81 if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
82 arch_irq_work_raise();
83 }
72 84
73 preempt_enable(); 85 preempt_enable();
74} 86}
87EXPORT_SYMBOL_GPL(irq_work_queue);
75 88
76/* 89bool irq_work_needs_cpu(void)
77 * Enqueue the irq_work @entry, returns true on success, failure when the
78 * @entry was already enqueued by someone else.
79 *
80 * Can be re-enqueued while the callback is still in progress.
81 */
82bool irq_work_queue(struct irq_work *work)
83{ 90{
84 if (!irq_work_claim(work)) { 91 struct llist_head *this_list;
85 /* 92
86 * Already enqueued, can't do! 93 this_list = &__get_cpu_var(irq_work_list);
87 */ 94 if (llist_empty(this_list))
88 return false; 95 return false;
89 }
90 96
91 __irq_work_queue(work); 97 /* All work should have been flushed before going offline */
98 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
99
92 return true; 100 return true;
93} 101}
94EXPORT_SYMBOL_GPL(irq_work_queue);
95 102
96/* 103static void __irq_work_run(void)
97 * Run the irq_work entries on this cpu. Requires to be ran from hardirq
98 * context with local IRQs disabled.
99 */
100void irq_work_run(void)
101{ 104{
105 unsigned long flags;
102 struct irq_work *work; 106 struct irq_work *work;
103 struct llist_head *this_list; 107 struct llist_head *this_list;
104 struct llist_node *llnode; 108 struct llist_node *llnode;
105 109
110
111 /*
112 * Reset the "raised" state right before we check the list because
113 * an NMI may enqueue after we find the list empty from the runner.
114 */
115 __this_cpu_write(irq_work_raised, 0);
116 barrier();
117
106 this_list = &__get_cpu_var(irq_work_list); 118 this_list = &__get_cpu_var(irq_work_list);
107 if (llist_empty(this_list)) 119 if (llist_empty(this_list))
108 return; 120 return;
109 121
110 BUG_ON(!in_irq());
111 BUG_ON(!irqs_disabled()); 122 BUG_ON(!irqs_disabled());
112 123
113 llnode = llist_del_all(this_list); 124 llnode = llist_del_all(this_list);
@@ -119,16 +130,31 @@ void irq_work_run(void)
119 /* 130 /*
120 * Clear the PENDING bit, after this point the @work 131 * Clear the PENDING bit, after this point the @work
121 * can be re-used. 132 * can be re-used.
133 * Make it immediately visible so that other CPUs trying
134 * to claim that work don't rely on us to handle their data
135 * while we are in the middle of the func.
122 */ 136 */
123 work->flags = IRQ_WORK_BUSY; 137 flags = work->flags & ~IRQ_WORK_PENDING;
138 xchg(&work->flags, flags);
139
124 work->func(work); 140 work->func(work);
125 /* 141 /*
126 * Clear the BUSY bit and return to the free state if 142 * Clear the BUSY bit and return to the free state if
127 * no-one else claimed it meanwhile. 143 * no-one else claimed it meanwhile.
128 */ 144 */
129 (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); 145 (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
130 } 146 }
131} 147}
148
149/*
150 * Run the irq_work entries on this cpu. Requires to be ran from hardirq
151 * context with local IRQs disabled.
152 */
153void irq_work_run(void)
154{
155 BUG_ON(!in_irq());
156 __irq_work_run();
157}
132EXPORT_SYMBOL_GPL(irq_work_run); 158EXPORT_SYMBOL_GPL(irq_work_run);
133 159
134/* 160/*
@@ -143,3 +169,35 @@ void irq_work_sync(struct irq_work *work)
143 cpu_relax(); 169 cpu_relax();
144} 170}
145EXPORT_SYMBOL_GPL(irq_work_sync); 171EXPORT_SYMBOL_GPL(irq_work_sync);
172
173#ifdef CONFIG_HOTPLUG_CPU
174static int irq_work_cpu_notify(struct notifier_block *self,
175 unsigned long action, void *hcpu)
176{
177 long cpu = (long)hcpu;
178
179 switch (action) {
180 case CPU_DYING:
181 /* Called from stop_machine */
182 if (WARN_ON_ONCE(cpu != smp_processor_id()))
183 break;
184 __irq_work_run();
185 break;
186 default:
187 break;
188 }
189 return NOTIFY_OK;
190}
191
192static struct notifier_block cpu_notify;
193
194static __init int irq_work_init_cpu_notifier(void)
195{
196 cpu_notify.notifier_call = irq_work_cpu_notify;
197 cpu_notify.priority = 0;
198 register_cpu_notifier(&cpu_notify);
199 return 0;
200}
201device_initcall(irq_work_init_cpu_notifier);
202
203#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 098f396aa409..f423c3ef4a82 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
919} 919}
920#endif /* CONFIG_OPTPROBES */ 920#endif /* CONFIG_OPTPROBES */
921 921
922#ifdef KPROBES_CAN_USE_FTRACE 922#ifdef CONFIG_KPROBES_ON_FTRACE
923static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { 923static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
924 .func = kprobe_ftrace_handler, 924 .func = kprobe_ftrace_handler,
925 .flags = FTRACE_OPS_FL_SAVE_REGS, 925 .flags = FTRACE_OPS_FL_SAVE_REGS,
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
964 (unsigned long)p->addr, 1, 0); 964 (unsigned long)p->addr, 1, 0);
965 WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); 965 WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
966} 966}
967#else /* !KPROBES_CAN_USE_FTRACE */ 967#else /* !CONFIG_KPROBES_ON_FTRACE */
968#define prepare_kprobe(p) arch_prepare_kprobe(p) 968#define prepare_kprobe(p) arch_prepare_kprobe(p)
969#define arm_kprobe_ftrace(p) do {} while (0) 969#define arm_kprobe_ftrace(p) do {} while (0)
970#define disarm_kprobe_ftrace(p) do {} while (0) 970#define disarm_kprobe_ftrace(p) do {} while (0)
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
1414 */ 1414 */
1415 ftrace_addr = ftrace_location((unsigned long)p->addr); 1415 ftrace_addr = ftrace_location((unsigned long)p->addr);
1416 if (ftrace_addr) { 1416 if (ftrace_addr) {
1417#ifdef KPROBES_CAN_USE_FTRACE 1417#ifdef CONFIG_KPROBES_ON_FTRACE
1418 /* Given address is not on the instruction boundary */ 1418 /* Given address is not on the instruction boundary */
1419 if ((unsigned long)p->addr != ftrace_addr) 1419 if ((unsigned long)p->addr != ftrace_addr)
1420 return -EILSEQ; 1420 return -EILSEQ;
1421 p->flags |= KPROBE_FLAG_FTRACE; 1421 p->flags |= KPROBE_FLAG_FTRACE;
1422#else /* !KPROBES_CAN_USE_FTRACE */ 1422#else /* !CONFIG_KPROBES_ON_FTRACE */
1423 return -EINVAL; 1423 return -EINVAL;
1424#endif 1424#endif
1425 } 1425 }
diff --git a/kernel/mutex.c b/kernel/mutex.c
index a307cc9c9526..52f23011b6e0 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -19,6 +19,7 @@
19 */ 19 */
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/sched/rt.h>
22#include <linux/export.h> 23#include <linux/export.h>
23#include <linux/spinlock.h> 24#include <linux/spinlock.h>
24#include <linux/interrupt.h> 25#include <linux/interrupt.h>
diff --git a/kernel/pid.c b/kernel/pid.c
index de9af600006f..f2c6a6825098 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -331,7 +331,7 @@ out:
331 return pid; 331 return pid;
332 332
333out_unlock: 333out_unlock:
334 spin_unlock(&pidmap_lock); 334 spin_unlock_irq(&pidmap_lock);
335out_free: 335out_free:
336 while (++i <= ns->level) 336 while (++i <= ns->level)
337 free_pidmap(pid->numbers + i); 337 free_pidmap(pid->numbers + i);
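The fix restores acquire/release symmetry: pidmap_lock is taken on this path with interrupts disabled, so the error exit must re-enable them too. The invariant in miniature:

        spin_lock_irq(&pidmap_lock);
        /* ... */
        spin_unlock_irq(&pidmap_lock);  /* every exit path must match _irq */
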
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index a278cad1d5d6..8fd709c9bb58 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return p->utime + p->stime;
+	cputime_t utime, stime;
+
+	task_cputime(p, &utime, &stime);
+
+	return utime + stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	return p->utime;
+	cputime_t utime;
+
+	task_cputime(p, &utime, NULL);
+
+	return utime;
 }
 
 static int
@@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	cputime_t utime, stime;
+
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 			      sizeof(unsigned long long));
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+		       utime, stime, tsk->se.sum_exec_runtime);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
+	cputime_t utime, stime;
 
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       tsk->utime + sig->utime, tsk->stime + sig->stime,
+		       utime + sig->utime, stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = tsk->utime,
-			.stime = tsk->stime,
+			.utime = utime,
+			.stime = stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
@@ -1401,8 +1417,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 	while (!signal_pending(current)) {
 		if (timer.it.cpu.expires.sched == 0) {
 			/*
-			 * Our timer fired and was reset.
+			 * Our timer fired and was reset, below
+			 * deletion can not fail.
 			 */
+			posix_cpu_timer_del(&timer);
 			spin_unlock_irq(&timer.it_lock);
 			return 0;
 		}
@@ -1420,9 +1438,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		 * We were interrupted by a signal.
 		 */
 		sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
-		posix_cpu_timer_set(&timer, 0, &zero_it, it);
+		error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
+		if (!error) {
+			/*
+			 * Timer is now unarmed, deletion can not fail.
+			 */
+			posix_cpu_timer_del(&timer);
+		}
 		spin_unlock_irq(&timer.it_lock);
 
+		while (error == TIMER_RETRY) {
+			/*
+			 * We need to handle case when timer was or is in the
+			 * middle of firing. In other cases we already freed
+			 * resources.
+			 */
+			spin_lock_irq(&timer.it_lock);
+			error = posix_cpu_timer_del(&timer);
+			spin_unlock_irq(&timer.it_lock);
+		}
+
 		if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
 			/*
 			 * It actually did fire already.
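
The recurring shape of the cputime hunks above is a single conversion: direct reads of tsk->utime/tsk->stime become calls to the task_cputime() accessor, so a full-dynticks accounting backend can fix the values up at read time. A minimal sketch of a converted reader, assuming only the accessor as used in this patch (a NULL argument skips that field, as in virt_ticks()):

static cputime_t example_prof_ticks(struct task_struct *p)
{
	cputime_t utime, stime;

	task_cputime(p, &utime, &stime);	/* never p->utime directly */
	return utime + stime;
}

static cputime_t example_virt_ticks(struct task_struct *p)
{
	cputime_t utime;

	task_cputime(p, &utime, NULL);		/* system time not needed */
	return utime;
}
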
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 69185ae6b701..10349d5f2ec3 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -997,7 +997,7 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 
 	err = kc->clock_adj(which_clock, &ktx);
 
-	if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
+	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
 		return -EFAULT;
 
 	return err;
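
The change above is semantic, not cosmetic: like adjtimex(2), kc->clock_adj() returns a non-negative clock state (TIME_OK, TIME_INS, ...) on success, so `!err` wrongly skipped the copy-out whenever the state was nonzero. A runnable user-space illustration of the return convention:

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { .modes = 0 };	/* read-only query */
	int state = adjtimex(&tx);

	if (state < 0)				/* only negative is an error */
		perror("adjtimex");
	else
		printf("state=%d freq=%ld\n", state, tx.freq);
	return 0;
}
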
diff --git a/kernel/printk.c b/kernel/printk.c
index 267ce780abe8..f24633afa46a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,6 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 
@@ -1959,30 +1960,32 @@ int is_console_locked(void)
 static DEFINE_PER_CPU(int, printk_pending);
 static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
-void printk_tick(void)
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-	if (__this_cpu_read(printk_pending)) {
-		int pending = __this_cpu_xchg(printk_pending, 0);
-		if (pending & PRINTK_PENDING_SCHED) {
-			char *buf = __get_cpu_var(printk_sched_buf);
-			printk(KERN_WARNING "[sched_delayed] %s", buf);
-		}
-		if (pending & PRINTK_PENDING_WAKEUP)
-			wake_up_interruptible(&log_wait);
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_SCHED) {
+		char *buf = __get_cpu_var(printk_sched_buf);
+		printk(KERN_WARNING "[sched_delayed] %s", buf);
 	}
-}
 
-int printk_needs_cpu(int cpu)
-{
-	if (cpu_is_offline(cpu))
-		printk_tick();
-	return __this_cpu_read(printk_pending);
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
 }
 
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
 void wake_up_klogd(void)
 {
-	if (waitqueue_active(&log_wait))
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+	}
+	preempt_enable();
 }
 
 static void console_cont_flush(char *text, size_t size)
@@ -2462,6 +2465,7 @@ int printk_sched(const char *fmt, ...)
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
 	local_irq_restore(flags);
 
 	return r;
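
The conversion above replaces the printk_tick()/printk_needs_cpu() polling pair with a lazily queued irq_work that runs from the next timer tick. The reusable pattern, reduced to a sketch with stand-in names:

#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/wait.h>

static DEFINE_PER_CPU(int, example_pending);
static DECLARE_WAIT_QUEUE_HEAD(example_wait);

static void example_work_func(struct irq_work *work)
{
	if (__this_cpu_xchg(example_pending, 0))
		wake_up_interruptible(&example_wait);
}

static DEFINE_PER_CPU(struct irq_work, example_work) = {
	.func	= example_work_func,
	.flags	= IRQ_WORK_LAZY,	/* no self-IPI; runs at the next tick */
};

static void example_poke(void)
{
	preempt_disable();		/* flag and work item must be same-CPU */
	this_cpu_or(example_pending, 1);
	irq_work_queue(&__get_cpu_var(example_work));
	preempt_enable();
}
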
diff --git a/kernel/profile.c b/kernel/profile.c
index 1f391819c42f..dc3384ee874e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit {
 #define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
-/* Oprofile timer tick hook */
-static int (*timer_hook)(struct pt_regs *) __read_mostly;
-
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
-int register_timer_hook(int (*hook)(struct pt_regs *))
-{
-	if (timer_hook)
-		return -EBUSY;
-	timer_hook = hook;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(register_timer_hook);
-
-void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
-	WARN_ON(hook != timer_hook);
-	timer_hook = NULL;
-	/* make sure all CPUs see the NULL hook */
-	synchronize_sched();  /* Allow ongoing interrupts to complete. */
-}
-EXPORT_SYMBOL_GPL(unregister_timer_hook);
-
-
 #ifdef CONFIG_SMP
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
@@ -436,8 +414,6 @@ void profile_tick(int type)
 {
 	struct pt_regs *regs = get_irq_regs();
 
-	if (type == CPU_PROFILING && timer_hook)
-		timer_hook(regs);
 	if (!user_mode(regs) && prof_cpu_mask != NULL &&
 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 6cbeaae4406d..acbd28424d81 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 					   kiov->iov_len, kiov->iov_base);
 }
 
+/*
+ * This is declared in linux/regset.h and defined in machine-dependent
+ * code.  We put the export here, near the primary machine-neutral use,
+ * to ensure no machine forgets it.
+ */
+EXPORT_SYMBOL_GPL(task_user_regset_view);
 #endif
 
 int ptrace_request(struct task_struct *child, long request,
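
The export makes task_user_regset_view() (declared in linux/regset.h, defined per architecture) visible to modules. A hedged sketch of a modular consumer — the module itself is hypothetical, only the call is the exported API:

#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>

static int __init example_init(void)
{
	const struct user_regset_view *view = task_user_regset_view(current);

	pr_info("regset view %s: %u regsets\n", view->name, view->n);
	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");
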
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 20dfba576c2b..7f8e7590e3e5 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
 
 extern int rcu_expedited;
 
+#ifdef CONFIG_RCU_STALL_COMMON
+
+extern int rcu_cpu_stall_suppress;
+int rcu_jiffies_till_stall_check(void);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf76177b44..48ab70384a4c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,65 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old, unsigned long c)
 {
-	trace_rcu_torture_read(rcutorturename, rhp);
+	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
 }
 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #endif
+
+#ifdef CONFIG_RCU_STALL_COMMON
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA		(5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA		0
+#endif
+
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
+module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
+
+int rcu_jiffies_till_stall_check(void)
+{
+	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+	/*
+	 * Limit check must be consistent with the Kconfig limits
+	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+	 */
+	if (till_stall_check < 3) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+		till_stall_check = 3;
+	} else if (till_stall_check > 300) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+		till_stall_check = 300;
+	}
+	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+	rcu_cpu_stall_suppress = 1;
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+	.notifier_call = rcu_panic,
+};
+
+static int __init check_cpu_stall_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+	return 0;
+}
+early_initcall(check_cpu_stall_init);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
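
rcu_jiffies_till_stall_check() above clamps the writable module parameter into the 3..300 second range and writes the clamped value back, so the next reader of the parameter sees the setting actually in effect. The same clamp-and-write-back idiom in plain, runnable C (the variable stands in for rcu_cpu_stall_timeout):

#include <stdio.h>

static int tunable = 1000;		/* e.g. written via sysfs */

static int effective_timeout(void)
{
	int v = tunable;		/* one snapshot, like ACCESS_ONCE() */

	if (v < 3)
		v = tunable = 3;
	else if (v > 300)
		v = tunable = 300;
	return v;
}

int main(void)
{
	printf("%d\n", effective_timeout());	/* 300 */
	printf("%d\n", tunable);		/* 300: corrected in place */
	return 0;
}
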
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e7dce58f9c2a..a0714a51b6d7 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
 		       void (*func)(struct rcu_head *rcu),
 		       struct rcu_ctrlblk *rcp);
 
-#include "rcutiny_plugin.h"
-
 static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 
+#include "rcutiny_plugin.h"
+
 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
 static void rcu_idle_enter_common(long long newval)
 {
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
  * interrupts don't count, we must be running at the first interrupt
  * level.
  */
-int rcu_is_cpu_rrupt_from_idle(void)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
 	return rcu_dynticks_nesting <= 1;
 }
@@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
+	reset_cpu_stall_ticks(rcp);
 	if (rcp->rcucblist != NULL &&
 	    rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
@@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
+	check_cpu_stalls();
 	if (user || rcu_is_cpu_rrupt_from_idle())
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index f85016a2309b..8a233002faeb 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -33,6 +33,9 @@ struct rcu_ctrlblk {
 	struct rcu_head **donetail;	/* ->next pointer of last "done" CB. */
 	struct rcu_head **curtail;	/* ->next pointer of last CB. */
 	RCU_TRACE(long qlen);		/* Number of pending CBs. */
+	RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
+	RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
+	RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
 	RCU_TRACE(char *name);		/* Name of RCU type. */
 };
 
@@ -54,6 +57,51 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	unsigned long j;
+	unsigned long js;
+
+	if (rcu_cpu_stall_suppress)
+		return;
+	rcp->ticks_this_gp++;
+	j = jiffies;
+	js = rcp->jiffies_stall;
+	if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
+		pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
+		       rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+		       jiffies - rcp->gp_start, rcp->qlen);
+		dump_stack();
+	}
+	if (*rcp->curtail && ULONG_CMP_GE(j, js))
+		rcp->jiffies_stall = jiffies +
+			3 * rcu_jiffies_till_stall_check() + 3;
+	else if (ULONG_CMP_GE(j, js))
+		rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+}
+
+static void check_cpu_stall_preempt(void);
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
+{
+#ifdef CONFIG_RCU_TRACE
+	rcp->ticks_this_gp = 0;
+	rcp->gp_start = jiffies;
+	rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+#endif /* #ifdef CONFIG_RCU_TRACE */
+}
+
+static void check_cpu_stalls(void)
+{
+	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
+	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+	RCU_TRACE(check_cpu_stall_preempt());
+}
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -448,6 +496,7 @@ static void rcu_preempt_start_gp(void)
 	/* Official start of GP. */
 	rcu_preempt_ctrlblk.gpnum++;
 	RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
+	reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
 
 	/* Any blocked RCU readers block new GP. */
 	if (rcu_preempt_blocked_readers_any())
@@ -1054,4 +1103,11 @@ MODULE_AUTHOR("Paul E. McKenney");
 MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
 MODULE_LICENSE("GPL");
 
+static void check_cpu_stall_preempt(void)
+{
+#ifdef CONFIG_TINY_PREEMPT_RCU
+	check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
+#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
+}
+
 #endif /* #ifdef CONFIG_RCU_TRACE */
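
check_cpu_stall() above compares jiffies to a precomputed deadline with ULONG_CMP_GE(), which stays correct when the jiffies counter wraps. A runnable demonstration using the kernel's definition of the macro:

#include <stdio.h>

#define ULONG_MAX		(~0UL)
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long deadline = ULONG_MAX - 4;	/* just before wraparound */
	unsigned long now = 10;			/* just after wraparound */

	/* the naive test says "not yet"; the wrap-safe test says "expired" */
	printf("naive=%d wrap-safe=%d\n",
	       now >= deadline, ULONG_CMP_GE(now, deadline));
	return 0;
}
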
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01c85fd..e1f3a8c96724 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -46,6 +46,7 @@
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/trace_clock.h>
 #include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
@@ -207,6 +208,20 @@ MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot");
 #define rcu_can_boost() 0
 #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */
 
+#ifdef CONFIG_RCU_TRACE
+static u64 notrace rcu_trace_clock_local(void)
+{
+	u64 ts = trace_clock_local();
+	unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
+	return ts;
+}
+#else /* #ifdef CONFIG_RCU_TRACE */
+static u64 notrace rcu_trace_clock_local(void)
+{
+	return 0ULL;
+}
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
 static unsigned long shutdown_time;	/* jiffies to system shutdown. */
 static unsigned long boost_starttime;	/* jiffies of next boost test start. */
 DEFINE_MUTEX(boost_mutex);		/* protect setting boost_starttime */
@@ -845,7 +860,7 @@ static int rcu_torture_boost(void *arg)
 		/* Wait for the next test interval. */
 		oldstarttime = boost_starttime;
 		while (ULONG_CMP_LT(jiffies, oldstarttime)) {
-			schedule_timeout_uninterruptible(1);
+			schedule_timeout_interruptible(oldstarttime - jiffies);
 			rcu_stutter_wait("rcu_torture_boost");
 			if (kthread_should_stop() ||
 			    fullstop != FULLSTOP_DONTSTOP)
@@ -1028,7 +1043,6 @@ void rcutorture_trace_dump(void)
 		return;
 	if (atomic_xchg(&beenhere, 1) != 0)
 		return;
-	do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
 	ftrace_dump(DUMP_ALL);
 }
 
@@ -1042,13 +1056,16 @@ static void rcu_torture_timer(unsigned long unused)
 {
 	int idx;
 	int completed;
+	int completed_end;
 	static DEFINE_RCU_RANDOM(rand);
 	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
+	unsigned long long ts;
 
 	idx = cur_ops->readlock();
 	completed = cur_ops->completed();
+	ts = rcu_trace_clock_local();
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
@@ -1058,7 +1075,6 @@ static void rcu_torture_timer(unsigned long unused)
 		cur_ops->readunlock(idx);
 		return;
 	}
-	do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
 	spin_lock(&rand_lock);
@@ -1071,10 +1087,14 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	if (pipe_count > 1)
+	completed_end = cur_ops->completed();
+	if (pipe_count > 1) {
+		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
+					  completed, completed_end);
 		rcutorture_trace_dump();
+	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
-	completed = cur_ops->completed() - completed;
+	completed = completed_end - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
@@ -1094,11 +1114,13 @@ static int
 rcu_torture_reader(void *arg)
 {
 	int completed;
+	int completed_end;
 	int idx;
 	DEFINE_RCU_RANDOM(rand);
 	struct rcu_torture *p;
 	int pipe_count;
 	struct timer_list t;
+	unsigned long long ts;
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
@@ -1112,6 +1134,7 @@ rcu_torture_reader(void *arg)
 		}
 		idx = cur_ops->readlock();
 		completed = cur_ops->completed();
+		ts = rcu_trace_clock_local();
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
@@ -1122,7 +1145,6 @@ rcu_torture_reader(void *arg)
 			schedule_timeout_interruptible(HZ);
 			continue;
 		}
-		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 		if (p->rtort_mbtest == 0)
 			atomic_inc(&n_rcu_torture_mberror);
 		cur_ops->read_delay(&rand);
@@ -1132,10 +1154,14 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		if (pipe_count > 1)
+		completed_end = cur_ops->completed();
+		if (pipe_count > 1) {
+			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
+						  ts, completed, completed_end);
 			rcutorture_trace_dump();
+		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
-		completed = cur_ops->completed() - completed;
+		completed = completed_end - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
@@ -1301,19 +1327,35 @@ static void rcu_torture_shuffle_tasks(void)
 			set_cpus_allowed_ptr(reader_tasks[i],
 					     shuffle_tmp_mask);
 	}
-
 	if (fakewriter_tasks) {
 		for (i = 0; i < nfakewriters; i++)
 			if (fakewriter_tasks[i])
 				set_cpus_allowed_ptr(fakewriter_tasks[i],
 						     shuffle_tmp_mask);
 	}
-
 	if (writer_task)
 		set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
-
 	if (stats_task)
 		set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
+	if (stutter_task)
+		set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
+	if (fqs_task)
+		set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
+	if (shutdown_task)
+		set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
+#ifdef CONFIG_HOTPLUG_CPU
+	if (onoff_task)
+		set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+	if (stall_task)
+		set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
+	if (barrier_cbs_tasks)
+		for (i = 0; i < n_barrier_cbs; i++)
+			if (barrier_cbs_tasks[i])
+				set_cpus_allowed_ptr(barrier_cbs_tasks[i],
+						     shuffle_tmp_mask);
+	if (barrier_task)
+		set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
 
 	if (rcu_idle_cpu == -1)
 		rcu_idle_cpu = num_online_cpus() - 1;
@@ -1749,7 +1791,7 @@ static int rcu_torture_barrier_init(void)
 	barrier_cbs_wq =
 		kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
 			GFP_KERNEL);
-	if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
+	if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
 		return -ENOMEM;
 	for (i = 0; i < n_barrier_cbs; i++) {
 		init_waitqueue_head(&barrier_cbs_wq[i]);
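
rcu_trace_clock_local() above turns a nanosecond trace clock into microseconds with do_div(), which divides its 64-bit argument in place and returns the remainder; the remainder is deliberately discarded. The same arithmetic in portable, runnable C:

#include <inttypes.h>
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

int main(void)
{
	uint64_t ts = 1234567891234ULL;		/* a nanosecond timestamp */
	uint64_t rem = ts % NSEC_PER_USEC;	/* what do_div() returns */

	ts /= NSEC_PER_USEC;			/* what do_div() leaves in ts */
	printf("%" PRIu64 " us (dropped %" PRIu64 " ns)\n", ts, rem);
	return 0;
}
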
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77b614e..5b8ad827fd86 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
  * The rcu_scheduler_active variable transitions from zero to one just
  * before the first task is spawned.  So when this variable is zero, RCU
  * can assume that there is but one task, allowing RCU to (for example)
- * optimized synchronize_sched() to a simple barrier().  When this variable
+ * optimize synchronize_sched() to a simple barrier().  When this variable
  * is one, RCU must actually do all the hard work required to detect real
  * grace periods.  This variable is also used to suppress boot-time false
  * positives from lockdep-RCU error checking.
@@ -217,12 +217,6 @@ module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 
-int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
-int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
-
-module_param(rcu_cpu_stall_suppress, int, 0644);
-module_param(rcu_cpu_stall_timeout, int, 0644);
-
 static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 
@@ -305,17 +299,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 }
 
 /*
- * Does the current CPU require a yet-as-unscheduled grace period?
+ * Does the current CPU require a not-yet-started grace period?
+ * The caller must have disabled interrupts to prevent races with
+ * normal callback registry.
  */
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	struct rcu_head **ntp;
+	int i;
 
-	ntp = rdp->nxttail[RCU_DONE_TAIL +
-			   (ACCESS_ONCE(rsp->completed) != rdp->completed)];
-	return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
-	       !rcu_gp_in_progress(rsp);
+	if (rcu_gp_in_progress(rsp))
+		return 0;  /* No, a grace period is already in progress. */
+	if (!rdp->nxttail[RCU_NEXT_TAIL])
+		return 0;  /* No, this is a no-CBs (or offline) CPU. */
+	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+		return 1;  /* Yes, this CPU has newly registered callbacks. */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
+		    ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
+				 rdp->nxtcompleted[i]))
+			return 1;  /* Yes, CBs for future grace period. */
+	return 0; /* No grace period needed. */
 }
 
 /*
@@ -336,7 +340,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				 bool user)
 {
-	trace_rcu_dyntick("Start", oldval, 0);
+	trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle = idle_task(smp_processor_id());
 
@@ -727,7 +731,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
  * interrupt from idle, return true.  The caller must have at least
  * disabled preemption.
  */
-int rcu_is_cpu_rrupt_from_idle(void)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
 	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
@@ -793,28 +797,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	return 0;
 }
 
-static int jiffies_till_stall_check(void)
-{
-	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
-
-	/*
-	 * Limit check must be consistent with the Kconfig limits
-	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
-	 */
-	if (till_stall_check < 3) {
-		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
-		till_stall_check = 3;
-	} else if (till_stall_check > 300) {
-		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
-		till_stall_check = 300;
-	}
-	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
-}
-
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	rsp->gp_start = jiffies;
-	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
+	rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
 }
 
 /*
@@ -857,7 +843,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
-	rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
+	rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	/*
@@ -935,7 +921,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
 		rsp->jiffies_stall = jiffies +
-				     3 * jiffies_till_stall_check() + 3;
+				     3 * rcu_jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	set_need_resched();  /* kick ourselves to get things going. */
@@ -966,12 +952,6 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
 	}
 }
 
-static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
-{
-	rcu_cpu_stall_suppress = 1;
-	return NOTIFY_DONE;
-}
-
 /**
  * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
  *
@@ -989,15 +969,6 @@ void rcu_cpu_stall_reset(void)
 		rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
-static struct notifier_block rcu_panic_block = {
-	.notifier_call = rcu_panic,
-};
-
-static void __init check_cpu_stall_init(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
-}
-
 /*
  * Update CPU-local rcu_data state to record the newly noticed grace period.
  * This is used both when we started the grace period and when we notice
@@ -1071,6 +1042,145 @@ static void init_callback_list(struct rcu_data *rdp)
 }
 
 /*
+ * Determine the value that ->completed will have at the end of the
+ * next subsequent grace period.  This is used to tag callbacks so that
+ * a CPU can invoke callbacks in a timely fashion even if that CPU has
+ * been dyntick-idle for an extended period with callbacks under the
+ * influence of RCU_FAST_NO_HZ.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
+				       struct rcu_node *rnp)
+{
+	/*
+	 * If RCU is idle, we just wait for the next grace period.
+	 * But we can only be sure that RCU is idle if we are looking
+	 * at the root rcu_node structure -- otherwise, a new grace
+	 * period might have started, but just not yet gotten around
+	 * to initializing the current non-root rcu_node structure.
+	 */
+	if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
+		return rnp->completed + 1;
+
+	/*
+	 * Otherwise, wait for a possible partial grace period and
+	 * then the subsequent full grace period.
+	 */
+	return rnp->completed + 2;
+}
+
+/*
+ * If there is room, assign a ->completed number to any callbacks on
+ * this CPU that have not already been assigned.  Also accelerate any
+ * callbacks that were previously assigned a ->completed number that has
+ * since proven to be too conservative, which can happen if callbacks get
+ * assigned a ->completed number while RCU is idle, but with reference to
+ * a non-root rcu_node structure.  This function is idempotent, so it does
+ * not hurt to call it repeatedly.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+			       struct rcu_data *rdp)
+{
+	unsigned long c;
+	int i;
+
+	/* If the CPU has no callbacks, nothing to do. */
+	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+		return;
+
+	/*
+	 * Starting from the sublist containing the callbacks most
+	 * recently assigned a ->completed number and working down, find the
+	 * first sublist that is not assignable to an upcoming grace period.
+	 * Such a sublist has something in it (first two tests) and has
+	 * a ->completed number assigned that will complete sooner than
+	 * the ->completed number for newly arrived callbacks (last test).
+	 *
+	 * The key point is that any later sublist can be assigned the
+	 * same ->completed number as the newly arrived callbacks, which
+	 * means that the callbacks in any of these later sublist can be
+	 * grouped into a single sublist, whether or not they have already
+	 * been assigned a ->completed number.
+	 */
+	c = rcu_cbs_completed(rsp, rnp);
+	for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
+		if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
+		    !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
+			break;
+
+	/*
+	 * If there are no sublist for unassigned callbacks, leave.
+	 * At the same time, advance "i" one sublist, so that "i" will
+	 * index into the sublist where all the remaining callbacks should
+	 * be grouped into.
+	 */
+	if (++i >= RCU_NEXT_TAIL)
+		return;
+
+	/*
+	 * Assign all subsequent callbacks' ->completed number to the next
+	 * full grace period and group them all in the sublist initially
+	 * indexed by "i".
+	 */
+	for (; i <= RCU_NEXT_TAIL; i++) {
+		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
+		rdp->nxtcompleted[i] = c;
+	}
+
+	/* Trace depending on how much we were able to accelerate. */
+	if (!*rdp->nxttail[RCU_WAIT_TAIL])
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+	else
+		trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
+}
+
+/*
+ * Move any callbacks whose grace period has completed to the
+ * RCU_DONE_TAIL sublist, then compact the remaining sublists and
+ * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL
+ * sublist.  This function is idempotent, so it does not hurt to
+ * invoke it repeatedly.  As long as it is not invoked -too- often...
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
+			    struct rcu_data *rdp)
+{
+	int i, j;
+
+	/* If the CPU has no callbacks, nothing to do. */
+	if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
+		return;
+
+	/*
+	 * Find all callbacks whose ->completed numbers indicate that they
+	 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
+	 */
+	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
+		if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
+			break;
+		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
+	}
+	/* Clean up any sublist tail pointers that were misordered above. */
+	for (j = RCU_WAIT_TAIL; j < i; j++)
+		rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
+
+	/* Copy down callbacks to fill in empty sublists. */
+	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
+		if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
+			break;
+		rdp->nxttail[j] = rdp->nxttail[i];
+		rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
+	}
+
+	/* Classify any remaining callbacks. */
+	rcu_accelerate_cbs(rsp, rnp, rdp);
+}
+
+/*
  * Advance this CPU's callbacks, but only if the current grace period
  * has ended.  This may be called only from the CPU to whom the rdp
  * belongs.  In addition, the corresponding leaf rcu_node structure's
@@ -1080,12 +1190,15 @@ static void
 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	/* Did another grace period end? */
-	if (rdp->completed != rnp->completed) {
+	if (rdp->completed == rnp->completed) {
 
-		/* Advance callbacks.  No harm if list empty. */
-		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
-		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
-		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+		/* No, so just accelerate recent callbacks. */
+		rcu_accelerate_cbs(rsp, rnp, rdp);
+
+	} else {
+
+		/* Advance callbacks. */
+		rcu_advance_cbs(rsp, rnp, rdp);
 
 		/* Remember that we saw this grace-period completion. */
 		rdp->completed = rnp->completed;
@@ -1392,17 +1505,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	/*
 	 * Because there is no grace period in progress right now,
 	 * any callbacks we have up to this point will be satisfied
-	 * by the next grace period.  So promote all callbacks to be
-	 * handled after the end of the next grace period.  If the
-	 * CPU is not yet aware of the end of the previous grace period,
-	 * we need to allow for the callback advancement that will
-	 * occur when it does become aware.  Deadlock prevents us from
-	 * making it aware at this point: We cannot acquire a leaf
-	 * rcu_node ->lock while holding the root rcu_node ->lock.
+	 * by the next grace period.  So this is a good place to
+	 * assign a grace period number to recently posted callbacks.
 	 */
-	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-	if (rdp->completed == rsp->completed)
-		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	rcu_accelerate_cbs(rsp, rnp, rdp);
 
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
@@ -1527,7 +1633,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
 		 */
-		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+		rcu_accelerate_cbs(rsp, rnp, rdp);
 
 		rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
 	}
@@ -1779,7 +1885,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	long bl, count, count_lazy;
 	int i;
 
-	/* If no callbacks are ready, just return.*/
+	/* If no callbacks are ready, just return. */
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
 		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
 		trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
@@ -2008,19 +2114,19 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
 	WARN_ON_ONCE(rdp->beenonline == 0);
 
-	/*
-	 * Advance callbacks in response to end of earlier grace
-	 * period that some other CPU ended.
-	 */
+	/* Handle the end of a grace period that some other CPU ended. */
 	rcu_process_gp_end(rsp, rdp);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
 	/* Does this CPU require a not-yet-started grace period? */
+	local_irq_save(flags);
 	if (cpu_needs_another_gp(rsp, rdp)) {
-		raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
+		raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
 		rcu_start_gp(rsp, flags);  /* releases above lock */
+	} else {
+		local_irq_restore(flags);
 	}
 
 	/* If there are callbacks ready, invoke them. */
@@ -2719,9 +2825,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
-#ifdef CONFIG_RCU_USER_QS
-	WARN_ON_ONCE(rdp->dynticks->in_user);
-#endif
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	rcu_boot_init_nocb_percpu_data(rdp);
@@ -2938,6 +3041,10 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
 
+	/* Silence gcc 4.8 warning about array index out of range. */
+	if (rcu_num_lvls > RCU_NUM_LVLS)
+		panic("rcu_init_one: rcu_num_lvls overflow");
+
 	/* Initialize the level-tracking arrays. */
 
 	for (i = 0; i < rcu_num_lvls; i++)
@@ -3074,7 +3181,6 @@ void __init rcu_init(void)
 	cpu_notifier(rcu_cpu_notify, 0);
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-	check_cpu_stall_init();
 }
 
 #include "rcutree_plugin.h"
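
The new rcu_accelerate_cbs()/rcu_advance_cbs() pair manages one linked list of callbacks partitioned by tail pointers into sublists, each tagged in nxtcompleted[] with the grace period after which it may run. A deliberately simplified, runnable model of the "advance" step — segments are plain counters here, not list segments:

#include <stdio.h>

#define NSEG 3	/* stands in for WAIT, NEXT_READY, NEXT */

struct toy_cpu {
	unsigned long nxtcompleted[NSEG];	/* GP number each segment awaits */
	int ncbs[NSEG];				/* callbacks parked per segment */
	int ndone;				/* ready to invoke */
};

static void toy_advance(struct toy_cpu *cp, unsigned long completed)
{
	int i;

	for (i = 0; i < NSEG; i++) {
		if (cp->nxtcompleted[i] > completed)
			break;		/* the kernel uses ULONG_CMP_LT() here */
		cp->ndone += cp->ncbs[i];	/* its grace period has elapsed */
		cp->ncbs[i] = 0;
	}
}

int main(void)
{
	struct toy_cpu cp = { { 101, 102, 102 }, { 2, 3, 4 }, 0 };

	toy_advance(&cp, 101);
	printf("ready=%d\n", cp.ndone);		/* 2: only segment 0 elapsed */
	return 0;
}
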
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4b69291b093d..c896b5045d9d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,10 +102,6 @@ struct rcu_dynticks {
 					    /* idle-period nonlazy_posted snapshot. */
 	int tick_nohz_enabled_snap;	    /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-#ifdef CONFIG_RCU_USER_QS
-	bool ignore_user_qs;		    /* Treat userspace as extended QS or not */
-	bool in_user;			    /* Is the CPU in userland from RCU POV? */
-#endif
 };
 
 /* RCU's kthread states for tracing. */
@@ -282,6 +278,8 @@ struct rcu_data {
 	 */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail[RCU_NEXT_SIZE];
+	unsigned long	nxtcompleted[RCU_NEXT_SIZE];
+					/* grace periods for sublists. */
 	long		qlen_lazy;	/* # of lazy queued callbacks */
 	long		qlen;		/* # of queued callbacks, incl lazy */
 	long		qlen_last_fqs_check;
@@ -343,11 +341,6 @@ struct rcu_data {
 
 #define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
 
-#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA		(5 * HZ)
-#else
-#define RCU_STALL_DELAY_DELTA		0
-#endif
 #define RCU_STALL_RAT_DELAY		2	/* Allow other CPUs time */
 						/*  to take at least one */
 						/*  scheduling clock irq */
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 16502d3a71c8..13b243a323fa 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -17,6 +17,7 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 98ec49475460..7890b10084a7 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a242e691c993..1e09308bf2a1 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include "rtmutex_common.h"
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0bebba..4a88f1d51563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
@@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
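
sched_rr_handler() above accepts writes to the round-robin timeslice sysctl in milliseconds, stores the value internally in jiffies, and treats zero or negative writes as "reset to the RR_TIMESLICE default". The normalization restated as runnable C, with msecs_to_jiffies() modeled for HZ=1000 and the default value assumed to be 100 ms:

#include <stdio.h>

#define HZ		1000
#define RR_TIMESLICE	(100 * HZ / 1000)	/* assumed 100 ms default */

static long msecs_to_jiffies_model(long ms) { return ms * HZ / 1000; }

static long normalize_rr(long written_ms)
{
	return written_ms <= 0 ? RR_TIMESLICE
			       : msecs_to_jiffies_model(written_ms);
}

int main(void)
{
	printf("%ld %ld %ld\n",
	       normalize_rr(0),		/* 100: zero restores the default */
	       normalize_rr(25),	/* 25 jiffies at HZ=1000 */
	       normalize_rr(-7));	/* negative also restores it */
	return 0;
}
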
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 293b202fcf79..9857329ed280 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
3#include <linux/tsacct_kern.h> 3#include <linux/tsacct_kern.h>
4#include <linux/kernel_stat.h> 4#include <linux/kernel_stat.h>
5#include <linux/static_key.h> 5#include <linux/static_key.h>
6#include <linux/context_tracking.h>
6#include "sched.h" 7#include "sched.h"
7 8
8 9
@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
163 task_group_account_field(p, index, (__force u64) cputime); 164 task_group_account_field(p, index, (__force u64) cputime);
164 165
165 /* Account for user time used */ 166 /* Account for user time used */
166 acct_update_integrals(p); 167 acct_account_cputime(p);
167} 168}
168 169
169/* 170/*
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
213 task_group_account_field(p, index, (__force u64) cputime); 214 task_group_account_field(p, index, (__force u64) cputime);
214 215
215 /* Account for system time used */ 216 /* Account for system time used */
216 acct_update_integrals(p); 217 acct_account_cputime(p);
217} 218}
218 219
219/* 220/*
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
295void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) 296void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
296{ 297{
297 struct signal_struct *sig = tsk->signal; 298 struct signal_struct *sig = tsk->signal;
299 cputime_t utime, stime;
298 struct task_struct *t; 300 struct task_struct *t;
299 301
300 times->utime = sig->utime; 302 times->utime = sig->utime;
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
308 310
309 t = tsk; 311 t = tsk;
310 do { 312 do {
311 times->utime += t->utime; 313 task_cputime(tsk, &utime, &stime);
312 times->stime += t->stime; 314 times->utime += utime;
315 times->stime += stime;
313 times->sum_exec_runtime += task_sched_runtime(t); 316 times->sum_exec_runtime += task_sched_runtime(t);
314 } while_each_thread(tsk, t); 317 } while_each_thread(tsk, t);
315out: 318out:
316 rcu_read_unlock(); 319 rcu_read_unlock();
317} 320}
318 321
319#ifndef CONFIG_VIRT_CPU_ACCOUNTING
320
321#ifdef CONFIG_IRQ_TIME_ACCOUNTING 322#ifdef CONFIG_IRQ_TIME_ACCOUNTING
322/* 323/*
323 * Account a tick to a process and cpustat 324 * Account a tick to a process and cpustat
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
382 irqtime_account_process_tick(current, 0, rq); 383 irqtime_account_process_tick(current, 0, rq);
383} 384}
384#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 385#else /* CONFIG_IRQ_TIME_ACCOUNTING */
385static void irqtime_account_idle_ticks(int ticks) {} 386static inline void irqtime_account_idle_ticks(int ticks) {}
386static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 387static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
387 struct rq *rq) {} 388 struct rq *rq) {}
388#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 389#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
389 390
391#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
390/* 392/*
391 * Account a single tick of cpu time. 393 * Account a single tick of cpu time.
392 * @p: the process that the cpu time gets accounted to 394 * @p: the process that the cpu time gets accounted to
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
397 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 399 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
398 struct rq *rq = this_rq(); 400 struct rq *rq = this_rq();
399 401
402 if (vtime_accounting_enabled())
403 return;
404
400 if (sched_clock_irqtime) { 405 if (sched_clock_irqtime) {
401 irqtime_account_process_tick(p, user_tick, rq); 406 irqtime_account_process_tick(p, user_tick, rq);
402 return; 407 return;
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)
438 443
439 account_idle_time(jiffies_to_cputime(ticks)); 444 account_idle_time(jiffies_to_cputime(ticks));
440} 445}
441 446#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
442#endif
443 447
444/* 448/*
445 * Use precise platform statistics if available: 449 * Use precise platform statistics if available:
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
461 *st = cputime.stime; 465 *st = cputime.stime;
462} 466}
463 467
464void vtime_account_system_irqsafe(struct task_struct *tsk)
465{
466 unsigned long flags;
467
468 local_irq_save(flags);
469 vtime_account_system(tsk);
470 local_irq_restore(flags);
471}
472EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
473
474#ifndef __ARCH_HAS_VTIME_TASK_SWITCH 468#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
475void vtime_task_switch(struct task_struct *prev) 469void vtime_task_switch(struct task_struct *prev)
476{ 470{
471 if (!vtime_accounting_enabled())
472 return;
473
477 if (is_idle_task(prev)) 474 if (is_idle_task(prev))
478 vtime_account_idle(prev); 475 vtime_account_idle(prev);
479 else 476 else
480 vtime_account_system(prev); 477 vtime_account_system(prev);
481 478
479#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
482 vtime_account_user(prev); 480 vtime_account_user(prev);
481#endif
483 arch_vtime_task_switch(prev); 482 arch_vtime_task_switch(prev);
484} 483}
485#endif 484#endif
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
493 * vtime_account(). 492 * vtime_account().
494 */ 493 */
495#ifndef __ARCH_HAS_VTIME_ACCOUNT 494#ifndef __ARCH_HAS_VTIME_ACCOUNT
496void vtime_account(struct task_struct *tsk) 495void vtime_account_irq_enter(struct task_struct *tsk)
497{ 496{
498 if (in_interrupt() || !is_idle_task(tsk)) 497 if (!vtime_accounting_enabled())
499 vtime_account_system(tsk); 498 return;
500 else 499
501 vtime_account_idle(tsk); 500 if (!in_interrupt()) {
501 /*
502 * If we interrupted user, context_tracking_in_user()
503 * is 1 because the context tracking doesn't hook
504 * on irq entry/exit. This way we know if
505 * we need to flush user time on kernel entry.
506 */
507 if (context_tracking_in_user()) {
508 vtime_account_user(tsk);
509 return;
510 }
511
512 if (is_idle_task(tsk)) {
513 vtime_account_idle(tsk);
514 return;
515 }
516 }
517 vtime_account_system(tsk);
502} 518}
503EXPORT_SYMBOL_GPL(vtime_account); 519EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
504#endif /* __ARCH_HAS_VTIME_ACCOUNT */ 520#endif /* __ARCH_HAS_VTIME_ACCOUNT */
505 521
506#else 522#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
507
508#ifndef nsecs_to_cputime
509# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
510#endif
511 523
512static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) 524static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
513{ 525{
514 u64 temp = (__force u64) rtime; 526 u64 temp = (__force u64) rtime;
515 527
516 temp *= (__force u64) utime; 528 temp *= (__force u64) stime;
517 529
518 if (sizeof(cputime_t) == 4) 530 if (sizeof(cputime_t) == 4)
519 temp = div_u64(temp, (__force u32) total); 531 temp = div_u64(temp, (__force u32) total);
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
531 struct cputime *prev, 543 struct cputime *prev,
532 cputime_t *ut, cputime_t *st) 544 cputime_t *ut, cputime_t *st)
533{ 545{
534 cputime_t rtime, utime, total; 546 cputime_t rtime, stime, total;
535 547
536 utime = curr->utime; 548 stime = curr->stime;
537 total = utime + curr->stime; 549 total = stime + curr->utime;
538 550
539 /* 551 /*
540 * Tick based cputime accounting depends on random scheduling 552
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
549 rtime = nsecs_to_cputime(curr->sum_exec_runtime); 561 rtime = nsecs_to_cputime(curr->sum_exec_runtime);
550 562
551 if (total) 563 if (total)
552 utime = scale_utime(utime, rtime, total); 564 stime = scale_stime(stime, rtime, total);
553 else 565 else
554 utime = rtime; 566 stime = rtime;
555 567
556 /* 568 /*
557 * If the tick based count grows faster than the scheduler one, 569 * If the tick based count grows faster than the scheduler one,
558 * the result of the scaling may go backward. 570 * the result of the scaling may go backward.
559 * Let's enforce monotonicity. 571 * Let's enforce monotonicity.
560 */ 572 */
561 prev->utime = max(prev->utime, utime); 573 prev->stime = max(prev->stime, stime);
562 prev->stime = max(prev->stime, rtime - prev->utime); 574 prev->utime = max(prev->utime, rtime - prev->stime);
563 575
564 *ut = prev->utime; 576 *ut = prev->utime;
565 *st = prev->stime; 577 *st = prev->stime;
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
568void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) 580void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
569{ 581{
570 struct task_cputime cputime = { 582 struct task_cputime cputime = {
571 .utime = p->utime,
572 .stime = p->stime,
573 .sum_exec_runtime = p->se.sum_exec_runtime, 583 .sum_exec_runtime = p->se.sum_exec_runtime,
574 }; 584 };
575 585
586 task_cputime(p, &cputime.utime, &cputime.stime);
576 cputime_adjust(&cputime, &p->prev_cputime, ut, st); 587 cputime_adjust(&cputime, &p->prev_cputime, ut, st);
577} 588}
578 589
@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
586 thread_group_cputime(p, &cputime); 597 thread_group_cputime(p, &cputime);
587 cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); 598 cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
588} 599}
589#endif 600#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
601
602#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
603static unsigned long long vtime_delta(struct task_struct *tsk)
604{
605 unsigned long long clock;
606
607 clock = sched_clock();
608 if (clock < tsk->vtime_snap)
609 return 0;
610
611 return clock - tsk->vtime_snap;
612}
613
614static cputime_t get_vtime_delta(struct task_struct *tsk)
615{
616 unsigned long long delta = vtime_delta(tsk);
617
618 WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
619 tsk->vtime_snap += delta;
620
621 /* CHECKME: always safe to convert nsecs to cputime? */
622 return nsecs_to_cputime(delta);
623}
624
625static void __vtime_account_system(struct task_struct *tsk)
626{
627 cputime_t delta_cpu = get_vtime_delta(tsk);
628
629 account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
630}
631
632void vtime_account_system(struct task_struct *tsk)
633{
634 if (!vtime_accounting_enabled())
635 return;
636
637 write_seqlock(&tsk->vtime_seqlock);
638 __vtime_account_system(tsk);
639 write_sequnlock(&tsk->vtime_seqlock);
640}
641
642void vtime_account_irq_exit(struct task_struct *tsk)
643{
644 if (!vtime_accounting_enabled())
645 return;
646
647 write_seqlock(&tsk->vtime_seqlock);
648 if (context_tracking_in_user())
649 tsk->vtime_snap_whence = VTIME_USER;
650 __vtime_account_system(tsk);
651 write_sequnlock(&tsk->vtime_seqlock);
652}
653
654void vtime_account_user(struct task_struct *tsk)
655{
656 cputime_t delta_cpu;
657
658 if (!vtime_accounting_enabled())
659 return;
660
661 delta_cpu = get_vtime_delta(tsk);
662
663 write_seqlock(&tsk->vtime_seqlock);
664 tsk->vtime_snap_whence = VTIME_SYS;
665 account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
666 write_sequnlock(&tsk->vtime_seqlock);
667}
668
669void vtime_user_enter(struct task_struct *tsk)
670{
671 if (!vtime_accounting_enabled())
672 return;
673
674 write_seqlock(&tsk->vtime_seqlock);
675 tsk->vtime_snap_whence = VTIME_USER;
676 __vtime_account_system(tsk);
677 write_sequnlock(&tsk->vtime_seqlock);
678}
679
680void vtime_guest_enter(struct task_struct *tsk)
681{
682 write_seqlock(&tsk->vtime_seqlock);
683 __vtime_account_system(tsk);
684 current->flags |= PF_VCPU;
685 write_sequnlock(&tsk->vtime_seqlock);
686}
687
688void vtime_guest_exit(struct task_struct *tsk)
689{
690 write_seqlock(&tsk->vtime_seqlock);
691 __vtime_account_system(tsk);
692 current->flags &= ~PF_VCPU;
693 write_sequnlock(&tsk->vtime_seqlock);
694}
695
696void vtime_account_idle(struct task_struct *tsk)
697{
698 cputime_t delta_cpu = get_vtime_delta(tsk);
699
700 account_idle_time(delta_cpu);
701}
702
703bool vtime_accounting_enabled(void)
704{
705 return context_tracking_active();
706}
707
708void arch_vtime_task_switch(struct task_struct *prev)
709{
710 write_seqlock(&prev->vtime_seqlock);
711 prev->vtime_snap_whence = VTIME_SLEEPING;
712 write_sequnlock(&prev->vtime_seqlock);
713
714 write_seqlock(&current->vtime_seqlock);
715 current->vtime_snap_whence = VTIME_SYS;
716 current->vtime_snap = sched_clock();
717 write_sequnlock(&current->vtime_seqlock);
718}
719
720void vtime_init_idle(struct task_struct *t)
721{
722 unsigned long flags;
723
724 write_seqlock_irqsave(&t->vtime_seqlock, flags);
725 t->vtime_snap_whence = VTIME_SYS;
726 t->vtime_snap = sched_clock();
727 write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
728}
729
730cputime_t task_gtime(struct task_struct *t)
731{
732 unsigned int seq;
733 cputime_t gtime;
734
735 do {
736 seq = read_seqbegin(&t->vtime_seqlock);
737
738 gtime = t->gtime;
739 if (t->flags & PF_VCPU)
740 gtime += vtime_delta(t);
741
742 } while (read_seqretry(&t->vtime_seqlock, seq));
743
744 return gtime;
745}
746
747/*
748 * Fetch cputime raw values from fields of task_struct and
749 * add up the pending nohz execution time since the last
750 * cputime snapshot.
751 */
752static void
753fetch_task_cputime(struct task_struct *t,
754 cputime_t *u_dst, cputime_t *s_dst,
755 cputime_t *u_src, cputime_t *s_src,
756 cputime_t *udelta, cputime_t *sdelta)
757{
758 unsigned int seq;
759 unsigned long long delta;
760
761 do {
762 *udelta = 0;
763 *sdelta = 0;
764
765 seq = read_seqbegin(&t->vtime_seqlock);
766
767 if (u_dst)
768 *u_dst = *u_src;
769 if (s_dst)
770 *s_dst = *s_src;
771
772 /* Task is sleeping, nothing to add */
773 if (t->vtime_snap_whence == VTIME_SLEEPING ||
774 is_idle_task(t))
775 continue;
776
777 delta = vtime_delta(t);
778
779 /*
780 * Task runs either in user or kernel space; add the pending nohz time to
781 * the right place.
782 */
783 if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
784 *udelta = delta;
785 } else {
786 if (t->vtime_snap_whence == VTIME_SYS)
787 *sdelta = delta;
788 }
789 } while (read_seqretry(&t->vtime_seqlock, seq));
790}
791
792
793void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
794{
795 cputime_t udelta, sdelta;
796
797 fetch_task_cputime(t, utime, stime, &t->utime,
798 &t->stime, &udelta, &sdelta);
799 if (utime)
800 *utime += udelta;
801 if (stime)
802 *stime += sdelta;
803}
804
805void task_cputime_scaled(struct task_struct *t,
806 cputime_t *utimescaled, cputime_t *stimescaled)
807{
808 cputime_t udelta, sdelta;
809
810 fetch_task_cputime(t, utimescaled, stimescaled,
811 &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
812 if (utimescaled)
813 *utimescaled += cputime_to_scaled(udelta);
814 if (stimescaled)
815 *stimescaled += cputime_to_scaled(sdelta);
816}
817#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
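
A note on the scale_stime()/cputime_adjust() rework above: the precise scheduler runtime (rtime) is now apportioned to system time, where the old code scaled user time, and the prev fields only ever grow so the values reported to userspace stay monotonic. A minimal stand-alone model of that arithmetic, assuming the product fits in 64 bits (the kernel handles 32-bit cputime_t with div_u64()):

    #include <stdint.h>

    struct prev_cputime { uint64_t utime, stime; };

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Split rtime between stime and utime in the ratio of the tick-based
     * samples, then clamp exactly as the patch does. rtime is monotonic,
     * so prev->stime cannot exceed it here. */
    static void cputime_adjust_model(uint64_t utime, uint64_t stime,
                                     uint64_t rtime, struct prev_cputime *prev,
                                     uint64_t *ut, uint64_t *st)
    {
            uint64_t total = stime + utime;

            stime = total ? stime * rtime / total : rtime;

            prev->stime = MAX(prev->stime, stime);
            prev->utime = MAX(prev->utime, rtime - prev->stime);
            *ut = prev->utime;
            *st = prev->stime;
    }
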
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81fa53643409..7a33e5986fc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
1680 } 1680 }
1681 1681
1682 /* ensure we never gain time by being placed backwards. */ 1682 /* ensure we never gain time by being placed backwards. */
1683 vruntime = max_vruntime(se->vruntime, vruntime); 1683 se->vruntime = max_vruntime(se->vruntime, vruntime);
1684
1685 se->vruntime = vruntime;
1686} 1684}
1687 1685
1688static void check_enqueue_throttle(struct cfs_rq *cfs_rq); 1686static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
3254 */ 3252 */
3255static int select_idle_sibling(struct task_struct *p, int target) 3253static int select_idle_sibling(struct task_struct *p, int target)
3256{ 3254{
3257 int cpu = smp_processor_id();
3258 int prev_cpu = task_cpu(p);
3259 struct sched_domain *sd; 3255 struct sched_domain *sd;
3260 struct sched_group *sg; 3256 struct sched_group *sg;
3261 int i; 3257 int i = task_cpu(p);
3262 3258
3263 /* 3259 if (idle_cpu(target))
3264 * If the task is going to be woken-up on this cpu and if it is 3260 return target;
3265 * already idle, then it is the right target.
3266 */
3267 if (target == cpu && idle_cpu(cpu))
3268 return cpu;
3269 3261
3270 /* 3262 /*
3271 * If the task is going to be woken-up on the cpu where it previously 3263 * If the previous cpu is cache affine and idle, don't be stupid.
3272 * ran and if it is currently idle, then it is the right target.
3273 */ 3264 */
3274 if (target == prev_cpu && idle_cpu(prev_cpu)) 3265 if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
3275 return prev_cpu; 3266 return i;
3276 3267
3277 /* 3268 /*
3278 * Otherwise, iterate the domains and find an eligible idle cpu. 3269
@@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
3286 goto next; 3277 goto next;
3287 3278
3288 for_each_cpu(i, sched_group_cpus(sg)) { 3279 for_each_cpu(i, sched_group_cpus(sg)) {
3289 if (!idle_cpu(i)) 3280 if (i == target || !idle_cpu(i))
3290 goto next; 3281 goto next;
3291 } 3282 }
3292 3283
@@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
6101 * idle runqueue: 6092 * idle runqueue:
6102 */ 6093 */
6103 if (rq->cfs.load.weight) 6094 if (rq->cfs.load.weight)
6104 rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); 6095 rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
6105 6096
6106 return rr_interval; 6097 return rr_interval;
6107} 6098}
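
The select_idle_sibling() rewrite above folds the two special cases into one rule: take the wakeup target if it is already idle, otherwise take the task's previous CPU when it is idle and shares cache with the target, otherwise fall through to the domain scan. A sketch of just that decision, with idle_cpu() and cpus_share_cache() stubbed out as assumptions:

    /* Hypothetical stand-ins for the kernel helpers. */
    static int idle_cpu(int cpu)              { return cpu == 2; }
    static int cpus_share_cache(int a, int b) { return a / 4 == b / 4; }

    /* Returns the chosen idle CPU, or -1 to fall through to the
     * sched-domain iteration in the full function. */
    static int pick_idle_sibling(int prev, int target)
    {
            if (idle_cpu(target))
                    return target;
            if (prev != target && cpus_share_cache(prev, target) &&
                idle_cpu(prev))
                    return prev;
            return -1;
    }
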
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4f02b2847357..127a2c4cf4ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
7 7
8#include <linux/slab.h> 8#include <linux/slab.h>
9 9
10int sched_rr_timeslice = RR_TIMESLICE;
11
10static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); 12static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
11 13
12struct rt_bandwidth def_rt_bandwidth; 14struct rt_bandwidth def_rt_bandwidth;
@@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq)
925 return; 927 return;
926 928
927 delta_exec = rq->clock_task - curr->se.exec_start; 929 delta_exec = rq->clock_task - curr->se.exec_start;
928 if (unlikely((s64)delta_exec < 0)) 930 if (unlikely((s64)delta_exec <= 0))
929 delta_exec = 0; 931 return;
930 932
931 schedstat_set(curr->se.statistics.exec_max, 933 schedstat_set(curr->se.statistics.exec_max,
932 max(curr->se.statistics.exec_max, delta_exec)); 934 max(curr->se.statistics.exec_max, delta_exec));
@@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1427static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 1429static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1428{ 1430{
1429 if (!task_running(rq, p) && 1431 if (!task_running(rq, p) &&
1430 (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && 1432 cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
1431 (p->nr_cpus_allowed > 1))
1432 return 1; 1433 return 1;
1433 return 0; 1434 return 0;
1434} 1435}
@@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
1889 * we may need to handle the pulling of RT tasks 1890 * we may need to handle the pulling of RT tasks
1890 * now. 1891 * now.
1891 */ 1892 */
1892 if (p->on_rq && !rq->rt.rt_nr_running) 1893 if (!p->on_rq || rq->rt.rt_nr_running)
1893 pull_rt_task(rq); 1894 return;
1895
1896 if (pull_rt_task(rq))
1897 resched_task(rq->curr);
1894} 1898}
1895 1899
1896void init_sched_rt_class(void) 1900void init_sched_rt_class(void)
@@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
1985 if (soft != RLIM_INFINITY) { 1989 if (soft != RLIM_INFINITY) {
1986 unsigned long next; 1990 unsigned long next;
1987 1991
1988 p->rt.timeout++; 1992 if (p->rt.watchdog_stamp != jiffies) {
1993 p->rt.timeout++;
1994 p->rt.watchdog_stamp = jiffies;
1995 }
1996
1989 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 1997 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
1990 if (p->rt.timeout > next) 1998 if (p->rt.timeout > next)
1991 p->cputime_expires.sched_exp = p->se.sum_exec_runtime; 1999 p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2010 if (--p->rt.time_slice) 2018 if (--p->rt.time_slice)
2011 return; 2019 return;
2012 2020
2013 p->rt.time_slice = RR_TIMESLICE; 2021 p->rt.time_slice = sched_rr_timeslice;
2014 2022
2015 /* 2023 /*
2016 * Requeue to the end of queue if we (and all of our ancestors) are the 2024 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2041 * Time slice is 0 for SCHED_FIFO tasks 2049 * Time slice is 0 for SCHED_FIFO tasks
2042 */ 2050 */
2043 if (task->policy == SCHED_RR) 2051 if (task->policy == SCHED_RR)
2044 return RR_TIMESLICE; 2052 return sched_rr_timeslice;
2045 else 2053 else
2046 return 0; 2054 return 0;
2047} 2055}
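
The watchdog_stamp hunk above fixes over-accounting: watchdog() can apparently be reached more than once within the same tick period, and each call used to bump rt.timeout toward the RLIMIT_RTTIME limit. A model of the guard, with jiffies reduced to a plain counter for illustration:

    #include <stdint.h>

    static uint64_t jiffies;    /* stand-in for the kernel's tick counter */

    struct rt_watchdog { uint64_t stamp; unsigned long timeout; };

    /* Account at most one timeout tick per jiffy, however often the
     * watchdog runs within it. */
    static void watchdog_account(struct rt_watchdog *rt)
    {
            if (rt->stamp != jiffies) {
                    rt->timeout++;
                    rt->stamp = jiffies;
            }
    }
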
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,7 @@
1 1
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/sched/sysctl.h>
4#include <linux/sched/rt.h>
3#include <linux/mutex.h> 5#include <linux/mutex.h>
4#include <linux/spinlock.h> 6#include <linux/spinlock.h>
5#include <linux/stop_machine.h> 7#include <linux/stop_machine.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index 3d09cf6cde75..7f82adbad480 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1632,6 +1632,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
1632 unsigned long flags; 1632 unsigned long flags;
1633 struct sighand_struct *psig; 1633 struct sighand_struct *psig;
1634 bool autoreap = false; 1634 bool autoreap = false;
1635 cputime_t utime, stime;
1635 1636
1636 BUG_ON(sig == -1); 1637 BUG_ON(sig == -1);
1637 1638
@@ -1669,8 +1670,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
1669 task_uid(tsk)); 1670 task_uid(tsk));
1670 rcu_read_unlock(); 1671 rcu_read_unlock();
1671 1672
1672 info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); 1673 task_cputime(tsk, &utime, &stime);
1673 info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime); 1674 info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
1675 info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
1674 1676
1675 info.si_status = tsk->exit_code & 0x7f; 1677 info.si_status = tsk->exit_code & 0x7f;
1676 if (tsk->exit_code & 0x80) 1678 if (tsk->exit_code & 0x80)
@@ -1734,6 +1736,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
1734 unsigned long flags; 1736 unsigned long flags;
1735 struct task_struct *parent; 1737 struct task_struct *parent;
1736 struct sighand_struct *sighand; 1738 struct sighand_struct *sighand;
1739 cputime_t utime, stime;
1737 1740
1738 if (for_ptracer) { 1741 if (for_ptracer) {
1739 parent = tsk->parent; 1742 parent = tsk->parent;
@@ -1752,8 +1755,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
1752 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); 1755 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
1753 rcu_read_unlock(); 1756 rcu_read_unlock();
1754 1757
1755 info.si_utime = cputime_to_clock_t(tsk->utime); 1758 task_cputime(tsk, &utime, &stime);
1756 info.si_stime = cputime_to_clock_t(tsk->stime); 1759 info.si_utime = cputime_to_clock_t(utime);
1760 info.si_stime = cputime_to_clock_t(stime);
1757 1761
1758 info.si_code = why; 1762 info.si_code = why;
1759 switch (why) { 1763 switch (why) {
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d6c5fc054242..d4abac261779 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -183,9 +183,10 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
183 kfree(td); 183 kfree(td);
184 return PTR_ERR(tsk); 184 return PTR_ERR(tsk);
185 } 185 }
186
187 get_task_struct(tsk); 186 get_task_struct(tsk);
188 *per_cpu_ptr(ht->store, cpu) = tsk; 187 *per_cpu_ptr(ht->store, cpu) = tsk;
188 if (ht->create)
189 ht->create(cpu);
189 return 0; 190 return 0;
190} 191}
191 192
@@ -225,7 +226,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
225{ 226{
226 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); 227 struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
227 228
228 if (tsk) 229 if (tsk && !ht->selfparking)
229 kthread_park(tsk); 230 kthread_park(tsk);
230} 231}
231 232
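
Two smpboot changes land here: a create() callback invoked once the per-cpu task has been published to ht->store, and a selfparking flag that makes smpboot_park_thread() leave parking to the thread itself. Both exist for the cpu-stopper conversion in stop_machine.c further down. A hedged sketch of a client descriptor (all example_* names are made up):

    #include <linux/smpboot.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(struct task_struct *, example_task);

    static int  example_should_run(unsigned int cpu) { return 0; }
    static void example_fn(unsigned int cpu)         { }
    static void example_create(unsigned int cpu)     { /* per-cpu setup */ }

    static struct smp_hotplug_thread example_threads = {
            .store             = &example_task,
            .thread_should_run = example_should_run,
            .thread_fn         = example_fn,
            .thread_comm       = "example/%u",
            .create            = example_create,  /* runs after store is set */
            .selfparking       = false,           /* let smpboot park us */
    };

    /* in an initcall: BUG_ON(smpboot_register_percpu_thread(&example_threads)); */
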
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ed567babe789..f5cc25f147a6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
221 current->flags &= ~PF_MEMALLOC; 221 current->flags &= ~PF_MEMALLOC;
222 222
223 pending = local_softirq_pending(); 223 pending = local_softirq_pending();
224 vtime_account_irq_enter(current); 224 account_irq_enter_time(current);
225 225
226 __local_bh_disable((unsigned long)__builtin_return_address(0), 226 __local_bh_disable((unsigned long)__builtin_return_address(0),
227 SOFTIRQ_OFFSET); 227 SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
272 272
273 lockdep_softirq_exit(); 273 lockdep_softirq_exit();
274 274
275 vtime_account_irq_exit(current); 275 account_irq_exit_time(current);
276 __local_bh_enable(SOFTIRQ_OFFSET); 276 __local_bh_enable(SOFTIRQ_OFFSET);
277 tsk_restore_flags(current, old_flags, PF_MEMALLOC); 277 tsk_restore_flags(current, old_flags, PF_MEMALLOC);
278} 278}
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
341 */ 341 */
342void irq_exit(void) 342void irq_exit(void)
343{ 343{
344 vtime_account_irq_exit(current); 344 account_irq_exit_time(current);
345 trace_hardirq_exit(); 345 trace_hardirq_exit();
346 sub_preempt_count(IRQ_EXIT_OFFSET); 346 sub_preempt_count(IRQ_EXIT_OFFSET);
347 if (!in_interrupt() && local_softirq_pending()) 347 if (!in_interrupt() && local_softirq_pending())
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 2b859828cdc3..01d5ccb8bfe3 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -282,12 +282,8 @@ static int srcu_readers_active(struct srcu_struct *sp)
282 */ 282 */
283void cleanup_srcu_struct(struct srcu_struct *sp) 283void cleanup_srcu_struct(struct srcu_struct *sp)
284{ 284{
285 int sum; 285 if (WARN_ON(srcu_readers_active(sp)))
286 286 return; /* Leakage unless caller handles error. */
287 sum = srcu_readers_active(sp);
288 WARN_ON(sum); /* Leakage unless caller handles error. */
289 if (sum != 0)
290 return;
291 free_percpu(sp->per_cpu_ref); 287 free_percpu(sp->per_cpu_ref);
292 sp->per_cpu_ref = NULL; 288 sp->per_cpu_ref = NULL;
293} 289}
@@ -302,9 +298,8 @@ int __srcu_read_lock(struct srcu_struct *sp)
302{ 298{
303 int idx; 299 int idx;
304 300
301 idx = ACCESS_ONCE(sp->completed) & 0x1;
305 preempt_disable(); 302 preempt_disable();
306 idx = rcu_dereference_index_check(sp->completed,
307 rcu_read_lock_sched_held()) & 0x1;
308 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; 303 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1;
309 smp_mb(); /* B */ /* Avoid leaking the critical section. */ 304 smp_mb(); /* B */ /* Avoid leaking the critical section. */
310 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; 305 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1;
@@ -321,10 +316,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
321 */ 316 */
322void __srcu_read_unlock(struct srcu_struct *sp, int idx) 317void __srcu_read_unlock(struct srcu_struct *sp, int idx)
323{ 318{
324 preempt_disable();
325 smp_mb(); /* C */ /* Avoid leaking the critical section. */ 319 smp_mb(); /* C */ /* Avoid leaking the critical section. */
326 ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; 320 this_cpu_dec(sp->per_cpu_ref->c[idx]);
327 preempt_enable();
328} 321}
329EXPORT_SYMBOL_GPL(__srcu_read_unlock); 322EXPORT_SYMBOL_GPL(__srcu_read_unlock);
330 323
@@ -423,6 +416,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
423 !lock_is_held(&rcu_sched_lock_map), 416 !lock_is_held(&rcu_sched_lock_map),
424 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); 417 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
425 418
419 might_sleep();
426 init_completion(&rcu.completion); 420 init_completion(&rcu.completion);
427 421
428 head->next = NULL; 422 head->next = NULL;
@@ -455,10 +449,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
455 * synchronize_srcu - wait for prior SRCU read-side critical-section completion 449 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
456 * @sp: srcu_struct with which to synchronize. 450 * @sp: srcu_struct with which to synchronize.
457 * 451 *
458 * Flip the completed counter, and wait for the old count to drain to zero. 452 * Wait for the counts of both indexes to drain to zero. To avoid the
459 * As with classic RCU, the updater must use some separate means of 453 * possible starvation of synchronize_srcu(), it first waits for the count of
460 * synchronizing concurrent updates. Can block; must be called from 454 * the index=((->completed & 1) ^ 1) to drain to zero,
461 * process context. 455 * and then flips ->completed and waits for the count of the other index.
456 *
457 * Can block; must be called from process context.
462 * 458 *
463 * Note that it is illegal to call synchronize_srcu() from the corresponding 459 * Note that it is illegal to call synchronize_srcu() from the corresponding
464 * SRCU read-side critical section; doing so will result in deadlock. 460 * SRCU read-side critical section; doing so will result in deadlock.
@@ -480,12 +476,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu);
480 * Wait for an SRCU grace period to elapse, but be more aggressive about 476 * Wait for an SRCU grace period to elapse, but be more aggressive about
481 * spinning rather than blocking when waiting. 477 * spinning rather than blocking when waiting.
482 * 478 *
483 * Note that it is illegal to call this function while holding any lock 479 * Note that it is also illegal to call synchronize_srcu_expedited()
484 * that is acquired by a CPU-hotplug notifier. It is also illegal to call 480 * from the corresponding SRCU read-side critical section;
485 * synchronize_srcu_expedited() from the corresponding SRCU read-side 481 * doing so will result in deadlock. However, it is perfectly legal
486 * critical section; doing so will result in deadlock. However, it is 482 * to call synchronize_srcu_expedited() on one srcu_struct from some
487 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct 483 * other srcu_struct's read-side critical section, as long as
488 * from some other srcu_struct's read-side critical section, as long as
489 * the resulting graph of srcu_structs is acyclic. 484 * the resulting graph of srcu_structs is acyclic.
490 */ 485 */
491void synchronize_srcu_expedited(struct srcu_struct *sp) 486void synchronize_srcu_expedited(struct srcu_struct *sp)
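
The srcu.c read-side simplification above does not change the caller contract: the index returned by srcu_read_lock() selects which per-cpu counter pair was incremented, so it must be handed back to the matching srcu_read_unlock(). Typical use, assuming sp is an initialized srcu_struct:

    int idx;

    idx = srcu_read_lock(&sp);
    /* ... dereference SRCU-protected data ... */
    srcu_read_unlock(&sp, idx);
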
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2f194e965715..95d178c62d5a 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -18,7 +18,7 @@
18#include <linux/stop_machine.h> 18#include <linux/stop_machine.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/kallsyms.h> 20#include <linux/kallsyms.h>
21 21#include <linux/smpboot.h>
22#include <linux/atomic.h> 22#include <linux/atomic.h>
23 23
24/* 24/*
@@ -37,10 +37,10 @@ struct cpu_stopper {
37 spinlock_t lock; 37 spinlock_t lock;
38 bool enabled; /* is this stopper enabled? */ 38 bool enabled; /* is this stopper enabled? */
39 struct list_head works; /* list of pending works */ 39 struct list_head works; /* list of pending works */
40 struct task_struct *thread; /* stopper thread */
41}; 40};
42 41
43static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); 42static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
43static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
44static bool stop_machine_initialized = false; 44static bool stop_machine_initialized = false;
45 45
46static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) 46static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
@@ -62,16 +62,18 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
62} 62}
63 63
64/* queue @work to @stopper. if offline, @work is completed immediately */ 64/* queue @work to @stopper. if offline, @work is completed immediately */
65static void cpu_stop_queue_work(struct cpu_stopper *stopper, 65static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
66 struct cpu_stop_work *work)
67{ 66{
67 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
68 struct task_struct *p = per_cpu(cpu_stopper_task, cpu);
69
68 unsigned long flags; 70 unsigned long flags;
69 71
70 spin_lock_irqsave(&stopper->lock, flags); 72 spin_lock_irqsave(&stopper->lock, flags);
71 73
72 if (stopper->enabled) { 74 if (stopper->enabled) {
73 list_add_tail(&work->list, &stopper->works); 75 list_add_tail(&work->list, &stopper->works);
74 wake_up_process(stopper->thread); 76 wake_up_process(p);
75 } else 77 } else
76 cpu_stop_signal_done(work->done, false); 78 cpu_stop_signal_done(work->done, false);
77 79
@@ -108,7 +110,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
108 struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; 110 struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
109 111
110 cpu_stop_init_done(&done, 1); 112 cpu_stop_init_done(&done, 1);
111 cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work); 113 cpu_stop_queue_work(cpu, &work);
112 wait_for_completion(&done.completion); 114 wait_for_completion(&done.completion);
113 return done.executed ? done.ret : -ENOENT; 115 return done.executed ? done.ret : -ENOENT;
114} 116}
@@ -130,7 +132,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
130 struct cpu_stop_work *work_buf) 132 struct cpu_stop_work *work_buf)
131{ 133{
132 *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; 134 *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
133 cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf); 135 cpu_stop_queue_work(cpu, work_buf);
134} 136}
135 137
136/* static data for stop_cpus */ 138/* static data for stop_cpus */
@@ -159,8 +161,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
159 */ 161 */
160 preempt_disable(); 162 preempt_disable();
161 for_each_cpu(cpu, cpumask) 163 for_each_cpu(cpu, cpumask)
162 cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), 164 cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
163 &per_cpu(stop_cpus_work, cpu));
164 preempt_enable(); 165 preempt_enable();
165} 166}
166 167
@@ -244,20 +245,25 @@ int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
244 return ret; 245 return ret;
245} 246}
246 247
247static int cpu_stopper_thread(void *data) 248static int cpu_stop_should_run(unsigned int cpu)
249{
250 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
251 unsigned long flags;
252 int run;
253
254 spin_lock_irqsave(&stopper->lock, flags);
255 run = !list_empty(&stopper->works);
256 spin_unlock_irqrestore(&stopper->lock, flags);
257 return run;
258}
259
260static void cpu_stopper_thread(unsigned int cpu)
248{ 261{
249 struct cpu_stopper *stopper = data; 262 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
250 struct cpu_stop_work *work; 263 struct cpu_stop_work *work;
251 int ret; 264 int ret;
252 265
253repeat: 266repeat:
254 set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */
255
256 if (kthread_should_stop()) {
257 __set_current_state(TASK_RUNNING);
258 return 0;
259 }
260
261 work = NULL; 267 work = NULL;
262 spin_lock_irq(&stopper->lock); 268 spin_lock_irq(&stopper->lock);
263 if (!list_empty(&stopper->works)) { 269 if (!list_empty(&stopper->works)) {
@@ -273,8 +279,6 @@ repeat:
273 struct cpu_stop_done *done = work->done; 279 struct cpu_stop_done *done = work->done;
274 char ksym_buf[KSYM_NAME_LEN] __maybe_unused; 280 char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
275 281
276 __set_current_state(TASK_RUNNING);
277
278 /* cpu stop callbacks are not allowed to sleep */ 282 /* cpu stop callbacks are not allowed to sleep */
279 preempt_disable(); 283 preempt_disable();
280 284
@@ -290,88 +294,55 @@ repeat:
290 ksym_buf), arg); 294 ksym_buf), arg);
291 295
292 cpu_stop_signal_done(done, true); 296 cpu_stop_signal_done(done, true);
293 } else 297 goto repeat;
294 schedule(); 298 }
295
296 goto repeat;
297} 299}
298 300
299extern void sched_set_stop_task(int cpu, struct task_struct *stop); 301extern void sched_set_stop_task(int cpu, struct task_struct *stop);
300 302
301/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ 303static void cpu_stop_create(unsigned int cpu)
302static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, 304{
303 unsigned long action, void *hcpu) 305 sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
306}
307
308static void cpu_stop_park(unsigned int cpu)
304{ 309{
305 unsigned int cpu = (unsigned long)hcpu;
306 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 310 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
307 struct task_struct *p; 311 struct cpu_stop_work *work;
308 312 unsigned long flags;
309 switch (action & ~CPU_TASKS_FROZEN) {
310 case CPU_UP_PREPARE:
311 BUG_ON(stopper->thread || stopper->enabled ||
312 !list_empty(&stopper->works));
313 p = kthread_create_on_node(cpu_stopper_thread,
314 stopper,
315 cpu_to_node(cpu),
316 "migration/%d", cpu);
317 if (IS_ERR(p))
318 return notifier_from_errno(PTR_ERR(p));
319 get_task_struct(p);
320 kthread_bind(p, cpu);
321 sched_set_stop_task(cpu, p);
322 stopper->thread = p;
323 break;
324
325 case CPU_ONLINE:
326 /* strictly unnecessary, as first user will wake it */
327 wake_up_process(stopper->thread);
328 /* mark enabled */
329 spin_lock_irq(&stopper->lock);
330 stopper->enabled = true;
331 spin_unlock_irq(&stopper->lock);
332 break;
333
334#ifdef CONFIG_HOTPLUG_CPU
335 case CPU_UP_CANCELED:
336 case CPU_POST_DEAD:
337 {
338 struct cpu_stop_work *work;
339
340 sched_set_stop_task(cpu, NULL);
341 /* kill the stopper */
342 kthread_stop(stopper->thread);
343 /* drain remaining works */
344 spin_lock_irq(&stopper->lock);
345 list_for_each_entry(work, &stopper->works, list)
346 cpu_stop_signal_done(work->done, false);
347 stopper->enabled = false;
348 spin_unlock_irq(&stopper->lock);
349 /* release the stopper */
350 put_task_struct(stopper->thread);
351 stopper->thread = NULL;
352 break;
353 }
354#endif
355 }
356 313
357 return NOTIFY_OK; 314 /* drain remaining works */
315 spin_lock_irqsave(&stopper->lock, flags);
316 list_for_each_entry(work, &stopper->works, list)
317 cpu_stop_signal_done(work->done, false);
318 stopper->enabled = false;
319 spin_unlock_irqrestore(&stopper->lock, flags);
358} 320}
359 321
360/* 322static void cpu_stop_unpark(unsigned int cpu)
361 * Give it a higher priority so that cpu stopper is available to other 323{
362 * cpu notifiers. It currently shares the same priority as sched 324 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
363 * migration_notifier. 325
364 */ 326 spin_lock_irq(&stopper->lock);
365static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = { 327 stopper->enabled = true;
366 .notifier_call = cpu_stop_cpu_callback, 328 spin_unlock_irq(&stopper->lock);
367 .priority = 10, 329}
330
331static struct smp_hotplug_thread cpu_stop_threads = {
332 .store = &cpu_stopper_task,
333 .thread_should_run = cpu_stop_should_run,
334 .thread_fn = cpu_stopper_thread,
335 .thread_comm = "migration/%u",
336 .create = cpu_stop_create,
337 .setup = cpu_stop_unpark,
338 .park = cpu_stop_park,
339 .unpark = cpu_stop_unpark,
340 .selfparking = true,
368}; 341};
369 342
370static int __init cpu_stop_init(void) 343static int __init cpu_stop_init(void)
371{ 344{
372 void *bcpu = (void *)(long)smp_processor_id();
373 unsigned int cpu; 345 unsigned int cpu;
374 int err;
375 346
376 for_each_possible_cpu(cpu) { 347 for_each_possible_cpu(cpu) {
377 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 348 struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
@@ -380,15 +351,8 @@ static int __init cpu_stop_init(void)
380 INIT_LIST_HEAD(&stopper->works); 351 INIT_LIST_HEAD(&stopper->works);
381 } 352 }
382 353
383 /* start one for the boot cpu */ 354 BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
384 err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
385 bcpu);
386 BUG_ON(err != NOTIFY_OK);
387 cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
388 register_cpu_notifier(&cpu_stop_cpu_notifier);
389
390 stop_machine_initialized = true; 355 stop_machine_initialized = true;
391
392 return 0; 356 return 0;
393} 357}
394early_initcall(cpu_stop_init); 358early_initcall(cpu_stop_init);
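
After this conversion the stopper threads are owned by the smpboot infrastructure (created for every possible CPU and parked/unparked across hotplug) instead of the hand-rolled CPU notifier, and callers are untouched. For reference, the entry points still look like this (do_work is a made-up callback):

    static int do_work(void *arg)
    {
            /* runs in the stopper thread with preemption disabled */
            return 0;
    }

    /* synchronous:      ret = stop_one_cpu(cpu, do_work, NULL);         */
    /* fire-and-forget:  stop_one_cpu_nowait(cpu, do_work, NULL, &work); */
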
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c88878db491e..4fc9be955c71 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@
61#include <linux/kmod.h> 61#include <linux/kmod.h>
62#include <linux/capability.h> 62#include <linux/capability.h>
63#include <linux/binfmts.h> 63#include <linux/binfmts.h>
64#include <linux/sched/sysctl.h>
64 65
65#include <asm/uaccess.h> 66#include <asm/uaccess.h>
66#include <asm/processor.h> 67#include <asm/processor.h>
@@ -403,6 +404,13 @@ static struct ctl_table kern_table[] = {
403 .mode = 0644, 404 .mode = 0644,
404 .proc_handler = sched_rt_handler, 405 .proc_handler = sched_rt_handler,
405 }, 406 },
407 {
408 .procname = "sched_rr_timeslice_ms",
409 .data = &sched_rr_timeslice,
410 .maxlen = sizeof(int),
411 .mode = 0644,
412 .proc_handler = sched_rr_handler,
413 },
406#ifdef CONFIG_SCHED_AUTOGROUP 414#ifdef CONFIG_SCHED_AUTOGROUP
407 { 415 {
408 .procname = "sched_autogroup_enabled", 416 .procname = "sched_autogroup_enabled",
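
The new kern_table entry exposes the SCHED_RR quantum at /proc/sys/kernel/sched_rr_timeslice_ms; sched_rr_handler (not shown in this hunk) presumably converts between milliseconds and the jiffies-based sched_rr_timeslice. A small userspace reader, assuming the file reports milliseconds:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "r");
            int ms;

            if (f && fscanf(f, "%d", &ms) == 1)
                    printf("SCHED_RR timeslice: %d ms\n", ms);
            if (f)
                    fclose(f);
            return 0;
    }
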
diff --git a/kernel/time.c b/kernel/time.c
index d226c6a3fd28..c2a27dd93142 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -115,6 +115,12 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
115} 115}
116 116
117/* 117/*
118 * Indicates if there is an offset between the system clock and the hardware
119 * clock/persistent clock/rtc.
120 */
121int persistent_clock_is_local;
122
123/*
118 * Adjust the time obtained from the CMOS to be UTC time instead of 124 * Adjust the time obtained from the CMOS to be UTC time instead of
119 * local time. 125 * local time.
120 * 126 *
@@ -135,6 +141,8 @@ static inline void warp_clock(void)
135 struct timespec adjust; 141 struct timespec adjust;
136 142
137 adjust = current_kernel_time(); 143 adjust = current_kernel_time();
144 if (sys_tz.tz_minuteswest != 0)
145 persistent_clock_is_local = 1;
138 adjust.tv_sec += sys_tz.tz_minuteswest * 60; 146 adjust.tv_sec += sys_tz.tz_minuteswest * 60;
139 do_settimeofday(&adjust); 147 do_settimeofday(&adjust);
140} 148}
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db1261..24510d84efd7 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG
12config ARCH_CLOCKSOURCE_DATA 12config ARCH_CLOCKSOURCE_DATA
13 bool 13 bool
14 14
15# Platforms with a persistent clock
16config ALWAYS_USE_PERSISTENT_CLOCK
17 bool
18 default n
19
15# Timekeeping vsyscall support 20# Timekeeping vsyscall support
16config GENERIC_TIME_VSYSCALL 21config GENERIC_TIME_VSYSCALL
17 bool 22 bool
@@ -38,6 +43,10 @@ config GENERIC_CLOCKEVENTS_BUILD
38 default y 43 default y
39 depends on GENERIC_CLOCKEVENTS 44 depends on GENERIC_CLOCKEVENTS
40 45
46# Architecture can handle broadcast in a driver-agnostic way
47config ARCH_HAS_TICK_BROADCAST
48 bool
49
41# Clockevents broadcasting infrastructure 50# Clockevents broadcasting infrastructure
42config GENERIC_CLOCKEVENTS_BROADCAST 51config GENERIC_CLOCKEVENTS_BROADCAST
43 bool 52 bool
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 24174b4d669b..b10a42bb0165 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -15,6 +15,7 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/rtc.h>
18 19
19#include "tick-internal.h" 20#include "tick-internal.h"
20 21
@@ -483,8 +484,7 @@ out:
483 return leap; 484 return leap;
484} 485}
485 486
486#ifdef CONFIG_GENERIC_CMOS_UPDATE 487#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
487
488static void sync_cmos_clock(struct work_struct *work); 488static void sync_cmos_clock(struct work_struct *work);
489 489
490static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); 490static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -510,14 +510,26 @@ static void sync_cmos_clock(struct work_struct *work)
510 } 510 }
511 511
512 getnstimeofday(&now); 512 getnstimeofday(&now);
513 if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) 513 if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
514 fail = update_persistent_clock(now); 514 struct timespec adjust = now;
515
516 fail = -ENODEV;
517 if (persistent_clock_is_local)
518 adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
519#ifdef CONFIG_GENERIC_CMOS_UPDATE
520 fail = update_persistent_clock(adjust);
521#endif
522#ifdef CONFIG_RTC_SYSTOHC
523 if (fail == -ENODEV)
524 fail = rtc_set_ntp_time(adjust);
525#endif
526 }
515 527
516 next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2); 528 next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
517 if (next.tv_nsec <= 0) 529 if (next.tv_nsec <= 0)
518 next.tv_nsec += NSEC_PER_SEC; 530 next.tv_nsec += NSEC_PER_SEC;
519 531
520 if (!fail) 532 if (!fail || fail == -ENODEV)
521 next.tv_sec = 659; 533 next.tv_sec = 659;
522 else 534 else
523 next.tv_sec = 0; 535 next.tv_sec = 0;
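
The sync_cmos_clock() rework lets either updater service the periodic write-back: the CMOS hook runs first, and -ENODEV (the ifdef compiled out, or no device present) falls through to rtc_set_ntp_time(); the timezone offset is re-applied beforehand when the persistent clock ticks local time. A stand-alone model of that fallback, with stub updaters as assumptions:

    #include <errno.h>

    static int update_cmos(long sec) { return -ENODEV; }  /* no CMOS here */
    static int update_rtc(long sec)  { return 0; }

    static int sync_hardware_clock(long sec, int clock_is_local,
                                   int tz_minuteswest)
    {
            int fail;

            if (clock_is_local)
                    sec -= tz_minuteswest * 60;  /* hardware keeps local time */

            fail = update_cmos(sec);
            if (fail == -ENODEV)
                    fail = update_rtc(sec);
            return fail;
    }
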
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f113755695e2..2fb8cb88df8d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,6 +18,7 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp.h>
21 22
22#include "tick-internal.h" 23#include "tick-internal.h"
23 24
@@ -86,6 +87,22 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
86 return (dev && tick_broadcast_device.evtdev == dev); 87 return (dev && tick_broadcast_device.evtdev == dev);
87} 88}
88 89
90static void err_broadcast(const struct cpumask *mask)
91{
92 pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
93}
94
95static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
96{
97 if (!dev->broadcast)
98 dev->broadcast = tick_broadcast;
99 if (!dev->broadcast) {
100 pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
101 dev->name);
102 dev->broadcast = err_broadcast;
103 }
104}
105
89/* 106/*
90 * Check if the device is dysfunctional and a placeholder, which 107
91 * needs to be handled by the broadcast device. 108 * needs to be handled by the broadcast device.
@@ -105,6 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
105 */ 122 */
106 if (!tick_device_is_functional(dev)) { 123 if (!tick_device_is_functional(dev)) {
107 dev->event_handler = tick_handle_periodic; 124 dev->event_handler = tick_handle_periodic;
125 tick_device_setup_broadcast_func(dev);
108 cpumask_set_cpu(cpu, tick_get_broadcast_mask()); 126 cpumask_set_cpu(cpu, tick_get_broadcast_mask());
109 tick_broadcast_start_periodic(tick_broadcast_device.evtdev); 127 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
110 ret = 1; 128 ret = 1;
@@ -116,15 +134,33 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
116 */ 134 */
117 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { 135 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
118 int cpu = smp_processor_id(); 136 int cpu = smp_processor_id();
119
120 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 137 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
121 tick_broadcast_clear_oneshot(cpu); 138 tick_broadcast_clear_oneshot(cpu);
139 } else {
140 tick_device_setup_broadcast_func(dev);
122 } 141 }
123 } 142 }
124 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 143 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
125 return ret; 144 return ret;
126} 145}
127 146
147#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
148int tick_receive_broadcast(void)
149{
150 struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
151 struct clock_event_device *evt = td->evtdev;
152
153 if (!evt)
154 return -ENODEV;
155
156 if (!evt->event_handler)
157 return -EINVAL;
158
159 evt->event_handler(evt);
160 return 0;
161}
162#endif
163
128/* 164/*
129 * Broadcast the event to the cpus, which are set in the mask (mangled). 165 * Broadcast the event to the cpus, which are set in the mask (mangled).
130 */ 166 */
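
tick_receive_broadcast() gives architectures a driver-agnostic way to feed a broadcast-timer IPI into the generic layer, pairing with the ARCH_HAS_TICK_BROADCAST option added in the Kconfig hunk earlier. A hedged sketch of the arch side; the handler name is illustrative, not from this patch:

    /* Called from the architecture's broadcast-timer IPI vector. */
    static void handle_timer_broadcast_ipi(void)
    {
            if (tick_receive_broadcast())
                    pr_warn_once("timer broadcast IPI with no event handler\n");
    }
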
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd1..314b9ee07edf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -20,6 +20,7 @@
20#include <linux/profile.h> 20#include <linux/profile.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/irq_work.h>
23 24
24#include <asm/irq_regs.h> 25#include <asm/irq_regs.h>
25 26
@@ -28,7 +29,7 @@
28/* 29/*
29 * Per cpu nohz control structure 30 * Per cpu nohz control structure
30 */ 31 */
31static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); 32DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
32 33
33/* 34/*
34 * The time, when the last jiffy update happened. Protected by jiffies_lock. 35 * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -331,8 +332,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
331 time_delta = timekeeping_max_deferment(); 332 time_delta = timekeeping_max_deferment();
332 } while (read_seqretry(&jiffies_lock, seq)); 333 } while (read_seqretry(&jiffies_lock, seq));
333 334
334 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || 335 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
335 arch_needs_cpu(cpu)) { 336 arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
336 next_jiffies = last_jiffies + 1; 337 next_jiffies = last_jiffies + 1;
337 delta_jiffies = 1; 338 delta_jiffies = 1;
338 } else { 339 } else {
@@ -631,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
631 632
632static void tick_nohz_account_idle_ticks(struct tick_sched *ts) 633static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
633{ 634{
634#ifndef CONFIG_VIRT_CPU_ACCOUNTING 635#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
635 unsigned long ticks; 636 unsigned long ticks;
637
638 if (vtime_accounting_enabled())
639 return;
636 /* 640 /*
637 * We stopped the tick in idle. Update process times would miss the 641 * We stopped the tick in idle. Update process times would miss the
638 * time we slept as update_process_times does only a 1 tick 642 * time we slept as update_process_times does only a 1 tick
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cbc6acb0db3f..1e35515a875e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -29,6 +29,9 @@ static struct timekeeper timekeeper;
29/* flag for if timekeeping is suspended */ 29/* flag for if timekeeping is suspended */
30int __read_mostly timekeeping_suspended; 30int __read_mostly timekeeping_suspended;
31 31
32/* Flag for if there is a persistent clock on this platform */
33bool __read_mostly persistent_clock_exist = false;
34
32static inline void tk_normalize_xtime(struct timekeeper *tk) 35static inline void tk_normalize_xtime(struct timekeeper *tk)
33{ 36{
34 while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { 37 while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
@@ -264,19 +267,18 @@ static void timekeeping_forward_now(struct timekeeper *tk)
264} 267}
265 268
266/** 269/**
267 * getnstimeofday - Returns the time of day in a timespec 270 * __getnstimeofday - Returns the time of day in a timespec.
268 * @ts: pointer to the timespec to be set 271 * @ts: pointer to the timespec to be set
269 * 272 *
270 * Returns the time of day in a timespec. 273 * Updates the time of day in the timespec.
274 * Returns 0 on success, or -ve when suspended (timespec will be undefined).
271 */ 275 */
272void getnstimeofday(struct timespec *ts) 276int __getnstimeofday(struct timespec *ts)
273{ 277{
274 struct timekeeper *tk = &timekeeper; 278 struct timekeeper *tk = &timekeeper;
275 unsigned long seq; 279 unsigned long seq;
276 s64 nsecs = 0; 280 s64 nsecs = 0;
277 281
278 WARN_ON(timekeeping_suspended);
279
280 do { 282 do {
281 seq = read_seqbegin(&tk->lock); 283 seq = read_seqbegin(&tk->lock);
282 284
@@ -287,6 +289,26 @@ void getnstimeofday(struct timespec *ts)
287 289
288 ts->tv_nsec = 0; 290 ts->tv_nsec = 0;
289 timespec_add_ns(ts, nsecs); 291 timespec_add_ns(ts, nsecs);
292
293 /*
294 * Do not bail out early, in case there were callers still using
295 * the value, even in the face of the WARN_ON.
296 */
297 if (unlikely(timekeeping_suspended))
298 return -EAGAIN;
299 return 0;
300}
301EXPORT_SYMBOL(__getnstimeofday);
302
303/**
304 * getnstimeofday - Returns the time of day in a timespec.
305 * @ts: pointer to the timespec to be set
306 *
307 * Returns the time of day in a timespec (WARN if suspended).
308 */
309void getnstimeofday(struct timespec *ts)
310{
311 WARN_ON(__getnstimeofday(ts));
290} 312}
291EXPORT_SYMBOL(getnstimeofday); 313EXPORT_SYMBOL(getnstimeofday);
292 314
@@ -640,12 +662,14 @@ void __init timekeeping_init(void)
640 struct timespec now, boot, tmp; 662 struct timespec now, boot, tmp;
641 663
642 read_persistent_clock(&now); 664 read_persistent_clock(&now);
665
643 if (!timespec_valid_strict(&now)) { 666 if (!timespec_valid_strict(&now)) {
644 pr_warn("WARNING: Persistent clock returned invalid value!\n" 667 pr_warn("WARNING: Persistent clock returned invalid value!\n"
645 " Check your CMOS/BIOS settings.\n"); 668 " Check your CMOS/BIOS settings.\n");
646 now.tv_sec = 0; 669 now.tv_sec = 0;
647 now.tv_nsec = 0; 670 now.tv_nsec = 0;
648 } 671 } else if (now.tv_sec || now.tv_nsec)
672 persistent_clock_exist = true;
649 673
650 read_boot_clock(&boot); 674 read_boot_clock(&boot);
651 if (!timespec_valid_strict(&boot)) { 675 if (!timespec_valid_strict(&boot)) {
@@ -718,11 +742,12 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
718{ 742{
719 struct timekeeper *tk = &timekeeper; 743 struct timekeeper *tk = &timekeeper;
720 unsigned long flags; 744 unsigned long flags;
721 struct timespec ts;
722 745
723 /* Make sure we don't set the clock twice */ 746 /*
724 read_persistent_clock(&ts); 747 * Make sure we don't set the clock twice, as timekeeping_resume()
725 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) 748 * already did it
749 */
750 if (has_persistent_clock())
726 return; 751 return;
727 752
728 write_seqlock_irqsave(&tk->lock, flags); 753 write_seqlock_irqsave(&tk->lock, flags);
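
__getnstimeofday() above splits out a non-warning variant, so callers that may legitimately run while timekeeping is suspended can test for the error instead of tripping the WARN kept in getnstimeofday(). A sketch of a tolerant caller (consume_ts() is a placeholder):

    struct timespec ts;

    if (__getnstimeofday(&ts) == 0)
            consume_ts(&ts);
    /* else: timekeeping suspended; ts contents are undefined, skip it */
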
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl
index eb51d76e058a..3f42652a6a37 100644
--- a/kernel/timeconst.pl
+++ b/kernel/timeconst.pl
@@ -369,10 +369,8 @@ if ($hz eq '--can') {
369 die "Usage: $0 HZ\n"; 369 die "Usage: $0 HZ\n";
370 } 370 }
371 371
372 @val = @{$canned_values{$hz}}; 372 $cv = $canned_values{$hz};
373 if (!defined(@val)) { 373 @val = defined($cv) ? @$cv : compute_values($hz);
374 @val = compute_values($hz);
375 }
376 output($hz, @val); 374 output($hz, @val);
377} 375}
378exit 0; 376exit 0;
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..dbf7a78a1ef1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/irq_work.h> 40#include <linux/irq_work.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/sched/sysctl.h>
42#include <linux/slab.h> 43#include <linux/slab.h>
43 44
44#include <asm/uaccess.h> 45#include <asm/uaccess.h>
@@ -1351,7 +1352,6 @@ void update_process_times(int user_tick)
1351 account_process_tick(p, user_tick); 1352 account_process_tick(p, user_tick);
1352 run_local_timers(); 1353 run_local_timers();
1353 rcu_check_callbacks(cpu, user_tick); 1354 rcu_check_callbacks(cpu, user_tick);
1354 printk_tick();
1355#ifdef CONFIG_IRQ_WORK 1355#ifdef CONFIG_IRQ_WORK
1356 if (in_irq()) 1356 if (in_irq())
1357 irq_work_run(); 1357 irq_work_run();
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..36567564e221 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
39 help 39 help
40 See Documentation/trace/ftrace-design.txt 40 See Documentation/trace/ftrace-design.txt
41 41
42config HAVE_DYNAMIC_FTRACE_WITH_REGS
43 bool
44
42config HAVE_FTRACE_MCOUNT_RECORD 45config HAVE_FTRACE_MCOUNT_RECORD
43 bool 46 bool
44 help 47 help
@@ -250,6 +253,16 @@ config FTRACE_SYSCALLS
250 help 253 help
251 Basic tracer to catch the syscall entry and exit events. 254 Basic tracer to catch the syscall entry and exit events.
252 255
256config TRACER_SNAPSHOT
257 bool "Create a snapshot trace buffer"
258 select TRACER_MAX_TRACE
259 help
260 Allow tracing users to take snapshot of the current buffer using the
261 ftrace interface, e.g.:
262
263 echo 1 > /sys/kernel/debug/tracing/snapshot
264 cat snapshot
265
253config TRACE_BRANCH_PROFILING 266config TRACE_BRANCH_PROFILING
254 bool 267 bool
255 select GENERIC_TRACER 268 select GENERIC_TRACER
@@ -434,6 +447,11 @@ config DYNAMIC_FTRACE
434 were made. If so, it runs stop_machine (stops all CPUS) 447 were made. If so, it runs stop_machine (stops all CPUS)
435 and modifies the code to jump over the call to ftrace. 448 and modifies the code to jump over the call to ftrace.
436 449
450config DYNAMIC_FTRACE_WITH_REGS
451 def_bool y
452 depends on DYNAMIC_FTRACE
453 depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
454
437config FUNCTION_PROFILER 455config FUNCTION_PROFILER
438 bool "Kernel function profiler" 456 bool "Kernel function profiler"
439 depends on FUNCTION_TRACER 457 depends on FUNCTION_TRACER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741c..71259e2b6b61 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
147 return; 147 return;
148 148
149 local_irq_save(flags); 149 local_irq_save(flags);
150 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 150 buf = this_cpu_ptr(bt->msg_data);
151 va_start(args, fmt); 151 va_start(args, fmt);
152 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); 152 n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
153 va_end(args); 153 va_end(args);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..ce8c3d68292f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
 #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
 #endif
 
+/*
+ * Traverse the ftrace_global_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list)			\
+	op = rcu_dereference_raw(list);			\
+	do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op)				\
+	while (likely(op = rcu_dereference_raw((op)->next)) &&	\
+	       unlikely((op) != &ftrace_list_end))
+
 /**
  * ftrace_nr_registered_ops - return number of ops registered
  *
@@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
 static void
 ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
 {
-	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
+	int bit;
+
+	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
+	if (bit < 0)
 		return;
 
-	trace_recursion_set(TRACE_GLOBAL_BIT);
-	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_global_list) {
 		op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next); /*see above*/
-	};
-	trace_recursion_clear(TRACE_GLOBAL_BIT);
+	} while_for_each_ftrace_op(op);
+
+	trace_clear_recursion(bit);
 }
 
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
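
The two list-walking macros introduced above expand to a plain do/while; a
self-contained toy sketch of the same loop shape (the rcu_dereference_raw()
calls and kernel types are elided, so every name below is illustrative
only):

    #include <stdio.h>

    struct op { void (*func)(void); struct op *next; };

    static void stub(void)  { }                  /* harmless sentinel callback */
    static void hello(void) { puts("hello"); }

    static struct op list_end = { stub, NULL };
    static struct op single   = { hello, &list_end };
    static struct op *list    = &single;

    int main(void)
    {
            struct op *op = list;   /* do_for_each_ftrace_op(op, list) */
            do {
                    op->func();
            } while ((op = op->next) && op != &list_end);
            return 0;               /* while_for_each_ftrace_op(op) */
    }

Note that the body runs for the head entry before the exit test, which is
why the list is terminated by a sentinel entry with a do-nothing callback
rather than by a bare NULL pointer; a single-entry list (the common case)
takes exactly one pass.
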
@@ -221,10 +233,24 @@ static void update_global_ops(void)
 	 * registered callers.
 	 */
 	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end)
+	    ftrace_global_list->next == &ftrace_list_end) {
 		func = ftrace_global_list->func;
-	else
+		/*
+		 * As we are calling the function directly.
+		 * If it does not have recursion protection,
+		 * the function_trace_op needs to be updated
+		 * accordingly.
+		 */
+		if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE)
+			global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+		else
+			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
+	} else {
 		func = ftrace_global_list_func;
+		/* The list has its own recursion protection. */
+		global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
+	}
+
 
 	/* If we filter on pids, update to use the pid function */
 	if (!list_empty(&ftrace_pids)) {
@@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
-	op = rcu_dereference_raw(ftrace_control_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_control_list) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_CONTROL_BIT);
 	preempt_enable_notrace();
 }
@@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *ignored, struct pt_regs *regs)
 {
 	struct ftrace_ops *op;
+	int bit;
 
 	if (function_trace_stop)
 		return;
 
-	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
+	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+	if (bit < 0)
 		return;
 
-	trace_recursion_set(TRACE_INTERNAL_BIT);
 	/*
 	 * Some of the ops may be dynamically allocated,
 	 * they must be freed after a synchronize_sched().
 	 */
 	preempt_disable_notrace();
-	op = rcu_dereference_raw(ftrace_ops_list);
-	while (op != &ftrace_list_end) {
+	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		if (ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
-		op = rcu_dereference_raw(op->next);
-	};
+	} while_for_each_ftrace_op(op);
 	preempt_enable_notrace();
-	trace_recursion_clear(TRACE_INTERNAL_BIT);
+	trace_clear_recursion(bit);
 }
 
 /*
@@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORT_FTARCE_OPS.
  */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce8514feedcd..7244acde77b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,8 +3,10 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
+#include <linux/ftrace_event.h>
 #include <linux/ring_buffer.h>
 #include <linux/trace_clock.h>
+#include <linux/trace_seq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
@@ -21,7 +23,6 @@
 #include <linux/fs.h>
 
 #include <asm/local.h>
-#include "trace.h"
 
 static void update_pages_handler(struct work_struct *work);
 
@@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #ifdef CONFIG_TRACING
 
-#define TRACE_RECURSIVE_DEPTH 16
+/*
+ * The lock and unlock are done within a preempt disable section.
+ * The current_context per_cpu variable can only be modified
+ * by the current task between lock and unlock. But it can
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing,
+ * we use a bitmask trick.
+ *
+ *  bit 0 = NMI context
+ *  bit 1 = IRQ context
+ *  bit 2 = SoftIRQ context
+ *  bit 3 = normal context.
+ *
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
+ */
+static DEFINE_PER_CPU(unsigned int, current_context);
 
-/* Keep this code out of the fast path cache */
-static noinline void trace_recursive_fail(void)
+static __always_inline int trace_recursive_lock(void)
 {
-	/* Disable all tracing before we do anything else */
-	tracing_off_permanent();
-
-	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
-		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
-		    trace_recursion_buffer(),
-		    hardirq_count() >> HARDIRQ_SHIFT,
-		    softirq_count() >> SOFTIRQ_SHIFT,
-		    in_nmi());
-
-	WARN_ON_ONCE(1);
-}
+	unsigned int val = this_cpu_read(current_context);
+	int bit;
 
-static inline int trace_recursive_lock(void)
-{
-	trace_recursion_inc();
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
 
-	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
-		return 0;
+	if (unlikely(val & (1 << bit)))
+		return 1;
 
-	trace_recursive_fail();
+	val |= (1 << bit);
+	this_cpu_write(current_context, val);
 
-	return -1;
+	return 0;
 }
 
-static inline void trace_recursive_unlock(void)
+static __always_inline void trace_recursive_unlock(void)
 {
-	WARN_ON_ONCE(!trace_recursion_buffer());
+	unsigned int val = this_cpu_read(current_context);
 
-	trace_recursion_dec();
+	val--;
+	val &= this_cpu_read(current_context);
+	this_cpu_write(current_context, val);
 }
 
 #else
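
The unlock path above is the comment's "subtract 1 and AND" trick; a tiny
standalone check of the arithmetic (not part of the patch):

    #include <assert.h>

    /* Clears the least-significant set bit. Contexts nest NMI(bit 0) inside
     * IRQ(1) inside SoftIRQ(2) inside normal(3) and unlock innermost-first,
     * so the lowest set bit is always the context now unlocking. */
    static unsigned int unlock_step(unsigned int val)
    {
            return (val - 1) & val;
    }

    int main(void)
    {
            assert(unlock_step(0x5) == 0x4);  /*  101 ->  100 */
            assert(unlock_step(0xa) == 0x8);  /* 1010 -> 1000 */
            return 0;
    }
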
@@ -3067,6 +3103,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
 
 /**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 	/* check for end of page padding */
 	if ((iter->head >= rb_page_size(iter->head_page)) &&
 	    (iter->head_page != cpu_buffer->commit_page))
-		rb_advance_iter(iter);
+		rb_inc_iter(iter);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d24..c2e2c2310374 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -249,7 +250,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
 static struct tracer		*trace_types __read_mostly;
 
 /* current_trace points to the tracer that is currently active */
-static struct tracer		*current_trace __read_mostly;
+static struct tracer		*current_trace __read_mostly = &nop_trace;
 
 /*
  * trace_types_lock is used to protect the trace_types list.
@@ -709,10 +710,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
 	}
+
 	arch_spin_lock(&ftrace_max_lock);
 
 	tr->buffer = max_tr.buffer;
@@ -739,10 +743,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
 		return;
-	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -862,10 +864,13 @@ int register_tracer(struct tracer *type)
 
 	current_trace = type;
 
-	/* If we expanded the buffers, make sure the max is expanded too */
-	if (ring_buffer_expanded && type->use_max_tr)
-		ring_buffer_resize(max_tr.buffer, trace_buf_size,
-					RING_BUFFER_ALL_CPUS);
+	if (type->use_max_tr) {
+		/* If we expanded the buffers, make sure the max is expanded too */
+		if (ring_buffer_expanded)
+			ring_buffer_resize(max_tr.buffer, trace_buf_size,
+					   RING_BUFFER_ALL_CPUS);
+		type->allocated_snapshot = true;
+	}
 
 	/* the test is responsible for initializing and enabling */
 	pr_info("Testing tracer %s: ", type->name);
@@ -881,10 +886,14 @@ int register_tracer(struct tracer *type)
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		tracing_reset_online_cpus(tr);
 
-		/* Shrink the max buffer again */
-		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, 1,
-						RING_BUFFER_ALL_CPUS);
+		if (type->use_max_tr) {
+			type->allocated_snapshot = false;
+
+			/* Shrink the max buffer again */
+			if (ring_buffer_expanded)
+				ring_buffer_resize(max_tr.buffer, 1,
+						   RING_BUFFER_ALL_CPUS);
+		}
 
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -922,6 +931,9 @@ void tracing_reset(struct trace_array *tr, int cpu)
 {
 	struct ring_buffer *buffer = tr->buffer;
 
+	if (!buffer)
+		return;
+
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -936,6 +948,9 @@ void tracing_reset_online_cpus(struct trace_array *tr)
 	struct ring_buffer *buffer = tr->buffer;
 	int cpu;
 
+	if (!buffer)
+		return;
+
 	ring_buffer_record_disable(buffer);
 
 	/* Make sure all commits have finished */
@@ -1167,7 +1182,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count	= pc & 0xff;
 	entry->pid		= (tsk) ? tsk->pid : 0;
-	entry->padding		= 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1335,7 +1349,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	preempt_disable_notrace();
 
-	use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
 	/*
 	 * We don't need any atomic variables, just a barrier.
 	 * If an interrupt comes in, we don't care, because it would
@@ -1389,7 +1403,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
  out:
 	/* Again, don't let gcc optimize things here */
 	barrier();
-	__get_cpu_var(ftrace_stack_reserve)--;
+	__this_cpu_dec(ftrace_stack_reserve);
 	preempt_enable_notrace();
 
 }
@@ -1517,7 +1531,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 static char *get_trace_buf(void)
 {
 	struct trace_buffer_struct *percpu_buffer;
-	struct trace_buffer_struct *buffer;
 
 	/*
 	 * If we have allocated per cpu buffers, then we do not
@@ -1535,9 +1548,7 @@ static char *get_trace_buf(void)
 	if (!percpu_buffer)
 		return NULL;
 
-	buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
-
-	return buffer->buffer;
+	return this_cpu_ptr(&percpu_buffer->buffer[0]);
 }
 
 static int alloc_percpu_trace_buffer(void)
@@ -1942,21 +1953,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
 	struct trace_iterator *iter = m->private;
-	static struct tracer *old_tracer;
 	int cpu_file = iter->cpu_file;
 	void *p = NULL;
 	loff_t l = 0;
 	int cpu;
 
-	/* copy the tracer to avoid using a global lock all around */
+	/*
+	 * copy the tracer to avoid using a global lock all around.
+	 * iter->trace is a copy of current_trace, the pointer to the
+	 * name may be used instead of a strcmp(), as iter->trace->name
+	 * will point to the same string as current_trace->name.
+	 */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(current_trace && iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
-	atomic_inc(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return ERR_PTR(-EBUSY);
+
+	if (!iter->snapshot)
+		atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -1995,7 +2012,11 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	atomic_dec(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return;
+
+	if (!iter->snapshot)
+		atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2080,8 +2101,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	unsigned long total;
 	const char *name = "preemption";
 
-	if (type)
-		name = type->name;
+	name = type->name;
 
 	get_total_entries(tr, &total, &entries);
 
@@ -2430,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2457,16 +2477,16 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!iter->trace)
 		goto fail;
 
-	if (current_trace)
-		*iter->trace = *current_trace;
+	*iter->trace = *current_trace;
 
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace && current_trace->print_max)
+	if (current_trace->print_max || snapshot)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
+	iter->snapshot = snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2483,8 +2503,9 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (trace_clocks[trace_clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	/* stop the trace while dumping */
-	tracing_stop();
+	/* stop the trace while dumping if we are not opening "snapshot" */
+	if (!iter->snapshot)
+		tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2547,8 +2568,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	/* reenable tracing if it was previously enabled */
-	tracing_start();
+	if (!iter->snapshot)
+		/* reenable tracing if it was previously enabled */
+		tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2576,7 +2598,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3014,10 +3036,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 	int r;
 
 	mutex_lock(&trace_types_lock);
-	if (current_trace)
-		r = sprintf(buf, "%s\n", current_trace->name);
-	else
-		r = sprintf(buf, "\n");
+	r = sprintf(buf, "%s\n", current_trace->name);
 	mutex_unlock(&trace_types_lock);
 
 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
@@ -3183,6 +3202,7 @@ static int tracing_set_tracer(const char *buf)
 	static struct trace_option_dentry *topts;
 	struct trace_array *tr = &global_trace;
 	struct tracer *t;
+	bool had_max_tr;
 	int ret = 0;
 
 	mutex_lock(&trace_types_lock);
@@ -3207,9 +3227,21 @@ static int tracing_set_tracer(const char *buf)
 		goto out;
 
 	trace_branch_disable();
-	if (current_trace && current_trace->reset)
+	if (current_trace->reset)
 		current_trace->reset(tr);
-	if (current_trace && current_trace->use_max_tr) {
+
+	had_max_tr = current_trace->allocated_snapshot;
+	current_trace = &nop_trace;
+
+	if (had_max_tr && !t->use_max_tr) {
+		/*
+		 * We need to make sure that the update_max_tr sees that
+		 * current_trace changed to nop_trace to keep it from
+		 * swapping the buffers after we resize it.
+		 * The update_max_tr is called from interrupts disabled
+		 * so a synchronized_sched() is sufficient.
+		 */
+		synchronize_sched();
 		/*
 		 * We don't free the ring buffer. instead, resize it because
 		 * The max_tr ring buffer has some state (e.g. ring->clock) and
@@ -3217,18 +3249,19 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
+		tracing_reset_online_cpus(&max_tr);
+		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
-	current_trace = &nop_trace;
-
 	topts = create_trace_option_files(t);
-	if (t->use_max_tr) {
+	if (t->use_max_tr && !had_max_tr) {
 		/* we need to make per cpu buffer sizes equivalent */
 		ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
+		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -3336,8 +3369,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 		ret = -ENOMEM;
 		goto fail;
 	}
-	if (current_trace)
-		*iter->trace = *current_trace;
+	*iter->trace = *current_trace;
 
 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
 		ret = -ENOMEM;
@@ -3477,7 +3509,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
 {
 	struct trace_iterator *iter = filp->private_data;
-	static struct tracer *old_tracer;
 	ssize_t sret;
 
 	/* return any leftover data */
@@ -3489,10 +3520,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
 	/*
@@ -3648,7 +3677,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 		.ops		= &tracing_pipe_buf_ops,
 		.spd_release	= tracing_spd_release_pipe,
 	};
-	static struct tracer *old_tracer;
 	ssize_t ret;
 	size_t rem;
 	unsigned int i;
@@ -3658,10 +3686,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
-	if (unlikely(old_tracer != current_trace && current_trace)) {
-		old_tracer = current_trace;
+	if (unlikely(iter->trace->name != current_trace->name))
 		*iter->trace = *current_trace;
-	}
 	mutex_unlock(&trace_types_lock);
 
 	mutex_lock(&iter->mutex);
@@ -4037,8 +4063,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	 * Reset the buffer so that it doesn't have incomparable timestamps.
 	 */
 	tracing_reset_online_cpus(&global_trace);
-	if (max_tr.buffer)
-		tracing_reset_online_cpus(&max_tr);
+	tracing_reset_online_cpus(&max_tr);
 
 	mutex_unlock(&trace_types_lock);
 
@@ -4054,6 +4079,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_iterator *iter;
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, true);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	}
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (val) {
+	case 0:
+		if (current_trace->allocated_snapshot) {
+			/* free spare buffer */
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+			set_buffer_entries(&max_tr, 1);
+			tracing_reset_online_cpus(&max_tr);
+			current_trace->allocated_snapshot = false;
+		}
+		break;
+	case 1:
+		if (!current_trace->allocated_snapshot) {
+			/* allocate spare buffer */
+			ret = resize_buffer_duplicate_size(&max_tr,
+					&global_trace, RING_BUFFER_ALL_CPUS);
+			if (ret < 0)
+				break;
+			current_trace->allocated_snapshot = true;
+		}
+
+		local_irq_disable();
+		/* Now, we're going to swap */
+		update_max_tr(&global_trace, current, smp_processor_id());
+		local_irq_enable();
+		break;
+	default:
+		if (current_trace->allocated_snapshot)
+			tracing_reset_online_cpus(&max_tr);
+		else
+			ret = -EINVAL;
+		break;
+	}
+
+	if (ret >= 0) {
+		*ppos += cnt;
+		ret = cnt;
+	}
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -4110,6 +4216,16 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= seq_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_seek,
+	.release	= tracing_release,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4414,6 +4530,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
+	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "read events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
@@ -4490,7 +4609,7 @@ struct dentry *tracing_init_dentry(void)
 
 static struct dentry *d_percpu;
 
-struct dentry *tracing_dentry_percpu(void)
+static struct dentry *tracing_dentry_percpu(void)
 {
 	static int once;
 	struct dentry *d_tracer;
@@ -4906,6 +5025,11 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+	trace_create_file("snapshot", 0644, d_tracer,
+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+#endif
+
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
@@ -5014,6 +5138,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	if (disable_tracing)
 		ftrace_kill();
 
+	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
 
 	for_each_tracing_cpu(cpu) {
@@ -5025,10 +5150,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 	/* don't look at user memory in panic mode */
 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
-	/* Simulate the iterator */
-	iter.tr = &global_trace;
-	iter.trace = current_trace;
-
 	switch (oops_dump_mode) {
 	case DUMP_ALL:
 		iter.cpu_file = TRACE_PIPE_ALL_CPU;
@@ -5173,7 +5294,7 @@ __init static int tracer_alloc_buffers(void)
 	init_irq_work(&trace_work_wakeup, trace_wake_up);
 
 	register_tracer(&nop_trace);
-	current_trace = &nop_trace;
+
 	/* All seems OK, enable tracing */
 	tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c75d7988902c..57d7e5397d56 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,20 +287,62 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
+	bool			allocated_snapshot;
 };
 
 
 /* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
 
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT		(1<<11)
-#define TRACE_GLOBAL_BIT		(1<<12)
-#define TRACE_CONTROL_BIT		(1<<13)
+/*
+ * For function tracing recursion:
+ *  The order of these bits are important.
+ *
+ *  When function tracing occurs, the following steps are made:
+ *   If arch does not support a ftrace feature:
+ *    call internal function (uses INTERNAL bits) which calls...
+ *   If callback is registered to the "global" list, the list
+ *    function is called and recursion checks the GLOBAL bits.
+ *    then this function calls...
+ *   The function callback, which can use the FTRACE bits to
+ *    check for recursion.
+ *
+ * Now if the arch does not suppport a feature, and it calls
+ * the global list function which calls the ftrace callback
+ * all three of these steps will do a recursion protection.
+ * There's no reason to do one if the previous caller already
+ * did. The recursion that we are protecting against will
+ * go through the same steps again.
+ *
+ * To prevent the multiple recursion checks, if a recursion
+ * bit is set that is higher than the MAX bit of the current
+ * check, then we know that the check was made by the previous
+ * caller, and we can skip the current check.
+ */
+enum {
+	TRACE_BUFFER_BIT,
+	TRACE_BUFFER_NMI_BIT,
+	TRACE_BUFFER_IRQ_BIT,
+	TRACE_BUFFER_SIRQ_BIT,
+
+	/* Start of function recursion bits */
+	TRACE_FTRACE_BIT,
+	TRACE_FTRACE_NMI_BIT,
+	TRACE_FTRACE_IRQ_BIT,
+	TRACE_FTRACE_SIRQ_BIT,
+
+	/* GLOBAL_BITs must be greater than FTRACE_BITs */
+	TRACE_GLOBAL_BIT,
+	TRACE_GLOBAL_NMI_BIT,
+	TRACE_GLOBAL_IRQ_BIT,
+	TRACE_GLOBAL_SIRQ_BIT,
+
+	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
+	TRACE_INTERNAL_BIT,
+	TRACE_INTERNAL_NMI_BIT,
+	TRACE_INTERNAL_IRQ_BIT,
+	TRACE_INTERNAL_SIRQ_BIT,
+
+	TRACE_CONTROL_BIT,
 
 /*
  * Abuse of the trace_recursion.
@@ -309,11 +351,77 @@ struct tracer {
  * was called in irq context but we have irq tracing off. Since this
  * can only be modified by current, we can reuse trace_recursion.
  */
-#define TRACE_IRQ_BIT			(1<<13)
+	TRACE_IRQ_BIT,
+};
+
+#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
+#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
+#define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
+
+#define TRACE_CONTEXT_BITS	4
+
+#define TRACE_FTRACE_START	TRACE_FTRACE_BIT
+#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
+#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_LIST_START	TRACE_INTERNAL_BIT
+#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
+
+static __always_inline int trace_get_context_bit(void)
+{
+	int bit;
 
-#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
+	if (in_interrupt()) {
+		if (in_nmi())
+			bit = 0;
+
+		else if (in_irq())
+			bit = 1;
+		else
+			bit = 2;
+	} else
+		bit = 3;
+
+	return bit;
+}
+
+static __always_inline int trace_test_and_set_recursion(int start, int max)
+{
+	unsigned int val = current->trace_recursion;
+	int bit;
+
+	/* A previous recursion check was made */
+	if ((val & TRACE_CONTEXT_MASK) > max)
+		return 0;
+
+	bit = trace_get_context_bit() + start;
+	if (unlikely(val & (1 << bit)))
+		return -1;
+
+	val |= 1 << bit;
+	current->trace_recursion = val;
+	barrier();
+
+	return bit;
+}
+
+static __always_inline void trace_clear_recursion(int bit)
+{
+	unsigned int val = current->trace_recursion;
+
+	if (!bit)
+		return;
+
+	bit = 1 << bit;
+	val &= ~bit;
+
+	barrier();
+	current->trace_recursion = val;
+}
 
 #define TRACE_PIPE_ALL_CPU	-1
 
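
To make the "skip the current check" rule concrete, a small standalone
check of the constants defined above (not part of the patch; the enum is
reduced to the block offsets that matter):

    #include <assert.h>

    #define TRACE_CONTEXT_BITS 4
    #define TRACE_FTRACE_BIT   4    /* FTRACE block:   bits  4..7  */
    #define TRACE_GLOBAL_BIT   8    /* GLOBAL block:   bits  8..11 */
    #define TRACE_INTERNAL_BIT 12   /* INTERNAL block: bits 12..15 */

    int main(void)
    {
            unsigned int ftrace_max =
                    (1u << (TRACE_FTRACE_BIT + TRACE_CONTEXT_BITS)) - 1;   /* 0x00ff */
            unsigned int context_mask =
                    (1u << (TRACE_INTERNAL_BIT + TRACE_CONTEXT_BITS)) - 1; /* 0xffff */

            /* An outer check in normal context set a GLOBAL bit... */
            unsigned int val = 1u << (TRACE_GLOBAL_BIT + 3);

            /* ...so the inner FTRACE check sees a bit above its own MAX and
             * is skipped: trace_test_and_set_recursion() returns 0 without
             * setting anything. */
            assert((val & context_mask) > ftrace_max);
            return 0;
    }
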
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 394783531cbb..aa8f5f48dae6 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -21,8 +21,6 @@
 #include <linux/ktime.h>
 #include <linux/trace_clock.h>
 
-#include "trace.h"
-
 /*
  * trace_clock_local(): the simplest and least coherent tracing clock.
  *
@@ -44,6 +42,7 @@ u64 notrace trace_clock_local(void)
 
 	return clock;
 }
+EXPORT_SYMBOL_GPL(trace_clock_local);
 
 /*
  * trace_clock(): 'between' trace clock. Not completely serialized,
@@ -86,7 +85,7 @@ u64 notrace trace_clock_global(void)
 	local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
-	now = cpu_clock(this_cpu);
+	now = sched_clock_cpu(this_cpu);
 	/*
 	 * If in an NMI context then dont risk lockups and return the
 	 * cpu_clock() time:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b946..57e9b284250c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
-	__common_field(int, padding);
 
 	return ret;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e3ad8082ab7..601152523326 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr)
 	tracing_reset_online_cpus(tr);
 }
 
-static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
-				 struct ftrace_ops *op, struct pt_regs *pt_regs)
-{
-	struct trace_array *tr = func_trace;
-	struct trace_array_cpu *data;
-	unsigned long flags;
-	long disabled;
-	int cpu;
-	int pc;
-
-	if (unlikely(!ftrace_function_enabled))
-		return;
-
-	pc = preempt_count();
-	preempt_disable_notrace();
-	local_save_flags(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
-
-	if (likely(disabled == 1))
-		trace_function(tr, ip, parent_ip, flags, pc);
-
-	atomic_dec(&data->disabled);
-	preempt_enable_notrace();
-}
-
 /* Our option */
 enum {
 	TRACE_FUNC_OPT_STACK	= 0x1,
@@ -85,34 +57,34 @@ static struct tracer_flags func_flags;
 static void
 function_trace_call(unsigned long ip, unsigned long parent_ip,
 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
-
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
 	unsigned long flags;
-	long disabled;
+	int bit;
 	int cpu;
 	int pc;
 
 	if (unlikely(!ftrace_function_enabled))
 		return;
 
-	/*
-	 * Need to use raw, since this must be called before the
-	 * recursive protection is performed.
-	 */
-	local_irq_save(flags);
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-	disabled = atomic_inc_return(&data->disabled);
+	pc = preempt_count();
+	preempt_disable_notrace();
 
-	if (likely(disabled == 1)) {
-		pc = preempt_count();
+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	if (bit < 0)
+		goto out;
+
+	cpu = smp_processor_id();
+	data = tr->data[cpu];
+	if (!atomic_read(&data->disabled)) {
+		local_save_flags(flags);
 		trace_function(tr, ip, parent_ip, flags, pc);
 	}
+	trace_clear_recursion(bit);
 
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+ out:
+	preempt_enable_notrace();
 }
 
 static void
@@ -185,11 +157,6 @@ static void tracing_start_function_trace(void)
 {
 	ftrace_function_enabled = 0;
 
-	if (trace_flags & TRACE_ITER_PREEMPTONLY)
-		trace_ops.func = function_trace_call_preempt_only;
-	else
-		trace_ops.func = function_trace_call;
-
 	if (func_flags.val & TRACE_FUNC_OPT_STACK)
 		register_ftrace_function(&trace_stack_ops);
 	else
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4edb4b74eb7e..39ada66389cc 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -47,6 +47,8 @@ struct fgraph_data {
 #define TRACE_GRAPH_PRINT_ABS_TIME	0x20
 #define TRACE_GRAPH_PRINT_IRQS		0x40
 
+static unsigned int max_depth;
+
 static struct tracer_opt trace_opts[] = {
 	/* Display overruns? (for self-debug purpose) */
 	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
@@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
 	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
-	ftrace_graph_return(&trace);
 	barrier();
 	current->curr_ret_stack--;
 
+	/*
+	 * The trace should run after decrementing the ret counter
+	 * in case an interrupt were to come in. We don't want to
+	 * lose the interrupt if max_depth is set.
+	 */
+	ftrace_graph_return(&trace);
+
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
@@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 		return 0;
 
 	/* trace it when it is-nested-in or is a function enabled. */
-	if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
-	      ftrace_graph_ignore_irqs())
+	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
+	     ftrace_graph_ignore_irqs()) ||
+	    (max_depth && trace->depth >= max_depth))
 		return 0;
 
 	local_irq_save(flags);
@@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = {
 #endif
 };
 
+
+static ssize_t
+graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		  loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	max_depth = val;
+
+	*ppos += cnt;
+
+	return cnt;
+}
+
+static ssize_t
+graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt,
+		 loff_t *ppos)
+{
+	char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/
+	int n;
+
+	n = sprintf(buf, "%d\n", max_depth);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
+}
+
+static const struct file_operations graph_depth_fops = {
+	.open		= tracing_open_generic,
+	.write		= graph_depth_write,
+	.read		= graph_depth_read,
+	.llseek		= generic_file_llseek,
+};
+
+static __init int init_graph_debugfs(void)
+{
+	struct dentry *d_tracer;
+
+	d_tracer = tracing_init_dentry();
+	if (!d_tracer)
+		return 0;
+
+	trace_create_file("max_graph_depth", 0644, d_tracer,
+			  NULL, &graph_depth_fops);
+
+	return 0;
+}
+fs_initcall(init_graph_debugfs);
+
 static __init int init_graph_trace(void)
 {
 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
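
The new max_graph_depth control is an ordinary debugfs file; a minimal
userspace sketch of using it (not part of the patch, assumes debugfs is
mounted at /sys/kernel/debug):

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/debug/tracing/max_graph_depth",
                          O_WRONLY);
            if (fd < 0)
                    return 1;
            /* "1" traces only the outermost functions (depth 0 entries);
             * "0" restores unlimited depth. */
            write(fd, "1", 1);
            close(fd);
            return 0;
    }
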
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 933708677814..5c7e09d10d74 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -66,7 +66,6 @@
 #define TP_FLAG_TRACE		1
 #define TP_FLAG_PROFILE		2
 #define TP_FLAG_REGISTERED	4
-#define TP_FLAG_UPROBE		8
 
 
 /* data_rloc: data relative location, compatible with u32 */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..75aa97fbe1a1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
-
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a815..51c819c12c29 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip,
 	 * The ftrace infrastructure should provide the recursion
 	 * protection. If not, this will crash the kernel!
 	 */
-	trace_selftest_recursion_cnt++;
+	if (trace_selftest_recursion_cnt++ > 10)
+		return;
 	DYN_FTRACE_TEST_NAME();
 }
 
@@ -452,7 +453,6 @@ trace_selftest_function_recursion(void)
 	char *func_name;
 	int len;
 	int ret;
-	int cnt;
 
 	/* The previous test PASSED */
 	pr_cont("PASSED\n");
@@ -510,19 +510,10 @@ trace_selftest_function_recursion(void)
 
 	unregister_ftrace_function(&test_recsafe_probe);
 
-	/*
-	 * If arch supports all ftrace features, and no other task
-	 * was on the list, we should be fine.
-	 */
-	if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC)
-		cnt = 2; /* Should have recursed */
-	else
-		cnt = 1;
-
 	ret = -1;
-	if (trace_selftest_recursion_cnt != cnt) {
-		pr_cont("*callback not called expected %d times (%d)* ",
-			cnt, trace_selftest_recursion_cnt);
+	if (trace_selftest_recursion_cnt != 2) {
+		pr_cont("*callback not called expected 2 times (%d)* ",
+			trace_selftest_recursion_cnt);
 		goto out;
 	}
 
@@ -568,7 +559,7 @@ trace_selftest_function_regs(void)
 	int ret;
 	int supported = 0;
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	supported = 1;
 #endif
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7609dd6714c2..5329e13e74a1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
 	return syscalls_metadata[nr];
 }
 
-enum print_line_t
+static enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags,
 		    struct trace_event *event)
 {
@@ -130,7 +130,7 @@ end:
 	return TRACE_TYPE_HANDLED;
 }
 
-enum print_line_t
+static enum print_line_t
 print_syscall_exit(struct trace_iterator *iter, int flags,
 		   struct trace_event *event)
 {
@@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
 	return ret;
 }
 
-void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
+static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 {
 	struct syscall_trace_enter *entry;
 	struct syscall_metadata *sys_data;
@@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	trace_current_buffer_unlock_commit(buffer, event, 0, 0);
 }
 
-void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
+static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 {
 	struct syscall_trace_exit *entry;
 	struct syscall_metadata *sys_data;
@@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
337 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 337 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
338} 338}
339 339
340int reg_event_syscall_enter(struct ftrace_event_call *call) 340static int reg_event_syscall_enter(struct ftrace_event_call *call)
341{ 341{
342 int ret = 0; 342 int ret = 0;
343 int num; 343 int num;
@@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
356 return ret; 356 return ret;
357} 357}
358 358
359void unreg_event_syscall_enter(struct ftrace_event_call *call) 359static void unreg_event_syscall_enter(struct ftrace_event_call *call)
360{ 360{
361 int num; 361 int num;
362 362
@@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
371 mutex_unlock(&syscall_trace_lock); 371 mutex_unlock(&syscall_trace_lock);
372} 372}
373 373
374int reg_event_syscall_exit(struct ftrace_event_call *call) 374static int reg_event_syscall_exit(struct ftrace_event_call *call)
375{ 375{
376 int ret = 0; 376 int ret = 0;
377 int num; 377 int num;
@@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
390 return ret; 390 return ret;
391} 391}
392 392
393void unreg_event_syscall_exit(struct ftrace_event_call *call) 393static void unreg_event_syscall_exit(struct ftrace_event_call *call)
394{ 394{
395 int num; 395 int num;
396 396
@@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr)
459 return (unsigned long)sys_call_table[nr]; 459 return (unsigned long)sys_call_table[nr];
460} 460}
461 461
462int __init init_ftrace_syscalls(void) 462static int __init init_ftrace_syscalls(void)
463{ 463{
464 struct syscall_metadata *meta; 464 struct syscall_metadata *meta;
465 unsigned long addr; 465 unsigned long addr;
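
Every hunk in this file is the same mechanical change: functions used only inside trace_syscalls.c gain the static qualifier, giving them internal linkage so they stop leaking into the global symbol table. A tiny sketch of the effect (hypothetical names):

	/* file-local: invisible to other translation units, and the
	 * compiler can warn about or drop it if it goes unused */
	static int probe_refcount;

	static void probe_init(void)
	{
		probe_refcount = 0;
	}

	/* the only deliberately exported entry point */
	int register_probe(void)
	{
		probe_init();
		return ++probe_refcount;
	}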
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c86e6d4f67fb..8dad2a92dee9 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -28,20 +28,21 @@
 
 #define UPROBE_EVENT_SYSTEM	"uprobes"
 
+struct trace_uprobe_filter {
+	rwlock_t		rwlock;
+	int			nr_systemwide;
+	struct list_head	perf_events;
+};
+
 /*
  * uprobe event core functions
  */
-struct trace_uprobe;
-struct uprobe_trace_consumer {
-	struct uprobe_consumer		cons;
-	struct trace_uprobe		*tu;
-};
-
 struct trace_uprobe {
 	struct list_head		list;
 	struct ftrace_event_class	class;
 	struct ftrace_event_call	call;
-	struct uprobe_trace_consumer	*consumer;
+	struct trace_uprobe_filter	filter;
+	struct uprobe_consumer		consumer;
 	struct inode			*inode;
 	char				*filename;
 	unsigned long			offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
 
+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
+{
+	rwlock_init(&filter->rwlock);
+	filter->nr_systemwide = 0;
+	INIT_LIST_HEAD(&filter->perf_events);
+}
+
+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
+{
+	return !filter->nr_systemwide && list_empty(&filter->perf_events);
+}
+
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 		goto error;
 
 	INIT_LIST_HEAD(&tu->list);
+	tu->consumer.handler = uprobe_dispatcher;
+	init_trace_uprobe_filter(&tu->filter);
 	return tu;
 
 error:
@@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (ret)
 		goto fail_address_parse;
 
+	inode = igrab(path.dentry->d_inode);
+	path_put(&path);
+
+	if (!inode || !S_ISREG(inode->i_mode)) {
+		ret = -EINVAL;
+		goto fail_address_parse;
+	}
+
 	ret = kstrtoul(arg, 0, &offset);
 	if (ret)
 		goto fail_address_parse;
 
-	inode = igrab(path.dentry->d_inode);
-
 	argc -= 2;
 	argv += 2;
 
@@ -356,7 +377,7 @@ fail_address_parse:
 	if (inode)
 		iput(inode);
 
-	pr_info("Failed to parse address.\n");
+	pr_info("Failed to parse address or file.\n");
 
 	return ret;
 }
@@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
 };
 
 /* uprobe handler */
-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
@@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	unsigned long irq_flags;
 	struct ftrace_event_call *call = &tu->call;
 
-	tu->nhit++;
-
 	local_save_flags(irq_flags);
 	pc = preempt_count();
 
@@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
 						  size, irq_flags, pc);
 	if (!event)
-		return;
+		return 0;
 
 	entry = ring_buffer_event_data(event);
-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+	entry->ip = instruction_pointer(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
 
 	if (!filter_current_check_discard(buffer, call, entry, event))
 		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+	return 0;
 }
 
 /* Event entry printers */
@@ -533,42 +554,43 @@ partial:
 	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int probe_event_enable(struct trace_uprobe *tu, int flag)
+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
 {
-	struct uprobe_trace_consumer *utc;
-	int ret = 0;
+	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
+}
 
-	if (!tu->inode || tu->consumer)
-		return -EINTR;
+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
+				enum uprobe_filter_ctx ctx,
+				struct mm_struct *mm);
 
-	utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
-	if (!utc)
+static int
+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+{
+	int ret = 0;
+
+	if (is_trace_uprobe_enabled(tu))
 		return -EINTR;
 
-	utc->cons.handler = uprobe_dispatcher;
-	utc->cons.filter = NULL;
-	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
-	if (ret) {
-		kfree(utc);
-		return ret;
-	}
+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
 	tu->flags |= flag;
-	utc->tu = tu;
-	tu->consumer = utc;
+	tu->consumer.filter = filter;
+	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+	if (ret)
+		tu->flags &= ~flag;
 
-	return 0;
+	return ret;
 }
 
 static void probe_event_disable(struct trace_uprobe *tu, int flag)
 {
-	if (!tu->inode || !tu->consumer)
+	if (!is_trace_uprobe_enabled(tu))
 		return;
 
-	uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+
+	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
 	tu->flags &= ~flag;
-	kfree(tu->consumer);
-	tu->consumer = NULL;
 }
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
 }
 
 #ifdef CONFIG_PERF_EVENTS
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{
+	struct perf_event *event;
+
+	if (filter->nr_systemwide)
+		return true;
+
+	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
+		if (event->hw.tp_target->mm == mm)
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{
+	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+}
+
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		/*
+		 * event->parent != NULL means copy_process(), we can avoid
+		 * uprobe_apply(). current->mm must be probed and we can rely
+		 * on dup_mmap() which preserves the already installed bp's.
+		 *
+		 * attr.enable_on_exec means that exec/mmap will install the
+		 * breakpoints we need.
+		 */
+		done = tu->filter.nr_systemwide ||
+			event->parent || event->attr.enable_on_exec ||
+			uprobe_filter_event(tu, event);
+		list_add(&event->hw.tp_list, &tu->filter.perf_events);
+	} else {
+		done = tu->filter.nr_systemwide;
+		tu->filter.nr_systemwide++;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+
+	return 0;
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		list_del(&event->hw.tp_list);
+		done = tu->filter.nr_systemwide ||
+			(event->hw.tp_target->flags & PF_EXITING) ||
+			uprobe_filter_event(tu, event);
+	} else {
+		tu->filter.nr_systemwide--;
+		done = tu->filter.nr_systemwide;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	struct trace_uprobe *tu;
+	int ret;
+
+	tu = container_of(uc, struct trace_uprobe, consumer);
+	read_lock(&tu->filter.rwlock);
+	ret = __uprobe_perf_filter(&tu->filter, mm);
+	read_unlock(&tu->filter.rwlock);
+
+	return ret;
+}
+
 /* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
@@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	int size, __size, i;
 	int rctx;
 
+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
 	__size = sizeof(*entry) + tu->size;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
+		return 0;
 
 	preempt_disable();
 
@@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	if (!entry)
 		goto out;
 
-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+	entry->ip = instruction_pointer(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 
  out:
 	preempt_enable();
+	return 0;
 }
 #endif	/* CONFIG_PERF_EVENTS */
 
@@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_TRACE);
+		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);
 
 	case TRACE_REG_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_TRACE);
@@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_PROFILE);
+		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);
 
 	case TRACE_REG_PERF_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_PROFILE);
 		return 0;
+
+	case TRACE_REG_PERF_OPEN:
+		return uprobe_perf_open(tu, data);
+
+	case TRACE_REG_PERF_CLOSE:
+		return uprobe_perf_close(tu, data);
+
 #endif
 	default:
 		return 0;
@@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
 
 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 {
-	struct uprobe_trace_consumer *utc;
 	struct trace_uprobe *tu;
+	int ret = 0;
 
-	utc = container_of(con, struct uprobe_trace_consumer, cons);
-	tu = utc->tu;
-	if (!tu || tu->consumer != utc)
-		return 0;
+	tu = container_of(con, struct trace_uprobe, consumer);
+	tu->nhit++;
 
 	if (tu->flags & TP_FLAG_TRACE)
-		uprobe_trace_func(tu, regs);
+		ret |= uprobe_trace_func(tu, regs);
 
 #ifdef CONFIG_PERF_EVENTS
 	if (tu->flags & TP_FLAG_PROFILE)
-		uprobe_perf_func(tu, regs);
+		ret |= uprobe_perf_func(tu, regs);
 #endif
-	return 0;
+	return ret;
 }
 
 static struct trace_event_functions uprobe_funcs = {
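
The rework above drops the per-registration uprobe_trace_consumer allocation in favour of a consumer embedded in struct trace_uprobe plus a trace_uprobe_filter: a read-mostly structure pairing a systemwide count with a list of per-task perf events, guarded by an rwlock so the hot probe path only ever takes the read side. A rough userspace model of that locking split, built on POSIX rwlocks rather than the kernel API (compile with -lpthread; all names are illustrative):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct subscriber {
		int tgid;			/* stands in for event->hw.tp_target */
		struct subscriber *next;
	};

	struct probe_filter {
		pthread_rwlock_t rwlock;
		int nr_systemwide;
		struct subscriber *subscribers;
	};

	/* hot path, runs on every probe hit: readers only */
	static bool filter_match(struct probe_filter *f, int tgid)
	{
		struct subscriber *s;
		bool hit = false;

		pthread_rwlock_rdlock(&f->rwlock);
		if (f->nr_systemwide) {
			hit = true;
		} else {
			for (s = f->subscribers; s; s = s->next) {
				if (s->tgid == tgid) {
					hit = true;
					break;
				}
			}
		}
		pthread_rwlock_unlock(&f->rwlock);
		return hit;
	}

	/* slow path, runs on perf event open/close: takes the write side */
	static void filter_add(struct probe_filter *f, struct subscriber *s)
	{
		pthread_rwlock_wrlock(&f->rwlock);
		s->next = f->subscribers;
		f->subscribers = s;
		pthread_rwlock_unlock(&f->rwlock);
	}

	int main(void)
	{
		struct probe_filter f = { PTHREAD_RWLOCK_INITIALIZER, 0, NULL };
		struct subscriber s = { 42, NULL };

		filter_add(&f, &s);
		printf("tgid 42: %d, tgid 7: %d\n",
		       filter_match(&f, 42), filter_match(&f, 7));
		return 0;
	}

The payoff in the real patch is uprobe_perf_func() returning UPROBE_HANDLER_REMOVE when the filter rejects current->mm, letting the uprobe layer pull breakpoints out of processes nobody is profiling.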
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 625df0b44690..a1dd9a1b1327 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-	stats->ac_utime = cputime_to_usecs(tsk->utime);
-	stats->ac_stime = cputime_to_usecs(tsk->stime);
-	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
+
+	task_cputime(tsk, &utime, &stime);
+	stats->ac_utime = cputime_to_usecs(utime);
+	stats->ac_stime = cputime_to_usecs(stime);
+
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
+static void __acct_update_integrals(struct task_struct *tsk,
+				    cputime_t utime, cputime_t stime)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
 		u64 delta;
 
 		local_irq_save(flags);
-		time = tsk->stime + tsk->utime;
+		time = stime + utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -145,6 +148,27 @@ void acct_update_integrals(struct task_struct *tsk)
 }
 
 /**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	__acct_update_integrals(tsk, utime, stime);
+}
+
+/**
+ * acct_account_cputime - update mm integral after cputime update
+ * @tsk: task_struct for accounting
+ */
+void acct_account_cputime(struct task_struct *tsk)
+{
+	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
+}
+
+/**
 * acct_clear_integrals - clear the mm integral fields in task_struct
 * @tsk: task_struct whose accounting fields are cleared
 */
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 75a2ab3d0b02..27689422aa92 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 67604e599384..a1714c897e3f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -605,61 +605,6 @@ config PROVE_LOCKING
 
	 For more details, see Documentation/lockdep-design.txt.
 
-config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	 This feature enables lockdep extensions that check for correct
-	 use of RCU APIs. This is currently under development. Say Y
-	 if you want to debug RCU usage or help work on the PROVE_RCU
-	 feature.
-
-	 Say N if you are unsure.
-
-config PROVE_RCU_REPEATEDLY
-	bool "RCU debugging: don't disable PROVE_RCU on first splat"
-	depends on PROVE_RCU
-	default n
-	help
-	 By itself, PROVE_RCU will disable checking upon issuing the
-	 first warning (or "splat"). This feature prevents such
-	 disabling, allowing multiple RCU-lockdep warnings to be printed
-	 on a single reboot.
-
-	 Say Y to allow multiple RCU-lockdep warnings per boot.
-
-	 Say N if you are unsure.
-
-config PROVE_RCU_DELAY
-	bool "RCU debugging: preemptible RCU race provocation"
-	depends on DEBUG_KERNEL && PREEMPT_RCU
-	default n
-	help
-	 There is a class of races that involve an unlikely preemption
-	 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
-	 been set to INT_MIN. This feature inserts a delay at that
-	 point to increase the probability of these races.
-
-	 Say Y to increase probability of preemption of __rcu_read_unlock().
-
-	 Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
-	bool "RCU debugging: sparse-based checks for pointer usage"
-	default n
-	help
-	 This feature enables the __rcu sparse annotation for
-	 RCU-protected pointers. This annotation will cause sparse
-	 to flag any non-RCU used of annotated pointers. This can be
-	 helpful when debugging RCU usage. Please note that this feature
-	 is not intended to enforce code cleanliness; it is instead merely
-	 a debugging aid.
-
-	 Say Y to make sparse flag questionable use of RCU-protected pointers
-
-	 Say N if you are unsure.
-
 config LOCKDEP
	bool
	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -937,6 +882,63 @@ config BOOT_PRINTK_DELAY
	  BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
	  what it believes to be lockup conditions.
 
+menu "RCU Debugging"
+
+config PROVE_RCU
+	bool "RCU debugging: prove RCU correctness"
+	depends on PROVE_LOCKING
+	default n
+	help
+	 This feature enables lockdep extensions that check for correct
+	 use of RCU APIs. This is currently under development. Say Y
+	 if you want to debug RCU usage or help work on the PROVE_RCU
+	 feature.
+
+	 Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	 By itself, PROVE_RCU will disable checking upon issuing the
+	 first warning (or "splat"). This feature prevents such
+	 disabling, allowing multiple RCU-lockdep warnings to be printed
+	 on a single reboot.
+
+	 Say Y to allow multiple RCU-lockdep warnings per boot.
+
+	 Say N if you are unsure.
+
+config PROVE_RCU_DELAY
+	bool "RCU debugging: preemptible RCU race provocation"
+	depends on DEBUG_KERNEL && PREEMPT_RCU
+	default n
+	help
+	 There is a class of races that involve an unlikely preemption
+	 of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
+	 been set to INT_MIN. This feature inserts a delay at that
+	 point to increase the probability of these races.
+
+	 Say Y to increase probability of preemption of __rcu_read_unlock().
+
+	 Say N if you are unsure.
+
+config SPARSE_RCU_POINTER
+	bool "RCU debugging: sparse-based checks for pointer usage"
+	default n
+	help
+	 This feature enables the __rcu sparse annotation for
+	 RCU-protected pointers. This annotation will cause sparse
+	 to flag any non-RCU used of annotated pointers. This can be
+	 helpful when debugging RCU usage. Please note that this feature
+	 is not intended to enforce code cleanliness; it is instead merely
+	 a debugging aid.
+
+	 Say Y to make sparse flag questionable use of RCU-protected pointers
+
+	 Say N if you are unsure.
+
 config RCU_TORTURE_TEST
	tristate "torture tests for RCU"
	depends on DEBUG_KERNEL
@@ -970,7 +972,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 
 config RCU_CPU_STALL_TIMEOUT
	int "RCU CPU stall timeout in seconds"
-	depends on TREE_RCU || TREE_PREEMPT_RCU
+	depends on RCU_STALL_COMMON
	range 3 300
	default 21
	help
@@ -1008,6 +1010,7 @@ config RCU_CPU_STALL_INFO
 config RCU_TRACE
	bool "Enable tracing for RCU"
	depends on DEBUG_KERNEL
+	select TRACE_CLOCK
	help
	  This option provides tracing in RCU which presents stats
	  in debugfs for debugging RCU implementation.
@@ -1015,6 +1018,8 @@ config RCU_TRACE
	  Say Y here if you want to enable RCU tracing
	  Say N if you are unsure.
 
+endmenu # "RCU Debugging"
+
 config KPROBES_SANITY_TEST
	bool "Kprobes sanity tests"
	depends on DEBUG_KERNEL
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 09255ec8159c..fbb60b103e64 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3030,7 +3030,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
 	if (memcg) {
 		s->memcg_params->memcg = memcg;
 		s->memcg_params->root_cache = root_cache;
-	}
+	} else
+		s->memcg_params->is_root_cache = true;
+
 	return 0;
 }
 
diff --git a/mm/mlock.c b/mm/mlock.c
index f0b9ce572fc7..c9bd528b01d2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -517,11 +517,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 static int do_mlockall(int flags)
 {
 	struct vm_area_struct * vma, * prev = NULL;
-	unsigned int def_flags = 0;
 
 	if (flags & MCL_FUTURE)
-		def_flags = VM_LOCKED;
-	current->mm->def_flags = def_flags;
+		current->mm->def_flags |= VM_LOCKED;
+	else
+		current->mm->def_flags &= ~VM_LOCKED;
 	if (flags == MCL_FUTURE)
 		goto out;
 
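
The do_mlockall() fix is flag hygiene: the old code assigned a freshly computed def_flags, silently wiping every other bit in mm->def_flags, while the new code touches only the VM_LOCKED bit it owns. A toy demonstration (VM_OTHER is a made-up second flag):

	#include <stdio.h>

	#define VM_LOCKED	0x1UL
	#define VM_OTHER	0x2UL	/* illustrative flag owned elsewhere */

	int main(void)
	{
		unsigned long def_flags = VM_OTHER;	/* set by someone else */

		/* old behaviour: plain assignment clobbers VM_OTHER */
		unsigned long broken = VM_LOCKED;

		/* fixed behaviour: only our own bit changes */
		def_flags |= VM_LOCKED;		/* MCL_FUTURE requested */
		/* ... or: def_flags &= ~VM_LOCKED;   when it is not */

		printf("broken=%#lx fixed=%#lx\n", broken, def_flags); /* 0x1 0x3 */
		return 0;
	}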
diff --git a/mm/mmap.c b/mm/mmap.c
index d1e4124f3d0e..09da0b264982 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/mremap.c b/mm/mremap.c
index e1031e1f6a61..f9766f460299 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 79c3cac87afa..b20db4e22263 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -29,6 +29,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0713bfbf0954..66a0024becd9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -35,6 +35,7 @@
 #include <linux/buffer_head.h>	/* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
+#include <linux/sched/rt.h>
 #include <trace/events/writeback.h>
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df2022ff0c8a..d1107adf174a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -773,6 +774,10 @@ void __init init_cma_reserved_pageblock(struct page *page)
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 	__free_pages(page, pageblock_order);
 	totalram_pages += pageblock_nr_pages;
+#ifdef CONFIG_HIGHMEM
+	if (PageHighMem(page))
+		totalhigh_pages += pageblock_nr_pages;
+#endif
 }
 #endif
 
@@ -4416,10 +4421,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
 {
 	unsigned long usemapsize;
 
+	zonesize += zone_start_pfn & (pageblock_nr_pages-1);
 	usemapsize = roundup(zonesize, pageblock_nr_pages);
 	usemapsize = usemapsize >> pageblock_order;
 	usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4429,17 +4435,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
 }
 
 static void __init setup_usemap(struct pglist_data *pgdat,
-				struct zone *zone, unsigned long zonesize)
+				struct zone *zone,
+				unsigned long zone_start_pfn,
+				unsigned long zonesize)
 {
-	unsigned long usemapsize = usemap_size(zonesize);
+	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
 		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
 								   usemapsize);
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat,
-				struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+				unsigned long zone_start_pfn, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4590,7 +4598,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			continue;
 
 		set_pageblock_order();
-		setup_usemap(pgdat, zone, size);
+		setup_usemap(pgdat, zone, zone_start_pfn, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
 		BUG_ON(ret);
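
The usemap_size() change handles zones whose start pfn is not pageblock-aligned: the partial leading pageblock needs bitmap space too, so the size is padded by the misalignment before rounding up. The arithmetic in isolation (the pageblock size and bits-per-block values are illustrative, not the kernel's):

	#include <stdio.h>

	#define PAGEBLOCK_NR_PAGES	512UL
	#define NR_PAGEBLOCK_BITS	4UL

	static unsigned long roundup_ul(unsigned long x, unsigned long to)
	{
		return ((x + to - 1) / to) * to;
	}

	static unsigned long usemap_bits(unsigned long zone_start_pfn,
					 unsigned long zonesize)
	{
		/* pad by the offset into the first pageblock */
		zonesize += zone_start_pfn & (PAGEBLOCK_NR_PAGES - 1);
		return roundup_ul(zonesize, PAGEBLOCK_NR_PAGES)
			/ PAGEBLOCK_NR_PAGES * NR_PAGEBLOCK_BITS;
	}

	int main(void)
	{
		/* aligned start: 1024 pages span 2 pageblocks -> 8 bits */
		printf("%lu\n", usemap_bits(0, 1024));
		/* start 256 pages into a block: the same 1024 pages now
		 * straddle 3 pageblocks -> 12 bits; the unpadded formula
		 * would under-allocate 8 */
		printf("%lu\n", usemap_bits(256, 1024));
		return 0;
	}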
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 183f97a86bb2..553921511e4e 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -440,7 +440,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 	/* this is an hash collision with the temporary selected node. Choose
	 * the one with the lowest address
	 */
-	if ((tmp_max == max) &&
+	if ((tmp_max == max) && max_orig_node &&
	    (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
		goto out;
 
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 7f884e3fb955..8660ea3be705 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -16,6 +16,7 @@
 #include <linux/etherdevice.h>
 #include <linux/llc.h>
 #include <linux/slab.h>
+#include <linux/pkt_sched.h>
 #include <net/net_namespace.h>
 #include <net/llc.h>
 #include <net/llc_pdu.h>
@@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
 	skb->dev = p->dev;
 	skb->protocol = htons(ETH_P_802_2);
+	skb->priority = TC_PRIO_CONTROL;
 
 	skb_reserve(skb, LLC_RESERVE);
 	memcpy(__skb_put(skb, length), data, length);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 0337e2b76862..368f9c3f9dc6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		skb_queue_walk(queue, skb) {
 			*peeked = skb->peeked;
 			if (flags & MSG_PEEK) {
-				if (*off >= skb->len) {
+				if (*off >= skb->len && skb->len) {
 					*off -= skb->len;
 					continue;
 				}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9547a273b9e9..ded146b217f1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -928,24 +928,25 @@ static void parp_redo(struct sk_buff *skb)
 static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
 {
-	struct arphdr *arp;
+	const struct arphdr *arp;
+
+	if (dev->flags & IFF_NOARP ||
+	    skb->pkt_type == PACKET_OTHERHOST ||
+	    skb->pkt_type == PACKET_LOOPBACK)
+		goto freeskb;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out_of_mem;
 
	/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
	if (!pskb_may_pull(skb, arp_hdr_len(dev)))
		goto freeskb;
 
	arp = arp_hdr(skb);
-	if (arp->ar_hln != dev->addr_len ||
-	    dev->flags & IFF_NOARP ||
-	    skb->pkt_type == PACKET_OTHERHOST ||
-	    skb->pkt_type == PACKET_LOOPBACK ||
-	    arp->ar_pln != 4)
+	if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
		goto freeskb;
 
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (skb == NULL)
-		goto out_of_mem;
-
	memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
 
	return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 7302b0b7b642..83acc1405a18 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
+#include <net/ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_ipv6/ip6t_NPT.h>
@@ -18,11 +19,20 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
 {
	struct ip6t_npt_tginfo *npt = par->targinfo;
	__wsum src_sum = 0, dst_sum = 0;
+	struct in6_addr pfx;
	unsigned int i;
 
	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
		return -EINVAL;
 
+	/* Ensure that LSB of prefix is zero */
+	ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6))
+		return -EINVAL;
+	ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len);
+	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
+		return -EINVAL;
+
	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
		src_sum = csum_add(src_sum,
				(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
@@ -30,7 +40,7 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
				(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
	}
 
-	npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum);
+	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
	return 0;
 }
 
@@ -51,7 +61,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
 
		idx = i / 32;
		addr->s6_addr32[idx] &= mask;
-		addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
+		addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx];
	}
 
	if (pfx_len <= 48)
@@ -66,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
			return false;
	}
 
-	sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx],
-					 npt->adjustment);
+	sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]),
+				  csum_unfold(npt->adjustment)));
	if (sum == CSUM_MANGLED_0)
		sum = 0;
	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
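
The ip6t_NPT fix replaces raw additions on unfolded sums with proper one's-complement folding (csum_fold/csum_unfold), so end-around carries survive when the precomputed adjustment is applied to an address word. A standalone model of the 16-bit one's-complement arithmetic; the adjustment is derived a little differently from the kernel helpers, but the folding rule being demonstrated is the same:

	#include <stdint.h>
	#include <stdio.h>

	/* fold a 32-bit accumulator to 16 bits, adding carries back in */
	static uint16_t fold(uint32_t sum)
	{
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)sum;
	}

	int main(void)
	{
		/* pretend these are 16-bit words of the old and new prefix */
		uint16_t old_w[4] = { 0x2001, 0x0db8, 0x0001, 0x0000 };
		uint16_t new_w[4] = { 0x2001, 0x0db8, 0x00ff, 0x0000 };
		uint32_t old_sum = 0, new_sum = 0;
		int i;

		for (i = 0; i < 4; i++) {
			old_sum += old_w[i];
			new_sum += new_w[i];
		}

		/* adjustment that rewrites the old sum into the new one */
		uint16_t adj = fold(new_sum + (uint32_t)(uint16_t)~fold(old_sum));

		/* applying it with folding yields the new checksum exactly */
		printf("%04x == %04x\n",
		       fold((uint32_t)fold(old_sum) + adj), fold(new_sum));
		return 0;
	}

Skipping the fold, as the old code effectively did, drops the end-around carry whenever the addition overflows 16 bits, which is precisely when the mangled checksums went wrong.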
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 516fbc96feff..0479c64aa83c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2004,7 +2004,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
 {
	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(rate));
+	memcpy(sdata->vif.bss_conf.mcast_rate, rate,
+	       sizeof(int) * IEEE80211_NUM_BANDS);
 
	return 0;
 }
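
The mac80211 fix is the classic sizeof-on-array-parameter bug: rate is declared as an array in the parameter list, but within the function it has decayed to a pointer, so sizeof(rate) measured the pointer (4 or 8 bytes) instead of the whole table. Demonstration:

	#include <stdio.h>

	#define NUM_BANDS 4

	static void show(int rate[NUM_BANDS])
	{
		/* decayed: prints sizeof(int *), e.g. 8 on 64-bit */
		printf("inside callee: %zu\n", sizeof(rate));
		/* what the memcpy actually needed */
		printf("intended size: %zu\n", sizeof(int) * NUM_BANDS);
	}

	int main(void)
	{
		int rate[NUM_BANDS] = { 1, 2, 5, 11 };

		printf("at call site:  %zu\n", sizeof(rate));	/* 16 */
		show(rate);
		return 0;
	}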
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a3552929a21d..5107248af7fb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3400,6 +3400,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 
	ret = 0;
 
+out:
	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
					IEEE80211_CHAN_DISABLED)) {
		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
@@ -3408,14 +3409,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
			goto out;
		}
 
-		ret = chandef_downgrade(chandef);
+		ret |= chandef_downgrade(chandef);
	}
 
	if (chandef->width != vht_chandef.width)
		sdata_info(sdata,
-			   "local regulatory prevented using AP HT/VHT configuration, downgraded\n");
+			   "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
 
-out:
	WARN_ON_ONCE(!cfg80211_chandef_valid(chandef));
	return ret;
 }
@@ -3529,8 +3529,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
	 */
	ret = ieee80211_vif_use_channel(sdata, &chandef,
					IEEE80211_CHANCTX_SHARED);
-	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT)
+	while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) {
		ifmgd->flags |= chandef_downgrade(&chandef);
+		ret = ieee80211_vif_use_channel(sdata, &chandef,
+						IEEE80211_CHANCTX_SHARED);
+	}
	return ret;
 }
 
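
The last hunk fixes a loop whose condition tests ret but whose body never refreshed it: each downgrade has to be followed by another ieee80211_vif_use_channel() attempt, otherwise the loop just walks through widths on a stale error and returns failure even when a narrower width would have worked. The shape of the bug reduced to a toy (values are illustrative):

	#include <stdio.h>

	static int try_use_channel(int width)
	{
		return width > 20 ? -1 : 0;	/* pretend only 20 MHz fits */
	}

	int main(void)
	{
		int width = 80;
		int ret = try_use_channel(width);

		while (ret && width > 20) {
			width /= 2;			/* chandef_downgrade() */
			ret = try_use_channel(width);	/* the line the fix adds */
		}
		printf("settled at %d MHz, ret=%d\n", width, ret);
		return 0;
	}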
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 746048b13ef3..ae8ec6f27688 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -61,14 +61,27 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
	return 1;
 }
 
+static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+			  unsigned int sctphoff)
+{
+	__u32 crc32;
+	struct sk_buff *iter;
+
+	crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
+	skb_walk_frags(skb, iter)
+		crc32 = sctp_update_cksum((u8 *) iter->data,
+					  skb_headlen(iter), crc32);
+	sctph->checksum = sctp_end_cksum(crc32);
+
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
 static int
 sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
		  struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
 {
	sctp_sctphdr_t *sctph;
	unsigned int sctphoff = iph->len;
-	struct sk_buff *iter;
-	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -92,13 +105,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
	sctph = (void *) skb_network_header(skb) + sctphoff;
	sctph->source = cp->vport;
 
-	/* Calculate the checksum */
-	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
-					  crc32);
-	crc32 = sctp_end_cksum(crc32);
-	sctph->checksum = crc32;
+	sctp_nat_csum(skb, sctph, sctphoff);
 
	return 1;
 }
@@ -109,8 +116,6 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
	sctp_sctphdr_t *sctph;
	unsigned int sctphoff = iph->len;
-	struct sk_buff *iter;
-	__be32 crc32;
 
 #ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6 && iph->fragoffs)
@@ -134,13 +139,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
	sctph = (void *) skb_network_header(skb) + sctphoff;
	sctph->dest = cp->dport;
 
-	/* Calculate the checksum */
-	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
-	skb_walk_frags(skb, iter)
-		crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
-					  crc32);
-	crc32 = sctp_end_cksum(crc32);
-	sctph->checksum = crc32;
+	sctp_nat_csum(skb, sctph, sctphoff);
 
	return 1;
 }
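
The IPVS change deduplicates the SNAT and DNAT checksum code into sctp_nat_csum(), which runs the CRC over the linear head and then over each page fragment via skb_walk_frags(). The start/update/end split works because CRC32c is incremental; a userspace sketch with a bitwise CRC32c, no kernel helpers involved:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* reflected CRC32c, polynomial 0x1EDC6F41 (0x82F63B78 reversed) */
	static uint32_t crc32c_update(uint32_t crc, const uint8_t *d, size_t n)
	{
		size_t i;
		int k;

		for (i = 0; i < n; i++) {
			crc ^= d[i];
			for (k = 0; k < 8; k++)
				crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
		}
		return crc;
	}

	int main(void)
	{
		const uint8_t *msg = (const uint8_t *)"123456789";
		uint32_t whole, parts;

		/* one pass over the whole buffer */
		whole = ~crc32c_update(~0u, msg, 9);

		/* same bytes fed as a "head" and one "fragment" */
		parts = ~0u;
		parts = crc32c_update(parts, msg, 4);
		parts = crc32c_update(parts, msg + 4, 5);
		parts = ~parts;

		/* both print e3069283, the CRC32c check value */
		printf("%08x %08x\n", whole, parts);
		return 0;
	}

Setting skb->ip_summed to CHECKSUM_UNNECESSARY afterwards, which only the new helper does consistently, tells the stack not to re-verify a checksum that was just rewritten.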
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index effa10c9e4e3..44fd10c539ac 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1795,6 +1795,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
					      GFP_KERNEL);
			if (!tinfo->buf)
				goto outtinfo;
+		} else {
+			tinfo->buf = NULL;
		}
		tinfo->id = id;
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 51561eafcb72..79e8ed4ac7ce 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1135,9 +1135,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
	memset(&opt, 0, sizeof(opt));
 
	opt.rate.rate = cl->rate.rate_bps >> 3;
-	opt.buffer = cl->buffer;
+	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	opt.ceil.rate = cl->ceil.rate_bps >> 3;
-	opt.cbuffer = cl->cbuffer;
+	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
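
The HTB fix is a unit mismatch at an API boundary: cl->buffer is kept internally in nanoseconds, but the netlink dump must report psched ticks, hence PSCHED_NS2TICKS() on the way out. The general shape of the bug (the tick resolution below is made up):

	#include <stdio.h>

	#define TICKS_PER_USEC	64ULL	/* illustrative psched resolution */

	static unsigned long long ticks2ns(unsigned long long t)
	{
		return t * 1000 / TICKS_PER_USEC;
	}

	static unsigned long long ns2ticks(unsigned long long ns)
	{
		return ns * TICKS_PER_USEC / 1000;
	}

	int main(void)
	{
		unsigned long long cfg_ticks = 1600;	/* from userspace */
		unsigned long long buffer_ns = ticks2ns(cfg_ticks);

		/* dumping the internal value raw misreports the config */
		printf("raw: %llu  converted: %llu\n",
		       buffer_ns, ns2ticks(buffer_ns));	/* 25000 vs 1600 */
		return 0;
	}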
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 7521d944c0fb..cf4852814e0c 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -3,8 +3,8 @@
 #
 
 menuconfig IP_SCTP
-	tristate "The SCTP Protocol (EXPERIMENTAL)"
-	depends on INET && EXPERIMENTAL
+	tristate "The SCTP Protocol"
+	depends on INET
	depends on IPV6 || IPV6=n
	select CRYPTO
	select CRYPTO_HMAC
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f3f0f4dc31dd..391a245d5203 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -326,9 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
-		if (!laddr->valid && laddr->state != SCTP_ADDR_SRC)
+		if (!laddr->valid)
			continue;
-		if ((laddr->a.sa.sa_family == AF_INET6) &&
+		if ((laddr->state == SCTP_ADDR_SRC) &&
+		    (laddr->a.sa.sa_family == AF_INET6) &&
		    (scope <= sctp_scope(&laddr->a))) {
			bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
			if (!baddr || (matchlen < bmatchlen)) {
diff --git a/samples/Kconfig b/samples/Kconfig
index 7b6792a18c05..6181c2cc9ca0 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -5,12 +5,6 @@ menuconfig SAMPLES
 
 if SAMPLES
 
-config SAMPLE_TRACEPOINTS
-	tristate "Build tracepoints examples -- loadable modules only"
-	depends on TRACEPOINTS && m
-	help
-	  This build tracepoints example modules.
-
 config SAMPLE_TRACE_EVENTS
	tristate "Build trace_events examples -- loadable modules only"
	depends on EVENT_TRACING && m
diff --git a/samples/Makefile b/samples/Makefile
index 5ef08bba96ce..1a60c62e2045 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ tracepoints/ trace_events/ \
+obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ \
			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile
deleted file mode 100644
index 36479ad9ae14..000000000000
--- a/samples/tracepoints/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# builds the tracepoint example kernel modules;
-# then to use one (as root):  insmod <module_name.ko>
-
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o
-obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
deleted file mode 100644
index 4d46be965961..000000000000
--- a/samples/tracepoints/tp-samples-trace.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _TP_SAMPLES_TRACE_H
-#define _TP_SAMPLES_TRACE_H
-
-#include <linux/proc_fs.h>	/* for struct inode and struct file */
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(subsys_event,
-	TP_PROTO(struct inode *inode, struct file *file),
-	TP_ARGS(inode, file));
-DECLARE_TRACE_NOARGS(subsys_eventb);
-#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
deleted file mode 100644
index 744c0b9652a7..000000000000
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * tracepoint-probe-sample.c
- *
- * sample tracepoint probes.
- */
-
-#include <linux/module.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include "tp-samples-trace.h"
-
-/*
- * Here the caller only guarantees locking for struct file and struct inode.
- * Locking must therefore be done in the probe to use the dentry.
- */
-static void probe_subsys_event(void *ignore,
-			       struct inode *inode, struct file *file)
-{
-	path_get(&file->f_path);
-	dget(file->f_path.dentry);
-	printk(KERN_INFO "Event is encountered with filename %s\n",
-		file->f_path.dentry->d_name.name);
-	dput(file->f_path.dentry);
-	path_put(&file->f_path);
-}
-
-static void probe_subsys_eventb(void *ignore)
-{
-	printk(KERN_INFO "Event B is encountered\n");
-}
-
-static int __init tp_sample_trace_init(void)
-{
-	int ret;
-
-	ret = register_trace_subsys_event(probe_subsys_event, NULL);
-	WARN_ON(ret);
-	ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL);
-	WARN_ON(ret);
-
-	return 0;
-}
-
-module_init(tp_sample_trace_init);
-
-static void __exit tp_sample_trace_exit(void)
-{
-	unregister_trace_subsys_eventb(probe_subsys_eventb, NULL);
-	unregister_trace_subsys_event(probe_subsys_event, NULL);
-	tracepoint_synchronize_unregister();
-}
-
-module_exit(tp_sample_trace_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
deleted file mode 100644
index 9fcf990e5d4b..000000000000
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * tracepoint-probe-sample2.c
- *
- * 2nd sample tracepoint probes.
- */
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include "tp-samples-trace.h"
-
-/*
- * Here the caller only guarantees locking for struct file and struct inode.
- * Locking must therefore be done in the probe to use the dentry.
- */
-static void probe_subsys_event(void *ignore,
-			       struct inode *inode, struct file *file)
-{
-	printk(KERN_INFO "Event is encountered with inode number %lu\n",
-		inode->i_ino);
-}
-
-static int __init tp_sample_trace_init(void)
-{
-	int ret;
-
-	ret = register_trace_subsys_event(probe_subsys_event, NULL);
-	WARN_ON(ret);
-
-	return 0;
-}
-
-module_init(tp_sample_trace_init);
-
-static void __exit tp_sample_trace_exit(void)
-{
-	unregister_trace_subsys_event(probe_subsys_event, NULL);
-	tracepoint_synchronize_unregister();
-}
-
-module_exit(tp_sample_trace_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint Probes Samples");
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
deleted file mode 100644
index f4d89e008c32..000000000000
--- a/samples/tracepoints/tracepoint-sample.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/* tracepoint-sample.c
- *
- * Executes a tracepoint when /proc/tracepoint-sample is opened.
- *
- * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-#include "tp-samples-trace.h"
-
-DEFINE_TRACE(subsys_event);
-DEFINE_TRACE(subsys_eventb);
-
-struct proc_dir_entry *pentry_sample;
-
-static int my_open(struct inode *inode, struct file *file)
-{
-	int i;
-
-	trace_subsys_event(inode, file);
-	for (i = 0; i < 10; i++)
-		trace_subsys_eventb();
-	return -EPERM;
-}
-
-static const struct file_operations mark_ops = {
-	.open = my_open,
-	.llseek = noop_llseek,
-};
-
-static int __init sample_init(void)
-{
-	printk(KERN_ALERT "sample init\n");
-	pentry_sample = proc_create("tracepoint-sample", 0444, NULL,
-		&mark_ops);
-	if (!pentry_sample)
-		return -EPERM;
-	return 0;
-}
-
-static void __exit sample_exit(void)
-{
-	printk(KERN_ALERT "sample exit\n");
-	remove_proc_entry("tracepoint-sample", NULL);
-}
-
-module_init(sample_init)
-module_exit(sample_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Tracepoint sample");
diff --git a/tools/Makefile b/tools/Makefile
index 1f9a529fe544..798fa0ef048e 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,7 +15,7 @@ help:
	@echo '  x86_energy_perf_policy - Intel energy policy tool'
	@echo ''
	@echo 'You can do:'
-	@echo ' $$ make -C tools/<tool>_install'
+	@echo ' $$ make -C tools/ <tool>_install'
	@echo ''
	@echo '  from the kernel command line to build and install one of'
	@echo '  the tools above'
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 5a824e355d04..82b0606dcb8a 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -13,8 +13,7 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not,  write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
@@ -1224,6 +1223,34 @@ static int field_is_long(struct format_field *field)
	return 0;
 }
 
+static unsigned int type_size(const char *name)
+{
+	/* This covers all FIELD_IS_STRING types. */
+	static struct {
+		const char *type;
+		unsigned int size;
+	} table[] = {
+		{ "u8",   1 },
+		{ "u16",  2 },
+		{ "u32",  4 },
+		{ "u64",  8 },
+		{ "s8",   1 },
+		{ "s16",  2 },
+		{ "s32",  4 },
+		{ "s64",  8 },
+		{ "char", 1 },
+		{ },
+	};
+	int i;
+
+	for (i = 0; table[i].type; i++) {
+		if (!strcmp(table[i].type, name))
+			return table[i].size;
+	}
+
+	return 0;
+}
+
 static int event_read_fields(struct event_format *event, struct format_field **fields)
 {
	struct format_field *field = NULL;
@@ -1233,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
1233 int count = 0; 1260 int count = 0;
1234 1261
1235 do { 1262 do {
1263 unsigned int size_dynamic = 0;
1264
1236 type = read_token(&token); 1265 type = read_token(&token);
1237 if (type == EVENT_NEWLINE) { 1266 if (type == EVENT_NEWLINE) {
1238 free_token(token); 1267 free_token(token);
@@ -1391,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
1391 field->type = new_type; 1420 field->type = new_type;
1392 strcat(field->type, " "); 1421 strcat(field->type, " ");
1393 strcat(field->type, field->name); 1422 strcat(field->type, field->name);
1423 size_dynamic = type_size(field->name);
1394 free_token(field->name); 1424 free_token(field->name);
1395 strcat(field->type, brackets); 1425 strcat(field->type, brackets);
1396 field->name = token; 1426 field->name = token;
@@ -1463,7 +1493,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f
1463 if (read_expect_type(EVENT_ITEM, &token)) 1493 if (read_expect_type(EVENT_ITEM, &token))
1464 goto fail; 1494 goto fail;
1465 1495
1466 /* add signed type */ 1496 if (strtoul(token, NULL, 0))
1497 field->flags |= FIELD_IS_SIGNED;
1467 1498
1468 free_token(token); 1499 free_token(token);
1469 if (read_expected(EVENT_OP, ";") < 0) 1500 if (read_expected(EVENT_OP, ";") < 0)
@@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f
1478 if (field->flags & FIELD_IS_ARRAY) { 1509 if (field->flags & FIELD_IS_ARRAY) {
1479 if (field->arraylen) 1510 if (field->arraylen)
1480 field->elementsize = field->size / field->arraylen; 1511 field->elementsize = field->size / field->arraylen;
1512 else if (field->flags & FIELD_IS_DYNAMIC)
1513 field->elementsize = size_dynamic;
1481 else if (field->flags & FIELD_IS_STRING) 1514 else if (field->flags & FIELD_IS_STRING)
1482 field->elementsize = 1; 1515 field->elementsize = 1;
1483 else 1516 else if (field->flags & FIELD_IS_LONG)
1484 field->elementsize = event->pevent->long_size; 1517 field->elementsize = event->pevent ?
1518 event->pevent->long_size :
1519 sizeof(long);
1485 } else 1520 } else
1486 field->elementsize = field->size; 1521 field->elementsize = field->size;
1487 1522
@@ -1785,6 +1820,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
1785 strcmp(token, "/") == 0 || 1820 strcmp(token, "/") == 0 ||
1786 strcmp(token, "<") == 0 || 1821 strcmp(token, "<") == 0 ||
1787 strcmp(token, ">") == 0 || 1822 strcmp(token, ">") == 0 ||
1823 strcmp(token, "<=") == 0 ||
1824 strcmp(token, ">=") == 0 ||
1788 strcmp(token, "==") == 0 || 1825 strcmp(token, "==") == 0 ||
1789 strcmp(token, "!=") == 0) { 1826 strcmp(token, "!=") == 0) {
1790 1827
@@ -2481,7 +2518,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char **
2481 2518
2482 free_token(token); 2519 free_token(token);
2483 arg = alloc_arg(); 2520 arg = alloc_arg();
2484 if (!field) { 2521 if (!arg) {
2485 do_warning("%s: not enough memory!", __func__); 2522 do_warning("%s: not enough memory!", __func__);
2486 *tok = NULL; 2523 *tok = NULL;
2487 return EVENT_ERROR; 2524 return EVENT_ERROR;
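
The type_size() helper added above resolves the element size of a dynamic-array field from its type-name token, and the "signed:N" attribute now feeds FIELD_IS_SIGNED via strtoul(). A minimal standalone sketch of the same sentinel-terminated lookup pattern (the main() harness is illustrative only, not part of the library):

#include <stdio.h>
#include <string.h>

/* Sentinel-terminated name -> size table, mirroring the hunk above. */
static unsigned int type_size(const char *name)
{
	static const struct {
		const char *type;
		unsigned int size;
	} table[] = {
		{ "u8", 1 }, { "u16", 2 }, { "u32", 4 }, { "u64", 8 },
		{ "s8", 1 }, { "s16", 2 }, { "s32", 4 }, { "s64", 8 },
		{ "char", 1 },
		{ NULL, 0 },		/* sentinel ends the scan */
	};
	int i;

	for (i = 0; table[i].type; i++) {
		if (!strcmp(table[i].type, name))
			return table[i].size;
	}
	return 0;			/* unknown type: caller falls back */
}

int main(void)
{
	/* A dynamic u32[] field would get elementsize 4 this way. */
	printf("u32 -> %u bytes\n", type_size("u32"));
	return 0;
}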
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 24a4bbabc5d5..7be7e89533e4 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -13,8 +13,7 @@
13 * GNU Lesser General Public License for more details. 13 * GNU Lesser General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU Lesser General Public 15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software 16 * License along with this program; if not, see <http://www.gnu.org/licenses>
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 * 17 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 19 */
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
index bc075006966e..e76c9acb92cd 100644
--- a/tools/lib/traceevent/event-utils.h
+++ b/tools/lib/traceevent/event-utils.h
@@ -13,8 +13,7 @@
13 * GNU Lesser General Public License for more details. 13 * GNU Lesser General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU Lesser General Public 15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software 16 * License along with this program; if not, see <http://www.gnu.org/licenses>
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 * 17 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 19 */
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 5ea4326ad11f..2500e75583fc 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -13,8 +13,7 @@
13 * GNU Lesser General Public License for more details. 13 * GNU Lesser General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU Lesser General Public 15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software 16 * License along with this program; if not, see <http://www.gnu.org/licenses>
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 * 17 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 19 */
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
index f023a133abb6..bba701cf10e6 100644
--- a/tools/lib/traceevent/parse-utils.c
+++ b/tools/lib/traceevent/parse-utils.c
@@ -1,3 +1,22 @@
1/*
2 * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
3 *
4 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation;
8 * version 2.1 of the License (not later!)
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, see <http://www.gnu.org/licenses>
17 *
18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19 */
1#include <stdio.h> 20#include <stdio.h>
2#include <stdlib.h> 21#include <stdlib.h>
3#include <string.h> 22#include <string.h>
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index b1ccc923e8a5..a57db805136a 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -13,8 +13,7 @@
13 * GNU Lesser General Public License for more details. 13 * GNU Lesser General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU Lesser General Public 15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software 16 * License along with this program; if not, see <http://www.gnu.org/licenses>
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 * 17 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 19 */
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index ef6d22e879eb..eb30044a922a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -222,10 +222,14 @@ install-pdf: pdf
222#install-html: html 222#install-html: html
223# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) 223# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
224 224
225ifneq ($(MAKECMDGOALS),clean)
226ifneq ($(MAKECMDGOALS),tags)
225$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 227$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
226 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE 228 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
227 229
228-include $(OUTPUT)PERF-VERSION-FILE 230-include $(OUTPUT)PERF-VERSION-FILE
231endif
232endif
229 233
230# 234#
231# Determine "include::" file references in asciidoc files. 235# Determine "include::" file references in asciidoc files.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c8ffd9fd5c6a..5ad07ef417f0 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -61,11 +61,13 @@ OPTIONS
61 61
62--stdio:: Use the stdio interface. 62--stdio:: Use the stdio interface.
63 63
64--tui:: Use the TUI interface Use of --tui requires a tty, if one is not 64--tui:: Use the TUI interface. Use of --tui requires a tty; if one is not
65 present, as when piping to other commands, the stdio interface is 65 present, as when piping to other commands, the stdio interface is
66 used. This interfaces starts by centering on the line with more 66 used. This interfaces starts by centering on the line with more
67 samples, TAB/UNTAB cycles through the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
68 68
69--gtk:: Use the GTK interface.
70
69-C:: 71-C::
70--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can 72--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
71 be provided as a comma-separated list with no space: 0,1. Ranges of 73 be provided as a comma-separated list with no space: 0,1. Ranges of
@@ -88,6 +90,9 @@ OPTIONS
88--objdump=<path>:: 90--objdump=<path>::
89 Path to objdump binary. 91 Path to objdump binary.
90 92
93--skip-missing::
94 Skip symbols that cannot be annotated.
95
91SEE ALSO 96SEE ALSO
92-------- 97--------
93linkperf:perf-record[1], linkperf:perf-report[1] 98linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index c1057701a7dc..e9a8349a7172 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -24,6 +24,13 @@ OPTIONS
24-r:: 24-r::
25--remove=:: 25--remove=::
26 Remove specified file from the cache. 26 Remove specified file from the cache.
27-M::
28--missing=::
29 List missing build ids in the cache for the specified file.
30-u::
31--update::
32 Update the specified file in the cache. It can be used to update the
33 kallsyms kernel dso to vmlinux in order to support annotation.
27-v:: 34-v::
28--verbose:: 35--verbose::
29 Be more verbose. 36 Be more verbose.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 194f37d635df..5b3123d5721f 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -22,10 +22,6 @@ specified perf.data files.
22 22
23OPTIONS 23OPTIONS
24------- 24-------
25-M::
26--displacement::
27 Show position displacement relative to baseline.
28
29-D:: 25-D::
30--dump-raw-trace:: 26--dump-raw-trace::
31 Dump raw trace in ASCII. 27 Dump raw trace in ASCII.
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 15217345c2fa..1ceb3700ffbb 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -28,6 +28,10 @@ OPTIONS
28--verbose=:: 28--verbose=::
29 Show all fields. 29 Show all fields.
30 30
31-g::
32--group::
33 Show event group information.
34
31SEE ALSO 35SEE ALSO
32-------- 36--------
33linkperf:perf-record[1], linkperf:perf-list[1], 37linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f4d91bebd59d..02284a0067f0 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -57,11 +57,44 @@ OPTIONS
57 57
58-s:: 58-s::
59--sort=:: 59--sort=::
60 Sort by key(s): pid, comm, dso, symbol, parent, srcline. 60 Sort histogram entries by the given key(s) - multiple keys can be specified
61 in CSV format. The following sort keys are available:
62 pid, comm, dso, symbol, parent, cpu, srcline.
63
64 Each key has the following meaning:
65
66 - comm: command (name) of the task which can be read via /proc/<pid>/comm
67 - pid: command and tid of the task
68 - dso: name of library or module executed at the time of sample
69 - symbol: name of function executed at the time of sample
70 - parent: name of function matched to the parent regex filter. Unmatched
71 entries are displayed as "[other]".
72 - cpu: cpu number the task ran on at the time of sample
73 - srcline: filename and line number executed at the time of sample. The
74 DWARF debugging info must be provided.
75
76 By default, comm, dso and symbol keys are used.
77 (i.e. --sort comm,dso,symbol)
78
79 If the --branch-stack option is used, the following sort keys are also
80 available:
81 dso_from, dso_to, symbol_from, symbol_to, mispredict.
82
83 - dso_from: name of library or module branched from
84 - dso_to: name of library or module branched to
85 - symbol_from: name of function branched from
86 - symbol_to: name of function branched to
87 - mispredict: "N" for predicted branch, "Y" for mispredicted branch
88
89 The default sort keys are then changed to comm, dso_from, symbol_from,
90 dso_to and symbol_to; see '--branch-stack'.
61 91
62-p:: 92-p::
63--parent=<regex>:: 93--parent=<regex>::
64 regex filter to identify parent, see: '--sort parent' 94 A regex filter to identify parent. The parent is a caller of this
95 function and is searched for through the callchain, so it requires callchain
96 information to be recorded. The pattern is in the extended regex format and
97 defaults to "^sys_|^do_page_fault", see '--sort parent'.
65 98
66-x:: 99-x::
67--exclude-other:: 100--exclude-other::
@@ -74,7 +107,6 @@ OPTIONS
74 107
75-t:: 108-t::
76--field-separator=:: 109--field-separator=::
77
78 Use a special separator character and don't pad with spaces, replacing 110 Use a special separator character and don't pad with spaces, replacing
79 all occurrences of this separator in symbol names (and other output) 111 all occurrences of this separator in symbol names (and other output)
80 with a '.' character, so that it is the only non-valid separator. 112 with a '.' character, so that it is the only non-valid separator.
@@ -171,6 +203,9 @@ OPTIONS
171--objdump=<path>:: 203--objdump=<path>::
172 Path to objdump binary. 204 Path to objdump binary.
173 205
206--group::
207 Show event group information together.
208
174SEE ALSO 209SEE ALSO
175-------- 210--------
176linkperf:perf-stat[1], linkperf:perf-annotate[1] 211linkperf:perf-stat[1], linkperf:perf-annotate[1]
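
The multi-key --sort behaviour documented above boils down to comparing entries key by key until one differs. A hedged sketch of that chaining with simplified stand-in fields (struct entry here is an illustration, not perf's actual hist_entry):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	const char *comm;
	const char *dso;
	const char *symbol;
};

/* --sort comm,dso,symbol: the first differing key decides the order. */
static int cmp_entries(const void *pa, const void *pb)
{
	const struct entry *a = pa, *b = pb;
	int ret;

	ret = strcmp(a->comm, b->comm);
	if (ret)
		return ret;
	ret = strcmp(a->dso, b->dso);
	if (ret)
		return ret;
	return strcmp(a->symbol, b->symbol);
}

int main(void)
{
	struct entry e[2] = {
		{ "bash", "libc.so", "malloc" },
		{ "bash", "bash", "readline" },
	};

	qsort(e, 2, sizeof(e[0]), cmp_entries);
	printf("%s/%s/%s sorts first\n", e[0].comm, e[0].dso, e[0].symbol);
	return 0;
}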
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index a4027f221a53..9f1f054b8432 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -336,7 +336,6 @@ scripts listed by the 'perf script -l' command e.g.:
336---- 336----
337root@tropicana:~# perf script -l 337root@tropicana:~# perf script -l
338List of available trace scripts: 338List of available trace scripts:
339 workqueue-stats workqueue stats (ins/exe/create/destroy)
340 wakeup-latency system-wide min/max/avg wakeup latency 339 wakeup-latency system-wide min/max/avg wakeup latency
341 rw-by-file <comm> r/w activity for a program, by file 340 rw-by-file <comm> r/w activity for a program, by file
342 rw-by-pid system-wide r/w activity 341 rw-by-pid system-wide r/w activity
@@ -402,7 +401,6 @@ should show a new entry for your script:
402---- 401----
403root@tropicana:~# perf script -l 402root@tropicana:~# perf script -l
404List of available trace scripts: 403List of available trace scripts:
405 workqueue-stats workqueue stats (ins/exe/create/destroy)
406 wakeup-latency system-wide min/max/avg wakeup latency 404 wakeup-latency system-wide min/max/avg wakeup latency
407 rw-by-file <comm> r/w activity for a program, by file 405 rw-by-file <comm> r/w activity for a program, by file
408 rw-by-pid system-wide r/w activity 406 rw-by-pid system-wide r/w activity
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index cf0c3107e06e..faf4f4feebcc 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -114,6 +114,17 @@ with it. --append may be used here. Examples:
114 114
115perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage 115perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
116 116
117-I msecs::
118--interval-print msecs::
119 Print count deltas every N milliseconds (minimum: 100ms).
120 Example: perf stat -I 1000 -e cycles -a sleep 5
121
122--aggr-socket::
123Aggregate counts per processor socket for system-wide mode measurements. This
124is a useful mode to detect imbalance between sockets. To enable this mode,
125use --aggr-socket in addition to -a (system-wide). The output includes the
126socket number and the number of online processors on that socket. This is
127useful to gauge the amount of aggregation.
117 128
118EXAMPLES 129EXAMPLES
119-------- 130--------
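
The -I/--interval-print option above reports per-interval deltas rather than running totals. Conceptually (a hedged sketch, not perf's implementation; read_counter() is a dummy stand-in for reading an event count):

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/* Dummy counter source standing in for a perf event read. */
static uint64_t read_counter(void)
{
	static uint64_t total;
	return total += 1234567;	/* monotonically growing total */
}

int main(void)
{
	uint64_t prev = read_counter();
	int i;

	for (i = 0; i < 5; i++) {
		usleep(1000 * 1000);	/* -I 1000: wake every 1000 ms */
		uint64_t now = read_counter();

		printf("%d: delta %llu\n", i, (unsigned long long)(now - prev));
		prev = now;		/* print deltas, not running totals */
	}
	return 0;
}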
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index b24ac40fcd58..d1d3e5121f89 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -23,6 +23,10 @@ from 'perf test list'.
23 23
24OPTIONS 24OPTIONS
25------- 25-------
26-s::
27--skip::
28 Tests to skip (comma-separated numeric list).
29
26-v:: 30-v::
27--verbose:: 31--verbose::
28 Be more verbose. 32 Be more verbose.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 5b80d84d6b4a..a414bc95fd52 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -60,7 +60,7 @@ Default is to monitor all CPUS.
60 60
61-i:: 61-i::
62--inherit:: 62--inherit::
63 Child tasks inherit counters, only makes sens with -p option. 63 Child tasks do not inherit counters.
64 64
65-k <path>:: 65-k <path>::
66--vmlinux=<path>:: 66--vmlinux=<path>::
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 8ab05e543ef4..a2108ca1cc17 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -47,10 +47,11 @@ include config/utilities.mak
47# backtrace post unwind. 47# backtrace post unwind.
48# 48#
49# Define NO_BACKTRACE if you do not want stack backtrace debug feature 49# Define NO_BACKTRACE if you do not want stack backtrace debug feature
50#
51# Define NO_LIBNUMA if you do not want the numa perf benchmark
50 52
51$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 53$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
52 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 54 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
53-include $(OUTPUT)PERF-VERSION-FILE
54 55
55uname_M := $(shell uname -m 2>/dev/null || echo not) 56uname_M := $(shell uname -m 2>/dev/null || echo not)
56 57
@@ -148,13 +149,25 @@ RM = rm -f
148MKDIR = mkdir 149MKDIR = mkdir
149FIND = find 150FIND = find
150INSTALL = install 151INSTALL = install
152FLEX = flex
153BISON= bison
151 154
152# sparse is architecture-neutral, which means that we need to tell it 155# sparse is architecture-neutral, which means that we need to tell it
153# explicitly what architecture to check for. Fix this up for yours.. 156# explicitly what architecture to check for. Fix this up for yours..
154SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ 157SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
155 158
159ifneq ($(MAKECMDGOALS),clean)
160ifneq ($(MAKECMDGOALS),tags)
156-include config/feature-tests.mak 161-include config/feature-tests.mak
157 162
163ifeq ($(call get-executable,$(FLEX)),)
164 dummy := $(error Error: $(FLEX) is missing on this system, please install it)
165endif
166
167ifeq ($(call get-executable,$(BISON)),)
168 dummy := $(error Error: $(BISON) is missing on this system, please install it)
169endif
170
158ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) 171ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
159 CFLAGS := $(CFLAGS) -fstack-protector-all 172 CFLAGS := $(CFLAGS) -fstack-protector-all
160endif 173endif
@@ -206,6 +219,8 @@ ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y)
206 EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) 219 EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
207 BASIC_CFLAGS += -I. 220 BASIC_CFLAGS += -I.
208endif 221endif
222endif # MAKECMDGOALS != tags
223endif # MAKECMDGOALS != clean
209 224
210# Guard against environment variables 225# Guard against environment variables
211BUILTIN_OBJS = 226BUILTIN_OBJS =
@@ -230,11 +245,19 @@ endif
230LIBTRACEEVENT = $(TE_PATH)libtraceevent.a 245LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
231TE_LIB := -L$(TE_PATH) -ltraceevent 246TE_LIB := -L$(TE_PATH) -ltraceevent
232 247
248export LIBTRACEEVENT
249
250# python extension build directories
251PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
252PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
253PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
254export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
255
256python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
257
233PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) 258PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
234PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py 259PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
235 260
236export LIBTRACEEVENT
237
238$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) 261$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
239 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ 262 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
240 --quiet build_ext; \ 263 --quiet build_ext; \
@@ -269,20 +292,17 @@ endif
269 292
270export PERL_PATH 293export PERL_PATH
271 294
272FLEX = flex
273BISON= bison
274
275$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c 295$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
276 $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c 296 $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
277 297
278$(OUTPUT)util/parse-events-bison.c: util/parse-events.y 298$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
279 $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c 299 $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
280 300
281$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c 301$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
282 $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c 302 $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
283 303
284$(OUTPUT)util/pmu-bison.c: util/pmu.y 304$(OUTPUT)util/pmu-bison.c: util/pmu.y
285 $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c 305 $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
286 306
287$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c 307$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
288$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c 308$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
@@ -378,8 +398,11 @@ LIB_H += util/rblist.h
378LIB_H += util/intlist.h 398LIB_H += util/intlist.h
379LIB_H += util/perf_regs.h 399LIB_H += util/perf_regs.h
380LIB_H += util/unwind.h 400LIB_H += util/unwind.h
381LIB_H += ui/helpline.h
382LIB_H += util/vdso.h 401LIB_H += util/vdso.h
402LIB_H += ui/helpline.h
403LIB_H += ui/progress.h
404LIB_H += ui/util.h
405LIB_H += ui/ui.h
383 406
384LIB_OBJS += $(OUTPUT)util/abspath.o 407LIB_OBJS += $(OUTPUT)util/abspath.o
385LIB_OBJS += $(OUTPUT)util/alias.o 408LIB_OBJS += $(OUTPUT)util/alias.o
@@ -453,6 +476,7 @@ LIB_OBJS += $(OUTPUT)util/stat.o
453LIB_OBJS += $(OUTPUT)ui/setup.o 476LIB_OBJS += $(OUTPUT)ui/setup.o
454LIB_OBJS += $(OUTPUT)ui/helpline.o 477LIB_OBJS += $(OUTPUT)ui/helpline.o
455LIB_OBJS += $(OUTPUT)ui/progress.o 478LIB_OBJS += $(OUTPUT)ui/progress.o
479LIB_OBJS += $(OUTPUT)ui/util.o
456LIB_OBJS += $(OUTPUT)ui/hist.o 480LIB_OBJS += $(OUTPUT)ui/hist.o
457LIB_OBJS += $(OUTPUT)ui/stdio/hist.o 481LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
458 482
@@ -471,7 +495,8 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
471LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o 495LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
472LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o 496LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
473LIB_OBJS += $(OUTPUT)tests/pmu.o 497LIB_OBJS += $(OUTPUT)tests/pmu.o
474LIB_OBJS += $(OUTPUT)tests/util.o 498LIB_OBJS += $(OUTPUT)tests/hists_link.o
499LIB_OBJS += $(OUTPUT)tests/python-use.o
475 500
476BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o 501BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
477BUILTIN_OBJS += $(OUTPUT)builtin-bench.o 502BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -510,14 +535,13 @@ PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
510# 535#
511# Platform specific tweaks 536# Platform specific tweaks
512# 537#
538ifneq ($(MAKECMDGOALS),clean)
539ifneq ($(MAKECMDGOALS),tags)
513 540
514# We choose to avoid "if .. else if .. else .. endif endif" 541# We choose to avoid "if .. else if .. else .. endif endif"
515# because maintaining the nesting to match is a pain. If 542# because maintaining the nesting to match is a pain. If
516# we had "elif" things would have been much nicer... 543# we had "elif" things would have been much nicer...
517 544
518-include config.mak.autogen
519-include config.mak
520
521ifdef NO_LIBELF 545ifdef NO_LIBELF
522 NO_DWARF := 1 546 NO_DWARF := 1
523 NO_DEMANGLE := 1 547 NO_DEMANGLE := 1
@@ -557,6 +581,11 @@ else
557endif # SOURCE_LIBELF 581endif # SOURCE_LIBELF
558endif # NO_LIBELF 582endif # NO_LIBELF
559 583
584# There's only x86 (both 32 and 64) support for CFI unwind so far
585ifneq ($(ARCH),x86)
586 NO_LIBUNWIND := 1
587endif
588
560ifndef NO_LIBUNWIND 589ifndef NO_LIBUNWIND
561# for linking with debug library, run like: 590# for linking with debug library, run like:
562# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ 591# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
@@ -646,7 +675,6 @@ ifndef NO_NEWT
646 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o 675 LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
647 LIB_OBJS += $(OUTPUT)ui/browsers/map.o 676 LIB_OBJS += $(OUTPUT)ui/browsers/map.o
648 LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o 677 LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
649 LIB_OBJS += $(OUTPUT)ui/util.o
650 LIB_OBJS += $(OUTPUT)ui/tui/setup.o 678 LIB_OBJS += $(OUTPUT)ui/tui/setup.o
651 LIB_OBJS += $(OUTPUT)ui/tui/util.o 679 LIB_OBJS += $(OUTPUT)ui/tui/util.o
652 LIB_OBJS += $(OUTPUT)ui/tui/helpline.o 680 LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
@@ -655,9 +683,6 @@ ifndef NO_NEWT
655 LIB_H += ui/browsers/map.h 683 LIB_H += ui/browsers/map.h
656 LIB_H += ui/keysyms.h 684 LIB_H += ui/keysyms.h
657 LIB_H += ui/libslang.h 685 LIB_H += ui/libslang.h
658 LIB_H += ui/progress.h
659 LIB_H += ui/util.h
660 LIB_H += ui/ui.h
661 endif 686 endif
662endif 687endif
663 688
@@ -673,14 +698,12 @@ ifndef NO_GTK2
673 BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) 698 BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
674 EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) 699 EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
675 LIB_OBJS += $(OUTPUT)ui/gtk/browser.o 700 LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
701 LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
676 LIB_OBJS += $(OUTPUT)ui/gtk/setup.o 702 LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
677 LIB_OBJS += $(OUTPUT)ui/gtk/util.o 703 LIB_OBJS += $(OUTPUT)ui/gtk/util.o
678 LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o 704 LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
679 LIB_OBJS += $(OUTPUT)ui/gtk/progress.o 705 LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
680 # Make sure that it'd be included only once. 706 LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
681 ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
682 LIB_OBJS += $(OUTPUT)ui/util.o
683 endif
684 endif 707 endif
685endif 708endif
686 709
@@ -707,7 +730,7 @@ disable-python = $(eval $(disable-python_code))
707define disable-python_code 730define disable-python_code
708 BASIC_CFLAGS += -DNO_LIBPYTHON 731 BASIC_CFLAGS += -DNO_LIBPYTHON
709 $(if $(1),$(warning No $(1) was found)) 732 $(if $(1),$(warning No $(1) was found))
710 $(warning Python support won't be built) 733 $(warning Python support will not be built)
711endef 734endef
712 735
713override PYTHON := \ 736override PYTHON := \
@@ -715,19 +738,10 @@ override PYTHON := \
715 738
716ifndef PYTHON 739ifndef PYTHON
717 $(call disable-python,python interpreter) 740 $(call disable-python,python interpreter)
718 python-clean :=
719else 741else
720 742
721 PYTHON_WORD := $(call shell-wordify,$(PYTHON)) 743 PYTHON_WORD := $(call shell-wordify,$(PYTHON))
722 744
723 # python extension build directories
724 PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
725 PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
726 PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
727 export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
728
729 python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
730
731 ifdef NO_LIBPYTHON 745 ifdef NO_LIBPYTHON
732 $(call disable-python) 746 $(call disable-python)
733 else 747 else
@@ -839,10 +853,24 @@ ifndef NO_BACKTRACE
839 endif 853 endif
840endif 854endif
841 855
856ifndef NO_LIBNUMA
857 FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma
858 ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y)
859 msg := $(warning No numa.h found, disabling the 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev);
860 else
861 BASIC_CFLAGS += -DLIBNUMA_SUPPORT
862 BUILTIN_OBJS += $(OUTPUT)bench/numa.o
863 EXTLIBS += -lnuma
864 endif
865endif
866
842ifdef ASCIIDOC8 867ifdef ASCIIDOC8
843 export ASCIIDOC8 868 export ASCIIDOC8
844endif 869endif
845 870
871endif # MAKECMDGOALS != tags
872endif # MAKECMDGOALS != clean
873
846# Shell quote (do not use $(call) to accommodate ancient setups); 874# Shell quote (do not use $(call) to accommodate ancient setups);
847 875
848ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) 876ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
@@ -884,7 +912,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
884 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf 912 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
885 913
886$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 914$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
887 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ 915 $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
888 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 916 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
889 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 917 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
890 918
@@ -948,7 +976,13 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
948 976
949$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS 977$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
950 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ 978 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
951 '-DBINDIR="$(bindir_SQ)"' \ 979 '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
980 $<
981
982$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
983 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
984 -DPYTHONPATH='"$(OUTPUT)python"' \
985 -DPYTHON='"$(PYTHON_WORD)"' \
952 $< 986 $<
953 987
954$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS 988$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
@@ -1099,7 +1133,7 @@ perfexec_instdir = $(prefix)/$(perfexecdir)
1099endif 1133endif
1100perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) 1134perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
1101 1135
1102install: all try-install-man 1136install-bin: all
1103 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' 1137 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
1104 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' 1138 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
1105 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 1139 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
@@ -1120,6 +1154,8 @@ install: all try-install-man
1120 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' 1154 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
1121 $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' 1155 $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
1122 1156
1157install: install-bin try-install-man
1158
1123install-python_ext: 1159install-python_ext:
1124 $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' 1160 $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
1125 1161
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 3e975cb6232e..aacef07ebf31 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
155 if (lookup_path(buf)) 155 if (lookup_path(buf))
156 goto out; 156 goto out;
157 free(buf); 157 free(buf);
158 buf = NULL;
158 } 159 }
159 160
160 if (!strcmp(arch, "arm")) 161 if (!strcmp(arch, "arm"))
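
The one-line fix above (buf = NULL after free) matters because the surrounding loop frees the candidate buffer on each failed lookup. A minimal sketch of the pattern, assuming a lookup loop like the one in perf_session_env__lookup_binutils_path():

#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *buf = NULL;
	int i;

	for (i = 0; i < 3; i++) {
		buf = strdup("candidate-path");	/* build a candidate */
		if (!buf)
			break;
		/* ... candidate fails the lookup, discard it ... */
		free(buf);
		buf = NULL;	/* without this, freeing buf again later double-frees */
	}
	free(buf);		/* safe: buf is NULL or still owned */
	return 0;
}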
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 8f89998eeaf4..a5223e6a7b43 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,6 +1,7 @@
1#ifndef BENCH_H 1#ifndef BENCH_H
2#define BENCH_H 2#define BENCH_H
3 3
4extern int bench_numa(int argc, const char **argv, const char *prefix);
4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); 5extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); 6extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
6extern int bench_mem_memcpy(int argc, const char **argv, 7extern int bench_mem_memcpy(int argc, const char **argv,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
new file mode 100644
index 000000000000..30d1c3225b46
--- /dev/null
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1731 @@
1/*
2 * numa.c
3 *
4 * numa: Simulate NUMA-sensitive workloads and measure their NUMA performance
5 */
6
7#include "../perf.h"
8#include "../builtin.h"
9#include "../util/util.h"
10#include "../util/parse-options.h"
11
12#include "bench.h"
13
14#include <errno.h>
15#include <sched.h>
16#include <stdio.h>
17#include <assert.h>
18#include <malloc.h>
19#include <signal.h>
20#include <stdlib.h>
21#include <string.h>
22#include <unistd.h>
23#include <pthread.h>
24#include <sys/mman.h>
25#include <sys/time.h>
26#include <sys/wait.h>
27#include <sys/prctl.h>
28#include <sys/types.h>
29
30#include <numa.h>
31#include <numaif.h>
32
33/*
34 * Regular printout to the terminal, suppressed if -q is specified:
35 */
36#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
37
38/*
39 * Debug printf:
40 */
41#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
42
43struct thread_data {
44 int curr_cpu;
45 cpu_set_t bind_cpumask;
46 int bind_node;
47 u8 *process_data;
48 int process_nr;
49 int thread_nr;
50 int task_nr;
51 unsigned int loops_done;
52 u64 val;
53 u64 runtime_ns;
54 pthread_mutex_t *process_lock;
55};
56
57/* Parameters set by options: */
58
59struct params {
60 /* Startup synchronization: */
61 bool serialize_startup;
62
63 /* Task hierarchy: */
64 int nr_proc;
65 int nr_threads;
66
67 /* Working set sizes: */
68 const char *mb_global_str;
69 const char *mb_proc_str;
70 const char *mb_proc_locked_str;
71 const char *mb_thread_str;
72
73 double mb_global;
74 double mb_proc;
75 double mb_proc_locked;
76 double mb_thread;
77
78 /* Access patterns to the working set: */
79 bool data_reads;
80 bool data_writes;
81 bool data_backwards;
82 bool data_zero_memset;
83 bool data_rand_walk;
84 u32 nr_loops;
85 u32 nr_secs;
86 u32 sleep_usecs;
87
88 /* Working set initialization: */
89 bool init_zero;
90 bool init_random;
91 bool init_cpu0;
92
93 /* Misc options: */
94 int show_details;
95 int run_all;
96 int thp;
97
98 long bytes_global;
99 long bytes_process;
100 long bytes_process_locked;
101 long bytes_thread;
102
103 int nr_tasks;
104 bool show_quiet;
105
106 bool show_convergence;
107 bool measure_convergence;
108
109 int perturb_secs;
110 int nr_cpus;
111 int nr_nodes;
112
113 /* Affinity options -C and -N: */
114 char *cpu_list_str;
115 char *node_list_str;
116};
117
118
119/* Global, read-writable area, accessible to all processes and threads: */
120
121struct global_info {
122 u8 *data;
123
124 pthread_mutex_t startup_mutex;
125 int nr_tasks_started;
126
127 pthread_mutex_t startup_done_mutex;
128
129 pthread_mutex_t start_work_mutex;
130 int nr_tasks_working;
131
132 pthread_mutex_t stop_work_mutex;
133 u64 bytes_done;
134
135 struct thread_data *threads;
136
137 /* Convergence latency measurement: */
138 bool all_converged;
139 bool stop_work;
140
141 int print_once;
142
143 struct params p;
144};
145
146static struct global_info *g = NULL;
147
148static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
149static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
150
151struct params p0;
152
153static const struct option options[] = {
154 OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
155 OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
156
157 OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
158 OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
159 OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
160 OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
161
162 OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"),
163 OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"),
164 OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
165
166 OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"),
167 OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
168 OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
169 OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
170 OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
171
172
173 OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
174 OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
175 OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
176 OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
177
178 OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
179 OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
180 OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
181 OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
182 OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
183 OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode, suppress regular printouts"),
184 OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
185
186 /* Special option string parsing callbacks: */
187 OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
188 "bind the first N tasks to these specific cpus (the rest is unbound)",
189 parse_cpus_opt),
190 OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
191 "bind the first N tasks to these specific memory nodes (the rest is unbound)",
192 parse_nodes_opt),
193 OPT_END()
194};
195
196static const char * const bench_numa_usage[] = {
197 "perf bench numa <options>",
198 NULL
199};
200
201static const char * const numa_usage[] = {
202 "perf bench numa mem [<options>]",
203 NULL
204};
205
206static cpu_set_t bind_to_cpu(int target_cpu)
207{
208 cpu_set_t orig_mask, mask;
209 int ret;
210
211 ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
212 BUG_ON(ret);
213
214 CPU_ZERO(&mask);
215
216 if (target_cpu == -1) {
217 int cpu;
218
219 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
220 CPU_SET(cpu, &mask);
221 } else {
222 BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
223 CPU_SET(target_cpu, &mask);
224 }
225
226 ret = sched_setaffinity(0, sizeof(mask), &mask);
227 BUG_ON(ret);
228
229 return orig_mask;
230}
231
232static cpu_set_t bind_to_node(int target_node)
233{
234 int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
235 cpu_set_t orig_mask, mask;
236 int cpu;
237 int ret;
238
239 BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
240 BUG_ON(!cpus_per_node);
241
242 ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
243 BUG_ON(ret);
244
245 CPU_ZERO(&mask);
246
247 if (target_node == -1) {
248 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
249 CPU_SET(cpu, &mask);
250 } else {
251 int cpu_start = (target_node + 0) * cpus_per_node;
252 int cpu_stop = (target_node + 1) * cpus_per_node;
253
254 BUG_ON(cpu_stop > g->p.nr_cpus);
255
256 for (cpu = cpu_start; cpu < cpu_stop; cpu++)
257 CPU_SET(cpu, &mask);
258 }
259
260 ret = sched_setaffinity(0, sizeof(mask), &mask);
261 BUG_ON(ret);
262
263 return orig_mask;
264}
265
266static void bind_to_cpumask(cpu_set_t mask)
267{
268 int ret;
269
270 ret = sched_setaffinity(0, sizeof(mask), &mask);
271 BUG_ON(ret);
272}
273
274static void mempol_restore(void)
275{
276 int ret;
277
278 ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
279
280 BUG_ON(ret);
281}
282
283static void bind_to_memnode(int node)
284{
285 unsigned long nodemask;
286 int ret;
287
288 if (node == -1)
289 return;
290
291 BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
292 nodemask = 1L << node;
293
294 ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
295 dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
296
297 BUG_ON(ret);
298}
299
300#define HPSIZE (2*1024*1024)
301
302#define set_taskname(fmt...) \
303do { \
304 char name[20]; \
305 \
306 snprintf(name, 20, fmt); \
307 prctl(PR_SET_NAME, name); \
308} while (0)
309
310static u8 *alloc_data(ssize_t bytes0, int map_flags,
311 int init_zero, int init_cpu0, int thp, int init_random)
312{
313 cpu_set_t orig_mask;
314 ssize_t bytes;
315 u8 *buf;
316 int ret;
317
318 if (!bytes0)
319 return NULL;
320
321 /* Allocate and initialize all memory on CPU#0: */
322 if (init_cpu0) {
323 orig_mask = bind_to_node(0);
324 bind_to_memnode(0);
325 }
326
327 bytes = bytes0 + HPSIZE;
328
329 buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
330 BUG_ON(buf == (void *)-1);
331
332 if (map_flags == MAP_PRIVATE) {
333 if (thp > 0) {
334 ret = madvise(buf, bytes, MADV_HUGEPAGE);
335 if (ret && !g->print_once) {
336 g->print_once = 1;
337 printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
338 }
339 }
340 if (thp < 0) {
341 ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
342 if (ret && !g->print_once) {
343 g->print_once = 1;
344 printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
345 }
346 }
347 }
348
349 if (init_zero) {
350 bzero(buf, bytes);
351 } else {
352 /* Initialize random contents, different in each word: */
353 if (init_random) {
354 u64 *wbuf = (void *)buf;
355 long off = rand();
356 long i;
357
358 for (i = 0; i < bytes/8; i++)
359 wbuf[i] = i + off;
360 }
361 }
362
363 /* Align to 2MB boundary: */
364 buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
365
366 /* Restore affinity: */
367 if (init_cpu0) {
368 bind_to_cpumask(orig_mask);
369 mempol_restore();
370 }
371
372 return buf;
373}
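/*
 * Illustrative note on alloc_data() (example address assumed): the
 * mapping is over-allocated by HPSIZE so the 2MB alignment step cannot
 * run past its end.  If mmap() returned 0x7f0000001000, then
 *
 *	(0x7f0000001000 + HPSIZE-1) & ~(HPSIZE-1) == 0x7f0000200000
 *
 * i.e. buf is rounded up to the next 2MB boundary, and bytes0 usable
 * bytes still fit within the bytes0 + HPSIZE mapping.
 */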
374
375static void free_data(void *data, ssize_t bytes)
376{
377 int ret;
378
379 if (!data)
380 return;
381
382 ret = munmap(data, bytes);
383 BUG_ON(ret);
384}
385
386/*
387 * Create a shared memory buffer that can be shared between processes, zeroed:
388 */
389static void * zalloc_shared_data(ssize_t bytes)
390{
391 return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
392}
393
394/*
395 * Create a shared memory buffer that can be shared between processes:
396 */
397static void * setup_shared_data(ssize_t bytes)
398{
399 return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
400}
401
402/*
403 * Allocate process-local memory - this will either be shared between
404 * threads of this process, or only be accessed by this thread:
405 */
406static void * setup_private_data(ssize_t bytes)
407{
408 return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
409}
410
411/*
412 * Return a process-shared (global) mutex:
413 */
414static void init_global_mutex(pthread_mutex_t *mutex)
415{
416 pthread_mutexattr_t attr;
417
418 pthread_mutexattr_init(&attr);
419 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
420 pthread_mutex_init(mutex, &attr);
421}
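/*
 * Usage note (a sketch, not part of the patch): PTHREAD_PROCESS_SHARED
 * only takes effect if the mutex object itself lives in memory that
 * every process can see, e.g.:
 *
 *	pthread_mutex_t *m = zalloc_shared_data(sizeof(*m));
 *	init_global_mutex(m);
 *
 * which is why the global_info area holding these mutexes is expected
 * to be allocated MAP_SHARED.
 */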
422
423static int parse_cpu_list(const char *arg)
424{
425 p0.cpu_list_str = strdup(arg);
426
427 dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
428
429 return 0;
430}
431
432static void parse_setup_cpu_list(void)
433{
434 struct thread_data *td;
435 char *str0, *str;
436 int t;
437
438 if (!g->p.cpu_list_str)
439 return;
440
441 dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
442
443 str0 = str = strdup(g->p.cpu_list_str);
444 t = 0;
445
446 BUG_ON(!str);
447
448 tprintf("# binding tasks to CPUs:\n");
449 tprintf("# ");
450
451 while (true) {
452 int bind_cpu, bind_cpu_0, bind_cpu_1;
453 char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
454 int bind_len;
455 int step;
456 int mul;
457
458 tok = strsep(&str, ",");
459 if (!tok)
460 break;
461
462 tok_end = strstr(tok, "-");
463
464 dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
465 if (!tok_end) {
466 /* Single CPU specified: */
467 bind_cpu_0 = bind_cpu_1 = atol(tok);
468 } else {
469 /* CPU range specified (for example: "5-11"): */
470 bind_cpu_0 = atol(tok);
471 bind_cpu_1 = atol(tok_end + 1);
472 }
473
474 step = 1;
475 tok_step = strstr(tok, "#");
476 if (tok_step) {
477 step = atol(tok_step + 1);
478 BUG_ON(step <= 0 || step >= g->p.nr_cpus);
479 }
480
481 /*
482 * Mask length.
483 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
484 * where the _4 means the next 4 CPUs are allowed.
485 */
486 bind_len = 1;
487 tok_len = strstr(tok, "_");
488 if (tok_len) {
489 bind_len = atol(tok_len + 1);
490 BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
491 }
492
493 /* Multiplier shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
494 mul = 1;
495 tok_mul = strstr(tok, "x");
496 if (tok_mul) {
497 mul = atol(tok_mul + 1);
498 BUG_ON(mul <= 0);
499 }
500
501 dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
502
503 BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
504 BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
505 BUG_ON(bind_cpu_0 > bind_cpu_1);
506
507 for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
508 int i;
509
510 for (i = 0; i < mul; i++) {
511 int cpu;
512
513 if (t >= g->p.nr_tasks) {
514 printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
515 goto out;
516 }
517 td = g->threads + t;
518
519 if (t)
520 tprintf(",");
521 if (bind_len > 1) {
522 tprintf("%2d/%d", bind_cpu, bind_len);
523 } else {
524 tprintf("%2d", bind_cpu);
525 }
526
527 CPU_ZERO(&td->bind_cpumask);
528 for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
529 BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
530 CPU_SET(cpu, &td->bind_cpumask);
531 }
532 t++;
533 }
534 }
535 }
536out:
537
538 tprintf("\n");
539
540 if (t < g->p.nr_tasks)
541 printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
542
543 free(str0);
544}
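/*
 * Worked examples of the CPU-list grammar parsed above (derived from
 * the "-", "#", "_" and "x" tokens handled in the loop; CPU counts
 * assumed large enough):
 *
 *	--cpus 8_4-16#4	 binds tasks to CPUs 8, 12 and 16, each with a
 *			 4-CPU-wide affinity mask (8-11, 12-15, 16-19)
 *	--cpus 0x8	 binds the first 8 tasks to CPU 0
 *	--cpus 1,3,5	 binds three tasks to CPUs 1, 3 and 5
 */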
545
546static int parse_cpus_opt(const struct option *opt __maybe_unused,
547 const char *arg, int unset __maybe_unused)
548{
549 if (!arg)
550 return -1;
551
552 return parse_cpu_list(arg);
553}
554
555static int parse_node_list(const char *arg)
556{
557 p0.node_list_str = strdup(arg);
558
559 dprintf("got NODE list: {%s}\n", p0.node_list_str);
560
561 return 0;
562}
563
564static void parse_setup_node_list(void)
565{
566 struct thread_data *td;
567 char *str0, *str;
568 int t;
569
570 if (!g->p.node_list_str)
571 return;
572
573 dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
574
575 str0 = str = strdup(g->p.node_list_str);
576 t = 0;
577
578 BUG_ON(!str);
579
580 tprintf("# binding tasks to NODEs:\n");
581 tprintf("# ");
582
583 while (true) {
584 int bind_node, bind_node_0, bind_node_1;
585 char *tok, *tok_end, *tok_step, *tok_mul;
586 int step;
587 int mul;
588
589 tok = strsep(&str, ",");
590 if (!tok)
591 break;
592
593 tok_end = strstr(tok, "-");
594
595 dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
596 if (!tok_end) {
597 /* Single NODE specified: */
598 bind_node_0 = bind_node_1 = atol(tok);
599 } else {
600 /* NODE range specified (for example: "5-11"): */
601 bind_node_0 = atol(tok);
602 bind_node_1 = atol(tok_end + 1);
603 }
604
605 step = 1;
606 tok_step = strstr(tok, "#");
607 if (tok_step) {
608 step = atol(tok_step + 1);
609 BUG_ON(step <= 0 || step >= g->p.nr_nodes);
610 }
611
612 /* Multiplier shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
613 mul = 1;
614 tok_mul = strstr(tok, "x");
615 if (tok_mul) {
616 mul = atol(tok_mul + 1);
617 BUG_ON(mul <= 0);
618 }
619
620 dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
621
622 BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
623 BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
624 BUG_ON(bind_node_0 > bind_node_1);
625
626 for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
627 int i;
628
629 for (i = 0; i < mul; i++) {
630 if (t >= g->p.nr_tasks) {
631 printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
632 goto out;
633 }
634 td = g->threads + t;
635
636 if (!t)
637 tprintf(" %2d", bind_node);
638 else
639 tprintf(",%2d", bind_node);
640
641 td->bind_node = bind_node;
642 t++;
643 }
644 }
645 }
646out:
647
648 tprintf("\n");
649
650 if (t < g->p.nr_tasks)
651 printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
652
653 free(str0);
654}
655
656static int parse_nodes_opt(const struct option *opt __maybe_unused,
657 const char *arg, int unset __maybe_unused)
658{
659 if (!arg)
660 return -1;
661
662 return parse_node_list(arg);
665}
666
667#define BIT(x) (1ul << x)
668
669static inline uint32_t lfsr_32(uint32_t lfsr)
670{
671 const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
672 return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
673}
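/*
 * Note (derivation, not from the patch): this is a Galois LFSR
 * shifting right, with taps at bits 1, 5, 6 and 31, i.e. mask
 * 0x80000062; (0x0u - (lfsr & 0x1u)) expands the outgoing bit into an
 * all-ones mask, so the taps are XORed in only when a 1 is shifted
 * out.  For example, lfsr_32(1) == 0x80000062.
 */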
674
675/*
676 * Make sure there's real data dependency to RAM (when read
677 * accesses are enabled), so the compiler, the CPU and the
678 * kernel (KSM, zero page, etc.) cannot optimize away RAM
679 * accesses:
680 */
681static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
682{
683 if (g->p.data_reads)
684 val += *data;
685 if (g->p.data_writes)
686 *data = val + 1;
687 return val;
688}
689
690/*
691 * The worker process does two types of work, a forwards going
692 * loop and a backwards going loop.
693 *
694 * We do this so that on multiprocessor systems we do not create
695 * a 'train' of processing, with highly synchronized processes,
696 * skewing the whole benchmark.
697 */
698static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
699{
700 long words = bytes/sizeof(u64);
701 u64 *data = (void *)__data;
702 long chunk_0, chunk_1;
703 u64 *d0, *d, *d1;
704 long off;
705 long i;
706
707 BUG_ON(!data && words);
708 BUG_ON(data && !words);
709
710 if (!data)
711 return val;
712
713 /* Very simple memset() work variant: */
714 if (g->p.data_zero_memset && !g->p.data_rand_walk) {
715 bzero(data, bytes);
716 return val;
717 }
718
719 /* Spread out by PID/TID nr and by loop nr: */
720 chunk_0 = words/nr_max;
721 chunk_1 = words/g->p.nr_loops;
722 off = nr*chunk_0 + loop*chunk_1;
723
724 while (off >= words)
725 off -= words;
726
727 if (g->p.data_rand_walk) {
728 u32 lfsr = nr + loop + val;
729 int j;
730
731 for (i = 0; i < words/1024; i++) {
732 long start, end;
733
734 lfsr = lfsr_32(lfsr);
735
736 start = lfsr % words;
737 end = min(start + 1024, words-1);
738
739 if (g->p.data_zero_memset) {
740 bzero(data + start, (end-start) * sizeof(u64));
741 } else {
742 for (j = start; j < end; j++)
743 val = access_data(data + j, val);
744 }
745 }
746 } else if (!g->p.data_backwards || (nr + loop) & 1) {
747
748 d0 = data + off;
749 d = data + off + 1;
750 d1 = data + words;
751
752 /* Process data forwards: */
753 for (;;) {
754 if (unlikely(d >= d1))
755 d = data;
756 if (unlikely(d == d0))
757 break;
758
759 val = access_data(d, val);
760
761 d++;
762 }
763 } else {
764 /* Process data backwards: */
765
766 d0 = data + off;
767 d = data + off - 1;
768 d1 = data + words;
769
770 /* Process data backwards: */
771 for (;;) {
772 if (unlikely(d < data))
773 d = data + words-1;
774 if (unlikely(d == d0))
775 break;
776
777 val = access_data(d, val);
778
779 d--;
780 }
781 }
782
783 return val;
784}
785
786static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
787{
788 unsigned int cpu;
789
790 cpu = sched_getcpu();
791
792 g->threads[task_nr].curr_cpu = cpu;
793 prctl(0, bytes_worked);
794}
795
796#define MAX_NR_NODES 64
797
798/*
799 * Count the number of nodes a process's threads
800 * are spread out on.
801 *
802 * A count of 1 means that the process is compressed
803 * to a single node. A count of g->p.nr_nodes means it's
804 * spread out on the whole system.
805 */
806static int count_process_nodes(int process_nr)
807{
808 char node_present[MAX_NR_NODES] = { 0, };
809 int nodes;
810 int n, t;
811
812 for (t = 0; t < g->p.nr_threads; t++) {
813 struct thread_data *td;
814 int task_nr;
815 int node;
816
817 task_nr = process_nr*g->p.nr_threads + t;
818 td = g->threads + task_nr;
819
820 node = numa_node_of_cpu(td->curr_cpu);
821 node_present[node] = 1;
822 }
823
824 nodes = 0;
825
826 for (n = 0; n < MAX_NR_NODES; n++)
827 nodes += node_present[n];
828
829 return nodes;
830}
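A self-contained miniature of count_process_nodes(), run on a made-up placement (2 processes x 3 threads on 4 nodes; all values are illustrative):

#include <stdio.h>

#define NR_PROC    2
#define NR_THREADS 3
#define NR_NODES   4

/* node[p][t]: the node thread t of process p currently runs on */
static const int node[NR_PROC][NR_THREADS] = {
	{ 0, 0, 1 },	/* process 0 spans nodes {0,1}: count 2 */
	{ 2, 2, 2 },	/* process 1 sits on node 2:    count 1 */
};

static int count_proc_nodes(int p)
{
	int node_present[NR_NODES] = { 0, };
	int n, t, nodes = 0;

	for (t = 0; t < NR_THREADS; t++)
		node_present[node[p][t]] = 1;

	for (n = 0; n < NR_NODES; n++)
		nodes += node_present[n];

	return nodes;
}

int main(void)
{
	int p;

	for (p = 0; p < NR_PROC; p++)
		printf("process %d spans %d node(s)\n", p, count_proc_nodes(p));
	return 0;
}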
831
832/*
833 * Count the number of distinct processes that have threads on a node.
834 *
835 * A count of 1 means that the node contains only a single
836 * process. If all nodes on the system contain at most one
837 * process then we are well-converged.
838 */
839static int count_node_processes(int node)
840{
841 int processes = 0;
842 int t, p;
843
844 for (p = 0; p < g->p.nr_proc; p++) {
845 for (t = 0; t < g->p.nr_threads; t++) {
846 struct thread_data *td;
847 int task_nr;
848 int n;
849
850 task_nr = p*g->p.nr_threads + t;
851 td = g->threads + task_nr;
852
853 n = numa_node_of_cpu(td->curr_cpu);
854 if (n == node) {
855 processes++;
856 break;
857 }
858 }
859 }
860
861 return processes;
862}
863
864static void calc_convergence_compression(int *strong)
865{
866 unsigned int nodes_min, nodes_max;
867 int p;
868
869 nodes_min = -1;
870 nodes_max = 0;
871
872 for (p = 0; p < g->p.nr_proc; p++) {
873 unsigned int nodes = count_process_nodes(p);
874
875 nodes_min = min(nodes, nodes_min);
876 nodes_max = max(nodes, nodes_max);
877 }
878
879 /* Strong convergence: all threads compress on a single node: */
880 if (nodes_min == 1 && nodes_max == 1) {
881 *strong = 1;
882 } else {
883 *strong = 0;
884 tprintf(" {%d-%d}", nodes_min, nodes_max);
885 }
886}
887
888static void calc_convergence(double runtime_ns_max, double *convergence)
889{
890 unsigned int loops_done_min, loops_done_max;
891 int process_groups;
892 int nodes[MAX_NR_NODES];
893 int distance;
894 int nr_min;
895 int nr_max;
896 int strong;
897 int sum;
898 int nr;
899 int node;
900 int cpu;
901 int t;
902
903 if (!g->p.show_convergence && !g->p.measure_convergence)
904 return;
905
906 for (node = 0; node < g->p.nr_nodes; node++)
907 nodes[node] = 0;
908
909 loops_done_min = -1;
910 loops_done_max = 0;
911
912 for (t = 0; t < g->p.nr_tasks; t++) {
913 struct thread_data *td = g->threads + t;
914 unsigned int loops_done;
915
916 cpu = td->curr_cpu;
917
918 /* Not all threads have written it yet: */
919 if (cpu < 0)
920 continue;
921
922 node = numa_node_of_cpu(cpu);
923
924 nodes[node]++;
925
926 loops_done = td->loops_done;
927 loops_done_min = min(loops_done, loops_done_min);
928 loops_done_max = max(loops_done, loops_done_max);
929 }
930
931 nr_max = 0;
932 nr_min = g->p.nr_tasks;
933 sum = 0;
934
935 for (node = 0; node < g->p.nr_nodes; node++) {
936 nr = nodes[node];
937 nr_min = min(nr, nr_min);
938 nr_max = max(nr, nr_max);
939 sum += nr;
940 }
941 BUG_ON(nr_min > nr_max);
942
943 BUG_ON(sum > g->p.nr_tasks);
944
945 if (0 && (sum < g->p.nr_tasks))
946 return;
947
948 /*
949 * Count the number of distinct process groups present
950 * on nodes - when we are converged this will decrease
951 * to g->p.nr_proc:
952 */
953 process_groups = 0;
954
955 for (node = 0; node < g->p.nr_nodes; node++) {
956 int processes = count_node_processes(node);
957
958 nr = nodes[node];
959 tprintf(" %2d/%-2d", nr, processes);
960
961 process_groups += processes;
962 }
963
964 distance = nr_max - nr_min;
965
966 tprintf(" [%2d/%-2d]", distance, process_groups);
967
968 tprintf(" l:%3d-%-3d (%3d)",
969 loops_done_min, loops_done_max, loops_done_max-loops_done_min);
970
971 if (loops_done_min && loops_done_max) {
972 double skew = 1.0 - (double)loops_done_min/loops_done_max;
973
974 tprintf(" [%4.1f%%]", skew * 100.0);
975 }
976
977 calc_convergence_compression(&strong);
978
979 if (strong && process_groups == g->p.nr_proc) {
980 if (!*convergence) {
981 *convergence = runtime_ns_max;
982 tprintf(" (%6.1fs converged)\n", *convergence/1e9);
983 if (g->p.measure_convergence) {
984 g->all_converged = true;
985 g->stop_work = true;
986 }
987 }
988 } else {
989 if (*convergence) {
990 tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
991 *convergence = 0;
992 }
993 tprintf("\n");
994 }
995}
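A quick numeric check of the skew metric above: with loops_done_min = 90 and loops_done_max = 100, the printed skew is 10%:

#include <stdio.h>

int main(void)
{
	unsigned int loops_done_min = 90, loops_done_max = 100;
	double skew = 1.0 - (double)loops_done_min / loops_done_max;

	printf(" [%4.1f%%]\n", skew * 100.0);	/* prints [10.0%] */
	return 0;
}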
996
997static void show_summary(double runtime_ns_max, int l, double *convergence)
998{
999 tprintf("\r # %5.1f%% [%.1f mins]",
1000 (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
1001
1002 calc_convergence(runtime_ns_max, convergence);
1003
1004 if (g->p.show_details >= 0)
1005 fflush(stdout);
1006}
1007
1008static void *worker_thread(void *__tdata)
1009{
1010 struct thread_data *td = __tdata;
1011 struct timeval start0, start, stop, diff;
1012 int process_nr = td->process_nr;
1013 int thread_nr = td->thread_nr;
1014 unsigned long last_perturbance;
1015 int task_nr = td->task_nr;
1016 int details = g->p.show_details;
1017 int first_task, last_task;
1018 double convergence = 0;
1019 u64 val = td->val;
1020 double runtime_ns_max;
1021 u8 *global_data;
1022 u8 *process_data;
1023 u8 *thread_data;
1024 u64 bytes_done;
1025 long work_done;
1026 u32 l;
1027
1028 bind_to_cpumask(td->bind_cpumask);
1029 bind_to_memnode(td->bind_node);
1030
1031 set_taskname("thread %d/%d", process_nr, thread_nr);
1032
1033 global_data = g->data;
1034 process_data = td->process_data;
1035 thread_data = setup_private_data(g->p.bytes_thread);
1036
1037 bytes_done = 0;
1038
1039 last_task = 0;
1040 if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
1041 last_task = 1;
1042
1043 first_task = 0;
1044 if (process_nr == 0 && thread_nr == 0)
1045 first_task = 1;
1046
1047 if (details >= 2) {
1048 printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
1049 process_nr, thread_nr, global_data, process_data, thread_data);
1050 }
1051
1052 if (g->p.serialize_startup) {
1053 pthread_mutex_lock(&g->startup_mutex);
1054 g->nr_tasks_started++;
1055 pthread_mutex_unlock(&g->startup_mutex);
1056
1057 /* Here we will wait for the main process to start us all at once: */
1058 pthread_mutex_lock(&g->start_work_mutex);
1059 g->nr_tasks_working++;
1060
1061		/* The last one wakes the main process: */
1062 if (g->nr_tasks_working == g->p.nr_tasks)
1063 pthread_mutex_unlock(&g->startup_done_mutex);
1064
1065 pthread_mutex_unlock(&g->start_work_mutex);
1066 }
1067
1068 gettimeofday(&start0, NULL);
1069
1070 start = stop = start0;
1071 last_perturbance = start.tv_sec;
1072
1073 for (l = 0; l < g->p.nr_loops; l++) {
1074 start = stop;
1075
1076 if (g->stop_work)
1077 break;
1078
1079 val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
1080 val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
1081 val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
1082
1083 if (g->p.sleep_usecs) {
1084 pthread_mutex_lock(td->process_lock);
1085 usleep(g->p.sleep_usecs);
1086 pthread_mutex_unlock(td->process_lock);
1087 }
1088 /*
1089 * Amount of work to be done under a process-global lock:
1090 */
1091 if (g->p.bytes_process_locked) {
1092 pthread_mutex_lock(td->process_lock);
1093 val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
1094 pthread_mutex_unlock(td->process_lock);
1095 }
1096
1097 work_done = g->p.bytes_global + g->p.bytes_process +
1098 g->p.bytes_process_locked + g->p.bytes_thread;
1099
1100 update_curr_cpu(task_nr, work_done);
1101 bytes_done += work_done;
1102
1103 if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
1104 continue;
1105
1106 td->loops_done = l;
1107
1108 gettimeofday(&stop, NULL);
1109
1110 /* Check whether our max runtime timed out: */
1111 if (g->p.nr_secs) {
1112 timersub(&stop, &start0, &diff);
1113 if (diff.tv_sec >= g->p.nr_secs) {
1114 g->stop_work = true;
1115 break;
1116 }
1117 }
1118
1119 /* Update the summary at most once per second: */
1120 if (start.tv_sec == stop.tv_sec)
1121 continue;
1122
1123 /*
1124 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
1125 * by migrating to CPU#0:
1126 */
1127 if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
1128 cpu_set_t orig_mask;
1129 int target_cpu;
1130 int this_cpu;
1131
1132 last_perturbance = stop.tv_sec;
1133
1134 /*
1135 * Depending on where we are running, move into
1136 * the other half of the system, to create some
1137 * real disturbance:
1138 */
1139 this_cpu = g->threads[task_nr].curr_cpu;
1140 if (this_cpu < g->p.nr_cpus/2)
1141 target_cpu = g->p.nr_cpus-1;
1142 else
1143 target_cpu = 0;
1144
1145 orig_mask = bind_to_cpu(target_cpu);
1146
1147 /* Here we are running on the target CPU already */
1148 if (details >= 1)
1149			printf(" (injecting perturbance, moved to CPU#%d)\n", target_cpu);
1150
1151 bind_to_cpumask(orig_mask);
1152 }
1153
1154 if (details >= 3) {
1155 timersub(&stop, &start, &diff);
1156			runtime_ns_max = diff.tv_sec * 1000000000ULL;
1157			runtime_ns_max += diff.tv_usec * 1000ULL;
1158
1159 if (details >= 0) {
1160 printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
1161 process_nr, thread_nr, runtime_ns_max / bytes_done, val);
1162 }
1163 fflush(stdout);
1164 }
1165 if (!last_task)
1166 continue;
1167
1168 timersub(&stop, &start0, &diff);
1169 runtime_ns_max = diff.tv_sec * 1000000000ULL;
1170 runtime_ns_max += diff.tv_usec * 1000ULL;
1171
1172 show_summary(runtime_ns_max, l, &convergence);
1173 }
1174
1175 gettimeofday(&stop, NULL);
1176 timersub(&stop, &start0, &diff);
1177 td->runtime_ns = diff.tv_sec * 1000000000ULL;
1178 td->runtime_ns += diff.tv_usec * 1000ULL;
1179
1180 free_data(thread_data, g->p.bytes_thread);
1181
1182 pthread_mutex_lock(&g->stop_work_mutex);
1183 g->bytes_done += bytes_done;
1184 pthread_mutex_unlock(&g->stop_work_mutex);
1185
1186 return NULL;
1187}
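The startup dance above compresses into a small pattern: the main task holds start_work_mutex shut, every worker briefly passes through it, and the last worker unlocks startup_done_mutex to wake the main task. A minimal process-local sketch of that handshake (4 threads assumed; the benchmark uses process-shared mutexes across fork() instead). Note that both the benchmark and this sketch rely on behaviour POSIX leaves undefined for default mutexes, namely unlocking from a non-owner and blocking on a mutex the caller already holds; this works on Linux/glibc but is not portable:

/* Build with: cc -o handshake handshake.c -lpthread */
#include <pthread.h>
#include <stdio.h>

#define NR_THREADS 4

static pthread_mutex_t start_work_mutex   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t startup_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static int nr_tasks_working;

static void *worker(void *arg __attribute__((unused)))
{
	/* Blocks here until the main task opens the gate: */
	pthread_mutex_lock(&start_work_mutex);
	nr_tasks_working++;

	/* The last one wakes the main task (non-owner unlock): */
	if (nr_tasks_working == NR_THREADS)
		pthread_mutex_unlock(&startup_done_mutex);

	pthread_mutex_unlock(&start_work_mutex);

	/* ... real work would start here ... */
	return NULL;
}

int main(void)
{
	pthread_t tid[NR_THREADS];
	int i;

	pthread_mutex_lock(&start_work_mutex);		/* close the gate */
	pthread_mutex_lock(&startup_done_mutex);	/* arm the wakeup */

	for (i = 0; i < NR_THREADS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);

	pthread_mutex_unlock(&start_work_mutex);	/* start them all at once */
	pthread_mutex_lock(&startup_done_mutex);	/* sleep until the last one */

	printf("all %d workers started\n", NR_THREADS);

	for (i = 0; i < NR_THREADS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}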
1188
1189/*
1190 * A worker process spawns its worker threads:
1191 */
1192static void worker_process(int process_nr)
1193{
1194 pthread_mutex_t process_lock;
1195 struct thread_data *td;
1196 pthread_t *pthreads;
1197 u8 *process_data;
1198 int task_nr;
1199 int ret;
1200 int t;
1201
1202 pthread_mutex_init(&process_lock, NULL);
1203 set_taskname("process %d", process_nr);
1204
1205 /*
1206 * Pick up the memory policy and the CPU binding of our first thread,
1207 * so that we initialize memory accordingly:
1208 */
1209 task_nr = process_nr*g->p.nr_threads;
1210 td = g->threads + task_nr;
1211
1212 bind_to_memnode(td->bind_node);
1213 bind_to_cpumask(td->bind_cpumask);
1214
1215 pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
1216 process_data = setup_private_data(g->p.bytes_process);
1217
1218 if (g->p.show_details >= 3) {
1219 printf(" # process %2d global mem: %p, process mem: %p\n",
1220 process_nr, g->data, process_data);
1221 }
1222
1223 for (t = 0; t < g->p.nr_threads; t++) {
1224 task_nr = process_nr*g->p.nr_threads + t;
1225 td = g->threads + task_nr;
1226
1227 td->process_data = process_data;
1228 td->process_nr = process_nr;
1229 td->thread_nr = t;
1230 td->task_nr = task_nr;
1231 td->val = rand();
1232 td->curr_cpu = -1;
1233 td->process_lock = &process_lock;
1234
1235 ret = pthread_create(pthreads + t, NULL, worker_thread, td);
1236 BUG_ON(ret);
1237 }
1238
1239 for (t = 0; t < g->p.nr_threads; t++) {
1240 ret = pthread_join(pthreads[t], NULL);
1241 BUG_ON(ret);
1242 }
1243
1244 free_data(process_data, g->p.bytes_process);
1245 free(pthreads);
1246}
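bind_to_cpumask() and bind_to_memnode() are defined in an earlier, unquoted part of this file. As a rough, hypothetical stand-in for readers (not the patch's implementation, which may differ), pinning a task to a CPU and expressing a memory-placement preference can look like this; compile with -lnuma:

#define _GNU_SOURCE
#include <sched.h>
#include <numa.h>
#include <stdio.h>

static void bind_self_to_cpu(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask))
		perror("sched_setaffinity");
}

static void bind_self_to_memnode(int node)
{
	if (node == -1)
		numa_set_localalloc();		/* no binding: allocate locally */
	else
		numa_set_preferred(node);	/* prefer this node's RAM */
}

int main(void)
{
	if (numa_available() < 0) {
		fprintf(stderr, "no NUMA support\n");
		return 1;
	}
	bind_self_to_cpu(0);
	bind_self_to_memnode(-1);
	printf("pinned to CPU#0, local memory allocation\n");
	return 0;
}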
1247
1248static void print_summary(void)
1249{
1250 if (g->p.show_details < 0)
1251 return;
1252
1253 printf("\n ###\n");
1254 printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
1255 g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
1256 printf(" # %5dx %5ldMB global shared mem operations\n",
1257 g->p.nr_loops, g->p.bytes_global/1024/1024);
1258 printf(" # %5dx %5ldMB process shared mem operations\n",
1259 g->p.nr_loops, g->p.bytes_process/1024/1024);
1260 printf(" # %5dx %5ldMB thread local mem operations\n",
1261 g->p.nr_loops, g->p.bytes_thread/1024/1024);
1262
1263 printf(" ###\n");
1264
1265 printf("\n ###\n"); fflush(stdout);
1266}
1267
1268static void init_thread_data(void)
1269{
1270 ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
1271 int t;
1272
1273 g->threads = zalloc_shared_data(size);
1274
1275 for (t = 0; t < g->p.nr_tasks; t++) {
1276 struct thread_data *td = g->threads + t;
1277 int cpu;
1278
1279 /* Allow all nodes by default: */
1280 td->bind_node = -1;
1281
1282 /* Allow all CPUs by default: */
1283 CPU_ZERO(&td->bind_cpumask);
1284 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
1285 CPU_SET(cpu, &td->bind_cpumask);
1286 }
1287}
1288
1289static void deinit_thread_data(void)
1290{
1291 ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
1292
1293 free_data(g->threads, size);
1294}
1295
1296static int init(void)
1297{
1298 g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
1299
1300 /* Copy over options: */
1301 g->p = p0;
1302
1303 g->p.nr_cpus = numa_num_configured_cpus();
1304
1305 g->p.nr_nodes = numa_max_node() + 1;
1306
1307 /* char array in count_process_nodes(): */
1308 BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
1309
1310 if (g->p.show_quiet && !g->p.show_details)
1311 g->p.show_details = -1;
1312
1313 /* Some memory should be specified: */
1314 if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
1315 return -1;
1316
1317 if (g->p.mb_global_str) {
1318 g->p.mb_global = atof(g->p.mb_global_str);
1319 BUG_ON(g->p.mb_global < 0);
1320 }
1321
1322 if (g->p.mb_proc_str) {
1323 g->p.mb_proc = atof(g->p.mb_proc_str);
1324 BUG_ON(g->p.mb_proc < 0);
1325 }
1326
1327 if (g->p.mb_proc_locked_str) {
1328 g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
1329 BUG_ON(g->p.mb_proc_locked < 0);
1330 BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
1331 }
1332
1333 if (g->p.mb_thread_str) {
1334 g->p.mb_thread = atof(g->p.mb_thread_str);
1335 BUG_ON(g->p.mb_thread < 0);
1336 }
1337
1338 BUG_ON(g->p.nr_threads <= 0);
1339 BUG_ON(g->p.nr_proc <= 0);
1340
1341 g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
1342
1343 g->p.bytes_global = g->p.mb_global *1024L*1024L;
1344 g->p.bytes_process = g->p.mb_proc *1024L*1024L;
1345 g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
1346 g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
1347
1348 g->data = setup_shared_data(g->p.bytes_global);
1349
1350 /* Startup serialization: */
1351 init_global_mutex(&g->start_work_mutex);
1352 init_global_mutex(&g->startup_mutex);
1353 init_global_mutex(&g->startup_done_mutex);
1354 init_global_mutex(&g->stop_work_mutex);
1355
1356 init_thread_data();
1357
1358 tprintf("#\n");
1359 parse_setup_cpu_list();
1360 parse_setup_node_list();
1361 tprintf("#\n");
1362
1363 print_summary();
1364
1365 return 0;
1366}
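init() places 'g' in a shared anonymous mapping (MAP_SHARED) because the workers are forked processes, not threads: only a shared mapping keeps counters such as nr_tasks_started visible across fork(). A minimal demonstration of that property (not the benchmark's allocator):

#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	int *counter = mmap(NULL, sizeof(*counter), PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (counter == MAP_FAILED)
		return 1;
	*counter = 0;

	if (fork() == 0) {		/* child */
		*counter = 42;
		_exit(0);
	}
	wait(NULL);
	printf("parent sees %d\n", *counter);	/* 42: the write is visible */
	munmap(counter, sizeof(*counter));
	return 0;
}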
1367
1368static void deinit(void)
1369{
1370 free_data(g->data, g->p.bytes_global);
1371 g->data = NULL;
1372
1373 deinit_thread_data();
1374
1375 free_data(g, sizeof(*g));
1376 g = NULL;
1377}
1378
1379/*
1380 * Print a short or long result, depending on the verbosity setting:
1381 */
1382static void print_res(const char *name, double val,
1383 const char *txt_unit, const char *txt_short, const char *txt_long)
1384{
1385 if (!name)
1386 name = "main,";
1387
1388 if (g->p.show_quiet)
1389 printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
1390 else
1391 printf(" %14.3f %s\n", val, txt_long);
1392}
1393
1394static int __bench_numa(const char *name)
1395{
1396 struct timeval start, stop, diff;
1397 u64 runtime_ns_min, runtime_ns_sum;
1398 pid_t *pids, pid, wpid;
1399 double delta_runtime;
1400 double runtime_avg;
1401 double runtime_sec_max;
1402 double runtime_sec_min;
1403 int wait_stat;
1404 double bytes;
1405 int i, t;
1406
1407 if (init())
1408 return -1;
1409
1410 pids = zalloc(g->p.nr_proc * sizeof(*pids));
1411 pid = -1;
1412
1413	/* All threads try to acquire it; this way we can wait for them to start up: */
1414 pthread_mutex_lock(&g->start_work_mutex);
1415
1416 if (g->p.serialize_startup) {
1417 tprintf(" #\n");
1418 tprintf(" # Startup synchronization: ..."); fflush(stdout);
1419 }
1420
1421 gettimeofday(&start, NULL);
1422
1423 for (i = 0; i < g->p.nr_proc; i++) {
1424 pid = fork();
1425 dprintf(" # process %2d: PID %d\n", i, pid);
1426
1427 BUG_ON(pid < 0);
1428 if (!pid) {
1429 /* Child process: */
1430 worker_process(i);
1431
1432 exit(0);
1433 }
1434 pids[i] = pid;
1435
1436 }
1437 /* Wait for all the threads to start up: */
1438 while (g->nr_tasks_started != g->p.nr_tasks)
1439 usleep(1000);
1440
1441 BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
1442
1443 if (g->p.serialize_startup) {
1444 double startup_sec;
1445
1446 pthread_mutex_lock(&g->startup_done_mutex);
1447
1448 /* This will start all threads: */
1449 pthread_mutex_unlock(&g->start_work_mutex);
1450
1451 /* This mutex is locked - the last started thread will wake us: */
1452 pthread_mutex_lock(&g->startup_done_mutex);
1453
1454 gettimeofday(&stop, NULL);
1455
1456 timersub(&stop, &start, &diff);
1457
1458 startup_sec = diff.tv_sec * 1000000000.0;
1459 startup_sec += diff.tv_usec * 1000.0;
1460 startup_sec /= 1e9;
1461
1462 tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
1463 tprintf(" #\n");
1464
1465 start = stop;
1466 pthread_mutex_unlock(&g->startup_done_mutex);
1467 } else {
1468 gettimeofday(&start, NULL);
1469 }
1470
1471 /* Parent process: */
1472
1473
1474 for (i = 0; i < g->p.nr_proc; i++) {
1475 wpid = waitpid(pids[i], &wait_stat, 0);
1476 BUG_ON(wpid < 0);
1477 BUG_ON(!WIFEXITED(wait_stat));
1478
1479 }
1480
1481 runtime_ns_sum = 0;
1482 runtime_ns_min = -1LL;
1483
1484 for (t = 0; t < g->p.nr_tasks; t++) {
1485 u64 thread_runtime_ns = g->threads[t].runtime_ns;
1486
1487 runtime_ns_sum += thread_runtime_ns;
1488 runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
1489 }
1490
1491 gettimeofday(&stop, NULL);
1492 timersub(&stop, &start, &diff);
1493
1494 BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
1495
1496 tprintf("\n ###\n");
1497 tprintf("\n");
1498
1499 runtime_sec_max = diff.tv_sec * 1000000000.0;
1500 runtime_sec_max += diff.tv_usec * 1000.0;
1501 runtime_sec_max /= 1e9;
1502
1503 runtime_sec_min = runtime_ns_min/1e9;
1504
1505 bytes = g->bytes_done;
1506 runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
1507
1508 if (g->p.measure_convergence) {
1509 print_res(name, runtime_sec_max,
1510 "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
1511 }
1512
1513 print_res(name, runtime_sec_max,
1514 "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
1515
1516 print_res(name, runtime_sec_min,
1517 "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
1518
1519 print_res(name, runtime_avg,
1520 "secs,", "runtime-avg/thread", "secs average thread-runtime");
1521
1522 delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
1523 print_res(name, delta_runtime / runtime_sec_max * 100.0,
1524 "%,", "spread-runtime/thread", "% difference between max/avg runtime");
1525
1526 print_res(name, bytes / g->p.nr_tasks / 1e9,
1527 "GB,", "data/thread", "GB data processed, per thread");
1528
1529 print_res(name, bytes / 1e9,
1530 "GB,", "data-total", "GB data processed, total");
1531
1532 print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
1533 "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
1534
1535 print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
1536 "GB/sec,", "thread-speed", "GB/sec/thread speed");
1537
1538 print_res(name, bytes / runtime_sec_max / 1e9,
1539 "GB/sec,", "total-speed", "GB/sec total speed");
1540
1541 free(pids);
1542
1543 deinit();
1544
1545 return 0;
1546}
1547
1548#define MAX_ARGS 50
1549
1550static int command_size(const char **argv)
1551{
1552 int size = 0;
1553
1554 while (*argv) {
1555 size++;
1556 argv++;
1557 }
1558
1559 BUG_ON(size >= MAX_ARGS);
1560
1561 return size;
1562}
1563
1564static void init_params(struct params *p, const char *name, int argc, const char **argv)
1565{
1566 int i;
1567
1568 printf("\n # Running %s \"perf bench numa", name);
1569
1570 for (i = 0; i < argc; i++)
1571 printf(" %s", argv[i]);
1572
1573 printf("\"\n");
1574
1575 memset(p, 0, sizeof(*p));
1576
1577 /* Initialize nonzero defaults: */
1578
1579 p->serialize_startup = 1;
1580 p->data_reads = true;
1581 p->data_writes = true;
1582 p->data_backwards = true;
1583 p->data_rand_walk = true;
1584 p->nr_loops = -1;
1585 p->init_random = true;
1586}
1587
1588static int run_bench_numa(const char *name, const char **argv)
1589{
1590 int argc = command_size(argv);
1591
1592 init_params(&p0, name, argc, argv);
1593 argc = parse_options(argc, argv, options, bench_numa_usage, 0);
1594 if (argc)
1595 goto err;
1596
1597 if (__bench_numa(name))
1598 goto err;
1599
1600 return 0;
1601
1602err:
1603 usage_with_options(numa_usage, options);
1604 return -1;
1605}
1606
1607#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
1608#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
1609
1610#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
1611#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
1612
1613#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
1614#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
1615
1616/*
1617 * The built-in test-suite executed by "perf bench numa -a".
1618 *
1619 * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
1620 */
1621static const char *tests[][MAX_ARGS] = {
1622 /* Basic single-stream NUMA bandwidth measurements: */
1623 { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1624 "-C" , "0", "-M", "0", OPT_BW_RAM },
1625 { "RAM-bw-local-NOTHP,",
1626 "mem", "-p", "1", "-t", "1", "-P", "1024",
1627 "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
1628 { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1629 "-C" , "0", "-M", "1", OPT_BW_RAM },
1630
1631 /* 2-stream NUMA bandwidth measurements: */
1632 { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1633 "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
1634 { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1635 "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
1636
1637 /* Cross-stream NUMA bandwidth measurement: */
1638 { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1639 "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
1640
1641 /* Convergence latency measurements: */
1642 { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
1643 { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
1644 { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
1645 { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
1646 { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
1647 { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
1648 { " 4x4-convergence-NOTHP,",
1649 "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
1650 { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
1651 { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
1652 { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
1653 { " 8x4-convergence-NOTHP,",
1654 "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
1655 { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
1656 { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
1657 { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
1658 { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
1659 { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
1660
1661 /* Various NUMA process/thread layout bandwidth measurements: */
1662 { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
1663 { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
1664 { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
1665 { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
1666 { " 8x1-bw-process-NOTHP,",
1667 "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
1668 { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
1669
1670 { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
1671 { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
1672 { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
1673 { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
1674
1675 { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
1676 { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
1677 { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
1678 { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
1679 { " 4x8-bw-thread-NOTHP,",
1680 "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
1681 { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
1682 { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
1683
1684 { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
1685 { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
1686
1687 { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
1688 { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
1689 { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
1690 { "numa01-bw-thread-NOTHP,",
1691 "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
1692};
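For reference, after macro expansion the first entry above is equivalent to running: perf bench numa mem -p 1 -t 1 -P 1024 -C 0 -M 0 -s 20 -zZq --thp 1 --no-data_rand_walk. (The " 1" THP arguments are passed with a literal leading space, which integer option parsing skips over.)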
1693
1694static int bench_all(void)
1695{
1696 int nr = ARRAY_SIZE(tests);
1697 int ret;
1698 int i;
1699
1700 ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
1701 BUG_ON(ret < 0);
1702
1703 for (i = 0; i < nr; i++) {
1704 if (run_bench_numa(tests[i][0], tests[i] + 1))
1705 return -1;
1706 }
1707
1708 printf("\n");
1709
1710 return 0;
1711}
1712
1713int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
1714{
1715 init_params(&p0, "main,", argc, argv);
1716 argc = parse_options(argc, argv, options, bench_numa_usage, 0);
1717 if (argc)
1718 goto err;
1719
1720 if (p0.run_all)
1721 return bench_all();
1722
1723 if (__bench_numa(NULL))
1724 goto err;
1725
1726 return 0;
1727
1728err:
1729 usage_with_options(numa_usage, options);
1730 return -1;
1731}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index dc870cf31b79..2e6961ea3184 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -34,9 +34,10 @@
34 34
35struct perf_annotate { 35struct perf_annotate {
36 struct perf_tool tool; 36 struct perf_tool tool;
37 bool force, use_tui, use_stdio; 37 bool force, use_tui, use_stdio, use_gtk;
38 bool full_paths; 38 bool full_paths;
39 bool print_line; 39 bool print_line;
40 bool skip_missing;
40 const char *sym_hist_filter; 41 const char *sym_hist_filter;
41 const char *cpu_list; 42 const char *cpu_list;
42 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 43 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -138,9 +139,22 @@ find_next:
138 continue; 139 continue;
139 } 140 }
140 141
141 if (use_browser > 0) { 142 if (use_browser == 2) {
143 int ret;
144
145 ret = hist_entry__gtk_annotate(he, evidx, NULL);
146 if (!ret || !ann->skip_missing)
147 return;
148
149 /* skip missing symbols */
150 nd = rb_next(nd);
151 } else if (use_browser == 1) {
142 key = hist_entry__tui_annotate(he, evidx, NULL); 152 key = hist_entry__tui_annotate(he, evidx, NULL);
143 switch (key) { 153 switch (key) {
154 case -1:
155 if (!ann->skip_missing)
156 return;
157 /* fall through */
144 case K_RIGHT: 158 case K_RIGHT:
145 next = rb_next(nd); 159 next = rb_next(nd);
146 break; 160 break;
@@ -224,6 +238,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
224 ui__error("The %s file has no samples!\n", session->filename); 238 ui__error("The %s file has no samples!\n", session->filename);
225 goto out_delete; 239 goto out_delete;
226 } 240 }
241
242 if (use_browser == 2)
243 perf_gtk__show_annotations();
244
227out_delete: 245out_delete:
228 /* 246 /*
229 * Speed up the exit process, for large files this can 247 * Speed up the exit process, for large files this can
@@ -270,6 +288,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
270 "be more verbose (show symbol address, etc)"), 288 "be more verbose (show symbol address, etc)"),
271 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 289 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
272 "dump raw trace in ASCII"), 290 "dump raw trace in ASCII"),
291 OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
273 OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"), 292 OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
274 OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"), 293 OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
275 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 294 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -280,6 +299,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
280 "print matching source lines (may be slow)"), 299 "print matching source lines (may be slow)"),
281 OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, 300 OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
282 "Don't shorten the displayed pathnames"), 301 "Don't shorten the displayed pathnames"),
302 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
303 "Skip symbols that cannot be annotated"),
283 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), 304 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
284 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 305 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
285 "Look for files with symbols relative to this directory"), 306 "Look for files with symbols relative to this directory"),
@@ -300,6 +321,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
300 use_browser = 0; 321 use_browser = 0;
301 else if (annotate.use_tui) 322 else if (annotate.use_tui)
302 use_browser = 1; 323 use_browser = 1;
324 else if (annotate.use_gtk)
325 use_browser = 2;
303 326
304 setup_browser(true); 327 setup_browser(true);
305 328
@@ -309,7 +332,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
309 if (symbol__init() < 0) 332 if (symbol__init() < 0)
310 return -1; 333 return -1;
311 334
312 setup_sorting(annotate_usage, options); 335 if (setup_sorting() < 0)
336 usage_with_options(annotate_usage, options);
313 337
314 if (argc) { 338 if (argc) {
315 /* 339 /*
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cae9a5fd2ecf..77298bf892b8 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -35,6 +35,18 @@ struct bench_suite {
35/* sentinel: easy for help */ 35/* sentinel: easy for help */
36#define suite_all { "all", "Test all benchmark suites", NULL } 36#define suite_all { "all", "Test all benchmark suites", NULL }
37 37
38#ifdef LIBNUMA_SUPPORT
39static struct bench_suite numa_suites[] = {
40 { "mem",
41 "Benchmark for NUMA workloads",
42 bench_numa },
43 suite_all,
44 { NULL,
45 NULL,
46 NULL }
47};
48#endif
49
38static struct bench_suite sched_suites[] = { 50static struct bench_suite sched_suites[] = {
39 { "messaging", 51 { "messaging",
40 "Benchmark for scheduler and IPC mechanisms", 52 "Benchmark for scheduler and IPC mechanisms",
@@ -68,6 +80,11 @@ struct bench_subsys {
68}; 80};
69 81
70static struct bench_subsys subsystems[] = { 82static struct bench_subsys subsystems[] = {
83#ifdef LIBNUMA_SUPPORT
84 { "numa",
85 "NUMA scheduling and MM behavior",
86 numa_suites },
87#endif
71 { "sched", 88 { "sched",
72 "scheduler and IPC mechanism", 89 "scheduler and IPC mechanism",
73 sched_suites }, 90 sched_suites },
@@ -159,6 +176,7 @@ static void all_suite(struct bench_subsys *subsys) /* FROM HERE */
159 printf("# Running %s/%s benchmark...\n", 176 printf("# Running %s/%s benchmark...\n",
160 subsys->name, 177 subsys->name,
161 suites[i].name); 178 suites[i].name);
179 fflush(stdout);
162 180
163 argv[1] = suites[i].name; 181 argv[1] = suites[i].name;
164 suites[i].fn(1, argv, NULL); 182 suites[i].fn(1, argv, NULL);
@@ -225,6 +243,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
225 printf("# Running %s/%s benchmark...\n", 243 printf("# Running %s/%s benchmark...\n",
226 subsystems[i].name, 244 subsystems[i].name,
227 subsystems[i].suites[j].name); 245 subsystems[i].suites[j].name);
246 fflush(stdout);
228 status = subsystems[i].suites[j].fn(argc - 1, 247 status = subsystems[i].suites[j].fn(argc - 1,
229 argv + 1, prefix); 248 argv + 1, prefix);
230 goto end; 249 goto end;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index fae8b250b2ca..c96c8fa38243 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -14,6 +14,7 @@
14#include "util/parse-options.h" 14#include "util/parse-options.h"
15#include "util/strlist.h" 15#include "util/strlist.h"
16#include "util/build-id.h" 16#include "util/build-id.h"
17#include "util/session.h"
17#include "util/symbol.h" 18#include "util/symbol.h"
18 19
19static int build_id_cache__add_file(const char *filename, const char *debugdir) 20static int build_id_cache__add_file(const char *filename, const char *debugdir)
@@ -58,19 +59,89 @@ static int build_id_cache__remove_file(const char *filename,
58 return err; 59 return err;
59} 60}
60 61
62static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
63{
64 char filename[PATH_MAX];
65 u8 build_id[BUILD_ID_SIZE];
66
67 if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
68 filename__read_build_id(filename, build_id,
69 sizeof(build_id)) != sizeof(build_id)) {
70 if (errno == ENOENT)
71 return false;
72
73 pr_warning("Problems with %s file, consider removing it from the cache\n",
74 filename);
75 } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
76 pr_warning("Problems with %s file, consider removing it from the cache\n",
77 filename);
78 }
79
80 return true;
81}
82
83static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
84{
85 struct perf_session *session = perf_session__new(filename, O_RDONLY,
86 force, false, NULL);
87 if (session == NULL)
88 return -1;
89
90 perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
91 perf_session__delete(session);
92
93 return 0;
94}
95
96static int build_id_cache__update_file(const char *filename,
97 const char *debugdir)
98{
99 u8 build_id[BUILD_ID_SIZE];
100 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
101
102 int err;
103
104 if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
105 pr_debug("Couldn't read a build-id in %s\n", filename);
106 return -1;
107 }
108
109 build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
110 err = build_id_cache__remove_s(sbuild_id, debugdir);
111 if (!err) {
112 err = build_id_cache__add_s(sbuild_id, debugdir, filename,
113 false, false);
114 }
115 if (verbose)
116 pr_info("Updating %s %s: %s\n", sbuild_id, filename,
117 err ? "FAIL" : "Ok");
118
119 return err;
120}
121
61int cmd_buildid_cache(int argc, const char **argv, 122int cmd_buildid_cache(int argc, const char **argv,
62 const char *prefix __maybe_unused) 123 const char *prefix __maybe_unused)
63{ 124{
64 struct strlist *list; 125 struct strlist *list;
65 struct str_node *pos; 126 struct str_node *pos;
127 int ret = 0;
128 bool force = false;
66 char debugdir[PATH_MAX]; 129 char debugdir[PATH_MAX];
67 char const *add_name_list_str = NULL, 130 char const *add_name_list_str = NULL,
68 *remove_name_list_str = NULL; 131 *remove_name_list_str = NULL,
132 *missing_filename = NULL,
133 *update_name_list_str = NULL;
134
69 const struct option buildid_cache_options[] = { 135 const struct option buildid_cache_options[] = {
70 OPT_STRING('a', "add", &add_name_list_str, 136 OPT_STRING('a', "add", &add_name_list_str,
71 "file list", "file(s) to add"), 137 "file list", "file(s) to add"),
72 OPT_STRING('r', "remove", &remove_name_list_str, "file list", 138 OPT_STRING('r', "remove", &remove_name_list_str, "file list",
73 "file(s) to remove"), 139 "file(s) to remove"),
140 OPT_STRING('M', "missing", &missing_filename, "file",
141 "to find missing build ids in the cache"),
142 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
143 OPT_STRING('u', "update", &update_name_list_str, "file list",
144 "file(s) to update"),
74 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 145 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
75 OPT_END() 146 OPT_END()
76 }; 147 };
@@ -125,5 +196,26 @@ int cmd_buildid_cache(int argc, const char **argv,
125 } 196 }
126 } 197 }
127 198
128 return 0; 199 if (missing_filename)
200 ret = build_id_cache__fprintf_missing(missing_filename, force, stdout);
201
202 if (update_name_list_str) {
203 list = strlist__new(true, update_name_list_str);
204 if (list) {
205 strlist__for_each(pos, list)
206 if (build_id_cache__update_file(pos->s, debugdir)) {
207 if (errno == ENOENT) {
208 pr_debug("%s wasn't in the cache\n",
209 pos->s);
210 continue;
211 }
212 pr_warning("Couldn't update %s: %s\n",
213 pos->s, strerror(errno));
214 }
215
216 strlist__delete(list);
217 }
218 }
219
220 return ret;
129} 221}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index a82d99fec83e..e74366a13218 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -44,23 +44,26 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
44 return fprintf(fp, "%s\n", sbuild_id); 44 return fprintf(fp, "%s\n", sbuild_id);
45} 45}
46 46
47static bool dso__skip_buildid(struct dso *dso, int with_hits)
48{
49 return with_hits && !dso->hit;
50}
51
47static int perf_session__list_build_ids(bool force, bool with_hits) 52static int perf_session__list_build_ids(bool force, bool with_hits)
48{ 53{
49 struct perf_session *session; 54 struct perf_session *session;
50 55
51 symbol__elf_init(); 56 symbol__elf_init();
52
53 session = perf_session__new(input_name, O_RDONLY, force, false,
54 &build_id__mark_dso_hit_ops);
55 if (session == NULL)
56 return -1;
57
58 /* 57 /*
59 * See if this is an ELF file first: 58 * See if this is an ELF file first:
60 */ 59 */
61 if (filename__fprintf_build_id(session->filename, stdout)) 60 if (filename__fprintf_build_id(input_name, stdout))
62 goto out; 61 goto out;
63 62
63 session = perf_session__new(input_name, O_RDONLY, force, false,
64 &build_id__mark_dso_hit_ops);
65 if (session == NULL)
66 return -1;
64 /* 67 /*
65 * in pipe-mode, the only way to get the buildids is to parse 68 * in pipe-mode, the only way to get the buildids is to parse
66 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID 69 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
@@ -68,9 +71,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
68 if (with_hits || session->fd_pipe) 71 if (with_hits || session->fd_pipe)
69 perf_session__process_events(session, &build_id__mark_dso_hit_ops); 72 perf_session__process_events(session, &build_id__mark_dso_hit_ops);
70 73
71 perf_session__fprintf_dsos_buildid(session, stdout, with_hits); 74 perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
72out:
73 perf_session__delete(session); 75 perf_session__delete(session);
76out:
74 return 0; 77 return 0;
75} 78}
76 79
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 93b852f8a5d5..d207a97a2db1 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -23,7 +23,6 @@ static char const *input_old = "perf.data.old",
23 *input_new = "perf.data"; 23 *input_new = "perf.data";
24static char diff__default_sort_order[] = "dso,symbol"; 24static char diff__default_sort_order[] = "dso,symbol";
25static bool force; 25static bool force;
26static bool show_displacement;
27static bool show_period; 26static bool show_period;
28static bool show_formula; 27static bool show_formula;
29static bool show_baseline_only; 28static bool show_baseline_only;
@@ -146,58 +145,47 @@ static int setup_compute(const struct option *opt, const char *str,
146 return -EINVAL; 145 return -EINVAL;
147} 146}
148 147
149static double get_period_percent(struct hist_entry *he, u64 period) 148double perf_diff__period_percent(struct hist_entry *he, u64 period)
150{ 149{
151 u64 total = he->hists->stats.total_period; 150 u64 total = he->hists->stats.total_period;
152 return (period * 100.0) / total; 151 return (period * 100.0) / total;
153} 152}
154 153
155double perf_diff__compute_delta(struct hist_entry *he) 154double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
156{ 155{
157 struct hist_entry *pair = hist_entry__next_pair(he); 156 double new_percent = perf_diff__period_percent(he, he->stat.period);
158 double new_percent = get_period_percent(he, he->stat.period); 157 double old_percent = perf_diff__period_percent(pair, pair->stat.period);
159 double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0;
160 158
161 he->diff.period_ratio_delta = new_percent - old_percent; 159 he->diff.period_ratio_delta = new_percent - old_percent;
162 he->diff.computed = true; 160 he->diff.computed = true;
163 return he->diff.period_ratio_delta; 161 return he->diff.period_ratio_delta;
164} 162}
165 163
166double perf_diff__compute_ratio(struct hist_entry *he) 164double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
167{ 165{
168 struct hist_entry *pair = hist_entry__next_pair(he);
169 double new_period = he->stat.period; 166 double new_period = he->stat.period;
170 double old_period = pair ? pair->stat.period : 0; 167 double old_period = pair->stat.period;
171 168
172 he->diff.computed = true; 169 he->diff.computed = true;
173 he->diff.period_ratio = pair ? (new_period / old_period) : 0; 170 he->diff.period_ratio = new_period / old_period;
174 return he->diff.period_ratio; 171 return he->diff.period_ratio;
175} 172}
176 173
177s64 perf_diff__compute_wdiff(struct hist_entry *he) 174s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
178{ 175{
179 struct hist_entry *pair = hist_entry__next_pair(he);
180 u64 new_period = he->stat.period; 176 u64 new_period = he->stat.period;
181 u64 old_period = pair ? pair->stat.period : 0; 177 u64 old_period = pair->stat.period;
182 178
183 he->diff.computed = true; 179 he->diff.computed = true;
184 180 he->diff.wdiff = new_period * compute_wdiff_w2 -
185 if (!pair) 181 old_period * compute_wdiff_w1;
186 he->diff.wdiff = 0;
187 else
188 he->diff.wdiff = new_period * compute_wdiff_w2 -
189 old_period * compute_wdiff_w1;
190 182
191 return he->diff.wdiff; 183 return he->diff.wdiff;
192} 184}
193 185
194static int formula_delta(struct hist_entry *he, char *buf, size_t size) 186static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
187 char *buf, size_t size)
195{ 188{
196 struct hist_entry *pair = hist_entry__next_pair(he);
197
198 if (!pair)
199 return -1;
200
201 return scnprintf(buf, size, 189 return scnprintf(buf, size,
202 "(%" PRIu64 " * 100 / %" PRIu64 ") - " 190 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
203 "(%" PRIu64 " * 100 / %" PRIu64 ")", 191 "(%" PRIu64 " * 100 / %" PRIu64 ")",
@@ -205,41 +193,36 @@ static int formula_delta(struct hist_entry *he, char *buf, size_t size)
205 pair->stat.period, pair->hists->stats.total_period); 193 pair->stat.period, pair->hists->stats.total_period);
206} 194}
207 195
208static int formula_ratio(struct hist_entry *he, char *buf, size_t size) 196static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
197 char *buf, size_t size)
209{ 198{
210 struct hist_entry *pair = hist_entry__next_pair(he);
211 double new_period = he->stat.period; 199 double new_period = he->stat.period;
212 double old_period = pair ? pair->stat.period : 0; 200 double old_period = pair->stat.period;
213
214 if (!pair)
215 return -1;
216 201
217 return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period); 202 return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
218} 203}
219 204
220static int formula_wdiff(struct hist_entry *he, char *buf, size_t size) 205static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
206 char *buf, size_t size)
221{ 207{
222 struct hist_entry *pair = hist_entry__next_pair(he);
223 u64 new_period = he->stat.period; 208 u64 new_period = he->stat.period;
224 u64 old_period = pair ? pair->stat.period : 0; 209 u64 old_period = pair->stat.period;
225
226 if (!pair)
227 return -1;
228 210
229 return scnprintf(buf, size, 211 return scnprintf(buf, size,
230 "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")", 212 "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
231 new_period, compute_wdiff_w2, old_period, compute_wdiff_w1); 213 new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
232} 214}
233 215
234int perf_diff__formula(char *buf, size_t size, struct hist_entry *he) 216int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
217 char *buf, size_t size)
235{ 218{
236 switch (compute) { 219 switch (compute) {
237 case COMPUTE_DELTA: 220 case COMPUTE_DELTA:
238 return formula_delta(he, buf, size); 221 return formula_delta(he, pair, buf, size);
239 case COMPUTE_RATIO: 222 case COMPUTE_RATIO:
240 return formula_ratio(he, buf, size); 223 return formula_ratio(he, pair, buf, size);
241 case COMPUTE_WEIGHTED_DIFF: 224 case COMPUTE_WEIGHTED_DIFF:
242 return formula_wdiff(he, buf, size); 225 return formula_wdiff(he, pair, buf, size);
243 default: 226 default:
244 BUG_ON(1); 227 BUG_ON(1);
245 } 228 }
@@ -292,48 +275,6 @@ static struct perf_tool tool = {
292 .ordering_requires_timestamps = true, 275 .ordering_requires_timestamps = true,
293}; 276};
294 277
295static void insert_hist_entry_by_name(struct rb_root *root,
296 struct hist_entry *he)
297{
298 struct rb_node **p = &root->rb_node;
299 struct rb_node *parent = NULL;
300 struct hist_entry *iter;
301
302 while (*p != NULL) {
303 parent = *p;
304 iter = rb_entry(parent, struct hist_entry, rb_node);
305 if (hist_entry__cmp(he, iter) < 0)
306 p = &(*p)->rb_left;
307 else
308 p = &(*p)->rb_right;
309 }
310
311 rb_link_node(&he->rb_node, parent, p);
312 rb_insert_color(&he->rb_node, root);
313}
314
315static void hists__name_resort(struct hists *self, bool sort)
316{
317 unsigned long position = 1;
318 struct rb_root tmp = RB_ROOT;
319 struct rb_node *next = rb_first(&self->entries);
320
321 while (next != NULL) {
322 struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
323
324 next = rb_next(&n->rb_node);
325 n->position = position++;
326
327 if (sort) {
328 rb_erase(&n->rb_node, &self->entries);
329 insert_hist_entry_by_name(&tmp, n);
330 }
331 }
332
333 if (sort)
334 self->entries = tmp;
335}
336
337static struct perf_evsel *evsel_match(struct perf_evsel *evsel, 278static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
338 struct perf_evlist *evlist) 279 struct perf_evlist *evlist)
339{ 280{
@@ -346,34 +287,34 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
346 return NULL; 287 return NULL;
347} 288}
348 289
349static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name) 290static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
350{ 291{
351 struct perf_evsel *evsel; 292 struct perf_evsel *evsel;
352 293
353 list_for_each_entry(evsel, &evlist->entries, node) { 294 list_for_each_entry(evsel, &evlist->entries, node) {
354 struct hists *hists = &evsel->hists; 295 struct hists *hists = &evsel->hists;
355 296
356 hists__output_resort(hists); 297 hists__collapse_resort(hists);
357
358 /*
359 * The hists__name_resort only sets possition
360 * if name is false.
361 */
362 if (name || ((!name) && show_displacement))
363 hists__name_resort(hists, name);
364 } 298 }
365} 299}
366 300
367static void hists__baseline_only(struct hists *hists) 301static void hists__baseline_only(struct hists *hists)
368{ 302{
369 struct rb_node *next = rb_first(&hists->entries); 303 struct rb_root *root;
304 struct rb_node *next;
370 305
306 if (sort__need_collapse)
307 root = &hists->entries_collapsed;
308 else
309 root = hists->entries_in;
310
311 next = rb_first(root);
371 while (next != NULL) { 312 while (next != NULL) {
372 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); 313 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
373 314
374 next = rb_next(&he->rb_node); 315 next = rb_next(&he->rb_node_in);
375 if (!hist_entry__next_pair(he)) { 316 if (!hist_entry__next_pair(he)) {
376 rb_erase(&he->rb_node, &hists->entries); 317 rb_erase(&he->rb_node_in, root);
377 hist_entry__free(he); 318 hist_entry__free(he);
378 } 319 }
379 } 320 }
@@ -385,18 +326,21 @@ static void hists__precompute(struct hists *hists)
385 326
386 while (next != NULL) { 327 while (next != NULL) {
387 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); 328 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node);
329 struct hist_entry *pair = hist_entry__next_pair(he);
388 330
389 next = rb_next(&he->rb_node); 331 next = rb_next(&he->rb_node);
332 if (!pair)
333 continue;
390 334
391 switch (compute) { 335 switch (compute) {
392 case COMPUTE_DELTA: 336 case COMPUTE_DELTA:
393 perf_diff__compute_delta(he); 337 perf_diff__compute_delta(he, pair);
394 break; 338 break;
395 case COMPUTE_RATIO: 339 case COMPUTE_RATIO:
396 perf_diff__compute_ratio(he); 340 perf_diff__compute_ratio(he, pair);
397 break; 341 break;
398 case COMPUTE_WEIGHTED_DIFF: 342 case COMPUTE_WEIGHTED_DIFF:
399 perf_diff__compute_wdiff(he); 343 perf_diff__compute_wdiff(he, pair);
400 break; 344 break;
401 default: 345 default:
402 BUG_ON(1); 346 BUG_ON(1);
@@ -470,19 +414,30 @@ static void insert_hist_entry_by_compute(struct rb_root *root,
470 414
471static void hists__compute_resort(struct hists *hists) 415static void hists__compute_resort(struct hists *hists)
472{ 416{
473 struct rb_root tmp = RB_ROOT; 417 struct rb_root *root;
474 struct rb_node *next = rb_first(&hists->entries); 418 struct rb_node *next;
419
420 if (sort__need_collapse)
421 root = &hists->entries_collapsed;
422 else
423 root = hists->entries_in;
424
425 hists->entries = RB_ROOT;
426 next = rb_first(root);
427
428 hists->nr_entries = 0;
429 hists->stats.total_period = 0;
430 hists__reset_col_len(hists);
475 431
476 while (next != NULL) { 432 while (next != NULL) {
477 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); 433 struct hist_entry *he;
478 434
479 next = rb_next(&he->rb_node); 435 he = rb_entry(next, struct hist_entry, rb_node_in);
436 next = rb_next(&he->rb_node_in);
480 437
481 rb_erase(&he->rb_node, &hists->entries); 438 insert_hist_entry_by_compute(&hists->entries, he, compute);
482 insert_hist_entry_by_compute(&tmp, he, compute); 439 hists__inc_nr_entries(hists, he);
483 } 440 }
484
485 hists->entries = tmp;
486} 441}
487 442
488static void hists__process(struct hists *old, struct hists *new) 443static void hists__process(struct hists *old, struct hists *new)
@@ -497,6 +452,8 @@ static void hists__process(struct hists *old, struct hists *new)
497 if (sort_compute) { 452 if (sort_compute) {
498 hists__precompute(new); 453 hists__precompute(new);
499 hists__compute_resort(new); 454 hists__compute_resort(new);
455 } else {
456 hists__output_resort(new);
500 } 457 }
501 458
502 hists__fprintf(new, true, 0, 0, stdout); 459 hists__fprintf(new, true, 0, 0, stdout);
@@ -528,8 +485,8 @@ static int __cmd_diff(void)
528 evlist_old = older->evlist; 485 evlist_old = older->evlist;
529 evlist_new = newer->evlist; 486 evlist_new = newer->evlist;
530 487
531 perf_evlist__resort_hists(evlist_old, true); 488 perf_evlist__collapse_resort(evlist_old);
532 perf_evlist__resort_hists(evlist_new, false); 489 perf_evlist__collapse_resort(evlist_new);
533 490
534 list_for_each_entry(evsel, &evlist_new->entries, node) { 491 list_for_each_entry(evsel, &evlist_new->entries, node) {
535 struct perf_evsel *evsel_old; 492 struct perf_evsel *evsel_old;
@@ -562,8 +519,6 @@ static const char * const diff_usage[] = {
562static const struct option options[] = { 519static const struct option options[] = {
563 OPT_INCR('v', "verbose", &verbose, 520 OPT_INCR('v', "verbose", &verbose,
564 "be more verbose (show symbol address, etc)"), 521 "be more verbose (show symbol address, etc)"),
565 OPT_BOOLEAN('M', "displacement", &show_displacement,
566 "Show position displacement relative to baseline"),
567 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, 522 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
568 "Show only items with match in baseline"), 523 "Show only items with match in baseline"),
569 OPT_CALLBACK('c', "compute", &compute, 524 OPT_CALLBACK('c', "compute", &compute,
@@ -597,40 +552,32 @@ static const struct option options[] = {
597 552
598static void ui_init(void) 553static void ui_init(void)
599{ 554{
600 perf_hpp__init();
601
602 /* No overhead column. */
603 perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
604
605 /* 555 /*
606 * Display baseline/delta/ratio/displacement/ 556 * Display baseline/delta/ratio
607 * formula/periods columns. 557 * formula/periods columns.
608 */ 558 */
609 perf_hpp__column_enable(PERF_HPP__BASELINE, true); 559 perf_hpp__column_enable(PERF_HPP__BASELINE);
610 560
611 switch (compute) { 561 switch (compute) {
612 case COMPUTE_DELTA: 562 case COMPUTE_DELTA:
613 perf_hpp__column_enable(PERF_HPP__DELTA, true); 563 perf_hpp__column_enable(PERF_HPP__DELTA);
614 break; 564 break;
615 case COMPUTE_RATIO: 565 case COMPUTE_RATIO:
616 perf_hpp__column_enable(PERF_HPP__RATIO, true); 566 perf_hpp__column_enable(PERF_HPP__RATIO);
617 break; 567 break;
618 case COMPUTE_WEIGHTED_DIFF: 568 case COMPUTE_WEIGHTED_DIFF:
619 perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true); 569 perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF);
620 break; 570 break;
621 default: 571 default:
622 BUG_ON(1); 572 BUG_ON(1);
623 }; 573 };
624 574
625 if (show_displacement)
626 perf_hpp__column_enable(PERF_HPP__DISPL, true);
627
628 if (show_formula) 575 if (show_formula)
629 perf_hpp__column_enable(PERF_HPP__FORMULA, true); 576 perf_hpp__column_enable(PERF_HPP__FORMULA);
630 577
631 if (show_period) { 578 if (show_period) {
632 perf_hpp__column_enable(PERF_HPP__PERIOD, true); 579 perf_hpp__column_enable(PERF_HPP__PERIOD);
633 perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true); 580 perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE);
634 } 581 }
635} 582}
636 583
@@ -658,7 +605,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
658 605
659 ui_init(); 606 ui_init();
660 607
661 setup_sorting(diff_usage, options); 608 if (setup_sorting() < 0)
609 usage_with_options(diff_usage, options);
610
662 setup_pager(); 611 setup_pager();
663 612
664 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); 613 sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index c20f1dcfb7e2..05bd9dfe875c 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,39 +15,6 @@
15#include "util/parse-options.h" 15#include "util/parse-options.h"
16#include "util/session.h" 16#include "util/session.h"
17 17
18struct perf_attr_details {
19 bool freq;
20 bool verbose;
21};
22
23static int comma_printf(bool *first, const char *fmt, ...)
24{
25 va_list args;
26 int ret = 0;
27
28 if (!*first) {
29 ret += printf(",");
30 } else {
31 ret += printf(":");
32 *first = false;
33 }
34
35 va_start(args, fmt);
36 ret += vprintf(fmt, args);
37 va_end(args);
38 return ret;
39}
40
41static int __if_print(bool *first, const char *field, u64 value)
42{
43 if (value == 0)
44 return 0;
45
46 return comma_printf(first, " %s: %" PRIu64, field, value);
47}
48
49#define if_print(field) __if_print(&first, #field, pos->attr.field)
50
51static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) 18static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
52{ 19{
53 struct perf_session *session; 20 struct perf_session *session;
@@ -57,52 +24,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
57 if (session == NULL) 24 if (session == NULL)
58 return -ENOMEM; 25 return -ENOMEM;
59 26
60 list_for_each_entry(pos, &session->evlist->entries, node) { 27 list_for_each_entry(pos, &session->evlist->entries, node)
61 bool first = true; 28 perf_evsel__fprintf(pos, details, stdout);
62
63 printf("%s", perf_evsel__name(pos));
64
65 if (details->verbose || details->freq) {
66 comma_printf(&first, " sample_freq=%" PRIu64,
67 (u64)pos->attr.sample_freq);
68 }
69
70 if (details->verbose) {
71 if_print(type);
72 if_print(config);
73 if_print(config1);
74 if_print(config2);
75 if_print(size);
76 if_print(sample_type);
77 if_print(read_format);
78 if_print(disabled);
79 if_print(inherit);
80 if_print(pinned);
81 if_print(exclusive);
82 if_print(exclude_user);
83 if_print(exclude_kernel);
84 if_print(exclude_hv);
85 if_print(exclude_idle);
86 if_print(mmap);
87 if_print(comm);
88 if_print(freq);
89 if_print(inherit_stat);
90 if_print(enable_on_exec);
91 if_print(task);
92 if_print(watermark);
93 if_print(precise_ip);
94 if_print(mmap_data);
95 if_print(sample_id_all);
96 if_print(exclude_host);
97 if_print(exclude_guest);
98 if_print(__reserved_1);
99 if_print(wakeup_events);
100 if_print(bp_type);
101 if_print(branch_sample_type);
102 }
103
104 putchar('\n');
105 }
106 29
107 perf_session__delete(session); 30 perf_session__delete(session);
108 return 0; 31 return 0;
@@ -116,6 +39,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
116 OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"), 39 OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
117 OPT_BOOLEAN('v', "verbose", &details.verbose, 40 OPT_BOOLEAN('v', "verbose", &details.verbose,
118 "Show all event attr details"), 41 "Show all event attr details"),
42 OPT_BOOLEAN('g', "group", &details.event_group,
43 "Show event group information"),
119 OPT_END() 44 OPT_END()
120 }; 45 };
121 const char * const evlist_usage[] = { 46 const char * const evlist_usage[] = {
@@ -127,5 +52,10 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
127 if (argc) 52 if (argc)
128 usage_with_options(evlist_usage, options); 53 usage_with_options(evlist_usage, options);
129 54
55 if (details.event_group && (details.verbose || details.freq)) {
56 pr_err("--group option is not compatible with other options\n");
57 usage_with_options(evlist_usage, options);
58 }
59
130 return __cmd_evlist(input_name, &details); 60 return __cmd_evlist(input_name, &details);
131} 61}
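
The deleted comma_printf()/if_print() helpers implemented a small separator idiom — print ':' before the first field and ',' before each later one — that presumably lives on behind perf_evsel__fprintf(), which this diff does not show. Reduced to a stand-alone program:

#include <stdarg.h>
#include <stdio.h>

static int comma_printf(int *first, const char *fmt, ...)
{
        va_list args;
        int ret;

        ret = printf("%s", *first ? ":" : ",");
        *first = 0;

        va_start(args, fmt);
        ret += vprintf(fmt, args);
        va_end(args);
        return ret;
}

int main(void)
{
        int first = 1;

        printf("cycles");
        comma_printf(&first, " freq=%d", 4000);
        comma_printf(&first, " inherit=%d", 1);
        putchar('\n');          /* prints: cycles: freq=4000, inherit=1 */
        return 0;
}
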
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0b4b796167be..46878daca5cc 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -17,6 +17,7 @@
17#include "util/debug.h" 17#include "util/debug.h"
18 18
19#include <linux/rbtree.h> 19#include <linux/rbtree.h>
20#include <linux/string.h>
20 21
21struct alloc_stat; 22struct alloc_stat;
22typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); 23typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -340,7 +341,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
340 int n_lines, int is_caller) 341 int n_lines, int is_caller)
341{ 342{
342 struct rb_node *next; 343 struct rb_node *next;
343 struct machine *machine; 344 struct machine *machine = &session->machines.host;
344 345
345 printf("%.102s\n", graph_dotted_line); 346 printf("%.102s\n", graph_dotted_line);
346 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 347 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
@@ -349,11 +350,6 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
349 350
350 next = rb_first(root); 351 next = rb_first(root);
351 352
352 machine = perf_session__find_host_machine(session);
353 if (!machine) {
354 pr_err("__print_result: couldn't find kernel information\n");
355 return;
356 }
357 while (next && n_lines--) { 353 while (next && n_lines--) {
358 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 354 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
359 node); 355 node);
@@ -614,8 +610,7 @@ static struct sort_dimension *avail_sorts[] = {
614 &pingpong_sort_dimension, 610 &pingpong_sort_dimension,
615}; 611};
616 612
617#define NUM_AVAIL_SORTS \ 613#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
618 (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
619 614
620static int sort_dimension__add(const char *tok, struct list_head *list) 615static int sort_dimension__add(const char *tok, struct list_head *list)
621{ 616{
@@ -624,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
624 619
625 for (i = 0; i < NUM_AVAIL_SORTS; i++) { 620 for (i = 0; i < NUM_AVAIL_SORTS; i++) {
626 if (!strcmp(avail_sorts[i]->name, tok)) { 621 if (!strcmp(avail_sorts[i]->name, tok)) {
627 sort = malloc(sizeof(*sort)); 622 sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
628 if (!sort) { 623 if (!sort) {
629 pr_err("%s: malloc failed\n", __func__); 624 pr_err("%s: memdup failed\n", __func__);
630 return -1; 625 return -1;
631 } 626 }
632 memcpy(sort, avail_sorts[i], sizeof(*sort));
633 list_add_tail(&sort->list, list); 627 list_add_tail(&sort->list, list);
634 return 0; 628 return 0;
635 } 629 }
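
Two idioms appear in the kmem hunks: ARRAY_SIZE() replacing a hand-written sizeof division, and memdup() replacing a malloc()+memcpy() pair. A stand-alone approximation of both (memdup_sketch is a guess at the shape of the tools' helper, not a copy of it):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/* allocate and copy in one call, so the caller cannot forget the
 * memcpy() the old code did by hand */
static void *memdup_sketch(const void *src, size_t len)
{
        void *p = malloc(len);

        if (p)
                memcpy(p, src, len);
        return p;
}

int main(void)
{
        const int sorts[] = { 1, 2, 3, 4 };
        int *copy = memdup_sketch(sorts, sizeof(sorts));

        if (!copy)
                return 1;
        printf("%zu entries, first=%d\n", ARRAY_SIZE(sorts), copy[0]);
        free(copy);
        return 0;
}
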
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index ca3f80ebc100..37a769d7f9fe 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -973,8 +973,7 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv)
973 973
974int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) 974int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
975{ 975{
976 const char *file_name; 976 const char *file_name = NULL;
977
978 const struct option kvm_options[] = { 977 const struct option kvm_options[] = {
979 OPT_STRING('i', "input", &file_name, "file", 978 OPT_STRING('i', "input", &file_name, "file",
980 "Input file name"), 979 "Input file name"),
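
The one-line kvm fix matters because OPT_STRING only stores into its target when the option is actually given on the command line; an uninitialized file_name would otherwise be read later. An illustrative reduction (parse_opt_i stands in for the real option parser):

#include <stdio.h>

static void parse_opt_i(const char **slot, const char *value)
{
        if (value)
                *slot = value;  /* left untouched when -i is absent */
}

int main(void)
{
        const char *file_name = NULL;   /* the fix: a defined default */

        parse_opt_i(&file_name, NULL);  /* simulate: no -i given */
        printf("input: %s\n", file_name ? file_name : "perf.data");
        return 0;
}
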
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f3151d3c70ce..774c90713a53 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -224,130 +224,28 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
224 224
225static int perf_record__open(struct perf_record *rec) 225static int perf_record__open(struct perf_record *rec)
226{ 226{
227 char msg[512];
227 struct perf_evsel *pos; 228 struct perf_evsel *pos;
228 struct perf_evlist *evlist = rec->evlist; 229 struct perf_evlist *evlist = rec->evlist;
229 struct perf_session *session = rec->session; 230 struct perf_session *session = rec->session;
230 struct perf_record_opts *opts = &rec->opts; 231 struct perf_record_opts *opts = &rec->opts;
231 int rc = 0; 232 int rc = 0;
232 233
233 /* 234 perf_evlist__config(evlist, opts);
234 * Set the evsel leader links before we configure attributes,
235 * since some might depend on this info.
236 */
237 if (opts->group)
238 perf_evlist__set_leader(evlist);
239
240 perf_evlist__config_attrs(evlist, opts);
241 235
242 list_for_each_entry(pos, &evlist->entries, node) { 236 list_for_each_entry(pos, &evlist->entries, node) {
243 struct perf_event_attr *attr = &pos->attr;
244 /*
245 * Check if parse_single_tracepoint_event has already asked for
246 * PERF_SAMPLE_TIME.
247 *
248 * XXX this is kludgy but short term fix for problems introduced by
249 * eac23d1c that broke 'perf script' by having different sample_types
250 * when using multiple tracepoint events when we use a perf binary
251 * that tries to use sample_id_all on an older kernel.
252 *
253 * We need to move counter creation to perf_session, support
254 * different sample_types, etc.
255 */
256 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
257
258fallback_missing_features:
259 if (opts->exclude_guest_missing)
260 attr->exclude_guest = attr->exclude_host = 0;
261retry_sample_id:
262 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
263try_again: 237try_again:
264 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { 238 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
265 int err = errno; 239 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
266
267 if (err == EPERM || err == EACCES) {
268 ui__error_paranoid();
269 rc = -err;
270 goto out;
271 } else if (err == ENODEV && opts->target.cpu_list) {
272 pr_err("No such device - did you specify"
273 " an out-of-range profile CPU?\n");
274 rc = -err;
275 goto out;
276 } else if (err == EINVAL) {
277 if (!opts->exclude_guest_missing &&
278 (attr->exclude_guest || attr->exclude_host)) {
279 pr_debug("Old kernel, cannot exclude "
280 "guest or host samples.\n");
281 opts->exclude_guest_missing = true;
282 goto fallback_missing_features;
283 } else if (!opts->sample_id_all_missing) {
284 /*
285 * Old kernel, no attr->sample_id_type_all field
286 */
287 opts->sample_id_all_missing = true;
288 if (!opts->sample_time && !opts->raw_samples && !time_needed)
289 attr->sample_type &= ~PERF_SAMPLE_TIME;
290
291 goto retry_sample_id;
292 }
293 }
294
295 /*
296 * If it's cycles then fall back to hrtimer
297 * based cpu-clock-tick sw counter, which
298 * is always available even if no PMU support.
299 *
300 * PPC returns ENXIO until 2.6.37 (behavior changed
301 * with commit b0a873e).
302 */
303 if ((err == ENOENT || err == ENXIO)
304 && attr->type == PERF_TYPE_HARDWARE
305 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
306
307 if (verbose) 240 if (verbose)
308 ui__warning("The cycles event is not supported, " 241 ui__warning("%s\n", msg);
309 "trying to fall back to cpu-clock-ticks\n");
310 attr->type = PERF_TYPE_SOFTWARE;
311 attr->config = PERF_COUNT_SW_CPU_CLOCK;
312 if (pos->name) {
313 free(pos->name);
314 pos->name = NULL;
315 }
316 goto try_again; 242 goto try_again;
317 } 243 }
318 244
319 if (err == ENOENT) { 245 rc = -errno;
320 ui__error("The %s event is not supported.\n", 246 perf_evsel__open_strerror(pos, &opts->target,
321 perf_evsel__name(pos)); 247 errno, msg, sizeof(msg));
322 rc = -err; 248 ui__error("%s\n", msg);
323 goto out;
324 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
325 ui__error("\'precise\' request may not be supported. "
326 "Try removing 'p' modifier\n");
327 rc = -err;
328 goto out;
329 }
330
331 printf("\n");
332 error("sys_perf_event_open() syscall returned with %d "
333 "(%s) for event %s. /bin/dmesg may provide "
334 "additional information.\n",
335 err, strerror(err), perf_evsel__name(pos));
336
337#if defined(__i386__) || defined(__x86_64__)
338 if (attr->type == PERF_TYPE_HARDWARE &&
339 err == EOPNOTSUPP) {
340 pr_err("No hardware sampling interrupt available."
341 " No APIC? If so then you can boot the kernel"
342 " with the \"lapic\" boot parameter to"
343 " force-enable it.\n");
344 rc = -err;
345 goto out;
346 }
347#endif
348
349 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
350 rc = -err;
351 goto out; 249 goto out;
352 } 250 }
353 } 251 }
@@ -430,10 +328,6 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
430{ 328{
431 int err; 329 int err;
432 struct perf_tool *tool = data; 330 struct perf_tool *tool = data;
433
434 if (machine__is_host(machine))
435 return;
436
437 /* 331 /*
438 *As for guest kernel when processing subcommand record&report, 332 *As for guest kernel when processing subcommand record&report,
439 *we arrange module mmap prior to guest kernel mmap and trigger 333 *we arrange module mmap prior to guest kernel mmap and trigger
@@ -592,6 +486,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
592 goto out_delete_session; 486 goto out_delete_session;
593 } 487 }
594 488
489 if (!evsel_list->nr_groups)
490 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
491
595 /* 492 /*
596 * perf_session__delete(session) will be called at perf_record__exit() 493 * perf_session__delete(session) will be called at perf_record__exit()
597 */ 494 */
@@ -618,12 +515,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
618 515
619 rec->post_processing_offset = lseek(output, 0, SEEK_CUR); 516 rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
620 517
621 machine = perf_session__find_host_machine(session); 518 machine = &session->machines.host;
622 if (!machine) {
623 pr_err("Couldn't find native kernel information.\n");
624 err = -1;
625 goto out_delete_session;
626 }
627 519
628 if (opts->pipe_output) { 520 if (opts->pipe_output) {
629 err = perf_event__synthesize_attrs(tool, session, 521 err = perf_event__synthesize_attrs(tool, session,
@@ -676,9 +568,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
676 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 568 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
677 "Check /proc/modules permission or run as root.\n"); 569 "Check /proc/modules permission or run as root.\n");
678 570
679 if (perf_guest) 571 if (perf_guest) {
680 perf_session__process_machines(session, tool, 572 machines__process_guests(&session->machines,
681 perf_event__synthesize_guest_os); 573 perf_event__synthesize_guest_os, tool);
574 }
682 575
683 if (!opts->target.system_wide) 576 if (!opts->target.system_wide)
684 err = perf_event__synthesize_thread_map(tool, evsel_list->threads, 577 err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
@@ -875,11 +768,10 @@ static int get_stack_size(char *str, unsigned long *_size)
875} 768}
876#endif /* LIBUNWIND_SUPPORT */ 769#endif /* LIBUNWIND_SUPPORT */
877 770
878static int 771int record_parse_callchain_opt(const struct option *opt,
879parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, 772 const char *arg, int unset)
880 int unset)
881{ 773{
882 struct perf_record *rec = (struct perf_record *)opt->value; 774 struct perf_record_opts *opts = opt->value;
883 char *tok, *name, *saveptr = NULL; 775 char *tok, *name, *saveptr = NULL;
884 char *buf; 776 char *buf;
885 int ret = -1; 777 int ret = -1;
@@ -905,7 +797,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
905 /* Framepointer style */ 797 /* Framepointer style */
906 if (!strncmp(name, "fp", sizeof("fp"))) { 798 if (!strncmp(name, "fp", sizeof("fp"))) {
907 if (!strtok_r(NULL, ",", &saveptr)) { 799 if (!strtok_r(NULL, ",", &saveptr)) {
908 rec->opts.call_graph = CALLCHAIN_FP; 800 opts->call_graph = CALLCHAIN_FP;
909 ret = 0; 801 ret = 0;
910 } else 802 } else
911 pr_err("callchain: No more arguments " 803 pr_err("callchain: No more arguments "
@@ -918,20 +810,20 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
918 const unsigned long default_stack_dump_size = 8192; 810 const unsigned long default_stack_dump_size = 8192;
919 811
920 ret = 0; 812 ret = 0;
921 rec->opts.call_graph = CALLCHAIN_DWARF; 813 opts->call_graph = CALLCHAIN_DWARF;
922 rec->opts.stack_dump_size = default_stack_dump_size; 814 opts->stack_dump_size = default_stack_dump_size;
923 815
924 tok = strtok_r(NULL, ",", &saveptr); 816 tok = strtok_r(NULL, ",", &saveptr);
925 if (tok) { 817 if (tok) {
926 unsigned long size = 0; 818 unsigned long size = 0;
927 819
928 ret = get_stack_size(tok, &size); 820 ret = get_stack_size(tok, &size);
929 rec->opts.stack_dump_size = size; 821 opts->stack_dump_size = size;
930 } 822 }
931 823
932 if (!ret) 824 if (!ret)
933 pr_debug("callchain: stack dump size %d\n", 825 pr_debug("callchain: stack dump size %d\n",
934 rec->opts.stack_dump_size); 826 opts->stack_dump_size);
935#endif /* LIBUNWIND_SUPPORT */ 827#endif /* LIBUNWIND_SUPPORT */
936 } else { 828 } else {
937 pr_err("callchain: Unknown -g option " 829 pr_err("callchain: Unknown -g option "
@@ -944,7 +836,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
944 free(buf); 836 free(buf);
945 837
946 if (!ret) 838 if (!ret)
947 pr_debug("callchain: type %d\n", rec->opts.call_graph); 839 pr_debug("callchain: type %d\n", opts->call_graph);
948 840
949 return ret; 841 return ret;
950} 842}
@@ -982,9 +874,9 @@ static struct perf_record record = {
982#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " 874#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
983 875
984#ifdef LIBUNWIND_SUPPORT 876#ifdef LIBUNWIND_SUPPORT
985static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; 877const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
986#else 878#else
987static const char callchain_help[] = CALLCHAIN_HELP "[fp]"; 879const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
988#endif 880#endif
989 881
990/* 882/*
@@ -1028,9 +920,9 @@ const struct option record_options[] = {
1028 "number of mmap data pages"), 920 "number of mmap data pages"),
1029 OPT_BOOLEAN(0, "group", &record.opts.group, 921 OPT_BOOLEAN(0, "group", &record.opts.group,
1030 "put the counters into a counter group"), 922 "put the counters into a counter group"),
1031 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", 923 OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
1032 callchain_help, &parse_callchain_opt, 924 "mode[,dump_size]", record_callchain_help,
1033 "fp"), 925 &record_parse_callchain_opt, "fp"),
1034 OPT_INCR('v', "verbose", &verbose, 926 OPT_INCR('v', "verbose", &verbose,
1035 "be more verbose (show counter open errors, etc)"), 927 "be more verbose (show counter open errors, etc)"),
1036 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 928 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
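
The rewritten open loop in builtin-record.c replaces a page of per-errno special cases with two hooks: perf_evsel__fallback(), which may mutate the event (e.g. cycles to cpu-clock) and ask for a retry, and perf_evsel__open_strerror(), which builds the final message. A stand-alone sketch of that control flow, with both helpers replaced by toy stand-ins:

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct evsel_sketch {
        int hw_event;           /* pretend: 1 = hardware cycles */
        int fell_back;
};

static int open_event(struct evsel_sketch *ev)
{
        if (ev->hw_event && !ev->fell_back) {
                errno = ENOENT; /* pretend the PMU lacks this event */
                return -1;
        }
        return 0;
}

static int fallback(struct evsel_sketch *ev, int err, char *msg, size_t sz)
{
        if (err == ENOENT && !ev->fell_back) {
                ev->fell_back = 1;      /* e.g. cycles -> cpu-clock */
                snprintf(msg, sz, "falling back to software clock");
                return 1;               /* tell the caller to retry */
        }
        return 0;
}

int main(void)
{
        struct evsel_sketch ev = { .hw_event = 1 };
        char msg[128];

try_again:
        if (open_event(&ev) < 0) {
                if (fallback(&ev, errno, msg, sizeof(msg))) {
                        fprintf(stderr, "%s\n", msg);
                        goto try_again;
                }
                fprintf(stderr, "open failed: %s\n", strerror(errno));
                return 1;
        }
        puts("event opened");
        return 0;
}
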
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index fc251005dd3d..96b5a7fee4bb 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -8,6 +8,7 @@
8#include "builtin.h" 8#include "builtin.h"
9 9
10#include "util/util.h" 10#include "util/util.h"
11#include "util/cache.h"
11 12
12#include "util/annotate.h" 13#include "util/annotate.h"
13#include "util/color.h" 14#include "util/color.h"
@@ -54,6 +55,16 @@ struct perf_report {
54 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 55 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
55}; 56};
56 57
58static int perf_report_config(const char *var, const char *value, void *cb)
59{
60 if (!strcmp(var, "report.group")) {
61 symbol_conf.event_group = perf_config_bool(var, value);
62 return 0;
63 }
64
65 return perf_default_config(var, value, cb);
66}
67
57static int perf_report__add_branch_hist_entry(struct perf_tool *tool, 68static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
58 struct addr_location *al, 69 struct addr_location *al,
59 struct perf_sample *sample, 70 struct perf_sample *sample,
@@ -299,6 +310,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
299 char unit; 310 char unit;
300 unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE]; 311 unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE];
301 u64 nr_events = self->stats.total_period; 312 u64 nr_events = self->stats.total_period;
313 struct perf_evsel *evsel = hists_to_evsel(self);
314 char buf[512];
315 size_t size = sizeof(buf);
316
317 if (symbol_conf.event_group && evsel->nr_members > 1) {
318 struct perf_evsel *pos;
319
320 perf_evsel__group_desc(evsel, buf, size);
321 evname = buf;
322
323 for_each_group_member(pos, evsel) {
324 nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
325 nr_events += pos->hists.stats.total_period;
326 }
327 }
302 328
303 nr_samples = convert_unit(nr_samples, &unit); 329 nr_samples = convert_unit(nr_samples, &unit);
304 ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit); 330 ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
@@ -319,6 +345,10 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
319 struct hists *hists = &pos->hists; 345 struct hists *hists = &pos->hists;
320 const char *evname = perf_evsel__name(pos); 346 const char *evname = perf_evsel__name(pos);
321 347
348 if (symbol_conf.event_group &&
349 !perf_evsel__is_group_leader(pos))
350 continue;
351
322 hists__fprintf_nr_sample_events(hists, evname, stdout); 352 hists__fprintf_nr_sample_events(hists, evname, stdout);
323 hists__fprintf(hists, true, 0, 0, stdout); 353 hists__fprintf(hists, true, 0, 0, stdout);
324 fprintf(stdout, "\n\n"); 354 fprintf(stdout, "\n\n");
@@ -372,7 +402,7 @@ static int __cmd_report(struct perf_report *rep)
372 if (ret) 402 if (ret)
373 goto out_delete; 403 goto out_delete;
374 404
375 kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION]; 405 kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
376 kernel_kmap = map__kmap(kernel_map); 406 kernel_kmap = map__kmap(kernel_map);
377 if (kernel_map == NULL || 407 if (kernel_map == NULL ||
378 (kernel_map->dso->hit && 408 (kernel_map->dso->hit &&
@@ -416,8 +446,16 @@ static int __cmd_report(struct perf_report *rep)
416 hists->symbol_filter_str = rep->symbol_filter_str; 446 hists->symbol_filter_str = rep->symbol_filter_str;
417 447
418 hists__collapse_resort(hists); 448 hists__collapse_resort(hists);
419 hists__output_resort(hists);
420 nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE]; 449 nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
450
451 /* Non-group events are considered as leader */
452 if (symbol_conf.event_group &&
453 !perf_evsel__is_group_leader(pos)) {
454 struct hists *leader_hists = &pos->leader->hists;
455
456 hists__match(leader_hists, hists);
457 hists__link(leader_hists, hists);
458 }
421 } 459 }
422 460
423 if (nr_samples == 0) { 461 if (nr_samples == 0) {
@@ -425,11 +463,22 @@ static int __cmd_report(struct perf_report *rep)
425 goto out_delete; 463 goto out_delete;
426 } 464 }
427 465
466 list_for_each_entry(pos, &session->evlist->entries, node)
467 hists__output_resort(&pos->hists);
468
428 if (use_browser > 0) { 469 if (use_browser > 0) {
429 if (use_browser == 1) { 470 if (use_browser == 1) {
430 perf_evlist__tui_browse_hists(session->evlist, help, 471 ret = perf_evlist__tui_browse_hists(session->evlist,
431 NULL, 472 help,
432 &session->header.env); 473 NULL,
474 &session->header.env);
475 /*
476 * Usually "ret" is the last pressed key, and we only
477 * care if the key notifies us to switch data file.
478 */
479 if (ret != K_SWITCH_INPUT_DATA)
480 ret = 0;
481
433 } else if (use_browser == 2) { 482 } else if (use_browser == 2) {
434 perf_evlist__gtk_browse_hists(session->evlist, help, 483 perf_evlist__gtk_browse_hists(session->evlist, help,
435 NULL); 484 NULL);
@@ -595,8 +644,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
595 OPT_BOOLEAN(0, "stdio", &report.use_stdio, 644 OPT_BOOLEAN(0, "stdio", &report.use_stdio,
596 "Use the stdio interface"), 645 "Use the stdio interface"),
597 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 646 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
598 "sort by key(s): pid, comm, dso, symbol, parent, dso_to," 647 "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
599 " dso_from, symbol_to, symbol_from, mispredict"), 648 " dso_to, dso_from, symbol_to, symbol_from, mispredict"),
600 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, 649 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
601 "Show sample percentage for different cpu modes"), 650 "Show sample percentage for different cpu modes"),
602 OPT_STRING('p', "parent", &parent_pattern, "regex", 651 OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -638,6 +687,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
638 "Specify disassembler style (e.g. -M intel for intel syntax)"), 687 "Specify disassembler style (e.g. -M intel for intel syntax)"),
639 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 688 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
640 "Show a column with the sum of periods"), 689 "Show a column with the sum of periods"),
690 OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
691 "Show event group information together"),
641 OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", 692 OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
642 "use branch records for histogram filling", parse_branch_mode), 693 "use branch records for histogram filling", parse_branch_mode),
643 OPT_STRING(0, "objdump", &objdump_path, "path", 694 OPT_STRING(0, "objdump", &objdump_path, "path",
@@ -645,6 +696,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
645 OPT_END() 696 OPT_END()
646 }; 697 };
647 698
699 perf_config(perf_report_config, NULL);
700
648 argc = parse_options(argc, argv, options, report_usage, 0); 701 argc = parse_options(argc, argv, options, report_usage, 0);
649 702
650 if (report.use_stdio) 703 if (report.use_stdio)
@@ -663,6 +716,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
663 else 716 else
664 input_name = "perf.data"; 717 input_name = "perf.data";
665 } 718 }
719
720 if (strcmp(input_name, "-") != 0)
721 setup_browser(true);
722 else {
723 use_browser = 0;
724 perf_hpp__column_enable(PERF_HPP__OVERHEAD);
725 perf_hpp__init();
726 }
727
728repeat:
666 session = perf_session__new(input_name, O_RDONLY, 729 session = perf_session__new(input_name, O_RDONLY,
667 report.force, false, &report.tool); 730 report.force, false, &report.tool);
668 if (session == NULL) 731 if (session == NULL)
@@ -688,14 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
688 751
689 } 752 }
690 753
691 if (strcmp(input_name, "-") != 0) 754 if (setup_sorting() < 0)
692 setup_browser(true); 755 usage_with_options(report_usage, options);
693 else {
694 use_browser = 0;
695 perf_hpp__init();
696 }
697
698 setup_sorting(report_usage, options);
699 756
700 /* 757 /*
701 * Only in the newt browser we are doing integrated annotation, 758 * Only in the newt browser we are doing integrated annotation,
@@ -763,6 +820,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
763 } 820 }
764 821
765 ret = __cmd_report(&report); 822 ret = __cmd_report(&report);
823 if (ret == K_SWITCH_INPUT_DATA) {
824 perf_session__delete(session);
825 goto repeat;
826 } else
827 ret = 0;
828
766error: 829error:
767 perf_session__delete(session); 830 perf_session__delete(session);
768 return ret; 831 return ret;
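
The --group additions above skip non-leader events when browsing and fold member statistics into the leader via hists__match()/hists__link(). A flat-array reduction of that aggregation (the real code matches individual hist entries, which this simple sum only gestures at):

#include <stdio.h>

struct ev {
        const char *name;
        int leader;             /* index of leader, -1 if leader itself */
        unsigned long samples;
};

int main(void)
{
        struct ev evs[] = {
                { "cycles",       -1, 100 },
                { "instructions",  0,  60 },
                { "branches",      0,  40 },
        };
        int i, n = (int)(sizeof(evs) / sizeof(evs[0]));

        /* fold members into their leader */
        for (i = 0; i < n; i++)
                if (evs[i].leader != -1)
                        evs[evs[i].leader].samples += evs[i].samples;

        /* print leaders only, as the --group browse path does */
        for (i = 0; i < n; i++)
                if (evs[i].leader == -1)
                        printf("%s: %lu samples\n", evs[i].name,
                               evs[i].samples);
        return 0;
}
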
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cc28b85dabd5..138229439a93 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1475,9 +1475,9 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
1475 goto out_delete; 1475 goto out_delete;
1476 } 1476 }
1477 1477
1478 sched->nr_events = session->hists.stats.nr_events[0]; 1478 sched->nr_events = session->stats.nr_events[0];
1479 sched->nr_lost_events = session->hists.stats.total_lost; 1479 sched->nr_lost_events = session->stats.total_lost;
1480 sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; 1480 sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
1481 } 1481 }
1482 1482
1483 if (destroy) 1483 if (destroy)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b363e7b292b2..92d4658f56fb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -692,7 +692,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
692 const char *arg, int unset __maybe_unused) 692 const char *arg, int unset __maybe_unused)
693{ 693{
694 char *tok; 694 char *tok;
695 int i, imax = sizeof(all_output_options) / sizeof(struct output_option); 695 int i, imax = ARRAY_SIZE(all_output_options);
696 int j; 696 int j;
697 int rc = 0; 697 int rc = 0;
698 char *str = strdup(arg); 698 char *str = strdup(arg);
@@ -909,18 +909,6 @@ static const char *ends_with(const char *str, const char *suffix)
909 return NULL; 909 return NULL;
910} 910}
911 911
912static char *ltrim(char *str)
913{
914 int len = strlen(str);
915
916 while (len && isspace(*str)) {
917 len--;
918 str++;
919 }
920
921 return str;
922}
923
924static int read_script_info(struct script_desc *desc, const char *filename) 912static int read_script_info(struct script_desc *desc, const char *filename)
925{ 913{
926 char line[BUFSIZ], *p; 914 char line[BUFSIZ], *p;
@@ -1487,7 +1475,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
1487 return -1; 1475 return -1;
1488 } 1476 }
1489 1477
1490 perf_session__fprintf_info(session, stdout, show_full_info); 1478 if (!script_name && !generate_script_lang)
1479 perf_session__fprintf_info(session, stdout, show_full_info);
1491 1480
1492 if (!no_callchain) 1481 if (!no_callchain)
1493 symbol_conf.use_callchain = true; 1482 symbol_conf.use_callchain = true;
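
The ltrim() removed from builtin-script.c presumably moves to a shared util file not shown in this diff. For reference, the helper as a stand-alone copy (simplified to drop the redundant length counter the old version kept):

#include <ctype.h>
#include <stdio.h>

static char *ltrim(char *str)
{
        while (*str && isspace((unsigned char)*str))
                str++;
        return str;
}

int main(void)
{
        char line[] = "   description: sample script";

        printf("\"%s\"\n", ltrim(line));
        return 0;
}
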
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c247faca7127..99848761f573 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,11 @@
65#define CNTR_NOT_SUPPORTED "<not supported>" 65#define CNTR_NOT_SUPPORTED "<not supported>"
66#define CNTR_NOT_COUNTED "<not counted>" 66#define CNTR_NOT_COUNTED "<not counted>"
67 67
68static void print_stat(int argc, const char **argv);
69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70static void print_counter(struct perf_evsel *counter, char *prefix);
71static void print_aggr_socket(char *prefix);
72
68static struct perf_evlist *evsel_list; 73static struct perf_evlist *evsel_list;
69 74
70static struct perf_target target = { 75static struct perf_target target = {
@@ -75,6 +80,7 @@ static int run_count = 1;
75static bool no_inherit = false; 80static bool no_inherit = false;
76static bool scale = true; 81static bool scale = true;
77static bool no_aggr = false; 82static bool no_aggr = false;
83static bool aggr_socket = false;
78static pid_t child_pid = -1; 84static pid_t child_pid = -1;
79static bool null_run = false; 85static bool null_run = false;
80static int detailed_run = 0; 86static int detailed_run = 0;
@@ -87,6 +93,9 @@ static FILE *output = NULL;
87static const char *pre_cmd = NULL; 93static const char *pre_cmd = NULL;
88static const char *post_cmd = NULL; 94static const char *post_cmd = NULL;
89static bool sync_run = false; 95static bool sync_run = false;
96static unsigned int interval = 0;
97static struct timespec ref_time;
98static struct cpu_map *sock_map;
90 99
91static volatile int done = 0; 100static volatile int done = 0;
92 101
@@ -94,6 +103,28 @@ struct perf_stat {
94 struct stats res_stats[3]; 103 struct stats res_stats[3];
95}; 104};
96 105
106static inline void diff_timespec(struct timespec *r, struct timespec *a,
107 struct timespec *b)
108{
109 r->tv_sec = a->tv_sec - b->tv_sec;
110 if (a->tv_nsec < b->tv_nsec) {
111 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
112 r->tv_sec--;
113 } else {
114 r->tv_nsec = a->tv_nsec - b->tv_nsec;
115 }
116}
117
118static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
119{
120 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
121}
122
123static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
124{
125 return perf_evsel__cpus(evsel)->nr;
126}
127
97static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 128static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
98{ 129{
99 evsel->priv = zalloc(sizeof(struct perf_stat)); 130 evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -106,14 +137,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
106 evsel->priv = NULL; 137 evsel->priv = NULL;
107} 138}
108 139
109static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) 140static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
110{ 141{
111 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; 142 void *addr;
143 size_t sz;
144
145 sz = sizeof(*evsel->counts) +
146 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
147
148 addr = zalloc(sz);
149 if (!addr)
150 return -ENOMEM;
151
152 evsel->prev_raw_counts = addr;
153
154 return 0;
112} 155}
113 156
114static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) 157static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
115{ 158{
116 return perf_evsel__cpus(evsel)->nr; 159 free(evsel->prev_raw_counts);
160 evsel->prev_raw_counts = NULL;
117} 161}
118 162
119static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 163static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -132,8 +176,6 @@ static struct stats walltime_nsecs_stats;
132static int create_perf_stat_counter(struct perf_evsel *evsel) 176static int create_perf_stat_counter(struct perf_evsel *evsel)
133{ 177{
134 struct perf_event_attr *attr = &evsel->attr; 178 struct perf_event_attr *attr = &evsel->attr;
135 bool exclude_guest_missing = false;
136 int ret;
137 179
138 if (scale) 180 if (scale)
139 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 181 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -141,38 +183,16 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
141 183
142 attr->inherit = !no_inherit; 184 attr->inherit = !no_inherit;
143 185
144retry: 186 if (perf_target__has_cpu(&target))
145 if (exclude_guest_missing) 187 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
146 evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
147
148 if (perf_target__has_cpu(&target)) {
149 ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
150 if (ret)
151 goto check_ret;
152 return 0;
153 }
154 188
155 if (!perf_target__has_task(&target) && 189 if (!perf_target__has_task(&target) &&
156 !perf_evsel__is_group_member(evsel)) { 190 perf_evsel__is_group_leader(evsel)) {
157 attr->disabled = 1; 191 attr->disabled = 1;
158 attr->enable_on_exec = 1; 192 attr->enable_on_exec = 1;
159 } 193 }
160 194
161 ret = perf_evsel__open_per_thread(evsel, evsel_list->threads); 195 return perf_evsel__open_per_thread(evsel, evsel_list->threads);
162 if (!ret)
163 return 0;
164 /* fall through */
165check_ret:
166 if (ret && errno == EINVAL) {
167 if (!exclude_guest_missing &&
168 (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
169 pr_debug("Old kernel, cannot exclude "
170 "guest or host samples.\n");
171 exclude_guest_missing = true;
172 goto retry;
173 }
174 }
175 return ret;
176} 196}
177 197
178/* 198/*
@@ -269,15 +289,79 @@ static int read_counter(struct perf_evsel *counter)
269 return 0; 289 return 0;
270} 290}
271 291
292static void print_interval(void)
293{
294 static int num_print_interval;
295 struct perf_evsel *counter;
296 struct perf_stat *ps;
297 struct timespec ts, rs;
298 char prefix[64];
299
300 if (no_aggr) {
301 list_for_each_entry(counter, &evsel_list->entries, node) {
302 ps = counter->priv;
303 memset(ps->res_stats, 0, sizeof(ps->res_stats));
304 read_counter(counter);
305 }
306 } else {
307 list_for_each_entry(counter, &evsel_list->entries, node) {
308 ps = counter->priv;
309 memset(ps->res_stats, 0, sizeof(ps->res_stats));
310 read_counter_aggr(counter);
311 }
312 }
313 clock_gettime(CLOCK_MONOTONIC, &ts);
314 diff_timespec(&rs, &ts, &ref_time);
315 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
316
317 if (num_print_interval == 0 && !csv_output) {
318 if (aggr_socket)
319 fprintf(output, "# time socket cpus counts events\n");
320 else if (no_aggr)
321 fprintf(output, "# time CPU counts events\n");
322 else
323 fprintf(output, "# time counts events\n");
324 }
325
326 if (++num_print_interval == 25)
327 num_print_interval = 0;
328
329 if (aggr_socket)
330 print_aggr_socket(prefix);
331 else if (no_aggr) {
332 list_for_each_entry(counter, &evsel_list->entries, node)
333 print_counter(counter, prefix);
334 } else {
335 list_for_each_entry(counter, &evsel_list->entries, node)
336 print_counter_aggr(counter, prefix);
337 }
338}
339
272static int __run_perf_stat(int argc __maybe_unused, const char **argv) 340static int __run_perf_stat(int argc __maybe_unused, const char **argv)
273{ 341{
342 char msg[512];
274 unsigned long long t0, t1; 343 unsigned long long t0, t1;
275 struct perf_evsel *counter; 344 struct perf_evsel *counter;
345 struct timespec ts;
276 int status = 0; 346 int status = 0;
277 int child_ready_pipe[2], go_pipe[2]; 347 int child_ready_pipe[2], go_pipe[2];
278 const bool forks = (argc > 0); 348 const bool forks = (argc > 0);
279 char buf; 349 char buf;
280 350
351 if (interval) {
352 ts.tv_sec = interval / 1000;
353 ts.tv_nsec = (interval % 1000) * 1000000;
354 } else {
355 ts.tv_sec = 1;
356 ts.tv_nsec = 0;
357 }
358
359 if (aggr_socket
360 && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
361 perror("cannot build socket map");
362 return -1;
363 }
364
281 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 365 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
282 perror("failed to create pipes"); 366 perror("failed to create pipes");
283 return -1; 367 return -1;
@@ -348,20 +432,13 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
348 continue; 432 continue;
349 } 433 }
350 434
351 if (errno == EPERM || errno == EACCES) { 435 perf_evsel__open_strerror(counter, &target,
352 error("You may not have permission to collect %sstats.\n" 436 errno, msg, sizeof(msg));
353 "\t Consider tweaking" 437 ui__error("%s\n", msg);
354 " /proc/sys/kernel/perf_event_paranoid or running as root.", 438
355 target.system_wide ? "system-wide " : "");
356 } else {
357 error("open_counter returned with %d (%s). "
358 "/bin/dmesg may provide additional information.\n",
359 errno, strerror(errno));
360 }
361 if (child_pid != -1) 439 if (child_pid != -1)
362 kill(child_pid, SIGTERM); 440 kill(child_pid, SIGTERM);
363 441
364 pr_err("Not all events could be opened.\n");
365 return -1; 442 return -1;
366 } 443 }
367 counter->supported = true; 444 counter->supported = true;
@@ -377,14 +454,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
377 * Enable counters and exec the command: 454 * Enable counters and exec the command:
378 */ 455 */
379 t0 = rdclock(); 456 t0 = rdclock();
457 clock_gettime(CLOCK_MONOTONIC, &ref_time);
380 458
381 if (forks) { 459 if (forks) {
382 close(go_pipe[1]); 460 close(go_pipe[1]);
461 if (interval) {
462 while (!waitpid(child_pid, &status, WNOHANG)) {
463 nanosleep(&ts, NULL);
464 print_interval();
465 }
466 }
383 wait(&status); 467 wait(&status);
384 if (WIFSIGNALED(status)) 468 if (WIFSIGNALED(status))
385 psignal(WTERMSIG(status), argv[0]); 469 psignal(WTERMSIG(status), argv[0]);
386 } else { 470 } else {
387 while(!done) sleep(1); 471 while (!done) {
472 nanosleep(&ts, NULL);
473 if (interval)
474 print_interval();
475 }
388 } 476 }
389 477
390 t1 = rdclock(); 478 t1 = rdclock();
@@ -454,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
454 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 542 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
455} 543}
456 544
457static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) 545static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
458{ 546{
459 double msecs = avg / 1e6; 547 double msecs = avg / 1e6;
460 char cpustr[16] = { '\0', }; 548 char cpustr[16] = { '\0', };
461 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; 549 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
462 550
463 if (no_aggr) 551 if (aggr_socket)
552 sprintf(cpustr, "S%*d%s%*d%s",
553 csv_output ? 0 : -5,
554 cpu,
555 csv_sep,
556 csv_output ? 0 : 4,
557 nr,
558 csv_sep);
559 else if (no_aggr)
464 sprintf(cpustr, "CPU%*d%s", 560 sprintf(cpustr, "CPU%*d%s",
465 csv_output ? 0 : -4, 561 csv_output ? 0 : -4,
466 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 562 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -470,7 +566,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
470 if (evsel->cgrp) 566 if (evsel->cgrp)
471 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 567 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
472 568
473 if (csv_output) 569 if (csv_output || interval)
474 return; 570 return;
475 571
476 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 572 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -659,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
659 fprintf(output, " of all LL-cache hits "); 755 fprintf(output, " of all LL-cache hits ");
660} 756}
661 757
662static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) 758static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
663{ 759{
664 double total, ratio = 0.0; 760 double total, ratio = 0.0;
665 char cpustr[16] = { '\0', }; 761 char cpustr[16] = { '\0', };
@@ -672,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
672 else 768 else
673 fmt = "%s%18.0f%s%-25s"; 769 fmt = "%s%18.0f%s%-25s";
674 770
675 if (no_aggr) 771 if (aggr_socket)
772 sprintf(cpustr, "S%*d%s%*d%s",
773 csv_output ? 0 : -5,
774 cpu,
775 csv_sep,
776 csv_output ? 0 : 4,
777 nr,
778 csv_sep);
779 else if (no_aggr)
676 sprintf(cpustr, "CPU%*d%s", 780 sprintf(cpustr, "CPU%*d%s",
677 csv_output ? 0 : -4, 781 csv_output ? 0 : -4,
678 perf_evsel__cpus(evsel)->map[cpu], csv_sep); 782 perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -684,12 +788,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
684 if (evsel->cgrp) 788 if (evsel->cgrp)
685 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 789 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
686 790
687 if (csv_output) 791 if (csv_output || interval)
688 return; 792 return;
689 793
690 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 794 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
691 total = avg_stats(&runtime_cycles_stats[cpu]); 795 total = avg_stats(&runtime_cycles_stats[cpu]);
692
693 if (total) 796 if (total)
694 ratio = avg / total; 797 ratio = avg / total;
695 798
@@ -779,16 +882,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
779 } 882 }
780} 883}
781 884
885static void print_aggr_socket(char *prefix)
886{
887 struct perf_evsel *counter;
888 u64 ena, run, val;
889 int cpu, s, s2, sock, nr;
890
891 if (!sock_map)
892 return;
893
894 for (s = 0; s < sock_map->nr; s++) {
895 sock = cpu_map__socket(sock_map, s);
896 list_for_each_entry(counter, &evsel_list->entries, node) {
897 val = ena = run = 0;
898 nr = 0;
899 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
900 s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
901 if (s2 != sock)
902 continue;
903 val += counter->counts->cpu[cpu].val;
904 ena += counter->counts->cpu[cpu].ena;
905 run += counter->counts->cpu[cpu].run;
906 nr++;
907 }
908 if (prefix)
909 fprintf(output, "%s", prefix);
910
911 if (run == 0 || ena == 0) {
912 fprintf(output, "S%*d%s%*d%s%*s%s%*s",
913 csv_output ? 0 : -5,
914 s,
915 csv_sep,
916 csv_output ? 0 : 4,
917 nr,
918 csv_sep,
919 csv_output ? 0 : 18,
920 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
921 csv_sep,
922 csv_output ? 0 : -24,
923 perf_evsel__name(counter));
924 if (counter->cgrp)
925 fprintf(output, "%s%s",
926 csv_sep, counter->cgrp->name);
927
928 fputc('\n', output);
929 continue;
930 }
931
932 if (nsec_counter(counter))
933 nsec_printout(sock, nr, counter, val);
934 else
935 abs_printout(sock, nr, counter, val);
936
937 if (!csv_output) {
938 print_noise(counter, 1.0);
939
940 if (run != ena)
941 fprintf(output, " (%.2f%%)",
942 100.0 * run / ena);
943 }
944 fputc('\n', output);
945 }
946 }
947}
948
782/* 949/*
783 * Print out the results of a single counter: 950 * Print out the results of a single counter:
784 * aggregated counts in system-wide mode 951 * aggregated counts in system-wide mode
785 */ 952 */
786static void print_counter_aggr(struct perf_evsel *counter) 953static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
787{ 954{
788 struct perf_stat *ps = counter->priv; 955 struct perf_stat *ps = counter->priv;
789 double avg = avg_stats(&ps->res_stats[0]); 956 double avg = avg_stats(&ps->res_stats[0]);
790 int scaled = counter->counts->scaled; 957 int scaled = counter->counts->scaled;
791 958
959 if (prefix)
960 fprintf(output, "%s", prefix);
961
792 if (scaled == -1) { 962 if (scaled == -1) {
793 fprintf(output, "%*s%s%*s", 963 fprintf(output, "%*s%s%*s",
794 csv_output ? 0 : 18, 964 csv_output ? 0 : 18,
@@ -805,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter)
805 } 975 }
806 976
807 if (nsec_counter(counter)) 977 if (nsec_counter(counter))
808 nsec_printout(-1, counter, avg); 978 nsec_printout(-1, 0, counter, avg);
809 else 979 else
810 abs_printout(-1, counter, avg); 980 abs_printout(-1, 0, counter, avg);
811 981
812 print_noise(counter, avg); 982 print_noise(counter, avg);
813 983
@@ -831,7 +1001,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
831 * Print out the results of a single counter: 1001 * Print out the results of a single counter:
832 * does not use aggregated count in system-wide 1002 * does not use aggregated count in system-wide
833 */ 1003 */
834static void print_counter(struct perf_evsel *counter) 1004static void print_counter(struct perf_evsel *counter, char *prefix)
835{ 1005{
836 u64 ena, run, val; 1006 u64 ena, run, val;
837 int cpu; 1007 int cpu;
@@ -840,6 +1010,10 @@ static void print_counter(struct perf_evsel *counter)
840 val = counter->counts->cpu[cpu].val; 1010 val = counter->counts->cpu[cpu].val;
841 ena = counter->counts->cpu[cpu].ena; 1011 ena = counter->counts->cpu[cpu].ena;
842 run = counter->counts->cpu[cpu].run; 1012 run = counter->counts->cpu[cpu].run;
1013
1014 if (prefix)
1015 fprintf(output, "%s", prefix);
1016
843 if (run == 0 || ena == 0) { 1017 if (run == 0 || ena == 0) {
844 fprintf(output, "CPU%*d%s%*s%s%*s", 1018 fprintf(output, "CPU%*d%s%*s%s%*s",
845 csv_output ? 0 : -4, 1019 csv_output ? 0 : -4,
@@ -859,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter)
859 } 1033 }
860 1034
861 if (nsec_counter(counter)) 1035 if (nsec_counter(counter))
862 nsec_printout(cpu, counter, val); 1036 nsec_printout(cpu, 0, counter, val);
863 else 1037 else
864 abs_printout(cpu, counter, val); 1038 abs_printout(cpu, 0, counter, val);
865 1039
866 if (!csv_output) { 1040 if (!csv_output) {
867 print_noise(counter, 1.0); 1041 print_noise(counter, 1.0);
@@ -899,12 +1073,14 @@ static void print_stat(int argc, const char **argv)
899 fprintf(output, ":\n\n"); 1073 fprintf(output, ":\n\n");
900 } 1074 }
901 1075
902 if (no_aggr) { 1076 if (aggr_socket)
1077 print_aggr_socket(NULL);
1078 else if (no_aggr) {
903 list_for_each_entry(counter, &evsel_list->entries, node) 1079 list_for_each_entry(counter, &evsel_list->entries, node)
904 print_counter(counter); 1080 print_counter(counter, NULL);
905 } else { 1081 } else {
906 list_for_each_entry(counter, &evsel_list->entries, node) 1082 list_for_each_entry(counter, &evsel_list->entries, node)
907 print_counter_aggr(counter); 1083 print_counter_aggr(counter, NULL);
908 } 1084 }
909 1085
910 if (!csv_output) { 1086 if (!csv_output) {
@@ -925,7 +1101,7 @@ static volatile int signr = -1;
925 1101
926static void skip_signal(int signo) 1102static void skip_signal(int signo)
927{ 1103{
928 if(child_pid == -1) 1104 if ((child_pid == -1) || interval)
929 done = 1; 1105 done = 1;
930 1106
931 signr = signo; 1107 signr = signo;
@@ -1145,6 +1321,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1145 "command to run prior to the measured command"), 1321 "command to run prior to the measured command"),
1146 OPT_STRING(0, "post", &post_cmd, "command", 1322 OPT_STRING(0, "post", &post_cmd, "command",
1147 "command to run after to the measured command"), 1323 "command to run after to the measured command"),
1324 OPT_UINTEGER('I', "interval-print", &interval,
1325 "print counts at regular interval in ms (>= 100)"),
1326 OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
1148 OPT_END() 1327 OPT_END()
1149 }; 1328 };
1150 const char * const stat_usage[] = { 1329 const char * const stat_usage[] = {
@@ -1231,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1231 usage_with_options(stat_usage, options); 1410 usage_with_options(stat_usage, options);
1232 } 1411 }
1233 1412
1413 if (aggr_socket) {
1414 if (!perf_target__has_cpu(&target)) {
1415 fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
1416 usage_with_options(stat_usage, options);
1417 }
1418 no_aggr = true;
1419 }
1420
1234 if (add_default_attributes()) 1421 if (add_default_attributes())
1235 goto out; 1422 goto out;
1236 1423
@@ -1245,12 +1432,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1245 usage_with_options(stat_usage, options); 1432 usage_with_options(stat_usage, options);
1246 return -1; 1433 return -1;
1247 } 1434 }
1435 if (interval && interval < 100) {
1436 pr_err("print interval must be >= 100ms\n");
1437 usage_with_options(stat_usage, options);
1438 return -1;
1439 }
1248 1440
1249 list_for_each_entry(pos, &evsel_list->entries, node) { 1441 list_for_each_entry(pos, &evsel_list->entries, node) {
1250 if (perf_evsel__alloc_stat_priv(pos) < 0 || 1442 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
1251 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) 1443 perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
1252 goto out_free_fd; 1444 goto out_free_fd;
1253 } 1445 }
1446 if (interval) {
1447 list_for_each_entry(pos, &evsel_list->entries, node) {
1448 if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
1449 goto out_free_fd;
1450 }
1451 }
1254 1452
1255 /* 1453 /*
1256 * We dont want to block the signals - that would cause 1454 * We dont want to block the signals - that would cause
@@ -1260,6 +1458,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1260 */ 1458 */
1261 atexit(sig_atexit); 1459 atexit(sig_atexit);
1262 signal(SIGINT, skip_signal); 1460 signal(SIGINT, skip_signal);
1461 signal(SIGCHLD, skip_signal);
1263 signal(SIGALRM, skip_signal); 1462 signal(SIGALRM, skip_signal);
1264 signal(SIGABRT, skip_signal); 1463 signal(SIGABRT, skip_signal);
1265 1464
@@ -1272,11 +1471,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1272 status = run_perf_stat(argc, argv); 1471 status = run_perf_stat(argc, argv);
1273 } 1472 }
1274 1473
1275 if (status != -1) 1474 if (status != -1 && !interval)
1276 print_stat(argc, argv); 1475 print_stat(argc, argv);
1277out_free_fd: 1476out_free_fd:
1278 list_for_each_entry(pos, &evsel_list->entries, node) 1477 list_for_each_entry(pos, &evsel_list->entries, node) {
1279 perf_evsel__free_stat_priv(pos); 1478 perf_evsel__free_stat_priv(pos);
1479 perf_evsel__free_counts(pos);
1480 perf_evsel__free_prev_raw_counts(pos);
1481 }
1280 perf_evlist__delete_maps(evsel_list); 1482 perf_evlist__delete_maps(evsel_list);
1281out: 1483out:
1282 perf_evlist__delete(evsel_list); 1484 perf_evlist__delete(evsel_list);
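
The heart of the new -I/--interval-print output is the timestamp prefix: subtract the reference time taken when the counters were enabled from the current CLOCK_MONOTONIC reading, borrowing a second when the nanosecond field underflows. The same arithmetic as the diff_timespec() added above, runnable on its own:

#include <stdio.h>
#include <time.h>

static void diff_ts(struct timespec *r, const struct timespec *a,
                    const struct timespec *b)
{
        r->tv_sec = a->tv_sec - b->tv_sec;
        if (a->tv_nsec < b->tv_nsec) {
                r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
                r->tv_sec--;            /* borrow one second */
        } else {
                r->tv_nsec = a->tv_nsec - b->tv_nsec;
        }
}

int main(void)
{
        struct timespec ref, now, rs;

        clock_gettime(CLOCK_MONOTONIC, &ref);
        /* ... counters would run here ... */
        clock_gettime(CLOCK_MONOTONIC, &now);
        diff_ts(&rs, &now, &ref);
        printf("%6lu.%09lu\n",
               (unsigned long)rs.tv_sec, (unsigned long)rs.tv_nsec);
        return 0;
}
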
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c9ff3950cd4b..72f6eb7b4173 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -68,27 +68,7 @@
68#include <linux/unistd.h> 68#include <linux/unistd.h>
69#include <linux/types.h> 69#include <linux/types.h>
70 70
71void get_term_dimensions(struct winsize *ws) 71static volatile int done;
72{
73 char *s = getenv("LINES");
74
75 if (s != NULL) {
76 ws->ws_row = atoi(s);
77 s = getenv("COLUMNS");
78 if (s != NULL) {
79 ws->ws_col = atoi(s);
80 if (ws->ws_row && ws->ws_col)
81 return;
82 }
83 }
84#ifdef TIOCGWINSZ
85 if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
86 ws->ws_row && ws->ws_col)
87 return;
88#endif
89 ws->ws_row = 25;
90 ws->ws_col = 80;
91}
92 72
93static void perf_top__update_print_entries(struct perf_top *top) 73static void perf_top__update_print_entries(struct perf_top *top)
94{ 74{
@@ -453,8 +433,10 @@ static int perf_top__key_mapped(struct perf_top *top, int c)
453 return 0; 433 return 0;
454} 434}
455 435
456static void perf_top__handle_keypress(struct perf_top *top, int c) 436static bool perf_top__handle_keypress(struct perf_top *top, int c)
457{ 437{
438 bool ret = true;
439
458 if (!perf_top__key_mapped(top, c)) { 440 if (!perf_top__key_mapped(top, c)) {
459 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 441 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
460 struct termios tc, save; 442 struct termios tc, save;
@@ -475,7 +457,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
475 457
476 tcsetattr(0, TCSAFLUSH, &save); 458 tcsetattr(0, TCSAFLUSH, &save);
477 if (!perf_top__key_mapped(top, c)) 459 if (!perf_top__key_mapped(top, c))
478 return; 460 return ret;
479 } 461 }
480 462
481 switch (c) { 463 switch (c) {
@@ -537,7 +519,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
537 printf("exiting.\n"); 519 printf("exiting.\n");
538 if (top->dump_symtab) 520 if (top->dump_symtab)
539 perf_session__fprintf_dsos(top->session, stderr); 521 perf_session__fprintf_dsos(top->session, stderr);
540 exit(0); 522 ret = false;
523 break;
541 case 's': 524 case 's':
542 perf_top__prompt_symbol(top, "Enter details symbol"); 525 perf_top__prompt_symbol(top, "Enter details symbol");
543 break; 526 break;
@@ -560,6 +543,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c)
560 default: 543 default:
561 break; 544 break;
562 } 545 }
546
547 return ret;
563} 548}
564 549
565static void perf_top__sort_new_samples(void *arg) 550static void perf_top__sort_new_samples(void *arg)
@@ -596,13 +581,12 @@ static void *display_thread_tui(void *arg)
596 * via --uid. 581 * via --uid.
597 */ 582 */
598 list_for_each_entry(pos, &top->evlist->entries, node) 583 list_for_each_entry(pos, &top->evlist->entries, node)
599 pos->hists.uid_filter_str = top->target.uid_str; 584 pos->hists.uid_filter_str = top->record_opts.target.uid_str;
600 585
601 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 586 perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
602 &top->session->header.env); 587 &top->session->header.env);
603 588
604 exit_browser(0); 589 done = 1;
605 exit(0);
606 return NULL; 590 return NULL;
607} 591}
608 592
@@ -626,7 +610,7 @@ repeat:
626 /* trash return */ 610 /* trash return */
627 getc(stdin); 611 getc(stdin);
628 612
629 while (1) { 613 while (!done) {
630 perf_top__print_sym_table(top); 614 perf_top__print_sym_table(top);
631 /* 615 /*
632 * Either timeout expired or we got an EINTR due to SIGWINCH, 616 * Either timeout expired or we got an EINTR due to SIGWINCH,
@@ -640,15 +624,14 @@ repeat:
640 continue; 624 continue;
641 /* Fall thru */ 625 /* Fall thru */
642 default: 626 default:
643 goto process_hotkey; 627 c = getc(stdin);
628 tcsetattr(0, TCSAFLUSH, &save);
629
630 if (perf_top__handle_keypress(top, c))
631 goto repeat;
632 done = 1;
644 } 633 }
645 } 634 }
646process_hotkey:
647 c = getc(stdin);
648 tcsetattr(0, TCSAFLUSH, &save);
649
650 perf_top__handle_keypress(top, c);
651 goto repeat;
652 635
653 return NULL; 636 return NULL;
654} 637}
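
The keypress rework above trades exit(0) inside the handler for a bool return plus a shared volatile done flag, so the TUI and stdio display threads shut down through one path. A toy reduction (handle_key and the canned input are illustrative):

#include <stdio.h>

static volatile int done;

/* returns 0 when the caller should stop, mirroring the new bool
 * return of perf_top__handle_keypress() */
static int handle_key(int c)
{
        return c != 'q';
}

int main(void)
{
        const char input[] = "zzq";     /* pretend key presses */

        for (int i = 0; !done && input[i]; i++) {
                printf("refresh display\n");
                if (!handle_key(input[i]))
                        done = 1;       /* loop exits; no exit(0) */
        }
        return 0;
}
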
@@ -716,7 +699,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
716 static struct intlist *seen; 699 static struct intlist *seen;
717 700
718 if (!seen) 701 if (!seen)
719 seen = intlist__new(); 702 seen = intlist__new(NULL);
720 703
721 if (!intlist__has_entry(seen, event->ip.pid)) { 704 if (!intlist__has_entry(seen, event->ip.pid)) {
722 pr_err("Can't find guest [%d]'s kernel information\n", 705 pr_err("Can't find guest [%d]'s kernel information\n",
@@ -727,8 +710,8 @@ static void perf_event__process_sample(struct perf_tool *tool,
727 } 710 }
728 711
729 if (!machine) { 712 if (!machine) {
730 pr_err("%u unprocessable samples recorded.", 713 pr_err("%u unprocessable samples recorded.\r",
731 top->session->hists.stats.nr_unprocessable_samples++); 714 top->session->stats.nr_unprocessable_samples++);
732 return; 715 return;
733 } 716 }
734 717
@@ -847,13 +830,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
847 ++top->us_samples; 830 ++top->us_samples;
848 if (top->hide_user_symbols) 831 if (top->hide_user_symbols)
849 continue; 832 continue;
850 machine = perf_session__find_host_machine(session); 833 machine = &session->machines.host;
851 break; 834 break;
852 case PERF_RECORD_MISC_KERNEL: 835 case PERF_RECORD_MISC_KERNEL:
853 ++top->kernel_samples; 836 ++top->kernel_samples;
854 if (top->hide_kernel_symbols) 837 if (top->hide_kernel_symbols)
855 continue; 838 continue;
856 machine = perf_session__find_host_machine(session); 839 machine = &session->machines.host;
857 break; 840 break;
858 case PERF_RECORD_MISC_GUEST_KERNEL: 841 case PERF_RECORD_MISC_GUEST_KERNEL:
859 ++top->guest_kernel_samples; 842 ++top->guest_kernel_samples;
@@ -878,7 +861,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
878 hists__inc_nr_events(&evsel->hists, event->header.type); 861 hists__inc_nr_events(&evsel->hists, event->header.type);
879 machine__process_event(machine, event); 862 machine__process_event(machine, event);
880 } else 863 } else
881 ++session->hists.stats.nr_unknown_events; 864 ++session->stats.nr_unknown_events;
882 } 865 }
883} 866}
884 867
@@ -890,123 +873,42 @@ static void perf_top__mmap_read(struct perf_top *top)
890 perf_top__mmap_read_idx(top, i); 873 perf_top__mmap_read_idx(top, i);
891} 874}
892 875
893static void perf_top__start_counters(struct perf_top *top) 876static int perf_top__start_counters(struct perf_top *top)
894{ 877{
878 char msg[512];
895 struct perf_evsel *counter; 879 struct perf_evsel *counter;
896 struct perf_evlist *evlist = top->evlist; 880 struct perf_evlist *evlist = top->evlist;
881 struct perf_record_opts *opts = &top->record_opts;
897 882
898 if (top->group) 883 perf_evlist__config(evlist, opts);
899 perf_evlist__set_leader(evlist);
900 884
901 list_for_each_entry(counter, &evlist->entries, node) { 885 list_for_each_entry(counter, &evlist->entries, node) {
902 struct perf_event_attr *attr = &counter->attr;
903
904 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
905
906 if (top->freq) {
907 attr->sample_type |= PERF_SAMPLE_PERIOD;
908 attr->freq = 1;
909 attr->sample_freq = top->freq;
910 }
911
912 if (evlist->nr_entries > 1) {
913 attr->sample_type |= PERF_SAMPLE_ID;
914 attr->read_format |= PERF_FORMAT_ID;
915 }
916
917 if (perf_target__has_cpu(&top->target))
918 attr->sample_type |= PERF_SAMPLE_CPU;
919
920 if (symbol_conf.use_callchain)
921 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
922
923 attr->mmap = 1;
924 attr->comm = 1;
925 attr->inherit = top->inherit;
926fallback_missing_features:
927 if (top->exclude_guest_missing)
928 attr->exclude_guest = attr->exclude_host = 0;
929retry_sample_id:
930 attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
931try_again: 886try_again:
932 if (perf_evsel__open(counter, top->evlist->cpus, 887 if (perf_evsel__open(counter, top->evlist->cpus,
933 top->evlist->threads) < 0) { 888 top->evlist->threads) < 0) {
934 int err = errno; 889 if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
935
936 if (err == EPERM || err == EACCES) {
937 ui__error_paranoid();
938 goto out_err;
939 } else if (err == EINVAL) {
940 if (!top->exclude_guest_missing &&
941 (attr->exclude_guest || attr->exclude_host)) {
942 pr_debug("Old kernel, cannot exclude "
943 "guest or host samples.\n");
944 top->exclude_guest_missing = true;
945 goto fallback_missing_features;
946 } else if (!top->sample_id_all_missing) {
947 /*
948 * Old kernel, no attr->sample_id_type_all field
949 */
950 top->sample_id_all_missing = true;
951 goto retry_sample_id;
952 }
953 }
954 /*
955 * If it's cycles then fall back to hrtimer
956 * based cpu-clock-tick sw counter, which
957 * is always available even if no PMU support:
958 */
959 if ((err == ENOENT || err == ENXIO) &&
960 (attr->type == PERF_TYPE_HARDWARE) &&
961 (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {
962
963 if (verbose) 890 if (verbose)
964 ui__warning("Cycles event not supported,\n" 891 ui__warning("%s\n", msg);
965 "trying to fall back to cpu-clock-ticks\n");
966
967 attr->type = PERF_TYPE_SOFTWARE;
968 attr->config = PERF_COUNT_SW_CPU_CLOCK;
969 if (counter->name) {
970 free(counter->name);
971 counter->name = NULL;
972 }
973 goto try_again; 892 goto try_again;
974 } 893 }
975 894
976 if (err == ENOENT) { 895 perf_evsel__open_strerror(counter, &opts->target,
977 ui__error("The %s event is not supported.\n", 896 errno, msg, sizeof(msg));
978 perf_evsel__name(counter)); 897 ui__error("%s\n", msg);
979 goto out_err;
980 } else if (err == EMFILE) {
981 ui__error("Too many events are opened.\n"
982 "Try again after reducing the number of events\n");
983 goto out_err;
984 } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
985 ui__error("\'precise\' request may not be supported. "
986 "Try removing 'p' modifier\n");
987 goto out_err;
988 }
989
990 ui__error("The sys_perf_event_open() syscall "
991 "returned with %d (%s). /bin/dmesg "
992 "may provide additional information.\n"
993 "No CONFIG_PERF_EVENTS=y kernel support "
994 "configured?\n", err, strerror(err));
995 goto out_err; 898 goto out_err;
996 } 899 }
997 } 900 }
998 901
999 if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) { 902 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
1000 ui__error("Failed to mmap with %d (%s)\n", 903 ui__error("Failed to mmap with %d (%s)\n",
1001 errno, strerror(errno)); 904 errno, strerror(errno));
1002 goto out_err; 905 goto out_err;
1003 } 906 }
1004 907
1005 return; 908 return 0;
1006 909
1007out_err: 910out_err:
1008 exit_browser(0); 911 return -1;
1009 exit(0);
1010} 912}
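
The rework above pushes the open-coded fallback ladder into perf_evsel__fallback()/perf_evsel__open_strerror() and makes the function return -1 instead of exiting. The core trick the removed lines performed - downgrading a hardware cycles event to the hrtimer-based cpu-clock software event when no PMU answers - looks roughly like this with the raw syscall (a sketch, not the evsel helpers):

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_counter_with_fallback(pid_t pid, int cpu)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
	if (fd < 0 && (errno == ENOENT || errno == ENXIO)) {
		/* No hardware PMU: fall back to the software clock. */
		attr.type   = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_CPU_CLOCK;
		fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
	}
	return fd;	/* < 0 with errno set on hard failure */
}
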
1011 913
1012static int perf_top__setup_sample_type(struct perf_top *top) 914static int perf_top__setup_sample_type(struct perf_top *top)
@@ -1016,7 +918,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
1016 ui__error("Selected -g but \"sym\" not present in --sort/-s."); 918 ui__error("Selected -g but \"sym\" not present in --sort/-s.");
1017 return -EINVAL; 919 return -EINVAL;
1018 } 920 }
1019 } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) { 921 } else if (callchain_param.mode != CHAIN_NONE) {
1020 if (callchain_register_param(&callchain_param) < 0) { 922 if (callchain_register_param(&callchain_param) < 0) {
1021 ui__error("Can't register callchain params.\n"); 923 ui__error("Can't register callchain params.\n");
1022 return -EINVAL; 924 return -EINVAL;
@@ -1028,6 +930,7 @@ static int perf_top__setup_sample_type(struct perf_top *top)
1028 930
1029static int __cmd_top(struct perf_top *top) 931static int __cmd_top(struct perf_top *top)
1030{ 932{
933 struct perf_record_opts *opts = &top->record_opts;
1031 pthread_t thread; 934 pthread_t thread;
1032 int ret; 935 int ret;
1033 /* 936 /*
@@ -1042,26 +945,42 @@ static int __cmd_top(struct perf_top *top)
1042 if (ret) 945 if (ret)
1043 goto out_delete; 946 goto out_delete;
1044 947
1045 if (perf_target__has_task(&top->target)) 948 if (perf_target__has_task(&opts->target))
1046 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, 949 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
1047 perf_event__process, 950 perf_event__process,
1048 &top->session->host_machine); 951 &top->session->machines.host);
1049 else 952 else
1050 perf_event__synthesize_threads(&top->tool, perf_event__process, 953 perf_event__synthesize_threads(&top->tool, perf_event__process,
1051 &top->session->host_machine); 954 &top->session->machines.host);
1052 perf_top__start_counters(top); 955
956 ret = perf_top__start_counters(top);
957 if (ret)
958 goto out_delete;
959
1053 top->session->evlist = top->evlist; 960 top->session->evlist = top->evlist;
1054 perf_session__set_id_hdr_size(top->session); 961 perf_session__set_id_hdr_size(top->session);
1055 962
963 /*
964 * When perf is starting the traced process, all the events (apart from
965 * group members) have enable_on_exec=1 set, so don't spoil it by
966 * prematurely enabling them.
967 *
968 * XXX 'top' still doesn't start workloads like record, trace, but should,
969 * so leave the check here.
970 */
971 if (!perf_target__none(&opts->target))
972 perf_evlist__enable(top->evlist);
973
1056 /* Wait for a minimal set of events before starting the snapshot */ 974 /* Wait for a minimal set of events before starting the snapshot */
1057 poll(top->evlist->pollfd, top->evlist->nr_fds, 100); 975 poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1058 976
1059 perf_top__mmap_read(top); 977 perf_top__mmap_read(top);
1060 978
979 ret = -1;
1061 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : 980 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1062 display_thread), top)) { 981 display_thread), top)) {
1063 ui__error("Could not create display thread.\n"); 982 ui__error("Could not create display thread.\n");
1064 exit(-1); 983 goto out_delete;
1065 } 984 }
1066 985
1067 if (top->realtime_prio) { 986 if (top->realtime_prio) {
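
The enable_on_exec comment in the hunk above leans on a perf_event_attr contract worth spelling out: a disabled event with enable_on_exec set is armed by the kernel at the workload's exec(), so enabling it by hand beforehand would only add noise from the tool's own setup. A sketch with the raw syscall, assuming exactly the semantics the comment describes (not the evsel code):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_for_workload(pid_t child)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.type           = PERF_TYPE_SOFTWARE;
	attr.config         = PERF_COUNT_SW_TASK_CLOCK;
	attr.disabled       = 1;	/* do not count yet...             */
	attr.enable_on_exec = 1;	/* ...the kernel enables at exec() */

	return syscall(__NR_perf_event_open, &attr, child, -1, -1, 0);
}
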
@@ -1070,11 +989,11 @@ static int __cmd_top(struct perf_top *top)
1070 param.sched_priority = top->realtime_prio; 989 param.sched_priority = top->realtime_prio;
1071 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 990 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1072 ui__error("Could not set realtime priority.\n"); 991 ui__error("Could not set realtime priority.\n");
1073 exit(-1); 992 goto out_delete;
1074 } 993 }
1075 } 994 }
1076 995
1077 while (1) { 996 while (!done) {
1078 u64 hits = top->samples; 997 u64 hits = top->samples;
1079 998
1080 perf_top__mmap_read(top); 999 perf_top__mmap_read(top);
@@ -1083,126 +1002,67 @@ static int __cmd_top(struct perf_top *top)
1083 ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); 1002 ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1084 } 1003 }
1085 1004
1005 ret = 0;
1086out_delete: 1006out_delete:
1087 perf_session__delete(top->session); 1007 perf_session__delete(top->session);
1088 top->session = NULL; 1008 top->session = NULL;
1089 1009
1090 return 0; 1010 return ret;
1091} 1011}
1092 1012
1093static int 1013static int
1094parse_callchain_opt(const struct option *opt, const char *arg, int unset) 1014parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1095{ 1015{
1096 struct perf_top *top = (struct perf_top *)opt->value;
1097 char *tok, *tok2;
1098 char *endptr;
1099
1100 /* 1016 /*
1101 * --no-call-graph 1017 * --no-call-graph
1102 */ 1018 */
1103 if (unset) { 1019 if (unset)
1104 top->dont_use_callchains = true;
1105 return 0; 1020 return 0;
1106 }
1107 1021
1108 symbol_conf.use_callchain = true; 1022 symbol_conf.use_callchain = true;
1109 1023
1110 if (!arg) 1024 return record_parse_callchain_opt(opt, arg, unset);
1111 return 0;
1112
1113 tok = strtok((char *)arg, ",");
1114 if (!tok)
1115 return -1;
1116
1117 /* get the output mode */
1118 if (!strncmp(tok, "graph", strlen(arg)))
1119 callchain_param.mode = CHAIN_GRAPH_ABS;
1120
1121 else if (!strncmp(tok, "flat", strlen(arg)))
1122 callchain_param.mode = CHAIN_FLAT;
1123
1124 else if (!strncmp(tok, "fractal", strlen(arg)))
1125 callchain_param.mode = CHAIN_GRAPH_REL;
1126
1127 else if (!strncmp(tok, "none", strlen(arg))) {
1128 callchain_param.mode = CHAIN_NONE;
1129 symbol_conf.use_callchain = false;
1130
1131 return 0;
1132 } else
1133 return -1;
1134
1135 /* get the min percentage */
1136 tok = strtok(NULL, ",");
1137 if (!tok)
1138 goto setup;
1139
1140 callchain_param.min_percent = strtod(tok, &endptr);
1141 if (tok == endptr)
1142 return -1;
1143
1144 /* get the print limit */
1145 tok2 = strtok(NULL, ",");
1146 if (!tok2)
1147 goto setup;
1148
1149 if (tok2[0] != 'c') {
1150 callchain_param.print_limit = strtod(tok2, &endptr);
1151 tok2 = strtok(NULL, ",");
1152 if (!tok2)
1153 goto setup;
1154 }
1155
1156 /* get the call chain order */
1157 if (!strcmp(tok2, "caller"))
1158 callchain_param.order = ORDER_CALLER;
1159 else if (!strcmp(tok2, "callee"))
1160 callchain_param.order = ORDER_CALLEE;
1161 else
1162 return -1;
1163setup:
1164 if (callchain_register_param(&callchain_param) < 0) {
1165 fprintf(stderr, "Can't register callchain params\n");
1166 return -1;
1167 }
1168 return 0;
1169} 1025}
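
The old open-coded strtok() parser above gives way to the record command's parser, so top and record now accept the same "mode[,dump_size]" syntax, defaulting to "fp" per the option table further down. A toy parser of that shape, purely illustrative (record_parse_callchain_opt's real behavior is not shown in this diff):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Accepts "fp" or "dwarf[,size]"; returns 0 on success. */
static int parse_callchain_arg(const char *arg, unsigned long *dump_size)
{
	char *comma, buf[64];

	snprintf(buf, sizeof(buf), "%s", arg);
	comma = strchr(buf, ',');
	if (comma)
		*comma = '\0';

	if (!strcmp(buf, "fp"))
		return 0;
	if (!strcmp(buf, "dwarf")) {
		if (comma)
			*dump_size = strtoul(comma + 1, NULL, 0);
		return 0;
	}
	return -1;
}
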
1170 1026
1171int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) 1027int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1172{ 1028{
1173 struct perf_evsel *pos;
1174 int status; 1029 int status;
1175 char errbuf[BUFSIZ]; 1030 char errbuf[BUFSIZ];
1176 struct perf_top top = { 1031 struct perf_top top = {
1177 .count_filter = 5, 1032 .count_filter = 5,
1178 .delay_secs = 2, 1033 .delay_secs = 2,
1179 .freq = 4000, /* 4 KHz */ 1034 .record_opts = {
1180 .mmap_pages = 128, 1035 .mmap_pages = UINT_MAX,
1181 .sym_pcnt_filter = 5, 1036 .user_freq = UINT_MAX,
1182 .target = { 1037 .user_interval = ULLONG_MAX,
1183 .uses_mmap = true, 1038 .freq = 4000, /* 4 KHz */
1039 .target = {
1040 .uses_mmap = true,
1041 },
1184 }, 1042 },
1043 .sym_pcnt_filter = 5,
1185 }; 1044 };
1186 char callchain_default_opt[] = "fractal,0.5,callee"; 1045 struct perf_record_opts *opts = &top.record_opts;
1046 struct perf_target *target = &opts->target;
1187 const struct option options[] = { 1047 const struct option options[] = {
1188 OPT_CALLBACK('e', "event", &top.evlist, "event", 1048 OPT_CALLBACK('e', "event", &top.evlist, "event",
1189 "event selector. use 'perf list' to list available events", 1049 "event selector. use 'perf list' to list available events",
1190 parse_events_option), 1050 parse_events_option),
1191 OPT_INTEGER('c', "count", &top.default_interval, 1051 OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
1192 "event period to sample"), 1052 OPT_STRING('p', "pid", &target->pid, "pid",
1193 OPT_STRING('p', "pid", &top.target.pid, "pid",
1194 "profile events on existing process id"), 1053 "profile events on existing process id"),
1195 OPT_STRING('t', "tid", &top.target.tid, "tid", 1054 OPT_STRING('t', "tid", &target->tid, "tid",
1196 "profile events on existing thread id"), 1055 "profile events on existing thread id"),
1197 OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide, 1056 OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
1198 "system-wide collection from all CPUs"), 1057 "system-wide collection from all CPUs"),
1199 OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu", 1058 OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
1200 "list of cpus to monitor"), 1059 "list of cpus to monitor"),
1201 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1060 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1202 "file", "vmlinux pathname"), 1061 "file", "vmlinux pathname"),
1203 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, 1062 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1204 "hide kernel symbols"), 1063 "hide kernel symbols"),
1205 OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"), 1064 OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages,
1065 "number of mmap data pages"),
1206 OPT_INTEGER('r', "realtime", &top.realtime_prio, 1066 OPT_INTEGER('r', "realtime", &top.realtime_prio,
1207 "collect data with this RT SCHED_FIFO priority"), 1067 "collect data with this RT SCHED_FIFO priority"),
1208 OPT_INTEGER('d', "delay", &top.delay_secs, 1068 OPT_INTEGER('d', "delay", &top.delay_secs,
@@ -1211,16 +1071,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1211 "dump the symbol table used for profiling"), 1071 "dump the symbol table used for profiling"),
1212 OPT_INTEGER('f', "count-filter", &top.count_filter, 1072 OPT_INTEGER('f', "count-filter", &top.count_filter,
1213 "only display functions with more events than this"), 1073 "only display functions with more events than this"),
1214 OPT_BOOLEAN('g', "group", &top.group, 1074 OPT_BOOLEAN('g', "group", &opts->group,
1215 "put the counters into a counter group"), 1075 "put the counters into a counter group"),
1216 OPT_BOOLEAN('i', "inherit", &top.inherit, 1076 OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
1217 "child tasks inherit counters"), 1077 "child tasks do not inherit counters"),
1218 OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name", 1078 OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
1219 "symbol to annotate"), 1079 "symbol to annotate"),
1220 OPT_BOOLEAN('z', "zero", &top.zero, 1080 OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
1221 "zero history across updates"), 1081 OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"),
1222 OPT_INTEGER('F', "freq", &top.freq,
1223 "profile at this frequency"),
1224 OPT_INTEGER('E', "entries", &top.print_entries, 1082 OPT_INTEGER('E', "entries", &top.print_entries,
1225 "display this many functions"), 1083 "display this many functions"),
1226 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, 1084 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
@@ -1233,10 +1091,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1233 "sort by key(s): pid, comm, dso, symbol, parent"), 1091 "sort by key(s): pid, comm, dso, symbol, parent"),
1234 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 1092 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1235 "Show a column with the number of samples"), 1093 "Show a column with the number of samples"),
1236 OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order", 1094 OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
1237 "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " 1095 "mode[,dump_size]", record_callchain_help,
1238 "Default: fractal,0.5,callee", &parse_callchain_opt, 1096 &parse_callchain_opt, "fp"),
1239 callchain_default_opt),
1240 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 1097 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1241 "Show a column with the sum of periods"), 1098 "Show a column with the sum of periods"),
1242 OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 1099 OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
@@ -1251,7 +1108,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1251 "Display raw encoding of assembly instructions (default)"), 1108 "Display raw encoding of assembly instructions (default)"),
1252 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1109 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1253 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1110 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1254 OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"), 1111 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
1255 OPT_END() 1112 OPT_END()
1256 }; 1113 };
1257 const char * const top_usage[] = { 1114 const char * const top_usage[] = {
@@ -1272,7 +1129,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1272 if (sort_order == default_sort_order) 1129 if (sort_order == default_sort_order)
1273 sort_order = "dso,symbol"; 1130 sort_order = "dso,symbol";
1274 1131
1275 setup_sorting(top_usage, options); 1132 if (setup_sorting() < 0)
1133 usage_with_options(top_usage, options);
1276 1134
1277 if (top.use_stdio) 1135 if (top.use_stdio)
1278 use_browser = 0; 1136 use_browser = 0;
@@ -1281,33 +1139,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1281 1139
1282 setup_browser(false); 1140 setup_browser(false);
1283 1141
1284 status = perf_target__validate(&top.target); 1142 status = perf_target__validate(target);
1285 if (status) { 1143 if (status) {
1286 perf_target__strerror(&top.target, status, errbuf, BUFSIZ); 1144 perf_target__strerror(target, status, errbuf, BUFSIZ);
1287 ui__warning("%s", errbuf); 1145 ui__warning("%s", errbuf);
1288 } 1146 }
1289 1147
1290 status = perf_target__parse_uid(&top.target); 1148 status = perf_target__parse_uid(target);
1291 if (status) { 1149 if (status) {
1292 int saved_errno = errno; 1150 int saved_errno = errno;
1293 1151
1294 perf_target__strerror(&top.target, status, errbuf, BUFSIZ); 1152 perf_target__strerror(target, status, errbuf, BUFSIZ);
1295 ui__error("%s", errbuf); 1153 ui__error("%s", errbuf);
1296 1154
1297 status = -saved_errno; 1155 status = -saved_errno;
1298 goto out_delete_evlist; 1156 goto out_delete_evlist;
1299 } 1157 }
1300 1158
1301 if (perf_target__none(&top.target)) 1159 if (perf_target__none(target))
1302 top.target.system_wide = true; 1160 target->system_wide = true;
1303 1161
1304 if (perf_evlist__create_maps(top.evlist, &top.target) < 0) 1162 if (perf_evlist__create_maps(top.evlist, target) < 0)
1305 usage_with_options(top_usage, options); 1163 usage_with_options(top_usage, options);
1306 1164
1307 if (!top.evlist->nr_entries && 1165 if (!top.evlist->nr_entries &&
1308 perf_evlist__add_default(top.evlist) < 0) { 1166 perf_evlist__add_default(top.evlist) < 0) {
1309 ui__error("Not enough memory for event selector list\n"); 1167 ui__error("Not enough memory for event selector list\n");
1310 return -ENOMEM; 1168 goto out_delete_maps;
1311 } 1169 }
1312 1170
1313 symbol_conf.nr_events = top.evlist->nr_entries; 1171 symbol_conf.nr_events = top.evlist->nr_entries;
@@ -1315,24 +1173,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1315 if (top.delay_secs < 1) 1173 if (top.delay_secs < 1)
1316 top.delay_secs = 1; 1174 top.delay_secs = 1;
1317 1175
1176 if (opts->user_interval != ULLONG_MAX)
1177 opts->default_interval = opts->user_interval;
1178 if (opts->user_freq != UINT_MAX)
1179 opts->freq = opts->user_freq;
1180
1318 /* 1181 /*
1319 * User specified count overrides default frequency. 1182 * User specified count overrides default frequency.
1320 */ 1183 */
1321 if (top.default_interval) 1184 if (opts->default_interval)
1322 top.freq = 0; 1185 opts->freq = 0;
1323 else if (top.freq) { 1186 else if (opts->freq) {
1324 top.default_interval = top.freq; 1187 opts->default_interval = opts->freq;
1325 } else { 1188 } else {
1326 ui__error("frequency and count are zero, aborting\n"); 1189 ui__error("frequency and count are zero, aborting\n");
1327 exit(EXIT_FAILURE); 1190 status = -EINVAL;
1328 } 1191 goto out_delete_maps;
1329
1330 list_for_each_entry(pos, &top.evlist->entries, node) {
1331 /*
1332 * Fill in the ones not specifically initialized via -c:
1333 */
1334 if (!pos->attr.sample_period)
1335 pos->attr.sample_period = top.default_interval;
1336 } 1192 }
1337 1193
1338 top.sym_evsel = perf_evlist__first(top.evlist); 1194 top.sym_evsel = perf_evlist__first(top.evlist);
@@ -1365,6 +1221,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1365 1221
1366 status = __cmd_top(&top); 1222 status = __cmd_top(&top);
1367 1223
1224out_delete_maps:
1225 perf_evlist__delete_maps(top.evlist);
1368out_delete_evlist: 1226out_delete_evlist:
1369 perf_evlist__delete(top.evlist); 1227 perf_evlist__delete(top.evlist);
1370 1228
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7932ffa29889..d222d7fc7e96 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -455,7 +455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
455 goto out_delete_evlist; 455 goto out_delete_evlist;
456 } 456 }
457 457
458 perf_evlist__config_attrs(evlist, &trace->opts); 458 perf_evlist__config(evlist, &trace->opts);
459 459
460 signal(SIGCHLD, sig_handler); 460 signal(SIGCHLD, sig_handler);
461 signal(SIGINT, sig_handler); 461 signal(SIGINT, sig_handler);
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index f5ac77485a4f..b4eabb44e381 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -225,3 +225,14 @@ int main(void)
225 return on_exit(NULL, NULL); 225 return on_exit(NULL, NULL);
226} 226}
227endef 227endef
228
229define SOURCE_LIBNUMA
230#include <numa.h>
231#include <numaif.h>
232
233int main(void)
234{
235 numa_available();
236 return 0;
237}
238endef
\ No newline at end of file
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index e5413125e6bb..8ef3bd30a549 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -13,7 +13,7 @@ newline := $(newline)
13# what should replace a newline when escaping 13# what should replace a newline when escaping
14# newlines; the default is a bizarre string. 14# newlines; the default is a bizarre string.
15# 15#
16nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n) 16nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n)
17 17
18# escape-nl 18# escape-nl
19# 19#
@@ -173,9 +173,9 @@ _ge-abspath = $(if $(is-executable),$(1))
173# Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default) 173# Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
174# 174#
175define get-executable-or-default 175define get-executable-or-default
176$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) 176$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1)))
177endef 177endef
178_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2))) 178_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2)))
179_gea_warn = $(warning The path '$(1)' is not executable.) 179_gea_warn = $(warning The path '$(1)' is not executable.)
180_gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) 180_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
181 181
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 0f661fbce6a8..095b88207cd3 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -328,14 +328,23 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
328 if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) 328 if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
329 return 0; 329 return 0;
330 330
331 status = 1;
331 /* Check for ENOSPC and EIO errors.. */ 332 /* Check for ENOSPC and EIO errors.. */
332 if (fflush(stdout)) 333 if (fflush(stdout)) {
333 die("write failure on standard output: %s", strerror(errno)); 334 fprintf(stderr, "write failure on standard output: %s", strerror(errno));
334 if (ferror(stdout)) 335 goto out;
335 die("unknown write failure on standard output"); 336 }
336 if (fclose(stdout)) 337 if (ferror(stdout)) {
337 die("close failed on standard output: %s", strerror(errno)); 338 fprintf(stderr, "unknown write failure on standard output");
338 return 0; 339 goto out;
340 }
341 if (fclose(stdout)) {
342 fprintf(stderr, "close failed on standard output: %s", strerror(errno));
343 goto out;
344 }
345 status = 0;
346out:
347 return status;
339} 348}
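
run_builtin() now reports stdout trouble through its return value rather than die(). The flush-then-close pattern it adopts, isolated as a sketch (same checks, hypothetical helper name):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Flush and close stdout explicitly so ENOSPC/EIO surface as a
 * failing exit code instead of silent truncation. */
static int close_stdout(void)
{
	if (fflush(stdout)) {
		fprintf(stderr, "write failure on standard output: %s\n",
			strerror(errno));
		return 1;
	}
	if (ferror(stdout)) {
		fprintf(stderr, "unknown write failure on standard output\n");
		return 1;
	}
	if (fclose(stdout)) {
		fprintf(stderr, "close failed on standard output: %s\n",
			strerror(errno));
		return 1;
	}
	return 0;
}
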
340 349
341static void handle_internal_command(int argc, const char **argv) 350static void handle_internal_command(int argc, const char **argv)
@@ -467,7 +476,8 @@ int main(int argc, const char **argv)
467 cmd += 5; 476 cmd += 5;
468 argv[0] = cmd; 477 argv[0] = cmd;
469 handle_internal_command(argc, argv); 478 handle_internal_command(argc, argv);
470 die("cannot handle %s internally", cmd); 479 fprintf(stderr, "cannot handle %s internally", cmd);
480 goto out;
471 } 481 }
472 482
473 /* Look for flags.. */ 483 /* Look for flags.. */
@@ -485,7 +495,7 @@ int main(int argc, const char **argv)
485 printf("\n usage: %s\n\n", perf_usage_string); 495 printf("\n usage: %s\n\n", perf_usage_string);
486 list_common_cmds_help(); 496 list_common_cmds_help();
487 printf("\n %s\n\n", perf_more_info_string); 497 printf("\n %s\n\n", perf_more_info_string);
488 exit(1); 498 goto out;
489 } 499 }
490 cmd = argv[0]; 500 cmd = argv[0];
491 501
@@ -517,7 +527,7 @@ int main(int argc, const char **argv)
517 fprintf(stderr, "Expansion of alias '%s' failed; " 527 fprintf(stderr, "Expansion of alias '%s' failed; "
518 "'%s' is not a perf-command\n", 528 "'%s' is not a perf-command\n",
519 cmd, argv[0]); 529 cmd, argv[0]);
520 exit(1); 530 goto out;
521 } 531 }
522 if (!done_help) { 532 if (!done_help) {
523 cmd = argv[0] = help_unknown_cmd(cmd); 533 cmd = argv[0] = help_unknown_cmd(cmd);
@@ -528,6 +538,6 @@ int main(int argc, const char **argv)
528 538
529 fprintf(stderr, "Failed to run command '%s': %s\n", 539 fprintf(stderr, "Failed to run command '%s': %s\n",
530 cmd, strerror(errno)); 540 cmd, strerror(errno));
531 541out:
532 return 1; 542 return 1;
533} 543}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c340e7da458..c2206c87fc9f 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,10 +1,6 @@
1#ifndef _PERF_PERF_H 1#ifndef _PERF_PERF_H
2#define _PERF_PERF_H 2#define _PERF_PERF_H
3 3
4struct winsize;
5
6void get_term_dimensions(struct winsize *ws);
7
8#include <asm/unistd.h> 4#include <asm/unistd.h>
9 5
10#if defined(__i386__) 6#if defined(__i386__)
@@ -107,32 +103,6 @@ void get_term_dimensions(struct winsize *ws);
107#include "util/types.h" 103#include "util/types.h"
108#include <stdbool.h> 104#include <stdbool.h>
109 105
110struct perf_mmap {
111 void *base;
112 int mask;
113 unsigned int prev;
114};
115
116static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
117{
118 struct perf_event_mmap_page *pc = mm->base;
119 int head = pc->data_head;
120 rmb();
121 return head;
122}
123
124static inline void perf_mmap__write_tail(struct perf_mmap *md,
125 unsigned long tail)
126{
127 struct perf_event_mmap_page *pc = md->base;
128
129 /*
130 * ensure all reads are done before we write the tail out.
131 */
132 /* mb(); */
133 pc->data_tail = tail;
134}
135
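
The removed helpers implemented the reader side of the perf mmap ring buffer: load data_head, read-barrier, consume samples, then publish data_tail so the kernel can reuse the space. A sketch of the same protocol, with a GCC acquire/release pair standing in for the tools' rmb() macro (the helpers are dropped from perf.h here, presumably relocated alongside the evlist mmap code rather than lost):

#include <linux/perf_event.h>

static inline __u64 ring_read_head(struct perf_event_mmap_page *pc)
{
	/* acquire: order the head load before any sample reads */
	return __atomic_load_n(&pc->data_head, __ATOMIC_ACQUIRE);
}

static inline void ring_write_tail(struct perf_event_mmap_page *pc, __u64 tail)
{
	/* release: all sample reads complete before the tail store */
	__atomic_store_n(&pc->data_tail, tail, __ATOMIC_RELEASE);
}
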
136/* 106/*
137 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all 107 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
138 * counters in the current task. 108 * counters in the current task.
@@ -237,8 +207,6 @@ struct perf_record_opts {
237 bool raw_samples; 207 bool raw_samples;
238 bool sample_address; 208 bool sample_address;
239 bool sample_time; 209 bool sample_time;
240 bool sample_id_all_missing;
241 bool exclude_guest_missing;
242 bool period; 210 bool period;
243 unsigned int freq; 211 unsigned int freq;
244 unsigned int mmap_pages; 212 unsigned int mmap_pages;
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
deleted file mode 100644
index 8edda9078d5d..000000000000
--- a/tools/perf/scripts/perl/bin/workqueue-stats-record
+++ /dev/null
@@ -1,2 +0,0 @@
1#!/bin/bash
2perf record -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
deleted file mode 100644
index 6d91411d248c..000000000000
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ /dev/null
@@ -1,3 +0,0 @@
1#!/bin/bash
2# description: workqueue stats (ins/exe/create/destroy)
3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
index 4bb3ecd33472..8b20787021c1 100644
--- a/tools/perf/scripts/perl/rwtop.pl
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -17,6 +17,7 @@ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
17use lib "./Perf-Trace-Util/lib"; 17use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core; 18use Perf::Trace::Core;
19use Perf::Trace::Util; 19use Perf::Trace::Util;
20use POSIX qw/SIGALRM SA_RESTART/;
20 21
21my $default_interval = 3; 22my $default_interval = 3;
22my $nlines = 20; 23my $nlines = 20;
@@ -90,7 +91,10 @@ sub syscalls::sys_enter_write
90 91
91sub trace_begin 92sub trace_begin
92{ 93{
93 $SIG{ALRM} = \&set_print_pending; 94 my $sa = POSIX::SigAction->new(\&set_print_pending);
95 $sa->flags(SA_RESTART);
96 $sa->safe(1);
97 POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n";
94 alarm 1; 98 alarm 1;
95} 99}
96 100
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
deleted file mode 100644
index a8eaff5119e0..000000000000
--- a/tools/perf/scripts/perl/workqueue-stats.pl
+++ /dev/null
@@ -1,129 +0,0 @@
1#!/usr/bin/perl -w
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# Displays workqueue stats
6#
7# Usage:
8#
9# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e
10# workqueue:workqueue_destruction -e workqueue:workqueue_execution
11# -e workqueue:workqueue_insertion
12#
13# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
14
15use 5.010000;
16use strict;
17use warnings;
18
19use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
20use lib "./Perf-Trace-Util/lib";
21use Perf::Trace::Core;
22use Perf::Trace::Util;
23
24my @cpus;
25
26sub workqueue::workqueue_destruction
27{
28 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
29 $common_pid, $common_comm,
30 $thread_comm, $thread_pid) = @_;
31
32 $cpus[$common_cpu]{$thread_pid}{destroyed}++;
33 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
34}
35
36sub workqueue::workqueue_creation
37{
38 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
39 $common_pid, $common_comm,
40 $thread_comm, $thread_pid, $cpu) = @_;
41
42 $cpus[$common_cpu]{$thread_pid}{created}++;
43 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
44}
45
46sub workqueue::workqueue_execution
47{
48 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
49 $common_pid, $common_comm,
50 $thread_comm, $thread_pid, $func) = @_;
51
52 $cpus[$common_cpu]{$thread_pid}{executed}++;
53 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
54}
55
56sub workqueue::workqueue_insertion
57{
58 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
59 $common_pid, $common_comm,
60 $thread_comm, $thread_pid, $func) = @_;
61
62 $cpus[$common_cpu]{$thread_pid}{inserted}++;
63 $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm;
64}
65
66sub trace_end
67{
68 print "workqueue work stats:\n\n";
69 my $cpu = 0;
70 printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name");
71 printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----");
72 foreach my $pidhash (@cpus) {
73 while ((my $pid, my $wqhash) = each %$pidhash) {
74 my $ins = $$wqhash{'inserted'} || 0;
75 my $exe = $$wqhash{'executed'} || 0;
76 my $comm = $$wqhash{'comm'} || "";
77 if ($ins || $exe) {
78 printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm);
79 }
80 }
81 $cpu++;
82 }
83
84 $cpu = 0;
85 print "\nworkqueue lifecycle stats:\n\n";
86 printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name");
87 printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----");
88 foreach my $pidhash (@cpus) {
89 while ((my $pid, my $wqhash) = each %$pidhash) {
90 my $created = $$wqhash{'created'} || 0;
91 my $destroyed = $$wqhash{'destroyed'} || 0;
92 my $comm = $$wqhash{'comm'} || "";
93 if ($created || $destroyed) {
94 printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed,
95 $comm);
96 }
97 }
98 $cpu++;
99 }
100
101 print_unhandled();
102}
103
104my %unhandled;
105
106sub print_unhandled
107{
108 if ((scalar keys %unhandled) == 0) {
109 return;
110 }
111
112 print "\nunhandled events:\n\n";
113
114 printf("%-40s %10s\n", "event", "count");
115 printf("%-40s %10s\n", "----------------------------------------",
116 "-----------");
117
118 foreach my $event_name (keys %unhandled) {
119 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
120 }
121}
122
123sub trace_unhandled
124{
125 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
126 $common_pid, $common_comm) = @_;
127
128 $unhandled{$event_name}++;
129}
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 25638a986257..bdcceb886f77 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -19,6 +19,11 @@
19 * permissions. All the event text files are stored there. 19 * permissions. All the event text files are stored there.
20 */ 20 */
21 21
22/*
23 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
24 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
25 */
26#define __SANE_USERSPACE_TYPES__
22#include <stdlib.h> 27#include <stdlib.h>
23#include <stdio.h> 28#include <stdio.h>
24#include <inttypes.h> 29#include <inttypes.h>
@@ -33,8 +38,6 @@
33 38
34extern int verbose; 39extern int verbose;
35 40
36bool test_attr__enabled;
37
38static char *dir; 41static char *dir;
39 42
40void test_attr__init(void) 43void test_attr__init(void)
@@ -146,7 +149,7 @@ static int run_dir(const char *d, const char *perf)
146{ 149{
147 char cmd[3*PATH_MAX]; 150 char cmd[3*PATH_MAX];
148 151
149 snprintf(cmd, 3*PATH_MAX, "python %s/attr.py -d %s/attr/ -p %s %s", 152 snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %s",
150 d, d, perf, verbose ? "-v" : ""); 153 d, d, perf, verbose ? "-v" : "");
151 154
152 return system(cmd); 155 return system(cmd);
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index e702b82dcb86..2f629ca485bc 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -68,7 +68,7 @@ class Event(dict):
68 self[key] = val 68 self[key] = val
69 69
70 def __init__(self, name, data, base): 70 def __init__(self, name, data, base):
71 log.info(" Event %s" % name); 71 log.debug(" Event %s" % name);
72 self.name = name; 72 self.name = name;
73 self.group = '' 73 self.group = ''
74 self.add(base) 74 self.add(base)
@@ -97,6 +97,14 @@ class Event(dict):
97 return False 97 return False
98 return True 98 return True
99 99
100 def diff(self, other):
101 for t in Event.terms:
102 if not self.has_key(t) or not other.has_key(t):
103 continue
104 if not self.compare_data(self[t], other[t]):
105 log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
106
107
100# Test file description needs to have following sections: 108# Test file description needs to have following sections:
101# [config] 109# [config]
102# - just single instance in file 110# - just single instance in file
@@ -113,7 +121,7 @@ class Test(object):
113 parser = ConfigParser.SafeConfigParser() 121 parser = ConfigParser.SafeConfigParser()
114 parser.read(path) 122 parser.read(path)
115 123
116 log.warning("running '%s'" % path) 124 log.debug("running '%s'" % path)
117 125
118 self.path = path 126 self.path = path
119 self.test_dir = options.test_dir 127 self.test_dir = options.test_dir
@@ -128,7 +136,7 @@ class Test(object):
128 136
129 self.expect = {} 137 self.expect = {}
130 self.result = {} 138 self.result = {}
131 log.info(" loading expected events"); 139 log.debug(" loading expected events");
132 self.load_events(path, self.expect) 140 self.load_events(path, self.expect)
133 141
134 def is_event(self, name): 142 def is_event(self, name):
@@ -164,7 +172,7 @@ class Test(object):
164 self.perf, self.command, tempdir, self.args) 172 self.perf, self.command, tempdir, self.args)
165 ret = os.WEXITSTATUS(os.system(cmd)) 173 ret = os.WEXITSTATUS(os.system(cmd))
166 174
167 log.info(" running '%s' ret %d " % (cmd, ret)) 175 log.warning(" running '%s' ret %d " % (cmd, ret))
168 176
169 if ret != int(self.ret): 177 if ret != int(self.ret):
170 raise Unsup(self) 178 raise Unsup(self)
@@ -172,7 +180,7 @@ class Test(object):
172 def compare(self, expect, result): 180 def compare(self, expect, result):
173 match = {} 181 match = {}
174 182
175 log.info(" compare"); 183 log.debug(" compare");
176 184
177 # For each expected event find all matching 185 # For each expected event find all matching
178 # events in result. Fail if there's not any. 186 # events in result. Fail if there's not any.
@@ -187,10 +195,11 @@ class Test(object):
187 else: 195 else:
188 log.debug(" ->FAIL"); 196 log.debug(" ->FAIL");
189 197
190 log.info(" match: [%s] matches %s" % (exp_name, str(exp_list))) 198 log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list)))
191 199
192 # we did not find any matching event - fail 200 # we did not find any matching event - fail
193 if (not exp_list): 201 if (not exp_list):
202 exp_event.diff(res_event)
194 raise Fail(self, 'match failure'); 203 raise Fail(self, 'match failure');
195 204
196 match[exp_name] = exp_list 205 match[exp_name] = exp_list
@@ -208,10 +217,10 @@ class Test(object):
208 if res_group not in match[group]: 217 if res_group not in match[group]:
209 raise Fail(self, 'group failure') 218 raise Fail(self, 'group failure')
210 219
211 log.info(" group: [%s] matches group leader %s" % 220 log.debug(" group: [%s] matches group leader %s" %
212 (exp_name, str(match[group]))) 221 (exp_name, str(match[group])))
213 222
214 log.info(" matched") 223 log.debug(" matched")
215 224
216 def resolve_groups(self, events): 225 def resolve_groups(self, events):
217 for name, event in events.items(): 226 for name, event in events.items():
@@ -233,7 +242,7 @@ class Test(object):
233 self.run_cmd(tempdir); 242 self.run_cmd(tempdir);
234 243
235 # load events expectation for the test 244 # load events expectation for the test
236 log.info(" loading result events"); 245 log.debug(" loading result events");
237 for f in glob.glob(tempdir + '/event*'): 246 for f in glob.glob(tempdir + '/event*'):
238 self.load_events(f, self.result); 247 self.load_events(f, self.result);
239 248
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index f1485d8e6a0b..5bc3880f7be5 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -7,7 +7,7 @@ size=96
7config=0 7config=0
8sample_period=4000 8sample_period=4000
9sample_type=263 9sample_type=263
10read_format=7 10read_format=0
11disabled=1 11disabled=1
12inherit=1 12inherit=1
13pinned=0 13pinned=0
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
index a6599e9a19d3..57739cacdb2a 100644
--- a/tools/perf/tests/attr/test-record-group
+++ b/tools/perf/tests/attr/test-record-group
@@ -6,12 +6,14 @@ args = --group -e cycles,instructions kill >/dev/null 2>&1
6fd=1 6fd=1
7group_fd=-1 7group_fd=-1
8sample_type=327 8sample_type=327
9read_format=4
9 10
10[event-2:base-record] 11[event-2:base-record]
11fd=2 12fd=2
12group_fd=1 13group_fd=1
13config=1 14config=1
14sample_type=327 15sample_type=327
16read_format=4
15mmap=0 17mmap=0
16comm=0 18comm=0
17enable_on_exec=0 19enable_on_exec=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
index 5a8359da38af..c5548d054aff 100644
--- a/tools/perf/tests/attr/test-record-group1
+++ b/tools/perf/tests/attr/test-record-group1
@@ -1,11 +1,12 @@
1[config] 1[config]
2command = record 2command = record
3args = -e '{cycles,instructions}' kill >/tmp/krava 2>&1 3args = -e '{cycles,instructions}' kill >/dev/null 2>&1
4 4
5[event-1:base-record] 5[event-1:base-record]
6fd=1 6fd=1
7group_fd=-1 7group_fd=-1
8sample_type=327 8sample_type=327
9read_format=4
9 10
10[event-2:base-record] 11[event-2:base-record]
11fd=2 12fd=2
@@ -13,6 +14,7 @@ group_fd=1
13type=0 14type=0
14config=1 15config=1
15sample_type=327 16sample_type=327
17read_format=4
16mmap=0 18mmap=0
17comm=0 19comm=0
18enable_on_exec=0 20enable_on_exec=0
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 186f67535494..acb98e0e39f2 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -4,6 +4,7 @@
4 * Builtin regression testing command: ever growing number of sanity tests 4 * Builtin regression testing command: ever growing number of sanity tests
5 */ 5 */
6#include "builtin.h" 6#include "builtin.h"
7#include "intlist.h"
7#include "tests.h" 8#include "tests.h"
8#include "debug.h" 9#include "debug.h"
9#include "color.h" 10#include "color.h"
@@ -69,6 +70,14 @@ static struct test {
69 .func = test__attr, 70 .func = test__attr,
70 }, 71 },
71 { 72 {
73 .desc = "Test matching and linking multiple hists",
74 .func = test__hists_link,
75 },
76 {
77 .desc = "Try 'use perf' in python, checking link problems",
78 .func = test__python_use,
79 },
80 {
72 .func = NULL, 81 .func = NULL,
73 }, 82 },
74}; 83};
@@ -97,7 +106,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[])
97 return false; 106 return false;
98} 107}
99 108
100static int __cmd_test(int argc, const char *argv[]) 109static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
101{ 110{
102 int i = 0; 111 int i = 0;
103 int width = 0; 112 int width = 0;
@@ -118,13 +127,28 @@ static int __cmd_test(int argc, const char *argv[])
118 continue; 127 continue;
119 128
120 pr_info("%2d: %-*s:", i, width, tests[curr].desc); 129 pr_info("%2d: %-*s:", i, width, tests[curr].desc);
130
131 if (intlist__find(skiplist, i)) {
132 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
133 continue;
134 }
135
121 pr_debug("\n--- start ---\n"); 136 pr_debug("\n--- start ---\n");
122 err = tests[curr].func(); 137 err = tests[curr].func();
123 pr_debug("---- end ----\n%s:", tests[curr].desc); 138 pr_debug("---- end ----\n%s:", tests[curr].desc);
124 if (err) 139
125 color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); 140 switch (err) {
126 else 141 case TEST_OK:
127 pr_info(" Ok\n"); 142 pr_info(" Ok\n");
143 break;
144 case TEST_SKIP:
145 color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
146 break;
147 case TEST_FAIL:
148 default:
149 color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
150 break;
151 }
128 } 152 }
129 153
130 return 0; 154 return 0;
@@ -152,11 +176,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
152 "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", 176 "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
153 NULL, 177 NULL,
154 }; 178 };
179 const char *skip = NULL;
155 const struct option test_options[] = { 180 const struct option test_options[] = {
181 OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
156 OPT_INCR('v', "verbose", &verbose, 182 OPT_INCR('v', "verbose", &verbose,
157 "be more verbose (show symbol address, etc)"), 183 "be more verbose (show symbol address, etc)"),
158 OPT_END() 184 OPT_END()
159 }; 185 };
186 struct intlist *skiplist = NULL;
160 187
161 argc = parse_options(argc, argv, test_options, test_usage, 0); 188 argc = parse_options(argc, argv, test_options, test_usage, 0);
162 if (argc >= 1 && !strcmp(argv[0], "list")) 189 if (argc >= 1 && !strcmp(argv[0], "list"))
@@ -169,5 +196,8 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
169 if (symbol__init() < 0) 196 if (symbol__init() < 0)
170 return -1; 197 return -1;
171 198
172 return __cmd_test(argc, argv); 199 if (skip != NULL)
200 skiplist = intlist__new(skip);
201
202 return __cmd_test(argc, argv, skiplist);
173} 203}
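
The new -s/--skip option threads a comma-separated index list through __cmd_test() via the intlist API. A sketch of the consultation loop, assuming intlist__new() parses strings like "1,3" and intlist__find() returns non-NULL on membership, both as used in the hunks above:

#include <stdio.h>
#include "intlist.h"	/* tools/perf util header */

static void run_tests(const char *skip, int nr_tests)
{
	struct intlist *skiplist = skip ? intlist__new(skip) : NULL;
	int i;

	for (i = 1; i <= nr_tests; i++) {
		if (skiplist && intlist__find(skiplist, i)) {
			printf("%2d: Skip (user override)\n", i);
			continue;
		}
		/* ... run tests[i - 1].func() and report as above ... */
	}
}
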
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index e61fc828a158..0fd99a9adb91 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -22,7 +22,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
22 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { 22 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
23 __perf_evsel__hw_cache_type_op_res_name(type, op, i, 23 __perf_evsel__hw_cache_type_op_res_name(type, op, i,
24 name, sizeof(name)); 24 name, sizeof(name));
25 err = parse_events(evlist, name, 0); 25 err = parse_events(evlist, name);
26 if (err) 26 if (err)
27 ret = err; 27 ret = err;
28 } 28 }
@@ -70,7 +70,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
70 return -ENOMEM; 70 return -ENOMEM;
71 71
72 for (i = 0; i < nr_names; ++i) { 72 for (i = 0; i < nr_names; ++i) {
73 err = parse_events(evlist, names[i], 0); 73 err = parse_events(evlist, names[i]);
74 if (err) { 74 if (err) {
75 pr_debug("failed to parse event '%s', err %d\n", 75 pr_debug("failed to parse event '%s', err %d\n",
76 names[i], err); 76 names[i], err);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
new file mode 100644
index 000000000000..1be64a6c5daf
--- /dev/null
+++ b/tools/perf/tests/hists_link.c
@@ -0,0 +1,500 @@
1#include "perf.h"
2#include "tests.h"
3#include "debug.h"
4#include "symbol.h"
5#include "sort.h"
6#include "evsel.h"
7#include "evlist.h"
8#include "machine.h"
9#include "thread.h"
10#include "parse-events.h"
11
12static struct {
13 u32 pid;
14 const char *comm;
15} fake_threads[] = {
16 { 100, "perf" },
17 { 200, "perf" },
18 { 300, "bash" },
19};
20
21static struct {
22 u32 pid;
23 u64 start;
24 const char *filename;
25} fake_mmap_info[] = {
26 { 100, 0x40000, "perf" },
27 { 100, 0x50000, "libc" },
28 { 100, 0xf0000, "[kernel]" },
29 { 200, 0x40000, "perf" },
30 { 200, 0x50000, "libc" },
31 { 200, 0xf0000, "[kernel]" },
32 { 300, 0x40000, "bash" },
33 { 300, 0x50000, "libc" },
34 { 300, 0xf0000, "[kernel]" },
35};
36
37struct fake_sym {
38 u64 start;
39 u64 length;
40 const char *name;
41};
42
43static struct fake_sym perf_syms[] = {
44 { 700, 100, "main" },
45 { 800, 100, "run_command" },
46 { 900, 100, "cmd_record" },
47};
48
49static struct fake_sym bash_syms[] = {
50 { 700, 100, "main" },
51 { 800, 100, "xmalloc" },
52 { 900, 100, "xfree" },
53};
54
55static struct fake_sym libc_syms[] = {
56 { 700, 100, "malloc" },
57 { 800, 100, "free" },
58 { 900, 100, "realloc" },
59};
60
61static struct fake_sym kernel_syms[] = {
62 { 700, 100, "schedule" },
63 { 800, 100, "page_fault" },
64 { 900, 100, "sys_perf_event_open" },
65};
66
67static struct {
68 const char *dso_name;
69 struct fake_sym *syms;
70 size_t nr_syms;
71} fake_symbols[] = {
72 { "perf", perf_syms, ARRAY_SIZE(perf_syms) },
73 { "bash", bash_syms, ARRAY_SIZE(bash_syms) },
74 { "libc", libc_syms, ARRAY_SIZE(libc_syms) },
75 { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
76};
77
78static struct machine *setup_fake_machine(struct machines *machines)
79{
80 struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
81 size_t i;
82
83 if (machine == NULL) {
84 pr_debug("Not enough memory for machine setup\n");
85 return NULL;
86 }
87
88 for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
89 struct thread *thread;
90
91 thread = machine__findnew_thread(machine, fake_threads[i].pid);
92 if (thread == NULL)
93 goto out;
94
95 thread__set_comm(thread, fake_threads[i].comm);
96 }
97
98 for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
99 union perf_event fake_mmap_event = {
100 .mmap = {
101 .header = { .misc = PERF_RECORD_MISC_USER, },
102 .pid = fake_mmap_info[i].pid,
103 .start = fake_mmap_info[i].start,
104 .len = 0x1000ULL,
105 .pgoff = 0ULL,
106 },
107 };
108
109 strcpy(fake_mmap_event.mmap.filename,
110 fake_mmap_info[i].filename);
111
112 machine__process_mmap_event(machine, &fake_mmap_event);
113 }
114
115 for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
116 size_t k;
117 struct dso *dso;
118
119 dso = __dsos__findnew(&machine->user_dsos,
120 fake_symbols[i].dso_name);
121 if (dso == NULL)
122 goto out;
123
124 /* emulate dso__load() */
125 dso__set_loaded(dso, MAP__FUNCTION);
126
127 for (k = 0; k < fake_symbols[i].nr_syms; k++) {
128 struct symbol *sym;
129 struct fake_sym *fsym = &fake_symbols[i].syms[k];
130
131 sym = symbol__new(fsym->start, fsym->length,
132 STB_GLOBAL, fsym->name);
133 if (sym == NULL)
134 goto out;
135
136 symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
137 }
138 }
139
140 return machine;
141
142out:
143 pr_debug("Not enough memory for machine setup\n");
144 machine__delete_threads(machine);
145 machine__delete(machine);
146 return NULL;
147}
148
149struct sample {
150 u32 pid;
151 u64 ip;
152 struct thread *thread;
153 struct map *map;
154 struct symbol *sym;
155};
156
157static struct sample fake_common_samples[] = {
158 /* perf [kernel] schedule() */
159 { .pid = 100, .ip = 0xf0000 + 700, },
160 /* perf [perf] main() */
161 { .pid = 200, .ip = 0x40000 + 700, },
162 /* perf [perf] cmd_record() */
163 { .pid = 200, .ip = 0x40000 + 900, },
164 /* bash [bash] xmalloc() */
165 { .pid = 300, .ip = 0x40000 + 800, },
166 /* bash [libc] malloc() */
167 { .pid = 300, .ip = 0x50000 + 700, },
168};
169
170static struct sample fake_samples[][5] = {
171 {
172 /* perf [perf] run_command() */
173 { .pid = 100, .ip = 0x40000 + 800, },
174 /* perf [libc] malloc() */
175 { .pid = 100, .ip = 0x50000 + 700, },
176 /* perf [kernel] page_fault() */
177 { .pid = 100, .ip = 0xf0000 + 800, },
178 /* perf [kernel] sys_perf_event_open() */
179 { .pid = 200, .ip = 0xf0000 + 900, },
180 /* bash [libc] free() */
181 { .pid = 300, .ip = 0x50000 + 800, },
182 },
183 {
184 /* perf [libc] free() */
185 { .pid = 200, .ip = 0x50000 + 800, },
186 /* bash [libc] malloc() */
187 { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
188 /* bash [bash] xfree() */
189 { .pid = 300, .ip = 0x40000 + 900, },
190 /* bash [libc] realloc() */
191 { .pid = 300, .ip = 0x50000 + 900, },
192 /* bash [kernel] page_fault() */
193 { .pid = 300, .ip = 0xf0000 + 800, },
194 },
195};
196
197static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
198{
199 struct perf_evsel *evsel;
200 struct addr_location al;
201 struct hist_entry *he;
202 struct perf_sample sample = { .cpu = 0, };
203 size_t i = 0, k;
204
205 /*
206 * each evsel will have 10 samples - 5 common and 5 distinct.
207 * However the second evsel also has a collapsed entry for
208 * "bash [libc] malloc" so 9 entries in total will be in the tree.
209 */
210 list_for_each_entry(evsel, &evlist->entries, node) {
211 for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
212 const union perf_event event = {
213 .ip = {
214 .header = {
215 .misc = PERF_RECORD_MISC_USER,
216 },
217 .pid = fake_common_samples[k].pid,
218 .ip = fake_common_samples[k].ip,
219 },
220 };
221
222 if (perf_event__preprocess_sample(&event, machine, &al,
223 &sample, 0) < 0)
224 goto out;
225
226 he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
227 if (he == NULL)
228 goto out;
229
230 fake_common_samples[k].thread = al.thread;
231 fake_common_samples[k].map = al.map;
232 fake_common_samples[k].sym = al.sym;
233 }
234
235 for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
236 const union perf_event event = {
237 .ip = {
238 .header = {
239 .misc = PERF_RECORD_MISC_USER,
240 },
241 .pid = fake_samples[i][k].pid,
242 .ip = fake_samples[i][k].ip,
243 },
244 };
245
246 if (perf_event__preprocess_sample(&event, machine, &al,
247 &sample, 0) < 0)
248 goto out;
249
250 he = __hists__add_entry(&evsel->hists, &al, NULL, 1);
251 if (he == NULL)
252 goto out;
253
254 fake_samples[i][k].thread = al.thread;
255 fake_samples[i][k].map = al.map;
256 fake_samples[i][k].sym = al.sym;
257 }
258 i++;
259 }
260
261 return 0;
262
263out:
264 pr_debug("Not enough memory for adding a hist entry\n");
265 return -1;
266}
267
268static int find_sample(struct sample *samples, size_t nr_samples,
269 struct thread *t, struct map *m, struct symbol *s)
270{
271 while (nr_samples--) {
272 if (samples->thread == t && samples->map == m &&
273 samples->sym == s)
274 return 1;
275 samples++;
276 }
277 return 0;
278}
279
280static int __validate_match(struct hists *hists)
281{
282 size_t count = 0;
283 struct rb_root *root;
284 struct rb_node *node;
285
286 /*
287 * Only entries from fake_common_samples should have a pair.
288 */
289 if (sort__need_collapse)
290 root = &hists->entries_collapsed;
291 else
292 root = hists->entries_in;
293
294 node = rb_first(root);
295 while (node) {
296 struct hist_entry *he;
297
298 he = rb_entry(node, struct hist_entry, rb_node_in);
299
300 if (hist_entry__has_pairs(he)) {
301 if (find_sample(fake_common_samples,
302 ARRAY_SIZE(fake_common_samples),
303 he->thread, he->ms.map, he->ms.sym)) {
304 count++;
305 } else {
306 pr_debug("Can't find the matched entry\n");
307 return -1;
308 }
309 }
310
311 node = rb_next(node);
312 }
313
314 if (count != ARRAY_SIZE(fake_common_samples)) {
315 pr_debug("Invalid count for matched entries: %zd of %zd\n",
316 count, ARRAY_SIZE(fake_common_samples));
317 return -1;
318 }
319
320 return 0;
321}
322
323static int validate_match(struct hists *leader, struct hists *other)
324{
325 return __validate_match(leader) || __validate_match(other);
326}
327
328static int __validate_link(struct hists *hists, int idx)
329{
330 size_t count = 0;
331 size_t count_pair = 0;
332 size_t count_dummy = 0;
333 struct rb_root *root;
334 struct rb_node *node;
335
336 /*
337 * Leader hists (idx = 0) will have dummy entries from other,
338 * and some entries will have no pair. However every entry
339 * in the other hists should have a (dummy) pair.
340 */
341 if (sort__need_collapse)
342 root = &hists->entries_collapsed;
343 else
344 root = hists->entries_in;
345
346 node = rb_first(root);
347 while (node) {
348 struct hist_entry *he;
349
350 he = rb_entry(node, struct hist_entry, rb_node_in);
351
352 if (hist_entry__has_pairs(he)) {
353 if (!find_sample(fake_common_samples,
354 ARRAY_SIZE(fake_common_samples),
355 he->thread, he->ms.map, he->ms.sym) &&
356 !find_sample(fake_samples[idx],
357 ARRAY_SIZE(fake_samples[idx]),
358 he->thread, he->ms.map, he->ms.sym)) {
359 count_dummy++;
360 }
361 count_pair++;
362 } else if (idx) {
363 pr_debug("An entry from the other hists should have a pair\n");
364 return -1;
365 }
366
367 count++;
368 node = rb_next(node);
369 }
370
371 /*
372 * Note that we have an entry collapsed in the other (idx = 1) hists.
373 */
374 if (idx == 0) {
375 if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) {
376 pr_debug("Invalid count of dummy entries: %zd of %zd\n",
377 count_dummy, ARRAY_SIZE(fake_samples[1]) - 1);
378 return -1;
379 }
380 if (count != count_pair + ARRAY_SIZE(fake_samples[0])) {
381 pr_debug("Invalid count of total leader entries: %zd of %zd\n",
382 count, count_pair + ARRAY_SIZE(fake_samples[0]));
383 return -1;
384 }
385 } else {
386 if (count != count_pair) {
387 pr_debug("Invalid count of total other entries: %zd of %zd\n",
388 count, count_pair);
389 return -1;
390 }
391 if (count_dummy > 0) {
392 pr_debug("Other hists should not have dummy entries: %zd\n",
393 count_dummy);
394 return -1;
395 }
396 }
397
398 return 0;
399}
400
401static int validate_link(struct hists *leader, struct hists *other)
402{
403 return __validate_link(leader, 0) || __validate_link(other, 1);
404}
405
406static void print_hists(struct hists *hists)
407{
408 int i = 0;
409 struct rb_root *root;
410 struct rb_node *node;
411
412 if (sort__need_collapse)
413 root = &hists->entries_collapsed;
414 else
415 root = hists->entries_in;
416
417 pr_info("----- %s --------\n", __func__);
418 node = rb_first(root);
419 while (node) {
420 struct hist_entry *he;
421
422 he = rb_entry(node, struct hist_entry, rb_node_in);
423
424 pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
425 i, he->thread->comm, he->ms.map->dso->short_name,
426 he->ms.sym->name, he->stat.period);
427
428 i++;
429 node = rb_next(node);
430 }
431}
432
433int test__hists_link(void)
434{
435 int err = -1;
436 struct machines machines;
437 struct machine *machine = NULL;
438 struct perf_evsel *evsel, *first;
439 struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
440
441 if (evlist == NULL)
442 return -ENOMEM;
443
444 err = parse_events(evlist, "cpu-clock");
445 if (err)
446 goto out;
447 err = parse_events(evlist, "task-clock");
448 if (err)
449 goto out;
450
451 /* default sort order (comm,dso,sym) will be used */
452 if (setup_sorting() < 0)
453 goto out;
454
455 machines__init(&machines);
456
457 /* setup threads/dso/map/symbols also */
458 machine = setup_fake_machine(&machines);
459 if (!machine)
460 goto out;
461
462 if (verbose > 1)
463 machine__fprintf(machine, stderr);
464
465 /* process sample events */
466 err = add_hist_entries(evlist, machine);
467 if (err < 0)
468 goto out;
469
470 list_for_each_entry(evsel, &evlist->entries, node) {
471 hists__collapse_resort(&evsel->hists);
472
473 if (verbose > 2)
474 print_hists(&evsel->hists);
475 }
476
477 first = perf_evlist__first(evlist);
478 evsel = perf_evlist__last(evlist);
479
480 /* match common entries */
481 hists__match(&first->hists, &evsel->hists);
482 err = validate_match(&first->hists, &evsel->hists);
483 if (err)
484 goto out;
485
486 /* link common and/or dummy entries */
487 hists__link(&first->hists, &evsel->hists);
488 err = validate_link(&first->hists, &evsel->hists);
489 if (err)
490 goto out;
491
492 err = 0;
493
494out:
495 /* tear down everything */
496 perf_evlist__delete(evlist);
497 machines__exit(&machines);
498
499 return err;
500}
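The matching step exercised above depends on both hists being sorted by the same key (comm, dso, sym). As a minimal sketch of the idea — not the real hists__match(), which walks rb-trees — pairing two key-sorted arrays looks like this; struct entry and its fields are hypothetical stand-ins:

#include <stddef.h>
#include <string.h>

struct entry {
	const char *comm, *dso, *sym;	/* simplified sort key */
	struct entry *pair;		/* counterpart in the other hists */
};

static int entry_cmp(const struct entry *a, const struct entry *b)
{
	int ret = strcmp(a->comm, b->comm);

	if (!ret)
		ret = strcmp(a->dso, b->dso);
	return ret ? ret : strcmp(a->sym, b->sym);
}

/* One merge-style pass pairs every entry that exists in both sets. */
static void match_entries(struct entry *a, size_t na, struct entry *b, size_t nb)
{
	size_t i = 0, j = 0;

	while (i < na && j < nb) {
		int ret = entry_cmp(&a[i], &b[j]);

		if (!ret) {
			a[i].pair = &b[j];	/* common entry: link both ways */
			b[j].pair = &a[i];
			i++;
			j++;
		} else if (ret < 0) {
			i++;			/* only in a: stays unpaired */
		} else {
			j++;			/* only in b: stays unpaired */
		}
	}
}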
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index e1746811e14b..cdd50755af51 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -22,36 +22,16 @@ int test__basic_mmap(void)
 	struct thread_map *threads;
 	struct cpu_map *cpus;
 	struct perf_evlist *evlist;
-	struct perf_event_attr attr = {
-		.type		= PERF_TYPE_TRACEPOINT,
-		.read_format	= PERF_FORMAT_ID,
-		.sample_type	= PERF_SAMPLE_ID,
-		.watermark	= 0,
-	};
 	cpu_set_t cpu_set;
 	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
 					"getpgid", };
 	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
 				      (void*)getpgid };
 #define nsyscalls ARRAY_SIZE(syscall_names)
-	int ids[nsyscalls];
 	unsigned int nr_events[nsyscalls],
 		     expected_nr_events[nsyscalls], i, j;
 	struct perf_evsel *evsels[nsyscalls], *evsel;
 
-	for (i = 0; i < nsyscalls; ++i) {
-		char name[64];
-
-		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
-		ids[i] = trace_event__id(name);
-		if (ids[i] < 0) {
-			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
-			return -1;
-		}
-		nr_events[i] = 0;
-		expected_nr_events[i] = random() % 257;
-	}
-
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
@@ -79,18 +59,19 @@ int test__basic_mmap(void)
 		goto out_free_cpus;
 	}
 
-	/* anonymous union fields, can't be initialized above */
-	attr.wakeup_events = 1;
-	attr.sample_period = 1;
-
 	for (i = 0; i < nsyscalls; ++i) {
-		attr.config = ids[i];
-		evsels[i] = perf_evsel__new(&attr, i);
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		evsels[i] = perf_evsel__newtp("syscalls", name, i);
 		if (evsels[i] == NULL) {
 			pr_debug("perf_evsel__new\n");
 			goto out_free_evlist;
 		}
 
+		evsels[i]->attr.wakeup_events = 1;
+		perf_evsel__set_sample_id(evsels[i]);
+
 		perf_evlist__add(evlist, evsels[i]);
 
 		if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
@@ -99,6 +80,9 @@ int test__basic_mmap(void)
 				 strerror(errno));
 			goto out_close_fd;
 		}
+
+		nr_events[i] = 0;
+		expected_nr_events[i] = 1 + rand() % 127;
 	}
 
 	if (perf_evlist__mmap(evlist, 128, true) < 0) {
@@ -128,6 +112,7 @@ int test__basic_mmap(void)
 			goto out_munmap;
 		}
 
+		err = -1;
 		evsel = perf_evlist__id2evsel(evlist, sample.id);
 		if (evsel == NULL) {
 			pr_debug("event with id %" PRIu64
@@ -137,16 +122,17 @@ int test__basic_mmap(void)
 		nr_events[evsel->idx]++;
 	}
 
+	err = 0;
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
 			pr_debug("expected %d %s events, got %d\n",
 				 expected_nr_events[evsel->idx],
 				 perf_evsel__name(evsel), nr_events[evsel->idx]);
+			err = -1;
 			goto out_munmap;
 		}
 	}
 
-	err = 0;
 out_munmap:
 	perf_evlist__munmap(evlist);
 out_close_fd:
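The refactor above replaces a hand-built perf_event_attr (plus a debugfs id lookup) with the tracepoint-aware constructor. A sketch of the new shape, with the two tools/perf helpers stubbed as declarations so the fragment stands alone — the real prototypes live in perf's util headers:

#include <stdio.h>

struct perf_evsel;	/* opaque here; defined inside tools/perf */

struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name, int idx);
void perf_evsel__set_sample_id(struct perf_evsel *evsel);

static struct perf_evsel *open_syscall_evsel(const char *syscall, int idx)
{
	char name[64];
	struct perf_evsel *evsel;

	snprintf(name, sizeof(name), "sys_enter_%s", syscall);
	evsel = perf_evsel__newtp("syscalls", name, idx);	/* NULL if the tracepoint is missing */
	if (evsel)
		perf_evsel__set_sample_id(evsel);	/* request ID in samples/reads */
	return evsel;
}

Note also the switch from random() % 257 to 1 + rand() % 127 for the expected counts: the old expression could be zero, so the new one guarantees at least one event per syscall.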
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
index 31072aba0d54..b0657a9ccda6 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -7,20 +7,12 @@
 int test__open_syscall_event_on_all_cpus(void)
 {
 	int err = -1, fd, cpu;
-	struct thread_map *threads;
 	struct cpu_map *cpus;
 	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
 	cpu_set_t cpu_set;
-	int id = trace_event__id("sys_enter_open");
+	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
 
-	if (id < 0) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
-		return -1;
-	}
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
 		return -1;
@@ -32,15 +24,11 @@ int test__open_syscall_event_on_all_cpus(void)
 		goto out_thread_map_delete;
 	}
 
-
 	CPU_ZERO(&cpu_set);
 
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
 	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
 		goto out_thread_map_delete;
 	}
 
@@ -110,6 +98,7 @@ int test__open_syscall_event_on_all_cpus(void)
 		}
 	}
 
+	perf_evsel__free_counts(evsel);
 out_close_fd:
 	perf_evsel__close_fd(evsel, 1, threads->nr);
 out_evsel_delete:
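Outside the perf plumbing, this test's trick is ordinary: pin the thread to each CPU in turn and issue a fixed number of syscalls there, so every per-CPU counter ends up with a predictable value. A standalone version of that loop:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
	long cpu, ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	for (cpu = 0; cpu < ncpus; cpu++) {
		cpu_set_t set;
		int i;

		CPU_ZERO(&set);
		CPU_SET(cpu, &set);
		if (sched_setaffinity(0, sizeof(set), &set) < 0) {
			perror("sched_setaffinity");
			continue;	/* the CPU may be offline */
		}
		/* work that should be accounted to this CPU */
		for (i = 0; i < 111; i++)
			close(open("/etc/passwd", O_RDONLY));
	}
	return 0;
}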
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c
index 98be8b518b4f..befc0671f95d 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/open-syscall.c
@@ -6,29 +6,18 @@
 int test__open_syscall_event(void)
 {
 	int err = -1, fd;
-	struct thread_map *threads;
 	struct perf_evsel *evsel;
-	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
-	int id = trace_event__id("sys_enter_open");
+	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
 
-	if (id < 0) {
-		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
-		return -1;
-	}
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	if (threads == NULL) {
 		pr_debug("thread_map__new\n");
 		return -1;
 	}
 
-	memset(&attr, 0, sizeof(attr));
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.config = id;
-	evsel = perf_evsel__new(&attr, 0);
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0);
 	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
 		goto out_thread_map_delete;
 	}
 
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 32ee478905eb..c5636f36fe31 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -3,6 +3,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include "sysfs.h"
+#include "debugfs.h"
 #include "tests.h"
 #include <linux/hw_breakpoint.h>
 
@@ -22,6 +23,7 @@ static int test__checkevent_tracepoint(struct perf_evlist *evlist)
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
 
 	TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong sample_type",
 			PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
@@ -34,6 +36,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
 	struct perf_evsel *evsel;
 
 	TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+	TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		TEST_ASSERT_VAL("wrong type",
@@ -463,10 +466,10 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 
 static int test__checkterms_simple(struct list_head *terms)
 {
-	struct parse_events__term *term;
+	struct parse_events_term *term;
 
 	/* config=10 */
-	term = list_entry(terms->next, struct parse_events__term, list);
+	term = list_entry(terms->next, struct parse_events_term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
 	TEST_ASSERT_VAL("wrong type val",
@@ -475,7 +478,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* config1 */
-	term = list_entry(term->list.next, struct parse_events__term, list);
+	term = list_entry(term->list.next, struct parse_events_term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
 	TEST_ASSERT_VAL("wrong type val",
@@ -484,7 +487,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* config2=3 */
-	term = list_entry(term->list.next, struct parse_events__term, list);
+	term = list_entry(term->list.next, struct parse_events_term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
 	TEST_ASSERT_VAL("wrong type val",
@@ -493,7 +496,7 @@ static int test__checkterms_simple(struct list_head *terms)
 	TEST_ASSERT_VAL("wrong config", !term->config);
 
 	/* umask=1 */
-	term = list_entry(term->list.next, struct parse_events__term, list);
+	term = list_entry(term->list.next, struct parse_events_term, list);
 	TEST_ASSERT_VAL("wrong type term",
 			term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
 	TEST_ASSERT_VAL("wrong type val",
@@ -509,6 +512,7 @@ static int test__group1(struct perf_evlist *evlist)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* instructions:k */
 	evsel = leader = perf_evlist__first(evlist);
@@ -521,7 +525,9 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* cycles:upp */
 	evsel = perf_evsel__next(evsel);
@@ -536,6 +542,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	return 0;
 }
@@ -545,6 +552,7 @@ static int test__group2(struct perf_evlist *evlist)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* faults + :ku modifier */
 	evsel = leader = perf_evlist__first(evlist);
@@ -557,7 +565,9 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* cache-references + :u modifier */
 	evsel = perf_evsel__next(evsel);
@@ -567,10 +577,11 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
-	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles:k */
 	evsel = perf_evsel__next(evsel);
@@ -583,7 +594,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 
 	return 0;
 }
@@ -593,6 +604,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
 	/* group1 syscalls:sys_enter_open:H */
 	evsel = leader = perf_evlist__first(evlist);
@@ -606,9 +618,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 			!strcmp(leader->group_name, "group1"));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* group1 cycles:kppp */
 	evsel = perf_evsel__next(evsel);
@@ -624,6 +638,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* group2 cycles + G modifier */
 	evsel = leader = perf_evsel__next(evsel);
@@ -636,9 +651,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong group name",
 			!strcmp(leader->group_name, "group2"));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* group2 1:3 + G modifier */
 	evsel = perf_evsel__next(evsel);
@@ -651,6 +668,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* instructions:u */
 	evsel = perf_evsel__next(evsel);
@@ -663,7 +681,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 
 	return 0;
 }
@@ -673,6 +691,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
 
 	/* cycles:u + p */
 	evsel = leader = perf_evlist__first(evlist);
@@ -687,7 +706,9 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* instructions:kp + p */
 	evsel = perf_evsel__next(evsel);
@@ -702,6 +723,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	return 0;
 }
@@ -711,6 +733,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	struct perf_evsel *evsel, *leader;
 
 	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
 	/* cycles + G */
 	evsel = leader = perf_evlist__first(evlist);
@@ -724,7 +747,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* instructions + G */
 	evsel = perf_evsel__next(evsel);
@@ -738,6 +763,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles:G */
 	evsel = leader = perf_evsel__next(evsel);
@@ -751,7 +777,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
 
 	/* instructions:G */
 	evsel = perf_evsel__next(evsel);
@@ -765,6 +793,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	/* cycles */
 	evsel = perf_evsel__next(evsel);
@@ -777,18 +806,235 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
-	TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel));
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+
+	return 0;
+}
+
+static int test__group_gh1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* cycles + :H group modifier */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+	/* cache-misses:G + :H group modifier */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+	return 0;
+}
+
+static int test__group_gh2(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* cycles + :G group modifier */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+	/* cache-misses:H + :G group modifier */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+	return 0;
+}
+
+static int test__group_gh3(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* cycles:G + :u group modifier */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+	/* cache-misses:H + :u group modifier */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+	return 0;
+}
+
+static int test__group_gh4(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+	/* cycles:G + :uG group modifier */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+	/* cache-misses:H + :uG group modifier */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
 
 	return 0;
 }
 
-struct test__event_st {
+static int count_tracepoints(void)
+{
+	char events_path[PATH_MAX];
+	struct dirent *events_ent;
+	DIR *events_dir;
+	int cnt = 0;
+
+	scnprintf(events_path, PATH_MAX, "%s/tracing/events",
+		  debugfs_find_mountpoint());
+
+	events_dir = opendir(events_path);
+
+	TEST_ASSERT_VAL("Can't open events dir", events_dir);
+
+	while ((events_ent = readdir(events_dir))) {
+		char sys_path[PATH_MAX];
+		struct dirent *sys_ent;
+		DIR *sys_dir;
+
+		if (!strcmp(events_ent->d_name, ".")
+		    || !strcmp(events_ent->d_name, "..")
+		    || !strcmp(events_ent->d_name, "enable")
+		    || !strcmp(events_ent->d_name, "header_event")
+		    || !strcmp(events_ent->d_name, "header_page"))
+			continue;
+
+		scnprintf(sys_path, PATH_MAX, "%s/%s",
+			  events_path, events_ent->d_name);
+
+		sys_dir = opendir(sys_path);
+		TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
+
+		while ((sys_ent = readdir(sys_dir))) {
+			if (!strcmp(sys_ent->d_name, ".")
+			    || !strcmp(sys_ent->d_name, "..")
+			    || !strcmp(sys_ent->d_name, "enable")
+			    || !strcmp(sys_ent->d_name, "filter"))
+				continue;
+
+			cnt++;
+		}
+
+		closedir(sys_dir);
+	}
+
+	closedir(events_dir);
+	return cnt;
+}
+
+static int test__all_tracepoints(struct perf_evlist *evlist)
+{
+	TEST_ASSERT_VAL("wrong events count",
+			count_tracepoints() == evlist->nr_entries);
+
+	return test__checkevent_tracepoint_multi(evlist);
+}
+
+struct evlist_test {
 	const char *name;
 	__u32 type;
 	int (*check)(struct perf_evlist *evlist);
 };
 
-static struct test__event_st test__events[] = {
+static struct evlist_test test__events[] = {
 	[0] = {
 		.name  = "syscalls:sys_enter_open",
 		.check = test__checkevent_tracepoint,
@@ -921,9 +1167,29 @@ static struct test__event_st test__events[] = {
 		.name  = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
 		.check = test__group5,
 	},
+	[33] = {
+		.name  = "*:*",
+		.check = test__all_tracepoints,
+	},
+	[34] = {
+		.name  = "{cycles,cache-misses:G}:H",
+		.check = test__group_gh1,
+	},
+	[35] = {
+		.name  = "{cycles,cache-misses:H}:G",
+		.check = test__group_gh2,
+	},
+	[36] = {
+		.name  = "{cycles:G,cache-misses:H}:u",
+		.check = test__group_gh3,
+	},
+	[37] = {
+		.name  = "{cycles:G,cache-misses:H}:uG",
+		.check = test__group_gh4,
+	},
 };
 
-static struct test__event_st test__events_pmu[] = {
+static struct evlist_test test__events_pmu[] = {
 	[0] = {
 		.name  = "cpu/config=10,config1,config2=3,period=1000/u",
 		.check = test__checkevent_pmu,
@@ -934,20 +1200,20 @@ static struct test__event_st test__events_pmu[] = {
 	},
 };
 
-struct test__term {
+struct terms_test {
 	const char *str;
 	__u32 type;
 	int (*check)(struct list_head *terms);
 };
 
-static struct test__term test__terms[] = {
+static struct terms_test test__terms[] = {
 	[0] = {
 		.str   = "config=10,config1,config2=3,umask=1",
 		.check = test__checkterms_simple,
 	},
 };
 
-static int test_event(struct test__event_st *e)
+static int test_event(struct evlist_test *e)
 {
 	struct perf_evlist *evlist;
 	int ret;
@@ -956,7 +1222,7 @@ static int test_event(struct test__event_st *e)
 	if (evlist == NULL)
 		return -ENOMEM;
 
-	ret = parse_events(evlist, e->name, 0);
+	ret = parse_events(evlist, e->name);
 	if (ret) {
 		pr_debug("failed to parse event '%s', err %d\n",
 			 e->name, ret);
@@ -969,13 +1235,13 @@ static int test_event(struct test__event_st *e)
 	return ret;
 }
 
-static int test_events(struct test__event_st *events, unsigned cnt)
+static int test_events(struct evlist_test *events, unsigned cnt)
 {
 	int ret1, ret2 = 0;
 	unsigned i;
 
 	for (i = 0; i < cnt; i++) {
-		struct test__event_st *e = &events[i];
+		struct evlist_test *e = &events[i];
 
 		pr_debug("running test %d '%s'\n", i, e->name);
 		ret1 = test_event(e);
@@ -986,7 +1252,7 @@ static int test_events(struct test__event_st *events, unsigned cnt)
 	return ret2;
 }
 
-static int test_term(struct test__term *t)
+static int test_term(struct terms_test *t)
 {
 	struct list_head *terms;
 	int ret;
@@ -1010,13 +1276,13 @@ static int test_term(struct test__term *t)
 	return ret;
 }
 
-static int test_terms(struct test__term *terms, unsigned cnt)
+static int test_terms(struct terms_test *terms, unsigned cnt)
 {
 	int ret = 0;
 	unsigned i;
 
 	for (i = 0; i < cnt; i++) {
-		struct test__term *t = &terms[i];
+		struct terms_test *t = &terms[i];
 
 		pr_debug("running test %d '%s'\n", i, t->str);
 		ret = test_term(t);
@@ -1067,7 +1333,7 @@ static int test_pmu_events(void)
 
 	while (!ret && (ent = readdir(dir))) {
 #define MAX_NAME 100
-		struct test__event_st e;
+		struct evlist_test e;
 		char name[MAX_NAME];
 
 		if (!strcmp(ent->d_name, ".") ||
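These parser checks are table driven: each entry pairs an event string with a check callback, and the runner just walks the array. The skeleton of that pattern, using a hypothetical mirror of evlist_test (the real callback receives the parsed evlist):

#include <stdio.h>

struct evlist_test_sketch {
	const char *name;		/* string handed to the event parser */
	int (*check)(void);		/* returns 0 on success */
};

static int check_stub(void) { return 0; }	/* placeholder assertion set */

static struct evlist_test_sketch tests[] = {
	{ .name = "cycles:u",				.check = check_stub },
	{ .name = "{cycles,cache-misses:G}:H",		.check = check_stub },
};

int main(void)
{
	unsigned i;
	int err = 0;

	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
		printf("running test %u '%s'\n", i, tests[i].name);
		/* the real harness parses tests[i].name first, then checks */
		err |= tests[i].check();
	}
	return err;
}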
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 70e0d4421df8..1e8e5128d0da 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -96,22 +96,22 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__prepare_workload(evlist, &opts, argv);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
-		goto out_delete_evlist;
+		goto out_delete_maps;
 	}
 
 	/*
 	 * Config the evsels, setting attr->comm on the first one, etc.
 	 */
 	evsel = perf_evlist__first(evlist);
-	evsel->attr.sample_type |= PERF_SAMPLE_CPU;
-	evsel->attr.sample_type |= PERF_SAMPLE_TID;
-	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
-	perf_evlist__config_attrs(evlist, &opts);
+	perf_evsel__set_sample_bit(evsel, CPU);
+	perf_evsel__set_sample_bit(evsel, TID);
+	perf_evsel__set_sample_bit(evsel, TIME);
+	perf_evlist__config(evlist, &opts);
 
 	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
 		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
-		goto out_delete_evlist;
+		goto out_delete_maps;
 	}
 
 	cpu = err;
@@ -121,7 +121,7 @@ int test__PERF_RECORD(void)
 	 */
 	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
 		pr_debug("sched_setaffinity: %s\n", strerror(errno));
-		goto out_delete_evlist;
+		goto out_delete_maps;
 	}
 
 	/*
@@ -131,7 +131,7 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
 		pr_debug("perf_evlist__open: %s\n", strerror(errno));
-		goto out_delete_evlist;
+		goto out_delete_maps;
 	}
 
 	/*
@@ -142,7 +142,7 @@ int test__PERF_RECORD(void)
 	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
 	if (err < 0) {
 		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
-		goto out_delete_evlist;
+		goto out_delete_maps;
 	}
 
 	/*
@@ -305,6 +305,8 @@ found_exit:
 	}
 out_err:
 	perf_evlist__munmap(evlist);
+out_delete_maps:
+	perf_evlist__delete_maps(evlist);
out_delete_evlist:
 	perf_evlist__delete(evlist);
 out:
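The relabelled gotos follow the usual kernel unwinding pattern: each label releases exactly one resource, in reverse order of acquisition, so a failure jumps to the first label whose resource is already live — and the success path runs through the same teardown. Reduced to plain C:

#include <stdlib.h>

static int setup_and_run(void)
{
	void *evlist, *maps;
	int err = -1;

	evlist = malloc(16);		/* stands in for perf_evlist__new() */
	if (!evlist)
		goto out;
	maps = malloc(16);		/* stands in for creating cpu/thread maps */
	if (!maps)
		goto out_delete_evlist;

	err = 0;			/* ... the actual work would go here ... */

	free(maps);			/* "out_delete_maps" in the real code */
out_delete_evlist:
	free(evlist);
out:
	return err;
}

int main(void)
{
	return setup_and_run() ? 1 : 0;
}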
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index a5f379863b8f..12b322fa3475 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -19,10 +19,8 @@ static struct test_format {
 	{ "krava23", "config2:28-29,38\n", },
 };
 
-#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format))
-
 /* Simulated users input. */
-static struct parse_events__term test_terms[] = {
+static struct parse_events_term test_terms[] = {
 	{
 		.config    = (char *) "krava01",
 		.val.num   = 15,
@@ -78,7 +76,6 @@ static struct parse_events__term test_terms[] = {
 		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
 	},
 };
-#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term))
 
 /*
  * Prepare format directory data, exported by kernel
@@ -93,7 +90,7 @@ static char *test_format_dir_get(void)
 	if (!mkdtemp(dir))
 		return NULL;
 
-	for (i = 0; i < TEST_FORMATS_CNT; i++) {
+	for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
 		static char name[PATH_MAX];
 		struct test_format *format = &test_formats[i];
 		FILE *file;
@@ -130,14 +127,12 @@ static struct list_head *test_terms_list(void)
 	static LIST_HEAD(terms);
 	unsigned int i;
 
-	for (i = 0; i < TERMS_CNT; i++)
+	for (i = 0; i < ARRAY_SIZE(test_terms); i++)
 		list_add_tail(&test_terms[i].list, &terms);
 
 	return &terms;
 }
 
-#undef TERMS_CNT
-
 int test__pmu(void)
 {
 	char *format = test_format_dir_get();
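Swapping the hand-counted TEST_FORMATS_CNT/TERMS_CNT macros for ARRAY_SIZE removes a class of drift bugs: the count can no longer disagree with the array it describes. The macro is one line and only works on true arrays, not pointers:

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

int main(void)
{
	static const int vals[] = { 1, 1, 2, 3, 5, 8 };
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(vals); i++)	/* count tracks the initializer */
		printf("%d\n", vals[i]);
	return 0;
}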
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
new file mode 100644
index 000000000000..7760277c6def
--- /dev/null
+++ b/tools/perf/tests/python-use.c
@@ -0,0 +1,23 @@
1/*
2 * Just test if we can load the python binding.
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include "tests.h"
8
9extern int verbose;
10
11int test__python_use(void)
12{
13 char *cmd;
14 int ret;
15
16 if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
17 PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0)
18 return -1;
19
20 ret = system(cmd) ? -1 : 0;
21 free(cmd);
22 return ret;
23}
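PYTHON and PYTHONPATH above are compile-time macros, presumably injected by the perf build; the test simply shells out and folds any nonzero exit status into a failure. The same build-a-command-and-run pattern in isolation:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

static int run_cmd(const char *fmt, const char *arg)
{
	char *cmd;
	int ret;

	if (asprintf(&cmd, fmt, arg) < 0)	/* GNU extension: allocates the string */
		return -1;
	ret = system(cmd) ? -1 : 0;		/* any nonzero status counts as failure */
	free(cmd);
	return ret;
}

int main(void)
{
	return run_cmd("%s -c 'pass' > /dev/null 2>&1", "python") ? 1 : 0;
}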
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index fc121edab016..5de0be1ff4b6 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,12 @@
 #ifndef TESTS_H
 #define TESTS_H
 
+enum {
+	TEST_OK   =  0,
+	TEST_FAIL = -1,
+	TEST_SKIP = -2,
+};
+
 /* Tests */
 int test__vmlinux_matches_kallsyms(void);
 int test__open_syscall_event(void);
@@ -15,8 +21,7 @@ int test__pmu(void)
 int test__attr(void);
 int test__dso_data(void);
 int test__parse_events(void);
-
-/* Util */
-int trace_event__id(const char *evname);
+int test__hists_link(void);
+int test__python_use(void);
 
 #endif /* TESTS_H */
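With TEST_SKIP in place a test can report "not applicable" — say, no matching vmlinux on the box — without failing the whole run. A minimal consumer of the tri-state result; the labels are illustrative, not perf's exact output:

#include <stdio.h>

enum { TEST_OK = 0, TEST_FAIL = -1, TEST_SKIP = -2 };

static const char *test_result(int ret)
{
	switch (ret) {
	case TEST_OK:	return "Ok";
	case TEST_SKIP:	return "Skip";
	default:	return "FAILED!";
	}
}

int main(void)
{
	printf("%s\n", test_result(TEST_SKIP));	/* prints "Skip" */
	return 0;
}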
diff --git a/tools/perf/tests/util.c b/tools/perf/tests/util.c
deleted file mode 100644
index 748f2e8f6961..000000000000
--- a/tools/perf/tests/util.c
+++ /dev/null
@@ -1,30 +0,0 @@
1#include <stdio.h>
2#include <unistd.h>
3#include <stdlib.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <fcntl.h>
7#include "tests.h"
8#include "debugfs.h"
9
10int trace_event__id(const char *evname)
11{
12 char *filename;
13 int err = -1, fd;
14
15 if (asprintf(&filename,
16 "%s/syscalls/%s/id",
17 tracing_events_path, evname) < 0)
18 return -1;
19
20 fd = open(filename, O_RDONLY);
21 if (fd >= 0) {
22 char id[16];
23 if (read(fd, id, sizeof(id)) > 0)
24 err = atoi(id);
25 close(fd);
26 }
27
28 free(filename);
29 return err;
30}
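The deleted helper read the tracepoint's id file under debugfs; that lookup now happens inside perf_evsel__newtp(), which parses the event's format description instead. What the old code boiled down to, as a standalone program (assumes debugfs is mounted at the usual path and the event exists):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/tracing/events/syscalls/sys_enter_open/id";
	char buf[16];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("config = %d\n", atoi(buf));	/* becomes perf_event_attr.config */
	return 0;
}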
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 0d1cdbee2f59..7b4c4d26d1ba 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void)
 	 */
 	if (machine__create_kernel_maps(&kallsyms) < 0) {
 		pr_debug("machine__create_kernel_maps ");
-		return -1;
+		goto out;
 	}
 
 	/*
@@ -101,7 +101,8 @@ int test__vmlinux_matches_kallsyms(void)
 	 */
 	if (machine__load_vmlinux_path(&vmlinux, type,
 				       vmlinux_matches_kallsyms_filter) <= 0) {
-		pr_debug("machine__load_vmlinux_path ");
+		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
+		err = TEST_SKIP;
 		goto out;
 	}
 
@@ -226,5 +227,7 @@ detour:
 		map__fprintf(pos, stderr);
 	}
 out:
+	machine__exit(&kallsyms);
+	machine__exit(&vmlinux);
 	return err;
 }
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 4aeb7d5df939..809ea4632a34 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -273,6 +273,8 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused)
 {
 	pthread_mutex_lock(&ui__lock);
 	ui_helpline__pop();
+	free(browser->helpline);
+	browser->helpline = NULL;
 	pthread_mutex_unlock(&ui__lock);
 }
 
@@ -471,7 +473,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
 	return row;
 }
 
-static struct ui_browser__colorset {
+static struct ui_browser_colorset {
 	const char *name, *fg, *bg;
 	int colorset;
 } ui_browser__colorsets[] = {
@@ -706,7 +708,7 @@ void ui_browser__init(void)
 	perf_config(ui_browser__color_config, NULL);
 
 	while (ui_browser__colorsets[i].name) {
-		struct ui_browser__colorset *c = &ui_browser__colorsets[i++];
+		struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
 		sltt_set_color(c->colorset, c->name, c->fg, c->bg);
 	}
 
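The helpline fix pairs the free with a NULL store, so a later hide or pop sees "nothing here" rather than a dangling pointer. The generic idiom:

#include <stdlib.h>

static void reset_str(char **s)
{
	free(*s);	/* free(NULL) is defined to be a no-op */
	*s = NULL;	/* safe to call again; readers see an empty slot */
}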
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 5dab3ca96980..7dca1555c610 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -182,6 +182,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	ab->selection = dl;
 }
 
+static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym)
+{
+	if (!dl || !dl->ins || !ins__is_jump(dl->ins)
+	    || !disasm_line__has_offset(dl)
+	    || dl->ops.target.offset >= symbol__size(sym))
+		return false;
+
+	return true;
+}
+
 static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 {
 	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
@@ -195,8 +205,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	if (strstr(sym->name, "@plt"))
 		return;
 
-	if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) ||
-	    !disasm_line__has_offset(cursor))
+	if (!disasm_line__is_valid_jump(cursor, sym))
 		return;
 
 	target = ab->offsets[cursor->ops.target.offset];
@@ -788,17 +797,9 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser
 		struct disasm_line *dl = browser->offsets[offset], *dlt;
 		struct browser_disasm_line *bdlt;
 
-		if (!dl || !dl->ins || !ins__is_jump(dl->ins) ||
-		    !disasm_line__has_offset(dl))
+		if (!disasm_line__is_valid_jump(dl, sym))
 			continue;
 
-		if (dl->ops.target.offset >= size) {
-			ui__error("jump to after symbol!\n"
-				  "size: %zx, jump target: %" PRIx64,
-				  size, dl->ops.target.offset);
-			continue;
-		}
-
 		dlt = browser->offsets[dl->ops.target.offset];
 		/*
 		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
@@ -921,11 +922,11 @@ out_free_offsets:
 
 #define ANNOTATE_CFG(n) \
 	{ .name = #n, .value = &annotate_browser__opts.n, }
 
 /*
  * Keep the entries sorted, they are bsearch'ed
  */
-static struct annotate__config {
+static struct annotate_config {
 	const char *name;
 	bool *value;
 } annotate__configs[] = {
@@ -939,7 +940,7 @@ static struct annotate__config {
 
 static int annotate_config__cmp(const void *name, const void *cfgp)
 {
-	const struct annotate__config *cfg = cfgp;
+	const struct annotate_config *cfg = cfgp;
 
 	return strcmp(name, cfg->name);
 }
@@ -947,7 +948,7 @@ static int annotate_config__cmp(const void *name, const void *cfgp)
 static int annotate__config(const char *var, const char *value,
 			    void *data __maybe_unused)
 {
-	struct annotate__config *cfg;
+	struct annotate_config *cfg;
 	const char *name;
 
 	if (prefixcmp(var, "annotate.") != 0)
955 956
956 name = var + 9; 957 name = var + 9;
957 cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs), 958 cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs),
958 sizeof(struct annotate__config), annotate_config__cmp); 959 sizeof(struct annotate_config), annotate_config__cmp);
959 960
960 if (cfg == NULL) 961 if (cfg == NULL)
961 return -1; 962 return -1;
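
The annotate__configs table changed above must stay sorted because lookups go through bsearch(), as the "Keep the entries sorted" comment warns. A self-contained sketch of that sorted-option-table pattern (the option names are an illustrative subset, not the browser's full set):

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct opt {
	const char *name;
	bool *value;
};

static int opt_cmp(const void *name, const void *optp)
{
	return strcmp(name, ((const struct opt *)optp)->name);
}

static bool hide_src_code, jump_arrows = true, use_offset = true;

/* Keep sorted by name: bsearch() assumes ascending order. */
static struct opt opts[] = {
	{ "hide_src_code", &hide_src_code },
	{ "jump_arrows",   &jump_arrows   },
	{ "use_offset",    &use_offset    },
};

static struct opt *opt_find(const char *name)
{
	return bsearch(name, opts, sizeof(opts) / sizeof(opts[0]),
		       sizeof(opts[0]), opt_cmp);
}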
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index ccc4bd161420..aa22704047d6 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -567,26 +567,128 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
567 return row - first_row; 567 return row - first_row;
568} 568}
569 569
570#define HPP__COLOR_FN(_name, _field) \ 570struct hpp_arg {
571static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \ 571 struct ui_browser *b;
572 struct hist_entry *he) \ 572 char folded_sign;
573 bool current_entry;
574};
575
576static int __hpp__color_callchain(struct hpp_arg *arg)
577{
578 if (!symbol_conf.use_callchain)
579 return 0;
580
581 slsmg_printf("%c ", arg->folded_sign);
582 return 2;
583}
584
585static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
586 u64 (*get_field)(struct hist_entry *),
587 int (*callchain_cb)(struct hpp_arg *))
588{
589 int ret = 0;
590 double percent = 0.0;
591 struct hists *hists = he->hists;
592 struct hpp_arg *arg = hpp->ptr;
593
594 if (hists->stats.total_period)
595 percent = 100.0 * get_field(he) / hists->stats.total_period;
596
597 ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
598
599 if (callchain_cb)
600 ret += callchain_cb(arg);
601
602 ret += scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
603 slsmg_printf("%s", hpp->buf);
604
605 if (symbol_conf.event_group) {
606 int prev_idx, idx_delta;
607 struct perf_evsel *evsel = hists_to_evsel(hists);
608 struct hist_entry *pair;
609 int nr_members = evsel->nr_members;
610
611 if (nr_members <= 1)
612 goto out;
613
614 prev_idx = perf_evsel__group_idx(evsel);
615
616 list_for_each_entry(pair, &he->pairs.head, pairs.node) {
617 u64 period = get_field(pair);
618 u64 total = pair->hists->stats.total_period;
619
620 if (!total)
621 continue;
622
623 evsel = hists_to_evsel(pair->hists);
624 idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
625
626 while (idx_delta--) {
627 /*
628 * zero-fill group members in the middle which
629 * have no sample
630 */
631 ui_browser__set_percent_color(arg->b, 0.0,
632 arg->current_entry);
633 ret += scnprintf(hpp->buf, hpp->size,
634 " %6.2f%%", 0.0);
635 slsmg_printf("%s", hpp->buf);
636 }
637
638 percent = 100.0 * period / total;
639 ui_browser__set_percent_color(arg->b, percent,
640 arg->current_entry);
641 ret += scnprintf(hpp->buf, hpp->size,
642 " %6.2f%%", percent);
643 slsmg_printf("%s", hpp->buf);
644
645 prev_idx = perf_evsel__group_idx(evsel);
646 }
647
648 idx_delta = nr_members - prev_idx - 1;
649
650 while (idx_delta--) {
651 /*
652 * zero-fill trailing group members which have no sample
653 */
654 ui_browser__set_percent_color(arg->b, 0.0,
655 arg->current_entry);
656 ret += scnprintf(hpp->buf, hpp->size,
657 " %6.2f%%", 0.0);
658 slsmg_printf("%s", hpp->buf);
659 }
660 }
661out:
662 if (!arg->current_entry || !arg->b->navkeypressed)
663 ui_browser__set_color(arg->b, HE_COLORSET_NORMAL);
664
665 return ret;
666}
667
668#define __HPP_COLOR_PERCENT_FN(_type, _field, _cb) \
669static u64 __hpp_get_##_field(struct hist_entry *he) \
670{ \
671 return he->stat._field; \
672} \
673 \
674static int hist_browser__hpp_color_##_type(struct perf_hpp *hpp, \
675 struct hist_entry *he) \
573{ \ 676{ \
574 struct hists *hists = he->hists; \ 677 return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb); \
575 double percent = 100.0 * he->stat._field / hists->stats.total_period; \
576 *(double *)hpp->ptr = percent; \
577 return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); \
578} 678}
579 679
580HPP__COLOR_FN(overhead, period) 680__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain)
581HPP__COLOR_FN(overhead_sys, period_sys) 681__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL)
582HPP__COLOR_FN(overhead_us, period_us) 682__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL)
583HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) 683__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL)
584HPP__COLOR_FN(overhead_guest_us, period_guest_us) 684__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL)
585 685
586#undef HPP__COLOR_FN 686#undef __HPP_COLOR_PERCENT_FN
587 687
588void hist_browser__init_hpp(void) 688void hist_browser__init_hpp(void)
589{ 689{
690 perf_hpp__column_enable(PERF_HPP__OVERHEAD);
691
590 perf_hpp__init(); 692 perf_hpp__init();
591 693
592 perf_hpp__format[PERF_HPP__OVERHEAD].color = 694 perf_hpp__format[PERF_HPP__OVERHEAD].color =
@@ -606,13 +708,13 @@ static int hist_browser__show_entry(struct hist_browser *browser,
606 unsigned short row) 708 unsigned short row)
607{ 709{
608 char s[256]; 710 char s[256];
609 double percent; 711 int printed = 0;
610 int i, printed = 0;
611 int width = browser->b.width; 712 int width = browser->b.width;
612 char folded_sign = ' '; 713 char folded_sign = ' ';
613 bool current_entry = ui_browser__is_current_entry(&browser->b, row); 714 bool current_entry = ui_browser__is_current_entry(&browser->b, row);
614 off_t row_offset = entry->row_offset; 715 off_t row_offset = entry->row_offset;
615 bool first = true; 716 bool first = true;
717 struct perf_hpp_fmt *fmt;
616 718
617 if (current_entry) { 719 if (current_entry) {
618 browser->he_selection = entry; 720 browser->he_selection = entry;
@@ -625,41 +727,30 @@ static int hist_browser__show_entry(struct hist_browser *browser,
625 } 727 }
626 728
627 if (row_offset == 0) { 729 if (row_offset == 0) {
730 struct hpp_arg arg = {
731 .b = &browser->b,
732 .folded_sign = folded_sign,
733 .current_entry = current_entry,
734 };
628 struct perf_hpp hpp = { 735 struct perf_hpp hpp = {
629 .buf = s, 736 .buf = s,
630 .size = sizeof(s), 737 .size = sizeof(s),
738 .ptr = &arg,
631 }; 739 };
632 740
633 ui_browser__gotorc(&browser->b, row, 0); 741 ui_browser__gotorc(&browser->b, row, 0);
634 742
635 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { 743 perf_hpp__for_each_format(fmt) {
636 if (!perf_hpp__format[i].cond)
637 continue;
638
639 if (!first) { 744 if (!first) {
640 slsmg_printf(" "); 745 slsmg_printf(" ");
641 width -= 2; 746 width -= 2;
642 } 747 }
643 first = false; 748 first = false;
644 749
645 if (perf_hpp__format[i].color) { 750 if (fmt->color) {
646 hpp.ptr = &percent; 751 width -= fmt->color(&hpp, entry);
647 /* It will set percent for us. See HPP__COLOR_FN above. */
648 width -= perf_hpp__format[i].color(&hpp, entry);
649
650 ui_browser__set_percent_color(&browser->b, percent, current_entry);
651
652 if (i == PERF_HPP__OVERHEAD && symbol_conf.use_callchain) {
653 slsmg_printf("%c ", folded_sign);
654 width -= 2;
655 }
656
657 slsmg_printf("%s", s);
658
659 if (!current_entry || !browser->b.navkeypressed)
660 ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
661 } else { 752 } else {
662 width -= perf_hpp__format[i].entry(&hpp, entry); 753 width -= fmt->entry(&hpp, entry);
663 slsmg_printf("%s", s); 754 slsmg_printf("%s", s);
664 } 755 }
665 } 756 }
@@ -1098,6 +1189,21 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
1098 const struct thread *thread = hists->thread_filter; 1189 const struct thread *thread = hists->thread_filter;
1099 unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 1190 unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
1100 u64 nr_events = hists->stats.total_period; 1191 u64 nr_events = hists->stats.total_period;
1192 struct perf_evsel *evsel = hists_to_evsel(hists);
1193 char buf[512];
1194 size_t buflen = sizeof(buf);
1195
1196 if (symbol_conf.event_group && evsel->nr_members > 1) {
1197 struct perf_evsel *pos;
1198
1199 perf_evsel__group_desc(evsel, buf, buflen);
1200 ev_name = buf;
1201
1202 for_each_group_member(pos, evsel) {
1203 nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
1204 nr_events += pos->hists.stats.total_period;
1205 }
1206 }
1101 1207
1102 nr_samples = convert_unit(nr_samples, &unit); 1208 nr_samples = convert_unit(nr_samples, &unit);
1103 printed = scnprintf(bf, size, 1209 printed = scnprintf(bf, size,
@@ -1135,6 +1241,96 @@ static inline bool is_report_browser(void *timer)
1135 return timer == NULL; 1241 return timer == NULL;
1136} 1242}
1137 1243
1244/*
1245 * Only runtime switching of the perf data file makes "input_name" point
1246 * to a malloc'ed buffer, so the "is_input_name_malloced" flag records
1247 * whether the current "input_name" must be freed during a switch.
1248 */
1249static bool is_input_name_malloced = false;
1250
1251static int switch_data_file(void)
1252{
1253 char *pwd, *options[32], *abs_path[32], *tmp;
1254 DIR *pwd_dir;
1255 int nr_options = 0, choice = -1, ret = -1;
1256 struct dirent *dent;
1257
1258 pwd = getenv("PWD");
1259 if (!pwd)
1260 return ret;
1261
1262 pwd_dir = opendir(pwd);
1263 if (!pwd_dir)
1264 return ret;
1265
1266 memset(options, 0, sizeof(options));
1267 memset(abs_path, 0, sizeof(abs_path));
1268
1269 while ((dent = readdir(pwd_dir))) {
1270 char path[PATH_MAX];
1271 u64 magic;
1272 char *name = dent->d_name;
1273 FILE *file;
1274
1275 if (dent->d_type != DT_REG)
1276 continue;
1277
1278 snprintf(path, sizeof(path), "%s/%s", pwd, name);
1279
1280 file = fopen(path, "r");
1281 if (!file)
1282 continue;
1283
1284 if (fread(&magic, 1, 8, file) < 8)
1285 goto close_file_and_continue;
1286
1287 if (is_perf_magic(magic)) {
1288 options[nr_options] = strdup(name);
1289 if (!options[nr_options])
1290 goto close_file_and_continue;
1291
1292 abs_path[nr_options] = strdup(path);
1293 if (!abs_path[nr_options]) {
1294 free(options[nr_options]);
1295 ui__warning("Can't search all data files due to memory shortage.\n");
1296 fclose(file);
1297 break;
1298 }
1299
1300 nr_options++;
1301 }
1302
1303close_file_and_continue:
1304 fclose(file);
1305 if (nr_options >= 32) {
1306 ui__warning("Too many perf data files in PWD!\n"
1307 "Only the first 32 files will be listed.\n");
1308 break;
1309 }
1310 }
1311 closedir(pwd_dir);
1312
1313 if (nr_options) {
1314 choice = ui__popup_menu(nr_options, options);
1315 if (choice < nr_options && choice >= 0) {
1316 tmp = strdup(abs_path[choice]);
1317 if (tmp) {
1318 if (is_input_name_malloced)
1319 free((void *)input_name);
1320 input_name = tmp;
1321 is_input_name_malloced = true;
1322 ret = 0;
1323 } else
1324 ui__warning("Data switch failed due to memory shortage!\n");
1325 }
1326 }
1327
1328 free_popup_options(options, nr_options);
1329 free_popup_options(abs_path, nr_options);
1330 return ret;
1331}
1332
1333
1138static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, 1334static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1139 const char *helpline, const char *ev_name, 1335 const char *helpline, const char *ev_name,
1140 bool left_exits, 1336 bool left_exits,
@@ -1169,7 +1365,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1169 int choice = 0, 1365 int choice = 0,
1170 annotate = -2, zoom_dso = -2, zoom_thread = -2, 1366 annotate = -2, zoom_dso = -2, zoom_thread = -2,
1171 annotate_f = -2, annotate_t = -2, browse_map = -2; 1367 annotate_f = -2, annotate_t = -2, browse_map = -2;
1172 int scripts_comm = -2, scripts_symbol = -2, scripts_all = -2; 1368 int scripts_comm = -2, scripts_symbol = -2,
1369 scripts_all = -2, switch_data = -2;
1173 1370
1174 nr_options = 0; 1371 nr_options = 0;
1175 1372
@@ -1226,6 +1423,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1226 if (is_report_browser(hbt)) 1423 if (is_report_browser(hbt))
1227 goto do_scripts; 1424 goto do_scripts;
1228 continue; 1425 continue;
1426 case 's':
1427 if (is_report_browser(hbt))
1428 goto do_data_switch;
1429 continue;
1229 case K_F1: 1430 case K_F1:
1230 case 'h': 1431 case 'h':
1231 case '?': 1432 case '?':
@@ -1245,6 +1446,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1245 "d Zoom into current DSO\n" 1446 "d Zoom into current DSO\n"
1246 "t Zoom into current Thread\n" 1447 "t Zoom into current Thread\n"
1247 "r Run available scripts('perf report' only)\n" 1448 "r Run available scripts('perf report' only)\n"
1449 "s Switch to another data file in PWD ('perf report' only)\n"
1248 "P Print histograms to perf.hist.N\n" 1450 "P Print histograms to perf.hist.N\n"
1249 "V Verbose (DSO names in callchains, etc)\n" 1451 "V Verbose (DSO names in callchains, etc)\n"
1250 "/ Filter symbol by name"); 1452 "/ Filter symbol by name");
@@ -1352,6 +1554,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
1352 if (asprintf(&options[nr_options], "Run scripts for all samples") > 0) 1554 if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
1353 scripts_all = nr_options++; 1555 scripts_all = nr_options++;
1354 1556
1557 if (is_report_browser(hbt) && asprintf(&options[nr_options],
1558 "Switch to another data file in PWD") > 0)
1559 switch_data = nr_options++;
1355add_exit_option: 1560add_exit_option:
1356 options[nr_options++] = (char *)"Exit"; 1561 options[nr_options++] = (char *)"Exit";
1357retry_popup_menu: 1562retry_popup_menu:
@@ -1462,6 +1667,16 @@ do_scripts:
1462 1667
1463 script_browse(script_opt); 1668 script_browse(script_opt);
1464 } 1669 }
1670 /* Switch to another data file */
1671 else if (choice == switch_data) {
1672do_data_switch:
1673 if (!switch_data_file()) {
1674 key = K_SWITCH_INPUT_DATA;
1675 break;
1676 } else
1677 ui__warning("Won't switch the data files:\n"
1678 "no valid data file was selected!\n");
1679 }
1465 } 1680 }
1466out_free_stack: 1681out_free_stack:
1467 pstack__delete(fstack); 1682 pstack__delete(fstack);
@@ -1494,6 +1709,16 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
1494 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : 1709 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
1495 HE_COLORSET_NORMAL); 1710 HE_COLORSET_NORMAL);
1496 1711
1712 if (symbol_conf.event_group && evsel->nr_members > 1) {
1713 struct perf_evsel *pos;
1714
1715 ev_name = perf_evsel__group_name(evsel);
1716
1717 for_each_group_member(pos, evsel) {
1718 nr_events += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
1719 }
1720 }
1721
1497 nr_events = convert_unit(nr_events, &unit); 1722 nr_events = convert_unit(nr_events, &unit);
1498 printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, 1723 printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
1499 unit, unit == ' ' ? "" : " ", ev_name); 1724 unit, unit == ' ' ? "" : " ", ev_name);
@@ -1578,6 +1803,7 @@ browse_hists:
1578 "Do you really want to exit?")) 1803 "Do you really want to exit?"))
1579 continue; 1804 continue;
1580 /* Fall thru */ 1805 /* Fall thru */
1806 case K_SWITCH_INPUT_DATA:
1581 case 'q': 1807 case 'q':
1582 case CTRL('c'): 1808 case CTRL('c'):
1583 goto out; 1809 goto out;
@@ -1604,8 +1830,19 @@ out:
1604 return key; 1830 return key;
1605} 1831}
1606 1832
1833static bool filter_group_entries(struct ui_browser *self __maybe_unused,
1834 void *entry)
1835{
1836 struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
1837
1838 if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
1839 return true;
1840
1841 return false;
1842}
1843
1607static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, 1844static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
1608 const char *help, 1845 int nr_entries, const char *help,
1609 struct hist_browser_timer *hbt, 1846 struct hist_browser_timer *hbt,
1610 struct perf_session_env *env) 1847 struct perf_session_env *env)
1611{ 1848{
@@ -1616,7 +1853,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
1616 .refresh = ui_browser__list_head_refresh, 1853 .refresh = ui_browser__list_head_refresh,
1617 .seek = ui_browser__list_head_seek, 1854 .seek = ui_browser__list_head_seek,
1618 .write = perf_evsel_menu__write, 1855 .write = perf_evsel_menu__write,
1619 .nr_entries = evlist->nr_entries, 1856 .filter = filter_group_entries,
1857 .nr_entries = nr_entries,
1620 .priv = evlist, 1858 .priv = evlist,
1621 }, 1859 },
1622 .env = env, 1860 .env = env,
@@ -1632,20 +1870,37 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
1632 menu.b.width = line_len; 1870 menu.b.width = line_len;
1633 } 1871 }
1634 1872
1635 return perf_evsel_menu__run(&menu, evlist->nr_entries, help, hbt); 1873 return perf_evsel_menu__run(&menu, nr_entries, help, hbt);
1636} 1874}
1637 1875
1638int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 1876int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
1639 struct hist_browser_timer *hbt, 1877 struct hist_browser_timer *hbt,
1640 struct perf_session_env *env) 1878 struct perf_session_env *env)
1641{ 1879{
1642 if (evlist->nr_entries == 1) { 1880 int nr_entries = evlist->nr_entries;
1881
1882single_entry:
1883 if (nr_entries == 1) {
1643 struct perf_evsel *first = list_entry(evlist->entries.next, 1884 struct perf_evsel *first = list_entry(evlist->entries.next,
1644 struct perf_evsel, node); 1885 struct perf_evsel, node);
1645 const char *ev_name = perf_evsel__name(first); 1886 const char *ev_name = perf_evsel__name(first);
1646 return perf_evsel__hists_browse(first, evlist->nr_entries, help, 1887
1888 return perf_evsel__hists_browse(first, nr_entries, help,
1647 ev_name, false, hbt, env); 1889 ev_name, false, hbt, env);
1648 } 1890 }
1649 1891
1650 return __perf_evlist__tui_browse_hists(evlist, help, hbt, env); 1892 if (symbol_conf.event_group) {
1893 struct perf_evsel *pos;
1894
1895 nr_entries = 0;
1896 list_for_each_entry(pos, &evlist->entries, node)
1897 if (perf_evsel__is_group_leader(pos))
1898 nr_entries++;
1899
1900 if (nr_entries == 1)
1901 goto single_entry;
1902 }
1903
1904 return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
1905 hbt, env);
1651} 1906}
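
One more note on this file: switch_data_file() above decides whether a file in $PWD is a perf data file by reading its first eight bytes and handing them to is_perf_magic(). A hedged standalone sketch of that probe; the magic strings below are the ones perf's header format uses ("PERFILE2" for the current format, "PERFFILE" for the legacy one), though perf's real check also accepts byte-swapped variants, which this sketch skips for brevity:

#include <stdio.h>
#include <string.h>

static int looks_like_perf_data(const char *path)
{
	char magic[8];
	FILE *f = fopen(path, "rb");
	int ok = 0;

	if (!f)
		return 0;
	if (fread(magic, 1, sizeof(magic), f) == sizeof(magic))
		ok = !memcmp(magic, "PERFILE2", 8) ||
		     !memcmp(magic, "PERFFILE", 8);
	fclose(f);
	return ok;
}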
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
new file mode 100644
index 000000000000..7d8dc581a545
--- /dev/null
+++ b/tools/perf/ui/gtk/annotate.c
@@ -0,0 +1,229 @@
1#include "gtk.h"
2#include "util/debug.h"
3#include "util/annotate.h"
4#include "ui/helpline.h"
5
6
7enum {
8 ANN_COL__PERCENT,
9 ANN_COL__OFFSET,
10 ANN_COL__LINE,
11
12 MAX_ANN_COLS
13};
14
15static const char *const col_names[] = {
16 "Overhead",
17 "Offset",
18 "Line"
19};
20
21static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
22 struct disasm_line *dl, int evidx)
23{
24 struct sym_hist *symhist;
25 double percent = 0.0;
26 const char *markup;
27 int ret = 0;
28
29 strcpy(buf, "");
30
31 if (dl->offset == (s64) -1)
32 return 0;
33
34 symhist = annotation__histogram(symbol__annotation(sym), evidx);
35 if (!symhist->addr[dl->offset])
36 return 0;
37
38 percent = 100.0 * symhist->addr[dl->offset] / symhist->sum;
39
40 markup = perf_gtk__get_percent_color(percent);
41 if (markup)
42 ret += scnprintf(buf, size, "%s", markup);
43 ret += scnprintf(buf + ret, size - ret, "%6.2f%%", percent);
44 if (markup)
45 ret += scnprintf(buf + ret, size - ret, "</span>");
46
47 return ret;
48}
49
50static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
51 struct map *map, struct disasm_line *dl)
52{
53 u64 start = map__rip_2objdump(map, sym->start);
54
55 strcpy(buf, "");
56
57 if (dl->offset == (s64) -1)
58 return 0;
59
60 return scnprintf(buf, size, "%"PRIx64, start + dl->offset);
61}
62
63static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
64{
65 int ret = 0;
66 char *line = g_markup_escape_text(dl->line, -1);
67 const char *markup = "<span fgcolor='gray'>";
68
69 strcpy(buf, "");
70
71 if (!line)
72 return 0;
73
74 if (dl->offset != (s64) -1)
75 markup = NULL;
76
77 if (markup)
78 ret += scnprintf(buf, size, "%s", markup);
79 ret += scnprintf(buf + ret, size - ret, "%s", line);
80 if (markup)
81 ret += scnprintf(buf + ret, size - ret, "</span>");
82
83 g_free(line);
84 return ret;
85}
86
87static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
88 struct map *map, int evidx,
89 struct hist_browser_timer *hbt __maybe_unused)
90{
91 struct disasm_line *pos, *n;
92 struct annotation *notes;
93 GType col_types[MAX_ANN_COLS];
94 GtkCellRenderer *renderer;
95 GtkListStore *store;
96 GtkWidget *view;
97 int i;
98 char s[512];
99
100 notes = symbol__annotation(sym);
101
102 for (i = 0; i < MAX_ANN_COLS; i++) {
103 col_types[i] = G_TYPE_STRING;
104 }
105 store = gtk_list_store_newv(MAX_ANN_COLS, col_types);
106
107 view = gtk_tree_view_new();
108 renderer = gtk_cell_renderer_text_new();
109
110 for (i = 0; i < MAX_ANN_COLS; i++) {
111 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
112 -1, col_names[i], renderer, "markup",
113 i, NULL);
114 }
115
116 gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
117 g_object_unref(GTK_TREE_MODEL(store));
118
119 list_for_each_entry(pos, &notes->src->source, node) {
120 GtkTreeIter iter;
121
122 gtk_list_store_append(store, &iter);
123
124 if (perf_gtk__get_percent(s, sizeof(s), sym, pos, evidx))
125 gtk_list_store_set(store, &iter, ANN_COL__PERCENT, s, -1);
126 if (perf_gtk__get_offset(s, sizeof(s), sym, map, pos))
127 gtk_list_store_set(store, &iter, ANN_COL__OFFSET, s, -1);
128 if (perf_gtk__get_line(s, sizeof(s), pos))
129 gtk_list_store_set(store, &iter, ANN_COL__LINE, s, -1);
130 }
131
132 gtk_container_add(GTK_CONTAINER(window), view);
133
134 list_for_each_entry_safe(pos, n, &notes->src->source, node) {
135 list_del(&pos->node);
136 disasm_line__free(pos);
137 }
138
139 return 0;
140}
141
142int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
143 struct hist_browser_timer *hbt)
144{
145 GtkWidget *window;
146 GtkWidget *notebook;
147 GtkWidget *scrolled_window;
148 GtkWidget *tab_label;
149
150 if (map->dso->annotate_warned)
151 return -1;
152
153 if (symbol__annotate(sym, map, 0) < 0) {
154 ui__error("%s", ui_helpline__current);
155 return -1;
156 }
157
158 if (perf_gtk__is_active_context(pgctx)) {
159 window = pgctx->main_window;
160 notebook = pgctx->notebook;
161 } else {
162 GtkWidget *vbox;
163 GtkWidget *infobar;
164 GtkWidget *statbar;
165
166 signal(SIGSEGV, perf_gtk__signal);
167 signal(SIGFPE, perf_gtk__signal);
168 signal(SIGINT, perf_gtk__signal);
169 signal(SIGQUIT, perf_gtk__signal);
170 signal(SIGTERM, perf_gtk__signal);
171
172 window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
173 gtk_window_set_title(GTK_WINDOW(window), "perf annotate");
174
175 g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
176
177 pgctx = perf_gtk__activate_context(window);
178 if (!pgctx)
179 return -1;
180
181 vbox = gtk_vbox_new(FALSE, 0);
182 notebook = gtk_notebook_new();
183 pgctx->notebook = notebook;
184
185 gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
186
187 infobar = perf_gtk__setup_info_bar();
188 if (infobar) {
189 gtk_box_pack_start(GTK_BOX(vbox), infobar,
190 FALSE, FALSE, 0);
191 }
192
193 statbar = perf_gtk__setup_statusbar();
194 gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
195
196 gtk_container_add(GTK_CONTAINER(window), vbox);
197 }
198
199 scrolled_window = gtk_scrolled_window_new(NULL, NULL);
200 tab_label = gtk_label_new(sym->name);
201
202 gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
203 GTK_POLICY_AUTOMATIC,
204 GTK_POLICY_AUTOMATIC);
205
206 gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window,
207 tab_label);
208
209 perf_gtk__annotate_symbol(scrolled_window, sym, map, evidx, hbt);
210 return 0;
211}
212
213void perf_gtk__show_annotations(void)
214{
215 GtkWidget *window;
216
217 if (!perf_gtk__is_active_context(pgctx))
218 return;
219
220 window = pgctx->main_window;
221 gtk_widget_show_all(window);
222
223 perf_gtk__resize_window(window);
224 gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
225
226 gtk_main();
227
228 perf_gtk__deactivate_context(&pgctx);
229}
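
The three column helpers in this new file share one idea: render a value, optionally wrapped in a Pango <span> so the GtkTreeView's "markup" attribute colors it. A sketch of that wrapping in isolation, with the red/green thresholds (5.0 and 0.5) assumed to match perf's MIN_RED/MIN_GREEN color defaults:

#include <stdio.h>

static int percent_markup(char *buf, size_t size, double percent)
{
	const char *color = NULL;

	if (percent >= 5.0)		/* assumed MIN_RED */
		color = "red";
	else if (percent > 0.5)		/* assumed MIN_GREEN */
		color = "green";

	if (color)
		return snprintf(buf, size,
				"<span fgcolor='%s'>%6.2f%%</span>",
				color, percent);
	return snprintf(buf, size, "%6.2f%%", percent);
}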
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 253b6219a39e..c95012cdb438 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -8,15 +8,13 @@
8 8
9#include <signal.h> 9#include <signal.h>
10 10
11#define MAX_COLUMNS 32 11void perf_gtk__signal(int sig)
12
13static void perf_gtk__signal(int sig)
14{ 12{
15 perf_gtk__exit(false); 13 perf_gtk__exit(false);
16 psignal(sig, "perf"); 14 psignal(sig, "perf");
17} 15}
18 16
19static void perf_gtk__resize_window(GtkWidget *window) 17void perf_gtk__resize_window(GtkWidget *window)
20{ 18{
21 GdkRectangle rect; 19 GdkRectangle rect;
22 GdkScreen *screen; 20 GdkScreen *screen;
@@ -36,7 +34,7 @@ static void perf_gtk__resize_window(GtkWidget *window)
36 gtk_window_resize(GTK_WINDOW(window), width, height); 34 gtk_window_resize(GTK_WINDOW(window), width, height);
37} 35}
38 36
39static const char *perf_gtk__get_percent_color(double percent) 37const char *perf_gtk__get_percent_color(double percent)
40{ 38{
41 if (percent >= MIN_RED) 39 if (percent >= MIN_RED)
42 return "<span fgcolor='red'>"; 40 return "<span fgcolor='red'>";
@@ -45,155 +43,8 @@ static const char *perf_gtk__get_percent_color(double percent)
45 return NULL; 43 return NULL;
46} 44}
47 45
48#define HPP__COLOR_FN(_name, _field) \
49static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \
50 struct hist_entry *he) \
51{ \
52 struct hists *hists = he->hists; \
53 double percent = 100.0 * he->stat._field / hists->stats.total_period; \
54 const char *markup; \
55 int ret = 0; \
56 \
57 markup = perf_gtk__get_percent_color(percent); \
58 if (markup) \
59 ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \
60 ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); \
61 if (markup) \
62 ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); \
63 \
64 return ret; \
65}
66
67HPP__COLOR_FN(overhead, period)
68HPP__COLOR_FN(overhead_sys, period_sys)
69HPP__COLOR_FN(overhead_us, period_us)
70HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
71HPP__COLOR_FN(overhead_guest_us, period_guest_us)
72
73#undef HPP__COLOR_FN
74
75void perf_gtk__init_hpp(void)
76{
77 perf_hpp__init();
78
79 perf_hpp__format[PERF_HPP__OVERHEAD].color =
80 perf_gtk__hpp_color_overhead;
81 perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
82 perf_gtk__hpp_color_overhead_sys;
83 perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
84 perf_gtk__hpp_color_overhead_us;
85 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
86 perf_gtk__hpp_color_overhead_guest_sys;
87 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
88 perf_gtk__hpp_color_overhead_guest_us;
89}
90
91static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
92{
93 GType col_types[MAX_COLUMNS];
94 GtkCellRenderer *renderer;
95 struct sort_entry *se;
96 GtkListStore *store;
97 struct rb_node *nd;
98 GtkWidget *view;
99 int i, col_idx;
100 int nr_cols;
101 char s[512];
102
103 struct perf_hpp hpp = {
104 .buf = s,
105 .size = sizeof(s),
106 };
107
108 nr_cols = 0;
109
110 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
111 if (!perf_hpp__format[i].cond)
112 continue;
113
114 col_types[nr_cols++] = G_TYPE_STRING;
115 }
116
117 list_for_each_entry(se, &hist_entry__sort_list, list) {
118 if (se->elide)
119 continue;
120
121 col_types[nr_cols++] = G_TYPE_STRING;
122 }
123
124 store = gtk_list_store_newv(nr_cols, col_types);
125
126 view = gtk_tree_view_new();
127
128 renderer = gtk_cell_renderer_text_new();
129
130 col_idx = 0;
131
132 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
133 if (!perf_hpp__format[i].cond)
134 continue;
135
136 perf_hpp__format[i].header(&hpp);
137
138 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
139 -1, s,
140 renderer, "markup",
141 col_idx++, NULL);
142 }
143
144 list_for_each_entry(se, &hist_entry__sort_list, list) {
145 if (se->elide)
146 continue;
147
148 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
149 -1, se->se_header,
150 renderer, "text",
151 col_idx++, NULL);
152 }
153
154 gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
155
156 g_object_unref(GTK_TREE_MODEL(store));
157
158 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
159 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
160 GtkTreeIter iter;
161
162 if (h->filtered)
163 continue;
164
165 gtk_list_store_append(store, &iter);
166
167 col_idx = 0;
168
169 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
170 if (!perf_hpp__format[i].cond)
171 continue;
172
173 if (perf_hpp__format[i].color)
174 perf_hpp__format[i].color(&hpp, h);
175 else
176 perf_hpp__format[i].entry(&hpp, h);
177
178 gtk_list_store_set(store, &iter, col_idx++, s, -1);
179 }
180
181 list_for_each_entry(se, &hist_entry__sort_list, list) {
182 if (se->elide)
183 continue;
184
185 se->se_snprintf(h, s, ARRAY_SIZE(s),
186 hists__col_len(hists, se->se_width_idx));
187
188 gtk_list_store_set(store, &iter, col_idx++, s, -1);
189 }
190 }
191
192 gtk_container_add(GTK_CONTAINER(window), view);
193}
194
195#ifdef HAVE_GTK_INFO_BAR 46#ifdef HAVE_GTK_INFO_BAR
196static GtkWidget *perf_gtk__setup_info_bar(void) 47GtkWidget *perf_gtk__setup_info_bar(void)
197{ 48{
198 GtkWidget *info_bar; 49 GtkWidget *info_bar;
199 GtkWidget *label; 50 GtkWidget *label;
@@ -220,7 +71,7 @@ static GtkWidget *perf_gtk__setup_info_bar(void)
220} 71}
221#endif 72#endif
222 73
223static GtkWidget *perf_gtk__setup_statusbar(void) 74GtkWidget *perf_gtk__setup_statusbar(void)
224{ 75{
225 GtkWidget *stbar; 76 GtkWidget *stbar;
226 unsigned ctxid; 77 unsigned ctxid;
@@ -234,79 +85,3 @@ static GtkWidget *perf_gtk__setup_statusbar(void)
234 85
235 return stbar; 86 return stbar;
236} 87}
237
238int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
239 const char *help,
240 struct hist_browser_timer *hbt __maybe_unused)
241{
242 struct perf_evsel *pos;
243 GtkWidget *vbox;
244 GtkWidget *notebook;
245 GtkWidget *info_bar;
246 GtkWidget *statbar;
247 GtkWidget *window;
248
249 signal(SIGSEGV, perf_gtk__signal);
250 signal(SIGFPE, perf_gtk__signal);
251 signal(SIGINT, perf_gtk__signal);
252 signal(SIGQUIT, perf_gtk__signal);
253 signal(SIGTERM, perf_gtk__signal);
254
255 window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
256
257 gtk_window_set_title(GTK_WINDOW(window), "perf report");
258
259 g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
260
261 pgctx = perf_gtk__activate_context(window);
262 if (!pgctx)
263 return -1;
264
265 vbox = gtk_vbox_new(FALSE, 0);
266
267 notebook = gtk_notebook_new();
268
269 list_for_each_entry(pos, &evlist->entries, node) {
270 struct hists *hists = &pos->hists;
271 const char *evname = perf_evsel__name(pos);
272 GtkWidget *scrolled_window;
273 GtkWidget *tab_label;
274
275 scrolled_window = gtk_scrolled_window_new(NULL, NULL);
276
277 gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
278 GTK_POLICY_AUTOMATIC,
279 GTK_POLICY_AUTOMATIC);
280
281 perf_gtk__show_hists(scrolled_window, hists);
282
283 tab_label = gtk_label_new(evname);
284
285 gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
286 }
287
288 gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
289
290 info_bar = perf_gtk__setup_info_bar();
291 if (info_bar)
292 gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
293
294 statbar = perf_gtk__setup_statusbar();
295 gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
296
297 gtk_container_add(GTK_CONTAINER(window), vbox);
298
299 gtk_widget_show_all(window);
300
301 perf_gtk__resize_window(window);
302
303 gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
304
305 ui_helpline__push(help);
306
307 gtk_main();
308
309 perf_gtk__deactivate_context(&pgctx);
310
311 return 0;
312}
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 856320e2cc05..3d96785ef155 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -10,6 +10,7 @@
10 10
11struct perf_gtk_context { 11struct perf_gtk_context {
12 GtkWidget *main_window; 12 GtkWidget *main_window;
13 GtkWidget *notebook;
13 14
14#ifdef HAVE_GTK_INFO_BAR 15#ifdef HAVE_GTK_INFO_BAR
15 GtkWidget *info_bar; 16 GtkWidget *info_bar;
@@ -33,7 +34,14 @@ void perf_gtk__init_helpline(void);
33void perf_gtk__init_progress(void); 34void perf_gtk__init_progress(void);
34void perf_gtk__init_hpp(void); 35void perf_gtk__init_hpp(void);
35 36
36#ifndef HAVE_GTK_INFO_BAR 37void perf_gtk__signal(int sig);
38void perf_gtk__resize_window(GtkWidget *window);
39const char *perf_gtk__get_percent_color(double percent);
40GtkWidget *perf_gtk__setup_statusbar(void);
41
42#ifdef HAVE_GTK_INFO_BAR
43GtkWidget *perf_gtk__setup_info_bar(void);
44#else
37static inline GtkWidget *perf_gtk__setup_info_bar(void) 45static inline GtkWidget *perf_gtk__setup_info_bar(void)
38{ 46{
39 return NULL; 47 return NULL;
diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c
index 5db4432ff12a..3388cbd12186 100644
--- a/tools/perf/ui/gtk/helpline.c
+++ b/tools/perf/ui/gtk/helpline.c
@@ -24,17 +24,7 @@ static void gtk_helpline_push(const char *msg)
24 pgctx->statbar_ctx_id, msg); 24 pgctx->statbar_ctx_id, msg);
25} 25}
26 26
27static struct ui_helpline gtk_helpline_fns = { 27static int gtk_helpline_show(const char *fmt, va_list ap)
28 .pop = gtk_helpline_pop,
29 .push = gtk_helpline_push,
30};
31
32void perf_gtk__init_helpline(void)
33{
34 helpline_fns = &gtk_helpline_fns;
35}
36
37int perf_gtk__show_helpline(const char *fmt, va_list ap)
38{ 28{
39 int ret; 29 int ret;
40 char *ptr; 30 char *ptr;
@@ -54,3 +44,14 @@ int perf_gtk__show_helpline(const char *fmt, va_list ap)
54 44
55 return ret; 45 return ret;
56} 46}
47
48static struct ui_helpline gtk_helpline_fns = {
49 .pop = gtk_helpline_pop,
50 .push = gtk_helpline_push,
51 .show = gtk_helpline_show,
52};
53
54void perf_gtk__init_helpline(void)
55{
56 helpline_fns = &gtk_helpline_fns;
57}
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
new file mode 100644
index 000000000000..1e764a8ad259
--- /dev/null
+++ b/tools/perf/ui/gtk/hists.c
@@ -0,0 +1,312 @@
1#include "../evlist.h"
2#include "../cache.h"
3#include "../evsel.h"
4#include "../sort.h"
5#include "../hist.h"
6#include "../helpline.h"
7#include "gtk.h"
8
9#define MAX_COLUMNS 32
10
11static int __percent_color_snprintf(char *buf, size_t size, double percent)
12{
13 int ret = 0;
14 const char *markup;
15
16 markup = perf_gtk__get_percent_color(percent);
17 if (markup)
18 ret += scnprintf(buf, size, "%s", markup);
19
20 ret += scnprintf(buf + ret, size - ret, " %6.2f%%", percent);
21
22 if (markup)
23 ret += scnprintf(buf + ret, size - ret, "</span>");
24
25 return ret;
26}
27
28
29static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he,
30 u64 (*get_field)(struct hist_entry *))
31{
32 int ret;
33 double percent = 0.0;
34 struct hists *hists = he->hists;
35
36 if (hists->stats.total_period)
37 percent = 100.0 * get_field(he) / hists->stats.total_period;
38
39 ret = __percent_color_snprintf(hpp->buf, hpp->size, percent);
40
41 if (symbol_conf.event_group) {
42 int prev_idx, idx_delta;
43 struct perf_evsel *evsel = hists_to_evsel(hists);
44 struct hist_entry *pair;
45 int nr_members = evsel->nr_members;
46
47 if (nr_members <= 1)
48 return ret;
49
50 prev_idx = perf_evsel__group_idx(evsel);
51
52 list_for_each_entry(pair, &he->pairs.head, pairs.node) {
53 u64 period = get_field(pair);
54 u64 total = pair->hists->stats.total_period;
55
56 evsel = hists_to_evsel(pair->hists);
57 idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
58
59 while (idx_delta--) {
60 /*
61 * zero-fill group members in the middle which
62 * have no sample
63 */
64 ret += __percent_color_snprintf(hpp->buf + ret,
65 hpp->size - ret,
66 0.0);
67 }
68
69 percent = total ? 100.0 * period / total : 0.0;
70 ret += __percent_color_snprintf(hpp->buf + ret,
71 hpp->size - ret,
72 percent);
73
74 prev_idx = perf_evsel__group_idx(evsel);
75 }
76
77 idx_delta = nr_members - prev_idx - 1;
78
79 while (idx_delta--) {
80 /*
81 * zero-fill trailing group members which have no sample
82 */
83 ret += __percent_color_snprintf(hpp->buf + ret,
84 hpp->size - ret,
85 0.0);
86 }
87 }
88 return ret;
89}
90
91#define __HPP_COLOR_PERCENT_FN(_type, _field) \
92static u64 he_get_##_field(struct hist_entry *he) \
93{ \
94 return he->stat._field; \
95} \
96 \
97static int perf_gtk__hpp_color_##_type(struct perf_hpp *hpp, \
98 struct hist_entry *he) \
99{ \
100 return __hpp__color_fmt(hpp, he, he_get_##_field); \
101}
102
103__HPP_COLOR_PERCENT_FN(overhead, period)
104__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
105__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
106__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
107__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
108
109#undef __HPP_COLOR_PERCENT_FN
110
111
112void perf_gtk__init_hpp(void)
113{
114 perf_hpp__column_enable(PERF_HPP__OVERHEAD);
115
116 perf_hpp__init();
117
118 perf_hpp__format[PERF_HPP__OVERHEAD].color =
119 perf_gtk__hpp_color_overhead;
120 perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
121 perf_gtk__hpp_color_overhead_sys;
122 perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
123 perf_gtk__hpp_color_overhead_us;
124 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
125 perf_gtk__hpp_color_overhead_guest_sys;
126 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
127 perf_gtk__hpp_color_overhead_guest_us;
128}
129
130static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
131{
132 struct perf_hpp_fmt *fmt;
133 GType col_types[MAX_COLUMNS];
134 GtkCellRenderer *renderer;
135 struct sort_entry *se;
136 GtkListStore *store;
137 struct rb_node *nd;
138 GtkWidget *view;
139 int col_idx;
140 int nr_cols;
141 char s[512];
142
143 struct perf_hpp hpp = {
144 .buf = s,
145 .size = sizeof(s),
146 .ptr = hists_to_evsel(hists),
147 };
148
149 nr_cols = 0;
150
151 perf_hpp__for_each_format(fmt)
152 col_types[nr_cols++] = G_TYPE_STRING;
153
154 list_for_each_entry(se, &hist_entry__sort_list, list) {
155 if (se->elide)
156 continue;
157
158 col_types[nr_cols++] = G_TYPE_STRING;
159 }
160
161 store = gtk_list_store_newv(nr_cols, col_types);
162
163 view = gtk_tree_view_new();
164
165 renderer = gtk_cell_renderer_text_new();
166
167 col_idx = 0;
168
169 perf_hpp__for_each_format(fmt) {
170 fmt->header(&hpp);
171
172 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
173 -1, ltrim(s),
174 renderer, "markup",
175 col_idx++, NULL);
176 }
177
178 list_for_each_entry(se, &hist_entry__sort_list, list) {
179 if (se->elide)
180 continue;
181
182 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
183 -1, se->se_header,
184 renderer, "text",
185 col_idx++, NULL);
186 }
187
188 gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
189
190 g_object_unref(GTK_TREE_MODEL(store));
191
192 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
193 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
194 GtkTreeIter iter;
195
196 if (h->filtered)
197 continue;
198
199 gtk_list_store_append(store, &iter);
200
201 col_idx = 0;
202
203 perf_hpp__for_each_format(fmt) {
204 if (fmt->color)
205 fmt->color(&hpp, h);
206 else
207 fmt->entry(&hpp, h);
208
209 gtk_list_store_set(store, &iter, col_idx++, s, -1);
210 }
211
212 list_for_each_entry(se, &hist_entry__sort_list, list) {
213 if (se->elide)
214 continue;
215
216 se->se_snprintf(h, s, ARRAY_SIZE(s),
217 hists__col_len(hists, se->se_width_idx));
218
219 gtk_list_store_set(store, &iter, col_idx++, s, -1);
220 }
221 }
222
223 gtk_container_add(GTK_CONTAINER(window), view);
224}
225
226int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
227 const char *help,
228 struct hist_browser_timer *hbt __maybe_unused)
229{
230 struct perf_evsel *pos;
231 GtkWidget *vbox;
232 GtkWidget *notebook;
233 GtkWidget *info_bar;
234 GtkWidget *statbar;
235 GtkWidget *window;
236
237 signal(SIGSEGV, perf_gtk__signal);
238 signal(SIGFPE, perf_gtk__signal);
239 signal(SIGINT, perf_gtk__signal);
240 signal(SIGQUIT, perf_gtk__signal);
241 signal(SIGTERM, perf_gtk__signal);
242
243 window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
244
245 gtk_window_set_title(GTK_WINDOW(window), "perf report");
246
247 g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
248
249 pgctx = perf_gtk__activate_context(window);
250 if (!pgctx)
251 return -1;
252
253 vbox = gtk_vbox_new(FALSE, 0);
254
255 notebook = gtk_notebook_new();
256
257 gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
258
259 info_bar = perf_gtk__setup_info_bar();
260 if (info_bar)
261 gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
262
263 statbar = perf_gtk__setup_statusbar();
264 gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
265
266 gtk_container_add(GTK_CONTAINER(window), vbox);
267
268 list_for_each_entry(pos, &evlist->entries, node) {
269 struct hists *hists = &pos->hists;
270 const char *evname = perf_evsel__name(pos);
271 GtkWidget *scrolled_window;
272 GtkWidget *tab_label;
273 char buf[512];
274 size_t size = sizeof(buf);
275
276 if (symbol_conf.event_group) {
277 if (!perf_evsel__is_group_leader(pos))
278 continue;
279
280 if (pos->nr_members > 1) {
281 perf_evsel__group_desc(pos, buf, size);
282 evname = buf;
283 }
284 }
285
286 scrolled_window = gtk_scrolled_window_new(NULL, NULL);
287
288 gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
289 GTK_POLICY_AUTOMATIC,
290 GTK_POLICY_AUTOMATIC);
291
292 perf_gtk__show_hists(scrolled_window, hists);
293
294 tab_label = gtk_label_new(evname);
295
296 gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
297 }
298
299 gtk_widget_show_all(window);
300
301 perf_gtk__resize_window(window);
302
303 gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
304
305 ui_helpline__push(help);
306
307 gtk_main();
308
309 perf_gtk__deactivate_context(&pgctx);
310
311 return 0;
312}
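
The idx_delta loops in __hpp__color_fmt above keep the per-member columns aligned: pair entries arrive sorted by group index, but members without samples are simply absent from the list, so the gaps are padded with 0.00%. The indexing logic in isolation (the inputs here are made up for illustration):

#include <stdio.h>

/* Print one column per group member; 'idx' lists the member indexes
 * that actually have samples, in ascending order. */
static void print_group_row(const double *percent, const int *idx,
			    int nr_present, int nr_members)
{
	int prev = -1, i, gap;

	for (i = 0; i < nr_present; i++) {
		for (gap = idx[i] - prev - 1; gap > 0; gap--)
			printf(" %6.2f%%", 0.0);	/* skipped in the middle */
		printf(" %6.2f%%", percent[i]);
		prev = idx[i];
	}
	for (gap = nr_members - prev - 1; gap > 0; gap--)
		printf(" %6.2f%%", 0.0);		/* trailing members */
	printf("\n");
}

For a four-member group where only members 0 and 2 sampled, print_group_row((double[]){42.1, 3.4}, (int[]){0, 2}, 2, 4) emits four columns: 42.10%, 0.00%, 3.40%, 0.00%.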
diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c
index a49bcf3c190b..700fb3cfa1c7 100644
--- a/tools/perf/ui/helpline.c
+++ b/tools/perf/ui/helpline.c
@@ -16,9 +16,16 @@ static void nop_helpline__push(const char *msg __maybe_unused)
16{ 16{
17} 17}
18 18
19static int nop_helpline__show(const char *fmt __maybe_unused,
20 va_list ap __maybe_unused)
21{
22 return 0;
23}
24
19static struct ui_helpline default_helpline_fns = { 25static struct ui_helpline default_helpline_fns = {
20 .pop = nop_helpline__pop, 26 .pop = nop_helpline__pop,
21 .push = nop_helpline__push, 27 .push = nop_helpline__push,
28 .show = nop_helpline__show,
22}; 29};
23 30
24struct ui_helpline *helpline_fns = &default_helpline_fns; 31struct ui_helpline *helpline_fns = &default_helpline_fns;
@@ -59,3 +66,8 @@ void ui_helpline__puts(const char *msg)
59 ui_helpline__pop(); 66 ui_helpline__pop();
60 ui_helpline__push(msg); 67 ui_helpline__push(msg);
61} 68}
69
70int ui_helpline__vshow(const char *fmt, va_list ap)
71{
72 return helpline_fns->show(fmt, ap);
73}
diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h
index baa28a4d16b9..46181f4fc07e 100644
--- a/tools/perf/ui/helpline.h
+++ b/tools/perf/ui/helpline.h
@@ -9,6 +9,7 @@
9struct ui_helpline { 9struct ui_helpline {
10 void (*pop)(void); 10 void (*pop)(void);
11 void (*push)(const char *msg); 11 void (*push)(const char *msg);
12 int (*show)(const char *fmt, va_list ap);
12}; 13};
13 14
14extern struct ui_helpline *helpline_fns; 15extern struct ui_helpline *helpline_fns;
@@ -20,28 +21,9 @@ void ui_helpline__push(const char *msg);
20void ui_helpline__vpush(const char *fmt, va_list ap); 21void ui_helpline__vpush(const char *fmt, va_list ap);
21void ui_helpline__fpush(const char *fmt, ...); 22void ui_helpline__fpush(const char *fmt, ...);
22void ui_helpline__puts(const char *msg); 23void ui_helpline__puts(const char *msg);
24int ui_helpline__vshow(const char *fmt, va_list ap);
23 25
24extern char ui_helpline__current[512]; 26extern char ui_helpline__current[512];
25
26#ifdef NEWT_SUPPORT
27extern char ui_helpline__last_msg[]; 27extern char ui_helpline__last_msg[];
28int ui_helpline__show_help(const char *format, va_list ap);
29#else
30static inline int ui_helpline__show_help(const char *format __maybe_unused,
31 va_list ap __maybe_unused)
32{
33 return 0;
34}
35#endif /* NEWT_SUPPORT */
36
37#ifdef GTK2_SUPPORT
38int perf_gtk__show_helpline(const char *format, va_list ap);
39#else
40static inline int perf_gtk__show_helpline(const char *format __maybe_unused,
41 va_list ap __maybe_unused)
42{
43 return 0;
44}
45#endif /* GTK2_SUPPORT */
46 28
47#endif /* _PERF_UI_HELPLINE_H_ */ 29#endif /* _PERF_UI_HELPLINE_H_ */
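
With this header change, every front-end routes help-line output through the same small ops struct instead of compile-time NEWT/GTK2 #ifdefs. A minimal sketch of that function-pointer dispatch pattern (the names are illustrative, not perf's):

#include <stdarg.h>
#include <stdio.h>

struct helpline_ops {
	void (*push)(const char *msg);
	int  (*show)(const char *fmt, va_list ap);
};

static void stdio_push(const char *msg)             { puts(msg); }
static int  stdio_show(const char *fmt, va_list ap) { return vprintf(fmt, ap); }

/* A UI back-end installs its ops once; generic code never #ifdefs. */
static struct helpline_ops stdio_ops = { stdio_push, stdio_show };
static struct helpline_ops *ops = &stdio_ops;

static int helpline_show(const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = ops->show(fmt, ap);
	va_end(ap);
	return ret;
}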
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index aa84130024d5..d671e63aa351 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -3,151 +3,163 @@
3#include "../util/hist.h" 3#include "../util/hist.h"
4#include "../util/util.h" 4#include "../util/util.h"
5#include "../util/sort.h" 5#include "../util/sort.h"
6 6#include "../util/evsel.h"
7 7
8/* hist period print (hpp) functions */ 8/* hist period print (hpp) functions */
9static int hpp__header_overhead(struct perf_hpp *hpp)
10{
11 return scnprintf(hpp->buf, hpp->size, "Overhead");
12}
13
14static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused)
15{
16 return 8;
17}
18
19static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he)
20{
21 struct hists *hists = he->hists;
22 double percent = 100.0 * he->stat.period / hists->stats.total_period;
23 9
24 return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); 10typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...);
25}
26 11
27static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) 12static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
13 u64 (*get_field)(struct hist_entry *),
14 const char *fmt, hpp_snprint_fn print_fn,
15 bool fmt_percent)
28{ 16{
17 int ret;
29 struct hists *hists = he->hists; 18 struct hists *hists = he->hists;
30 double percent = 100.0 * he->stat.period / hists->stats.total_period;
31 const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
32
33 return scnprintf(hpp->buf, hpp->size, fmt, percent);
34}
35 19
36static int hpp__header_overhead_sys(struct perf_hpp *hpp) 20 if (fmt_percent) {
37{ 21 double percent = 0.0;
38 const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
39
40 return scnprintf(hpp->buf, hpp->size, fmt, "sys");
41}
42 22
43static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused) 23 if (hists->stats.total_period)
44{ 24 percent = 100.0 * get_field(he) /
45 return 7; 25 hists->stats.total_period;
46}
47 26
48static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) 27 ret = print_fn(hpp->buf, hpp->size, fmt, percent);
49{ 28 } else
50 struct hists *hists = he->hists; 29 ret = print_fn(hpp->buf, hpp->size, fmt, get_field(he));
51 double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
52 30
53 return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); 31 if (symbol_conf.event_group) {
54} 32 int prev_idx, idx_delta;
33 struct perf_evsel *evsel = hists_to_evsel(hists);
34 struct hist_entry *pair;
35 int nr_members = evsel->nr_members;
55 36
56static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) 37 if (nr_members <= 1)
57{ 38 return ret;
58 struct hists *hists = he->hists;
59 double percent = 100.0 * he->stat.period_sys / hists->stats.total_period;
60 const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
61 39
62 return scnprintf(hpp->buf, hpp->size, fmt, percent); 40 prev_idx = perf_evsel__group_idx(evsel);
63}
64 41
65static int hpp__header_overhead_us(struct perf_hpp *hpp) 42 list_for_each_entry(pair, &he->pairs.head, pairs.node) {
66{ 43 u64 period = get_field(pair);
67 const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; 44 u64 total = pair->hists->stats.total_period;
68 45
69 return scnprintf(hpp->buf, hpp->size, fmt, "user"); 46 if (!total)
70} 47 continue;
71 48
72static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused) 49 evsel = hists_to_evsel(pair->hists);
73{ 50 idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
74 return 7;
75}
76 51
77static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) 52 while (idx_delta--) {
78{ 53 /*
79 struct hists *hists = he->hists; 54 * zero-fill group members in the middle which
80 double percent = 100.0 * he->stat.period_us / hists->stats.total_period; 55 * have no sample
56 */
57 ret += print_fn(hpp->buf + ret, hpp->size - ret,
58 fmt, 0);
59 }
81 60
82 return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); 61 if (fmt_percent)
83} 62 ret += print_fn(hpp->buf + ret, hpp->size - ret,
63 fmt, 100.0 * period / total);
64 else
65 ret += print_fn(hpp->buf + ret, hpp->size - ret,
66 fmt, period);
84 67
85static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) 68 prev_idx = perf_evsel__group_idx(evsel);
86{ 69 }
87 struct hists *hists = he->hists;
88 double percent = 100.0 * he->stat.period_us / hists->stats.total_period;
89 const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
90
91 return scnprintf(hpp->buf, hpp->size, fmt, percent);
92}
93
94static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp)
95{
96 return scnprintf(hpp->buf, hpp->size, "guest sys");
97}
98
99static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused)
100{
101 return 9;
102}
103
104static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp,
105 struct hist_entry *he)
106{
107 struct hists *hists = he->hists;
108 double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
109
110 return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
111}
112
113static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp,
114 struct hist_entry *he)
115{
116 struct hists *hists = he->hists;
117 double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period;
118 const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
119
120 return scnprintf(hpp->buf, hpp->size, fmt, percent);
121}
122
123static int hpp__header_overhead_guest_us(struct perf_hpp *hpp)
124{
125 return scnprintf(hpp->buf, hpp->size, "guest usr");
126}
127 70
128static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused) 71 idx_delta = nr_members - prev_idx - 1;
129{
130 return 9;
131}
132 72
133static int hpp__color_overhead_guest_us(struct perf_hpp *hpp, 73 while (idx_delta--) {
134 struct hist_entry *he) 74 /*
135{ 75 * zero-fill trailing group members which have no sample
136 struct hists *hists = he->hists; 76 */
137 double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period; 77 ret += print_fn(hpp->buf + ret, hpp->size - ret,
138 78 fmt, 0);
139 return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); 79 }
80 }
81 return ret;
140} 82}
141 83
142static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, 84#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
143 struct hist_entry *he) 85static int hpp__header_##_type(struct perf_hpp *hpp) \
144{ 86{ \
145 struct hists *hists = he->hists; 87 int len = _min_width; \
146 double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period; 88 \
147 const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; 89 if (symbol_conf.event_group) { \
90 struct perf_evsel *evsel = hpp->ptr; \
91 \
92 len = max(len, evsel->nr_members * _unit_width); \
93 } \
94 return scnprintf(hpp->buf, hpp->size, "%*s", len, _str); \
95}
96
97#define __HPP_WIDTH_FN(_type, _min_width, _unit_width) \
98static int hpp__width_##_type(struct perf_hpp *hpp __maybe_unused) \
99{ \
100 int len = _min_width; \
101 \
102 if (symbol_conf.event_group) { \
103 struct perf_evsel *evsel = hpp->ptr; \
104 \
105 len = max(len, evsel->nr_members * _unit_width); \
106 } \
107 return len; \
108}
109
110#define __HPP_COLOR_PERCENT_FN(_type, _field) \
111static u64 he_get_##_field(struct hist_entry *he) \
112{ \
113 return he->stat._field; \
114} \
115 \
116static int hpp__color_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
117{ \
118 return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \
119 (hpp_snprint_fn)percent_color_snprintf, true); \
120}
121
122#define __HPP_ENTRY_PERCENT_FN(_type, _field) \
123static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
124{ \
125 const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \
126 return __hpp__fmt(hpp, he, he_get_##_field, fmt, \
127 scnprintf, true); \
128}
129
130#define __HPP_ENTRY_RAW_FN(_type, _field) \
131static u64 he_get_raw_##_field(struct hist_entry *he) \
132{ \
133 return he->stat._field; \
134} \
135 \
136static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \
137{ \
138 const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64; \
139 return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false); \
140}
141
142#define HPP_PERCENT_FNS(_type, _str, _field, _min_width, _unit_width) \
143__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
144__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
145__HPP_COLOR_PERCENT_FN(_type, _field) \
146__HPP_ENTRY_PERCENT_FN(_type, _field)
147
148#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \
149__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \
150__HPP_WIDTH_FN(_type, _min_width, _unit_width) \
151__HPP_ENTRY_RAW_FN(_type, _field)
152
153
154HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8)
155HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8)
156HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8)
157HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8)
158HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
159
160HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
161HPP_RAW_FNS(period, "Period", period, 12, 12)
148 162
149 return scnprintf(hpp->buf, hpp->size, fmt, percent);
150}
151 163
152static int hpp__header_baseline(struct perf_hpp *hpp) 164static int hpp__header_baseline(struct perf_hpp *hpp)
153{ 165{
@@ -179,7 +191,7 @@ static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he)
179{ 191{
180 double percent = baseline_percent(he); 192 double percent = baseline_percent(he);
181 193
182 if (hist_entry__has_pairs(he))
 194 if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
183 return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); 195 return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
184 else 196 else
185 return scnprintf(hpp->buf, hpp->size, " "); 197 return scnprintf(hpp->buf, hpp->size, " ");
@@ -196,44 +208,6 @@ static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
196 return scnprintf(hpp->buf, hpp->size, " "); 208 return scnprintf(hpp->buf, hpp->size, " ");
197} 209}
198 210
199static int hpp__header_samples(struct perf_hpp *hpp)
200{
201 const char *fmt = symbol_conf.field_sep ? "%s" : "%11s";
202
203 return scnprintf(hpp->buf, hpp->size, fmt, "Samples");
204}
205
206static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused)
207{
208 return 11;
209}
210
211static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he)
212{
213 const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64;
214
215 return scnprintf(hpp->buf, hpp->size, fmt, he->stat.nr_events);
216}
217
218static int hpp__header_period(struct perf_hpp *hpp)
219{
220 const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
221
222 return scnprintf(hpp->buf, hpp->size, fmt, "Period");
223}
224
225static int hpp__width_period(struct perf_hpp *hpp __maybe_unused)
226{
227 return 12;
228}
229
230static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he)
231{
232 const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
233
234 return scnprintf(hpp->buf, hpp->size, fmt, he->stat.period);
235}
236
237static int hpp__header_period_baseline(struct perf_hpp *hpp) 211static int hpp__header_period_baseline(struct perf_hpp *hpp)
238{ 212{
239 const char *fmt = symbol_conf.field_sep ? "%s" : "%12s"; 213 const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
@@ -254,6 +228,7 @@ static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *h
254 228
255 return scnprintf(hpp->buf, hpp->size, fmt, period); 229 return scnprintf(hpp->buf, hpp->size, fmt, period);
256} 230}
231
257static int hpp__header_delta(struct perf_hpp *hpp) 232static int hpp__header_delta(struct perf_hpp *hpp)
258{ 233{
259 const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; 234 const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
@@ -268,14 +243,18 @@ static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused)
268 243
269static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) 244static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
270{ 245{
246 struct hist_entry *pair = hist_entry__next_pair(he);
271 const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s"; 247 const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
272 char buf[32] = " "; 248 char buf[32] = " ";
273 double diff;
274
275 if (he->diff.computed)
276 diff = he->diff.period_ratio_delta;
277 else
278 diff = perf_diff__compute_delta(he);
 249 double diff = 0.0;
 250
 251 if (pair) {
 252 if (he->diff.computed)
 253 diff = he->diff.period_ratio_delta;
 254 else
 255 diff = perf_diff__compute_delta(he, pair);
 256 } else
 257 diff = perf_diff__period_percent(he, he->stat.period);
279 258
280 if (fabs(diff) >= 0.01) 259 if (fabs(diff) >= 0.01)
281 scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); 260 scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
@@ -297,14 +276,17 @@ static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused)
297 276
298static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he) 277static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
299{ 278{
279 struct hist_entry *pair = hist_entry__next_pair(he);
300 const char *fmt = symbol_conf.field_sep ? "%s" : "%14s"; 280 const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
301 char buf[32] = " "; 281 char buf[32] = " ";
302 double ratio;
303
304 if (he->diff.computed)
305 ratio = he->diff.period_ratio;
306 else
307 ratio = perf_diff__compute_ratio(he);
 282 double ratio = 0.0;
 283
 284 if (pair) {
 285 if (he->diff.computed)
 286 ratio = he->diff.period_ratio;
 287 else
 288 ratio = perf_diff__compute_ratio(he, pair);
 289 }
308 290
309 if (ratio > 0.0) 291 if (ratio > 0.0)
310 scnprintf(buf, sizeof(buf), "%+14.6F", ratio); 292 scnprintf(buf, sizeof(buf), "%+14.6F", ratio);
@@ -326,14 +308,17 @@ static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused)
326 308
327static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he) 309static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
328{ 310{
311 struct hist_entry *pair = hist_entry__next_pair(he);
329 const char *fmt = symbol_conf.field_sep ? "%s" : "%14s"; 312 const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
330 char buf[32] = " "; 313 char buf[32] = " ";
331 s64 wdiff;
332
333 if (he->diff.computed)
334 wdiff = he->diff.wdiff;
335 else
336 wdiff = perf_diff__compute_wdiff(he);
 314 s64 wdiff = 0;
 315
 316 if (pair) {
 317 if (he->diff.computed)
 318 wdiff = he->diff.wdiff;
 319 else
 320 wdiff = perf_diff__compute_wdiff(he, pair);
 321 }
337 322
338 if (wdiff != 0) 323 if (wdiff != 0)
339 scnprintf(buf, sizeof(buf), "%14ld", wdiff); 324 scnprintf(buf, sizeof(buf), "%14ld", wdiff);
@@ -341,30 +326,6 @@ static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
341 return scnprintf(hpp->buf, hpp->size, fmt, buf); 326 return scnprintf(hpp->buf, hpp->size, fmt, buf);
342} 327}
343 328
344static int hpp__header_displ(struct perf_hpp *hpp)
345{
346 return scnprintf(hpp->buf, hpp->size, "Displ.");
347}
348
349static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused)
350{
351 return 6;
352}
353
354static int hpp__entry_displ(struct perf_hpp *hpp,
355 struct hist_entry *he)
356{
357 struct hist_entry *pair = hist_entry__next_pair(he);
358 long displacement = pair ? pair->position - he->position : 0;
359 const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s";
360 char buf[32] = " ";
361
362 if (displacement)
363 scnprintf(buf, sizeof(buf), "%+4ld", displacement);
364
365 return scnprintf(hpp->buf, hpp->size, fmt, buf);
366}
367
368static int hpp__header_formula(struct perf_hpp *hpp) 329static int hpp__header_formula(struct perf_hpp *hpp)
369{ 330{
370 const char *fmt = symbol_conf.field_sep ? "%s" : "%70s"; 331 const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
@@ -379,67 +340,91 @@ static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused)
379 340
380static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he) 341static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he)
381{ 342{
343 struct hist_entry *pair = hist_entry__next_pair(he);
382 const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s"; 344 const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
383 char buf[96] = " "; 345 char buf[96] = " ";
384 346
385 perf_diff__formula(buf, sizeof(buf), he);
 347 if (pair)
 348 perf_diff__formula(he, pair, buf, sizeof(buf));
 349
386 return scnprintf(hpp->buf, hpp->size, fmt, buf); 350 return scnprintf(hpp->buf, hpp->size, fmt, buf);
387} 351}
388 352
389#define HPP__COLOR_PRINT_FNS(_name) \ 353#define HPP__COLOR_PRINT_FNS(_name) \
390 .header = hpp__header_ ## _name, \
391 .width = hpp__width_ ## _name, \
392 .color = hpp__color_ ## _name, \
393 .entry = hpp__entry_ ## _name
 354 { \
 355 .header = hpp__header_ ## _name, \
 356 .width = hpp__width_ ## _name, \
 357 .color = hpp__color_ ## _name, \
 358 .entry = hpp__entry_ ## _name \
 359 }
394 360
395#define HPP__PRINT_FNS(_name) \ 361#define HPP__PRINT_FNS(_name) \
396 .header = hpp__header_ ## _name, \
397 .width = hpp__width_ ## _name, \
398 .entry = hpp__entry_ ## _name
 362 { \
 363 .header = hpp__header_ ## _name, \
 364 .width = hpp__width_ ## _name, \
 365 .entry = hpp__entry_ ## _name \
 366 }
399 367
400struct perf_hpp_fmt perf_hpp__format[] = { 368struct perf_hpp_fmt perf_hpp__format[] = {
401 { .cond = false, HPP__COLOR_PRINT_FNS(baseline) },
402 { .cond = true, HPP__COLOR_PRINT_FNS(overhead) },
403 { .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) },
404 { .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) },
405 { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) },
406 { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) },
407 { .cond = false, HPP__PRINT_FNS(samples) },
408 { .cond = false, HPP__PRINT_FNS(period) },
409 { .cond = false, HPP__PRINT_FNS(period_baseline) },
410 { .cond = false, HPP__PRINT_FNS(delta) },
411 { .cond = false, HPP__PRINT_FNS(ratio) },
412 { .cond = false, HPP__PRINT_FNS(wdiff) },
413 { .cond = false, HPP__PRINT_FNS(displ) },
414 { .cond = false, HPP__PRINT_FNS(formula) }
 369 HPP__COLOR_PRINT_FNS(baseline),
 370 HPP__COLOR_PRINT_FNS(overhead),
 371 HPP__COLOR_PRINT_FNS(overhead_sys),
 372 HPP__COLOR_PRINT_FNS(overhead_us),
 373 HPP__COLOR_PRINT_FNS(overhead_guest_sys),
 374 HPP__COLOR_PRINT_FNS(overhead_guest_us),
 375 HPP__PRINT_FNS(samples),
 376 HPP__PRINT_FNS(period),
 377 HPP__PRINT_FNS(period_baseline),
 378 HPP__PRINT_FNS(delta),
 379 HPP__PRINT_FNS(ratio),
 380 HPP__PRINT_FNS(wdiff),
 381 HPP__PRINT_FNS(formula)
415}; 382};
416 383
384LIST_HEAD(perf_hpp__list);
385
386
417#undef HPP__COLOR_PRINT_FNS 387#undef HPP__COLOR_PRINT_FNS
418#undef HPP__PRINT_FNS 388#undef HPP__PRINT_FNS
419 389
390#undef HPP_PERCENT_FNS
391#undef HPP_RAW_FNS
392
393#undef __HPP_HEADER_FN
394#undef __HPP_WIDTH_FN
395#undef __HPP_COLOR_PERCENT_FN
396#undef __HPP_ENTRY_PERCENT_FN
397#undef __HPP_ENTRY_RAW_FN
398
399
420void perf_hpp__init(void) 400void perf_hpp__init(void)
421{ 401{
422 if (symbol_conf.show_cpu_utilization) { 402 if (symbol_conf.show_cpu_utilization) {
423 perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true;
424 perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true;
 403 perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS);
 404 perf_hpp__column_enable(PERF_HPP__OVERHEAD_US);
425 405
426 if (perf_guest) { 406 if (perf_guest) {
427 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true;
428 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true;
 407 perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_SYS);
 408 perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_US);
429 } 409 }
430 } 410 }
431 411
432 if (symbol_conf.show_nr_samples) 412 if (symbol_conf.show_nr_samples)
433 perf_hpp__format[PERF_HPP__SAMPLES].cond = true;
 413 perf_hpp__column_enable(PERF_HPP__SAMPLES);
434 414
435 if (symbol_conf.show_total_period) 415 if (symbol_conf.show_total_period)
436 perf_hpp__format[PERF_HPP__PERIOD].cond = true;
 416 perf_hpp__column_enable(PERF_HPP__PERIOD);
417}
418
419void perf_hpp__column_register(struct perf_hpp_fmt *format)
420{
421 list_add_tail(&format->list, &perf_hpp__list);
437} 422}
438 423
439void perf_hpp__column_enable(unsigned col, bool enable)
 424void perf_hpp__column_enable(unsigned col)
440{ 425{
441 BUG_ON(col >= PERF_HPP__MAX_INDEX); 426 BUG_ON(col >= PERF_HPP__MAX_INDEX);
442 perf_hpp__format[col].cond = enable;
 427 perf_hpp__column_register(&perf_hpp__format[col]);
443} 428}
444 429
445static inline void advance_hpp(struct perf_hpp *hpp, int inc) 430static inline void advance_hpp(struct perf_hpp *hpp, int inc)
@@ -452,27 +437,29 @@ int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
452 bool color) 437 bool color)
453{ 438{
454 const char *sep = symbol_conf.field_sep; 439 const char *sep = symbol_conf.field_sep;
440 struct perf_hpp_fmt *fmt;
455 char *start = hpp->buf; 441 char *start = hpp->buf;
456 int i, ret;
 442 int ret;
457 bool first = true; 443 bool first = true;
458 444
459 if (symbol_conf.exclude_other && !he->parent) 445 if (symbol_conf.exclude_other && !he->parent)
460 return 0; 446 return 0;
461 447
462 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
463 if (!perf_hpp__format[i].cond)
464 continue;
465
 448 perf_hpp__for_each_format(fmt) {
 449 /*
 450 * If there's no field_sep, we still need
 451 * to display initial ' '.
 452 */
466 if (!sep || !first) { 453 if (!sep || !first) {
467 ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); 454 ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " ");
468 advance_hpp(hpp, ret); 455 advance_hpp(hpp, ret);
456 } else
469 first = false; 457 first = false;
470 }
471 458
472 if (color && perf_hpp__format[i].color)
473 ret = perf_hpp__format[i].color(hpp, he);
474 else
475 ret = perf_hpp__format[i].entry(hpp, he);
 459 if (color && fmt->color)
 460 ret = fmt->color(hpp, he);
 461 else
 462 ret = fmt->entry(hpp, he);
476 463
477 advance_hpp(hpp, ret); 464 advance_hpp(hpp, ret);
478 } 465 }
@@ -504,16 +491,18 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
504 */ 491 */
505unsigned int hists__sort_list_width(struct hists *hists) 492unsigned int hists__sort_list_width(struct hists *hists)
506{ 493{
494 struct perf_hpp_fmt *fmt;
507 struct sort_entry *se; 495 struct sort_entry *se;
508 int i, ret = 0;
 496 int i = 0, ret = 0;
497 struct perf_hpp dummy_hpp = {
498 .ptr = hists_to_evsel(hists),
499 };
509 500
510 for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
511 if (!perf_hpp__format[i].cond)
512 continue;
 501 perf_hpp__for_each_format(fmt) {
513 if (i) 502 if (i)
514 ret += 2; 503 ret += 2;
515 504
516 ret += perf_hpp__format[i].width(NULL);
 505 ret += fmt->width(&dummy_hpp);
517 } 506 }
518 507
519 list_for_each_entry(se, &hist_entry__sort_list, list) 508 list_for_each_entry(se, &hist_entry__sort_list, list)
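As a reading aid for the macro machinery introduced above: each HPP_PERCENT_FNS
invocation stamps out four column callbacks. A rough hand-expansion of
HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) follows (illustrative only;
the preprocessor output is authoritative, and all names come from the hunk):

static int hpp__header_overhead(struct perf_hpp *hpp)
{
	int len = 8;

	if (symbol_conf.event_group) {
		struct perf_evsel *evsel = hpp->ptr;

		len = max(len, evsel->nr_members * 8);
	}
	return scnprintf(hpp->buf, hpp->size, "%*s", len, "Overhead");
}

static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused)
{
	int len = 8;

	if (symbol_conf.event_group) {
		struct perf_evsel *evsel = hpp->ptr;

		len = max(len, evsel->nr_members * 8);
	}
	return len;
}

static u64 he_get_period(struct hist_entry *he)
{
	return he->stat.period;
}

static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he)
{
	return __hpp__fmt(hpp, he, he_get_period, " %6.2f%%",
			  (hpp_snprint_fn)percent_color_snprintf, true);
}

static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he)
{
	const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";
	return __hpp__fmt(hpp, he, he_get_period, fmt, scnprintf, true);
}

Each column thus gets a header/width pair that can widen itself for event
groups, plus color and plain printers sharing the group-aware __hpp__fmt().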
diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h
index 809eca5707fa..65092d576b4e 100644
--- a/tools/perf/ui/keysyms.h
+++ b/tools/perf/ui/keysyms.h
@@ -23,5 +23,6 @@
23#define K_TIMER -1 23#define K_TIMER -1
24#define K_ERROR -2 24#define K_ERROR -2
25#define K_RESIZE -3 25#define K_RESIZE -3
26#define K_SWITCH_INPUT_DATA -4
26 27
27#endif /* _PERF_KEYSYMS_H_ */ 28#endif /* _PERF_KEYSYMS_H_ */
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index ebb4cc107876..ae6a789cb0f6 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -8,7 +8,7 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
8 8
9void setup_browser(bool fallback_to_pager) 9void setup_browser(bool fallback_to_pager)
10{ 10{
11 if (!isatty(1) || dump_trace)
 11 if (use_browser < 2 && (!isatty(1) || dump_trace))
12 use_browser = 0; 12 use_browser = 0;
13 13
14 /* default to TUI */ 14 /* default to TUI */
@@ -30,6 +30,7 @@ void setup_browser(bool fallback_to_pager)
30 if (fallback_to_pager) 30 if (fallback_to_pager)
31 setup_pager(); 31 setup_pager();
32 32
33 perf_hpp__column_enable(PERF_HPP__OVERHEAD);
33 perf_hpp__init(); 34 perf_hpp__init();
34 break; 35 break;
35 } 36 }
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index f0ee204f99bb..ff1f60cf442e 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -3,6 +3,7 @@
3#include "../../util/util.h" 3#include "../../util/util.h"
4#include "../../util/hist.h" 4#include "../../util/hist.h"
5#include "../../util/sort.h" 5#include "../../util/sort.h"
6#include "../../util/evsel.h"
6 7
7 8
8static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) 9static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
@@ -335,17 +336,19 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
335size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, 336size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
336 int max_cols, FILE *fp) 337 int max_cols, FILE *fp)
337{ 338{
339 struct perf_hpp_fmt *fmt;
338 struct sort_entry *se; 340 struct sort_entry *se;
339 struct rb_node *nd; 341 struct rb_node *nd;
340 size_t ret = 0; 342 size_t ret = 0;
341 unsigned int width; 343 unsigned int width;
342 const char *sep = symbol_conf.field_sep; 344 const char *sep = symbol_conf.field_sep;
343 const char *col_width = symbol_conf.col_width_list_str; 345 const char *col_width = symbol_conf.col_width_list_str;
344 int idx, nr_rows = 0;
 346 int nr_rows = 0;
345 char bf[96]; 347 char bf[96];
346 struct perf_hpp dummy_hpp = { 348 struct perf_hpp dummy_hpp = {
347 .buf = bf, 349 .buf = bf,
348 .size = sizeof(bf), 350 .size = sizeof(bf),
351 .ptr = hists_to_evsel(hists),
349 }; 352 };
350 bool first = true; 353 bool first = true;
351 354
@@ -355,16 +358,14 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
355 goto print_entries; 358 goto print_entries;
356 359
357 fprintf(fp, "# "); 360 fprintf(fp, "# ");
358 for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
359 if (!perf_hpp__format[idx].cond)
360 continue;
361 361
362 perf_hpp__for_each_format(fmt) {
362 if (!first) 363 if (!first)
363 fprintf(fp, "%s", sep ?: " "); 364 fprintf(fp, "%s", sep ?: " ");
364 else 365 else
365 first = false; 366 first = false;
366 367
367 perf_hpp__format[idx].header(&dummy_hpp);
 368 fmt->header(&dummy_hpp);
368 fprintf(fp, "%s", bf); 369 fprintf(fp, "%s", bf);
369 } 370 }
370 371
@@ -400,18 +401,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
400 first = true; 401 first = true;
401 402
402 fprintf(fp, "# "); 403 fprintf(fp, "# ");
403 for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
404 unsigned int i;
405 404
406 if (!perf_hpp__format[idx].cond)
407 continue;
 405 perf_hpp__for_each_format(fmt) {
 406 unsigned int i;
408 407
409 if (!first) 408 if (!first)
410 fprintf(fp, "%s", sep ?: " "); 409 fprintf(fp, "%s", sep ?: " ");
411 else 410 else
412 first = false; 411 first = false;
413 412
414 width = perf_hpp__format[idx].width(&dummy_hpp);
 413 width = fmt->width(&dummy_hpp);
415 for (i = 0; i < width; i++) 414 for (i = 0; i < width; i++)
416 fprintf(fp, "."); 415 fprintf(fp, ".");
417 } 416 }
@@ -462,7 +461,7 @@ out:
462 return ret; 461 return ret;
463} 462}
464 463
465size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
 464size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
466{ 465{
467 int i; 466 int i;
468 size_t ret = 0; 467 size_t ret = 0;
@@ -470,7 +469,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
470 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 469 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
471 const char *name; 470 const char *name;
472 471
473 if (hists->stats.nr_events[i] == 0)
 472 if (stats->nr_events[i] == 0)
474 continue; 473 continue;
475 474
476 name = perf_event__name(i); 475 name = perf_event__name(i);
@@ -478,7 +477,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
478 continue; 477 continue;
479 478
480 ret += fprintf(fp, "%16s events: %10d\n", name, 479 ret += fprintf(fp, "%16s events: %10d\n", name,
481 hists->stats.nr_events[i]);
 480 stats->nr_events[i]);
482 } 481 }
483 482
484 return ret; 483 return ret;
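Renaming hists__fprintf_nr_events() to events_stats__fprintf() and passing the
stats block directly decouples the printer from struct hists. A minimal sketch
of what that enables, assuming the caller owns a struct events_stats of its own
(the helper name here is hypothetical):

static size_t print_event_totals(struct events_stats *stats, FILE *fp)
{
	/* No struct hists needed any more, just the stats block itself. */
	return events_stats__fprintf(stats, fp);
}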
diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c
index 2884d2f41e33..1c8b9afd5d6e 100644
--- a/tools/perf/ui/tui/helpline.c
+++ b/tools/perf/ui/tui/helpline.c
@@ -8,6 +8,8 @@
8#include "../ui.h" 8#include "../ui.h"
9#include "../libslang.h" 9#include "../libslang.h"
10 10
11char ui_helpline__last_msg[1024];
12
11static void tui_helpline__pop(void) 13static void tui_helpline__pop(void)
12{ 14{
13} 15}
@@ -23,20 +25,7 @@ static void tui_helpline__push(const char *msg)
23 strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; 25 strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0';
24} 26}
25 27
26struct ui_helpline tui_helpline_fns = {
 28static int tui_helpline__show(const char *format, va_list ap)
27 .pop = tui_helpline__pop,
28 .push = tui_helpline__push,
29};
30
31void ui_helpline__init(void)
32{
33 helpline_fns = &tui_helpline_fns;
34 ui_helpline__puts(" ");
35}
36
37char ui_helpline__last_msg[1024];
38
39int ui_helpline__show_help(const char *format, va_list ap)
40{ 29{
41 int ret; 30 int ret;
42 static int backlog; 31 static int backlog;
@@ -55,3 +44,15 @@ int ui_helpline__show_help(const char *format, va_list ap)
55 44
56 return ret; 45 return ret;
57} 46}
47
48struct ui_helpline tui_helpline_fns = {
49 .pop = tui_helpline__pop,
50 .push = tui_helpline__push,
51 .show = tui_helpline__show,
52};
53
54void ui_helpline__init(void)
55{
56 helpline_fns = &tui_helpline_fns;
57 ui_helpline__puts(" ");
58}
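The helpline refactoring turns the old free-standing ui_helpline__show_help()
into a per-frontend 'show' hook on the ops struct, registered alongside push
and pop. A sketch of the generic dispatch this makes possible (assuming struct
ui_helpline grew a 'show' member, as the TUI registration above implies):

#include <stdarg.h>

int ui_helpline__vshow_sketch(const char *fmt, va_list ap)
{
	/* helpline_fns points at whichever frontend registered its ops. */
	return helpline_fns->show(fmt, ap);
}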
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index 4f989774c8c6..e3e0a963d03a 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -52,7 +52,6 @@ int ui__warning(const char *format, ...)
52 return ret; 52 return ret;
53} 53}
54 54
55
56/** 55/**
57 * perf_error__register - Register error logging functions 56 * perf_error__register - Register error logging functions
58 * @eops: The pointer to error logging function struct 57 * @eops: The pointer to error logging function struct
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 6aa34e5afdcf..055fef34b6f6 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -26,13 +26,13 @@ VN=$(expr "$VN" : v*'\(.*\)')
26 26
27if test -r $GVF 27if test -r $GVF
28then 28then
29 VC=$(sed -e 's/^PERF_VERSION = //' <$GVF)
 29 VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
30else 30else
31 VC=unset 31 VC=unset
32fi 32fi
33test "$VN" = "$VC" || { 33test "$VN" = "$VC" || {
34 echo >&2 "PERF_VERSION = $VN" 34 echo >&2 "PERF_VERSION = $VN"
35 echo "PERF_VERSION = $VN" >$GVF
 35 echo "#define PERF_VERSION \"$VN\"" >$GVF
36} 36}
37 37
38 38
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 07aaeea60000..d33fe937e6f1 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -809,7 +809,7 @@ fallback:
809 pr_err("Can't annotate %s:\n\n" 809 pr_err("Can't annotate %s:\n\n"
810 "No vmlinux file%s\nwas found in the path.\n\n" 810 "No vmlinux file%s\nwas found in the path.\n\n"
811 "Please use:\n\n" 811 "Please use:\n\n"
812 " perf buildid-cache -av vmlinux\n\n"
 812 " perf buildid-cache -vu vmlinux\n\n"
813 "or:\n\n" 813 "or:\n\n"
814 " --vmlinux vmlinux\n", 814 " --vmlinux vmlinux\n",
815 sym->name, build_id_msg ?: ""); 815 sym->name, build_id_msg ?: "");
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 8eec94358a4a..c422440fe611 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -6,6 +6,7 @@
6#include "types.h" 6#include "types.h"
7#include "symbol.h" 7#include "symbol.h"
8#include "hist.h" 8#include "hist.h"
9#include "sort.h"
9#include <linux/list.h> 10#include <linux/list.h>
10#include <linux/rbtree.h> 11#include <linux/rbtree.h>
11#include <pthread.h> 12#include <pthread.h>
@@ -154,6 +155,29 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
154} 155}
155#endif 156#endif
156 157
158#ifdef GTK2_SUPPORT
159int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx,
160 struct hist_browser_timer *hbt);
161
162static inline int hist_entry__gtk_annotate(struct hist_entry *he, int evidx,
163 struct hist_browser_timer *hbt)
164{
165 return symbol__gtk_annotate(he->ms.sym, he->ms.map, evidx, hbt);
166}
167
168void perf_gtk__show_annotations(void);
169#else
170static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused,
171 int evidx __maybe_unused,
172 struct hist_browser_timer *hbt
173 __maybe_unused)
174{
175 return 0;
176}
177
178static inline void perf_gtk__show_annotations(void) {}
179#endif
180
157extern const char *disassembler_style; 181extern const char *disassembler_style;
158 182
159#endif /* __PERF_ANNOTATE_H */ 183#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index d3b3f5d82137..42b6a632fe7b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -444,7 +444,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
444 struct callchain_cursor_node *node = *cursor->last; 444 struct callchain_cursor_node *node = *cursor->last;
445 445
446 if (!node) { 446 if (!node) {
447 node = calloc(sizeof(*node), 1);
 447 node = calloc(1, sizeof(*node));
448 if (!node) 448 if (!node)
449 return -ENOMEM; 449 return -ENOMEM;
450 450
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index eb340571e7d6..3ee9f67d5af0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,4 +143,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
143 cursor->curr = cursor->curr->next; 143 cursor->curr = cursor->curr->next;
144 cursor->pos++; 144 cursor->pos++;
145} 145}
146
147struct option;
148
149int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
150extern const char record_callchain_help[];
146#endif /* __PERF_CALLCHAIN_H */ 151#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 2b32ffa9ebdb..f817046e22b1 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -1,4 +1,5 @@
1#include "util.h" 1#include "util.h"
2#include "sysfs.h"
2#include "../perf.h" 3#include "../perf.h"
3#include "cpumap.h" 4#include "cpumap.h"
4#include <assert.h> 5#include <assert.h>
@@ -201,3 +202,56 @@ void cpu_map__delete(struct cpu_map *map)
201{ 202{
202 free(map); 203 free(map);
203} 204}
205
206int cpu_map__get_socket(struct cpu_map *map, int idx)
207{
208 FILE *fp;
209 const char *mnt;
210 char path[PATH_MAX];
211 int cpu, ret;
212
213 if (idx > map->nr)
214 return -1;
215
216 cpu = map->map[idx];
217
218 mnt = sysfs_find_mountpoint();
219 if (!mnt)
220 return -1;
221
222 sprintf(path,
223 "%s/devices/system/cpu/cpu%d/topology/physical_package_id",
224 mnt, cpu);
225
226 fp = fopen(path, "r");
227 if (!fp)
228 return -1;
229 ret = fscanf(fp, "%d", &cpu);
230 fclose(fp);
231 return ret == 1 ? cpu : -1;
232}
233
234int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
235{
236 struct cpu_map *sock;
237 int nr = cpus->nr;
238 int cpu, s1, s2;
239
240 sock = calloc(1, sizeof(*sock) + nr * sizeof(int));
241 if (!sock)
242 return -1;
243
244 for (cpu = 0; cpu < nr; cpu++) {
245 s1 = cpu_map__get_socket(cpus, cpu);
246 for (s2 = 0; s2 < sock->nr; s2++) {
247 if (s1 == sock->map[s2])
248 break;
249 }
250 if (s2 == sock->nr) {
251 sock->map[sock->nr] = s1;
252 sock->nr++;
253 }
254 }
255 *sockp = sock;
256 return 0;
257}
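A usage sketch for the new socket helpers above (hypothetical caller; error
handling kept minimal, printing is illustrative):

void print_sockets(struct cpu_map *cpus)
{
	struct cpu_map *sock_map;
	int i;

	if (cpu_map__build_socket_map(cpus, &sock_map) < 0)
		return;

	for (i = 0; i < cpus->nr; i++)
		printf("cpu %d is on socket %d\n",
		       cpus->map[i], cpu_map__get_socket(cpus, i));

	cpu_map__delete(sock_map);
}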
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 2f68a3b8c285..161b00756a12 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -14,6 +14,15 @@ struct cpu_map *cpu_map__dummy_new(void);
14void cpu_map__delete(struct cpu_map *map); 14void cpu_map__delete(struct cpu_map *map);
15struct cpu_map *cpu_map__read(FILE *file); 15struct cpu_map *cpu_map__read(FILE *file);
16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); 16size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
17int cpu_map__get_socket(struct cpu_map *map, int idx);
18int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
19
20static inline int cpu_map__socket(struct cpu_map *sock, int s)
21{
22 if (!sock || s > sock->nr || s < 0)
23 return 0;
24 return sock->map[s];
25}
17 26
18static inline int cpu_map__nr(const struct cpu_map *map) 27static inline int cpu_map__nr(const struct cpu_map *map)
19{ 28{
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 03f830b48148..399e74c34c1a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -23,10 +23,8 @@ int eprintf(int level, const char *fmt, ...)
23 23
24 if (verbose >= level) { 24 if (verbose >= level) {
25 va_start(args, fmt); 25 va_start(args, fmt);
26 if (use_browser == 1)
27 ret = ui_helpline__show_help(fmt, args);
28 else if (use_browser == 2)
29 ret = perf_gtk__show_helpline(fmt, args);
 26 if (use_browser >= 1)
 27 ui_helpline__vshow(fmt, args);
30 else 28 else
31 ret = vfprintf(stderr, fmt, args); 29 ret = vfprintf(stderr, fmt, args);
32 va_end(args); 30 va_end(args);
@@ -49,28 +47,6 @@ int dump_printf(const char *fmt, ...)
49 return ret; 47 return ret;
50} 48}
51 49
52#if !defined(NEWT_SUPPORT) && !defined(GTK2_SUPPORT)
53int ui__warning(const char *format, ...)
54{
55 va_list args;
56
57 va_start(args, format);
58 vfprintf(stderr, format, args);
59 va_end(args);
60 return 0;
61}
62#endif
63
64int ui__error_paranoid(void)
65{
66 return ui__error("Permission error - are you root?\n"
67 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
68 " -1 - Not paranoid at all\n"
69 " 0 - Disallow raw tracepoint access for unpriv\n"
70 " 1 - Disallow cpu events for unpriv\n"
71 " 2 - Disallow kernel profiling for unpriv\n");
72}
73
74void trace_event(union perf_event *event) 50void trace_event(union perf_event *event)
75{ 51{
76 unsigned char *raw_event = (void *)event; 52 unsigned char *raw_event = (void *)event;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 83e8d234af6b..efbd98805ad0 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -5,6 +5,8 @@
5#include <stdbool.h> 5#include <stdbool.h>
6#include "event.h" 6#include "event.h"
7#include "../ui/helpline.h" 7#include "../ui/helpline.h"
8#include "../ui/progress.h"
9#include "../ui/util.h"
8 10
9extern int verbose; 11extern int verbose;
10extern bool quiet, dump_trace; 12extern bool quiet, dump_trace;
@@ -12,39 +14,7 @@ extern bool quiet, dump_trace;
12int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 14int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
13void trace_event(union perf_event *event); 15void trace_event(union perf_event *event);
14 16
15struct ui_progress;
16struct perf_error_ops;
17
18#if defined(NEWT_SUPPORT) || defined(GTK2_SUPPORT)
19
20#include "../ui/progress.h"
21int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); 17int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2)));
22#include "../ui/util.h"
23
24#else
25
26static inline void ui_progress__update(u64 curr __maybe_unused,
27 u64 total __maybe_unused,
28 const char *title __maybe_unused) {}
29static inline void ui_progress__finish(void) {}
30
31#define ui__error(format, arg...) ui__warning(format, ##arg)
32
33static inline int
34perf_error__register(struct perf_error_ops *eops __maybe_unused)
35{
36 return 0;
37}
38
39static inline int
40perf_error__unregister(struct perf_error_ops *eops __maybe_unused)
41{
42 return 0;
43}
44
45#endif /* NEWT_SUPPORT || GTK2_SUPPORT */
46
47int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); 18int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
48int ui__error_paranoid(void);
49 19
50#endif /* __PERF_DEBUG_H */ 20#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index d6d9a465acdb..6f7d5a9d6b05 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -539,13 +539,13 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name)
539} 539}
540 540
541size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, 541size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
542 bool with_hits)
 542 bool (skip)(struct dso *dso, int parm), int parm)
543{ 543{
544 struct dso *pos; 544 struct dso *pos;
545 size_t ret = 0; 545 size_t ret = 0;
546 546
547 list_for_each_entry(pos, head, node) { 547 list_for_each_entry(pos, head, node) {
548 if (with_hits && !pos->hit)
 548 if (skip && skip(pos, parm))
549 continue; 549 continue;
550 ret += dso__fprintf_buildid(pos, fp); 550 ret += dso__fprintf_buildid(pos, fp);
551 ret += fprintf(fp, " %s\n", pos->long_name); 551 ret += fprintf(fp, " %s\n", pos->long_name);
@@ -583,7 +583,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
583 if (dso->short_name != dso->long_name) 583 if (dso->short_name != dso->long_name)
584 ret += fprintf(fp, "%s, ", dso->long_name); 584 ret += fprintf(fp, "%s, ", dso->long_name);
585 ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type], 585 ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
586 dso->loaded ? "" : "NOT ");
 586 dso__loaded(dso, type) ? "" : "NOT ");
587 ret += dso__fprintf_buildid(dso, fp); 587 ret += dso__fprintf_buildid(dso, fp);
588 ret += fprintf(fp, ")\n"); 588 ret += fprintf(fp, ")\n");
589 for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) { 589 for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index e03276940b99..450199ab51b5 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -138,7 +138,7 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name);
138bool __dsos__read_build_ids(struct list_head *head, bool with_hits); 138bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
139 139
140size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, 140size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
141 bool with_hits); 141 bool (skip)(struct dso *dso, int parm), int parm);
142size_t __dsos__fprintf(struct list_head *head, FILE *fp); 142size_t __dsos__fprintf(struct list_head *head, FILE *fp);
143 143
144size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); 144size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
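The signature change swaps the with_hits flag for a caller-supplied predicate
plus an opaque int. A sketch of a skip callback reproducing the old behaviour
(hypothetical name; parm is unused in this case):

static bool dso__skip_unhit(struct dso *dso, int parm __maybe_unused)
{
	return !dso->hit;
}

/* call site, equivalent to the old 'with_hits = true':
 *	__dsos__fprintf_buildid(head, fp, dso__skip_unhit, 0);
 */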
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3cf2c3e0605f..5cd13d768cec 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -476,8 +476,10 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
476 } 476 }
477 } 477 }
478 478
479 if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0)
 479 if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) {
480 free(event);
480 return -ENOENT; 481 return -ENOENT;
482 }
481 483
482 map = machine->vmlinux_maps[MAP__FUNCTION]; 484 map = machine->vmlinux_maps[MAP__FUNCTION];
483 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), 485 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 705293489e3c..bc4ad7977438 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -49,10 +49,16 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
49 return evlist; 49 return evlist;
50} 50}
51 51
52void perf_evlist__config_attrs(struct perf_evlist *evlist,
 52void perf_evlist__config(struct perf_evlist *evlist,
53 struct perf_record_opts *opts) 53 struct perf_record_opts *opts)
54{ 54{
55 struct perf_evsel *evsel; 55 struct perf_evsel *evsel;
56 /*
57 * Set the evsel leader links before we configure attributes,
58 * since some might depend on this info.
59 */
60 if (opts->group)
61 perf_evlist__set_leader(evlist);
56 62
57 if (evlist->cpus->map[0] < 0) 63 if (evlist->cpus->map[0] < 0)
58 opts->no_inherit = true; 64 opts->no_inherit = true;
@@ -61,7 +67,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist,
61 perf_evsel__config(evsel, opts); 67 perf_evsel__config(evsel, opts);
62 68
63 if (evlist->nr_entries > 1) 69 if (evlist->nr_entries > 1)
64 evsel->attr.sample_type |= PERF_SAMPLE_ID;
 70 perf_evsel__set_sample_id(evsel);
65 } 71 }
66} 72}
67 73
@@ -111,18 +117,21 @@ void __perf_evlist__set_leader(struct list_head *list)
111 struct perf_evsel *evsel, *leader; 117 struct perf_evsel *evsel, *leader;
112 118
113 leader = list_entry(list->next, struct perf_evsel, node); 119 leader = list_entry(list->next, struct perf_evsel, node);
114 leader->leader = NULL;
 120 evsel = list_entry(list->prev, struct perf_evsel, node);
121
122 leader->nr_members = evsel->idx - leader->idx + 1;
115 123
116 list_for_each_entry(evsel, list, node) { 124 list_for_each_entry(evsel, list, node) {
117 if (evsel != leader)
118 evsel->leader = leader;
 125 evsel->leader = leader;
119 } 126 }
120} 127}
121 128
122void perf_evlist__set_leader(struct perf_evlist *evlist) 129void perf_evlist__set_leader(struct perf_evlist *evlist)
123{ 130{
124 if (evlist->nr_entries)
 131 if (evlist->nr_entries) {
132 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
125 __perf_evlist__set_leader(&evlist->entries); 133 __perf_evlist__set_leader(&evlist->entries);
134 }
126} 135}
127 136
128int perf_evlist__add_default(struct perf_evlist *evlist) 137int perf_evlist__add_default(struct perf_evlist *evlist)
@@ -222,7 +231,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
222 231
223 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { 232 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
224 list_for_each_entry(pos, &evlist->entries, node) { 233 list_for_each_entry(pos, &evlist->entries, node) {
225 if (perf_evsel__is_group_member(pos))
 234 if (!perf_evsel__is_group_leader(pos))
226 continue; 235 continue;
227 for (thread = 0; thread < evlist->threads->nr; thread++) 236 for (thread = 0; thread < evlist->threads->nr; thread++)
228 ioctl(FD(pos, cpu, thread), 237 ioctl(FD(pos, cpu, thread),
@@ -238,7 +247,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
238 247
239 for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) { 248 for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) {
240 list_for_each_entry(pos, &evlist->entries, node) { 249 list_for_each_entry(pos, &evlist->entries, node) {
241 if (perf_evsel__is_group_member(pos))
 250 if (!perf_evsel__is_group_leader(pos))
242 continue; 251 continue;
243 for (thread = 0; thread < evlist->threads->nr; thread++) 252 for (thread = 0; thread < evlist->threads->nr; thread++)
244 ioctl(FD(pos, cpu, thread), 253 ioctl(FD(pos, cpu, thread),
@@ -366,7 +375,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
366 if ((old & md->mask) + size != ((old + size) & md->mask)) { 375 if ((old & md->mask) + size != ((old + size) & md->mask)) {
367 unsigned int offset = old; 376 unsigned int offset = old;
368 unsigned int len = min(sizeof(*event), size), cpy; 377 unsigned int len = min(sizeof(*event), size), cpy;
369 void *dst = &evlist->event_copy;
 378 void *dst = &md->event_copy;
370 379
371 do { 380 do {
372 cpy = min(md->mask + 1 - (offset & md->mask), len); 381 cpy = min(md->mask + 1 - (offset & md->mask), len);
@@ -376,7 +385,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
376 len -= cpy; 385 len -= cpy;
377 } while (len); 386 } while (len);
378 387
379 event = &evlist->event_copy;
 388 event = &md->event_copy;
380 } 389 }
381 390
382 old += size; 391 old += size;
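Moving event_copy from the evlist into each struct perf_mmap gives every ring
buffer its own bounce buffer for records that wrap past the end of the map, so
reads from two maps cannot clobber each other's partially copied event. A
self-contained sketch of that wraparound reassembly (simplified types; not the
perf code itself):

#include <string.h>

struct ring {
	unsigned char	*base;		/* mapped data area, size mask + 1 */
	unsigned int	 mask;		/* buffer size - 1 (power of two)  */
	unsigned char	 copy[1 << 16];	/* per-ring bounce buffer          */
};

/* Return a pointer to 'len' contiguous bytes starting at ring offset
 * 'off', copying through the bounce buffer when the record wraps past
 * the end of the map (len must not exceed sizeof(r->copy)). */
static void *ring_read(struct ring *r, unsigned int off, unsigned int len)
{
	unsigned int until_end = r->mask + 1 - (off & r->mask);

	if (until_end >= len)
		return r->base + (off & r->mask);	/* no wrap */

	memcpy(r->copy, r->base + (off & r->mask), until_end);
	memcpy(r->copy + until_end, r->base, len - until_end);
	return r->copy;
}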
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 56003f779e60..2dd07bd60b4f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -17,10 +17,18 @@ struct perf_record_opts;
17#define PERF_EVLIST__HLIST_BITS 8 17#define PERF_EVLIST__HLIST_BITS 8
18#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) 18#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
19 19
20struct perf_mmap {
21 void *base;
22 int mask;
23 unsigned int prev;
24 union perf_event event_copy;
25};
26
20struct perf_evlist { 27struct perf_evlist {
21 struct list_head entries; 28 struct list_head entries;
22 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; 29 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
23 int nr_entries; 30 int nr_entries;
31 int nr_groups;
24 int nr_fds; 32 int nr_fds;
25 int nr_mmaps; 33 int nr_mmaps;
26 int mmap_len; 34 int mmap_len;
@@ -29,7 +37,6 @@ struct perf_evlist {
29 pid_t pid; 37 pid_t pid;
30 } workload; 38 } workload;
31 bool overwrite; 39 bool overwrite;
32 union perf_event event_copy;
33 struct perf_mmap *mmap; 40 struct perf_mmap *mmap;
34 struct pollfd *pollfd; 41 struct pollfd *pollfd;
35 struct thread_map *threads; 42 struct thread_map *threads;
@@ -76,8 +83,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
76 83
77int perf_evlist__open(struct perf_evlist *evlist); 84int perf_evlist__open(struct perf_evlist *evlist);
78 85
79void perf_evlist__config_attrs(struct perf_evlist *evlist,
 86void perf_evlist__config(struct perf_evlist *evlist,
80 struct perf_record_opts *opts); 87 struct perf_record_opts *opts);
81 88
82int perf_evlist__prepare_workload(struct perf_evlist *evlist, 89int perf_evlist__prepare_workload(struct perf_evlist *evlist,
83 struct perf_record_opts *opts, 90 struct perf_record_opts *opts,
@@ -135,4 +142,25 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
135} 142}
136 143
137size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); 144size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
145
146static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
147{
148 struct perf_event_mmap_page *pc = mm->base;
149 int head = pc->data_head;
150 rmb();
151 return head;
152}
153
154static inline void perf_mmap__write_tail(struct perf_mmap *md,
155 unsigned long tail)
156{
157 struct perf_event_mmap_page *pc = md->base;
158
159 /*
160 * ensure all reads are done before we write the tail out.
161 */
162 /* mb(); */
163 pc->data_tail = tail;
164}
165
138#endif /* __PERF_EVLIST_H */ 166#endif /* __PERF_EVLIST_H */
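The two inline helpers above pair a data_head read (ordered by rmb()) with a
data_tail write that releases consumed space back to the kernel. A minimal
consumer-loop sketch using them (perf_evlist__mmap_read() does the real event
framing; this only shows the shape):

	struct perf_mmap *md = &evlist->mmap[idx];
	unsigned int head = perf_mmap__read_head(md);	/* rmb() inside */
	unsigned int old = md->prev;

	while (old != head) {
		/* ...frame and consume the event at offset 'old',
		 * then advance: old += event->header.size; */
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);		/* tell the kernel we are done */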
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1b16dd1edc8e..9c82f98f26de 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -22,6 +22,11 @@
22#include <linux/perf_event.h> 22#include <linux/perf_event.h>
23#include "perf_regs.h" 23#include "perf_regs.h"
24 24
25static struct {
26 bool sample_id_all;
27 bool exclude_guest;
28} perf_missing_features;
29
25#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 30#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
26 31
27static int __perf_evsel__sample_size(u64 sample_type) 32static int __perf_evsel__sample_size(u64 sample_type)
@@ -50,11 +55,36 @@ void hists__init(struct hists *hists)
50 pthread_mutex_init(&hists->lock, NULL); 55 pthread_mutex_init(&hists->lock, NULL);
51} 56}
52 57
58void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
59 enum perf_event_sample_format bit)
60{
61 if (!(evsel->attr.sample_type & bit)) {
62 evsel->attr.sample_type |= bit;
63 evsel->sample_size += sizeof(u64);
64 }
65}
66
67void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
68 enum perf_event_sample_format bit)
69{
70 if (evsel->attr.sample_type & bit) {
71 evsel->attr.sample_type &= ~bit;
72 evsel->sample_size -= sizeof(u64);
73 }
74}
75
76void perf_evsel__set_sample_id(struct perf_evsel *evsel)
77{
78 perf_evsel__set_sample_bit(evsel, ID);
79 evsel->attr.read_format |= PERF_FORMAT_ID;
80}
81
53void perf_evsel__init(struct perf_evsel *evsel, 82void perf_evsel__init(struct perf_evsel *evsel,
54 struct perf_event_attr *attr, int idx) 83 struct perf_event_attr *attr, int idx)
55{ 84{
56 evsel->idx = idx; 85 evsel->idx = idx;
57 evsel->attr = *attr; 86 evsel->attr = *attr;
87 evsel->leader = evsel;
58 INIT_LIST_HEAD(&evsel->node); 88 INIT_LIST_HEAD(&evsel->node);
59 hists__init(&evsel->hists); 89 hists__init(&evsel->hists);
60 evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); 90 evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
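Routing every sample_type change through these helpers keeps
evsel->sample_size in step with the bits. A usage sketch, assuming the usual
short-name wrapper macro in evsel.h (not shown in this hunk):

	/* #define perf_evsel__set_sample_bit(evsel, bit) \
	 *	__perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
	 */
	perf_evsel__set_sample_bit(evsel, CPU);	  /* bit set, sample_size += 8 */
	perf_evsel__reset_sample_bit(evsel, CPU); /* bit clear, sample_size -= 8 */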
@@ -404,6 +434,31 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
404 return evsel->name ?: "unknown"; 434 return evsel->name ?: "unknown";
405} 435}
406 436
437const char *perf_evsel__group_name(struct perf_evsel *evsel)
438{
439 return evsel->group_name ?: "anon group";
440}
441
442int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
443{
444 int ret;
445 struct perf_evsel *pos;
446 const char *group_name = perf_evsel__group_name(evsel);
447
448 ret = scnprintf(buf, size, "%s", group_name);
449
450 ret += scnprintf(buf + ret, size - ret, " { %s",
451 perf_evsel__name(evsel));
452
453 for_each_group_member(pos, evsel)
454 ret += scnprintf(buf + ret, size - ret, ", %s",
455 perf_evsel__name(pos));
456
457 ret += scnprintf(buf + ret, size - ret, " }");
458
459 return ret;
460}
461
407/* 462/*
408 * The enable_on_exec/disabled value strategy: 463 * The enable_on_exec/disabled value strategy:
409 * 464 *
@@ -438,13 +493,11 @@ void perf_evsel__config(struct perf_evsel *evsel,
438 struct perf_event_attr *attr = &evsel->attr; 493 struct perf_event_attr *attr = &evsel->attr;
439 int track = !evsel->idx; /* only the first counter needs these */ 494 int track = !evsel->idx; /* only the first counter needs these */
440 495
441 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
 496 attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
442 attr->inherit = !opts->no_inherit; 497 attr->inherit = !opts->no_inherit;
443 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
444 PERF_FORMAT_TOTAL_TIME_RUNNING |
445 PERF_FORMAT_ID;
446 498
447 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 499 perf_evsel__set_sample_bit(evsel, IP);
500 perf_evsel__set_sample_bit(evsel, TID);
448 501
449 /* 502 /*
450 * We default some events to a 1 default interval. But keep 503 * We default some events to a 1 default interval. But keep
@@ -453,7 +506,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
453 if (!attr->sample_period || (opts->user_freq != UINT_MAX && 506 if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
454 opts->user_interval != ULLONG_MAX)) { 507 opts->user_interval != ULLONG_MAX)) {
455 if (opts->freq) { 508 if (opts->freq) {
456 attr->sample_type |= PERF_SAMPLE_PERIOD;
 509 perf_evsel__set_sample_bit(evsel, PERIOD);
457 attr->freq = 1; 510 attr->freq = 1;
458 attr->sample_freq = opts->freq; 511 attr->sample_freq = opts->freq;
459 } else { 512 } else {
@@ -468,16 +521,16 @@ void perf_evsel__config(struct perf_evsel *evsel,
468 attr->inherit_stat = 1; 521 attr->inherit_stat = 1;
469 522
470 if (opts->sample_address) { 523 if (opts->sample_address) {
471 attr->sample_type |= PERF_SAMPLE_ADDR;
 524 perf_evsel__set_sample_bit(evsel, ADDR);
472 attr->mmap_data = track; 525 attr->mmap_data = track;
473 } 526 }
474 527
475 if (opts->call_graph) { 528 if (opts->call_graph) {
476 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
 529 perf_evsel__set_sample_bit(evsel, CALLCHAIN);
477 530
478 if (opts->call_graph == CALLCHAIN_DWARF) { 531 if (opts->call_graph == CALLCHAIN_DWARF) {
479 attr->sample_type |= PERF_SAMPLE_REGS_USER |
480 PERF_SAMPLE_STACK_USER;
 532 perf_evsel__set_sample_bit(evsel, REGS_USER);
 533 perf_evsel__set_sample_bit(evsel, STACK_USER);
481 attr->sample_regs_user = PERF_REGS_MASK; 534 attr->sample_regs_user = PERF_REGS_MASK;
482 attr->sample_stack_user = opts->stack_dump_size; 535 attr->sample_stack_user = opts->stack_dump_size;
483 attr->exclude_callchain_user = 1; 536 attr->exclude_callchain_user = 1;
@@ -485,20 +538,20 @@ void perf_evsel__config(struct perf_evsel *evsel,
485 } 538 }
486 539
487 if (perf_target__has_cpu(&opts->target)) 540 if (perf_target__has_cpu(&opts->target))
488 attr->sample_type |= PERF_SAMPLE_CPU;
 541 perf_evsel__set_sample_bit(evsel, CPU);
489 542
490 if (opts->period) 543 if (opts->period)
491 attr->sample_type |= PERF_SAMPLE_PERIOD;
 544 perf_evsel__set_sample_bit(evsel, PERIOD);
492 545
493 if (!opts->sample_id_all_missing &&
 546 if (!perf_missing_features.sample_id_all &&
494 (opts->sample_time || !opts->no_inherit || 547 (opts->sample_time || !opts->no_inherit ||
495 perf_target__has_cpu(&opts->target))) 548 perf_target__has_cpu(&opts->target)))
496 attr->sample_type |= PERF_SAMPLE_TIME;
 549 perf_evsel__set_sample_bit(evsel, TIME);
497 550
498 if (opts->raw_samples) { 551 if (opts->raw_samples) {
499 attr->sample_type |= PERF_SAMPLE_TIME;
500 attr->sample_type |= PERF_SAMPLE_RAW;
501 attr->sample_type |= PERF_SAMPLE_CPU;
 552 perf_evsel__set_sample_bit(evsel, TIME);
 553 perf_evsel__set_sample_bit(evsel, RAW);
 554 perf_evsel__set_sample_bit(evsel, CPU);
502 } 555 }
503 556
504 if (opts->no_delay) { 557 if (opts->no_delay) {
@@ -506,7 +559,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
506 attr->wakeup_events = 1; 559 attr->wakeup_events = 1;
507 } 560 }
508 if (opts->branch_stack) { 561 if (opts->branch_stack) {
509 attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
 562 perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
510 attr->branch_sample_type = opts->branch_stack; 563 attr->branch_sample_type = opts->branch_stack;
511 } 564 }
512 565
@@ -519,14 +572,14 @@ void perf_evsel__config(struct perf_evsel *evsel,
519 * Disabling only independent events or group leaders, 572 * Disabling only independent events or group leaders,
520 * keeping group members enabled. 573 * keeping group members enabled.
521 */ 574 */
522 if (!perf_evsel__is_group_member(evsel))
 575 if (perf_evsel__is_group_leader(evsel))
523 attr->disabled = 1; 576 attr->disabled = 1;
524 577
525 /* 578 /*
526 * Setting enable_on_exec for independent events and 579 * Setting enable_on_exec for independent events and
527 * group leaders for traced executed by perf. 580 * group leaders for traced executed by perf.
528 */ 581 */
529 if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel))
 582 if (perf_target__none(&opts->target) && perf_evsel__is_group_leader(evsel))
530 attr->enable_on_exec = 1; 583 attr->enable_on_exec = 1;
531} 584}
532 585
@@ -612,6 +665,11 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
612 } 665 }
613} 666}
614 667
668void perf_evsel__free_counts(struct perf_evsel *evsel)
669{
670 free(evsel->counts);
671}
672
615void perf_evsel__exit(struct perf_evsel *evsel) 673void perf_evsel__exit(struct perf_evsel *evsel)
616{ 674{
617 assert(list_empty(&evsel->node)); 675 assert(list_empty(&evsel->node));
@@ -631,6 +689,28 @@ void perf_evsel__delete(struct perf_evsel *evsel)
631 free(evsel); 689 free(evsel);
632} 690}
633 691
692static inline void compute_deltas(struct perf_evsel *evsel,
693 int cpu,
694 struct perf_counts_values *count)
695{
696 struct perf_counts_values tmp;
697
698 if (!evsel->prev_raw_counts)
699 return;
700
701 if (cpu == -1) {
702 tmp = evsel->prev_raw_counts->aggr;
703 evsel->prev_raw_counts->aggr = *count;
704 } else {
705 tmp = evsel->prev_raw_counts->cpu[cpu];
706 evsel->prev_raw_counts->cpu[cpu] = *count;
707 }
708
709 count->val = count->val - tmp.val;
710 count->ena = count->ena - tmp.ena;
711 count->run = count->run - tmp.run;
712}
713
634int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 714int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
635 int cpu, int thread, bool scale) 715 int cpu, int thread, bool scale)
636{ 716{
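compute_deltas() converts cumulative counter reads into per-interval values
whenever prev_raw_counts is allocated; with it NULL the function is a no-op
and reads stay cumulative. A worked example with illustrative numbers
(aggregate case, cpu == -1):

	evsel->prev_raw_counts->aggr.val = 1000;	/* previous raw read */
	count.val = 1500;				/* fresh read from the kernel */
	compute_deltas(evsel, -1, &count);
	/* count.val is now 500; prev_raw_counts->aggr.val is 1500,
	 * so the next read again yields only the new interval. */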
@@ -646,6 +726,8 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
646 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 726 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
647 return -errno; 727 return -errno;
648 728
729 compute_deltas(evsel, cpu, &count);
730
649 if (scale) { 731 if (scale) {
650 if (count.run == 0) 732 if (count.run == 0)
651 count.val = 0; 733 count.val = 0;
@@ -684,6 +766,8 @@ int __perf_evsel__read(struct perf_evsel *evsel,
684 } 766 }
685 } 767 }
686 768
769 compute_deltas(evsel, -1, aggr);
770
687 evsel->counts->scaled = 0; 771 evsel->counts->scaled = 0;
688 if (scale) { 772 if (scale) {
689 if (aggr->run == 0) { 773 if (aggr->run == 0) {
@@ -707,7 +791,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
707 struct perf_evsel *leader = evsel->leader; 791 struct perf_evsel *leader = evsel->leader;
708 int fd; 792 int fd;
709 793
710 if (!perf_evsel__is_group_member(evsel))
 794 if (perf_evsel__is_group_leader(evsel))
711 return -1; 795 return -1;
712 796
713 /* 797 /*
@@ -738,6 +822,13 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
738 pid = evsel->cgrp->fd; 822 pid = evsel->cgrp->fd;
739 } 823 }
740 824
825fallback_missing_features:
826 if (perf_missing_features.exclude_guest)
827 evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
828retry_sample_id:
829 if (perf_missing_features.sample_id_all)
830 evsel->attr.sample_id_all = 0;
831
741 for (cpu = 0; cpu < cpus->nr; cpu++) { 832 for (cpu = 0; cpu < cpus->nr; cpu++) {
742 833
743 for (thread = 0; thread < threads->nr; thread++) { 834 for (thread = 0; thread < threads->nr; thread++) {
@@ -754,13 +845,26 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
754 group_fd, flags); 845 group_fd, flags);
755 if (FD(evsel, cpu, thread) < 0) { 846 if (FD(evsel, cpu, thread) < 0) {
756 err = -errno; 847 err = -errno;
757 goto out_close;
 848 goto try_fallback;
758 } 849 }
759 } 850 }
760 } 851 }
761 852
762 return 0; 853 return 0;
763 854
855try_fallback:
856 if (err != -EINVAL || cpu > 0 || thread > 0)
857 goto out_close;
858
859 if (!perf_missing_features.exclude_guest &&
860 (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
861 perf_missing_features.exclude_guest = true;
862 goto fallback_missing_features;
863 } else if (!perf_missing_features.sample_id_all) {
864 perf_missing_features.sample_id_all = true;
865 goto retry_sample_id;
866 }
867
764out_close: 868out_close:
765 do { 869 do {
766 while (--thread >= 0) { 870 while (--thread >= 0) {
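The new labels implement a probe-once-and-retry scheme: an EINVAL on the very
first fd (cpu 0, thread 0) is taken to mean the kernel rejected a newer attr
bit, so one feature is recorded in the global perf_missing_features and the
open restarts from the top. A condensed view of the control flow (sketch only,
same names as above):

	fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags);
	if (fd < 0 && errno == EINVAL && cpu == 0 && thread == 0) {
		if (!perf_missing_features.exclude_guest &&
		    (evsel->attr.exclude_guest || evsel->attr.exclude_host))
			perf_missing_features.exclude_guest = true;	/* strip, retry */
		else if (!perf_missing_features.sample_id_all)
			perf_missing_features.sample_id_all = true;	/* strip, retry */
	}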
@@ -1205,3 +1309,225 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
1205 1309
1206 return 0; 1310 return 0;
1207} 1311}
1312
1313static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
1314{
1315 va_list args;
1316 int ret = 0;
1317
1318 if (!*first) {
1319 ret += fprintf(fp, ",");
1320 } else {
1321 ret += fprintf(fp, ":");
1322 *first = false;
1323 }
1324
1325 va_start(args, fmt);
1326 ret += vfprintf(fp, fmt, args);
1327 va_end(args);
1328 return ret;
1329}
1330
1331static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value)
1332{
1333 if (value == 0)
1334 return 0;
1335
1336 return comma_fprintf(fp, first, " %s: %" PRIu64, field, value);
1337}
1338
1339#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field)
1340
1341struct bit_names {
1342 int bit;
1343 const char *name;
1344};
1345
1346static int bits__fprintf(FILE *fp, const char *field, u64 value,
1347 struct bit_names *bits, bool *first)
1348{
1349 int i = 0, printed = comma_fprintf(fp, first, " %s: ", field);
1350 bool first_bit = true;
1351
1352 do {
1353 if (value & bits[i].bit) {
1354 printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name);
1355 first_bit = false;
1356 }
1357 } while (bits[++i].name != NULL);
1358
1359 return printed;
1360}
1361
1362static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
1363{
1364#define bit_name(n) { PERF_SAMPLE_##n, #n }
1365 struct bit_names bits[] = {
1366 bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
1367 bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
1368 bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
1369 bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
1370 { .name = NULL, }
1371 };
1372#undef bit_name
1373 return bits__fprintf(fp, "sample_type", value, bits, first);
1374}
1375
1376static int read_format__fprintf(FILE *fp, bool *first, u64 value)
1377{
1378#define bit_name(n) { PERF_FORMAT_##n, #n }
1379 struct bit_names bits[] = {
1380 bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
1381 bit_name(ID), bit_name(GROUP),
1382 { .name = NULL, }
1383 };
1384#undef bit_name
1385 return bits__fprintf(fp, "read_format", value, bits, first);
1386}
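/*
 * Usage sketch for the bit tables above: for a sample_type of
 * IP|TID|PERIOD, something like
 *
 *	sample_type__fprintf(fp, &first, PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 *					 PERF_SAMPLE_PERIOD);
 *
 * appends (illustratively) " sample_type: IP|TID|PERIOD" to the attribute
 * dump, with comma_fprintf() supplying the leading ':' or ','.
 */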
1387
1388int perf_evsel__fprintf(struct perf_evsel *evsel,
1389 struct perf_attr_details *details, FILE *fp)
1390{
1391 bool first = true;
1392 int printed = 0;
1393
1394 if (details->event_group) {
1395 struct perf_evsel *pos;
1396
1397 if (!perf_evsel__is_group_leader(evsel))
1398 return 0;
1399
1400 if (evsel->nr_members > 1)
1401 printed += fprintf(fp, "%s{", evsel->group_name ?: "");
1402
1403 printed += fprintf(fp, "%s", perf_evsel__name(evsel));
1404 for_each_group_member(pos, evsel)
1405 printed += fprintf(fp, ",%s", perf_evsel__name(pos));
1406
1407 if (evsel->nr_members > 1)
1408 printed += fprintf(fp, "}");
1409 goto out;
1410 }
1411
1412 printed += fprintf(fp, "%s", perf_evsel__name(evsel));
1413
1414 if (details->verbose || details->freq) {
1415 printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64,
1416 (u64)evsel->attr.sample_freq);
1417 }
1418
1419 if (details->verbose) {
1420 if_print(type);
1421 if_print(config);
1422 if_print(config1);
1423 if_print(config2);
1424 if_print(size);
1425 printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type);
1426 if (evsel->attr.read_format)
1427 printed += read_format__fprintf(fp, &first, evsel->attr.read_format);
1428 if_print(disabled);
1429 if_print(inherit);
1430 if_print(pinned);
1431 if_print(exclusive);
1432 if_print(exclude_user);
1433 if_print(exclude_kernel);
1434 if_print(exclude_hv);
1435 if_print(exclude_idle);
1436 if_print(mmap);
1437 if_print(comm);
1438 if_print(freq);
1439 if_print(inherit_stat);
1440 if_print(enable_on_exec);
1441 if_print(task);
1442 if_print(watermark);
1443 if_print(precise_ip);
1444 if_print(mmap_data);
1445 if_print(sample_id_all);
1446 if_print(exclude_host);
1447 if_print(exclude_guest);
1448 if_print(__reserved_1);
1449 if_print(wakeup_events);
1450 if_print(bp_type);
1451 if_print(branch_sample_type);
1452 }
1453out:
1454 fputc('\n', fp);
1455 return ++printed;
1456}
1457
1458bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
1459 char *msg, size_t msgsize)
1460{
1461 if ((err == ENOENT || err == ENXIO) &&
1462 evsel->attr.type == PERF_TYPE_HARDWARE &&
1463 evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
1464 /*
1465	 * If it's cycles then fall back to the hrtimer-based
1466	 * cpu-clock-tick sw counter, which is always available even
1467	 * when there is no PMU support.
1468 *
1469 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
1470 * b0a873e).
1471 */
1472 scnprintf(msg, msgsize, "%s",
1473"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
1474
1475 evsel->attr.type = PERF_TYPE_SOFTWARE;
1476 evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
1477
1478 free(evsel->name);
1479 evsel->name = NULL;
1480 return true;
1481 }
1482
1483 return false;
1484}
1485
1486int perf_evsel__open_strerror(struct perf_evsel *evsel,
1487 struct perf_target *target,
1488 int err, char *msg, size_t size)
1489{
1490 switch (err) {
1491 case EPERM:
1492 case EACCES:
1493 return scnprintf(msg, size, "%s",
1494 "You may not have permission to collect %sstats.\n"
1495 "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
1496 " -1 - Not paranoid at all\n"
1497 " 0 - Disallow raw tracepoint access for unpriv\n"
1498 " 1 - Disallow cpu events for unpriv\n"
1499 " 2 - Disallow kernel profiling for unpriv",
1500 target->system_wide ? "system-wide " : "");
1501 case ENOENT:
1502 return scnprintf(msg, size, "The %s event is not supported.",
1503 perf_evsel__name(evsel));
1504 case EMFILE:
1505 return scnprintf(msg, size, "%s",
1506 "Too many events are opened.\n"
1507 "Try again after reducing the number of events.");
1508 case ENODEV:
1509 if (target->cpu_list)
1510 return scnprintf(msg, size, "%s",
1511 "No such device - did you specify an out-of-range profile CPU?\n");
1512 break;
1513 case EOPNOTSUPP:
1514 if (evsel->attr.precise_ip)
1515 return scnprintf(msg, size, "%s",
1516 "\'precise\' request may not be supported. Try removing 'p' modifier.");
1517#if defined(__i386__) || defined(__x86_64__)
1518 if (evsel->attr.type == PERF_TYPE_HARDWARE)
1519 return scnprintf(msg, size, "%s",
1520 "No hardware sampling interrupt available.\n"
1521 "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
1522#endif
1523 break;
1524 default:
1525 break;
1526 }
1527
1528 return scnprintf(msg, size,
1529 "The sys_perf_event_open() syscall returned with %d (%s) for event (%s). \n"
1530 "/bin/dmesg may provide additional information.\n"
1531 "No CONFIG_PERF_EVENTS=y kernel support configured?\n",
1532 err, strerror(err), perf_evsel__name(evsel));
1533}
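
The try_fallback path above is how perf probes kernel capabilities at runtime: a first-open EINVAL on cpu 0/thread 0 is taken to mean the running kernel predates an attr bit, the corresponding perf_missing_features flag is set, and the open is retried without it. A minimal sketch of that probe-and-retry loop, self-contained and with a simulated open_counter() standing in for sys_perf_event_open():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct missing_features {
	bool exclude_guest;	/* kernel lacks attr.exclude_guest/host */
	bool sample_id_all;	/* kernel lacks attr.sample_id_all */
};

static struct missing_features missing;

/* Stand-in for sys_perf_event_open(): pretend the kernel rejects
 * any attribute bit it does not know about with EINVAL. */
static int open_counter(bool exclude_guest, bool sample_id_all)
{
	const bool kernel_has_exclude_guest = false;	/* simulate an old kernel */
	const bool kernel_has_sample_id_all = true;

	if ((exclude_guest && !kernel_has_exclude_guest) ||
	    (sample_id_all && !kernel_has_sample_id_all)) {
		errno = EINVAL;
		return -1;
	}
	return 3;	/* fake fd */
}

static int open_with_fallback(void)
{
	int fd;
retry:
	fd = open_counter(!missing.exclude_guest, !missing.sample_id_all);
	if (fd >= 0 || errno != EINVAL)
		return fd;

	/* Strip one suspect attribute bit per retry, newest first. */
	if (!missing.exclude_guest) {
		missing.exclude_guest = true;
		goto retry;
	}
	if (!missing.sample_id_all) {
		missing.sample_id_all = true;
		goto retry;
	}
	return -1;	/* EINVAL with nothing left to strip */
}

int main(void)
{
	printf("fd = %d (exclude_guest missing: %d)\n",
	       open_with_fallback(), missing.exclude_guest);
	return 0;
}
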
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3d2b8017438c..52021c3087df 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -53,6 +53,7 @@ struct perf_evsel {
53 struct xyarray *sample_id; 53 struct xyarray *sample_id;
54 u64 *id; 54 u64 *id;
55 struct perf_counts *counts; 55 struct perf_counts *counts;
56 struct perf_counts *prev_raw_counts;
56 int idx; 57 int idx;
57 u32 ids; 58 u32 ids;
58 struct hists hists; 59 struct hists hists;
@@ -73,10 +74,13 @@ struct perf_evsel {
73 bool needs_swap; 74 bool needs_swap;
74 /* parse modifier helper */ 75 /* parse modifier helper */
75 int exclude_GH; 76 int exclude_GH;
77 int nr_members;
76 struct perf_evsel *leader; 78 struct perf_evsel *leader;
77 char *group_name; 79 char *group_name;
78}; 80};
79 81
82#define hists_to_evsel(h) container_of(h, struct perf_evsel, hists)
83
80struct cpu_map; 84struct cpu_map;
81struct thread_map; 85struct thread_map;
82struct perf_evlist; 86struct perf_evlist;
@@ -110,14 +114,30 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
110int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, 114int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
111 char *bf, size_t size); 115 char *bf, size_t size);
112const char *perf_evsel__name(struct perf_evsel *evsel); 116const char *perf_evsel__name(struct perf_evsel *evsel);
117const char *perf_evsel__group_name(struct perf_evsel *evsel);
118int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
113 119
114int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 120int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
115int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); 121int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
116int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 122int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
117void perf_evsel__free_fd(struct perf_evsel *evsel); 123void perf_evsel__free_fd(struct perf_evsel *evsel);
118void perf_evsel__free_id(struct perf_evsel *evsel); 124void perf_evsel__free_id(struct perf_evsel *evsel);
125void perf_evsel__free_counts(struct perf_evsel *evsel);
119void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 126void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
120 127
128void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
129 enum perf_event_sample_format bit);
130void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
131 enum perf_event_sample_format bit);
132
133#define perf_evsel__set_sample_bit(evsel, bit) \
134 __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
135
136#define perf_evsel__reset_sample_bit(evsel, bit) \
137 __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
138
139void perf_evsel__set_sample_id(struct perf_evsel *evsel);
140
121int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, 141int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
122 const char *filter); 142 const char *filter);
123 143
@@ -226,8 +246,34 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
226 return list_entry(evsel->node.next, struct perf_evsel, node); 246 return list_entry(evsel->node.next, struct perf_evsel, node);
227} 247}
228 248
229static inline bool perf_evsel__is_group_member(const struct perf_evsel *evsel) 249static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
250{
251 return evsel->leader == evsel;
252}
253
254struct perf_attr_details {
255 bool freq;
256 bool verbose;
257 bool event_group;
258};
259
260int perf_evsel__fprintf(struct perf_evsel *evsel,
261 struct perf_attr_details *details, FILE *fp);
262
263bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
264 char *msg, size_t msgsize);
265int perf_evsel__open_strerror(struct perf_evsel *evsel,
266 struct perf_target *target,
267 int err, char *msg, size_t size);
268
269static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
230{ 270{
231 return evsel->leader != NULL; 271 return evsel->idx - evsel->leader->idx;
232} 272}
273
274#define for_each_group_member(_evsel, _leader) \
275for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \
276 (_evsel) && (_evsel)->leader == (_leader); \
277 (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
278
233#endif /* __PERF_EVSEL_H */ 279#endif /* __PERF_EVSEL_H */
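
The for_each_group_member() iterator added to evsel.h relies on an evlist invariant: members are linked immediately after their leader, and a member's leader pointer marks where the group ends; perf_evsel__group_idx() is then just the idx delta from the leader. A reduced sketch of both, using a hand-rolled singly linked list in place of list_head (names and sample events are illustrative):

#include <stdio.h>

struct evsel {
	const char *name;
	int idx;
	struct evsel *leader;
	struct evsel *next;	/* simplified stand-in for list_head */
};

/* Walk the members that follow a leader, stopping at the next group. */
#define for_each_group_member(pos, head)			\
	for ((pos) = (head)->next;				\
	     (pos) && (pos)->leader == (head);			\
	     (pos) = (pos)->next)

static int group_idx(const struct evsel *evsel)
{
	return evsel->idx - evsel->leader->idx;	/* 0 for the leader */
}

int main(void)
{
	struct evsel cycles = { "cycles", 0, &cycles, NULL };
	struct evsel insns  = { "instructions", 1, &cycles, NULL };
	struct evsel branch = { "branches", 2, &branch, NULL };
	struct evsel *pos;

	cycles.next = &insns;	/* members follow their leader */
	insns.next = &branch;	/* next group leader ends the walk */

	printf("{%s", cycles.name);
	for_each_group_member(pos, &cycles)
		printf(",%s (member %d)", pos->name, group_idx(pos));
	printf("}\n");
	return 0;
}
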
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b7da4634a047..f4bfd79ef6a7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -148,7 +148,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
148 u32 len; 148 u32 len;
149 char *buf; 149 char *buf;
150 150
151 sz = read(fd, &len, sizeof(len)); 151 sz = readn(fd, &len, sizeof(len));
152 if (sz < (ssize_t)sizeof(len)) 152 if (sz < (ssize_t)sizeof(len))
153 return NULL; 153 return NULL;
154 154
@@ -159,7 +159,7 @@ static char *do_read_string(int fd, struct perf_header *ph)
159 if (!buf) 159 if (!buf)
160 return NULL; 160 return NULL;
161 161
162 ret = read(fd, buf, len); 162 ret = readn(fd, buf, len);
163 if (ret == (ssize_t)len) { 163 if (ret == (ssize_t)len) {
164 /* 164 /*
165 * strings are padded by zeroes 165 * strings are padded by zeroes
@@ -287,12 +287,12 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd)
287 struct perf_session *session = container_of(header, 287 struct perf_session *session = container_of(header,
288 struct perf_session, header); 288 struct perf_session, header);
289 struct rb_node *nd; 289 struct rb_node *nd;
290 int err = machine__write_buildid_table(&session->host_machine, fd); 290 int err = machine__write_buildid_table(&session->machines.host, fd);
291 291
292 if (err) 292 if (err)
293 return err; 293 return err;
294 294
295 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 295 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
296 struct machine *pos = rb_entry(nd, struct machine, rb_node); 296 struct machine *pos = rb_entry(nd, struct machine, rb_node);
297 err = machine__write_buildid_table(pos, fd); 297 err = machine__write_buildid_table(pos, fd);
298 if (err) 298 if (err)
@@ -313,7 +313,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
313 if (is_kallsyms) { 313 if (is_kallsyms) {
314 if (symbol_conf.kptr_restrict) { 314 if (symbol_conf.kptr_restrict) {
315 pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); 315 pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
316 return 0; 316 err = 0;
317 goto out_free;
317 } 318 }
318 realname = (char *) name; 319 realname = (char *) name;
319 } else 320 } else
@@ -448,9 +449,9 @@ static int perf_session__cache_build_ids(struct perf_session *session)
448 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 449 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
449 return -1; 450 return -1;
450 451
451 ret = machine__cache_build_ids(&session->host_machine, debugdir); 452 ret = machine__cache_build_ids(&session->machines.host, debugdir);
452 453
453 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 454 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
454 struct machine *pos = rb_entry(nd, struct machine, rb_node); 455 struct machine *pos = rb_entry(nd, struct machine, rb_node);
455 ret |= machine__cache_build_ids(pos, debugdir); 456 ret |= machine__cache_build_ids(pos, debugdir);
456 } 457 }
@@ -467,9 +468,9 @@ static bool machine__read_build_ids(struct machine *machine, bool with_hits)
467static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) 468static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
468{ 469{
469 struct rb_node *nd; 470 struct rb_node *nd;
470 bool ret = machine__read_build_ids(&session->host_machine, with_hits); 471 bool ret = machine__read_build_ids(&session->machines.host, with_hits);
471 472
472 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { 473 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
473 struct machine *pos = rb_entry(nd, struct machine, rb_node); 474 struct machine *pos = rb_entry(nd, struct machine, rb_node);
474 ret |= machine__read_build_ids(pos, with_hits); 475 ret |= machine__read_build_ids(pos, with_hits);
475 } 476 }
@@ -954,6 +955,7 @@ static int write_topo_node(int fd, int node)
954 } 955 }
955 956
956 fclose(fp); 957 fclose(fp);
958 fp = NULL;
957 959
958 ret = do_write(fd, &mem_total, sizeof(u64)); 960 ret = do_write(fd, &mem_total, sizeof(u64));
959 if (ret) 961 if (ret)
@@ -980,7 +982,8 @@ static int write_topo_node(int fd, int node)
980 ret = do_write_string(fd, buf); 982 ret = do_write_string(fd, buf);
981done: 983done:
982 free(buf); 984 free(buf);
983 fclose(fp); 985 if (fp)
986 fclose(fp);
984 return ret; 987 return ret;
985} 988}
986 989
@@ -1051,16 +1054,25 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
1051 struct perf_pmu *pmu = NULL; 1054 struct perf_pmu *pmu = NULL;
1052 off_t offset = lseek(fd, 0, SEEK_CUR); 1055 off_t offset = lseek(fd, 0, SEEK_CUR);
1053 __u32 pmu_num = 0; 1056 __u32 pmu_num = 0;
1057 int ret;
1054 1058
1055 /* write real pmu_num later */ 1059 /* write real pmu_num later */
1056 do_write(fd, &pmu_num, sizeof(pmu_num)); 1060 ret = do_write(fd, &pmu_num, sizeof(pmu_num));
1061 if (ret < 0)
1062 return ret;
1057 1063
1058 while ((pmu = perf_pmu__scan(pmu))) { 1064 while ((pmu = perf_pmu__scan(pmu))) {
1059 if (!pmu->name) 1065 if (!pmu->name)
1060 continue; 1066 continue;
1061 pmu_num++; 1067 pmu_num++;
1062 do_write(fd, &pmu->type, sizeof(pmu->type)); 1068
1063 do_write_string(fd, pmu->name); 1069 ret = do_write(fd, &pmu->type, sizeof(pmu->type));
1070 if (ret < 0)
1071 return ret;
1072
1073 ret = do_write_string(fd, pmu->name);
1074 if (ret < 0)
1075 return ret;
1064 } 1076 }
1065 1077
1066 if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { 1078 if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) {
@@ -1073,6 +1085,52 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
1073} 1085}
1074 1086
1075/* 1087/*
1088 * File format:
1089 *
1090 * struct group_descs {
1091 * u32 nr_groups;
1092 * struct group_desc {
1093 * char name[];
1094 * u32 leader_idx;
1095 * u32 nr_members;
1096 * }[nr_groups];
1097 * };
1098 */
1099static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
1100 struct perf_evlist *evlist)
1101{
1102 u32 nr_groups = evlist->nr_groups;
1103 struct perf_evsel *evsel;
1104 int ret;
1105
1106 ret = do_write(fd, &nr_groups, sizeof(nr_groups));
1107 if (ret < 0)
1108 return ret;
1109
1110 list_for_each_entry(evsel, &evlist->entries, node) {
1111 if (perf_evsel__is_group_leader(evsel) &&
1112 evsel->nr_members > 1) {
1113 const char *name = evsel->group_name ?: "{anon_group}";
1114 u32 leader_idx = evsel->idx;
1115 u32 nr_members = evsel->nr_members;
1116
1117 ret = do_write_string(fd, name);
1118 if (ret < 0)
1119 return ret;
1120
1121 ret = do_write(fd, &leader_idx, sizeof(leader_idx));
1122 if (ret < 0)
1123 return ret;
1124
1125 ret = do_write(fd, &nr_members, sizeof(nr_members));
1126 if (ret < 0)
1127 return ret;
1128 }
1129 }
1130 return 0;
1131}
1132
1133/*
1076 * default get_cpuid(): nothing gets recorded 1134 * default get_cpuid(): nothing gets recorded
1077 * actual implementation must be in arch/$(ARCH)/util/header.c 1135 * actual implementation must be in arch/$(ARCH)/util/header.c
1078 */ 1136 */
@@ -1209,14 +1267,14 @@ read_event_desc(struct perf_header *ph, int fd)
1209 size_t msz; 1267 size_t msz;
1210 1268
1211 /* number of events */ 1269 /* number of events */
1212 ret = read(fd, &nre, sizeof(nre)); 1270 ret = readn(fd, &nre, sizeof(nre));
1213 if (ret != (ssize_t)sizeof(nre)) 1271 if (ret != (ssize_t)sizeof(nre))
1214 goto error; 1272 goto error;
1215 1273
1216 if (ph->needs_swap) 1274 if (ph->needs_swap)
1217 nre = bswap_32(nre); 1275 nre = bswap_32(nre);
1218 1276
1219 ret = read(fd, &sz, sizeof(sz)); 1277 ret = readn(fd, &sz, sizeof(sz));
1220 if (ret != (ssize_t)sizeof(sz)) 1278 if (ret != (ssize_t)sizeof(sz))
1221 goto error; 1279 goto error;
1222 1280
@@ -1244,7 +1302,7 @@ read_event_desc(struct perf_header *ph, int fd)
1244 * must read entire on-file attr struct to 1302 * must read entire on-file attr struct to
1245 * sync up with layout. 1303 * sync up with layout.
1246 */ 1304 */
1247 ret = read(fd, buf, sz); 1305 ret = readn(fd, buf, sz);
1248 if (ret != (ssize_t)sz) 1306 if (ret != (ssize_t)sz)
1249 goto error; 1307 goto error;
1250 1308
@@ -1253,7 +1311,7 @@ read_event_desc(struct perf_header *ph, int fd)
1253 1311
1254 memcpy(&evsel->attr, buf, msz); 1312 memcpy(&evsel->attr, buf, msz);
1255 1313
1256 ret = read(fd, &nr, sizeof(nr)); 1314 ret = readn(fd, &nr, sizeof(nr));
1257 if (ret != (ssize_t)sizeof(nr)) 1315 if (ret != (ssize_t)sizeof(nr))
1258 goto error; 1316 goto error;
1259 1317
@@ -1274,7 +1332,7 @@ read_event_desc(struct perf_header *ph, int fd)
1274 evsel->id = id; 1332 evsel->id = id;
1275 1333
1276 for (j = 0 ; j < nr; j++) { 1334 for (j = 0 ; j < nr; j++) {
1277 ret = read(fd, id, sizeof(*id)); 1335 ret = readn(fd, id, sizeof(*id));
1278 if (ret != (ssize_t)sizeof(*id)) 1336 if (ret != (ssize_t)sizeof(*id))
1279 goto error; 1337 goto error;
1280 if (ph->needs_swap) 1338 if (ph->needs_swap)
@@ -1435,6 +1493,31 @@ error:
1435 fprintf(fp, "# pmu mappings: unable to read\n"); 1493 fprintf(fp, "# pmu mappings: unable to read\n");
1436} 1494}
1437 1495
1496static void print_group_desc(struct perf_header *ph, int fd __maybe_unused,
1497 FILE *fp)
1498{
1499 struct perf_session *session;
1500 struct perf_evsel *evsel;
1501 u32 nr = 0;
1502
1503 session = container_of(ph, struct perf_session, header);
1504
1505 list_for_each_entry(evsel, &session->evlist->entries, node) {
1506 if (perf_evsel__is_group_leader(evsel) &&
1507 evsel->nr_members > 1) {
1508 fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
1509 perf_evsel__name(evsel));
1510
1511 nr = evsel->nr_members - 1;
1512 } else if (nr) {
1513 fprintf(fp, ",%s", perf_evsel__name(evsel));
1514
1515 if (--nr == 0)
1516 fprintf(fp, "}\n");
1517 }
1518 }
1519}
1520
1438static int __event_process_build_id(struct build_id_event *bev, 1521static int __event_process_build_id(struct build_id_event *bev,
1439 char *filename, 1522 char *filename,
1440 struct perf_session *session) 1523 struct perf_session *session)
@@ -1506,14 +1589,14 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1506 while (offset < limit) { 1589 while (offset < limit) {
1507 ssize_t len; 1590 ssize_t len;
1508 1591
1509 if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) 1592 if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1510 return -1; 1593 return -1;
1511 1594
1512 if (header->needs_swap) 1595 if (header->needs_swap)
1513 perf_event_header__bswap(&old_bev.header); 1596 perf_event_header__bswap(&old_bev.header);
1514 1597
1515 len = old_bev.header.size - sizeof(old_bev); 1598 len = old_bev.header.size - sizeof(old_bev);
1516 if (read(input, filename, len) != len) 1599 if (readn(input, filename, len) != len)
1517 return -1; 1600 return -1;
1518 1601
1519 bev.header = old_bev.header; 1602 bev.header = old_bev.header;
@@ -1548,14 +1631,14 @@ static int perf_header__read_build_ids(struct perf_header *header,
1548 while (offset < limit) { 1631 while (offset < limit) {
1549 ssize_t len; 1632 ssize_t len;
1550 1633
1551 if (read(input, &bev, sizeof(bev)) != sizeof(bev)) 1634 if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
1552 goto out; 1635 goto out;
1553 1636
1554 if (header->needs_swap) 1637 if (header->needs_swap)
1555 perf_event_header__bswap(&bev.header); 1638 perf_event_header__bswap(&bev.header);
1556 1639
1557 len = bev.header.size - sizeof(bev); 1640 len = bev.header.size - sizeof(bev);
1558 if (read(input, filename, len) != len) 1641 if (readn(input, filename, len) != len)
1559 goto out; 1642 goto out;
1560 /* 1643 /*
1561 * The a1645ce1 changeset: 1644 * The a1645ce1 changeset:
@@ -1641,7 +1724,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
1641 size_t ret; 1724 size_t ret;
1642 u32 nr; 1725 u32 nr;
1643 1726
1644 ret = read(fd, &nr, sizeof(nr)); 1727 ret = readn(fd, &nr, sizeof(nr));
1645 if (ret != sizeof(nr)) 1728 if (ret != sizeof(nr))
1646 return -1; 1729 return -1;
1647 1730
@@ -1650,7 +1733,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused,
1650 1733
1651 ph->env.nr_cpus_online = nr; 1734 ph->env.nr_cpus_online = nr;
1652 1735
1653 ret = read(fd, &nr, sizeof(nr)); 1736 ret = readn(fd, &nr, sizeof(nr));
1654 if (ret != sizeof(nr)) 1737 if (ret != sizeof(nr))
1655 return -1; 1738 return -1;
1656 1739
@@ -1684,7 +1767,7 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused,
1684 uint64_t mem; 1767 uint64_t mem;
1685 size_t ret; 1768 size_t ret;
1686 1769
1687 ret = read(fd, &mem, sizeof(mem)); 1770 ret = readn(fd, &mem, sizeof(mem));
1688 if (ret != sizeof(mem)) 1771 if (ret != sizeof(mem))
1689 return -1; 1772 return -1;
1690 1773
@@ -1756,7 +1839,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused,
1756 u32 nr, i; 1839 u32 nr, i;
1757 struct strbuf sb; 1840 struct strbuf sb;
1758 1841
1759 ret = read(fd, &nr, sizeof(nr)); 1842 ret = readn(fd, &nr, sizeof(nr));
1760 if (ret != sizeof(nr)) 1843 if (ret != sizeof(nr))
1761 return -1; 1844 return -1;
1762 1845
@@ -1792,7 +1875,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
1792 char *str; 1875 char *str;
1793 struct strbuf sb; 1876 struct strbuf sb;
1794 1877
1795 ret = read(fd, &nr, sizeof(nr)); 1878 ret = readn(fd, &nr, sizeof(nr));
1796 if (ret != sizeof(nr)) 1879 if (ret != sizeof(nr))
1797 return -1; 1880 return -1;
1798 1881
@@ -1813,7 +1896,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused
1813 } 1896 }
1814 ph->env.sibling_cores = strbuf_detach(&sb, NULL); 1897 ph->env.sibling_cores = strbuf_detach(&sb, NULL);
1815 1898
1816 ret = read(fd, &nr, sizeof(nr)); 1899 ret = readn(fd, &nr, sizeof(nr));
1817 if (ret != sizeof(nr)) 1900 if (ret != sizeof(nr))
1818 return -1; 1901 return -1;
1819 1902
@@ -1850,7 +1933,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
1850 struct strbuf sb; 1933 struct strbuf sb;
1851 1934
1852 /* nr nodes */ 1935 /* nr nodes */
1853 ret = read(fd, &nr, sizeof(nr)); 1936 ret = readn(fd, &nr, sizeof(nr));
1854 if (ret != sizeof(nr)) 1937 if (ret != sizeof(nr))
1855 goto error; 1938 goto error;
1856 1939
@@ -1862,15 +1945,15 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
1862 1945
1863 for (i = 0; i < nr; i++) { 1946 for (i = 0; i < nr; i++) {
1864 /* node number */ 1947 /* node number */
1865 ret = read(fd, &node, sizeof(node)); 1948 ret = readn(fd, &node, sizeof(node));
1866 if (ret != sizeof(node)) 1949 if (ret != sizeof(node))
1867 goto error; 1950 goto error;
1868 1951
1869 ret = read(fd, &mem_total, sizeof(u64)); 1952 ret = readn(fd, &mem_total, sizeof(u64));
1870 if (ret != sizeof(u64)) 1953 if (ret != sizeof(u64))
1871 goto error; 1954 goto error;
1872 1955
1873 ret = read(fd, &mem_free, sizeof(u64)); 1956 ret = readn(fd, &mem_free, sizeof(u64));
1874 if (ret != sizeof(u64)) 1957 if (ret != sizeof(u64))
1875 goto error; 1958 goto error;
1876 1959
@@ -1909,7 +1992,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
1909 u32 type; 1992 u32 type;
1910 struct strbuf sb; 1993 struct strbuf sb;
1911 1994
1912 ret = read(fd, &pmu_num, sizeof(pmu_num)); 1995 ret = readn(fd, &pmu_num, sizeof(pmu_num));
1913 if (ret != sizeof(pmu_num)) 1996 if (ret != sizeof(pmu_num))
1914 return -1; 1997 return -1;
1915 1998
@@ -1925,7 +2008,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused
1925 strbuf_init(&sb, 128); 2008 strbuf_init(&sb, 128);
1926 2009
1927 while (pmu_num) { 2010 while (pmu_num) {
1928 if (read(fd, &type, sizeof(type)) != sizeof(type)) 2011 if (readn(fd, &type, sizeof(type)) != sizeof(type))
1929 goto error; 2012 goto error;
1930 if (ph->needs_swap) 2013 if (ph->needs_swap)
1931 type = bswap_32(type); 2014 type = bswap_32(type);
@@ -1949,6 +2032,98 @@ error:
1949 return -1; 2032 return -1;
1950} 2033}
1951 2034
2035static int process_group_desc(struct perf_file_section *section __maybe_unused,
2036 struct perf_header *ph, int fd,
2037 void *data __maybe_unused)
2038{
2039	int ret = -1;
2040 u32 i, nr, nr_groups;
2041 struct perf_session *session;
2042 struct perf_evsel *evsel, *leader = NULL;
2043 struct group_desc {
2044 char *name;
2045 u32 leader_idx;
2046 u32 nr_members;
2047 } *desc;
2048
2049 if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups))
2050 return -1;
2051
2052 if (ph->needs_swap)
2053 nr_groups = bswap_32(nr_groups);
2054
2055 ph->env.nr_groups = nr_groups;
2056 if (!nr_groups) {
2057 pr_debug("group desc not available\n");
2058 return 0;
2059 }
2060
2061 desc = calloc(nr_groups, sizeof(*desc));
2062 if (!desc)
2063 return -1;
2064
2065 for (i = 0; i < nr_groups; i++) {
2066 desc[i].name = do_read_string(fd, ph);
2067 if (!desc[i].name)
2068 goto out_free;
2069
2070 if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32))
2071 goto out_free;
2072
2073 if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32))
2074 goto out_free;
2075
2076 if (ph->needs_swap) {
2077 desc[i].leader_idx = bswap_32(desc[i].leader_idx);
2078 desc[i].nr_members = bswap_32(desc[i].nr_members);
2079 }
2080 }
2081
2082 /*
2083 * Rebuild group relationship based on the group_desc
2084 */
2085 session = container_of(ph, struct perf_session, header);
2086 session->evlist->nr_groups = nr_groups;
2087
2088 i = nr = 0;
2089 list_for_each_entry(evsel, &session->evlist->entries, node) {
2090 if (evsel->idx == (int) desc[i].leader_idx) {
2091 evsel->leader = evsel;
2092 /* {anon_group} is a dummy name */
2093	if (strcmp(desc[i].name, "{anon_group}"))
2094	evsel->group_name = desc[i].name, desc[i].name = NULL;
2095 evsel->nr_members = desc[i].nr_members;
2096
2097 if (i >= nr_groups || nr > 0) {
2098 pr_debug("invalid group desc\n");
2099 goto out_free;
2100 }
2101
2102 leader = evsel;
2103 nr = evsel->nr_members - 1;
2104 i++;
2105 } else if (nr) {
2106 /* This is a group member */
2107 evsel->leader = leader;
2108
2109 nr--;
2110 }
2111 }
2112
2113 if (i != nr_groups || nr != 0) {
2114 pr_debug("invalid group desc\n");
2115 goto out_free;
2116 }
2117
2118 ret = 0;
2119out_free:
2120 while ((int) --i >= 0)
2121 free(desc[i].name);
2122 free(desc);
2123
2124 return ret;
2125}
2126
1952struct feature_ops { 2127struct feature_ops {
1953 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); 2128 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
1954 void (*print)(struct perf_header *h, int fd, FILE *fp); 2129 void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1988,6 +2163,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1988 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), 2163 FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
1989 FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), 2164 FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
1990 FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), 2165 FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
2166 FEAT_OPP(HEADER_GROUP_DESC, group_desc),
1991}; 2167};
1992 2168
1993struct header_print_data { 2169struct header_print_data {
@@ -2077,7 +2253,7 @@ static int perf_header__adds_write(struct perf_header *header,
2077 if (!nr_sections) 2253 if (!nr_sections)
2078 return 0; 2254 return 0;
2079 2255
2080 feat_sec = p = calloc(sizeof(*feat_sec), nr_sections); 2256 feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
2081 if (feat_sec == NULL) 2257 if (feat_sec == NULL)
2082 return -ENOMEM; 2258 return -ENOMEM;
2083 2259
@@ -2249,7 +2425,7 @@ int perf_header__process_sections(struct perf_header *header, int fd,
2249 if (!nr_sections) 2425 if (!nr_sections)
2250 return 0; 2426 return 0;
2251 2427
2252 feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections); 2428 feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
2253 if (!feat_sec) 2429 if (!feat_sec)
2254 return -1; 2430 return -1;
2255 2431
@@ -2912,16 +3088,22 @@ int perf_event__process_tracing_data(union perf_event *event,
2912 session->repipe); 3088 session->repipe);
2913 padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; 3089 padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
2914 3090
2915 if (read(session->fd, buf, padding) < 0) 3091 if (readn(session->fd, buf, padding) < 0) {
2916 die("reading input file"); 3092 pr_err("%s: reading input file", __func__);
3093 return -1;
3094 }
2917 if (session->repipe) { 3095 if (session->repipe) {
2918 int retw = write(STDOUT_FILENO, buf, padding); 3096 int retw = write(STDOUT_FILENO, buf, padding);
2919 if (retw <= 0 || retw != padding) 3097 if (retw <= 0 || retw != padding) {
2920 die("repiping tracing data padding"); 3098 pr_err("%s: repiping tracing data padding", __func__);
3099 return -1;
3100 }
2921 } 3101 }
2922 3102
2923 if (size_read + padding != size) 3103 if (size_read + padding != size) {
2924 die("tracing data size mismatch"); 3104 pr_err("%s: tracing data size mismatch", __func__);
3105 return -1;
3106 }
2925 3107
2926 perf_evlist__prepare_tracepoint_events(session->evlist, 3108 perf_evlist__prepare_tracepoint_events(session->evlist,
2927 session->pevent); 3109 session->pevent);
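
write_group_desc() and process_group_desc() round-trip the group_descs record documented above: a u32 count followed by (name, leader_idx, nr_members) triples. The sketch below keeps that shape but simplifies the encoding, using a fixed-size name field instead of perf's length-prefixed strings and skipping the byte-swapping needed for cross-endian files:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct group_desc {
	char name[32];		/* simplified: fixed-size, not perf's format */
	uint32_t leader_idx;
	uint32_t nr_members;
};

static int write_groups(FILE *fp, const struct group_desc *desc, uint32_t nr)
{
	if (fwrite(&nr, sizeof(nr), 1, fp) != 1)
		return -1;
	if (fwrite(desc, sizeof(*desc), nr, fp) != nr)
		return -1;
	return 0;
}

static int read_groups(FILE *fp, struct group_desc *desc, uint32_t max)
{
	uint32_t nr;

	if (fread(&nr, sizeof(nr), 1, fp) != 1 || nr > max)
		return -1;
	if (fread(desc, sizeof(*desc), nr, fp) != nr)
		return -1;
	return (int)nr;
}

int main(void)
{
	struct group_desc out = { "{anon_group}", 0, 2 }, in[4];
	FILE *fp = tmpfile();
	int nr;

	if (!fp || write_groups(fp, &out, 1))
		return 1;
	rewind(fp);
	nr = read_groups(fp, in, 4);
	if (nr == 1)
		printf("group '%s': leader %" PRIu32 ", %" PRIu32 " members\n",
		       in[0].name, in[0].leader_idx, in[0].nr_members);
	return 0;
}
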
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 20f0344accb1..c9fc55cada6d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -29,6 +29,7 @@ enum {
29 HEADER_NUMA_TOPOLOGY, 29 HEADER_NUMA_TOPOLOGY,
30 HEADER_BRANCH_STACK, 30 HEADER_BRANCH_STACK,
31 HEADER_PMU_MAPPINGS, 31 HEADER_PMU_MAPPINGS,
32 HEADER_GROUP_DESC,
32 HEADER_LAST_FEATURE, 33 HEADER_LAST_FEATURE,
33 HEADER_FEAT_BITS = 256, 34 HEADER_FEAT_BITS = 256,
34}; 35};
@@ -79,6 +80,7 @@ struct perf_session_env {
79 char *numa_nodes; 80 char *numa_nodes;
80 int nr_pmu_mappings; 81 int nr_pmu_mappings;
81 char *pmu_mappings; 82 char *pmu_mappings;
83 int nr_groups;
82}; 84};
83 85
84struct perf_header { 86struct perf_header {
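
HEADER_GROUP_DESC slots in immediately before HEADER_LAST_FEATURE because the enum values double as bit positions in the on-disk feature bitmap: features may only ever be appended, never reordered, or older perf.data files would be misparsed. A toy illustration with made-up feature names and values:

#include <stdio.h>

/* Illustrative only -- not perf's real feature numbering. */
enum {
	FEAT_BUILD_ID,
	FEAT_CPU_TOPOLOGY,
	FEAT_GROUP_DESC,	/* new features append here... */
	FEAT_LAST,		/* ...so only this sentinel moves down */
};

static int has_feature(unsigned long bitmap, int feat)
{
	return feat < FEAT_LAST && (bitmap & (1UL << feat)) != 0;
}

int main(void)
{
	unsigned long bitmap = (1UL << FEAT_BUILD_ID) | (1UL << FEAT_GROUP_DESC);

	printf("group desc present: %d\n", has_feature(bitmap, FEAT_GROUP_DESC));
	return 0;
}
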
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cb17e2a8c6ed..f855941bebea 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -4,6 +4,7 @@
4#include "hist.h" 4#include "hist.h"
5#include "session.h" 5#include "session.h"
6#include "sort.h" 6#include "sort.h"
7#include "evsel.h"
7#include <math.h> 8#include <math.h>
8 9
9static bool hists__filter_entry_by_dso(struct hists *hists, 10static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -82,6 +83,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
82 hists__new_col_len(hists, HISTC_DSO, len); 83 hists__new_col_len(hists, HISTC_DSO, len);
83 } 84 }
84 85
86 if (h->parent)
87 hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
88
85 if (h->branch_info) { 89 if (h->branch_info) {
86 int symlen; 90 int symlen;
87 /* 91 /*
@@ -242,6 +246,14 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
242 246
243 if (he->ms.map) 247 if (he->ms.map)
244 he->ms.map->referenced = true; 248 he->ms.map->referenced = true;
249
250 if (he->branch_info) {
251 if (he->branch_info->from.map)
252 he->branch_info->from.map->referenced = true;
253 if (he->branch_info->to.map)
254 he->branch_info->to.map->referenced = true;
255 }
256
245 if (symbol_conf.use_callchain) 257 if (symbol_conf.use_callchain)
246 callchain_init(he->callchain); 258 callchain_init(he->callchain);
247 259
@@ -251,7 +263,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template)
251 return he; 263 return he;
252} 264}
253 265
254static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) 266void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
255{ 267{
256 if (!h->filtered) { 268 if (!h->filtered) {
257 hists__calc_col_len(hists, h); 269 hists__calc_col_len(hists, h);
@@ -285,7 +297,13 @@ static struct hist_entry *add_hist_entry(struct hists *hists,
285 parent = *p; 297 parent = *p;
286 he = rb_entry(parent, struct hist_entry, rb_node_in); 298 he = rb_entry(parent, struct hist_entry, rb_node_in);
287 299
288 cmp = hist_entry__cmp(entry, he); 300 /*
301	 * Make sure that it receives arguments in the same order as
302	 * hist_entry__collapse() so that we can use an appropriate
303	 * function when searching an entry regardless of which sort
304	 * keys were used.
305 */
306 cmp = hist_entry__cmp(he, entry);
289 307
290 if (!cmp) { 308 if (!cmp) {
291 he_stat__add_period(&he->stat, period); 309 he_stat__add_period(&he->stat, period);
@@ -523,6 +541,62 @@ void hists__collapse_resort_threaded(struct hists *hists)
523 * reverse the map, sort on period. 541 * reverse the map, sort on period.
524 */ 542 */
525 543
544static int period_cmp(u64 period_a, u64 period_b)
545{
546 if (period_a > period_b)
547 return 1;
548 if (period_a < period_b)
549 return -1;
550 return 0;
551}
552
553static int hist_entry__sort_on_period(struct hist_entry *a,
554 struct hist_entry *b)
555{
556 int ret;
557 int i, nr_members;
558 struct perf_evsel *evsel;
559 struct hist_entry *pair;
560 u64 *periods_a, *periods_b;
561
562 ret = period_cmp(a->stat.period, b->stat.period);
563 if (ret || !symbol_conf.event_group)
564 return ret;
565
566 evsel = hists_to_evsel(a->hists);
567 nr_members = evsel->nr_members;
568 if (nr_members <= 1)
569 return ret;
570
571	periods_a = zalloc(sizeof(*periods_a) * nr_members);
572	periods_b = zalloc(sizeof(*periods_b) * nr_members);
573
574 if (!periods_a || !periods_b)
575 goto out;
576
577 list_for_each_entry(pair, &a->pairs.head, pairs.node) {
578 evsel = hists_to_evsel(pair->hists);
579 periods_a[perf_evsel__group_idx(evsel)] = pair->stat.period;
580 }
581
582 list_for_each_entry(pair, &b->pairs.head, pairs.node) {
583 evsel = hists_to_evsel(pair->hists);
584 periods_b[perf_evsel__group_idx(evsel)] = pair->stat.period;
585 }
586
587 for (i = 1; i < nr_members; i++) {
588 ret = period_cmp(periods_a[i], periods_b[i]);
589 if (ret)
590 break;
591 }
592
593out:
594 free(periods_a);
595 free(periods_b);
596
597 return ret;
598}
599
526static void __hists__insert_output_entry(struct rb_root *entries, 600static void __hists__insert_output_entry(struct rb_root *entries,
527 struct hist_entry *he, 601 struct hist_entry *he,
528 u64 min_callchain_hits) 602 u64 min_callchain_hits)
@@ -539,7 +613,7 @@ static void __hists__insert_output_entry(struct rb_root *entries,
539 parent = *p; 613 parent = *p;
540 iter = rb_entry(parent, struct hist_entry, rb_node); 614 iter = rb_entry(parent, struct hist_entry, rb_node);
541 615
542 if (he->stat.period > iter->stat.period) 616 if (hist_entry__sort_on_period(he, iter) > 0)
543 p = &(*p)->rb_left; 617 p = &(*p)->rb_left;
544 else 618 else
545 p = &(*p)->rb_right; 619 p = &(*p)->rb_right;
@@ -711,25 +785,38 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize)
711 return symbol__annotate(he->ms.sym, he->ms.map, privsize); 785 return symbol__annotate(he->ms.sym, he->ms.map, privsize);
712} 786}
713 787
788void events_stats__inc(struct events_stats *stats, u32 type)
789{
790 ++stats->nr_events[0];
791 ++stats->nr_events[type];
792}
793
714void hists__inc_nr_events(struct hists *hists, u32 type) 794void hists__inc_nr_events(struct hists *hists, u32 type)
715{ 795{
716 ++hists->stats.nr_events[0]; 796 events_stats__inc(&hists->stats, type);
717 ++hists->stats.nr_events[type];
718} 797}
719 798
720static struct hist_entry *hists__add_dummy_entry(struct hists *hists, 799static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
721 struct hist_entry *pair) 800 struct hist_entry *pair)
722{ 801{
723 struct rb_node **p = &hists->entries.rb_node; 802 struct rb_root *root;
803 struct rb_node **p;
724 struct rb_node *parent = NULL; 804 struct rb_node *parent = NULL;
725 struct hist_entry *he; 805 struct hist_entry *he;
726 int cmp; 806 int cmp;
727 807
808 if (sort__need_collapse)
809 root = &hists->entries_collapsed;
810 else
811 root = hists->entries_in;
812
813 p = &root->rb_node;
814
728 while (*p != NULL) { 815 while (*p != NULL) {
729 parent = *p; 816 parent = *p;
730 he = rb_entry(parent, struct hist_entry, rb_node); 817 he = rb_entry(parent, struct hist_entry, rb_node_in);
731 818
732 cmp = hist_entry__cmp(pair, he); 819 cmp = hist_entry__collapse(he, pair);
733 820
734 if (!cmp) 821 if (!cmp)
735 goto out; 822 goto out;
@@ -744,8 +831,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
744 if (he) { 831 if (he) {
745 memset(&he->stat, 0, sizeof(he->stat)); 832 memset(&he->stat, 0, sizeof(he->stat));
746 he->hists = hists; 833 he->hists = hists;
747 rb_link_node(&he->rb_node, parent, p); 834 rb_link_node(&he->rb_node_in, parent, p);
748 rb_insert_color(&he->rb_node, &hists->entries); 835 rb_insert_color(&he->rb_node_in, root);
749 hists__inc_nr_entries(hists, he); 836 hists__inc_nr_entries(hists, he);
750 } 837 }
751out: 838out:
@@ -755,11 +842,16 @@ out:
755static struct hist_entry *hists__find_entry(struct hists *hists, 842static struct hist_entry *hists__find_entry(struct hists *hists,
756 struct hist_entry *he) 843 struct hist_entry *he)
757{ 844{
758 struct rb_node *n = hists->entries.rb_node; 845 struct rb_node *n;
846
847 if (sort__need_collapse)
848 n = hists->entries_collapsed.rb_node;
849 else
850 n = hists->entries_in->rb_node;
759 851
760 while (n) { 852 while (n) {
761 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); 853 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
762 int64_t cmp = hist_entry__cmp(he, iter); 854 int64_t cmp = hist_entry__collapse(iter, he);
763 855
764 if (cmp < 0) 856 if (cmp < 0)
765 n = n->rb_left; 857 n = n->rb_left;
@@ -777,15 +869,21 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
777 */ 869 */
778void hists__match(struct hists *leader, struct hists *other) 870void hists__match(struct hists *leader, struct hists *other)
779{ 871{
872 struct rb_root *root;
780 struct rb_node *nd; 873 struct rb_node *nd;
781 struct hist_entry *pos, *pair; 874 struct hist_entry *pos, *pair;
782 875
783 for (nd = rb_first(&leader->entries); nd; nd = rb_next(nd)) { 876 if (sort__need_collapse)
784 pos = rb_entry(nd, struct hist_entry, rb_node); 877 root = &leader->entries_collapsed;
878 else
879 root = leader->entries_in;
880
881 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
882 pos = rb_entry(nd, struct hist_entry, rb_node_in);
785 pair = hists__find_entry(other, pos); 883 pair = hists__find_entry(other, pos);
786 884
787 if (pair) 885 if (pair)
788 hist__entry_add_pair(pos, pair); 886 hist_entry__add_pair(pair, pos);
789 } 887 }
790} 888}
791 889
@@ -796,17 +894,23 @@ void hists__match(struct hists *leader, struct hists *other)
796 */ 894 */
797int hists__link(struct hists *leader, struct hists *other) 895int hists__link(struct hists *leader, struct hists *other)
798{ 896{
897 struct rb_root *root;
799 struct rb_node *nd; 898 struct rb_node *nd;
800 struct hist_entry *pos, *pair; 899 struct hist_entry *pos, *pair;
801 900
802 for (nd = rb_first(&other->entries); nd; nd = rb_next(nd)) { 901 if (sort__need_collapse)
803 pos = rb_entry(nd, struct hist_entry, rb_node); 902 root = &other->entries_collapsed;
903 else
904 root = other->entries_in;
905
906 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
907 pos = rb_entry(nd, struct hist_entry, rb_node_in);
804 908
805 if (!hist_entry__has_pairs(pos)) { 909 if (!hist_entry__has_pairs(pos)) {
806 pair = hists__add_dummy_entry(leader, pos); 910 pair = hists__add_dummy_entry(leader, pos);
807 if (pair == NULL) 911 if (pair == NULL)
808 return -1; 912 return -1;
809 hist__entry_add_pair(pair, pos); 913 hist_entry__add_pair(pos, pair);
810 } 914 }
811 } 915 }
812 916
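
hist_entry__sort_on_period() above orders entries by the leader's period and breaks ties by comparing member periods index by index; slot 0 belongs to the leader, which is why the final loop starts at 1. The comparison itself, reduced to plain arrays with made-up sample counts:

#include <stdint.h>
#include <stdio.h>

static int period_cmp(uint64_t a, uint64_t b)
{
	return (a > b) - (a < b);	/* 1, 0 or -1 */
}

/* Lexicographic compare: leader first, then each group member. */
static int group_period_cmp(const uint64_t *a, const uint64_t *b, int nr)
{
	int i, ret = 0;

	for (i = 0; i < nr && !ret; i++)
		ret = period_cmp(a[i], b[i]);
	return ret;
}

int main(void)
{
	uint64_t ea[] = { 1000, 40, 7 };	/* leader, member 1, member 2 */
	uint64_t eb[] = { 1000, 40, 9 };	/* ties on leader and member 1 */

	printf("cmp = %d\n", group_period_cmp(ea, eb, 3));	/* prints -1 */
	return 0;
}
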
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 8b091a51e4a2..38624686ee9a 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -96,8 +96,10 @@ void hists__decay_entries_threaded(struct hists *hists, bool zap_user,
96 bool zap_kernel); 96 bool zap_kernel);
97void hists__output_recalc_col_len(struct hists *hists, int max_rows); 97void hists__output_recalc_col_len(struct hists *hists, int max_rows);
98 98
99void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h);
99void hists__inc_nr_events(struct hists *self, u32 type); 100void hists__inc_nr_events(struct hists *self, u32 type);
100size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); 101void events_stats__inc(struct events_stats *stats, u32 type);
102size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
101 103
102size_t hists__fprintf(struct hists *self, bool show_header, int max_rows, 104size_t hists__fprintf(struct hists *self, bool show_header, int max_rows,
103 int max_cols, FILE *fp); 105 int max_cols, FILE *fp);
@@ -126,13 +128,19 @@ struct perf_hpp {
126}; 128};
127 129
128struct perf_hpp_fmt { 130struct perf_hpp_fmt {
129 bool cond;
130 int (*header)(struct perf_hpp *hpp); 131 int (*header)(struct perf_hpp *hpp);
131 int (*width)(struct perf_hpp *hpp); 132 int (*width)(struct perf_hpp *hpp);
132 int (*color)(struct perf_hpp *hpp, struct hist_entry *he); 133 int (*color)(struct perf_hpp *hpp, struct hist_entry *he);
133 int (*entry)(struct perf_hpp *hpp, struct hist_entry *he); 134 int (*entry)(struct perf_hpp *hpp, struct hist_entry *he);
135
136 struct list_head list;
134}; 137};
135 138
139extern struct list_head perf_hpp__list;
140
141#define perf_hpp__for_each_format(format) \
142 list_for_each_entry(format, &perf_hpp__list, list)
143
136extern struct perf_hpp_fmt perf_hpp__format[]; 144extern struct perf_hpp_fmt perf_hpp__format[];
137 145
138enum { 146enum {
@@ -148,14 +156,14 @@ enum {
148 PERF_HPP__DELTA, 156 PERF_HPP__DELTA,
149 PERF_HPP__RATIO, 157 PERF_HPP__RATIO,
150 PERF_HPP__WEIGHTED_DIFF, 158 PERF_HPP__WEIGHTED_DIFF,
151 PERF_HPP__DISPL,
152 PERF_HPP__FORMULA, 159 PERF_HPP__FORMULA,
153 160
154 PERF_HPP__MAX_INDEX 161 PERF_HPP__MAX_INDEX
155}; 162};
156 163
157void perf_hpp__init(void); 164void perf_hpp__init(void);
158void perf_hpp__column_enable(unsigned col, bool enable); 165void perf_hpp__column_register(struct perf_hpp_fmt *format);
166void perf_hpp__column_enable(unsigned col);
159int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, 167int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
160 bool color); 168 bool color);
161 169
@@ -219,8 +227,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
219 227
220unsigned int hists__sort_list_width(struct hists *self); 228unsigned int hists__sort_list_width(struct hists *self);
221 229
222double perf_diff__compute_delta(struct hist_entry *he); 230double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair);
223double perf_diff__compute_ratio(struct hist_entry *he); 231double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair);
224s64 perf_diff__compute_wdiff(struct hist_entry *he); 232s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair);
225int perf_diff__formula(char *buf, size_t size, struct hist_entry *he); 233int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
234 char *buf, size_t size);
235double perf_diff__period_percent(struct hist_entry *he, u64 period);
226#endif /* __PERF_HIST_H */ 236#endif /* __PERF_HIST_H */
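
The hpp changes trade a fixed column array gated by a per-entry cond flag for a registration list: enabling a column now means linking its perf_hpp_fmt onto perf_hpp__list, and output code simply walks the list in registration order. The shape of that design, with a simplified singly linked list standing in for list_head:

#include <stdio.h>

struct hpp_fmt {
	const char *header;
	struct hpp_fmt *next;	/* simplified stand-in for list_head */
};

static struct hpp_fmt *hpp_list;

static void hpp_column_register(struct hpp_fmt *fmt)
{
	struct hpp_fmt **p = &hpp_list;

	while (*p)		/* append, preserving registration order */
		p = &(*p)->next;
	*p = fmt;
}

#define hpp_for_each_format(fmt)	\
	for ((fmt) = hpp_list; (fmt); (fmt) = (fmt)->next)

int main(void)
{
	struct hpp_fmt overhead = { "Overhead", NULL };
	struct hpp_fmt period = { "Period", NULL };
	struct hpp_fmt *fmt;

	hpp_column_register(&overhead);
	hpp_column_register(&period);

	hpp_for_each_format(fmt)
		printf("%-10s", fmt->header);
	printf("\n");
	return 0;
}
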
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index a55d8cf083c9..45cf10a562bd 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -14,6 +14,7 @@
14#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) 14#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
15#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) 15#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
16#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) 16#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
17#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
17 18
18#define for_each_set_bit(bit, addr, size) \ 19#define for_each_set_bit(bit, addr, size) \
19 for ((bit) = find_first_bit((addr), (size)); \ 20 for ((bit) = find_first_bit((addr), (size)); \
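
BITS_TO_BYTES() rounds up like the neighbouring BITS_TO_* helpers: any partial byte still occupies a whole byte. A quick check of the arithmetic:

#include <assert.h>

#define BITS_PER_BYTE		8
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define BITS_TO_BYTES(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE)

int main(void)
{
	assert(BITS_TO_BYTES(1) == 1);
	assert(BITS_TO_BYTES(8) == 1);
	assert(BITS_TO_BYTES(9) == 2);	/* 9 bits need a second byte */
	return 0;
}
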
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
index 9d0740024ba8..11a8d86f7fea 100644
--- a/tools/perf/util/intlist.c
+++ b/tools/perf/util/intlist.c
@@ -59,16 +59,40 @@ void intlist__remove(struct intlist *ilist, struct int_node *node)
59 59
60struct int_node *intlist__find(struct intlist *ilist, int i) 60struct int_node *intlist__find(struct intlist *ilist, int i)
61{ 61{
62 struct int_node *node = NULL; 62 struct int_node *node;
63 struct rb_node *rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); 63 struct rb_node *rb_node;
64 64
65 if (ilist == NULL)
66 return NULL;
67
68 node = NULL;
69 rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
65 if (rb_node) 70 if (rb_node)
66 node = container_of(rb_node, struct int_node, rb_node); 71 node = container_of(rb_node, struct int_node, rb_node);
67 72
68 return node; 73 return node;
69} 74}
70 75
71struct intlist *intlist__new(void) 76static int intlist__parse_list(struct intlist *ilist, const char *s)
77{
78 char *sep;
79 int err;
80
81 do {
82 long value = strtol(s, &sep, 10);
83 err = -EINVAL;
84 if (*sep != ',' && *sep != '\0')
85 break;
86 err = intlist__add(ilist, value);
87 if (err)
88 break;
89 s = sep + 1;
90 } while (*sep != '\0');
91
92 return err;
93}
94
95struct intlist *intlist__new(const char *slist)
72{ 96{
73 struct intlist *ilist = malloc(sizeof(*ilist)); 97 struct intlist *ilist = malloc(sizeof(*ilist));
74 98
@@ -77,9 +101,15 @@ struct intlist *intlist__new(void)
77 ilist->rblist.node_cmp = intlist__node_cmp; 101 ilist->rblist.node_cmp = intlist__node_cmp;
78 ilist->rblist.node_new = intlist__node_new; 102 ilist->rblist.node_new = intlist__node_new;
79 ilist->rblist.node_delete = intlist__node_delete; 103 ilist->rblist.node_delete = intlist__node_delete;
104
105 if (slist && intlist__parse_list(ilist, slist))
106 goto out_delete;
80 } 107 }
81 108
82 return ilist; 109 return ilist;
110out_delete:
111 intlist__delete(ilist);
112 return NULL;
83} 113}
84 114
85void intlist__delete(struct intlist *ilist) 115void intlist__delete(struct intlist *ilist)
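
intlist__new() now accepts an optional comma-separated number string, parsed with the strtol() loop above; any separator other than ',' or the terminating NUL yields -EINVAL. The same parsing idea in isolation, collecting into a plain array and with an extra empty-field check not present in the original:

#include <stdio.h>
#include <stdlib.h>

/* Parse "1,22,333" into out[]; return count, or -1 on bad input. */
static int parse_int_list(const char *s, long *out, int max)
{
	char *sep;
	int n = 0;

	do {
		long value = strtol(s, &sep, 10);

		if (sep == s || (*sep != ',' && *sep != '\0') || n == max)
			return -1;
		out[n++] = value;
		s = sep + 1;	/* skip past the comma */
	} while (*sep != '\0');

	return n;
}

int main(void)
{
	long vals[8];
	int i, n = parse_int_list("2,4,8", vals, 8);

	for (i = 0; i < n; i++)
		printf("%ld ", vals[i]);
	printf("\n");
	return 0;
}
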
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 6d63ab90db50..62351dad848f 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -15,7 +15,7 @@ struct intlist {
15 struct rblist rblist; 15 struct rblist rblist;
16}; 16};
17 17
18struct intlist *intlist__new(void); 18struct intlist *intlist__new(const char *slist);
19void intlist__delete(struct intlist *ilist); 19void intlist__delete(struct intlist *ilist);
20 20
21void intlist__remove(struct intlist *ilist, struct int_node *in); 21void intlist__remove(struct intlist *ilist, struct int_node *in);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1f09d0581e6b..efdb38e65a92 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1,10 +1,15 @@
1#include "callchain.h"
1#include "debug.h" 2#include "debug.h"
2#include "event.h" 3#include "event.h"
4#include "evsel.h"
5#include "hist.h"
3#include "machine.h" 6#include "machine.h"
4#include "map.h" 7#include "map.h"
8#include "sort.h"
5#include "strlist.h" 9#include "strlist.h"
6#include "thread.h" 10#include "thread.h"
7#include <stdbool.h> 11#include <stdbool.h>
12#include "unwind.h"
8 13
9int machine__init(struct machine *machine, const char *root_dir, pid_t pid) 14int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
10{ 15{
@@ -48,6 +53,29 @@ static void dsos__delete(struct list_head *dsos)
48 } 53 }
49} 54}
50 55
56void machine__delete_dead_threads(struct machine *machine)
57{
58 struct thread *n, *t;
59
60 list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
61 list_del(&t->node);
62 thread__delete(t);
63 }
64}
65
66void machine__delete_threads(struct machine *machine)
67{
68 struct rb_node *nd = rb_first(&machine->threads);
69
70 while (nd) {
71 struct thread *t = rb_entry(nd, struct thread, rb_node);
72
73 rb_erase(&t->rb_node, &machine->threads);
74 nd = rb_next(nd);
75 thread__delete(t);
76 }
77}
78
51void machine__exit(struct machine *machine) 79void machine__exit(struct machine *machine)
52{ 80{
53 map_groups__exit(&machine->kmaps); 81 map_groups__exit(&machine->kmaps);
@@ -63,10 +91,22 @@ void machine__delete(struct machine *machine)
63 free(machine); 91 free(machine);
64} 92}
65 93
66struct machine *machines__add(struct rb_root *machines, pid_t pid, 94void machines__init(struct machines *machines)
95{
96 machine__init(&machines->host, "", HOST_KERNEL_ID);
97 machines->guests = RB_ROOT;
98}
99
100void machines__exit(struct machines *machines)
101{
102 machine__exit(&machines->host);
103 /* XXX exit guest */
104}
105
106struct machine *machines__add(struct machines *machines, pid_t pid,
67 const char *root_dir) 107 const char *root_dir)
68{ 108{
69 struct rb_node **p = &machines->rb_node; 109 struct rb_node **p = &machines->guests.rb_node;
70 struct rb_node *parent = NULL; 110 struct rb_node *parent = NULL;
71 struct machine *pos, *machine = malloc(sizeof(*machine)); 111 struct machine *pos, *machine = malloc(sizeof(*machine));
72 112
@@ -88,18 +128,21 @@ struct machine *machines__add(struct rb_root *machines, pid_t pid,
88 } 128 }
89 129
90 rb_link_node(&machine->rb_node, parent, p); 130 rb_link_node(&machine->rb_node, parent, p);
91 rb_insert_color(&machine->rb_node, machines); 131 rb_insert_color(&machine->rb_node, &machines->guests);
92 132
93 return machine; 133 return machine;
94} 134}
95 135
96struct machine *machines__find(struct rb_root *machines, pid_t pid) 136struct machine *machines__find(struct machines *machines, pid_t pid)
97{ 137{
98 struct rb_node **p = &machines->rb_node; 138 struct rb_node **p = &machines->guests.rb_node;
99 struct rb_node *parent = NULL; 139 struct rb_node *parent = NULL;
100 struct machine *machine; 140 struct machine *machine;
101 struct machine *default_machine = NULL; 141 struct machine *default_machine = NULL;
102 142
143 if (pid == HOST_KERNEL_ID)
144 return &machines->host;
145
103 while (*p != NULL) { 146 while (*p != NULL) {
104 parent = *p; 147 parent = *p;
105 machine = rb_entry(parent, struct machine, rb_node); 148 machine = rb_entry(parent, struct machine, rb_node);
@@ -116,7 +159,7 @@ struct machine *machines__find(struct rb_root *machines, pid_t pid)
116 return default_machine; 159 return default_machine;
117} 160}
118 161
119struct machine *machines__findnew(struct rb_root *machines, pid_t pid) 162struct machine *machines__findnew(struct machines *machines, pid_t pid)
120{ 163{
121 char path[PATH_MAX]; 164 char path[PATH_MAX];
122 const char *root_dir = ""; 165 const char *root_dir = "";
@@ -150,12 +193,12 @@ out:
150 return machine; 193 return machine;
151} 194}
152 195
153void machines__process(struct rb_root *machines, 196void machines__process_guests(struct machines *machines,
154 machine__process_t process, void *data) 197 machine__process_t process, void *data)
155{ 198{
156 struct rb_node *nd; 199 struct rb_node *nd;
157 200
158 for (nd = rb_first(machines); nd; nd = rb_next(nd)) { 201 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
159 struct machine *pos = rb_entry(nd, struct machine, rb_node); 202 struct machine *pos = rb_entry(nd, struct machine, rb_node);
160 process(pos, data); 203 process(pos, data);
161 } 204 }
@@ -175,12 +218,14 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
175 return bf; 218 return bf;
176} 219}
177 220
178void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size) 221void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
179{ 222{
180 struct rb_node *node; 223 struct rb_node *node;
181 struct machine *machine; 224 struct machine *machine;
182 225
183 for (node = rb_first(machines); node; node = rb_next(node)) { 226 machines->host.id_hdr_size = id_hdr_size;
227
228 for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
184 machine = rb_entry(node, struct machine, rb_node); 229 machine = rb_entry(node, struct machine, rb_node);
185 machine->id_hdr_size = id_hdr_size; 230 machine->id_hdr_size = id_hdr_size;
186 } 231 }
@@ -264,6 +309,537 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
264 return 0; 309 return 0;
265} 310}
266 311
312struct map *machine__new_module(struct machine *machine, u64 start,
313 const char *filename)
314{
315 struct map *map;
316 struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
317
318 if (dso == NULL)
319 return NULL;
320
321 map = map__new2(start, dso, MAP__FUNCTION);
322 if (map == NULL)
323 return NULL;
324
325 if (machine__is_host(machine))
326 dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
327 else
328 dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
329 map_groups__insert(&machine->kmaps, map);
330 return map;
331}
332
333size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
334{
335 struct rb_node *nd;
336 size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) +
337 __dsos__fprintf(&machines->host.user_dsos, fp);
338
339 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
340 struct machine *pos = rb_entry(nd, struct machine, rb_node);
341 ret += __dsos__fprintf(&pos->kernel_dsos, fp);
342 ret += __dsos__fprintf(&pos->user_dsos, fp);
343 }
344
345 return ret;
346}
347
348size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
349 bool (skip)(struct dso *dso, int parm), int parm)
350{
351 return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) +
352 __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm);
353}
354
355size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
356 bool (skip)(struct dso *dso, int parm), int parm)
357{
358 struct rb_node *nd;
359 size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
360
361 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
362 struct machine *pos = rb_entry(nd, struct machine, rb_node);
363 ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
364 }
365 return ret;
366}
367
368size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
369{
370 int i;
371 size_t printed = 0;
372 struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
373
374 if (kdso->has_build_id) {
375 char filename[PATH_MAX];
376 if (dso__build_id_filename(kdso, filename, sizeof(filename)))
377 printed += fprintf(fp, "[0] %s\n", filename);
378 }
379
380 for (i = 0; i < vmlinux_path__nr_entries; ++i)
381 printed += fprintf(fp, "[%d] %s\n",
382 i + kdso->has_build_id, vmlinux_path[i]);
383
384 return printed;
385}
386
387size_t machine__fprintf(struct machine *machine, FILE *fp)
388{
389 size_t ret = 0;
390 struct rb_node *nd;
391
392 for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
393 struct thread *pos = rb_entry(nd, struct thread, rb_node);
394
395 ret += thread__fprintf(pos, fp);
396 }
397
398 return ret;
399}
400
401static struct dso *machine__get_kernel(struct machine *machine)
402{
403 const char *vmlinux_name = NULL;
404 struct dso *kernel;
405
406 if (machine__is_host(machine)) {
407 vmlinux_name = symbol_conf.vmlinux_name;
408 if (!vmlinux_name)
409 vmlinux_name = "[kernel.kallsyms]";
410
411 kernel = dso__kernel_findnew(machine, vmlinux_name,
412 "[kernel]",
413 DSO_TYPE_KERNEL);
414 } else {
415 char bf[PATH_MAX];
416
417 if (machine__is_default_guest(machine))
418 vmlinux_name = symbol_conf.default_guest_vmlinux_name;
419 if (!vmlinux_name)
420 vmlinux_name = machine__mmap_name(machine, bf,
421 sizeof(bf));
422
423 kernel = dso__kernel_findnew(machine, vmlinux_name,
424 "[guest.kernel]",
425 DSO_TYPE_GUEST_KERNEL);
426 }
427
428 if (kernel != NULL && (!kernel->has_build_id))
429 dso__read_running_kernel_build_id(kernel, machine);
430
431 return kernel;
432}
433
434struct process_args {
435 u64 start;
436};
437
438static int symbol__in_kernel(void *arg, const char *name,
439 char type __maybe_unused, u64 start)
440{
441 struct process_args *args = arg;
442
443 if (strchr(name, '['))
444 return 0;
445
446 args->start = start;
447 return 1;
448}
449
450/* Figure out the start address of kernel map from /proc/kallsyms */
451static u64 machine__get_kernel_start_addr(struct machine *machine)
452{
453 const char *filename;
454 char path[PATH_MAX];
455 struct process_args args;
456
457 if (machine__is_host(machine)) {
458 filename = "/proc/kallsyms";
459 } else {
460 if (machine__is_default_guest(machine))
461 filename = (char *)symbol_conf.default_guest_kallsyms;
462 else {
463 sprintf(path, "%s/proc/kallsyms", machine->root_dir);
464 filename = path;
465 }
466 }
467
468 if (symbol__restricted_filename(filename, "/proc/kallsyms"))
469 return 0;
470
471 if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
472 return 0;
473
474 return args.start;
475}
476
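symbol__in_kernel() returns non-zero at the first kallsyms entry whose name carries no '[' module suffix, so kallsyms__parse() stops there and args.start holds the first core-kernel symbol address. Against a hypothetical /proc/kallsyms excerpt:

	ffffffff81000000 T _text
	ffffffff81002000 T do_one_initcall
	ffffffffa0000000 t e1000_probe	[e1000e]

the result is 0xffffffff81000000 (addresses illustrative).
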
477int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
478{
479 enum map_type type;
480 u64 start = machine__get_kernel_start_addr(machine);
481
482 for (type = 0; type < MAP__NR_TYPES; ++type) {
483 struct kmap *kmap;
484
485 machine->vmlinux_maps[type] = map__new2(start, kernel, type);
486 if (machine->vmlinux_maps[type] == NULL)
487 return -1;
488
489 machine->vmlinux_maps[type]->map_ip =
490 machine->vmlinux_maps[type]->unmap_ip =
491 identity__map_ip;
492 kmap = map__kmap(machine->vmlinux_maps[type]);
493 kmap->kmaps = &machine->kmaps;
494 map_groups__insert(&machine->kmaps,
495 machine->vmlinux_maps[type]);
496 }
497
498 return 0;
499}
500
501void machine__destroy_kernel_maps(struct machine *machine)
502{
503 enum map_type type;
504
505 for (type = 0; type < MAP__NR_TYPES; ++type) {
506 struct kmap *kmap;
507
508 if (machine->vmlinux_maps[type] == NULL)
509 continue;
510
511 kmap = map__kmap(machine->vmlinux_maps[type]);
512 map_groups__remove(&machine->kmaps,
513 machine->vmlinux_maps[type]);
514 if (kmap->ref_reloc_sym) {
515 /*
516 * ref_reloc_sym is shared among all maps, so free just
517 * on one of them.
518 */
519 if (type == MAP__FUNCTION) {
520 free((char *)kmap->ref_reloc_sym->name);
521 kmap->ref_reloc_sym->name = NULL;
522 free(kmap->ref_reloc_sym);
523 }
524 kmap->ref_reloc_sym = NULL;
525 }
526
527 map__delete(machine->vmlinux_maps[type]);
528 machine->vmlinux_maps[type] = NULL;
529 }
530}
531
532int machines__create_guest_kernel_maps(struct machines *machines)
533{
534 int ret = 0;
535 struct dirent **namelist = NULL;
536 int i, items = 0;
537 char path[PATH_MAX];
538 pid_t pid;
539 char *endp;
540
541 if (symbol_conf.default_guest_vmlinux_name ||
542 symbol_conf.default_guest_modules ||
543 symbol_conf.default_guest_kallsyms) {
544 machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
545 }
546
547 if (symbol_conf.guestmount) {
548 items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
549 if (items <= 0)
550 return -ENOENT;
551 for (i = 0; i < items; i++) {
552 if (!isdigit(namelist[i]->d_name[0])) {
553 /* Filter out . and .. */
554 continue;
555 }
556 pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
557 if ((*endp != '\0') ||
558 (endp == namelist[i]->d_name) ||
559 (errno == ERANGE)) {
560 pr_debug("invalid directory (%s). Skipping.\n",
561 namelist[i]->d_name);
562 continue;
563 }
564 sprintf(path, "%s/%s/proc/kallsyms",
565 symbol_conf.guestmount,
566 namelist[i]->d_name);
567 ret = access(path, R_OK);
568 if (ret) {
569 pr_debug("Can't access file %s\n", path);
570 goto failure;
571 }
572 machines__create_kernel_maps(machines, pid);
573 }
574failure:
575 free(namelist);
576 }
577
578 return ret;
579}
580
581void machines__destroy_kernel_maps(struct machines *machines)
582{
583 struct rb_node *next = rb_first(&machines->guests);
584
585 machine__destroy_kernel_maps(&machines->host);
586
587 while (next) {
588 struct machine *pos = rb_entry(next, struct machine, rb_node);
589
590 next = rb_next(&pos->rb_node);
591 rb_erase(&pos->rb_node, &machines->guests);
592 machine__delete(pos);
593 }
594}
595
596int machines__create_kernel_maps(struct machines *machines, pid_t pid)
597{
598 struct machine *machine = machines__findnew(machines, pid);
599
600 if (machine == NULL)
601 return -1;
602
603 return machine__create_kernel_maps(machine);
604}
605
606int machine__load_kallsyms(struct machine *machine, const char *filename,
607 enum map_type type, symbol_filter_t filter)
608{
609 struct map *map = machine->vmlinux_maps[type];
610 int ret = dso__load_kallsyms(map->dso, filename, map, filter);
611
612 if (ret > 0) {
613 dso__set_loaded(map->dso, type);
614 /*
615		 * Since /proc/kallsyms will have multiple sections for the
616 * kernel, with modules between them, fixup the end of all
617 * sections.
618 */
619 __map_groups__fixup_end(&machine->kmaps, type);
620 }
621
622 return ret;
623}
624
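kallsyms records only start addresses, so each map's ->end has to be inferred from its successor once everything is loaded. A minimal sketch of that fixup, assuming an array of maps sorted by start address (not the actual __map_groups__fixup_end body):

	static void fixup_end_sketch(struct map **sorted, int nr)
	{
		int i;

		/* clamp every map to the start of the one after it */
		for (i = 0; i < nr - 1; i++)
			if (!sorted[i]->end || sorted[i]->end > sorted[i + 1]->start)
				sorted[i]->end = sorted[i + 1]->start;
	}
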
625int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
626 symbol_filter_t filter)
627{
628 struct map *map = machine->vmlinux_maps[type];
629 int ret = dso__load_vmlinux_path(map->dso, map, filter);
630
631 if (ret > 0) {
632 dso__set_loaded(map->dso, type);
633 map__reloc_vmlinux(map);
634 }
635
636 return ret;
637}
638
639static void map_groups__fixup_end(struct map_groups *mg)
640{
641 int i;
642 for (i = 0; i < MAP__NR_TYPES; ++i)
643 __map_groups__fixup_end(mg, i);
644}
645
646static char *get_kernel_version(const char *root_dir)
647{
648 char version[PATH_MAX];
649 FILE *file;
650 char *name, *tmp;
651 const char *prefix = "Linux version ";
652
653 sprintf(version, "%s/proc/version", root_dir);
654 file = fopen(version, "r");
655 if (!file)
656 return NULL;
657
658 version[0] = '\0';
659 tmp = fgets(version, sizeof(version), file);
660 fclose(file);
661
662 name = strstr(version, prefix);
663 if (!name)
664 return NULL;
665 name += strlen(prefix);
666 tmp = strchr(name, ' ');
667 if (tmp)
668 *tmp = '\0';
669
670 return strdup(name);
671}
672
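Worked example for get_kernel_version(), input line hypothetical:

	/* "Linux version 3.8.0-rc7 (buildd@host) (gcc 4.7.2) #1 SMP ..."
	 * -> strdup("3.8.0-rc7"), which machine__set_modules_path() expands
	 *    to "<root_dir>/lib/modules/3.8.0-rc7/kernel"
	 */
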
673static int map_groups__set_modules_path_dir(struct map_groups *mg,
674 const char *dir_name)
675{
676 struct dirent *dent;
677 DIR *dir = opendir(dir_name);
678 int ret = 0;
679
680 if (!dir) {
681 pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
682 return -1;
683 }
684
685 while ((dent = readdir(dir)) != NULL) {
686 char path[PATH_MAX];
687 struct stat st;
688
689		/* sshfs might return bad dent->d_type, so we have to stat */
690 snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
691 if (stat(path, &st))
692 continue;
693
694 if (S_ISDIR(st.st_mode)) {
695 if (!strcmp(dent->d_name, ".") ||
696 !strcmp(dent->d_name, ".."))
697 continue;
698
699 ret = map_groups__set_modules_path_dir(mg, path);
700 if (ret < 0)
701 goto out;
702 } else {
703			char dso_name[PATH_MAX];
704			char *dot = strrchr(dent->d_name, '.');
705 struct map *map;
706 char *long_name;
707
708 if (dot == NULL || strcmp(dot, ".ko"))
709 continue;
710 snprintf(dso_name, sizeof(dso_name), "[%.*s]",
711 (int)(dot - dent->d_name), dent->d_name);
712
713 strxfrchar(dso_name, '-', '_');
714 map = map_groups__find_by_name(mg, MAP__FUNCTION,
715 dso_name);
716 if (map == NULL)
717 continue;
718
719 long_name = strdup(path);
720 if (long_name == NULL) {
721 ret = -1;
722 goto out;
723 }
724 dso__set_long_name(map->dso, long_name);
725 map->dso->lname_alloc = 1;
726 dso__kernel_module_get_build_id(map->dso, "");
727 }
728 }
729
730out:
731 closedir(dir);
732 return ret;
733}
734
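The walk above derives the short dso name from the on-disk module file the same way /proc/modules spells it: strip ".ko", bracket the stem, and normalize '-' to '_' with strxfrchar(). For example:

	/* "snd-hda-intel.ko" -> dso_name "[snd_hda_intel]" */
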
735static int machine__set_modules_path(struct machine *machine)
736{
737 char *version;
738 char modules_path[PATH_MAX];
739
740 version = get_kernel_version(machine->root_dir);
741 if (!version)
742 return -1;
743
744 snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
745 machine->root_dir, version);
746 free(version);
747
748 return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
749}
750
751static int machine__create_modules(struct machine *machine)
752{
753 char *line = NULL;
754 size_t n;
755 FILE *file;
756 struct map *map;
757 const char *modules;
758 char path[PATH_MAX];
759
760 if (machine__is_default_guest(machine))
761 modules = symbol_conf.default_guest_modules;
762 else {
763 sprintf(path, "%s/proc/modules", machine->root_dir);
764 modules = path;
765 }
766
767	if (symbol__restricted_filename(modules, "/proc/modules"))
768 return -1;
769
770 file = fopen(modules, "r");
771 if (file == NULL)
772 return -1;
773
774 while (!feof(file)) {
775 char name[PATH_MAX];
776 u64 start;
777 char *sep;
778 int line_len;
779
780 line_len = getline(&line, &n, file);
781 if (line_len < 0)
782 break;
783
784 if (!line)
785 goto out_failure;
786
787		line[--line_len] = '\0'; /* strip the trailing \n */
788
789 sep = strrchr(line, 'x');
790 if (sep == NULL)
791 continue;
792
793 hex2u64(sep + 1, &start);
794
795 sep = strchr(line, ' ');
796 if (sep == NULL)
797 continue;
798
799 *sep = '\0';
800
801 snprintf(name, sizeof(name), "[%s]", line);
802 map = machine__new_module(machine, start, name);
803 if (map == NULL)
804 goto out_delete_line;
805 dso__kernel_module_get_build_id(map->dso, machine->root_dir);
806 }
807
808 free(line);
809 fclose(file);
810
811 return machine__set_modules_path(machine);
812
813out_delete_line:
814 free(line);
815out_failure:
816 return -1;
817}
818
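Each /proc/modules line reads "name size refcount deps state address"; the parser keys off the last 'x' for the hex load address (hex digits never contain 'x') and the first space to terminate the name. For a hypothetical line:

	/* "e1000e 245760 0 - Live 0xffffffffa0000000"
	 * -> start = 0xffffffffa0000000, module map named "[e1000e]"
	 */
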
819int machine__create_kernel_maps(struct machine *machine)
820{
821 struct dso *kernel = machine__get_kernel(machine);
822
823 if (kernel == NULL ||
824 __machine__create_kernel_maps(machine, kernel) < 0)
825 return -1;
826
827 if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
828 if (machine__is_host(machine))
829 pr_debug("Problems creating module maps, "
830 "continuing anyway...\n");
831 else
832 pr_debug("Problems creating module maps for guest %d, "
833 "continuing anyway...\n", machine->pid);
834 }
835
836 /*
837 * Now that we have all the maps created, just set the ->end of them:
838 */
839 map_groups__fixup_end(&machine->kmaps);
840 return 0;
841}
842
267static void machine__set_kernel_mmap_len(struct machine *machine, 843static void machine__set_kernel_mmap_len(struct machine *machine,
268 union perf_event *event) 844 union perf_event *event)
269{ 845{
@@ -462,3 +1038,189 @@ int machine__process_event(struct machine *machine, union perf_event *event)
462 1038
463 return ret; 1039 return ret;
464} 1040}
1041
1042void machine__remove_thread(struct machine *machine, struct thread *th)
1043{
1044 machine->last_match = NULL;
1045 rb_erase(&th->rb_node, &machine->threads);
1046 /*
1047 * We may have references to this thread, for instance in some hist_entry
1048 * instances, so just move them to a separate list.
1049 */
1050 list_add_tail(&th->node, &machine->dead_threads);
1051}
1052
1053static bool symbol__match_parent_regex(struct symbol *sym)
1054{
1055 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
1056 return 1;
1057
1058 return 0;
1059}
1060
1061static const u8 cpumodes[] = {
1062 PERF_RECORD_MISC_USER,
1063 PERF_RECORD_MISC_KERNEL,
1064 PERF_RECORD_MISC_GUEST_USER,
1065 PERF_RECORD_MISC_GUEST_KERNEL
1066};
1067#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
1068
1069static void ip__resolve_ams(struct machine *machine, struct thread *thread,
1070 struct addr_map_symbol *ams,
1071 u64 ip)
1072{
1073 struct addr_location al;
1074 size_t i;
1075 u8 m;
1076
1077 memset(&al, 0, sizeof(al));
1078
1079 for (i = 0; i < NCPUMODES; i++) {
1080 m = cpumodes[i];
1081 /*
1082 * We cannot use the header.misc hint to determine whether a
1083		 * branch stack address is user, kernel, guest, or hypervisor.
1084 * Branches may straddle the kernel/user/hypervisor boundaries.
1085 * Thus, we have to try consecutively until we find a match
1086		 * or else the symbol remains unknown.
1087 */
1088 thread__find_addr_location(thread, machine, m, MAP__FUNCTION,
1089 ip, &al, NULL);
1090 if (al.sym)
1091 goto found;
1092 }
1093found:
1094 ams->addr = ip;
1095 ams->al_addr = al.addr;
1096 ams->sym = al.sym;
1097 ams->map = al.map;
1098}
1099
1100struct branch_info *machine__resolve_bstack(struct machine *machine,
1101 struct thread *thr,
1102 struct branch_stack *bs)
1103{
1104 struct branch_info *bi;
1105 unsigned int i;
1106
1107 bi = calloc(bs->nr, sizeof(struct branch_info));
1108 if (!bi)
1109 return NULL;
1110
1111 for (i = 0; i < bs->nr; i++) {
1112 ip__resolve_ams(machine, thr, &bi[i].to, bs->entries[i].to);
1113 ip__resolve_ams(machine, thr, &bi[i].from, bs->entries[i].from);
1114 bi[i].flags = bs->entries[i].flags;
1115 }
1116 return bi;
1117}
1118
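machine__resolve_bstack() feeds branch mode in perf report; the from/to pairs come from sampling with branch stacks enabled, roughly:

	perf record -b -e cycles -- ./workload
	perf report --sort symbol_from,symbol_to

(flags shown for illustration; -b requests PERF_SAMPLE_BRANCH_STACK).
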
1119static int machine__resolve_callchain_sample(struct machine *machine,
1120 struct thread *thread,
1121 struct ip_callchain *chain,
1122 struct symbol **parent)
1123
1124{
1125 u8 cpumode = PERF_RECORD_MISC_USER;
1126 unsigned int i;
1127 int err;
1128
1129 callchain_cursor_reset(&callchain_cursor);
1130
1131 if (chain->nr > PERF_MAX_STACK_DEPTH) {
1132 pr_warning("corrupted callchain. skipping...\n");
1133 return 0;
1134 }
1135
1136 for (i = 0; i < chain->nr; i++) {
1137 u64 ip;
1138 struct addr_location al;
1139
1140 if (callchain_param.order == ORDER_CALLEE)
1141 ip = chain->ips[i];
1142 else
1143 ip = chain->ips[chain->nr - i - 1];
1144
1145 if (ip >= PERF_CONTEXT_MAX) {
1146 switch (ip) {
1147 case PERF_CONTEXT_HV:
1148 cpumode = PERF_RECORD_MISC_HYPERVISOR;
1149 break;
1150 case PERF_CONTEXT_KERNEL:
1151 cpumode = PERF_RECORD_MISC_KERNEL;
1152 break;
1153 case PERF_CONTEXT_USER:
1154 cpumode = PERF_RECORD_MISC_USER;
1155 break;
1156 default:
1157 pr_debug("invalid callchain context: "
1158 "%"PRId64"\n", (s64) ip);
1159 /*
1160 * It seems the callchain is corrupted.
1161 * Discard all.
1162 */
1163 callchain_cursor_reset(&callchain_cursor);
1164 return 0;
1165 }
1166 continue;
1167 }
1168
1169 al.filtered = false;
1170 thread__find_addr_location(thread, machine, cpumode,
1171 MAP__FUNCTION, ip, &al, NULL);
1172 if (al.sym != NULL) {
1173 if (sort__has_parent && !*parent &&
1174 symbol__match_parent_regex(al.sym))
1175 *parent = al.sym;
1176 if (!symbol_conf.use_callchain)
1177 break;
1178 }
1179
1180 err = callchain_cursor_append(&callchain_cursor,
1181 ip, al.map, al.sym);
1182 if (err)
1183 return err;
1184 }
1185
1186 return 0;
1187}
1188
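Context markers are interleaved with real ips in the raw chain, so cpumode stays sticky until the next marker. A hypothetical callee-ordered chain:

	/* { PERF_CONTEXT_KERNEL, kip1, kip2, PERF_CONTEXT_USER, uip1 }
	 * kip1/kip2 resolve with PERF_RECORD_MISC_KERNEL,
	 * uip1 resolves with PERF_RECORD_MISC_USER.
	 */
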
1189static int unwind_entry(struct unwind_entry *entry, void *arg)
1190{
1191 struct callchain_cursor *cursor = arg;
1192 return callchain_cursor_append(cursor, entry->ip,
1193 entry->map, entry->sym);
1194}
1195
1196int machine__resolve_callchain(struct machine *machine,
1197 struct perf_evsel *evsel,
1198 struct thread *thread,
1199 struct perf_sample *sample,
1200 struct symbol **parent)
1201
1202{
1203 int ret;
1204
1205 callchain_cursor_reset(&callchain_cursor);
1206
1207 ret = machine__resolve_callchain_sample(machine, thread,
1208 sample->callchain, parent);
1209 if (ret)
1210 return ret;
1211
1212	/* Can we do DWARF post unwind? */
1213 if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
1214 (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
1215 return 0;
1216
1217 /* Bail out if nothing was captured. */
1218 if ((!sample->user_regs.regs) ||
1219 (!sample->user_stack.size))
1220 return 0;
1221
1222 return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
1223 thread, evsel->attr.sample_regs_user,
1224 sample);
1225
1226}
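
The DWARF post-unwind path above only runs when the sample carries user registers and a user stack snapshot. A minimal sketch of the recording side that satisfies those checks (mask and size are illustrative assumptions, not values this code dictates):

	/* assumes <linux/perf_event.h> */
	static void enable_dwarf_unwind(struct perf_event_attr *attr)
	{
		attr->sample_type |= PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
		attr->sample_regs_user = 0xff;	/* arch-specific register mask */
		attr->sample_stack_user = 8192;	/* bytes of user stack to snapshot */
	}
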
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index b7cde7467d55..5ac5892f2326 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -47,23 +47,32 @@ int machine__process_event(struct machine *machine, union perf_event *event);
47 47
48typedef void (*machine__process_t)(struct machine *machine, void *data); 48typedef void (*machine__process_t)(struct machine *machine, void *data);
49 49
50void machines__process(struct rb_root *machines, 50struct machines {
51 machine__process_t process, void *data); 51 struct machine host;
52 struct rb_root guests;
53};
54
55void machines__init(struct machines *machines);
56void machines__exit(struct machines *machines);
52 57
53struct machine *machines__add(struct rb_root *machines, pid_t pid, 58void machines__process_guests(struct machines *machines,
59 machine__process_t process, void *data);
60
61struct machine *machines__add(struct machines *machines, pid_t pid,
54 const char *root_dir); 62 const char *root_dir);
55struct machine *machines__find_host(struct rb_root *machines); 63struct machine *machines__find_host(struct machines *machines);
56struct machine *machines__find(struct rb_root *machines, pid_t pid); 64struct machine *machines__find(struct machines *machines, pid_t pid);
57struct machine *machines__findnew(struct rb_root *machines, pid_t pid); 65struct machine *machines__findnew(struct machines *machines, pid_t pid);
58 66
59void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size); 67void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
60char *machine__mmap_name(struct machine *machine, char *bf, size_t size); 68char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
61 69
62int machine__init(struct machine *machine, const char *root_dir, pid_t pid); 70int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
63void machine__exit(struct machine *machine); 71void machine__exit(struct machine *machine);
72void machine__delete_dead_threads(struct machine *machine);
73void machine__delete_threads(struct machine *machine);
64void machine__delete(struct machine *machine); 74void machine__delete(struct machine *machine);
65 75
66
67struct branch_info *machine__resolve_bstack(struct machine *machine, 76struct branch_info *machine__resolve_bstack(struct machine *machine,
68 struct thread *thread, 77 struct thread *thread,
69 struct branch_stack *bs); 78 struct branch_stack *bs);
@@ -129,19 +138,19 @@ int machine__load_kallsyms(struct machine *machine, const char *filename,
129int machine__load_vmlinux_path(struct machine *machine, enum map_type type, 138int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
130 symbol_filter_t filter); 139 symbol_filter_t filter);
131 140
132size_t machine__fprintf_dsos_buildid(struct machine *machine, 141size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
133 FILE *fp, bool with_hits); 142 bool (skip)(struct dso *dso, int parm), int parm);
134size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp); 143size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
135size_t machines__fprintf_dsos_buildid(struct rb_root *machines, 144size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
136 FILE *fp, bool with_hits); 145 bool (skip)(struct dso *dso, int parm), int parm);
137 146
138void machine__destroy_kernel_maps(struct machine *machine); 147void machine__destroy_kernel_maps(struct machine *machine);
139int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); 148int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
140int machine__create_kernel_maps(struct machine *machine); 149int machine__create_kernel_maps(struct machine *machine);
141 150
142int machines__create_kernel_maps(struct rb_root *machines, pid_t pid); 151int machines__create_kernel_maps(struct machines *machines, pid_t pid);
143int machines__create_guest_kernel_maps(struct rb_root *machines); 152int machines__create_guest_kernel_maps(struct machines *machines);
144void machines__destroy_guest_kernel_maps(struct rb_root *machines); 153void machines__destroy_kernel_maps(struct machines *machines);
145 154
146size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); 155size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
147 156
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 0328d45c4f2a..6fcb9de62340 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -11,6 +11,7 @@
11#include "strlist.h" 11#include "strlist.h"
12#include "vdso.h" 12#include "vdso.h"
13#include "build-id.h" 13#include "build-id.h"
14#include <linux/string.h>
14 15
15const char *map_type__name[MAP__NR_TYPES] = { 16const char *map_type__name[MAP__NR_TYPES] = {
16 [MAP__FUNCTION] = "Functions", 17 [MAP__FUNCTION] = "Functions",
@@ -19,7 +20,8 @@ const char *map_type__name[MAP__NR_TYPES] = {
19 20
20static inline int is_anon_memory(const char *filename) 21static inline int is_anon_memory(const char *filename)
21{ 22{
22 return strcmp(filename, "//anon") == 0; 23 return !strcmp(filename, "//anon") ||
24 !strcmp(filename, "/anon_hugepage (deleted)");
23} 25}
24 26
25static inline int is_no_dso_memory(const char *filename) 27static inline int is_no_dso_memory(const char *filename)
@@ -28,29 +30,29 @@ static inline int is_no_dso_memory(const char *filename)
28 !strcmp(filename, "[heap]"); 30 !strcmp(filename, "[heap]");
29} 31}
30 32
31void map__init(struct map *self, enum map_type type, 33void map__init(struct map *map, enum map_type type,
32 u64 start, u64 end, u64 pgoff, struct dso *dso) 34 u64 start, u64 end, u64 pgoff, struct dso *dso)
33{ 35{
34 self->type = type; 36 map->type = type;
35 self->start = start; 37 map->start = start;
36 self->end = end; 38 map->end = end;
37 self->pgoff = pgoff; 39 map->pgoff = pgoff;
38 self->dso = dso; 40 map->dso = dso;
39 self->map_ip = map__map_ip; 41 map->map_ip = map__map_ip;
40 self->unmap_ip = map__unmap_ip; 42 map->unmap_ip = map__unmap_ip;
41 RB_CLEAR_NODE(&self->rb_node); 43 RB_CLEAR_NODE(&map->rb_node);
42 self->groups = NULL; 44 map->groups = NULL;
43 self->referenced = false; 45 map->referenced = false;
44 self->erange_warned = false; 46 map->erange_warned = false;
45} 47}
46 48
47struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, 49struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
48 u64 pgoff, u32 pid, char *filename, 50 u64 pgoff, u32 pid, char *filename,
49 enum map_type type) 51 enum map_type type)
50{ 52{
51 struct map *self = malloc(sizeof(*self)); 53 struct map *map = malloc(sizeof(*map));
52 54
53 if (self != NULL) { 55 if (map != NULL) {
54 char newfilename[PATH_MAX]; 56 char newfilename[PATH_MAX];
55 struct dso *dso; 57 struct dso *dso;
56 int anon, no_dso, vdso; 58 int anon, no_dso, vdso;
@@ -73,10 +75,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
73 if (dso == NULL) 75 if (dso == NULL)
74 goto out_delete; 76 goto out_delete;
75 77
76 map__init(self, type, start, start + len, pgoff, dso); 78 map__init(map, type, start, start + len, pgoff, dso);
77 79
78 if (anon || no_dso) { 80 if (anon || no_dso) {
79 self->map_ip = self->unmap_ip = identity__map_ip; 81 map->map_ip = map->unmap_ip = identity__map_ip;
80 82
81 /* 83 /*
82 * Set memory without DSO as loaded. All map__find_* 84 * Set memory without DSO as loaded. All map__find_*
@@ -84,12 +86,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
84 * unnecessary map__load warning. 86 * unnecessary map__load warning.
85 */ 87 */
86 if (no_dso) 88 if (no_dso)
87 dso__set_loaded(dso, self->type); 89 dso__set_loaded(dso, map->type);
88 } 90 }
89 } 91 }
90 return self; 92 return map;
91out_delete: 93out_delete:
92 free(self); 94 free(map);
93 return NULL; 95 return NULL;
94} 96}
95 97
@@ -112,48 +114,48 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
112 return map; 114 return map;
113} 115}
114 116
115void map__delete(struct map *self) 117void map__delete(struct map *map)
116{ 118{
117 free(self); 119 free(map);
118} 120}
119 121
120void map__fixup_start(struct map *self) 122void map__fixup_start(struct map *map)
121{ 123{
122 struct rb_root *symbols = &self->dso->symbols[self->type]; 124 struct rb_root *symbols = &map->dso->symbols[map->type];
123 struct rb_node *nd = rb_first(symbols); 125 struct rb_node *nd = rb_first(symbols);
124 if (nd != NULL) { 126 if (nd != NULL) {
125 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 127 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
126 self->start = sym->start; 128 map->start = sym->start;
127 } 129 }
128} 130}
129 131
130void map__fixup_end(struct map *self) 132void map__fixup_end(struct map *map)
131{ 133{
132 struct rb_root *symbols = &self->dso->symbols[self->type]; 134 struct rb_root *symbols = &map->dso->symbols[map->type];
133 struct rb_node *nd = rb_last(symbols); 135 struct rb_node *nd = rb_last(symbols);
134 if (nd != NULL) { 136 if (nd != NULL) {
135 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 137 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
136 self->end = sym->end; 138 map->end = sym->end;
137 } 139 }
138} 140}
139 141
140#define DSO__DELETED "(deleted)" 142#define DSO__DELETED "(deleted)"
141 143
142int map__load(struct map *self, symbol_filter_t filter) 144int map__load(struct map *map, symbol_filter_t filter)
143{ 145{
144 const char *name = self->dso->long_name; 146 const char *name = map->dso->long_name;
145 int nr; 147 int nr;
146 148
147 if (dso__loaded(self->dso, self->type)) 149 if (dso__loaded(map->dso, map->type))
148 return 0; 150 return 0;
149 151
150 nr = dso__load(self->dso, self, filter); 152 nr = dso__load(map->dso, map, filter);
151 if (nr < 0) { 153 if (nr < 0) {
152 if (self->dso->has_build_id) { 154 if (map->dso->has_build_id) {
153 char sbuild_id[BUILD_ID_SIZE * 2 + 1]; 155 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
154 156
155 build_id__sprintf(self->dso->build_id, 157 build_id__sprintf(map->dso->build_id,
156 sizeof(self->dso->build_id), 158 sizeof(map->dso->build_id),
157 sbuild_id); 159 sbuild_id);
158 pr_warning("%s with build id %s not found", 160 pr_warning("%s with build id %s not found",
159 name, sbuild_id); 161 name, sbuild_id);
@@ -183,43 +185,36 @@ int map__load(struct map *self, symbol_filter_t filter)
183 * Only applies to the kernel, as its symtabs aren't relative like the 185 * Only applies to the kernel, as its symtabs aren't relative like the
184 * module ones. 186 * module ones.
185 */ 187 */
186 if (self->dso->kernel) 188 if (map->dso->kernel)
187 map__reloc_vmlinux(self); 189 map__reloc_vmlinux(map);
188 190
189 return 0; 191 return 0;
190} 192}
191 193
192struct symbol *map__find_symbol(struct map *self, u64 addr, 194struct symbol *map__find_symbol(struct map *map, u64 addr,
193 symbol_filter_t filter) 195 symbol_filter_t filter)
194{ 196{
195 if (map__load(self, filter) < 0) 197 if (map__load(map, filter) < 0)
196 return NULL; 198 return NULL;
197 199
198 return dso__find_symbol(self->dso, self->type, addr); 200 return dso__find_symbol(map->dso, map->type, addr);
199} 201}
200 202
201struct symbol *map__find_symbol_by_name(struct map *self, const char *name, 203struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
202 symbol_filter_t filter) 204 symbol_filter_t filter)
203{ 205{
204 if (map__load(self, filter) < 0) 206 if (map__load(map, filter) < 0)
205 return NULL; 207 return NULL;
206 208
207 if (!dso__sorted_by_name(self->dso, self->type)) 209 if (!dso__sorted_by_name(map->dso, map->type))
208 dso__sort_by_name(self->dso, self->type); 210 dso__sort_by_name(map->dso, map->type);
209 211
210 return dso__find_symbol_by_name(self->dso, self->type, name); 212 return dso__find_symbol_by_name(map->dso, map->type, name);
211} 213}
212 214
213struct map *map__clone(struct map *self) 215struct map *map__clone(struct map *map)
214{ 216{
215 struct map *map = malloc(sizeof(*self)); 217 return memdup(map, sizeof(*map));
216
217 if (!map)
218 return NULL;
219
220 memcpy(map, self, sizeof(*self));
221
222 return map;
223} 218}
224 219
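map__clone() now defers to memdup() from the newly included (tools copy of) <linux/string.h>; the helper is assumed to be the obvious allocate-and-copy:

	/* sketch, assuming <stdlib.h> and <string.h> */
	void *memdup(const void *src, size_t len)
	{
		void *p = malloc(len);

		if (p)
			memcpy(p, src, len);
		return p;
	}
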
225int map__overlap(struct map *l, struct map *r) 220int map__overlap(struct map *l, struct map *r)
@@ -236,10 +231,10 @@ int map__overlap(struct map *l, struct map *r)
236 return 0; 231 return 0;
237} 232}
238 233
239size_t map__fprintf(struct map *self, FILE *fp) 234size_t map__fprintf(struct map *map, FILE *fp)
240{ 235{
241 return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", 236 return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
242 self->start, self->end, self->pgoff, self->dso->name); 237 map->start, map->end, map->pgoff, map->dso->name);
243} 238}
244 239
245size_t map__fprintf_dsoname(struct map *map, FILE *fp) 240size_t map__fprintf_dsoname(struct map *map, FILE *fp)
@@ -527,9 +522,9 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
527 return ip - (s64)map->pgoff; 522 return ip - (s64)map->pgoff;
528} 523}
529 524
530void map__reloc_vmlinux(struct map *self) 525void map__reloc_vmlinux(struct map *map)
531{ 526{
532 struct kmap *kmap = map__kmap(self); 527 struct kmap *kmap = map__kmap(map);
533 s64 reloc; 528 s64 reloc;
534 529
535 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) 530 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
@@ -541,9 +536,9 @@ void map__reloc_vmlinux(struct map *self)
541 if (!reloc) 536 if (!reloc)
542 return; 537 return;
543 538
544 self->map_ip = map__reloc_map_ip; 539 map->map_ip = map__reloc_map_ip;
545 self->unmap_ip = map__reloc_unmap_ip; 540 map->unmap_ip = map__reloc_unmap_ip;
546 self->pgoff = reloc; 541 map->pgoff = reloc;
547} 542}
548 543
549void maps__insert(struct rb_root *maps, struct map *map) 544void maps__insert(struct rb_root *maps, struct map *map)
@@ -566,9 +561,9 @@ void maps__insert(struct rb_root *maps, struct map *map)
566 rb_insert_color(&map->rb_node, maps); 561 rb_insert_color(&map->rb_node, maps);
567} 562}
568 563
569void maps__remove(struct rb_root *self, struct map *map) 564void maps__remove(struct rb_root *maps, struct map *map)
570{ 565{
571 rb_erase(&map->rb_node, self); 566 rb_erase(&map->rb_node, maps);
572} 567}
573 568
574struct map *maps__find(struct rb_root *maps, u64 ip) 569struct map *maps__find(struct rb_root *maps, u64 ip)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index bcb39e2a6965..a887f2c9dfbb 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -57,9 +57,9 @@ struct map_groups {
57 struct machine *machine; 57 struct machine *machine;
58}; 58};
59 59
60static inline struct kmap *map__kmap(struct map *self) 60static inline struct kmap *map__kmap(struct map *map)
61{ 61{
62 return (struct kmap *)(self + 1); 62 return (struct kmap *)(map + 1);
63} 63}
64 64
65static inline u64 map__map_ip(struct map *map, u64 ip) 65static inline u64 map__map_ip(struct map *map, u64 ip)
@@ -85,27 +85,27 @@ struct symbol;
85 85
86typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); 86typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
87 87
88void map__init(struct map *self, enum map_type type, 88void map__init(struct map *map, enum map_type type,
89 u64 start, u64 end, u64 pgoff, struct dso *dso); 89 u64 start, u64 end, u64 pgoff, struct dso *dso);
90struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, 90struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
91 u64 pgoff, u32 pid, char *filename, 91 u64 pgoff, u32 pid, char *filename,
92 enum map_type type); 92 enum map_type type);
93struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 93struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
94void map__delete(struct map *self); 94void map__delete(struct map *map);
95struct map *map__clone(struct map *self); 95struct map *map__clone(struct map *map);
96int map__overlap(struct map *l, struct map *r); 96int map__overlap(struct map *l, struct map *r);
97size_t map__fprintf(struct map *self, FILE *fp); 97size_t map__fprintf(struct map *map, FILE *fp);
98size_t map__fprintf_dsoname(struct map *map, FILE *fp); 98size_t map__fprintf_dsoname(struct map *map, FILE *fp);
99 99
100int map__load(struct map *self, symbol_filter_t filter); 100int map__load(struct map *map, symbol_filter_t filter);
101struct symbol *map__find_symbol(struct map *self, 101struct symbol *map__find_symbol(struct map *map,
102 u64 addr, symbol_filter_t filter); 102 u64 addr, symbol_filter_t filter);
103struct symbol *map__find_symbol_by_name(struct map *self, const char *name, 103struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
104 symbol_filter_t filter); 104 symbol_filter_t filter);
105void map__fixup_start(struct map *self); 105void map__fixup_start(struct map *map);
106void map__fixup_end(struct map *self); 106void map__fixup_end(struct map *map);
107 107
108void map__reloc_vmlinux(struct map *self); 108void map__reloc_vmlinux(struct map *map);
109 109
110size_t __map_groups__fprintf_maps(struct map_groups *mg, 110size_t __map_groups__fprintf_maps(struct map_groups *mg,
111 enum map_type type, int verbose, FILE *fp); 111 enum map_type type, int verbose, FILE *fp);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2d8d53bec17e..c84f48cf9678 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -380,8 +380,8 @@ static int add_tracepoint(struct list_head **listp, int *idx,
380 return 0; 380 return 0;
381} 381}
382 382
383static int add_tracepoint_multi(struct list_head **list, int *idx, 383static int add_tracepoint_multi_event(struct list_head **list, int *idx,
384 char *sys_name, char *evt_name) 384 char *sys_name, char *evt_name)
385{ 385{
386 char evt_path[MAXPATHLEN]; 386 char evt_path[MAXPATHLEN];
387 struct dirent *evt_ent; 387 struct dirent *evt_ent;
@@ -408,6 +408,47 @@ static int add_tracepoint_multi(struct list_head **list, int *idx,
408 ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name); 408 ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name);
409 } 409 }
410 410
411 closedir(evt_dir);
412 return ret;
413}
414
415static int add_tracepoint_event(struct list_head **list, int *idx,
416 char *sys_name, char *evt_name)
417{
418 return strpbrk(evt_name, "*?") ?
419 add_tracepoint_multi_event(list, idx, sys_name, evt_name) :
420 add_tracepoint(list, idx, sys_name, evt_name);
421}
422
423static int add_tracepoint_multi_sys(struct list_head **list, int *idx,
424 char *sys_name, char *evt_name)
425{
426 struct dirent *events_ent;
427 DIR *events_dir;
428 int ret = 0;
429
430 events_dir = opendir(tracing_events_path);
431 if (!events_dir) {
432 perror("Can't open event dir");
433 return -1;
434 }
435
436 while (!ret && (events_ent = readdir(events_dir))) {
437 if (!strcmp(events_ent->d_name, ".")
438 || !strcmp(events_ent->d_name, "..")
439 || !strcmp(events_ent->d_name, "enable")
440 || !strcmp(events_ent->d_name, "header_event")
441 || !strcmp(events_ent->d_name, "header_page"))
442 continue;
443
444 if (!strglobmatch(events_ent->d_name, sys_name))
445 continue;
446
447 ret = add_tracepoint_event(list, idx, events_ent->d_name,
448 evt_name);
449 }
450
451 closedir(events_dir);
411 return ret; 452 return ret;
412} 453}
413 454
@@ -420,9 +461,10 @@ int parse_events_add_tracepoint(struct list_head **list, int *idx,
420 if (ret) 461 if (ret)
421 return ret; 462 return ret;
422 463
423 return strpbrk(event, "*?") ? 464 if (strpbrk(sys, "*?"))
424 add_tracepoint_multi(list, idx, sys, event) : 465 return add_tracepoint_multi_sys(list, idx, sys, event);
425 add_tracepoint(list, idx, sys, event); 466 else
467 return add_tracepoint_event(list, idx, sys, event);
426} 468}
427 469
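With add_tracepoint_multi_sys() in place, a glob may appear in the subsystem part as well as the event part. Illustrative command lines:

	perf record -e 'sched:sched_*' ...	# glob in the event name
	perf record -e 'kvm*:*' ...		# glob in the subsystem too
	perf record -e 'sched:sched_switch' ...	# plain single tracepoint
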
428static int 470static int
@@ -492,7 +534,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx,
492} 534}
493 535
494static int config_term(struct perf_event_attr *attr, 536static int config_term(struct perf_event_attr *attr,
495 struct parse_events__term *term) 537 struct parse_events_term *term)
496{ 538{
497#define CHECK_TYPE_VAL(type) \ 539#define CHECK_TYPE_VAL(type) \
498do { \ 540do { \
@@ -537,7 +579,7 @@ do { \
537static int config_attr(struct perf_event_attr *attr, 579static int config_attr(struct perf_event_attr *attr,
538 struct list_head *head, int fail) 580 struct list_head *head, int fail)
539{ 581{
540 struct parse_events__term *term; 582 struct parse_events_term *term;
541 583
542 list_for_each_entry(term, head, list) 584 list_for_each_entry(term, head, list)
543 if (config_term(attr, term) && fail) 585 if (config_term(attr, term) && fail)
@@ -563,14 +605,14 @@ int parse_events_add_numeric(struct list_head **list, int *idx,
563 return add_event(list, idx, &attr, NULL); 605 return add_event(list, idx, &attr, NULL);
564} 606}
565 607
566static int parse_events__is_name_term(struct parse_events__term *term) 608static int parse_events__is_name_term(struct parse_events_term *term)
567{ 609{
568 return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; 610 return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
569} 611}
570 612
571static char *pmu_event_name(struct list_head *head_terms) 613static char *pmu_event_name(struct list_head *head_terms)
572{ 614{
573 struct parse_events__term *term; 615 struct parse_events_term *term;
574 616
575 list_for_each_entry(term, head_terms, list) 617 list_for_each_entry(term, head_terms, list)
576 if (parse_events__is_name_term(term)) 618 if (parse_events__is_name_term(term))
@@ -657,14 +699,6 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
657 int exclude = eu | ek | eh; 699 int exclude = eu | ek | eh;
658 int exclude_GH = evsel ? evsel->exclude_GH : 0; 700 int exclude_GH = evsel ? evsel->exclude_GH : 0;
659 701
660 /*
661 * We are here for group and 'GH' was not set as event
662 * modifier and whatever event/group modifier override
663 * default 'GH' setup.
664 */
665 if (evsel && !exclude_GH)
666 eH = eG = 0;
667
668 memset(mod, 0, sizeof(*mod)); 702 memset(mod, 0, sizeof(*mod));
669 703
670 while (*str) { 704 while (*str) {
@@ -814,7 +848,7 @@ static int parse_events__scanner(const char *str, void *data, int start_token)
814 */ 848 */
815int parse_events_terms(struct list_head *terms, const char *str) 849int parse_events_terms(struct list_head *terms, const char *str)
816{ 850{
817 struct parse_events_data__terms data = { 851 struct parse_events_terms data = {
818 .terms = NULL, 852 .terms = NULL,
819 }; 853 };
820 int ret; 854 int ret;
@@ -830,10 +864,9 @@ int parse_events_terms(struct list_head *terms, const char *str)
830 return ret; 864 return ret;
831} 865}
832 866
833int parse_events(struct perf_evlist *evlist, const char *str, 867int parse_events(struct perf_evlist *evlist, const char *str)
834 int unset __maybe_unused)
835{ 868{
836 struct parse_events_data__events data = { 869 struct parse_events_evlist data = {
837 .list = LIST_HEAD_INIT(data.list), 870 .list = LIST_HEAD_INIT(data.list),
838 .idx = evlist->nr_entries, 871 .idx = evlist->nr_entries,
839 }; 872 };
@@ -843,6 +876,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
843 if (!ret) { 876 if (!ret) {
844 int entries = data.idx - evlist->nr_entries; 877 int entries = data.idx - evlist->nr_entries;
845 perf_evlist__splice_list_tail(evlist, &data.list, entries); 878 perf_evlist__splice_list_tail(evlist, &data.list, entries);
879 evlist->nr_groups += data.nr_groups;
846 return 0; 880 return 0;
847 } 881 }
848 882
@@ -858,7 +892,7 @@ int parse_events_option(const struct option *opt, const char *str,
858 int unset __maybe_unused) 892 int unset __maybe_unused)
859{ 893{
860 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; 894 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
861 int ret = parse_events(evlist, str, unset); 895 int ret = parse_events(evlist, str);
862 896
863 if (ret) { 897 if (ret) {
864 fprintf(stderr, "invalid or unsupported event: '%s'\n", str); 898 fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
@@ -1121,16 +1155,16 @@ void print_events(const char *event_glob, bool name_only)
1121 print_tracepoint_events(NULL, NULL, name_only); 1155 print_tracepoint_events(NULL, NULL, name_only);
1122} 1156}
1123 1157
1124int parse_events__is_hardcoded_term(struct parse_events__term *term) 1158int parse_events__is_hardcoded_term(struct parse_events_term *term)
1125{ 1159{
1126 return term->type_term != PARSE_EVENTS__TERM_TYPE_USER; 1160 return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
1127} 1161}
1128 1162
1129static int new_term(struct parse_events__term **_term, int type_val, 1163static int new_term(struct parse_events_term **_term, int type_val,
1130 int type_term, char *config, 1164 int type_term, char *config,
1131 char *str, u64 num) 1165 char *str, u64 num)
1132{ 1166{
1133 struct parse_events__term *term; 1167 struct parse_events_term *term;
1134 1168
1135 term = zalloc(sizeof(*term)); 1169 term = zalloc(sizeof(*term));
1136 if (!term) 1170 if (!term)
@@ -1156,21 +1190,21 @@ static int new_term(struct parse_events__term **_term, int type_val,
1156 return 0; 1190 return 0;
1157} 1191}
1158 1192
1159int parse_events__term_num(struct parse_events__term **term, 1193int parse_events_term__num(struct parse_events_term **term,
1160 int type_term, char *config, u64 num) 1194 int type_term, char *config, u64 num)
1161{ 1195{
1162 return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, 1196 return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
1163 config, NULL, num); 1197 config, NULL, num);
1164} 1198}
1165 1199
1166int parse_events__term_str(struct parse_events__term **term, 1200int parse_events_term__str(struct parse_events_term **term,
1167 int type_term, char *config, char *str) 1201 int type_term, char *config, char *str)
1168{ 1202{
1169 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, 1203 return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
1170 config, str, 0); 1204 config, str, 0);
1171} 1205}
1172 1206
1173int parse_events__term_sym_hw(struct parse_events__term **term, 1207int parse_events_term__sym_hw(struct parse_events_term **term,
1174 char *config, unsigned idx) 1208 char *config, unsigned idx)
1175{ 1209{
1176 struct event_symbol *sym; 1210 struct event_symbol *sym;
@@ -1188,8 +1222,8 @@ int parse_events__term_sym_hw(struct parse_events__term **term,
1188 (char *) "event", (char *) sym->symbol, 0); 1222 (char *) "event", (char *) sym->symbol, 0);
1189} 1223}
1190 1224
1191int parse_events__term_clone(struct parse_events__term **new, 1225int parse_events_term__clone(struct parse_events_term **new,
1192 struct parse_events__term *term) 1226 struct parse_events_term *term)
1193{ 1227{
1194 return new_term(new, term->type_val, term->type_term, term->config, 1228 return new_term(new, term->type_val, term->type_term, term->config,
1195 term->val.str, term->val.num); 1229 term->val.str, term->val.num);
@@ -1197,7 +1231,7 @@ int parse_events__term_clone(struct parse_events__term **new,
1197 1231
1198void parse_events__free_terms(struct list_head *terms) 1232void parse_events__free_terms(struct list_head *terms)
1199{ 1233{
1200 struct parse_events__term *term, *h; 1234 struct parse_events_term *term, *h;
1201 1235
1202 list_for_each_entry_safe(term, h, terms, list) 1236 list_for_each_entry_safe(term, h, terms, list)
1203 free(term); 1237 free(term);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index b7af80b8bdda..8a4859315fd9 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -29,8 +29,7 @@ const char *event_type(int type);
29 29
30extern int parse_events_option(const struct option *opt, const char *str, 30extern int parse_events_option(const struct option *opt, const char *str,
31 int unset); 31 int unset);
32extern int parse_events(struct perf_evlist *evlist, const char *str, 32extern int parse_events(struct perf_evlist *evlist, const char *str);
33 int unset);
34extern int parse_events_terms(struct list_head *terms, const char *str); 33extern int parse_events_terms(struct list_head *terms, const char *str);
35extern int parse_filter(const struct option *opt, const char *str, int unset); 34extern int parse_filter(const struct option *opt, const char *str, int unset);
36 35
@@ -51,7 +50,7 @@ enum {
51 PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, 50 PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
52}; 51};
53 52
54struct parse_events__term { 53struct parse_events_term {
55 char *config; 54 char *config;
56 union { 55 union {
57 char *str; 56 char *str;
@@ -62,24 +61,25 @@ struct parse_events__term {
62 struct list_head list; 61 struct list_head list;
63}; 62};
64 63
65struct parse_events_data__events { 64struct parse_events_evlist {
66 struct list_head list; 65 struct list_head list;
67 int idx; 66 int idx;
67 int nr_groups;
68}; 68};
69 69
70struct parse_events_data__terms { 70struct parse_events_terms {
71 struct list_head *terms; 71 struct list_head *terms;
72}; 72};
73 73
74int parse_events__is_hardcoded_term(struct parse_events__term *term); 74int parse_events__is_hardcoded_term(struct parse_events_term *term);
75int parse_events__term_num(struct parse_events__term **_term, 75int parse_events_term__num(struct parse_events_term **_term,
76 int type_term, char *config, u64 num); 76 int type_term, char *config, u64 num);
77int parse_events__term_str(struct parse_events__term **_term, 77int parse_events_term__str(struct parse_events_term **_term,
78 int type_term, char *config, char *str); 78 int type_term, char *config, char *str);
79int parse_events__term_sym_hw(struct parse_events__term **term, 79int parse_events_term__sym_hw(struct parse_events_term **term,
80 char *config, unsigned idx); 80 char *config, unsigned idx);
81int parse_events__term_clone(struct parse_events__term **new, 81int parse_events_term__clone(struct parse_events_term **new,
82 struct parse_events__term *term); 82 struct parse_events_term *term);
83void parse_events__free_terms(struct list_head *terms); 83void parse_events__free_terms(struct list_head *terms);
84int parse_events__modifier_event(struct list_head *list, char *str, bool add); 84int parse_events__modifier_event(struct list_head *list, char *str, bool add);
85int parse_events__modifier_group(struct list_head *list, char *event_mod); 85int parse_events__modifier_group(struct list_head *list, char *event_mod);
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 0f9914ae6bac..afc44c18dfe1 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -1,5 +1,4 @@
1%pure-parser 1%pure-parser
2%name-prefix "parse_events_"
3%parse-param {void *_data} 2%parse-param {void *_data}
4%parse-param {void *scanner} 3%parse-param {void *scanner}
5%lex-param {void* scanner} 4%lex-param {void* scanner}
@@ -23,6 +22,14 @@ do { \
23 YYABORT; \ 22 YYABORT; \
24} while (0) 23} while (0)
25 24
25static void inc_group_count(struct list_head *list,
26 struct parse_events_evlist *data)
27{
28	/* Only count groups that have more than one member */
29 if (!list_is_last(list->next, list))
30 data->nr_groups++;
31}
32
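inc_group_count() uses list_is_last() to skip singletons, so only explicit multi-member groups bump nr_groups. For example, as parsed by the grammar below:

	{cycles,instructions}	/* counted: two members */
	{cycles}		/* not counted: one member */
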
26%} 33%}
27 34
28%token PE_START_EVENTS PE_START_TERMS 35%token PE_START_EVENTS PE_START_TERMS
@@ -68,7 +75,7 @@ do { \
68 char *str; 75 char *str;
69 u64 num; 76 u64 num;
70 struct list_head *head; 77 struct list_head *head;
71 struct parse_events__term *term; 78 struct parse_events_term *term;
72} 79}
73%% 80%%
74 81
@@ -79,7 +86,7 @@ PE_START_TERMS start_terms
79 86
80start_events: groups 87start_events: groups
81{ 88{
82 struct parse_events_data__events *data = _data; 89 struct parse_events_evlist *data = _data;
83 90
84 parse_events_update_lists($1, &data->list); 91 parse_events_update_lists($1, &data->list);
85} 92}
@@ -123,6 +130,7 @@ PE_NAME '{' events '}'
123{ 130{
124 struct list_head *list = $3; 131 struct list_head *list = $3;
125 132
133 inc_group_count(list, _data);
126 parse_events__set_leader($1, list); 134 parse_events__set_leader($1, list);
127 $$ = list; 135 $$ = list;
128} 136}
@@ -131,6 +139,7 @@ PE_NAME '{' events '}'
131{ 139{
132 struct list_head *list = $2; 140 struct list_head *list = $2;
133 141
142 inc_group_count(list, _data);
134 parse_events__set_leader(NULL, list); 143 parse_events__set_leader(NULL, list);
135 $$ = list; 144 $$ = list;
136} 145}
@@ -186,7 +195,7 @@ event_def: event_pmu |
186event_pmu: 195event_pmu:
187PE_NAME '/' event_config '/' 196PE_NAME '/' event_config '/'
188{ 197{
189 struct parse_events_data__events *data = _data; 198 struct parse_events_evlist *data = _data;
190 struct list_head *list = NULL; 199 struct list_head *list = NULL;
191 200
192 ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3)); 201 ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));
@@ -202,7 +211,7 @@ PE_VALUE_SYM_SW
202event_legacy_symbol: 211event_legacy_symbol:
203value_sym '/' event_config '/' 212value_sym '/' event_config '/'
204{ 213{
205 struct parse_events_data__events *data = _data; 214 struct parse_events_evlist *data = _data;
206 struct list_head *list = NULL; 215 struct list_head *list = NULL;
207 int type = $1 >> 16; 216 int type = $1 >> 16;
208 int config = $1 & 255; 217 int config = $1 & 255;
@@ -215,7 +224,7 @@ value_sym '/' event_config '/'
215| 224|
216value_sym sep_slash_dc 225value_sym sep_slash_dc
217{ 226{
218 struct parse_events_data__events *data = _data; 227 struct parse_events_evlist *data = _data;
219 struct list_head *list = NULL; 228 struct list_head *list = NULL;
220 int type = $1 >> 16; 229 int type = $1 >> 16;
221 int config = $1 & 255; 230 int config = $1 & 255;
@@ -228,7 +237,7 @@ value_sym sep_slash_dc
228event_legacy_cache: 237event_legacy_cache:
229PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT 238PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
230{ 239{
231 struct parse_events_data__events *data = _data; 240 struct parse_events_evlist *data = _data;
232 struct list_head *list = NULL; 241 struct list_head *list = NULL;
233 242
234 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5)); 243 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));
@@ -237,7 +246,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
237| 246|
238PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT 247PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
239{ 248{
240 struct parse_events_data__events *data = _data; 249 struct parse_events_evlist *data = _data;
241 struct list_head *list = NULL; 250 struct list_head *list = NULL;
242 251
243 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL)); 252 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));
@@ -246,7 +255,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
246| 255|
247PE_NAME_CACHE_TYPE 256PE_NAME_CACHE_TYPE
248{ 257{
249 struct parse_events_data__events *data = _data; 258 struct parse_events_evlist *data = _data;
250 struct list_head *list = NULL; 259 struct list_head *list = NULL;
251 260
252 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL)); 261 ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));
@@ -256,7 +265,7 @@ PE_NAME_CACHE_TYPE
256event_legacy_mem: 265event_legacy_mem:
257PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc 266PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
258{ 267{
259 struct parse_events_data__events *data = _data; 268 struct parse_events_evlist *data = _data;
260 struct list_head *list = NULL; 269 struct list_head *list = NULL;
261 270
262 ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, 271 ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
@@ -266,7 +275,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
266| 275|
267PE_PREFIX_MEM PE_VALUE sep_dc 276PE_PREFIX_MEM PE_VALUE sep_dc
268{ 277{
269 struct parse_events_data__events *data = _data; 278 struct parse_events_evlist *data = _data;
270 struct list_head *list = NULL; 279 struct list_head *list = NULL;
271 280
272 ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, 281 ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
@@ -277,7 +286,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
277event_legacy_tracepoint: 286event_legacy_tracepoint:
278PE_NAME ':' PE_NAME 287PE_NAME ':' PE_NAME
279{ 288{
280 struct parse_events_data__events *data = _data; 289 struct parse_events_evlist *data = _data;
281 struct list_head *list = NULL; 290 struct list_head *list = NULL;
282 291
283 ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3)); 292 ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));
@@ -287,7 +296,7 @@ PE_NAME ':' PE_NAME
287event_legacy_numeric: 296event_legacy_numeric:
288PE_VALUE ':' PE_VALUE 297PE_VALUE ':' PE_VALUE
289{ 298{
290 struct parse_events_data__events *data = _data; 299 struct parse_events_evlist *data = _data;
291 struct list_head *list = NULL; 300 struct list_head *list = NULL;
292 301
293 ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL)); 302 ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL));
@@ -297,7 +306,7 @@ PE_VALUE ':' PE_VALUE
297event_legacy_raw: 306event_legacy_raw:
298PE_RAW 307PE_RAW
299{ 308{
300 struct parse_events_data__events *data = _data; 309 struct parse_events_evlist *data = _data;
301 struct list_head *list = NULL; 310 struct list_head *list = NULL;
302 311
303 ABORT_ON(parse_events_add_numeric(&list, &data->idx, 312 ABORT_ON(parse_events_add_numeric(&list, &data->idx,
@@ -307,7 +316,7 @@ PE_RAW
307 316
308start_terms: event_config 317start_terms: event_config
309{ 318{
310 struct parse_events_data__terms *data = _data; 319 struct parse_events_terms *data = _data;
311 data->terms = $1; 320 data->terms = $1;
312} 321}
313 322
@@ -315,7 +324,7 @@ event_config:
315event_config ',' event_term 324event_config ',' event_term
316{ 325{
317 struct list_head *head = $1; 326 struct list_head *head = $1;
318 struct parse_events__term *term = $3; 327 struct parse_events_term *term = $3;
319 328
320 ABORT_ON(!head); 329 ABORT_ON(!head);
321 list_add_tail(&term->list, head); 330 list_add_tail(&term->list, head);
@@ -325,7 +334,7 @@ event_config ',' event_term
325event_term 334event_term
326{ 335{
327 struct list_head *head = malloc(sizeof(*head)); 336 struct list_head *head = malloc(sizeof(*head));
328 struct parse_events__term *term = $1; 337 struct parse_events_term *term = $1;
329 338
330 ABORT_ON(!head); 339 ABORT_ON(!head);
331 INIT_LIST_HEAD(head); 340 INIT_LIST_HEAD(head);
@@ -336,70 +345,70 @@ event_term
336event_term: 345event_term:
337PE_NAME '=' PE_NAME 346PE_NAME '=' PE_NAME
338{ 347{
339 struct parse_events__term *term; 348 struct parse_events_term *term;
340 349
341 ABORT_ON(parse_events__term_str(&term, PARSE_EVENTS__TERM_TYPE_USER, 350 ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
342 $1, $3)); 351 $1, $3));
343 $$ = term; 352 $$ = term;
344} 353}
345| 354|
346PE_NAME '=' PE_VALUE 355PE_NAME '=' PE_VALUE
347{ 356{
348 struct parse_events__term *term; 357 struct parse_events_term *term;
349 358
350 ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, 359 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
351 $1, $3)); 360 $1, $3));
352 $$ = term; 361 $$ = term;
353} 362}
354| 363|
355PE_NAME '=' PE_VALUE_SYM_HW 364PE_NAME '=' PE_VALUE_SYM_HW
356{ 365{
357 struct parse_events__term *term; 366 struct parse_events_term *term;
358 int config = $3 & 255; 367 int config = $3 & 255;
359 368
360 ABORT_ON(parse_events__term_sym_hw(&term, $1, config)); 369 ABORT_ON(parse_events_term__sym_hw(&term, $1, config));
361 $$ = term; 370 $$ = term;
362} 371}
363| 372|
364PE_NAME 373PE_NAME
365{ 374{
366 struct parse_events__term *term; 375 struct parse_events_term *term;
367 376
368 ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, 377 ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
369 $1, 1)); 378 $1, 1));
370 $$ = term; 379 $$ = term;
371} 380}
372| 381|
373PE_VALUE_SYM_HW 382PE_VALUE_SYM_HW
374{ 383{
375 struct parse_events__term *term; 384 struct parse_events_term *term;
376 int config = $1 & 255; 385 int config = $1 & 255;
377 386
378 ABORT_ON(parse_events__term_sym_hw(&term, NULL, config)); 387 ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
379 $$ = term; 388 $$ = term;
380} 389}
381| 390|
382PE_TERM '=' PE_NAME 391PE_TERM '=' PE_NAME
383{ 392{
384 struct parse_events__term *term; 393 struct parse_events_term *term;
385 394
386 ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3)); 395 ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
387 $$ = term; 396 $$ = term;
388} 397}
389| 398|
390PE_TERM '=' PE_VALUE 399PE_TERM '=' PE_VALUE
391{ 400{
392 struct parse_events__term *term; 401 struct parse_events_term *term;
393 402
394 ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3)); 403 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
395 $$ = term; 404 $$ = term;
396} 405}
397| 406|
398PE_TERM 407PE_TERM
399{ 408{
400 struct parse_events__term *term; 409 struct parse_events_term *term;
401 410
402 ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1)); 411 ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
403 $$ = term; 412 $$ = term;
404} 413}
405 414
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 9bdc60c6f138..4c6f9c490a8d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1,4 +1,3 @@
1
2#include <linux/list.h> 1#include <linux/list.h>
3#include <sys/types.h> 2#include <sys/types.h>
4#include <sys/stat.h> 3#include <sys/stat.h>
@@ -11,6 +10,19 @@
11#include "parse-events.h" 10#include "parse-events.h"
12#include "cpumap.h" 11#include "cpumap.h"
13 12
13struct perf_pmu_alias {
14 char *name;
15 struct list_head terms;
16 struct list_head list;
17};
18
19struct perf_pmu_format {
20 char *name;
21 int value;
22 DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
23 struct list_head list;
24};
25
14#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" 26#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
15 27
16int perf_pmu_parse(struct list_head *list, char *name); 28int perf_pmu_parse(struct list_head *list, char *name);
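The two structs moved here from pmu.h describe sysfs-provided metadata: an alias maps a human-readable event name to a list of terms, and a format records which attr bits a field such as "event:0-7" occupies, using the kernel's DECLARE_BITMAP(). A minimal userspace stand-in for that bitmap layout, assuming the usual macro semantics:

#include <stdio.h>
#include <limits.h>

#define FORMAT_BITS 64
#define BITS_PER_LONG_ (CHAR_BIT * sizeof(unsigned long))
#define LONGS_(n) (((n) + BITS_PER_LONG_ - 1) / BITS_PER_LONG_)

struct format {
	const char *name;
	unsigned long bits[LONGS_(FORMAT_BITS)]; /* DECLARE_BITMAP stand-in */
};

static void set_bit_(unsigned long *map, unsigned b)
{
	map[b / BITS_PER_LONG_] |= 1UL << (b % BITS_PER_LONG_);
}

int main(void)
{
	struct format f = { .name = "event" };
	unsigned b;

	for (b = 0; b <= 7; b++)   /* sysfs line "event:0-7" */
		set_bit_(f.bits, b);
	printf("%s mask: %#lx\n", f.name, f.bits[0]);   /* 0xff */
	return 0;
}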
@@ -85,7 +97,7 @@ static int pmu_format(char *name, struct list_head *format)
85 97
86static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) 98static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file)
87{ 99{
88 struct perf_pmu__alias *alias; 100 struct perf_pmu_alias *alias;
89 char buf[256]; 101 char buf[256];
90 int ret; 102 int ret;
91 103
@@ -172,15 +184,15 @@ static int pmu_aliases(char *name, struct list_head *head)
172 return 0; 184 return 0;
173} 185}
174 186
175static int pmu_alias_terms(struct perf_pmu__alias *alias, 187static int pmu_alias_terms(struct perf_pmu_alias *alias,
176 struct list_head *terms) 188 struct list_head *terms)
177{ 189{
178 struct parse_events__term *term, *clone; 190 struct parse_events_term *term, *clone;
179 LIST_HEAD(list); 191 LIST_HEAD(list);
180 int ret; 192 int ret;
181 193
182 list_for_each_entry(term, &alias->terms, list) { 194 list_for_each_entry(term, &alias->terms, list) {
183 ret = parse_events__term_clone(&clone, term); 195 ret = parse_events_term__clone(&clone, term);
184 if (ret) { 196 if (ret) {
185 parse_events__free_terms(&list); 197 parse_events__free_terms(&list);
186 return ret; 198 return ret;
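pmu_alias_terms() above uses an all-or-nothing clone: the first failing parse_events_term__clone() frees every term built so far and propagates the error, so the caller never sees a half-copied list. The same pattern in a generic, self-contained form, with a hypothetical item type:

#include <stdlib.h>

struct item { int v; struct item *next; };

static int clone_all(const struct item *src, struct item **out)
{
	struct item *head = NULL, **tail = &head, *c;

	for (; src; src = src->next) {
		c = malloc(sizeof(*c));
		if (!c) {
			while (head) {          /* undo partial work */
				c = head->next;
				free(head);
				head = c;
			}
			return -1;
		}
		c->v = src->v;
		c->next = NULL;
		*tail = c;
		tail = &c->next;
	}
	*out = head;
	return 0;
}

int main(void)
{
	struct item a = { 1, NULL }, *copy = NULL;

	if (clone_all(&a, &copy))
		return 1;
	free(copy);
	return 0;
}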
@@ -360,10 +372,10 @@ struct perf_pmu *perf_pmu__find(char *name)
360 return pmu_lookup(name); 372 return pmu_lookup(name);
361} 373}
362 374
363static struct perf_pmu__format* 375static struct perf_pmu_format *
364pmu_find_format(struct list_head *formats, char *name) 376pmu_find_format(struct list_head *formats, char *name)
365{ 377{
366 struct perf_pmu__format *format; 378 struct perf_pmu_format *format;
367 379
368 list_for_each_entry(format, formats, list) 380 list_for_each_entry(format, formats, list)
369 if (!strcmp(format->name, name)) 381 if (!strcmp(format->name, name))
@@ -403,9 +415,9 @@ static __u64 pmu_format_value(unsigned long *format, __u64 value)
403 */ 415 */
404static int pmu_config_term(struct list_head *formats, 416static int pmu_config_term(struct list_head *formats,
405 struct perf_event_attr *attr, 417 struct perf_event_attr *attr,
406 struct parse_events__term *term) 418 struct parse_events_term *term)
407{ 419{
408 struct perf_pmu__format *format; 420 struct perf_pmu_format *format;
409 __u64 *vp; 421 __u64 *vp;
410 422
411 /* 423 /*
@@ -450,7 +462,7 @@ int perf_pmu__config_terms(struct list_head *formats,
450 struct perf_event_attr *attr, 462 struct perf_event_attr *attr,
451 struct list_head *head_terms) 463 struct list_head *head_terms)
452{ 464{
453 struct parse_events__term *term; 465 struct parse_events_term *term;
454 466
455 list_for_each_entry(term, head_terms, list) 467 list_for_each_entry(term, head_terms, list)
456 if (pmu_config_term(formats, attr, term)) 468 if (pmu_config_term(formats, attr, term))
@@ -471,10 +483,10 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
471 return perf_pmu__config_terms(&pmu->format, attr, head_terms); 483 return perf_pmu__config_terms(&pmu->format, attr, head_terms);
472} 484}
473 485
474static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, 486static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
475 struct parse_events__term *term) 487 struct parse_events_term *term)
476{ 488{
477 struct perf_pmu__alias *alias; 489 struct perf_pmu_alias *alias;
478 char *name; 490 char *name;
479 491
480 if (parse_events__is_hardcoded_term(term)) 492 if (parse_events__is_hardcoded_term(term))
@@ -507,8 +519,8 @@ static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu,
507 */ 519 */
508int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) 520int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
509{ 521{
510 struct parse_events__term *term, *h; 522 struct parse_events_term *term, *h;
511 struct perf_pmu__alias *alias; 523 struct perf_pmu_alias *alias;
512 int ret; 524 int ret;
513 525
514 list_for_each_entry_safe(term, h, head_terms, list) { 526 list_for_each_entry_safe(term, h, head_terms, list) {
@@ -527,7 +539,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms)
527int perf_pmu__new_format(struct list_head *list, char *name, 539int perf_pmu__new_format(struct list_head *list, char *name,
528 int config, unsigned long *bits) 540 int config, unsigned long *bits)
529{ 541{
530 struct perf_pmu__format *format; 542 struct perf_pmu_format *format;
531 543
532 format = zalloc(sizeof(*format)); 544 format = zalloc(sizeof(*format));
533 if (!format) 545 if (!format)
@@ -548,7 +560,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
548 if (!to) 560 if (!to)
549 to = from; 561 to = from;
550 562
551 memset(bits, 0, BITS_TO_LONGS(PERF_PMU_FORMAT_BITS)); 563 memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
552 for (b = from; b <= to; b++) 564 for (b = from; b <= to; b++)
553 set_bit(b, bits); 565 set_bit(b, bits);
554} 566}
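The memset() change in perf_pmu__set_format() is a real bug fix: BITS_TO_LONGS() counts longs, not bytes, so on a 64-bit build the old call cleared only one byte of the eight-byte format bitmap. A sketch of the byte-accurate alternatives, assuming the usual macro definitions:

#include <string.h>
#include <limits.h>

#define BITS_PER_LONG_  (CHAR_BIT * sizeof(unsigned long))
#define BITS_TO_LONGS_(n) (((n) + BITS_PER_LONG_ - 1) / BITS_PER_LONG_)
#define BITS_TO_BYTES_(n) (((n) + CHAR_BIT - 1) / CHAR_BIT)

static void bitmap_clear_ok(unsigned long *bits, unsigned nbits)
{
	/* either form clears the whole map; the broken original was
	 * memset(bits, 0, BITS_TO_LONGS_(nbits)), i.e. a long count
	 * passed where a byte count belongs */
	memset(bits, 0, BITS_TO_BYTES_(nbits));
	memset(bits, 0, BITS_TO_LONGS_(nbits) * sizeof(unsigned long));
}

int main(void)
{
	unsigned long bits[BITS_TO_LONGS_(64)];

	memset(bits, 0xff, sizeof(bits));
	bitmap_clear_ok(bits, 64);
	return bits[0] != 0;   /* 0 when fully cleared */
}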
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index a313ed76a49a..32fe55b659fa 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -12,19 +12,6 @@ enum {
12 12
13#define PERF_PMU_FORMAT_BITS 64 13#define PERF_PMU_FORMAT_BITS 64
14 14
15struct perf_pmu__format {
16 char *name;
17 int value;
18 DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
19 struct list_head list;
20};
21
22struct perf_pmu__alias {
23 char *name;
24 struct list_head terms;
25 struct list_head list;
26};
27
28struct perf_pmu { 15struct perf_pmu {
29 char *name; 16 char *name;
30 __u32 type; 17 __u32 type;
@@ -42,7 +29,7 @@ int perf_pmu__config_terms(struct list_head *formats,
42 struct list_head *head_terms); 29 struct list_head *head_terms);
43int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); 30int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms);
44struct list_head *perf_pmu__alias(struct perf_pmu *pmu, 31struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
45 struct list_head *head_terms); 32 struct list_head *head_terms);
46int perf_pmu_wrap(void); 33int perf_pmu_wrap(void);
47void perf_pmu_error(struct list_head *list, char *name, char const *msg); 34void perf_pmu_error(struct list_head *list, char *name, char const *msg);
48 35
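With perf_pmu_format and perf_pmu_alias moved into pmu.c, the header now exposes them only through list_heads, so other files cannot poke at their layout. The general opaque-type pattern this follows, sketched with hypothetical names:

/* widget.h -- the public interface sees only an incomplete type */
struct widget;
struct widget *widget__new(int value);
int widget__value(const struct widget *w);

/* widget.c -- the layout is a private implementation detail */
#include <stdlib.h>

struct widget { int value; };

struct widget *widget__new(int value)
{
	struct widget *w = malloc(sizeof(*w));

	if (w)
		w->value = value;
	return w;
}

int widget__value(const struct widget *w) { return w->value; }

int main(void)
{
	struct widget *w = widget__new(42);
	int v = w ? widget__value(w) : -1;

	free(w);
	return v != 42;
}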
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
index ec898047ebb9..bfd7e8509869 100644
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -1,5 +1,4 @@
1 1
2%name-prefix "perf_pmu_"
3%parse-param {struct list_head *format} 2%parse-param {struct list_head *format}
4%parse-param {char *name} 3%parse-param {char *name}
5 4
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1daf5c14e751..be0329394d56 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -413,12 +413,12 @@ static int convert_variable_type(Dwarf_Die *vr_die,
413 dwarf_diename(vr_die), dwarf_diename(&type)); 413 dwarf_diename(vr_die), dwarf_diename(&type));
414 return -EINVAL; 414 return -EINVAL;
415 } 415 }
416 if (die_get_real_type(&type, &type) == NULL) {
417 pr_warning("Failed to get a type"
418 " information.\n");
419 return -ENOENT;
420 }
416 if (ret == DW_TAG_pointer_type) { 421 if (ret == DW_TAG_pointer_type) {
417 if (die_get_real_type(&type, &type) == NULL) {
418 pr_warning("Failed to get a type"
419 " information.\n");
420 return -ENOENT;
421 }
422 while (*ref_ptr) 422 while (*ref_ptr)
423 ref_ptr = &(*ref_ptr)->next; 423 ref_ptr = &(*ref_ptr)->next;
424 /* Add new reference with offset +0 */ 424 /* Add new reference with offset +0 */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index c40c2d33199e..64536a993f4a 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -18,4 +18,5 @@ util/cgroup.c
18util/debugfs.c 18util/debugfs.c
19util/rblist.c 19util/rblist.c
20util/strlist.c 20util/strlist.c
21util/sysfs.c
21../../lib/rbtree.c 22../../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index a2657fd96837..925e0c3e6d91 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1045,3 +1045,12 @@ error:
1045 if (PyErr_Occurred()) 1045 if (PyErr_Occurred())
1046 PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); 1046 PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
1047} 1047}
1048
1049/*
1050 * Dummy, to avoid dragging all the test_attr infrastructure in the python
1051 * binding.
1052 */
1053void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
1054 int fd, int group_fd, unsigned long flags)
1055{
1056}
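The empty test_attr__open() is a link-time stub: code shared with the full perf binary references the hook, and a do-nothing definition satisfies that reference without pulling the test_attr infrastructure into the python module. The pattern in miniature, with a hypothetical hook name:

/* referenced from code shared with the full binary */
void test_hook(const char *what);

/* the stripped-down build links a deliberately empty definition */
void test_hook(const char *what)
{
	(void)what;
}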
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index f80605eb1855..eacec859f299 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -292,6 +292,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
292 ns = nsecs - s * NSECS_PER_SEC; 292 ns = nsecs - s * NSECS_PER_SEC;
293 293
294 scripting_context->event_data = data; 294 scripting_context->event_data = data;
295 scripting_context->pevent = evsel->tp_format->pevent;
295 296
296 ENTER; 297 ENTER;
297 SAVETMPS; 298 SAVETMPS;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 14683dfca2ee..e87aa5d9696b 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -265,6 +265,7 @@ static void python_process_tracepoint(union perf_event *perf_event
265 ns = nsecs - s * NSECS_PER_SEC; 265 ns = nsecs - s * NSECS_PER_SEC;
266 266
267 scripting_context->event_data = data; 267 scripting_context->event_data = data;
268 scripting_context->pevent = evsel->tp_format->pevent;
268 269
269 context = PyCObject_FromVoidPtr(scripting_context, NULL); 270 context = PyCObject_FromVoidPtr(scripting_context, NULL);
270 271
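Both script engines now stash the tracepoint's pevent handle in the shared scripting context before invoking a handler, presumably so script-side helpers can resolve field formats for the current event. A reduced model of that hand-off, with hypothetical names:

struct script_ctx {
	void *event_data;   /* raw sample payload */
	void *pevent;       /* tracepoint parse state */
};

static struct script_ctx ctx;

static void dispatch(void *data, void *pevent,
		     void (*handler)(struct script_ctx *))
{
	ctx.event_data = data;
	ctx.pevent = pevent;   /* the one-line addition in both hunks */
	handler(&ctx);
}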
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce6f51162386..bd85280bb6e8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -16,7 +16,6 @@
16#include "cpumap.h" 16#include "cpumap.h"
17#include "event-parse.h" 17#include "event-parse.h"
18#include "perf_regs.h" 18#include "perf_regs.h"
19#include "unwind.h"
20#include "vdso.h" 19#include "vdso.h"
21 20
22static int perf_session__open(struct perf_session *self, bool force) 21static int perf_session__open(struct perf_session *self, bool force)
@@ -87,13 +86,12 @@ void perf_session__set_id_hdr_size(struct perf_session *session)
87{ 86{
88 u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); 87 u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);
89 88
90 session->host_machine.id_hdr_size = id_hdr_size;
91 machines__set_id_hdr_size(&session->machines, id_hdr_size); 89 machines__set_id_hdr_size(&session->machines, id_hdr_size);
92} 90}
93 91
94int perf_session__create_kernel_maps(struct perf_session *self) 92int perf_session__create_kernel_maps(struct perf_session *self)
95{ 93{
96 int ret = machine__create_kernel_maps(&self->host_machine); 94 int ret = machine__create_kernel_maps(&self->machines.host);
97 95
98 if (ret >= 0) 96 if (ret >= 0)
99 ret = machines__create_guest_kernel_maps(&self->machines); 97 ret = machines__create_guest_kernel_maps(&self->machines);
@@ -102,8 +100,7 @@ int perf_session__create_kernel_maps(struct perf_session *self)
102 100
103static void perf_session__destroy_kernel_maps(struct perf_session *self) 101static void perf_session__destroy_kernel_maps(struct perf_session *self)
104{ 102{
105 machine__destroy_kernel_maps(&self->host_machine); 103 machines__destroy_kernel_maps(&self->machines);
106 machines__destroy_guest_kernel_maps(&self->machines);
107} 104}
108 105
109struct perf_session *perf_session__new(const char *filename, int mode, 106struct perf_session *perf_session__new(const char *filename, int mode,
@@ -128,22 +125,11 @@ struct perf_session *perf_session__new(const char *filename, int mode,
128 goto out; 125 goto out;
129 126
130 memcpy(self->filename, filename, len); 127 memcpy(self->filename, filename, len);
131 /*
132 * On 64bit we can mmap the data file in one go. No need for tiny mmap
133 * slices. On 32bit we use 32MB.
134 */
135#if BITS_PER_LONG == 64
136 self->mmap_window = ULLONG_MAX;
137#else
138 self->mmap_window = 32 * 1024 * 1024ULL;
139#endif
140 self->machines = RB_ROOT;
141 self->repipe = repipe; 128 self->repipe = repipe;
142 INIT_LIST_HEAD(&self->ordered_samples.samples); 129 INIT_LIST_HEAD(&self->ordered_samples.samples);
143 INIT_LIST_HEAD(&self->ordered_samples.sample_cache); 130 INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
144 INIT_LIST_HEAD(&self->ordered_samples.to_free); 131 INIT_LIST_HEAD(&self->ordered_samples.to_free);
145 machine__init(&self->host_machine, "", HOST_KERNEL_ID); 132 machines__init(&self->machines);
146 hists__init(&self->hists);
147 133
148 if (mode == O_RDONLY) { 134 if (mode == O_RDONLY) {
149 if (perf_session__open(self, force) < 0) 135 if (perf_session__open(self, force) < 0)
@@ -171,37 +157,30 @@ out_delete:
171 return NULL; 157 return NULL;
172} 158}
173 159
174static void machine__delete_dead_threads(struct machine *machine)
175{
176 struct thread *n, *t;
177
178 list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
179 list_del(&t->node);
180 thread__delete(t);
181 }
182}
183
184static void perf_session__delete_dead_threads(struct perf_session *session) 160static void perf_session__delete_dead_threads(struct perf_session *session)
185{ 161{
186 machine__delete_dead_threads(&session->host_machine); 162 machine__delete_dead_threads(&session->machines.host);
187} 163}
188 164
189static void machine__delete_threads(struct machine *self) 165static void perf_session__delete_threads(struct perf_session *session)
190{ 166{
191 struct rb_node *nd = rb_first(&self->threads); 167 machine__delete_threads(&session->machines.host);
192
193 while (nd) {
194 struct thread *t = rb_entry(nd, struct thread, rb_node);
195
196 rb_erase(&t->rb_node, &self->threads);
197 nd = rb_next(nd);
198 thread__delete(t);
199 }
200} 168}
201 169
202static void perf_session__delete_threads(struct perf_session *session) 170static void perf_session_env__delete(struct perf_session_env *env)
203{ 171{
204 machine__delete_threads(&session->host_machine); 172 free(env->hostname);
173 free(env->os_release);
174 free(env->version);
175 free(env->arch);
176 free(env->cpu_desc);
177 free(env->cpuid);
178
179 free(env->cmdline);
180 free(env->sibling_cores);
181 free(env->sibling_threads);
182 free(env->numa_nodes);
183 free(env->pmu_mappings);
205} 184}
206 185
207void perf_session__delete(struct perf_session *self) 186void perf_session__delete(struct perf_session *self)
@@ -209,198 +188,13 @@ void perf_session__delete(struct perf_session *self)
209 perf_session__destroy_kernel_maps(self); 188 perf_session__destroy_kernel_maps(self);
210 perf_session__delete_dead_threads(self); 189 perf_session__delete_dead_threads(self);
211 perf_session__delete_threads(self); 190 perf_session__delete_threads(self);
212 machine__exit(&self->host_machine); 191 perf_session_env__delete(&self->header.env);
192 machines__exit(&self->machines);
213 close(self->fd); 193 close(self->fd);
214 free(self); 194 free(self);
215 vdso__exit(); 195 vdso__exit();
216} 196}
217 197
218void machine__remove_thread(struct machine *self, struct thread *th)
219{
220 self->last_match = NULL;
221 rb_erase(&th->rb_node, &self->threads);
222 /*
223 * We may have references to this thread, for instance in some hist_entry
224 * instances, so just move them to a separate list.
225 */
226 list_add_tail(&th->node, &self->dead_threads);
227}
228
229static bool symbol__match_parent_regex(struct symbol *sym)
230{
231 if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
232 return 1;
233
234 return 0;
235}
236
237static const u8 cpumodes[] = {
238 PERF_RECORD_MISC_USER,
239 PERF_RECORD_MISC_KERNEL,
240 PERF_RECORD_MISC_GUEST_USER,
241 PERF_RECORD_MISC_GUEST_KERNEL
242};
243#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))
244
245static void ip__resolve_ams(struct machine *self, struct thread *thread,
246 struct addr_map_symbol *ams,
247 u64 ip)
248{
249 struct addr_location al;
250 size_t i;
251 u8 m;
252
253 memset(&al, 0, sizeof(al));
254
255 for (i = 0; i < NCPUMODES; i++) {
256 m = cpumodes[i];
257 /*
258 * We cannot use the header.misc hint to determine whether a
259 * branch stack address is user, kernel, guest, hypervisor.
260 * Branches may straddle the kernel/user/hypervisor boundaries.
261 * Thus, we have to try consecutively until we find a match
262 * or else, the symbol is unknown
263 */
264 thread__find_addr_location(thread, self, m, MAP__FUNCTION,
265 ip, &al, NULL);
266 if (al.sym)
267 goto found;
268 }
269found:
270 ams->addr = ip;
271 ams->al_addr = al.addr;
272 ams->sym = al.sym;
273 ams->map = al.map;
274}
275
276struct branch_info *machine__resolve_bstack(struct machine *self,
277 struct thread *thr,
278 struct branch_stack *bs)
279{
280 struct branch_info *bi;
281 unsigned int i;
282
283 bi = calloc(bs->nr, sizeof(struct branch_info));
284 if (!bi)
285 return NULL;
286
287 for (i = 0; i < bs->nr; i++) {
288 ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to);
289 ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from);
290 bi[i].flags = bs->entries[i].flags;
291 }
292 return bi;
293}
294
295static int machine__resolve_callchain_sample(struct machine *machine,
296 struct thread *thread,
297 struct ip_callchain *chain,
298 struct symbol **parent)
299
300{
301 u8 cpumode = PERF_RECORD_MISC_USER;
302 unsigned int i;
303 int err;
304
305 callchain_cursor_reset(&callchain_cursor);
306
307 if (chain->nr > PERF_MAX_STACK_DEPTH) {
308 pr_warning("corrupted callchain. skipping...\n");
309 return 0;
310 }
311
312 for (i = 0; i < chain->nr; i++) {
313 u64 ip;
314 struct addr_location al;
315
316 if (callchain_param.order == ORDER_CALLEE)
317 ip = chain->ips[i];
318 else
319 ip = chain->ips[chain->nr - i - 1];
320
321 if (ip >= PERF_CONTEXT_MAX) {
322 switch (ip) {
323 case PERF_CONTEXT_HV:
324 cpumode = PERF_RECORD_MISC_HYPERVISOR;
325 break;
326 case PERF_CONTEXT_KERNEL:
327 cpumode = PERF_RECORD_MISC_KERNEL;
328 break;
329 case PERF_CONTEXT_USER:
330 cpumode = PERF_RECORD_MISC_USER;
331 break;
332 default:
333 pr_debug("invalid callchain context: "
334 "%"PRId64"\n", (s64) ip);
335 /*
336 * It seems the callchain is corrupted.
337 * Discard all.
338 */
339 callchain_cursor_reset(&callchain_cursor);
340 return 0;
341 }
342 continue;
343 }
344
345 al.filtered = false;
346 thread__find_addr_location(thread, machine, cpumode,
347 MAP__FUNCTION, ip, &al, NULL);
348 if (al.sym != NULL) {
349 if (sort__has_parent && !*parent &&
350 symbol__match_parent_regex(al.sym))
351 *parent = al.sym;
352 if (!symbol_conf.use_callchain)
353 break;
354 }
355
356 err = callchain_cursor_append(&callchain_cursor,
357 ip, al.map, al.sym);
358 if (err)
359 return err;
360 }
361
362 return 0;
363}
364
365static int unwind_entry(struct unwind_entry *entry, void *arg)
366{
367 struct callchain_cursor *cursor = arg;
368 return callchain_cursor_append(cursor, entry->ip,
369 entry->map, entry->sym);
370}
371
372int machine__resolve_callchain(struct machine *machine,
373 struct perf_evsel *evsel,
374 struct thread *thread,
375 struct perf_sample *sample,
376 struct symbol **parent)
377
378{
379 int ret;
380
381 callchain_cursor_reset(&callchain_cursor);
382
383 ret = machine__resolve_callchain_sample(machine, thread,
384 sample->callchain, parent);
385 if (ret)
386 return ret;
387
388 /* Can we do dwarf post unwind? */
389 if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
390 (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
391 return 0;
392
393 /* Bail out if nothing was captured. */
394 if ((!sample->user_regs.regs) ||
395 (!sample->user_stack.size))
396 return 0;
397
398 return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
399 thread, evsel->attr.sample_regs_user,
400 sample);
401
402}
403
404static int process_event_synth_tracing_data_stub(union perf_event *event 198static int process_event_synth_tracing_data_stub(union perf_event *event
405 __maybe_unused, 199 __maybe_unused,
406 struct perf_session *session 200 struct perf_session *session
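Most of the code deleted above (ip__resolve_ams(), machine__resolve_bstack(), the callchain walkers) moves into machine.c rather than disappearing. Its core trick is worth restating: as the removed comment says, a branch address alone does not tell you whether it is user, kernel, or guest code, so the resolver probes each cpumode in turn and takes the first hit. That loop as a self-contained sketch with a toy lookup:

#include <stdio.h>

enum mode { MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL };

static const enum mode modes[] = {
	MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL,
};

/* toy resolver: pretend only high kernel addresses are known */
static const char *lookup(enum mode m, unsigned long long ip)
{
	return (m == MODE_KERNEL && ip >= 0xffff000000000000ULL)
		? "kernel_sym" : NULL;
}

static const char *resolve(unsigned long long ip)
{
	const char *sym = NULL;
	unsigned i;

	/* try each cpumode consecutively; first hit wins */
	for (i = 0; i < sizeof(modes) / sizeof(modes[0]) && !sym; i++)
		sym = lookup(modes[i], ip);
	return sym;   /* NULL: the symbol stays unknown */
}

int main(void)
{
	const char *s = resolve(0xffffffff81000000ULL);

	printf("%s\n", s ? s : "unknown");
	return 0;
}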
@@ -1027,7 +821,7 @@ static struct machine *
1027 return perf_session__findnew_machine(session, pid); 821 return perf_session__findnew_machine(session, pid);
1028 } 822 }
1029 823
1030 return perf_session__find_host_machine(session); 824 return &session->machines.host;
1031} 825}
1032 826
1033static int perf_session_deliver_event(struct perf_session *session, 827static int perf_session_deliver_event(struct perf_session *session,
@@ -1065,11 +859,11 @@ static int perf_session_deliver_event(struct perf_session *session,
1065 case PERF_RECORD_SAMPLE: 859 case PERF_RECORD_SAMPLE:
1066 dump_sample(evsel, event, sample); 860 dump_sample(evsel, event, sample);
1067 if (evsel == NULL) { 861 if (evsel == NULL) {
1068 ++session->hists.stats.nr_unknown_id; 862 ++session->stats.nr_unknown_id;
1069 return 0; 863 return 0;
1070 } 864 }
1071 if (machine == NULL) { 865 if (machine == NULL) {
1072 ++session->hists.stats.nr_unprocessable_samples; 866 ++session->stats.nr_unprocessable_samples;
1073 return 0; 867 return 0;
1074 } 868 }
1075 return tool->sample(tool, event, sample, evsel, machine); 869 return tool->sample(tool, event, sample, evsel, machine);
@@ -1083,7 +877,7 @@ static int perf_session_deliver_event(struct perf_session *session,
1083 return tool->exit(tool, event, sample, machine); 877 return tool->exit(tool, event, sample, machine);
1084 case PERF_RECORD_LOST: 878 case PERF_RECORD_LOST:
1085 if (tool->lost == perf_event__process_lost) 879 if (tool->lost == perf_event__process_lost)
1086 session->hists.stats.total_lost += event->lost.lost; 880 session->stats.total_lost += event->lost.lost;
1087 return tool->lost(tool, event, sample, machine); 881 return tool->lost(tool, event, sample, machine);
1088 case PERF_RECORD_READ: 882 case PERF_RECORD_READ:
1089 return tool->read(tool, event, sample, evsel, machine); 883 return tool->read(tool, event, sample, evsel, machine);
@@ -1092,7 +886,7 @@ static int perf_session_deliver_event(struct perf_session *session,
1092 case PERF_RECORD_UNTHROTTLE: 886 case PERF_RECORD_UNTHROTTLE:
1093 return tool->unthrottle(tool, event, sample, machine); 887 return tool->unthrottle(tool, event, sample, machine);
1094 default: 888 default:
1095 ++session->hists.stats.nr_unknown_events; 889 ++session->stats.nr_unknown_events;
1096 return -1; 890 return -1;
1097 } 891 }
1098} 892}
@@ -1106,8 +900,8 @@ static int perf_session__preprocess_sample(struct perf_session *session,
1106 900
1107 if (!ip_callchain__valid(sample->callchain, event)) { 901 if (!ip_callchain__valid(sample->callchain, event)) {
1108 pr_debug("call-chain problem with event, skipping it.\n"); 902 pr_debug("call-chain problem with event, skipping it.\n");
1109 ++session->hists.stats.nr_invalid_chains; 903 ++session->stats.nr_invalid_chains;
1110 session->hists.stats.total_invalid_chains += sample->period; 904 session->stats.total_invalid_chains += sample->period;
1111 return -EINVAL; 905 return -EINVAL;
1112 } 906 }
1113 return 0; 907 return 0;
@@ -1165,7 +959,7 @@ static int perf_session__process_event(struct perf_session *session,
1165 if (event->header.type >= PERF_RECORD_HEADER_MAX) 959 if (event->header.type >= PERF_RECORD_HEADER_MAX)
1166 return -EINVAL; 960 return -EINVAL;
1167 961
1168 hists__inc_nr_events(&session->hists, event->header.type); 962 events_stats__inc(&session->stats, event->header.type);
1169 963
1170 if (event->header.type >= PERF_RECORD_USER_TYPE_START) 964 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
1171 return perf_session__process_user_event(session, event, tool, file_offset); 965 return perf_session__process_user_event(session, event, tool, file_offset);
@@ -1201,7 +995,7 @@ void perf_event_header__bswap(struct perf_event_header *self)
1201 995
1202struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) 996struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
1203{ 997{
1204 return machine__findnew_thread(&session->host_machine, pid); 998 return machine__findnew_thread(&session->machines.host, pid);
1205} 999}
1206 1000
1207static struct thread *perf_session__register_idle_thread(struct perf_session *self) 1001static struct thread *perf_session__register_idle_thread(struct perf_session *self)
@@ -1220,39 +1014,39 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
1220 const struct perf_tool *tool) 1014 const struct perf_tool *tool)
1221{ 1015{
1222 if (tool->lost == perf_event__process_lost && 1016 if (tool->lost == perf_event__process_lost &&
1223 session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) { 1017 session->stats.nr_events[PERF_RECORD_LOST] != 0) {
1224 ui__warning("Processed %d events and lost %d chunks!\n\n" 1018 ui__warning("Processed %d events and lost %d chunks!\n\n"
1225 "Check IO/CPU overload!\n\n", 1019 "Check IO/CPU overload!\n\n",
1226 session->hists.stats.nr_events[0], 1020 session->stats.nr_events[0],
1227 session->hists.stats.nr_events[PERF_RECORD_LOST]); 1021 session->stats.nr_events[PERF_RECORD_LOST]);
1228 } 1022 }
1229 1023
1230 if (session->hists.stats.nr_unknown_events != 0) { 1024 if (session->stats.nr_unknown_events != 0) {
1231 ui__warning("Found %u unknown events!\n\n" 1025 ui__warning("Found %u unknown events!\n\n"
1232 "Is this an older tool processing a perf.data " 1026 "Is this an older tool processing a perf.data "
1233 "file generated by a more recent tool?\n\n" 1027 "file generated by a more recent tool?\n\n"
1234 "If that is not the case, consider " 1028 "If that is not the case, consider "
1235 "reporting to linux-kernel@vger.kernel.org.\n\n", 1029 "reporting to linux-kernel@vger.kernel.org.\n\n",
1236 session->hists.stats.nr_unknown_events); 1030 session->stats.nr_unknown_events);
1237 } 1031 }
1238 1032
1239 if (session->hists.stats.nr_unknown_id != 0) { 1033 if (session->stats.nr_unknown_id != 0) {
1240 ui__warning("%u samples with id not present in the header\n", 1034 ui__warning("%u samples with id not present in the header\n",
1241 session->hists.stats.nr_unknown_id); 1035 session->stats.nr_unknown_id);
1242 } 1036 }
1243 1037
1244 if (session->hists.stats.nr_invalid_chains != 0) { 1038 if (session->stats.nr_invalid_chains != 0) {
1245 ui__warning("Found invalid callchains!\n\n" 1039 ui__warning("Found invalid callchains!\n\n"
1246 "%u out of %u events were discarded for this reason.\n\n" 1040 "%u out of %u events were discarded for this reason.\n\n"
1247 "Consider reporting to linux-kernel@vger.kernel.org.\n\n", 1041 "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
1248 session->hists.stats.nr_invalid_chains, 1042 session->stats.nr_invalid_chains,
1249 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); 1043 session->stats.nr_events[PERF_RECORD_SAMPLE]);
1250 } 1044 }
1251 1045
1252 if (session->hists.stats.nr_unprocessable_samples != 0) { 1046 if (session->stats.nr_unprocessable_samples != 0) {
1253 ui__warning("%u unprocessable samples recorded.\n" 1047 ui__warning("%u unprocessable samples recorded.\n"
1254 "Do you have a KVM guest running and not using 'perf kvm'?\n", 1048 "Do you have a KVM guest running and not using 'perf kvm'?\n",
1255 session->hists.stats.nr_unprocessable_samples); 1049 session->stats.nr_unprocessable_samples);
1256 } 1050 }
1257} 1051}
1258 1052
@@ -1369,6 +1163,18 @@ fetch_mmaped_event(struct perf_session *session,
1369 return event; 1163 return event;
1370} 1164}
1371 1165
1166/*
1167 * On 64bit we can mmap the data file in one go. No need for tiny mmap
1168 * slices. On 32bit we use 32MB.
1169 */
1170#if BITS_PER_LONG == 64
1171#define MMAP_SIZE ULLONG_MAX
1172#define NUM_MMAPS 1
1173#else
1174#define MMAP_SIZE (32 * 1024 * 1024ULL)
1175#define NUM_MMAPS 128
1176#endif
1177
1372int __perf_session__process_events(struct perf_session *session, 1178int __perf_session__process_events(struct perf_session *session,
1373 u64 data_offset, u64 data_size, 1179 u64 data_offset, u64 data_size,
1374 u64 file_size, struct perf_tool *tool) 1180 u64 file_size, struct perf_tool *tool)
@@ -1376,7 +1182,7 @@ int __perf_session__process_events(struct perf_session *session,
1376 u64 head, page_offset, file_offset, file_pos, progress_next; 1182 u64 head, page_offset, file_offset, file_pos, progress_next;
1377 int err, mmap_prot, mmap_flags, map_idx = 0; 1183 int err, mmap_prot, mmap_flags, map_idx = 0;
1378 size_t mmap_size; 1184 size_t mmap_size;
1379 char *buf, *mmaps[8]; 1185 char *buf, *mmaps[NUM_MMAPS];
1380 union perf_event *event; 1186 union perf_event *event;
1381 uint32_t size; 1187 uint32_t size;
1382 1188
@@ -1391,7 +1197,7 @@ int __perf_session__process_events(struct perf_session *session,
1391 1197
1392 progress_next = file_size / 16; 1198 progress_next = file_size / 16;
1393 1199
1394 mmap_size = session->mmap_window; 1200 mmap_size = MMAP_SIZE;
1395 if (mmap_size > file_size) 1201 if (mmap_size > file_size)
1396 mmap_size = file_size; 1202 mmap_size = file_size;
1397 1203
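The MMAP_SIZE/NUM_MMAPS constants replace the old per-session mmap_window field and encode the comment moved with them: one giant mapping on 64-bit, 32MB windows on 32-bit. A minimal sketch of the same decision, using __LP64__ as a stand-in for the tools' BITS_PER_LONG test:

#include <stdio.h>

#if defined(__LP64__) || defined(_WIN64)
#define MMAP_SIZE_ ((unsigned long long)-1)   /* one giant mapping */
#else
#define MMAP_SIZE_ (32ULL * 1024 * 1024)      /* 32MB slices */
#endif

int main(void)
{
	unsigned long long file_size = 100ULL * 1024 * 1024;
	unsigned long long mmap_size = MMAP_SIZE_;

	if (mmap_size > file_size)
		mmap_size = file_size;   /* same clamp as the hunk above */
	printf("window: %llu bytes\n", mmap_size);
	return 0;
}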
@@ -1526,16 +1332,13 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
1526 1332
1527size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp) 1333size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
1528{ 1334{
1529 return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) + 1335 return machines__fprintf_dsos(&self->machines, fp);
1530 __dsos__fprintf(&self->host_machine.user_dsos, fp) +
1531 machines__fprintf_dsos(&self->machines, fp);
1532} 1336}
1533 1337
1534size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, 1338size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
1535 bool with_hits) 1339 bool (skip)(struct dso *dso, int parm), int parm)
1536{ 1340{
1537 size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); 1341 return machines__fprintf_dsos_buildid(&self->machines, fp, skip, parm);
1538 return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
1539} 1342}
1540 1343
1541size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) 1344size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
@@ -1543,11 +1346,11 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
1543 struct perf_evsel *pos; 1346 struct perf_evsel *pos;
1544 size_t ret = fprintf(fp, "Aggregated stats:\n"); 1347 size_t ret = fprintf(fp, "Aggregated stats:\n");
1545 1348
1546 ret += hists__fprintf_nr_events(&session->hists, fp); 1349 ret += events_stats__fprintf(&session->stats, fp);
1547 1350
1548 list_for_each_entry(pos, &session->evlist->entries, node) { 1351 list_for_each_entry(pos, &session->evlist->entries, node) {
1549 ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); 1352 ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
1550 ret += hists__fprintf_nr_events(&pos->hists, fp); 1353 ret += events_stats__fprintf(&pos->hists.stats, fp);
1551 } 1354 }
1552 1355
1553 return ret; 1356 return ret;
@@ -1559,7 +1362,7 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
1559 * FIXME: Here we have to actually print all the machines in this 1362 * FIXME: Here we have to actually print all the machines in this
1560 * session, not just the host... 1363 * session, not just the host...
1561 */ 1364 */
1562 return machine__fprintf(&session->host_machine, fp); 1365 return machine__fprintf(&session->machines.host, fp);
1563} 1366}
1564 1367
1565void perf_session__remove_thread(struct perf_session *session, 1368void perf_session__remove_thread(struct perf_session *session,
@@ -1568,10 +1371,10 @@ void perf_session__remove_thread(struct perf_session *session,
1568 /* 1371 /*
1569 * FIXME: This one makes no sense, we need to remove the thread from 1372 * FIXME: This one makes no sense, we need to remove the thread from
1570 * the machine it belongs to, perf_session can have many machines, so 1373 * the machine it belongs to, perf_session can have many machines, so
1571 * doing it always on ->host_machine is wrong. Fix when auditing all 1374 * doing it always on ->machines.host is wrong. Fix when auditing all
1572 * the 'perf kvm' code. 1375 * the 'perf kvm' code.
1573 */ 1376 */
1574 machine__remove_thread(&session->host_machine, th); 1377 machine__remove_thread(&session->machines.host, th);
1575} 1378}
1576 1379
1577struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 1380struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index cea133a6bdf1..b5c0847edfa9 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -30,16 +30,10 @@ struct ordered_samples {
30struct perf_session { 30struct perf_session {
31 struct perf_header header; 31 struct perf_header header;
32 unsigned long size; 32 unsigned long size;
33 unsigned long mmap_window; 33 struct machines machines;
34 struct machine host_machine;
35 struct rb_root machines;
36 struct perf_evlist *evlist; 34 struct perf_evlist *evlist;
37 struct pevent *pevent; 35 struct pevent *pevent;
38 /* 36 struct events_stats stats;
39 * FIXME: Need to split this up further, we need global
40 * stats + per event stats.
41 */
42 struct hists hists;
43 int fd; 37 int fd;
44 bool fd_pipe; 38 bool fd_pipe;
45 bool repipe; 39 bool repipe;
@@ -54,7 +48,7 @@ struct perf_tool;
54struct perf_session *perf_session__new(const char *filename, int mode, 48struct perf_session *perf_session__new(const char *filename, int mode,
55 bool force, bool repipe, 49 bool force, bool repipe,
56 struct perf_tool *tool); 50 struct perf_tool *tool);
57void perf_session__delete(struct perf_session *self); 51void perf_session__delete(struct perf_session *session);
58 52
59void perf_event_header__bswap(struct perf_event_header *self); 53void perf_event_header__bswap(struct perf_event_header *self);
60 54
@@ -81,43 +75,24 @@ void perf_session__set_id_hdr_size(struct perf_session *session);
81void perf_session__remove_thread(struct perf_session *self, struct thread *th); 75void perf_session__remove_thread(struct perf_session *self, struct thread *th);
82 76
83static inline 77static inline
84struct machine *perf_session__find_host_machine(struct perf_session *self)
85{
86 return &self->host_machine;
87}
88
89static inline
90struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid) 78struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid)
91{ 79{
92 if (pid == HOST_KERNEL_ID)
93 return &self->host_machine;
94 return machines__find(&self->machines, pid); 80 return machines__find(&self->machines, pid);
95} 81}
96 82
97static inline 83static inline
98struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid) 84struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid)
99{ 85{
100 if (pid == HOST_KERNEL_ID)
101 return &self->host_machine;
102 return machines__findnew(&self->machines, pid); 86 return machines__findnew(&self->machines, pid);
103} 87}
104 88
105static inline
106void perf_session__process_machines(struct perf_session *self,
107 struct perf_tool *tool,
108 machine__process_t process)
109{
110 process(&self->host_machine, tool);
111 return machines__process(&self->machines, process, tool);
112}
113
114struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 89struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
115size_t perf_session__fprintf(struct perf_session *self, FILE *fp); 90size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
116 91
117size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); 92size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
118 93
119size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, 94size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
120 FILE *fp, bool with_hits); 95 bool (fn)(struct dso *dso, int parm), int parm);
121 96
122size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); 97size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
123 98
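perf_session__fprintf_dsos_buildid() trades its fixed with_hits flag for a caller-supplied skip() predicate plus an opaque int, so each caller can filter DSOs by its own rule. The callback-filter style in a self-contained sketch, with hypothetical dso fields:

#include <stdio.h>

struct dso { const char *name; int hit; };

static void print_dsos(struct dso *d, int n,
		       int (*skip)(struct dso *, int), int parm)
{
	int i;

	for (i = 0; i < n; i++)
		if (!skip || !skip(&d[i], parm))
			printf("%s\n", d[i].name);
}

static int skip_unhit(struct dso *d, int parm)
{
	(void)parm;
	return !d->hit;   /* emulate the old with_hits behaviour */
}

int main(void)
{
	struct dso dsos[] = { { "libc.so", 1 }, { "ld.so", 0 } };

	print_dsos(dsos, 2, skip_unhit, 0);   /* prints libc.so only */
	return 0;
}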
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index cfd1c0feb32d..d41926cb9e3f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -60,7 +60,7 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
60static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, 60static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
61 size_t size, unsigned int width) 61 size_t size, unsigned int width)
62{ 62{
63 return repsep_snprintf(bf, size, "%*s:%5d", width, 63 return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
64 self->thread->comm ?: "", self->thread->pid); 64 self->thread->comm ?: "", self->thread->pid);
65} 65}
66 66
@@ -97,6 +97,16 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
97 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); 97 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
98} 98}
99 99
100struct sort_entry sort_comm = {
101 .se_header = "Command",
102 .se_cmp = sort__comm_cmp,
103 .se_collapse = sort__comm_collapse,
104 .se_snprintf = hist_entry__comm_snprintf,
105 .se_width_idx = HISTC_COMM,
106};
107
108/* --sort dso */
109
100static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) 110static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
101{ 111{
102 struct dso *dso_l = map_l ? map_l->dso : NULL; 112 struct dso *dso_l = map_l ? map_l->dso : NULL;
@@ -117,40 +127,12 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
117 return strcmp(dso_name_l, dso_name_r); 127 return strcmp(dso_name_l, dso_name_r);
118} 128}
119 129
120struct sort_entry sort_comm = {
121 .se_header = "Command",
122 .se_cmp = sort__comm_cmp,
123 .se_collapse = sort__comm_collapse,
124 .se_snprintf = hist_entry__comm_snprintf,
125 .se_width_idx = HISTC_COMM,
126};
127
128/* --sort dso */
129
130static int64_t 130static int64_t
131sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) 131sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
132{ 132{
133 return _sort__dso_cmp(left->ms.map, right->ms.map); 133 return _sort__dso_cmp(left->ms.map, right->ms.map);
134} 134}
135 135
136
137static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r,
138 u64 ip_l, u64 ip_r)
139{
140 if (!sym_l || !sym_r)
141 return cmp_null(sym_l, sym_r);
142
143 if (sym_l == sym_r)
144 return 0;
145
146 if (sym_l)
147 ip_l = sym_l->start;
148 if (sym_r)
149 ip_r = sym_r->start;
150
151 return (int64_t)(ip_r - ip_l);
152}
153
154static int _hist_entry__dso_snprintf(struct map *map, char *bf, 136static int _hist_entry__dso_snprintf(struct map *map, char *bf,
155 size_t size, unsigned int width) 137 size_t size, unsigned int width)
156{ 138{
@@ -169,9 +151,43 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
169 return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); 151 return _hist_entry__dso_snprintf(self->ms.map, bf, size, width);
170} 152}
171 153
154struct sort_entry sort_dso = {
155 .se_header = "Shared Object",
156 .se_cmp = sort__dso_cmp,
157 .se_snprintf = hist_entry__dso_snprintf,
158 .se_width_idx = HISTC_DSO,
159};
160
161/* --sort symbol */
162
163static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
164{
165 u64 ip_l, ip_r;
166
167 if (!sym_l || !sym_r)
168 return cmp_null(sym_l, sym_r);
169
170 if (sym_l == sym_r)
171 return 0;
172
173 ip_l = sym_l->start;
174 ip_r = sym_r->start;
175
176 return (int64_t)(ip_r - ip_l);
177}
178
179static int64_t
180sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
181{
182 if (!left->ms.sym && !right->ms.sym)
183 return right->level - left->level;
184
185 return _sort__sym_cmp(left->ms.sym, right->ms.sym);
186}
187
172static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, 188static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
173 u64 ip, char level, char *bf, size_t size, 189 u64 ip, char level, char *bf, size_t size,
174 unsigned int width __maybe_unused) 190 unsigned int width)
175{ 191{
176 size_t ret = 0; 192 size_t ret = 0;
177 193
@@ -197,43 +213,13 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
197 return ret; 213 return ret;
198} 214}
199 215
200
201struct sort_entry sort_dso = {
202 .se_header = "Shared Object",
203 .se_cmp = sort__dso_cmp,
204 .se_snprintf = hist_entry__dso_snprintf,
205 .se_width_idx = HISTC_DSO,
206};
207
208static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, 216static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
209 size_t size, 217 size_t size, unsigned int width)
210 unsigned int width __maybe_unused)
211{ 218{
212 return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, 219 return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
213 self->level, bf, size, width); 220 self->level, bf, size, width);
214} 221}
215 222
216/* --sort symbol */
217static int64_t
218sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
219{
220 u64 ip_l, ip_r;
221
222 if (!left->ms.sym && !right->ms.sym)
223 return right->level - left->level;
224
225 if (!left->ms.sym || !right->ms.sym)
226 return cmp_null(left->ms.sym, right->ms.sym);
227
228 if (left->ms.sym == right->ms.sym)
229 return 0;
230
231 ip_l = left->ms.sym->start;
232 ip_r = right->ms.sym->start;
233
234 return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r);
235}
236
237struct sort_entry sort_sym = { 223struct sort_entry sort_sym = {
238 .se_header = "Symbol", 224 .se_header = "Symbol",
239 .se_cmp = sort__sym_cmp, 225 .se_cmp = sort__sym_cmp,
@@ -253,7 +239,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
253 size_t size, 239 size_t size,
254 unsigned int width __maybe_unused) 240 unsigned int width __maybe_unused)
255{ 241{
256 FILE *fp; 242 FILE *fp = NULL;
257 char cmd[PATH_MAX + 2], *path = self->srcline, *nl; 243 char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
258 size_t line_len; 244 size_t line_len;
259 245
@@ -274,7 +260,6 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
274 260
275 if (getline(&path, &line_len, fp) < 0 || !line_len) 261 if (getline(&path, &line_len, fp) < 0 || !line_len)
276 goto out_ip; 262 goto out_ip;
277 fclose(fp);
278 self->srcline = strdup(path); 263 self->srcline = strdup(path);
279 if (self->srcline == NULL) 264 if (self->srcline == NULL)
280 goto out_ip; 265 goto out_ip;
@@ -284,8 +269,12 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
284 *nl = '\0'; 269 *nl = '\0';
285 path = self->srcline; 270 path = self->srcline;
286out_path: 271out_path:
272 if (fp)
273 pclose(fp);
287 return repsep_snprintf(bf, size, "%s", path); 274 return repsep_snprintf(bf, size, "%s", path);
288out_ip: 275out_ip:
276 if (fp)
277 pclose(fp);
289 return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); 278 return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip);
290} 279}
291 280
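The srcline fix above is the classic popen() pairing rule: the stream comes from a command pipeline, so it must be closed with pclose(), not fclose(), and on every exit path, which is why fp is now NULL-initialized and both labels close it. Minimal sketch:

#include <stdio.h>

int main(void)
{
	FILE *fp = NULL;
	char line[256];

	fp = popen("echo hello", "r");
	if (!fp)
		goto out;
	if (fgets(line, sizeof(line), fp))
		fputs(line, stdout);
out:
	if (fp)
		pclose(fp);   /* not fclose(): also reaps the child */
	return 0;
}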
@@ -335,7 +324,7 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
335static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, 324static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
336 size_t size, unsigned int width) 325 size_t size, unsigned int width)
337{ 326{
338 return repsep_snprintf(bf, size, "%-*d", width, self->cpu); 327 return repsep_snprintf(bf, size, "%*d", width, self->cpu);
339} 328}
340 329
341struct sort_entry sort_cpu = { 330struct sort_entry sort_cpu = {
@@ -345,6 +334,8 @@ struct sort_entry sort_cpu = {
345 .se_width_idx = HISTC_CPU, 334 .se_width_idx = HISTC_CPU,
346}; 335};
347 336
337/* sort keys for branch stacks */
338
348static int64_t 339static int64_t
349sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) 340sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
350{ 341{
@@ -359,13 +350,6 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf,
359 bf, size, width); 350 bf, size, width);
360} 351}
361 352
362struct sort_entry sort_dso_from = {
363 .se_header = "Source Shared Object",
364 .se_cmp = sort__dso_from_cmp,
365 .se_snprintf = hist_entry__dso_from_snprintf,
366 .se_width_idx = HISTC_DSO_FROM,
367};
368
369static int64_t 353static int64_t
370sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) 354sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
371{ 355{
@@ -389,8 +373,7 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
389 if (!from_l->sym && !from_r->sym) 373 if (!from_l->sym && !from_r->sym)
390 return right->level - left->level; 374 return right->level - left->level;
391 375
392 return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, 376 return _sort__sym_cmp(from_l->sym, from_r->sym);
393 from_r->addr);
394} 377}
395 378
396static int64_t 379static int64_t
@@ -402,12 +385,11 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
402 if (!to_l->sym && !to_r->sym) 385 if (!to_l->sym && !to_r->sym)
403 return right->level - left->level; 386 return right->level - left->level;
404 387
405 return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); 388 return _sort__sym_cmp(to_l->sym, to_r->sym);
406} 389}
407 390
408static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, 391static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
409 size_t size, 392 size_t size, unsigned int width)
410 unsigned int width __maybe_unused)
411{ 393{
412 struct addr_map_symbol *from = &self->branch_info->from; 394 struct addr_map_symbol *from = &self->branch_info->from;
413 return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, 395 return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
@@ -416,8 +398,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
416} 398}
417 399
418static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, 400static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
419 size_t size, 401 size_t size, unsigned int width)
420 unsigned int width __maybe_unused)
421{ 402{
422 struct addr_map_symbol *to = &self->branch_info->to; 403 struct addr_map_symbol *to = &self->branch_info->to;
423 return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, 404 return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
@@ -425,6 +406,13 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
425 406
426} 407}
427 408
409struct sort_entry sort_dso_from = {
410 .se_header = "Source Shared Object",
411 .se_cmp = sort__dso_from_cmp,
412 .se_snprintf = hist_entry__dso_from_snprintf,
413 .se_width_idx = HISTC_DSO_FROM,
414};
415
428struct sort_entry sort_dso_to = { 416struct sort_entry sort_dso_to = {
429 .se_header = "Target Shared Object", 417 .se_header = "Target Shared Object",
430 .se_cmp = sort__dso_to_cmp, 418 .se_cmp = sort__dso_to_cmp,
@@ -484,30 +472,40 @@ struct sort_dimension {
484 472
485#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } 473#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
486 474
487static struct sort_dimension sort_dimensions[] = { 475static struct sort_dimension common_sort_dimensions[] = {
488 DIM(SORT_PID, "pid", sort_thread), 476 DIM(SORT_PID, "pid", sort_thread),
489 DIM(SORT_COMM, "comm", sort_comm), 477 DIM(SORT_COMM, "comm", sort_comm),
490 DIM(SORT_DSO, "dso", sort_dso), 478 DIM(SORT_DSO, "dso", sort_dso),
491 DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
492 DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
493 DIM(SORT_SYM, "symbol", sort_sym), 479 DIM(SORT_SYM, "symbol", sort_sym),
494 DIM(SORT_SYM_TO, "symbol_from", sort_sym_from),
495 DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to),
496 DIM(SORT_PARENT, "parent", sort_parent), 480 DIM(SORT_PARENT, "parent", sort_parent),
497 DIM(SORT_CPU, "cpu", sort_cpu), 481 DIM(SORT_CPU, "cpu", sort_cpu),
498 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
499 DIM(SORT_SRCLINE, "srcline", sort_srcline), 482 DIM(SORT_SRCLINE, "srcline", sort_srcline),
500}; 483};
501 484
485#undef DIM
486
487#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
488
489static struct sort_dimension bstack_sort_dimensions[] = {
490 DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
491 DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
492 DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
493 DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
494 DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
495};
496
497#undef DIM
498
502int sort_dimension__add(const char *tok) 499int sort_dimension__add(const char *tok)
503{ 500{
504 unsigned int i; 501 unsigned int i;
505 502
506 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { 503 for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
507 struct sort_dimension *sd = &sort_dimensions[i]; 504 struct sort_dimension *sd = &common_sort_dimensions[i];
508 505
509 if (strncasecmp(tok, sd->name, strlen(tok))) 506 if (strncasecmp(tok, sd->name, strlen(tok)))
510 continue; 507 continue;
508
511 if (sd->entry == &sort_parent) { 509 if (sd->entry == &sort_parent) {
512 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); 510 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
513 if (ret) { 511 if (ret) {
@@ -518,9 +516,7 @@ int sort_dimension__add(const char *tok)
518 return -EINVAL; 516 return -EINVAL;
519 } 517 }
520 sort__has_parent = 1; 518 sort__has_parent = 1;
521 } else if (sd->entry == &sort_sym || 519 } else if (sd->entry == &sort_sym) {
522 sd->entry == &sort_sym_from ||
523 sd->entry == &sort_sym_to) {
524 sort__has_sym = 1; 520 sort__has_sym = 1;
525 } 521 }
526 522
@@ -530,52 +526,69 @@ int sort_dimension__add(const char *tok)
530 if (sd->entry->se_collapse) 526 if (sd->entry->se_collapse)
531 sort__need_collapse = 1; 527 sort__need_collapse = 1;
532 528
533 if (list_empty(&hist_entry__sort_list)) { 529 if (list_empty(&hist_entry__sort_list))
534 if (!strcmp(sd->name, "pid")) 530 sort__first_dimension = i;
535 sort__first_dimension = SORT_PID;
536 else if (!strcmp(sd->name, "comm"))
537 sort__first_dimension = SORT_COMM;
538 else if (!strcmp(sd->name, "dso"))
539 sort__first_dimension = SORT_DSO;
540 else if (!strcmp(sd->name, "symbol"))
541 sort__first_dimension = SORT_SYM;
542 else if (!strcmp(sd->name, "parent"))
543 sort__first_dimension = SORT_PARENT;
544 else if (!strcmp(sd->name, "cpu"))
545 sort__first_dimension = SORT_CPU;
546 else if (!strcmp(sd->name, "symbol_from"))
547 sort__first_dimension = SORT_SYM_FROM;
548 else if (!strcmp(sd->name, "symbol_to"))
549 sort__first_dimension = SORT_SYM_TO;
550 else if (!strcmp(sd->name, "dso_from"))
551 sort__first_dimension = SORT_DSO_FROM;
552 else if (!strcmp(sd->name, "dso_to"))
553 sort__first_dimension = SORT_DSO_TO;
554 else if (!strcmp(sd->name, "mispredict"))
555 sort__first_dimension = SORT_MISPREDICT;
556 }
557 531
558 list_add_tail(&sd->entry->list, &hist_entry__sort_list); 532 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
559 sd->taken = 1; 533 sd->taken = 1;
560 534
561 return 0; 535 return 0;
562 } 536 }
537
538 for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
539 struct sort_dimension *sd = &bstack_sort_dimensions[i];
540
541 if (strncasecmp(tok, sd->name, strlen(tok)))
542 continue;
543
544 if (sort__branch_mode != 1)
545 return -EINVAL;
546
547 if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
548 sort__has_sym = 1;
549
550 if (sd->taken)
551 return 0;
552
553 if (sd->entry->se_collapse)
554 sort__need_collapse = 1;
555
556 if (list_empty(&hist_entry__sort_list))
557 sort__first_dimension = i + __SORT_BRANCH_STACK;
558
559 list_add_tail(&sd->entry->list, &hist_entry__sort_list);
560 sd->taken = 1;
561
562 return 0;
563 }
564
563 return -ESRCH; 565 return -ESRCH;
564} 566}
565 567
566void setup_sorting(const char * const usagestr[], const struct option *opts) 568int setup_sorting(void)
567{ 569{
568 char *tmp, *tok, *str = strdup(sort_order); 570 char *tmp, *tok, *str = strdup(sort_order);
571 int ret = 0;
572
573 if (str == NULL) {
574 error("Not enough memory to setup sort keys");
575 return -ENOMEM;
576 }
569 577
570 for (tok = strtok_r(str, ", ", &tmp); 578 for (tok = strtok_r(str, ", ", &tmp);
571 tok; tok = strtok_r(NULL, ", ", &tmp)) { 579 tok; tok = strtok_r(NULL, ", ", &tmp)) {
572 if (sort_dimension__add(tok) < 0) { 580 ret = sort_dimension__add(tok);
581 if (ret == -EINVAL) {
582 error("Invalid --sort key: `%s'", tok);
583 break;
584 } else if (ret == -ESRCH) {
573 error("Unknown --sort key: `%s'", tok); 585 error("Unknown --sort key: `%s'", tok);
574 usage_with_options(usagestr, opts); 586 break;
575 } 587 }
576 } 588 }
577 589
578 free(str); 590 free(str);
591 return ret;
579} 592}
580 593
581void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, 594void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
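setup_sorting() now returns an error code instead of calling usage_with_options() itself, so each builtin can decide how to report a bad --sort string. The strtok_r() parse-and-propagate shape used above, reduced to a standalone sketch with a toy validator in place of sort_dimension__add():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int add_key(const char *tok)
{
	return strcmp(tok, "bad") ? 0 : -EINVAL;   /* toy validator */
}

static int parse_keys(const char *order)
{
	char *tmp, *tok, *str = strdup(order);
	int ret = 0;

	if (!str)
		return -ENOMEM;
	for (tok = strtok_r(str, ", ", &tmp);
	     tok; tok = strtok_r(NULL, ", ", &tmp)) {
		ret = add_key(tok);
		if (ret) {
			fprintf(stderr, "invalid key: %s\n", tok);
			break;   /* stop at the first bad token */
		}
	}
	free(str);
	return ret;
}

int main(void)
{
	return parse_keys("pid,comm") ? 1 : 0;
}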
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b4e8c3ba559d..b13e56f6ccbe 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -55,9 +55,6 @@ struct he_stat {
55struct hist_entry_diff { 55struct hist_entry_diff {
56 bool computed; 56 bool computed;
57 57
58 /* PERF_HPP__DISPL */
59 int displacement;
60
61 /* PERF_HPP__DELTA */ 58 /* PERF_HPP__DELTA */
62 double period_ratio_delta; 59 double period_ratio_delta;
63 60
@@ -118,25 +115,29 @@ static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
118 return NULL; 115 return NULL;
119} 116}
120 117
121static inline void hist__entry_add_pair(struct hist_entry *he, 118static inline void hist_entry__add_pair(struct hist_entry *he,
122 struct hist_entry *pair) 119 struct hist_entry *pair)
123{ 120{
124 list_add_tail(&he->pairs.head, &pair->pairs.node); 121 list_add_tail(&he->pairs.head, &pair->pairs.node);
125} 122}
126 123
127enum sort_type { 124enum sort_type {
125 /* common sort keys */
128 SORT_PID, 126 SORT_PID,
129 SORT_COMM, 127 SORT_COMM,
130 SORT_DSO, 128 SORT_DSO,
131 SORT_SYM, 129 SORT_SYM,
132 SORT_PARENT, 130 SORT_PARENT,
133 SORT_CPU, 131 SORT_CPU,
134 SORT_DSO_FROM, 132 SORT_SRCLINE,
133
134 /* branch stack specific sort keys */
135 __SORT_BRANCH_STACK,
136 SORT_DSO_FROM = __SORT_BRANCH_STACK,
135 SORT_DSO_TO, 137 SORT_DSO_TO,
136 SORT_SYM_FROM, 138 SORT_SYM_FROM,
137 SORT_SYM_TO, 139 SORT_SYM_TO,
138 SORT_MISPREDICT, 140 SORT_MISPREDICT,
139 SORT_SRCLINE,
140}; 141};
141 142
142/* 143/*
@@ -159,7 +160,7 @@ struct sort_entry {
159extern struct sort_entry sort_thread; 160extern struct sort_entry sort_thread;
160extern struct list_head hist_entry__sort_list; 161extern struct list_head hist_entry__sort_list;
161 162
162void setup_sorting(const char * const usagestr[], const struct option *opts); 163int setup_sorting(void);
163extern int sort_dimension__add(const char *); 164extern int sort_dimension__add(const char *);
164void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, 165void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
165 const char *list_name, FILE *fp); 166 const char *list_name, FILE *fp);
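The reworked enum uses __SORT_BRANCH_STACK as a sentinel so a single value space indexes two tables: common keys count from zero, branch-stack keys are offset by the sentinel, matching the redefined DIM() macro in sort.c. A reduced sketch of that partitioning:

#include <stdio.h>

enum key {
	KEY_PID,
	KEY_COMM,
	__KEY_BRANCH,              /* first branch-stack key */
	KEY_DSO_FROM = __KEY_BRANCH,
	KEY_DSO_TO,
};

static const char *common[] = { "pid", "comm" };
static const char *branch[] = { "dso_from", "dso_to" };

static const char *key_name(enum key k)
{
	if (k < __KEY_BRANCH)
		return common[k];
	return branch[k - __KEY_BRANCH];   /* same offset as DIM() */
}

int main(void)
{
	printf("%s %s\n", key_name(KEY_COMM), key_name(KEY_DSO_TO));
	return 0;
}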
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 346707df04b9..29c7b2cb2521 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -332,6 +332,24 @@ char *strxfrchar(char *s, char from, char to)
332} 332}
333 333
334/** 334/**
335 * ltrim - Removes leading whitespace from @s.
336 * @s: The string to be stripped.
337 *
338 * Return pointer to the first non-whitespace character in @s.
339 */
340char *ltrim(char *s)
341{
342 int len = strlen(s);
343
344 while (len && isspace(*s)) {
345 len--;
346 s++;
347 }
348
349 return s;
350}
351
352/**
335 * rtrim - Removes trailing whitespace from @s. 353 * rtrim - Removes trailing whitespace from @s.
336 * @s: The string to be stripped. 354 * @s: The string to be stripped.
337 * 355 *
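With ltrim() added alongside the existing rtrim(), stripping both ends is just the composition of the two. A standalone sketch of the pair; casting to unsigned char before isspace() avoids undefined behavior on negative chars:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static char *ltrim_(char *s)
{
	while (*s && isspace((unsigned char)*s))
		s++;
	return s;
}

static char *rtrim_(char *s)
{
	size_t len = strlen(s);

	while (len && isspace((unsigned char)s[len - 1]))
		s[--len] = '\0';
	return s;
}

int main(void)
{
	char buf[] = "  cycles:u  ";

	printf("[%s]\n", ltrim_(rtrim_(buf)));   /* [cycles:u] */
	return 0;
}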
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 155d8b7078a7..55433aa42c8f 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -35,11 +35,11 @@ out_delete:
 	return NULL;
 }
 
-static void str_node__delete(struct str_node *self, bool dupstr)
+static void str_node__delete(struct str_node *snode, bool dupstr)
 {
 	if (dupstr)
-		free((void *)self->s);
-	free(self);
+		free((void *)snode->s);
+	free(snode);
 }
 
 static
@@ -59,12 +59,12 @@ static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
 	return strcmp(snode->s, str);
 }
 
-int strlist__add(struct strlist *self, const char *new_entry)
+int strlist__add(struct strlist *slist, const char *new_entry)
 {
-	return rblist__add_node(&self->rblist, new_entry);
+	return rblist__add_node(&slist->rblist, new_entry);
 }
 
-int strlist__load(struct strlist *self, const char *filename)
+int strlist__load(struct strlist *slist, const char *filename)
 {
 	char entry[1024];
 	int err;
@@ -80,7 +80,7 @@ int strlist__load(struct strlist *self, const char *filename)
 			continue;
 		entry[len - 1] = '\0';
 
-		err = strlist__add(self, entry);
+		err = strlist__add(slist, entry);
 		if (err != 0)
 			goto out;
 	}
@@ -107,56 +107,56 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry)
 	return snode;
 }
 
-static int strlist__parse_list_entry(struct strlist *self, const char *s)
+static int strlist__parse_list_entry(struct strlist *slist, const char *s)
 {
 	if (strncmp(s, "file://", 7) == 0)
-		return strlist__load(self, s + 7);
+		return strlist__load(slist, s + 7);
 
-	return strlist__add(self, s);
+	return strlist__add(slist, s);
 }
 
-int strlist__parse_list(struct strlist *self, const char *s)
+int strlist__parse_list(struct strlist *slist, const char *s)
 {
 	char *sep;
 	int err;
 
 	while ((sep = strchr(s, ',')) != NULL) {
 		*sep = '\0';
-		err = strlist__parse_list_entry(self, s);
+		err = strlist__parse_list_entry(slist, s);
 		*sep = ',';
 		if (err != 0)
 			return err;
 		s = sep + 1;
 	}
 
-	return *s ? strlist__parse_list_entry(self, s) : 0;
+	return *s ? strlist__parse_list_entry(slist, s) : 0;
 }
 
-struct strlist *strlist__new(bool dupstr, const char *slist)
+struct strlist *strlist__new(bool dupstr, const char *list)
 {
-	struct strlist *self = malloc(sizeof(*self));
+	struct strlist *slist = malloc(sizeof(*slist));
 
-	if (self != NULL) {
-		rblist__init(&self->rblist);
-		self->rblist.node_cmp    = strlist__node_cmp;
-		self->rblist.node_new    = strlist__node_new;
-		self->rblist.node_delete = strlist__node_delete;
+	if (slist != NULL) {
+		rblist__init(&slist->rblist);
+		slist->rblist.node_cmp    = strlist__node_cmp;
+		slist->rblist.node_new    = strlist__node_new;
+		slist->rblist.node_delete = strlist__node_delete;
 
-		self->dupstr = dupstr;
-		if (slist && strlist__parse_list(self, slist) != 0)
+		slist->dupstr = dupstr;
+		if (slist && strlist__parse_list(slist, list) != 0)
 			goto out_error;
 	}
 
-	return self;
+	return slist;
 out_error:
-	free(self);
+	free(slist);
 	return NULL;
 }
 
-void strlist__delete(struct strlist *self)
+void strlist__delete(struct strlist *slist)
 {
-	if (self != NULL)
-		rblist__delete(&self->rblist);
+	if (slist != NULL)
+		rblist__delete(&slist->rblist);
 }
 
 struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index dd9f922ec67c..5c7f87069d9c 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -17,34 +17,34 @@ struct strlist {
 };
 
 struct strlist *strlist__new(bool dupstr, const char *slist);
-void strlist__delete(struct strlist *self);
+void strlist__delete(struct strlist *slist);
 
-void strlist__remove(struct strlist *self, struct str_node *sn);
-int strlist__load(struct strlist *self, const char *filename);
-int strlist__add(struct strlist *self, const char *str);
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
 
-struct str_node *strlist__entry(const struct strlist *self, unsigned int idx);
-struct str_node *strlist__find(struct strlist *self, const char *entry);
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
 
-static inline bool strlist__has_entry(struct strlist *self, const char *entry)
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
 {
-	return strlist__find(self, entry) != NULL;
+	return strlist__find(slist, entry) != NULL;
 }
 
-static inline bool strlist__empty(const struct strlist *self)
+static inline bool strlist__empty(const struct strlist *slist)
 {
-	return rblist__empty(&self->rblist);
+	return rblist__empty(&slist->rblist);
 }
 
-static inline unsigned int strlist__nr_entries(const struct strlist *self)
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
 {
-	return rblist__nr_entries(&self->rblist);
+	return rblist__nr_entries(&slist->rblist);
 }
 
 /* For strlist iteration */
-static inline struct str_node *strlist__first(struct strlist *self)
+static inline struct str_node *strlist__first(struct strlist *slist)
 {
-	struct rb_node *rn = rb_first(&self->rblist.entries);
+	struct rb_node *rn = rb_first(&slist->rblist.entries);
 	return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
 }
 static inline struct str_node *strlist__next(struct str_node *sn)
@@ -59,21 +59,21 @@ static inline struct str_node *strlist__next(struct str_node *sn)
 /**
  * strlist_for_each - iterate over a strlist
  * @pos: the &struct str_node to use as a loop cursor.
- * @self: the &struct strlist for loop.
+ * @slist: the &struct strlist for loop.
  */
-#define strlist__for_each(pos, self)	\
-	for (pos = strlist__first(self); pos; pos = strlist__next(pos))
+#define strlist__for_each(pos, slist)	\
+	for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
 
 /**
  * strlist_for_each_safe - iterate over a strlist safe against removal of
  *                         str_node
  * @pos: the &struct str_node to use as a loop cursor.
  * @n: another &struct str_node to use as temporary storage.
- * @self: the &struct strlist for loop.
+ * @slist: the &struct strlist for loop.
  */
-#define strlist__for_each_safe(pos, n, self)	\
-	for (pos = strlist__first(self), n = strlist__next(pos); pos;\
+#define strlist__for_each_safe(pos, n, slist)	\
+	for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
 	     pos = n, n = strlist__next(n))
 
-int strlist__parse_list(struct strlist *self, const char *s);
+int strlist__parse_list(struct strlist *slist, const char *s);
 #endif /* __PERF_STRLIST_H */
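
The strlist interface itself is unchanged by the self -> slist rename. As a reminder of how it fits together, a usage sketch (perf-tree only; the list contents are illustrative): strlist__new() with dupstr=true copies each entry, strlist__parse_list() splits the string on commas, and a "file://" prefix loads one entry per line from that file.

#include <stdio.h>
#include "strlist.h"	/* perf-tree header, sketch only */

static void print_interesting_dsos(void)
{
	struct str_node *pos;
	struct strlist *slist =
		strlist__new(true, "libc.so.6,file:///tmp/dsos.txt");

	if (slist == NULL)
		return;

	strlist__for_each(pos, slist)
		printf("%s\n", pos->s);

	strlist__delete(slist);
}
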
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index db0cc92cf2ea..54efcb5659ac 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1,6 +1,3 @@
-#include <libelf.h>
-#include <gelf.h>
-#include <elf.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <errno.h>
@@ -718,6 +715,17 @@ int dso__load_sym(struct dso *dso, struct map *map,
 						sym.st_value);
 			used_opd = true;
 		}
+		/*
+		 * When loading symbols in a data mapping, ABS symbols (which
+		 * have a value of SHN_ABS in their st_shndx) fail at
+		 * elf_getscn(). That marks the loading as a failure, so
+		 * already loaded symbols cannot be fixed up.
+		 *
+		 * I'm not sure what should be done. Just ignore them for now.
+		 * - Namhyung Kim
+		 */
+		if (sym.st_shndx == SHN_ABS)
+			continue;
 
 		sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
 		if (!sec)
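
For context on the guard just added: SHN_ABS (0xfff1) lies in the reserved range SHN_LORESERVE..SHN_HIRESERVE, whose values are escape codes rather than indices of real section headers, which is why elf_getscn() cannot resolve them. A standalone sketch of the idea using only <elf.h>; note the patch deliberately skips only SHN_ABS, while this broader check is illustrative:

#include <elf.h>
#include <stdbool.h>

/* Reserved st_shndx values (SHN_ABS, SHN_COMMON, ...) have no
 * section header behind them, so a section lookup must be skipped. */
static bool sym_has_real_section(const Elf64_Sym *sym)
{
	return sym->st_shndx < SHN_LORESERVE;
}
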
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 259f8f2ea9c9..a7390cde63bc 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -1,6 +1,5 @@
1#include "symbol.h" 1#include "symbol.h"
2 2
3#include <elf.h>
4#include <stdio.h> 3#include <stdio.h>
5#include <fcntl.h> 4#include <fcntl.h>
6#include <string.h> 5#include <string.h>
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 295f8d4feedf..e6432d85b43d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -28,8 +28,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter);
 static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 				symbol_filter_t filter);
-static int vmlinux_path__nr_entries;
-static char **vmlinux_path;
+int vmlinux_path__nr_entries;
+char **vmlinux_path;
 
 struct symbol_conf symbol_conf = {
 	.exclude_other	  = true,
@@ -202,13 +202,6 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 		curr->end = ~0ULL;
 }
 
-static void map_groups__fixup_end(struct map_groups *mg)
-{
-	int i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		__map_groups__fixup_end(mg, i);
-}
-
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
@@ -652,8 +645,8 @@ discard_symbol: rb_erase(&pos->rb_node, root);
 	return count + moved;
 }
 
-static bool symbol__restricted_filename(const char *filename,
+bool symbol__restricted_filename(const char *filename,
 				const char *restricted_filename)
 {
 	bool restricted = false;
 
@@ -775,10 +768,6 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	else
 		machine = NULL;
 
-	name = malloc(PATH_MAX);
-	if (!name)
-		return -1;
-
 	dso->adjust_symbols = 0;
 
 	if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
@@ -802,6 +791,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	if (machine)
 		root_dir = machine->root_dir;
 
+	name = malloc(PATH_MAX);
+	if (!name)
+		return -1;
+
 	/* Iterate over candidate debug images.
 	 * Keep track of "interesting" ones (those which have a symtab, dynsym,
 	 * and/or opd section) for processing.
@@ -887,200 +880,6 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	return NULL;
 }
 
-static int map_groups__set_modules_path_dir(struct map_groups *mg,
-				const char *dir_name)
-{
-	struct dirent *dent;
-	DIR *dir = opendir(dir_name);
-	int ret = 0;
-
-	if (!dir) {
-		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
-		return -1;
-	}
-
-	while ((dent = readdir(dir)) != NULL) {
-		char path[PATH_MAX];
-		struct stat st;
-
-		/*sshfs might return bad dent->d_type, so we have to stat*/
-		snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
-		if (stat(path, &st))
-			continue;
-
-		if (S_ISDIR(st.st_mode)) {
-			if (!strcmp(dent->d_name, ".") ||
-			    !strcmp(dent->d_name, ".."))
-				continue;
-
-			ret = map_groups__set_modules_path_dir(mg, path);
-			if (ret < 0)
-				goto out;
-		} else {
-			char *dot = strrchr(dent->d_name, '.'),
-			     dso_name[PATH_MAX];
-			struct map *map;
-			char *long_name;
-
-			if (dot == NULL || strcmp(dot, ".ko"))
-				continue;
-			snprintf(dso_name, sizeof(dso_name), "[%.*s]",
-				 (int)(dot - dent->d_name), dent->d_name);
-
-			strxfrchar(dso_name, '-', '_');
-			map = map_groups__find_by_name(mg, MAP__FUNCTION,
-						       dso_name);
-			if (map == NULL)
-				continue;
-
-			long_name = strdup(path);
-			if (long_name == NULL) {
-				ret = -1;
-				goto out;
-			}
-			dso__set_long_name(map->dso, long_name);
-			map->dso->lname_alloc = 1;
-			dso__kernel_module_get_build_id(map->dso, "");
-		}
-	}
-
-out:
-	closedir(dir);
-	return ret;
-}
-
-static char *get_kernel_version(const char *root_dir)
-{
-	char version[PATH_MAX];
-	FILE *file;
-	char *name, *tmp;
-	const char *prefix = "Linux version ";
-
-	sprintf(version, "%s/proc/version", root_dir);
-	file = fopen(version, "r");
-	if (!file)
-		return NULL;
-
-	version[0] = '\0';
-	tmp = fgets(version, sizeof(version), file);
-	fclose(file);
-
-	name = strstr(version, prefix);
-	if (!name)
-		return NULL;
-	name += strlen(prefix);
-	tmp = strchr(name, ' ');
-	if (tmp)
-		*tmp = '\0';
-
-	return strdup(name);
-}
-
-static int machine__set_modules_path(struct machine *machine)
-{
-	char *version;
-	char modules_path[PATH_MAX];
-
-	version = get_kernel_version(machine->root_dir);
-	if (!version)
-		return -1;
-
-	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
-		 machine->root_dir, version);
-	free(version);
-
-	return map_groups__set_modules_path_dir(&machine->kmaps, modules_path);
-}
-
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename)
-{
-	struct map *map;
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename);
-
-	if (dso == NULL)
-		return NULL;
-
-	map = map__new2(start, dso, MAP__FUNCTION);
-	if (map == NULL)
-		return NULL;
-
-	if (machine__is_host(machine))
-		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-	else
-		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-	map_groups__insert(&machine->kmaps, map);
-	return map;
-}
-
-static int machine__create_modules(struct machine *machine)
-{
-	char *line = NULL;
-	size_t n;
-	FILE *file;
-	struct map *map;
-	const char *modules;
-	char path[PATH_MAX];
-
-	if (machine__is_default_guest(machine))
-		modules = symbol_conf.default_guest_modules;
-	else {
-		sprintf(path, "%s/proc/modules", machine->root_dir);
-		modules = path;
-	}
-
-	if (symbol__restricted_filename(path, "/proc/modules"))
-		return -1;
-
-	file = fopen(modules, "r");
-	if (file == NULL)
-		return -1;
-
-	while (!feof(file)) {
-		char name[PATH_MAX];
-		u64 start;
-		char *sep;
-		int line_len;
-
-		line_len = getline(&line, &n, file);
-		if (line_len < 0)
-			break;
-
-		if (!line)
-			goto out_failure;
-
-		line[--line_len] = '\0'; /* \n */
-
-		sep = strrchr(line, 'x');
-		if (sep == NULL)
-			continue;
-
-		hex2u64(sep + 1, &start);
-
-		sep = strchr(line, ' ');
-		if (sep == NULL)
-			continue;
-
-		*sep = '\0';
-
-		snprintf(name, sizeof(name), "[%s]", line);
-		map = machine__new_module(machine, start, name);
-		if (map == NULL)
-			goto out_delete_line;
-		dso__kernel_module_get_build_id(map->dso, machine->root_dir);
-	}
-
-	free(line);
-	fclose(file);
-
-	return machine__set_modules_path(machine);
-
-out_delete_line:
-	free(line);
-out_failure:
-	return -1;
-}
-
 int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, symbol_filter_t filter)
 {
@@ -1124,8 +923,10 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 	filename = dso__build_id_filename(dso, NULL, 0);
 	if (filename != NULL) {
 		err = dso__load_vmlinux(dso, map, filename, filter);
-		if (err > 0)
+		if (err > 0) {
+			dso->lname_alloc = 1;
 			goto out;
+		}
 		free(filename);
 	}
 
@@ -1133,6 +934,7 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 		err = dso__load_vmlinux(dso, map, vmlinux_path[i], filter);
 		if (err > 0) {
 			dso__set_long_name(dso, strdup(vmlinux_path[i]));
+			dso->lname_alloc = 1;
 			break;
 		}
 	}
@@ -1172,6 +974,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 		if (err > 0) {
 			dso__set_long_name(dso,
 					   strdup(symbol_conf.vmlinux_name));
+			dso->lname_alloc = 1;
 			goto out_fixup;
 		}
 		return err;
@@ -1300,195 +1103,6 @@ out_try_fixup:
 	return err;
 }
 
-size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp)
-{
-	struct rb_node *nd;
-	size_t ret = 0;
-
-	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos, fp);
-		ret += __dsos__fprintf(&pos->user_dsos, fp);
-	}
-
-	return ret;
-}
-
-size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
-				     bool with_hits)
-{
-	return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, with_hits) +
-	       __dsos__fprintf_buildid(&machine->user_dsos, fp, with_hits);
-}
-
-size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
-				      FILE *fp, bool with_hits)
-{
-	struct rb_node *nd;
-	size_t ret = 0;
-
-	for (nd = rb_first(machines); nd; nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += machine__fprintf_dsos_buildid(pos, fp, with_hits);
-	}
-	return ret;
-}
-
-static struct dso *machine__get_kernel(struct machine *machine)
-{
-	const char *vmlinux_name = NULL;
-	struct dso *kernel;
-
-	if (machine__is_host(machine)) {
-		vmlinux_name = symbol_conf.vmlinux_name;
-		if (!vmlinux_name)
-			vmlinux_name = "[kernel.kallsyms]";
-
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[kernel]",
-					     DSO_TYPE_KERNEL);
-	} else {
-		char bf[PATH_MAX];
-
-		if (machine__is_default_guest(machine))
-			vmlinux_name = symbol_conf.default_guest_vmlinux_name;
-		if (!vmlinux_name)
-			vmlinux_name = machine__mmap_name(machine, bf,
-							  sizeof(bf));
-
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[guest.kernel]",
-					     DSO_TYPE_GUEST_KERNEL);
-	}
-
-	if (kernel != NULL && (!kernel->has_build_id))
-		dso__read_running_kernel_build_id(kernel, machine);
-
-	return kernel;
-}
-
-struct process_args {
-	u64 start;
-};
-
-static int symbol__in_kernel(void *arg, const char *name,
-			     char type __maybe_unused, u64 start)
-{
-	struct process_args *args = arg;
-
-	if (strchr(name, '['))
-		return 0;
-
-	args->start = start;
-	return 1;
-}
-
-/* Figure out the start address of kernel map from /proc/kallsyms */
-static u64 machine__get_kernel_start_addr(struct machine *machine)
-{
-	const char *filename;
-	char path[PATH_MAX];
-	struct process_args args;
-
-	if (machine__is_host(machine)) {
-		filename = "/proc/kallsyms";
-	} else {
-		if (machine__is_default_guest(machine))
-			filename = (char *)symbol_conf.default_guest_kallsyms;
-		else {
-			sprintf(path, "%s/proc/kallsyms", machine->root_dir);
-			filename = path;
-		}
-	}
-
-	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
-		return 0;
-
-	if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0)
-		return 0;
-
-	return args.start;
-}
-
-int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
-{
-	enum map_type type;
-	u64 start = machine__get_kernel_start_addr(machine);
-
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-
-		machine->vmlinux_maps[type] = map__new2(start, kernel, type);
-		if (machine->vmlinux_maps[type] == NULL)
-			return -1;
-
-		machine->vmlinux_maps[type]->map_ip =
-			machine->vmlinux_maps[type]->unmap_ip =
-				identity__map_ip;
-		kmap = map__kmap(machine->vmlinux_maps[type]);
-		kmap->kmaps = &machine->kmaps;
-		map_groups__insert(&machine->kmaps,
-				   machine->vmlinux_maps[type]);
-	}
-
-	return 0;
-}
-
-void machine__destroy_kernel_maps(struct machine *machine)
-{
-	enum map_type type;
-
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-
-		if (machine->vmlinux_maps[type] == NULL)
-			continue;
-
-		kmap = map__kmap(machine->vmlinux_maps[type]);
-		map_groups__remove(&machine->kmaps,
-				   machine->vmlinux_maps[type]);
-		if (kmap->ref_reloc_sym) {
-			/*
-			 * ref_reloc_sym is shared among all maps, so free just
-			 * on one of them.
-			 */
-			if (type == MAP__FUNCTION) {
-				free((char *)kmap->ref_reloc_sym->name);
-				kmap->ref_reloc_sym->name = NULL;
-				free(kmap->ref_reloc_sym);
-			}
-			kmap->ref_reloc_sym = NULL;
-		}
-
-		map__delete(machine->vmlinux_maps[type]);
-		machine->vmlinux_maps[type] = NULL;
-	}
-}
-
-int machine__create_kernel_maps(struct machine *machine)
-{
-	struct dso *kernel = machine__get_kernel(machine);
-
-	if (kernel == NULL ||
-	    __machine__create_kernel_maps(machine, kernel) < 0)
-		return -1;
-
-	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
-		if (machine__is_host(machine))
-			pr_debug("Problems creating module maps, "
-				 "continuing anyway...\n");
-		else
-			pr_debug("Problems creating module maps for guest %d, "
-				 "continuing anyway...\n", machine->pid);
-	}
-
-	/*
-	 * Now that we have all the maps created, just set the ->end of them:
-	 */
-	map_groups__fixup_end(&machine->kmaps);
-	return 0;
-}
-
 static void vmlinux_path__exit(void)
 {
 	while (--vmlinux_path__nr_entries >= 0) {
@@ -1549,25 +1163,6 @@ out_fail:
 	return -1;
 }
 
-size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
-{
-	int i;
-	size_t printed = 0;
-	struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso;
-
-	if (kdso->has_build_id) {
-		char filename[PATH_MAX];
-		if (dso__build_id_filename(kdso, filename, sizeof(filename)))
-			printed += fprintf(fp, "[0] %s\n", filename);
-	}
-
-	for (i = 0; i < vmlinux_path__nr_entries; ++i)
-		printed += fprintf(fp, "[%d] %s\n",
-				   i + kdso->has_build_id, vmlinux_path[i]);
-
-	return printed;
-}
-
 static int setup_list(struct strlist **list, const char *list_str,
 		      const char *list_name)
 {
@@ -1671,108 +1266,3 @@ void symbol__exit(void)
 	symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
 	symbol_conf.initialized = false;
 }
-
-int machines__create_kernel_maps(struct rb_root *machines, pid_t pid)
-{
-	struct machine *machine = machines__findnew(machines, pid);
-
-	if (machine == NULL)
-		return -1;
-
-	return machine__create_kernel_maps(machine);
-}
-
-int machines__create_guest_kernel_maps(struct rb_root *machines)
-{
-	int ret = 0;
-	struct dirent **namelist = NULL;
-	int i, items = 0;
-	char path[PATH_MAX];
-	pid_t pid;
-	char *endp;
-
-	if (symbol_conf.default_guest_vmlinux_name ||
-	    symbol_conf.default_guest_modules ||
-	    symbol_conf.default_guest_kallsyms) {
-		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
-	}
-
-	if (symbol_conf.guestmount) {
-		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
-		if (items <= 0)
-			return -ENOENT;
-		for (i = 0; i < items; i++) {
-			if (!isdigit(namelist[i]->d_name[0])) {
-				/* Filter out . and .. */
-				continue;
-			}
-			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
-			if ((*endp != '\0') ||
-			    (endp == namelist[i]->d_name) ||
-			    (errno == ERANGE)) {
-				pr_debug("invalid directory (%s). Skipping.\n",
-					 namelist[i]->d_name);
-				continue;
-			}
-			sprintf(path, "%s/%s/proc/kallsyms",
-				symbol_conf.guestmount,
-				namelist[i]->d_name);
-			ret = access(path, R_OK);
-			if (ret) {
-				pr_debug("Can't access file %s\n", path);
-				goto failure;
-			}
-			machines__create_kernel_maps(machines, pid);
-		}
-failure:
-		free(namelist);
-	}
-
-	return ret;
-}
-
-void machines__destroy_guest_kernel_maps(struct rb_root *machines)
-{
-	struct rb_node *next = rb_first(machines);
-
-	while (next) {
-		struct machine *pos = rb_entry(next, struct machine, rb_node);
-
-		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, machines);
-		machine__delete(pos);
-	}
-}
-
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter)
-{
-	struct map *map = machine->vmlinux_maps[type];
-	int ret = dso__load_kallsyms(map->dso, filename, map, filter);
-
-	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
-		/*
-		 * Since /proc/kallsyms will have multiple sessions for the
-		 * kernel, with modules between them, fixup the end of all
-		 * sections.
-		 */
-		__map_groups__fixup_end(&machine->kmaps, type);
-	}
-
-	return ret;
-}
-
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
-			       symbol_filter_t filter)
-{
-	struct map *map = machine->vmlinux_maps[type];
-	int ret = dso__load_vmlinux_path(map->dso, map, filter);
-
-	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
-		map__reloc_vmlinux(map);
-	}
-
-	return ret;
-}
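
A note on the dso->lname_alloc assignments added in this file: the flag records that the long name was heap-allocated (strdup() of a vmlinux path or a build-id filename), so teardown code knows whether it owns the string. An illustrative sketch of that ownership convention (simplified types, not the actual dso code):

#include <stdbool.h>
#include <stdlib.h>

struct name_ref {
	const char *long_name;
	bool lname_alloc;	/* true iff long_name is ours to free */
};

static void name_ref__set(struct name_ref *n, const char *s, bool allocated)
{
	n->long_name = s;
	n->lname_alloc = allocated;
}

static void name_ref__exit(struct name_ref *n)
{
	if (n->lname_alloc)
		free((char *)n->long_name);
	n->long_name = NULL;
	n->lname_alloc = false;
}
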
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index de68f98b236d..b62ca37c4b77 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -16,8 +16,8 @@
 #ifdef LIBELF_SUPPORT
 #include <libelf.h>
 #include <gelf.h>
-#include <elf.h>
 #endif
+#include <elf.h>
 
 #include "dso.h"
 
@@ -96,7 +96,8 @@ struct symbol_conf {
 			initialized,
 			kptr_restrict,
 			annotate_asm_raw,
-			annotate_src;
+			annotate_src,
+			event_group;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
@@ -120,6 +121,8 @@ struct symbol_conf {
 };
 
 extern struct symbol_conf symbol_conf;
+extern int vmlinux_path__nr_entries;
+extern char **vmlinux_path;
 
 static inline void *symbol__priv(struct symbol *sym)
 {
@@ -223,6 +226,8 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
+bool symbol__restricted_filename(const char *filename,
+				 const char *restricted_filename);
 
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, symbol_filter_t filter,
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
index 48c6902e749f..f71e9eafe15a 100644
--- a/tools/perf/util/sysfs.c
+++ b/tools/perf/util/sysfs.c
@@ -8,7 +8,7 @@ static const char * const sysfs_known_mountpoints[] = {
 };
 
 static int sysfs_found;
-char sysfs_mountpoint[PATH_MAX];
+char sysfs_mountpoint[PATH_MAX + 1];
 
 static int sysfs_valid_mountpoint(const char *sysfs)
 {
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index df59623ac763..632e40e5ceca 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -54,10 +54,10 @@ int thread__comm_len(struct thread *self)
 	return self->comm_len;
 }
 
-static size_t thread__fprintf(struct thread *self, FILE *fp)
+size_t thread__fprintf(struct thread *thread, FILE *fp)
 {
-	return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) +
-	       map_groups__fprintf(&self->mg, verbose, fp);
+	return fprintf(fp, "Thread %d %s\n", thread->pid, thread->comm) +
+	       map_groups__fprintf(&thread->mg, verbose, fp);
 }
 
 void thread__insert_map(struct thread *self, struct map *map)
@@ -84,17 +84,3 @@ int thread__fork(struct thread *self, struct thread *parent)
 		return -ENOMEM;
 	return 0;
 }
-
-size_t machine__fprintf(struct machine *machine, FILE *fp)
-{
-	size_t ret = 0;
-	struct rb_node *nd;
-
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
-
-		ret += thread__fprintf(pos, fp);
-	}
-
-	return ret;
-}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index f2fa17caa7d5..5ad266403098 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -30,6 +30,7 @@ int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
+size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 static inline struct map *thread__find_map(struct thread *self,
 					   enum map_type type, u64 addr)
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 884dde9b9bc1..54d37a4753c5 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -26,6 +26,8 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 	float samples_per_sec = top->samples / top->delay_secs;
 	float ksamples_per_sec = top->kernel_samples / top->delay_secs;
 	float esamples_percent = (100.0 * top->exact_samples) / top->samples;
+	struct perf_record_opts *opts = &top->record_opts;
+	struct perf_target *target = &opts->target;
 	size_t ret = 0;
 
 	if (!perf_guest) {
@@ -61,31 +63,31 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 		struct perf_evsel *first = perf_evlist__first(top->evlist);
 		ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
 				(uint64_t)first->attr.sample_period,
-				top->freq ? "Hz" : "");
+				opts->freq ? "Hz" : "");
 	}
 
 	ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
 
 	ret += SNPRINTF(bf + ret, size - ret, "], ");
 
-	if (top->target.pid)
+	if (target->pid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
-				top->target.pid);
-	else if (top->target.tid)
+				target->pid);
+	else if (target->tid)
 		ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
-				top->target.tid);
-	else if (top->target.uid_str != NULL)
+				target->tid);
+	else if (target->uid_str != NULL)
 		ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
-				top->target.uid_str);
+				target->uid_str);
 	else
 		ret += SNPRINTF(bf + ret, size - ret, " (all");
 
-	if (top->target.cpu_list)
+	if (target->cpu_list)
 		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
 				top->evlist->cpus->nr > 1 ? "s" : "",
-				top->target.cpu_list);
+				target->cpu_list);
 	else {
-		if (top->target.tid)
+		if (target->tid)
 			ret += SNPRINTF(bf + ret, size - ret, ")");
 		else
 			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
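
With the target now embedded in struct perf_record_opts, call sites grab a pointer once instead of repeating top->target.* everywhere, as the hunk above does. A hedged sketch of the same access pattern (perf-tree types; the helper itself is hypothetical):

/* Sketch only: depends on perf's top.h and target.h. */
static const char *perf_top__target_str(struct perf_top *top)
{
	struct perf_target *target = &top->record_opts.target;

	if (target->pid)
		return target->pid;
	if (target->tid)
		return target->tid;
	return target->uid_str ? target->uid_str : "all";
}
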
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 86ff1b15059b..7ebf357dc9e1 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -14,7 +14,7 @@ struct perf_session;
 struct perf_top {
 	struct perf_tool   tool;
 	struct perf_evlist *evlist;
-	struct perf_target target;
+	struct perf_record_opts record_opts;
 	/*
 	 * Symbols will be added here in perf_event__process_sample and will
 	 * get out after decayed.
@@ -24,24 +24,16 @@ struct perf_top {
 	u64		   exact_samples;
 	u64		   guest_us_samples, guest_kernel_samples;
 	int		   print_entries, count_filter, delay_secs;
-	int		   freq;
 	bool		   hide_kernel_symbols, hide_user_symbols, zero;
 	bool		   use_tui, use_stdio;
 	bool		   sort_has_symbols;
-	bool		   dont_use_callchains;
 	bool		   kptr_restrict_warned;
 	bool		   vmlinux_warned;
-	bool		   inherit;
-	bool		   group;
-	bool		   sample_id_all_missing;
-	bool		   exclude_guest_missing;
 	bool		   dump_symtab;
 	struct hist_entry  *sym_filter_entry;
 	struct perf_evsel  *sym_evsel;
 	struct perf_session *session;
 	struct winsize	   winsize;
-	unsigned int	   mmap_pages;
-	int		   default_interval;
 	int		   realtime_prio;
 	int		   sym_pcnt_filter;
 	const char	   *sym_filter;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 5906e8426cc7..805d1f52c5b4 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -12,6 +12,8 @@
  */
 unsigned int page_size;
 
+bool test_attr__enabled;
+
 bool perf_host = true;
 bool perf_guest = false;
 
@@ -218,3 +220,25 @@ void dump_stack(void)
 #else
 void dump_stack(void) {}
 #endif
+
+void get_term_dimensions(struct winsize *ws)
+{
+	char *s = getenv("LINES");
+
+	if (s != NULL) {
+		ws->ws_row = atoi(s);
+		s = getenv("COLUMNS");
+		if (s != NULL) {
+			ws->ws_col = atoi(s);
+			if (ws->ws_row && ws->ws_col)
+				return;
+		}
+	}
+#ifdef TIOCGWINSZ
+	if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+	    ws->ws_row && ws->ws_col)
+		return;
+#endif
+	ws->ws_row = 25;
+	ws->ws_col = 80;
+}
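
get_term_dimensions() tries the LINES/COLUMNS environment overrides first, then the TIOCGWINSZ ioctl on stdout (fd 1), and finally falls back to 25x80. A minimal standalone caller (struct winsize comes from <sys/ioctl.h>; the function body would be the one added above):

#include <stdio.h>
#include <sys/ioctl.h>

void get_term_dimensions(struct winsize *ws);	/* as added above */

int main(void)
{
	struct winsize ws;

	get_term_dimensions(&ws);
	printf("%hu rows x %hu cols\n", ws.ws_row, ws.ws_col);
	return 0;
}
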
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c2330918110c..09b4c26b71aa 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -265,10 +265,14 @@ bool is_power_of_2(unsigned long n)
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
+char *ltrim(char *s);
 char *rtrim(char *s);
 
 void dump_stack(void);
 
 extern unsigned int page_size;
 
+struct winsize;
+void get_term_dimensions(struct winsize *ws);
+
 #endif