aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorJames Morris <james.morris@microsoft.com>2018-06-25 12:20:40 -0400
committerJames Morris <james.morris@microsoft.com>2018-06-25 12:20:40 -0400
commit57b54d74dd5c559bd35f2affaf11d8828aaf5733 (patch)
tree025d99f0d6b298fbdbf3ce942ec7868131444090 /tools
parent65793f3cd93abf4ca1109f78e07c1b7193abdfec (diff)
parent7daf201d7fe8334e2d2364d4e8ed3394ec9af819 (diff)
Merge tag 'v4.18-rc2' into next-general
Merge to Linux 4.18-rc2 for security subsystem developers.
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h21
-rw-r--r--tools/bpf/Makefile2
-rw-r--r--tools/bpf/bpf_dbg.c7
-rw-r--r--tools/bpf/bpf_exp.l2
-rw-r--r--tools/bpf/bpftool/.gitignore3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst53
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst81
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst7
-rw-r--r--tools/bpf/bpftool/Makefile7
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool60
-rw-r--r--tools/bpf/bpftool/cgroup.c17
-rw-r--r--tools/bpf/bpftool/common.c77
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h8
-rw-r--r--tools/bpf/bpftool/map.c90
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c306
-rw-r--r--tools/bpf/bpftool/perf.c247
-rw-r--r--tools/bpf/bpftool/prog.c116
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c14
-rw-r--r--tools/bpf/bpftool/xlated_dumper.h3
-rw-r--r--tools/iio/iio_generic_buffer.c13
-rw-r--r--tools/include/linux/compiler-gcc.h3
-rw-r--r--tools/include/linux/filter.h10
-rw-r--r--tools/include/linux/spinlock.h3
-rw-r--r--tools/include/uapi/asm/bitsperlong.h18
-rw-r--r--tools/include/uapi/asm/errno.h18
-rw-r--r--tools/include/uapi/linux/bpf.h2220
-rw-r--r--tools/include/uapi/linux/btf.h113
-rw-r--r--tools/include/uapi/linux/erspan.h52
-rw-r--r--tools/include/uapi/linux/kvm.h7
-rw-r--r--tools/include/uapi/linux/lirc.h217
-rw-r--r--tools/include/uapi/linux/prctl.h14
-rw-r--r--tools/include/uapi/linux/seg6.h55
-rw-r--r--tools/include/uapi/linux/seg6_local.h80
-rw-r--r--tools/lib/api/fs/fs.c2
-rw-r--r--tools/lib/api/fs/tracing_path.c40
-rw-r--r--tools/lib/api/fs/tracing_path.h9
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile3
-rw-r--r--tools/lib/bpf/bpf.c127
-rw-r--r--tools/lib/bpf/bpf.h23
-rw-r--r--tools/lib/bpf/btf.c373
-rw-r--r--tools/lib/bpf/btf.h22
-rw-r--r--tools/lib/bpf/libbpf.c291
-rw-r--r--tools/lib/bpf/libbpf.h69
-rw-r--r--tools/lib/symbol/kallsyms.c6
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt7
-rw-r--r--tools/memory-model/Documentation/explanation.txt221
-rw-r--r--tools/memory-model/Documentation/references.txt17
-rw-r--r--tools/memory-model/README2
-rw-r--r--tools/memory-model/linux-kernel.bell4
-rw-r--r--tools/memory-model/linux-kernel.cat41
-rw-r--r--tools/memory-model/linux-kernel.def34
-rw-r--r--tools/memory-model/litmus-tests/.gitignore1
-rw-r--r--tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus2
-rw-r--r--tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus35
-rw-r--r--tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus34
-rw-r--r--tools/memory-model/litmus-tests/README19
-rw-r--r--tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus4
-rw-r--r--tools/memory-model/lock.cat107
-rw-r--r--tools/memory-model/scripts/checkalllitmus.sh73
-rw-r--r--tools/memory-model/scripts/checklitmus.sh86
-rw-r--r--tools/objtool/arch/x86/include/asm/insn.h18
-rw-r--r--tools/objtool/check.c190
-rw-r--r--tools/objtool/elf.c42
-rw-r--r--tools/objtool/elf.h2
-rw-r--r--tools/perf/Documentation/Makefile29
-rw-r--r--tools/perf/Documentation/asciidoctor-extensions.rb29
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt7
-rw-r--r--tools/perf/Documentation/perf-list.txt6
-rw-r--r--tools/perf/Documentation/perf-record.txt3
-rw-r--r--tools/perf/Documentation/perf-script-python.txt26
-rw-r--r--tools/perf/Documentation/perf-stat.txt59
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt10
-rw-r--r--tools/perf/Makefile.config14
-rw-r--r--tools/perf/Makefile.perf10
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/common.c4
-rw-r--r--tools/perf/arch/common.h4
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c3
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c2
-rw-r--r--tools/perf/arch/x86/util/Build2
-rw-r--r--tools/perf/arch/x86/util/event.c76
-rw-r--r--tools/perf/arch/x86/util/machine.c103
-rw-r--r--tools/perf/bench/numa.c2
-rw-r--r--tools/perf/builtin-annotate.c45
-rw-r--r--tools/perf/builtin-buildid-cache.c81
-rw-r--r--tools/perf/builtin-c2c.c12
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-kallsyms.c2
-rw-r--r--tools/perf/builtin-kmem.c6
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-probe.c3
-rw-r--r--tools/perf/builtin-report.c66
-rw-r--r--tools/perf/builtin-sched.c14
-rw-r--r--tools/perf/builtin-script.c80
-rw-r--r--tools/perf/builtin-stat.c168
-rw-r--r--tools/perf/builtin-timechart.c8
-rw-r--r--tools/perf/builtin-top.c57
-rw-r--r--tools/perf/builtin-trace.c13
-rwxr-xr-xtools/perf/check-headers.sh30
-rw-r--r--tools/perf/examples/bpf/5sec.c49
-rw-r--r--tools/perf/examples/bpf/empty.c3
-rw-r--r--tools/perf/include/bpf/bpf.h13
-rw-r--r--tools/perf/perf.c25
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv1
-rw-r--r--tools/perf/scripts/python/bin/powerpc-hcalls-record2
-rw-r--r--tools/perf/scripts/python/bin/powerpc-hcalls-report2
-rw-r--r--tools/perf/scripts/python/powerpc-hcalls.py200
-rw-r--r--tools/perf/tests/builtin-test.c9
-rw-r--r--tools/perf/tests/code-reading.c6
-rw-r--r--tools/perf/tests/hists_common.c6
-rw-r--r--tools/perf/tests/kmod-path.c16
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c7
-rw-r--r--tools/perf/tests/parse-events.c26
-rw-r--r--tools/perf/tests/python-use.c3
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh14
-rw-r--r--tools/perf/tests/topology.c30
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c20
-rwxr-xr-xtools/perf/trace/beauty/prctl_option.sh2
-rw-r--r--tools/perf/ui/browsers/annotate.c29
-rw-r--r--tools/perf/ui/browsers/hists.c43
-rw-r--r--tools/perf/ui/browsers/hists.h3
-rw-r--r--tools/perf/ui/browsers/map.c2
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/ui/gtk/hists.c5
-rw-r--r--tools/perf/ui/hist.c2
-rw-r--r--tools/perf/ui/stdio/hist.c7
-rw-r--r--tools/perf/util/Build3
-rw-r--r--tools/perf/util/annotate.c225
-rw-r--r--tools/perf/util/annotate.h62
-rw-r--r--tools/perf/util/auxtrace.c12
-rw-r--r--tools/perf/util/bpf-loader.c6
-rw-r--r--tools/perf/util/bpf-prologue.c2
-rw-r--r--tools/perf/util/build-id.c4
-rw-r--r--tools/perf/util/cgroup.c9
-rw-r--r--tools/perf/util/config.c16
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c12
-rw-r--r--tools/perf/util/cs-etm.c32
-rw-r--r--tools/perf/util/db-export.c7
-rw-r--r--tools/perf/util/dso.c36
-rw-r--r--tools/perf/util/dso.h37
-rw-r--r--tools/perf/util/env.c31
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c73
-rw-r--r--tools/perf/util/event.h8
-rw-r--r--tools/perf/util/evlist.c15
-rw-r--r--tools/perf/util/evlist.h3
-rw-r--r--tools/perf/util/evsel.c6
-rw-r--r--tools/perf/util/evsel.h6
-rw-r--r--tools/perf/util/genelf.c2
-rw-r--r--tools/perf/util/header.c24
-rw-r--r--tools/perf/util/hist.c27
-rw-r--r--tools/perf/util/hist.h26
-rw-r--r--tools/perf/util/intel-bts.c3
-rw-r--r--tools/perf/util/intel-pt-decoder/insn.h18
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c23
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h9
-rw-r--r--tools/perf/util/intel-pt.c13
-rw-r--r--tools/perf/util/llvm-utils.c19
-rw-r--r--tools/perf/util/machine.c355
-rw-r--r--tools/perf/util/machine.h72
-rw-r--r--tools/perf/util/map.c147
-rw-r--r--tools/perf/util/map.h75
-rw-r--r--tools/perf/util/parse-events.c205
-rw-r--r--tools/perf/util/parse-events.h7
-rw-r--r--tools/perf/util/parse-events.l18
-rw-r--r--tools/perf/util/parse-events.y31
-rw-r--r--tools/perf/util/probe-event.c32
-rw-r--r--tools/perf/util/probe-file.c3
-rw-r--r--tools/perf/util/quote.c62
-rw-r--r--tools/perf/util/quote.h31
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c252
-rw-r--r--tools/perf/util/session.c15
-rw-r--r--tools/perf/util/sort.c91
-rw-r--r--tools/perf/util/sort.h13
-rw-r--r--tools/perf/util/srcline.c1
-rw-r--r--tools/perf/util/stat.h3
-rw-r--r--tools/perf/util/symbol-elf.c494
-rw-r--r--tools/perf/util/symbol-minimal.c3
-rw-r--r--tools/perf/util/symbol.c265
-rw-r--r--tools/perf/util/symbol.h27
-rw-r--r--tools/perf/util/symbol_fprintf.c4
-rw-r--r--tools/perf/util/thread.c35
-rw-r--r--tools/perf/util/thread.h13
-rw-r--r--tools/perf/util/top.h3
-rw-r--r--tools/perf/util/trace-event-info.c11
-rw-r--r--tools/perf/util/trace-event.c8
-rw-r--r--tools/perf/util/unwind-libdw.c23
-rw-r--r--tools/perf/util/unwind-libunwind-local.c19
-rw-r--r--tools/perf/util/util.c34
-rw-r--r--tools/perf/util/util.h4
-rw-r--r--tools/perf/util/vdso.c6
-rw-r--r--tools/power/acpi/Makefile.config1
-rw-r--r--tools/power/cpupower/bench/parse.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c15
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c35
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h9
-rwxr-xr-xtools/power/pm-graph/bootgraph.py2
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg4
-rw-r--r--tools/power/pm-graph/sleepgraph.84
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py399
-rwxr-xr-xtools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py54
-rw-r--r--tools/power/x86/turbostat/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.826
-rw-r--r--tools/power/x86/turbostat/turbostat.c1040
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile2
-rw-r--r--tools/testing/nvdimm/test/nfit.c104
-rw-r--r--tools/testing/radix-tree/Makefile6
-rw-r--r--tools/testing/radix-tree/idr-test.c7
-rw-r--r--tools/testing/radix-tree/multiorder.c63
-rw-r--r--tools/testing/radix-tree/test.c19
-rw-r--r--tools/testing/radix-tree/test.h3
-rw-r--r--tools/testing/selftests/Makefile7
-rw-r--r--tools/testing/selftests/android/Makefile8
-rwxr-xr-xtools/testing/selftests/android/ion/ion_test.sh7
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/Makefile57
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h39
-rw-r--r--tools/testing/selftests/bpf/bpf_rand.h80
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c57
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config12
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_kern.c40
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c149
-rw-r--r--tools/testing/selftests/bpf/sendmsg4_prog.c49
-rw-r--r--tools/testing/selftests/bpf/sendmsg6_prog.c60
-rw-r--r--tools/testing/selftests/bpf/test_adjust_tail.c30
-rw-r--r--tools/testing/selftests/bpf/test_btf.c2315
-rw-r--r--tools/testing/selftests/bpf/test_btf_haskv.c48
-rw-r--r--tools/testing/selftests/bpf/test_btf_nokv.c43
-rw-r--r--tools/testing/selftests/bpf/test_get_stack_rawtp.c102
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh28
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_kern.c23
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c149
-rw-r--r--tools/testing/selftests/bpf/test_lwt_seg6local.c437
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh140
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py12
-rw-r--r--tools/testing/selftests/bpf/test_progs.c568
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1155
-rw-r--r--tools/testing/selftests/bpf/test_sockhash_kern.c5
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c1524
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.c5
-rw-r--r--tools/testing/selftests/bpf/test_sockmap_kern.h363
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_build_id.c20
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_map.c19
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh731
-rw-r--r--tools/testing/selftests/bpf/test_tunnel_kern.c713
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c998
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c165
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h21
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c10
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c6
-rw-r--r--tools/testing/selftests/cgroup/Makefile10
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c331
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h41
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c1015
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh14
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh5
-rwxr-xr-xtools/testing/selftests/drivers/usb/usbip/usbip_test.sh198
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh7
-rw-r--r--tools/testing/selftests/exec/execveat.c6
-rw-r--r--tools/testing/selftests/filesystems/Makefile1
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c15
-rwxr-xr-xtools/testing/selftests/firmware/fw_fallback.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh7
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions23
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc49
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc74
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc68
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc66
-rw-r--r--tools/testing/selftests/futex/Makefile12
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh12
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c6
-rwxr-xr-xtools/testing/selftests/intel_pstate/run.sh47
-rw-r--r--tools/testing/selftests/ipc/msgque.c7
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh13
-rw-r--r--tools/testing/selftests/kselftest.h2
-rw-r--r--tools/testing/selftests/kvm/.gitignore3
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h1
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c9
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c16
-rw-r--r--tools/testing/selftests/kvm/sync_regs_test.c40
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c6
-rw-r--r--tools/testing/selftests/lib.mk67
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rwxr-xr-xtools/testing/selftests/lib/bitmap.sh8
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh9
-rwxr-xr-xtools/testing/selftests/lib/printf.sh8
-rw-r--r--tools/testing/selftests/locking/Makefile10
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/locking/ww_mutex.sh8
-rw-r--r--tools/testing/selftests/media_tests/Makefile3
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c8
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c10
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test.c7
-rw-r--r--tools/testing/selftests/memfd/Makefile6
-rwxr-xr-xtools/testing/selftests/memfd/run_hugetlbfs_test.sh (renamed from tools/testing/selftests/memfd/run_tests.sh)18
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile5
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh14
-rw-r--r--tools/testing/selftests/mount/Makefile12
-rwxr-xr-xtools/testing/selftests/mount/run_tests.sh12
-rw-r--r--tools/testing/selftests/mqueue/Makefile12
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c37
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c8
-rw-r--r--tools/testing/selftests/net/.gitignore5
-rw-r--r--tools/testing/selftests/net/Makefile8
-rw-r--r--tools/testing/selftests/net/config5
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh248
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/fib_tests.sh868
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh26
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh190
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh159
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh226
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh121
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh278
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh137
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh130
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh115
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh127
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh94
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh92
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh270
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh132
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh101
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh131
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh25
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_shblocks.sh5
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh8
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh16
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh9
-rw-r--r--tools/testing/selftests/net/psock_snd.c397
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh98
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c4
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c4
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh134
-rw-r--r--tools/testing/selftests/net/tcp_inq.c189
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c447
-rw-r--r--tools/testing/selftests/net/udpgso.c693
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh29
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh74
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c265
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c420
-rw-r--r--tools/testing/selftests/powerpc/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/alignment/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c7
-rw-r--r--tools/testing/selftests/powerpc/context_switch/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/context_switch/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/context_switch/cp_abort.c110
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/ptrace/child.h139
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c461
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c195
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c342
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c327
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace.h38
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore1
-rw-r--r--tools/testing/selftests/proc/.gitignore3
-rw-r--r--tools/testing/selftests/proc/Makefile5
-rw-r--r--tools/testing/selftests/proc/fd-001-lookup.c168
-rw-r--r--tools/testing/selftests/proc/fd-002-posix-eq.c57
-rw-r--r--tools/testing/selftests/proc/fd-003-kthread.c178
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h16
-rw-r--r--tools/testing/selftests/proc/proc.h39
-rw-r--r--tools/testing/selftests/proc/read.c17
-rwxr-xr-xtools/testing/selftests/pstore/pstore_post_reboot_tests5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh12
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh115
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-torture.sh105
-rw-r--r--tools/testing/selftests/rseq/.gitignore6
-rw-r--r--tools/testing/selftests/rseq/Makefile30
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c312
-rw-r--r--tools/testing/selftests/rseq/basic_test.c56
-rw-r--r--tools/testing/selftests/rseq/param_test.c1284
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h716
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h725
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h671
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h65
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h1132
-rw-r--r--tools/testing/selftests/rseq/rseq.c117
-rw-r--r--tools/testing/selftests/rseq/rseq.h149
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh121
-rw-r--r--tools/testing/selftests/rtc/.gitignore2
-rw-r--r--tools/testing/selftests/rtc/Makefile9
-rw-r--r--tools/testing/selftests/rtc/rtctest.c238
-rw-r--r--tools/testing/selftests/rtc/setdate.c (renamed from tools/testing/selftests/timers/rtctest_setdate.c)0
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c22
-rw-r--r--tools/testing/selftests/sparc64/Makefile50
-rw-r--r--tools/testing/selftests/sparc64/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/sparc64/drivers/Makefile15
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c721
-rwxr-xr-xtools/testing/selftests/sparc64/drivers/drivers_test.sh30
-rwxr-xr-xtools/testing/selftests/sparc64/run.sh3
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh13
-rw-r--r--tools/testing/selftests/sync/config4
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh20
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json11
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json74
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json1036
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json4
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json588
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json320
-rw-r--r--tools/testing/selftests/timers/.gitignore3
-rw-r--r--tools/testing/selftests/timers/Makefile4
-rw-r--r--tools/testing/selftests/timers/rtcpie.c134
-rw-r--r--tools/testing/selftests/timers/rtctest.c403
-rw-r--r--tools/testing/selftests/uevent/Makefile17
-rw-r--r--tools/testing/selftests/uevent/config2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c486
-rwxr-xr-xtools/testing/selftests/user/test_user_copy.sh7
-rw-r--r--tools/testing/selftests/vm/compaction_test.c4
-rw-r--r--tools/testing/selftests/vm/mlock2-tests.c12
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests5
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c4
-rw-r--r--tools/testing/selftests/x86/Makefile10
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c285
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c7
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h20
-rw-r--r--tools/testing/selftests/x86/protection_keys.c254
-rw-r--r--tools/testing/selftests/x86/trivial_program.c10
-rwxr-xr-xtools/testing/selftests/zram/zram.sh5
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh5
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.c32
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.h3
-rw-r--r--tools/usb/usbip/src/usbip_detach.c46
-rw-r--r--tools/virtio/linux/dma-mapping.h2
-rw-r--r--tools/vm/page-types.c1
449 files changed, 39904 insertions, 4204 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 2ba95d6fe852..caae4843cb70 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -195,6 +195,12 @@ struct kvm_arch_memory_slot {
195#define KVM_REG_ARM_VFP_FPINST 0x1009 195#define KVM_REG_ARM_VFP_FPINST 0x1009
196#define KVM_REG_ARM_VFP_FPINST2 0x100A 196#define KVM_REG_ARM_VFP_FPINST2 0x100A
197 197
198/* KVM-as-firmware specific pseudo-registers */
199#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
200#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
201 KVM_REG_ARM_FW | ((r) & 0xffff))
202#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
203
198/* Device Control API: ARM VGIC */ 204/* Device Control API: ARM VGIC */
199#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 205#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
200#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 206#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 9abbf3044654..04b3256f8e6d 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -206,6 +206,12 @@ struct kvm_arch_memory_slot {
206#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) 206#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
207#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) 207#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
208 208
209/* KVM-as-firmware specific pseudo-registers */
210#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
211#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
212 KVM_REG_ARM_FW | ((r) & 0xffff))
213#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
214
209/* Device Control API: ARM VGIC */ 215/* Device Control API: ARM VGIC */
210#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 216#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
211#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 217#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index d554c11e01ff..fb00a2fca990 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ 198#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ 199#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
200#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ 200#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
201
202#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 201#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
203#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 202#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
204#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 203#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
207#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ 206#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
208#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 207#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
209#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ 208#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
210 209#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
210#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
211#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ 211#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
212#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ 212#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
213#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ 213#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
214
215#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ 214#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
216#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ 215#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
216#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
217#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
218#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
219#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
220#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
221#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
217 222
218/* Virtualization flags: Linux defined, word 8 */ 223/* Virtualization flags: Linux defined, word 8 */
219#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ 224#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
274#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ 279#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
275#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ 280#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
276#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ 281#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
277#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ 282#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
278#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ 283#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
279#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ 284#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
285#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
280 286
281/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ 287/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
282#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ 288#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -320,6 +326,7 @@
320#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ 326#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
321#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ 327#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
322#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ 328#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
329#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
323 330
324/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ 331/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
325#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ 332#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
@@ -333,6 +340,7 @@
333#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ 340#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
334#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ 341#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
335#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ 342#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
343#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
336 344
337/* 345/*
338 * BUG word(s) 346 * BUG word(s)
@@ -362,5 +370,6 @@
362#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ 370#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
363#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ 371#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
364#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ 372#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
373#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
365 374
366#endif /* _ASM_X86_CPUFEATURES_H */ 375#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 1ea545965ee3..53b60ad452f5 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le
76 $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ 76 $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
77 77
78$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c 78$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
79$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
80$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
79 81
80clean: bpftool_clean 82clean: bpftool_clean
81 $(call QUIET_CLEAN, bpf-progs) 83 $(call QUIET_CLEAN, bpf-progs)
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 4f254bcc4423..61b9aa5d6415 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file)
1063 1063
1064static int cmd_load(char *arg) 1064static int cmd_load(char *arg)
1065{ 1065{
1066 char *subcmd, *cont, *tmp = strdup(arg); 1066 char *subcmd, *cont = NULL, *tmp = strdup(arg);
1067 int ret = CMD_OK; 1067 int ret = CMD_OK;
1068 1068
1069 subcmd = strtok_r(tmp, " ", &cont); 1069 subcmd = strtok_r(tmp, " ", &cont);
@@ -1073,7 +1073,10 @@ static int cmd_load(char *arg)
1073 bpf_reset(); 1073 bpf_reset();
1074 bpf_reset_breakpoints(); 1074 bpf_reset_breakpoints();
1075 1075
1076 ret = cmd_load_bpf(cont); 1076 if (!cont)
1077 ret = CMD_ERR;
1078 else
1079 ret = cmd_load_bpf(cont);
1077 } else if (matches(subcmd, "pcap") == 0) { 1080 } else if (matches(subcmd, "pcap") == 0) {
1078 ret = cmd_load_pcap(cont); 1081 ret = cmd_load_pcap(cont);
1079 } else { 1082 } else {
diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l
index bd83149e7be0..4da8d053d68f 100644
--- a/tools/bpf/bpf_exp.l
+++ b/tools/bpf/bpf_exp.l
@@ -175,7 +175,7 @@ extern void yyerror(const char *str);
175 yylval.number = strtol(yytext, NULL, 10); 175 yylval.number = strtol(yytext, NULL, 10);
176 return number; 176 return number;
177 } 177 }
178([0][0-9]+) { 178([0][0-7]+) {
179 yylval.number = strtol(yytext + 1, NULL, 8); 179 yylval.number = strtol(yytext + 1, NULL, 8);
180 return number; 180 return number;
181 } 181 }
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
new file mode 100644
index 000000000000..d7e678c2d396
--- /dev/null
+++ b/tools/bpf/bpftool/.gitignore
@@ -0,0 +1,3 @@
1*.d
2bpftool
3FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 0e4e923235b6..7b0e6d453e92 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -26,7 +26,9 @@ MAP COMMANDS
26| **bpftool** **cgroup help** 26| **bpftool** **cgroup help**
27| 27|
28| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 28| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
29| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** } 29| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
30| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
31| **sendmsg4** | **sendmsg6** }
30| *ATTACH_FLAGS* := { **multi** | **override** } 32| *ATTACH_FLAGS* := { **multi** | **override** }
31 33
32DESCRIPTION 34DESCRIPTION
@@ -63,7 +65,17 @@ DESCRIPTION
63 **egress** egress path of the inet socket (since 4.10); 65 **egress** egress path of the inet socket (since 4.10);
64 **sock_create** opening of an inet socket (since 4.10); 66 **sock_create** opening of an inet socket (since 4.10);
65 **sock_ops** various socket operations (since 4.12); 67 **sock_ops** various socket operations (since 4.12);
66 **device** device access (since 4.15). 68 **device** device access (since 4.15);
69 **bind4** call to bind(2) for an inet4 socket (since 4.17);
70 **bind6** call to bind(2) for an inet6 socket (since 4.17);
71 **post_bind4** return from bind(2) for an inet4 socket (since 4.17);
72 **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
73 **connect4** call to connect(2) for an inet4 socket (since 4.17);
74 **connect6** call to connect(2) for an inet6 socket (since 4.17);
75 **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
76 unconnected udp4 socket (since 4.18);
77 **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
78 unconnected udp6 socket (since 4.18).
67 79
68 **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* 80 **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
69 Detach *PROG* from the cgroup *CGROUP* and attach type 81 Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 457e868bd32f..a6258bc8ec4f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -22,17 +22,19 @@ MAP COMMANDS
22============= 22=============
23 23
24| **bpftool** **map { show | list }** [*MAP*] 24| **bpftool** **map { show | list }** [*MAP*]
25| **bpftool** **map dump** *MAP* 25| **bpftool** **map dump** *MAP*
26| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] 26| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
27| **bpftool** **map lookup** *MAP* **key** *BYTES* 27| **bpftool** **map lookup** *MAP* **key** *DATA*
28| **bpftool** **map getnext** *MAP* [**key** *BYTES*] 28| **bpftool** **map getnext** *MAP* [**key** *DATA*]
29| **bpftool** **map delete** *MAP* **key** *BYTES* 29| **bpftool** **map delete** *MAP* **key** *DATA*
30| **bpftool** **map pin** *MAP* *FILE* 30| **bpftool** **map pin** *MAP* *FILE*
31| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
31| **bpftool** **map help** 32| **bpftool** **map help**
32| 33|
33| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } 34| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
35| *DATA* := { [**hex**] *BYTES* }
34| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 36| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
35| *VALUE* := { *BYTES* | *MAP* | *PROG* } 37| *VALUE* := { *DATA* | *MAP* | *PROG* }
36| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } 38| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
37 39
38DESCRIPTION 40DESCRIPTION
@@ -48,20 +50,26 @@ DESCRIPTION
48 **bpftool map dump** *MAP* 50 **bpftool map dump** *MAP*
49 Dump all entries in a given *MAP*. 51 Dump all entries in a given *MAP*.
50 52
51 **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] 53 **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
52 Update map entry for a given *KEY*. 54 Update map entry for a given *KEY*.
53 55
54 *UPDATE_FLAGS* can be one of: **any** update existing entry 56 *UPDATE_FLAGS* can be one of: **any** update existing entry
55 or add if doesn't exit; **exist** update only if entry already 57 or add if doesn't exit; **exist** update only if entry already
56 exists; **noexist** update only if entry doesn't exist. 58 exists; **noexist** update only if entry doesn't exist.
57 59
58 **bpftool map lookup** *MAP* **key** *BYTES* 60 If the **hex** keyword is provided in front of the bytes
61 sequence, the bytes are parsed as hexadeximal values, even if
62 no "0x" prefix is added. If the keyword is not provided, then
63 the bytes are parsed as decimal values, unless a "0x" prefix
64 (for hexadecimal) or a "0" prefix (for octal) is provided.
65
66 **bpftool map lookup** *MAP* **key** *DATA*
59 Lookup **key** in the map. 67 Lookup **key** in the map.
60 68
61 **bpftool map getnext** *MAP* [**key** *BYTES*] 69 **bpftool map getnext** *MAP* [**key** *DATA*]
62 Get next key. If *key* is not specified, get first key. 70 Get next key. If *key* is not specified, get first key.
63 71
64 **bpftool map delete** *MAP* **key** *BYTES* 72 **bpftool map delete** *MAP* **key** *DATA*
65 Remove entry from the map. 73 Remove entry from the map.
66 74
67 **bpftool map pin** *MAP* *FILE* 75 **bpftool map pin** *MAP* *FILE*
@@ -69,6 +77,22 @@ DESCRIPTION
69 77
70 Note: *FILE* must be located in *bpffs* mount. 78 Note: *FILE* must be located in *bpffs* mount.
71 79
80 **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
81 Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
82
83 Install perf rings into a perf event array map and dump
84 output of any bpf_perf_event_output() call in the kernel.
85 By default read the number of CPUs on the system and
86 install perf ring for each CPU in the corresponding index
87 in the array.
88
89 If **cpu** and **index** are specified, install perf ring
90 for given **cpu** at **index** in the array (single ring).
91
92 Note that installing a perf ring into an array will silently
93 replace any existing ring. Any other application will stop
94 receiving events if it installed its rings earlier.
95
72 **bpftool map help** 96 **bpftool map help**
73 Print short help message. 97 Print short help message.
74 98
@@ -98,7 +122,12 @@ EXAMPLES
98 10: hash name some_map flags 0x0 122 10: hash name some_map flags 0x0
99 key 4B value 8B max_entries 2048 memlock 167936B 123 key 4B value 8B max_entries 2048 memlock 167936B
100 124
101**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04** 125The following three commands are equivalent:
126
127|
128| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c**
129| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
130| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76**
102 131
103**# bpftool map lookup id 10 key 0 1 2 3** 132**# bpftool map lookup id 10 key 0 1 2 3**
104 133
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 000000000000..e3eb0eab7641
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@
1================
2bpftool-perf
3================
4-------------------------------------------------------------------------------
5tool for inspection of perf related bpf prog attachments
6-------------------------------------------------------------------------------
7
8:Manual section: 8
9
10SYNOPSIS
11========
12
13 **bpftool** [*OPTIONS*] **perf** *COMMAND*
14
15 *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
16
17 *COMMANDS* :=
18 { **show** | **list** | **help** }
19
20PERF COMMANDS
21=============
22
23| **bpftool** **perf { show | list }**
24| **bpftool** **perf help**
25
26DESCRIPTION
27===========
28 **bpftool perf { show | list }**
29 List all raw_tracepoint, tracepoint, kprobe attachment in the system.
30
31 Output will start with process id and file descriptor in that process,
32 followed by bpf program id, attachment information, and attachment point.
33 The attachment point for raw_tracepoint/tracepoint is the trace probe name.
34 The attachment point for k[ret]probe is either symbol name and offset,
35 or a kernel virtual address.
36 The attachment point for u[ret]probe is the file name and the file offset.
37
38 **bpftool perf help**
39 Print short help message.
40
41OPTIONS
42=======
43 -h, --help
44 Print short generic help message (similar to **bpftool help**).
45
46 -v, --version
47 Print version number (similar to **bpftool version**).
48
49 -j, --json
50 Generate JSON output. For commands that cannot produce JSON, this
51 option has no effect.
52
53 -p, --pretty
54 Generate human-readable JSON output. Implies **-j**.
55
56EXAMPLES
57========
58
59| **# bpftool perf**
60
61::
62
63 pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
64 pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
65 pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
66 pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
67
68|
69| **# bpftool -j perf**
70
71::
72
73 [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
74 {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
75 {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
76 {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
77
78
79SEE ALSO
80========
81 **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 67ca6c69376c..43d34a5c3ec5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -95,7 +95,7 @@ EXAMPLES
95**# bpftool prog show** 95**# bpftool prog show**
96:: 96::
97 97
98 10: xdp name some_prog tag 005a3d2123620c8b 98 10: xdp name some_prog tag 005a3d2123620c8b gpl
99 loaded_at Sep 29/20:11 uid 0 99 loaded_at Sep 29/20:11 uid 0
100 xlated 528B jited 370B memlock 4096B map_ids 10 100 xlated 528B jited 370B memlock 4096B map_ids 10
101 101
@@ -108,6 +108,7 @@ EXAMPLES
108 "id": 10, 108 "id": 10,
109 "type": "xdp", 109 "type": "xdp",
110 "tag": "005a3d2123620c8b", 110 "tag": "005a3d2123620c8b",
111 "gpl_compatible": true,
111 "loaded_at": "Sep 29/20:11", 112 "loaded_at": "Sep 29/20:11",
112 "uid": 0, 113 "uid": 0,
113 "bytes_xlated": 528, 114 "bytes_xlated": 528,
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 20689a321ffe..b6f5d560460d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,20 +16,22 @@ SYNOPSIS
16 16
17 **bpftool** **version** 17 **bpftool** **version**
18 18
19 *OBJECT* := { **map** | **program** | **cgroup** } 19 *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
20 20
21 *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } 21 *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
22 | { **-j** | **--json** } [{ **-p** | **--pretty** }] } 22 | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
23 23
24 *MAP-COMMANDS* := 24 *MAP-COMMANDS* :=
25 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** 25 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
26 | **pin** | **help** } 26 | **pin** | **event_pipe** | **help** }
27 27
28 *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** 28 *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
29 | **load** | **help** } 29 | **load** | **help** }
30 30
31 *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } 31 *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
32 32
33 *PERF-COMMANDS* := { **show** | **list** | **help** }
34
33DESCRIPTION 35DESCRIPTION
34=========== 36===========
35 *bpftool* allows for inspection and simple modification of BPF objects 37 *bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
56SEE ALSO 58SEE ALSO
57======== 59========
58 **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) 60 **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
61 **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4e69782c4a79..892dbf095bff 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,12 @@ CC = gcc
39 39
40CFLAGS += -O2 40CFLAGS += -O2
41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers 41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
42CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ 42CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
43 -I$(srctree)/kernel/bpf/ \
44 -I$(srctree)/tools/include \
45 -I$(srctree)/tools/include/uapi \
46 -I$(srctree)/tools/lib/bpf \
47 -I$(srctree)/tools/perf
43CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' 48CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
44LIBS = -lelf -lbfd -lopcodes $(LIBBPF) 49LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
45 50
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 490811b45fa7..1e1083321643 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1,6 +1,6 @@
1# bpftool(8) bash completion -*- shell-script -*- 1# bpftool(8) bash completion -*- shell-script -*-
2# 2#
3# Copyright (C) 2017 Netronome Systems, Inc. 3# Copyright (C) 2017-2018 Netronome Systems, Inc.
4# 4#
5# This software is dual licensed under the GNU General License 5# This software is dual licensed under the GNU General License
6# Version 2, June 1991 as shown in the file COPYING in the top-level 6# Version 2, June 1991 as shown in the file COPYING in the top-level
@@ -79,6 +79,14 @@ _bpftool_get_map_ids()
79 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) 79 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
80} 80}
81 81
82_bpftool_get_perf_map_ids()
83{
84 COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
85 command grep -C2 perf_event_array | \
86 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
87}
88
89
82_bpftool_get_prog_ids() 90_bpftool_get_prog_ids()
83{ 91{
84 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ 92 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -147,7 +155,7 @@ _bpftool()
147 155
148 # Deal with simplest keywords 156 # Deal with simplest keywords
149 case $prev in 157 case $prev in
150 help|key|opcodes|visual) 158 help|hex|opcodes|visual)
151 return 0 159 return 0
152 ;; 160 ;;
153 tag) 161 tag)
@@ -283,7 +291,7 @@ _bpftool()
283 return 0 291 return 0
284 ;; 292 ;;
285 key) 293 key)
286 return 0 294 COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
287 ;; 295 ;;
288 *) 296 *)
289 _bpftool_once_attr 'key' 297 _bpftool_once_attr 'key'
@@ -302,7 +310,7 @@ _bpftool()
302 return 0 310 return 0
303 ;; 311 ;;
304 key) 312 key)
305 return 0 313 COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
306 ;; 314 ;;
307 value) 315 value)
308 # We can have bytes, or references to a prog or a 316 # We can have bytes, or references to a prog or a
@@ -321,6 +329,8 @@ _bpftool()
321 return 0 329 return 0
322 ;; 330 ;;
323 *) 331 *)
332 COMPREPLY+=( $( compgen -W 'hex' \
333 -- "$cur" ) )
324 return 0 334 return 0
325 ;; 335 ;;
326 esac 336 esac
@@ -357,10 +367,34 @@ _bpftool()
357 fi 367 fi
358 return 0 368 return 0
359 ;; 369 ;;
370 event_pipe)
371 case $prev in
372 $command)
373 COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
374 return 0
375 ;;
376 id)
377 _bpftool_get_perf_map_ids
378 return 0
379 ;;
380 cpu)
381 return 0
382 ;;
383 index)
384 return 0
385 ;;
386 *)
387 _bpftool_once_attr 'cpu'
388 _bpftool_once_attr 'index'
389 return 0
390 ;;
391 esac
392 ;;
360 *) 393 *)
361 [[ $prev == $object ]] && \ 394 [[ $prev == $object ]] && \
362 COMPREPLY=( $( compgen -W 'delete dump getnext help \ 395 COMPREPLY=( $( compgen -W 'delete dump getnext help \
363 lookup pin show list update' -- "$cur" ) ) 396 lookup pin event_pipe show list update' -- \
397 "$cur" ) )
364 ;; 398 ;;
365 esac 399 esac
366 ;; 400 ;;
@@ -372,7 +406,8 @@ _bpftool()
372 ;; 406 ;;
373 attach|detach) 407 attach|detach)
374 local ATTACH_TYPES='ingress egress sock_create sock_ops \ 408 local ATTACH_TYPES='ingress egress sock_create sock_ops \
375 device' 409 device bind4 bind6 post_bind4 post_bind6 connect4 \
410 connect6 sendmsg4 sendmsg6'
376 local ATTACH_FLAGS='multi override' 411 local ATTACH_FLAGS='multi override'
377 local PROG_TYPE='id pinned tag' 412 local PROG_TYPE='id pinned tag'
378 case $prev in 413 case $prev in
@@ -380,7 +415,9 @@ _bpftool()
380 _filedir 415 _filedir
381 return 0 416 return 0
382 ;; 417 ;;
383 ingress|egress|sock_create|sock_ops|device) 418 ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
419 post_bind4|post_bind6|connect4|connect6|sendmsg4|\
420 sendmsg6)
384 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ 421 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
385 "$cur" ) ) 422 "$cur" ) )
386 return 0 423 return 0
@@ -412,6 +449,15 @@ _bpftool()
412 ;; 449 ;;
413 esac 450 esac
414 ;; 451 ;;
452 perf)
453 case $command in
454 *)
455 [[ $prev == $object ]] && \
456 COMPREPLY=( $( compgen -W 'help \
457 show list' -- "$cur" ) )
458 ;;
459 esac
460 ;;
415 esac 461 esac
416} && 462} &&
417complete -F _bpftool bpftool 463complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cae32a61cb18..16bee011e16c 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -16,8 +16,11 @@
16#define HELP_SPEC_ATTACH_FLAGS \ 16#define HELP_SPEC_ATTACH_FLAGS \
17 "ATTACH_FLAGS := { multi | override }" 17 "ATTACH_FLAGS := { multi | override }"
18 18
19#define HELP_SPEC_ATTACH_TYPES \ 19#define HELP_SPEC_ATTACH_TYPES \
20 "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" 20 " ATTACH_TYPE := { ingress | egress | sock_create |\n" \
21 " sock_ops | device | bind4 | bind6 |\n" \
22 " post_bind4 | post_bind6 | connect4 |\n" \
23 " connect6 | sendmsg4 | sendmsg6 }"
21 24
22static const char * const attach_type_strings[] = { 25static const char * const attach_type_strings[] = {
23 [BPF_CGROUP_INET_INGRESS] = "ingress", 26 [BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -25,6 +28,14 @@ static const char * const attach_type_strings[] = {
25 [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", 28 [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
26 [BPF_CGROUP_SOCK_OPS] = "sock_ops", 29 [BPF_CGROUP_SOCK_OPS] = "sock_ops",
27 [BPF_CGROUP_DEVICE] = "device", 30 [BPF_CGROUP_DEVICE] = "device",
31 [BPF_CGROUP_INET4_BIND] = "bind4",
32 [BPF_CGROUP_INET6_BIND] = "bind6",
33 [BPF_CGROUP_INET4_CONNECT] = "connect4",
34 [BPF_CGROUP_INET6_CONNECT] = "connect6",
35 [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
36 [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
37 [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
38 [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
28 [__MAX_BPF_ATTACH_TYPE] = NULL, 39 [__MAX_BPF_ATTACH_TYPE] = NULL,
29}; 40};
30 41
@@ -282,7 +293,7 @@ static int do_help(int argc, char **argv)
282 " %s %s detach CGROUP ATTACH_TYPE PROG\n" 293 " %s %s detach CGROUP ATTACH_TYPE PROG\n"
283 " %s %s help\n" 294 " %s %s help\n"
284 "\n" 295 "\n"
285 " " HELP_SPEC_ATTACH_TYPES "\n" 296 HELP_SPEC_ATTACH_TYPES "\n"
286 " " HELP_SPEC_ATTACH_FLAGS "\n" 297 " " HELP_SPEC_ATTACH_FLAGS "\n"
287 " " HELP_SPEC_PROGRAM "\n" 298 " " HELP_SPEC_PROGRAM "\n"
288 " " HELP_SPEC_OPTIONS "\n" 299 " " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 465995281dcd..32f9e397a6c0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -33,6 +33,7 @@
33 33
34/* Author: Jakub Kicinski <kubakici@wp.pl> */ 34/* Author: Jakub Kicinski <kubakici@wp.pl> */
35 35
36#include <ctype.h>
36#include <errno.h> 37#include <errno.h>
37#include <fcntl.h> 38#include <fcntl.h>
38#include <fts.h> 39#include <fts.h>
@@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key)
330 return NULL; 331 return NULL;
331} 332}
332 333
334void print_data_json(uint8_t *data, size_t len)
335{
336 unsigned int i;
337
338 jsonw_start_array(json_wtr);
339 for (i = 0; i < len; i++)
340 jsonw_printf(json_wtr, "%d", data[i]);
341 jsonw_end_array(json_wtr);
342}
343
333void print_hex_data_json(uint8_t *data, size_t len) 344void print_hex_data_json(uint8_t *data, size_t len)
334{ 345{
335 unsigned int i; 346 unsigned int i;
@@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
420 } 431 }
421} 432}
422 433
434unsigned int get_page_size(void)
435{
436 static int result;
437
438 if (!result)
439 result = getpagesize();
440 return result;
441}
442
443unsigned int get_possible_cpus(void)
444{
445 static unsigned int result;
446 char buf[128];
447 long int n;
448 char *ptr;
449 int fd;
450
451 if (result)
452 return result;
453
454 fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
455 if (fd < 0) {
456 p_err("can't open sysfs possible cpus");
457 exit(-1);
458 }
459
460 n = read(fd, buf, sizeof(buf));
461 if (n < 2) {
462 p_err("can't read sysfs possible cpus");
463 exit(-1);
464 }
465 close(fd);
466
467 if (n == sizeof(buf)) {
468 p_err("read sysfs possible cpus overflow");
469 exit(-1);
470 }
471
472 ptr = buf;
473 n = 0;
474 while (*ptr && *ptr != '\n') {
475 unsigned int a, b;
476
477 if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
478 n += b - a + 1;
479
480 ptr = strchr(ptr, '-') + 1;
481 } else if (sscanf(ptr, "%u", &a) == 1) {
482 n++;
483 } else {
484 assert(0);
485 }
486
487 while (isdigit(*ptr))
488 ptr++;
489 if (*ptr == ',')
490 ptr++;
491 }
492
493 result = n;
494
495 return result;
496}
497
423static char * 498static char *
424ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) 499ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
425{ 500{
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ec852d21d44..eea7f14355f3 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
87 " %s batch file FILE\n" 87 " %s batch file FILE\n"
88 " %s version\n" 88 " %s version\n"
89 "\n" 89 "\n"
90 " OBJECT := { prog | map | cgroup }\n" 90 " OBJECT := { prog | map | cgroup | perf }\n"
91 " " HELP_SPEC_OPTIONS "\n" 91 " " HELP_SPEC_OPTIONS "\n"
92 "", 92 "",
93 bin_name, bin_name, bin_name); 93 bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
216 { "prog", do_prog }, 216 { "prog", do_prog },
217 { "map", do_map }, 217 { "map", do_map },
218 { "cgroup", do_cgroup }, 218 { "cgroup", do_cgroup },
219 { "perf", do_perf },
219 { "version", do_version }, 220 { "version", do_version },
220 { 0 } 221 { 0 }
221}; 222};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index b8e9584d6246..63fdb310b9a4 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -117,14 +117,20 @@ int do_pin_fd(int fd, const char *name);
117 117
118int do_prog(int argc, char **arg); 118int do_prog(int argc, char **arg);
119int do_map(int argc, char **arg); 119int do_map(int argc, char **arg);
120int do_event_pipe(int argc, char **argv);
120int do_cgroup(int argc, char **arg); 121int do_cgroup(int argc, char **arg);
122int do_perf(int argc, char **arg);
121 123
122int prog_parse_fd(int *argc, char ***argv); 124int prog_parse_fd(int *argc, char ***argv);
125int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
123 126
124void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 127void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
125 const char *arch); 128 const char *arch);
129void print_data_json(uint8_t *data, size_t len);
126void print_hex_data_json(uint8_t *data, size_t len); 130void print_hex_data_json(uint8_t *data, size_t len);
127 131
132unsigned int get_page_size(void);
133unsigned int get_possible_cpus(void);
128const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 134const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
129 135
130#endif 136#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index f509c86faede..097b1a5e046b 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -34,7 +34,6 @@
34/* Author: Jakub Kicinski <kubakici@wp.pl> */ 34/* Author: Jakub Kicinski <kubakici@wp.pl> */
35 35
36#include <assert.h> 36#include <assert.h>
37#include <ctype.h>
38#include <errno.h> 37#include <errno.h>
39#include <fcntl.h> 38#include <fcntl.h>
40#include <stdbool.h> 39#include <stdbool.h>
@@ -67,63 +66,9 @@ static const char * const map_type_name[] = {
67 [BPF_MAP_TYPE_DEVMAP] = "devmap", 66 [BPF_MAP_TYPE_DEVMAP] = "devmap",
68 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 67 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
69 [BPF_MAP_TYPE_CPUMAP] = "cpumap", 68 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
69 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
70}; 70};
71 71
72static unsigned int get_possible_cpus(void)
73{
74 static unsigned int result;
75 char buf[128];
76 long int n;
77 char *ptr;
78 int fd;
79
80 if (result)
81 return result;
82
83 fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
84 if (fd < 0) {
85 p_err("can't open sysfs possible cpus");
86 exit(-1);
87 }
88
89 n = read(fd, buf, sizeof(buf));
90 if (n < 2) {
91 p_err("can't read sysfs possible cpus");
92 exit(-1);
93 }
94 close(fd);
95
96 if (n == sizeof(buf)) {
97 p_err("read sysfs possible cpus overflow");
98 exit(-1);
99 }
100
101 ptr = buf;
102 n = 0;
103 while (*ptr && *ptr != '\n') {
104 unsigned int a, b;
105
106 if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
107 n += b - a + 1;
108
109 ptr = strchr(ptr, '-') + 1;
110 } else if (sscanf(ptr, "%u", &a) == 1) {
111 n++;
112 } else {
113 assert(0);
114 }
115
116 while (isdigit(*ptr))
117 ptr++;
118 if (*ptr == ',')
119 ptr++;
120 }
121
122 result = n;
123
124 return result;
125}
126
127static bool map_is_per_cpu(__u32 type) 72static bool map_is_per_cpu(__u32 type)
128{ 73{
129 return type == BPF_MAP_TYPE_PERCPU_HASH || 74 return type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -186,8 +131,7 @@ static int map_parse_fd(int *argc, char ***argv)
186 return -1; 131 return -1;
187} 132}
188 133
189static int 134int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
190map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
191{ 135{
192 int err; 136 int err;
193 int fd; 137 int fd;
@@ -283,11 +227,16 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
283static char **parse_bytes(char **argv, const char *name, unsigned char *val, 227static char **parse_bytes(char **argv, const char *name, unsigned char *val,
284 unsigned int n) 228 unsigned int n)
285{ 229{
286 unsigned int i = 0; 230 unsigned int i = 0, base = 0;
287 char *endptr; 231 char *endptr;
288 232
233 if (is_prefix(*argv, "hex")) {
234 base = 16;
235 argv++;
236 }
237
289 while (i < n && argv[i]) { 238 while (i < n && argv[i]) {
290 val[i] = strtoul(argv[i], &endptr, 0); 239 val[i] = strtoul(argv[i], &endptr, base);
291 if (*endptr) { 240 if (*endptr) {
292 p_err("error parsing byte: %s", argv[i]); 241 p_err("error parsing byte: %s", argv[i]);
293 return NULL; 242 return NULL;
@@ -868,23 +817,25 @@ static int do_help(int argc, char **argv)
868 817
869 fprintf(stderr, 818 fprintf(stderr,
870 "Usage: %s %s { show | list } [MAP]\n" 819 "Usage: %s %s { show | list } [MAP]\n"
871 " %s %s dump MAP\n" 820 " %s %s dump MAP\n"
872 " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" 821 " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n"
873 " %s %s lookup MAP key BYTES\n" 822 " %s %s lookup MAP key DATA\n"
874 " %s %s getnext MAP [key BYTES]\n" 823 " %s %s getnext MAP [key DATA]\n"
875 " %s %s delete MAP key BYTES\n" 824 " %s %s delete MAP key DATA\n"
876 " %s %s pin MAP FILE\n" 825 " %s %s pin MAP FILE\n"
826 " %s %s event_pipe MAP [cpu N index M]\n"
877 " %s %s help\n" 827 " %s %s help\n"
878 "\n" 828 "\n"
879 " MAP := { id MAP_ID | pinned FILE }\n" 829 " MAP := { id MAP_ID | pinned FILE }\n"
830 " DATA := { [hex] BYTES }\n"
880 " " HELP_SPEC_PROGRAM "\n" 831 " " HELP_SPEC_PROGRAM "\n"
881 " VALUE := { BYTES | MAP | PROG }\n" 832 " VALUE := { DATA | MAP | PROG }\n"
882 " UPDATE_FLAGS := { any | exist | noexist }\n" 833 " UPDATE_FLAGS := { any | exist | noexist }\n"
883 " " HELP_SPEC_OPTIONS "\n" 834 " " HELP_SPEC_OPTIONS "\n"
884 "", 835 "",
885 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 836 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
886 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 837 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
887 bin_name, argv[-2], bin_name, argv[-2]); 838 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
888 839
889 return 0; 840 return 0;
890} 841}
@@ -899,6 +850,7 @@ static const struct cmd cmds[] = {
899 { "getnext", do_getnext }, 850 { "getnext", do_getnext },
900 { "delete", do_delete }, 851 { "delete", do_delete },
901 { "pin", do_pin }, 852 { "pin", do_pin },
853 { "event_pipe", do_event_pipe },
902 { 0 } 854 { 0 }
903}; 855};
904 856
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644
index 000000000000..1832100d1b27
--- /dev/null
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -0,0 +1,306 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (C) 2018 Netronome Systems, Inc. */
3/* This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <errno.h>
8#include <fcntl.h>
9#include <libbpf.h>
10#include <poll.h>
11#include <signal.h>
12#include <stdbool.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <time.h>
17#include <unistd.h>
18#include <linux/bpf.h>
19#include <linux/perf_event.h>
20#include <sys/ioctl.h>
21#include <sys/mman.h>
22#include <sys/syscall.h>
23
24#include <bpf.h>
25#include <perf-sys.h>
26
27#include "main.h"
28
29#define MMAP_PAGE_CNT 16
30
31static bool stop;
32
33struct event_ring_info {
34 int fd;
35 int key;
36 unsigned int cpu;
37 void *mem;
38};
39
40struct perf_event_sample {
41 struct perf_event_header header;
42 u64 time;
43 __u32 size;
44 unsigned char data[];
45};
46
47static void int_exit(int signo)
48{
49 fprintf(stderr, "Stopping...\n");
50 stop = true;
51}
52
53static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv)
54{
55 struct event_ring_info *ring = priv;
56 struct perf_event_sample *e = event;
57 struct {
58 struct perf_event_header header;
59 __u64 id;
60 __u64 lost;
61 } *lost = event;
62
63 if (json_output) {
64 jsonw_start_object(json_wtr);
65 jsonw_name(json_wtr, "type");
66 jsonw_uint(json_wtr, e->header.type);
67 jsonw_name(json_wtr, "cpu");
68 jsonw_uint(json_wtr, ring->cpu);
69 jsonw_name(json_wtr, "index");
70 jsonw_uint(json_wtr, ring->key);
71 if (e->header.type == PERF_RECORD_SAMPLE) {
72 jsonw_name(json_wtr, "timestamp");
73 jsonw_uint(json_wtr, e->time);
74 jsonw_name(json_wtr, "data");
75 print_data_json(e->data, e->size);
76 } else if (e->header.type == PERF_RECORD_LOST) {
77 jsonw_name(json_wtr, "lost");
78 jsonw_start_object(json_wtr);
79 jsonw_name(json_wtr, "id");
80 jsonw_uint(json_wtr, lost->id);
81 jsonw_name(json_wtr, "count");
82 jsonw_uint(json_wtr, lost->lost);
83 jsonw_end_object(json_wtr);
84 }
85 jsonw_end_object(json_wtr);
86 } else {
87 if (e->header.type == PERF_RECORD_SAMPLE) {
88 printf("== @%lld.%09lld CPU: %d index: %d =====\n",
89 e->time / 1000000000ULL, e->time % 1000000000ULL,
90 ring->cpu, ring->key);
91 fprint_hex(stdout, e->data, e->size, " ");
92 printf("\n");
93 } else if (e->header.type == PERF_RECORD_LOST) {
94 printf("lost %lld events\n", lost->lost);
95 } else {
96 printf("unknown event type=%d size=%d\n",
97 e->header.type, e->header.size);
98 }
99 }
100
101 return LIBBPF_PERF_EVENT_CONT;
102}
103
104static void
105perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
106{
107 enum bpf_perf_event_ret ret;
108
109 ret = bpf_perf_event_read_simple(ring->mem,
110 MMAP_PAGE_CNT * get_page_size(),
111 get_page_size(), buf, buf_len,
112 print_bpf_output, ring);
113 if (ret != LIBBPF_PERF_EVENT_CONT) {
114 fprintf(stderr, "perf read loop failed with %d\n", ret);
115 stop = true;
116 }
117}
118
119static int perf_mmap_size(void)
120{
121 return get_page_size() * (MMAP_PAGE_CNT + 1);
122}
123
124static void *perf_event_mmap(int fd)
125{
126 int mmap_size = perf_mmap_size();
127 void *base;
128
129 base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
130 if (base == MAP_FAILED) {
131 p_err("event mmap failed: %s\n", strerror(errno));
132 return NULL;
133 }
134
135 return base;
136}
137
138static void perf_event_unmap(void *mem)
139{
140 if (munmap(mem, perf_mmap_size()))
141 fprintf(stderr, "Can't unmap ring memory!\n");
142}
143
144static int bpf_perf_event_open(int map_fd, int key, int cpu)
145{
146 struct perf_event_attr attr = {
147 .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
148 .type = PERF_TYPE_SOFTWARE,
149 .config = PERF_COUNT_SW_BPF_OUTPUT,
150 };
151 int pmu_fd;
152
153 pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
154 if (pmu_fd < 0) {
155 p_err("failed to open perf event %d for CPU %d", key, cpu);
156 return -1;
157 }
158
159 if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
160 p_err("failed to update map for event %d for CPU %d", key, cpu);
161 goto err_close;
162 }
163 if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
164 p_err("failed to enable event %d for CPU %d", key, cpu);
165 goto err_close;
166 }
167
168 return pmu_fd;
169
170err_close:
171 close(pmu_fd);
172 return -1;
173}
174
175int do_event_pipe(int argc, char **argv)
176{
177 int i, nfds, map_fd, index = -1, cpu = -1;
178 struct bpf_map_info map_info = {};
179 struct event_ring_info *rings;
180 size_t tmp_buf_sz = 0;
181 void *tmp_buf = NULL;
182 struct pollfd *pfds;
183 __u32 map_info_len;
184 bool do_all = true;
185
186 map_info_len = sizeof(map_info);
187 map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
188 if (map_fd < 0)
189 return -1;
190
191 if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
192 p_err("map is not a perf event array");
193 goto err_close_map;
194 }
195
196 while (argc) {
197 if (argc < 2)
198 BAD_ARG();
199
200 if (is_prefix(*argv, "cpu")) {
201 char *endptr;
202
203 NEXT_ARG();
204 cpu = strtoul(*argv, &endptr, 0);
205 if (*endptr) {
206 p_err("can't parse %s as CPU ID", **argv);
207 goto err_close_map;
208 }
209
210 NEXT_ARG();
211 } else if (is_prefix(*argv, "index")) {
212 char *endptr;
213
214 NEXT_ARG();
215 index = strtoul(*argv, &endptr, 0);
216 if (*endptr) {
217 p_err("can't parse %s as index", **argv);
218 goto err_close_map;
219 }
220
221 NEXT_ARG();
222 } else {
223 BAD_ARG();
224 }
225
226 do_all = false;
227 }
228
229 if (!do_all) {
230 if (index == -1 || cpu == -1) {
231 p_err("cpu and index must be specified together");
232 goto err_close_map;
233 }
234
235 nfds = 1;
236 } else {
237 nfds = min(get_possible_cpus(), map_info.max_entries);
238 cpu = 0;
239 index = 0;
240 }
241
242 rings = calloc(nfds, sizeof(rings[0]));
243 if (!rings)
244 goto err_close_map;
245
246 pfds = calloc(nfds, sizeof(pfds[0]));
247 if (!pfds)
248 goto err_free_rings;
249
250 for (i = 0; i < nfds; i++) {
251 rings[i].cpu = cpu + i;
252 rings[i].key = index + i;
253
254 rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
255 rings[i].cpu);
256 if (rings[i].fd < 0)
257 goto err_close_fds_prev;
258
259 rings[i].mem = perf_event_mmap(rings[i].fd);
260 if (!rings[i].mem)
261 goto err_close_fds_current;
262
263 pfds[i].fd = rings[i].fd;
264 pfds[i].events = POLLIN;
265 }
266
267 signal(SIGINT, int_exit);
268 signal(SIGHUP, int_exit);
269 signal(SIGTERM, int_exit);
270
271 if (json_output)
272 jsonw_start_array(json_wtr);
273
274 while (!stop) {
275 poll(pfds, nfds, 200);
276 for (i = 0; i < nfds; i++)
277 perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
278 }
279 free(tmp_buf);
280
281 if (json_output)
282 jsonw_end_array(json_wtr);
283
284 for (i = 0; i < nfds; i++) {
285 perf_event_unmap(rings[i].mem);
286 close(rings[i].fd);
287 }
288 free(pfds);
289 free(rings);
290 close(map_fd);
291
292 return 0;
293
294err_close_fds_prev:
295 while (i--) {
296 perf_event_unmap(rings[i].mem);
297err_close_fds_current:
298 close(rings[i].fd);
299 }
300 free(pfds);
301err_free_rings:
302 free(rings);
303err_close_map:
304 close(map_fd);
305 return -1;
306}
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 000000000000..b76b77dcfd1f
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,247 @@
1// SPDX-License-Identifier: GPL-2.0+
2// Copyright (C) 2018 Facebook
3// Author: Yonghong Song <yhs@fb.com>
4
5#define _GNU_SOURCE
6#include <ctype.h>
7#include <errno.h>
8#include <fcntl.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/stat.h>
12#include <sys/types.h>
13#include <unistd.h>
14#include <ftw.h>
15
16#include <bpf.h>
17
18#include "main.h"
19
20/* 0: undecided, 1: supported, 2: not supported */
21static int perf_query_supported;
22static bool has_perf_query_support(void)
23{
24 __u64 probe_offset, probe_addr;
25 __u32 len, prog_id, fd_type;
26 char buf[256];
27 int fd;
28
29 if (perf_query_supported)
30 goto out;
31
32 fd = open("/", O_RDONLY);
33 if (fd < 0) {
34 p_err("perf_query_support: cannot open directory \"/\" (%s)",
35 strerror(errno));
36 goto out;
37 }
38
39 /* the following query will fail as no bpf attachment,
40 * the expected errno is ENOTSUPP
41 */
42 errno = 0;
43 len = sizeof(buf);
44 bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
45 &fd_type, &probe_offset, &probe_addr);
46
47 if (errno == 524 /* ENOTSUPP */) {
48 perf_query_supported = 1;
49 goto close_fd;
50 }
51
52 perf_query_supported = 2;
53 p_err("perf_query_support: %s", strerror(errno));
54 fprintf(stderr,
55 "HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
56
57close_fd:
58 close(fd);
59out:
60 return perf_query_supported == 1;
61}
62
63static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
64 char *buf, __u64 probe_offset, __u64 probe_addr)
65{
66 jsonw_start_object(json_wtr);
67 jsonw_int_field(json_wtr, "pid", pid);
68 jsonw_int_field(json_wtr, "fd", fd);
69 jsonw_uint_field(json_wtr, "prog_id", prog_id);
70 switch (fd_type) {
71 case BPF_FD_TYPE_RAW_TRACEPOINT:
72 jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
73 jsonw_string_field(json_wtr, "tracepoint", buf);
74 break;
75 case BPF_FD_TYPE_TRACEPOINT:
76 jsonw_string_field(json_wtr, "fd_type", "tracepoint");
77 jsonw_string_field(json_wtr, "tracepoint", buf);
78 break;
79 case BPF_FD_TYPE_KPROBE:
80 jsonw_string_field(json_wtr, "fd_type", "kprobe");
81 if (buf[0] != '\0') {
82 jsonw_string_field(json_wtr, "func", buf);
83 jsonw_lluint_field(json_wtr, "offset", probe_offset);
84 } else {
85 jsonw_lluint_field(json_wtr, "addr", probe_addr);
86 }
87 break;
88 case BPF_FD_TYPE_KRETPROBE:
89 jsonw_string_field(json_wtr, "fd_type", "kretprobe");
90 if (buf[0] != '\0') {
91 jsonw_string_field(json_wtr, "func", buf);
92 jsonw_lluint_field(json_wtr, "offset", probe_offset);
93 } else {
94 jsonw_lluint_field(json_wtr, "addr", probe_addr);
95 }
96 break;
97 case BPF_FD_TYPE_UPROBE:
98 jsonw_string_field(json_wtr, "fd_type", "uprobe");
99 jsonw_string_field(json_wtr, "filename", buf);
100 jsonw_lluint_field(json_wtr, "offset", probe_offset);
101 break;
102 case BPF_FD_TYPE_URETPROBE:
103 jsonw_string_field(json_wtr, "fd_type", "uretprobe");
104 jsonw_string_field(json_wtr, "filename", buf);
105 jsonw_lluint_field(json_wtr, "offset", probe_offset);
106 break;
107 }
108 jsonw_end_object(json_wtr);
109}
110
111static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
112 char *buf, __u64 probe_offset, __u64 probe_addr)
113{
114 printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
115 switch (fd_type) {
116 case BPF_FD_TYPE_RAW_TRACEPOINT:
117 printf("raw_tracepoint %s\n", buf);
118 break;
119 case BPF_FD_TYPE_TRACEPOINT:
120 printf("tracepoint %s\n", buf);
121 break;
122 case BPF_FD_TYPE_KPROBE:
123 if (buf[0] != '\0')
124 printf("kprobe func %s offset %llu\n", buf,
125 probe_offset);
126 else
127 printf("kprobe addr %llu\n", probe_addr);
128 break;
129 case BPF_FD_TYPE_KRETPROBE:
130 if (buf[0] != '\0')
131 printf("kretprobe func %s offset %llu\n", buf,
132 probe_offset);
133 else
134 printf("kretprobe addr %llu\n", probe_addr);
135 break;
136 case BPF_FD_TYPE_UPROBE:
137 printf("uprobe filename %s offset %llu\n", buf, probe_offset);
138 break;
139 case BPF_FD_TYPE_URETPROBE:
140 printf("uretprobe filename %s offset %llu\n", buf,
141 probe_offset);
142 break;
143 }
144}
145
146static int show_proc(const char *fpath, const struct stat *sb,
147 int tflag, struct FTW *ftwbuf)
148{
149 __u64 probe_offset, probe_addr;
150 __u32 len, prog_id, fd_type;
151 int err, pid = 0, fd = 0;
152 const char *pch;
153 char buf[4096];
154
155 /* prefix always /proc */
156 pch = fpath + 5;
157 if (*pch == '\0')
158 return 0;
159
160 /* pid should be all numbers */
161 pch++;
162 while (isdigit(*pch)) {
163 pid = pid * 10 + *pch - '0';
164 pch++;
165 }
166 if (*pch == '\0')
167 return 0;
168 if (*pch != '/')
169 return FTW_SKIP_SUBTREE;
170
171 /* check /proc/<pid>/fd directory */
172 pch++;
173 if (strncmp(pch, "fd", 2))
174 return FTW_SKIP_SUBTREE;
175 pch += 2;
176 if (*pch == '\0')
177 return 0;
178 if (*pch != '/')
179 return FTW_SKIP_SUBTREE;
180
181 /* check /proc/<pid>/fd/<fd_num> */
182 pch++;
183 while (isdigit(*pch)) {
184 fd = fd * 10 + *pch - '0';
185 pch++;
186 }
187 if (*pch != '\0')
188 return FTW_SKIP_SUBTREE;
189
190 /* query (pid, fd) for potential perf events */
191 len = sizeof(buf);
192 err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
193 &probe_offset, &probe_addr);
194 if (err < 0)
195 return 0;
196
197 if (json_output)
198 print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
199 probe_addr);
200 else
201 print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
202 probe_addr);
203
204 return 0;
205}
206
207static int do_show(int argc, char **argv)
208{
209 int flags = FTW_ACTIONRETVAL | FTW_PHYS;
210 int err = 0, nopenfd = 16;
211
212 if (!has_perf_query_support())
213 return -1;
214
215 if (json_output)
216 jsonw_start_array(json_wtr);
217 if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
218 p_err("%s", strerror(errno));
219 err = -1;
220 }
221 if (json_output)
222 jsonw_end_array(json_wtr);
223
224 return err;
225}
226
227static int do_help(int argc, char **argv)
228{
229 fprintf(stderr,
230 "Usage: %s %s { show | list | help }\n"
231 "",
232 bin_name, argv[-2]);
233
234 return 0;
235}
236
237static const struct cmd cmds[] = {
238 { "show", do_show },
239 { "list", do_show },
240 { "help", do_help },
241 { 0 }
242};
243
244int do_perf(int argc, char **argv)
245{
246 return cmd_select(cmds, argc, argv, do_help);
247}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f7a810897eac..05f42a46d6ed 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,10 @@ static const char * const prog_type_name[] = {
68 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", 68 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
69 [BPF_PROG_TYPE_SK_SKB] = "sk_skb", 69 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
70 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", 70 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
71 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
72 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
73 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
74 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
71}; 75};
72 76
73static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) 77static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
@@ -86,14 +90,19 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
86 } 90 }
87 91
88 wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + 92 wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
89 nsecs / 1000000000; 93 (real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
94 1000000000;
95
90 96
91 if (!localtime_r(&wallclock_secs, &load_tm)) { 97 if (!localtime_r(&wallclock_secs, &load_tm)) {
92 snprintf(buf, size, "%llu", nsecs / 1000000000); 98 snprintf(buf, size, "%llu", nsecs / 1000000000);
93 return; 99 return;
94 } 100 }
95 101
96 strftime(buf, size, "%b %d/%H:%M", &load_tm); 102 if (json_output)
103 strftime(buf, size, "%s", &load_tm);
104 else
105 strftime(buf, size, "%FT%T%z", &load_tm);
97} 106}
98 107
99static int prog_fd_by_tag(unsigned char *tag) 108static int prog_fd_by_tag(unsigned char *tag)
@@ -232,6 +241,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
232 info->tag[0], info->tag[1], info->tag[2], info->tag[3], 241 info->tag[0], info->tag[1], info->tag[2], info->tag[3],
233 info->tag[4], info->tag[5], info->tag[6], info->tag[7]); 242 info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
234 243
244 jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
245
235 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 246 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
236 247
237 if (info->load_time) { 248 if (info->load_time) {
@@ -240,7 +251,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
240 print_boot_time(info->load_time, buf, sizeof(buf)); 251 print_boot_time(info->load_time, buf, sizeof(buf));
241 252
242 /* Piggy back on load_time, since 0 uid is a valid one */ 253 /* Piggy back on load_time, since 0 uid is a valid one */
243 jsonw_string_field(json_wtr, "loaded_at", buf); 254 jsonw_name(json_wtr, "loaded_at");
255 jsonw_printf(json_wtr, "%s", buf);
244 jsonw_uint_field(json_wtr, "uid", info->created_by_uid); 256 jsonw_uint_field(json_wtr, "uid", info->created_by_uid);
245 } 257 }
246 258
@@ -292,6 +304,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
292 printf("tag "); 304 printf("tag ");
293 fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); 305 fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
294 print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); 306 print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
307 printf("%s", info->gpl_compatible ? " gpl" : "");
295 printf("\n"); 308 printf("\n");
296 309
297 if (info->load_time) { 310 if (info->load_time) {
@@ -410,7 +423,11 @@ static int do_show(int argc, char **argv)
410 423
411static int do_dump(int argc, char **argv) 424static int do_dump(int argc, char **argv)
412{ 425{
426 unsigned long *func_ksyms = NULL;
413 struct bpf_prog_info info = {}; 427 struct bpf_prog_info info = {};
428 unsigned int *func_lens = NULL;
429 unsigned int nr_func_ksyms;
430 unsigned int nr_func_lens;
414 struct dump_data dd = {}; 431 struct dump_data dd = {};
415 __u32 len = sizeof(info); 432 __u32 len = sizeof(info);
416 unsigned int buf_size; 433 unsigned int buf_size;
@@ -486,10 +503,34 @@ static int do_dump(int argc, char **argv)
486 return -1; 503 return -1;
487 } 504 }
488 505
506 nr_func_ksyms = info.nr_jited_ksyms;
507 if (nr_func_ksyms) {
508 func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
509 if (!func_ksyms) {
510 p_err("mem alloc failed");
511 close(fd);
512 goto err_free;
513 }
514 }
515
516 nr_func_lens = info.nr_jited_func_lens;
517 if (nr_func_lens) {
518 func_lens = malloc(nr_func_lens * sizeof(__u32));
519 if (!func_lens) {
520 p_err("mem alloc failed");
521 close(fd);
522 goto err_free;
523 }
524 }
525
489 memset(&info, 0, sizeof(info)); 526 memset(&info, 0, sizeof(info));
490 527
491 *member_ptr = ptr_to_u64(buf); 528 *member_ptr = ptr_to_u64(buf);
492 *member_len = buf_size; 529 *member_len = buf_size;
530 info.jited_ksyms = ptr_to_u64(func_ksyms);
531 info.nr_jited_ksyms = nr_func_ksyms;
532 info.jited_func_lens = ptr_to_u64(func_lens);
533 info.nr_jited_func_lens = nr_func_lens;
493 534
494 err = bpf_obj_get_info_by_fd(fd, &info, &len); 535 err = bpf_obj_get_info_by_fd(fd, &info, &len);
495 close(fd); 536 close(fd);
@@ -503,6 +544,16 @@ static int do_dump(int argc, char **argv)
503 goto err_free; 544 goto err_free;
504 } 545 }
505 546
547 if (info.nr_jited_ksyms > nr_func_ksyms) {
548 p_err("too many addresses returned");
549 goto err_free;
550 }
551
552 if (info.nr_jited_func_lens > nr_func_lens) {
553 p_err("too many values returned");
554 goto err_free;
555 }
556
506 if ((member_len == &info.jited_prog_len && 557 if ((member_len == &info.jited_prog_len &&
507 info.jited_prog_insns == 0) || 558 info.jited_prog_insns == 0) ||
508 (member_len == &info.xlated_prog_len && 559 (member_len == &info.xlated_prog_len &&
@@ -540,7 +591,57 @@ static int do_dump(int argc, char **argv)
540 goto err_free; 591 goto err_free;
541 } 592 }
542 593
543 disasm_print_insn(buf, *member_len, opcodes, name); 594 if (info.nr_jited_func_lens && info.jited_func_lens) {
595 struct kernel_sym *sym = NULL;
596 char sym_name[SYM_MAX_NAME];
597 unsigned char *img = buf;
598 __u64 *ksyms = NULL;
599 __u32 *lens;
600 __u32 i;
601
602 if (info.nr_jited_ksyms) {
603 kernel_syms_load(&dd);
604 ksyms = (__u64 *) info.jited_ksyms;
605 }
606
607 if (json_output)
608 jsonw_start_array(json_wtr);
609
610 lens = (__u32 *) info.jited_func_lens;
611 for (i = 0; i < info.nr_jited_func_lens; i++) {
612 if (ksyms) {
613 sym = kernel_syms_search(&dd, ksyms[i]);
614 if (sym)
615 sprintf(sym_name, "%s", sym->name);
616 else
617 sprintf(sym_name, "0x%016llx", ksyms[i]);
618 } else {
619 strcpy(sym_name, "unknown");
620 }
621
622 if (json_output) {
623 jsonw_start_object(json_wtr);
624 jsonw_name(json_wtr, "name");
625 jsonw_string(json_wtr, sym_name);
626 jsonw_name(json_wtr, "insns");
627 } else {
628 printf("%s:\n", sym_name);
629 }
630
631 disasm_print_insn(img, lens[i], opcodes, name);
632 img += lens[i];
633
634 if (json_output)
635 jsonw_end_object(json_wtr);
636 else
637 printf("\n");
638 }
639
640 if (json_output)
641 jsonw_end_array(json_wtr);
642 } else {
643 disasm_print_insn(buf, *member_len, opcodes, name);
644 }
544 } else if (visual) { 645 } else if (visual) {
545 if (json_output) 646 if (json_output)
546 jsonw_null(json_wtr); 647 jsonw_null(json_wtr);
@@ -548,6 +649,9 @@ static int do_dump(int argc, char **argv)
548 dump_xlated_cfg(buf, *member_len); 649 dump_xlated_cfg(buf, *member_len);
549 } else { 650 } else {
550 kernel_syms_load(&dd); 651 kernel_syms_load(&dd);
652 dd.nr_jited_ksyms = info.nr_jited_ksyms;
653 dd.jited_ksyms = (__u64 *) info.jited_ksyms;
654
551 if (json_output) 655 if (json_output)
552 dump_xlated_json(&dd, buf, *member_len, opcodes); 656 dump_xlated_json(&dd, buf, *member_len, opcodes);
553 else 657 else
@@ -556,10 +660,14 @@ static int do_dump(int argc, char **argv)
556 } 660 }
557 661
558 free(buf); 662 free(buf);
663 free(func_ksyms);
664 free(func_lens);
559 return 0; 665 return 0;
560 666
561err_free: 667err_free:
562 free(buf); 668 free(buf);
669 free(func_ksyms);
670 free(func_lens);
563 return -1; 671 return -1;
564} 672}
565 673
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7a3173b76c16..b97f1da60dd1 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
102 free(dd->sym_mapping); 102 free(dd->sym_mapping);
103} 103}
104 104
105static struct kernel_sym *kernel_syms_search(struct dump_data *dd, 105struct kernel_sym *kernel_syms_search(struct dump_data *dd,
106 unsigned long key) 106 unsigned long key)
107{ 107{
108 struct kernel_sym sym = { 108 struct kernel_sym sym = {
109 .address = key, 109 .address = key,
@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
174 unsigned long address, 174 unsigned long address,
175 const struct bpf_insn *insn) 175 const struct bpf_insn *insn)
176{ 176{
177 if (sym) 177 if (!dd->nr_jited_ksyms)
178 /* Do not show address for interpreted programs */
179 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
180 "%+d", insn->off);
181 else if (sym)
178 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), 182 snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
179 "%+d#%s", insn->off, sym->name); 183 "%+d#%s", insn->off, sym->name);
180 else 184 else
@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
203 unsigned long address = dd->address_call_base + insn->imm; 207 unsigned long address = dd->address_call_base + insn->imm;
204 struct kernel_sym *sym; 208 struct kernel_sym *sym;
205 209
210 if (insn->src_reg == BPF_PSEUDO_CALL &&
211 (__u32) insn->imm < dd->nr_jited_ksyms)
212 address = dd->jited_ksyms[insn->imm];
213
206 sym = kernel_syms_search(dd, address); 214 sym = kernel_syms_search(dd, address);
207 if (insn->src_reg == BPF_PSEUDO_CALL) 215 if (insn->src_reg == BPF_PSEUDO_CALL)
208 return print_call_pcrel(dd, sym, address, insn); 216 return print_call_pcrel(dd, sym, address, insn);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index b34affa7ef2d..33d86e2b369b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,11 +49,14 @@ struct dump_data {
49 unsigned long address_call_base; 49 unsigned long address_call_base;
50 struct kernel_sym *sym_mapping; 50 struct kernel_sym *sym_mapping;
51 __u32 sym_count; 51 __u32 sym_count;
52 __u64 *jited_ksyms;
53 __u32 nr_jited_ksyms;
52 char scratch_buff[SYM_MAX_NAME + 8]; 54 char scratch_buff[SYM_MAX_NAME + 8];
53}; 55};
54 56
55void kernel_syms_load(struct dump_data *dd); 57void kernel_syms_load(struct dump_data *dd);
56void kernel_syms_destroy(struct dump_data *dd); 58void kernel_syms_destroy(struct dump_data *dd);
59struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
57void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, 60void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
58 bool opcodes); 61 bool opcodes);
59void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, 62void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index f0c6f54a8b2f..3040830d7797 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -248,7 +248,7 @@ void print_usage(void)
248 "Capture, convert and output data from IIO device buffer\n" 248 "Capture, convert and output data from IIO device buffer\n"
249 " -a Auto-activate all available channels\n" 249 " -a Auto-activate all available channels\n"
250 " -A Force-activate ALL channels\n" 250 " -A Force-activate ALL channels\n"
251 " -c <n> Do n conversions\n" 251 " -c <n> Do n conversions, or loop forever if n < 0\n"
252 " -e Disable wait for event (new data)\n" 252 " -e Disable wait for event (new data)\n"
253 " -g Use trigger-less mode\n" 253 " -g Use trigger-less mode\n"
254 " -l <n> Set buffer length to n samples\n" 254 " -l <n> Set buffer length to n samples\n"
@@ -330,11 +330,14 @@ static const struct option longopts[] = {
330 330
331int main(int argc, char **argv) 331int main(int argc, char **argv)
332{ 332{
333 unsigned long num_loops = 2; 333 unsigned long long num_loops = 2;
334 unsigned long timedelay = 1000000; 334 unsigned long timedelay = 1000000;
335 unsigned long buf_len = 128; 335 unsigned long buf_len = 128;
336 336
337 int ret, c, i, j, toread; 337 ssize_t i;
338 unsigned long long j;
339 unsigned long toread;
340 int ret, c;
338 int fp = -1; 341 int fp = -1;
339 342
340 int num_channels = 0; 343 int num_channels = 0;
@@ -366,7 +369,7 @@ int main(int argc, char **argv)
366 break; 369 break;
367 case 'c': 370 case 'c':
368 errno = 0; 371 errno = 0;
369 num_loops = strtoul(optarg, &dummy, 10); 372 num_loops = strtoll(optarg, &dummy, 10);
370 if (errno) { 373 if (errno) {
371 ret = -errno; 374 ret = -errno;
372 goto error; 375 goto error;
@@ -634,7 +637,7 @@ int main(int argc, char **argv)
634 goto error; 637 goto error;
635 } 638 }
636 639
637 for (j = 0; j < num_loops; j++) { 640 for (j = 0; j < num_loops || num_loops < 0; j++) {
638 if (!noevents) { 641 if (!noevents) {
639 struct pollfd pfd = { 642 struct pollfd pfd = {
640 .fd = fp, 643 .fd = fp,
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index a3a4427441bf..70fe61295733 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,6 +21,9 @@
21/* &a[0] degrades to a pointer: a different type from an array */ 21/* &a[0] degrades to a pointer: a different type from an array */
22#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) 22#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
23 23
24#ifndef __pure
25#define __pure __attribute__((pure))
26#endif
24#define noinline __attribute__((noinline)) 27#define noinline __attribute__((noinline))
25#ifndef __packed 28#ifndef __packed
26#define __packed __attribute__((packed)) 29#define __packed __attribute__((packed))
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index c5e512da8d8a..af55acf73e75 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -263,6 +263,16 @@
263#define BPF_LD_MAP_FD(DST, MAP_FD) \ 263#define BPF_LD_MAP_FD(DST, MAP_FD) \
264 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) 264 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
265 265
266/* Relative call */
267
268#define BPF_CALL_REL(TGT) \
269 ((struct bpf_insn) { \
270 .code = BPF_JMP | BPF_CALL, \
271 .dst_reg = 0, \
272 .src_reg = BPF_PSEUDO_CALL, \
273 .off = 0, \
274 .imm = TGT })
275
266/* Program exit */ 276/* Program exit */
267 277
268#define BPF_EXIT_INSN() \ 278#define BPF_EXIT_INSN() \
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index b21b586b9854..1738c0391da4 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -6,8 +6,9 @@
6#include <stdbool.h> 6#include <stdbool.h>
7 7
8#define spinlock_t pthread_mutex_t 8#define spinlock_t pthread_mutex_t
9#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; 9#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
10#define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER 10#define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
11#define spin_lock_init(x) pthread_mutex_init(x, NULL)
11 12
12#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) 13#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x)
13#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) 14#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x)
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..8dd6aefdafa4
--- /dev/null
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,18 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#if defined(__i386__) || defined(__x86_64__)
3#include "../../arch/x86/include/uapi/asm/bitsperlong.h"
4#elif defined(__aarch64__)
5#include "../../arch/arm64/include/uapi/asm/bitsperlong.h"
6#elif defined(__powerpc__)
7#include "../../arch/powerpc/include/uapi/asm/bitsperlong.h"
8#elif defined(__s390__)
9#include "../../arch/s390/include/uapi/asm/bitsperlong.h"
10#elif defined(__sparc__)
11#include "../../arch/sparc/include/uapi/asm/bitsperlong.h"
12#elif defined(__mips__)
13#include "../../arch/mips/include/uapi/asm/bitsperlong.h"
14#elif defined(__ia64__)
15#include "../../arch/ia64/include/uapi/asm/bitsperlong.h"
16#else
17#include <asm-generic/bitsperlong.h>
18#endif
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
new file mode 100644
index 000000000000..ce3c5945a1c4
--- /dev/null
+++ b/tools/include/uapi/asm/errno.h
@@ -0,0 +1,18 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#if defined(__i386__) || defined(__x86_64__)
3#include "../../arch/x86/include/uapi/asm/errno.h"
4#elif defined(__powerpc__)
5#include "../../arch/powerpc/include/uapi/asm/errno.h"
6#elif defined(__sparc__)
7#include "../../arch/sparc/include/uapi/asm/errno.h"
8#elif defined(__alpha__)
9#include "../../arch/alpha/include/uapi/asm/errno.h"
10#elif defined(__mips__)
11#include "../../arch/mips/include/uapi/asm/errno.h"
12#elif defined(__ia64__)
13#include "../../arch/ia64/include/uapi/asm/errno.h"
14#elif defined(__xtensa__)
15#include "../../arch/xtensa/include/uapi/asm/errno.h"
16#else
17#include <asm-generic/errno.h>
18#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c5ec89732a8d..e0b06784f227 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -95,6 +95,9 @@ enum bpf_cmd {
95 BPF_OBJ_GET_INFO_BY_FD, 95 BPF_OBJ_GET_INFO_BY_FD,
96 BPF_PROG_QUERY, 96 BPF_PROG_QUERY,
97 BPF_RAW_TRACEPOINT_OPEN, 97 BPF_RAW_TRACEPOINT_OPEN,
98 BPF_BTF_LOAD,
99 BPF_BTF_GET_FD_BY_ID,
100 BPF_TASK_FD_QUERY,
98}; 101};
99 102
100enum bpf_map_type { 103enum bpf_map_type {
@@ -115,6 +118,8 @@ enum bpf_map_type {
115 BPF_MAP_TYPE_DEVMAP, 118 BPF_MAP_TYPE_DEVMAP,
116 BPF_MAP_TYPE_SOCKMAP, 119 BPF_MAP_TYPE_SOCKMAP,
117 BPF_MAP_TYPE_CPUMAP, 120 BPF_MAP_TYPE_CPUMAP,
121 BPF_MAP_TYPE_XSKMAP,
122 BPF_MAP_TYPE_SOCKHASH,
118}; 123};
119 124
120enum bpf_prog_type { 125enum bpf_prog_type {
@@ -137,6 +142,8 @@ enum bpf_prog_type {
137 BPF_PROG_TYPE_SK_MSG, 142 BPF_PROG_TYPE_SK_MSG,
138 BPF_PROG_TYPE_RAW_TRACEPOINT, 143 BPF_PROG_TYPE_RAW_TRACEPOINT,
139 BPF_PROG_TYPE_CGROUP_SOCK_ADDR, 144 BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
145 BPF_PROG_TYPE_LWT_SEG6LOCAL,
146 BPF_PROG_TYPE_LIRC_MODE2,
140}; 147};
141 148
142enum bpf_attach_type { 149enum bpf_attach_type {
@@ -154,6 +161,9 @@ enum bpf_attach_type {
154 BPF_CGROUP_INET6_CONNECT, 161 BPF_CGROUP_INET6_CONNECT,
155 BPF_CGROUP_INET4_POST_BIND, 162 BPF_CGROUP_INET4_POST_BIND,
156 BPF_CGROUP_INET6_POST_BIND, 163 BPF_CGROUP_INET6_POST_BIND,
164 BPF_CGROUP_UDP4_SENDMSG,
165 BPF_CGROUP_UDP6_SENDMSG,
166 BPF_LIRC_MODE2,
157 __MAX_BPF_ATTACH_TYPE 167 __MAX_BPF_ATTACH_TYPE
158}; 168};
159 169
@@ -279,6 +289,9 @@ union bpf_attr {
279 */ 289 */
280 char map_name[BPF_OBJ_NAME_LEN]; 290 char map_name[BPF_OBJ_NAME_LEN];
281 __u32 map_ifindex; /* ifindex of netdev to create on */ 291 __u32 map_ifindex; /* ifindex of netdev to create on */
292 __u32 btf_fd; /* fd pointing to a BTF type data */
293 __u32 btf_key_type_id; /* BTF type_id of the key */
294 __u32 btf_value_type_id; /* BTF type_id of the value */
282 }; 295 };
283 296
284 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ 297 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -339,6 +352,7 @@ union bpf_attr {
339 __u32 start_id; 352 __u32 start_id;
340 __u32 prog_id; 353 __u32 prog_id;
341 __u32 map_id; 354 __u32 map_id;
355 __u32 btf_id;
342 }; 356 };
343 __u32 next_id; 357 __u32 next_id;
344 __u32 open_flags; 358 __u32 open_flags;
@@ -363,398 +377,1704 @@ union bpf_attr {
363 __u64 name; 377 __u64 name;
364 __u32 prog_fd; 378 __u32 prog_fd;
365 } raw_tracepoint; 379 } raw_tracepoint;
380
381 struct { /* anonymous struct for BPF_BTF_LOAD */
382 __aligned_u64 btf;
383 __aligned_u64 btf_log_buf;
384 __u32 btf_size;
385 __u32 btf_log_size;
386 __u32 btf_log_level;
387 };
388
389 struct {
390 __u32 pid; /* input: pid */
391 __u32 fd; /* input: fd */
392 __u32 flags; /* input: flags */
393 __u32 buf_len; /* input/output: buf len */
394 __aligned_u64 buf; /* input/output:
395 * tp_name for tracepoint
396 * symbol for kprobe
397 * filename for uprobe
398 */
399 __u32 prog_id; /* output: prod_id */
400 __u32 fd_type; /* output: BPF_FD_TYPE_* */
401 __u64 probe_offset; /* output: probe_offset */
402 __u64 probe_addr; /* output: probe_addr */
403 } task_fd_query;
366} __attribute__((aligned(8))); 404} __attribute__((aligned(8)));
367 405
368/* BPF helper function descriptions: 406/* The description below is an attempt at providing documentation to eBPF
369 * 407 * developers about the multiple available eBPF helper functions. It can be
370 * void *bpf_map_lookup_elem(&map, &key) 408 * parsed and used to produce a manual page. The workflow is the following,
371 * Return: Map value or NULL 409 * and requires the rst2man utility:
372 * 410 *
373 * int bpf_map_update_elem(&map, &key, &value, flags) 411 * $ ./scripts/bpf_helpers_doc.py \
374 * Return: 0 on success or negative error 412 * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
375 * 413 * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
376 * int bpf_map_delete_elem(&map, &key) 414 * $ man /tmp/bpf-helpers.7
377 * Return: 0 on success or negative error 415 *
378 * 416 * Note that in order to produce this external documentation, some RST
379 * int bpf_probe_read(void *dst, int size, void *src) 417 * formatting is used in the descriptions to get "bold" and "italics" in
380 * Return: 0 on success or negative error 418 * manual pages. Also note that the few trailing white spaces are
419 * intentional, removing them would break paragraphs for rst2man.
420 *
421 * Start of BPF helper function descriptions:
422 *
423 * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
424 * Description
425 * Perform a lookup in *map* for an entry associated to *key*.
426 * Return
427 * Map value associated to *key*, or **NULL** if no entry was
428 * found.
429 *
430 * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
431 * Description
432 * Add or update the value of the entry associated to *key* in
433 * *map* with *value*. *flags* is one of:
434 *
435 * **BPF_NOEXIST**
436 * The entry for *key* must not exist in the map.
437 * **BPF_EXIST**
438 * The entry for *key* must already exist in the map.
439 * **BPF_ANY**
440 * No condition on the existence of the entry for *key*.
441 *
442 * Flag value **BPF_NOEXIST** cannot be used for maps of types
443 * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
444 * elements always exist), the helper would return an error.
445 * Return
446 * 0 on success, or a negative error in case of failure.
447 *
448 * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
449 * Description
450 * Delete entry with *key* from *map*.
451 * Return
452 * 0 on success, or a negative error in case of failure.
453 *
454 * int bpf_probe_read(void *dst, u32 size, const void *src)
455 * Description
456 * For tracing programs, safely attempt to read *size* bytes from
457 * address *src* and store the data in *dst*.
458 * Return
459 * 0 on success, or a negative error in case of failure.
381 * 460 *
382 * u64 bpf_ktime_get_ns(void) 461 * u64 bpf_ktime_get_ns(void)
383 * Return: current ktime 462 * Description
384 * 463 * Return the time elapsed since system boot, in nanoseconds.
385 * int bpf_trace_printk(const char *fmt, int fmt_size, ...) 464 * Return
386 * Return: length of buffer written or negative error 465 * Current *ktime*.
387 * 466 *
388 * u32 bpf_prandom_u32(void) 467 * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
389 * Return: random value 468 * Description
390 * 469 * This helper is a "printk()-like" facility for debugging. It
391 * u32 bpf_raw_smp_processor_id(void) 470 * prints a message defined by format *fmt* (of size *fmt_size*)
392 * Return: SMP processor ID 471 * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
393 * 472 * available. It can take up to three additional **u64**
394 * int bpf_skb_store_bytes(skb, offset, from, len, flags) 473 * arguments (as an eBPF helpers, the total number of arguments is
395 * store bytes into packet 474 * limited to five).
396 * @skb: pointer to skb 475 *
397 * @offset: offset within packet from skb->mac_header 476 * Each time the helper is called, it appends a line to the trace.
398 * @from: pointer where to copy bytes from 477 * The format of the trace is customizable, and the exact output
399 * @len: number of bytes to store into packet 478 * one will get depends on the options set in
400 * @flags: bit 0 - if true, recompute skb->csum 479 * *\/sys/kernel/debug/tracing/trace_options* (see also the
401 * other bits - reserved 480 * *README* file under the same directory). However, it usually
402 * Return: 0 on success or negative error 481 * defaults to something like:
403 * 482 *
404 * int bpf_l3_csum_replace(skb, offset, from, to, flags) 483 * ::
405 * recompute IP checksum 484 *
406 * @skb: pointer to skb 485 * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
407 * @offset: offset within packet where IP checksum is located 486 *
408 * @from: old value of header field 487 * In the above:
409 * @to: new value of header field 488 *
410 * @flags: bits 0-3 - size of header field 489 * * ``telnet`` is the name of the current task.
411 * other bits - reserved 490 * * ``470`` is the PID of the current task.
412 * Return: 0 on success or negative error 491 * * ``001`` is the CPU number on which the task is
413 * 492 * running.
414 * int bpf_l4_csum_replace(skb, offset, from, to, flags) 493 * * In ``.N..``, each character refers to a set of
415 * recompute TCP/UDP checksum 494 * options (whether irqs are enabled, scheduling
416 * @skb: pointer to skb 495 * options, whether hard/softirqs are running, level of
417 * @offset: offset within packet where TCP/UDP checksum is located 496 * preempt_disabled respectively). **N** means that
418 * @from: old value of header field 497 * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
419 * @to: new value of header field 498 * are set.
420 * @flags: bits 0-3 - size of header field 499 * * ``419421.045894`` is a timestamp.
421 * bit 4 - is pseudo header 500 * * ``0x00000001`` is a fake value used by BPF for the
422 * other bits - reserved 501 * instruction pointer register.
423 * Return: 0 on success or negative error 502 * * ``<formatted msg>`` is the message formatted with
424 * 503 * *fmt*.
425 * int bpf_tail_call(ctx, prog_array_map, index) 504 *
426 * jump into another BPF program 505 * The conversion specifiers supported by *fmt* are similar, but
427 * @ctx: context pointer passed to next program 506 * more limited than for printk(). They are **%d**, **%i**,
428 * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY 507 * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
429 * @index: 32-bit index inside array that selects specific program to run 508 * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
430 * Return: 0 on success or negative error 509 * of field, padding with zeroes, etc.) is available, and the
431 * 510 * helper will return **-EINVAL** (but print nothing) if it
432 * int bpf_clone_redirect(skb, ifindex, flags) 511 * encounters an unknown specifier.
433 * redirect to another netdev 512 *
434 * @skb: pointer to skb 513 * Also, note that **bpf_trace_printk**\ () is slow, and should
435 * @ifindex: ifindex of the net device 514 * only be used for debugging purposes. For this reason, a notice
436 * @flags: bit 0 - if set, redirect to ingress instead of egress 515 * bloc (spanning several lines) is printed to kernel logs and
437 * other bits - reserved 516 * states that the helper should not be used "for production use"
438 * Return: 0 on success or negative error 517 * the first time this helper is used (or more precisely, when
518 * **trace_printk**\ () buffers are allocated). For passing values
519 * to user space, perf events should be preferred.
520 * Return
521 * The number of bytes written to the buffer, or a negative error
522 * in case of failure.
523 *
524 * u32 bpf_get_prandom_u32(void)
525 * Description
526 * Get a pseudo-random number.
527 *
528 * From a security point of view, this helper uses its own
529 * pseudo-random internal state, and cannot be used to infer the
530 * seed of other random functions in the kernel. However, it is
531 * essential to note that the generator used by the helper is not
532 * cryptographically secure.
533 * Return
534 * A random 32-bit unsigned value.
535 *
536 * u32 bpf_get_smp_processor_id(void)
537 * Description
538 * Get the SMP (symmetric multiprocessing) processor id. Note that
539 * all programs run with preemption disabled, which means that the
540 * SMP processor id is stable during all the execution of the
541 * program.
542 * Return
543 * The SMP id of the processor running the program.
544 *
545 * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
546 * Description
547 * Store *len* bytes from address *from* into the packet
548 * associated to *skb*, at *offset*. *flags* are a combination of
549 * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
550 * checksum for the packet after storing the bytes) and
551 * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
552 * **->swhash** and *skb*\ **->l4hash** to 0).
553 *
554 * A call to this helper is susceptible to change the underlaying
555 * packet buffer. Therefore, at load time, all checks on pointers
556 * previously done by the verifier are invalidated and must be
557 * performed again, if the helper is used in combination with
558 * direct packet access.
559 * Return
560 * 0 on success, or a negative error in case of failure.
561 *
562 * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
563 * Description
564 * Recompute the layer 3 (e.g. IP) checksum for the packet
565 * associated to *skb*. Computation is incremental, so the helper
566 * must know the former value of the header field that was
567 * modified (*from*), the new value of this field (*to*), and the
568 * number of bytes (2 or 4) for this field, stored in *size*.
569 * Alternatively, it is possible to store the difference between
570 * the previous and the new values of the header field in *to*, by
571 * setting *from* and *size* to 0. For both methods, *offset*
572 * indicates the location of the IP checksum within the packet.
573 *
574 * This helper works in combination with **bpf_csum_diff**\ (),
575 * which does not update the checksum in-place, but offers more
576 * flexibility and can handle sizes larger than 2 or 4 for the
577 * checksum to update.
578 *
579 * A call to this helper is susceptible to change the underlaying
580 * packet buffer. Therefore, at load time, all checks on pointers
581 * previously done by the verifier are invalidated and must be
582 * performed again, if the helper is used in combination with
583 * direct packet access.
584 * Return
585 * 0 on success, or a negative error in case of failure.
586 *
587 * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
588 * Description
589 * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
590 * packet associated to *skb*. Computation is incremental, so the
591 * helper must know the former value of the header field that was
592 * modified (*from*), the new value of this field (*to*), and the
593 * number of bytes (2 or 4) for this field, stored on the lowest
594 * four bits of *flags*. Alternatively, it is possible to store
595 * the difference between the previous and the new values of the
596 * header field in *to*, by setting *from* and the four lowest
597 * bits of *flags* to 0. For both methods, *offset* indicates the
598 * location of the IP checksum within the packet. In addition to
599 * the size of the field, *flags* can be added (bitwise OR) actual
600 * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
601 * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
602 * for updates resulting in a null checksum the value is set to
603 * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
604 * the checksum is to be computed against a pseudo-header.
605 *
606 * This helper works in combination with **bpf_csum_diff**\ (),
607 * which does not update the checksum in-place, but offers more
608 * flexibility and can handle sizes larger than 2 or 4 for the
609 * checksum to update.
610 *
611 * A call to this helper is susceptible to change the underlaying
612 * packet buffer. Therefore, at load time, all checks on pointers
613 * previously done by the verifier are invalidated and must be
614 * performed again, if the helper is used in combination with
615 * direct packet access.
616 * Return
617 * 0 on success, or a negative error in case of failure.
618 *
619 * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
620 * Description
621 * This special helper is used to trigger a "tail call", or in
622 * other words, to jump into another eBPF program. The same stack
623 * frame is used (but values on stack and in registers for the
624 * caller are not accessible to the callee). This mechanism allows
625 * for program chaining, either for raising the maximum number of
626 * available eBPF instructions, or to execute given programs in
627 * conditional blocks. For security reasons, there is an upper
628 * limit to the number of successive tail calls that can be
629 * performed.
630 *
631 * Upon call of this helper, the program attempts to jump into a
632 * program referenced at index *index* in *prog_array_map*, a
633 * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
634 * *ctx*, a pointer to the context.
635 *
636 * If the call succeeds, the kernel immediately runs the first
637 * instruction of the new program. This is not a function call,
638 * and it never returns to the previous program. If the call
639 * fails, then the helper has no effect, and the caller continues
640 * to run its subsequent instructions. A call can fail if the
641 * destination program for the jump does not exist (i.e. *index*
642 * is superior to the number of entries in *prog_array_map*), or
643 * if the maximum number of tail calls has been reached for this
644 * chain of programs. This limit is defined in the kernel by the
645 * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
646 * which is currently set to 32.
647 * Return
648 * 0 on success, or a negative error in case of failure.
649 *
650 * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
651 * Description
652 * Clone and redirect the packet associated to *skb* to another
653 * net device of index *ifindex*. Both ingress and egress
654 * interfaces can be used for redirection. The **BPF_F_INGRESS**
655 * value in *flags* is used to make the distinction (ingress path
656 * is selected if the flag is present, egress path otherwise).
657 * This is the only flag supported for now.
658 *
659 * In comparison with **bpf_redirect**\ () helper,
660 * **bpf_clone_redirect**\ () has the associated cost of
661 * duplicating the packet buffer, but this can be executed out of
662 * the eBPF program. Conversely, **bpf_redirect**\ () is more
663 * efficient, but it is handled through an action code where the
664 * redirection happens only after the eBPF program has returned.
665 *
666 * A call to this helper is susceptible to change the underlaying
667 * packet buffer. Therefore, at load time, all checks on pointers
668 * previously done by the verifier are invalidated and must be
669 * performed again, if the helper is used in combination with
670 * direct packet access.
671 * Return
672 * 0 on success, or a negative error in case of failure.
439 * 673 *
440 * u64 bpf_get_current_pid_tgid(void) 674 * u64 bpf_get_current_pid_tgid(void)
441 * Return: current->tgid << 32 | current->pid 675 * Return
676 * A 64-bit integer containing the current tgid and pid, and
677 * created as such:
678 * *current_task*\ **->tgid << 32 \|**
679 * *current_task*\ **->pid**.
442 * 680 *
443 * u64 bpf_get_current_uid_gid(void) 681 * u64 bpf_get_current_uid_gid(void)
444 * Return: current_gid << 32 | current_uid 682 * Return
445 * 683 * A 64-bit integer containing the current GID and UID, and
446 * int bpf_get_current_comm(char *buf, int size_of_buf) 684 * created as such: *current_gid* **<< 32 \|** *current_uid*.
447 * stores current->comm into buf 685 *
448 * Return: 0 on success or negative error 686 * int bpf_get_current_comm(char *buf, u32 size_of_buf)
449 * 687 * Description
450 * u32 bpf_get_cgroup_classid(skb) 688 * Copy the **comm** attribute of the current task into *buf* of
451 * retrieve a proc's classid 689 * *size_of_buf*. The **comm** attribute contains the name of
452 * @skb: pointer to skb 690 * the executable (excluding the path) for the current task. The
453 * Return: classid if != 0 691 * *size_of_buf* must be strictly positive. On success, the
454 * 692 * helper makes sure that the *buf* is NUL-terminated. On failure,
455 * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) 693 * it is filled with zeroes.
456 * Return: 0 on success or negative error 694 * Return
457 * 695 * 0 on success, or a negative error in case of failure.
458 * int bpf_skb_vlan_pop(skb) 696 *
459 * Return: 0 on success or negative error 697 * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
460 * 698 * Description
461 * int bpf_skb_get_tunnel_key(skb, key, size, flags) 699 * Retrieve the classid for the current task, i.e. for the net_cls
462 * int bpf_skb_set_tunnel_key(skb, key, size, flags) 700 * cgroup to which *skb* belongs.
463 * retrieve or populate tunnel metadata 701 *
464 * @skb: pointer to skb 702 * This helper can be used on TC egress path, but not on ingress.
465 * @key: pointer to 'struct bpf_tunnel_key' 703 *
466 * @size: size of 'struct bpf_tunnel_key' 704 * The net_cls cgroup provides an interface to tag network packets
467 * @flags: room for future extensions 705 * based on a user-provided identifier for all traffic coming from
468 * Return: 0 on success or negative error 706 * the tasks belonging to the related cgroup. See also the related
469 * 707 * kernel documentation, available from the Linux sources in file
470 * u64 bpf_perf_event_read(map, flags) 708 * *Documentation/cgroup-v1/net_cls.txt*.
471 * read perf event counter value 709 *
472 * @map: pointer to perf_event_array map 710 * The Linux kernel has two versions for cgroups: there are
473 * @flags: index of event in the map or bitmask flags 711 * cgroups v1 and cgroups v2. Both are available to users, who can
474 * Return: value of perf event counter read or error code 712 * use a mixture of them, but note that the net_cls cgroup is for
475 * 713 * cgroup v1 only. This makes it incompatible with BPF programs
476 * int bpf_redirect(ifindex, flags) 714 * run on cgroups, which is a cgroup-v2-only feature (a socket can
477 * redirect to another netdev 715 * only hold data for one version of cgroups at a time).
478 * @ifindex: ifindex of the net device 716 *
479 * @flags: 717 * This helper is only available is the kernel was compiled with
480 * cls_bpf: 718 * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
481 * bit 0 - if set, redirect to ingress instead of egress 719 * "**y**" or to "**m**".
482 * other bits - reserved 720 * Return
483 * xdp_bpf: 721 * The classid, or 0 for the default unconfigured classid.
484 * all bits - reserved 722 *
485 * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error 723 * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
486 * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error 724 * Description
487 * int bpf_redirect_map(map, key, flags) 725 * Push a *vlan_tci* (VLAN tag control information) of protocol
488 * redirect to endpoint in map 726 * *vlan_proto* to the packet associated to *skb*, then update
489 * @map: pointer to dev map 727 * the checksum. Note that if *vlan_proto* is different from
490 * @key: index in map to lookup 728 * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
491 * @flags: -- 729 * be **ETH_P_8021Q**.
492 * Return: XDP_REDIRECT on success or XDP_ABORT on error 730 *
493 * 731 * A call to this helper is susceptible to change the underlaying
494 * u32 bpf_get_route_realm(skb) 732 * packet buffer. Therefore, at load time, all checks on pointers
495 * retrieve a dst's tclassid 733 * previously done by the verifier are invalidated and must be
496 * @skb: pointer to skb 734 * performed again, if the helper is used in combination with
497 * Return: realm if != 0 735 * direct packet access.
498 * 736 * Return
499 * int bpf_perf_event_output(ctx, map, flags, data, size) 737 * 0 on success, or a negative error in case of failure.
500 * output perf raw sample 738 *
501 * @ctx: struct pt_regs* 739 * int bpf_skb_vlan_pop(struct sk_buff *skb)
502 * @map: pointer to perf_event_array map 740 * Description
503 * @flags: index of event in the map or bitmask flags 741 * Pop a VLAN header from the packet associated to *skb*.
504 * @data: data on stack to be output as raw data 742 *
505 * @size: size of data 743 * A call to this helper is susceptible to change the underlaying
506 * Return: 0 on success or negative error 744 * packet buffer. Therefore, at load time, all checks on pointers
507 * 745 * previously done by the verifier are invalidated and must be
508 * int bpf_get_stackid(ctx, map, flags) 746 * performed again, if the helper is used in combination with
509 * walk user or kernel stack and return id 747 * direct packet access.
510 * @ctx: struct pt_regs* 748 * Return
511 * @map: pointer to stack_trace map 749 * 0 on success, or a negative error in case of failure.
512 * @flags: bits 0-7 - numer of stack frames to skip 750 *
513 * bit 8 - collect user stack instead of kernel 751 * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
514 * bit 9 - compare stacks by hash only 752 * Description
515 * bit 10 - if two different stacks hash into the same stackid 753 * Get tunnel metadata. This helper takes a pointer *key* to an
516 * discard old 754 * empty **struct bpf_tunnel_key** of **size**, that will be
517 * other bits - reserved 755 * filled with tunnel metadata for the packet associated to *skb*.
518 * Return: >= 0 stackid on success or negative error 756 * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
519 * 757 * indicates that the tunnel is based on IPv6 protocol instead of
520 * s64 bpf_csum_diff(from, from_size, to, to_size, seed) 758 * IPv4.
521 * calculate csum diff 759 *
522 * @from: raw from buffer 760 * The **struct bpf_tunnel_key** is an object that generalizes the
523 * @from_size: length of from buffer 761 * principal parameters used by various tunneling protocols into a
524 * @to: raw to buffer 762 * single struct. This way, it can be used to easily make a
525 * @to_size: length of to buffer 763 * decision based on the contents of the encapsulation header,
526 * @seed: optional seed 764 * "summarized" in this struct. In particular, it holds the IP
527 * Return: csum result or negative error code 765 * address of the remote end (IPv4 or IPv6, depending on the case)
528 * 766 * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
529 * int bpf_skb_get_tunnel_opt(skb, opt, size) 767 * this struct exposes the *key*\ **->tunnel_id**, which is
530 * retrieve tunnel options metadata 768 * generally mapped to a VNI (Virtual Network Identifier), making
531 * @skb: pointer to skb 769 * it programmable together with the **bpf_skb_set_tunnel_key**\
532 * @opt: pointer to raw tunnel option data 770 * () helper.
533 * @size: size of @opt 771 *
534 * Return: option size 772 * Let's imagine that the following code is part of a program
535 * 773 * attached to the TC ingress interface, on one end of a GRE
536 * int bpf_skb_set_tunnel_opt(skb, opt, size) 774 * tunnel, and is supposed to filter out all messages coming from
537 * populate tunnel options metadata 775 * remote ends with IPv4 address other than 10.0.0.1:
538 * @skb: pointer to skb 776 *
539 * @opt: pointer to raw tunnel option data 777 * ::
540 * @size: size of @opt 778 *
541 * Return: 0 on success or negative error 779 * int ret;
542 * 780 * struct bpf_tunnel_key key = {};
543 * int bpf_skb_change_proto(skb, proto, flags) 781 *
544 * Change protocol of the skb. Currently supported is v4 -> v6, 782 * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
545 * v6 -> v4 transitions. The helper will also resize the skb. eBPF 783 * if (ret < 0)
546 * program is expected to fill the new headers via skb_store_bytes 784 * return TC_ACT_SHOT; // drop packet
547 * and lX_csum_replace. 785 *
548 * @skb: pointer to skb 786 * if (key.remote_ipv4 != 0x0a000001)
549 * @proto: new skb->protocol type 787 * return TC_ACT_SHOT; // drop packet
550 * @flags: reserved 788 *
551 * Return: 0 on success or negative error 789 * return TC_ACT_OK; // accept packet
552 * 790 *
553 * int bpf_skb_change_type(skb, type) 791 * This interface can also be used with all encapsulation devices
554 * Change packet type of skb. 792 * that can operate in "collect metadata" mode: instead of having
555 * @skb: pointer to skb 793 * one network device per specific configuration, the "collect
556 * @type: new skb->pkt_type type 794 * metadata" mode only requires a single device where the
557 * Return: 0 on success or negative error 795 * configuration can be extracted from this helper.
558 * 796 *
559 * int bpf_skb_under_cgroup(skb, map, index) 797 * This can be used together with various tunnels such as VXLan,
560 * Check cgroup2 membership of skb 798 * Geneve, GRE or IP in IP (IPIP).
561 * @skb: pointer to skb 799 * Return
562 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 800 * 0 on success, or a negative error in case of failure.
563 * @index: index of the cgroup in the bpf_map 801 *
564 * Return: 802 * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
565 * == 0 skb failed the cgroup2 descendant test 803 * Description
566 * == 1 skb succeeded the cgroup2 descendant test 804 * Populate tunnel metadata for packet associated to *skb.* The
567 * < 0 error 805 * tunnel metadata is set to the contents of *key*, of *size*. The
568 * 806 * *flags* can be set to a combination of the following values:
569 * u32 bpf_get_hash_recalc(skb) 807 *
570 * Retrieve and possibly recalculate skb->hash. 808 * **BPF_F_TUNINFO_IPV6**
571 * @skb: pointer to skb 809 * Indicate that the tunnel is based on IPv6 protocol
572 * Return: hash 810 * instead of IPv4.
811 * **BPF_F_ZERO_CSUM_TX**
812 * For IPv4 packets, add a flag to tunnel metadata
813 * indicating that checksum computation should be skipped
814 * and checksum set to zeroes.
815 * **BPF_F_DONT_FRAGMENT**
816 * Add a flag to tunnel metadata indicating that the
817 * packet should not be fragmented.
818 * **BPF_F_SEQ_NUMBER**
819 * Add a flag to tunnel metadata indicating that a
820 * sequence number should be added to tunnel header before
821 * sending the packet. This flag was added for GRE
822 * encapsulation, but might be used with other protocols
823 * as well in the future.
824 *
825 * Here is a typical usage on the transmit path:
826 *
827 * ::
828 *
829 * struct bpf_tunnel_key key;
830 * populate key ...
831 * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
832 * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
833 *
834 * See also the description of the **bpf_skb_get_tunnel_key**\ ()
835 * helper for additional information.
836 * Return
837 * 0 on success, or a negative error in case of failure.
838 *
839 * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
840 * Description
841 * Read the value of a perf event counter. This helper relies on a
842 * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
843 * the perf event counter is selected when *map* is updated with
844 * perf event file descriptors. The *map* is an array whose size
845 * is the number of available CPUs, and each cell contains a value
846 * relative to one CPU. The value to retrieve is indicated by
847 * *flags*, that contains the index of the CPU to look up, masked
848 * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
849 * **BPF_F_CURRENT_CPU** to indicate that the value for the
850 * current CPU should be retrieved.
851 *
852 * Note that before Linux 4.13, only hardware perf event can be
853 * retrieved.
854 *
855 * Also, be aware that the newer helper
856 * **bpf_perf_event_read_value**\ () is recommended over
857 * **bpf_perf_event_read**\ () in general. The latter has some ABI
858 * quirks where error and counter value are used as a return code
859 * (which is wrong to do since ranges may overlap). This issue is
860 * fixed with **bpf_perf_event_read_value**\ (), which at the same
861 * time provides more features over the **bpf_perf_event_read**\
862 * () interface. Please refer to the description of
863 * **bpf_perf_event_read_value**\ () for details.
864 * Return
865 * The value of the perf event counter read from the map, or a
866 * negative error code in case of failure.
867 *
868 * int bpf_redirect(u32 ifindex, u64 flags)
869 * Description
870 * Redirect the packet to another net device of index *ifindex*.
871 * This helper is somewhat similar to **bpf_clone_redirect**\
872 * (), except that the packet is not cloned, which provides
873 * increased performance.
874 *
875 * Except for XDP, both ingress and egress interfaces can be used
876 * for redirection. The **BPF_F_INGRESS** value in *flags* is used
877 * to make the distinction (ingress path is selected if the flag
878 * is present, egress path otherwise). Currently, XDP only
879 * supports redirection to the egress interface, and accepts no
880 * flag at all.
881 *
882 * The same effect can be attained with the more generic
883 * **bpf_redirect_map**\ (), which requires specific maps to be
884 * used but offers better performance.
885 * Return
886 * For XDP, the helper returns **XDP_REDIRECT** on success or
887 * **XDP_ABORTED** on error. For other program types, the values
888 * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
889 * error.
890 *
891 * u32 bpf_get_route_realm(struct sk_buff *skb)
892 * Description
893 * Retrieve the realm or the route, that is to say the
894 * **tclassid** field of the destination for the *skb*. The
895 * indentifier retrieved is a user-provided tag, similar to the
896 * one used with the net_cls cgroup (see description for
897 * **bpf_get_cgroup_classid**\ () helper), but here this tag is
898 * held by a route (a destination entry), not by a task.
899 *
900 * Retrieving this identifier works with the clsact TC egress hook
901 * (see also **tc-bpf(8)**), or alternatively on conventional
902 * classful egress qdiscs, but not on TC ingress path. In case of
903 * clsact TC egress hook, this has the advantage that, internally,
904 * the destination entry has not been dropped yet in the transmit
905 * path. Therefore, the destination entry does not need to be
906 * artificially held via **netif_keep_dst**\ () for a classful
907 * qdisc until the *skb* is freed.
908 *
909 * This helper is available only if the kernel was compiled with
910 * **CONFIG_IP_ROUTE_CLASSID** configuration option.
911 * Return
912 * The realm of the route for the packet associated to *skb*, or 0
913 * if none was found.
914 *
915 * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
916 * Description
917 * Write raw *data* blob into a special BPF perf event held by
918 * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
919 * event must have the following attributes: **PERF_SAMPLE_RAW**
920 * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
921 * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
922 *
923 * The *flags* are used to indicate the index in *map* for which
924 * the value must be put, masked with **BPF_F_INDEX_MASK**.
925 * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
926 * to indicate that the index of the current CPU core should be
927 * used.
928 *
929 * The value to write, of *size*, is passed through eBPF stack and
930 * pointed by *data*.
931 *
932 * The context of the program *ctx* needs also be passed to the
933 * helper.
934 *
935 * On user space, a program willing to read the values needs to
936 * call **perf_event_open**\ () on the perf event (either for
937 * one or for all CPUs) and to store the file descriptor into the
938 * *map*. This must be done before the eBPF program can send data
939 * into it. An example is available in file
940 * *samples/bpf/trace_output_user.c* in the Linux kernel source
941 * tree (the eBPF program counterpart is in
942 * *samples/bpf/trace_output_kern.c*).
943 *
944 * **bpf_perf_event_output**\ () achieves better performance
945 * than **bpf_trace_printk**\ () for sharing data with user
946 * space, and is much better suitable for streaming data from eBPF
947 * programs.
948 *
949 * Note that this helper is not restricted to tracing use cases
950 * and can be used with programs attached to TC or XDP as well,
951 * where it allows for passing data to user space listeners. Data
952 * can be:
953 *
954 * * Only custom structs,
955 * * Only the packet payload, or
956 * * A combination of both.
957 * Return
958 * 0 on success, or a negative error in case of failure.
959 *
960 * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
961 * Description
962 * This helper was provided as an easy way to load data from a
963 * packet. It can be used to load *len* bytes from *offset* from
964 * the packet associated to *skb*, into the buffer pointed by
965 * *to*.
966 *
967 * Since Linux 4.7, usage of this helper has mostly been replaced
968 * by "direct packet access", enabling packet data to be
969 * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
970 * pointing respectively to the first byte of packet data and to
971 * the byte after the last byte of packet data. However, it
972 * remains useful if one wishes to read large quantities of data
973 * at once from a packet into the eBPF stack.
974 * Return
975 * 0 on success, or a negative error in case of failure.
976 *
977 * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
978 * Description
979 * Walk a user or a kernel stack and return its id. To achieve
980 * this, the helper needs *ctx*, which is a pointer to the context
981 * on which the tracing program is executed, and a pointer to a
982 * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
983 *
984 * The last argument, *flags*, holds the number of stack frames to
985 * skip (from 0 to 255), masked with
986 * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
987 * a combination of the following flags:
988 *
989 * **BPF_F_USER_STACK**
990 * Collect a user space stack instead of a kernel stack.
991 * **BPF_F_FAST_STACK_CMP**
992 * Compare stacks by hash only.
993 * **BPF_F_REUSE_STACKID**
994 * If two different stacks hash into the same *stackid*,
995 * discard the old one.
996 *
997 * The stack id retrieved is a 32 bit long integer handle which
998 * can be further combined with other data (including other stack
999 * ids) and used as a key into maps. This can be useful for
1000 * generating a variety of graphs (such as flame graphs or off-cpu
1001 * graphs).
1002 *
1003 * For walking a stack, this helper is an improvement over
1004 * **bpf_probe_read**\ (), which can be used with unrolled loops
1005 * but is not efficient and consumes a lot of eBPF instructions.
1006 * Instead, **bpf_get_stackid**\ () can collect up to
1007 * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
1008 * this limit can be controlled with the **sysctl** program, and
1009 * that it should be manually increased in order to profile long
1010 * user stacks (such as stacks for Java programs). To do so, use:
1011 *
1012 * ::
1013 *
1014 * # sysctl kernel.perf_event_max_stack=<new value>
1015 * Return
1016 * The positive or null stack id on success, or a negative error
1017 * in case of failure.
1018 *
1019 * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
1020 * Description
1021 * Compute a checksum difference, from the raw buffer pointed by
1022 * *from*, of length *from_size* (that must be a multiple of 4),
1023 * towards the raw buffer pointed by *to*, of size *to_size*
1024 * (same remark). An optional *seed* can be added to the value
1025 * (this can be cascaded, the seed may come from a previous call
1026 * to the helper).
1027 *
1028 * This is flexible enough to be used in several ways:
1029 *
1030 * * With *from_size* == 0, *to_size* > 0 and *seed* set to
1031 * checksum, it can be used when pushing new data.
1032 * * With *from_size* > 0, *to_size* == 0 and *seed* set to
1033 * checksum, it can be used when removing data from a packet.
1034 * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
1035 * can be used to compute a diff. Note that *from_size* and
1036 * *to_size* do not need to be equal.
1037 *
1038 * This helper can be used in combination with
1039 * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
1040 * which one can feed in the difference computed with
1041 * **bpf_csum_diff**\ ().
1042 * Return
1043 * The checksum result, or a negative error code in case of
1044 * failure.
1045 *
1046 * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
1047 * Description
1048 * Retrieve tunnel options metadata for the packet associated to
1049 * *skb*, and store the raw tunnel option data to the buffer *opt*
1050 * of *size*.
1051 *
1052 * This helper can be used with encapsulation devices that can
1053 * operate in "collect metadata" mode (please refer to the related
1054 * note in the description of **bpf_skb_get_tunnel_key**\ () for
1055 * more details). A particular example where this can be used is
1056 * in combination with the Geneve encapsulation protocol, where it
1057 * 		allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
1058 * and retrieving arbitrary TLVs (Type-Length-Value headers) from
1059 * the eBPF program. This allows for full customization of these
1060 * headers.
1061 * Return
1062 * The size of the option data retrieved.
1063 *
1064 * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
1065 * Description
1066 * Set tunnel options metadata for the packet associated to *skb*
1067 * to the option data contained in the raw buffer *opt* of *size*.
1068 *
1069 * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
1070 * helper for additional information.
1071 * Return
1072 * 0 on success, or a negative error in case of failure.
1073 *
1074 * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
1075 * Description
1076 * Change the protocol of the *skb* to *proto*. Currently
1077 * supported are transition from IPv4 to IPv6, and from IPv6 to
1078 * IPv4. The helper takes care of the groundwork for the
1079 * transition, including resizing the socket buffer. The eBPF
1080 * program is expected to fill the new headers, if any, via
1081 * **skb_store_bytes**\ () and to recompute the checksums with
1082 * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
1083 * (). The main case for this helper is to perform NAT64
1084 * operations out of an eBPF program.
1085 *
1086 * Internally, the GSO type is marked as dodgy so that headers are
1087 * checked and segments are recalculated by the GSO/GRO engine.
1088 * The size for GSO target is adapted as well.
1089 *
1090 * All values for *flags* are reserved for future usage, and must
1091 * be left at zero.
1092 *
1093 * 		A call to this helper is susceptible to change the underlying
1094 * packet buffer. Therefore, at load time, all checks on pointers
1095 * previously done by the verifier are invalidated and must be
1096 * performed again, if the helper is used in combination with
1097 * direct packet access.
1098 * Return
1099 * 0 on success, or a negative error in case of failure.
1100 *
1101 * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
1102 * Description
1103 * Change the packet type for the packet associated to *skb*. This
1104 * comes down to setting *skb*\ **->pkt_type** to *type*, except
1105 * the eBPF program does not have a write access to *skb*\
1106 * **->pkt_type** beside this helper. Using a helper here allows
1107 * for graceful handling of errors.
1108 *
1109 * The major use case is to change incoming *skb*s to
1110 * **PACKET_HOST** in a programmatic way instead of having to
1111 * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
1112 * example.
1113 *
1114 * Note that *type* only allows certain values. At this time, they
1115 * are:
1116 *
1117 * **PACKET_HOST**
1118 * Packet is for us.
1119 * **PACKET_BROADCAST**
1120 * Send packet to all.
1121 * **PACKET_MULTICAST**
1122 * Send packet to group.
1123 * **PACKET_OTHERHOST**
1124 * Send packet to someone else.
1125 * Return
1126 * 0 on success, or a negative error in case of failure.
1127 *
1128 * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
1129 * Description
1130 * Check whether *skb* is a descendant of the cgroup2 held by
1131 * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
1132 * Return
1133 * The return value depends on the result of the test, and can be:
1134 *
1135 * * 0, if the *skb* failed the cgroup2 descendant test.
1136 * * 1, if the *skb* succeeded the cgroup2 descendant test.
1137 * * A negative error code, if an error occurred.
1138 *
1139 * u32 bpf_get_hash_recalc(struct sk_buff *skb)
1140 * Description
1141 * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
1142 * not set, in particular if the hash was cleared due to mangling,
1143 * recompute this hash. Later accesses to the hash can be done
1144 * directly with *skb*\ **->hash**.
1145 *
1146 * Calling **bpf_set_hash_invalid**\ (), changing a packet
1147 * prototype with **bpf_skb_change_proto**\ (), or calling
1148 * **bpf_skb_store_bytes**\ () with the
1149 * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
1150 * the hash and to trigger a new computation for the next call to
1151 * **bpf_get_hash_recalc**\ ().
1152 * Return
1153 * The 32-bit hash.
573 * 1154 *
574 * u64 bpf_get_current_task(void) 1155 * u64 bpf_get_current_task(void)
575 * Returns current task_struct 1156 * Return
576 * Return: current 1157 * A pointer to the current task struct.
577 * 1158 *
578 * int bpf_probe_write_user(void *dst, void *src, int len) 1159 * int bpf_probe_write_user(void *dst, const void *src, u32 len)
579 * safely attempt to write to a location 1160 * Description
580 * @dst: destination address in userspace 1161 * Attempt in a safe way to write *len* bytes from the buffer
581 * @src: source address on stack 1162 * *src* to *dst* in memory. It only works for threads that are in
582 * @len: number of bytes to copy 1163 * user context, and *dst* must be a valid user space address.
583 * Return: 0 on success or negative error 1164 *
584 * 1165 * This helper should not be used to implement any kind of
585 * int bpf_current_task_under_cgroup(map, index) 1166 * security mechanism because of TOC-TOU attacks, but rather to
586 * Check cgroup2 membership of current task 1167 * debug, divert, and manipulate execution of semi-cooperative
587 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 1168 * processes.
588 * @index: index of the cgroup in the bpf_map 1169 *
589 * Return: 1170 * Keep in mind that this feature is meant for experiments, and it
590 * == 0 current failed the cgroup2 descendant test 1171 * has a risk of crashing the system and running programs.
591 * == 1 current succeeded the cgroup2 descendant test 1172 * Therefore, when an eBPF program using this helper is attached,
592 * < 0 error 1173 * a warning including PID and process name is printed to kernel
593 * 1174 * logs.
594 * int bpf_skb_change_tail(skb, len, flags) 1175 * Return
595 * The helper will resize the skb to the given new size, to be used f.e. 1176 * 0 on success, or a negative error in case of failure.
596 * with control messages. 1177 *
597 * @skb: pointer to skb 1178 * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
598 * @len: new skb length 1179 * Description
599 * @flags: reserved    1180 * 		Check whether the probe is being run in the context of a given
600 * Return: 0 on success or negative error 1181 * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
601 * 1182 * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
602 * int bpf_skb_pull_data(skb, len) 1183 * Return
603 * The helper will pull in non-linear data in case the skb is non-linear 1184 * The return value depends on the result of the test, and can be:
604 * and not all of len are part of the linear section. Only needed for 1185 *
605 * read/write with direct packet access. 1186 * * 0, if the *skb* task belongs to the cgroup2.
606 * @skb: pointer to skb 1187 * * 1, if the *skb* task does not belong to the cgroup2.
607 * @len: len to make read/writeable 1188 * * A negative error code, if an error occurred.
608 * Return: 0 on success or negative error 1189 *
609 * 1190 * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
610 * s64 bpf_csum_update(skb, csum) 1191 * Description
611 * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. 1192 * Resize (trim or grow) the packet associated to *skb* to the
612 * @skb: pointer to skb 1193 * new *len*. The *flags* are reserved for future usage, and must
613 * @csum: csum to add 1194 * be left at zero.
614 * Return: csum on success or negative error 1195 *
615 * 1196 * The basic idea is that the helper performs the needed work to
616 * void bpf_set_hash_invalid(skb) 1197 * change the size of the packet, then the eBPF program rewrites
617 * Invalidate current skb->hash. 1198 * the rest via helpers like **bpf_skb_store_bytes**\ (),
618 * @skb: pointer to skb    1199 * 		**bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
619 * 1200 * and others. This helper is a slow path utility intended for
620 * int bpf_get_numa_node_id() 1201 * replies with control messages. And because it is targeted for
621 * Return: Id of current NUMA node. 1202 * slow path, the helper itself can afford to be slow: it
622 * 1203 * implicitly linearizes, unclones and drops offloads from the
623 * int bpf_skb_change_head() 1204 * *skb*.
624 * Grows headroom of skb and adjusts MAC header offset accordingly. 1205 *
625 * Will extend/reallocate as required automatically.    1206 * 		A call to this helper is susceptible to change the underlying
626 * May change skb data pointer and will thus invalidate any check 1207 * packet buffer. Therefore, at load time, all checks on pointers
627 * performed for direct packet access. 1208 * previously done by the verifier are invalidated and must be
628 * @skb: pointer to skb 1209 * performed again, if the helper is used in combination with
629 * @len: length of header to be pushed in front 1210 * direct packet access.
630 * @flags: Flags (unused for now) 1211 * Return
631 * Return: 0 on success or negative error 1212 * 0 on success, or a negative error in case of failure.
632 * 1213 *
633 * int bpf_xdp_adjust_head(xdp_md, delta) 1214 * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
634 * Adjust the xdp_md.data by delta 1215 * Description
635 * @xdp_md: pointer to xdp_md 1216 * Pull in non-linear data in case the *skb* is non-linear and not
636 * @delta: An positive/negative integer to be added to xdp_md.data 1217 * all of *len* are part of the linear section. Make *len* bytes
637 * Return: 0 on success or negative on error 1218 * from *skb* readable and writable. If a zero value is passed for
1219 * *len*, then the whole length of the *skb* is pulled.
1220 *
1221 * This helper is only needed for reading and writing with direct
1222 * packet access.
1223 *
1224 * For direct packet access, testing that offsets to access
1225 * are within packet boundaries (test on *skb*\ **->data_end**) is
1226 * susceptible to fail if offsets are invalid, or if the requested
1227 * data is in non-linear parts of the *skb*. On failure the
1228 * program can just bail out, or in the case of a non-linear
1229 * buffer, use a helper to make the data available. The
1230 * **bpf_skb_load_bytes**\ () helper is a first solution to access
1231 * the data. Another one consists in using **bpf_skb_pull_data**
1232 * to pull in once the non-linear parts, then retesting and
1233 * eventually access the data.
1234 *
1235 * At the same time, this also makes sure the *skb* is uncloned,
1236 * which is a necessary condition for direct write. As this needs
1237 * to be an invariant for the write part only, the verifier
1238 * detects writes and adds a prologue that is calling
1239 * **bpf_skb_pull_data()** to effectively unclone the *skb* from
1240 * the very beginning in case it is indeed cloned.
1241 *
1242 * 		A call to this helper is susceptible to change the underlying
1243 * packet buffer. Therefore, at load time, all checks on pointers
1244 * previously done by the verifier are invalidated and must be
1245 * performed again, if the helper is used in combination with
1246 * direct packet access.
1247 * Return
1248 * 0 on success, or a negative error in case of failure.
1249 *
1250 * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
1251 * Description
1252 * Add the checksum *csum* into *skb*\ **->csum** in case the
1253 * driver has supplied a checksum for the entire packet into that
1254 * field. Return an error otherwise. This helper is intended to be
1255 * used in combination with **bpf_csum_diff**\ (), in particular
1256 * when the checksum needs to be updated after data has been
1257 * written into the packet through direct packet access.
1258 * Return
1259 * The checksum on success, or a negative error code in case of
1260 * failure.
1261 *
1262 * void bpf_set_hash_invalid(struct sk_buff *skb)
1263 * Description
1264 * Invalidate the current *skb*\ **->hash**. It can be used after
1265 * mangling on headers through direct packet access, in order to
1266 * indicate that the hash is outdated and to trigger a
1267 * recalculation the next time the kernel tries to access this
1268 * hash or when the **bpf_get_hash_recalc**\ () helper is called.
1269 *
1270 * int bpf_get_numa_node_id(void)
1271 * Description
1272 * Return the id of the current NUMA node. The primary use case
1273 * for this helper is the selection of sockets for the local NUMA
1274 * node, when the program is attached to sockets using the
1275 * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
1276 * but the helper is also available to other eBPF program types,
1277 * similarly to **bpf_get_smp_processor_id**\ ().
1278 * Return
1279 * The id of current NUMA node.
1280 *
1281 * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
1282 * Description
1283 * Grows headroom of packet associated to *skb* and adjusts the
1284 * offset of the MAC header accordingly, adding *len* bytes of
1285 * space. It automatically extends and reallocates memory as
1286 * required.
1287 *
1288 * This helper can be used on a layer 3 *skb* to push a MAC header
1289 * for redirection into a layer 2 device.
1290 *
1291 * All values for *flags* are reserved for future usage, and must
1292 * be left at zero.
1293 *
1294 * 		A call to this helper is susceptible to change the underlying
1295 * packet buffer. Therefore, at load time, all checks on pointers
1296 * previously done by the verifier are invalidated and must be
1297 * performed again, if the helper is used in combination with
1298 * direct packet access.
1299 * Return
1300 * 0 on success, or a negative error in case of failure.
1301 *
1302 * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
1303 * Description
1304 * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
1305 * it is possible to use a negative value for *delta*. This helper
1306 * can be used to prepare the packet for pushing or popping
1307 * headers.
1308 *
1309 * 		A call to this helper is susceptible to change the underlying
1310 * packet buffer. Therefore, at load time, all checks on pointers
1311 * previously done by the verifier are invalidated and must be
1312 * performed again, if the helper is used in combination with
1313 * direct packet access.
1314 * Return
1315 * 0 on success, or a negative error in case of failure.
638 * 1316 *
639 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) 1317 * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
640 * Copy a NUL terminated string from unsafe address. In case the string 1318 * Description
641 * length is smaller than size, the target is not padded with further NUL 1319 * Copy a NUL terminated string from an unsafe address
642 * bytes. In case the string length is larger than size, just count-1 1320 * *unsafe_ptr* to *dst*. The *size* should include the
643 * bytes are copied and the last byte is set to NUL. 1321 * terminating NUL byte. In case the string length is smaller than
644 * @dst: destination address 1322 * *size*, the target is not padded with further NUL bytes. If the
645 * @size: maximum number of bytes to copy, including the trailing NUL 1323 * string length is larger than *size*, just *size*-1 bytes are
646 * @unsafe_ptr: unsafe address 1324 * copied and the last byte is set to NUL.
647 * Return: 1325 *
648 * > 0 length of the string including the trailing NUL on success 1326 * On success, the length of the copied string is returned. This
649 * < 0 error 1327 * makes this helper useful in tracing programs for reading
650 * 1328 * strings, and more importantly to get its length at runtime. See
651 * u64 bpf_get_socket_cookie(skb) 1329 * the following snippet:
652 * Get the cookie for the socket stored inside sk_buff. 1330 *
653 * @skb: pointer to skb 1331 * ::
654 * Return: 8 Bytes non-decreasing number on success or 0 if the socket 1332 *
655 * field is missing inside sk_buff 1333 * SEC("kprobe/sys_open")
656 * 1334 * void bpf_sys_open(struct pt_regs *ctx)
657 * u32 bpf_get_socket_uid(skb) 1335 * {
658 * Get the owner uid of the socket stored inside sk_buff. 1336 * char buf[PATHLEN]; // PATHLEN is defined to 256
659 * @skb: pointer to skb 1337 * int res = bpf_probe_read_str(buf, sizeof(buf),
660 * Return: uid of the socket owner on success or overflowuid if failed. 1338 * ctx->di);
661 * 1339 *
662 * u32 bpf_set_hash(skb, hash) 1340 * // Consume buf, for example push it to
663 * Set full skb->hash. 1341 * // userspace via bpf_perf_event_output(); we
664 * @skb: pointer to skb 1342 * // can use res (the string length) as event
665 * @hash: hash to set 1343 * // size, after checking its boundaries.
666 * 1344 * }
667 * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen) 1345 *
668 * Calls setsockopt. Not all opts are available, only those with 1346 * In comparison, using **bpf_probe_read()** helper here instead
669 * integer optvals plus TCP_CONGESTION. 1347 * to read the string would require to estimate the length at
670 * Supported levels: SOL_SOCKET and IPPROTO_TCP 1348 * compile time, and would often result in copying more memory
671 * @bpf_socket: pointer to bpf_socket 1349 * than necessary.
672 * @level: SOL_SOCKET or IPPROTO_TCP 1350 *
673 * @optname: option name 1351 * Another useful use case is when parsing individual process
674 * @optval: pointer to option value 1352 * arguments or individual environment variables navigating
675 * @optlen: length of optval in bytes 1353 * *current*\ **->mm->arg_start** and *current*\
676 * Return: 0 or negative error 1354 * **->mm->env_start**: using this helper and the return value,
677 * 1355 * one can quickly iterate at the right offset of the memory area.
678 * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen) 1356 * Return
679 * Calls getsockopt. Not all opts are available. 1357 * On success, the strictly positive length of the string,
680 * Supported levels: IPPROTO_TCP 1358 * including the trailing NUL character. On error, a negative
681 * @bpf_socket: pointer to bpf_socket 1359 * value.
682 * @level: IPPROTO_TCP 1360 *
683 * @optname: option name 1361 * u64 bpf_get_socket_cookie(struct sk_buff *skb)
684 * @optval: pointer to option value 1362 * Description
685 * @optlen: length of optval in bytes 1363 * If the **struct sk_buff** pointed by *skb* has a known socket,
686 * Return: 0 or negative error 1364 * retrieve the cookie (generated by the kernel) of this socket.
687 * 1365 * If no cookie has been set yet, generate a new cookie. Once
688 * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) 1366 * generated, the socket cookie remains stable for the life of the
689 * Set callback flags for sock_ops 1367 * socket. This helper can be useful for monitoring per socket
690 * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct 1368 * networking traffic statistics as it provides a unique socket
691 * @flags: flags value 1369 * identifier per namespace.
692 * Return: 0 for no error 1370 * Return
693 * -EINVAL if there is no full tcp socket 1371 * A 8-byte long non-decreasing number on success, or 0 if the
694 * bits in flags that are not supported by current kernel 1372 * socket field is missing inside *skb*.
695 * 1373 *
696 * int bpf_skb_adjust_room(skb, len_diff, mode, flags) 1374 * u32 bpf_get_socket_uid(struct sk_buff *skb)
697 * Grow or shrink room in sk_buff. 1375 * Return
698 * @skb: pointer to skb 1376 * The owner UID of the socket associated to *skb*. If the socket
699 * @len_diff: (signed) amount of room to grow/shrink 1377 * is **NULL**, or if it is not a full socket (i.e. if it is a
700 * @mode: operation mode (enum bpf_adj_room_mode) 1378 * time-wait or a request socket instead), **overflowuid** value
701 * @flags: reserved for future use 1379 * is returned (note that **overflowuid** might also be the actual
702 * Return: 0 on success or negative error code 1380 * UID value for the socket).
703 * 1381 *
704 * int bpf_sk_redirect_map(map, key, flags) 1382 * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
705 * Redirect skb to a sock in map using key as a lookup key for the 1383 * Description
706 * sock in map. 1384 * Set the full hash for *skb* (set the field *skb*\ **->hash**)
707 * @map: pointer to sockmap 1385 * to value *hash*.
708 * @key: key to lookup sock in map 1386 * Return
709 * @flags: reserved for future use 1387 * 0
710 * Return: SK_PASS 1388 *
711 * 1389 * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
712 * int bpf_sock_map_update(skops, map, key, flags) 1390 * Description
713 * @skops: pointer to bpf_sock_ops 1391 * Emulate a call to **setsockopt()** on the socket associated to
714 * @map: pointer to sockmap to update 1392 * *bpf_socket*, which must be a full socket. The *level* at
715 * @key: key to insert/update sock in map 1393 * which the option resides and the name *optname* of the option
716 * @flags: same flags as map update elem 1394 * must be specified, see **setsockopt(2)** for more information.
717 * 1395 * The option value of length *optlen* is pointed by *optval*.
718 * int bpf_xdp_adjust_meta(xdp_md, delta) 1396 *
719 * Adjust the xdp_md.data_meta by delta 1397 * This helper actually implements a subset of **setsockopt()**.
720 * @xdp_md: pointer to xdp_md 1398 * It supports the following *level*\ s:
721 * @delta: An positive/negative integer to be added to xdp_md.data_meta 1399 *
722 * Return: 0 on success or negative on error 1400 * * **SOL_SOCKET**, which supports the following *optname*\ s:
723 * 1401 * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
724 * int bpf_perf_event_read_value(map, flags, buf, buf_size) 1402 * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
725 * read perf event counter value and perf event enabled/running time 1403 * * **IPPROTO_TCP**, which supports the following *optname*\ s:
726 * @map: pointer to perf_event_array map 1404 * **TCP_CONGESTION**, **TCP_BPF_IW**,
727 * @flags: index of event in the map or bitmask flags 1405 * **TCP_BPF_SNDCWND_CLAMP**.
728 * @buf: buf to fill 1406 * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
729 * @buf_size: size of the buf 1407 * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
730 * Return: 0 on success or negative error code 1408 * Return
731 * 1409 * 0 on success, or a negative error in case of failure.
732 * int bpf_perf_prog_read_value(ctx, buf, buf_size) 1410 *
733 * read perf prog attached perf event counter and enabled/running time 1411 * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
734 * @ctx: pointer to ctx 1412 * Description
735 * @buf: buf to fill 1413 * Grow or shrink the room for data in the packet associated to
736 * @buf_size: size of the buf 1414 * *skb* by *len_diff*, and according to the selected *mode*.
737 * Return : 0 on success or negative error code 1415 *
738 * 1416 * There is a single supported mode at this time:
739 * int bpf_override_return(pt_regs, rc) 1417 *
740 * @pt_regs: pointer to struct pt_regs 1418 * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
741 * @rc: the return value to set 1419 * (room space is added or removed below the layer 3 header).
742 * 1420 *
743 * int bpf_msg_redirect_map(map, key, flags) 1421 * All values for *flags* are reserved for future usage, and must
744 * Redirect msg to a sock in map using key as a lookup key for the 1422 * be left at zero.
745 * sock in map. 1423 *
746 * @map: pointer to sockmap 1424 * A call to this helper is susceptible to change the underlaying
747 * @key: key to lookup sock in map 1425 * packet buffer. Therefore, at load time, all checks on pointers
748 * @flags: reserved for future use 1426 * previously done by the verifier are invalidated and must be
749 * Return: SK_PASS 1427 * performed again, if the helper is used in combination with
750 * 1428 * direct packet access.
751 * int bpf_bind(ctx, addr, addr_len) 1429 * Return
752 * Bind socket to address. Only binding to IP is supported, no port can be 1430 * 0 on success, or a negative error in case of failure.
753 * set in addr. 1431 *
754 * @ctx: pointer to context of type bpf_sock_addr 1432 * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
755 * @addr: pointer to struct sockaddr to bind socket to 1433 * Description
756 * @addr_len: length of sockaddr structure 1434 * Redirect the packet to the endpoint referenced by *map* at
757 * Return: 0 on success or negative error code 1435 * index *key*. Depending on its type, this *map* can contain
1436 * references to net devices (for forwarding packets through other
1437 * ports), or to CPUs (for redirecting XDP frames to another CPU;
1438 * but this is only implemented for native XDP (with driver
1439 * support) as of this writing).
1440 *
1441 * All values for *flags* are reserved for future usage, and must
1442 * be left at zero.
1443 *
1444 * When used to redirect packets to net devices, this helper
1445 * provides a high performance increase over **bpf_redirect**\ ().
1446 * This is due to various implementation details of the underlying
1447 * mechanisms, one of which is the fact that **bpf_redirect_map**\
1448 * () tries to send packet as a "bulk" to the device.
1449 * Return
1450 * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
1451 *
1452 * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
1453 * Description
1454 * Redirect the packet to the socket referenced by *map* (of type
1455 * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
1456 * egress interfaces can be used for redirection. The
1457 * **BPF_F_INGRESS** value in *flags* is used to make the
1458 * distinction (ingress path is selected if the flag is present,
1459 * egress path otherwise). This is the only flag supported for now.
1460 * Return
1461 * **SK_PASS** on success, or **SK_DROP** on error.
1462 *
1463 * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
1464 * Description
1465 * Add an entry to, or update a *map* referencing sockets. The
1466 * *skops* is used as a new value for the entry associated to
1467 * *key*. *flags* is one of:
1468 *
1469 * **BPF_NOEXIST**
1470 * The entry for *key* must not exist in the map.
1471 * **BPF_EXIST**
1472 * The entry for *key* must already exist in the map.
1473 * **BPF_ANY**
1474 * No condition on the existence of the entry for *key*.
1475 *
1476 * If the *map* has eBPF programs (parser and verdict), those will
1477 * be inherited by the socket being added. If the socket is
1478 * already attached to eBPF programs, this results in an error.
1479 * Return
1480 * 0 on success, or a negative error in case of failure.
1481 *
1482 * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
1483 * Description
1484 * Adjust the address pointed by *xdp_md*\ **->data_meta** by
1485 * *delta* (which can be positive or negative). Note that this
1486 * operation modifies the address stored in *xdp_md*\ **->data**,
1487 * so the latter must be loaded only after the helper has been
1488 * called.
1489 *
1490 * The use of *xdp_md*\ **->data_meta** is optional and programs
1491 * are not required to use it. The rationale is that when the
1492 * packet is processed with XDP (e.g. as DoS filter), it is
1493 * possible to push further meta data along with it before passing
1494 * to the stack, and to give the guarantee that an ingress eBPF
1495 * program attached as a TC classifier on the same device can pick
1496 * this up for further post-processing. Since TC works with socket
1497 * buffers, it remains possible to set from XDP the **mark** or
1498 * **priority** pointers, or other pointers for the socket buffer.
1499 * Having this scratch space generic and programmable allows for
1500 * more flexibility as the user is free to store whatever meta
1501 * data they need.
1502 *
1503 * A call to this helper is susceptible to change the underlaying
1504 * packet buffer. Therefore, at load time, all checks on pointers
1505 * previously done by the verifier are invalidated and must be
1506 * performed again, if the helper is used in combination with
1507 * direct packet access.
1508 * Return
1509 * 0 on success, or a negative error in case of failure.
1510 *
1511 * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
1512 * Description
1513 * Read the value of a perf event counter, and store it into *buf*
1514 * of size *buf_size*. This helper relies on a *map* of type
1515 * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
1516 * counter is selected when *map* is updated with perf event file
1517 * descriptors. The *map* is an array whose size is the number of
1518 * available CPUs, and each cell contains a value relative to one
1519 * CPU. The value to retrieve is indicated by *flags*, that
1520 * contains the index of the CPU to look up, masked with
1521 * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
1522 * **BPF_F_CURRENT_CPU** to indicate that the value for the
1523 * current CPU should be retrieved.
1524 *
1525 * This helper behaves in a way close to
1526 * **bpf_perf_event_read**\ () helper, save that instead of
1527 * just returning the value observed, it fills the *buf*
1528 * structure. This allows for additional data to be retrieved: in
1529 * particular, the enabled and running times (in *buf*\
1530 * **->enabled** and *buf*\ **->running**, respectively) are
1531 * copied. In general, **bpf_perf_event_read_value**\ () is
1532 * recommended over **bpf_perf_event_read**\ (), which has some
1533 * ABI issues and provides fewer functionalities.
1534 *
1535 * These values are interesting, because hardware PMU (Performance
1536 * Monitoring Unit) counters are limited resources. When there are
1537 * more PMU based perf events opened than available counters,
1538 * kernel will multiplex these events so each event gets certain
1539 * percentage (but not all) of the PMU time. In case that
1540 * multiplexing happens, the number of samples or counter value
1541 * will not reflect the case compared to when no multiplexing
1542 * occurs. This makes comparison between different runs difficult.
1543 * Typically, the counter value should be normalized before
1544 * comparing to other experiments. The usual normalization is done
1545 * as follows.
1546 *
1547 * ::
1548 *
1549 * normalized_counter = counter * t_enabled / t_running
1550 *
1551 * Where t_enabled is the time enabled for event and t_running is
1552 * the time running for event since last normalization. The
1553 * enabled and running times are accumulated since the perf event
1554 * open. To achieve scaling factor between two invocations of an
1555 * eBPF program, users can can use CPU id as the key (which is
1556 * typical for perf array usage model) to remember the previous
1557 * value and do the calculation inside the eBPF program.
1558 * Return
1559 * 0 on success, or a negative error in case of failure.
1560 *
1561 * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
1562 * Description
1563 * For en eBPF program attached to a perf event, retrieve the
1564 * value of the event counter associated to *ctx* and store it in
1565 * the structure pointed by *buf* and of size *buf_size*. Enabled
1566 * and running times are also stored in the structure (see
1567 * description of helper **bpf_perf_event_read_value**\ () for
1568 * more details).
1569 * Return
1570 * 0 on success, or a negative error in case of failure.
1571 *
1572 * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
1573 * Description
1574 * Emulate a call to **getsockopt()** on the socket associated to
1575 * *bpf_socket*, which must be a full socket. The *level* at
1576 * which the option resides and the name *optname* of the option
1577 * must be specified, see **getsockopt(2)** for more information.
1578 * The retrieved value is stored in the structure pointed by
1579 * *opval* and of length *optlen*.
1580 *
1581 * This helper actually implements a subset of **getsockopt()**.
1582 * It supports the following *level*\ s:
1583 *
1584 * * **IPPROTO_TCP**, which supports *optname*
1585 * **TCP_CONGESTION**.
1586 * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
1587 * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
1588 * Return
1589 * 0 on success, or a negative error in case of failure.
1590 *
1591 * int bpf_override_return(struct pt_reg *regs, u64 rc)
1592 * Description
1593 * Used for error injection, this helper uses kprobes to override
1594 * the return value of the probed function, and to set it to *rc*.
1595 * The first argument is the context *regs* on which the kprobe
1596 * works.
1597 *
1598 * This helper works by setting setting the PC (program counter)
1599 * to an override function which is run in place of the original
1600 * probed function. This means the probed function is not run at
1601 * all. The replacement function just returns with the required
1602 * value.
1603 *
1604 * This helper has security implications, and thus is subject to
1605 * restrictions. It is only available if the kernel was compiled
1606 * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
1607 * option, and in this case it only works on functions tagged with
1608 * **ALLOW_ERROR_INJECTION** in the kernel code.
1609 *
1610 * Also, the helper is only available for the architectures having
1611 * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
1612 * x86 architecture is the only one to support this feature.
1613 * Return
1614 * 0
1615 *
1616 * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
1617 * Description
1618 * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
1619 * for the full TCP socket associated to *bpf_sock_ops* to
1620 * *argval*.
1621 *
1622 * The primary use of this field is to determine if there should
1623 * be calls to eBPF programs of type
1624 * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
1625 * code. A program of the same type can change its value, per
1626 * connection and as necessary, when the connection is
1627 * established. This field is directly accessible for reading, but
1628 * this helper must be used for updates in order to return an
1629 * error if an eBPF program tries to set a callback that is not
1630 * supported in the current kernel.
1631 *
1632 * The supported callback values that *argval* can combine are:
1633 *
1634 * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
1635 * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
1636 * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
1637 *
1638 * Here are some examples of where one could call such eBPF
1639 * program:
1640 *
1641 * * When RTO fires.
1642 * * When a packet is retransmitted.
1643 * * When the connection terminates.
1644 * * When a packet is sent.
1645 * * When a packet is received.
1646 * Return
1647 * Code **-EINVAL** if the socket is not a full TCP socket;
1648 * otherwise, a positive number containing the bits that could not
1649 * be set is returned (which comes down to 0 if all bits were set
1650 * as required).
1651 *
1652 * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
1653 * Description
1654 * This helper is used in programs implementing policies at the
1655 * socket level. If the message *msg* is allowed to pass (i.e. if
1656 * the verdict eBPF program returns **SK_PASS**), redirect it to
1657 * the socket referenced by *map* (of type
1658 * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
1659 * egress interfaces can be used for redirection. The
1660 * **BPF_F_INGRESS** value in *flags* is used to make the
1661 * distinction (ingress path is selected if the flag is present,
1662 * egress path otherwise). This is the only flag supported for now.
1663 * Return
1664 * **SK_PASS** on success, or **SK_DROP** on error.
1665 *
1666 * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
1667 * Description
1668 * For socket policies, apply the verdict of the eBPF program to
1669 * the next *bytes* (number of bytes) of message *msg*.
1670 *
1671 * For example, this helper can be used in the following cases:
1672 *
1673 * * A single **sendmsg**\ () or **sendfile**\ () system call
1674 * contains multiple logical messages that the eBPF program is
1675 * supposed to read and for which it should apply a verdict.
1676 * * An eBPF program only cares to read the first *bytes* of a
1677 * *msg*. If the message has a large payload, then setting up
1678 * and calling the eBPF program repeatedly for all bytes, even
1679 * though the verdict is already known, would create unnecessary
1680 * overhead.
1681 *
1682 * When called from within an eBPF program, the helper sets a
1683 * counter internal to the BPF infrastructure, that is used to
1684 * apply the last verdict to the next *bytes*. If *bytes* is
1685 * smaller than the current data being processed from a
1686 * **sendmsg**\ () or **sendfile**\ () system call, the first
1687 * *bytes* will be sent and the eBPF program will be re-run with
1688 * the pointer for start of data pointing to byte number *bytes*
1689 * **+ 1**. If *bytes* is larger than the current data being
1690 * processed, then the eBPF verdict will be applied to multiple
1691 * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
1692 * consumed.
1693 *
1694 * Note that if a socket closes with the internal counter holding
1695 * a non-zero value, this is not a problem because data is not
1696 * being buffered for *bytes* and is sent as it is received.
1697 * Return
1698 * 0
1699 *
1700 * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
1701 * Description
1702 * For socket policies, prevent the execution of the verdict eBPF
1703 * program for message *msg* until *bytes* (byte number) have been
1704 * accumulated.
1705 *
1706 * This can be used when one needs a specific number of bytes
1707 * before a verdict can be assigned, even if the data spans
1708 * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
1709 * case would be a user calling **sendmsg**\ () repeatedly with
1710 * 1-byte long message segments. Obviously, this is bad for
1711 * performance, but it is still valid. If the eBPF program needs
1712 * *bytes* bytes to validate a header, this helper can be used to
1713 * prevent the eBPF program to be called again until *bytes* have
1714 * been accumulated.
1715 * Return
1716 * 0
1717 *
1718 * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
1719 * Description
1720 * For socket policies, pull in non-linear data from user space
1721 * for *msg* and set pointers *msg*\ **->data** and *msg*\
1722 * **->data_end** to *start* and *end* bytes offsets into *msg*,
1723 * respectively.
1724 *
1725 * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
1726 * *msg* it can only parse data that the (**data**, **data_end**)
1727 * pointers have already consumed. For **sendmsg**\ () hooks this
1728 * is likely the first scatterlist element. But for calls relying
1729 * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
1730 * be the range (**0**, **0**) because the data is shared with
1731 * user space and by default the objective is to avoid allowing
1732 * user space to modify data while (or after) eBPF verdict is
1733 * being decided. This helper can be used to pull in data and to
1734 * set the start and end pointer to given values. Data will be
1735 * copied if necessary (i.e. if data was not linear and if start
1736 * and end pointers do not point to the same chunk).
1737 *
1738 * A call to this helper is susceptible to change the underlaying
1739 * packet buffer. Therefore, at load time, all checks on pointers
1740 * previously done by the verifier are invalidated and must be
1741 * performed again, if the helper is used in combination with
1742 * direct packet access.
1743 *
1744 * All values for *flags* are reserved for future usage, and must
1745 * be left at zero.
1746 * Return
1747 * 0 on success, or a negative error in case of failure.
1748 *
1749 * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
1750 * Description
1751 * Bind the socket associated to *ctx* to the address pointed by
1752 * *addr*, of length *addr_len*. This allows for making outgoing
1753 * connection from the desired IP address, which can be useful for
1754 * example when all processes inside a cgroup should use one
1755 * single IP address on a host that has multiple IP configured.
1756 *
1757 * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
1758 * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
1759 * **AF_INET6**). Looking for a free port to bind to can be
1760 * expensive, therefore binding to port is not permitted by the
1761 * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
1762 * must be set to zero.
1763 * Return
1764 * 0 on success, or a negative error in case of failure.
1765 *
1766 * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
1767 * Description
1768 * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
1769 * only possible to shrink the packet as of this writing,
1770 * therefore *delta* must be a negative integer.
1771 *
1772 * A call to this helper is susceptible to change the underlaying
1773 * packet buffer. Therefore, at load time, all checks on pointers
1774 * previously done by the verifier are invalidated and must be
1775 * performed again, if the helper is used in combination with
1776 * direct packet access.
1777 * Return
1778 * 0 on success, or a negative error in case of failure.
1779 *
1780 * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
1781 * Description
1782 * Retrieve the XFRM state (IP transform framework, see also
1783 * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
1784 *
1785 * The retrieved value is stored in the **struct bpf_xfrm_state**
1786 * pointed by *xfrm_state* and of length *size*.
1787 *
1788 * All values for *flags* are reserved for future usage, and must
1789 * be left at zero.
1790 *
1791 * This helper is available only if the kernel was compiled with
1792 * **CONFIG_XFRM** configuration option.
1793 * Return
1794 * 0 on success, or a negative error in case of failure.
1795 *
1796 * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
1797 * Description
1798 * Return a user or a kernel stack in bpf program provided buffer.
1799 * To achieve this, the helper needs *ctx*, which is a pointer
1800 * to the context on which the tracing program is executed.
1801 * To store the stacktrace, the bpf program provides *buf* with
1802 * a nonnegative *size*.
1803 *
1804 * The last argument, *flags*, holds the number of stack frames to
1805 * skip (from 0 to 255), masked with
1806 * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
1807 * the following flags:
1808 *
1809 * **BPF_F_USER_STACK**
1810 * Collect a user space stack instead of a kernel stack.
1811 * **BPF_F_USER_BUILD_ID**
1812 * Collect buildid+offset instead of ips for user stack,
1813 * only valid if **BPF_F_USER_STACK** is also specified.
1814 *
1815 * **bpf_get_stack**\ () can collect up to
1816 * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
1817 * to sufficient large buffer size. Note that
1818 * this limit can be controlled with the **sysctl** program, and
1819 * that it should be manually increased in order to profile long
1820 * user stacks (such as stacks for Java programs). To do so, use:
1821 *
1822 * ::
1823 *
1824 * # sysctl kernel.perf_event_max_stack=<new value>
1825 * Return
1826 * A non-negative value equal to or less than *size* on success,
1827 * or a negative error in case of failure.
1828 *
1829 * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
1830 * Description
1831 * This helper is similar to **bpf_skb_load_bytes**\ () in that
1832 * it provides an easy way to load *len* bytes from *offset*
1833 * from the packet associated to *skb*, into the buffer pointed
1834 * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
1835 * a fifth argument *start_header* exists in order to select a
1836 * base offset to start from. *start_header* can be one of:
1837 *
1838 * **BPF_HDR_START_MAC**
1839 * Base offset to load data from is *skb*'s mac header.
1840 * **BPF_HDR_START_NET**
1841 * Base offset to load data from is *skb*'s network header.
1842 *
1843 * In general, "direct packet access" is the preferred method to
1844 * access packet data, however, this helper is in particular useful
1845 * in socket filters where *skb*\ **->data** does not always point
1846 * to the start of the mac header and where "direct packet access"
1847 * is not available.
1848 * Return
1849 * 0 on success, or a negative error in case of failure.
1850 *
1851 * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
1852 * Description
1853 * Do FIB lookup in kernel tables using parameters in *params*.
1854 * If lookup is successful and result shows packet is to be
1855 * forwarded, the neighbor tables are searched for the nexthop.
1856 * If successful (ie., FIB lookup shows forwarding and nexthop
1857 * is resolved), the nexthop address is returned in ipv4_dst
1858 * or ipv6_dst based on family, smac is set to mac address of
1859 * egress device, dmac is set to nexthop mac address, rt_metric
1860 * is set to metric from route (IPv4/IPv6 only).
1861 *
1862 * *plen* argument is the size of the passed in struct.
1863 * *flags* argument can be a combination of one or more of the
1864 * following values:
1865 *
1866 * **BPF_FIB_LOOKUP_DIRECT**
1867 * Do a direct table lookup vs full lookup using FIB
1868 * rules.
1869 * **BPF_FIB_LOOKUP_OUTPUT**
1870 * Perform lookup from an egress perspective (default is
1871 * ingress).
1872 *
1873 * *ctx* is either **struct xdp_md** for XDP programs or
1874 * **struct sk_buff** tc cls_act programs.
1875 * Return
1876 * Egress device index on success, 0 if packet needs to continue
1877 * up the stack for further processing or a negative error in case
1878 * of failure.
1879 *
1880 * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
1881 * Description
1882 * Add an entry to, or update a sockhash *map* referencing sockets.
1883 * The *skops* is used as a new value for the entry associated to
1884 * *key*. *flags* is one of:
1885 *
1886 * **BPF_NOEXIST**
1887 * The entry for *key* must not exist in the map.
1888 * **BPF_EXIST**
1889 * The entry for *key* must already exist in the map.
1890 * **BPF_ANY**
1891 * No condition on the existence of the entry for *key*.
1892 *
1893 * If the *map* has eBPF programs (parser and verdict), those will
1894 * be inherited by the socket being added. If the socket is
1895 * already attached to eBPF programs, this results in an error.
1896 * Return
1897 * 0 on success, or a negative error in case of failure.
1898 *
1899 * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
1900 * Description
1901 * This helper is used in programs implementing policies at the
1902 * socket level. If the message *msg* is allowed to pass (i.e. if
1903 * the verdict eBPF program returns **SK_PASS**), redirect it to
1904 * the socket referenced by *map* (of type
1905 * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
1906 * egress interfaces can be used for redirection. The
1907 * **BPF_F_INGRESS** value in *flags* is used to make the
1908 * distinction (ingress path is selected if the flag is present,
1909 * egress path otherwise). This is the only flag supported for now.
1910 * Return
1911 * **SK_PASS** on success, or **SK_DROP** on error.
1912 *
1913 * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
1914 * Description
1915 * This helper is used in programs implementing policies at the
1916 * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
1917 * if the verdeict eBPF program returns **SK_PASS**), redirect it
1918 * to the socket referenced by *map* (of type
1919 * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
1920 * egress interfaces can be used for redirection. The
1921 * **BPF_F_INGRESS** value in *flags* is used to make the
1922 * distinction (ingress path is selected if the flag is present,
1923 * egress otherwise). This is the only flag supported for now.
1924 * Return
1925 * **SK_PASS** on success, or **SK_DROP** on error.
1926 *
1927 * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
1928 * Description
1929 * Encapsulate the packet associated to *skb* within a Layer 3
1930 * protocol header. This header is provided in the buffer at
1931 * address *hdr*, with *len* its size in bytes. *type* indicates
1932 * the protocol of the header and can be one of:
1933 *
1934 * **BPF_LWT_ENCAP_SEG6**
1935 * IPv6 encapsulation with Segment Routing Header
1936 * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
1937 * the IPv6 header is computed by the kernel.
1938 * **BPF_LWT_ENCAP_SEG6_INLINE**
1939 * Only works if *skb* contains an IPv6 packet. Insert a
1940 * Segment Routing Header (**struct ipv6_sr_hdr**) inside
1941 * the IPv6 header.
1942 *
1943 * A call to this helper is susceptible to change the underlaying
1944 * packet buffer. Therefore, at load time, all checks on pointers
1945 * previously done by the verifier are invalidated and must be
1946 * performed again, if the helper is used in combination with
1947 * direct packet access.
1948 * Return
1949 * 0 on success, or a negative error in case of failure.
1950 *
1951 * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
1952 * Description
1953 * Store *len* bytes from address *from* into the packet
1954 * associated to *skb*, at *offset*. Only the flags, tag and TLVs
1955 * inside the outermost IPv6 Segment Routing Header can be
1956 * modified through this helper.
1957 *
1958 * A call to this helper is susceptible to change the underlaying
1959 * packet buffer. Therefore, at load time, all checks on pointers
1960 * previously done by the verifier are invalidated and must be
1961 * performed again, if the helper is used in combination with
1962 * direct packet access.
1963 * Return
1964 * 0 on success, or a negative error in case of failure.
1965 *
1966 * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
1967 * Description
1968 * Adjust the size allocated to TLVs in the outermost IPv6
1969 * Segment Routing Header contained in the packet associated to
1970 * *skb*, at position *offset* by *delta* bytes. Only offsets
1971 * after the segments are accepted. *delta* can be as well
1972 * positive (growing) as negative (shrinking).
1973 *
1974 * A call to this helper is susceptible to change the underlaying
1975 * packet buffer. Therefore, at load time, all checks on pointers
1976 * previously done by the verifier are invalidated and must be
1977 * performed again, if the helper is used in combination with
1978 * direct packet access.
1979 * Return
1980 * 0 on success, or a negative error in case of failure.
1981 *
1982 * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
1983 * Description
1984 * Apply an IPv6 Segment Routing action of type *action* to the
1985 * packet associated to *skb*. Each action takes a parameter
1986 * contained at address *param*, and of length *param_len* bytes.
1987 * *action* can be one of:
1988 *
1989 * **SEG6_LOCAL_ACTION_END_X**
1990 * End.X action: Endpoint with Layer-3 cross-connect.
1991 * Type of *param*: **struct in6_addr**.
1992 * **SEG6_LOCAL_ACTION_END_T**
1993 * End.T action: Endpoint with specific IPv6 table lookup.
1994 * Type of *param*: **int**.
1995 * **SEG6_LOCAL_ACTION_END_B6**
1996 * End.B6 action: Endpoint bound to an SRv6 policy.
1997 * Type of param: **struct ipv6_sr_hdr**.
1998 * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
1999 * End.B6.Encap action: Endpoint bound to an SRv6
2000 * encapsulation policy.
2001 * Type of param: **struct ipv6_sr_hdr**.
2002 *
2003 * A call to this helper is susceptible to change the underlaying
2004 * packet buffer. Therefore, at load time, all checks on pointers
2005 * previously done by the verifier are invalidated and must be
2006 * performed again, if the helper is used in combination with
2007 * direct packet access.
2008 * Return
2009 * 0 on success, or a negative error in case of failure.
2010 *
2011 * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
2012 * Description
2013 * This helper is used in programs implementing IR decoding, to
2014 * report a successfully decoded key press with *scancode*,
2015 * *toggle* value in the given *protocol*. The scancode will be
2016 * translated to a keycode using the rc keymap, and reported as
2017 * an input key down event. After a period a key up event is
2018 * generated. This period can be extended by calling either
2019 * **bpf_rc_keydown** () again with the same values, or calling
2020 * **bpf_rc_repeat** ().
2021 *
2022 * Some protocols include a toggle bit, in case the button was
2023 * released and pressed again between consecutive scancodes.
2024 *
2025 * The *ctx* should point to the lirc sample as passed into
2026 * the program.
2027 *
2028 * The *protocol* is the decoded protocol number (see
2029 * **enum rc_proto** for some predefined values).
2030 *
2031 * This helper is only available is the kernel was compiled with
2032 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2033 * "**y**".
2034 *
2035 * Return
2036 * 0
2037 *
2038 * int bpf_rc_repeat(void *ctx)
2039 * Description
2040 * This helper is used in programs implementing IR decoding, to
2041 * report a successfully decoded repeat key message. This delays
2042 * the generation of a key up event for previously generated
2043 * key down event.
2044 *
2045 * Some IR protocols like NEC have a special IR message for
2046 * repeating last button, for when a button is held down.
2047 *
2048 * The *ctx* should point to the lirc sample as passed into
2049 * the program.
2050 *
2051 * This helper is only available is the kernel was compiled with
2052 * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
2053 * "**y**".
2054 *
2055 * Return
2056 * 0
2057 *
2058 * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
2059 * Description
2060 * Return the cgroup v2 id of the socket associated with the *skb*.
2061 * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
2062 * helper for cgroup v1 by providing a tag resp. identifier that
2063 * can be matched on or used for map lookups e.g. to implement
2064 * policy. The cgroup v2 id of a given path in the hierarchy is
2065 * exposed in user space through the f_handle API in order to get
2066 * to the same 64-bit id.
2067 *
2068 * This helper can be used on TC egress path, but not on ingress,
2069 * and is available only if the kernel was compiled with the
2070 * **CONFIG_SOCK_CGROUP_DATA** configuration option.
2071 * Return
2072 * The id is returned or 0 in case the id could not be retrieved.
2073 *
2074 * u64 bpf_get_current_cgroup_id(void)
2075 * Return
2076 * A 64-bit integer containing the current cgroup id based
2077 * on the cgroup within which the current task is running.
758 */ 2078 */
759#define __BPF_FUNC_MAPPER(FN) \ 2079#define __BPF_FUNC_MAPPER(FN) \
760 FN(unspec), \ 2080 FN(unspec), \
@@ -821,7 +2141,23 @@ union bpf_attr {
821 FN(msg_apply_bytes), \ 2141 FN(msg_apply_bytes), \
822 FN(msg_cork_bytes), \ 2142 FN(msg_cork_bytes), \
823 FN(msg_pull_data), \ 2143 FN(msg_pull_data), \
824 FN(bind), 2144 FN(bind), \
2145 FN(xdp_adjust_tail), \
2146 FN(skb_get_xfrm_state), \
2147 FN(get_stack), \
2148 FN(skb_load_bytes_relative), \
2149 FN(fib_lookup), \
2150 FN(sock_hash_update), \
2151 FN(msg_redirect_hash), \
2152 FN(sk_redirect_hash), \
2153 FN(lwt_push_encap), \
2154 FN(lwt_seg6_store_bytes), \
2155 FN(lwt_seg6_adjust_srh), \
2156 FN(lwt_seg6_action), \
2157 FN(rc_repeat), \
2158 FN(rc_keydown), \
2159 FN(skb_cgroup_id), \
2160 FN(get_current_cgroup_id),
825 2161
826/* integer value in 'imm' field of BPF_CALL instruction selects which helper 2162/* integer value in 'imm' field of BPF_CALL instruction selects which helper
827 * function eBPF program intends to call 2163 * function eBPF program intends to call
@@ -855,11 +2191,14 @@ enum bpf_func_id {
855/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ 2191/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
856#define BPF_F_TUNINFO_IPV6 (1ULL << 0) 2192#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
857 2193
858/* BPF_FUNC_get_stackid flags. */ 2194/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
859#define BPF_F_SKIP_FIELD_MASK 0xffULL 2195#define BPF_F_SKIP_FIELD_MASK 0xffULL
860#define BPF_F_USER_STACK (1ULL << 8) 2196#define BPF_F_USER_STACK (1ULL << 8)
2197/* flags used by BPF_FUNC_get_stackid only. */
861#define BPF_F_FAST_STACK_CMP (1ULL << 9) 2198#define BPF_F_FAST_STACK_CMP (1ULL << 9)
862#define BPF_F_REUSE_STACKID (1ULL << 10) 2199#define BPF_F_REUSE_STACKID (1ULL << 10)
2200/* flags used by BPF_FUNC_get_stack only. */
2201#define BPF_F_USER_BUILD_ID (1ULL << 11)
863 2202
864/* BPF_FUNC_skb_set_tunnel_key flags. */ 2203/* BPF_FUNC_skb_set_tunnel_key flags. */
865#define BPF_F_ZERO_CSUM_TX (1ULL << 1) 2204#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
@@ -879,6 +2218,18 @@ enum bpf_adj_room_mode {
879 BPF_ADJ_ROOM_NET, 2218 BPF_ADJ_ROOM_NET,
880}; 2219};
881 2220
2221/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
2222enum bpf_hdr_start_off {
2223 BPF_HDR_START_MAC,
2224 BPF_HDR_START_NET,
2225};
2226
2227/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
2228enum bpf_lwt_encap_mode {
2229 BPF_LWT_ENCAP_SEG6,
2230 BPF_LWT_ENCAP_SEG6_INLINE
2231};
2232
882/* user accessible mirror of in-kernel sk_buff. 2233/* user accessible mirror of in-kernel sk_buff.
883 * new fields can only be added to the end of this structure 2234 * new fields can only be added to the end of this structure
884 */ 2235 */
@@ -923,10 +2274,24 @@ struct bpf_tunnel_key {
923 }; 2274 };
924 __u8 tunnel_tos; 2275 __u8 tunnel_tos;
925 __u8 tunnel_ttl; 2276 __u8 tunnel_ttl;
926 __u16 tunnel_ext; 2277 __u16 tunnel_ext; /* Padding, future use. */
927 __u32 tunnel_label; 2278 __u32 tunnel_label;
928}; 2279};
929 2280
2281/* user accessible mirror of in-kernel xfrm_state.
2282 * new fields can only be added to the end of this structure
2283 */
2284struct bpf_xfrm_state {
2285 __u32 reqid;
2286 __u32 spi; /* Stored in network byte order */
2287 __u16 family;
2288 __u16 ext; /* Padding, future use. */
2289 union {
2290 __u32 remote_ipv4; /* Stored in network byte order */
2291 __u32 remote_ipv6[4]; /* Stored in network byte order */
2292 };
2293};
2294
930/* Generic BPF return codes which all BPF program types may support. 2295/* Generic BPF return codes which all BPF program types may support.
931 * The values are binary compatible with their TC_ACT_* counter-part to 2296 * The values are binary compatible with their TC_ACT_* counter-part to
932 * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT 2297 * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
@@ -999,6 +2364,14 @@ enum sk_action {
999struct sk_msg_md { 2364struct sk_msg_md {
1000 void *data; 2365 void *data;
1001 void *data_end; 2366 void *data_end;
2367
2368 __u32 family;
2369 __u32 remote_ip4; /* Stored in network byte order */
2370 __u32 local_ip4; /* Stored in network byte order */
2371 __u32 remote_ip6[4]; /* Stored in network byte order */
2372 __u32 local_ip6[4]; /* Stored in network byte order */
2373 __u32 remote_port; /* Stored in network byte order */
2374 __u32 local_port; /* stored in host byte order */
1002}; 2375};
1003 2376
1004#define BPF_TAG_SIZE 8 2377#define BPF_TAG_SIZE 8
@@ -1017,8 +2390,13 @@ struct bpf_prog_info {
1017 __aligned_u64 map_ids; 2390 __aligned_u64 map_ids;
1018 char name[BPF_OBJ_NAME_LEN]; 2391 char name[BPF_OBJ_NAME_LEN];
1019 __u32 ifindex; 2392 __u32 ifindex;
2393 __u32 gpl_compatible:1;
1020 __u64 netns_dev; 2394 __u64 netns_dev;
1021 __u64 netns_ino; 2395 __u64 netns_ino;
2396 __u32 nr_jited_ksyms;
2397 __u32 nr_jited_func_lens;
2398 __aligned_u64 jited_ksyms;
2399 __aligned_u64 jited_func_lens;
1022} __attribute__((aligned(8))); 2400} __attribute__((aligned(8)));
1023 2401
1024struct bpf_map_info { 2402struct bpf_map_info {
@@ -1030,8 +2408,18 @@ struct bpf_map_info {
1030 __u32 map_flags; 2408 __u32 map_flags;
1031 char name[BPF_OBJ_NAME_LEN]; 2409 char name[BPF_OBJ_NAME_LEN];
1032 __u32 ifindex; 2410 __u32 ifindex;
2411 __u32 :32;
1033 __u64 netns_dev; 2412 __u64 netns_dev;
1034 __u64 netns_ino; 2413 __u64 netns_ino;
2414 __u32 btf_id;
2415 __u32 btf_key_type_id;
2416 __u32 btf_value_type_id;
2417} __attribute__((aligned(8)));
2418
2419struct bpf_btf_info {
2420 __aligned_u64 btf;
2421 __u32 btf_size;
2422 __u32 id;
1035} __attribute__((aligned(8))); 2423} __attribute__((aligned(8)));
1036 2424
1037/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed 2425/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -1052,6 +2440,12 @@ struct bpf_sock_addr {
1052 __u32 family; /* Allows 4-byte read, but no write */ 2440 __u32 family; /* Allows 4-byte read, but no write */
1053 __u32 type; /* Allows 4-byte read, but no write */ 2441 __u32 type; /* Allows 4-byte read, but no write */
1054 __u32 protocol; /* Allows 4-byte read, but no write */ 2442 __u32 protocol; /* Allows 4-byte read, but no write */
2443 __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
2444 * Stored in network byte order.
2445 */
2446 __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
2447 * Stored in network byte order.
2448 */
1055}; 2449};
1056 2450
1057/* User bpf_sock_ops struct to access socket values and specify request ops 2451/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -1212,4 +2606,64 @@ struct bpf_raw_tracepoint_args {
1212 __u64 args[0]; 2606 __u64 args[0];
1213}; 2607};
1214 2608
2609/* DIRECT: Skip the FIB rules and go to FIB table associated with device
2610 * OUTPUT: Do lookup from egress perspective; default is ingress
2611 */
2612#define BPF_FIB_LOOKUP_DIRECT BIT(0)
2613#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
2614
2615struct bpf_fib_lookup {
2616 /* input: network family for lookup (AF_INET, AF_INET6)
2617 * output: network family of egress nexthop
2618 */
2619 __u8 family;
2620
2621 /* set if lookup is to consider L4 data - e.g., FIB rules */
2622 __u8 l4_protocol;
2623 __be16 sport;
2624 __be16 dport;
2625
2626 /* total length of packet from network header - used for MTU check */
2627 __u16 tot_len;
2628 __u32 ifindex; /* L3 device index for lookup */
2629
2630 union {
2631 /* inputs to lookup */
2632 __u8 tos; /* AF_INET */
2633 __be32 flowlabel; /* AF_INET6 */
2634
2635 /* output: metric of fib result (IPv4/IPv6 only) */
2636 __u32 rt_metric;
2637 };
2638
2639 union {
2640 __be32 ipv4_src;
2641 __u32 ipv6_src[4]; /* in6_addr; network order */
2642 };
2643
2644 /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
2645 * network header. output: bpf_fib_lookup sets to gateway address
2646 * if FIB lookup returns gateway route
2647 */
2648 union {
2649 __be32 ipv4_dst;
2650 __u32 ipv6_dst[4]; /* in6_addr; network order */
2651 };
2652
2653 /* output */
2654 __be16 h_vlan_proto;
2655 __be16 h_vlan_TCI;
2656 __u8 smac[6]; /* ETH_ALEN */
2657 __u8 dmac[6]; /* ETH_ALEN */
2658};
2659
2660enum bpf_task_fd_type {
2661 BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
2662 BPF_FD_TYPE_TRACEPOINT, /* tp name */
2663 BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
2664 BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
2665 BPF_FD_TYPE_UPROBE, /* filename + offset */
2666 BPF_FD_TYPE_URETPROBE, /* filename + offset */
2667};
2668
1215#endif /* _UAPI__LINUX_BPF_H__ */ 2669#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000000..0b5ddbe135a4
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,113 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/* Copyright (c) 2018 Facebook */
3#ifndef _UAPI__LINUX_BTF_H__
4#define _UAPI__LINUX_BTF_H__
5
6#include <linux/types.h>
7
8#define BTF_MAGIC 0xeB9F
9#define BTF_VERSION 1
10
11struct btf_header {
12 __u16 magic;
13 __u8 version;
14 __u8 flags;
15 __u32 hdr_len;
16
17 /* All offsets are in bytes relative to the end of this header */
18 __u32 type_off; /* offset of type section */
19 __u32 type_len; /* length of type section */
20 __u32 str_off; /* offset of string section */
21 __u32 str_len; /* length of string section */
22};
23
24/* Max # of type identifier */
25#define BTF_MAX_TYPE 0x0000ffff
26/* Max offset into the string section */
27#define BTF_MAX_NAME_OFFSET 0x0000ffff
28/* Max # of struct/union/enum members or func args */
29#define BTF_MAX_VLEN 0xffff
30
31struct btf_type {
32 __u32 name_off;
33 /* "info" bits arrangement
34 * bits 0-15: vlen (e.g. # of struct's members)
35 * bits 16-23: unused
36 * bits 24-27: kind (e.g. int, ptr, array...etc)
37 * bits 28-31: unused
38 */
39 __u32 info;
40 /* "size" is used by INT, ENUM, STRUCT and UNION.
41 * "size" tells the size of the type it is describing.
42 *
43 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
44 * "type" is a type_id referring to another type.
45 */
46 union {
47 __u32 size;
48 __u32 type;
49 };
50};
51
52#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
53#define BTF_INFO_VLEN(info) ((info) & 0xffff)
54
55#define BTF_KIND_UNKN 0 /* Unknown */
56#define BTF_KIND_INT 1 /* Integer */
57#define BTF_KIND_PTR 2 /* Pointer */
58#define BTF_KIND_ARRAY 3 /* Array */
59#define BTF_KIND_STRUCT 4 /* Struct */
60#define BTF_KIND_UNION 5 /* Union */
61#define BTF_KIND_ENUM 6 /* Enumeration */
62#define BTF_KIND_FWD 7 /* Forward */
63#define BTF_KIND_TYPEDEF 8 /* Typedef */
64#define BTF_KIND_VOLATILE 9 /* Volatile */
65#define BTF_KIND_CONST 10 /* Const */
66#define BTF_KIND_RESTRICT 11 /* Restrict */
67#define BTF_KIND_MAX 11
68#define NR_BTF_KINDS 12
69
70/* For some specific BTF_KIND, "struct btf_type" is immediately
71 * followed by extra data.
72 */
73
74/* BTF_KIND_INT is followed by a u32 and the following
75 * is the 32 bits arrangement:
76 */
77#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
78#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
79#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
80
81/* Attributes stored in the BTF_INT_ENCODING */
82#define BTF_INT_SIGNED (1 << 0)
83#define BTF_INT_CHAR (1 << 1)
84#define BTF_INT_BOOL (1 << 2)
85
86/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
87 * The exact number of btf_enum is stored in the vlen (of the
88 * info in "struct btf_type").
89 */
90struct btf_enum {
91 __u32 name_off;
92 __s32 val;
93};
94
95/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
96struct btf_array {
97 __u32 type;
98 __u32 index_type;
99 __u32 nelems;
100};
101
102/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
103 * by multiple "struct btf_member". The exact number
104 * of btf_member is stored in the vlen (of the info in
105 * "struct btf_type").
106 */
107struct btf_member {
108 __u32 name_off;
109 __u32 type;
110 __u32 offset; /* offset in bits */
111};
112
113#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h
new file mode 100644
index 000000000000..841573019ae1
--- /dev/null
+++ b/tools/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/*
3 * ERSPAN Tunnel Metadata
4 *
5 * Copyright (c) 2018 VMware
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation.
10 *
11 * Userspace API for metadata mode ERSPAN tunnel
12 */
13#ifndef _UAPI_ERSPAN_H
14#define _UAPI_ERSPAN_H
15
16#include <linux/types.h> /* For __beXX in userspace */
17#include <asm/byteorder.h>
18
19/* ERSPAN version 2 metadata header */
20struct erspan_md2 {
21 __be32 timestamp;
22 __be16 sgt; /* security group tag */
23#if defined(__LITTLE_ENDIAN_BITFIELD)
24 __u8 hwid_upper:2,
25 ft:5,
26 p:1;
27 __u8 o:1,
28 gra:2,
29 dir:1,
30 hwid:4;
31#elif defined(__BIG_ENDIAN_BITFIELD)
32 __u8 p:1,
33 ft:5,
34 hwid_upper:2;
35 __u8 hwid:4,
36 dir:1,
37 gra:2,
38 o:1;
39#else
40#error "Please fix <asm/byteorder.h>"
41#endif
42};
43
44struct erspan_metadata {
45 int version;
46 union {
47 __be32 index; /* Version 1 (type II)*/
48 struct erspan_md2 md2; /* Version 2 (type III) */
49 } u;
50};
51
52#endif /* _UAPI_ERSPAN_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1065006c9bf5..39e364c70caf 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -676,6 +676,13 @@ struct kvm_ioeventfd {
676 __u8 pad[36]; 676 __u8 pad[36];
677}; 677};
678 678
679#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
680#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
681#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
682#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
683 KVM_X86_DISABLE_EXITS_HLT | \
684 KVM_X86_DISABLE_EXITS_PAUSE)
685
679/* for KVM_ENABLE_CAP */ 686/* for KVM_ENABLE_CAP */
680struct kvm_enable_cap { 687struct kvm_enable_cap {
681 /* in */ 688 /* in */
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
new file mode 100644
index 000000000000..f189931042a7
--- /dev/null
+++ b/tools/include/uapi/linux/lirc.h
@@ -0,0 +1,217 @@
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/*
3 * lirc.h - linux infrared remote control header file
4 * last modified 2010/07/13 by Jarod Wilson
5 */
6
7#ifndef _LINUX_LIRC_H
8#define _LINUX_LIRC_H
9
10#include <linux/types.h>
11#include <linux/ioctl.h>
12
13#define PULSE_BIT 0x01000000
14#define PULSE_MASK 0x00FFFFFF
15
16#define LIRC_MODE2_SPACE 0x00000000
17#define LIRC_MODE2_PULSE 0x01000000
18#define LIRC_MODE2_FREQUENCY 0x02000000
19#define LIRC_MODE2_TIMEOUT 0x03000000
20
21#define LIRC_VALUE_MASK 0x00FFFFFF
22#define LIRC_MODE2_MASK 0xFF000000
23
24#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
25#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
26#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
27#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
28
29#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
30#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
31
32#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
33#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
34#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
35#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
36
37/* used heavily by lirc userspace */
38#define lirc_t int
39
40/*** lirc compatible hardware features ***/
41
42#define LIRC_MODE2SEND(x) (x)
43#define LIRC_SEND2MODE(x) (x)
44#define LIRC_MODE2REC(x) ((x) << 16)
45#define LIRC_REC2MODE(x) ((x) >> 16)
46
47#define LIRC_MODE_RAW 0x00000001
48#define LIRC_MODE_PULSE 0x00000002
49#define LIRC_MODE_MODE2 0x00000004
50#define LIRC_MODE_SCANCODE 0x00000008
51#define LIRC_MODE_LIRCCODE 0x00000010
52
53
54#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW)
55#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE)
56#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2)
57#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
58
59#define LIRC_CAN_SEND_MASK 0x0000003f
60
61#define LIRC_CAN_SET_SEND_CARRIER 0x00000100
62#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200
63#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400
64
65#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW)
66#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE)
67#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2)
68#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE)
69#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
70
71#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
72
73#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16)
74#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
75
76#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
77#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000
78#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000
79#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000
80#define LIRC_CAN_SET_REC_FILTER 0x08000000
81
82#define LIRC_CAN_MEASURE_CARRIER 0x02000000
83#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000
84
85#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
86#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
87
88#define LIRC_CAN_NOTIFY_DECODE 0x01000000
89
90/*** IOCTL commands for lirc driver ***/
91
92#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32)
93
94#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32)
95#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32)
96#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32)
97
98#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32)
99#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32)
100
101/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
102#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32)
103
104#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32)
105#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32)
106/* Note: these can reset the according pulse_width */
107#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32)
108#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32)
109#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32)
110#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32)
111
112/*
113 * when a timeout != 0 is set the driver will send a
114 * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is
115 * never sent, timeout is disabled by default
116 */
117#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32)
118
119/* 1 enables, 0 disables timeout reports in MODE2 */
120#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32)
121
122/*
123 * if enabled from the next key press on the driver will send
124 * LIRC_MODE2_FREQUENCY packets
125 */
126#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32)
127
128/*
129 * to set a range use LIRC_SET_REC_CARRIER_RANGE with the
130 * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound
131 */
132#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32)
133
134#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32)
135
136/*
137 * struct lirc_scancode - decoded scancode with protocol for use with
138 * LIRC_MODE_SCANCODE
139 *
140 * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
141 * was decoded.
142 * @flags: should be 0 for transmit. When receiving scancodes,
143 * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
144 * depending on the protocol
145 * @rc_proto: see enum rc_proto
146 * @keycode: the translated keycode. Set to 0 for transmit.
147 * @scancode: the scancode received or to be sent
148 */
149struct lirc_scancode {
150 __u64 timestamp;
151 __u16 flags;
152 __u16 rc_proto;
153 __u32 keycode;
154 __u64 scancode;
155};
156
157/* Set if the toggle bit of rc-5 or rc-6 is enabled */
158#define LIRC_SCANCODE_FLAG_TOGGLE 1
159/* Set if this is a nec or sanyo repeat */
160#define LIRC_SCANCODE_FLAG_REPEAT 2
161
162/**
163 * enum rc_proto - the Remote Controller protocol
164 *
165 * @RC_PROTO_UNKNOWN: Protocol not known
166 * @RC_PROTO_OTHER: Protocol known but proprietary
167 * @RC_PROTO_RC5: Philips RC5 protocol
168 * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
169 * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
170 * @RC_PROTO_JVC: JVC protocol
171 * @RC_PROTO_SONY12: Sony 12 bit protocol
172 * @RC_PROTO_SONY15: Sony 15 bit protocol
173 * @RC_PROTO_SONY20: Sony 20 bit protocol
174 * @RC_PROTO_NEC: NEC protocol
175 * @RC_PROTO_NECX: Extended NEC protocol
176 * @RC_PROTO_NEC32: NEC 32 bit protocol
177 * @RC_PROTO_SANYO: Sanyo protocol
178 * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
179 * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
180 * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
181 * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
182 * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
183 * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
184 * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
185 * @RC_PROTO_SHARP: Sharp protocol
186 * @RC_PROTO_XMP: XMP protocol
187 * @RC_PROTO_CEC: CEC protocol
188 * @RC_PROTO_IMON: iMon Pad protocol
189 */
190enum rc_proto {
191 RC_PROTO_UNKNOWN = 0,
192 RC_PROTO_OTHER = 1,
193 RC_PROTO_RC5 = 2,
194 RC_PROTO_RC5X_20 = 3,
195 RC_PROTO_RC5_SZ = 4,
196 RC_PROTO_JVC = 5,
197 RC_PROTO_SONY12 = 6,
198 RC_PROTO_SONY15 = 7,
199 RC_PROTO_SONY20 = 8,
200 RC_PROTO_NEC = 9,
201 RC_PROTO_NECX = 10,
202 RC_PROTO_NEC32 = 11,
203 RC_PROTO_SANYO = 12,
204 RC_PROTO_MCIR2_KBD = 13,
205 RC_PROTO_MCIR2_MSE = 14,
206 RC_PROTO_RC6_0 = 15,
207 RC_PROTO_RC6_6A_20 = 16,
208 RC_PROTO_RC6_6A_24 = 17,
209 RC_PROTO_RC6_6A_32 = 18,
210 RC_PROTO_RC6_MCE = 19,
211 RC_PROTO_SHARP = 20,
212 RC_PROTO_XMP = 21,
213 RC_PROTO_CEC = 22,
214 RC_PROTO_IMON = 23,
215};
216
217#endif
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index af5f8c2df87a..c0d7ea0bf5b6 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -170,7 +170,7 @@ struct prctl_mm_map {
170 * asking selinux for a specific new context (e.g. with runcon) will result 170 * asking selinux for a specific new context (e.g. with runcon) will result
171 * in execve returning -EPERM. 171 * in execve returning -EPERM.
172 * 172 *
173 * See Documentation/prctl/no_new_privs.txt for more details. 173 * See Documentation/userspace-api/no_new_privs.rst for more details.
174 */ 174 */
175#define PR_SET_NO_NEW_PRIVS 38 175#define PR_SET_NO_NEW_PRIVS 38
176#define PR_GET_NO_NEW_PRIVS 39 176#define PR_GET_NO_NEW_PRIVS 39
@@ -207,4 +207,16 @@ struct prctl_mm_map {
207# define PR_SVE_VL_LEN_MASK 0xffff 207# define PR_SVE_VL_LEN_MASK 0xffff
208# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ 208# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
209 209
210/* Per task speculation control */
211#define PR_GET_SPECULATION_CTRL 52
212#define PR_SET_SPECULATION_CTRL 53
213/* Speculation control variants */
214# define PR_SPEC_STORE_BYPASS 0
215/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
216# define PR_SPEC_NOT_AFFECTED 0
217# define PR_SPEC_PRCTL (1UL << 0)
218# define PR_SPEC_ENABLE (1UL << 1)
219# define PR_SPEC_DISABLE (1UL << 2)
220# define PR_SPEC_FORCE_DISABLE (1UL << 3)
221
210#endif /* _LINUX_PRCTL_H */ 222#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
new file mode 100644
index 000000000000..286e8d6a8e98
--- /dev/null
+++ b/tools/include/uapi/linux/seg6.h
@@ -0,0 +1,55 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2/*
3 * SR-IPv6 implementation
4 *
5 * Author:
6 * David Lebrun <david.lebrun@uclouvain.be>
7 *
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#ifndef _UAPI_LINUX_SEG6_H
16#define _UAPI_LINUX_SEG6_H
17
18#include <linux/types.h>
19#include <linux/in6.h> /* For struct in6_addr. */
20
21/*
22 * SRH
23 */
24struct ipv6_sr_hdr {
25 __u8 nexthdr;
26 __u8 hdrlen;
27 __u8 type;
28 __u8 segments_left;
29 __u8 first_segment; /* Represents the last_entry field of SRH */
30 __u8 flags;
31 __u16 tag;
32
33 struct in6_addr segments[0];
34};
35
36#define SR6_FLAG1_PROTECTED (1 << 6)
37#define SR6_FLAG1_OAM (1 << 5)
38#define SR6_FLAG1_ALERT (1 << 4)
39#define SR6_FLAG1_HMAC (1 << 3)
40
41#define SR6_TLV_INGRESS 1
42#define SR6_TLV_EGRESS 2
43#define SR6_TLV_OPAQUE 3
44#define SR6_TLV_PADDING 4
45#define SR6_TLV_HMAC 5
46
47#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
48
49struct sr6_tlv {
50 __u8 type;
51 __u8 len;
52 __u8 data[0];
53};
54
55#endif
diff --git a/tools/include/uapi/linux/seg6_local.h b/tools/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000000..edc138bdc56d
--- /dev/null
+++ b/tools/include/uapi/linux/seg6_local.h
@@ -0,0 +1,80 @@
1/*
2 * SR-IPv6 implementation
3 *
4 * Author:
5 * David Lebrun <david.lebrun@uclouvain.be>
6 *
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14#ifndef _UAPI_LINUX_SEG6_LOCAL_H
15#define _UAPI_LINUX_SEG6_LOCAL_H
16
17#include <linux/seg6.h>
18
19enum {
20 SEG6_LOCAL_UNSPEC,
21 SEG6_LOCAL_ACTION,
22 SEG6_LOCAL_SRH,
23 SEG6_LOCAL_TABLE,
24 SEG6_LOCAL_NH4,
25 SEG6_LOCAL_NH6,
26 SEG6_LOCAL_IIF,
27 SEG6_LOCAL_OIF,
28 SEG6_LOCAL_BPF,
29 __SEG6_LOCAL_MAX,
30};
31#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
32
33enum {
34 SEG6_LOCAL_ACTION_UNSPEC = 0,
35 /* node segment */
36 SEG6_LOCAL_ACTION_END = 1,
37 /* adjacency segment (IPv6 cross-connect) */
38 SEG6_LOCAL_ACTION_END_X = 2,
39 /* lookup of next seg NH in table */
40 SEG6_LOCAL_ACTION_END_T = 3,
41 /* decap and L2 cross-connect */
42 SEG6_LOCAL_ACTION_END_DX2 = 4,
43 /* decap and IPv6 cross-connect */
44 SEG6_LOCAL_ACTION_END_DX6 = 5,
45 /* decap and IPv4 cross-connect */
46 SEG6_LOCAL_ACTION_END_DX4 = 6,
47 /* decap and lookup of DA in v6 table */
48 SEG6_LOCAL_ACTION_END_DT6 = 7,
49 /* decap and lookup of DA in v4 table */
50 SEG6_LOCAL_ACTION_END_DT4 = 8,
51 /* binding segment with insertion */
52 SEG6_LOCAL_ACTION_END_B6 = 9,
53 /* binding segment with encapsulation */
54 SEG6_LOCAL_ACTION_END_B6_ENCAP = 10,
55 /* binding segment with MPLS encap */
56 SEG6_LOCAL_ACTION_END_BM = 11,
57 /* lookup last seg in table */
58 SEG6_LOCAL_ACTION_END_S = 12,
59 /* forward to SR-unaware VNF with static proxy */
60 SEG6_LOCAL_ACTION_END_AS = 13,
61 /* forward to SR-unaware VNF with masquerading */
62 SEG6_LOCAL_ACTION_END_AM = 14,
63 /* custom BPF action */
64 SEG6_LOCAL_ACTION_END_BPF = 15,
65
66 __SEG6_LOCAL_ACTION_MAX,
67};
68
69#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
70
71enum {
72 SEG6_LOCAL_BPF_PROG_UNSPEC,
73 SEG6_LOCAL_BPF_PROG,
74 SEG6_LOCAL_BPF_PROG_NAME,
75 __SEG6_LOCAL_BPF_PROG_MAX,
76};
77
78#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
79
80#endif
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 6a12bbf39f7b..7aba8243a0e7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -201,7 +201,7 @@ static void mem_toupper(char *f, size_t len)
201 201
202/* 202/*
203 * Check for "NAME_PATH" environment variable to override fs location (for 203 * Check for "NAME_PATH" environment variable to override fs location (for
204 * testing). This matches the recommendation in Documentation/sysfs-rules.txt 204 * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst
205 * for SYSFS_PATH. 205 * for SYSFS_PATH.
206 */ 206 */
207static bool fs__env_override(struct fs *fs) 207static bool fs__env_override(struct fs *fs)
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 7b7fd0b18551..120037496f77 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,11 +13,9 @@
13 13
14#include "tracing_path.h" 14#include "tracing_path.h"
15 15
16 16static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
17char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; 17static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
18char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; 18static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
19char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
20
21 19
22static void __tracing_path_set(const char *tracing, const char *mountpoint) 20static void __tracing_path_set(const char *tracing, const char *mountpoint)
23{ 21{
@@ -76,7 +74,7 @@ char *get_tracing_file(const char *name)
76{ 74{
77 char *file; 75 char *file;
78 76
79 if (asprintf(&file, "%s/%s", tracing_path, name) < 0) 77 if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
80 return NULL; 78 return NULL;
81 79
82 return file; 80 return file;
@@ -87,6 +85,34 @@ void put_tracing_file(char *file)
87 free(file); 85 free(file);
88} 86}
89 87
88char *get_events_file(const char *name)
89{
90 char *file;
91
92 if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
93 return NULL;
94
95 return file;
96}
97
98void put_events_file(char *file)
99{
100 free(file);
101}
102
103DIR *tracing_events__opendir(void)
104{
105 DIR *dir = NULL;
106 char *path = get_tracing_file("events");
107
108 if (path) {
109 dir = opendir(path);
110 put_events_file(path);
111 }
112
113 return dir;
114}
115
90int tracing_path__strerror_open_tp(int err, char *buf, size_t size, 116int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
91 const char *sys, const char *name) 117 const char *sys, const char *name)
92{ 118{
@@ -129,7 +155,7 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
129 snprintf(buf, size, 155 snprintf(buf, size,
130 "Error:\tNo permissions to read %s/%s\n" 156 "Error:\tNo permissions to read %s/%s\n"
131 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", 157 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
132 tracing_events_path, filename, tracing_mnt); 158 tracing_events_path, filename, tracing_path_mount());
133 } 159 }
134 break; 160 break;
135 default: 161 default:
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index 0066f06cc381..a19136b086dc 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -3,9 +3,9 @@
3#define __API_FS_TRACING_PATH_H 3#define __API_FS_TRACING_PATH_H
4 4
5#include <linux/types.h> 5#include <linux/types.h>
6#include <dirent.h>
6 7
7extern char tracing_path[]; 8DIR *tracing_events__opendir(void);
8extern char tracing_events_path[];
9 9
10void tracing_path_set(const char *mountpoint); 10void tracing_path_set(const char *mountpoint);
11const char *tracing_path_mount(void); 11const char *tracing_path_mount(void);
@@ -13,5 +13,10 @@ const char *tracing_path_mount(void);
13char *get_tracing_file(const char *name); 13char *get_tracing_file(const char *name);
14void put_tracing_file(char *file); 14void put_tracing_file(char *file);
15 15
16char *get_events_file(const char *name);
17void put_events_file(char *file);
18
19#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
20
16int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); 21int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
17#endif /* __API_FS_TRACING_PATH_H */ 22#endif /* __API_FS_TRACING_PATH_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 64c679d67109..6070e655042d 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
libbpf-y := libbpf.o bpf.o nlattr.o libbpf-y := libbpf.o bpf.o nlattr.o btf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index e6d5f8d1477f..5390e7725e43 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
69FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf 69FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
70FEATURE_DISPLAY = libelf bpf 70FEATURE_DISPLAY = libelf bpf
71 71
72INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi 72INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
73FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) 73FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
74 74
75check_feat := 1 75check_feat := 1
@@ -189,6 +189,7 @@ install_headers:
189 $(call QUIET_INSTALL, headers) \ 189 $(call QUIET_INSTALL, headers) \
190 $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ 190 $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
191 $(call do_install,libbpf.h,$(prefix)/include/bpf,644); 191 $(call do_install,libbpf.h,$(prefix)/include/bpf,644);
192 $(call do_install,btf.h,$(prefix)/include/bpf,644);
192 193
193install: install_lib 194install: install_lib
194 195
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index acbb3f8b3bec..9ddc89dae962 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -73,43 +73,77 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
73 return syscall(__NR_bpf, cmd, attr, size); 73 return syscall(__NR_bpf, cmd, attr, size);
74} 74}
75 75
76int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 76int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
77 int key_size, int value_size, int max_entries,
78 __u32 map_flags, int node)
79{ 77{
80 __u32 name_len = name ? strlen(name) : 0; 78 __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
81 union bpf_attr attr; 79 union bpf_attr attr;
82 80
83 memset(&attr, '\0', sizeof(attr)); 81 memset(&attr, '\0', sizeof(attr));
84 82
85 attr.map_type = map_type; 83 attr.map_type = create_attr->map_type;
86 attr.key_size = key_size; 84 attr.key_size = create_attr->key_size;
87 attr.value_size = value_size; 85 attr.value_size = create_attr->value_size;
88 attr.max_entries = max_entries; 86 attr.max_entries = create_attr->max_entries;
89 attr.map_flags = map_flags; 87 attr.map_flags = create_attr->map_flags;
90 memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); 88 memcpy(attr.map_name, create_attr->name,
89 min(name_len, BPF_OBJ_NAME_LEN - 1));
90 attr.numa_node = create_attr->numa_node;
91 attr.btf_fd = create_attr->btf_fd;
92 attr.btf_key_type_id = create_attr->btf_key_type_id;
93 attr.btf_value_type_id = create_attr->btf_value_type_id;
94 attr.map_ifindex = create_attr->map_ifindex;
95
96 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
97}
91 98
99int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
100 int key_size, int value_size, int max_entries,
101 __u32 map_flags, int node)
102{
103 struct bpf_create_map_attr map_attr = {};
104
105 map_attr.name = name;
106 map_attr.map_type = map_type;
107 map_attr.map_flags = map_flags;
108 map_attr.key_size = key_size;
109 map_attr.value_size = value_size;
110 map_attr.max_entries = max_entries;
92 if (node >= 0) { 111 if (node >= 0) {
93 attr.map_flags |= BPF_F_NUMA_NODE; 112 map_attr.numa_node = node;
94 attr.numa_node = node; 113 map_attr.map_flags |= BPF_F_NUMA_NODE;
95 } 114 }
96 115
97 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 116 return bpf_create_map_xattr(&map_attr);
98} 117}
99 118
100int bpf_create_map(enum bpf_map_type map_type, int key_size, 119int bpf_create_map(enum bpf_map_type map_type, int key_size,
101 int value_size, int max_entries, __u32 map_flags) 120 int value_size, int max_entries, __u32 map_flags)
102{ 121{
103 return bpf_create_map_node(map_type, NULL, key_size, value_size, 122 struct bpf_create_map_attr map_attr = {};
104 max_entries, map_flags, -1); 123
124 map_attr.map_type = map_type;
125 map_attr.map_flags = map_flags;
126 map_attr.key_size = key_size;
127 map_attr.value_size = value_size;
128 map_attr.max_entries = max_entries;
129
130 return bpf_create_map_xattr(&map_attr);
105} 131}
106 132
107int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 133int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
108 int key_size, int value_size, int max_entries, 134 int key_size, int value_size, int max_entries,
109 __u32 map_flags) 135 __u32 map_flags)
110{ 136{
111 return bpf_create_map_node(map_type, name, key_size, value_size, 137 struct bpf_create_map_attr map_attr = {};
112 max_entries, map_flags, -1); 138
139 map_attr.name = name;
140 map_attr.map_type = map_type;
141 map_attr.map_flags = map_flags;
142 map_attr.key_size = key_size;
143 map_attr.value_size = value_size;
144 map_attr.max_entries = max_entries;
145
146 return bpf_create_map_xattr(&map_attr);
113} 147}
114 148
115int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, 149int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
@@ -168,6 +202,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
168 attr.log_size = 0; 202 attr.log_size = 0;
169 attr.log_level = 0; 203 attr.log_level = 0;
170 attr.kern_version = load_attr->kern_version; 204 attr.kern_version = load_attr->kern_version;
205 attr.prog_ifindex = load_attr->prog_ifindex;
171 memcpy(attr.prog_name, load_attr->name, 206 memcpy(attr.prog_name, load_attr->name,
172 min(name_len, BPF_OBJ_NAME_LEN - 1)); 207 min(name_len, BPF_OBJ_NAME_LEN - 1));
173 208
@@ -425,6 +460,16 @@ int bpf_map_get_fd_by_id(__u32 id)
425 return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); 460 return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
426} 461}
427 462
463int bpf_btf_get_fd_by_id(__u32 id)
464{
465 union bpf_attr attr;
466
467 bzero(&attr, sizeof(attr));
468 attr.btf_id = id;
469
470 return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
471}
472
428int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) 473int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
429{ 474{
430 union bpf_attr attr; 475 union bpf_attr attr;
@@ -573,3 +618,51 @@ cleanup:
573 close(sock); 618 close(sock);
574 return ret; 619 return ret;
575} 620}
621
622int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
623 bool do_log)
624{
625 union bpf_attr attr = {};
626 int fd;
627
628 attr.btf = ptr_to_u64(btf);
629 attr.btf_size = btf_size;
630
631retry:
632 if (do_log && log_buf && log_buf_size) {
633 attr.btf_log_level = 1;
634 attr.btf_log_size = log_buf_size;
635 attr.btf_log_buf = ptr_to_u64(log_buf);
636 }
637
638 fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
639 if (fd == -1 && !do_log && log_buf && log_buf_size) {
640 do_log = true;
641 goto retry;
642 }
643
644 return fd;
645}
646
647int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
648 __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
649 __u64 *probe_addr)
650{
651 union bpf_attr attr = {};
652 int err;
653
654 attr.task_fd_query.pid = pid;
655 attr.task_fd_query.fd = fd;
656 attr.task_fd_query.flags = flags;
657 attr.task_fd_query.buf = ptr_to_u64(buf);
658 attr.task_fd_query.buf_len = *buf_len;
659
660 err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
661 *buf_len = attr.task_fd_query.buf_len;
662 *prog_id = attr.task_fd_query.prog_id;
663 *fd_type = attr.task_fd_query.fd_type;
664 *probe_offset = attr.task_fd_query.probe_offset;
665 *probe_addr = attr.task_fd_query.probe_addr;
666
667 return err;
668}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 39f6a0d64a3b..0639a30a457d 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,8 +24,24 @@
24#define __BPF_BPF_H 24#define __BPF_BPF_H
25 25
26#include <linux/bpf.h> 26#include <linux/bpf.h>
27#include <stdbool.h>
27#include <stddef.h> 28#include <stddef.h>
28 29
30struct bpf_create_map_attr {
31 const char *name;
32 enum bpf_map_type map_type;
33 __u32 map_flags;
34 __u32 key_size;
35 __u32 value_size;
36 __u32 max_entries;
37 __u32 numa_node;
38 __u32 btf_fd;
39 __u32 btf_key_type_id;
40 __u32 btf_value_type_id;
41 __u32 map_ifindex;
42};
43
44int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
29int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 45int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
30 int key_size, int value_size, int max_entries, 46 int key_size, int value_size, int max_entries,
31 __u32 map_flags, int node); 47 __u32 map_flags, int node);
@@ -49,6 +65,7 @@ struct bpf_load_program_attr {
49 size_t insns_cnt; 65 size_t insns_cnt;
50 const char *license; 66 const char *license;
51 __u32 kern_version; 67 __u32 kern_version;
68 __u32 prog_ifindex;
52}; 69};
53 70
54/* Recommend log buffer size */ 71/* Recommend log buffer size */
@@ -83,8 +100,14 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
83int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 100int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
84int bpf_prog_get_fd_by_id(__u32 id); 101int bpf_prog_get_fd_by_id(__u32 id);
85int bpf_map_get_fd_by_id(__u32 id); 102int bpf_map_get_fd_by_id(__u32 id);
103int bpf_btf_get_fd_by_id(__u32 id);
86int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 104int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
87int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 105int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
88 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); 106 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
89int bpf_raw_tracepoint_open(const char *name, int prog_fd); 107int bpf_raw_tracepoint_open(const char *name, int prog_fd);
108int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
109 bool do_log);
110int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
111 __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
112 __u64 *probe_addr);
90#endif 113#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 000000000000..8c54a4b6f187
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,373 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3
4#include <stdlib.h>
5#include <stdint.h>
6#include <string.h>
7#include <unistd.h>
8#include <errno.h>
9#include <linux/err.h>
10#include <linux/btf.h>
11#include "btf.h"
12#include "bpf.h"
13
14#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
15#define max(a, b) ((a) > (b) ? (a) : (b))
16#define min(a, b) ((a) < (b) ? (a) : (b))
17
18#define BTF_MAX_NR_TYPES 65535
19
20static struct btf_type btf_void;
21
22struct btf {
23 union {
24 struct btf_header *hdr;
25 void *data;
26 };
27 struct btf_type **types;
28 const char *strings;
29 void *nohdr_data;
30 uint32_t nr_types;
31 uint32_t types_size;
32 uint32_t data_size;
33 int fd;
34};
35
36static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
37{
38 if (offset < btf->hdr->str_len)
39 return &btf->strings[offset];
40 else
41 return NULL;
42}
43
44static int btf_add_type(struct btf *btf, struct btf_type *t)
45{
46 if (btf->types_size - btf->nr_types < 2) {
47 struct btf_type **new_types;
48 u32 expand_by, new_size;
49
50 if (btf->types_size == BTF_MAX_NR_TYPES)
51 return -E2BIG;
52
53 expand_by = max(btf->types_size >> 2, 16);
54 new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
55
56 new_types = realloc(btf->types, sizeof(*new_types) * new_size);
57 if (!new_types)
58 return -ENOMEM;
59
60 if (btf->nr_types == 0)
61 new_types[0] = &btf_void;
62
63 btf->types = new_types;
64 btf->types_size = new_size;
65 }
66
67 btf->types[++(btf->nr_types)] = t;
68
69 return 0;
70}
71
72static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
73{
74 const struct btf_header *hdr = btf->hdr;
75 u32 meta_left;
76
77 if (btf->data_size < sizeof(struct btf_header)) {
78 elog("BTF header not found\n");
79 return -EINVAL;
80 }
81
82 if (hdr->magic != BTF_MAGIC) {
83 elog("Invalid BTF magic:%x\n", hdr->magic);
84 return -EINVAL;
85 }
86
87 if (hdr->version != BTF_VERSION) {
88 elog("Unsupported BTF version:%u\n", hdr->version);
89 return -ENOTSUP;
90 }
91
92 if (hdr->flags) {
93 elog("Unsupported BTF flags:%x\n", hdr->flags);
94 return -ENOTSUP;
95 }
96
97 meta_left = btf->data_size - sizeof(*hdr);
98 if (!meta_left) {
99 elog("BTF has no data\n");
100 return -EINVAL;
101 }
102
103 if (meta_left < hdr->type_off) {
104 elog("Invalid BTF type section offset:%u\n", hdr->type_off);
105 return -EINVAL;
106 }
107
108 if (meta_left < hdr->str_off) {
109 elog("Invalid BTF string section offset:%u\n", hdr->str_off);
110 return -EINVAL;
111 }
112
113 if (hdr->type_off >= hdr->str_off) {
114 elog("BTF type section offset >= string section offset. No type?\n");
115 return -EINVAL;
116 }
117
118 if (hdr->type_off & 0x02) {
119 elog("BTF type section is not aligned to 4 bytes\n");
120 return -EINVAL;
121 }
122
123 btf->nohdr_data = btf->hdr + 1;
124
125 return 0;
126}
127
128static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
129{
130 const struct btf_header *hdr = btf->hdr;
131 const char *start = btf->nohdr_data + hdr->str_off;
132 const char *end = start + btf->hdr->str_len;
133
134 if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
135 start[0] || end[-1]) {
136 elog("Invalid BTF string section\n");
137 return -EINVAL;
138 }
139
140 btf->strings = start;
141
142 return 0;
143}
144
145static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
146{
147 struct btf_header *hdr = btf->hdr;
148 void *nohdr_data = btf->nohdr_data;
149 void *next_type = nohdr_data + hdr->type_off;
150 void *end_type = nohdr_data + hdr->str_off;
151
152 while (next_type < end_type) {
153 struct btf_type *t = next_type;
154 uint16_t vlen = BTF_INFO_VLEN(t->info);
155 int err;
156
157 next_type += sizeof(*t);
158 switch (BTF_INFO_KIND(t->info)) {
159 case BTF_KIND_INT:
160 next_type += sizeof(int);
161 break;
162 case BTF_KIND_ARRAY:
163 next_type += sizeof(struct btf_array);
164 break;
165 case BTF_KIND_STRUCT:
166 case BTF_KIND_UNION:
167 next_type += vlen * sizeof(struct btf_member);
168 break;
169 case BTF_KIND_ENUM:
170 next_type += vlen * sizeof(struct btf_enum);
171 break;
172 case BTF_KIND_TYPEDEF:
173 case BTF_KIND_PTR:
174 case BTF_KIND_FWD:
175 case BTF_KIND_VOLATILE:
176 case BTF_KIND_CONST:
177 case BTF_KIND_RESTRICT:
178 break;
179 default:
180 elog("Unsupported BTF_KIND:%u\n",
181 BTF_INFO_KIND(t->info));
182 return -EINVAL;
183 }
184
185 err = btf_add_type(btf, t);
186 if (err)
187 return err;
188 }
189
190 return 0;
191}
192
193static const struct btf_type *btf_type_by_id(const struct btf *btf,
194 uint32_t type_id)
195{
196 if (type_id > btf->nr_types)
197 return NULL;
198
199 return btf->types[type_id];
200}
201
202static bool btf_type_is_void(const struct btf_type *t)
203{
204 return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
205}
206
207static bool btf_type_is_void_or_null(const struct btf_type *t)
208{
209 return !t || btf_type_is_void(t);
210}
211
212static int64_t btf_type_size(const struct btf_type *t)
213{
214 switch (BTF_INFO_KIND(t->info)) {
215 case BTF_KIND_INT:
216 case BTF_KIND_STRUCT:
217 case BTF_KIND_UNION:
218 case BTF_KIND_ENUM:
219 return t->size;
220 case BTF_KIND_PTR:
221 return sizeof(void *);
222 default:
223 return -EINVAL;
224 }
225}
226
227#define MAX_RESOLVE_DEPTH 32
228
229int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id)
230{
231 const struct btf_array *array;
232 const struct btf_type *t;
233 uint32_t nelems = 1;
234 int64_t size = -1;
235 int i;
236
237 t = btf_type_by_id(btf, type_id);
238 for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
239 i++) {
240 size = btf_type_size(t);
241 if (size >= 0)
242 break;
243
244 switch (BTF_INFO_KIND(t->info)) {
245 case BTF_KIND_TYPEDEF:
246 case BTF_KIND_VOLATILE:
247 case BTF_KIND_CONST:
248 case BTF_KIND_RESTRICT:
249 type_id = t->type;
250 break;
251 case BTF_KIND_ARRAY:
252 array = (const struct btf_array *)(t + 1);
253 if (nelems && array->nelems > UINT32_MAX / nelems)
254 return -E2BIG;
255 nelems *= array->nelems;
256 type_id = array->type;
257 break;
258 default:
259 return -EINVAL;
260 }
261
262 t = btf_type_by_id(btf, type_id);
263 }
264
265 if (size < 0)
266 return -EINVAL;
267
268 if (nelems && size > UINT32_MAX / nelems)
269 return -E2BIG;
270
271 return nelems * size;
272}
273
274int32_t btf__find_by_name(const struct btf *btf, const char *type_name)
275{
276 uint32_t i;
277
278 if (!strcmp(type_name, "void"))
279 return 0;
280
281 for (i = 1; i <= btf->nr_types; i++) {
282 const struct btf_type *t = btf->types[i];
283 const char *name = btf_name_by_offset(btf, t->name_off);
284
285 if (name && !strcmp(type_name, name))
286 return i;
287 }
288
289 return -ENOENT;
290}
291
292void btf__free(struct btf *btf)
293{
294 if (!btf)
295 return;
296
297 if (btf->fd != -1)
298 close(btf->fd);
299
300 free(btf->data);
301 free(btf->types);
302 free(btf);
303}
304
305struct btf *btf__new(uint8_t *data, uint32_t size,
306 btf_print_fn_t err_log)
307{
308 uint32_t log_buf_size = 0;
309 char *log_buf = NULL;
310 struct btf *btf;
311 int err;
312
313 btf = calloc(1, sizeof(struct btf));
314 if (!btf)
315 return ERR_PTR(-ENOMEM);
316
317 btf->fd = -1;
318
319 if (err_log) {
320 log_buf = malloc(BPF_LOG_BUF_SIZE);
321 if (!log_buf) {
322 err = -ENOMEM;
323 goto done;
324 }
325 *log_buf = 0;
326 log_buf_size = BPF_LOG_BUF_SIZE;
327 }
328
329 btf->data = malloc(size);
330 if (!btf->data) {
331 err = -ENOMEM;
332 goto done;
333 }
334
335 memcpy(btf->data, data, size);
336 btf->data_size = size;
337
338 btf->fd = bpf_load_btf(btf->data, btf->data_size,
339 log_buf, log_buf_size, false);
340
341 if (btf->fd == -1) {
342 err = -errno;
343 elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
344 if (log_buf && *log_buf)
345 elog("%s\n", log_buf);
346 goto done;
347 }
348
349 err = btf_parse_hdr(btf, err_log);
350 if (err)
351 goto done;
352
353 err = btf_parse_str_sec(btf, err_log);
354 if (err)
355 goto done;
356
357 err = btf_parse_type_sec(btf, err_log);
358
359done:
360 free(log_buf);
361
362 if (err) {
363 btf__free(btf);
364 return ERR_PTR(err);
365 }
366
367 return btf;
368}
369
370int btf__fd(const struct btf *btf)
371{
372 return btf->fd;
373}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 000000000000..74bb344035bb
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,22 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3
4#ifndef __BPF_BTF_H
5#define __BPF_BTF_H
6
7#include <stdint.h>
8
9#define BTF_ELF_SEC ".BTF"
10
11struct btf;
12
13typedef int (*btf_print_fn_t)(const char *, ...)
14 __attribute__((format(printf, 1, 2)));
15
16void btf__free(struct btf *btf);
17struct btf *btf__new(uint8_t *data, uint32_t size, btf_print_fn_t err_log);
18int32_t btf__find_by_name(const struct btf *btf, const char *type_name);
19int64_t btf__resolve_size(const struct btf *btf, uint32_t type_id);
20int btf__fd(const struct btf *btf);
21
22#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5922443063f0..a1e96b5de5ff 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -31,6 +31,7 @@
31#include <unistd.h> 31#include <unistd.h>
32#include <fcntl.h> 32#include <fcntl.h>
33#include <errno.h> 33#include <errno.h>
34#include <perf-sys.h>
34#include <asm/unistd.h> 35#include <asm/unistd.h>
35#include <linux/err.h> 36#include <linux/err.h>
36#include <linux/kernel.h> 37#include <linux/kernel.h>
@@ -45,6 +46,7 @@
45 46
46#include "libbpf.h" 47#include "libbpf.h"
47#include "bpf.h" 48#include "bpf.h"
49#include "btf.h"
48 50
49#ifndef EM_BPF 51#ifndef EM_BPF
50#define EM_BPF 247 52#define EM_BPF 247
@@ -176,6 +178,7 @@ struct bpf_program {
176 /* Index in elf obj file, for relocation use. */ 178 /* Index in elf obj file, for relocation use. */
177 int idx; 179 int idx;
178 char *name; 180 char *name;
181 int prog_ifindex;
179 char *section_name; 182 char *section_name;
180 struct bpf_insn *insns; 183 struct bpf_insn *insns;
181 size_t insns_cnt, main_prog_cnt; 184 size_t insns_cnt, main_prog_cnt;
@@ -211,7 +214,10 @@ struct bpf_map {
211 int fd; 214 int fd;
212 char *name; 215 char *name;
213 size_t offset; 216 size_t offset;
217 int map_ifindex;
214 struct bpf_map_def def; 218 struct bpf_map_def def;
219 uint32_t btf_key_type_id;
220 uint32_t btf_value_type_id;
215 void *priv; 221 void *priv;
216 bpf_map_clear_priv_t clear_priv; 222 bpf_map_clear_priv_t clear_priv;
217}; 223};
@@ -256,6 +262,8 @@ struct bpf_object {
256 */ 262 */
257 struct list_head list; 263 struct list_head list;
258 264
265 struct btf *btf;
266
259 void *priv; 267 void *priv;
260 bpf_object_clear_priv_t clear_priv; 268 bpf_object_clear_priv_t clear_priv;
261 269
@@ -819,7 +827,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
819 data->d_size); 827 data->d_size);
820 else if (strcmp(name, "maps") == 0) 828 else if (strcmp(name, "maps") == 0)
821 obj->efile.maps_shndx = idx; 829 obj->efile.maps_shndx = idx;
822 else if (sh.sh_type == SHT_SYMTAB) { 830 else if (strcmp(name, BTF_ELF_SEC) == 0) {
831 obj->btf = btf__new(data->d_buf, data->d_size,
832 __pr_debug);
833 if (IS_ERR(obj->btf)) {
834 pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
835 BTF_ELF_SEC, PTR_ERR(obj->btf));
836 obj->btf = NULL;
837 }
838 } else if (sh.sh_type == SHT_SYMTAB) {
823 if (obj->efile.symbols) { 839 if (obj->efile.symbols) {
824 pr_warning("bpf: multiple SYMTAB in %s\n", 840 pr_warning("bpf: multiple SYMTAB in %s\n",
825 obj->path); 841 obj->path);
@@ -996,33 +1012,127 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
996 return 0; 1012 return 0;
997} 1013}
998 1014
1015static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
1016{
1017 struct bpf_map_def *def = &map->def;
1018 const size_t max_name = 256;
1019 int64_t key_size, value_size;
1020 int32_t key_id, value_id;
1021 char name[max_name];
1022
1023 /* Find key type by name from BTF */
1024 if (snprintf(name, max_name, "%s_key", map->name) == max_name) {
1025 pr_warning("map:%s length of BTF key_type:%s_key is too long\n",
1026 map->name, map->name);
1027 return -EINVAL;
1028 }
1029
1030 key_id = btf__find_by_name(btf, name);
1031 if (key_id < 0) {
1032 pr_debug("map:%s key_type:%s cannot be found in BTF\n",
1033 map->name, name);
1034 return key_id;
1035 }
1036
1037 key_size = btf__resolve_size(btf, key_id);
1038 if (key_size < 0) {
1039 pr_warning("map:%s key_type:%s cannot get the BTF type_size\n",
1040 map->name, name);
1041 return key_size;
1042 }
1043
1044 if (def->key_size != key_size) {
1045 pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n",
1046 map->name, name, (unsigned int)key_size, def->key_size);
1047 return -EINVAL;
1048 }
1049
1050 /* Find value type from BTF */
1051 if (snprintf(name, max_name, "%s_value", map->name) == max_name) {
1052 pr_warning("map:%s length of BTF value_type:%s_value is too long\n",
1053 map->name, map->name);
1054 return -EINVAL;
1055 }
1056
1057 value_id = btf__find_by_name(btf, name);
1058 if (value_id < 0) {
1059 pr_debug("map:%s value_type:%s cannot be found in BTF\n",
1060 map->name, name);
1061 return value_id;
1062 }
1063
1064 value_size = btf__resolve_size(btf, value_id);
1065 if (value_size < 0) {
1066 pr_warning("map:%s value_type:%s cannot get the BTF type_size\n",
1067 map->name, name);
1068 return value_size;
1069 }
1070
1071 if (def->value_size != value_size) {
1072 pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n",
1073 map->name, name, (unsigned int)value_size, def->value_size);
1074 return -EINVAL;
1075 }
1076
1077 map->btf_key_type_id = key_id;
1078 map->btf_value_type_id = value_id;
1079
1080 return 0;
1081}
1082
999static int 1083static int
1000bpf_object__create_maps(struct bpf_object *obj) 1084bpf_object__create_maps(struct bpf_object *obj)
1001{ 1085{
1086 struct bpf_create_map_attr create_attr = {};
1002 unsigned int i; 1087 unsigned int i;
1088 int err;
1003 1089
1004 for (i = 0; i < obj->nr_maps; i++) { 1090 for (i = 0; i < obj->nr_maps; i++) {
1005 struct bpf_map_def *def = &obj->maps[i].def; 1091 struct bpf_map *map = &obj->maps[i];
1006 int *pfd = &obj->maps[i].fd; 1092 struct bpf_map_def *def = &map->def;
1007 1093 int *pfd = &map->fd;
1008 *pfd = bpf_create_map_name(def->type, 1094
1009 obj->maps[i].name, 1095 create_attr.name = map->name;
1010 def->key_size, 1096 create_attr.map_ifindex = map->map_ifindex;
1011 def->value_size, 1097 create_attr.map_type = def->type;
1012 def->max_entries, 1098 create_attr.map_flags = def->map_flags;
1013 def->map_flags); 1099 create_attr.key_size = def->key_size;
1100 create_attr.value_size = def->value_size;
1101 create_attr.max_entries = def->max_entries;
1102 create_attr.btf_fd = 0;
1103 create_attr.btf_key_type_id = 0;
1104 create_attr.btf_value_type_id = 0;
1105
1106 if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
1107 create_attr.btf_fd = btf__fd(obj->btf);
1108 create_attr.btf_key_type_id = map->btf_key_type_id;
1109 create_attr.btf_value_type_id = map->btf_value_type_id;
1110 }
1111
1112 *pfd = bpf_create_map_xattr(&create_attr);
1113 if (*pfd < 0 && create_attr.btf_key_type_id) {
1114 pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
1115 map->name, strerror(errno), errno);
1116 create_attr.btf_fd = 0;
1117 create_attr.btf_key_type_id = 0;
1118 create_attr.btf_value_type_id = 0;
1119 map->btf_key_type_id = 0;
1120 map->btf_value_type_id = 0;
1121 *pfd = bpf_create_map_xattr(&create_attr);
1122 }
1123
1014 if (*pfd < 0) { 1124 if (*pfd < 0) {
1015 size_t j; 1125 size_t j;
1016 int err = *pfd;
1017 1126
1127 err = *pfd;
1018 pr_warning("failed to create map (name: '%s'): %s\n", 1128 pr_warning("failed to create map (name: '%s'): %s\n",
1019 obj->maps[i].name, 1129 map->name,
1020 strerror(errno)); 1130 strerror(errno));
1021 for (j = 0; j < i; j++) 1131 for (j = 0; j < i; j++)
1022 zclose(obj->maps[j].fd); 1132 zclose(obj->maps[j].fd);
1023 return err; 1133 return err;
1024 } 1134 }
1025 pr_debug("create map %s: fd=%d\n", obj->maps[i].name, *pfd); 1135 pr_debug("create map %s: fd=%d\n", map->name, *pfd);
1026 } 1136 }
1027 1137
1028 return 0; 1138 return 0;
@@ -1166,7 +1276,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
1166static int 1276static int
1167load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, 1277load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1168 const char *name, struct bpf_insn *insns, int insns_cnt, 1278 const char *name, struct bpf_insn *insns, int insns_cnt,
1169 char *license, u32 kern_version, int *pfd) 1279 char *license, u32 kern_version, int *pfd, int prog_ifindex)
1170{ 1280{
1171 struct bpf_load_program_attr load_attr; 1281 struct bpf_load_program_attr load_attr;
1172 char *log_buf; 1282 char *log_buf;
@@ -1180,6 +1290,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
1180 load_attr.insns_cnt = insns_cnt; 1290 load_attr.insns_cnt = insns_cnt;
1181 load_attr.license = license; 1291 load_attr.license = license;
1182 load_attr.kern_version = kern_version; 1292 load_attr.kern_version = kern_version;
1293 load_attr.prog_ifindex = prog_ifindex;
1183 1294
1184 if (!load_attr.insns || !load_attr.insns_cnt) 1295 if (!load_attr.insns || !load_attr.insns_cnt)
1185 return -EINVAL; 1296 return -EINVAL;
@@ -1261,7 +1372,8 @@ bpf_program__load(struct bpf_program *prog,
1261 } 1372 }
1262 err = load_program(prog->type, prog->expected_attach_type, 1373 err = load_program(prog->type, prog->expected_attach_type,
1263 prog->name, prog->insns, prog->insns_cnt, 1374 prog->name, prog->insns, prog->insns_cnt,
1264 license, kern_version, &fd); 1375 license, kern_version, &fd,
1376 prog->prog_ifindex);
1265 if (!err) 1377 if (!err)
1266 prog->instances.fds[0] = fd; 1378 prog->instances.fds[0] = fd;
1267 goto out; 1379 goto out;
@@ -1292,7 +1404,8 @@ bpf_program__load(struct bpf_program *prog,
1292 err = load_program(prog->type, prog->expected_attach_type, 1404 err = load_program(prog->type, prog->expected_attach_type,
1293 prog->name, result.new_insn_ptr, 1405 prog->name, result.new_insn_ptr,
1294 result.new_insn_cnt, 1406 result.new_insn_cnt,
1295 license, kern_version, &fd); 1407 license, kern_version, &fd,
1408 prog->prog_ifindex);
1296 1409
1297 if (err) { 1410 if (err) {
1298 pr_warning("Loading the %dth instance of program '%s' failed\n", 1411 pr_warning("Loading the %dth instance of program '%s' failed\n",
@@ -1331,9 +1444,39 @@ bpf_object__load_progs(struct bpf_object *obj)
1331 return 0; 1444 return 0;
1332} 1445}
1333 1446
1334static int bpf_object__validate(struct bpf_object *obj) 1447static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
1448{
1449 switch (type) {
1450 case BPF_PROG_TYPE_SOCKET_FILTER:
1451 case BPF_PROG_TYPE_SCHED_CLS:
1452 case BPF_PROG_TYPE_SCHED_ACT:
1453 case BPF_PROG_TYPE_XDP:
1454 case BPF_PROG_TYPE_CGROUP_SKB:
1455 case BPF_PROG_TYPE_CGROUP_SOCK:
1456 case BPF_PROG_TYPE_LWT_IN:
1457 case BPF_PROG_TYPE_LWT_OUT:
1458 case BPF_PROG_TYPE_LWT_XMIT:
1459 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
1460 case BPF_PROG_TYPE_SOCK_OPS:
1461 case BPF_PROG_TYPE_SK_SKB:
1462 case BPF_PROG_TYPE_CGROUP_DEVICE:
1463 case BPF_PROG_TYPE_SK_MSG:
1464 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
1465 case BPF_PROG_TYPE_LIRC_MODE2:
1466 return false;
1467 case BPF_PROG_TYPE_UNSPEC:
1468 case BPF_PROG_TYPE_KPROBE:
1469 case BPF_PROG_TYPE_TRACEPOINT:
1470 case BPF_PROG_TYPE_PERF_EVENT:
1471 case BPF_PROG_TYPE_RAW_TRACEPOINT:
1472 default:
1473 return true;
1474 }
1475}
1476
1477static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
1335{ 1478{
1336 if (obj->kern_version == 0) { 1479 if (needs_kver && obj->kern_version == 0) {
1337 pr_warning("%s doesn't provide kernel version\n", 1480 pr_warning("%s doesn't provide kernel version\n",
1338 obj->path); 1481 obj->path);
1339 return -LIBBPF_ERRNO__KVERSION; 1482 return -LIBBPF_ERRNO__KVERSION;
@@ -1342,7 +1485,8 @@ static int bpf_object__validate(struct bpf_object *obj)
1342} 1485}
1343 1486
1344static struct bpf_object * 1487static struct bpf_object *
1345__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz) 1488__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
1489 bool needs_kver)
1346{ 1490{
1347 struct bpf_object *obj; 1491 struct bpf_object *obj;
1348 int err; 1492 int err;
@@ -1360,7 +1504,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz)
1360 CHECK_ERR(bpf_object__check_endianness(obj), err, out); 1504 CHECK_ERR(bpf_object__check_endianness(obj), err, out);
1361 CHECK_ERR(bpf_object__elf_collect(obj), err, out); 1505 CHECK_ERR(bpf_object__elf_collect(obj), err, out);
1362 CHECK_ERR(bpf_object__collect_reloc(obj), err, out); 1506 CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
1363 CHECK_ERR(bpf_object__validate(obj), err, out); 1507 CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
1364 1508
1365 bpf_object__elf_finish(obj); 1509 bpf_object__elf_finish(obj);
1366 return obj; 1510 return obj;
@@ -1377,7 +1521,7 @@ struct bpf_object *bpf_object__open(const char *path)
1377 1521
1378 pr_debug("loading %s\n", path); 1522 pr_debug("loading %s\n", path);
1379 1523
1380 return __bpf_object__open(path, NULL, 0); 1524 return __bpf_object__open(path, NULL, 0, true);
1381} 1525}
1382 1526
1383struct bpf_object *bpf_object__open_buffer(void *obj_buf, 1527struct bpf_object *bpf_object__open_buffer(void *obj_buf,
@@ -1400,7 +1544,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
1400 pr_debug("loading object '%s' from buffer\n", 1544 pr_debug("loading object '%s' from buffer\n",
1401 name); 1545 name);
1402 1546
1403 return __bpf_object__open(name, obj_buf, obj_buf_sz); 1547 return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
1404} 1548}
1405 1549
1406int bpf_object__unload(struct bpf_object *obj) 1550int bpf_object__unload(struct bpf_object *obj)
@@ -1641,6 +1785,7 @@ void bpf_object__close(struct bpf_object *obj)
1641 1785
1642 bpf_object__elf_finish(obj); 1786 bpf_object__elf_finish(obj);
1643 bpf_object__unload(obj); 1787 bpf_object__unload(obj);
1788 btf__free(obj->btf);
1644 1789
1645 for (i = 0; i < obj->nr_maps; i++) { 1790 for (i = 0; i < obj->nr_maps; i++) {
1646 zfree(&obj->maps[i].name); 1791 zfree(&obj->maps[i].name);
@@ -1692,6 +1837,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj)
1692 return obj ? obj->kern_version : 0; 1837 return obj ? obj->kern_version : 0;
1693} 1838}
1694 1839
1840int bpf_object__btf_fd(const struct bpf_object *obj)
1841{
1842 return obj->btf ? btf__fd(obj->btf) : -1;
1843}
1844
1695int bpf_object__set_priv(struct bpf_object *obj, void *priv, 1845int bpf_object__set_priv(struct bpf_object *obj, void *priv,
1696 bpf_object_clear_priv_t clear_priv) 1846 bpf_object_clear_priv_t clear_priv)
1697{ 1847{
@@ -1845,11 +1995,12 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
1845BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); 1995BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
1846BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); 1996BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
1847BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); 1997BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
1998BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
1848BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); 1999BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
1849BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); 2000BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
1850 2001
1851static void bpf_program__set_expected_attach_type(struct bpf_program *prog, 2002void bpf_program__set_expected_attach_type(struct bpf_program *prog,
1852 enum bpf_attach_type type) 2003 enum bpf_attach_type type)
1853{ 2004{
1854 prog->expected_attach_type = type; 2005 prog->expected_attach_type = type;
1855} 2006}
@@ -1859,6 +2010,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
1859 2010
1860#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0) 2011#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
1861 2012
2013#define BPF_S_PROG_SEC(string, ptype) \
2014 BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
2015
1862#define BPF_SA_PROG_SEC(string, ptype) \ 2016#define BPF_SA_PROG_SEC(string, ptype) \
1863 BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype) 2017 BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
1864 2018
@@ -1874,6 +2028,7 @@ static const struct {
1874 BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), 2028 BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
1875 BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), 2029 BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
1876 BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), 2030 BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
2031 BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
1877 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 2032 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
1878 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), 2033 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
1879 BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), 2034 BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
@@ -1889,10 +2044,15 @@ static const struct {
1889 BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), 2044 BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
1890 BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), 2045 BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
1891 BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT), 2046 BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
2047 BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
2048 BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
2049 BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
2050 BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
1892}; 2051};
1893 2052
1894#undef BPF_PROG_SEC 2053#undef BPF_PROG_SEC
1895#undef BPF_PROG_SEC_FULL 2054#undef BPF_PROG_SEC_FULL
2055#undef BPF_S_PROG_SEC
1896#undef BPF_SA_PROG_SEC 2056#undef BPF_SA_PROG_SEC
1897 2057
1898static int bpf_program__identify_section(struct bpf_program *prog) 2058static int bpf_program__identify_section(struct bpf_program *prog)
@@ -1929,6 +2089,16 @@ const char *bpf_map__name(struct bpf_map *map)
1929 return map ? map->name : NULL; 2089 return map ? map->name : NULL;
1930} 2090}
1931 2091
2092uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
2093{
2094 return map ? map->btf_key_type_id : 0;
2095}
2096
2097uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
2098{
2099 return map ? map->btf_value_type_id : 0;
2100}
2101
1932int bpf_map__set_priv(struct bpf_map *map, void *priv, 2102int bpf_map__set_priv(struct bpf_map *map, void *priv,
1933 bpf_map_clear_priv_t clear_priv) 2103 bpf_map_clear_priv_t clear_priv)
1934{ 2104{
@@ -2028,14 +2198,18 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2028 enum bpf_attach_type expected_attach_type; 2198 enum bpf_attach_type expected_attach_type;
2029 enum bpf_prog_type prog_type; 2199 enum bpf_prog_type prog_type;
2030 struct bpf_object *obj; 2200 struct bpf_object *obj;
2201 struct bpf_map *map;
2031 int section_idx; 2202 int section_idx;
2032 int err; 2203 int err;
2033 2204
2034 if (!attr) 2205 if (!attr)
2035 return -EINVAL; 2206 return -EINVAL;
2207 if (!attr->file)
2208 return -EINVAL;
2036 2209
2037 obj = bpf_object__open(attr->file); 2210 obj = __bpf_object__open(attr->file, NULL, 0,
2038 if (IS_ERR(obj)) 2211 bpf_prog_type__needs_kver(attr->prog_type));
2212 if (IS_ERR_OR_NULL(obj))
2039 return -ENOENT; 2213 return -ENOENT;
2040 2214
2041 bpf_object__for_each_program(prog, obj) { 2215 bpf_object__for_each_program(prog, obj) {
@@ -2044,6 +2218,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2044 * section name. 2218 * section name.
2045 */ 2219 */
2046 prog_type = attr->prog_type; 2220 prog_type = attr->prog_type;
2221 prog->prog_ifindex = attr->ifindex;
2047 expected_attach_type = attr->expected_attach_type; 2222 expected_attach_type = attr->expected_attach_type;
2048 if (prog_type == BPF_PROG_TYPE_UNSPEC) { 2223 if (prog_type == BPF_PROG_TYPE_UNSPEC) {
2049 section_idx = bpf_program__identify_section(prog); 2224 section_idx = bpf_program__identify_section(prog);
@@ -2064,6 +2239,10 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2064 first_prog = prog; 2239 first_prog = prog;
2065 } 2240 }
2066 2241
2242 bpf_map__for_each(map, obj) {
2243 map->map_ifindex = attr->ifindex;
2244 }
2245
2067 if (!first_prog) { 2246 if (!first_prog) {
2068 pr_warning("object file doesn't contain bpf program\n"); 2247 pr_warning("object file doesn't contain bpf program\n");
2069 bpf_object__close(obj); 2248 bpf_object__close(obj);
@@ -2080,3 +2259,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
2080 *prog_fd = bpf_program__fd(first_prog); 2259 *prog_fd = bpf_program__fd(first_prog);
2081 return 0; 2260 return 0;
2082} 2261}
2262
2263enum bpf_perf_event_ret
2264bpf_perf_event_read_simple(void *mem, unsigned long size,
2265 unsigned long page_size, void **buf, size_t *buf_len,
2266 bpf_perf_event_print_t fn, void *priv)
2267{
2268 volatile struct perf_event_mmap_page *header = mem;
2269 __u64 data_tail = header->data_tail;
2270 __u64 data_head = header->data_head;
2271 void *base, *begin, *end;
2272 int ret;
2273
2274 asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
2275 if (data_head == data_tail)
2276 return LIBBPF_PERF_EVENT_CONT;
2277
2278 base = ((char *)header) + page_size;
2279
2280 begin = base + data_tail % size;
2281 end = base + data_head % size;
2282
2283 while (begin != end) {
2284 struct perf_event_header *ehdr;
2285
2286 ehdr = begin;
2287 if (begin + ehdr->size > base + size) {
2288 long len = base + size - begin;
2289
2290 if (*buf_len < ehdr->size) {
2291 free(*buf);
2292 *buf = malloc(ehdr->size);
2293 if (!*buf) {
2294 ret = LIBBPF_PERF_EVENT_ERROR;
2295 break;
2296 }
2297 *buf_len = ehdr->size;
2298 }
2299
2300 memcpy(*buf, begin, len);
2301 memcpy(*buf + len, base, ehdr->size - len);
2302 ehdr = (void *)*buf;
2303 begin = base + ehdr->size - len;
2304 } else if (begin + ehdr->size == base + size) {
2305 begin = base;
2306 } else {
2307 begin += ehdr->size;
2308 }
2309
2310 ret = fn(ehdr, priv);
2311 if (ret != LIBBPF_PERF_EVENT_CONT)
2312 break;
2313
2314 data_tail += ehdr->size;
2315 }
2316
2317 __sync_synchronize(); /* smp_mb() */
2318 header->data_tail = data_tail;
2319
2320 return ret;
2321}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3a62a583f27..09976531aa74 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -52,8 +52,8 @@ enum libbpf_errno {
52int libbpf_strerror(int err, char *buf, size_t size); 52int libbpf_strerror(int err, char *buf, size_t size);
53 53
54/* 54/*
55 * In include/linux/compiler-gcc.h, __printf is defined. However 55 * __printf is defined in include/linux/compiler-gcc.h. However,
56 * it should be better if libbpf.h doesn't depend on Linux header file. 56 * it would be better if libbpf.h didn't depend on Linux header files.
57 * So instead of __printf, here we use gcc attribute directly. 57 * So instead of __printf, here we use gcc attribute directly.
58 */ 58 */
59typedef int (*libbpf_print_fn_t)(const char *, ...) 59typedef int (*libbpf_print_fn_t)(const char *, ...)
@@ -78,6 +78,7 @@ int bpf_object__load(struct bpf_object *obj);
78int bpf_object__unload(struct bpf_object *obj); 78int bpf_object__unload(struct bpf_object *obj);
79const char *bpf_object__name(struct bpf_object *obj); 79const char *bpf_object__name(struct bpf_object *obj);
80unsigned int bpf_object__kversion(struct bpf_object *obj); 80unsigned int bpf_object__kversion(struct bpf_object *obj);
81int bpf_object__btf_fd(const struct bpf_object *obj);
81 82
82struct bpf_object *bpf_object__next(struct bpf_object *prev); 83struct bpf_object *bpf_object__next(struct bpf_object *prev);
83#define bpf_object__for_each_safe(pos, tmp) \ 84#define bpf_object__for_each_safe(pos, tmp) \
@@ -91,7 +92,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
91 bpf_object_clear_priv_t clear_priv); 92 bpf_object_clear_priv_t clear_priv);
92void *bpf_object__priv(struct bpf_object *prog); 93void *bpf_object__priv(struct bpf_object *prog);
93 94
94/* Accessors of bpf_program. */ 95/* Accessors of bpf_program */
95struct bpf_program; 96struct bpf_program;
96struct bpf_program *bpf_program__next(struct bpf_program *prog, 97struct bpf_program *bpf_program__next(struct bpf_program *prog,
97 struct bpf_object *obj); 98 struct bpf_object *obj);
@@ -120,28 +121,28 @@ struct bpf_insn;
120 121
121/* 122/*
122 * Libbpf allows callers to adjust BPF programs before being loaded 123 * Libbpf allows callers to adjust BPF programs before being loaded
123 * into kernel. One program in an object file can be transform into 124 * into kernel. One program in an object file can be transformed into
124 * multiple variants to be attached to different code. 125 * multiple variants to be attached to different hooks.
125 * 126 *
126 * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd 127 * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
127 * are APIs for this propose. 128 * form an API for this purpose.
128 * 129 *
129 * - bpf_program_prep_t: 130 * - bpf_program_prep_t:
130 * It defines 'preprocessor', which is a caller defined function 131 * Defines a 'preprocessor', which is a caller defined function
131 * passed to libbpf through bpf_program__set_prep(), and will be 132 * passed to libbpf through bpf_program__set_prep(), and will be
132 * called before program is loaded. The processor should adjust 133 * called before program is loaded. The processor should adjust
133 * the program one time for each instances according to the number 134 * the program one time for each instance according to the instance id
134 * passed to it. 135 * passed to it.
135 * 136 *
136 * - bpf_program__set_prep: 137 * - bpf_program__set_prep:
137 * Attachs a preprocessor to a BPF program. The number of instances 138 * Attaches a preprocessor to a BPF program. The number of instances
138 * whould be created is also passed through this function. 139 * that should be created is also passed through this function.
139 * 140 *
140 * - bpf_program__nth_fd: 141 * - bpf_program__nth_fd:
141 * After the program is loaded, get resuling fds from bpf program for 142 * After the program is loaded, get resulting FD of a given instance
142 * each instances. 143 * of the BPF program.
143 * 144 *
144 * If bpf_program__set_prep() is not used, the program whould be loaded 145 * If bpf_program__set_prep() is not used, the program would be loaded
145 * without adjustment during bpf_object__load(). The program has only 146 * without adjustment during bpf_object__load(). The program has only
146 * one instance. In this case bpf_program__fd(prog) is equal to 147 * one instance. In this case bpf_program__fd(prog) is equal to
147 * bpf_program__nth_fd(prog, 0). 148 * bpf_program__nth_fd(prog, 0).
@@ -155,7 +156,7 @@ struct bpf_prog_prep_result {
155 struct bpf_insn *new_insn_ptr; 156 struct bpf_insn *new_insn_ptr;
156 int new_insn_cnt; 157 int new_insn_cnt;
157 158
158 /* If not NULL, result fd is set to it */ 159 /* If not NULL, result FD is written to it. */
159 int *pfd; 160 int *pfd;
160}; 161};
161 162
@@ -168,8 +169,8 @@ struct bpf_prog_prep_result {
168 * - res: Output parameter, result of transformation. 169 * - res: Output parameter, result of transformation.
169 * 170 *
170 * Return value: 171 * Return value:
171 * - Zero: pre-processing success. 172 * - Zero: pre-processing success.
172 * - Non-zero: pre-processing, stop loading. 173 * - Non-zero: pre-processing error, stop loading.
173 */ 174 */
174typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, 175typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
175 struct bpf_insn *insns, int insns_cnt, 176 struct bpf_insn *insns, int insns_cnt,
@@ -181,19 +182,23 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
181int bpf_program__nth_fd(struct bpf_program *prog, int n); 182int bpf_program__nth_fd(struct bpf_program *prog, int n);
182 183
183/* 184/*
184 * Adjust type of bpf program. Default is kprobe. 185 * Adjust type of BPF program. Default is kprobe.
185 */ 186 */
186int bpf_program__set_socket_filter(struct bpf_program *prog); 187int bpf_program__set_socket_filter(struct bpf_program *prog);
187int bpf_program__set_tracepoint(struct bpf_program *prog); 188int bpf_program__set_tracepoint(struct bpf_program *prog);
189int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
188int bpf_program__set_kprobe(struct bpf_program *prog); 190int bpf_program__set_kprobe(struct bpf_program *prog);
189int bpf_program__set_sched_cls(struct bpf_program *prog); 191int bpf_program__set_sched_cls(struct bpf_program *prog);
190int bpf_program__set_sched_act(struct bpf_program *prog); 192int bpf_program__set_sched_act(struct bpf_program *prog);
191int bpf_program__set_xdp(struct bpf_program *prog); 193int bpf_program__set_xdp(struct bpf_program *prog);
192int bpf_program__set_perf_event(struct bpf_program *prog); 194int bpf_program__set_perf_event(struct bpf_program *prog);
193void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); 195void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
196void bpf_program__set_expected_attach_type(struct bpf_program *prog,
197 enum bpf_attach_type type);
194 198
195bool bpf_program__is_socket_filter(struct bpf_program *prog); 199bool bpf_program__is_socket_filter(struct bpf_program *prog);
196bool bpf_program__is_tracepoint(struct bpf_program *prog); 200bool bpf_program__is_tracepoint(struct bpf_program *prog);
201bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
197bool bpf_program__is_kprobe(struct bpf_program *prog); 202bool bpf_program__is_kprobe(struct bpf_program *prog);
198bool bpf_program__is_sched_cls(struct bpf_program *prog); 203bool bpf_program__is_sched_cls(struct bpf_program *prog);
199bool bpf_program__is_sched_act(struct bpf_program *prog); 204bool bpf_program__is_sched_act(struct bpf_program *prog);
@@ -201,10 +206,10 @@ bool bpf_program__is_xdp(struct bpf_program *prog);
201bool bpf_program__is_perf_event(struct bpf_program *prog); 206bool bpf_program__is_perf_event(struct bpf_program *prog);
202 207
203/* 208/*
204 * We don't need __attribute__((packed)) now since it is 209 * No need for __attribute__((packed)), all members of 'bpf_map_def'
205 * unnecessary for 'bpf_map_def' because they are all aligned. 210 * are all aligned. In addition, using __attribute__((packed))
206 * In addition, using it will trigger -Wpacked warning message, 211 * would trigger a -Wpacked warning message, and lead to an error
207 * and will be treated as an error due to -Werror. 212 * if -Werror is set.
208 */ 213 */
209struct bpf_map_def { 214struct bpf_map_def {
210 unsigned int type; 215 unsigned int type;
@@ -215,8 +220,8 @@ struct bpf_map_def {
215}; 220};
216 221
217/* 222/*
218 * There is another 'struct bpf_map' in include/linux/map.h. However, 223 * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
219 * it is not a uapi header so no need to consider name clash. 224 * so no need to worry about a name clash.
220 */ 225 */
221struct bpf_map; 226struct bpf_map;
222struct bpf_map * 227struct bpf_map *
@@ -224,7 +229,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
224 229
225/* 230/*
226 * Get bpf_map through the offset of corresponding struct bpf_map_def 231 * Get bpf_map through the offset of corresponding struct bpf_map_def
227 * in the bpf object file. 232 * in the BPF object file.
228 */ 233 */
229struct bpf_map * 234struct bpf_map *
230bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); 235bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
@@ -239,6 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
239int bpf_map__fd(struct bpf_map *map); 244int bpf_map__fd(struct bpf_map *map);
240const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 245const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
241const char *bpf_map__name(struct bpf_map *map); 246const char *bpf_map__name(struct bpf_map *map);
247uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
248uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
242 249
243typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 250typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
244int bpf_map__set_priv(struct bpf_map *map, void *priv, 251int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -252,6 +259,7 @@ struct bpf_prog_load_attr {
252 const char *file; 259 const char *file;
253 enum bpf_prog_type prog_type; 260 enum bpf_prog_type prog_type;
254 enum bpf_attach_type expected_attach_type; 261 enum bpf_attach_type expected_attach_type;
262 int ifindex;
255}; 263};
256 264
257int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 265int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
@@ -260,4 +268,17 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
260 struct bpf_object **pobj, int *prog_fd); 268 struct bpf_object **pobj, int *prog_fd);
261 269
262int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 270int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
271
272enum bpf_perf_event_ret {
273 LIBBPF_PERF_EVENT_DONE = 0,
274 LIBBPF_PERF_EVENT_ERROR = -1,
275 LIBBPF_PERF_EVENT_CONT = -2,
276};
277
278typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event,
279 void *priv);
280int bpf_perf_event_read_simple(void *mem, unsigned long size,
281 unsigned long page_size,
282 void **buf, size_t *buf_len,
283 bpf_perf_event_print_t fn, void *priv);
263#endif 284#endif
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 689b6a130dd7..96d830545bbb 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -10,6 +10,12 @@ u8 kallsyms2elf_type(char type)
10 return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT; 10 return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
11} 11}
12 12
13bool kallsyms__is_function(char symbol_type)
14{
15 symbol_type = toupper(symbol_type);
16 return symbol_type == 'T' || symbol_type == 'W';
17}
18
13int kallsyms__parse(const char *filename, void *arg, 19int kallsyms__parse(const char *filename, void *arg,
14 int (*process_symbol)(void *arg, const char *name, 20 int (*process_symbol)(void *arg, const char *name,
15 char type, u64 start)) 21 char type, u64 start))
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bc40101d72c1..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -20,6 +20,8 @@ static inline u8 kallsyms2elf_binding(char type)
20 20
21u8 kallsyms2elf_type(char type); 21u8 kallsyms2elf_type(char type);
22 22
23bool kallsyms__is_function(char symbol_type);
24
23int kallsyms__parse(const char *filename, void *arg, 25int kallsyms__parse(const char *filename, void *arg,
24 int (*process_symbol)(void *arg, const char *name, 26 int (*process_symbol)(void *arg, const char *name,
25 char type, u64 start)); 27 char type, u64 start));
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4aafb..33ba98d72b16 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
1 Prior Operation Subsequent Operation 1 Prior Operation Subsequent Operation
2 --------------- --------------------------- 2 --------------- ---------------------------
3 C Self R W RWM Self R W DR DW RMW SV 3 C Self R W RMW Self R W DR DW RMW SV
4 -- ---- - - --- ---- - - -- -- --- -- 4 -- ---- - - --- ---- - - -- -- --- --
5 5
6Store, e.g., WRITE_ONCE() Y Y 6Store, e.g., WRITE_ONCE() Y Y
@@ -14,7 +14,7 @@ smp_wmb() Y W Y Y W
14smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y 14smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y
15Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y 15Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y
16smp_mb__before_atomic() CP Y Y Y a a a a Y 16smp_mb__before_atomic() CP Y Y Y a a a a Y
17smp_mb__after_atomic() CP a a Y Y Y Y Y 17smp_mb__after_atomic() CP a a Y Y Y Y Y Y
18 18
19 19
20Key: C: Ordering is cumulative 20Key: C: Ordering is cumulative
@@ -26,4 +26,5 @@ Key: C: Ordering is cumulative
26 DR: Dependent read (address dependency) 26 DR: Dependent read (address dependency)
27 DW: Dependent write (address, data, or control dependency) 27 DW: Dependent write (address, data, or control dependency)
28 RMW: Atomic read-modify-write operation 28 RMW: Atomic read-modify-write operation
29 SV Same-variable access 29 SELF: Orders self, as opposed to accesses before and/or after
30 SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index a727c82bd434..1b09f3175a1f 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model
27 19. AND THEN THERE WAS ALPHA 27 19. AND THEN THERE WAS ALPHA
28 20. THE HAPPENS-BEFORE RELATION: hb 28 20. THE HAPPENS-BEFORE RELATION: hb
29 21. THE PROPAGATES-BEFORE RELATION: pb 29 21. THE PROPAGATES-BEFORE RELATION: pb
30 22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path 30 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
31 23. ODDS AND ENDS 31 23. ODDS AND ENDS
32 32
33 33
@@ -1451,8 +1451,8 @@ they execute means that it cannot have cycles. This requirement is
1451the content of the LKMM's "propagation" axiom. 1451the content of the LKMM's "propagation" axiom.
1452 1452
1453 1453
1454RCU RELATIONS: link, gp-link, rscs-link, and rcu-path 1454RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
1455----------------------------------------------------- 1455----------------------------------------------------
1456 1456
1457RCU (Read-Copy-Update) is a powerful synchronization mechanism. It 1457RCU (Read-Copy-Update) is a powerful synchronization mechanism. It
1458rests on two concepts: grace periods and read-side critical sections. 1458rests on two concepts: grace periods and read-side critical sections.
@@ -1509,8 +1509,8 @@ y, which occurs before the end of the critical section, did not
1509propagate to P1 before the end of the grace period, violating the 1509propagate to P1 before the end of the grace period, violating the
1510Guarantee. 1510Guarantee.
1511 1511
1512In the kernel's implementations of RCU, the business about stores 1512In the kernel's implementations of RCU, the requirements for stores
1513propagating to every CPU is realized by placing strong fences at 1513to propagate to every CPU are fulfilled by placing strong fences at
1514suitable places in the RCU-related code. Thus, if a critical section 1514suitable places in the RCU-related code. Thus, if a critical section
1515starts before a grace period does then the critical section's CPU will 1515starts before a grace period does then the critical section's CPU will
1516execute an smp_mb() fence after the end of the critical section and 1516execute an smp_mb() fence after the end of the critical section and
@@ -1523,72 +1523,124 @@ executes.
1523What exactly do we mean by saying that a critical section "starts 1523What exactly do we mean by saying that a critical section "starts
1524before" or "ends after" a grace period? Some aspects of the meaning 1524before" or "ends after" a grace period? Some aspects of the meaning
1525are pretty obvious, as in the example above, but the details aren't 1525are pretty obvious, as in the example above, but the details aren't
1526entirely clear. The LKMM formalizes this notion by means of a 1526entirely clear. The LKMM formalizes this notion by means of the
1527relation with the unfortunately generic name "link". It is a very 1527rcu-link relation. rcu-link encompasses a very general notion of
1528general relation; among other things, X ->link Z includes cases where 1528"before": Among other things, X ->rcu-link Z includes cases where X
1529X happens-before or is equal to some event Y which is equal to or 1529happens-before or is equal to some event Y which is equal to or comes
1530comes before Z in the coherence order. Taking Y = Z, this says that 1530before Z in the coherence order. When Y = Z this says that X ->rfe Z
1531X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z 1531implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z
1532and X ->co Z each imply X ->link Z. 1532and X ->co Z each imply X ->rcu-link Z.
1533 1533
1534The formal definition of the link relation is more than a little 1534The formal definition of the rcu-link relation is more than a little
1535obscure, and we won't give it here. It is closely related to the pb 1535obscure, and we won't give it here. It is closely related to the pb
1536relation, and the details don't matter unless you want to comb through 1536relation, and the details don't matter unless you want to comb through
1537a somewhat lengthy formal proof. Pretty much all you need to know 1537a somewhat lengthy formal proof. Pretty much all you need to know
1538about link is the information in the preceding paragraph. 1538about rcu-link is the information in the preceding paragraph.
1539 1539
1540The LKMM goes on to define the gp-link and rscs-link relations. They 1540The LKMM also defines the gp and rscs relations. They bring grace
1541bring grace periods and read-side critical sections into the picture, 1541periods and read-side critical sections into the picture, in the
1542in the following way: 1542following way:
1543 1543
1544 E ->gp-link F means there is a synchronize_rcu() fence event S 1544 E ->gp F means there is a synchronize_rcu() fence event S such
1545 and an event X such that E ->po S, either S ->po X or S = X, 1545 that E ->po S and either S ->po F or S = F. In simple terms,
1546 and X ->link F. In other words, E and F are connected by a 1546 there is a grace period po-between E and F.
1547 grace period followed by an instance of link. 1547
1548 1548 E ->rscs F means there is a critical section delimited by an
1549 E ->rscs-link F means there is a critical section delimited by 1549 rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
1550 an rcu_read_lock() fence L and an rcu_read_unlock() fence U, 1550 that E ->po U and either L ->po F or L = F. You can think of
1551 and an event X such that E ->po U, either L ->po X or L = X, 1551 this as saying that E and F are in the same critical section
1552 and X ->link F. Roughly speaking, this says that some event 1552 (in fact, it also allows E to be po-before the start of the
1553 in the same critical section as E is connected by link to F. 1553 critical section and F to be po-after the end).
1554 1554
1555If we think of the link relation as standing for an extended "before", 1555If we think of the rcu-link relation as standing for an extended
1556then E ->gp-link F says that E executes before a grace period which 1556"before", then X ->gp Y ->rcu-link Z says that X executes before a
1557ends before F executes. (In fact it says more than this, because it 1557grace period which ends before Z executes. (In fact it covers more
1558includes cases where E executes before a grace period and some store 1558than this, because it also includes cases where X executes before a
1559propagates to F's CPU before F executes and doesn't propagate to some 1559grace period and some store propagates to Z's CPU before Z executes
1560other CPU until after the grace period ends.) Similarly, 1560but doesn't propagate to some other CPU until after the grace period
1561E ->rscs-link F says that E is part of (or before the start of) a 1561ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
1562critical section which starts before F executes. 1562before the start of) a critical section which starts before Z
1563executes.
1564
1565The LKMM goes on to define the rcu-fence relation as a sequence of gp
1566and rscs links separated by rcu-link links, in which the number of gp
1567links is >= the number of rscs links. For example:
1568
1569 X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
1570
1571would imply that X ->rcu-fence V, because this sequence contains two
1572gp links and only one rscs link. (It also implies that X ->rcu-fence T
1573and Z ->rcu-fence V.) On the other hand:
1574
1575 X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
1576
1577does not imply X ->rcu-fence V, because the sequence contains only
1578one gp link but two rscs links.
1579
1580The rcu-fence relation is important because the Grace Period Guarantee
1581means that rcu-fence acts kind of like a strong fence. In particular,
1582if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
1583will propagate to every CPU before Z executes.
1584
1585To prove this in full generality requires some intellectual effort.
1586We'll consider just a very simple case:
1587
1588 W ->gp X ->rcu-link Y ->rscs Z.
1589
1590This formula means that there is a grace period G and a critical
1591section C such that:
1592
1593 1. W is po-before G;
1594
1595 2. X is equal to or po-after G;
1596
1597 3. X comes "before" Y in some sense;
1598
1599 4. Y is po-before the end of C;
1600
1601 5. Z is equal to or po-after the start of C.
1602
1603From 2 - 4 we deduce that the grace period G ends before the critical
1604section C. Then the second part of the Grace Period Guarantee says
1605not only that G starts before C does, but also that W (which executes
1606on G's CPU before G starts) must propagate to every CPU before C
1607starts. In particular, W propagates to every CPU before Z executes
1608(or finishes executing, in the case where Z is equal to the
1609rcu_read_lock() fence event which starts C.) This sort of reasoning
1610can be expanded to handle all the situations covered by rcu-fence.
1611
1612Finally, the LKMM defines the RCU-before (rb) relation in terms of
1613rcu-fence. This is done in essentially the same way as the pb
1614relation was defined in terms of strong-fence. We will omit the
1615details; the end result is that E ->rb F implies E must execute before
1616F, just as E ->pb F does (and for much the same reasons).
1563 1617
1564Putting this all together, the LKMM expresses the Grace Period 1618Putting this all together, the LKMM expresses the Grace Period
1565Guarantee by requiring that there are no cycles consisting of gp-link 1619Guarantee by requiring that the rb relation does not contain a cycle.
1566and rscs-link connections in which the number of gp-link instances is 1620Equivalently, this "rcu" axiom requires that there are no events E and
1567>= the number of rscs-link instances. It does this by defining the 1621F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the
1568rcu-path relation to link events E and F whenever it is possible to 1622axiom requires that there are no cycles consisting of gp and rscs
1569pass from E to F by a sequence of gp-link and rscs-link connections 1623alternating with rcu-link, where the number of gp links is >= the
1570with at least as many of the former as the latter. The LKMM's "rcu" 1624number of rscs links.
1571axiom then says that there are no events E such that E ->rcu-path E. 1625
1572 1626Justifying the axiom isn't easy, but it is in fact a valid
1573Justifying this axiom takes some intellectual effort, but it is in 1627formalization of the Grace Period Guarantee. We won't attempt to go
1574fact a valid formalization of the Grace Period Guarantee. We won't 1628through the detailed argument, but the following analysis gives a
1575attempt to go through the detailed argument, but the following 1629taste of what is involved. Suppose we have a violation of the first
1576analysis gives a taste of what is involved. Suppose we have a 1630part of the Guarantee: A critical section starts before a grace
1577violation of the first part of the Guarantee: A critical section 1631period, and some store propagates to the critical section's CPU before
1578starts before a grace period, and some store propagates to the 1632the end of the critical section but doesn't propagate to some other
1579critical section's CPU before the end of the critical section but 1633CPU until after the end of the grace period.
1580doesn't propagate to some other CPU until after the end of the grace
1581period.
1582 1634
1583Putting symbols to these ideas, let L and U be the rcu_read_lock() and 1635Putting symbols to these ideas, let L and U be the rcu_read_lock() and
1584rcu_read_unlock() fence events delimiting the critical section in 1636rcu_read_unlock() fence events delimiting the critical section in
1585question, and let S be the synchronize_rcu() fence event for the grace 1637question, and let S be the synchronize_rcu() fence event for the grace
1586period. Saying that the critical section starts before S means there 1638period. Saying that the critical section starts before S means there
1587are events E and F where E is po-after L (which marks the start of the 1639are events E and F where E is po-after L (which marks the start of the
1588critical section), E is "before" F in the sense of the link relation, 1640critical section), E is "before" F in the sense of the rcu-link
1589and F is po-before the grace period S: 1641relation, and F is po-before the grace period S:
1590 1642
1591 L ->po E ->link F ->po S. 1643 L ->po E ->rcu-link F ->po S.
1592 1644
1593Let W be the store mentioned above, let Z come before the end of the 1645Let W be the store mentioned above, let Z come before the end of the
1594critical section and witness that W propagates to the critical 1646critical section and witness that W propagates to the critical
@@ -1600,16 +1652,19 @@ some event X which is po-after S. Symbolically, this amounts to:
1600 1652
1601The fr link from Y to W indicates that W has not propagated to Y's CPU 1653The fr link from Y to W indicates that W has not propagated to Y's CPU
1602at the time that Y executes. From this, it can be shown (see the 1654at the time that Y executes. From this, it can be shown (see the
1603discussion of the link relation earlier) that X and Z are connected by 1655discussion of the rcu-link relation earlier) that X and Z are related
1604link, yielding: 1656by rcu-link, yielding:
1657
1658 S ->po X ->rcu-link Z ->po U.
1659
1660The formulas say that S is po-between F and X, hence F ->gp X. They
1661also say that Z comes before the end of the critical section and E
1662comes after its start, hence Z ->rscs E. From all this we obtain:
1605 1663
1606 S ->po X ->link Z ->po U. 1664 F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
1607 1665
1608These formulas say that S is po-between F and X, hence F ->gp-link Z 1666a forbidden cycle. Thus the "rcu" axiom rules out this violation of
1609via X. They also say that Z comes before the end of the critical 1667the Grace Period Guarantee.
1610section and E comes after its start, hence Z ->rscs-link F via E. But
1611now we have a forbidden cycle: F ->gp-link Z ->rscs-link F. Thus the
1612"rcu" axiom rules out this violation of the Grace Period Guarantee.
1613 1668
1614For something a little more down-to-earth, let's see how the axiom 1669For something a little more down-to-earth, let's see how the axiom
1615works out in practice. Consider the RCU code example from above, this 1670works out in practice. Consider the RCU code example from above, this
@@ -1635,18 +1690,18 @@ time with statement labels added to the memory access instructions:
1635 } 1690 }
1636 1691
1637 1692
1638If r2 = 0 at the end then P0's store at X overwrites the value 1693If r2 = 0 at the end then P0's store at X overwrites the value that
1639that P1's load at Z reads from, so we have Z ->fre X and thus 1694P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
1640Z ->link X. In addition, there is a synchronize_rcu() between Y and 1695In addition, there is a synchronize_rcu() between Y and Z, so therefore
1641Z, so therefore we have Y ->gp-link X. 1696we have Y ->gp Z.
1642 1697
1643If r1 = 1 at the end then P1's load at Y reads from P0's store at W, 1698If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
1644so we have W ->link Y. In addition, W and X are in the same critical 1699so we have W ->rcu-link Y. In addition, W and X are in the same critical
1645section, so therefore we have X ->rscs-link Y. 1700section, so therefore we have X ->rscs W.
1646 1701
1647This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link 1702Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
1648and one rscs-link, violating the "rcu" axiom. Hence the outcome is 1703violating the "rcu" axiom. Hence the outcome is not allowed by the
1649not allowed by the LKMM, as we would expect. 1704LKMM, as we would expect.
1650 1705
1651For contrast, let's see what can happen in a more complicated example: 1706For contrast, let's see what can happen in a more complicated example:
1652 1707
@@ -1682,15 +1737,11 @@ For contrast, let's see what can happen in a more complicated example:
1682 } 1737 }
1683 1738
1684If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows 1739If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
1685that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W 1740that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
1686via V. And just as before, this gives a cycle: 1741However this cycle is not forbidden, because the sequence of relations
1687 1742contains fewer instances of gp (one) than of rscs (two). Consequently
1688 W ->rscs-link Y ->gp-link U ->rscs-link W. 1743the outcome is allowed by the LKMM. The following instruction timing
1689 1744diagram shows how it might actually occur:
1690However, this cycle has fewer gp-link instances than rscs-link
1691instances, and consequently the outcome is not forbidden by the LKMM.
1692The following instruction timing diagram shows how it might actually
1693occur:
1694 1745
1695P0 P1 P2 1746P0 P1 P2
1696-------------------- -------------------- -------------------- 1747-------------------- -------------------- --------------------
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ba2e34c2ec3f..b177f3e4a614 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -63,15 +63,22 @@ o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis,
63 Principles of Programming Languages (POPL 2017). ACM, New York, 63 Principles of Programming Languages (POPL 2017). ACM, New York,
64 NY, USA, 429–442. 64 NY, USA, 429–442.
65 65
66o Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
67 Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency:
68 multicopy-atomic axiomatic and operational models for ARMv8". In
69 Proceedings of the ACM on Programming Languages, Volume 2, Issue
70 POPL, Article No. 19. ACM, New York, NY, USA.
71
66 72
67Linux-kernel memory model 73Linux-kernel memory model
68========================= 74=========================
69 75
70o Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney, 76o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
71 and Jade Alglave. 2017. "A formal model of 77 Alan Stern. 2018. "Frightening small children and disconcerting
72 Linux-kernel memory ordering - companion webpage". 78 grown-ups: Concurrency in the Linux kernel". In Proceedings of
73 http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online; 79 the 23rd International Conference on Architectural Support for
74 accessed 30-January-2017]. 80 Programming Languages and Operating Systems (ASPLOS 2018). ACM,
81 New York, NY, USA, 405-418. Webpage: http://diy.inria.fr/linux/.
75 82
76o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and 83o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
77 Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)" 84 Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)"
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 0b3a5f3c9ccd..734f7feaa5dc 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
20REQUIREMENTS 20REQUIREMENTS
21============ 21============
22 22
23Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded 23Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
24separately: 24separately:
25 25
26 https://github.com/herd/herdtools7 26 https://github.com/herd/herdtools7
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 432c7cf71b23..64f5740e0e75 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -5,10 +5,10 @@
5 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, 5 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
6 * Andrea Parri <parri.andrea@gmail.com> 6 * Andrea Parri <parri.andrea@gmail.com>
7 * 7 *
8 * An earlier version of this file appears in the companion webpage for 8 * An earlier version of this file appeared in the companion webpage for
9 * "Frightening small children and disconcerting grown-ups: Concurrency 9 * "Frightening small children and disconcerting grown-ups: Concurrency
10 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 10 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
11 * which is to appear in ASPLOS 2018. 11 * which appeared in ASPLOS 2018.
12 *) 12 *)
13 13
14"Linux-kernel memory consistency model" 14"Linux-kernel memory consistency model"
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index df97db03b6c2..59b5cbe6b624 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -5,10 +5,10 @@
5 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>, 5 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
6 * Andrea Parri <parri.andrea@gmail.com> 6 * Andrea Parri <parri.andrea@gmail.com>
7 * 7 *
8 * An earlier version of this file appears in the companion webpage for 8 * An earlier version of this file appeared in the companion webpage for
9 * "Frightening small children and disconcerting grown-ups: Concurrency 9 * "Frightening small children and disconcerting grown-ups: Concurrency
10 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 10 * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
11 * which is to appear in ASPLOS 2018. 11 * which appeared in ASPLOS 2018.
12 *) 12 *)
13 13
14"Linux-kernel memory consistency model" 14"Linux-kernel memory consistency model"
@@ -100,22 +100,29 @@ let rscs = po ; crit^-1 ; po?
100 * one but two non-rf relations, but only in conjunction with an RCU 100 * one but two non-rf relations, but only in conjunction with an RCU
101 * read-side critical section. 101 * read-side critical section.
102 *) 102 *)
103let link = hb* ; pb* ; prop 103let rcu-link = hb* ; pb* ; prop
104 104
105(* Chains that affect the RCU grace-period guarantee *) 105(*
106let gp-link = gp ; link 106 * Any sequence containing at least as many grace periods as RCU read-side
107let rscs-link = rscs ; link 107 * critical sections (joined by rcu-link) acts as a generalized strong fence.
108 *)
109let rec rcu-fence = gp |
110 (gp ; rcu-link ; rscs) |
111 (rscs ; rcu-link ; gp) |
112 (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
113 (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
114 (rcu-fence ; rcu-link ; rcu-fence)
115
116(* rb orders instructions just as pb does *)
117let rb = prop ; rcu-fence ; hb* ; pb*
118
119irreflexive rb as rcu
108 120
109(* 121(*
110 * A cycle containing at least as many grace periods as RCU read-side 122 * The happens-before, propagation, and rcu constraints are all
111 * critical sections is forbidden. 123 * expressions of temporal ordering. They could be replaced by
124 * a single constraint on an "executes-before" relation, xb:
125 *
126 * let xb = hb | pb | rb
127 * acyclic xb as executes-before
112 *) 128 *)
113let rec rcu-path =
114 gp-link |
115 (gp-link ; rscs-link) |
116 (rscs-link ; gp-link) |
117 (rcu-path ; rcu-path) |
118 (gp-link ; rcu-path ; rscs-link) |
119 (rscs-link ; rcu-path ; gp-link)
120
121irreflexive rcu-path as rcu
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 397e4e67e8c8..6fa3eb28d40b 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -1,9 +1,9 @@
1// SPDX-License-Identifier: GPL-2.0+ 1// SPDX-License-Identifier: GPL-2.0+
2// 2//
3// An earlier version of this file appears in the companion webpage for 3// An earlier version of this file appeared in the companion webpage for
4// "Frightening small children and disconcerting grown-ups: Concurrency 4// "Frightening small children and disconcerting grown-ups: Concurrency
5// in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern, 5// in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
6// which is to appear in ASPLOS 2018. 6// which appeared in ASPLOS 2018.
7 7
8// ONCE 8// ONCE
9READ_ONCE(X) __load{once}(X) 9READ_ONCE(X) __load{once}(X)
@@ -14,14 +14,15 @@ smp_store_release(X,V) { __store{release}(*X,V); }
14smp_load_acquire(X) __load{acquire}(*X) 14smp_load_acquire(X) __load{acquire}(*X)
15rcu_assign_pointer(X,V) { __store{release}(X,V); } 15rcu_assign_pointer(X,V) { __store{release}(X,V); }
16rcu_dereference(X) __load{once}(X) 16rcu_dereference(X) __load{once}(X)
17smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
17 18
18// Fences 19// Fences
19smp_mb() { __fence{mb} ; } 20smp_mb() { __fence{mb}; }
20smp_rmb() { __fence{rmb} ; } 21smp_rmb() { __fence{rmb}; }
21smp_wmb() { __fence{wmb} ; } 22smp_wmb() { __fence{wmb}; }
22smp_mb__before_atomic() { __fence{before-atomic} ; } 23smp_mb__before_atomic() { __fence{before-atomic}; }
23smp_mb__after_atomic() { __fence{after-atomic} ; } 24smp_mb__after_atomic() { __fence{after-atomic}; }
24smp_mb__after_spinlock() { __fence{after-spinlock} ; } 25smp_mb__after_spinlock() { __fence{after-spinlock}; }
25 26
26// Exchange 27// Exchange
27xchg(X,V) __xchg{mb}(X,V) 28xchg(X,V) __xchg{mb}(X,V)
@@ -34,26 +35,27 @@ cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
34cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W) 35cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
35 36
36// Spinlocks 37// Spinlocks
37spin_lock(X) { __lock(X) ; } 38spin_lock(X) { __lock(X); }
38spin_unlock(X) { __unlock(X) ; } 39spin_unlock(X) { __unlock(X); }
39spin_trylock(X) __trylock(X) 40spin_trylock(X) __trylock(X)
41spin_is_locked(X) __islocked(X)
40 42
41// RCU 43// RCU
42rcu_read_lock() { __fence{rcu-lock}; } 44rcu_read_lock() { __fence{rcu-lock}; }
43rcu_read_unlock() { __fence{rcu-unlock};} 45rcu_read_unlock() { __fence{rcu-unlock}; }
44synchronize_rcu() { __fence{sync-rcu}; } 46synchronize_rcu() { __fence{sync-rcu}; }
45synchronize_rcu_expedited() { __fence{sync-rcu}; } 47synchronize_rcu_expedited() { __fence{sync-rcu}; }
46 48
47// Atomic 49// Atomic
48atomic_read(X) READ_ONCE(*X) 50atomic_read(X) READ_ONCE(*X)
49atomic_set(X,V) { WRITE_ONCE(*X,V) ; } 51atomic_set(X,V) { WRITE_ONCE(*X,V); }
50atomic_read_acquire(X) smp_load_acquire(X) 52atomic_read_acquire(X) smp_load_acquire(X)
51atomic_set_release(X,V) { smp_store_release(X,V); } 53atomic_set_release(X,V) { smp_store_release(X,V); }
52 54
53atomic_add(V,X) { __atomic_op(X,+,V) ; } 55atomic_add(V,X) { __atomic_op(X,+,V); }
54atomic_sub(V,X) { __atomic_op(X,-,V) ; } 56atomic_sub(V,X) { __atomic_op(X,-,V); }
55atomic_inc(X) { __atomic_op(X,+,1) ; } 57atomic_inc(X) { __atomic_op(X,+,1); }
56atomic_dec(X) { __atomic_op(X,-,1) ; } 58atomic_dec(X) { __atomic_op(X,-,1); }
57 59
58atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V) 60atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
59atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V) 61atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
new file mode 100644
index 000000000000..6e2ddc54152f
--- /dev/null
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -0,0 +1 @@
*.litmus.out
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
index 50d5db9ea983..98a3716efa37 100644
--- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
@@ -7,7 +7,7 @@ C IRIW+mbonceonces+OnceOnce
7 * between each pairs of reads. In other words, is smp_mb() sufficient to 7 * between each pairs of reads. In other words, is smp_mb() sufficient to
8 * cause two different reading processes to agree on the order of a pair 8 * cause two different reading processes to agree on the order of a pair
9 * of writes, where each write is to a different variable by a different 9 * of writes, where each write is to a different variable by a different
10 * process? 10 * process? This litmus test exercises LKMM's "propagation" rule.
11 *) 11 *)
12 12
13{} 13{}
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..50f4d62bbf0e
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -0,0 +1,35 @@
1C MP+polockmbonce+poacquiresilsil
2
3(*
4 * Result: Never
5 *
6 * Do spinlocks combined with smp_mb__after_spinlock() provide order
7 * to outside observers using spin_is_locked() to sense the lock-held
8 * state, ordered by acquire? Note that when the first spin_is_locked()
9 * returns false and the second true, we know that the smp_load_acquire()
10 * executed before the lock was acquired (loosely speaking).
11 *)
12
13{
14}
15
16P0(spinlock_t *lo, int *x)
17{
18 spin_lock(lo);
19 smp_mb__after_spinlock();
20 WRITE_ONCE(*x, 1);
21 spin_unlock(lo);
22}
23
24P1(spinlock_t *lo, int *x)
25{
26 int r1;
27 int r2;
28 int r3;
29
30 r1 = smp_load_acquire(x);
31 r2 = spin_is_locked(lo);
32 r3 = spin_is_locked(lo);
33}
34
35exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
new file mode 100644
index 000000000000..abf81e7a0895
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -0,0 +1,34 @@
1C MP+polockonce+poacquiresilsil
2
3(*
4 * Result: Sometimes
5 *
6 * Do spinlocks provide order to outside observers using spin_is_locked()
7 * to sense the lock-held state, ordered by acquire? Note that when the
8 * first spin_is_locked() returns false and the second true, we know that
9 * the smp_load_acquire() executed before the lock was acquired (loosely
10 * speaking).
11 *)
12
13{
14}
15
16P0(spinlock_t *lo, int *x)
17{
18 spin_lock(lo);
19 WRITE_ONCE(*x, 1);
20 spin_unlock(lo);
21}
22
23P1(spinlock_t *lo, int *x)
24{
25 int r1;
26 int r2;
27 int r3;
28
29 r1 = smp_load_acquire(x);
30 r2 = spin_is_locked(lo);
31 r3 = spin_is_locked(lo);
32}
33
34exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 04096fb8b8d9..17eb9a8c222d 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -23,7 +23,8 @@ IRIW+mbonceonces+OnceOnce.litmus
23 between each pairs of reads. In other words, is smp_mb() 23 between each pairs of reads. In other words, is smp_mb()
24 sufficient to cause two different reading processes to agree on 24 sufficient to cause two different reading processes to agree on
25 the order of a pair of writes, where each write is to a different 25 the order of a pair of writes, where each write is to a different
26 variable by a different process? 26 variable by a different process? This litmus test is forbidden
27 by LKMM's propagation rule.
27 28
28IRIW+poonceonces+OnceOnce.litmus 29IRIW+poonceonces+OnceOnce.litmus
29 Test of independent reads from independent writes with nothing 30 Test of independent reads from independent writes with nothing
@@ -63,6 +64,16 @@ LB+poonceonces.litmus
63MP+onceassign+derefonce.litmus 64MP+onceassign+derefonce.litmus
64 As below, but with rcu_assign_pointer() and an rcu_dereference(). 65 As below, but with rcu_assign_pointer() and an rcu_dereference().
65 66
67MP+polockmbonce+poacquiresilsil.litmus
68 Protect the access with a lock and an smp_mb__after_spinlock()
69 in one process, and use an acquire load followed by a pair of
70 spin_is_locked() calls in the other process.
71
72MP+polockonce+poacquiresilsil.litmus
73 Protect the access with a lock in one process, and use an
74 acquire load followed by a pair of spin_is_locked() calls
75 in the other process.
76
66MP+polocks.litmus 77MP+polocks.litmus
67 As below, but with the second access of the writer process 78 As below, but with the second access of the writer process
68 and the first access of reader process protected by a lock. 79 and the first access of reader process protected by a lock.
@@ -109,8 +120,10 @@ S+wmbonceonce+poacquireonce.litmus
109 120
110WRC+poonceonces+Once.litmus 121WRC+poonceonces+Once.litmus
111WRC+pooncerelease+rmbonceonce+Once.litmus 122WRC+pooncerelease+rmbonceonce+Once.litmus
112 These two are members of an extension of the MP litmus-test class 123 These two are members of an extension of the MP litmus-test
113 in which the first write is moved to a separate process. 124 class in which the first write is moved to a separate process.
125 The second is forbidden because smp_store_release() is
126 A-cumulative in LKMM.
114 127
115Z6.0+pooncelock+pooncelock+pombonce.litmus 128Z6.0+pooncelock+pooncelock+pombonce.litmus
116 Is the ordering provided by a spin_unlock() and a subsequent 129 Is the ordering provided by a spin_unlock() and a subsequent
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
index 97fcbffde9a0..ad3448b941e6 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
@@ -5,7 +5,9 @@ C WRC+pooncerelease+rmbonceonce+Once
5 * 5 *
6 * This litmus test is an extension of the message-passing pattern, where 6 * This litmus test is an extension of the message-passing pattern, where
7 * the first write is moved to a separate process. Because it features 7 * the first write is moved to a separate process. Because it features
8 * a release and a read memory barrier, it should be forbidden. 8 * a release and a read memory barrier, it should be forbidden. More
9 * specifically, this litmus test is forbidden because smp_store_release()
10 * is A-cumulative in LKMM.
9 *) 11 *)
10 12
11{} 13{}
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index ba4a4ec6d313..305ded17e741 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -4,46 +4,72 @@
4 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu> 4 * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
5 *) 5 *)
6 6
7(* Generate coherence orders and handle lock operations *) 7(*
8 * Generate coherence orders and handle lock operations
9 *
10 * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
11 * spin_is_locked() is functional from herd7 version 7.49.
12 *)
8 13
9include "cross.cat" 14include "cross.cat"
10 15
11(* From lock reads to their partner lock writes *)
12let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
13let rmw = rmw | lk-rmw
14
15(* 16(*
16 * A paired LKR must always see an unlocked value; spin_lock() calls nested 17 * The lock-related events generated by herd are as follows:
17 * inside a critical section (for the same lock) always deadlock. 18 *
19 * LKR Lock-Read: the read part of a spin_lock() or successful
20 * spin_trylock() read-modify-write event pair
21 * LKW Lock-Write: the write part of a spin_lock() or successful
22 * spin_trylock() RMW event pair
23 * UL Unlock: a spin_unlock() event
24 * LF Lock-Fail: a failed spin_trylock() event
25 * RL Read-Locked: a spin_is_locked() event which returns True
26 * RU Read-Unlocked: a spin_is_locked() event which returns False
27 *
28 * LKR and LKW events always come paired, like all RMW event sequences.
29 *
30 * LKR, LF, RL, and RU are read events; LKR has Acquire ordering.
31 * LKW and UL are write events; UL has Release ordering.
32 * LKW, LF, RL, and RU have no ordering properties.
18 *) 33 *)
19empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc)
20 as lock-nest
21 34
22(* The litmus test is invalid if an LKW event is not part of an RMW pair *) 35(* Backward compatibility *)
23flag ~empty LKW \ range(lk-rmw) as unpaired-LKW 36let RL = try RL with emptyset
37let RU = try RU with emptyset
24 38
25(* This will be allowed if we implement spin_is_locked() *) 39(* Treat RL as a kind of LF: a read with no ordering properties *)
26flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR 40let LF = LF | RL
27 41
28(* There should be no R or W accesses to spinlocks *) 42(* There should be no ordinary R or W accesses to spinlocks *)
29let ALL-LOCKS = LKR | LKW | UL | LF 43let ALL-LOCKS = LKR | LKW | UL | LF | RU
30flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses 44flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
31 45
46(* Link Lock-Reads to their RMW-partner Lock-Writes *)
47let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
48let rmw = rmw | lk-rmw
49
50(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *)
51flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
52flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
53
54(*
55 * An LKR must always see an unlocked value; spin_lock() calls nested
56 * inside a critical section (for the same lock) always deadlock.
57 *)
58empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest
59
32(* The final value of a spinlock should not be tested *) 60(* The final value of a spinlock should not be tested *)
33flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final 61flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final
34 62
35
36(* 63(*
37 * Put lock operations in their appropriate classes, but leave UL out of W 64 * Put lock operations in their appropriate classes, but leave UL out of W
38 * until after the co relation has been generated. 65 * until after the co relation has been generated.
39 *) 66 *)
40let R = R | LKR | LF 67let R = R | LKR | LF | RU
41let W = W | LKW 68let W = W | LKW
42 69
43let Release = Release | UL 70let Release = Release | UL
44let Acquire = Acquire | LKR 71let Acquire = Acquire | LKR
45 72
46
47(* Match LKW events to their corresponding UL events *) 73(* Match LKW events to their corresponding UL events *)
48let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc) 74let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)
49 75
@@ -53,27 +79,48 @@ flag ~empty UL \ range(critical) as unmatched-unlock
53let UNMATCHED-LKW = LKW \ domain(critical) 79let UNMATCHED-LKW = LKW \ domain(critical)
54empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks 80empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks
55 81
56
57(* rfi for LF events: link each LKW to the LF events in its critical section *) 82(* rfi for LF events: link each LKW to the LF events in its critical section *)
58let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) 83let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)
59 84
60(* rfe for LF events *) 85(* rfe for LF events *)
61let all-possible-rfe-lf = 86let all-possible-rfe-lf =
62 (* 87 (*
63 * Given an LF event r, compute the possible rfe edges for that event 88 * Given an LF event r, compute the possible rfe edges for that event
64 * (all those starting from LKW events in other threads), 89 * (all those starting from LKW events in other threads),
65 * and then convert that relation to a set of single-edge relations. 90 * and then convert that relation to a set of single-edge relations.
66 *) 91 *)
67 let possible-rfe-lf r = 92 let possible-rfe-lf r =
68 let pair-to-relation p = p ++ 0 93 let pair-to-relation p = p ++ 0
69 in map pair-to-relation ((LKW * {r}) & loc & ext) 94 in map pair-to-relation ((LKW * {r}) & loc & ext)
70 (* Do this for each LF event r that isn't in rfi-lf *) 95 (* Do this for each LF event r that isn't in rfi-lf *)
71 in map possible-rfe-lf (LF \ range(rfi-lf)) 96 in map possible-rfe-lf (LF \ range(rfi-lf))
72 97
73(* Generate all rf relations for LF events *) 98(* Generate all rf relations for LF events *)
74with rfe-lf from cross(all-possible-rfe-lf) 99with rfe-lf from cross(all-possible-rfe-lf)
75let rf = rf | rfi-lf | rfe-lf 100let rf-lf = rfe-lf | rfi-lf
101
102(*
103 * RU, i.e., spin_is_locked() returning False, is slightly different.
104 * We rely on the memory model to rule out cases where spin_is_locked()
105 * within one of the lock's critical sections returns False.
106 *)
107
108(* rfi for RU events: an RU may read from the last po-previous UL *)
109let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc)
110
111(* rfe for RU events: an RU may read from an external UL or the initial write *)
112let all-possible-rfe-ru =
113 let possible-rfe-ru r =
114 let pair-to-relation p = p ++ 0
115 in map pair-to-relation (((UL | IW) * {r}) & loc & ext)
116 in map possible-rfe-ru RU
117
118(* Generate all rf relations for RU events *)
119with rfe-ru from cross(all-possible-rfe-ru)
120let rf-ru = rfe-ru | rfi-ru
76 121
122(* Final rf relation *)
123let rf = rf | rf-lf | rf-ru
77 124
78(* Generate all co relations, including LKW events but not UL *) 125(* Generate all co relations, including LKW events but not UL *)
79let co0 = co0 | ([IW] ; loc ; [LKW]) | 126let co0 = co0 | ([IW] ; loc ; [LKW]) |
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
new file mode 100644
index 000000000000..af0aa15ab84e
--- /dev/null
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -0,0 +1,73 @@
#!/bin/sh
#
# Run herd tests on all .litmus files in the specified directory (which
# defaults to litmus-tests) and check each file's result against a "Result:"
# comment within that litmus test.  If the verification result does not
# match that specified in the litmus test, this script prints an error
# message prefixed with "^^^".  It also outputs verification results to
# a file whose name is that of the specified litmus test, but with ".out"
# appended.
#
# Usage:
#	sh checkalllitmus.sh [ directory ]
#
# The LINUX_HERD_OPTIONS environment variable may be used to specify
# arguments to herd, whose default is defined by the checklitmus.sh script.
# Thus, one would normally run this in the directory containing the memory
# model, specifying the pathname of the litmus test to check.
#
# This script makes no attempt to run the litmus tests concurrently.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright IBM Corporation, 2018
#
# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

litmusdir=${1-litmus-tests}
if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir"
then
	:
else
	echo ' --- ' error: $litmusdir is not an accessible directory
	exit 255
fi

# Find the checklitmus script.  If it is not where we expect it, then
# assume that the caller has the PATH environment variable set
# appropriately.
if test -x scripts/checklitmus.sh
then
	clscript=scripts/checklitmus.sh
else
	clscript=checklitmus.sh
fi

# Run the script on all the litmus tests in the specified directory.
# Note: iterate over $litmusdir (the validated argument), not a
# hard-coded "litmus-tests" path, so that the directory argument is
# actually honored.
ret=0
for i in $litmusdir/*.litmus
do
	if ! $clscript $i
	then
		ret=1
	fi
done
if test "$ret" -ne 0
then
	echo " ^^^ VERIFICATION MISMATCHES"
else
	echo All litmus tests verified as was expected.
fi
exit $ret
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
new file mode 100644
index 000000000000..e2e477472844
--- /dev/null
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -0,0 +1,86 @@
#!/bin/sh
#
# Run a herd test and check the result against a "Result:" comment within
# the litmus test.  If the verification result does not match that specified
# in the litmus test, this script prints an error message prefixed with
# "^^^" and exits with a non-zero status.  It also outputs verification
# results to a file whose name is that of the specified litmus test, but
# with ".out" appended.
#
# Usage:
#	sh checklitmus.sh file.litmus
#
# The LINUX_HERD_OPTIONS environment variable may be used to specify
# arguments to herd, which default to "-conf linux-kernel.cfg".  Thus,
# one would normally run this in the directory containing the memory model,
# specifying the pathname of the litmus test to check.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright IBM Corporation, 2018
#
# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

litmus=$1
herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg}

if test -f "$litmus" -a -r "$litmus"
then
	:
else
	echo ' --- ' error: \"$litmus\" is not a readable file
	exit 255
fi

# Extract the expected outcome from the litmus test's "Result:" comment,
# defaulting to the sentinel "specified" (which will never match herd's
# Observation line) when no such comment is present.
if grep -q '^ \* Result: ' $litmus
then
	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
else
	outcome=specified
fi

# Run herd7 and capture its output (and timing) into $litmus.out.
echo Herd options: $herdoptions > $litmus.out
/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1
grep "Herd options:" $litmus.out
grep '^Observation' $litmus.out

# No "Observation" line means herd failed outright.
if grep -q '^Observation' $litmus.out
then
	:
else
	cat $litmus.out
	echo ' ^^^ Verification error'
	echo ' ^^^ Verification error' >> $litmus.out 2>&1
	exit 255
fi

# Compare herd's verdict against the expected outcome.  A DEADLOCK
# expectation is satisfied only by "Never 0 0" (no executions at all).
# (A leftover debug "echo grep 3 and 4" has been removed from this branch.)
if test "$outcome" = DEADLOCK
then
	if grep '^Observation' $litmus.out | grep -q 'Never 0 0$'
	then
		ret=0
	else
		echo " ^^^ Unexpected non-$outcome verification"
		echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
		ret=1
	fi
elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe
then
	ret=0
else
	echo " ^^^ Unexpected non-$outcome verification"
	echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
	ret=1
fi
tail -2 $litmus.out | head -1
exit $ret
diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
index b3e32b010ab1..c2c01f84df75 100644
--- a/tools/objtool/arch/x86/include/asm/insn.h
+++ b/tools/objtool/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
208 return insn_offset_displacement(insn) + insn->displacement.nbytes; 208 return insn_offset_displacement(insn) + insn->displacement.nbytes;
209} 209}
210 210
/* One-byte opcodes for "POP SS" and "MOV Sreg, r/m16". */
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e

/*
 * Intel SDM Vol.3A 6.8.3 states;
 * "Any single-step trap that would be delivered following the MOV to SS
 * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
 * suppressed."
 * This function returns true if @insn is MOV SS or POP SS. On these
 * instructions, single stepping is suppressed.
 */
static inline int insn_masking_exception(struct insn *insn)
{
	return insn->opcode.bytes[0] == POP_SS_OPCODE ||
	       (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
		/* ModRM reg field 2 selects SS as the destination segment register */
		X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
}
228
211#endif /* _ASM_X86_INSN_H */ 229#endif /* _ASM_X86_INSN_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5409f6f6c48d..f4a25bd1871f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
59 return next; 59 return next;
60} 60}
61 61
/*
 * Return the next instruction belonging to the same function as @insn,
 * following the parent/child link into a GCC "cold" subfunction when the
 * straight-line walk leaves the parent.  Returns NULL when @insn has no
 * associated function or the function (and its subfunction) is exhausted.
 */
static struct instruction *next_insn_same_func(struct objtool_file *file,
					       struct instruction *insn)
{
	struct instruction *next = list_next_entry(insn, list);
	struct symbol *func = insn->func;

	if (!func)
		return NULL;

	/* Still inside the same function's instruction run? */
	if (&next->list != &file->insn_list && next->func == func)
		return next;

	/* Check if we're already in the subfunction: */
	if (func == func->cfunc)
		return NULL;

	/* Move to the subfunction: */
	return find_insn(file, func->cfunc->sec, func->cfunc->offset);
}
81
82#define func_for_each_insn_all(file, func, insn) \
83 for (insn = find_insn(file, func->sec, func->offset); \
84 insn; \
85 insn = next_insn_same_func(file, insn))
86
62#define func_for_each_insn(file, func, insn) \ 87#define func_for_each_insn(file, func, insn) \
63 for (insn = find_insn(file, func->sec, func->offset); \ 88 for (insn = find_insn(file, func->sec, func->offset); \
64 insn && &insn->list != &file->insn_list && \ 89 insn && &insn->list != &file->insn_list && \
@@ -139,6 +164,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
139 "lbug_with_loc", 164 "lbug_with_loc",
140 "fortify_panic", 165 "fortify_panic",
141 "usercopy_abort", 166 "usercopy_abort",
167 "machine_real_restart",
142 }; 168 };
143 169
144 if (func->bind == STB_WEAK) 170 if (func->bind == STB_WEAK)
@@ -149,10 +175,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
149 if (!strcmp(func->name, global_noreturns[i])) 175 if (!strcmp(func->name, global_noreturns[i]))
150 return 1; 176 return 1;
151 177
152 if (!func->sec) 178 if (!func->len)
179 return 0;
180
181 insn = find_insn(file, func->sec, func->offset);
182 if (!insn->func)
153 return 0; 183 return 0;
154 184
155 func_for_each_insn(file, func, insn) { 185 func_for_each_insn_all(file, func, insn) {
156 empty = false; 186 empty = false;
157 187
158 if (insn->type == INSN_RETURN) 188 if (insn->type == INSN_RETURN)
@@ -167,35 +197,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
167 * case, the function's dead-end status depends on whether the target 197 * case, the function's dead-end status depends on whether the target
168 * of the sibling call returns. 198 * of the sibling call returns.
169 */ 199 */
170 func_for_each_insn(file, func, insn) { 200 func_for_each_insn_all(file, func, insn) {
171 if (insn->sec != func->sec ||
172 insn->offset >= func->offset + func->len)
173 break;
174
175 if (insn->type == INSN_JUMP_UNCONDITIONAL) { 201 if (insn->type == INSN_JUMP_UNCONDITIONAL) {
176 struct instruction *dest = insn->jump_dest; 202 struct instruction *dest = insn->jump_dest;
177 struct symbol *dest_func;
178 203
179 if (!dest) 204 if (!dest)
180 /* sibling call to another file */ 205 /* sibling call to another file */
181 return 0; 206 return 0;
182 207
183 if (dest->sec != func->sec || 208 if (dest->func && dest->func->pfunc != insn->func->pfunc) {
184 dest->offset < func->offset ||
185 dest->offset >= func->offset + func->len) {
186 /* local sibling call */
187 dest_func = find_symbol_by_offset(dest->sec,
188 dest->offset);
189 if (!dest_func)
190 continue;
191 209
210 /* local sibling call */
192 if (recursion == 5) { 211 if (recursion == 5) {
193 WARN_FUNC("infinite recursion (objtool bug!)", 212 /*
194 dest->sec, dest->offset); 213 * Infinite recursion: two functions
195 return -1; 214 * have sibling calls to each other.
215 * This is a very rare case. It means
216 * they aren't dead ends.
217 */
218 return 0;
196 } 219 }
197 220
198 return __dead_end_function(file, dest_func, 221 return __dead_end_function(file, dest->func,
199 recursion + 1); 222 recursion + 1);
200 } 223 }
201 } 224 }
@@ -422,7 +445,7 @@ static void add_ignores(struct objtool_file *file)
422 if (!ignore_func(file, func)) 445 if (!ignore_func(file, func))
423 continue; 446 continue;
424 447
425 func_for_each_insn(file, func, insn) 448 func_for_each_insn_all(file, func, insn)
426 insn->ignore = true; 449 insn->ignore = true;
427 } 450 }
428 } 451 }
@@ -521,6 +544,28 @@ static int add_jump_destinations(struct objtool_file *file)
521 dest_off); 544 dest_off);
522 return -1; 545 return -1;
523 } 546 }
547
548 /*
549 * For GCC 8+, create parent/child links for any cold
550 * subfunctions. This is _mostly_ redundant with a similar
551 * initialization in read_symbols().
552 *
553 * If a function has aliases, we want the *first* such function
554 * in the symbol table to be the subfunction's parent. In that
555 * case we overwrite the initialization done in read_symbols().
556 *
557 * However this code can't completely replace the
558 * read_symbols() code because this doesn't detect the case
559 * where the parent function's only reference to a subfunction
560 * is through a switch table.
561 */
562 if (insn->func && insn->jump_dest->func &&
563 insn->func != insn->jump_dest->func &&
564 !strstr(insn->func->name, ".cold.") &&
565 strstr(insn->jump_dest->func->name, ".cold.")) {
566 insn->func->cfunc = insn->jump_dest->func;
567 insn->jump_dest->func->pfunc = insn->func;
568 }
524 } 569 }
525 570
526 return 0; 571 return 0;
@@ -782,30 +827,35 @@ out:
782 return ret; 827 return ret;
783} 828}
784 829
785static int add_switch_table(struct objtool_file *file, struct symbol *func, 830static int add_switch_table(struct objtool_file *file, struct instruction *insn,
786 struct instruction *insn, struct rela *table, 831 struct rela *table, struct rela *next_table)
787 struct rela *next_table)
788{ 832{
789 struct rela *rela = table; 833 struct rela *rela = table;
790 struct instruction *alt_insn; 834 struct instruction *alt_insn;
791 struct alternative *alt; 835 struct alternative *alt;
836 struct symbol *pfunc = insn->func->pfunc;
837 unsigned int prev_offset = 0;
792 838
793 list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { 839 list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
794 if (rela == next_table) 840 if (rela == next_table)
795 break; 841 break;
796 842
797 if (rela->sym->sec != insn->sec || 843 /* Make sure the switch table entries are consecutive: */
798 rela->addend <= func->offset || 844 if (prev_offset && rela->offset != prev_offset + 8)
799 rela->addend >= func->offset + func->len)
800 break; 845 break;
801 846
802 alt_insn = find_insn(file, insn->sec, rela->addend); 847 /* Detect function pointers from contiguous objects: */
803 if (!alt_insn) { 848 if (rela->sym->sec == pfunc->sec &&
804 WARN("%s: can't find instruction at %s+0x%x", 849 rela->addend == pfunc->offset)
805 file->rodata->rela->name, insn->sec->name, 850 break;
806 rela->addend); 851
807 return -1; 852 alt_insn = find_insn(file, rela->sym->sec, rela->addend);
808 } 853 if (!alt_insn)
854 break;
855
856 /* Make sure the jmp dest is in the function or subfunction: */
857 if (alt_insn->func->pfunc != pfunc)
858 break;
809 859
810 alt = malloc(sizeof(*alt)); 860 alt = malloc(sizeof(*alt));
811 if (!alt) { 861 if (!alt) {
@@ -815,6 +865,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func,
815 865
816 alt->insn = alt_insn; 866 alt->insn = alt_insn;
817 list_add_tail(&alt->list, &insn->alts); 867 list_add_tail(&alt->list, &insn->alts);
868 prev_offset = rela->offset;
869 }
870
871 if (!prev_offset) {
872 WARN_FUNC("can't find switch jump table",
873 insn->sec, insn->offset);
874 return -1;
818 } 875 }
819 876
820 return 0; 877 return 0;
@@ -869,40 +926,21 @@ static struct rela *find_switch_table(struct objtool_file *file,
869{ 926{
870 struct rela *text_rela, *rodata_rela; 927 struct rela *text_rela, *rodata_rela;
871 struct instruction *orig_insn = insn; 928 struct instruction *orig_insn = insn;
929 unsigned long table_offset;
872 930
873 text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
874 if (text_rela && text_rela->sym == file->rodata->sym) {
875 /* case 1 */
876 rodata_rela = find_rela_by_dest(file->rodata,
877 text_rela->addend);
878 if (rodata_rela)
879 return rodata_rela;
880
881 /* case 2 */
882 rodata_rela = find_rela_by_dest(file->rodata,
883 text_rela->addend + 4);
884 if (!rodata_rela)
885 return NULL;
886
887 file->ignore_unreachables = true;
888 return rodata_rela;
889 }
890
891 /* case 3 */
892 /* 931 /*
893 * Backward search using the @first_jump_src links, these help avoid 932 * Backward search using the @first_jump_src links, these help avoid
894 * much of the 'in between' code. Which avoids us getting confused by 933 * much of the 'in between' code. Which avoids us getting confused by
895 * it. 934 * it.
896 */ 935 */
897 for (insn = list_prev_entry(insn, list); 936 for (;
898
899 &insn->list != &file->insn_list && 937 &insn->list != &file->insn_list &&
900 insn->sec == func->sec && 938 insn->sec == func->sec &&
901 insn->offset >= func->offset; 939 insn->offset >= func->offset;
902 940
903 insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { 941 insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
904 942
905 if (insn->type == INSN_JUMP_DYNAMIC) 943 if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
906 break; 944 break;
907 945
908 /* allow small jumps within the range */ 946 /* allow small jumps within the range */
@@ -918,18 +956,29 @@ static struct rela *find_switch_table(struct objtool_file *file,
918 if (!text_rela || text_rela->sym != file->rodata->sym) 956 if (!text_rela || text_rela->sym != file->rodata->sym)
919 continue; 957 continue;
920 958
959 table_offset = text_rela->addend;
960 if (text_rela->type == R_X86_64_PC32)
961 table_offset += 4;
962
921 /* 963 /*
922 * Make sure the .rodata address isn't associated with a 964 * Make sure the .rodata address isn't associated with a
923 * symbol. gcc jump tables are anonymous data. 965 * symbol. gcc jump tables are anonymous data.
924 */ 966 */
925 if (find_symbol_containing(file->rodata, text_rela->addend)) 967 if (find_symbol_containing(file->rodata, table_offset))
926 continue; 968 continue;
927 969
928 rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); 970 rodata_rela = find_rela_by_dest(file->rodata, table_offset);
929 if (!rodata_rela) 971 if (rodata_rela) {
930 continue; 972 /*
973 * Use of RIP-relative switch jumps is quite rare, and
974 * indicates a rare GCC quirk/bug which can leave dead
975 * code behind.
976 */
977 if (text_rela->type == R_X86_64_PC32)
978 file->ignore_unreachables = true;
931 979
932 return rodata_rela; 980 return rodata_rela;
981 }
933 } 982 }
934 983
935 return NULL; 984 return NULL;
@@ -943,7 +992,7 @@ static int add_func_switch_tables(struct objtool_file *file,
943 struct rela *rela, *prev_rela = NULL; 992 struct rela *rela, *prev_rela = NULL;
944 int ret; 993 int ret;
945 994
946 func_for_each_insn(file, func, insn) { 995 func_for_each_insn_all(file, func, insn) {
947 if (!last) 996 if (!last)
948 last = insn; 997 last = insn;
949 998
@@ -974,8 +1023,7 @@ static int add_func_switch_tables(struct objtool_file *file,
974 * the beginning of another switch table in the same function. 1023 * the beginning of another switch table in the same function.
975 */ 1024 */
976 if (prev_jump) { 1025 if (prev_jump) {
977 ret = add_switch_table(file, func, prev_jump, prev_rela, 1026 ret = add_switch_table(file, prev_jump, prev_rela, rela);
978 rela);
979 if (ret) 1027 if (ret)
980 return ret; 1028 return ret;
981 } 1029 }
@@ -985,7 +1033,7 @@ static int add_func_switch_tables(struct objtool_file *file,
985 } 1033 }
986 1034
987 if (prev_jump) { 1035 if (prev_jump) {
988 ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); 1036 ret = add_switch_table(file, prev_jump, prev_rela, NULL);
989 if (ret) 1037 if (ret)
990 return ret; 1038 return ret;
991 } 1039 }
@@ -1749,15 +1797,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
1749 while (1) { 1797 while (1) {
1750 next_insn = next_insn_same_sec(file, insn); 1798 next_insn = next_insn_same_sec(file, insn);
1751 1799
1752 1800 if (file->c_file && func && insn->func && func != insn->func->pfunc) {
1753 if (file->c_file && func && insn->func && func != insn->func) {
1754 WARN("%s() falls through to next function %s()", 1801 WARN("%s() falls through to next function %s()",
1755 func->name, insn->func->name); 1802 func->name, insn->func->name);
1756 return 1; 1803 return 1;
1757 } 1804 }
1758 1805
1759 if (insn->func) 1806 func = insn->func ? insn->func->pfunc : NULL;
1760 func = insn->func;
1761 1807
1762 if (func && insn->ignore) { 1808 if (func && insn->ignore) {
1763 WARN_FUNC("BUG: why am I validating an ignored function?", 1809 WARN_FUNC("BUG: why am I validating an ignored function?",
@@ -1778,7 +1824,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
1778 1824
1779 i = insn; 1825 i = insn;
1780 save_insn = NULL; 1826 save_insn = NULL;
1781 func_for_each_insn_continue_reverse(file, func, i) { 1827 func_for_each_insn_continue_reverse(file, insn->func, i) {
1782 if (i->save) { 1828 if (i->save) {
1783 save_insn = i; 1829 save_insn = i;
1784 break; 1830 break;
@@ -1865,7 +1911,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
1865 case INSN_JUMP_UNCONDITIONAL: 1911 case INSN_JUMP_UNCONDITIONAL:
1866 if (insn->jump_dest && 1912 if (insn->jump_dest &&
1867 (!func || !insn->jump_dest->func || 1913 (!func || !insn->jump_dest->func ||
1868 func == insn->jump_dest->func)) { 1914 insn->jump_dest->func->pfunc == func)) {
1869 ret = validate_branch(file, insn->jump_dest, 1915 ret = validate_branch(file, insn->jump_dest,
1870 state); 1916 state);
1871 if (ret) 1917 if (ret)
@@ -2060,7 +2106,7 @@ static int validate_functions(struct objtool_file *file)
2060 2106
2061 for_each_sec(file, sec) { 2107 for_each_sec(file, sec) {
2062 list_for_each_entry(func, &sec->symbol_list, list) { 2108 list_for_each_entry(func, &sec->symbol_list, list) {
2063 if (func->type != STT_FUNC) 2109 if (func->type != STT_FUNC || func->pfunc != func)
2064 continue; 2110 continue;
2065 2111
2066 insn = find_insn(file, sec, func->offset); 2112 insn = find_insn(file, sec, func->offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c1c338661699..4e60e105583e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
79 return NULL; 79 return NULL;
80} 80}
81 81
/*
 * Linear search for a symbol named @name across every section's symbol
 * list in @elf.  Returns the first match, or NULL if no symbol has that
 * name.  O(total symbols); intended for infrequent lookups.
 */
struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
{
	struct section *sec;
	struct symbol *sym;

	list_for_each_entry(sec, &elf->sections, list)
		list_for_each_entry(sym, &sec->symbol_list, list)
			if (!strcmp(sym->name, name))
				return sym;

	return NULL;
}
94
82struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) 95struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
83{ 96{
84 struct symbol *sym; 97 struct symbol *sym;
@@ -203,10 +216,11 @@ static int read_sections(struct elf *elf)
203 216
204static int read_symbols(struct elf *elf) 217static int read_symbols(struct elf *elf)
205{ 218{
206 struct section *symtab; 219 struct section *symtab, *sec;
207 struct symbol *sym; 220 struct symbol *sym, *pfunc;
208 struct list_head *entry, *tmp; 221 struct list_head *entry, *tmp;
209 int symbols_nr, i; 222 int symbols_nr, i;
223 char *coldstr;
210 224
211 symtab = find_section_by_name(elf, ".symtab"); 225 symtab = find_section_by_name(elf, ".symtab");
212 if (!symtab) { 226 if (!symtab) {
@@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf)
281 hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); 295 hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
282 } 296 }
283 297
298 /* Create parent/child links for any cold subfunctions */
299 list_for_each_entry(sec, &elf->sections, list) {
300 list_for_each_entry(sym, &sec->symbol_list, list) {
301 if (sym->type != STT_FUNC)
302 continue;
303 sym->pfunc = sym->cfunc = sym;
304 coldstr = strstr(sym->name, ".cold.");
305 if (coldstr) {
306 coldstr[0] = '\0';
307 pfunc = find_symbol_by_name(elf, sym->name);
308 coldstr[0] = '.';
309
310 if (!pfunc) {
311 WARN("%s(): can't find parent function",
312 sym->name);
313 goto err;
314 }
315
316 sym->pfunc = pfunc;
317 pfunc->cfunc = sym;
318 }
319 }
320 }
321
284 return 0; 322 return 0;
285 323
286err: 324err:
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index d86e2ff14466..de5cd2ddded9 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,6 +61,7 @@ struct symbol {
61 unsigned char bind, type; 61 unsigned char bind, type;
62 unsigned long offset; 62 unsigned long offset;
63 unsigned int len; 63 unsigned int len;
64 struct symbol *pfunc, *cfunc;
64}; 65};
65 66
66struct rela { 67struct rela {
@@ -86,6 +87,7 @@ struct elf {
86struct elf *elf_open(const char *name, int flags); 87struct elf *elf_open(const char *name, int flags);
87struct section *find_section_by_name(struct elf *elf, const char *name); 88struct section *find_section_by_name(struct elf *elf, const char *name);
88struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); 89struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
90struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
89struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); 91struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
90struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); 92struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
91struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, 93struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index db11478e30b4..42261a9b280e 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -47,7 +47,8 @@ man5dir=$(mandir)/man5
47man7dir=$(mandir)/man7 47man7dir=$(mandir)/man7
48 48
49ASCIIDOC=asciidoc 49ASCIIDOC=asciidoc
50ASCIIDOC_EXTRA = --unsafe 50ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
51ASCIIDOC_HTML = xhtml11
51MANPAGE_XSL = manpage-normal.xsl 52MANPAGE_XSL = manpage-normal.xsl
52XMLTO_EXTRA = 53XMLTO_EXTRA =
53INSTALL?=install 54INSTALL?=install
@@ -55,6 +56,14 @@ RM ?= rm -f
55DOC_REF = origin/man 56DOC_REF = origin/man
56HTML_REF = origin/html 57HTML_REF = origin/html
57 58
59ifdef USE_ASCIIDOCTOR
60ASCIIDOC = asciidoctor
61ASCIIDOC_EXTRA = -a compat-mode
62ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
63ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
64ASCIIDOC_HTML = xhtml5
65endif
66
58infodir?=$(prefix)/share/info 67infodir?=$(prefix)/share/info
59MAKEINFO=makeinfo 68MAKEINFO=makeinfo
60INSTALL_INFO=install-info 69INSTALL_INFO=install-info
@@ -73,10 +82,12 @@ ifeq ($(_tmp_tool_path),)
73 missing_tools = $(ASCIIDOC) 82 missing_tools = $(ASCIIDOC)
74endif 83endif
75 84
85ifndef USE_ASCIIDOCTOR
76_tmp_tool_path := $(call get-executable,$(XMLTO)) 86_tmp_tool_path := $(call get-executable,$(XMLTO))
77ifeq ($(_tmp_tool_path),) 87ifeq ($(_tmp_tool_path),)
78 missing_tools += $(XMLTO) 88 missing_tools += $(XMLTO)
79endif 89endif
90endif
80 91
81# 92#
82# For asciidoc ... 93# For asciidoc ...
@@ -264,9 +275,17 @@ clean:
264 275
265$(MAN_HTML): $(OUTPUT)%.html : %.txt 276$(MAN_HTML): $(OUTPUT)%.html : %.txt
266 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 277 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
267 $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ 278 $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
279 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
280 mv $@+ $@
281
282ifdef USE_ASCIIDOCTOR
283$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
284 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
285 $(ASCIIDOC) -b manpage -d manpage \
268 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 286 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
269 mv $@+ $@ 287 mv $@+ $@
288endif
270 289
271$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml 290$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
272 $(QUIET_XMLTO)$(RM) $@ && \ 291 $(QUIET_XMLTO)$(RM) $@ && \
@@ -274,7 +293,7 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
274 293
275$(OUTPUT)%.xml : %.txt 294$(OUTPUT)%.xml : %.txt
276 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 295 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
277 $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ 296 $(ASCIIDOC) -b docbook -d manpage \
278 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 297 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
279 mv $@+ $@ 298 mv $@+ $@
280 299
@@ -321,13 +340,13 @@ howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
321 mv $@+ $@ 340 mv $@+ $@
322 341
323$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt 342$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
324 $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt 343 $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
325 344
326WEBDOC_DEST = /pub/software/tools/perf/docs 345WEBDOC_DEST = /pub/software/tools/perf/docs
327 346
328$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt 347$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
329 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 348 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
330 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ 349 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
331 mv $@+ $@ 350 mv $@+ $@
332 351
333# UNIMPLEMENTED 352# UNIMPLEMENTED
diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb
new file mode 100644
index 000000000000..d148fe95c0c4
--- /dev/null
+++ b/tools/perf/Documentation/asciidoctor-extensions.rb
@@ -0,0 +1,29 @@
1require 'asciidoctor'
2require 'asciidoctor/extensions'
3
4module Perf
5 module Documentation
6 class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor
7 use_dsl
8
9 named :chrome
10
11 def process(parent, target, attrs)
12 if parent.document.basebackend? 'html'
13 %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n)
14 elsif parent.document.basebackend? 'manpage'
15 "#{target}(#{attrs[1]})"
16 elsif parent.document.basebackend? 'docbook'
17 "<citerefentry>\n" \
18 "<refentrytitle>#{target}</refentrytitle>" \
19 "<manvolnum>#{attrs[1]}</manvolnum>\n" \
20 "</citerefentry>\n"
21 end
22 end
23 end
24 end
25end
26
27Asciidoctor::Extensions.register do
28 inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf
29end
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 73c2650bd0db..f6de0952ff3c 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -48,6 +48,9 @@ OPTIONS
48--purge=:: 48--purge=::
49 Purge all cached binaries including older caches which have specified 49 Purge all cached binaries including older caches which have specified
50 path from the cache. 50 path from the cache.
51-P::
52--purge-all::
53 Purge all cached binaries. This will flush out entire cache.
51-M:: 54-M::
52--missing=:: 55--missing=::
53 List missing build ids in the cache for the specified file. 56 List missing build ids in the cache for the specified file.
@@ -59,7 +62,9 @@ OPTIONS
59 exactly same build-id, that is replaced by new one. It can be used 62 exactly same build-id, that is replaced by new one. It can be used
60 to update kallsyms and kernel dso to vmlinux in order to support 63 to update kallsyms and kernel dso to vmlinux in order to support
61 annotation. 64 annotation.
62 65-l::
66--list::
67 List all valid binaries from cache.
63-v:: 68-v::
64--verbose:: 69--verbose::
65 Be more verbose. 70 Be more verbose.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 2549c34a7895..11300dbe35c5 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -124,7 +124,11 @@ The available PMUs and their raw parameters can be listed with
124For example the raw event "LSD.UOPS" core pmu event above could 124For example the raw event "LSD.UOPS" core pmu event above could
125be specified as 125be specified as
126 126
127 perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ... 127 perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=0x1/ ...
128
129 or using extended name syntax
130
131 perf stat -e cpu/event=0xa8,umask=0x1,cmask=0x1,name=\'LSD.UOPS_CYCLES:cmask=0x1\'/ ...
128 132
129PER SOCKET PMUS 133PER SOCKET PMUS
130--------------- 134---------------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index cc37b3a4be76..04168da4268e 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -57,6 +57,9 @@ OPTIONS
57 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and 57 FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
58 "no" for disable callgraph. 58 "no" for disable callgraph.
59 - 'stack-size': user stack size for dwarf mode 59 - 'stack-size': user stack size for dwarf mode
60 - 'name' : User defined event name. Single quotes (') may be used to
61 escape symbols in the name from parsing by shell and tool
62 like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
60 63
61 See the linkperf:perf-list[1] man page for more parameters. 64 See the linkperf:perf-list[1] man page for more parameters.
62 65
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 51ec2d20068a..0fb9eda3cbca 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -610,6 +610,32 @@ Various utility functions for use with perf script:
610 nsecs_str(nsecs) - returns printable string in the form secs.nsecs 610 nsecs_str(nsecs) - returns printable string in the form secs.nsecs
611 avg(total, n) - returns average given a sum and a total number of values 611 avg(total, n) - returns average given a sum and a total number of values
612 612
613SUPPORTED FIELDS
614----------------
615
616Currently supported fields:
617
618ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr,
619symbol, dso, time_enabled, time_running, values, callchain,
620brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs,
621weight, transaction, raw_buf, attr.
622
623Some fields have sub items:
624
625brstack:
626 from, to, from_dsoname, to_dsoname, mispred,
627 predicted, in_tx, abort, cycles.
628
629brstacksym:
630 items: from, to, pred, in_tx, abort (converted string)
631
632For example,
633We can use this code to print brstack "from", "to", "cycles".
634
635if 'brstack' in dict:
636 for entry in dict['brstack']:
637 print "from %s, to %s, cycles %s" % (entry["from"], entry["to"], entry["cycles"])
638
613SEE ALSO 639SEE ALSO
614-------- 640--------
615linkperf:perf-script[1] 641linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index e6c3b4e555c2..b10a90b6a718 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,6 +116,22 @@ Do not aggregate counts across all monitored CPUs.
116print counts using a CSV-style output to make it easy to import directly into 116print counts using a CSV-style output to make it easy to import directly into
117spreadsheets. Columns are separated by the string specified in SEP. 117spreadsheets. Columns are separated by the string specified in SEP.
118 118
119--table:: Display time for each run (-r option), in a table format, e.g.:
120
121 $ perf stat --null -r 5 --table perf bench sched pipe
122
123 Performance counter stats for 'perf bench sched pipe' (5 runs):
124
125 # Table of individual measurements:
126 5.189 (-0.293) #
127 5.189 (-0.294) #
128 5.186 (-0.296) #
129 5.663 (+0.181) ##
130 6.186 (+0.703) ####
131
132 # Final result:
133 5.483 +- 0.198 seconds time elapsed ( +- 3.62% )
134
119-G name:: 135-G name::
120--cgroup name:: 136--cgroup name::
121monitor only in the container (cgroup) called "name". This option is available only 137monitor only in the container (cgroup) called "name". This option is available only
@@ -162,6 +178,9 @@ Print count deltas for fixed number of times.
162This option should be used together with "-I" option. 178This option should be used together with "-I" option.
163 example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' 179 example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
164 180
181--interval-clear::
182Clear the screen before next interval.
183
165--timeout msecs:: 184--timeout msecs::
166Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). 185Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
167This option is not supported with the "-I" option. 186This option is not supported with the "-I" option.
@@ -294,20 +313,38 @@ Users who wants to get the actual value can apply --no-metric-only.
294EXAMPLES 313EXAMPLES
295-------- 314--------
296 315
297$ perf stat -- make -j 316$ perf stat -- make
317
318 Performance counter stats for 'make':
319
320 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized
321 0 context-switches:u # 0.000 K/sec
322 0 cpu-migrations:u # 0.000 K/sec
323 3,228,188 page-faults:u # 0.039 M/sec
324 229,570,665,834 cycles:u # 2.742 GHz
325 313,163,853,778 instructions:u # 1.36 insn per cycle
326 69,704,684,856 branches:u # 832.559 M/sec
327 2,078,861,393 branch-misses:u # 2.98% of all branches
328
329 83.409183620 seconds time elapsed
330
331 74.684747000 seconds user
332 8.739217000 seconds sys
333
334TIMINGS
335-------
336As displayed in the example above we can display 3 types of timings.
337We always display the time the counters were enabled/alive:
338
339 83.409183620 seconds time elapsed
298 340
299 Performance counter stats for 'make -j': 341For workload sessions we also display time the workloads spent in
342user/system lands:
300 343
301 8117.370256 task clock ticks # 11.281 CPU utilization factor 344 74.684747000 seconds user
302 678 context switches # 0.000 M/sec 345 8.739217000 seconds sys
303 133 CPU migrations # 0.000 M/sec
304 235724 pagefaults # 0.029 M/sec
305 24821162526 CPU cycles # 3057.784 M/sec
306 18687303457 instructions # 2302.138 M/sec
307 172158895 cache references # 21.209 M/sec
308 27075259 cache misses # 3.335 M/sec
309 346
310 Wall-clock time elapsed: 719.554352 msecs 347Those times are the very same as displayed by the 'time' tool.
311 348
312CSV FORMAT 349CSV FORMAT
313---------- 350----------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index d00f0d51cab8..dfb218feaad9 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m)
111A structure defining the number of CPUs. 111A structure defining the number of CPUs.
112 112
113struct nr_cpus { 113struct nr_cpus {
114 uint32_t nr_cpus_online;
115 uint32_t nr_cpus_available; /* CPUs not yet onlined */ 114 uint32_t nr_cpus_available; /* CPUs not yet onlined */
115 uint32_t nr_cpus_online;
116}; 116};
117 117
118 HEADER_CPUDESC = 8, 118 HEADER_CPUDESC = 8,
@@ -153,10 +153,18 @@ struct {
153 HEADER_CPU_TOPOLOGY = 13, 153 HEADER_CPU_TOPOLOGY = 13,
154 154
155String lists defining the core and CPU threads topology. 155String lists defining the core and CPU threads topology.
156The string lists are followed by a variable length array
157which contains core_id and socket_id of each cpu.
158The number of entries can be determined by the size of the
159section minus the sizes of both string lists.
156 160
157struct { 161struct {
158 struct perf_header_string_list cores; /* Variable length */ 162 struct perf_header_string_list cores; /* Variable length */
159 struct perf_header_string_list threads; /* Variable length */ 163 struct perf_header_string_list threads; /* Variable length */
164 struct {
165 uint32_t core_id;
166 uint32_t socket_id;
167 } cpus[nr]; /* Variable length records */
160}; 168};
161 169
162Example: 170Example:
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ae7dc46e8f8a..b5ac356ba323 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -885,6 +885,8 @@ endif
885 885
886# Among the variables below, these: 886# Among the variables below, these:
887# perfexecdir 887# perfexecdir
888# perf_include_dir
889# perf_examples_dir
888# template_dir 890# template_dir
889# mandir 891# mandir
890# infodir 892# infodir
@@ -904,6 +906,8 @@ bindir = $(abspath $(prefix)/$(bindir_relative))
904mandir = share/man 906mandir = share/man
905infodir = share/info 907infodir = share/info
906perfexecdir = libexec/perf-core 908perfexecdir = libexec/perf-core
909perf_include_dir = lib/include/perf
910perf_examples_dir = lib/examples/perf
907sharedir = $(prefix)/share 911sharedir = $(prefix)/share
908template_dir = share/perf-core/templates 912template_dir = share/perf-core/templates
909STRACE_GROUPS_DIR = share/perf-core/strace/groups 913STRACE_GROUPS_DIR = share/perf-core/strace/groups
@@ -934,6 +938,8 @@ bindir_SQ = $(subst ','\'',$(bindir))
934mandir_SQ = $(subst ','\'',$(mandir)) 938mandir_SQ = $(subst ','\'',$(mandir))
935infodir_SQ = $(subst ','\'',$(infodir)) 939infodir_SQ = $(subst ','\'',$(infodir))
936perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) 940perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
941perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
942perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
937template_dir_SQ = $(subst ','\'',$(template_dir)) 943template_dir_SQ = $(subst ','\'',$(template_dir))
938htmldir_SQ = $(subst ','\'',$(htmldir)) 944htmldir_SQ = $(subst ','\'',$(htmldir))
939tipdir_SQ = $(subst ','\'',$(tipdir)) 945tipdir_SQ = $(subst ','\'',$(tipdir))
@@ -944,14 +950,20 @@ srcdir_SQ = $(subst ','\'',$(srcdir))
944 950
945ifneq ($(filter /%,$(firstword $(perfexecdir))),) 951ifneq ($(filter /%,$(firstword $(perfexecdir))),)
946perfexec_instdir = $(perfexecdir) 952perfexec_instdir = $(perfexecdir)
953perf_include_instdir = $(perf_include_dir)
954perf_examples_instdir = $(perf_examples_dir)
947STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) 955STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
948tip_instdir = $(tipdir) 956tip_instdir = $(tipdir)
949else 957else
950perfexec_instdir = $(prefix)/$(perfexecdir) 958perfexec_instdir = $(prefix)/$(perfexecdir)
959perf_include_instdir = $(prefix)/$(perf_include_dir)
960perf_examples_instdir = $(prefix)/$(perf_examples_dir)
951STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) 961STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
952tip_instdir = $(prefix)/$(tipdir) 962tip_instdir = $(prefix)/$(tipdir)
953endif 963endif
954perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) 964perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
965perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir))
966perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir))
955STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) 967STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
956tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) 968tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
957 969
@@ -999,6 +1011,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
999$(call detected_var,STRACE_GROUPS_DIR_SQ) 1011$(call detected_var,STRACE_GROUPS_DIR_SQ)
1000$(call detected_var,prefix_SQ) 1012$(call detected_var,prefix_SQ)
1001$(call detected_var,perfexecdir_SQ) 1013$(call detected_var,perfexecdir_SQ)
1014$(call detected_var,perf_include_dir_SQ)
1015$(call detected_var,perf_examples_dir_SQ)
1002$(call detected_var,tipdir_SQ) 1016$(call detected_var,tipdir_SQ)
1003$(call detected_var,srcdir_SQ) 1017$(call detected_var,srcdir_SQ)
1004$(call detected_var,LIBDIR) 1018$(call detected_var,LIBDIR)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 83e453de36f8..ecc9fc952655 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -767,6 +767,16 @@ ifndef NO_JVMTI
767endif 767endif
768 $(call QUIET_INSTALL, libexec) \ 768 $(call QUIET_INSTALL, libexec) \
769 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 769 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
770ifndef NO_LIBBPF
771 $(call QUIET_INSTALL, lib) \
772 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
773 $(call QUIET_INSTALL, include/bpf) \
774 $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
775 $(call QUIET_INSTALL, lib) \
776 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
777 $(call QUIET_INSTALL, examples/bpf) \
778 $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
779endif
770 $(call QUIET_INSTALL, perf-archive) \ 780 $(call QUIET_INSTALL, perf-archive) \
771 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 781 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
772 $(call QUIET_INSTALL, perf-with-kcore) \ 782 $(call QUIET_INSTALL, perf-with-kcore) \
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 8cb347760233..9a0242e74cfc 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
25 25
26 sp = (unsigned long) regs[PERF_REG_ARM_SP]; 26 sp = (unsigned long) regs[PERF_REG_ARM_SP];
27 27
28 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); 28 map = map_groups__find(thread->mg, (u64)sp);
29 if (!map) { 29 if (!map) {
30 pr_debug("failed to get stack map\n"); 30 pr_debug("failed to get stack map\n");
31 free(buf); 31 free(buf);
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index e907f0f4c20c..5522ce384723 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample,
25 25
26 sp = (unsigned long) regs[PERF_REG_ARM64_SP]; 26 sp = (unsigned long) regs[PERF_REG_ARM64_SP];
27 27
28 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); 28 map = map_groups__find(thread->mg, (u64)sp);
29 if (!map) { 29 if (!map) {
30 pr_debug("failed to get stack map\n"); 30 pr_debug("failed to get stack map\n");
31 free(buf); 31 free(buf);
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index c6f373508a4f..82657c01a3b8 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -189,7 +189,7 @@ out_error:
189 return -1; 189 return -1;
190} 190}
191 191
192int perf_env__lookup_objdump(struct perf_env *env) 192int perf_env__lookup_objdump(struct perf_env *env, const char **path)
193{ 193{
194 /* 194 /*
195 * For live mode, env->arch will be NULL and we can use 195 * For live mode, env->arch will be NULL and we can use
@@ -198,5 +198,5 @@ int perf_env__lookup_objdump(struct perf_env *env)
198 if (env->arch == NULL) 198 if (env->arch == NULL)
199 return 0; 199 return 0;
200 200
201 return perf_env__lookup_binutils_path(env, "objdump", &objdump_path); 201 return perf_env__lookup_binutils_path(env, "objdump", path);
202} 202}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 2d875baa92e6..2167001b18c5 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -4,8 +4,6 @@
4 4
5#include "../util/env.h" 5#include "../util/env.h"
6 6
7extern const char *objdump_path; 7int perf_env__lookup_objdump(struct perf_env *env, const char **path);
8
9int perf_env__lookup_objdump(struct perf_env *env);
10 8
11#endif /* ARCH_PERF_COMMON_H */ 9#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 30cbbd6d5be0..5f39efef0856 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
26 26
27 sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; 27 sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
28 28
29 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); 29 map = map_groups__find(thread->mg, (u64)sp);
30 if (!map) { 30 if (!map) {
31 pr_debug("failed to get stack map\n"); 31 pr_debug("failed to get stack map\n");
32 free(buf); 32 free(buf);
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 0c370f81e002..3598b8b75d27 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -248,8 +248,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
248 248
249 ip = chain->ips[2]; 249 ip = chain->ips[2];
250 250
251 thread__find_addr_location(thread, PERF_RECORD_MISC_USER, 251 thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
252 MAP__FUNCTION, ip, &al);
253 252
254 if (al.map) 253 if (al.map)
255 dso = al.map->dso; 254 dso = al.map->dso;
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 95036c7a59e8..7879df34569a 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample,
26 26
27 sp = (unsigned long) regs[PERF_REG_X86_SP]; 27 sp = (unsigned long) regs[PERF_REG_X86_SP];
28 28
29 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); 29 map = map_groups__find(thread->mg, (u64)sp);
30 if (!map) { 30 if (!map) {
31 pr_debug("failed to get stack map\n"); 31 pr_debug("failed to get stack map\n");
32 free(buf); 32 free(buf);
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f46ef0d..844b8f335532 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -4,6 +4,8 @@ libperf-y += pmu.o
4libperf-y += kvm-stat.o 4libperf-y += kvm-stat.o
5libperf-y += perf_regs.o 5libperf-y += perf_regs.o
6libperf-y += group.o 6libperf-y += group.o
7libperf-y += machine.o
8libperf-y += event.o
7 9
8libperf-$(CONFIG_DWARF) += dwarf-regs.o 10libperf-$(CONFIG_DWARF) += dwarf-regs.o
9libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o 11libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
new file mode 100644
index 000000000000..675a0213044d
--- /dev/null
+++ b/tools/perf/arch/x86/util/event.c
@@ -0,0 +1,76 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/types.h>
3#include <linux/string.h>
4
5#include "../../util/machine.h"
6#include "../../util/tool.h"
7#include "../../util/map.h"
8#include "../../util/util.h"
9#include "../../util/debug.h"
10
11#if defined(__x86_64__)
12
13int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
14 perf_event__handler_t process,
15 struct machine *machine)
16{
17 int rc = 0;
18 struct map *pos;
19 struct map_groups *kmaps = &machine->kmaps;
20 struct maps *maps = &kmaps->maps;
21 union perf_event *event = zalloc(sizeof(event->mmap) +
22 machine->id_hdr_size);
23
24 if (!event) {
25 pr_debug("Not enough memory synthesizing mmap event "
26 "for extra kernel maps\n");
27 return -1;
28 }
29
30 for (pos = maps__first(maps); pos; pos = map__next(pos)) {
31 struct kmap *kmap;
32 size_t size;
33
34 if (!__map__is_extra_kernel_map(pos))
35 continue;
36
37 kmap = map__kmap(pos);
38
39 size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
40 PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
41 machine->id_hdr_size;
42
43 memset(event, 0, size);
44
45 event->mmap.header.type = PERF_RECORD_MMAP;
46
47 /*
48 * kernel uses 0 for user space maps, see kernel/perf_event.c
49 * __perf_event_mmap
50 */
51 if (machine__is_host(machine))
52 event->header.misc = PERF_RECORD_MISC_KERNEL;
53 else
54 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
55
56 event->mmap.header.size = size;
57
58 event->mmap.start = pos->start;
59 event->mmap.len = pos->end - pos->start;
60 event->mmap.pgoff = pos->pgoff;
61 event->mmap.pid = machine->pid;
62
63 strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
64
65 if (perf_tool__process_synth_event(tool, event, machine,
66 process) != 0) {
67 rc = -1;
68 break;
69 }
70 }
71
72 free(event);
73 return rc;
74}
75
76#endif
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
new file mode 100644
index 000000000000..4520ac53caa9
--- /dev/null
+++ b/tools/perf/arch/x86/util/machine.c
@@ -0,0 +1,103 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/types.h>
3#include <linux/string.h>
4#include <stdlib.h>
5
6#include "../../util/machine.h"
7#include "../../util/map.h"
8#include "../../util/symbol.h"
9#include "../../util/sane_ctype.h"
10
11#include <symbol/kallsyms.h>
12
13#if defined(__x86_64__)
14
15struct extra_kernel_map_info {
16 int cnt;
17 int max_cnt;
18 struct extra_kernel_map *maps;
19 bool get_entry_trampolines;
20 u64 entry_trampoline;
21};
22
23static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
24 u64 end, u64 pgoff, const char *name)
25{
26 if (mi->cnt >= mi->max_cnt) {
27 void *buf;
28 size_t sz;
29
30 mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
31 sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
32 buf = realloc(mi->maps, sz);
33 if (!buf)
34 return -1;
35 mi->maps = buf;
36 }
37
38 mi->maps[mi->cnt].start = start;
39 mi->maps[mi->cnt].end = end;
40 mi->maps[mi->cnt].pgoff = pgoff;
41 strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
42
43 mi->cnt += 1;
44
45 return 0;
46}
47
48static int find_extra_kernel_maps(void *arg, const char *name, char type,
49 u64 start)
50{
51 struct extra_kernel_map_info *mi = arg;
52
53 if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
54 !strcmp(name, "_entry_trampoline")) {
55 mi->entry_trampoline = start;
56 return 0;
57 }
58
59 if (is_entry_trampoline(name)) {
60 u64 end = start + page_size;
61
62 return add_extra_kernel_map(mi, start, end, 0, name);
63 }
64
65 return 0;
66}
67
68int machine__create_extra_kernel_maps(struct machine *machine,
69 struct dso *kernel)
70{
71 struct extra_kernel_map_info mi = { .cnt = 0, };
72 char filename[PATH_MAX];
73 int ret;
74 int i;
75
76 machine__get_kallsyms_filename(machine, filename, PATH_MAX);
77
78 if (symbol__restricted_filename(filename, "/proc/kallsyms"))
79 return 0;
80
81 ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
82 if (ret)
83 goto out_free;
84
85 if (!mi.entry_trampoline)
86 goto out_free;
87
88 for (i = 0; i < mi.cnt; i++) {
89 struct extra_kernel_map *xm = &mi.maps[i];
90
91 xm->pgoff = mi.entry_trampoline;
92 ret = machine__create_extra_kernel_map(machine, kernel, xm);
93 if (ret)
94 goto out_free;
95 }
96
97 machine->trampolines_mapped = mi.cnt;
98out_free:
99 free(mi.maps);
100 return ret;
101}
102
103#endif
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 944070e98a2c..63eb49082774 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -175,7 +175,7 @@ static const struct option options[] = {
175 OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), 175 OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"),
176 OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), 176 OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
177 177
178 OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), 178 OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"),
179 OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), 179 OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
180 OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), 180 OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
181 OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), 181 OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 51709a961496..5eb22cc56363 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -40,11 +40,11 @@
40struct perf_annotate { 40struct perf_annotate {
41 struct perf_tool tool; 41 struct perf_tool tool;
42 struct perf_session *session; 42 struct perf_session *session;
43 struct annotation_options opts;
43 bool use_tui, use_stdio, use_stdio2, use_gtk; 44 bool use_tui, use_stdio, use_stdio2, use_gtk;
44 bool full_paths;
45 bool print_line;
46 bool skip_missing; 45 bool skip_missing;
47 bool has_br_stack; 46 bool has_br_stack;
47 bool group_set;
48 const char *sym_hist_filter; 48 const char *sym_hist_filter;
49 const char *cpu_list; 49 const char *cpu_list;
50 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 50 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -161,12 +161,12 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
161 hist__account_cycles(sample->branch_stack, al, sample, false); 161 hist__account_cycles(sample->branch_stack, al, sample, false);
162 162
163 bi = he->branch_info; 163 bi = he->branch_info;
164 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); 164 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
165 165
166 if (err) 166 if (err)
167 goto out; 167 goto out;
168 168
169 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); 169 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
170 170
171out: 171out:
172 return err; 172 return err;
@@ -228,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
228 */ 228 */
229 if (al->sym != NULL) { 229 if (al->sym != NULL) {
230 rb_erase(&al->sym->rb_node, 230 rb_erase(&al->sym->rb_node,
231 &al->map->dso->symbols[al->map->type]); 231 &al->map->dso->symbols);
232 symbol__delete(al->sym); 232 symbol__delete(al->sym);
233 dso__reset_find_symbol_cache(al->map->dso); 233 dso__reset_find_symbol_cache(al->map->dso);
234 } 234 }
@@ -248,7 +248,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
248 if (he == NULL) 248 if (he == NULL)
249 return -ENOMEM; 249 return -ENOMEM;
250 250
251 ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); 251 ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
252 hists__inc_nr_samples(hists, true); 252 hists__inc_nr_samples(hists, true);
253 return ret; 253 return ret;
254} 254}
@@ -288,10 +288,9 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
288 struct perf_annotate *ann) 288 struct perf_annotate *ann)
289{ 289{
290 if (!ann->use_stdio2) 290 if (!ann->use_stdio2)
291 return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, 291 return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts);
292 ann->print_line, ann->full_paths, 0, 0); 292
293 return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, 293 return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts);
294 ann->print_line, ann->full_paths);
295} 294}
296 295
297static void hists__find_annotations(struct hists *hists, 296static void hists__find_annotations(struct hists *hists,
@@ -342,7 +341,7 @@ find_next:
342 /* skip missing symbols */ 341 /* skip missing symbols */
343 nd = rb_next(nd); 342 nd = rb_next(nd);
344 } else if (use_browser == 1) { 343 } else if (use_browser == 1) {
345 key = hist_entry__tui_annotate(he, evsel, NULL); 344 key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
346 345
347 switch (key) { 346 switch (key) {
348 case -1: 347 case -1:
@@ -389,8 +388,9 @@ static int __cmd_annotate(struct perf_annotate *ann)
389 goto out; 388 goto out;
390 } 389 }
391 390
392 if (!objdump_path) { 391 if (!ann->opts.objdump_path) {
393 ret = perf_env__lookup_objdump(&session->header.env); 392 ret = perf_env__lookup_objdump(&session->header.env,
393 &ann->opts.objdump_path);
394 if (ret) 394 if (ret)
395 goto out; 395 goto out;
396 } 396 }
@@ -475,6 +475,7 @@ int cmd_annotate(int argc, const char **argv)
475 .ordered_events = true, 475 .ordered_events = true,
476 .ordering_requires_timestamps = true, 476 .ordering_requires_timestamps = true,
477 }, 477 },
478 .opts = annotation__default_options,
478 }; 479 };
479 struct perf_data data = { 480 struct perf_data data = {
480 .mode = PERF_DATA_MODE_READ, 481 .mode = PERF_DATA_MODE_READ,
@@ -502,23 +503,26 @@ int cmd_annotate(int argc, const char **argv)
502 "file", "vmlinux pathname"), 503 "file", "vmlinux pathname"),
503 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 504 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
504 "load module symbols - WARNING: use only with -k and LIVE kernel"), 505 "load module symbols - WARNING: use only with -k and LIVE kernel"),
505 OPT_BOOLEAN('l', "print-line", &annotate.print_line, 506 OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
506 "print matching source lines (may be slow)"), 507 "print matching source lines (may be slow)"),
507 OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, 508 OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
508 "Don't shorten the displayed pathnames"), 509 "Don't shorten the displayed pathnames"),
509 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, 510 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
510 "Skip symbols that cannot be annotated"), 511 "Skip symbols that cannot be annotated"),
512 OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
513 &annotate.group_set,
514 "Show event group information together"),
511 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), 515 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
512 OPT_CALLBACK(0, "symfs", NULL, "directory", 516 OPT_CALLBACK(0, "symfs", NULL, "directory",
513 "Look for files with symbols relative to this directory", 517 "Look for files with symbols relative to this directory",
514 symbol__config_symfs), 518 symbol__config_symfs),
515 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, 519 OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
516 "Interleave source code with assembly code (default)"), 520 "Interleave source code with assembly code (default)"),
517 OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, 521 OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
518 "Display raw encoding of assembly instructions (default)"), 522 "Display raw encoding of assembly instructions (default)"),
519 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 523 OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
520 "Specify disassembler style (e.g. -M intel for intel syntax)"), 524 "Specify disassembler style (e.g. -M intel for intel syntax)"),
521 OPT_STRING(0, "objdump", &objdump_path, "path", 525 OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
522 "objdump binary to use for disassembly and annotations"), 526 "objdump binary to use for disassembly and annotations"),
523 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 527 OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
524 "Show event group information together"), 528 "Show event group information together"),
@@ -570,6 +574,9 @@ int cmd_annotate(int argc, const char **argv)
570 annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, 574 annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
571 HEADER_BRANCH_STACK); 575 HEADER_BRANCH_STACK);
572 576
577 if (annotate.group_set)
578 perf_evlist__force_leader(annotate.session->evlist);
579
573 ret = symbol__annotation_init(); 580 ret = symbol__annotation_init();
574 if (ret < 0) 581 if (ret < 0)
575 goto out_delete; 582 goto out_delete;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 41db2cba77eb..115110a4796a 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,6 +25,7 @@
25#include "util/session.h" 25#include "util/session.h"
26#include "util/symbol.h" 26#include "util/symbol.h"
27#include "util/time-utils.h" 27#include "util/time-utils.h"
28#include "util/probe-file.h"
28 29
29static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) 30static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
30{ 31{
@@ -239,6 +240,34 @@ out:
239 return err; 240 return err;
240} 241}
241 242
243static int build_id_cache__purge_all(void)
244{
245 struct strlist *list;
246 struct str_node *pos;
247 int err = 0;
248 char *buf;
249
250 list = build_id_cache__list_all(false);
251 if (!list) {
252 pr_debug("Failed to get buildids: -%d\n", errno);
253 return -EINVAL;
254 }
255
256 strlist__for_each_entry(pos, list) {
257 buf = build_id_cache__origname(pos->s);
258 err = build_id_cache__remove_s(pos->s);
259 pr_debug("Removing %s (%s): %s\n", buf, pos->s,
260 err ? "FAIL" : "Ok");
261 free(buf);
262 if (err)
263 break;
264 }
265 strlist__delete(list);
266
267 pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok");
268 return err;
269}
270
242static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) 271static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
243{ 272{
244 char filename[PATH_MAX]; 273 char filename[PATH_MAX];
@@ -297,6 +326,26 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
297 return err; 326 return err;
298} 327}
299 328
329static int build_id_cache__show_all(void)
330{
331 struct strlist *bidlist;
332 struct str_node *nd;
333 char *buf;
334
335 bidlist = build_id_cache__list_all(true);
336 if (!bidlist) {
337 pr_debug("Failed to get buildids: -%d\n", errno);
338 return -1;
339 }
340 strlist__for_each_entry(nd, bidlist) {
341 buf = build_id_cache__origname(nd->s);
342 fprintf(stdout, "%s %s\n", nd->s, buf);
343 free(buf);
344 }
345 strlist__delete(bidlist);
346 return 0;
347}
348
300int cmd_buildid_cache(int argc, const char **argv) 349int cmd_buildid_cache(int argc, const char **argv)
301{ 350{
302 struct strlist *list; 351 struct strlist *list;
@@ -304,6 +353,9 @@ int cmd_buildid_cache(int argc, const char **argv)
304 int ret = 0; 353 int ret = 0;
305 int ns_id = -1; 354 int ns_id = -1;
306 bool force = false; 355 bool force = false;
356 bool list_files = false;
357 bool opts_flag = false;
358 bool purge_all = false;
307 char const *add_name_list_str = NULL, 359 char const *add_name_list_str = NULL,
308 *remove_name_list_str = NULL, 360 *remove_name_list_str = NULL,
309 *purge_name_list_str = NULL, 361 *purge_name_list_str = NULL,
@@ -327,6 +379,8 @@ int cmd_buildid_cache(int argc, const char **argv)
327 "file(s) to remove"), 379 "file(s) to remove"),
328 OPT_STRING('p', "purge", &purge_name_list_str, "file list", 380 OPT_STRING('p', "purge", &purge_name_list_str, "file list",
329 "file(s) to remove (remove old caches too)"), 381 "file(s) to remove (remove old caches too)"),
382 OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"),
383 OPT_BOOLEAN('l', "list", &list_files, "list all cached files"),
330 OPT_STRING('M', "missing", &missing_filename, "file", 384 OPT_STRING('M', "missing", &missing_filename, "file",
331 "to find missing build ids in the cache"), 385 "to find missing build ids in the cache"),
332 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 386 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -344,11 +398,20 @@ int cmd_buildid_cache(int argc, const char **argv)
344 argc = parse_options(argc, argv, buildid_cache_options, 398 argc = parse_options(argc, argv, buildid_cache_options,
345 buildid_cache_usage, 0); 399 buildid_cache_usage, 0);
346 400
347 if (argc || (!add_name_list_str && !kcore_filename && 401 opts_flag = add_name_list_str || kcore_filename ||
348 !remove_name_list_str && !purge_name_list_str && 402 remove_name_list_str || purge_name_list_str ||
349 !missing_filename && !update_name_list_str)) 403 missing_filename || update_name_list_str ||
404 purge_all;
405
406 if (argc || !(list_files || opts_flag))
350 usage_with_options(buildid_cache_usage, buildid_cache_options); 407 usage_with_options(buildid_cache_usage, buildid_cache_options);
351 408
409 /* -l is exclusive. It can not be used with other options. */
410 if (list_files && opts_flag) {
411 usage_with_options_msg(buildid_cache_usage,
412 buildid_cache_options, "-l is exclusive.\n");
413 }
414
352 if (ns_id > 0) 415 if (ns_id > 0)
353 nsi = nsinfo__new(ns_id); 416 nsi = nsinfo__new(ns_id);
354 417
@@ -366,6 +429,11 @@ int cmd_buildid_cache(int argc, const char **argv)
366 429
367 setup_pager(); 430 setup_pager();
368 431
432 if (list_files) {
433 ret = build_id_cache__show_all();
434 goto out;
435 }
436
369 if (add_name_list_str) { 437 if (add_name_list_str) {
370 list = strlist__new(add_name_list_str, NULL); 438 list = strlist__new(add_name_list_str, NULL);
371 if (list) { 439 if (list) {
@@ -420,6 +488,13 @@ int cmd_buildid_cache(int argc, const char **argv)
420 } 488 }
421 } 489 }
422 490
491 if (purge_all) {
492 if (build_id_cache__purge_all()) {
493 pr_warning("Couldn't remove some caches. Error: %s.\n",
494 str_error_r(errno, sbuf, sizeof(sbuf)));
495 }
496 }
497
423 if (missing_filename) 498 if (missing_filename)
424 ret = build_id_cache__fprintf_missing(session, stdout); 499 ret = build_id_cache__fprintf_missing(session, stdout);
425 500
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 2126bfbcb385..6a8738f7ead3 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -56,16 +56,16 @@ struct c2c_hist_entry {
56 56
57 struct compute_stats cstats; 57 struct compute_stats cstats;
58 58
59 unsigned long paddr;
60 unsigned long paddr_cnt;
61 bool paddr_zero;
62 char *nodestr;
63
59 /* 64 /*
60 * must be at the end, 65 * must be at the end,
61 * because of its callchain dynamic entry 66 * because of its callchain dynamic entry
62 */ 67 */
63 struct hist_entry he; 68 struct hist_entry he;
64
65 unsigned long paddr;
66 unsigned long paddr_cnt;
67 bool paddr_zero;
68 char *nodestr;
69}; 69};
70 70
71static char const *coalesce_default = "pid,iaddr"; 71static char const *coalesce_default = "pid,iaddr";
@@ -1976,7 +1976,7 @@ static int filter_cb(struct hist_entry *he)
1976 c2c_he = container_of(he, struct c2c_hist_entry, he); 1976 c2c_he = container_of(he, struct c2c_hist_entry, he);
1977 1977
1978 if (c2c.show_src && !he->srcline) 1978 if (c2c.show_src && !he->srcline)
1979 he->srcline = hist_entry__get_srcline(he); 1979 he->srcline = hist_entry__srcline(he);
1980 1980
1981 calc_width(c2c_he); 1981 calc_width(c2c_he);
1982 1982
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40fe919bbcf3..a3b346359ba0 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -440,9 +440,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
440 goto repipe; 440 goto repipe;
441 } 441 }
442 442
443 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); 443 if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
444
445 if (al.map != NULL) {
446 if (!al.map->dso->hit) { 444 if (!al.map->dso->hit) {
447 al.map->dso->hit = 1; 445 al.map->dso->hit = 1;
448 if (map__load(al.map) >= 0) { 446 if (map__load(al.map) >= 0) {
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index bcfb363112d3..90d1a2305b72 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -27,7 +27,7 @@ static int __cmd_kallsyms(int argc, const char **argv)
27 27
28 for (i = 0; i < argc; ++i) { 28 for (i = 0; i < argc; ++i) {
29 struct map *map; 29 struct map *map;
30 struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); 30 struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map);
31 31
32 if (symbol == NULL) { 32 if (symbol == NULL) {
33 printf("%s: not found\n", argv[i]); 33 printf("%s: not found\n", argv[i]);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ae11e4c3516a..54d3f21b0e62 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1004,7 +1004,7 @@ static void __print_slab_result(struct rb_root *root,
1004 if (is_caller) { 1004 if (is_caller) {
1005 addr = data->call_site; 1005 addr = data->call_site;
1006 if (!raw_ip) 1006 if (!raw_ip)
1007 sym = machine__find_kernel_function(machine, addr, &map); 1007 sym = machine__find_kernel_symbol(machine, addr, &map);
1008 } else 1008 } else
1009 addr = data->ptr; 1009 addr = data->ptr;
1010 1010
@@ -1068,7 +1068,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
1068 char *caller = buf; 1068 char *caller = buf;
1069 1069
1070 data = rb_entry(next, struct page_stat, node); 1070 data = rb_entry(next, struct page_stat, node);
1071 sym = machine__find_kernel_function(machine, data->callsite, &map); 1071 sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1072 if (sym) 1072 if (sym)
1073 caller = sym->name; 1073 caller = sym->name;
1074 else 1074 else
@@ -1110,7 +1110,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
1110 char *caller = buf; 1110 char *caller = buf;
1111 1111
1112 data = rb_entry(next, struct page_stat, node); 1112 data = rb_entry(next, struct page_stat, node);
1113 sym = machine__find_kernel_function(machine, data->callsite, &map); 1113 sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1114 if (sym) 1114 if (sym)
1115 caller = sym->name; 1115 caller = sym->name;
1116 else 1116 else
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 72e2ca096bf5..2b1ef704169f 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1438,8 +1438,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
1438 goto out; 1438 goto out;
1439 } 1439 }
1440 1440
1441 symbol_conf.nr_events = kvm->evlist->nr_entries;
1442
1443 if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) 1441 if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
1444 usage_with_options(live_usage, live_options); 1442 usage_with_options(live_usage, live_options);
1445 1443
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c0065923a525..99de91698de1 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -81,8 +81,7 @@ static int parse_probe_event(const char *str)
81 params.target_used = true; 81 params.target_used = true;
82 } 82 }
83 83
84 if (params.nsi) 84 pev->nsi = nsinfo__get(params.nsi);
85 pev->nsi = nsinfo__get(params.nsi);
86 85
87 /* Parse a perf-probe command into event */ 86 /* Parse a perf-probe command into event */
88 ret = parse_perf_probe_command(str, pev); 87 ret = parse_perf_probe_command(str, pev);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0f198f6d9b77..cdb5b6949832 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -71,6 +71,7 @@ struct report {
71 bool group_set; 71 bool group_set;
72 int max_stack; 72 int max_stack;
73 struct perf_read_values show_threads_values; 73 struct perf_read_values show_threads_values;
74 struct annotation_options annotation_opts;
74 const char *pretty_printing_style; 75 const char *pretty_printing_style;
75 const char *cpu_list; 76 const char *cpu_list;
76 const char *symbol_filter_str; 77 const char *symbol_filter_str;
@@ -136,26 +137,25 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
136 137
137 if (sort__mode == SORT_MODE__BRANCH) { 138 if (sort__mode == SORT_MODE__BRANCH) {
138 bi = he->branch_info; 139 bi = he->branch_info;
139 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); 140 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
140 if (err) 141 if (err)
141 goto out; 142 goto out;
142 143
143 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); 144 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
144 145
145 } else if (rep->mem_mode) { 146 } else if (rep->mem_mode) {
146 mi = he->mem_info; 147 mi = he->mem_info;
147 err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx); 148 err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel);
148 if (err) 149 if (err)
149 goto out; 150 goto out;
150 151
151 err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); 152 err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
152 153
153 } else if (symbol_conf.cumulate_callchain) { 154 } else if (symbol_conf.cumulate_callchain) {
154 if (single) 155 if (single)
155 err = hist_entry__inc_addr_samples(he, sample, evsel->idx, 156 err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
156 al->addr);
157 } else { 157 } else {
158 err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); 158 err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
159 } 159 }
160 160
161out: 161out:
@@ -181,11 +181,11 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
181 rep->nonany_branch_mode); 181 rep->nonany_branch_mode);
182 182
183 bi = he->branch_info; 183 bi = he->branch_info;
184 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); 184 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
185 if (err) 185 if (err)
186 goto out; 186 goto out;
187 187
188 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); 188 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
189 189
190 branch_type_count(&rep->brtype_stat, &bi->flags, 190 branch_type_count(&rep->brtype_stat, &bi->flags,
191 bi->from.addr, bi->to.addr); 191 bi->from.addr, bi->to.addr);
@@ -194,20 +194,11 @@ out:
194 return err; 194 return err;
195} 195}
196 196
197/*
198 * Events in data file are not collect in groups, but we still want
199 * the group display. Set the artificial group and set the leader's
200 * forced_leader flag to notify the display code.
201 */
202static void setup_forced_leader(struct report *report, 197static void setup_forced_leader(struct report *report,
203 struct perf_evlist *evlist) 198 struct perf_evlist *evlist)
204{ 199{
205 if (report->group_set && !evlist->nr_groups) { 200 if (report->group_set)
206 struct perf_evsel *leader = perf_evlist__first(evlist); 201 perf_evlist__force_leader(evlist);
207
208 perf_evlist__set_leader(evlist);
209 leader->forced_leader = true;
210 }
211} 202}
212 203
213static int process_feature_event(struct perf_tool *tool, 204static int process_feature_event(struct perf_tool *tool,
@@ -523,12 +514,9 @@ static void report__warn_kptr_restrict(const struct report *rep)
523 "As no suitable kallsyms nor vmlinux was found, kernel samples\n" 514 "As no suitable kallsyms nor vmlinux was found, kernel samples\n"
524 "can't be resolved."; 515 "can't be resolved.";
525 516
526 if (kernel_map) { 517 if (kernel_map && map__has_symbols(kernel_map)) {
527 const struct dso *kdso = kernel_map->dso; 518 desc = "If some relocation was applied (e.g. "
528 if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) { 519 "kexec) symbols may be misresolved.";
529 desc = "If some relocation was applied (e.g. "
530 "kexec) symbols may be misresolved.";
531 }
532 } 520 }
533 521
534 ui__warning( 522 ui__warning(
@@ -573,7 +561,7 @@ static int report__browse_hists(struct report *rep)
573 ret = perf_evlist__tui_browse_hists(evlist, help, NULL, 561 ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
574 rep->min_percent, 562 rep->min_percent,
575 &session->header.env, 563 &session->header.env,
576 true); 564 true, &rep->annotation_opts);
577 /* 565 /*
578 * Usually "ret" is the last pressed key, and we only 566 * Usually "ret" is the last pressed key, and we only
579 * care if the key notifies us to switch data file. 567 * care if the key notifies us to switch data file.
@@ -718,10 +706,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
718 706
719static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) 707static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
720{ 708{
721 int printed = 0, i; 709 return maps__fprintf_task(&mg->maps, indent, fp);
722 for (i = 0; i < MAP__NR_TYPES; ++i)
723 printed += maps__fprintf_task(&mg->maps[i], indent, fp);
724 return printed;
725} 710}
726 711
727static void task__print_level(struct task *task, FILE *fp, int level) 712static void task__print_level(struct task *task, FILE *fp, int level)
@@ -961,12 +946,6 @@ parse_percent_limit(const struct option *opt, const char *str,
961 return 0; 946 return 0;
962} 947}
963 948
964#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
965
966const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
967 CALLCHAIN_REPORT_HELP
968 "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
969
970int cmd_report(int argc, const char **argv) 949int cmd_report(int argc, const char **argv)
971{ 950{
972 struct perf_session *session; 951 struct perf_session *session;
@@ -975,6 +954,10 @@ int cmd_report(int argc, const char **argv)
975 bool has_br_stack = false; 954 bool has_br_stack = false;
976 int branch_mode = -1; 955 int branch_mode = -1;
977 bool branch_call_mode = false; 956 bool branch_call_mode = false;
957#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
958 const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
959 CALLCHAIN_REPORT_HELP
960 "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
978 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; 961 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
979 const char * const report_usage[] = { 962 const char * const report_usage[] = {
980 "perf report [<options>]", 963 "perf report [<options>]",
@@ -1004,6 +987,7 @@ int cmd_report(int argc, const char **argv)
1004 .max_stack = PERF_MAX_STACK_DEPTH, 987 .max_stack = PERF_MAX_STACK_DEPTH,
1005 .pretty_printing_style = "normal", 988 .pretty_printing_style = "normal",
1006 .socket_filter = -1, 989 .socket_filter = -1,
990 .annotation_opts = annotation__default_options,
1007 }; 991 };
1008 const struct option options[] = { 992 const struct option options[] = {
1009 OPT_STRING('i', "input", &input_name, "file", 993 OPT_STRING('i', "input", &input_name, "file",
@@ -1093,11 +1077,11 @@ int cmd_report(int argc, const char **argv)
1093 "list of cpus to profile"), 1077 "list of cpus to profile"),
1094 OPT_BOOLEAN('I', "show-info", &report.show_full_info, 1078 OPT_BOOLEAN('I', "show-info", &report.show_full_info,
1095 "Display extended information about perf.data file"), 1079 "Display extended information about perf.data file"),
1096 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, 1080 OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src,
1097 "Interleave source code with assembly code (default)"), 1081 "Interleave source code with assembly code (default)"),
1098 OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, 1082 OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw,
1099 "Display raw encoding of assembly instructions (default)"), 1083 "Display raw encoding of assembly instructions (default)"),
1100 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1084 OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style",
1101 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1085 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1102 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 1086 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1103 "Show a column with the sum of periods"), 1087 "Show a column with the sum of periods"),
@@ -1108,7 +1092,7 @@ int cmd_report(int argc, const char **argv)
1108 parse_branch_mode), 1092 parse_branch_mode),
1109 OPT_BOOLEAN(0, "branch-history", &branch_call_mode, 1093 OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
1110 "add last branch records to call history"), 1094 "add last branch records to call history"),
1111 OPT_STRING(0, "objdump", &objdump_path, "path", 1095 OPT_STRING(0, "objdump", &report.annotation_opts.objdump_path, "path",
1112 "objdump binary to use for disassembly and annotations"), 1096 "objdump binary to use for disassembly and annotations"),
1113 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, 1097 OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
1114 "Disable symbol demangling"), 1098 "Disable symbol demangling"),
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 4dfdee668b0c..cbf39dab19c1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2143,7 +2143,7 @@ static void save_task_callchain(struct perf_sched *sched,
2143 return; 2143 return;
2144 } 2144 }
2145 2145
2146 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2146 if (!sched->show_callchain || sample->callchain == NULL)
2147 return; 2147 return;
2148 2148
2149 if (thread__resolve_callchain(thread, cursor, evsel, sample, 2149 if (thread__resolve_callchain(thread, cursor, evsel, sample,
@@ -2271,10 +2271,11 @@ static struct thread *get_idle_thread(int cpu)
2271 return idle_threads[cpu]; 2271 return idle_threads[cpu];
2272} 2272}
2273 2273
2274static void save_idle_callchain(struct idle_thread_runtime *itr, 2274static void save_idle_callchain(struct perf_sched *sched,
2275 struct idle_thread_runtime *itr,
2275 struct perf_sample *sample) 2276 struct perf_sample *sample)
2276{ 2277{
2277 if (!symbol_conf.use_callchain || sample->callchain == NULL) 2278 if (!sched->show_callchain || sample->callchain == NULL)
2278 return; 2279 return;
2279 2280
2280 callchain_cursor__copy(&itr->cursor, &callchain_cursor); 2281 callchain_cursor__copy(&itr->cursor, &callchain_cursor);
@@ -2320,7 +2321,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
2320 2321
2321 /* copy task callchain when entering to idle */ 2322 /* copy task callchain when entering to idle */
2322 if (perf_evsel__intval(evsel, sample, "next_pid") == 0) 2323 if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
2323 save_idle_callchain(itr, sample); 2324 save_idle_callchain(sched, itr, sample);
2324 } 2325 }
2325 } 2326 }
2326 2327
@@ -2849,7 +2850,7 @@ static void timehist_print_summary(struct perf_sched *sched,
2849 printf(" CPU %2d idle entire time window\n", i); 2850 printf(" CPU %2d idle entire time window\n", i);
2850 } 2851 }
2851 2852
2852 if (sched->idle_hist && symbol_conf.use_callchain) { 2853 if (sched->idle_hist && sched->show_callchain) {
2853 callchain_param.mode = CHAIN_FOLDED; 2854 callchain_param.mode = CHAIN_FOLDED;
2854 callchain_param.value = CCVAL_PERIOD; 2855 callchain_param.value = CCVAL_PERIOD;
2855 2856
@@ -2933,8 +2934,7 @@ static int timehist_check_attr(struct perf_sched *sched,
2933 return -1; 2934 return -1;
2934 } 2935 }
2935 2936
2936 if (sched->show_callchain && 2937 if (sched->show_callchain && !evsel__has_callchain(evsel)) {
2937 !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) {
2938 pr_info("Samples do not have callchains.\n"); 2938 pr_info("Samples do not have callchains.\n");
2939 sched->show_callchain = 0; 2939 sched->show_callchain = 0;
2940 symbol_conf.use_callchain = 0; 2940 symbol_conf.use_callchain = 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e0a9845b6cbc..a31d7082188e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -153,8 +153,8 @@ static struct {
153 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 153 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
154 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 154 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
155 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 155 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
156 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 156 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
157 PERF_OUTPUT_PERIOD, 157 PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
158 158
159 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, 159 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
160 }, 160 },
@@ -165,8 +165,9 @@ static struct {
165 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 165 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
166 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 166 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
167 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 167 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
168 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 168 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
169 PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, 169 PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
170 PERF_OUTPUT_BPF_OUTPUT,
170 171
171 .invalid_fields = PERF_OUTPUT_TRACE, 172 .invalid_fields = PERF_OUTPUT_TRACE,
172 }, 173 },
@@ -179,16 +180,28 @@ static struct {
179 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE 180 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
180 }, 181 },
181 182
183 [PERF_TYPE_HW_CACHE] = {
184 .user_set = false,
185
186 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
187 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
188 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
189 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
190 PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
191
192 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
193 },
194
182 [PERF_TYPE_RAW] = { 195 [PERF_TYPE_RAW] = {
183 .user_set = false, 196 .user_set = false,
184 197
185 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 198 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
186 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 199 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
187 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 200 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
188 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 201 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
189 PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | 202 PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
190 PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | 203 PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
191 PERF_OUTPUT_PHYS_ADDR, 204 PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
192 205
193 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, 206 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
194 }, 207 },
@@ -199,8 +212,8 @@ static struct {
199 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 212 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
200 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 213 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
201 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 214 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
202 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 215 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
203 PERF_OUTPUT_PERIOD, 216 PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
204 217
205 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, 218 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
206 }, 219 },
@@ -211,8 +224,8 @@ static struct {
211 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 224 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
212 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 225 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
213 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | 226 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
214 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | 227 PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
215 PERF_OUTPUT_SYNTH, 228 PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH,
216 229
217 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, 230 .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
218 }, 231 },
@@ -516,7 +529,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
516 529
517 evlist__for_each_entry(session->evlist, evsel) { 530 evlist__for_each_entry(session->evlist, evsel) {
518 not_pipe = true; 531 not_pipe = true;
519 if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { 532 if (evsel__has_callchain(evsel)) {
520 use_callchain = true; 533 use_callchain = true;
521 break; 534 break;
522 } 535 }
@@ -531,21 +544,18 @@ static int perf_session__check_output_opt(struct perf_session *session)
531 */ 544 */
532 if (symbol_conf.use_callchain && 545 if (symbol_conf.use_callchain &&
533 !output[PERF_TYPE_TRACEPOINT].user_set) { 546 !output[PERF_TYPE_TRACEPOINT].user_set) {
534 struct perf_event_attr *attr;
535
536 j = PERF_TYPE_TRACEPOINT; 547 j = PERF_TYPE_TRACEPOINT;
537 548
538 evlist__for_each_entry(session->evlist, evsel) { 549 evlist__for_each_entry(session->evlist, evsel) {
539 if (evsel->attr.type != j) 550 if (evsel->attr.type != j)
540 continue; 551 continue;
541 552
542 attr = &evsel->attr; 553 if (evsel__has_callchain(evsel)) {
543
544 if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
545 output[j].fields |= PERF_OUTPUT_IP; 554 output[j].fields |= PERF_OUTPUT_IP;
546 output[j].fields |= PERF_OUTPUT_SYM; 555 output[j].fields |= PERF_OUTPUT_SYM;
556 output[j].fields |= PERF_OUTPUT_SYMOFFSET;
547 output[j].fields |= PERF_OUTPUT_DSO; 557 output[j].fields |= PERF_OUTPUT_DSO;
548 set_print_ip_opts(attr); 558 set_print_ip_opts(&evsel->attr);
549 goto out; 559 goto out;
550 } 560 }
551 } 561 }
@@ -608,7 +618,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
608 if (PRINT_FIELD(COMM)) { 618 if (PRINT_FIELD(COMM)) {
609 if (latency_format) 619 if (latency_format)
610 printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); 620 printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
611 else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) 621 else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain)
612 printed += fprintf(fp, "%s ", thread__comm_str(thread)); 622 printed += fprintf(fp, "%s ", thread__comm_str(thread));
613 else 623 else
614 printed += fprintf(fp, "%16s ", thread__comm_str(thread)); 624 printed += fprintf(fp, "%16s ", thread__comm_str(thread));
@@ -717,8 +727,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
717 if (PRINT_FIELD(DSO)) { 727 if (PRINT_FIELD(DSO)) {
718 memset(&alf, 0, sizeof(alf)); 728 memset(&alf, 0, sizeof(alf));
719 memset(&alt, 0, sizeof(alt)); 729 memset(&alt, 0, sizeof(alt));
720 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); 730 thread__find_map(thread, sample->cpumode, from, &alf);
721 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); 731 thread__find_map(thread, sample->cpumode, to, &alt);
722 } 732 }
723 733
724 printed += fprintf(fp, " 0x%"PRIx64, from); 734 printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -764,13 +774,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
764 from = br->entries[i].from; 774 from = br->entries[i].from;
765 to = br->entries[i].to; 775 to = br->entries[i].to;
766 776
767 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); 777 thread__find_symbol(thread, sample->cpumode, from, &alf);
768 if (alf.map) 778 thread__find_symbol(thread, sample->cpumode, to, &alt);
769 alf.sym = map__find_symbol(alf.map, alf.addr);
770
771 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
772 if (alt.map)
773 alt.sym = map__find_symbol(alt.map, alt.addr);
774 779
775 printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp); 780 printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
776 if (PRINT_FIELD(DSO)) { 781 if (PRINT_FIELD(DSO)) {
@@ -814,12 +819,12 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
814 from = br->entries[i].from; 819 from = br->entries[i].from;
815 to = br->entries[i].to; 820 to = br->entries[i].to;
816 821
817 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); 822 if (thread__find_map(thread, sample->cpumode, from, &alf) &&
818 if (alf.map && !alf.map->dso->adjust_symbols) 823 !alf.map->dso->adjust_symbols)
819 from = map__map_ip(alf.map, from); 824 from = map__map_ip(alf.map, from);
820 825
821 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); 826 if (thread__find_map(thread, sample->cpumode, to, &alt) &&
822 if (alt.map && !alt.map->dso->adjust_symbols) 827 !alt.map->dso->adjust_symbols)
823 to = map__map_ip(alt.map, to); 828 to = map__map_ip(alt.map, to);
824 829
825 printed += fprintf(fp, " 0x%"PRIx64, from); 830 printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -882,8 +887,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
882 return 0; 887 return 0;
883 } 888 }
884 889
885 thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); 890 if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) {
886 if (!al.map || !al.map->dso) {
887 pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); 891 pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
888 return 0; 892 return 0;
889 } 893 }
@@ -933,10 +937,8 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
933 937
934 memset(&al, 0, sizeof(al)); 938 memset(&al, 0, sizeof(al));
935 939
936 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); 940 thread__find_map(thread, cpumode, addr, &al);
937 if (!al.map) 941
938 thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
939 addr, &al);
940 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) 942 if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
941 return 0; 943 return 0;
942 944
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f17dc601b0f3..22547a490e1f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
65#include "util/tool.h" 65#include "util/tool.h"
66#include "util/string2.h" 66#include "util/string2.h"
67#include "util/metricgroup.h" 67#include "util/metricgroup.h"
68#include "util/top.h"
68#include "asm/bug.h" 69#include "asm/bug.h"
69 70
70#include <linux/time64.h> 71#include <linux/time64.h>
@@ -80,6 +81,9 @@
80#include <sys/stat.h> 81#include <sys/stat.h>
81#include <sys/wait.h> 82#include <sys/wait.h>
82#include <unistd.h> 83#include <unistd.h>
84#include <sys/time.h>
85#include <sys/resource.h>
86#include <sys/wait.h>
83 87
84#include "sane_ctype.h" 88#include "sane_ctype.h"
85 89
@@ -141,6 +145,8 @@ static struct target target = {
141 145
142typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 146typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
143 147
148#define METRIC_ONLY_LEN 20
149
144static int run_count = 1; 150static int run_count = 1;
145static bool no_inherit = false; 151static bool no_inherit = false;
146static volatile pid_t child_pid = -1; 152static volatile pid_t child_pid = -1;
@@ -164,15 +170,21 @@ static bool forever = false;
164static bool metric_only = false; 170static bool metric_only = false;
165static bool force_metric_only = false; 171static bool force_metric_only = false;
166static bool no_merge = false; 172static bool no_merge = false;
173static bool walltime_run_table = false;
167static struct timespec ref_time; 174static struct timespec ref_time;
168static struct cpu_map *aggr_map; 175static struct cpu_map *aggr_map;
169static aggr_get_id_t aggr_get_id; 176static aggr_get_id_t aggr_get_id;
170static bool append_file; 177static bool append_file;
171static bool interval_count; 178static bool interval_count;
179static bool interval_clear;
172static const char *output_name; 180static const char *output_name;
173static int output_fd; 181static int output_fd;
174static int print_free_counters_hint; 182static int print_free_counters_hint;
175static int print_mixed_hw_group_error; 183static int print_mixed_hw_group_error;
184static u64 *walltime_run;
185static bool ru_display = false;
186static struct rusage ru_data;
187static unsigned int metric_only_len = METRIC_ONLY_LEN;
176 188
177struct perf_stat { 189struct perf_stat {
178 bool record; 190 bool record;
@@ -569,7 +581,7 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
569 return leader; 581 return leader;
570} 582}
571 583
572static int __run_perf_stat(int argc, const char **argv) 584static int __run_perf_stat(int argc, const char **argv, int run_idx)
573{ 585{
574 int interval = stat_config.interval; 586 int interval = stat_config.interval;
575 int times = stat_config.times; 587 int times = stat_config.times;
@@ -724,7 +736,7 @@ try_again:
724 break; 736 break;
725 } 737 }
726 } 738 }
727 waitpid(child_pid, &status, 0); 739 wait4(child_pid, &status, 0, &ru_data);
728 740
729 if (workload_exec_errno) { 741 if (workload_exec_errno) {
730 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 742 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -752,6 +764,9 @@ try_again:
752 764
753 t1 = rdclock(); 765 t1 = rdclock();
754 766
767 if (walltime_run_table)
768 walltime_run[run_idx] = t1 - t0;
769
755 update_stats(&walltime_nsecs_stats, t1 - t0); 770 update_stats(&walltime_nsecs_stats, t1 - t0);
756 771
757 /* 772 /*
@@ -766,7 +781,7 @@ try_again:
766 return WEXITSTATUS(status); 781 return WEXITSTATUS(status);
767} 782}
768 783
769static int run_perf_stat(int argc, const char **argv) 784static int run_perf_stat(int argc, const char **argv, int run_idx)
770{ 785{
771 int ret; 786 int ret;
772 787
@@ -779,7 +794,7 @@ static int run_perf_stat(int argc, const char **argv)
779 if (sync_run) 794 if (sync_run)
780 sync(); 795 sync();
781 796
782 ret = __run_perf_stat(argc, argv); 797 ret = __run_perf_stat(argc, argv, run_idx);
783 if (ret) 798 if (ret)
784 return ret; 799 return ret;
785 800
@@ -957,8 +972,6 @@ static void print_metric_csv(void *ctx,
957 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 972 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
958} 973}
959 974
960#define METRIC_ONLY_LEN 20
961
962/* Filter out some columns that don't work well in metrics only mode */ 975/* Filter out some columns that don't work well in metrics only mode */
963 976
964static bool valid_only_metric(const char *unit) 977static bool valid_only_metric(const char *unit)
@@ -989,22 +1002,20 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt,
989{ 1002{
990 struct outstate *os = ctx; 1003 struct outstate *os = ctx;
991 FILE *out = os->fh; 1004 FILE *out = os->fh;
992 int n; 1005 char buf[1024], str[1024];
993 char buf[1024]; 1006 unsigned mlen = metric_only_len;
994 unsigned mlen = METRIC_ONLY_LEN;
995 1007
996 if (!valid_only_metric(unit)) 1008 if (!valid_only_metric(unit))
997 return; 1009 return;
998 unit = fixunit(buf, os->evsel, unit); 1010 unit = fixunit(buf, os->evsel, unit);
999 if (color)
1000 n = color_fprintf(out, color, fmt, val);
1001 else
1002 n = fprintf(out, fmt, val);
1003 if (n > METRIC_ONLY_LEN)
1004 n = METRIC_ONLY_LEN;
1005 if (mlen < strlen(unit)) 1011 if (mlen < strlen(unit))
1006 mlen = strlen(unit) + 1; 1012 mlen = strlen(unit) + 1;
1007 fprintf(out, "%*s", mlen - n, ""); 1013
1014 if (color)
1015 mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
1016
1017 color_snprintf(str, sizeof(str), color ?: "", fmt, val);
1018 fprintf(out, "%*s ", mlen, str);
1008} 1019}
1009 1020
1010static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 1021static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
@@ -1044,7 +1055,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused,
1044 if (csv_output) 1055 if (csv_output)
1045 fprintf(os->fh, "%s%s", unit, csv_sep); 1056 fprintf(os->fh, "%s%s", unit, csv_sep);
1046 else 1057 else
1047 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1058 fprintf(os->fh, "%*s ", metric_only_len, unit);
1048} 1059}
1049 1060
1050static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1061static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
@@ -1694,9 +1705,12 @@ static void print_interval(char *prefix, struct timespec *ts)
1694 FILE *output = stat_config.output; 1705 FILE *output = stat_config.output;
1695 static int num_print_interval; 1706 static int num_print_interval;
1696 1707
1708 if (interval_clear)
1709 puts(CONSOLE_CLEAR);
1710
1697 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1711 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
1698 1712
1699 if (num_print_interval == 0 && !csv_output) { 1713 if ((num_print_interval == 0 && !csv_output) || interval_clear) {
1700 switch (stat_config.aggr_mode) { 1714 switch (stat_config.aggr_mode) {
1701 case AGGR_SOCKET: 1715 case AGGR_SOCKET:
1702 fprintf(output, "# time socket cpus"); 1716 fprintf(output, "# time socket cpus");
@@ -1709,7 +1723,7 @@ static void print_interval(char *prefix, struct timespec *ts)
1709 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1723 fprintf(output, " counts %*s events\n", unit_width, "unit");
1710 break; 1724 break;
1711 case AGGR_NONE: 1725 case AGGR_NONE:
1712 fprintf(output, "# time CPU"); 1726 fprintf(output, "# time CPU ");
1713 if (!metric_only) 1727 if (!metric_only)
1714 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1728 fprintf(output, " counts %*s events\n", unit_width, "unit");
1715 break; 1729 break;
@@ -1728,7 +1742,7 @@ static void print_interval(char *prefix, struct timespec *ts)
1728 } 1742 }
1729 } 1743 }
1730 1744
1731 if (num_print_interval == 0 && metric_only) 1745 if ((num_print_interval == 0 && metric_only) || interval_clear)
1732 print_metric_headers(" ", true); 1746 print_metric_headers(" ", true);
1733 if (++num_print_interval == 25) 1747 if (++num_print_interval == 25)
1734 num_print_interval = 0; 1748 num_print_interval = 0;
@@ -1764,19 +1778,81 @@ static void print_header(int argc, const char **argv)
1764 } 1778 }
1765} 1779}
1766 1780
1781static int get_precision(double num)
1782{
1783 if (num > 1)
1784 return 0;
1785
1786 return lround(ceil(-log10(num)));
1787}
1788
1789static void print_table(FILE *output, int precision, double avg)
1790{
1791 char tmp[64];
1792 int idx, indent = 0;
1793
1794 scnprintf(tmp, 64, " %17.*f", precision, avg);
1795 while (tmp[indent] == ' ')
1796 indent++;
1797
1798 fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
1799
1800 for (idx = 0; idx < run_count; idx++) {
1801 double run = (double) walltime_run[idx] / NSEC_PER_SEC;
1802 int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
1803
1804 fprintf(output, " %17.*f (%+.*f) ",
1805 precision, run, precision, run - avg);
1806
1807 for (h = 0; h < n; h++)
1808 fprintf(output, "#");
1809
1810 fprintf(output, "\n");
1811 }
1812
1813 fprintf(output, "\n%*s# Final result:\n", indent, "");
1814}
1815
1816static double timeval2double(struct timeval *t)
1817{
1818 return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
1819}
1820
1767static void print_footer(void) 1821static void print_footer(void)
1768{ 1822{
1823 double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
1769 FILE *output = stat_config.output; 1824 FILE *output = stat_config.output;
1770 int n; 1825 int n;
1771 1826
1772 if (!null_run) 1827 if (!null_run)
1773 fprintf(output, "\n"); 1828 fprintf(output, "\n");
1774 fprintf(output, " %17.9f seconds time elapsed", 1829
1775 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1830 if (run_count == 1) {
1776 if (run_count > 1) { 1831 fprintf(output, " %17.9f seconds time elapsed", avg);
1777 fprintf(output, " "); 1832
1778 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1833 if (ru_display) {
1779 avg_stats(&walltime_nsecs_stats)); 1834 double ru_utime = timeval2double(&ru_data.ru_utime);
1835 double ru_stime = timeval2double(&ru_data.ru_stime);
1836
1837 fprintf(output, "\n\n");
1838 fprintf(output, " %17.9f seconds user\n", ru_utime);
1839 fprintf(output, " %17.9f seconds sys\n", ru_stime);
1840 }
1841 } else {
1842 double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
1843 /*
1844 * Display at most 2 more significant
1845 * digits than the stddev inaccuracy.
1846 */
1847 int precision = get_precision(sd) + 2;
1848
1849 if (walltime_run_table)
1850 print_table(output, precision, avg);
1851
1852 fprintf(output, " %17.*f +- %.*f seconds time elapsed",
1853 precision, avg, precision, sd);
1854
1855 print_noise_pct(sd, avg);
1780 } 1856 }
1781 fprintf(output, "\n\n"); 1857 fprintf(output, "\n\n");
1782 1858
@@ -1952,6 +2028,8 @@ static const struct option stat_options[] = {
1952 "be more verbose (show counter open errors, etc)"), 2028 "be more verbose (show counter open errors, etc)"),
1953 OPT_INTEGER('r', "repeat", &run_count, 2029 OPT_INTEGER('r', "repeat", &run_count,
1954 "repeat command and print average + stddev (max: 100, forever: 0)"), 2030 "repeat command and print average + stddev (max: 100, forever: 0)"),
2031 OPT_BOOLEAN(0, "table", &walltime_run_table,
2032 "display details about each run (only with -r option)"),
1955 OPT_BOOLEAN('n', "null", &null_run, 2033 OPT_BOOLEAN('n', "null", &null_run,
1956 "null run - dont start any counters"), 2034 "null run - dont start any counters"),
1957 OPT_INCR('d', "detailed", &detailed_run, 2035 OPT_INCR('d', "detailed", &detailed_run,
@@ -1983,6 +2061,8 @@ static const struct option stat_options[] = {
1983 "(overhead is possible for values <= 100ms)"), 2061 "(overhead is possible for values <= 100ms)"),
1984 OPT_INTEGER(0, "interval-count", &stat_config.times, 2062 OPT_INTEGER(0, "interval-count", &stat_config.times,
1985 "print counts for fixed number of times"), 2063 "print counts for fixed number of times"),
2064 OPT_BOOLEAN(0, "interval-clear", &interval_clear,
2065 "clear screen in between new interval"),
1986 OPT_UINTEGER(0, "timeout", &stat_config.timeout, 2066 OPT_UINTEGER(0, "timeout", &stat_config.timeout,
1987 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 2067 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
1988 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 2068 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
@@ -2362,14 +2442,13 @@ static int add_default_attributes(void)
2362 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2442 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
2363 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2443 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
2364}; 2444};
2445 struct parse_events_error errinfo;
2365 2446
2366 /* Set attrs if no event is selected and !null_run: */ 2447 /* Set attrs if no event is selected and !null_run: */
2367 if (null_run) 2448 if (null_run)
2368 return 0; 2449 return 0;
2369 2450
2370 if (transaction_run) { 2451 if (transaction_run) {
2371 struct parse_events_error errinfo;
2372
2373 if (pmu_have_event("cpu", "cycles-ct") && 2452 if (pmu_have_event("cpu", "cycles-ct") &&
2374 pmu_have_event("cpu", "el-start")) 2453 pmu_have_event("cpu", "el-start"))
2375 err = parse_events(evsel_list, transaction_attrs, 2454 err = parse_events(evsel_list, transaction_attrs,
@@ -2380,6 +2459,7 @@ static int add_default_attributes(void)
2380 &errinfo); 2459 &errinfo);
2381 if (err) { 2460 if (err) {
2382 fprintf(stderr, "Cannot set up transaction events\n"); 2461 fprintf(stderr, "Cannot set up transaction events\n");
2462 parse_events_print_error(&errinfo, transaction_attrs);
2383 return -1; 2463 return -1;
2384 } 2464 }
2385 return 0; 2465 return 0;
@@ -2405,10 +2485,11 @@ static int add_default_attributes(void)
2405 pmu_have_event("msr", "smi")) { 2485 pmu_have_event("msr", "smi")) {
2406 if (!force_metric_only) 2486 if (!force_metric_only)
2407 metric_only = true; 2487 metric_only = true;
2408 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2488 err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
2409 } else { 2489 } else {
2410 fprintf(stderr, "To measure SMI cost, it needs " 2490 fprintf(stderr, "To measure SMI cost, it needs "
2411 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2491 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
2492 parse_events_print_error(&errinfo, smi_cost_attrs);
2412 return -1; 2493 return -1;
2413 } 2494 }
2414 if (err) { 2495 if (err) {
@@ -2443,12 +2524,13 @@ static int add_default_attributes(void)
2443 if (topdown_attrs[0] && str) { 2524 if (topdown_attrs[0] && str) {
2444 if (warn) 2525 if (warn)
2445 arch_topdown_group_warn(); 2526 arch_topdown_group_warn();
2446 err = parse_events(evsel_list, str, NULL); 2527 err = parse_events(evsel_list, str, &errinfo);
2447 if (err) { 2528 if (err) {
2448 fprintf(stderr, 2529 fprintf(stderr,
2449 "Cannot set up top down events %s: %d\n", 2530 "Cannot set up top down events %s: %d\n",
2450 str, err); 2531 str, err);
2451 free(str); 2532 free(str);
2533 parse_events_print_error(&errinfo, str);
2452 return -1; 2534 return -1;
2453 } 2535 }
2454 } else { 2536 } else {
@@ -2843,6 +2925,13 @@ int cmd_stat(int argc, const char **argv)
2843 goto out; 2925 goto out;
2844 } 2926 }
2845 2927
2928 if (walltime_run_table && run_count <= 1) {
2929 fprintf(stderr, "--table is only supported with -r\n");
2930 parse_options_usage(stat_usage, stat_options, "r", 1);
2931 parse_options_usage(NULL, stat_options, "table", 0);
2932 goto out;
2933 }
2934
2846 if (output_fd < 0) { 2935 if (output_fd < 0) {
2847 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2936 fprintf(stderr, "argument to --log-fd must be a > 0\n");
2848 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2937 parse_options_usage(stat_usage, stat_options, "log-fd", 0);
@@ -2888,6 +2977,13 @@ int cmd_stat(int argc, const char **argv)
2888 2977
2889 setup_system_wide(argc); 2978 setup_system_wide(argc);
2890 2979
2980 /*
2981 * Display user/system times only for single
2982 * run and when there's specified tracee.
2983 */
2984 if ((run_count == 1) && target__none(&target))
2985 ru_display = true;
2986
2891 if (run_count < 0) { 2987 if (run_count < 0) {
2892 pr_err("Run count must be a positive number\n"); 2988 pr_err("Run count must be a positive number\n");
2893 parse_options_usage(stat_usage, stat_options, "r", 1); 2989 parse_options_usage(stat_usage, stat_options, "r", 1);
@@ -2897,6 +2993,14 @@ int cmd_stat(int argc, const char **argv)
2897 run_count = 1; 2993 run_count = 1;
2898 } 2994 }
2899 2995
2996 if (walltime_run_table) {
2997 walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
2998 if (!walltime_run) {
2999 pr_err("failed to setup -r option");
3000 goto out;
3001 }
3002 }
3003
2900 if ((stat_config.aggr_mode == AGGR_THREAD) && 3004 if ((stat_config.aggr_mode == AGGR_THREAD) &&
2901 !target__has_task(&target)) { 3005 !target__has_task(&target)) {
2902 if (!target.system_wide || target.cpu_list) { 3006 if (!target.system_wide || target.cpu_list) {
@@ -3012,7 +3116,7 @@ int cmd_stat(int argc, const char **argv)
3012 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 3116 fprintf(output, "[ perf stat: executing run #%d ... ]\n",
3013 run_idx + 1); 3117 run_idx + 1);
3014 3118
3015 status = run_perf_stat(argc, argv); 3119 status = run_perf_stat(argc, argv, run_idx);
3016 if (forever && status != -1) { 3120 if (forever && status != -1) {
3017 print_counters(NULL, argc, argv); 3121 print_counters(NULL, argc, argv);
3018 perf_stat__reset_stats(); 3122 perf_stat__reset_stats();
@@ -3060,6 +3164,8 @@ int cmd_stat(int argc, const char **argv)
3060 perf_stat__exit_aggr_mode(); 3164 perf_stat__exit_aggr_mode();
3061 perf_evlist__free_stats(evsel_list); 3165 perf_evlist__free_stats(evsel_list);
3062out: 3166out:
3167 free(walltime_run);
3168
3063 if (smi_cost && smi_reset) 3169 if (smi_cost && smi_reset)
3064 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 3170 sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
3065 3171
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 813698a9b8c7..a827919c6263 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -533,12 +533,8 @@ static const char *cat_backtrace(union perf_event *event,
533 } 533 }
534 534
535 tal.filtered = 0; 535 tal.filtered = 0;
536 thread__find_addr_location(al.thread, cpumode, 536 if (thread__find_symbol(al.thread, cpumode, ip, &tal))
537 MAP__FUNCTION, ip, &tal); 537 fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
538
539 if (tal.sym)
540 fprintf(f, "..... %016" PRIx64 " %s\n", ip,
541 tal.sym->name);
542 else 538 else
543 fprintf(f, "..... %016" PRIx64 "\n", ip); 539 fprintf(f, "..... %016" PRIx64 "\n", ip);
544 } 540 }
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f39bd60d2708..ffdc2769ff9f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -123,14 +123,9 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
123 } 123 }
124 124
125 notes = symbol__annotation(sym); 125 notes = symbol__annotation(sym);
126 if (notes->src != NULL) {
127 pthread_mutex_lock(&notes->lock);
128 goto out_assign;
129 }
130
131 pthread_mutex_lock(&notes->lock); 126 pthread_mutex_lock(&notes->lock);
132 127
133 if (symbol__alloc_hist(sym) < 0) { 128 if (!symbol__hists(sym, top->evlist->nr_entries)) {
134 pthread_mutex_unlock(&notes->lock); 129 pthread_mutex_unlock(&notes->lock);
135 pr_err("Not enough memory for annotating '%s' symbol!\n", 130 pr_err("Not enough memory for annotating '%s' symbol!\n",
136 sym->name); 131 sym->name);
@@ -138,9 +133,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
138 return err; 133 return err;
139 } 134 }
140 135
141 err = symbol__annotate(sym, map, evsel, 0, NULL); 136 err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
142 if (err == 0) { 137 if (err == 0) {
143out_assign:
144 top->sym_filter_entry = he; 138 top->sym_filter_entry = he;
145 } else { 139 } else {
146 char msg[BUFSIZ]; 140 char msg[BUFSIZ];
@@ -188,7 +182,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
188static void perf_top__record_precise_ip(struct perf_top *top, 182static void perf_top__record_precise_ip(struct perf_top *top,
189 struct hist_entry *he, 183 struct hist_entry *he,
190 struct perf_sample *sample, 184 struct perf_sample *sample,
191 int counter, u64 ip) 185 struct perf_evsel *evsel, u64 ip)
192{ 186{
193 struct annotation *notes; 187 struct annotation *notes;
194 struct symbol *sym = he->ms.sym; 188 struct symbol *sym = he->ms.sym;
@@ -204,7 +198,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
204 if (pthread_mutex_trylock(&notes->lock)) 198 if (pthread_mutex_trylock(&notes->lock))
205 return; 199 return;
206 200
207 err = hist_entry__inc_addr_samples(he, sample, counter, ip); 201 err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
208 202
209 pthread_mutex_unlock(&notes->lock); 203 pthread_mutex_unlock(&notes->lock);
210 204
@@ -249,10 +243,9 @@ static void perf_top__show_details(struct perf_top *top)
249 goto out_unlock; 243 goto out_unlock;
250 244
251 printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); 245 printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
252 printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); 246 printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
253 247
254 more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, 248 more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);
255 0, top->sym_pcnt_filter, top->print_entries, 4);
256 249
257 if (top->evlist->enabled) { 250 if (top->evlist->enabled) {
258 if (top->zero) 251 if (top->zero)
@@ -412,7 +405,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
412 405
413 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); 406 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
414 407
415 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter); 408 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
416 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 409 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
417 fprintf(stdout, "\t[S] stop annotation.\n"); 410 fprintf(stdout, "\t[S] stop annotation.\n");
418 411
@@ -515,7 +508,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
515 prompt_integer(&top->count_filter, "Enter display event count filter"); 508 prompt_integer(&top->count_filter, "Enter display event count filter");
516 break; 509 break;
517 case 'F': 510 case 'F':
518 prompt_percent(&top->sym_pcnt_filter, 511 prompt_percent(&top->annotation_opts.min_pcnt,
519 "Enter details display event filter (percent)"); 512 "Enter details display event filter (percent)");
520 break; 513 break;
521 case 'K': 514 case 'K':
@@ -613,7 +606,8 @@ static void *display_thread_tui(void *arg)
613 perf_evlist__tui_browse_hists(top->evlist, help, &hbt, 606 perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
614 top->min_percent, 607 top->min_percent,
615 &top->session->header.env, 608 &top->session->header.env,
616 !top->record_opts.overwrite); 609 !top->record_opts.overwrite,
610 &top->annotation_opts);
617 611
618 done = 1; 612 done = 1;
619 return NULL; 613 return NULL;
@@ -691,7 +685,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
691 struct perf_evsel *evsel = iter->evsel; 685 struct perf_evsel *evsel = iter->evsel;
692 686
693 if (perf_hpp_list.sym && single) 687 if (perf_hpp_list.sym && single)
694 perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr); 688 perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
695 689
696 hist__account_cycles(iter->sample->branch_stack, al, iter->sample, 690 hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
697 !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); 691 !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
@@ -742,7 +736,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
742"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" 736"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
743"Check /proc/sys/kernel/kptr_restrict.\n\n" 737"Check /proc/sys/kernel/kptr_restrict.\n\n"
744"Kernel%s samples will not be resolved.\n", 738"Kernel%s samples will not be resolved.\n",
745 al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? 739 al.map && map__has_symbols(al.map) ?
746 " modules" : ""); 740 " modules" : "");
747 if (use_browser <= 0) 741 if (use_browser <= 0)
748 sleep(5); 742 sleep(5);
@@ -750,7 +744,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
750 machine->kptr_restrict_warned = true; 744 machine->kptr_restrict_warned = true;
751 } 745 }
752 746
753 if (al.sym == NULL) { 747 if (al.sym == NULL && al.map != NULL) {
754 const char *msg = "Kernel samples will not be resolved.\n"; 748 const char *msg = "Kernel samples will not be resolved.\n";
755 /* 749 /*
756 * As we do lazy loading of symtabs we only will know if the 750 * As we do lazy loading of symtabs we only will know if the
@@ -764,8 +758,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
764 * invalid --vmlinux ;-) 758 * invalid --vmlinux ;-)
765 */ 759 */
766 if (!machine->kptr_restrict_warned && !top->vmlinux_warned && 760 if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
767 al.map == machine->vmlinux_maps[MAP__FUNCTION] && 761 __map__is_kernel(al.map) && map__has_symbols(al.map)) {
768 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
769 if (symbol_conf.vmlinux_name) { 762 if (symbol_conf.vmlinux_name) {
770 char serr[256]; 763 char serr[256];
771 dso__strerror_load(al.map->dso, serr, sizeof(serr)); 764 dso__strerror_load(al.map->dso, serr, sizeof(serr));
@@ -1084,8 +1077,9 @@ static int __cmd_top(struct perf_top *top)
1084 if (top->session == NULL) 1077 if (top->session == NULL)
1085 return -1; 1078 return -1;
1086 1079
1087 if (!objdump_path) { 1080 if (!top->annotation_opts.objdump_path) {
1088 ret = perf_env__lookup_objdump(&top->session->header.env); 1081 ret = perf_env__lookup_objdump(&top->session->header.env,
1082 &top->annotation_opts.objdump_path);
1089 if (ret) 1083 if (ret)
1090 goto out_delete; 1084 goto out_delete;
1091 } 1085 }
@@ -1265,8 +1259,8 @@ int cmd_top(int argc, const char **argv)
1265 .proc_map_timeout = 500, 1259 .proc_map_timeout = 500,
1266 .overwrite = 1, 1260 .overwrite = 1,
1267 }, 1261 },
1268 .max_stack = sysctl_perf_event_max_stack, 1262 .max_stack = sysctl__max_stack(),
1269 .sym_pcnt_filter = 5, 1263 .annotation_opts = annotation__default_options,
1270 .nr_threads_synthesize = UINT_MAX, 1264 .nr_threads_synthesize = UINT_MAX,
1271 }; 1265 };
1272 struct record_opts *opts = &top.record_opts; 1266 struct record_opts *opts = &top.record_opts;
@@ -1348,15 +1342,15 @@ int cmd_top(int argc, const char **argv)
1348 "only consider symbols in these comms"), 1342 "only consider symbols in these comms"),
1349 OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", 1343 OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
1350 "only consider these symbols"), 1344 "only consider these symbols"),
1351 OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, 1345 OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
1352 "Interleave source code with assembly code (default)"), 1346 "Interleave source code with assembly code (default)"),
1353 OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, 1347 OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
1354 "Display raw encoding of assembly instructions (default)"), 1348 "Display raw encoding of assembly instructions (default)"),
1355 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, 1349 OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
1356 "Enable kernel symbol demangling"), 1350 "Enable kernel symbol demangling"),
1357 OPT_STRING(0, "objdump", &objdump_path, "path", 1351 OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
1358 "objdump binary to use for disassembly and annotations"), 1352 "objdump binary to use for disassembly and annotations"),
1359 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1353 OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
1360 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1354 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1361 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), 1355 OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
1362 OPT_CALLBACK(0, "percent-limit", &top, "percent", 1356 OPT_CALLBACK(0, "percent-limit", &top, "percent",
@@ -1392,6 +1386,9 @@ int cmd_top(int argc, const char **argv)
1392 if (status < 0) 1386 if (status < 0)
1393 return status; 1387 return status;
1394 1388
1389 top.annotation_opts.min_pcnt = 5;
1390 top.annotation_opts.context = 4;
1391
1395 top.evlist = perf_evlist__new(); 1392 top.evlist = perf_evlist__new();
1396 if (top.evlist == NULL) 1393 if (top.evlist == NULL)
1397 return -ENOMEM; 1394 return -ENOMEM;
@@ -1469,8 +1466,6 @@ int cmd_top(int argc, const char **argv)
1469 goto out_delete_evlist; 1466 goto out_delete_evlist;
1470 } 1467 }
1471 1468
1472 symbol_conf.nr_events = top.evlist->nr_entries;
1473
1474 if (top.delay_secs < 1) 1469 if (top.delay_secs < 1)
1475 top.delay_secs = 1; 1470 top.delay_secs = 1;
1476 1471
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 3ad17ee89403..6a748eca2edb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2024,8 +2024,7 @@ static int trace__pgfault(struct trace *trace,
2024 if (trace->summary_only) 2024 if (trace->summary_only)
2025 goto out; 2025 goto out;
2026 2026
2027 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, 2027 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
2028 sample->ip, &al);
2029 2028
2030 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); 2029 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
2031 2030
@@ -2037,12 +2036,10 @@ static int trace__pgfault(struct trace *trace,
2037 2036
2038 fprintf(trace->output, "] => "); 2037 fprintf(trace->output, "] => ");
2039 2038
2040 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, 2039 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2041 sample->addr, &al);
2042 2040
2043 if (!al.map) { 2041 if (!al.map) {
2044 thread__find_addr_location(thread, sample->cpumode, 2042 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2045 MAP__FUNCTION, sample->addr, &al);
2046 2043
2047 if (al.map) 2044 if (al.map)
2048 map_type = 'x'; 2045 map_type = 'x';
@@ -2494,7 +2491,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
2494 * to override an explicitely set --max-stack global setting. 2491 * to override an explicitely set --max-stack global setting.
2495 */ 2492 */
2496 evlist__for_each_entry(evlist, evsel) { 2493 evlist__for_each_entry(evlist, evsel) {
2497 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && 2494 if (evsel__has_callchain(evsel) &&
2498 evsel->attr.sample_max_stack == 0) 2495 evsel->attr.sample_max_stack == 0)
2499 evsel->attr.sample_max_stack = trace->max_stack; 2496 evsel->attr.sample_max_stack = trace->max_stack;
2500 } 2497 }
@@ -3165,7 +3162,7 @@ int cmd_trace(int argc, const char **argv)
3165 mmap_pages_user_set = false; 3162 mmap_pages_user_set = false;
3166 3163
3167 if (trace.max_stack == UINT_MAX) { 3164 if (trace.max_stack == UINT_MAX) {
3168 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; 3165 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
3169 max_stack_user_set = false; 3166 max_stack_user_set = false;
3170 } 3167 }
3171 3168
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 9aff89bc7535..10f333e2e825 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -55,22 +55,26 @@ include/uapi/asm-generic/ioctls.h
55include/uapi/asm-generic/mman-common.h 55include/uapi/asm-generic/mman-common.h
56' 56'
57 57
58check () { 58check_2 () {
59 file=$1 59 file1=$1
60 file2=$2
60 61
61 shift 62 shift
62 opts= 63 shift
63 while [ -n "$*" ]; do
64 opts="$opts \"$1\""
65 shift
66 done
67 64
68 cmd="diff $opts ../$file ../../$file > /dev/null" 65 cmd="diff $* $file1 $file2 > /dev/null"
69 66
70 test -f ../../$file && 67 test -f $file2 &&
71 eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2 68 eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
72} 69}
73 70
71check () {
72 file=$1
73
74 shift
75
76 check_2 ../$file ../../$file $*
77}
74 78
75# Check if we have the kernel headers (tools/perf/../../include), else 79# Check if we have the kernel headers (tools/perf/../../include), else
76# we're probably on a detached tarball, so no point in trying to check 80# we're probably on a detached tarball, so no point in trying to check
@@ -83,7 +87,7 @@ for i in $HEADERS; do
83done 87done
84 88
85# diff with extra ignore lines 89# diff with extra ignore lines
86check arch/x86/lib/memcpy_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" 90check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
87check arch/x86/lib/memset_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" 91check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
88check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" 92check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
89check include/uapi/linux/mman.h -I "^#include <\(uapi/\)*asm/mman.h>" 93check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 000000000000..b9c203219691
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 Description:
4
5 . Disable strace like syscall tracing (--no-syscalls), or try tracing
6 just some (-e *sleep).
7
8 . Attach a filter function to a kernel function, returning when it should
9 be considered, i.e. appear on the output.
10
11 . Run it system wide, so that any sleep of >= 5 seconds and < than 6
12 seconds gets caught.
13
14 . Ask for callgraphs using DWARF info, so that userspace can be unwound
15
16 . While this is running, run something like "sleep 5s".
17
18 . If we decide to add tv_nsec as well, then it becomes:
19
20 int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
21
22 I.e. add where it comes from (rqtp->tv_nsec) and where it will be
23 accessible in the function body (nsec)
24
25 # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
26 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
27 hrtimer_nanosleep ([kernel.kallsyms])
28 __x64_sys_nanosleep ([kernel.kallsyms])
29 do_syscall_64 ([kernel.kallsyms])
30 entry_SYSCALL_64 ([kernel.kallsyms])
31 __GI___nanosleep (/usr/lib64/libc-2.26.so)
32 rpl_nanosleep (/usr/bin/sleep)
33 xnanosleep (/usr/bin/sleep)
34 main (/usr/bin/sleep)
35 __libc_start_main (/usr/lib64/libc-2.26.so)
36 _start (/usr/bin/sleep)
37 ^C#
38
39 Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
40*/
41
42#include <bpf.h>
43
44int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
45{
46 return sec == 5;
47}
48
49license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 000000000000..3776d26db9e7
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
1#include <bpf.h>
2
3license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
new file mode 100644
index 000000000000..dd764ad5efdf
--- /dev/null
+++ b/tools/perf/include/bpf/bpf.h
@@ -0,0 +1,13 @@
1// SPDX-License-Identifier: GPL-2.0
2#ifndef _PERF_BPF_H
3#define _PERF_BPF_H
4#define SEC(NAME) __attribute__((section(NAME), used))
5
6#define probe(function, vars) \
7 SEC(#function "=" #function " " #vars) function
8
9#define license(name) \
10char _license[] SEC("license") = #name; \
11int _version SEC("version") = LINUX_VERSION_CODE;
12
13#endif /* _PERF_BPF_H */
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20a08cb32332..a11cb006f968 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -12,7 +12,6 @@
12#include "util/env.h" 12#include "util/env.h"
13#include <subcmd/exec-cmd.h> 13#include <subcmd/exec-cmd.h>
14#include "util/config.h" 14#include "util/config.h"
15#include "util/quote.h"
16#include <subcmd/run-command.h> 15#include <subcmd/run-command.h>
17#include "util/parse-events.h" 16#include "util/parse-events.h"
18#include <subcmd/parse-options.h> 17#include <subcmd/parse-options.h>
@@ -238,7 +237,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
238 (*argc)--; 237 (*argc)--;
239 } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) { 238 } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
240 tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); 239 tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
241 fprintf(stderr, "dir: %s\n", tracing_path); 240 fprintf(stderr, "dir: %s\n", tracing_path_mount());
242 if (envchanged) 241 if (envchanged)
243 *envchanged = 1; 242 *envchanged = 1;
244 } else if (!strcmp(cmd, "--list-cmds")) { 243 } else if (!strcmp(cmd, "--list-cmds")) {
@@ -421,22 +420,11 @@ void pthread__unblock_sigwinch(void)
421 pthread_sigmask(SIG_UNBLOCK, &set, NULL); 420 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
422} 421}
423 422
424#ifdef _SC_LEVEL1_DCACHE_LINESIZE
425#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
426#else
427static void cache_line_size(int *cacheline_sizep)
428{
429 if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
430 pr_debug("cannot determine cache line size");
431}
432#endif
433
434int main(int argc, const char **argv) 423int main(int argc, const char **argv)
435{ 424{
436 int err; 425 int err;
437 const char *cmd; 426 const char *cmd;
438 char sbuf[STRERR_BUFSIZE]; 427 char sbuf[STRERR_BUFSIZE];
439 int value;
440 428
441 /* libsubcmd init */ 429 /* libsubcmd init */
442 exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); 430 exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -444,13 +432,6 @@ int main(int argc, const char **argv)
444 432
445 /* The page_size is placed in util object. */ 433 /* The page_size is placed in util object. */
446 page_size = sysconf(_SC_PAGE_SIZE); 434 page_size = sysconf(_SC_PAGE_SIZE);
447 cache_line_size(&cacheline_size);
448
449 if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
450 sysctl_perf_event_max_stack = value;
451
452 if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
453 sysctl_perf_event_max_contexts_per_stack = value;
454 435
455 cmd = extract_argv0_path(argv[0]); 436 cmd = extract_argv0_path(argv[0]);
456 if (!cmd) 437 if (!cmd)
@@ -458,15 +439,11 @@ int main(int argc, const char **argv)
458 439
459 srandom(time(NULL)); 440 srandom(time(NULL));
460 441
461 perf_config__init();
462 err = perf_config(perf_default_config, NULL); 442 err = perf_config(perf_default_config, NULL);
463 if (err) 443 if (err)
464 return err; 444 return err;
465 set_buildid_dir(NULL); 445 set_buildid_dir(NULL);
466 446
467 /* get debugfs/tracefs mount point from /proc/mounts */
468 tracing_path_mount();
469
470 /* 447 /*
471 * "perf-xxxx" is the same as "perf xxxx", but we obviously: 448 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
472 * 449 *
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 93656f2fd53a..7e3cce3bcf3b 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core
29GenuineIntel-6-4C,v13,silvermont,core 29GenuineIntel-6-4C,v13,silvermont,core
30GenuineIntel-6-2A,v15,sandybridge,core 30GenuineIntel-6-2A,v15,sandybridge,core
31GenuineIntel-6-2C,v2,westmereep-dp,core 31GenuineIntel-6-2C,v2,westmereep-dp,core
32GenuineIntel-6-2C,v2,westmereep-dp,core
33GenuineIntel-6-25,v2,westmereep-sp,core 32GenuineIntel-6-25,v2,westmereep-sp,core
34GenuineIntel-6-2F,v2,westmereex,core 33GenuineIntel-6-2F,v2,westmereex,core
35GenuineIntel-6-55,v1,skylakex,core 34GenuineIntel-6-55,v1,skylakex,core
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-record b/tools/perf/scripts/python/bin/powerpc-hcalls-record
new file mode 100644
index 000000000000..b7402aa9147d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -e "{powerpc:hcall_entry,powerpc:hcall_exit}" $@
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-report b/tools/perf/scripts/python/bin/powerpc-hcalls-report
new file mode 100644
index 000000000000..dd32ad7465f6
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-report
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/powerpc-hcalls.py
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
new file mode 100644
index 000000000000..00e0e7476e55
--- /dev/null
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -0,0 +1,200 @@
1# SPDX-License-Identifier: GPL-2.0+
2#
3# Copyright (C) 2018 Ravi Bangoria, IBM Corporation
4#
5# Hypervisor call statisics
6
7import os
8import sys
9
10sys.path.append(os.environ['PERF_EXEC_PATH'] + \
11 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
12
13from perf_trace_context import *
14from Core import *
15from Util import *
16
17# output: {
18# opcode: {
19# 'min': minimum time nsec
20# 'max': maximum time nsec
21# 'time': average time nsec
22# 'cnt': counter
23# } ...
24# }
25output = {}
26
27# d_enter: {
28# cpu: {
29# opcode: nsec
30# } ...
31# }
32d_enter = {}
33
34hcall_table = {
35 4: 'H_REMOVE',
36 8: 'H_ENTER',
37 12: 'H_READ',
38 16: 'H_CLEAR_MOD',
39 20: 'H_CLEAR_REF',
40 24: 'H_PROTECT',
41 28: 'H_GET_TCE',
42 32: 'H_PUT_TCE',
43 36: 'H_SET_SPRG0',
44 40: 'H_SET_DABR',
45 44: 'H_PAGE_INIT',
46 48: 'H_SET_ASR',
47 52: 'H_ASR_ON',
48 56: 'H_ASR_OFF',
49 60: 'H_LOGICAL_CI_LOAD',
50 64: 'H_LOGICAL_CI_STORE',
51 68: 'H_LOGICAL_CACHE_LOAD',
52 72: 'H_LOGICAL_CACHE_STORE',
53 76: 'H_LOGICAL_ICBI',
54 80: 'H_LOGICAL_DCBF',
55 84: 'H_GET_TERM_CHAR',
56 88: 'H_PUT_TERM_CHAR',
57 92: 'H_REAL_TO_LOGICAL',
58 96: 'H_HYPERVISOR_DATA',
59 100: 'H_EOI',
60 104: 'H_CPPR',
61 108: 'H_IPI',
62 112: 'H_IPOLL',
63 116: 'H_XIRR',
64 120: 'H_MIGRATE_DMA',
65 124: 'H_PERFMON',
66 220: 'H_REGISTER_VPA',
67 224: 'H_CEDE',
68 228: 'H_CONFER',
69 232: 'H_PROD',
70 236: 'H_GET_PPP',
71 240: 'H_SET_PPP',
72 244: 'H_PURR',
73 248: 'H_PIC',
74 252: 'H_REG_CRQ',
75 256: 'H_FREE_CRQ',
76 260: 'H_VIO_SIGNAL',
77 264: 'H_SEND_CRQ',
78 272: 'H_COPY_RDMA',
79 276: 'H_REGISTER_LOGICAL_LAN',
80 280: 'H_FREE_LOGICAL_LAN',
81 284: 'H_ADD_LOGICAL_LAN_BUFFER',
82 288: 'H_SEND_LOGICAL_LAN',
83 292: 'H_BULK_REMOVE',
84 304: 'H_MULTICAST_CTRL',
85 308: 'H_SET_XDABR',
86 312: 'H_STUFF_TCE',
87 316: 'H_PUT_TCE_INDIRECT',
88 332: 'H_CHANGE_LOGICAL_LAN_MAC',
89 336: 'H_VTERM_PARTNER_INFO',
90 340: 'H_REGISTER_VTERM',
91 344: 'H_FREE_VTERM',
92 348: 'H_RESET_EVENTS',
93 352: 'H_ALLOC_RESOURCE',
94 356: 'H_FREE_RESOURCE',
95 360: 'H_MODIFY_QP',
96 364: 'H_QUERY_QP',
97 368: 'H_REREGISTER_PMR',
98 372: 'H_REGISTER_SMR',
99 376: 'H_QUERY_MR',
100 380: 'H_QUERY_MW',
101 384: 'H_QUERY_HCA',
102 388: 'H_QUERY_PORT',
103 392: 'H_MODIFY_PORT',
104 396: 'H_DEFINE_AQP1',
105 400: 'H_GET_TRACE_BUFFER',
106 404: 'H_DEFINE_AQP0',
107 408: 'H_RESIZE_MR',
108 412: 'H_ATTACH_MCQP',
109 416: 'H_DETACH_MCQP',
110 420: 'H_CREATE_RPT',
111 424: 'H_REMOVE_RPT',
112 428: 'H_REGISTER_RPAGES',
113 432: 'H_DISABLE_AND_GETC',
114 436: 'H_ERROR_DATA',
115 440: 'H_GET_HCA_INFO',
116 444: 'H_GET_PERF_COUNT',
117 448: 'H_MANAGE_TRACE',
118 468: 'H_FREE_LOGICAL_LAN_BUFFER',
119 472: 'H_POLL_PENDING',
120 484: 'H_QUERY_INT_STATE',
121 580: 'H_ILLAN_ATTRIBUTES',
122 592: 'H_MODIFY_HEA_QP',
123 596: 'H_QUERY_HEA_QP',
124 600: 'H_QUERY_HEA',
125 604: 'H_QUERY_HEA_PORT',
126 608: 'H_MODIFY_HEA_PORT',
127 612: 'H_REG_BCMC',
128 616: 'H_DEREG_BCMC',
129 620: 'H_REGISTER_HEA_RPAGES',
130 624: 'H_DISABLE_AND_GET_HEA',
131 628: 'H_GET_HEA_INFO',
132 632: 'H_ALLOC_HEA_RESOURCE',
133 644: 'H_ADD_CONN',
134 648: 'H_DEL_CONN',
135 664: 'H_JOIN',
136 676: 'H_VASI_STATE',
137 688: 'H_ENABLE_CRQ',
138 696: 'H_GET_EM_PARMS',
139 720: 'H_SET_MPP',
140 724: 'H_GET_MPP',
141 748: 'H_HOME_NODE_ASSOCIATIVITY',
142 756: 'H_BEST_ENERGY',
143 764: 'H_XIRR_X',
144 768: 'H_RANDOM',
145 772: 'H_COP',
146 788: 'H_GET_MPP_X',
147 796: 'H_SET_MODE',
148 61440: 'H_RTAS',
149}
150
151def hcall_table_lookup(opcode):
152 if (hcall_table.has_key(opcode)):
153 return hcall_table[opcode]
154 else:
155 return opcode
156
157print_ptrn = '%-28s%10s%10s%10s%10s'
158
159def trace_end():
160 print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)')
161 print '-' * 68
162 for opcode in output:
163 h_name = hcall_table_lookup(opcode)
164 time = output[opcode]['time']
165 cnt = output[opcode]['cnt']
166 min_t = output[opcode]['min']
167 max_t = output[opcode]['max']
168
169 print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt)
170
171def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain,
172 opcode, retval):
173 if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)):
174 diff = nsecs(sec, nsec) - d_enter[cpu][opcode]
175
176 if (output.has_key(opcode)):
177 output[opcode]['time'] += diff
178 output[opcode]['cnt'] += 1
179 if (output[opcode]['min'] > diff):
180 output[opcode]['min'] = diff
181 if (output[opcode]['max'] < diff):
182 output[opcode]['max'] = diff
183 else:
184 output[opcode] = {
185 'time': diff,
186 'cnt': 1,
187 'min': diff,
188 'max': diff,
189 }
190
191 del d_enter[cpu][opcode]
192# else:
193# print "Can't find matching hcall_enter event. Ignoring sample"
194
195def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm,
196 callchain, opcode):
197 if (d_enter.has_key(cpu)):
198 d_enter[cpu][opcode] = nsecs(sec, nsec)
199 else:
200 d_enter[cpu] = {opcode: nsecs(sec, nsec)}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index cac8f8889bc3..2bde505e2e7e 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -654,6 +654,15 @@ static int perf_test__list(int argc, const char **argv)
654 continue; 654 continue;
655 655
656 pr_info("%2d: %s\n", i, t->desc); 656 pr_info("%2d: %s\n", i, t->desc);
657
658 if (t->subtest.get_nr) {
659 int subn = t->subtest.get_nr();
660 int subi;
661
662 for (subi = 0; subi < subn; subi++)
663 pr_info("%2d:%1d: %s\n", i, subi + 1,
664 t->subtest.get_desc(subi));
665 }
657 } 666 }
658 667
659 perf_test__list_shell(argc, argv, i); 668 perf_test__list_shell(argc, argv, i);
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 99936352df4f..4892bd2dc33e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -236,14 +236,13 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
236 236
237 pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); 237 pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
238 238
239 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); 239 if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) {
240 if (!al.map || !al.map->dso) {
241 if (cpumode == PERF_RECORD_MISC_HYPERVISOR) { 240 if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
242 pr_debug("Hypervisor address can not be resolved - skipping\n"); 241 pr_debug("Hypervisor address can not be resolved - skipping\n");
243 return 0; 242 return 0;
244 } 243 }
245 244
246 pr_debug("thread__find_addr_map failed\n"); 245 pr_debug("thread__find_map failed\n");
247 return -1; 246 return -1;
248 } 247 }
249 248
@@ -561,6 +560,7 @@ static int do_test_code_reading(bool try_kcore)
561 pid = getpid(); 560 pid = getpid();
562 561
563 machine = machine__new_host(); 562 machine = machine__new_host();
563 machine->env = &perf_env;
564 564
565 ret = machine__create_kernel_maps(machine); 565 ret = machine__create_kernel_maps(machine);
566 if (ret < 0) { 566 if (ret < 0) {
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f7c5b613d667..b889a28fd80b 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -131,20 +131,20 @@ struct machine *setup_fake_machine(struct machines *machines)
131 goto out; 131 goto out;
132 132
133 /* emulate dso__load() */ 133 /* emulate dso__load() */
134 dso__set_loaded(dso, MAP__FUNCTION); 134 dso__set_loaded(dso);
135 135
136 for (k = 0; k < fake_symbols[i].nr_syms; k++) { 136 for (k = 0; k < fake_symbols[i].nr_syms; k++) {
137 struct symbol *sym; 137 struct symbol *sym;
138 struct fake_sym *fsym = &fake_symbols[i].syms[k]; 138 struct fake_sym *fsym = &fake_symbols[i].syms[k];
139 139
140 sym = symbol__new(fsym->start, fsym->length, 140 sym = symbol__new(fsym->start, fsym->length,
141 STB_GLOBAL, fsym->name); 141 STB_GLOBAL, STT_FUNC, fsym->name);
142 if (sym == NULL) { 142 if (sym == NULL) {
143 dso__put(dso); 143 dso__put(dso);
144 goto out; 144 goto out;
145 } 145 }
146 146
147 symbols__insert(&dso->symbols[MAP__FUNCTION], sym); 147 symbols__insert(&dso->symbols, sym);
148 } 148 }
149 149
150 dso__put(dso); 150 dso__put(dso);
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 8e57d46109de..148dd31cc201 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -127,6 +127,22 @@ int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_un
127 M("[vdso]", PERF_RECORD_MISC_KERNEL, false); 127 M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
128 M("[vdso]", PERF_RECORD_MISC_USER, false); 128 M("[vdso]", PERF_RECORD_MISC_USER, false);
129 129
130 T("[vdso32]", true , true , false, false, "[vdso32]", NULL);
131 T("[vdso32]", false , true , false, false, NULL , NULL);
132 T("[vdso32]", true , false , false, false, "[vdso32]", NULL);
133 T("[vdso32]", false , false , false, false, NULL , NULL);
134 M("[vdso32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
135 M("[vdso32]", PERF_RECORD_MISC_KERNEL, false);
136 M("[vdso32]", PERF_RECORD_MISC_USER, false);
137
138 T("[vdsox32]", true , true , false, false, "[vdsox32]", NULL);
139 T("[vdsox32]", false , true , false, false, NULL , NULL);
140 T("[vdsox32]", true , false , false, false, "[vdsox32]", NULL);
141 T("[vdsox32]", false , false , false, false, NULL , NULL);
142 M("[vdsox32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
143 M("[vdsox32]", PERF_RECORD_MISC_KERNEL, false);
144 M("[vdsox32]", PERF_RECORD_MISC_USER, false);
145
130 /* path alloc_name alloc_ext kmod comp name ext */ 146 /* path alloc_name alloc_ext kmod comp name ext */
131 T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL); 147 T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL);
132 T("[vsyscall]", false , true , false, false, NULL , NULL); 148 T("[vsyscall]", false , true , false, false, NULL , NULL);
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 868d82b501f4..b1af2499a3c9 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -188,9 +188,8 @@ static int mmap_events(synth_cb synth)
188 188
189 pr_debug("looking for map %p\n", td->map); 189 pr_debug("looking for map %p\n", td->map);
190 190
191 thread__find_addr_map(thread, 191 thread__find_map(thread, PERF_RECORD_MISC_USER,
192 PERF_RECORD_MISC_USER, MAP__FUNCTION, 192 (unsigned long) (td->map + 1), &al);
193 (unsigned long) (td->map + 1), &al);
194 193
195 thread__put(thread); 194 thread__put(thread);
196 195
@@ -218,7 +217,7 @@ static int mmap_events(synth_cb synth)
218 * perf_event__synthesize_threads (global) 217 * perf_event__synthesize_threads (global)
219 * 218 *
220 * We test we can find all memory maps via: 219 * We test we can find all memory maps via:
221 * thread__find_addr_map 220 * thread__find_map
222 * 221 *
223 * by using all thread objects. 222 * by using all thread objects.
224 */ 223 */
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 18b06444f230..7d4077068454 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -499,7 +499,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis
499 * while this test executes only parse events method. 499 * while this test executes only parse events method.
500 */ 500 */
501 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); 501 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
502 TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); 502 TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
503 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); 503 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
504 504
505 /* cpu/config=2,call-graph=no,time=0,period=2000/ */ 505 /* cpu/config=2,call-graph=no,time=0,period=2000/ */
@@ -512,7 +512,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis
512 * while this test executes only parse events method. 512 * while this test executes only parse events method.
513 */ 513 */
514 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); 514 TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
515 TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); 515 TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
516 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); 516 TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
517 517
518 return 0; 518 return 0;
@@ -1309,18 +1309,26 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist)
1309 return 0; 1309 return 0;
1310} 1310}
1311 1311
1312static int test__intel_pt(struct perf_evlist *evlist)
1313{
1314 struct perf_evsel *evsel = perf_evlist__first(evlist);
1315
1316 TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
1317 return 0;
1318}
1319
1312static int count_tracepoints(void) 1320static int count_tracepoints(void)
1313{ 1321{
1314 struct dirent *events_ent; 1322 struct dirent *events_ent;
1315 DIR *events_dir; 1323 DIR *events_dir;
1316 int cnt = 0; 1324 int cnt = 0;
1317 1325
1318 events_dir = opendir(tracing_events_path); 1326 events_dir = tracing_events__opendir();
1319 1327
1320 TEST_ASSERT_VAL("Can't open events dir", events_dir); 1328 TEST_ASSERT_VAL("Can't open events dir", events_dir);
1321 1329
1322 while ((events_ent = readdir(events_dir))) { 1330 while ((events_ent = readdir(events_dir))) {
1323 char sys_path[PATH_MAX]; 1331 char *sys_path;
1324 struct dirent *sys_ent; 1332 struct dirent *sys_ent;
1325 DIR *sys_dir; 1333 DIR *sys_dir;
1326 1334
@@ -1331,8 +1339,8 @@ static int count_tracepoints(void)
1331 || !strcmp(events_ent->d_name, "header_page")) 1339 || !strcmp(events_ent->d_name, "header_page"))
1332 continue; 1340 continue;
1333 1341
1334 scnprintf(sys_path, PATH_MAX, "%s/%s", 1342 sys_path = get_events_file(events_ent->d_name);
1335 tracing_events_path, events_ent->d_name); 1343 TEST_ASSERT_VAL("Can't get sys path", sys_path);
1336 1344
1337 sys_dir = opendir(sys_path); 1345 sys_dir = opendir(sys_path);
1338 TEST_ASSERT_VAL("Can't open sys dir", sys_dir); 1346 TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
@@ -1348,6 +1356,7 @@ static int count_tracepoints(void)
1348 } 1356 }
1349 1357
1350 closedir(sys_dir); 1358 closedir(sys_dir);
1359 put_events_file(sys_path);
1351 } 1360 }
1352 1361
1353 closedir(events_dir); 1362 closedir(events_dir);
@@ -1637,6 +1646,11 @@ static struct evlist_test test__events[] = {
1637 .check = test__checkevent_config_cache, 1646 .check = test__checkevent_config_cache,
1638 .id = 51, 1647 .id = 51,
1639 }, 1648 },
1649 {
1650 .name = "intel_pt//u",
1651 .check = test__intel_pt,
1652 .id = 52,
1653 },
1640}; 1654};
1641 1655
1642static struct evlist_test test__events_pmu[] = { 1656static struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 5d2df65ada6a..40ab72149ce1 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -7,8 +7,7 @@
7#include <stdlib.h> 7#include <stdlib.h>
8#include <linux/compiler.h> 8#include <linux/compiler.h>
9#include "tests.h" 9#include "tests.h"
10 10#include "util/debug.h"
11extern int verbose;
12 11
13int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) 12int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
14{ 13{
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 016882dbbc16..263057039693 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -11,23 +11,23 @@
11. $(dirname $0)/lib/probe.sh 11. $(dirname $0)/lib/probe.sh
12 12
13libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') 13libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
14nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 14nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
15 15
16trace_libc_inet_pton_backtrace() { 16trace_libc_inet_pton_backtrace() {
17 idx=0 17 idx=0
18 expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" 18 expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
19 expected[1]=".*inet_pton[[:space:]]\($libc\)$" 19 expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
20 case "$(uname -m)" in 20 case "$(uname -m)" in
21 s390x) 21 s390x)
22 eventattr='call-graph=dwarf,max-stack=4' 22 eventattr='call-graph=dwarf,max-stack=4'
23 expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$" 23 expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
24 expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$" 24 expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
25 expected[4]="main[[:space:]]\(.*/bin/ping.*\)$" 25 expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
26 ;; 26 ;;
27 *) 27 *)
28 eventattr='max-stack=3' 28 eventattr='max-stack=3'
29 expected[2]="getaddrinfo[[:space:]]\($libc\)$" 29 expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$"
30 expected[3]=".*\(.*/bin/ping.*\)$" 30 expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
31 ;; 31 ;;
32 esac 32 esac
33 33
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 17cb1bb3448c..40e30a26b23c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
70 session = perf_session__new(&data, false, NULL); 70 session = perf_session__new(&data, false, NULL);
71 TEST_ASSERT_VAL("can't get session", session); 71 TEST_ASSERT_VAL("can't get session", session);
72 72
73 /* On platforms with large numbers of CPUs process_cpu_topology()
74 * might issue an error while reading the perf.data file section
75 * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
76 * cpu is a NULL pointer.
77 * Example: On s390
78 * CPU 0 is on core_id 0 and physical_package_id 6
79 * CPU 1 is on core_id 1 and physical_package_id 3
80 *
81 * Core_id and physical_package_id are platform and architecture
82 * dependend and might have higher numbers than the CPU id.
83 * This actually depends on the configuration.
84 *
85 * In this case process_cpu_topology() prints error message:
86 * "socket_id number is too big. You may need to upgrade the
87 * perf tool."
88 *
89 * This is the reason why this test might be skipped.
90 */
91 if (!session->header.env.cpu)
92 return TEST_SKIP;
93
73 for (i = 0; i < session->header.env.nr_cpus_avail; i++) { 94 for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
74 if (!cpu_map__has(map, i)) 95 if (!cpu_map__has(map, i))
75 continue; 96 continue;
@@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
95{ 116{
96 char path[PATH_MAX]; 117 char path[PATH_MAX];
97 struct cpu_map *map; 118 struct cpu_map *map;
98 int ret = -1; 119 int ret = TEST_FAIL;
99 120
100 TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); 121 TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
101 122
@@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
110 goto free_path; 131 goto free_path;
111 } 132 }
112 133
113 if (check_cpu_topology(path, map)) 134 ret = check_cpu_topology(path, map);
114 goto free_map;
115 ret = 0;
116
117free_map:
118 cpu_map__put(map); 135 cpu_map__put(map);
136
119free_path: 137free_path:
120 unlink(path); 138 unlink(path);
121 return ret; 139 return ret;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 1e5adb65632a..7691980b7df1 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -19,8 +19,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
19 struct symbol *sym; 19 struct symbol *sym;
20 struct map *kallsyms_map, *vmlinux_map, *map; 20 struct map *kallsyms_map, *vmlinux_map, *map;
21 struct machine kallsyms, vmlinux; 21 struct machine kallsyms, vmlinux;
22 enum map_type type = MAP__FUNCTION; 22 struct maps *maps = machine__kernel_maps(&vmlinux);
23 struct maps *maps = &vmlinux.kmaps.maps[type];
24 u64 mem_start, mem_end; 23 u64 mem_start, mem_end;
25 bool header_printed; 24 bool header_printed;
26 25
@@ -56,7 +55,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
56 * be compacted against the list of modules found in the "vmlinux" 55 * be compacted against the list of modules found in the "vmlinux"
57 * code and with the one got from /proc/modules from the "kallsyms" code. 56 * code and with the one got from /proc/modules from the "kallsyms" code.
58 */ 57 */
59 if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) { 58 if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
60 pr_debug("dso__load_kallsyms "); 59 pr_debug("dso__load_kallsyms ");
61 goto out; 60 goto out;
62 } 61 }
@@ -94,7 +93,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
94 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines 93 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
95 * to fixup the symbols. 94 * to fixup the symbols.
96 */ 95 */
97 if (machine__load_vmlinux_path(&vmlinux, type) <= 0) { 96 if (machine__load_vmlinux_path(&vmlinux) <= 0) {
98 pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); 97 pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
99 err = TEST_SKIP; 98 err = TEST_SKIP;
100 goto out; 99 goto out;
@@ -108,7 +107,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
108 * in the kallsyms dso. For the ones that are in both, check its names and 107 * in the kallsyms dso. For the ones that are in both, check its names and
109 * end addresses too. 108 * end addresses too.
110 */ 109 */
111 for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { 110 map__for_each_symbol(vmlinux_map, sym, nd) {
112 struct symbol *pair, *first_pair; 111 struct symbol *pair, *first_pair;
113 112
114 sym = rb_entry(nd, struct symbol, rb_node); 113 sym = rb_entry(nd, struct symbol, rb_node);
@@ -119,8 +118,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
119 mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start); 118 mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
120 mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end); 119 mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
121 120
122 first_pair = machine__find_kernel_symbol(&kallsyms, type, 121 first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
123 mem_start, NULL);
124 pair = first_pair; 122 pair = first_pair;
125 123
126 if (pair && UM(pair->start) == mem_start) { 124 if (pair && UM(pair->start) == mem_start) {
@@ -149,7 +147,7 @@ next_pair:
149 */ 147 */
150 continue; 148 continue;
151 } else { 149 } else {
152 pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL); 150 pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
153 if (pair) { 151 if (pair) {
154 if (UM(pair->start) == mem_start) 152 if (UM(pair->start) == mem_start)
155 goto next_pair; 153 goto next_pair;
@@ -183,7 +181,7 @@ next_pair:
183 * so use the short name, less descriptive but the same ("[kernel]" in 181 * so use the short name, less descriptive but the same ("[kernel]" in
184 * both cases. 182 * both cases.
185 */ 183 */
186 pair = map_groups__find_by_name(&kallsyms.kmaps, type, 184 pair = map_groups__find_by_name(&kallsyms.kmaps,
187 (map->dso->kernel ? 185 (map->dso->kernel ?
188 map->dso->short_name : 186 map->dso->short_name :
189 map->dso->name)); 187 map->dso->name));
@@ -206,7 +204,7 @@ next_pair:
206 mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start); 204 mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
207 mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end); 205 mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
208 206
209 pair = map_groups__find(&kallsyms.kmaps, type, mem_start); 207 pair = map_groups__find(&kallsyms.kmaps, mem_start);
210 if (pair == NULL || pair->priv) 208 if (pair == NULL || pair->priv)
211 continue; 209 continue;
212 210
@@ -228,7 +226,7 @@ next_pair:
228 226
229 header_printed = false; 227 header_printed = false;
230 228
231 maps = &kallsyms.kmaps.maps[type]; 229 maps = machine__kernel_maps(&kallsyms);
232 230
233 for (map = maps__first(maps); map; map = map__next(map)) { 231 for (map = maps__first(maps); map; map = map__next(map)) {
234 if (!map->priv) { 232 if (!map->priv) {
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 0be4138fbe71..f24722146ebe 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,6 +1,6 @@
1#!/bin/sh 1#!/bin/sh
2 2
3header_dir=$1 3[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
4 4
5printf "static const char *prctl_options[] = {\n" 5printf "static const char *prctl_options[] = {\n"
6regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*' 6regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 3781d74088a7..3b4f1c10ff57 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -29,6 +29,7 @@ struct annotate_browser {
29 struct rb_node *curr_hot; 29 struct rb_node *curr_hot;
30 struct annotation_line *selection; 30 struct annotation_line *selection;
31 struct arch *arch; 31 struct arch *arch;
32 struct annotation_options *opts;
32 bool searching_backwards; 33 bool searching_backwards;
33 char search_bf[128]; 34 char search_bf[128];
34}; 35};
@@ -410,7 +411,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
410 notes = symbol__annotation(dl->ops.target.sym); 411 notes = symbol__annotation(dl->ops.target.sym);
411 pthread_mutex_lock(&notes->lock); 412 pthread_mutex_lock(&notes->lock);
412 413
413 if (notes->src == NULL && symbol__alloc_hist(dl->ops.target.sym) < 0) { 414 if (!symbol__hists(dl->ops.target.sym, evsel->evlist->nr_entries)) {
414 pthread_mutex_unlock(&notes->lock); 415 pthread_mutex_unlock(&notes->lock);
415 ui__warning("Not enough memory for annotating '%s' symbol!\n", 416 ui__warning("Not enough memory for annotating '%s' symbol!\n",
416 dl->ops.target.sym->name); 417 dl->ops.target.sym->name);
@@ -418,7 +419,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
418 } 419 }
419 420
420 pthread_mutex_unlock(&notes->lock); 421 pthread_mutex_unlock(&notes->lock);
421 symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt); 422 symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt, browser->opts);
422 sym_title(ms->sym, ms->map, title, sizeof(title)); 423 sym_title(ms->sym, ms->map, title, sizeof(title));
423 ui_browser__show_title(&browser->b, title); 424 ui_browser__show_title(&browser->b, title);
424 return true; 425 return true;
@@ -695,6 +696,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
695 "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" 696 "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
696 "s Toggle source code view\n" 697 "s Toggle source code view\n"
697 "t Circulate percent, total period, samples view\n" 698 "t Circulate percent, total period, samples view\n"
699 "c Show min/max cycle\n"
698 "/ Search string\n" 700 "/ Search string\n"
699 "k Toggle line numbers\n" 701 "k Toggle line numbers\n"
700 "P Print to [symbol_name].annotation file.\n" 702 "P Print to [symbol_name].annotation file.\n"
@@ -791,6 +793,13 @@ show_sup_ins:
791 notes->options->show_total_period = true; 793 notes->options->show_total_period = true;
792 annotation__update_column_widths(notes); 794 annotation__update_column_widths(notes);
793 continue; 795 continue;
796 case 'c':
797 if (notes->options->show_minmax_cycle)
798 notes->options->show_minmax_cycle = false;
799 else
800 notes->options->show_minmax_cycle = true;
801 annotation__update_column_widths(notes);
802 continue;
794 case K_LEFT: 803 case K_LEFT:
795 case K_ESC: 804 case K_ESC:
796 case 'q': 805 case 'q':
@@ -809,24 +818,27 @@ out:
809} 818}
810 819
811int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, 820int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
812 struct hist_browser_timer *hbt) 821 struct hist_browser_timer *hbt,
822 struct annotation_options *opts)
813{ 823{
814 return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); 824 return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt, opts);
815} 825}
816 826
817int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, 827int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
818 struct hist_browser_timer *hbt) 828 struct hist_browser_timer *hbt,
829 struct annotation_options *opts)
819{ 830{
820 /* reset abort key so that it can get Ctrl-C as a key */ 831 /* reset abort key so that it can get Ctrl-C as a key */
821 SLang_reset_tty(); 832 SLang_reset_tty();
822 SLang_init_tty(0, 0, 0); 833 SLang_init_tty(0, 0, 0);
823 834
824 return map_symbol__tui_annotate(&he->ms, evsel, hbt); 835 return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts);
825} 836}
826 837
827int symbol__tui_annotate(struct symbol *sym, struct map *map, 838int symbol__tui_annotate(struct symbol *sym, struct map *map,
828 struct perf_evsel *evsel, 839 struct perf_evsel *evsel,
829 struct hist_browser_timer *hbt) 840 struct hist_browser_timer *hbt,
841 struct annotation_options *opts)
830{ 842{
831 struct annotation *notes = symbol__annotation(sym); 843 struct annotation *notes = symbol__annotation(sym);
832 struct map_symbol ms = { 844 struct map_symbol ms = {
@@ -843,6 +855,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
843 .priv = &ms, 855 .priv = &ms,
844 .use_navkeypressed = true, 856 .use_navkeypressed = true,
845 }, 857 },
858 .opts = opts,
846 }; 859 };
847 int ret = -1, err; 860 int ret = -1, err;
848 861
@@ -852,7 +865,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
852 if (map->dso->annotate_warned) 865 if (map->dso->annotate_warned)
853 return -1; 866 return -1;
854 867
855 err = symbol__annotate2(sym, map, evsel, &annotation__default_options, &browser.arch); 868 err = symbol__annotate2(sym, map, evsel, opts, &browser.arch);
856 if (err) { 869 if (err) {
857 char msg[BUFSIZ]; 870 char msg[BUFSIZ];
858 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); 871 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e5f247247daa..a96f62ca984a 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1231,6 +1231,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1231 int width = browser->b.width; 1231 int width = browser->b.width;
1232 char folded_sign = ' '; 1232 char folded_sign = ' ';
1233 bool current_entry = ui_browser__is_current_entry(&browser->b, row); 1233 bool current_entry = ui_browser__is_current_entry(&browser->b, row);
1234 bool use_callchain = hist_entry__has_callchains(entry) && symbol_conf.use_callchain;
1234 off_t row_offset = entry->row_offset; 1235 off_t row_offset = entry->row_offset;
1235 bool first = true; 1236 bool first = true;
1236 struct perf_hpp_fmt *fmt; 1237 struct perf_hpp_fmt *fmt;
@@ -1240,7 +1241,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1240 browser->selection = &entry->ms; 1241 browser->selection = &entry->ms;
1241 } 1242 }
1242 1243
1243 if (symbol_conf.use_callchain) { 1244 if (use_callchain) {
1244 hist_entry__init_have_children(entry); 1245 hist_entry__init_have_children(entry);
1245 folded_sign = hist_entry__folded(entry); 1246 folded_sign = hist_entry__folded(entry);
1246 } 1247 }
@@ -1276,7 +1277,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
1276 } 1277 }
1277 1278
1278 if (first) { 1279 if (first) {
1279 if (symbol_conf.use_callchain) { 1280 if (use_callchain) {
1280 ui_browser__printf(&browser->b, "%c ", folded_sign); 1281 ui_browser__printf(&browser->b, "%c ", folded_sign);
1281 width -= 2; 1282 width -= 2;
1282 } 1283 }
@@ -1583,7 +1584,7 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
1583 int column = 0; 1584 int column = 0;
1584 int span = 0; 1585 int span = 0;
1585 1586
1586 if (symbol_conf.use_callchain) { 1587 if (hists__has_callchains(hists) && symbol_conf.use_callchain) {
1587 ret = scnprintf(buf, size, " "); 1588 ret = scnprintf(buf, size, " ");
1588 if (advance_hpp_check(&dummy_hpp, ret)) 1589 if (advance_hpp_check(&dummy_hpp, ret))
1589 return ret; 1590 return ret;
@@ -1987,7 +1988,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
1987 bool first = true; 1988 bool first = true;
1988 int ret; 1989 int ret;
1989 1990
1990 if (symbol_conf.use_callchain) { 1991 if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
1991 folded_sign = hist_entry__folded(he); 1992 folded_sign = hist_entry__folded(he);
1992 printed += fprintf(fp, "%c ", folded_sign); 1993 printed += fprintf(fp, "%c ", folded_sign);
1993 } 1994 }
@@ -2175,7 +2176,8 @@ struct hist_browser *hist_browser__new(struct hists *hists)
2175static struct hist_browser * 2176static struct hist_browser *
2176perf_evsel_browser__new(struct perf_evsel *evsel, 2177perf_evsel_browser__new(struct perf_evsel *evsel,
2177 struct hist_browser_timer *hbt, 2178 struct hist_browser_timer *hbt,
2178 struct perf_env *env) 2179 struct perf_env *env,
2180 struct annotation_options *annotation_opts)
2179{ 2181{
2180 struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); 2182 struct hist_browser *browser = hist_browser__new(evsel__hists(evsel));
2181 2183
@@ -2183,6 +2185,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel,
2183 browser->hbt = hbt; 2185 browser->hbt = hbt;
2184 browser->env = env; 2186 browser->env = env;
2185 browser->title = hists_browser__scnprintf_title; 2187 browser->title = hists_browser__scnprintf_title;
2188 browser->annotation_opts = annotation_opts;
2186 } 2189 }
2187 return browser; 2190 return browser;
2188} 2191}
@@ -2336,7 +2339,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
2336 struct hist_entry *he; 2339 struct hist_entry *he;
2337 int err; 2340 int err;
2338 2341
2339 if (!objdump_path && perf_env__lookup_objdump(browser->env)) 2342 if (!browser->annotation_opts->objdump_path &&
2343 perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path))
2340 return 0; 2344 return 0;
2341 2345
2342 notes = symbol__annotation(act->ms.sym); 2346 notes = symbol__annotation(act->ms.sym);
@@ -2344,7 +2348,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
2344 return 0; 2348 return 0;
2345 2349
2346 evsel = hists_to_evsel(browser->hists); 2350 evsel = hists_to_evsel(browser->hists);
2347 err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); 2351 err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt,
2352 browser->annotation_opts);
2348 he = hist_browser__selected_entry(browser); 2353 he = hist_browser__selected_entry(browser);
2349 /* 2354 /*
2350 * offer option to annotate the other branch source or target 2355 * offer option to annotate the other branch source or target
@@ -2667,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb,
2667 he->nr_rows = 0; 2672 he->nr_rows = 0;
2668 } 2673 }
2669 2674
2670 if (!he->leaf || !symbol_conf.use_callchain) 2675 if (!he->leaf || !hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
2671 goto next; 2676 goto next;
2672 2677
2673 if (callchain_param.mode == CHAIN_GRAPH_REL) { 2678 if (callchain_param.mode == CHAIN_GRAPH_REL) {
@@ -2697,10 +2702,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
2697 struct hist_browser_timer *hbt, 2702 struct hist_browser_timer *hbt,
2698 float min_pcnt, 2703 float min_pcnt,
2699 struct perf_env *env, 2704 struct perf_env *env,
2700 bool warn_lost_event) 2705 bool warn_lost_event,
2706 struct annotation_options *annotation_opts)
2701{ 2707{
2702 struct hists *hists = evsel__hists(evsel); 2708 struct hists *hists = evsel__hists(evsel);
2703 struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); 2709 struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
2704 struct branch_info *bi; 2710 struct branch_info *bi;
2705#define MAX_OPTIONS 16 2711#define MAX_OPTIONS 16
2706 char *options[MAX_OPTIONS]; 2712 char *options[MAX_OPTIONS];
@@ -3062,6 +3068,7 @@ out:
3062struct perf_evsel_menu { 3068struct perf_evsel_menu {
3063 struct ui_browser b; 3069 struct ui_browser b;
3064 struct perf_evsel *selection; 3070 struct perf_evsel *selection;
3071 struct annotation_options *annotation_opts;
3065 bool lost_events, lost_events_warned; 3072 bool lost_events, lost_events_warned;
3066 float min_pcnt; 3073 float min_pcnt;
3067 struct perf_env *env; 3074 struct perf_env *env;
@@ -3163,7 +3170,8 @@ browse_hists:
3163 true, hbt, 3170 true, hbt,
3164 menu->min_pcnt, 3171 menu->min_pcnt,
3165 menu->env, 3172 menu->env,
3166 warn_lost_event); 3173 warn_lost_event,
3174 menu->annotation_opts);
3167 ui_browser__show_title(&menu->b, title); 3175 ui_browser__show_title(&menu->b, title);
3168 switch (key) { 3176 switch (key) {
3169 case K_TAB: 3177 case K_TAB:
@@ -3222,7 +3230,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
3222 struct hist_browser_timer *hbt, 3230 struct hist_browser_timer *hbt,
3223 float min_pcnt, 3231 float min_pcnt,
3224 struct perf_env *env, 3232 struct perf_env *env,
3225 bool warn_lost_event) 3233 bool warn_lost_event,
3234 struct annotation_options *annotation_opts)
3226{ 3235{
3227 struct perf_evsel *pos; 3236 struct perf_evsel *pos;
3228 struct perf_evsel_menu menu = { 3237 struct perf_evsel_menu menu = {
@@ -3237,6 +3246,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
3237 }, 3246 },
3238 .min_pcnt = min_pcnt, 3247 .min_pcnt = min_pcnt,
3239 .env = env, 3248 .env = env,
3249 .annotation_opts = annotation_opts,
3240 }; 3250 };
3241 3251
3242 ui_helpline__push("Press ESC to exit"); 3252 ui_helpline__push("Press ESC to exit");
@@ -3257,7 +3267,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
3257 struct hist_browser_timer *hbt, 3267 struct hist_browser_timer *hbt,
3258 float min_pcnt, 3268 float min_pcnt,
3259 struct perf_env *env, 3269 struct perf_env *env,
3260 bool warn_lost_event) 3270 bool warn_lost_event,
3271 struct annotation_options *annotation_opts)
3261{ 3272{
3262 int nr_entries = evlist->nr_entries; 3273 int nr_entries = evlist->nr_entries;
3263 3274
@@ -3267,7 +3278,8 @@ single_entry:
3267 3278
3268 return perf_evsel__hists_browse(first, nr_entries, help, 3279 return perf_evsel__hists_browse(first, nr_entries, help,
3269 false, hbt, min_pcnt, 3280 false, hbt, min_pcnt,
3270 env, warn_lost_event); 3281 env, warn_lost_event,
3282 annotation_opts);
3271 } 3283 }
3272 3284
3273 if (symbol_conf.event_group) { 3285 if (symbol_conf.event_group) {
@@ -3285,5 +3297,6 @@ single_entry:
3285 3297
3286 return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, 3298 return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
3287 hbt, min_pcnt, env, 3299 hbt, min_pcnt, env,
3288 warn_lost_event); 3300 warn_lost_event,
3301 annotation_opts);
3289} 3302}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 9428bee076f2..91d3e18b50aa 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -4,6 +4,8 @@
4 4
5#include "ui/browser.h" 5#include "ui/browser.h"
6 6
7struct annotation_options;
8
7struct hist_browser { 9struct hist_browser {
8 struct ui_browser b; 10 struct ui_browser b;
9 struct hists *hists; 11 struct hists *hists;
@@ -12,6 +14,7 @@ struct hist_browser {
12 struct hist_browser_timer *hbt; 14 struct hist_browser_timer *hbt;
13 struct pstack *pstack; 15 struct pstack *pstack;
14 struct perf_env *env; 16 struct perf_env *env;
17 struct annotation_options *annotation_opts;
15 int print_seq; 18 int print_seq;
16 bool show_dso; 19 bool show_dso;
17 bool show_headers; 20 bool show_headers;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index e03fa75f108a..5b8b8c637686 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -104,7 +104,7 @@ int map__browse(struct map *map)
104{ 104{
105 struct map_browser mb = { 105 struct map_browser mb = {
106 .b = { 106 .b = {
107 .entries = &map->dso->symbols[map->type], 107 .entries = &map->dso->symbols,
108 .refresh = ui_browser__rb_tree_refresh, 108 .refresh = ui_browser__rb_tree_refresh,
109 .seek = ui_browser__rb_tree_seek, 109 .seek = ui_browser__rb_tree_seek,
110 .write = map_browser__write, 110 .write = map_browser__write,
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index aeeaf15029f0..48428c9acd89 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
169 if (map->dso->annotate_warned) 169 if (map->dso->annotate_warned)
170 return -1; 170 return -1;
171 171
172 err = symbol__annotate(sym, map, evsel, 0, NULL); 172 err = symbol__annotate(sym, map, evsel, 0, &annotation__default_options, NULL);
173 if (err) { 173 if (err) {
174 char msg[BUFSIZ]; 174 char msg[BUFSIZ];
175 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); 175 symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 24e1ec201ffd..4ab663ec3e5e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -382,7 +382,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
382 gtk_tree_store_set(store, &iter, col_idx++, s, -1); 382 gtk_tree_store_set(store, &iter, col_idx++, s, -1);
383 } 383 }
384 384
385 if (symbol_conf.use_callchain && hists__has(hists, sym)) { 385 if (hist_entry__has_callchains(h) &&
386 symbol_conf.use_callchain && hists__has(hists, sym)) {
386 if (callchain_param.mode == CHAIN_GRAPH_REL) 387 if (callchain_param.mode == CHAIN_GRAPH_REL)
387 total = symbol_conf.cumulate_callchain ? 388 total = symbol_conf.cumulate_callchain ?
388 h->stat_acc->period : h->stat.period; 389 h->stat_acc->period : h->stat.period;
@@ -479,7 +480,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists,
479 } 480 }
480 } 481 }
481 482
482 if (symbol_conf.use_callchain && he->leaf) { 483 if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
483 if (callchain_param.mode == CHAIN_GRAPH_REL) 484 if (callchain_param.mode == CHAIN_GRAPH_REL)
484 total = symbol_conf.cumulate_callchain ? 485 total = symbol_conf.cumulate_callchain ?
485 he->stat_acc->period : he->stat.period; 486 he->stat_acc->period : he->stat.period;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 706f6f1e9c7d..fe3dfaa64a91 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -207,7 +207,7 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
207 if (ret) 207 if (ret)
208 return ret; 208 return ret;
209 209
210 if (a->thread != b->thread || !symbol_conf.use_callchain) 210 if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain)
211 return 0; 211 return 0;
212 212
213 ret = b->callchain->max_depth - a->callchain->max_depth; 213 ret = b->callchain->max_depth - a->callchain->max_depth;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 6832fcb2e6ff..69b7a28f7a1c 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
516 } 516 }
517 printed += putc('\n', fp); 517 printed += putc('\n', fp);
518 518
519 if (symbol_conf.use_callchain && he->leaf) { 519 if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
520 u64 total = hists__total_period(hists); 520 u64 total = hists__total_period(hists);
521 521
522 printed += hist_entry_callchain__fprintf(he, total, 0, fp); 522 printed += hist_entry_callchain__fprintf(he, total, 0, fp);
@@ -550,7 +550,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
550 550
551 ret = fprintf(fp, "%s\n", bf); 551 ret = fprintf(fp, "%s\n", bf);
552 552
553 if (use_callchain) 553 if (hist_entry__has_callchains(he) && use_callchain)
554 callchain_ret = hist_entry_callchain__fprintf(he, total_period, 554 callchain_ret = hist_entry_callchain__fprintf(he, total_period,
555 0, fp); 555 0, fp);
556 556
@@ -819,8 +819,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
819 } 819 }
820 820
821 if (h->ms.map == NULL && verbose > 1) { 821 if (h->ms.map == NULL && verbose > 1) {
822 __map_groups__fprintf_maps(h->thread->mg, 822 map_groups__fprintf(h->thread->mg, fp);
823 MAP__FUNCTION, fp);
824 fprintf(fp, "%.10s end\n", graph_dotted_line); 823 fprintf(fp, "%.10s end\n", graph_dotted_line);
825 } 824 }
826 } 825 }
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8052373bcd6a..b604ef334dc9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -24,7 +24,6 @@ libperf-y += libstring.o
24libperf-y += bitmap.o 24libperf-y += bitmap.o
25libperf-y += hweight.o 25libperf-y += hweight.o
26libperf-y += smt.o 26libperf-y += smt.o
27libperf-y += quote.o
28libperf-y += strbuf.o 27libperf-y += strbuf.o
29libperf-y += string.o 28libperf-y += string.o
30libperf-y += strlist.o 29libperf-y += strlist.o
@@ -152,6 +151,8 @@ libperf-y += perf-hooks.o
152libperf-$(CONFIG_CXX) += c++/ 151libperf-$(CONFIG_CXX) += c++/
153 152
154CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 153CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
154CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
155
155# avoid compiler warnings in 32-bit mode 156# avoid compiler warnings in 32-bit mode
156CFLAGS_genelf_debug.o += -Wno-packed 157CFLAGS_genelf_debug.o += -Wno-packed
157 158
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 536ee148bff8..f91775b4bc3c 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -21,6 +21,7 @@
21#include "debug.h" 21#include "debug.h"
22#include "annotate.h" 22#include "annotate.h"
23#include "evsel.h" 23#include "evsel.h"
24#include "evlist.h"
24#include "block-range.h" 25#include "block-range.h"
25#include "string2.h" 26#include "string2.h"
26#include "arch/common.h" 27#include "arch/common.h"
@@ -46,11 +47,10 @@
46struct annotation_options annotation__default_options = { 47struct annotation_options annotation__default_options = {
47 .use_offset = true, 48 .use_offset = true,
48 .jump_arrows = true, 49 .jump_arrows = true,
50 .annotate_src = true,
49 .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, 51 .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS,
50}; 52};
51 53
52const char *disassembler_style;
53const char *objdump_path;
54static regex_t file_lineno; 54static regex_t file_lineno;
55 55
56static struct ins_ops *ins__find(struct arch *arch, const char *name); 56static struct ins_ops *ins__find(struct arch *arch, const char *name);
@@ -678,10 +678,28 @@ static struct arch *arch__find(const char *name)
678 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); 678 return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
679} 679}
680 680
681int symbol__alloc_hist(struct symbol *sym) 681static struct annotated_source *annotated_source__new(void)
682{
683 struct annotated_source *src = zalloc(sizeof(*src));
684
685 if (src != NULL)
686 INIT_LIST_HEAD(&src->source);
687
688 return src;
689}
690
691static __maybe_unused void annotated_source__delete(struct annotated_source *src)
692{
693 if (src == NULL)
694 return;
695 zfree(&src->histograms);
696 zfree(&src->cycles_hist);
697 free(src);
698}
699
700static int annotated_source__alloc_histograms(struct annotated_source *src,
701 size_t size, int nr_hists)
682{ 702{
683 struct annotation *notes = symbol__annotation(sym);
684 size_t size = symbol__size(sym);
685 size_t sizeof_sym_hist; 703 size_t sizeof_sym_hist;
686 704
687 /* 705 /*
@@ -701,17 +719,13 @@ int symbol__alloc_hist(struct symbol *sym)
701 sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); 719 sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
702 720
703 /* Check for overflow in zalloc argument */ 721 /* Check for overflow in zalloc argument */
704 if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) 722 if (sizeof_sym_hist > SIZE_MAX / nr_hists)
705 / symbol_conf.nr_events)
706 return -1; 723 return -1;
707 724
708 notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); 725 src->sizeof_sym_hist = sizeof_sym_hist;
709 if (notes->src == NULL) 726 src->nr_histograms = nr_hists;
710 return -1; 727 src->histograms = calloc(nr_hists, sizeof_sym_hist) ;
711 notes->src->sizeof_sym_hist = sizeof_sym_hist; 728 return src->histograms ? 0 : -1;
712 notes->src->nr_histograms = symbol_conf.nr_events;
713 INIT_LIST_HEAD(&notes->src->source);
714 return 0;
715} 729}
716 730
717/* The cycles histogram is lazily allocated. */ 731/* The cycles histogram is lazily allocated. */
@@ -741,14 +755,11 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
741 pthread_mutex_unlock(&notes->lock); 755 pthread_mutex_unlock(&notes->lock);
742} 756}
743 757
744static int __symbol__account_cycles(struct annotation *notes, 758static int __symbol__account_cycles(struct cyc_hist *ch,
745 u64 start, 759 u64 start,
746 unsigned offset, unsigned cycles, 760 unsigned offset, unsigned cycles,
747 unsigned have_start) 761 unsigned have_start)
748{ 762{
749 struct cyc_hist *ch;
750
751 ch = notes->src->cycles_hist;
752 /* 763 /*
753 * For now we can only account one basic block per 764 * For now we can only account one basic block per
754 * final jump. But multiple could be overlapping. 765 * final jump. But multiple could be overlapping.
@@ -760,6 +771,15 @@ static int __symbol__account_cycles(struct annotation *notes,
760 ch[offset].num_aggr++; 771 ch[offset].num_aggr++;
761 ch[offset].cycles_aggr += cycles; 772 ch[offset].cycles_aggr += cycles;
762 773
774 if (cycles > ch[offset].cycles_max)
775 ch[offset].cycles_max = cycles;
776
777 if (ch[offset].cycles_min) {
778 if (cycles && cycles < ch[offset].cycles_min)
779 ch[offset].cycles_min = cycles;
780 } else
781 ch[offset].cycles_min = cycles;
782
763 if (!have_start && ch[offset].have_start) 783 if (!have_start && ch[offset].have_start)
764 return 0; 784 return 0;
765 if (ch[offset].num) { 785 if (ch[offset].num) {
@@ -782,7 +802,7 @@ static int __symbol__account_cycles(struct annotation *notes,
782} 802}
783 803
784static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, 804static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
785 struct annotation *notes, int evidx, u64 addr, 805 struct annotated_source *src, int evidx, u64 addr,
786 struct perf_sample *sample) 806 struct perf_sample *sample)
787{ 807{
788 unsigned offset; 808 unsigned offset;
@@ -798,7 +818,12 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
798 } 818 }
799 819
800 offset = addr - sym->start; 820 offset = addr - sym->start;
801 h = annotation__histogram(notes, evidx); 821 h = annotated_source__histogram(src, evidx);
822 if (h == NULL) {
823 pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
824 __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
825 return -ENOMEM;
826 }
802 h->nr_samples++; 827 h->nr_samples++;
803 h->addr[offset].nr_samples++; 828 h->addr[offset].nr_samples++;
804 h->period += sample->period; 829 h->period += sample->period;
@@ -811,45 +836,69 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
811 return 0; 836 return 0;
812} 837}
813 838
814static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) 839static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
815{ 840{
816 struct annotation *notes = symbol__annotation(sym); 841 struct annotation *notes = symbol__annotation(sym);
817 842
818 if (notes->src == NULL) { 843 if (notes->src == NULL) {
819 if (symbol__alloc_hist(sym) < 0) 844 notes->src = annotated_source__new();
845 if (notes->src == NULL)
820 return NULL; 846 return NULL;
847 goto alloc_cycles_hist;
821 } 848 }
822 if (!notes->src->cycles_hist && cycles) { 849
823 if (symbol__alloc_hist_cycles(sym) < 0) 850 if (!notes->src->cycles_hist) {
851alloc_cycles_hist:
852 symbol__alloc_hist_cycles(sym);
853 }
854
855 return notes->src->cycles_hist;
856}
857
858struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
859{
860 struct annotation *notes = symbol__annotation(sym);
861
862 if (notes->src == NULL) {
863 notes->src = annotated_source__new();
864 if (notes->src == NULL)
824 return NULL; 865 return NULL;
866 goto alloc_histograms;
825 } 867 }
826 return notes; 868
869 if (notes->src->histograms == NULL) {
870alloc_histograms:
871 annotated_source__alloc_histograms(notes->src, symbol__size(sym),
872 nr_hists);
873 }
874
875 return notes->src;
827} 876}
828 877
829static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, 878static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
830 int evidx, u64 addr, 879 struct perf_evsel *evsel, u64 addr,
831 struct perf_sample *sample) 880 struct perf_sample *sample)
832{ 881{
833 struct annotation *notes; 882 struct annotated_source *src;
834 883
835 if (sym == NULL) 884 if (sym == NULL)
836 return 0; 885 return 0;
837 notes = symbol__get_annotation(sym, false); 886 src = symbol__hists(sym, evsel->evlist->nr_entries);
838 if (notes == NULL) 887 if (src == NULL)
839 return -ENOMEM; 888 return -ENOMEM;
840 return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample); 889 return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample);
841} 890}
842 891
843static int symbol__account_cycles(u64 addr, u64 start, 892static int symbol__account_cycles(u64 addr, u64 start,
844 struct symbol *sym, unsigned cycles) 893 struct symbol *sym, unsigned cycles)
845{ 894{
846 struct annotation *notes; 895 struct cyc_hist *cycles_hist;
847 unsigned offset; 896 unsigned offset;
848 897
849 if (sym == NULL) 898 if (sym == NULL)
850 return 0; 899 return 0;
851 notes = symbol__get_annotation(sym, true); 900 cycles_hist = symbol__cycles_hist(sym);
852 if (notes == NULL) 901 if (cycles_hist == NULL)
853 return -ENOMEM; 902 return -ENOMEM;
854 if (addr < sym->start || addr >= sym->end) 903 if (addr < sym->start || addr >= sym->end)
855 return -ERANGE; 904 return -ERANGE;
@@ -861,7 +910,7 @@ static int symbol__account_cycles(u64 addr, u64 start,
861 start = 0; 910 start = 0;
862 } 911 }
863 offset = addr - sym->start; 912 offset = addr - sym->start;
864 return __symbol__account_cycles(notes, 913 return __symbol__account_cycles(cycles_hist,
865 start ? start - sym->start : 0, 914 start ? start - sym->start : 0,
866 offset, cycles, 915 offset, cycles,
867 !!start); 916 !!start);
@@ -953,8 +1002,11 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
953 if (ch->have_start) 1002 if (ch->have_start)
954 annotation__count_and_fill(notes, ch->start, offset, ch); 1003 annotation__count_and_fill(notes, ch->start, offset, ch);
955 al = notes->offsets[offset]; 1004 al = notes->offsets[offset];
956 if (al && ch->num_aggr) 1005 if (al && ch->num_aggr) {
957 al->cycles = ch->cycles_aggr / ch->num_aggr; 1006 al->cycles = ch->cycles_aggr / ch->num_aggr;
1007 al->cycles_max = ch->cycles_max;
1008 al->cycles_min = ch->cycles_min;
1009 }
958 notes->have_cycles = true; 1010 notes->have_cycles = true;
959 } 1011 }
960 } 1012 }
@@ -962,15 +1014,15 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
962} 1014}
963 1015
964int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, 1016int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
965 int evidx) 1017 struct perf_evsel *evsel)
966{ 1018{
967 return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample); 1019 return symbol__inc_addr_samples(ams->sym, ams->map, evsel, ams->al_addr, sample);
968} 1020}
969 1021
970int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, 1022int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
971 int evidx, u64 ip) 1023 struct perf_evsel *evsel, u64 ip)
972{ 1024{
973 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample); 1025 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evsel, ip, sample);
974} 1026}
975 1027
976static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) 1028static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
@@ -1019,6 +1071,7 @@ struct annotate_args {
1019 struct arch *arch; 1071 struct arch *arch;
1020 struct map_symbol ms; 1072 struct map_symbol ms;
1021 struct perf_evsel *evsel; 1073 struct perf_evsel *evsel;
1074 struct annotation_options *options;
1022 s64 offset; 1075 s64 offset;
1023 char *line; 1076 char *line;
1024 int line_nr; 1077 int line_nr;
@@ -1263,6 +1316,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
1263 max_percent = sample->percent; 1316 max_percent = sample->percent;
1264 } 1317 }
1265 1318
1319 if (al->samples_nr > nr_percent)
1320 nr_percent = al->samples_nr;
1321
1266 if (max_percent < min_pcnt) 1322 if (max_percent < min_pcnt)
1267 return -1; 1323 return -1;
1268 1324
@@ -1557,6 +1613,7 @@ fallback:
1557 1613
1558static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) 1614static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
1559{ 1615{
1616 struct annotation_options *opts = args->options;
1560 struct map *map = args->ms.map; 1617 struct map *map = args->ms.map;
1561 struct dso *dso = map->dso; 1618 struct dso *dso = map->dso;
1562 char *command; 1619 char *command;
@@ -1604,13 +1661,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
1604 "%s %s%s --start-address=0x%016" PRIx64 1661 "%s %s%s --start-address=0x%016" PRIx64
1605 " --stop-address=0x%016" PRIx64 1662 " --stop-address=0x%016" PRIx64
1606 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", 1663 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
1607 objdump_path ? objdump_path : "objdump", 1664 opts->objdump_path ?: "objdump",
1608 disassembler_style ? "-M " : "", 1665 opts->disassembler_style ? "-M " : "",
1609 disassembler_style ? disassembler_style : "", 1666 opts->disassembler_style ?: "",
1610 map__rip_2objdump(map, sym->start), 1667 map__rip_2objdump(map, sym->start),
1611 map__rip_2objdump(map, sym->end), 1668 map__rip_2objdump(map, sym->end),
1612 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", 1669 opts->show_asm_raw ? "" : "--no-show-raw",
1613 symbol_conf.annotate_src ? "-S" : "", 1670 opts->annotate_src ? "-S" : "",
1614 symfs_filename, symfs_filename); 1671 symfs_filename, symfs_filename);
1615 1672
1616 if (err < 0) { 1673 if (err < 0) {
@@ -1752,11 +1809,13 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
1752 1809
1753int symbol__annotate(struct symbol *sym, struct map *map, 1810int symbol__annotate(struct symbol *sym, struct map *map,
1754 struct perf_evsel *evsel, size_t privsize, 1811 struct perf_evsel *evsel, size_t privsize,
1812 struct annotation_options *options,
1755 struct arch **parch) 1813 struct arch **parch)
1756{ 1814{
1757 struct annotate_args args = { 1815 struct annotate_args args = {
1758 .privsize = privsize, 1816 .privsize = privsize,
1759 .evsel = evsel, 1817 .evsel = evsel,
1818 .options = options,
1760 }; 1819 };
1761 struct perf_env *env = perf_evsel__env(evsel); 1820 struct perf_env *env = perf_evsel__env(evsel);
1762 const char *arch_name = perf_env__arch(env); 1821 const char *arch_name = perf_env__arch(env);
@@ -1934,8 +1993,8 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
1934} 1993}
1935 1994
1936int symbol__annotate_printf(struct symbol *sym, struct map *map, 1995int symbol__annotate_printf(struct symbol *sym, struct map *map,
1937 struct perf_evsel *evsel, bool full_paths, 1996 struct perf_evsel *evsel,
1938 int min_pcnt, int max_lines, int context) 1997 struct annotation_options *opts)
1939{ 1998{
1940 struct dso *dso = map->dso; 1999 struct dso *dso = map->dso;
1941 char *filename; 2000 char *filename;
@@ -1947,23 +2006,28 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
1947 u64 start = map__rip_2objdump(map, sym->start); 2006 u64 start = map__rip_2objdump(map, sym->start);
1948 int printed = 2, queue_len = 0, addr_fmt_width; 2007 int printed = 2, queue_len = 0, addr_fmt_width;
1949 int more = 0; 2008 int more = 0;
2009 bool context = opts->context;
1950 u64 len; 2010 u64 len;
1951 int width = symbol_conf.show_total_period ? 12 : 8; 2011 int width = symbol_conf.show_total_period ? 12 : 8;
1952 int graph_dotted_len; 2012 int graph_dotted_len;
2013 char buf[512];
1953 2014
1954 filename = strdup(dso->long_name); 2015 filename = strdup(dso->long_name);
1955 if (!filename) 2016 if (!filename)
1956 return -ENOMEM; 2017 return -ENOMEM;
1957 2018
1958 if (full_paths) 2019 if (opts->full_path)
1959 d_filename = filename; 2020 d_filename = filename;
1960 else 2021 else
1961 d_filename = basename(filename); 2022 d_filename = basename(filename);
1962 2023
1963 len = symbol__size(sym); 2024 len = symbol__size(sym);
1964 2025
1965 if (perf_evsel__is_group_event(evsel)) 2026 if (perf_evsel__is_group_event(evsel)) {
1966 width *= evsel->nr_members; 2027 width *= evsel->nr_members;
2028 perf_evsel__group_desc(evsel, buf, sizeof(buf));
2029 evsel_name = buf;
2030 }
1967 2031
1968 graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", 2032 graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
1969 width, width, symbol_conf.show_total_period ? "Period" : 2033 width, width, symbol_conf.show_total_period ? "Period" :
@@ -1987,7 +2051,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
1987 } 2051 }
1988 2052
1989 err = annotation_line__print(pos, sym, start, evsel, len, 2053 err = annotation_line__print(pos, sym, start, evsel, len,
1990 min_pcnt, printed, max_lines, 2054 opts->min_pcnt, printed, opts->max_lines,
1991 queue, addr_fmt_width); 2055 queue, addr_fmt_width);
1992 2056
1993 switch (err) { 2057 switch (err) {
@@ -2320,20 +2384,19 @@ static void symbol__calc_lines(struct symbol *sym, struct map *map,
2320} 2384}
2321 2385
2322int symbol__tty_annotate2(struct symbol *sym, struct map *map, 2386int symbol__tty_annotate2(struct symbol *sym, struct map *map,
2323 struct perf_evsel *evsel, bool print_lines, 2387 struct perf_evsel *evsel,
2324 bool full_paths) 2388 struct annotation_options *opts)
2325{ 2389{
2326 struct dso *dso = map->dso; 2390 struct dso *dso = map->dso;
2327 struct rb_root source_line = RB_ROOT; 2391 struct rb_root source_line = RB_ROOT;
2328 struct annotation_options opts = annotation__default_options;
2329 struct annotation *notes = symbol__annotation(sym); 2392 struct annotation *notes = symbol__annotation(sym);
2330 char buf[1024]; 2393 char buf[1024];
2331 2394
2332 if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0) 2395 if (symbol__annotate2(sym, map, evsel, opts, NULL) < 0)
2333 return -1; 2396 return -1;
2334 2397
2335 if (print_lines) { 2398 if (opts->print_lines) {
2336 srcline_full_filename = full_paths; 2399 srcline_full_filename = opts->full_path;
2337 symbol__calc_lines(sym, map, &source_line); 2400 symbol__calc_lines(sym, map, &source_line);
2338 print_summary(&source_line, dso->long_name); 2401 print_summary(&source_line, dso->long_name);
2339 } 2402 }
@@ -2348,25 +2411,24 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
2348} 2411}
2349 2412
2350int symbol__tty_annotate(struct symbol *sym, struct map *map, 2413int symbol__tty_annotate(struct symbol *sym, struct map *map,
2351 struct perf_evsel *evsel, bool print_lines, 2414 struct perf_evsel *evsel,
2352 bool full_paths, int min_pcnt, int max_lines) 2415 struct annotation_options *opts)
2353{ 2416{
2354 struct dso *dso = map->dso; 2417 struct dso *dso = map->dso;
2355 struct rb_root source_line = RB_ROOT; 2418 struct rb_root source_line = RB_ROOT;
2356 2419
2357 if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) 2420 if (symbol__annotate(sym, map, evsel, 0, opts, NULL) < 0)
2358 return -1; 2421 return -1;
2359 2422
2360 symbol__calc_percent(sym, evsel); 2423 symbol__calc_percent(sym, evsel);
2361 2424
2362 if (print_lines) { 2425 if (opts->print_lines) {
2363 srcline_full_filename = full_paths; 2426 srcline_full_filename = opts->full_path;
2364 symbol__calc_lines(sym, map, &source_line); 2427 symbol__calc_lines(sym, map, &source_line);
2365 print_summary(&source_line, dso->long_name); 2428 print_summary(&source_line, dso->long_name);
2366 } 2429 }
2367 2430
2368 symbol__annotate_printf(sym, map, evsel, full_paths, 2431 symbol__annotate_printf(sym, map, evsel, opts);
2369 min_pcnt, max_lines, 0);
2370 2432
2371 annotated_source__purge(symbol__annotation(sym)->src); 2433 annotated_source__purge(symbol__annotation(sym)->src);
2372 2434
@@ -2483,13 +2545,38 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
2483 else 2545 else
2484 obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); 2546 obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
2485 2547
2486 if (al->cycles) 2548 if (!notes->options->show_minmax_cycle) {
2487 obj__printf(obj, "%*" PRIu64 " ", 2549 if (al->cycles)
2550 obj__printf(obj, "%*" PRIu64 " ",
2488 ANNOTATION__CYCLES_WIDTH - 1, al->cycles); 2551 ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
2489 else if (!show_title) 2552 else if (!show_title)
2490 obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); 2553 obj__printf(obj, "%*s",
2491 else 2554 ANNOTATION__CYCLES_WIDTH, " ");
2492 obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle"); 2555 else
2556 obj__printf(obj, "%*s ",
2557 ANNOTATION__CYCLES_WIDTH - 1,
2558 "Cycle");
2559 } else {
2560 if (al->cycles) {
2561 char str[32];
2562
2563 scnprintf(str, sizeof(str),
2564 "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
2565 al->cycles, al->cycles_min,
2566 al->cycles_max);
2567
2568 obj__printf(obj, "%*s ",
2569 ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
2570 str);
2571 } else if (!show_title)
2572 obj__printf(obj, "%*s",
2573 ANNOTATION__MINMAX_CYCLES_WIDTH,
2574 " ");
2575 else
2576 obj__printf(obj, "%*s ",
2577 ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
2578 "Cycle(min/max)");
2579 }
2493 } 2580 }
2494 2581
2495 obj__printf(obj, " "); 2582 obj__printf(obj, " ");
@@ -2576,7 +2663,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev
2576 if (perf_evsel__is_group_event(evsel)) 2663 if (perf_evsel__is_group_event(evsel))
2577 nr_pcnt = evsel->nr_members; 2664 nr_pcnt = evsel->nr_members;
2578 2665
2579 err = symbol__annotate(sym, map, evsel, 0, parch); 2666 err = symbol__annotate(sym, map, evsel, 0, options, parch);
2580 if (err) 2667 if (err)
2581 goto out_free_offsets; 2668 goto out_free_offsets;
2582 2669
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f28a9e43421d..a4c0d91907e6 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -61,16 +61,27 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
61 61
62#define ANNOTATION__IPC_WIDTH 6 62#define ANNOTATION__IPC_WIDTH 6
63#define ANNOTATION__CYCLES_WIDTH 6 63#define ANNOTATION__CYCLES_WIDTH 6
64#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
64 65
65struct annotation_options { 66struct annotation_options {
66 bool hide_src_code, 67 bool hide_src_code,
67 use_offset, 68 use_offset,
68 jump_arrows, 69 jump_arrows,
70 print_lines,
71 full_path,
69 show_linenr, 72 show_linenr,
70 show_nr_jumps, 73 show_nr_jumps,
71 show_nr_samples, 74 show_nr_samples,
72 show_total_period; 75 show_total_period,
76 show_minmax_cycle,
77 show_asm_raw,
78 annotate_src;
73 u8 offset_level; 79 u8 offset_level;
80 int min_pcnt;
81 int max_lines;
82 int context;
83 const char *objdump_path;
84 const char *disassembler_style;
74}; 85};
75 86
76enum { 87enum {
@@ -105,6 +116,8 @@ struct annotation_line {
105 int jump_sources; 116 int jump_sources;
106 float ipc; 117 float ipc;
107 u64 cycles; 118 u64 cycles;
119 u64 cycles_max;
120 u64 cycles_min;
108 size_t privsize; 121 size_t privsize;
109 char *path; 122 char *path;
110 u32 idx; 123 u32 idx;
@@ -186,6 +199,8 @@ struct cyc_hist {
186 u64 start; 199 u64 start;
187 u64 cycles; 200 u64 cycles;
188 u64 cycles_aggr; 201 u64 cycles_aggr;
202 u64 cycles_max;
203 u64 cycles_min;
189 u32 num; 204 u32 num;
190 u32 num_aggr; 205 u32 num_aggr;
191 u8 have_start; 206 u8 have_start;
@@ -195,7 +210,11 @@ struct cyc_hist {
195 210
196/** struct annotated_source - symbols with hits have this attached as in sannotation 211/** struct annotated_source - symbols with hits have this attached as in sannotation
197 * 212 *
198 * @histogram: Array of addr hit histograms per event being monitored 213 * @histograms: Array of addr hit histograms per event being monitored
214 * nr_histograms: This may not be the same as evsel->evlist->nr_entries if
215 * we have more than a group in a evlist, where we will want
216 * to see each group separately, that is why symbol__annotate2()
217 * sets src->nr_histograms to evsel->nr_members.
199 * @lines: If 'print_lines' is specified, per source code line percentages 218 * @lines: If 'print_lines' is specified, per source code line percentages
200 * @source: source parsed from a disassembler like objdump -dS 219 * @source: source parsed from a disassembler like objdump -dS
201 * @cyc_hist: Average cycles per basic block 220 * @cyc_hist: Average cycles per basic block
@@ -211,7 +230,7 @@ struct annotated_source {
211 int nr_histograms; 230 int nr_histograms;
212 size_t sizeof_sym_hist; 231 size_t sizeof_sym_hist;
213 struct cyc_hist *cycles_hist; 232 struct cyc_hist *cycles_hist;
214 struct sym_hist histograms[0]; 233 struct sym_hist *histograms;
215}; 234};
216 235
217struct annotation { 236struct annotation {
@@ -239,6 +258,9 @@ struct annotation {
239 258
240static inline int annotation__cycles_width(struct annotation *notes) 259static inline int annotation__cycles_width(struct annotation *notes)
241{ 260{
261 if (notes->have_cycles && notes->options->show_minmax_cycle)
262 return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
263
242 return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; 264 return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
243} 265}
244 266
@@ -258,10 +280,14 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
258void annotation__update_column_widths(struct annotation *notes); 280void annotation__update_column_widths(struct annotation *notes);
259void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); 281void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
260 282
283static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
284{
285 return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
286}
287
261static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) 288static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
262{ 289{
263 return (((void *)&notes->src->histograms) + 290 return annotated_source__histogram(notes->src, idx);
264 (notes->src->sizeof_sym_hist * idx));
265} 291}
266 292
267static inline struct annotation *symbol__annotation(struct symbol *sym) 293static inline struct annotation *symbol__annotation(struct symbol *sym)
@@ -270,20 +296,21 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
270} 296}
271 297
272int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, 298int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
273 int evidx); 299 struct perf_evsel *evsel);
274 300
275int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, 301int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
276 struct addr_map_symbol *start, 302 struct addr_map_symbol *start,
277 unsigned cycles); 303 unsigned cycles);
278 304
279int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, 305int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
280 int evidx, u64 addr); 306 struct perf_evsel *evsel, u64 addr);
281 307
282int symbol__alloc_hist(struct symbol *sym); 308struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
283void symbol__annotate_zero_histograms(struct symbol *sym); 309void symbol__annotate_zero_histograms(struct symbol *sym);
284 310
285int symbol__annotate(struct symbol *sym, struct map *map, 311int symbol__annotate(struct symbol *sym, struct map *map,
286 struct perf_evsel *evsel, size_t privsize, 312 struct perf_evsel *evsel, size_t privsize,
313 struct annotation_options *options,
287 struct arch **parch); 314 struct arch **parch);
288int symbol__annotate2(struct symbol *sym, struct map *map, 315int symbol__annotate2(struct symbol *sym, struct map *map,
289 struct perf_evsel *evsel, 316 struct perf_evsel *evsel,
@@ -311,8 +338,8 @@ int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
311 int errnum, char *buf, size_t buflen); 338 int errnum, char *buf, size_t buflen);
312 339
313int symbol__annotate_printf(struct symbol *sym, struct map *map, 340int symbol__annotate_printf(struct symbol *sym, struct map *map,
314 struct perf_evsel *evsel, bool full_paths, 341 struct perf_evsel *evsel,
315 int min_pcnt, int max_lines, int context); 342 struct annotation_options *options);
316int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp); 343int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp);
317void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); 344void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
318void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); 345void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
@@ -323,30 +350,27 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel)
323bool ui__has_annotation(void); 350bool ui__has_annotation(void);
324 351
325int symbol__tty_annotate(struct symbol *sym, struct map *map, 352int symbol__tty_annotate(struct symbol *sym, struct map *map,
326 struct perf_evsel *evsel, bool print_lines, 353 struct perf_evsel *evsel, struct annotation_options *opts);
327 bool full_paths, int min_pcnt, int max_lines);
328 354
329int symbol__tty_annotate2(struct symbol *sym, struct map *map, 355int symbol__tty_annotate2(struct symbol *sym, struct map *map,
330 struct perf_evsel *evsel, bool print_lines, 356 struct perf_evsel *evsel, struct annotation_options *opts);
331 bool full_paths);
332 357
333#ifdef HAVE_SLANG_SUPPORT 358#ifdef HAVE_SLANG_SUPPORT
334int symbol__tui_annotate(struct symbol *sym, struct map *map, 359int symbol__tui_annotate(struct symbol *sym, struct map *map,
335 struct perf_evsel *evsel, 360 struct perf_evsel *evsel,
336 struct hist_browser_timer *hbt); 361 struct hist_browser_timer *hbt,
362 struct annotation_options *opts);
337#else 363#else
338static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, 364static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
339 struct map *map __maybe_unused, 365 struct map *map __maybe_unused,
340 struct perf_evsel *evsel __maybe_unused, 366 struct perf_evsel *evsel __maybe_unused,
341 struct hist_browser_timer *hbt 367 struct hist_browser_timer *hbt __maybe_unused,
342 __maybe_unused) 368 struct annotation_options *opts __maybe_unused)
343{ 369{
344 return 0; 370 return 0;
345} 371}
346#endif 372#endif
347 373
348extern const char *disassembler_style;
349
350void annotation_config__init(void); 374void annotation_config__init(void);
351 375
352#endif /* __PERF_ANNOTATE_H */ 376#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 857de69a5361..d056447520a2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1679,7 +1679,7 @@ struct sym_args {
1679static bool kern_sym_match(struct sym_args *args, const char *name, char type) 1679static bool kern_sym_match(struct sym_args *args, const char *name, char type)
1680{ 1680{
1681 /* A function with the same name, and global or the n'th found or any */ 1681 /* A function with the same name, and global or the n'th found or any */
1682 return symbol_type__is_a(type, MAP__FUNCTION) && 1682 return kallsyms__is_function(type) &&
1683 !strcmp(name, args->name) && 1683 !strcmp(name, args->name) &&
1684 ((args->global && isupper(type)) || 1684 ((args->global && isupper(type)) ||
1685 (args->selected && ++(args->cnt) == args->idx) || 1685 (args->selected && ++(args->cnt) == args->idx) ||
@@ -1784,7 +1784,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
1784{ 1784{
1785 struct sym_args *args = arg; 1785 struct sym_args *args = arg;
1786 1786
1787 if (!symbol_type__is_a(type, MAP__FUNCTION)) 1787 if (!kallsyms__is_function(type))
1788 return 0; 1788 return 0;
1789 1789
1790 if (!args->started) { 1790 if (!args->started) {
@@ -1915,7 +1915,7 @@ static void print_duplicate_syms(struct dso *dso, const char *sym_name)
1915 1915
1916 pr_err("Multiple symbols with name '%s'\n", sym_name); 1916 pr_err("Multiple symbols with name '%s'\n", sym_name);
1917 1917
1918 sym = dso__first_symbol(dso, MAP__FUNCTION); 1918 sym = dso__first_symbol(dso);
1919 while (sym) { 1919 while (sym) {
1920 if (dso_sym_match(sym, sym_name, &cnt, -1)) { 1920 if (dso_sym_match(sym, sym_name, &cnt, -1)) {
1921 pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n", 1921 pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
@@ -1945,7 +1945,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
1945 *start = 0; 1945 *start = 0;
1946 *size = 0; 1946 *size = 0;
1947 1947
1948 sym = dso__first_symbol(dso, MAP__FUNCTION); 1948 sym = dso__first_symbol(dso);
1949 while (sym) { 1949 while (sym) {
1950 if (*start) { 1950 if (*start) {
1951 if (!*size) 1951 if (!*size)
@@ -1972,8 +1972,8 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
1972 1972
1973static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) 1973static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
1974{ 1974{
1975 struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION); 1975 struct symbol *first_sym = dso__first_symbol(dso);
1976 struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION); 1976 struct symbol *last_sym = dso__last_symbol(dso);
1977 1977
1978 if (!first_sym || !last_sym) { 1978 if (!first_sym || !last_sym) {
1979 pr_err("Failed to determine filter for %s\nNo symbols found.\n", 1979 pr_err("Failed to determine filter for %s\nNo symbols found.\n",
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index af7ad814b2c3..cee658733e2c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
66 } 66 }
67 67
68 obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); 68 obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
69 if (IS_ERR(obj)) { 69 if (IS_ERR_OR_NULL(obj)) {
70 pr_debug("bpf: failed to load buffer\n"); 70 pr_debug("bpf: failed to load buffer\n");
71 return ERR_PTR(-EINVAL); 71 return ERR_PTR(-EINVAL);
72 } 72 }
@@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
102 pr_debug("bpf: successfull builtin compilation\n"); 102 pr_debug("bpf: successfull builtin compilation\n");
103 obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); 103 obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
104 104
105 if (!IS_ERR(obj) && llvm_param.dump_obj) 105 if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
106 llvm__dump_obj(filename, obj_buf, obj_buf_sz); 106 llvm__dump_obj(filename, obj_buf, obj_buf_sz);
107 107
108 free(obj_buf); 108 free(obj_buf);
109 } else 109 } else
110 obj = bpf_object__open(filename); 110 obj = bpf_object__open(filename);
111 111
112 if (IS_ERR(obj)) { 112 if (IS_ERR_OR_NULL(obj)) {
113 pr_debug("bpf: failed to load %s\n", filename); 113 pr_debug("bpf: failed to load %s\n", filename);
114 return obj; 114 return obj;
115 } 115 }
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
index 29347756b0af..77e4891e17b0 100644
--- a/tools/perf/util/bpf-prologue.c
+++ b/tools/perf/util/bpf-prologue.c
@@ -61,7 +61,7 @@ check_pos(struct bpf_insn_pos *pos)
61 61
62/* 62/*
63 * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see 63 * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
64 * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM 64 * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
65 * instruction (BPF_{B,H,W,DW}). 65 * instruction (BPF_{B,H,W,DW}).
66 */ 66 */
67static int 67static int
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 537eadd81914..04b1d53e4bf9 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -47,9 +47,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
47 return -1; 47 return -1;
48 } 48 }
49 49
50 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); 50 if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
51
52 if (al.map != NULL)
53 al.map->dso->hit = 1; 51 al.map->dso->hit = 1;
54 52
55 thread__put(thread); 53 thread__put(thread);
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index decb91f9da82..ccd02634a616 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -93,20 +93,17 @@ static int open_cgroup(const char *name)
93static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str) 93static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
94{ 94{
95 struct perf_evsel *counter; 95 struct perf_evsel *counter;
96 struct cgroup *cgrp = NULL;
97 /* 96 /*
98 * check if cgrp is already defined, if so we reuse it 97 * check if cgrp is already defined, if so we reuse it
99 */ 98 */
100 evlist__for_each_entry(evlist, counter) { 99 evlist__for_each_entry(evlist, counter) {
101 if (!counter->cgrp) 100 if (!counter->cgrp)
102 continue; 101 continue;
103 if (!strcmp(counter->cgrp->name, str)) { 102 if (!strcmp(counter->cgrp->name, str))
104 cgrp = cgroup__get(counter->cgrp); 103 return cgroup__get(counter->cgrp);
105 break;
106 }
107 } 104 }
108 105
109 return cgrp; 106 return NULL;
110} 107}
111 108
112static struct cgroup *cgroup__new(const char *name) 109static struct cgroup *cgroup__new(const char *name)
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 84eb9393c7db..5ac157056cdf 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -707,6 +707,14 @@ struct perf_config_set *perf_config_set__new(void)
707 return set; 707 return set;
708} 708}
709 709
710static int perf_config__init(void)
711{
712 if (config_set == NULL)
713 config_set = perf_config_set__new();
714
715 return config_set == NULL;
716}
717
710int perf_config(config_fn_t fn, void *data) 718int perf_config(config_fn_t fn, void *data)
711{ 719{
712 int ret = 0; 720 int ret = 0;
@@ -714,7 +722,7 @@ int perf_config(config_fn_t fn, void *data)
714 struct perf_config_section *section; 722 struct perf_config_section *section;
715 struct perf_config_item *item; 723 struct perf_config_item *item;
716 724
717 if (config_set == NULL) 725 if (config_set == NULL && perf_config__init())
718 return -1; 726 return -1;
719 727
720 perf_config_set__for_each_entry(config_set, section, item) { 728 perf_config_set__for_each_entry(config_set, section, item) {
@@ -735,12 +743,6 @@ int perf_config(config_fn_t fn, void *data)
735 return ret; 743 return ret;
736} 744}
737 745
738void perf_config__init(void)
739{
740 if (config_set == NULL)
741 config_set = perf_config_set__new();
742}
743
744void perf_config__exit(void) 746void perf_config__exit(void)
745{ 747{
746 perf_config_set__delete(config_set); 748 perf_config_set__delete(config_set);
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index baf82bf227ac..bd0a5897c76a 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -38,7 +38,6 @@ struct perf_config_set *perf_config_set__new(void);
38void perf_config_set__delete(struct perf_config_set *set); 38void perf_config_set__delete(struct perf_config_set *set);
39int perf_config_set__collect(struct perf_config_set *set, const char *file_name, 39int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
40 const char *var, const char *value); 40 const char *var, const char *value);
41void perf_config__init(void);
42void perf_config__exit(void); 41void perf_config__exit(void);
43void perf_config__refresh(void); 42void perf_config__refresh(void);
44 43
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index c8b98fa22997..4d5fc374e730 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
96 /* Nothing to do, might as well just return */ 96 /* Nothing to do, might as well just return */
97 if (decoder->packet_count == 0) 97 if (decoder->packet_count == 0)
98 return 0; 98 return 0;
99 /*
100 * The queueing process in function cs_etm_decoder__buffer_packet()
101 * increments the tail *before* using it. This is somewhat counter
102 * intuitive but it has the advantage of centralizing tail management
103 * at a single location. Because of that we need to follow the same
104 * heuristic with the head, i.e we increment it before using its
105 * value. Otherwise the first element of the packet queue is not
106 * used.
107 */
108 decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
99 109
100 *packet = decoder->packet_buffer[decoder->head]; 110 *packet = decoder->packet_buffer[decoder->head];
101 111
102 decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
103
104 decoder->packet_count--; 112 decoder->packet_count--;
105 113
106 return 1; 114 return 1;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 40020b1ca54f..822ba915d144 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -239,6 +239,7 @@ static void cs_etm__free(struct perf_session *session)
239 for (i = 0; i < aux->num_cpu; i++) 239 for (i = 0; i < aux->num_cpu; i++)
240 zfree(&aux->metadata[i]); 240 zfree(&aux->metadata[i]);
241 241
242 thread__zput(aux->unknown_thread);
242 zfree(&aux->metadata); 243 zfree(&aux->metadata);
243 zfree(&aux); 244 zfree(&aux);
244} 245}
@@ -269,9 +270,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
269 thread = etmq->etm->unknown_thread; 270 thread = etmq->etm->unknown_thread;
270 } 271 }
271 272
272 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); 273 if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
273
274 if (!al.map || !al.map->dso)
275 return 0; 274 return 0;
276 275
277 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 276 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -612,8 +611,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
612 return buff->len; 611 return buff->len;
613} 612}
614 613
615static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 614static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
616 struct auxtrace_queue *queue) 615 struct auxtrace_queue *queue)
617{ 616{
618 struct cs_etm_queue *etmq = queue->priv; 617 struct cs_etm_queue *etmq = queue->priv;
619 618
@@ -1357,6 +1356,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
1357 etm->auxtrace.free = cs_etm__free; 1356 etm->auxtrace.free = cs_etm__free;
1358 session->auxtrace = &etm->auxtrace; 1357 session->auxtrace = &etm->auxtrace;
1359 1358
1359 etm->unknown_thread = thread__new(999999999, 999999999);
1360 if (!etm->unknown_thread)
1361 goto err_free_queues;
1362
1363 /*
1364 * Initialize list node so that at thread__zput() we can avoid
1365 * segmentation fault at list_del_init().
1366 */
1367 INIT_LIST_HEAD(&etm->unknown_thread->node);
1368
1369 err = thread__set_comm(etm->unknown_thread, "unknown", 0);
1370 if (err)
1371 goto err_delete_thread;
1372
1373 if (thread__init_map_groups(etm->unknown_thread, etm->machine))
1374 goto err_delete_thread;
1375
1360 if (dump_trace) { 1376 if (dump_trace) {
1361 cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); 1377 cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
1362 return 0; 1378 return 0;
@@ -1371,16 +1387,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
1371 1387
1372 err = cs_etm__synth_events(etm, session); 1388 err = cs_etm__synth_events(etm, session);
1373 if (err) 1389 if (err)
1374 goto err_free_queues; 1390 goto err_delete_thread;
1375 1391
1376 err = auxtrace_queues__process_index(&etm->queues, session); 1392 err = auxtrace_queues__process_index(&etm->queues, session);
1377 if (err) 1393 if (err)
1378 goto err_free_queues; 1394 goto err_delete_thread;
1379 1395
1380 etm->data_queued = etm->queues.populated; 1396 etm->data_queued = etm->queues.populated;
1381 1397
1382 return 0; 1398 return 0;
1383 1399
1400err_delete_thread:
1401 thread__zput(etm->unknown_thread);
1384err_free_queues: 1402err_free_queues:
1385 auxtrace_queues__free(&etm->queues); 1403 auxtrace_queues__free(&etm->queues);
1386 session->auxtrace = NULL; 1404 session->auxtrace = NULL;
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index b0c2b5c5d337..7123746edcf4 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -247,9 +247,9 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
247 *dso_db_id = dso->db_id; 247 *dso_db_id = dso->db_id;
248 248
249 if (!al->sym) { 249 if (!al->sym) {
250 al->sym = symbol__new(al->addr, 0, 0, "unknown"); 250 al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
251 if (al->sym) 251 if (al->sym)
252 dso__insert_symbol(dso, al->map->type, al->sym); 252 dso__insert_symbol(dso, al->sym);
253 } 253 }
254 254
255 if (al->sym) { 255 if (al->sym) {
@@ -315,8 +315,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
315 al.addr = node->ip; 315 al.addr = node->ip;
316 316
317 if (al.map && !al.sym) 317 if (al.map && !al.sym)
318 al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION, 318 al.sym = dso__find_symbol(al.map->dso, al.addr);
319 al.addr);
320 319
321 db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset); 320 db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
322 321
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 36ef45b2e89d..51cf82cf1882 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -354,6 +354,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
354 if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) || 354 if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
355 (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) || 355 (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
356 (strncmp(name, "[vdso]", 6) == 0) || 356 (strncmp(name, "[vdso]", 6) == 0) ||
357 (strncmp(name, "[vdso32]", 8) == 0) ||
358 (strncmp(name, "[vdsox32]", 9) == 0) ||
357 (strncmp(name, "[vsyscall]", 10) == 0)) { 359 (strncmp(name, "[vsyscall]", 10) == 0)) {
358 m->kmod = false; 360 m->kmod = false;
359 361
@@ -1014,7 +1016,7 @@ struct map *dso__new_map(const char *name)
1014 struct dso *dso = dso__new(name); 1016 struct dso *dso = dso__new(name);
1015 1017
1016 if (dso) 1018 if (dso)
1017 map = map__new2(0, dso, MAP__FUNCTION); 1019 map = map__new2(0, dso);
1018 1020
1019 return map; 1021 return map;
1020} 1022}
@@ -1176,19 +1178,19 @@ int dso__name_len(const struct dso *dso)
1176 return dso->short_name_len; 1178 return dso->short_name_len;
1177} 1179}
1178 1180
1179bool dso__loaded(const struct dso *dso, enum map_type type) 1181bool dso__loaded(const struct dso *dso)
1180{ 1182{
1181 return dso->loaded & (1 << type); 1183 return dso->loaded;
1182} 1184}
1183 1185
1184bool dso__sorted_by_name(const struct dso *dso, enum map_type type) 1186bool dso__sorted_by_name(const struct dso *dso)
1185{ 1187{
1186 return dso->sorted_by_name & (1 << type); 1188 return dso->sorted_by_name;
1187} 1189}
1188 1190
1189void dso__set_sorted_by_name(struct dso *dso, enum map_type type) 1191void dso__set_sorted_by_name(struct dso *dso)
1190{ 1192{
1191 dso->sorted_by_name |= (1 << type); 1193 dso->sorted_by_name = true;
1192} 1194}
1193 1195
1194struct dso *dso__new(const char *name) 1196struct dso *dso__new(const char *name)
@@ -1196,12 +1198,10 @@ struct dso *dso__new(const char *name)
1196 struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); 1198 struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
1197 1199
1198 if (dso != NULL) { 1200 if (dso != NULL) {
1199 int i;
1200 strcpy(dso->name, name); 1201 strcpy(dso->name, name);
1201 dso__set_long_name(dso, dso->name, false); 1202 dso__set_long_name(dso, dso->name, false);
1202 dso__set_short_name(dso, dso->name, false); 1203 dso__set_short_name(dso, dso->name, false);
1203 for (i = 0; i < MAP__NR_TYPES; ++i) 1204 dso->symbols = dso->symbol_names = RB_ROOT;
1204 dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
1205 dso->data.cache = RB_ROOT; 1205 dso->data.cache = RB_ROOT;
1206 dso->inlined_nodes = RB_ROOT; 1206 dso->inlined_nodes = RB_ROOT;
1207 dso->srclines = RB_ROOT; 1207 dso->srclines = RB_ROOT;
@@ -1231,8 +1231,6 @@ struct dso *dso__new(const char *name)
1231 1231
1232void dso__delete(struct dso *dso) 1232void dso__delete(struct dso *dso)
1233{ 1233{
1234 int i;
1235
1236 if (!RB_EMPTY_NODE(&dso->rb_node)) 1234 if (!RB_EMPTY_NODE(&dso->rb_node))
1237 pr_err("DSO %s is still in rbtree when being deleted!\n", 1235 pr_err("DSO %s is still in rbtree when being deleted!\n",
1238 dso->long_name); 1236 dso->long_name);
@@ -1240,8 +1238,7 @@ void dso__delete(struct dso *dso)
1240 /* free inlines first, as they reference symbols */ 1238 /* free inlines first, as they reference symbols */
1241 inlines__tree_delete(&dso->inlined_nodes); 1239 inlines__tree_delete(&dso->inlined_nodes);
1242 srcline__tree_delete(&dso->srclines); 1240 srcline__tree_delete(&dso->srclines);
1243 for (i = 0; i < MAP__NR_TYPES; ++i) 1241 symbols__delete(&dso->symbols);
1244 symbols__delete(&dso->symbols[i]);
1245 1242
1246 if (dso->short_name_allocated) { 1243 if (dso->short_name_allocated) {
1247 zfree((char **)&dso->short_name); 1244 zfree((char **)&dso->short_name);
@@ -1451,9 +1448,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp)
1451 size_t ret = 0; 1448 size_t ret = 0;
1452 1449
1453 list_for_each_entry(pos, head, node) { 1450 list_for_each_entry(pos, head, node) {
1454 int i; 1451 ret += dso__fprintf(pos, fp);
1455 for (i = 0; i < MAP__NR_TYPES; ++i)
1456 ret += dso__fprintf(pos, i, fp);
1457 } 1452 }
1458 1453
1459 return ret; 1454 return ret;
@@ -1467,18 +1462,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
1467 return fprintf(fp, "%s", sbuild_id); 1462 return fprintf(fp, "%s", sbuild_id);
1468} 1463}
1469 1464
1470size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) 1465size_t dso__fprintf(struct dso *dso, FILE *fp)
1471{ 1466{
1472 struct rb_node *nd; 1467 struct rb_node *nd;
1473 size_t ret = fprintf(fp, "dso: %s (", dso->short_name); 1468 size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
1474 1469
1475 if (dso->short_name != dso->long_name) 1470 if (dso->short_name != dso->long_name)
1476 ret += fprintf(fp, "%s, ", dso->long_name); 1471 ret += fprintf(fp, "%s, ", dso->long_name);
1477 ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type], 1472 ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
1478 dso__loaded(dso, type) ? "" : "NOT ");
1479 ret += dso__fprintf_buildid(dso, fp); 1473 ret += dso__fprintf_buildid(dso, fp);
1480 ret += fprintf(fp, ")\n"); 1474 ret += fprintf(fp, ")\n");
1481 for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) { 1475 for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
1482 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 1476 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
1483 ret += symbol__fprintf(pos, fp); 1477 ret += symbol__fprintf(pos, fp);
1484 } 1478 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c229dbe0277a..ef69de2e69ea 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -140,14 +140,14 @@ struct dso {
140 struct list_head node; 140 struct list_head node;
141 struct rb_node rb_node; /* rbtree node sorted by long name */ 141 struct rb_node rb_node; /* rbtree node sorted by long name */
142 struct rb_root *root; /* root of rbtree that rb_node is in */ 142 struct rb_root *root; /* root of rbtree that rb_node is in */
143 struct rb_root symbols[MAP__NR_TYPES]; 143 struct rb_root symbols;
144 struct rb_root symbol_names[MAP__NR_TYPES]; 144 struct rb_root symbol_names;
145 struct rb_root inlined_nodes; 145 struct rb_root inlined_nodes;
146 struct rb_root srclines; 146 struct rb_root srclines;
147 struct { 147 struct {
148 u64 addr; 148 u64 addr;
149 struct symbol *symbol; 149 struct symbol *symbol;
150 } last_find_result[MAP__NR_TYPES]; 150 } last_find_result;
151 void *a2l; 151 void *a2l;
152 char *symsrc_filename; 152 char *symsrc_filename;
153 unsigned int a2l_fails; 153 unsigned int a2l_fails;
@@ -164,8 +164,8 @@ struct dso {
164 u8 short_name_allocated:1; 164 u8 short_name_allocated:1;
165 u8 long_name_allocated:1; 165 u8 long_name_allocated:1;
166 u8 is_64_bit:1; 166 u8 is_64_bit:1;
167 u8 sorted_by_name; 167 bool sorted_by_name;
168 u8 loaded; 168 bool loaded;
169 u8 rel; 169 u8 rel;
170 u8 build_id[BUILD_ID_SIZE]; 170 u8 build_id[BUILD_ID_SIZE];
171 u64 text_offset; 171 u64 text_offset;
@@ -202,14 +202,13 @@ struct dso {
202 * @dso: the 'struct dso *' in which symbols itereated 202 * @dso: the 'struct dso *' in which symbols itereated
203 * @pos: the 'struct symbol *' to use as a loop cursor 203 * @pos: the 'struct symbol *' to use as a loop cursor
204 * @n: the 'struct rb_node *' to use as a temporary storage 204 * @n: the 'struct rb_node *' to use as a temporary storage
205 * @type: the 'enum map_type' type of symbols
206 */ 205 */
207#define dso__for_each_symbol(dso, pos, n, type) \ 206#define dso__for_each_symbol(dso, pos, n) \
208 symbols__for_each_entry(&(dso)->symbols[(type)], pos, n) 207 symbols__for_each_entry(&(dso)->symbols, pos, n)
209 208
210static inline void dso__set_loaded(struct dso *dso, enum map_type type) 209static inline void dso__set_loaded(struct dso *dso)
211{ 210{
212 dso->loaded |= (1 << type); 211 dso->loaded = true;
213} 212}
214 213
215struct dso *dso__new(const char *name); 214struct dso *dso__new(const char *name);
@@ -231,11 +230,16 @@ static inline void __dso__zput(struct dso **dso)
231 230
232#define dso__zput(dso) __dso__zput(&dso) 231#define dso__zput(dso) __dso__zput(&dso)
233 232
234bool dso__loaded(const struct dso *dso, enum map_type type); 233bool dso__loaded(const struct dso *dso);
235 234
236bool dso__sorted_by_name(const struct dso *dso, enum map_type type); 235static inline bool dso__has_symbols(const struct dso *dso)
237void dso__set_sorted_by_name(struct dso *dso, enum map_type type); 236{
238void dso__sort_by_name(struct dso *dso, enum map_type type); 237 return !RB_EMPTY_ROOT(&dso->symbols);
238}
239
240bool dso__sorted_by_name(const struct dso *dso);
241void dso__set_sorted_by_name(struct dso *dso);
242void dso__sort_by_name(struct dso *dso);
239 243
240void dso__set_build_id(struct dso *dso, void *build_id); 244void dso__set_build_id(struct dso *dso, void *build_id);
241bool dso__build_id_equal(const struct dso *dso, u8 *build_id); 245bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
@@ -349,9 +353,8 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
349size_t __dsos__fprintf(struct list_head *head, FILE *fp); 353size_t __dsos__fprintf(struct list_head *head, FILE *fp);
350 354
351size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); 355size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
352size_t dso__fprintf_symbols_by_name(struct dso *dso, 356size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
353 enum map_type type, FILE *fp); 357size_t dso__fprintf(struct dso *dso, FILE *fp);
354size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
355 358
356static inline bool dso__is_vmlinux(struct dso *dso) 359static inline bool dso__is_vmlinux(struct dso *dso)
357{ 360{
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c842762e3f2..59f38c7693f8 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -93,6 +93,37 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
93 return 0; 93 return 0;
94} 94}
95 95
96static int perf_env__read_arch(struct perf_env *env)
97{
98 struct utsname uts;
99
100 if (env->arch)
101 return 0;
102
103 if (!uname(&uts))
104 env->arch = strdup(uts.machine);
105
106 return env->arch ? 0 : -ENOMEM;
107}
108
109static int perf_env__read_nr_cpus_avail(struct perf_env *env)
110{
111 if (env->nr_cpus_avail == 0)
112 env->nr_cpus_avail = cpu__max_present_cpu();
113
114 return env->nr_cpus_avail ? 0 : -ENOENT;
115}
116
117const char *perf_env__raw_arch(struct perf_env *env)
118{
119 return env && !perf_env__read_arch(env) ? env->arch : "unknown";
120}
121
122int perf_env__nr_cpus_avail(struct perf_env *env)
123{
124 return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
125}
126
96void cpu_cache_level__free(struct cpu_cache_level *cache) 127void cpu_cache_level__free(struct cpu_cache_level *cache)
97{ 128{
98 free(cache->type); 129 free(cache->type);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index c4ef2e523367..1f3ccc368530 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -76,4 +76,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
76void cpu_cache_level__free(struct cpu_cache_level *cache); 76void cpu_cache_level__free(struct cpu_cache_level *cache);
77 77
78const char *perf_env__arch(struct perf_env *env); 78const char *perf_env__arch(struct perf_env *env);
79const char *perf_env__raw_arch(struct perf_env *env);
80int perf_env__nr_cpus_avail(struct perf_env *env);
81
79#endif /* __PERF_ENV_H */ 82#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 98ff3a6a3d50..0c8ecf0c78a4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id)
88 return perf_ns__names[id]; 88 return perf_ns__names[id];
89} 89}
90 90
91static int perf_tool__process_synth_event(struct perf_tool *tool, 91int perf_tool__process_synth_event(struct perf_tool *tool,
92 union perf_event *event, 92 union perf_event *event,
93 struct machine *machine, 93 struct machine *machine,
94 perf_event__handler_t process) 94 perf_event__handler_t process)
95{ 95{
96 struct perf_sample synth_sample = { 96 struct perf_sample synth_sample = {
97 .pid = -1, 97 .pid = -1,
@@ -464,8 +464,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
464{ 464{
465 int rc = 0; 465 int rc = 0;
466 struct map *pos; 466 struct map *pos;
467 struct map_groups *kmaps = &machine->kmaps; 467 struct maps *maps = machine__kernel_maps(machine);
468 struct maps *maps = &kmaps->maps[MAP__FUNCTION];
469 union perf_event *event = zalloc((sizeof(event->mmap) + 468 union perf_event *event = zalloc((sizeof(event->mmap) +
470 machine->id_hdr_size)); 469 machine->id_hdr_size));
471 if (event == NULL) { 470 if (event == NULL) {
@@ -488,7 +487,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
488 for (pos = maps__first(maps); pos; pos = map__next(pos)) { 487 for (pos = maps__first(maps); pos; pos = map__next(pos)) {
489 size_t size; 488 size_t size;
490 489
491 if (__map__is_kernel(pos)) 490 if (!__map__is_kmodule(pos))
492 continue; 491 continue;
493 492
494 size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); 493 size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
@@ -869,7 +868,7 @@ static int find_symbol_cb(void *arg, const char *name, char type,
869 * Must be a function or at least an alias, as in PARISC64, where "_text" is 868 * Must be a function or at least an alias, as in PARISC64, where "_text" is
870 * an 'A' to the same address as "_stext". 869 * an 'A' to the same address as "_stext".
871 */ 870 */
872 if (!(symbol_type__is_a(type, MAP__FUNCTION) || 871 if (!(kallsyms__is_function(type) ||
873 type == 'A') || strcmp(name, args->name)) 872 type == 'A') || strcmp(name, args->name))
874 return 0; 873 return 0;
875 874
@@ -889,9 +888,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
889 return 0; 888 return 0;
890} 889}
891 890
892int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, 891int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
893 perf_event__handler_t process, 892 perf_event__handler_t process __maybe_unused,
894 struct machine *machine) 893 struct machine *machine __maybe_unused)
894{
895 return 0;
896}
897
898static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
899 perf_event__handler_t process,
900 struct machine *machine)
895{ 901{
896 size_t size; 902 size_t size;
897 struct map *map = machine__kernel_map(machine); 903 struct map *map = machine__kernel_map(machine);
@@ -944,6 +950,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
944 return err; 950 return err;
945} 951}
946 952
953int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
954 perf_event__handler_t process,
955 struct machine *machine)
956{
957 int err;
958
959 err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
960 if (err < 0)
961 return err;
962
963 return perf_event__synthesize_extra_kmaps(tool, process, machine);
964}
965
947int perf_event__synthesize_thread_map2(struct perf_tool *tool, 966int perf_event__synthesize_thread_map2(struct perf_tool *tool,
948 struct thread_map *threads, 967 struct thread_map *threads,
949 perf_event__handler_t process, 968 perf_event__handler_t process,
@@ -1489,9 +1508,8 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
1489 return machine__process_event(machine, event, sample); 1508 return machine__process_event(machine, event, sample);
1490} 1509}
1491 1510
1492void thread__find_addr_map(struct thread *thread, u8 cpumode, 1511struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
1493 enum map_type type, u64 addr, 1512 struct addr_location *al)
1494 struct addr_location *al)
1495{ 1513{
1496 struct map_groups *mg = thread->mg; 1514 struct map_groups *mg = thread->mg;
1497 struct machine *machine = mg->machine; 1515 struct machine *machine = mg->machine;
@@ -1505,7 +1523,7 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
1505 1523
1506 if (machine == NULL) { 1524 if (machine == NULL) {
1507 al->map = NULL; 1525 al->map = NULL;
1508 return; 1526 return NULL;
1509 } 1527 }
1510 1528
1511 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { 1529 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
@@ -1533,10 +1551,10 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode,
1533 !perf_host) 1551 !perf_host)
1534 al->filtered |= (1 << HIST_FILTER__HOST); 1552 al->filtered |= (1 << HIST_FILTER__HOST);
1535 1553
1536 return; 1554 return NULL;
1537 } 1555 }
1538try_again: 1556try_again:
1539 al->map = map_groups__find(mg, type, al->addr); 1557 al->map = map_groups__find(mg, al->addr);
1540 if (al->map == NULL) { 1558 if (al->map == NULL) {
1541 /* 1559 /*
1542 * If this is outside of all known maps, and is a negative 1560 * If this is outside of all known maps, and is a negative
@@ -1563,17 +1581,17 @@ try_again:
1563 map__load(al->map); 1581 map__load(al->map);
1564 al->addr = al->map->map_ip(al->map, al->addr); 1582 al->addr = al->map->map_ip(al->map, al->addr);
1565 } 1583 }
1584
1585 return al->map;
1566} 1586}
1567 1587
1568void thread__find_addr_location(struct thread *thread, 1588struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
1569 u8 cpumode, enum map_type type, u64 addr, 1589 u64 addr, struct addr_location *al)
1570 struct addr_location *al)
1571{ 1590{
1572 thread__find_addr_map(thread, cpumode, type, addr, al); 1591 al->sym = NULL;
1573 if (al->map != NULL) 1592 if (thread__find_map(thread, cpumode, addr, al))
1574 al->sym = map__find_symbol(al->map, al->addr); 1593 al->sym = map__find_symbol(al->map, al->addr);
1575 else 1594 return al->sym;
1576 al->sym = NULL;
1577} 1595}
1578 1596
1579/* 1597/*
@@ -1590,7 +1608,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
1590 return -1; 1608 return -1;
1591 1609
1592 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); 1610 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
1593 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); 1611 thread__find_map(thread, sample->cpumode, sample->ip, al);
1594 dump_printf(" ...... dso: %s\n", 1612 dump_printf(" ...... dso: %s\n",
1595 al->map ? al->map->dso->long_name : 1613 al->map ? al->map->dso->long_name :
1596 al->level == 'H' ? "[hypervisor]" : "<not found>"); 1614 al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1669,10 +1687,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
1669void thread__resolve(struct thread *thread, struct addr_location *al, 1687void thread__resolve(struct thread *thread, struct addr_location *al,
1670 struct perf_sample *sample) 1688 struct perf_sample *sample)
1671{ 1689{
1672 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); 1690 thread__find_map(thread, sample->cpumode, sample->addr, al);
1673 if (!al->map)
1674 thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
1675 sample->addr, al);
1676 1691
1677 al->cpu = sample->cpu; 1692 al->cpu = sample->cpu;
1678 al->sym = NULL; 1693 al->sym = NULL;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0f794744919c..bfa60bcafbde 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool,
750 union perf_event *event, 750 union perf_event *event,
751 struct perf_sample *sample, 751 struct perf_sample *sample,
752 struct machine *machine); 752 struct machine *machine);
753int perf_tool__process_synth_event(struct perf_tool *tool,
754 union perf_event *event,
755 struct machine *machine,
756 perf_event__handler_t process);
753int perf_event__process(struct perf_tool *tool, 757int perf_event__process(struct perf_tool *tool,
754 union perf_event *event, 758 union perf_event *event,
755 struct perf_sample *sample, 759 struct perf_sample *sample,
@@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
796 bool mmap_data, 800 bool mmap_data,
797 unsigned int proc_map_timeout); 801 unsigned int proc_map_timeout);
798 802
803int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
804 perf_event__handler_t process,
805 struct machine *machine);
806
799size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); 807size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
800size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); 808size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
801size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); 809size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a59281d64368..e7a4b31a84fb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1795,3 +1795,18 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
1795 1795
1796 return true; 1796 return true;
1797} 1797}
1798
1799/*
1800 * Events in data file are not collect in groups, but we still want
1801 * the group display. Set the artificial group and set the leader's
1802 * forced_leader flag to notify the display code.
1803 */
1804void perf_evlist__force_leader(struct perf_evlist *evlist)
1805{
1806 if (!evlist->nr_groups) {
1807 struct perf_evsel *leader = perf_evlist__first(evlist);
1808
1809 perf_evlist__set_leader(evlist);
1810 leader->forced_leader = true;
1811 }
1812}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6c41b2f78713..dc66436add98 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -309,4 +309,7 @@ struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
309 union perf_event *event); 309 union perf_event *event);
310 310
311bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); 311bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
312
313void perf_evlist__force_leader(struct perf_evlist *evlist);
314
312#endif /* __PERF_EVLIST_H */ 315#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4cd2cf93f726..94fce4f537e9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2197,7 +2197,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
2197 } 2197 }
2198 } 2198 }
2199 2199
2200 if (type & PERF_SAMPLE_CALLCHAIN) { 2200 if (evsel__has_callchain(evsel)) {
2201 const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); 2201 const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
2202 2202
2203 OVERFLOW_CHECK_u64(array); 2203 OVERFLOW_CHECK_u64(array);
@@ -2857,12 +2857,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
2857 "Hint: Try again after reducing the number of events.\n" 2857 "Hint: Try again after reducing the number of events.\n"
2858 "Hint: Try increasing the limit with 'ulimit -n <limit>'"); 2858 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2859 case ENOMEM: 2859 case ENOMEM:
2860 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 && 2860 if (evsel__has_callchain(evsel) &&
2861 access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0) 2861 access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
2862 return scnprintf(msg, size, 2862 return scnprintf(msg, size,
2863 "Not enough memory to setup event with callchain.\n" 2863 "Not enough memory to setup event with callchain.\n"
2864 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n" 2864 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
2865 "Hint: Current value: %d", sysctl_perf_event_max_stack); 2865 "Hint: Current value: %d", sysctl__max_stack());
2866 break; 2866 break;
2867 case ENODEV: 2867 case ENODEV:
2868 if (target->cpu_list) 2868 if (target->cpu_list)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 92ec009a292d..d277930b19a1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -127,6 +127,7 @@ struct perf_evsel {
127 bool precise_max; 127 bool precise_max;
128 bool ignore_missing_thread; 128 bool ignore_missing_thread;
129 bool forced_leader; 129 bool forced_leader;
130 bool use_uncore_alias;
130 /* parse modifier helper */ 131 /* parse modifier helper */
131 int exclude_GH; 132 int exclude_GH;
132 int nr_members; 133 int nr_members;
@@ -458,6 +459,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evs
458 return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; 459 return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
459} 460}
460 461
462static inline bool evsel__has_callchain(const struct perf_evsel *evsel)
463{
464 return (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
465}
466
461typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); 467typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
462 468
463int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, 469int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c540d47583e7..aafbe54fd3fa 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -114,7 +114,7 @@ gen_build_id(struct buildid_note *note,
114 114
115 fd = open("/dev/urandom", O_RDONLY); 115 fd = open("/dev/urandom", O_RDONLY);
116 if (fd == -1) 116 if (fd == -1)
117 err(1, "cannot access /dev/urandom for builid"); 117 err(1, "cannot access /dev/urandom for buildid");
118 118
119 sret = read(fd, note->build_id, sz); 119 sret = read(fd, note->build_id, sz);
120 120
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a8bff2178fbc..540cd2dcd3e7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1459,8 +1459,24 @@ static void print_cmdline(struct feat_fd *ff, FILE *fp)
1459 1459
1460 fprintf(fp, "# cmdline : "); 1460 fprintf(fp, "# cmdline : ");
1461 1461
1462 for (i = 0; i < nr; i++) 1462 for (i = 0; i < nr; i++) {
1463 fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); 1463 char *argv_i = strdup(ff->ph->env.cmdline_argv[i]);
1464 if (!argv_i) {
1465 fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
1466 } else {
1467 char *mem = argv_i;
1468 do {
1469 char *quote = strchr(argv_i, '\'');
1470 if (!quote)
1471 break;
1472 *quote++ = '\0';
1473 fprintf(fp, "%s\\\'", argv_i);
1474 argv_i = quote;
1475 } while (1);
1476 fprintf(fp, "%s ", argv_i);
1477 free(mem);
1478 }
1479 }
1464 fputc('\n', fp); 1480 fputc('\n', fp);
1465} 1481}
1466 1482
@@ -3312,8 +3328,6 @@ int perf_session__read_header(struct perf_session *session)
3312 lseek(fd, tmp, SEEK_SET); 3328 lseek(fd, tmp, SEEK_SET);
3313 } 3329 }
3314 3330
3315 symbol_conf.nr_events = nr_attrs;
3316
3317 perf_header__process_sections(header, fd, &session->tevent, 3331 perf_header__process_sections(header, fd, &session->tevent,
3318 perf_file_section__process); 3332 perf_file_section__process);
3319 3333
@@ -3739,8 +3753,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
3739 perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); 3753 perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
3740 } 3754 }
3741 3755
3742 symbol_conf.nr_events = evlist->nr_entries;
3743
3744 return 0; 3756 return 0;
3745} 3757}
3746 3758
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4d602fba40b2..828cb9794c76 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists)
370 370
371static int hist_entry__init(struct hist_entry *he, 371static int hist_entry__init(struct hist_entry *he,
372 struct hist_entry *template, 372 struct hist_entry *template,
373 bool sample_self) 373 bool sample_self,
374 size_t callchain_size)
374{ 375{
375 *he = *template; 376 *he = *template;
377 he->callchain_size = callchain_size;
376 378
377 if (symbol_conf.cumulate_callchain) { 379 if (symbol_conf.cumulate_callchain) {
378 he->stat_acc = malloc(sizeof(he->stat)); 380 he->stat_acc = malloc(sizeof(he->stat));
@@ -410,7 +412,7 @@ static int hist_entry__init(struct hist_entry *he,
410 map__get(he->mem_info->daddr.map); 412 map__get(he->mem_info->daddr.map);
411 } 413 }
412 414
413 if (symbol_conf.use_callchain) 415 if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
414 callchain_init(he->callchain); 416 callchain_init(he->callchain);
415 417
416 if (he->raw_data) { 418 if (he->raw_data) {
@@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
473 475
474 he = ops->new(callchain_size); 476 he = ops->new(callchain_size);
475 if (he) { 477 if (he) {
476 err = hist_entry__init(he, template, sample_self); 478 err = hist_entry__init(he, template, sample_self, callchain_size);
477 if (err) { 479 if (err) {
478 ops->free(he); 480 ops->free(he);
479 he = NULL; 481 he = NULL;
@@ -492,7 +494,7 @@ static u8 symbol__parent_filter(const struct symbol *parent)
492 494
493static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) 495static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
494{ 496{
495 if (!symbol_conf.use_callchain) 497 if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
496 return; 498 return;
497 499
498 he->hists->callchain_period += period; 500 he->hists->callchain_period += period;
@@ -619,9 +621,11 @@ __hists__add_entry(struct hists *hists,
619 .raw_data = sample->raw_data, 621 .raw_data = sample->raw_data,
620 .raw_size = sample->raw_size, 622 .raw_size = sample->raw_size,
621 .ops = ops, 623 .ops = ops,
622 }; 624 }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
623 625
624 return hists__findnew_entry(hists, &entry, al, sample_self); 626 if (!hists->has_callchains && he && he->callchain_size != 0)
627 hists->has_callchains = true;
628 return he;
625} 629}
626 630
627struct hist_entry *hists__add_entry(struct hists *hists, 631struct hist_entry *hists__add_entry(struct hists *hists,
@@ -986,7 +990,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
986 iter->he = he; 990 iter->he = he;
987 he_cache[iter->curr++] = he; 991 he_cache[iter->curr++] = he;
988 992
989 if (symbol_conf.use_callchain) 993 if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
990 callchain_append(he->callchain, &cursor, sample->period); 994 callchain_append(he->callchain, &cursor, sample->period);
991 return 0; 995 return 0;
992} 996}
@@ -1039,7 +1043,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
1039 int err, err2; 1043 int err, err2;
1040 struct map *alm = NULL; 1044 struct map *alm = NULL;
1041 1045
1042 if (al && al->map) 1046 if (al)
1043 alm = map__get(al->map); 1047 alm = map__get(al->map);
1044 1048
1045 err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, 1049 err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
@@ -1373,7 +1377,8 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
1373 if (new_he) { 1377 if (new_he) {
1374 new_he->leaf = true; 1378 new_he->leaf = true;
1375 1379
1376 if (symbol_conf.use_callchain) { 1380 if (hist_entry__has_callchains(new_he) &&
1381 symbol_conf.use_callchain) {
1377 callchain_cursor_reset(&callchain_cursor); 1382 callchain_cursor_reset(&callchain_cursor);
1378 if (callchain_merge(&callchain_cursor, 1383 if (callchain_merge(&callchain_cursor,
1379 new_he->callchain, 1384 new_he->callchain,
@@ -1414,7 +1419,7 @@ static int hists__collapse_insert_entry(struct hists *hists,
1414 if (symbol_conf.cumulate_callchain) 1419 if (symbol_conf.cumulate_callchain)
1415 he_stat__add_stat(iter->stat_acc, he->stat_acc); 1420 he_stat__add_stat(iter->stat_acc, he->stat_acc);
1416 1421
1417 if (symbol_conf.use_callchain) { 1422 if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
1418 callchain_cursor_reset(&callchain_cursor); 1423 callchain_cursor_reset(&callchain_cursor);
1419 if (callchain_merge(&callchain_cursor, 1424 if (callchain_merge(&callchain_cursor,
1420 iter->callchain, 1425 iter->callchain,
@@ -1757,7 +1762,7 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro
1757 bool use_callchain; 1762 bool use_callchain;
1758 1763
1759 if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) 1764 if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
1760 use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; 1765 use_callchain = evsel__has_callchain(evsel);
1761 else 1766 else
1762 use_callchain = symbol_conf.use_callchain; 1767 use_callchain = symbol_conf.use_callchain;
1763 1768
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index fbabfd8a215d..73049f7f0f60 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -85,6 +85,7 @@ struct hists {
85 struct events_stats stats; 85 struct events_stats stats;
86 u64 event_stream; 86 u64 event_stream;
87 u16 col_len[HISTC_NR_COLS]; 87 u16 col_len[HISTC_NR_COLS];
88 bool has_callchains;
88 int socket_filter; 89 int socket_filter;
89 struct perf_hpp_list *hpp_list; 90 struct perf_hpp_list *hpp_list;
90 struct list_head hpp_formats; 91 struct list_head hpp_formats;
@@ -220,6 +221,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
220 return &hevsel->hists; 221 return &hevsel->hists;
221} 222}
222 223
224static __pure inline bool hists__has_callchains(struct hists *hists)
225{
226 return hists->has_callchains;
227}
228
223int hists__init(void); 229int hists__init(void);
224int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); 230int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
225 231
@@ -419,19 +425,24 @@ struct hist_browser_timer {
419 int refresh; 425 int refresh;
420}; 426};
421 427
428struct annotation_options;
429
422#ifdef HAVE_SLANG_SUPPORT 430#ifdef HAVE_SLANG_SUPPORT
423#include "../ui/keysyms.h" 431#include "../ui/keysyms.h"
424int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, 432int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
425 struct hist_browser_timer *hbt); 433 struct hist_browser_timer *hbt,
434 struct annotation_options *annotation_opts);
426 435
427int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, 436int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
428 struct hist_browser_timer *hbt); 437 struct hist_browser_timer *hbt,
438 struct annotation_options *annotation_opts);
429 439
430int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, 440int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
431 struct hist_browser_timer *hbt, 441 struct hist_browser_timer *hbt,
432 float min_pcnt, 442 float min_pcnt,
433 struct perf_env *env, 443 struct perf_env *env,
434 bool warn_lost_event); 444 bool warn_lost_event,
445 struct annotation_options *annotation_options);
435int script_browse(const char *script_opt); 446int script_browse(const char *script_opt);
436#else 447#else
437static inline 448static inline
@@ -440,20 +451,23 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
440 struct hist_browser_timer *hbt __maybe_unused, 451 struct hist_browser_timer *hbt __maybe_unused,
441 float min_pcnt __maybe_unused, 452 float min_pcnt __maybe_unused,
442 struct perf_env *env __maybe_unused, 453 struct perf_env *env __maybe_unused,
443 bool warn_lost_event __maybe_unused) 454 bool warn_lost_event __maybe_unused,
455 struct annotation_options *annotation_options __maybe_unused)
444{ 456{
445 return 0; 457 return 0;
446} 458}
447static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, 459static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
448 struct perf_evsel *evsel __maybe_unused, 460 struct perf_evsel *evsel __maybe_unused,
449 struct hist_browser_timer *hbt __maybe_unused) 461 struct hist_browser_timer *hbt __maybe_unused,
462 struct annotation_options *annotation_options __maybe_unused)
450{ 463{
451 return 0; 464 return 0;
452} 465}
453 466
454static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, 467static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
455 struct perf_evsel *evsel __maybe_unused, 468 struct perf_evsel *evsel __maybe_unused,
456 struct hist_browser_timer *hbt __maybe_unused) 469 struct hist_browser_timer *hbt __maybe_unused,
470 struct annotation_options *annotation_opts __maybe_unused)
457{ 471{
458 return 0; 472 return 0;
459} 473}
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 72db2744876d..7f0c83b6332b 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -335,8 +335,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
335 if (!thread) 335 if (!thread)
336 return -1; 336 return -1;
337 337
338 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); 338 if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
339 if (!al.map || !al.map->dso)
340 goto out_put; 339 goto out_put;
341 340
342 len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, 341 len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e23578c7b1be..2669c9f748e4 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
208 return insn_offset_displacement(insn) + insn->displacement.nbytes; 208 return insn_offset_displacement(insn) + insn->displacement.nbytes;
209} 209}
210 210
211#define POP_SS_OPCODE 0x1f
212#define MOV_SREG_OPCODE 0x8e
213
214/*
215 * Intel SDM Vol.3A 6.8.3 states;
216 * "Any single-step trap that would be delivered following the MOV to SS
217 * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
218 * suppressed."
219 * This function returns true if @insn is MOV SS or POP SS. On these
220 * instructions, single stepping is suppressed.
221 */
222static inline int insn_masking_exception(struct insn *insn)
223{
224 return insn->opcode.bytes[0] == POP_SS_OPCODE ||
225 (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
226 X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
227}
228
211#endif /* _ASM_X86_INSN_H */ 229#endif /* _ASM_X86_INSN_H */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f9157aed1289..d404bed7003a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -113,6 +113,7 @@ struct intel_pt_decoder {
113 bool have_cyc; 113 bool have_cyc;
114 bool fixup_last_mtc; 114 bool fixup_last_mtc;
115 bool have_last_ip; 115 bool have_last_ip;
116 enum intel_pt_param_flags flags;
116 uint64_t pos; 117 uint64_t pos;
117 uint64_t last_ip; 118 uint64_t last_ip;
118 uint64_t ip; 119 uint64_t ip;
@@ -226,6 +227,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
226 decoder->return_compression = params->return_compression; 227 decoder->return_compression = params->return_compression;
227 decoder->branch_enable = params->branch_enable; 228 decoder->branch_enable = params->branch_enable;
228 229
230 decoder->flags = params->flags;
231
229 decoder->period = params->period; 232 decoder->period = params->period;
230 decoder->period_type = params->period_type; 233 decoder->period_type = params->period_type;
231 234
@@ -1097,6 +1100,15 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
1097 return ret; 1100 return ret;
1098} 1101}
1099 1102
1103static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
1104 struct intel_pt_insn *intel_pt_insn,
1105 uint64_t ip, int err)
1106{
1107 return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
1108 intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
1109 ip == decoder->ip + intel_pt_insn->length;
1110}
1111
1100static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) 1112static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
1101{ 1113{
1102 struct intel_pt_insn intel_pt_insn; 1114 struct intel_pt_insn intel_pt_insn;
@@ -1109,10 +1121,11 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
1109 err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); 1121 err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
1110 if (err == INTEL_PT_RETURN) 1122 if (err == INTEL_PT_RETURN)
1111 return 0; 1123 return 0;
1112 if (err == -EAGAIN) { 1124 if (err == -EAGAIN ||
1125 intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
1113 if (intel_pt_fup_event(decoder)) 1126 if (intel_pt_fup_event(decoder))
1114 return 0; 1127 return 0;
1115 return err; 1128 return -EAGAIN;
1116 } 1129 }
1117 decoder->set_fup_tx_flags = false; 1130 decoder->set_fup_tx_flags = false;
1118 if (err) 1131 if (err)
@@ -1376,7 +1389,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1376{ 1389{
1377 intel_pt_log("ERROR: Buffer overflow\n"); 1390 intel_pt_log("ERROR: Buffer overflow\n");
1378 intel_pt_clear_tx_flags(decoder); 1391 intel_pt_clear_tx_flags(decoder);
1379 decoder->have_tma = false;
1380 decoder->cbr = 0; 1392 decoder->cbr = 0;
1381 decoder->timestamp_insn_cnt = 0; 1393 decoder->timestamp_insn_cnt = 0;
1382 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1394 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
@@ -1604,7 +1616,6 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1604 case INTEL_PT_PSB: 1616 case INTEL_PT_PSB:
1605 case INTEL_PT_TSC: 1617 case INTEL_PT_TSC:
1606 case INTEL_PT_TMA: 1618 case INTEL_PT_TMA:
1607 case INTEL_PT_CBR:
1608 case INTEL_PT_MODE_TSX: 1619 case INTEL_PT_MODE_TSX:
1609 case INTEL_PT_BAD: 1620 case INTEL_PT_BAD:
1610 case INTEL_PT_PSBEND: 1621 case INTEL_PT_PSBEND:
@@ -1620,6 +1631,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1620 decoder->pkt_step = 0; 1631 decoder->pkt_step = 0;
1621 return -ENOENT; 1632 return -ENOENT;
1622 1633
1634 case INTEL_PT_CBR:
1635 intel_pt_calc_cbr(decoder);
1636 break;
1637
1623 case INTEL_PT_OVF: 1638 case INTEL_PT_OVF:
1624 return intel_pt_overflow(decoder); 1639 return intel_pt_overflow(decoder);
1625 1640
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index fc1752d50019..51c18d67f4ca 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -60,6 +60,14 @@ enum {
60 INTEL_PT_ERR_MAX, 60 INTEL_PT_ERR_MAX,
61}; 61};
62 62
63enum intel_pt_param_flags {
64 /*
65 * FUP packet can contain next linear instruction pointer instead of
66 * current linear instruction pointer.
67 */
68 INTEL_PT_FUP_WITH_NLIP = 1 << 0,
69};
70
63struct intel_pt_state { 71struct intel_pt_state {
64 enum intel_pt_sample_type type; 72 enum intel_pt_sample_type type;
65 int err; 73 int err;
@@ -106,6 +114,7 @@ struct intel_pt_params {
106 unsigned int mtc_period; 114 unsigned int mtc_period;
107 uint32_t tsc_ctc_ratio_n; 115 uint32_t tsc_ctc_ratio_n;
108 uint32_t tsc_ctc_ratio_d; 116 uint32_t tsc_ctc_ratio_d;
117 enum intel_pt_param_flags flags;
109}; 118};
110 119
111struct intel_pt_decoder; 120struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 0effaff57020..aec68908d604 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -442,8 +442,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
442 } 442 }
443 443
444 while (1) { 444 while (1) {
445 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); 445 if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
446 if (!al.map || !al.map->dso)
447 return -EINVAL; 446 return -EINVAL;
448 447
449 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 448 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -596,8 +595,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data)
596 if (!thread) 595 if (!thread)
597 return -EINVAL; 596 return -EINVAL;
598 597
599 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); 598 if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
600 if (!al.map || !al.map->dso)
601 return -EINVAL; 599 return -EINVAL;
602 600
603 offset = al.map->map_ip(al.map, ip); 601 offset = al.map->map_ip(al.map, ip);
@@ -751,6 +749,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
751 unsigned int queue_nr) 749 unsigned int queue_nr)
752{ 750{
753 struct intel_pt_params params = { .get_trace = 0, }; 751 struct intel_pt_params params = { .get_trace = 0, };
752 struct perf_env *env = pt->machine->env;
754 struct intel_pt_queue *ptq; 753 struct intel_pt_queue *ptq;
755 754
756 ptq = zalloc(sizeof(struct intel_pt_queue)); 755 ptq = zalloc(sizeof(struct intel_pt_queue));
@@ -832,6 +831,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
832 } 831 }
833 } 832 }
834 833
834 if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
835 params.flags |= INTEL_PT_FUP_WITH_NLIP;
836
835 ptq->decoder = intel_pt_decoder_new(&params); 837 ptq->decoder = intel_pt_decoder_new(&params);
836 if (!ptq->decoder) 838 if (!ptq->decoder)
837 goto out_free; 839 goto out_free;
@@ -1523,6 +1525,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1523 1525
1524 if (intel_pt_is_switch_ip(ptq, state->to_ip)) { 1526 if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1525 switch (ptq->switch_state) { 1527 switch (ptq->switch_state) {
1528 case INTEL_PT_SS_NOT_TRACING:
1526 case INTEL_PT_SS_UNKNOWN: 1529 case INTEL_PT_SS_UNKNOWN:
1527 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 1530 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1528 err = intel_pt_next_tid(pt, ptq); 1531 err = intel_pt_next_tid(pt, ptq);
@@ -1565,7 +1568,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1565 if (map__load(map)) 1568 if (map__load(map))
1566 return 0; 1569 return 0;
1567 1570
1568 start = dso__first_symbol(map->dso, MAP__FUNCTION); 1571 start = dso__first_symbol(map->dso);
1569 1572
1570 for (sym = start; sym; sym = dso__next_symbol(sym)) { 1573 for (sym = start; sym; sym = dso__next_symbol(sym)) {
1571 if (sym->binding == STB_GLOBAL && 1574 if (sym->binding == STB_GLOBAL &&
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 1cca0a2fa641..976e658e38dc 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -14,11 +14,12 @@
14#include "config.h" 14#include "config.h"
15#include "util.h" 15#include "util.h"
16#include <sys/wait.h> 16#include <sys/wait.h>
17#include <subcmd/exec-cmd.h>
17 18
18#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ 19#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
19 "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ 20 "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
20 "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ 21 "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \
21 "$CLANG_OPTIONS $KERNEL_INC_OPTIONS " \ 22 "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \
22 "-Wno-unused-value -Wno-pointer-sign " \ 23 "-Wno-unused-value -Wno-pointer-sign " \
23 "-working-directory $WORKING_DIR " \ 24 "-working-directory $WORKING_DIR " \
24 "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -" 25 "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
@@ -212,7 +213,7 @@ version_notice(void)
212" \t\thttp://llvm.org/apt\n\n" 213" \t\thttp://llvm.org/apt\n\n"
213" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" 214" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
214" \toption in [llvm] section of ~/.perfconfig to:\n\n" 215" \toption in [llvm] section of ~/.perfconfig to:\n\n"
215" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" 216" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
216" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" 217" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
217" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" 218" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
218" \t(Replace /path/to/llc with path to your llc)\n\n" 219" \t(Replace /path/to/llc with path to your llc)\n\n"
@@ -431,9 +432,11 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
431 const char *clang_opt = llvm_param.clang_opt; 432 const char *clang_opt = llvm_param.clang_opt;
432 char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; 433 char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
433 char serr[STRERR_BUFSIZE]; 434 char serr[STRERR_BUFSIZE];
434 char *kbuild_dir = NULL, *kbuild_include_opts = NULL; 435 char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
436 *perf_bpf_include_opts = NULL;
435 const char *template = llvm_param.clang_bpf_cmd_template; 437 const char *template = llvm_param.clang_bpf_cmd_template;
436 char *command_echo, *command_out; 438 char *command_echo = NULL, *command_out;
439 char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
437 440
438 if (path[0] != '-' && realpath(path, abspath) == NULL) { 441 if (path[0] != '-' && realpath(path, abspath) == NULL) {
439 err = errno; 442 err = errno;
@@ -471,12 +474,14 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
471 474
472 snprintf(linux_version_code_str, sizeof(linux_version_code_str), 475 snprintf(linux_version_code_str, sizeof(linux_version_code_str),
473 "0x%x", kernel_version); 476 "0x%x", kernel_version);
474 477 if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
478 goto errout;
475 force_set_env("NR_CPUS", nr_cpus_avail_str); 479 force_set_env("NR_CPUS", nr_cpus_avail_str);
476 force_set_env("LINUX_VERSION_CODE", linux_version_code_str); 480 force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
477 force_set_env("CLANG_EXEC", clang_path); 481 force_set_env("CLANG_EXEC", clang_path);
478 force_set_env("CLANG_OPTIONS", clang_opt); 482 force_set_env("CLANG_OPTIONS", clang_opt);
479 force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); 483 force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
484 force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
480 force_set_env("WORKING_DIR", kbuild_dir ? : "."); 485 force_set_env("WORKING_DIR", kbuild_dir ? : ".");
481 486
482 /* 487 /*
@@ -512,6 +517,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
512 free(command_out); 517 free(command_out);
513 free(kbuild_dir); 518 free(kbuild_dir);
514 free(kbuild_include_opts); 519 free(kbuild_include_opts);
520 free(perf_bpf_include_opts);
521 free(perf_include_dir);
515 522
516 if (!p_obj_buf) 523 if (!p_obj_buf)
517 free(obj_buf); 524 free(obj_buf);
@@ -526,6 +533,8 @@ errout:
526 free(kbuild_dir); 533 free(kbuild_dir);
527 free(kbuild_include_opts); 534 free(kbuild_include_opts);
528 free(obj_buf); 535 free(obj_buf);
536 free(perf_bpf_include_opts);
537 free(perf_include_dir);
529 if (p_obj_buf) 538 if (p_obj_buf)
530 *p_obj_buf = NULL; 539 *p_obj_buf = NULL;
531 if (p_obj_buf_sz) 540 if (p_obj_buf_sz)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 32d50492505d..e7b4a8b513f2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -24,6 +24,7 @@
24 24
25#include "sane_ctype.h" 25#include "sane_ctype.h"
26#include <symbol/kallsyms.h> 26#include <symbol/kallsyms.h>
27#include <linux/mman.h>
27 28
28static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); 29static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
29 30
@@ -81,8 +82,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
81 machine->kptr_restrict_warned = false; 82 machine->kptr_restrict_warned = false;
82 machine->comm_exec = false; 83 machine->comm_exec = false;
83 machine->kernel_start = 0; 84 machine->kernel_start = 0;
84 85 machine->vmlinux_map = NULL;
85 memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
86 86
87 machine->root_dir = strdup(root_dir); 87 machine->root_dir = strdup(root_dir);
88 if (machine->root_dir == NULL) 88 if (machine->root_dir == NULL)
@@ -137,13 +137,11 @@ struct machine *machine__new_kallsyms(void)
137 struct machine *machine = machine__new_host(); 137 struct machine *machine = machine__new_host();
138 /* 138 /*
139 * FIXME: 139 * FIXME:
140 * 1) MAP__FUNCTION will go away when we stop loading separate maps for 140 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely
141 * functions and data objects.
142 * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely
143 * ask for not using the kcore parsing code, once this one is fixed 141 * ask for not using the kcore parsing code, once this one is fixed
144 * to create a map per module. 142 * to create a map per module.
145 */ 143 */
146 if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { 144 if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
147 machine__delete(machine); 145 machine__delete(machine);
148 machine = NULL; 146 machine = NULL;
149 } 147 }
@@ -673,8 +671,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
673 if (kmod_path__parse_name(&m, filename)) 671 if (kmod_path__parse_name(&m, filename))
674 return NULL; 672 return NULL;
675 673
676 map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, 674 map = map_groups__find_by_name(&machine->kmaps, m.name);
677 m.name);
678 if (map) { 675 if (map) {
679 /* 676 /*
680 * If the map's dso is an offline module, give dso__load() 677 * If the map's dso is an offline module, give dso__load()
@@ -689,7 +686,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
689 if (dso == NULL) 686 if (dso == NULL)
690 goto out; 687 goto out;
691 688
692 map = map__new2(start, dso, MAP__FUNCTION); 689 map = map__new2(start, dso);
693 if (map == NULL) 690 if (map == NULL)
694 goto out; 691 goto out;
695 692
@@ -810,8 +807,8 @@ struct process_args {
810 u64 start; 807 u64 start;
811}; 808};
812 809
813static void machine__get_kallsyms_filename(struct machine *machine, char *buf, 810void machine__get_kallsyms_filename(struct machine *machine, char *buf,
814 size_t bufsz) 811 size_t bufsz)
815{ 812{
816 if (machine__is_default_guest(machine)) 813 if (machine__is_default_guest(machine))
817 scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); 814 scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
@@ -854,65 +851,171 @@ static int machine__get_running_kernel_start(struct machine *machine,
854 return 0; 851 return 0;
855} 852}
856 853
854int machine__create_extra_kernel_map(struct machine *machine,
855 struct dso *kernel,
856 struct extra_kernel_map *xm)
857{
858 struct kmap *kmap;
859 struct map *map;
860
861 map = map__new2(xm->start, kernel);
862 if (!map)
863 return -1;
864
865 map->end = xm->end;
866 map->pgoff = xm->pgoff;
867
868 kmap = map__kmap(map);
869
870 kmap->kmaps = &machine->kmaps;
871 strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
872
873 map_groups__insert(&machine->kmaps, map);
874
875 pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
876 kmap->name, map->start, map->end);
877
878 map__put(map);
879
880 return 0;
881}
882
883static u64 find_entry_trampoline(struct dso *dso)
884{
885 /* Duplicates are removed so lookup all aliases */
886 const char *syms[] = {
887 "_entry_trampoline",
888 "__entry_trampoline_start",
889 "entry_SYSCALL_64_trampoline",
890 };
891 struct symbol *sym = dso__first_symbol(dso);
892 unsigned int i;
893
894 for (; sym; sym = dso__next_symbol(sym)) {
895 if (sym->binding != STB_GLOBAL)
896 continue;
897 for (i = 0; i < ARRAY_SIZE(syms); i++) {
898 if (!strcmp(sym->name, syms[i]))
899 return sym->start;
900 }
901 }
902
903 return 0;
904}
905
906/*
907 * These values can be used for kernels that do not have symbols for the entry
908 * trampolines in kallsyms.
909 */
910#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL
911#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000
912#define X86_64_ENTRY_TRAMPOLINE 0x6000
913
914/* Map x86_64 PTI entry trampolines */
915int machine__map_x86_64_entry_trampolines(struct machine *machine,
916 struct dso *kernel)
917{
918 struct map_groups *kmaps = &machine->kmaps;
919 struct maps *maps = &kmaps->maps;
920 int nr_cpus_avail, cpu;
921 bool found = false;
922 struct map *map;
923 u64 pgoff;
924
925 /*
926 * In the vmlinux case, pgoff is a virtual address which must now be
927 * mapped to a vmlinux offset.
928 */
929 for (map = maps__first(maps); map; map = map__next(map)) {
930 struct kmap *kmap = __map__kmap(map);
931 struct map *dest_map;
932
933 if (!kmap || !is_entry_trampoline(kmap->name))
934 continue;
935
936 dest_map = map_groups__find(kmaps, map->pgoff);
937 if (dest_map != map)
938 map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
939 found = true;
940 }
941 if (found || machine->trampolines_mapped)
942 return 0;
943
944 pgoff = find_entry_trampoline(kernel);
945 if (!pgoff)
946 return 0;
947
948 nr_cpus_avail = machine__nr_cpus_avail(machine);
949
950 /* Add a 1 page map for each CPU's entry trampoline */
951 for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
952 u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
953 cpu * X86_64_CPU_ENTRY_AREA_SIZE +
954 X86_64_ENTRY_TRAMPOLINE;
955 struct extra_kernel_map xm = {
956 .start = va,
957 .end = va + page_size,
958 .pgoff = pgoff,
959 };
960
961 strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
962
963 if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
964 return -1;
965 }
966
967 machine->trampolines_mapped = nr_cpus_avail;
968
969 return 0;
970}
971
972int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
973 struct dso *kernel __maybe_unused)
974{
975 return 0;
976}
977
857static int 978static int
858__machine__create_kernel_maps(struct machine *machine, struct dso *kernel) 979__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
859{ 980{
860 int type; 981 struct kmap *kmap;
982 struct map *map;
861 983
862 /* In case of renewal the kernel map, destroy previous one */ 984 /* In case of renewal the kernel map, destroy previous one */
863 machine__destroy_kernel_maps(machine); 985 machine__destroy_kernel_maps(machine);
864 986
865 for (type = 0; type < MAP__NR_TYPES; ++type) { 987 machine->vmlinux_map = map__new2(0, kernel);
866 struct kmap *kmap; 988 if (machine->vmlinux_map == NULL)
867 struct map *map; 989 return -1;
868
869 machine->vmlinux_maps[type] = map__new2(0, kernel, type);
870 if (machine->vmlinux_maps[type] == NULL)
871 return -1;
872 990
873 machine->vmlinux_maps[type]->map_ip = 991 machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
874 machine->vmlinux_maps[type]->unmap_ip = 992 map = machine__kernel_map(machine);
875 identity__map_ip; 993 kmap = map__kmap(map);
876 map = __machine__kernel_map(machine, type); 994 if (!kmap)
877 kmap = map__kmap(map); 995 return -1;
878 if (!kmap)
879 return -1;
880 996
881 kmap->kmaps = &machine->kmaps; 997 kmap->kmaps = &machine->kmaps;
882 map_groups__insert(&machine->kmaps, map); 998 map_groups__insert(&machine->kmaps, map);
883 }
884 999
885 return 0; 1000 return 0;
886} 1001}
887 1002
888void machine__destroy_kernel_maps(struct machine *machine) 1003void machine__destroy_kernel_maps(struct machine *machine)
889{ 1004{
890 int type; 1005 struct kmap *kmap;
891 1006 struct map *map = machine__kernel_map(machine);
892 for (type = 0; type < MAP__NR_TYPES; ++type) {
893 struct kmap *kmap;
894 struct map *map = __machine__kernel_map(machine, type);
895
896 if (map == NULL)
897 continue;
898 1007
899 kmap = map__kmap(map); 1008 if (map == NULL)
900 map_groups__remove(&machine->kmaps, map); 1009 return;
901 if (kmap && kmap->ref_reloc_sym) {
902 /*
903 * ref_reloc_sym is shared among all maps, so free just
904 * on one of them.
905 */
906 if (type == MAP__FUNCTION) {
907 zfree((char **)&kmap->ref_reloc_sym->name);
908 zfree(&kmap->ref_reloc_sym);
909 } else
910 kmap->ref_reloc_sym = NULL;
911 }
912 1010
913 map__put(machine->vmlinux_maps[type]); 1011 kmap = map__kmap(map);
914 machine->vmlinux_maps[type] = NULL; 1012 map_groups__remove(&machine->kmaps, map);
1013 if (kmap && kmap->ref_reloc_sym) {
1014 zfree((char **)&kmap->ref_reloc_sym->name);
1015 zfree(&kmap->ref_reloc_sym);
915 } 1016 }
1017
1018 map__zput(machine->vmlinux_map);
916} 1019}
917 1020
918int machines__create_guest_kernel_maps(struct machines *machines) 1021int machines__create_guest_kernel_maps(struct machines *machines)
@@ -989,32 +1092,31 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
989 return machine__create_kernel_maps(machine); 1092 return machine__create_kernel_maps(machine);
990} 1093}
991 1094
992int machine__load_kallsyms(struct machine *machine, const char *filename, 1095int machine__load_kallsyms(struct machine *machine, const char *filename)
993 enum map_type type)
994{ 1096{
995 struct map *map = machine__kernel_map(machine); 1097 struct map *map = machine__kernel_map(machine);
996 int ret = __dso__load_kallsyms(map->dso, filename, map, true); 1098 int ret = __dso__load_kallsyms(map->dso, filename, map, true);
997 1099
998 if (ret > 0) { 1100 if (ret > 0) {
999 dso__set_loaded(map->dso, type); 1101 dso__set_loaded(map->dso);
1000 /* 1102 /*
1001 * Since /proc/kallsyms will have multiple sessions for the 1103 * Since /proc/kallsyms will have multiple sessions for the
1002 * kernel, with modules between them, fixup the end of all 1104 * kernel, with modules between them, fixup the end of all
1003 * sections. 1105 * sections.
1004 */ 1106 */
1005 __map_groups__fixup_end(&machine->kmaps, type); 1107 map_groups__fixup_end(&machine->kmaps);
1006 } 1108 }
1007 1109
1008 return ret; 1110 return ret;
1009} 1111}
1010 1112
1011int machine__load_vmlinux_path(struct machine *machine, enum map_type type) 1113int machine__load_vmlinux_path(struct machine *machine)
1012{ 1114{
1013 struct map *map = machine__kernel_map(machine); 1115 struct map *map = machine__kernel_map(machine);
1014 int ret = dso__load_vmlinux_path(map->dso, map); 1116 int ret = dso__load_vmlinux_path(map->dso, map);
1015 1117
1016 if (ret > 0) 1118 if (ret > 0)
1017 dso__set_loaded(map->dso, type); 1119 dso__set_loaded(map->dso);
1018 1120
1019 return ret; 1121 return ret;
1020} 1122}
@@ -1055,10 +1157,9 @@ static bool is_kmod_dso(struct dso *dso)
1055static int map_groups__set_module_path(struct map_groups *mg, const char *path, 1157static int map_groups__set_module_path(struct map_groups *mg, const char *path,
1056 struct kmod_path *m) 1158 struct kmod_path *m)
1057{ 1159{
1058 struct map *map;
1059 char *long_name; 1160 char *long_name;
1161 struct map *map = map_groups__find_by_name(mg, m->name);
1060 1162
1061 map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
1062 if (map == NULL) 1163 if (map == NULL)
1063 return 0; 1164 return 0;
1064 1165
@@ -1207,19 +1308,14 @@ static int machine__create_modules(struct machine *machine)
1207static void machine__set_kernel_mmap(struct machine *machine, 1308static void machine__set_kernel_mmap(struct machine *machine,
1208 u64 start, u64 end) 1309 u64 start, u64 end)
1209{ 1310{
1210 int i; 1311 machine->vmlinux_map->start = start;
1211 1312 machine->vmlinux_map->end = end;
1212 for (i = 0; i < MAP__NR_TYPES; i++) { 1313 /*
1213 machine->vmlinux_maps[i]->start = start; 1314 * Be a bit paranoid here, some perf.data file came with
1214 machine->vmlinux_maps[i]->end = end; 1315 * a zero sized synthesized MMAP event for the kernel.
1215 1316 */
1216 /* 1317 if (start == 0 && end == 0)
1217 * Be a bit paranoid here, some perf.data file came with 1318 machine->vmlinux_map->end = ~0ULL;
1218 * a zero sized synthesized MMAP event for the kernel.
1219 */
1220 if (start == 0 && end == 0)
1221 machine->vmlinux_maps[i]->end = ~0ULL;
1222 }
1223} 1319}
1224 1320
1225int machine__create_kernel_maps(struct machine *machine) 1321int machine__create_kernel_maps(struct machine *machine)
@@ -1234,9 +1330,8 @@ int machine__create_kernel_maps(struct machine *machine)
1234 return -1; 1330 return -1;
1235 1331
1236 ret = __machine__create_kernel_maps(machine, kernel); 1332 ret = __machine__create_kernel_maps(machine, kernel);
1237 dso__put(kernel);
1238 if (ret < 0) 1333 if (ret < 0)
1239 return -1; 1334 goto out_put;
1240 1335
1241 if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { 1336 if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
1242 if (machine__is_host(machine)) 1337 if (machine__is_host(machine))
@@ -1249,9 +1344,10 @@ int machine__create_kernel_maps(struct machine *machine)
1249 1344
1250 if (!machine__get_running_kernel_start(machine, &name, &addr)) { 1345 if (!machine__get_running_kernel_start(machine, &name, &addr)) {
1251 if (name && 1346 if (name &&
1252 maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { 1347 map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
1253 machine__destroy_kernel_maps(machine); 1348 machine__destroy_kernel_maps(machine);
1254 return -1; 1349 ret = -1;
1350 goto out_put;
1255 } 1351 }
1256 1352
1257 /* we have a real start address now, so re-order the kmaps */ 1353 /* we have a real start address now, so re-order the kmaps */
@@ -1267,12 +1363,16 @@ int machine__create_kernel_maps(struct machine *machine)
1267 map__put(map); 1363 map__put(map);
1268 } 1364 }
1269 1365
1366 if (machine__create_extra_kernel_maps(machine, kernel))
1367 pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
1368
1270 /* update end address of the kernel map using adjacent module address */ 1369 /* update end address of the kernel map using adjacent module address */
1271 map = map__next(machine__kernel_map(machine)); 1370 map = map__next(machine__kernel_map(machine));
1272 if (map) 1371 if (map)
1273 machine__set_kernel_mmap(machine, addr, map->start); 1372 machine__set_kernel_mmap(machine, addr, map->start);
1274 1373out_put:
1275 return 0; 1374 dso__put(kernel);
1375 return ret;
1276} 1376}
1277 1377
1278static bool machine__uses_kcore(struct machine *machine) 1378static bool machine__uses_kcore(struct machine *machine)
@@ -1287,6 +1387,32 @@ static bool machine__uses_kcore(struct machine *machine)
1287 return false; 1387 return false;
1288} 1388}
1289 1389
1390static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
1391 union perf_event *event)
1392{
1393 return machine__is(machine, "x86_64") &&
1394 is_entry_trampoline(event->mmap.filename);
1395}
1396
1397static int machine__process_extra_kernel_map(struct machine *machine,
1398 union perf_event *event)
1399{
1400 struct map *kernel_map = machine__kernel_map(machine);
1401 struct dso *kernel = kernel_map ? kernel_map->dso : NULL;
1402 struct extra_kernel_map xm = {
1403 .start = event->mmap.start,
1404 .end = event->mmap.start + event->mmap.len,
1405 .pgoff = event->mmap.pgoff,
1406 };
1407
1408 if (kernel == NULL)
1409 return -1;
1410
1411 strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
1412
1413 return machine__create_extra_kernel_map(machine, kernel, &xm);
1414}
1415
1290static int machine__process_kernel_mmap_event(struct machine *machine, 1416static int machine__process_kernel_mmap_event(struct machine *machine,
1291 union perf_event *event) 1417 union perf_event *event)
1292{ 1418{
@@ -1379,9 +1505,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1379 * time /proc/sys/kernel/kptr_restrict was non zero. 1505 * time /proc/sys/kernel/kptr_restrict was non zero.
1380 */ 1506 */
1381 if (event->mmap.pgoff != 0) { 1507 if (event->mmap.pgoff != 0) {
1382 maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 1508 map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
1383 symbol_name, 1509 symbol_name,
1384 event->mmap.pgoff); 1510 event->mmap.pgoff);
1385 } 1511 }
1386 1512
1387 if (machine__is_default_guest(machine)) { 1513 if (machine__is_default_guest(machine)) {
@@ -1390,6 +1516,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1390 */ 1516 */
1391 dso__load(kernel, machine__kernel_map(machine)); 1517 dso__load(kernel, machine__kernel_map(machine));
1392 } 1518 }
1519 } else if (perf_event__is_extra_kernel_mmap(machine, event)) {
1520 return machine__process_extra_kernel_map(machine, event);
1393 } 1521 }
1394 return 0; 1522 return 0;
1395out_problem: 1523out_problem:
@@ -1402,7 +1530,6 @@ int machine__process_mmap2_event(struct machine *machine,
1402{ 1530{
1403 struct thread *thread; 1531 struct thread *thread;
1404 struct map *map; 1532 struct map *map;
1405 enum map_type type;
1406 int ret = 0; 1533 int ret = 0;
1407 1534
1408 if (dump_trace) 1535 if (dump_trace)
@@ -1421,11 +1548,6 @@ int machine__process_mmap2_event(struct machine *machine,
1421 if (thread == NULL) 1548 if (thread == NULL)
1422 goto out_problem; 1549 goto out_problem;
1423 1550
1424 if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
1425 type = MAP__VARIABLE;
1426 else
1427 type = MAP__FUNCTION;
1428
1429 map = map__new(machine, event->mmap2.start, 1551 map = map__new(machine, event->mmap2.start,
1430 event->mmap2.len, event->mmap2.pgoff, 1552 event->mmap2.len, event->mmap2.pgoff,
1431 event->mmap2.maj, 1553 event->mmap2.maj,
@@ -1433,7 +1555,7 @@ int machine__process_mmap2_event(struct machine *machine,
1433 event->mmap2.ino_generation, 1555 event->mmap2.ino_generation,
1434 event->mmap2.prot, 1556 event->mmap2.prot,
1435 event->mmap2.flags, 1557 event->mmap2.flags,
1436 event->mmap2.filename, type, thread); 1558 event->mmap2.filename, thread);
1437 1559
1438 if (map == NULL) 1560 if (map == NULL)
1439 goto out_problem_map; 1561 goto out_problem_map;
@@ -1460,7 +1582,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
1460{ 1582{
1461 struct thread *thread; 1583 struct thread *thread;
1462 struct map *map; 1584 struct map *map;
1463 enum map_type type; 1585 u32 prot = 0;
1464 int ret = 0; 1586 int ret = 0;
1465 1587
1466 if (dump_trace) 1588 if (dump_trace)
@@ -1479,16 +1601,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
1479 if (thread == NULL) 1601 if (thread == NULL)
1480 goto out_problem; 1602 goto out_problem;
1481 1603
1482 if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) 1604 if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
1483 type = MAP__VARIABLE; 1605 prot = PROT_EXEC;
1484 else
1485 type = MAP__FUNCTION;
1486 1606
1487 map = map__new(machine, event->mmap.start, 1607 map = map__new(machine, event->mmap.start,
1488 event->mmap.len, event->mmap.pgoff, 1608 event->mmap.len, event->mmap.pgoff,
1489 0, 0, 0, 0, 0, 0, 1609 0, 0, 0, 0, prot, 0,
1490 event->mmap.filename, 1610 event->mmap.filename,
1491 type, thread); 1611 thread);
1492 1612
1493 if (map == NULL) 1613 if (map == NULL)
1494 goto out_problem_map; 1614 goto out_problem_map;
@@ -1664,7 +1784,7 @@ static void ip__resolve_ams(struct thread *thread,
1664 * Thus, we have to try consecutively until we find a match 1784 * Thus, we have to try consecutively until we find a match
1665 * or else, the symbol is unknown 1785 * or else, the symbol is unknown
1666 */ 1786 */
1667 thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); 1787 thread__find_cpumode_addr_location(thread, ip, &al);
1668 1788
1669 ams->addr = ip; 1789 ams->addr = ip;
1670 ams->al_addr = al.addr; 1790 ams->al_addr = al.addr;
@@ -1681,15 +1801,7 @@ static void ip__resolve_data(struct thread *thread,
1681 1801
1682 memset(&al, 0, sizeof(al)); 1802 memset(&al, 0, sizeof(al));
1683 1803
1684 thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al); 1804 thread__find_symbol(thread, m, addr, &al);
1685 if (al.map == NULL) {
1686 /*
1687 * some shared data regions have execute bit set which puts
1688 * their mapping in the MAP__FUNCTION type array.
1689 * Check there as a fallback option before dropping the sample.
1690 */
1691 thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al);
1692 }
1693 1805
1694 ams->addr = addr; 1806 ams->addr = addr;
1695 ams->al_addr = al.addr; 1807 ams->al_addr = al.addr;
@@ -1758,8 +1870,7 @@ static int add_callchain_ip(struct thread *thread,
1758 al.filtered = 0; 1870 al.filtered = 0;
1759 al.sym = NULL; 1871 al.sym = NULL;
1760 if (!cpumode) { 1872 if (!cpumode) {
1761 thread__find_cpumode_addr_location(thread, MAP__FUNCTION, 1873 thread__find_cpumode_addr_location(thread, ip, &al);
1762 ip, &al);
1763 } else { 1874 } else {
1764 if (ip >= PERF_CONTEXT_MAX) { 1875 if (ip >= PERF_CONTEXT_MAX) {
1765 switch (ip) { 1876 switch (ip) {
@@ -1784,8 +1895,7 @@ static int add_callchain_ip(struct thread *thread,
1784 } 1895 }
1785 return 0; 1896 return 0;
1786 } 1897 }
1787 thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, 1898 thread__find_symbol(thread, *cpumode, ip, &al);
1788 ip, &al);
1789 } 1899 }
1790 1900
1791 if (al.sym != NULL) { 1901 if (al.sym != NULL) {
@@ -1810,7 +1920,7 @@ static int add_callchain_ip(struct thread *thread,
1810 } 1920 }
1811 1921
1812 srcline = callchain_srcline(al.map, al.sym, al.addr); 1922 srcline = callchain_srcline(al.map, al.sym, al.addr);
1813 return callchain_cursor_append(cursor, al.addr, al.map, al.sym, 1923 return callchain_cursor_append(cursor, ip, al.map, al.sym,
1814 branch, flags, nr_loop_iter, 1924 branch, flags, nr_loop_iter,
1815 iter_cycles, branch_from, srcline); 1925 iter_cycles, branch_from, srcline);
1816} 1926}
@@ -2342,6 +2452,20 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
2342 return 0; 2452 return 0;
2343} 2453}
2344 2454
2455/*
2456 * Compares the raw arch string. N.B. see instead perf_env__arch() if a
2457 * normalized arch is needed.
2458 */
2459bool machine__is(struct machine *machine, const char *arch)
2460{
2461 return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
2462}
2463
2464int machine__nr_cpus_avail(struct machine *machine)
2465{
2466 return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
2467}
2468
2345int machine__get_kernel_start(struct machine *machine) 2469int machine__get_kernel_start(struct machine *machine)
2346{ 2470{
2347 struct map *map = machine__kernel_map(machine); 2471 struct map *map = machine__kernel_map(machine);
@@ -2358,7 +2482,12 @@ int machine__get_kernel_start(struct machine *machine)
2358 machine->kernel_start = 1ULL << 63; 2482 machine->kernel_start = 1ULL << 63;
2359 if (map) { 2483 if (map) {
2360 err = map__load(map); 2484 err = map__load(map);
2361 if (!err) 2485 /*
2486 * On x86_64, PTI entry trampolines are less than the
2487 * start of kernel text, but still above 2^63. So leave
2488 * kernel_start = 1ULL << 63 for x86_64.
2489 */
2490 if (!err && !machine__is(machine, "x86_64"))
2362 machine->kernel_start = map->start; 2491 machine->kernel_start = map->start;
2363 } 2492 }
2364 return err; 2493 return err;
@@ -2373,7 +2502,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
2373{ 2502{
2374 struct machine *machine = vmachine; 2503 struct machine *machine = vmachine;
2375 struct map *map; 2504 struct map *map;
2376 struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map); 2505 struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
2377 2506
2378 if (sym == NULL) 2507 if (sym == NULL)
2379 return NULL; 2508 return NULL;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 66cc200ef86f..1de7660d93e9 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -49,13 +49,14 @@ struct machine {
49 struct perf_env *env; 49 struct perf_env *env;
50 struct dsos dsos; 50 struct dsos dsos;
51 struct map_groups kmaps; 51 struct map_groups kmaps;
52 struct map *vmlinux_maps[MAP__NR_TYPES]; 52 struct map *vmlinux_map;
53 u64 kernel_start; 53 u64 kernel_start;
54 pid_t *current_tid; 54 pid_t *current_tid;
55 union { /* Tool specific area */ 55 union { /* Tool specific area */
56 void *priv; 56 void *priv;
57 u64 db_id; 57 u64 db_id;
58 }; 58 };
59 bool trampolines_mapped;
59}; 60};
60 61
61static inline struct threads *machine__threads(struct machine *machine, pid_t tid) 62static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
@@ -64,16 +65,22 @@ static inline struct threads *machine__threads(struct machine *machine, pid_t ti
64 return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; 65 return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
65} 66}
66 67
68/*
69 * The main kernel (vmlinux) map
70 */
67static inline 71static inline
68struct map *__machine__kernel_map(struct machine *machine, enum map_type type) 72struct map *machine__kernel_map(struct machine *machine)
69{ 73{
70 return machine->vmlinux_maps[type]; 74 return machine->vmlinux_map;
71} 75}
72 76
77/*
78 * kernel (the one returned by machine__kernel_map()) plus kernel modules maps
79 */
73static inline 80static inline
74struct map *machine__kernel_map(struct machine *machine) 81struct maps *machine__kernel_maps(struct machine *machine)
75{ 82{
76 return __machine__kernel_map(machine, MAP__FUNCTION); 83 return &machine->kmaps.maps;
77} 84}
78 85
79int machine__get_kernel_start(struct machine *machine); 86int machine__get_kernel_start(struct machine *machine);
@@ -182,6 +189,9 @@ static inline bool machine__is_host(struct machine *machine)
182 return machine ? machine->pid == HOST_KERNEL_ID : false; 189 return machine ? machine->pid == HOST_KERNEL_ID : false;
183} 190}
184 191
192bool machine__is(struct machine *machine, const char *arch);
193int machine__nr_cpus_avail(struct machine *machine);
194
185struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); 195struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
186struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); 196struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
187 197
@@ -190,44 +200,27 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
190size_t machine__fprintf(struct machine *machine, FILE *fp); 200size_t machine__fprintf(struct machine *machine, FILE *fp);
191 201
192static inline 202static inline
193struct symbol *machine__find_kernel_symbol(struct machine *machine, 203struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
194 enum map_type type, u64 addr,
195 struct map **mapp) 204 struct map **mapp)
196{ 205{
197 return map_groups__find_symbol(&machine->kmaps, type, addr, mapp); 206 return map_groups__find_symbol(&machine->kmaps, addr, mapp);
198} 207}
199 208
200static inline 209static inline
201struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, 210struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
202 enum map_type type, const char *name, 211 const char *name,
203 struct map **mapp) 212 struct map **mapp)
204{ 213{
205 return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp); 214 return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp);
206}
207
208static inline
209struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
210 struct map **mapp)
211{
212 return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
213 mapp);
214}
215
216static inline
217struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
218 const char *name,
219 struct map **mapp)
220{
221 return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
222} 215}
223 216
224struct map *machine__findnew_module_map(struct machine *machine, u64 start, 217struct map *machine__findnew_module_map(struct machine *machine, u64 start,
225 const char *filename); 218 const char *filename);
226int arch__fix_module_text_start(u64 *start, const char *name); 219int arch__fix_module_text_start(u64 *start, const char *name);
227 220
228int machine__load_kallsyms(struct machine *machine, const char *filename, 221int machine__load_kallsyms(struct machine *machine, const char *filename);
229 enum map_type type); 222
230int machine__load_vmlinux_path(struct machine *machine, enum map_type type); 223int machine__load_vmlinux_path(struct machine *machine);
231 224
232size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, 225size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
233 bool (skip)(struct dso *dso, int parm), int parm); 226 bool (skip)(struct dso *dso, int parm), int parm);
@@ -276,4 +269,25 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
276 */ 269 */
277char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); 270char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
278 271
272void machine__get_kallsyms_filename(struct machine *machine, char *buf,
273 size_t bufsz);
274
275int machine__create_extra_kernel_maps(struct machine *machine,
276 struct dso *kernel);
277
278/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
279struct extra_kernel_map {
280 u64 start;
281 u64 end;
282 u64 pgoff;
283 char name[KMAP_NAME_LEN];
284};
285
286int machine__create_extra_kernel_map(struct machine *machine,
287 struct dso *kernel,
288 struct extra_kernel_map *xm);
289
290int machine__map_x86_64_entry_trampolines(struct machine *machine,
291 struct dso *kernel);
292
279#endif /* __PERF_MACHINE_H */ 293#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8fe57031e1a8..89ac5b5dc218 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -22,11 +22,6 @@
22 22
23static void __maps__insert(struct maps *maps, struct map *map); 23static void __maps__insert(struct maps *maps, struct map *map);
24 24
25const char *map_type__name[MAP__NR_TYPES] = {
26 [MAP__FUNCTION] = "Functions",
27 [MAP__VARIABLE] = "Variables",
28};
29
30static inline int is_anon_memory(const char *filename, u32 flags) 25static inline int is_anon_memory(const char *filename, u32 flags)
31{ 26{
32 return flags & MAP_HUGETLB || 27 return flags & MAP_HUGETLB ||
@@ -129,10 +124,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
129 return false; 124 return false;
130} 125}
131 126
132void map__init(struct map *map, enum map_type type, 127void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
133 u64 start, u64 end, u64 pgoff, struct dso *dso)
134{ 128{
135 map->type = type;
136 map->start = start; 129 map->start = start;
137 map->end = end; 130 map->end = end;
138 map->pgoff = pgoff; 131 map->pgoff = pgoff;
@@ -149,7 +142,7 @@ void map__init(struct map *map, enum map_type type,
149struct map *map__new(struct machine *machine, u64 start, u64 len, 142struct map *map__new(struct machine *machine, u64 start, u64 len,
150 u64 pgoff, u32 d_maj, u32 d_min, u64 ino, 143 u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
151 u64 ino_gen, u32 prot, u32 flags, char *filename, 144 u64 ino_gen, u32 prot, u32 flags, char *filename,
152 enum map_type type, struct thread *thread) 145 struct thread *thread)
153{ 146{
154 struct map *map = malloc(sizeof(*map)); 147 struct map *map = malloc(sizeof(*map));
155 struct nsinfo *nsi = NULL; 148 struct nsinfo *nsi = NULL;
@@ -173,7 +166,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
173 map->flags = flags; 166 map->flags = flags;
174 nsi = nsinfo__get(thread->nsinfo); 167 nsi = nsinfo__get(thread->nsinfo);
175 168
176 if ((anon || no_dso) && nsi && type == MAP__FUNCTION) { 169 if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
177 snprintf(newfilename, sizeof(newfilename), 170 snprintf(newfilename, sizeof(newfilename),
178 "/tmp/perf-%d.map", nsi->pid); 171 "/tmp/perf-%d.map", nsi->pid);
179 filename = newfilename; 172 filename = newfilename;
@@ -203,7 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
203 if (dso == NULL) 196 if (dso == NULL)
204 goto out_delete; 197 goto out_delete;
205 198
206 map__init(map, type, start, start + len, pgoff, dso); 199 map__init(map, start, start + len, pgoff, dso);
207 200
208 if (anon || no_dso) { 201 if (anon || no_dso) {
209 map->map_ip = map->unmap_ip = identity__map_ip; 202 map->map_ip = map->unmap_ip = identity__map_ip;
@@ -213,8 +206,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
213 * functions still return NULL, and we avoid the 206 * functions still return NULL, and we avoid the
214 * unnecessary map__load warning. 207 * unnecessary map__load warning.
215 */ 208 */
216 if (type != MAP__FUNCTION) 209 if (!(prot & PROT_EXEC))
217 dso__set_loaded(dso, map->type); 210 dso__set_loaded(dso);
218 } 211 }
219 dso->nsinfo = nsi; 212 dso->nsinfo = nsi;
220 dso__put(dso); 213 dso__put(dso);
@@ -231,7 +224,7 @@ out_delete:
231 * they are loaded) and for vmlinux, where only after we load all the 224 * they are loaded) and for vmlinux, where only after we load all the
232 * symbols we'll know where it starts and ends. 225 * symbols we'll know where it starts and ends.
233 */ 226 */
234struct map *map__new2(u64 start, struct dso *dso, enum map_type type) 227struct map *map__new2(u64 start, struct dso *dso)
235{ 228{
236 struct map *map = calloc(1, (sizeof(*map) + 229 struct map *map = calloc(1, (sizeof(*map) +
237 (dso->kernel ? sizeof(struct kmap) : 0))); 230 (dso->kernel ? sizeof(struct kmap) : 0)));
@@ -239,7 +232,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
239 /* 232 /*
240 * ->end will be filled after we load all the symbols 233 * ->end will be filled after we load all the symbols
241 */ 234 */
242 map__init(map, type, start, 0, 0, dso); 235 map__init(map, start, 0, 0, dso);
243 } 236 }
244 237
245 return map; 238 return map;
@@ -256,7 +249,19 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
256 */ 249 */
257bool __map__is_kernel(const struct map *map) 250bool __map__is_kernel(const struct map *map)
258{ 251{
259 return __machine__kernel_map(map->groups->machine, map->type) == map; 252 return machine__kernel_map(map->groups->machine) == map;
253}
254
255bool __map__is_extra_kernel_map(const struct map *map)
256{
257 struct kmap *kmap = __map__kmap((struct map *)map);
258
259 return kmap && kmap->name[0];
260}
261
262bool map__has_symbols(const struct map *map)
263{
264 return dso__has_symbols(map->dso);
260} 265}
261 266
262static void map__exit(struct map *map) 267static void map__exit(struct map *map)
@@ -279,7 +284,7 @@ void map__put(struct map *map)
279 284
280void map__fixup_start(struct map *map) 285void map__fixup_start(struct map *map)
281{ 286{
282 struct rb_root *symbols = &map->dso->symbols[map->type]; 287 struct rb_root *symbols = &map->dso->symbols;
283 struct rb_node *nd = rb_first(symbols); 288 struct rb_node *nd = rb_first(symbols);
284 if (nd != NULL) { 289 if (nd != NULL) {
285 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 290 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -289,7 +294,7 @@ void map__fixup_start(struct map *map)
289 294
290void map__fixup_end(struct map *map) 295void map__fixup_end(struct map *map)
291{ 296{
292 struct rb_root *symbols = &map->dso->symbols[map->type]; 297 struct rb_root *symbols = &map->dso->symbols;
293 struct rb_node *nd = rb_last(symbols); 298 struct rb_node *nd = rb_last(symbols);
294 if (nd != NULL) { 299 if (nd != NULL) {
295 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 300 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -304,7 +309,7 @@ int map__load(struct map *map)
304 const char *name = map->dso->long_name; 309 const char *name = map->dso->long_name;
305 int nr; 310 int nr;
306 311
307 if (dso__loaded(map->dso, map->type)) 312 if (dso__loaded(map->dso))
308 return 0; 313 return 0;
309 314
310 nr = dso__load(map->dso, map); 315 nr = dso__load(map->dso, map);
@@ -348,7 +353,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr)
348 if (map__load(map) < 0) 353 if (map__load(map) < 0)
349 return NULL; 354 return NULL;
350 355
351 return dso__find_symbol(map->dso, map->type, addr); 356 return dso__find_symbol(map->dso, addr);
352} 357}
353 358
354struct symbol *map__find_symbol_by_name(struct map *map, const char *name) 359struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
@@ -356,10 +361,10 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
356 if (map__load(map) < 0) 361 if (map__load(map) < 0)
357 return NULL; 362 return NULL;
358 363
359 if (!dso__sorted_by_name(map->dso, map->type)) 364 if (!dso__sorted_by_name(map->dso))
360 dso__sort_by_name(map->dso, map->type); 365 dso__sort_by_name(map->dso);
361 366
362 return dso__find_symbol_by_name(map->dso, map->type, name); 367 return dso__find_symbol_by_name(map->dso, name);
363} 368}
364 369
365struct map *map__clone(struct map *from) 370struct map *map__clone(struct map *from)
@@ -410,16 +415,20 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
410 return fprintf(fp, "%s", dsoname); 415 return fprintf(fp, "%s", dsoname);
411} 416}
412 417
418char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
419{
420 if (map == NULL)
421 return SRCLINE_UNKNOWN;
422 return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr);
423}
424
413int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, 425int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
414 FILE *fp) 426 FILE *fp)
415{ 427{
416 char *srcline;
417 int ret = 0; 428 int ret = 0;
418 429
419 if (map && map->dso) { 430 if (map && map->dso) {
420 srcline = get_srcline(map->dso, 431 char *srcline = map__srcline(map, addr, NULL);
421 map__rip_2objdump(map, addr), NULL,
422 true, true, addr);
423 if (srcline != SRCLINE_UNKNOWN) 432 if (srcline != SRCLINE_UNKNOWN)
424 ret = fprintf(fp, "%s%s", prefix, srcline); 433 ret = fprintf(fp, "%s%s", prefix, srcline);
425 free_srcline(srcline); 434 free_srcline(srcline);
@@ -440,6 +449,20 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
440 */ 449 */
441u64 map__rip_2objdump(struct map *map, u64 rip) 450u64 map__rip_2objdump(struct map *map, u64 rip)
442{ 451{
452 struct kmap *kmap = __map__kmap(map);
453
454 /*
455 * vmlinux does not have program headers for PTI entry trampolines and
456 * kcore may not either. However the trampoline object code is on the
457 * main kernel map, so just use that instead.
458 */
459 if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) {
460 struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine);
461
462 if (kernel_map)
463 map = kernel_map;
464 }
465
443 if (!map->dso->adjust_symbols) 466 if (!map->dso->adjust_symbols)
444 return rip; 467 return rip;
445 468
@@ -494,10 +517,7 @@ static void maps__init(struct maps *maps)
494 517
495void map_groups__init(struct map_groups *mg, struct machine *machine) 518void map_groups__init(struct map_groups *mg, struct machine *machine)
496{ 519{
497 int i; 520 maps__init(&mg->maps);
498 for (i = 0; i < MAP__NR_TYPES; ++i) {
499 maps__init(&mg->maps[i]);
500 }
501 mg->machine = machine; 521 mg->machine = machine;
502 refcount_set(&mg->refcnt, 1); 522 refcount_set(&mg->refcnt, 1);
503} 523}
@@ -525,22 +545,12 @@ static void maps__exit(struct maps *maps)
525 545
526void map_groups__exit(struct map_groups *mg) 546void map_groups__exit(struct map_groups *mg)
527{ 547{
528 int i; 548 maps__exit(&mg->maps);
529
530 for (i = 0; i < MAP__NR_TYPES; ++i)
531 maps__exit(&mg->maps[i]);
532} 549}
533 550
534bool map_groups__empty(struct map_groups *mg) 551bool map_groups__empty(struct map_groups *mg)
535{ 552{
536 int i; 553 return !maps__first(&mg->maps);
537
538 for (i = 0; i < MAP__NR_TYPES; ++i) {
539 if (maps__first(&mg->maps[i]))
540 return false;
541 }
542
543 return true;
544} 554}
545 555
546struct map_groups *map_groups__new(struct machine *machine) 556struct map_groups *map_groups__new(struct machine *machine)
@@ -566,10 +576,9 @@ void map_groups__put(struct map_groups *mg)
566} 576}
567 577
568struct symbol *map_groups__find_symbol(struct map_groups *mg, 578struct symbol *map_groups__find_symbol(struct map_groups *mg,
569 enum map_type type, u64 addr, 579 u64 addr, struct map **mapp)
570 struct map **mapp)
571{ 580{
572 struct map *map = map_groups__find(mg, type, addr); 581 struct map *map = map_groups__find(mg, addr);
573 582
574 /* Ensure map is loaded before using map->map_ip */ 583 /* Ensure map is loaded before using map->map_ip */
575 if (map != NULL && map__load(map) >= 0) { 584 if (map != NULL && map__load(map) >= 0) {
@@ -608,13 +617,10 @@ out:
608} 617}
609 618
610struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, 619struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
611 enum map_type type,
612 const char *name, 620 const char *name,
613 struct map **mapp) 621 struct map **mapp)
614{ 622{
615 struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp); 623 return maps__find_symbol_by_name(&mg->maps, name, mapp);
616
617 return sym;
618} 624}
619 625
620int map_groups__find_ams(struct addr_map_symbol *ams) 626int map_groups__find_ams(struct addr_map_symbol *ams)
@@ -622,8 +628,7 @@ int map_groups__find_ams(struct addr_map_symbol *ams)
622 if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { 628 if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
623 if (ams->map->groups == NULL) 629 if (ams->map->groups == NULL)
624 return -1; 630 return -1;
625 ams->map = map_groups__find(ams->map->groups, ams->map->type, 631 ams->map = map_groups__find(ams->map->groups, ams->addr);
626 ams->addr);
627 if (ams->map == NULL) 632 if (ams->map == NULL)
628 return -1; 633 return -1;
629 } 634 }
@@ -646,7 +651,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
646 printed += fprintf(fp, "Map:"); 651 printed += fprintf(fp, "Map:");
647 printed += map__fprintf(pos, fp); 652 printed += map__fprintf(pos, fp);
648 if (verbose > 2) { 653 if (verbose > 2) {
649 printed += dso__fprintf(pos->dso, pos->type, fp); 654 printed += dso__fprintf(pos->dso, fp);
650 printed += fprintf(fp, "--\n"); 655 printed += fprintf(fp, "--\n");
651 } 656 }
652 } 657 }
@@ -656,24 +661,14 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
656 return printed; 661 return printed;
657} 662}
658 663
659size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
660 FILE *fp)
661{
662 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
663 return printed += maps__fprintf(&mg->maps[type], fp);
664}
665
666size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) 664size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
667{ 665{
668 size_t printed = 0, i; 666 return maps__fprintf(&mg->maps, fp);
669 for (i = 0; i < MAP__NR_TYPES; ++i)
670 printed += __map_groups__fprintf_maps(mg, i, fp);
671 return printed;
672} 667}
673 668
674static void __map_groups__insert(struct map_groups *mg, struct map *map) 669static void __map_groups__insert(struct map_groups *mg, struct map *map)
675{ 670{
676 __maps__insert(&mg->maps[map->type], map); 671 __maps__insert(&mg->maps, map);
677 map->groups = mg; 672 map->groups = mg;
678} 673}
679 674
@@ -758,19 +753,18 @@ out:
758int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, 753int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
759 FILE *fp) 754 FILE *fp)
760{ 755{
761 return maps__fixup_overlappings(&mg->maps[map->type], map, fp); 756 return maps__fixup_overlappings(&mg->maps, map, fp);
762} 757}
763 758
764/* 759/*
765 * XXX This should not really _copy_ te maps, but refcount them. 760 * XXX This should not really _copy_ te maps, but refcount them.
766 */ 761 */
767int map_groups__clone(struct thread *thread, 762int map_groups__clone(struct thread *thread, struct map_groups *parent)
768 struct map_groups *parent, enum map_type type)
769{ 763{
770 struct map_groups *mg = thread->mg; 764 struct map_groups *mg = thread->mg;
771 int err = -ENOMEM; 765 int err = -ENOMEM;
772 struct map *map; 766 struct map *map;
773 struct maps *maps = &parent->maps[type]; 767 struct maps *maps = &parent->maps;
774 768
775 down_read(&maps->lock); 769 down_read(&maps->lock);
776 770
@@ -877,15 +871,22 @@ struct map *map__next(struct map *map)
877 return NULL; 871 return NULL;
878} 872}
879 873
880struct kmap *map__kmap(struct map *map) 874struct kmap *__map__kmap(struct map *map)
881{ 875{
882 if (!map->dso || !map->dso->kernel) { 876 if (!map->dso || !map->dso->kernel)
883 pr_err("Internal error: map__kmap with a non-kernel map\n");
884 return NULL; 877 return NULL;
885 }
886 return (struct kmap *)(map + 1); 878 return (struct kmap *)(map + 1);
887} 879}
888 880
881struct kmap *map__kmap(struct map *map)
882{
883 struct kmap *kmap = __map__kmap(map);
884
885 if (!kmap)
886 pr_err("Internal error: map__kmap with a non-kernel map\n");
887 return kmap;
888}
889
889struct map_groups *map__kmaps(struct map *map) 890struct map_groups *map__kmaps(struct map *map)
890{ 891{
891 struct kmap *kmap = map__kmap(map); 892 struct kmap *kmap = map__kmap(map);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e9bbe01b0ab..4cb90f242bed 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -8,19 +8,11 @@
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include <pthread.h> 9#include <pthread.h>
10#include <stdio.h> 10#include <stdio.h>
11#include <string.h>
11#include <stdbool.h> 12#include <stdbool.h>
12#include <linux/types.h> 13#include <linux/types.h>
13#include "rwsem.h" 14#include "rwsem.h"
14 15
15enum map_type {
16 MAP__FUNCTION = 0,
17 MAP__VARIABLE,
18};
19
20#define MAP__NR_TYPES (MAP__VARIABLE + 1)
21
22extern const char *map_type__name[MAP__NR_TYPES];
23
24struct dso; 16struct dso;
25struct ip_callchain; 17struct ip_callchain;
26struct ref_reloc_sym; 18struct ref_reloc_sym;
@@ -35,7 +27,6 @@ struct map {
35 }; 27 };
36 u64 start; 28 u64 start;
37 u64 end; 29 u64 end;
38 u8 /* enum map_type */ type;
39 bool erange_warned; 30 bool erange_warned;
40 u32 priv; 31 u32 priv;
41 u32 prot; 32 u32 prot;
@@ -56,9 +47,12 @@ struct map {
56 refcount_t refcnt; 47 refcount_t refcnt;
57}; 48};
58 49
50#define KMAP_NAME_LEN 256
51
59struct kmap { 52struct kmap {
60 struct ref_reloc_sym *ref_reloc_sym; 53 struct ref_reloc_sym *ref_reloc_sym;
61 struct map_groups *kmaps; 54 struct map_groups *kmaps;
55 char name[KMAP_NAME_LEN];
62}; 56};
63 57
64struct maps { 58struct maps {
@@ -67,7 +61,7 @@ struct maps {
67}; 61};
68 62
69struct map_groups { 63struct map_groups {
70 struct maps maps[MAP__NR_TYPES]; 64 struct maps maps;
71 struct machine *machine; 65 struct machine *machine;
72 refcount_t refcnt; 66 refcount_t refcnt;
73}; 67};
@@ -85,6 +79,7 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg)
85 79
86void map_groups__put(struct map_groups *mg); 80void map_groups__put(struct map_groups *mg);
87 81
82struct kmap *__map__kmap(struct map *map);
88struct kmap *map__kmap(struct map *map); 83struct kmap *map__kmap(struct map *map);
89struct map_groups *map__kmaps(struct map *map); 84struct map_groups *map__kmaps(struct map *map);
90 85
@@ -125,7 +120,7 @@ struct thread;
125 * Note: caller must ensure map->dso is not NULL (map is loaded). 120 * Note: caller must ensure map->dso is not NULL (map is loaded).
126 */ 121 */
127#define map__for_each_symbol(map, pos, n) \ 122#define map__for_each_symbol(map, pos, n) \
128 dso__for_each_symbol(map->dso, pos, n, map->type) 123 dso__for_each_symbol(map->dso, pos, n)
129 124
130/* map__for_each_symbol_with_name - iterate over the symbols in the given map 125/* map__for_each_symbol_with_name - iterate over the symbols in the given map
131 * that have the given name 126 * that have the given name
@@ -144,13 +139,13 @@ struct thread;
144#define map__for_each_symbol_by_name(map, sym_name, pos) \ 139#define map__for_each_symbol_by_name(map, sym_name, pos) \
145 __map__for_each_symbol_by_name(map, sym_name, (pos)) 140 __map__for_each_symbol_by_name(map, sym_name, (pos))
146 141
147void map__init(struct map *map, enum map_type type, 142void map__init(struct map *map,
148 u64 start, u64 end, u64 pgoff, struct dso *dso); 143 u64 start, u64 end, u64 pgoff, struct dso *dso);
149struct map *map__new(struct machine *machine, u64 start, u64 len, 144struct map *map__new(struct machine *machine, u64 start, u64 len,
150 u64 pgoff, u32 d_maj, u32 d_min, u64 ino, 145 u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
151 u64 ino_gen, u32 prot, u32 flags, 146 u64 ino_gen, u32 prot, u32 flags,
152 char *filename, enum map_type type, struct thread *thread); 147 char *filename, struct thread *thread);
153struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 148struct map *map__new2(u64 start, struct dso *dso);
154void map__delete(struct map *map); 149void map__delete(struct map *map);
155struct map *map__clone(struct map *map); 150struct map *map__clone(struct map *map);
156 151
@@ -174,6 +169,7 @@ static inline void __map__zput(struct map **map)
174int map__overlap(struct map *l, struct map *r); 169int map__overlap(struct map *l, struct map *r);
175size_t map__fprintf(struct map *map, FILE *fp); 170size_t map__fprintf(struct map *map, FILE *fp);
176size_t map__fprintf_dsoname(struct map *map, FILE *fp); 171size_t map__fprintf_dsoname(struct map *map, FILE *fp);
172char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
177int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, 173int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
178 FILE *fp); 174 FILE *fp);
179 175
@@ -185,8 +181,6 @@ void map__fixup_end(struct map *map);
185 181
186void map__reloc_vmlinux(struct map *map); 182void map__reloc_vmlinux(struct map *map);
187 183
188size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
189 FILE *fp);
190void maps__insert(struct maps *maps, struct map *map); 184void maps__insert(struct maps *maps, struct map *map);
191void maps__remove(struct maps *maps, struct map *map); 185void maps__remove(struct maps *maps, struct map *map);
192struct map *maps__find(struct maps *maps, u64 addr); 186struct map *maps__find(struct maps *maps, u64 addr);
@@ -197,34 +191,29 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
197void map_groups__init(struct map_groups *mg, struct machine *machine); 191void map_groups__init(struct map_groups *mg, struct machine *machine);
198void map_groups__exit(struct map_groups *mg); 192void map_groups__exit(struct map_groups *mg);
199int map_groups__clone(struct thread *thread, 193int map_groups__clone(struct thread *thread,
200 struct map_groups *parent, enum map_type type); 194 struct map_groups *parent);
201size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); 195size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
202 196
203int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, 197int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
204 u64 addr); 198 u64 addr);
205 199
206static inline void map_groups__insert(struct map_groups *mg, struct map *map) 200static inline void map_groups__insert(struct map_groups *mg, struct map *map)
207{ 201{
208 maps__insert(&mg->maps[map->type], map); 202 maps__insert(&mg->maps, map);
209 map->groups = mg; 203 map->groups = mg;
210} 204}
211 205
212static inline void map_groups__remove(struct map_groups *mg, struct map *map) 206static inline void map_groups__remove(struct map_groups *mg, struct map *map)
213{ 207{
214 maps__remove(&mg->maps[map->type], map); 208 maps__remove(&mg->maps, map);
215} 209}
216 210
217static inline struct map *map_groups__find(struct map_groups *mg, 211static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
218 enum map_type type, u64 addr)
219{ 212{
220 return maps__find(&mg->maps[type], addr); 213 return maps__find(&mg->maps, addr);
221} 214}
222 215
223static inline struct map *map_groups__first(struct map_groups *mg, 216struct map *map_groups__first(struct map_groups *mg);
224 enum map_type type)
225{
226 return maps__first(&mg->maps[type]);
227}
228 217
229static inline struct map *map_groups__next(struct map *map) 218static inline struct map *map_groups__next(struct map *map)
230{ 219{
@@ -232,11 +221,9 @@ static inline struct map *map_groups__next(struct map *map)
232} 221}
233 222
234struct symbol *map_groups__find_symbol(struct map_groups *mg, 223struct symbol *map_groups__find_symbol(struct map_groups *mg,
235 enum map_type type, u64 addr, 224 u64 addr, struct map **mapp);
236 struct map **mapp);
237 225
238struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, 226struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
239 enum map_type type,
240 const char *name, 227 const char *name,
241 struct map **mapp); 228 struct map **mapp);
242 229
@@ -244,24 +231,26 @@ struct addr_map_symbol;
244 231
245int map_groups__find_ams(struct addr_map_symbol *ams); 232int map_groups__find_ams(struct addr_map_symbol *ams);
246 233
247static inline
248struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
249 const char *name, struct map **mapp)
250{
251 return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
252}
253
254int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, 234int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
255 FILE *fp); 235 FILE *fp);
256 236
257struct map *map_groups__find_by_name(struct map_groups *mg, 237struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
258 enum map_type type, const char *name);
259 238
260bool __map__is_kernel(const struct map *map); 239bool __map__is_kernel(const struct map *map);
240bool __map__is_extra_kernel_map(const struct map *map);
261 241
262static inline bool __map__is_kmodule(const struct map *map) 242static inline bool __map__is_kmodule(const struct map *map)
263{ 243{
264 return !__map__is_kernel(map); 244 return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
245}
246
247bool map__has_symbols(const struct map *map);
248
249#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
250
251static inline bool is_entry_trampoline(const char *name)
252{
253 return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
265} 254}
266 255
267#endif /* __PERF_MAP_H */ 256#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2fb0272146d8..15eec49e71a1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -156,13 +156,12 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
156 (strcmp(sys_dirent->d_name, ".")) && \ 156 (strcmp(sys_dirent->d_name, ".")) && \
157 (strcmp(sys_dirent->d_name, ".."))) 157 (strcmp(sys_dirent->d_name, "..")))
158 158
159static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) 159static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
160{ 160{
161 char evt_path[MAXPATHLEN]; 161 char evt_path[MAXPATHLEN];
162 int fd; 162 int fd;
163 163
164 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, 164 snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
165 sys_dir->d_name, evt_dir->d_name);
166 fd = open(evt_path, O_RDONLY); 165 fd = open(evt_path, O_RDONLY);
167 if (fd < 0) 166 if (fd < 0)
168 return -EINVAL; 167 return -EINVAL;
@@ -171,12 +170,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
171 return 0; 170 return 0;
172} 171}
173 172
174#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ 173#define for_each_event(dir_path, evt_dir, evt_dirent) \
175 while ((evt_dirent = readdir(evt_dir)) != NULL) \ 174 while ((evt_dirent = readdir(evt_dir)) != NULL) \
176 if (evt_dirent->d_type == DT_DIR && \ 175 if (evt_dirent->d_type == DT_DIR && \
177 (strcmp(evt_dirent->d_name, ".")) && \ 176 (strcmp(evt_dirent->d_name, ".")) && \
178 (strcmp(evt_dirent->d_name, "..")) && \ 177 (strcmp(evt_dirent->d_name, "..")) && \
179 (!tp_event_has_id(sys_dirent, evt_dirent))) 178 (!tp_event_has_id(dir_path, evt_dirent)))
180 179
181#define MAX_EVENT_LENGTH 512 180#define MAX_EVENT_LENGTH 512
182 181
@@ -190,21 +189,21 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
190 int fd; 189 int fd;
191 u64 id; 190 u64 id;
192 char evt_path[MAXPATHLEN]; 191 char evt_path[MAXPATHLEN];
193 char dir_path[MAXPATHLEN]; 192 char *dir_path;
194 193
195 sys_dir = opendir(tracing_events_path); 194 sys_dir = tracing_events__opendir();
196 if (!sys_dir) 195 if (!sys_dir)
197 return NULL; 196 return NULL;
198 197
199 for_each_subsystem(sys_dir, sys_dirent) { 198 for_each_subsystem(sys_dir, sys_dirent) {
200 199 dir_path = get_events_file(sys_dirent->d_name);
201 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 200 if (!dir_path)
202 sys_dirent->d_name); 201 continue;
203 evt_dir = opendir(dir_path); 202 evt_dir = opendir(dir_path);
204 if (!evt_dir) 203 if (!evt_dir)
205 continue; 204 goto next;
206 205
207 for_each_event(sys_dirent, evt_dir, evt_dirent) { 206 for_each_event(dir_path, evt_dir, evt_dirent) {
208 207
209 scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, 208 scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
210 evt_dirent->d_name); 209 evt_dirent->d_name);
@@ -218,6 +217,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
218 close(fd); 217 close(fd);
219 id = atoll(id_buf); 218 id = atoll(id_buf);
220 if (id == config) { 219 if (id == config) {
220 put_events_file(dir_path);
221 closedir(evt_dir); 221 closedir(evt_dir);
222 closedir(sys_dir); 222 closedir(sys_dir);
223 path = zalloc(sizeof(*path)); 223 path = zalloc(sizeof(*path));
@@ -242,6 +242,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
242 } 242 }
243 } 243 }
244 closedir(evt_dir); 244 closedir(evt_dir);
245next:
246 put_events_file(dir_path);
245 } 247 }
246 248
247 closedir(sys_dir); 249 closedir(sys_dir);
@@ -512,14 +514,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
512 struct parse_events_error *err, 514 struct parse_events_error *err,
513 struct list_head *head_config) 515 struct list_head *head_config)
514{ 516{
515 char evt_path[MAXPATHLEN]; 517 char *evt_path;
516 struct dirent *evt_ent; 518 struct dirent *evt_ent;
517 DIR *evt_dir; 519 DIR *evt_dir;
518 int ret = 0, found = 0; 520 int ret = 0, found = 0;
519 521
520 snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); 522 evt_path = get_events_file(sys_name);
523 if (!evt_path) {
524 tracepoint_error(err, errno, sys_name, evt_name);
525 return -1;
526 }
521 evt_dir = opendir(evt_path); 527 evt_dir = opendir(evt_path);
522 if (!evt_dir) { 528 if (!evt_dir) {
529 put_events_file(evt_path);
523 tracepoint_error(err, errno, sys_name, evt_name); 530 tracepoint_error(err, errno, sys_name, evt_name);
524 return -1; 531 return -1;
525 } 532 }
@@ -545,6 +552,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
545 ret = -1; 552 ret = -1;
546 } 553 }
547 554
555 put_events_file(evt_path);
548 closedir(evt_dir); 556 closedir(evt_dir);
549 return ret; 557 return ret;
550} 558}
@@ -570,7 +578,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
570 DIR *events_dir; 578 DIR *events_dir;
571 int ret = 0; 579 int ret = 0;
572 580
573 events_dir = opendir(tracing_events_path); 581 events_dir = tracing_events__opendir();
574 if (!events_dir) { 582 if (!events_dir) {
575 tracepoint_error(err, errno, sys_name, evt_name); 583 tracepoint_error(err, errno, sys_name, evt_name);
576 return -1; 584 return -1;
@@ -1219,13 +1227,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
1219 1227
1220int parse_events_add_pmu(struct parse_events_state *parse_state, 1228int parse_events_add_pmu(struct parse_events_state *parse_state,
1221 struct list_head *list, char *name, 1229 struct list_head *list, char *name,
1222 struct list_head *head_config, bool auto_merge_stats) 1230 struct list_head *head_config,
1231 bool auto_merge_stats,
1232 bool use_alias)
1223{ 1233{
1224 struct perf_event_attr attr; 1234 struct perf_event_attr attr;
1225 struct perf_pmu_info info; 1235 struct perf_pmu_info info;
1226 struct perf_pmu *pmu; 1236 struct perf_pmu *pmu;
1227 struct perf_evsel *evsel; 1237 struct perf_evsel *evsel;
1228 struct parse_events_error *err = parse_state->error; 1238 struct parse_events_error *err = parse_state->error;
1239 bool use_uncore_alias;
1229 LIST_HEAD(config_terms); 1240 LIST_HEAD(config_terms);
1230 1241
1231 pmu = perf_pmu__find(name); 1242 pmu = perf_pmu__find(name);
@@ -1244,11 +1255,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1244 memset(&attr, 0, sizeof(attr)); 1255 memset(&attr, 0, sizeof(attr));
1245 } 1256 }
1246 1257
1258 use_uncore_alias = (pmu->is_uncore && use_alias);
1259
1247 if (!head_config) { 1260 if (!head_config) {
1248 attr.type = pmu->type; 1261 attr.type = pmu->type;
1249 evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); 1262 evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
1250 if (evsel) { 1263 if (evsel) {
1251 evsel->pmu_name = name; 1264 evsel->pmu_name = name;
1265 evsel->use_uncore_alias = use_uncore_alias;
1252 return 0; 1266 return 0;
1253 } else { 1267 } else {
1254 return -ENOMEM; 1268 return -ENOMEM;
@@ -1282,6 +1296,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1282 evsel->metric_expr = info.metric_expr; 1296 evsel->metric_expr = info.metric_expr;
1283 evsel->metric_name = info.metric_name; 1297 evsel->metric_name = info.metric_name;
1284 evsel->pmu_name = name; 1298 evsel->pmu_name = name;
1299 evsel->use_uncore_alias = use_uncore_alias;
1285 } 1300 }
1286 1301
1287 return evsel ? 0 : -ENOMEM; 1302 return evsel ? 0 : -ENOMEM;
@@ -1317,7 +1332,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
1317 list_add_tail(&term->list, head); 1332 list_add_tail(&term->list, head);
1318 1333
1319 if (!parse_events_add_pmu(parse_state, list, 1334 if (!parse_events_add_pmu(parse_state, list,
1320 pmu->name, head, true)) { 1335 pmu->name, head,
1336 true, true)) {
1321 pr_debug("%s -> %s/%s/\n", str, 1337 pr_debug("%s -> %s/%s/\n", str,
1322 pmu->name, alias->str); 1338 pmu->name, alias->str);
1323 ok++; 1339 ok++;
@@ -1339,7 +1355,120 @@ int parse_events__modifier_group(struct list_head *list,
1339 return parse_events__modifier_event(list, event_mod, true); 1355 return parse_events__modifier_event(list, event_mod, true);
1340} 1356}
1341 1357
1342void parse_events__set_leader(char *name, struct list_head *list) 1358/*
1359 * Check if the two uncore PMUs are from the same uncore block
1360 * The format of the uncore PMU name is uncore_#blockname_#pmuidx
1361 */
1362static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
1363{
1364 char *end_a, *end_b;
1365
1366 end_a = strrchr(pmu_name_a, '_');
1367 end_b = strrchr(pmu_name_b, '_');
1368
1369 if (!end_a || !end_b)
1370 return false;
1371
1372 if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
1373 return false;
1374
1375 return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
1376}
1377
1378static int
1379parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
1380 struct parse_events_state *parse_state)
1381{
1382 struct perf_evsel *evsel, *leader;
1383 uintptr_t *leaders;
1384 bool is_leader = true;
1385 int i, nr_pmu = 0, total_members, ret = 0;
1386
1387 leader = list_first_entry(list, struct perf_evsel, node);
1388 evsel = list_last_entry(list, struct perf_evsel, node);
1389 total_members = evsel->idx - leader->idx + 1;
1390
1391 leaders = calloc(total_members, sizeof(uintptr_t));
1392 if (WARN_ON(!leaders))
1393 return 0;
1394
1395 /*
1396 * Going through the whole group and doing sanity check.
1397 * All members must use alias, and be from the same uncore block.
1398 * Also, storing the leader events in an array.
1399 */
1400 __evlist__for_each_entry(list, evsel) {
1401
1402 /* Only split the uncore group which members use alias */
1403 if (!evsel->use_uncore_alias)
1404 goto out;
1405
1406 /* The events must be from the same uncore block */
1407 if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
1408 goto out;
1409
1410 if (!is_leader)
1411 continue;
1412 /*
1413 * If the event's PMU name starts to repeat, it must be a new
1414 * event. That can be used to distinguish the leader from
1415 * other members, even they have the same event name.
1416 */
1417 if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
1418 is_leader = false;
1419 continue;
1420 }
1421 /* The name is always alias name */
1422 WARN_ON(strcmp(leader->name, evsel->name));
1423
1424 /* Store the leader event for each PMU */
1425 leaders[nr_pmu++] = (uintptr_t) evsel;
1426 }
1427
1428 /* only one event alias */
1429 if (nr_pmu == total_members) {
1430 parse_state->nr_groups--;
1431 goto handled;
1432 }
1433
1434 /*
1435 * An uncore event alias is a joint name which means the same event
1436 * runs on all PMUs of a block.
1437 * Perf doesn't support mixed events from different PMUs in the same
1438 * group. The big group has to be split into multiple small groups
1439 * which only include the events from the same PMU.
1440 *
1441 * Here the uncore event aliases must be from the same uncore block.
1442 * The number of PMUs must be same for each alias. The number of new
1443 * small groups equals to the number of PMUs.
1444 * Setting the leader event for corresponding members in each group.
1445 */
1446 i = 0;
1447 __evlist__for_each_entry(list, evsel) {
1448 if (i >= nr_pmu)
1449 i = 0;
1450 evsel->leader = (struct perf_evsel *) leaders[i++];
1451 }
1452
1453 /* The number of members and group name are same for each group */
1454 for (i = 0; i < nr_pmu; i++) {
1455 evsel = (struct perf_evsel *) leaders[i];
1456 evsel->nr_members = total_members / nr_pmu;
1457 evsel->group_name = name ? strdup(name) : NULL;
1458 }
1459
1460 /* Take the new small groups into account */
1461 parse_state->nr_groups += nr_pmu - 1;
1462
1463handled:
1464 ret = 1;
1465out:
1466 free(leaders);
1467 return ret;
1468}
1469
1470void parse_events__set_leader(char *name, struct list_head *list,
1471 struct parse_events_state *parse_state)
1343{ 1472{
1344 struct perf_evsel *leader; 1473 struct perf_evsel *leader;
1345 1474
@@ -1348,6 +1477,9 @@ void parse_events__set_leader(char *name, struct list_head *list)
1348 return; 1477 return;
1349 } 1478 }
1350 1479
1480 if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
1481 return;
1482
1351 __perf_evlist__set_leader(list); 1483 __perf_evlist__set_leader(list);
1352 leader = list_entry(list->next, struct perf_evsel, node); 1484 leader = list_entry(list->next, struct perf_evsel, node);
1353 leader->group_name = name ? strdup(name) : NULL; 1485 leader->group_name = name ? strdup(name) : NULL;
@@ -1715,7 +1847,7 @@ int parse_events(struct perf_evlist *evlist, const char *str,
1715 struct perf_evsel *last; 1847 struct perf_evsel *last;
1716 1848
1717 if (list_empty(&parse_state.list)) { 1849 if (list_empty(&parse_state.list)) {
1718 WARN_ONCE(true, "WARNING: event parser found nothing"); 1850 WARN_ONCE(true, "WARNING: event parser found nothing\n");
1719 return -1; 1851 return -1;
1720 } 1852 }
1721 1853
@@ -1968,13 +2100,13 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
1968 DIR *sys_dir, *evt_dir; 2100 DIR *sys_dir, *evt_dir;
1969 struct dirent *sys_dirent, *evt_dirent; 2101 struct dirent *sys_dirent, *evt_dirent;
1970 char evt_path[MAXPATHLEN]; 2102 char evt_path[MAXPATHLEN];
1971 char dir_path[MAXPATHLEN]; 2103 char *dir_path;
1972 char **evt_list = NULL; 2104 char **evt_list = NULL;
1973 unsigned int evt_i = 0, evt_num = 0; 2105 unsigned int evt_i = 0, evt_num = 0;
1974 bool evt_num_known = false; 2106 bool evt_num_known = false;
1975 2107
1976restart: 2108restart:
1977 sys_dir = opendir(tracing_events_path); 2109 sys_dir = tracing_events__opendir();
1978 if (!sys_dir) 2110 if (!sys_dir)
1979 return; 2111 return;
1980 2112
@@ -1989,13 +2121,14 @@ restart:
1989 !strglobmatch(sys_dirent->d_name, subsys_glob)) 2121 !strglobmatch(sys_dirent->d_name, subsys_glob))
1990 continue; 2122 continue;
1991 2123
1992 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 2124 dir_path = get_events_file(sys_dirent->d_name);
1993 sys_dirent->d_name); 2125 if (!dir_path)
2126 continue;
1994 evt_dir = opendir(dir_path); 2127 evt_dir = opendir(dir_path);
1995 if (!evt_dir) 2128 if (!evt_dir)
1996 continue; 2129 goto next;
1997 2130
1998 for_each_event(sys_dirent, evt_dir, evt_dirent) { 2131 for_each_event(dir_path, evt_dir, evt_dirent) {
1999 if (event_glob != NULL && 2132 if (event_glob != NULL &&
2000 !strglobmatch(evt_dirent->d_name, event_glob)) 2133 !strglobmatch(evt_dirent->d_name, event_glob))
2001 continue; 2134 continue;
@@ -2009,11 +2142,15 @@ restart:
2009 sys_dirent->d_name, evt_dirent->d_name); 2142 sys_dirent->d_name, evt_dirent->d_name);
2010 2143
2011 evt_list[evt_i] = strdup(evt_path); 2144 evt_list[evt_i] = strdup(evt_path);
2012 if (evt_list[evt_i] == NULL) 2145 if (evt_list[evt_i] == NULL) {
2146 put_events_file(dir_path);
2013 goto out_close_evt_dir; 2147 goto out_close_evt_dir;
2148 }
2014 evt_i++; 2149 evt_i++;
2015 } 2150 }
2016 closedir(evt_dir); 2151 closedir(evt_dir);
2152next:
2153 put_events_file(dir_path);
2017 } 2154 }
2018 closedir(sys_dir); 2155 closedir(sys_dir);
2019 2156
@@ -2061,21 +2198,21 @@ int is_valid_tracepoint(const char *event_string)
2061 DIR *sys_dir, *evt_dir; 2198 DIR *sys_dir, *evt_dir;
2062 struct dirent *sys_dirent, *evt_dirent; 2199 struct dirent *sys_dirent, *evt_dirent;
2063 char evt_path[MAXPATHLEN]; 2200 char evt_path[MAXPATHLEN];
2064 char dir_path[MAXPATHLEN]; 2201 char *dir_path;
2065 2202
2066 sys_dir = opendir(tracing_events_path); 2203 sys_dir = tracing_events__opendir();
2067 if (!sys_dir) 2204 if (!sys_dir)
2068 return 0; 2205 return 0;
2069 2206
2070 for_each_subsystem(sys_dir, sys_dirent) { 2207 for_each_subsystem(sys_dir, sys_dirent) {
2071 2208 dir_path = get_events_file(sys_dirent->d_name);
2072 snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, 2209 if (!dir_path)
2073 sys_dirent->d_name); 2210 continue;
2074 evt_dir = opendir(dir_path); 2211 evt_dir = opendir(dir_path);
2075 if (!evt_dir) 2212 if (!evt_dir)
2076 continue; 2213 goto next;
2077 2214
2078 for_each_event(sys_dirent, evt_dir, evt_dirent) { 2215 for_each_event(dir_path, evt_dir, evt_dirent) {
2079 snprintf(evt_path, MAXPATHLEN, "%s:%s", 2216 snprintf(evt_path, MAXPATHLEN, "%s:%s",
2080 sys_dirent->d_name, evt_dirent->d_name); 2217 sys_dirent->d_name, evt_dirent->d_name);
2081 if (!strcmp(evt_path, event_string)) { 2218 if (!strcmp(evt_path, event_string)) {
@@ -2085,6 +2222,8 @@ int is_valid_tracepoint(const char *event_string)
2085 } 2222 }
2086 } 2223 }
2087 closedir(evt_dir); 2224 closedir(evt_dir);
2225next:
2226 put_events_file(dir_path);
2088 } 2227 }
2089 closedir(sys_dir); 2228 closedir(sys_dir);
2090 return 0; 2229 return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 5015cfd58277..4473dac27aee 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
167 void *ptr, char *type, u64 len); 167 void *ptr, char *type, u64 len);
168int parse_events_add_pmu(struct parse_events_state *parse_state, 168int parse_events_add_pmu(struct parse_events_state *parse_state,
169 struct list_head *list, char *name, 169 struct list_head *list, char *name,
170 struct list_head *head_config, bool auto_merge_stats); 170 struct list_head *head_config,
171 bool auto_merge_stats,
172 bool use_alias);
171 173
172int parse_events_multi_pmu_add(struct parse_events_state *parse_state, 174int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
173 char *str, 175 char *str,
@@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old,
178 180
179enum perf_pmu_event_symbol_type 181enum perf_pmu_event_symbol_type
180perf_pmu__parse_check(const char *name); 182perf_pmu__parse_check(const char *name);
181void parse_events__set_leader(char *name, struct list_head *list); 183void parse_events__set_leader(char *name, struct list_head *list,
184 struct parse_events_state *parse_state);
182void parse_events_update_lists(struct list_head *list_event, 185void parse_events_update_lists(struct list_head *list_event,
183 struct list_head *list_all); 186 struct list_head *list_all);
184void parse_events_evlist_error(struct parse_events_state *parse_state, 187void parse_events_evlist_error(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index a1a01b1ac8b8..5f761f3ed0f3 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,7 +53,21 @@ static int str(yyscan_t scanner, int token)
53 YYSTYPE *yylval = parse_events_get_lval(scanner); 53 YYSTYPE *yylval = parse_events_get_lval(scanner);
54 char *text = parse_events_get_text(scanner); 54 char *text = parse_events_get_text(scanner);
55 55
56 yylval->str = strdup(text); 56 if (text[0] != '\'') {
57 yylval->str = strdup(text);
58 } else {
59 /*
60 * If a text tag specified on the command line
61 * contains opening single quite ' then it is
62 * expected that the tag ends with single quote
63 * as well, like this:
64 * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
65 * quotes need to be escaped to bypass shell
66 * processing.
67 */
68 yylval->str = strndup(&text[1], strlen(text) - 2);
69 }
70
57 return token; 71 return token;
58} 72}
59 73
@@ -176,6 +190,7 @@ num_dec [0-9]+
176num_hex 0x[a-fA-F0-9]+ 190num_hex 0x[a-fA-F0-9]+
177num_raw_hex [a-fA-F0-9]+ 191num_raw_hex [a-fA-F0-9]+
178name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* 192name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
193name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
179name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* 194name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
180drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 195drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
181/* If you add a modifier you need to update check_modifier() */ 196/* If you add a modifier you need to update check_modifier() */
@@ -344,6 +359,7 @@ r{num_raw_hex} { return raw(yyscanner); }
344{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } 359{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
345{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } 360{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
346{name} { return pmu_str_check(yyscanner); } 361{name} { return pmu_str_check(yyscanner); }
362{name_tag} { return str(yyscanner, PE_NAME); }
347"/" { BEGIN(config); return '/'; } 363"/" { BEGIN(config); return '/'; }
348- { return '-'; } 364- { return '-'; }
349, { BEGIN(event); return ','; } 365, { BEGIN(event); return ','; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d14464c42714..da8fe57691b8 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -73,6 +73,7 @@ static void inc_group_count(struct list_head *list,
73%type <num> value_sym 73%type <num> value_sym
74%type <head> event_config 74%type <head> event_config
75%type <head> opt_event_config 75%type <head> opt_event_config
76%type <head> opt_pmu_config
76%type <term> event_term 77%type <term> event_term
77%type <head> event_pmu 78%type <head> event_pmu
78%type <head> event_legacy_symbol 79%type <head> event_legacy_symbol
@@ -161,7 +162,7 @@ PE_NAME '{' events '}'
161 struct list_head *list = $3; 162 struct list_head *list = $3;
162 163
163 inc_group_count(list, _parse_state); 164 inc_group_count(list, _parse_state);
164 parse_events__set_leader($1, list); 165 parse_events__set_leader($1, list, _parse_state);
165 $$ = list; 166 $$ = list;
166} 167}
167| 168|
@@ -170,7 +171,7 @@ PE_NAME '{' events '}'
170 struct list_head *list = $2; 171 struct list_head *list = $2;
171 172
172 inc_group_count(list, _parse_state); 173 inc_group_count(list, _parse_state);
173 parse_events__set_leader(NULL, list); 174 parse_events__set_leader(NULL, list, _parse_state);
174 $$ = list; 175 $$ = list;
175} 176}
176 177
@@ -224,15 +225,20 @@ event_def: event_pmu |
224 event_bpf_file 225 event_bpf_file
225 226
226event_pmu: 227event_pmu:
227PE_NAME '/' event_config '/' 228PE_NAME opt_pmu_config
228{ 229{
230 struct parse_events_state *parse_state = _parse_state;
231 struct parse_events_error *error = parse_state->error;
229 struct list_head *list, *orig_terms, *terms; 232 struct list_head *list, *orig_terms, *terms;
230 233
231 if (parse_events_copy_term_list($3, &orig_terms)) 234 if (parse_events_copy_term_list($2, &orig_terms))
232 YYABORT; 235 YYABORT;
233 236
237 if (error)
238 error->idx = @1.first_column;
239
234 ALLOC_LIST(list); 240 ALLOC_LIST(list);
235 if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) { 241 if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
236 struct perf_pmu *pmu = NULL; 242 struct perf_pmu *pmu = NULL;
237 int ok = 0; 243 int ok = 0;
238 char *pattern; 244 char *pattern;
@@ -251,7 +257,7 @@ PE_NAME '/' event_config '/'
251 free(pattern); 257 free(pattern);
252 YYABORT; 258 YYABORT;
253 } 259 }
254 if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) 260 if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
255 ok++; 261 ok++;
256 parse_events_terms__delete(terms); 262 parse_events_terms__delete(terms);
257 } 263 }
@@ -262,7 +268,7 @@ PE_NAME '/' event_config '/'
262 if (!ok) 268 if (!ok)
263 YYABORT; 269 YYABORT;
264 } 270 }
265 parse_events_terms__delete($3); 271 parse_events_terms__delete($2);
266 parse_events_terms__delete(orig_terms); 272 parse_events_terms__delete(orig_terms);
267 $$ = list; 273 $$ = list;
268} 274}
@@ -496,6 +502,17 @@ opt_event_config:
496 $$ = NULL; 502 $$ = NULL;
497} 503}
498 504
505opt_pmu_config:
506'/' event_config '/'
507{
508 $$ = $2;
509}
510|
511'/' '/'
512{
513 $$ = NULL;
514}
515
499start_terms: event_config 516start_terms: event_config
500{ 517{
501 struct parse_events_state *parse_state = _parse_state; 518 struct parse_events_state *parse_state = _parse_state;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e1dbc9821617..f119eb628dbb 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -111,17 +111,6 @@ void exit_probe_symbol_maps(void)
111 symbol__exit(); 111 symbol__exit();
112} 112}
113 113
114static struct symbol *__find_kernel_function_by_name(const char *name,
115 struct map **mapp)
116{
117 return machine__find_kernel_function_by_name(host_machine, name, mapp);
118}
119
120static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
121{
122 return machine__find_kernel_function(host_machine, addr, mapp);
123}
124
125static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) 114static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
126{ 115{
127 /* kmap->ref_reloc_sym should be set if host_machine is initialized */ 116 /* kmap->ref_reloc_sym should be set if host_machine is initialized */
@@ -149,7 +138,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
149 if (reloc_sym && strcmp(name, reloc_sym->name) == 0) 138 if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
150 *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; 139 *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
151 else { 140 else {
152 sym = __find_kernel_function_by_name(name, &map); 141 sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
153 if (!sym) 142 if (!sym)
154 return -ENOENT; 143 return -ENOENT;
155 *addr = map->unmap_ip(map, sym->start) - 144 *addr = map->unmap_ip(map, sym->start) -
@@ -161,8 +150,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
161 150
162static struct map *kernel_get_module_map(const char *module) 151static struct map *kernel_get_module_map(const char *module)
163{ 152{
164 struct map_groups *grp = &host_machine->kmaps; 153 struct maps *maps = machine__kernel_maps(host_machine);
165 struct maps *maps = &grp->maps[MAP__FUNCTION];
166 struct map *pos; 154 struct map *pos;
167 155
168 /* A file path -- this is an offline module */ 156 /* A file path -- this is an offline module */
@@ -177,8 +165,7 @@ static struct map *kernel_get_module_map(const char *module)
177 if (strncmp(pos->dso->short_name + 1, module, 165 if (strncmp(pos->dso->short_name + 1, module,
178 pos->dso->short_name_len - 2) == 0 && 166 pos->dso->short_name_len - 2) == 0 &&
179 module[pos->dso->short_name_len - 2] == '\0') { 167 module[pos->dso->short_name_len - 2] == '\0') {
180 map__get(pos); 168 return map__get(pos);
181 return pos;
182 } 169 }
183 } 170 }
184 return NULL; 171 return NULL;
@@ -341,7 +328,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
341 char module_name[128]; 328 char module_name[128];
342 329
343 snprintf(module_name, sizeof(module_name), "[%s]", module); 330 snprintf(module_name, sizeof(module_name), "[%s]", module);
344 map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); 331 map = map_groups__find_by_name(&host_machine->kmaps, module_name);
345 if (map) { 332 if (map) {
346 dso = map->dso; 333 dso = map->dso;
347 goto found; 334 goto found;
@@ -2098,7 +2085,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
2098 } 2085 }
2099 if (addr) { 2086 if (addr) {
2100 addr += tp->offset; 2087 addr += tp->offset;
2101 sym = __find_kernel_function(addr, &map); 2088 sym = machine__find_kernel_symbol(host_machine, addr, &map);
2102 } 2089 }
2103 } 2090 }
2104 2091
@@ -3504,19 +3491,18 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
3504 (target) ? : "kernel"); 3491 (target) ? : "kernel");
3505 goto end; 3492 goto end;
3506 } 3493 }
3507 if (!dso__sorted_by_name(map->dso, map->type)) 3494 if (!dso__sorted_by_name(map->dso))
3508 dso__sort_by_name(map->dso, map->type); 3495 dso__sort_by_name(map->dso);
3509 3496
3510 /* Show all (filtered) symbols */ 3497 /* Show all (filtered) symbols */
3511 setup_pager(); 3498 setup_pager();
3512 3499
3513 for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) { 3500 for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
3514 struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); 3501 struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
3515 3502
3516 if (strfilter__compare(_filter, pos->sym.name)) 3503 if (strfilter__compare(_filter, pos->sym.name))
3517 printf("%s\n", pos->sym.name); 3504 printf("%s\n", pos->sym.name);
3518 } 3505 }
3519
3520end: 3506end:
3521 map__put(map); 3507 map__put(map);
3522 exit_probe_symbol_maps(); 3508 exit_probe_symbol_maps();
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 4ae1123c6794..b76088fadf3d 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -84,8 +84,7 @@ int open_trace_file(const char *trace_file, bool readwrite)
84 char buf[PATH_MAX]; 84 char buf[PATH_MAX];
85 int ret; 85 int ret;
86 86
87 ret = e_snprintf(buf, PATH_MAX, "%s/%s", 87 ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
88 tracing_path, trace_file);
89 if (ret >= 0) { 88 if (ret >= 0) {
90 pr_debug("Opening %s write=%d\n", buf, readwrite); 89 pr_debug("Opening %s write=%d\n", buf, readwrite);
91 if (readwrite && !probe_event_dry_run) 90 if (readwrite && !probe_event_dry_run)
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
deleted file mode 100644
index 22eaa201aa27..000000000000
--- a/tools/perf/util/quote.c
+++ /dev/null
@@ -1,62 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <errno.h>
3#include <stdlib.h>
4#include "strbuf.h"
5#include "quote.h"
6#include "util.h"
7
8/* Help to copy the thing properly quoted for the shell safety.
9 * any single quote is replaced with '\'', any exclamation point
10 * is replaced with '\!', and the whole thing is enclosed in a
11 *
12 * E.g.
13 * original sq_quote result
14 * name ==> name ==> 'name'
15 * a b ==> a b ==> 'a b'
16 * a'b ==> a'\''b ==> 'a'\''b'
17 * a!b ==> a'\!'b ==> 'a'\!'b'
18 */
19static inline int need_bs_quote(char c)
20{
21 return (c == '\'' || c == '!');
22}
23
24static int sq_quote_buf(struct strbuf *dst, const char *src)
25{
26 char *to_free = NULL;
27 int ret;
28
29 if (dst->buf == src)
30 to_free = strbuf_detach(dst, NULL);
31
32 ret = strbuf_addch(dst, '\'');
33 while (!ret && *src) {
34 size_t len = strcspn(src, "'!");
35 ret = strbuf_add(dst, src, len);
36 src += len;
37 while (!ret && need_bs_quote(*src))
38 ret = strbuf_addf(dst, "'\\%c\'", *src++);
39 }
40 if (!ret)
41 ret = strbuf_addch(dst, '\'');
42 free(to_free);
43
44 return ret;
45}
46
47int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
48{
49 int i, ret;
50
51 /* Copy into destination buffer. */
52 ret = strbuf_grow(dst, 255);
53 for (i = 0; !ret && argv[i]; ++i) {
54 ret = strbuf_addch(dst, ' ');
55 if (ret)
56 break;
57 ret = sq_quote_buf(dst, argv[i]);
58 if (maxlen && dst->len > maxlen)
59 return -ENOSPC;
60 }
61 return ret;
62}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
deleted file mode 100644
index 274bf26d3511..000000000000
--- a/tools/perf/util/quote.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __PERF_QUOTE_H
3#define __PERF_QUOTE_H
4
5#include <stddef.h>
6
7/* Help to copy the thing properly quoted for the shell safety.
8 * any single quote is replaced with '\'', any exclamation point
9 * is replaced with '\!', and the whole thing is enclosed in a
10 * single quote pair.
11 *
12 * For example, if you are passing the result to system() as an
13 * argument:
14 *
15 * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
16 *
17 * would be appropriate. If the system() is going to call ssh to
18 * run the command on the other side:
19 *
20 * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
21 * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd));
22 *
23 * Note that the above examples leak memory! Remember to free result from
24 * sq_quote() in a real application.
25 */
26
27struct strbuf;
28
29int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
30
31#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 10dd5fce082b..46e9e19ab1ac 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -48,6 +48,7 @@
48#include "cpumap.h" 48#include "cpumap.h"
49#include "print_binary.h" 49#include "print_binary.h"
50#include "stat.h" 50#include "stat.h"
51#include "mem-events.h"
51 52
52#if PY_MAJOR_VERSION < 3 53#if PY_MAJOR_VERSION < 3
53#define _PyUnicode_FromString(arg) \ 54#define _PyUnicode_FromString(arg) \
@@ -372,6 +373,19 @@ static PyObject *get_field_numeric_entry(struct event_format *event,
372 return obj; 373 return obj;
373} 374}
374 375
376static const char *get_dsoname(struct map *map)
377{
378 const char *dsoname = "[unknown]";
379
380 if (map && map->dso) {
381 if (symbol_conf.show_kernel_path && map->dso->long_name)
382 dsoname = map->dso->long_name;
383 else
384 dsoname = map->dso->name;
385 }
386
387 return dsoname;
388}
375 389
376static PyObject *python_process_callchain(struct perf_sample *sample, 390static PyObject *python_process_callchain(struct perf_sample *sample,
377 struct perf_evsel *evsel, 391 struct perf_evsel *evsel,
@@ -427,14 +441,8 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
427 } 441 }
428 442
429 if (node->map) { 443 if (node->map) {
430 struct map *map = node->map; 444 const char *dsoname = get_dsoname(node->map);
431 const char *dsoname = "[unknown]"; 445
432 if (map && map->dso) {
433 if (symbol_conf.show_kernel_path && map->dso->long_name)
434 dsoname = map->dso->long_name;
435 else
436 dsoname = map->dso->name;
437 }
438 pydict_set_item_string_decref(pyelem, "dso", 446 pydict_set_item_string_decref(pyelem, "dso",
439 _PyUnicode_FromString(dsoname)); 447 _PyUnicode_FromString(dsoname));
440 } 448 }
@@ -448,6 +456,166 @@ exit:
448 return pylist; 456 return pylist;
449} 457}
450 458
459static PyObject *python_process_brstack(struct perf_sample *sample,
460 struct thread *thread)
461{
462 struct branch_stack *br = sample->branch_stack;
463 PyObject *pylist;
464 u64 i;
465
466 pylist = PyList_New(0);
467 if (!pylist)
468 Py_FatalError("couldn't create Python list");
469
470 if (!(br && br->nr))
471 goto exit;
472
473 for (i = 0; i < br->nr; i++) {
474 PyObject *pyelem;
475 struct addr_location al;
476 const char *dsoname;
477
478 pyelem = PyDict_New();
479 if (!pyelem)
480 Py_FatalError("couldn't create Python dictionary");
481
482 pydict_set_item_string_decref(pyelem, "from",
483 PyLong_FromUnsignedLongLong(br->entries[i].from));
484 pydict_set_item_string_decref(pyelem, "to",
485 PyLong_FromUnsignedLongLong(br->entries[i].to));
486 pydict_set_item_string_decref(pyelem, "mispred",
487 PyBool_FromLong(br->entries[i].flags.mispred));
488 pydict_set_item_string_decref(pyelem, "predicted",
489 PyBool_FromLong(br->entries[i].flags.predicted));
490 pydict_set_item_string_decref(pyelem, "in_tx",
491 PyBool_FromLong(br->entries[i].flags.in_tx));
492 pydict_set_item_string_decref(pyelem, "abort",
493 PyBool_FromLong(br->entries[i].flags.abort));
494 pydict_set_item_string_decref(pyelem, "cycles",
495 PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
496
497 thread__find_map(thread, sample->cpumode,
498 br->entries[i].from, &al);
499 dsoname = get_dsoname(al.map);
500 pydict_set_item_string_decref(pyelem, "from_dsoname",
501 _PyUnicode_FromString(dsoname));
502
503 thread__find_map(thread, sample->cpumode,
504 br->entries[i].to, &al);
505 dsoname = get_dsoname(al.map);
506 pydict_set_item_string_decref(pyelem, "to_dsoname",
507 _PyUnicode_FromString(dsoname));
508
509 PyList_Append(pylist, pyelem);
510 Py_DECREF(pyelem);
511 }
512
513exit:
514 return pylist;
515}
516
517static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
518{
519 unsigned long offset;
520
521 if (al->addr < sym->end)
522 offset = al->addr - sym->start;
523 else
524 offset = al->addr - al->map->start - sym->start;
525
526 return offset;
527}
528
529static int get_symoff(struct symbol *sym, struct addr_location *al,
530 bool print_off, char *bf, int size)
531{
532 unsigned long offset;
533
534 if (!sym || !sym->name[0])
535 return scnprintf(bf, size, "%s", "[unknown]");
536
537 if (!print_off)
538 return scnprintf(bf, size, "%s", sym->name);
539
540 offset = get_offset(sym, al);
541
542 return scnprintf(bf, size, "%s+0x%x", sym->name, offset);
543}
544
545static int get_br_mspred(struct branch_flags *flags, char *bf, int size)
546{
547 if (!flags->mispred && !flags->predicted)
548 return scnprintf(bf, size, "%s", "-");
549
550 if (flags->mispred)
551 return scnprintf(bf, size, "%s", "M");
552
553 return scnprintf(bf, size, "%s", "P");
554}
555
556static PyObject *python_process_brstacksym(struct perf_sample *sample,
557 struct thread *thread)
558{
559 struct branch_stack *br = sample->branch_stack;
560 PyObject *pylist;
561 u64 i;
562 char bf[512];
563 struct addr_location al;
564
565 pylist = PyList_New(0);
566 if (!pylist)
567 Py_FatalError("couldn't create Python list");
568
569 if (!(br && br->nr))
570 goto exit;
571
572 for (i = 0; i < br->nr; i++) {
573 PyObject *pyelem;
574
575 pyelem = PyDict_New();
576 if (!pyelem)
577 Py_FatalError("couldn't create Python dictionary");
578
579 thread__find_symbol(thread, sample->cpumode,
580 br->entries[i].from, &al);
581 get_symoff(al.sym, &al, true, bf, sizeof(bf));
582 pydict_set_item_string_decref(pyelem, "from",
583 _PyUnicode_FromString(bf));
584
585 thread__find_symbol(thread, sample->cpumode,
586 br->entries[i].to, &al);
587 get_symoff(al.sym, &al, true, bf, sizeof(bf));
588 pydict_set_item_string_decref(pyelem, "to",
589 _PyUnicode_FromString(bf));
590
591 get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
592 pydict_set_item_string_decref(pyelem, "pred",
593 _PyUnicode_FromString(bf));
594
595 if (br->entries[i].flags.in_tx) {
596 pydict_set_item_string_decref(pyelem, "in_tx",
597 _PyUnicode_FromString("X"));
598 } else {
599 pydict_set_item_string_decref(pyelem, "in_tx",
600 _PyUnicode_FromString("-"));
601 }
602
603 if (br->entries[i].flags.abort) {
604 pydict_set_item_string_decref(pyelem, "abort",
605 _PyUnicode_FromString("A"));
606 } else {
607 pydict_set_item_string_decref(pyelem, "abort",
608 _PyUnicode_FromString("-"));
609 }
610
611 PyList_Append(pylist, pyelem);
612 Py_DECREF(pyelem);
613 }
614
615exit:
616 return pylist;
617}
618
451static PyObject *get_sample_value_as_tuple(struct sample_read_value *value) 619static PyObject *get_sample_value_as_tuple(struct sample_read_value *value)
452{ 620{
453 PyObject *t; 621 PyObject *t;
@@ -498,12 +666,63 @@ static void set_sample_read_in_dict(PyObject *dict_sample,
498 pydict_set_item_string_decref(dict_sample, "values", values); 666 pydict_set_item_string_decref(dict_sample, "values", values);
499} 667}
500 668
669static void set_sample_datasrc_in_dict(PyObject *dict,
670 struct perf_sample *sample)
671{
672 struct mem_info mi = { .data_src.val = sample->data_src };
673 char decode[100];
674
675 pydict_set_item_string_decref(dict, "datasrc",
676 PyLong_FromUnsignedLongLong(sample->data_src));
677
678 perf_script__meminfo_scnprintf(decode, 100, &mi);
679
680 pydict_set_item_string_decref(dict, "datasrc_decode",
681 _PyUnicode_FromString(decode));
682}
683
684static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
685{
686 unsigned int i = 0, r;
687 int printed = 0;
688
689 bf[0] = 0;
690
691 for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
692 u64 val = regs->regs[i++];
693
694 printed += scnprintf(bf + printed, size - printed,
695 "%5s:0x%" PRIx64 " ",
696 perf_reg_name(r), val);
697 }
698
699 return printed;
700}
701
702static void set_regs_in_dict(PyObject *dict,
703 struct perf_sample *sample,
704 struct perf_evsel *evsel)
705{
706 struct perf_event_attr *attr = &evsel->attr;
707 char bf[512];
708
709 regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
710
711 pydict_set_item_string_decref(dict, "iregs",
712 _PyUnicode_FromString(bf));
713
714 regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
715
716 pydict_set_item_string_decref(dict, "uregs",
717 _PyUnicode_FromString(bf));
718}
719
501static PyObject *get_perf_sample_dict(struct perf_sample *sample, 720static PyObject *get_perf_sample_dict(struct perf_sample *sample,
502 struct perf_evsel *evsel, 721 struct perf_evsel *evsel,
503 struct addr_location *al, 722 struct addr_location *al,
504 PyObject *callchain) 723 PyObject *callchain)
505{ 724{
506 PyObject *dict, *dict_sample; 725 PyObject *dict, *dict_sample, *brstack, *brstacksym;
507 726
508 dict = PyDict_New(); 727 dict = PyDict_New();
509 if (!dict) 728 if (!dict)
@@ -531,7 +750,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
531 PyLong_FromUnsignedLongLong(sample->period)); 750 PyLong_FromUnsignedLongLong(sample->period));
532 pydict_set_item_string_decref(dict_sample, "phys_addr", 751 pydict_set_item_string_decref(dict_sample, "phys_addr",
533 PyLong_FromUnsignedLongLong(sample->phys_addr)); 752 PyLong_FromUnsignedLongLong(sample->phys_addr));
753 pydict_set_item_string_decref(dict_sample, "addr",
754 PyLong_FromUnsignedLongLong(sample->addr));
534 set_sample_read_in_dict(dict_sample, sample, evsel); 755 set_sample_read_in_dict(dict_sample, sample, evsel);
756 pydict_set_item_string_decref(dict_sample, "weight",
757 PyLong_FromUnsignedLongLong(sample->weight));
758 pydict_set_item_string_decref(dict_sample, "transaction",
759 PyLong_FromUnsignedLongLong(sample->transaction));
760 set_sample_datasrc_in_dict(dict_sample, sample);
535 pydict_set_item_string_decref(dict, "sample", dict_sample); 761 pydict_set_item_string_decref(dict, "sample", dict_sample);
536 762
537 pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( 763 pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
@@ -549,6 +775,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
549 775
550 pydict_set_item_string_decref(dict, "callchain", callchain); 776 pydict_set_item_string_decref(dict, "callchain", callchain);
551 777
778 brstack = python_process_brstack(sample, al->thread);
779 pydict_set_item_string_decref(dict, "brstack", brstack);
780
781 brstacksym = python_process_brstacksym(sample, al->thread);
782 pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
783
784 set_regs_in_dict(dict, sample, evsel);
785
552 return dict; 786 return dict;
553} 787}
554 788
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f4a7a437ee87..8b9369303561 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1094,7 +1094,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
1094 1094
1095 sample_type = evsel->attr.sample_type; 1095 sample_type = evsel->attr.sample_type;
1096 1096
1097 if (sample_type & PERF_SAMPLE_CALLCHAIN) 1097 if (evsel__has_callchain(evsel))
1098 callchain__printf(evsel, sample); 1098 callchain__printf(evsel, sample);
1099 1099
1100 if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) 1100 if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
@@ -1973,12 +1973,11 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
1973 return false; 1973 return false;
1974} 1974}
1975 1975
1976int maps__set_kallsyms_ref_reloc_sym(struct map **maps, 1976int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
1977 const char *symbol_name, u64 addr)
1978{ 1977{
1979 char *bracket; 1978 char *bracket;
1980 int i;
1981 struct ref_reloc_sym *ref; 1979 struct ref_reloc_sym *ref;
1980 struct kmap *kmap;
1982 1981
1983 ref = zalloc(sizeof(struct ref_reloc_sym)); 1982 ref = zalloc(sizeof(struct ref_reloc_sym));
1984 if (ref == NULL) 1983 if (ref == NULL)
@@ -1996,13 +1995,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
1996 1995
1997 ref->addr = addr; 1996 ref->addr = addr;
1998 1997
1999 for (i = 0; i < MAP__NR_TYPES; ++i) { 1998 kmap = map__kmap(map);
2000 struct kmap *kmap = map__kmap(maps[i]); 1999 if (kmap)
2001
2002 if (!kmap)
2003 continue;
2004 kmap->ref_reloc_sym = ref; 2000 kmap->ref_reloc_sym = ref;
2005 }
2006 2001
2007 return 0; 2002 return 0;
2008} 2003}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 26a68dfd8a4f..fed2952ab45a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2,7 +2,7 @@
2#include <errno.h> 2#include <errno.h>
3#include <inttypes.h> 3#include <inttypes.h>
4#include <regex.h> 4#include <regex.h>
5#include <sys/mman.h> 5#include <linux/mman.h>
6#include "sort.h" 6#include "sort.h"
7#include "hist.h" 7#include "hist.h"
8#include "comm.h" 8#include "comm.h"
@@ -282,7 +282,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
282 282
283 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); 283 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
284 if (sym && map) { 284 if (sym && map) {
285 if (map->type == MAP__VARIABLE) { 285 if (sym->type == STT_OBJECT) {
286 ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); 286 ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
287 ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", 287 ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
288 ip - map->unmap_ip(map, sym->start)); 288 ip - map->unmap_ip(map, sym->start));
@@ -331,24 +331,18 @@ struct sort_entry sort_sym = {
331 331
332/* --sort srcline */ 332/* --sort srcline */
333 333
334char *hist_entry__get_srcline(struct hist_entry *he) 334char *hist_entry__srcline(struct hist_entry *he)
335{ 335{
336 struct map *map = he->ms.map; 336 return map__srcline(he->ms.map, he->ip, he->ms.sym);
337
338 if (!map)
339 return SRCLINE_UNKNOWN;
340
341 return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
342 he->ms.sym, true, true, he->ip);
343} 337}
344 338
345static int64_t 339static int64_t
346sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) 340sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
347{ 341{
348 if (!left->srcline) 342 if (!left->srcline)
349 left->srcline = hist_entry__get_srcline(left); 343 left->srcline = hist_entry__srcline(left);
350 if (!right->srcline) 344 if (!right->srcline)
351 right->srcline = hist_entry__get_srcline(right); 345 right->srcline = hist_entry__srcline(right);
352 346
353 return strcmp(right->srcline, left->srcline); 347 return strcmp(right->srcline, left->srcline);
354} 348}
@@ -357,7 +351,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
357 size_t size, unsigned int width) 351 size_t size, unsigned int width)
358{ 352{
359 if (!he->srcline) 353 if (!he->srcline)
360 he->srcline = hist_entry__get_srcline(he); 354 he->srcline = hist_entry__srcline(he);
361 355
362 return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); 356 return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
363} 357}
@@ -371,33 +365,20 @@ struct sort_entry sort_srcline = {
371 365
372/* --sort srcline_from */ 366/* --sort srcline_from */
373 367
368static char *addr_map_symbol__srcline(struct addr_map_symbol *ams)
369{
370 return map__srcline(ams->map, ams->al_addr, ams->sym);
371}
372
374static int64_t 373static int64_t
375sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) 374sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
376{ 375{
377 if (!left->branch_info->srcline_from) { 376 if (!left->branch_info->srcline_from)
378 struct map *map = left->branch_info->from.map; 377 left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from);
379 if (!map) 378
380 left->branch_info->srcline_from = SRCLINE_UNKNOWN; 379 if (!right->branch_info->srcline_from)
381 else 380 right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from);
382 left->branch_info->srcline_from = get_srcline(map->dso, 381
383 map__rip_2objdump(map,
384 left->branch_info->from.al_addr),
385 left->branch_info->from.sym,
386 true, true,
387 left->branch_info->from.al_addr);
388 }
389 if (!right->branch_info->srcline_from) {
390 struct map *map = right->branch_info->from.map;
391 if (!map)
392 right->branch_info->srcline_from = SRCLINE_UNKNOWN;
393 else
394 right->branch_info->srcline_from = get_srcline(map->dso,
395 map__rip_2objdump(map,
396 right->branch_info->from.al_addr),
397 right->branch_info->from.sym,
398 true, true,
399 right->branch_info->from.al_addr);
400 }
401 return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); 382 return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
402} 383}
403 384
@@ -419,30 +400,12 @@ struct sort_entry sort_srcline_from = {
419static int64_t 400static int64_t
420sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) 401sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
421{ 402{
422 if (!left->branch_info->srcline_to) { 403 if (!left->branch_info->srcline_to)
423 struct map *map = left->branch_info->to.map; 404 left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to);
424 if (!map) 405
425 left->branch_info->srcline_to = SRCLINE_UNKNOWN; 406 if (!right->branch_info->srcline_to)
426 else 407 right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to);
427 left->branch_info->srcline_to = get_srcline(map->dso, 408
428 map__rip_2objdump(map,
429 left->branch_info->to.al_addr),
430 left->branch_info->from.sym,
431 true, true,
432 left->branch_info->to.al_addr);
433 }
434 if (!right->branch_info->srcline_to) {
435 struct map *map = right->branch_info->to.map;
436 if (!map)
437 right->branch_info->srcline_to = SRCLINE_UNKNOWN;
438 else
439 right->branch_info->srcline_to = get_srcline(map->dso,
440 map__rip_2objdump(map,
441 right->branch_info->to.al_addr),
442 right->branch_info->to.sym,
443 true, true,
444 right->branch_info->to.al_addr);
445 }
446 return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); 409 return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
447} 410}
448 411
@@ -1211,7 +1174,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
1211 1174
1212 /* print [s] for shared data mmaps */ 1175 /* print [s] for shared data mmaps */
1213 if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && 1176 if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
1214 map && (map->type == MAP__VARIABLE) && 1177 map && !(map->prot & PROT_EXEC) &&
1215 (map->flags & MAP_SHARED) && 1178 (map->flags & MAP_SHARED) &&
1216 (map->maj || map->min || map->ino || 1179 (map->maj || map->min || map->ino ||
1217 map->ino_generation)) 1180 map->ino_generation))
@@ -2582,7 +2545,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
2582 if (sort__mode != SORT_MODE__MEMORY) 2545 if (sort__mode != SORT_MODE__MEMORY)
2583 return -EINVAL; 2546 return -EINVAL;
2584 2547
2585 if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0) 2548 if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
2586 return -EINVAL; 2549 return -EINVAL;
2587 2550
2588 if (sd->entry == &sort_mem_daddr_sym) 2551 if (sd->entry == &sort_mem_daddr_sym)
@@ -2628,7 +2591,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
2628 if (*tok) { 2591 if (*tok) {
2629 ret = sort_dimension__add(list, tok, evlist, level); 2592 ret = sort_dimension__add(list, tok, evlist, level);
2630 if (ret == -EINVAL) { 2593 if (ret == -EINVAL) {
2631 if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) 2594 if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
2632 pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); 2595 pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
2633 else 2596 else
2634 pr_err("Invalid --sort key: `%s'", tok); 2597 pr_err("Invalid --sort key: `%s'", tok);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 035b62e2c60b..8bf302cafcec 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -112,6 +112,8 @@ struct hist_entry {
112 112
113 char level; 113 char level;
114 u8 filtered; 114 u8 filtered;
115
116 u16 callchain_size;
115 union { 117 union {
116 /* 118 /*
117 * Since perf diff only supports the stdio output, TUI 119 * Since perf diff only supports the stdio output, TUI
@@ -151,6 +153,11 @@ struct hist_entry {
151 struct callchain_root callchain[0]; /* must be last member */ 153 struct callchain_root callchain[0]; /* must be last member */
152}; 154};
153 155
156static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
157{
158 return he->callchain_size != 0;
159}
160
154static inline bool hist_entry__has_pairs(struct hist_entry *he) 161static inline bool hist_entry__has_pairs(struct hist_entry *he)
155{ 162{
156 return !list_empty(&he->pairs.node); 163 return !list_empty(&he->pairs.node);
@@ -186,13 +193,13 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he)
186static inline u64 cl_address(u64 address) 193static inline u64 cl_address(u64 address)
187{ 194{
188 /* return the cacheline of the address */ 195 /* return the cacheline of the address */
189 return (address & ~(cacheline_size - 1)); 196 return (address & ~(cacheline_size() - 1));
190} 197}
191 198
192static inline u64 cl_offset(u64 address) 199static inline u64 cl_offset(u64 address)
193{ 200{
194 /* return the cacheline of the address */ 201 /* return the cacheline of the address */
195 return (address & (cacheline_size - 1)); 202 return (address & (cacheline_size() - 1));
196} 203}
197 204
198enum sort_mode { 205enum sort_mode {
@@ -292,5 +299,5 @@ int64_t
292sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); 299sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
293int64_t 300int64_t
294sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); 301sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
295char *hist_entry__get_srcline(struct hist_entry *he); 302char *hist_entry__srcline(struct hist_entry *he);
296#endif /* __PERF_SORT_H */ 303#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 3c21fd059b64..09d6746e6ec8 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -103,6 +103,7 @@ static struct symbol *new_inline_sym(struct dso *dso,
103 inline_sym = symbol__new(base_sym ? base_sym->start : 0, 103 inline_sym = symbol__new(base_sym ? base_sym->start : 0,
104 base_sym ? base_sym->end : 0, 104 base_sym ? base_sym->end : 0,
105 base_sym ? base_sym->binding : 0, 105 base_sym ? base_sym->binding : 0,
106 base_sym ? base_sym->type : 0,
106 funcname); 107 funcname);
107 if (inline_sym) 108 if (inline_sym)
108 inline_sym->inlined = 1; 109 inline_sym->inlined = 1;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 8f56ba4fd258..36efb986f7fc 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -7,8 +7,7 @@
7#include "xyarray.h" 7#include "xyarray.h"
8#include "rblist.h" 8#include "rblist.h"
9 9
10struct stats 10struct stats {
11{
12 double n, mean, M2; 11 double n, mean, M2;
13 u64 max, min; 12 u64 max, min;
14}; 13};
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2de770511e70..29770ea61768 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -114,16 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
114 sym->st_shndx != SHN_ABS; 114 sym->st_shndx != SHN_ABS;
115} 115}
116 116
117static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) 117static bool elf_sym__filter(GElf_Sym *sym)
118{ 118{
119 switch (type) { 119 return elf_sym__is_function(sym) || elf_sym__is_object(sym);
120 case MAP__FUNCTION:
121 return elf_sym__is_function(sym);
122 case MAP__VARIABLE:
123 return elf_sym__is_object(sym);
124 default:
125 return false;
126 }
127} 120}
128 121
129static inline const char *elf_sym__name(const GElf_Sym *sym, 122static inline const char *elf_sym__name(const GElf_Sym *sym,
@@ -150,17 +143,10 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
150 return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; 143 return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
151} 144}
152 145
153static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, 146static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
154 enum map_type type)
155{ 147{
156 switch (type) { 148 return elf_sec__is_text(shdr, secstrs) ||
157 case MAP__FUNCTION: 149 elf_sec__is_data(shdr, secstrs);
158 return elf_sec__is_text(shdr, secstrs);
159 case MAP__VARIABLE:
160 return elf_sec__is_data(shdr, secstrs);
161 default:
162 return false;
163 }
164} 150}
165 151
166static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) 152static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
@@ -256,7 +242,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
256 * And always look at the original dso, not at debuginfo packages, that 242 * And always look at the original dso, not at debuginfo packages, that
257 * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 243 * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
258 */ 244 */
259int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map) 245int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
260{ 246{
261 uint32_t nr_rel_entries, idx; 247 uint32_t nr_rel_entries, idx;
262 GElf_Sym sym; 248 GElf_Sym sym;
@@ -364,12 +350,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
364 free(demangled); 350 free(demangled);
365 351
366 f = symbol__new(plt_offset, plt_entry_size, 352 f = symbol__new(plt_offset, plt_entry_size,
367 STB_GLOBAL, sympltname); 353 STB_GLOBAL, STT_FUNC, sympltname);
368 if (!f) 354 if (!f)
369 goto out_elf_end; 355 goto out_elf_end;
370 356
371 plt_offset += plt_entry_size; 357 plt_offset += plt_entry_size;
372 symbols__insert(&dso->symbols[map->type], f); 358 symbols__insert(&dso->symbols, f);
373 ++nr; 359 ++nr;
374 } 360 }
375 } else if (shdr_rel_plt.sh_type == SHT_REL) { 361 } else if (shdr_rel_plt.sh_type == SHT_REL) {
@@ -390,12 +376,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *
390 free(demangled); 376 free(demangled);
391 377
392 f = symbol__new(plt_offset, plt_entry_size, 378 f = symbol__new(plt_offset, plt_entry_size,
393 STB_GLOBAL, sympltname); 379 STB_GLOBAL, STT_FUNC, sympltname);
394 if (!f) 380 if (!f)
395 goto out_elf_end; 381 goto out_elf_end;
396 382
397 plt_offset += plt_entry_size; 383 plt_offset += plt_entry_size;
398 symbols__insert(&dso->symbols[map->type], f); 384 symbols__insert(&dso->symbols, f);
399 ++nr; 385 ++nr;
400 } 386 }
401 } 387 }
@@ -811,6 +797,110 @@ static u64 ref_reloc(struct kmap *kmap)
811void __weak arch__sym_update(struct symbol *s __maybe_unused, 797void __weak arch__sym_update(struct symbol *s __maybe_unused,
812 GElf_Sym *sym __maybe_unused) { } 798 GElf_Sym *sym __maybe_unused) { }
813 799
800static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
801 GElf_Sym *sym, GElf_Shdr *shdr,
802 struct map_groups *kmaps, struct kmap *kmap,
803 struct dso **curr_dsop, struct map **curr_mapp,
804 const char *section_name,
805 bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
806{
807 struct dso *curr_dso = *curr_dsop;
808 struct map *curr_map;
809 char dso_name[PATH_MAX];
810
811 /* Adjust symbol to map to file offset */
812 if (adjust_kernel_syms)
813 sym->st_value -= shdr->sh_addr - shdr->sh_offset;
814
815 if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
816 return 0;
817
818 if (strcmp(section_name, ".text") == 0) {
819 /*
820 * The initial kernel mapping is based on
821 * kallsyms and identity maps. Overwrite it to
822 * map to the kernel dso.
823 */
824 if (*remap_kernel && dso->kernel) {
825 *remap_kernel = false;
826 map->start = shdr->sh_addr + ref_reloc(kmap);
827 map->end = map->start + shdr->sh_size;
828 map->pgoff = shdr->sh_offset;
829 map->map_ip = map__map_ip;
830 map->unmap_ip = map__unmap_ip;
831 /* Ensure maps are correctly ordered */
832 if (kmaps) {
833 map__get(map);
834 map_groups__remove(kmaps, map);
835 map_groups__insert(kmaps, map);
836 map__put(map);
837 }
838 }
839
840 /*
841 * The initial module mapping is based on
842 * /proc/modules mapped to offset zero.
843 * Overwrite it to map to the module dso.
844 */
845 if (*remap_kernel && kmodule) {
846 *remap_kernel = false;
847 map->pgoff = shdr->sh_offset;
848 }
849
850 *curr_mapp = map;
851 *curr_dsop = dso;
852 return 0;
853 }
854
855 if (!kmap)
856 return 0;
857
858 snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
859
860 curr_map = map_groups__find_by_name(kmaps, dso_name);
861 if (curr_map == NULL) {
862 u64 start = sym->st_value;
863
864 if (kmodule)
865 start += map->start + shdr->sh_offset;
866
867 curr_dso = dso__new(dso_name);
868 if (curr_dso == NULL)
869 return -1;
870 curr_dso->kernel = dso->kernel;
871 curr_dso->long_name = dso->long_name;
872 curr_dso->long_name_len = dso->long_name_len;
873 curr_map = map__new2(start, curr_dso);
874 dso__put(curr_dso);
875 if (curr_map == NULL)
876 return -1;
877
878 if (adjust_kernel_syms) {
879 curr_map->start = shdr->sh_addr + ref_reloc(kmap);
880 curr_map->end = curr_map->start + shdr->sh_size;
881 curr_map->pgoff = shdr->sh_offset;
882 } else {
883 curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
884 }
885 curr_dso->symtab_type = dso->symtab_type;
886 map_groups__insert(kmaps, curr_map);
887 /*
888 * Add it before we drop the referece to curr_map, i.e. while
889 * we still are sure to have a reference to this DSO via
890 * *curr_map->dso.
891 */
892 dsos__add(&map->groups->machine->dsos, curr_dso);
893 /* kmaps already got it */
894 map__put(curr_map);
895 dso__set_loaded(curr_dso);
896 *curr_mapp = curr_map;
897 *curr_dsop = curr_dso;
898 } else
899 *curr_dsop = curr_map->dso;
900
901 return 0;
902}
903
814int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, 904int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
815 struct symsrc *runtime_ss, int kmodule) 905 struct symsrc *runtime_ss, int kmodule)
816{ 906{
@@ -844,7 +934,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
844 * have the wrong values for the dso maps, so remove them. 934 * have the wrong values for the dso maps, so remove them.
845 */ 935 */
846 if (kmodule && syms_ss->symtab) 936 if (kmodule && syms_ss->symtab)
847 symbols__delete(&dso->symbols[map->type]); 937 symbols__delete(&dso->symbols);
848 938
849 if (!syms_ss->symtab) { 939 if (!syms_ss->symtab) {
850 /* 940 /*
@@ -921,10 +1011,10 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
921 1011
922 dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); 1012 dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
923 /* 1013 /*
924 * Initial kernel and module mappings do not map to the dso. For 1014 * Initial kernel and module mappings do not map to the dso.
925 * function mappings, flag the fixups. 1015 * Flag the fixups.
926 */ 1016 */
927 if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) { 1017 if (dso->kernel || kmodule) {
928 remap_kernel = true; 1018 remap_kernel = true;
929 adjust_kernel_syms = dso->adjust_symbols; 1019 adjust_kernel_syms = dso->adjust_symbols;
930 } 1020 }
@@ -936,7 +1026,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
936 const char *section_name; 1026 const char *section_name;
937 bool used_opd = false; 1027 bool used_opd = false;
938 1028
939 if (!is_label && !elf_sym__is_a(&sym, map->type)) 1029 if (!is_label && !elf_sym__filter(&sym))
940 continue; 1030 continue;
941 1031
942 /* Reject ARM ELF "mapping symbols": these aren't unique and 1032 /* Reject ARM ELF "mapping symbols": these aren't unique and
@@ -974,7 +1064,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
974 1064
975 gelf_getshdr(sec, &shdr); 1065 gelf_getshdr(sec, &shdr);
976 1066
977 if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) 1067 if (is_label && !elf_sec__filter(&shdr, secstrs))
978 continue; 1068 continue;
979 1069
980 section_name = elf_sec__name(&shdr, secstrs); 1070 section_name = elf_sec__name(&shdr, secstrs);
@@ -982,134 +1072,37 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
982 /* On ARM, symbols for thumb functions have 1 added to 1072 /* On ARM, symbols for thumb functions have 1 added to
983 * the symbol address as a flag - remove it */ 1073 * the symbol address as a flag - remove it */
984 if ((ehdr.e_machine == EM_ARM) && 1074 if ((ehdr.e_machine == EM_ARM) &&
985 (map->type == MAP__FUNCTION) && 1075 (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
986 (sym.st_value & 1)) 1076 (sym.st_value & 1))
987 --sym.st_value; 1077 --sym.st_value;
988 1078
989 if (dso->kernel || kmodule) { 1079 if (dso->kernel || kmodule) {
990 char dso_name[PATH_MAX]; 1080 if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
991 1081 section_name, adjust_kernel_syms, kmodule, &remap_kernel))
992 /* Adjust symbol to map to file offset */ 1082 goto out_elf_end;
993 if (adjust_kernel_syms) 1083 } else if ((used_opd && runtime_ss->adjust_symbols) ||
994 sym.st_value -= shdr.sh_addr - shdr.sh_offset; 1084 (!used_opd && syms_ss->adjust_symbols)) {
995
996 if (strcmp(section_name,
997 (curr_dso->short_name +
998 dso->short_name_len)) == 0)
999 goto new_symbol;
1000
1001 if (strcmp(section_name, ".text") == 0) {
1002 /*
1003 * The initial kernel mapping is based on
1004 * kallsyms and identity maps. Overwrite it to
1005 * map to the kernel dso.
1006 */
1007 if (remap_kernel && dso->kernel) {
1008 remap_kernel = false;
1009 map->start = shdr.sh_addr +
1010 ref_reloc(kmap);
1011 map->end = map->start + shdr.sh_size;
1012 map->pgoff = shdr.sh_offset;
1013 map->map_ip = map__map_ip;
1014 map->unmap_ip = map__unmap_ip;
1015 /* Ensure maps are correctly ordered */
1016 if (kmaps) {
1017 map__get(map);
1018 map_groups__remove(kmaps, map);
1019 map_groups__insert(kmaps, map);
1020 map__put(map);
1021 }
1022 }
1023
1024 /*
1025 * The initial module mapping is based on
1026 * /proc/modules mapped to offset zero.
1027 * Overwrite it to map to the module dso.
1028 */
1029 if (remap_kernel && kmodule) {
1030 remap_kernel = false;
1031 map->pgoff = shdr.sh_offset;
1032 }
1033
1034 curr_map = map;
1035 curr_dso = dso;
1036 goto new_symbol;
1037 }
1038
1039 if (!kmap)
1040 goto new_symbol;
1041
1042 snprintf(dso_name, sizeof(dso_name),
1043 "%s%s", dso->short_name, section_name);
1044
1045 curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
1046 if (curr_map == NULL) {
1047 u64 start = sym.st_value;
1048
1049 if (kmodule)
1050 start += map->start + shdr.sh_offset;
1051
1052 curr_dso = dso__new(dso_name);
1053 if (curr_dso == NULL)
1054 goto out_elf_end;
1055 curr_dso->kernel = dso->kernel;
1056 curr_dso->long_name = dso->long_name;
1057 curr_dso->long_name_len = dso->long_name_len;
1058 curr_map = map__new2(start, curr_dso,
1059 map->type);
1060 dso__put(curr_dso);
1061 if (curr_map == NULL) {
1062 goto out_elf_end;
1063 }
1064 if (adjust_kernel_syms) {
1065 curr_map->start = shdr.sh_addr +
1066 ref_reloc(kmap);
1067 curr_map->end = curr_map->start +
1068 shdr.sh_size;
1069 curr_map->pgoff = shdr.sh_offset;
1070 } else {
1071 curr_map->map_ip = identity__map_ip;
1072 curr_map->unmap_ip = identity__map_ip;
1073 }
1074 curr_dso->symtab_type = dso->symtab_type;
1075 map_groups__insert(kmaps, curr_map);
1076 /*
1077 * Add it before we drop the referece to curr_map,
1078 * i.e. while we still are sure to have a reference
1079 * to this DSO via curr_map->dso.
1080 */
1081 dsos__add(&map->groups->machine->dsos, curr_dso);
1082 /* kmaps already got it */
1083 map__put(curr_map);
1084 dso__set_loaded(curr_dso, map->type);
1085 } else
1086 curr_dso = curr_map->dso;
1087
1088 goto new_symbol;
1089 }
1090
1091 if ((used_opd && runtime_ss->adjust_symbols)
1092 || (!used_opd && syms_ss->adjust_symbols)) {
1093 pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " 1085 pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
1094 "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, 1086 "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
1095 (u64)sym.st_value, (u64)shdr.sh_addr, 1087 (u64)sym.st_value, (u64)shdr.sh_addr,
1096 (u64)shdr.sh_offset); 1088 (u64)shdr.sh_offset);
1097 sym.st_value -= shdr.sh_addr - shdr.sh_offset; 1089 sym.st_value -= shdr.sh_addr - shdr.sh_offset;
1098 } 1090 }
1099new_symbol: 1091
1100 demangled = demangle_sym(dso, kmodule, elf_name); 1092 demangled = demangle_sym(dso, kmodule, elf_name);
1101 if (demangled != NULL) 1093 if (demangled != NULL)
1102 elf_name = demangled; 1094 elf_name = demangled;
1103 1095
1104 f = symbol__new(sym.st_value, sym.st_size, 1096 f = symbol__new(sym.st_value, sym.st_size,
1105 GELF_ST_BIND(sym.st_info), elf_name); 1097 GELF_ST_BIND(sym.st_info),
1098 GELF_ST_TYPE(sym.st_info), elf_name);
1106 free(demangled); 1099 free(demangled);
1107 if (!f) 1100 if (!f)
1108 goto out_elf_end; 1101 goto out_elf_end;
1109 1102
1110 arch__sym_update(f, &sym); 1103 arch__sym_update(f, &sym);
1111 1104
1112 __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); 1105 __symbols__insert(&curr_dso->symbols, f, dso->kernel);
1113 nr++; 1106 nr++;
1114 } 1107 }
1115 1108
@@ -1117,14 +1110,14 @@ new_symbol:
1117 * For misannotated, zeroed, ASM function sizes. 1110 * For misannotated, zeroed, ASM function sizes.
1118 */ 1111 */
1119 if (nr > 0) { 1112 if (nr > 0) {
1120 symbols__fixup_end(&dso->symbols[map->type]); 1113 symbols__fixup_end(&dso->symbols);
1121 symbols__fixup_duplicate(&dso->symbols[map->type]); 1114 symbols__fixup_duplicate(&dso->symbols);
1122 if (kmap) { 1115 if (kmap) {
1123 /* 1116 /*
1124 * We need to fixup this here too because we create new 1117 * We need to fixup this here too because we create new
1125 * maps here, for things like vsyscall sections. 1118 * maps here, for things like vsyscall sections.
1126 */ 1119 */
1127 __map_groups__fixup_end(kmaps, map->type); 1120 map_groups__fixup_end(kmaps);
1128 } 1121 }
1129 } 1122 }
1130 err = nr; 1123 err = nr;
@@ -1393,8 +1386,16 @@ static off_t kcore__write(struct kcore *kcore)
1393 1386
1394struct phdr_data { 1387struct phdr_data {
1395 off_t offset; 1388 off_t offset;
1389 off_t rel;
1396 u64 addr; 1390 u64 addr;
1397 u64 len; 1391 u64 len;
1392 struct list_head node;
1393 struct phdr_data *remaps;
1394};
1395
1396struct sym_data {
1397 u64 addr;
1398 struct list_head node;
1398}; 1399};
1399 1400
1400struct kcore_copy_info { 1401struct kcore_copy_info {
@@ -1404,16 +1405,78 @@ struct kcore_copy_info {
1404 u64 last_symbol; 1405 u64 last_symbol;
1405 u64 first_module; 1406 u64 first_module;
1406 u64 last_module_symbol; 1407 u64 last_module_symbol;
1407 struct phdr_data kernel_map; 1408 size_t phnum;
1408 struct phdr_data modules_map; 1409 struct list_head phdrs;
1410 struct list_head syms;
1409}; 1411};
1410 1412
1413#define kcore_copy__for_each_phdr(k, p) \
1414 list_for_each_entry((p), &(k)->phdrs, node)
1415
1416static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
1417{
1418 struct phdr_data *p = zalloc(sizeof(*p));
1419
1420 if (p) {
1421 p->addr = addr;
1422 p->len = len;
1423 p->offset = offset;
1424 }
1425
1426 return p;
1427}
1428
1429static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
1430 u64 addr, u64 len,
1431 off_t offset)
1432{
1433 struct phdr_data *p = phdr_data__new(addr, len, offset);
1434
1435 if (p)
1436 list_add_tail(&p->node, &kci->phdrs);
1437
1438 return p;
1439}
1440
1441static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
1442{
1443 struct phdr_data *p, *tmp;
1444
1445 list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
1446 list_del(&p->node);
1447 free(p);
1448 }
1449}
1450
1451static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
1452 u64 addr)
1453{
1454 struct sym_data *s = zalloc(sizeof(*s));
1455
1456 if (s) {
1457 s->addr = addr;
1458 list_add_tail(&s->node, &kci->syms);
1459 }
1460
1461 return s;
1462}
1463
1464static void kcore_copy__free_syms(struct kcore_copy_info *kci)
1465{
1466 struct sym_data *s, *tmp;
1467
1468 list_for_each_entry_safe(s, tmp, &kci->syms, node) {
1469 list_del(&s->node);
1470 free(s);
1471 }
1472}
1473
1411static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, 1474static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
1412 u64 start) 1475 u64 start)
1413{ 1476{
1414 struct kcore_copy_info *kci = arg; 1477 struct kcore_copy_info *kci = arg;
1415 1478
1416 if (!symbol_type__is_a(type, MAP__FUNCTION)) 1479 if (!kallsyms__is_function(type))
1417 return 0; 1480 return 0;
1418 1481
1419 if (strchr(name, '[')) { 1482 if (strchr(name, '[')) {
@@ -1438,6 +1501,9 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
1438 return 0; 1501 return 0;
1439 } 1502 }
1440 1503
1504 if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
1505 return -1;
1506
1441 return 0; 1507 return 0;
1442} 1508}
1443 1509
@@ -1487,27 +1553,39 @@ static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
1487 return 0; 1553 return 0;
1488} 1554}
1489 1555
1490static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff, 1556static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
1491 u64 s, u64 e) 1557 u64 pgoff, u64 s, u64 e)
1492{ 1558{
1493 if (p->addr || s < start || s >= end) 1559 u64 len, offset;
1494 return; 1560
1561 if (s < start || s >= end)
1562 return 0;
1495 1563
1496 p->addr = s; 1564 offset = (s - start) + pgoff;
1497 p->offset = (s - start) + pgoff; 1565 len = e < end ? e - s : end - s;
1498 p->len = e < end ? e - s : end - s; 1566
1567 return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
1499} 1568}
1500 1569
1501static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data) 1570static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
1502{ 1571{
1503 struct kcore_copy_info *kci = data; 1572 struct kcore_copy_info *kci = data;
1504 u64 end = start + len; 1573 u64 end = start + len;
1574 struct sym_data *sdat;
1505 1575
1506 kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext, 1576 if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
1507 kci->etext); 1577 return -1;
1508 1578
1509 kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module, 1579 if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
1510 kci->last_module_symbol); 1580 kci->last_module_symbol))
1581 return -1;
1582
1583 list_for_each_entry(sdat, &kci->syms, node) {
1584 u64 s = round_down(sdat->addr, page_size);
1585
1586 if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
1587 return -1;
1588 }
1511 1589
1512 return 0; 1590 return 0;
1513} 1591}
@@ -1520,6 +1598,64 @@ static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
1520 return 0; 1598 return 0;
1521} 1599}
1522 1600
1601static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
1602{
1603 struct phdr_data *p, *k = NULL;
1604 u64 kend;
1605
1606 if (!kci->stext)
1607 return;
1608
1609 /* Find phdr that corresponds to the kernel map (contains stext) */
1610 kcore_copy__for_each_phdr(kci, p) {
1611 u64 pend = p->addr + p->len - 1;
1612
1613 if (p->addr <= kci->stext && pend >= kci->stext) {
1614 k = p;
1615 break;
1616 }
1617 }
1618
1619 if (!k)
1620 return;
1621
1622 kend = k->offset + k->len;
1623
1624 /* Find phdrs that remap the kernel */
1625 kcore_copy__for_each_phdr(kci, p) {
1626 u64 pend = p->offset + p->len;
1627
1628 if (p == k)
1629 continue;
1630
1631 if (p->offset >= k->offset && pend <= kend)
1632 p->remaps = k;
1633 }
1634}
1635
1636static void kcore_copy__layout(struct kcore_copy_info *kci)
1637{
1638 struct phdr_data *p;
1639 off_t rel = 0;
1640
1641 kcore_copy__find_remaps(kci);
1642
1643 kcore_copy__for_each_phdr(kci, p) {
1644 if (!p->remaps) {
1645 p->rel = rel;
1646 rel += p->len;
1647 }
1648 kci->phnum += 1;
1649 }
1650
1651 kcore_copy__for_each_phdr(kci, p) {
1652 struct phdr_data *k = p->remaps;
1653
1654 if (k)
1655 p->rel = p->offset - k->offset + k->rel;
1656 }
1657}
1658
1523static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, 1659static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
1524 Elf *elf) 1660 Elf *elf)
1525{ 1661{
@@ -1555,7 +1691,12 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
1555 if (kci->first_module && !kci->last_module_symbol) 1691 if (kci->first_module && !kci->last_module_symbol)
1556 return -1; 1692 return -1;
1557 1693
1558 return kcore_copy__read_maps(kci, elf); 1694 if (kcore_copy__read_maps(kci, elf))
1695 return -1;
1696
1697 kcore_copy__layout(kci);
1698
1699 return 0;
1559} 1700}
1560 1701
1561static int kcore_copy__copy_file(const char *from_dir, const char *to_dir, 1702static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
@@ -1678,12 +1819,15 @@ int kcore_copy(const char *from_dir, const char *to_dir)
1678{ 1819{
1679 struct kcore kcore; 1820 struct kcore kcore;
1680 struct kcore extract; 1821 struct kcore extract;
1681 size_t count = 2;
1682 int idx = 0, err = -1; 1822 int idx = 0, err = -1;
1683 off_t offset = page_size, sz, modules_offset = 0; 1823 off_t offset, sz;
1684 struct kcore_copy_info kci = { .stext = 0, }; 1824 struct kcore_copy_info kci = { .stext = 0, };
1685 char kcore_filename[PATH_MAX]; 1825 char kcore_filename[PATH_MAX];
1686 char extract_filename[PATH_MAX]; 1826 char extract_filename[PATH_MAX];
1827 struct phdr_data *p;
1828
1829 INIT_LIST_HEAD(&kci.phdrs);
1830 INIT_LIST_HEAD(&kci.syms);
1687 1831
1688 if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms")) 1832 if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
1689 return -1; 1833 return -1;
@@ -1703,20 +1847,17 @@ int kcore_copy(const char *from_dir, const char *to_dir)
1703 if (kcore__init(&extract, extract_filename, kcore.elfclass, false)) 1847 if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
1704 goto out_kcore_close; 1848 goto out_kcore_close;
1705 1849
1706 if (!kci.modules_map.addr) 1850 if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
1707 count -= 1;
1708
1709 if (kcore__copy_hdr(&kcore, &extract, count))
1710 goto out_extract_close; 1851 goto out_extract_close;
1711 1852
1712 if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr, 1853 offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
1713 kci.kernel_map.len)) 1854 gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
1714 goto out_extract_close; 1855 offset = round_up(offset, page_size);
1856
1857 kcore_copy__for_each_phdr(&kci, p) {
1858 off_t offs = p->rel + offset;
1715 1859
1716 if (kci.modules_map.addr) { 1860 if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
1717 modules_offset = offset + kci.kernel_map.len;
1718 if (kcore__add_phdr(&extract, idx, modules_offset,
1719 kci.modules_map.addr, kci.modules_map.len))
1720 goto out_extract_close; 1861 goto out_extract_close;
1721 } 1862 }
1722 1863
@@ -1724,14 +1865,14 @@ int kcore_copy(const char *from_dir, const char *to_dir)
1724 if (sz < 0 || sz > offset) 1865 if (sz < 0 || sz > offset)
1725 goto out_extract_close; 1866 goto out_extract_close;
1726 1867
1727 if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset, 1868 kcore_copy__for_each_phdr(&kci, p) {
1728 kci.kernel_map.len)) 1869 off_t offs = p->rel + offset;
1729 goto out_extract_close;
1730 1870
1731 if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset, 1871 if (p->remaps)
1732 extract.fd, modules_offset, 1872 continue;
1733 kci.modules_map.len)) 1873 if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
1734 goto out_extract_close; 1874 goto out_extract_close;
1875 }
1735 1876
1736 if (kcore_copy__compare_file(from_dir, to_dir, "modules")) 1877 if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
1737 goto out_extract_close; 1878 goto out_extract_close;
@@ -1754,6 +1895,9 @@ out_unlink_kallsyms:
1754 if (err) 1895 if (err)
1755 kcore_copy__unlink(to_dir, "kallsyms"); 1896 kcore_copy__unlink(to_dir, "kallsyms");
1756 1897
1898 kcore_copy__free_phdrs(&kci);
1899 kcore_copy__free_syms(&kci);
1900
1757 return err; 1901 return err;
1758} 1902}
1759 1903
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index ff48d0d49584..7119df77dc0b 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -288,8 +288,7 @@ void symsrc__destroy(struct symsrc *ss)
288} 288}
289 289
290int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, 290int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
291 struct symsrc *ss __maybe_unused, 291 struct symsrc *ss __maybe_unused)
292 struct map *map __maybe_unused)
293{ 292{
294 return 0; 293 return 0;
295} 294}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1466814ebada..d188b7588152 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -5,6 +5,7 @@
5#include <stdio.h> 5#include <stdio.h>
6#include <string.h> 6#include <string.h>
7#include <linux/kernel.h> 7#include <linux/kernel.h>
8#include <linux/mman.h>
8#include <sys/types.h> 9#include <sys/types.h>
9#include <sys/stat.h> 10#include <sys/stat.h>
10#include <sys/param.h> 11#include <sys/param.h>
@@ -39,7 +40,6 @@ char **vmlinux_path;
39struct symbol_conf symbol_conf = { 40struct symbol_conf symbol_conf = {
40 .use_modules = true, 41 .use_modules = true,
41 .try_vmlinux_path = true, 42 .try_vmlinux_path = true,
42 .annotate_src = true,
43 .demangle = true, 43 .demangle = true,
44 .demangle_kernel = false, 44 .demangle_kernel = false,
45 .cumulate_callchain = true, 45 .cumulate_callchain = true,
@@ -70,18 +70,10 @@ static enum dso_binary_type binary_type_symtab[] = {
70 70
71#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) 71#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
72 72
73bool symbol_type__is_a(char symbol_type, enum map_type map_type) 73static bool symbol_type__filter(char symbol_type)
74{ 74{
75 symbol_type = toupper(symbol_type); 75 symbol_type = toupper(symbol_type);
76 76 return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
77 switch (map_type) {
78 case MAP__FUNCTION:
79 return symbol_type == 'T' || symbol_type == 'W';
80 case MAP__VARIABLE:
81 return symbol_type == 'D';
82 default:
83 return false;
84 }
85} 77}
86 78
87static int prefix_underscores_count(const char *str) 79static int prefix_underscores_count(const char *str)
@@ -228,9 +220,9 @@ void symbols__fixup_end(struct rb_root *symbols)
228 curr->end = roundup(curr->start, 4096) + 4096; 220 curr->end = roundup(curr->start, 4096) + 4096;
229} 221}
230 222
231void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) 223void map_groups__fixup_end(struct map_groups *mg)
232{ 224{
233 struct maps *maps = &mg->maps[type]; 225 struct maps *maps = &mg->maps;
234 struct map *next, *curr; 226 struct map *next, *curr;
235 227
236 down_write(&maps->lock); 228 down_write(&maps->lock);
@@ -256,7 +248,7 @@ out_unlock:
256 up_write(&maps->lock); 248 up_write(&maps->lock);
257} 249}
258 250
259struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) 251struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
260{ 252{
261 size_t namelen = strlen(name) + 1; 253 size_t namelen = strlen(name) + 1;
262 struct symbol *sym = calloc(1, (symbol_conf.priv_size + 254 struct symbol *sym = calloc(1, (symbol_conf.priv_size +
@@ -274,6 +266,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
274 266
275 sym->start = start; 267 sym->start = start;
276 sym->end = len ? start + len : start; 268 sym->end = len ? start + len : start;
269 sym->type = type;
277 sym->binding = binding; 270 sym->binding = binding;
278 sym->namelen = namelen - 1; 271 sym->namelen = namelen - 1;
279 272
@@ -484,45 +477,40 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
484 477
485void dso__reset_find_symbol_cache(struct dso *dso) 478void dso__reset_find_symbol_cache(struct dso *dso)
486{ 479{
487 enum map_type type; 480 dso->last_find_result.addr = 0;
488 481 dso->last_find_result.symbol = NULL;
489 for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
490 dso->last_find_result[type].addr = 0;
491 dso->last_find_result[type].symbol = NULL;
492 }
493} 482}
494 483
495void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) 484void dso__insert_symbol(struct dso *dso, struct symbol *sym)
496{ 485{
497 __symbols__insert(&dso->symbols[type], sym, dso->kernel); 486 __symbols__insert(&dso->symbols, sym, dso->kernel);
498 487
499 /* update the symbol cache if necessary */ 488 /* update the symbol cache if necessary */
500 if (dso->last_find_result[type].addr >= sym->start && 489 if (dso->last_find_result.addr >= sym->start &&
501 (dso->last_find_result[type].addr < sym->end || 490 (dso->last_find_result.addr < sym->end ||
502 sym->start == sym->end)) { 491 sym->start == sym->end)) {
503 dso->last_find_result[type].symbol = sym; 492 dso->last_find_result.symbol = sym;
504 } 493 }
505} 494}
506 495
507struct symbol *dso__find_symbol(struct dso *dso, 496struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
508 enum map_type type, u64 addr)
509{ 497{
510 if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) { 498 if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
511 dso->last_find_result[type].addr = addr; 499 dso->last_find_result.addr = addr;
512 dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); 500 dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
513 } 501 }
514 502
515 return dso->last_find_result[type].symbol; 503 return dso->last_find_result.symbol;
516} 504}
517 505
518struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) 506struct symbol *dso__first_symbol(struct dso *dso)
519{ 507{
520 return symbols__first(&dso->symbols[type]); 508 return symbols__first(&dso->symbols);
521} 509}
522 510
523struct symbol *dso__last_symbol(struct dso *dso, enum map_type type) 511struct symbol *dso__last_symbol(struct dso *dso)
524{ 512{
525 return symbols__last(&dso->symbols[type]); 513 return symbols__last(&dso->symbols);
526} 514}
527 515
528struct symbol *dso__next_symbol(struct symbol *sym) 516struct symbol *dso__next_symbol(struct symbol *sym)
@@ -539,24 +527,22 @@ struct symbol *symbol__next_by_name(struct symbol *sym)
539} 527}
540 528
541 /* 529 /*
542 * Teturns first symbol that matched with @name. 530 * Returns first symbol that matched with @name.
543 */ 531 */
544struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, 532struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
545 const char *name)
546{ 533{
547 struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name, 534 struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
548 SYMBOL_TAG_INCLUDE__NONE); 535 SYMBOL_TAG_INCLUDE__NONE);
549 if (!s) 536 if (!s)
550 s = symbols__find_by_name(&dso->symbol_names[type], name, 537 s = symbols__find_by_name(&dso->symbol_names, name,
551 SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); 538 SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
552 return s; 539 return s;
553} 540}
554 541
555void dso__sort_by_name(struct dso *dso, enum map_type type) 542void dso__sort_by_name(struct dso *dso)
556{ 543{
557 dso__set_sorted_by_name(dso, type); 544 dso__set_sorted_by_name(dso);
558 return symbols__sort_by_name(&dso->symbol_names[type], 545 return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
559 &dso->symbols[type]);
560} 546}
561 547
562int modules__parse(const char *filename, void *arg, 548int modules__parse(const char *filename, void *arg,
@@ -621,11 +607,6 @@ out:
621 return err; 607 return err;
622} 608}
623 609
624struct process_kallsyms_args {
625 struct map *map;
626 struct dso *dso;
627};
628
629/* 610/*
630 * These are symbols in the kernel image, so make sure that 611 * These are symbols in the kernel image, so make sure that
631 * sym is from a kernel DSO. 612 * sym is from a kernel DSO.
@@ -661,10 +642,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
661 char type, u64 start) 642 char type, u64 start)
662{ 643{
663 struct symbol *sym; 644 struct symbol *sym;
664 struct process_kallsyms_args *a = arg; 645 struct dso *dso = arg;
665 struct rb_root *root = &a->dso->symbols[a->map->type]; 646 struct rb_root *root = &dso->symbols;
666 647
667 if (!symbol_type__is_a(type, a->map->type)) 648 if (!symbol_type__filter(type))
668 return 0; 649 return 0;
669 650
670 /* 651 /*
@@ -672,7 +653,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
672 * symbols, setting length to 0, and rely on 653 * symbols, setting length to 0, and rely on
673 * symbols__fixup_end() to fix it up. 654 * symbols__fixup_end() to fix it up.
674 */ 655 */
675 sym = symbol__new(start, 0, kallsyms2elf_binding(type), name); 656 sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
676 if (sym == NULL) 657 if (sym == NULL)
677 return -ENOMEM; 658 return -ENOMEM;
678 /* 659 /*
@@ -689,21 +670,18 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
689 * so that we can in the next step set the symbol ->end address and then 670 * so that we can in the next step set the symbol ->end address and then
690 * call kernel_maps__split_kallsyms. 671 * call kernel_maps__split_kallsyms.
691 */ 672 */
692static int dso__load_all_kallsyms(struct dso *dso, const char *filename, 673static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
693 struct map *map)
694{ 674{
695 struct process_kallsyms_args args = { .map = map, .dso = dso, }; 675 return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
696 return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
697} 676}
698 677
699static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) 678static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso)
700{ 679{
701 struct map_groups *kmaps = map__kmaps(map);
702 struct map *curr_map; 680 struct map *curr_map;
703 struct symbol *pos; 681 struct symbol *pos;
704 int count = 0; 682 int count = 0;
705 struct rb_root old_root = dso->symbols[map->type]; 683 struct rb_root old_root = dso->symbols;
706 struct rb_root *root = &dso->symbols[map->type]; 684 struct rb_root *root = &dso->symbols;
707 struct rb_node *next = rb_first(root); 685 struct rb_node *next = rb_first(root);
708 686
709 if (!kmaps) 687 if (!kmaps)
@@ -723,7 +701,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
723 if (module) 701 if (module)
724 *module = '\0'; 702 *module = '\0';
725 703
726 curr_map = map_groups__find(kmaps, map->type, pos->start); 704 curr_map = map_groups__find(kmaps, pos->start);
727 705
728 if (!curr_map) { 706 if (!curr_map) {
729 symbol__delete(pos); 707 symbol__delete(pos);
@@ -733,7 +711,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
733 pos->start -= curr_map->start - curr_map->pgoff; 711 pos->start -= curr_map->start - curr_map->pgoff;
734 if (pos->end) 712 if (pos->end)
735 pos->end -= curr_map->start - curr_map->pgoff; 713 pos->end -= curr_map->start - curr_map->pgoff;
736 symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); 714 symbols__insert(&curr_map->dso->symbols, pos);
737 ++count; 715 ++count;
738 } 716 }
739 717
@@ -748,22 +726,25 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
748 * kernel range is broken in several maps, named [kernel].N, as we don't have 726 * kernel range is broken in several maps, named [kernel].N, as we don't have
749 * the original ELF section names vmlinux have. 727 * the original ELF section names vmlinux have.
750 */ 728 */
751static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) 729static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta,
730 struct map *initial_map)
752{ 731{
753 struct map_groups *kmaps = map__kmaps(map);
754 struct machine *machine; 732 struct machine *machine;
755 struct map *curr_map = map; 733 struct map *curr_map = initial_map;
756 struct symbol *pos; 734 struct symbol *pos;
757 int count = 0, moved = 0; 735 int count = 0, moved = 0;
758 struct rb_root *root = &dso->symbols[map->type]; 736 struct rb_root *root = &dso->symbols;
759 struct rb_node *next = rb_first(root); 737 struct rb_node *next = rb_first(root);
760 int kernel_range = 0; 738 int kernel_range = 0;
739 bool x86_64;
761 740
762 if (!kmaps) 741 if (!kmaps)
763 return -1; 742 return -1;
764 743
765 machine = kmaps->machine; 744 machine = kmaps->machine;
766 745
746 x86_64 = machine__is(machine, "x86_64");
747
767 while (next) { 748 while (next) {
768 char *module; 749 char *module;
769 750
@@ -778,7 +759,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
778 *module++ = '\0'; 759 *module++ = '\0';
779 760
780 if (strcmp(curr_map->dso->short_name, module)) { 761 if (strcmp(curr_map->dso->short_name, module)) {
781 if (curr_map != map && 762 if (curr_map != initial_map &&
782 dso->kernel == DSO_TYPE_GUEST_KERNEL && 763 dso->kernel == DSO_TYPE_GUEST_KERNEL &&
783 machine__is_default_guest(machine)) { 764 machine__is_default_guest(machine)) {
784 /* 765 /*
@@ -788,18 +769,16 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
788 * symbols are in its kmap. Mark it as 769 * symbols are in its kmap. Mark it as
789 * loaded. 770 * loaded.
790 */ 771 */
791 dso__set_loaded(curr_map->dso, 772 dso__set_loaded(curr_map->dso);
792 curr_map->type);
793 } 773 }
794 774
795 curr_map = map_groups__find_by_name(kmaps, 775 curr_map = map_groups__find_by_name(kmaps, module);
796 map->type, module);
797 if (curr_map == NULL) { 776 if (curr_map == NULL) {
798 pr_debug("%s/proc/{kallsyms,modules} " 777 pr_debug("%s/proc/{kallsyms,modules} "
799 "inconsistency while looking " 778 "inconsistency while looking "
800 "for \"%s\" module!\n", 779 "for \"%s\" module!\n",
801 machine->root_dir, module); 780 machine->root_dir, module);
802 curr_map = map; 781 curr_map = initial_map;
803 goto discard_symbol; 782 goto discard_symbol;
804 } 783 }
805 784
@@ -809,11 +788,21 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
809 } 788 }
810 /* 789 /*
811 * So that we look just like we get from .ko files, 790 * So that we look just like we get from .ko files,
812 * i.e. not prelinked, relative to map->start. 791 * i.e. not prelinked, relative to initial_map->start.
813 */ 792 */
814 pos->start = curr_map->map_ip(curr_map, pos->start); 793 pos->start = curr_map->map_ip(curr_map, pos->start);
815 pos->end = curr_map->map_ip(curr_map, pos->end); 794 pos->end = curr_map->map_ip(curr_map, pos->end);
816 } else if (curr_map != map) { 795 } else if (x86_64 && is_entry_trampoline(pos->name)) {
796 /*
797 * These symbols are not needed anymore since the
798 * trampoline maps refer to the text section and it's
799 * symbols instead. Avoid having to deal with
800 * relocations, and the assumption that the first symbol
801 * is the start of kernel text, by simply removing the
802 * symbols at this point.
803 */
804 goto discard_symbol;
805 } else if (curr_map != initial_map) {
817 char dso_name[PATH_MAX]; 806 char dso_name[PATH_MAX];
818 struct dso *ndso; 807 struct dso *ndso;
819 808
@@ -824,7 +813,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
824 } 813 }
825 814
826 if (count == 0) { 815 if (count == 0) {
827 curr_map = map; 816 curr_map = initial_map;
828 goto add_symbol; 817 goto add_symbol;
829 } 818 }
830 819
@@ -843,7 +832,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
843 832
844 ndso->kernel = dso->kernel; 833 ndso->kernel = dso->kernel;
845 834
846 curr_map = map__new2(pos->start, ndso, map->type); 835 curr_map = map__new2(pos->start, ndso);
847 if (curr_map == NULL) { 836 if (curr_map == NULL) {
848 dso__put(ndso); 837 dso__put(ndso);
849 return -1; 838 return -1;
@@ -858,9 +847,9 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
858 pos->end -= delta; 847 pos->end -= delta;
859 } 848 }
860add_symbol: 849add_symbol:
861 if (curr_map != map) { 850 if (curr_map != initial_map) {
862 rb_erase(&pos->rb_node, root); 851 rb_erase(&pos->rb_node, root);
863 symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); 852 symbols__insert(&curr_map->dso->symbols, pos);
864 ++moved; 853 ++moved;
865 } else 854 } else
866 ++count; 855 ++count;
@@ -871,10 +860,10 @@ discard_symbol:
871 symbol__delete(pos); 860 symbol__delete(pos);
872 } 861 }
873 862
874 if (curr_map != map && 863 if (curr_map != initial_map &&
875 dso->kernel == DSO_TYPE_GUEST_KERNEL && 864 dso->kernel == DSO_TYPE_GUEST_KERNEL &&
876 machine__is_default_guest(kmaps->machine)) { 865 machine__is_default_guest(kmaps->machine)) {
877 dso__set_loaded(curr_map->dso, curr_map->type); 866 dso__set_loaded(curr_map->dso);
878 } 867 }
879 868
880 return count + moved; 869 return count + moved;
@@ -1035,7 +1024,12 @@ out_delete_from:
1035 return ret; 1024 return ret;
1036} 1025}
1037 1026
1038static int do_validate_kcore_modules(const char *filename, struct map *map, 1027struct map *map_groups__first(struct map_groups *mg)
1028{
1029 return maps__first(&mg->maps);
1030}
1031
1032static int do_validate_kcore_modules(const char *filename,
1039 struct map_groups *kmaps) 1033 struct map_groups *kmaps)
1040{ 1034{
1041 struct rb_root modules = RB_ROOT; 1035 struct rb_root modules = RB_ROOT;
@@ -1046,13 +1040,12 @@ static int do_validate_kcore_modules(const char *filename, struct map *map,
1046 if (err) 1040 if (err)
1047 return err; 1041 return err;
1048 1042
1049 old_map = map_groups__first(kmaps, map->type); 1043 old_map = map_groups__first(kmaps);
1050 while (old_map) { 1044 while (old_map) {
1051 struct map *next = map_groups__next(old_map); 1045 struct map *next = map_groups__next(old_map);
1052 struct module_info *mi; 1046 struct module_info *mi;
1053 1047
1054 if (old_map == map || old_map->start == map->start) { 1048 if (!__map__is_kmodule(old_map)) {
1055 /* The kernel map */
1056 old_map = next; 1049 old_map = next;
1057 continue; 1050 continue;
1058 } 1051 }
@@ -1109,7 +1102,7 @@ static int validate_kcore_modules(const char *kallsyms_filename,
1109 kallsyms_filename)) 1102 kallsyms_filename))
1110 return -EINVAL; 1103 return -EINVAL;
1111 1104
1112 if (do_validate_kcore_modules(modules_filename, map, kmaps)) 1105 if (do_validate_kcore_modules(modules_filename, kmaps))
1113 return -EINVAL; 1106 return -EINVAL;
1114 1107
1115 return 0; 1108 return 0;
@@ -1138,7 +1131,6 @@ static int validate_kcore_addresses(const char *kallsyms_filename,
1138 1131
1139struct kcore_mapfn_data { 1132struct kcore_mapfn_data {
1140 struct dso *dso; 1133 struct dso *dso;
1141 enum map_type type;
1142 struct list_head maps; 1134 struct list_head maps;
1143}; 1135};
1144 1136
@@ -1147,7 +1139,7 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
1147 struct kcore_mapfn_data *md = data; 1139 struct kcore_mapfn_data *md = data;
1148 struct map *map; 1140 struct map *map;
1149 1141
1150 map = map__new2(start, md->dso, md->type); 1142 map = map__new2(start, md->dso);
1151 if (map == NULL) 1143 if (map == NULL)
1152 return -ENOMEM; 1144 return -ENOMEM;
1153 1145
@@ -1163,13 +1155,13 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1163 const char *kallsyms_filename) 1155 const char *kallsyms_filename)
1164{ 1156{
1165 struct map_groups *kmaps = map__kmaps(map); 1157 struct map_groups *kmaps = map__kmaps(map);
1166 struct machine *machine;
1167 struct kcore_mapfn_data md; 1158 struct kcore_mapfn_data md;
1168 struct map *old_map, *new_map, *replacement_map = NULL; 1159 struct map *old_map, *new_map, *replacement_map = NULL;
1160 struct machine *machine;
1169 bool is_64_bit; 1161 bool is_64_bit;
1170 int err, fd; 1162 int err, fd;
1171 char kcore_filename[PATH_MAX]; 1163 char kcore_filename[PATH_MAX];
1172 struct symbol *sym; 1164 u64 stext;
1173 1165
1174 if (!kmaps) 1166 if (!kmaps)
1175 return -EINVAL; 1167 return -EINVAL;
@@ -1177,7 +1169,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1177 machine = kmaps->machine; 1169 machine = kmaps->machine;
1178 1170
1179 /* This function requires that the map is the kernel map */ 1171 /* This function requires that the map is the kernel map */
1180 if (map != machine->vmlinux_maps[map->type]) 1172 if (!__map__is_kernel(map))
1181 return -EINVAL; 1173 return -EINVAL;
1182 1174
1183 if (!filename_from_kallsyms_filename(kcore_filename, "kcore", 1175 if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
@@ -1189,7 +1181,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1189 return -EINVAL; 1181 return -EINVAL;
1190 1182
1191 md.dso = dso; 1183 md.dso = dso;
1192 md.type = map->type;
1193 INIT_LIST_HEAD(&md.maps); 1184 INIT_LIST_HEAD(&md.maps);
1194 1185
1195 fd = open(kcore_filename, O_RDONLY); 1186 fd = open(kcore_filename, O_RDONLY);
@@ -1200,7 +1191,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1200 } 1191 }
1201 1192
1202 /* Read new maps into temporary lists */ 1193 /* Read new maps into temporary lists */
1203 err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, 1194 err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
1204 &is_64_bit); 1195 &is_64_bit);
1205 if (err) 1196 if (err)
1206 goto out_err; 1197 goto out_err;
@@ -1212,7 +1203,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1212 } 1203 }
1213 1204
1214 /* Remove old maps */ 1205 /* Remove old maps */
1215 old_map = map_groups__first(kmaps, map->type); 1206 old_map = map_groups__first(kmaps);
1216 while (old_map) { 1207 while (old_map) {
1217 struct map *next = map_groups__next(old_map); 1208 struct map *next = map_groups__next(old_map);
1218 1209
@@ -1220,14 +1211,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1220 map_groups__remove(kmaps, old_map); 1211 map_groups__remove(kmaps, old_map);
1221 old_map = next; 1212 old_map = next;
1222 } 1213 }
1214 machine->trampolines_mapped = false;
1223 1215
1224 /* Find the kernel map using the first symbol */ 1216 /* Find the kernel map using the '_stext' symbol */
1225 sym = dso__first_symbol(dso, map->type); 1217 if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
1226 list_for_each_entry(new_map, &md.maps, node) { 1218 list_for_each_entry(new_map, &md.maps, node) {
1227 if (sym && sym->start >= new_map->start && 1219 if (stext >= new_map->start && stext < new_map->end) {
1228 sym->start < new_map->end) { 1220 replacement_map = new_map;
1229 replacement_map = new_map; 1221 break;
1230 break; 1222 }
1231 } 1223 }
1232 } 1224 }
1233 1225
@@ -1256,6 +1248,19 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1256 map__put(new_map); 1248 map__put(new_map);
1257 } 1249 }
1258 1250
1251 if (machine__is(machine, "x86_64")) {
1252 u64 addr;
1253
1254 /*
1255 * If one of the corresponding symbols is there, assume the
1256 * entry trampoline maps are too.
1257 */
1258 if (!kallsyms__get_function_start(kallsyms_filename,
1259 ENTRY_TRAMPOLINE_NAME,
1260 &addr))
1261 machine->trampolines_mapped = true;
1262 }
1263
1259 /* 1264 /*
1260 * Set the data type and long name so that kcore can be read via 1265 * Set the data type and long name so that kcore can be read via
1261 * dso__data_read_addr(). 1266 * dso__data_read_addr().
@@ -1268,7 +1273,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1268 1273
1269 close(fd); 1274 close(fd);
1270 1275
1271 if (map->type == MAP__FUNCTION) 1276 if (map->prot & PROT_EXEC)
1272 pr_debug("Using %s for kernel object code\n", kcore_filename); 1277 pr_debug("Using %s for kernel object code\n", kcore_filename);
1273 else 1278 else
1274 pr_debug("Using %s for kernel data\n", kcore_filename); 1279 pr_debug("Using %s for kernel data\n", kcore_filename);
@@ -1289,14 +1294,10 @@ out_err:
1289 * If the kernel is relocated at boot time, kallsyms won't match. Compute the 1294 * If the kernel is relocated at boot time, kallsyms won't match. Compute the
1290 * delta based on the relocation reference symbol. 1295 * delta based on the relocation reference symbol.
1291 */ 1296 */
1292static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) 1297static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
1293{ 1298{
1294 struct kmap *kmap = map__kmap(map);
1295 u64 addr; 1299 u64 addr;
1296 1300
1297 if (!kmap)
1298 return -1;
1299
1300 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) 1301 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
1301 return 0; 1302 return 0;
1302 1303
@@ -1310,19 +1311,23 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
1310int __dso__load_kallsyms(struct dso *dso, const char *filename, 1311int __dso__load_kallsyms(struct dso *dso, const char *filename,
1311 struct map *map, bool no_kcore) 1312 struct map *map, bool no_kcore)
1312{ 1313{
1314 struct kmap *kmap = map__kmap(map);
1313 u64 delta = 0; 1315 u64 delta = 0;
1314 1316
1315 if (symbol__restricted_filename(filename, "/proc/kallsyms")) 1317 if (symbol__restricted_filename(filename, "/proc/kallsyms"))
1316 return -1; 1318 return -1;
1317 1319
1318 if (dso__load_all_kallsyms(dso, filename, map) < 0) 1320 if (!kmap || !kmap->kmaps)
1319 return -1; 1321 return -1;
1320 1322
1321 if (kallsyms__delta(map, filename, &delta)) 1323 if (dso__load_all_kallsyms(dso, filename) < 0)
1322 return -1; 1324 return -1;
1323 1325
1324 symbols__fixup_end(&dso->symbols[map->type]); 1326 if (kallsyms__delta(kmap, filename, &delta))
1325 symbols__fixup_duplicate(&dso->symbols[map->type]); 1327 return -1;
1328
1329 symbols__fixup_end(&dso->symbols);
1330 symbols__fixup_duplicate(&dso->symbols);
1326 1331
1327 if (dso->kernel == DSO_TYPE_GUEST_KERNEL) 1332 if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
1328 dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; 1333 dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1330,9 +1335,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
1330 dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; 1335 dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
1331 1336
1332 if (!no_kcore && !dso__load_kcore(dso, map, filename)) 1337 if (!no_kcore && !dso__load_kcore(dso, map, filename))
1333 return dso__split_kallsyms_for_kcore(dso, map); 1338 return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso);
1334 else 1339 else
1335 return dso__split_kallsyms(dso, map, delta); 1340 return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map);
1336} 1341}
1337 1342
1338int dso__load_kallsyms(struct dso *dso, const char *filename, 1343int dso__load_kallsyms(struct dso *dso, const char *filename,
@@ -1341,8 +1346,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
1341 return __dso__load_kallsyms(dso, filename, map, false); 1346 return __dso__load_kallsyms(dso, filename, map, false);
1342} 1347}
1343 1348
1344static int dso__load_perf_map(const char *map_path, struct dso *dso, 1349static int dso__load_perf_map(const char *map_path, struct dso *dso)
1345 struct map *map)
1346{ 1350{
1347 char *line = NULL; 1351 char *line = NULL;
1348 size_t n; 1352 size_t n;
@@ -1379,12 +1383,12 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso,
1379 if (len + 2 >= line_len) 1383 if (len + 2 >= line_len)
1380 continue; 1384 continue;
1381 1385
1382 sym = symbol__new(start, size, STB_GLOBAL, line + len); 1386 sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
1383 1387
1384 if (sym == NULL) 1388 if (sym == NULL)
1385 goto out_delete_line; 1389 goto out_delete_line;
1386 1390
1387 symbols__insert(&dso->symbols[map->type], sym); 1391 symbols__insert(&dso->symbols, sym);
1388 nr_syms++; 1392 nr_syms++;
1389 } 1393 }
1390 1394
@@ -1509,25 +1513,27 @@ int dso__load(struct dso *dso, struct map *map)
1509 pthread_mutex_lock(&dso->lock); 1513 pthread_mutex_lock(&dso->lock);
1510 1514
1511 /* check again under the dso->lock */ 1515 /* check again under the dso->lock */
1512 if (dso__loaded(dso, map->type)) { 1516 if (dso__loaded(dso)) {
1513 ret = 1; 1517 ret = 1;
1514 goto out; 1518 goto out;
1515 } 1519 }
1516 1520
1521 if (map->groups && map->groups->machine)
1522 machine = map->groups->machine;
1523 else
1524 machine = NULL;
1525
1517 if (dso->kernel) { 1526 if (dso->kernel) {
1518 if (dso->kernel == DSO_TYPE_KERNEL) 1527 if (dso->kernel == DSO_TYPE_KERNEL)
1519 ret = dso__load_kernel_sym(dso, map); 1528 ret = dso__load_kernel_sym(dso, map);
1520 else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) 1529 else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
1521 ret = dso__load_guest_kernel_sym(dso, map); 1530 ret = dso__load_guest_kernel_sym(dso, map);
1522 1531
1532 if (machine__is(machine, "x86_64"))
1533 machine__map_x86_64_entry_trampolines(machine, dso);
1523 goto out; 1534 goto out;
1524 } 1535 }
1525 1536
1526 if (map->groups && map->groups->machine)
1527 machine = map->groups->machine;
1528 else
1529 machine = NULL;
1530
1531 dso->adjust_symbols = 0; 1537 dso->adjust_symbols = 0;
1532 1538
1533 if (perfmap) { 1539 if (perfmap) {
@@ -1542,7 +1548,7 @@ int dso__load(struct dso *dso, struct map *map)
1542 goto out; 1548 goto out;
1543 } 1549 }
1544 1550
1545 ret = dso__load_perf_map(map_path, dso, map); 1551 ret = dso__load_perf_map(map_path, dso);
1546 dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : 1552 dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
1547 DSO_BINARY_TYPE__NOT_FOUND; 1553 DSO_BINARY_TYPE__NOT_FOUND;
1548 goto out; 1554 goto out;
@@ -1651,7 +1657,7 @@ int dso__load(struct dso *dso, struct map *map)
1651 if (ret > 0) { 1657 if (ret > 0) {
1652 int nr_plt; 1658 int nr_plt;
1653 1659
1654 nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map); 1660 nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
1655 if (nr_plt > 0) 1661 if (nr_plt > 0)
1656 ret += nr_plt; 1662 ret += nr_plt;
1657 } 1663 }
@@ -1663,17 +1669,16 @@ out_free:
1663 if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) 1669 if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
1664 ret = 0; 1670 ret = 0;
1665out: 1671out:
1666 dso__set_loaded(dso, map->type); 1672 dso__set_loaded(dso);
1667 pthread_mutex_unlock(&dso->lock); 1673 pthread_mutex_unlock(&dso->lock);
1668 nsinfo__mountns_exit(&nsc); 1674 nsinfo__mountns_exit(&nsc);
1669 1675
1670 return ret; 1676 return ret;
1671} 1677}
1672 1678
1673struct map *map_groups__find_by_name(struct map_groups *mg, 1679struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
1674 enum map_type type, const char *name)
1675{ 1680{
1676 struct maps *maps = &mg->maps[type]; 1681 struct maps *maps = &mg->maps;
1677 struct map *map; 1682 struct map *map;
1678 1683
1679 down_read(&maps->lock); 1684 down_read(&maps->lock);
@@ -1720,7 +1725,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
1720 else 1725 else
1721 dso->binary_type = DSO_BINARY_TYPE__VMLINUX; 1726 dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
1722 dso__set_long_name(dso, vmlinux, vmlinux_allocated); 1727 dso__set_long_name(dso, vmlinux, vmlinux_allocated);
1723 dso__set_loaded(dso, map->type); 1728 dso__set_loaded(dso);
1724 pr_debug("Using %s for symbols\n", symfs_vmlinux); 1729 pr_debug("Using %s for symbols\n", symfs_vmlinux);
1725 } 1730 }
1726 1731
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 70c16741f50a..f25fae4b5743 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,8 @@ struct symbol {
57 u64 start; 57 u64 start;
58 u64 end; 58 u64 end;
59 u16 namelen; 59 u16 namelen;
60 u8 binding; 60 u8 type:4;
61 u8 binding:4;
61 u8 idle:1; 62 u8 idle:1;
62 u8 ignore:1; 63 u8 ignore:1;
63 u8 inlined:1; 64 u8 inlined:1;
@@ -89,7 +90,6 @@ struct intlist;
89 90
90struct symbol_conf { 91struct symbol_conf {
91 unsigned short priv_size; 92 unsigned short priv_size;
92 unsigned short nr_events;
93 bool try_vmlinux_path, 93 bool try_vmlinux_path,
94 init_annotation, 94 init_annotation,
95 force, 95 force,
@@ -108,8 +108,6 @@ struct symbol_conf {
108 show_cpu_utilization, 108 show_cpu_utilization,
109 initialized, 109 initialized,
110 kptr_restrict, 110 kptr_restrict,
111 annotate_asm_raw,
112 annotate_src,
113 event_group, 111 event_group,
114 demangle, 112 demangle,
115 demangle_kernel, 113 demangle_kernel,
@@ -259,17 +257,16 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
259 bool no_kcore); 257 bool no_kcore);
260int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); 258int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
261 259
262void dso__insert_symbol(struct dso *dso, enum map_type type, 260void dso__insert_symbol(struct dso *dso,
263 struct symbol *sym); 261 struct symbol *sym);
264 262
265struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, 263struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
266 u64 addr); 264struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
267struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, 265
268 const char *name);
269struct symbol *symbol__next_by_name(struct symbol *sym); 266struct symbol *symbol__next_by_name(struct symbol *sym);
270 267
271struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); 268struct symbol *dso__first_symbol(struct dso *dso);
272struct symbol *dso__last_symbol(struct dso *dso, enum map_type type); 269struct symbol *dso__last_symbol(struct dso *dso);
273struct symbol *dso__next_symbol(struct symbol *sym); 270struct symbol *dso__next_symbol(struct symbol *sym);
274 271
275enum dso_type dso__type_fd(int fd); 272enum dso_type dso__type_fd(int fd);
@@ -288,7 +285,7 @@ void symbol__exit(void);
288void symbol__elf_init(void); 285void symbol__elf_init(void);
289int symbol__annotation_init(void); 286int symbol__annotation_init(void);
290 287
291struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); 288struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
292size_t __symbol__fprintf_symname_offs(const struct symbol *sym, 289size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
293 const struct addr_location *al, 290 const struct addr_location *al,
294 bool unknown_as_addr, 291 bool unknown_as_addr,
@@ -300,7 +297,6 @@ size_t __symbol__fprintf_symname(const struct symbol *sym,
300 bool unknown_as_addr, FILE *fp); 297 bool unknown_as_addr, FILE *fp);
301size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); 298size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
302size_t symbol__fprintf(struct symbol *sym, FILE *fp); 299size_t symbol__fprintf(struct symbol *sym, FILE *fp);
303bool symbol_type__is_a(char symbol_type, enum map_type map_type);
304bool symbol__restricted_filename(const char *filename, 300bool symbol__restricted_filename(const char *filename,
305 const char *restricted_filename); 301 const char *restricted_filename);
306int symbol__config_symfs(const struct option *opt __maybe_unused, 302int symbol__config_symfs(const struct option *opt __maybe_unused,
@@ -308,8 +304,7 @@ int symbol__config_symfs(const struct option *opt __maybe_unused,
308 304
309int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, 305int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
310 struct symsrc *runtime_ss, int kmodule); 306 struct symsrc *runtime_ss, int kmodule);
311int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, 307int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
312 struct map *map);
313 308
314char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); 309char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
315 310
@@ -317,7 +312,7 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
317void symbols__insert(struct rb_root *symbols, struct symbol *sym); 312void symbols__insert(struct rb_root *symbols, struct symbol *sym);
318void symbols__fixup_duplicate(struct rb_root *symbols); 313void symbols__fixup_duplicate(struct rb_root *symbols);
319void symbols__fixup_end(struct rb_root *symbols); 314void symbols__fixup_end(struct rb_root *symbols);
320void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); 315void map_groups__fixup_end(struct map_groups *mg);
321 316
322typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); 317typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
323int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, 318int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index 6dd2cb88ccbe..ed0205cc7942 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -58,13 +58,13 @@ size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
58} 58}
59 59
60size_t dso__fprintf_symbols_by_name(struct dso *dso, 60size_t dso__fprintf_symbols_by_name(struct dso *dso,
61 enum map_type type, FILE *fp) 61 FILE *fp)
62{ 62{
63 size_t ret = 0; 63 size_t ret = 0;
64 struct rb_node *nd; 64 struct rb_node *nd;
65 struct symbol_name_rb_node *pos; 65 struct symbol_name_rb_node *pos;
66 66
67 for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { 67 for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
68 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); 68 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
69 fprintf(fp, "%s\n", pos->sym.name); 69 fprintf(fp, "%s\n", pos->sym.name);
70 } 70 }
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 68b65b10579b..2048d393ece6 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -302,23 +302,20 @@ int thread__insert_map(struct thread *thread, struct map *map)
302static int __thread__prepare_access(struct thread *thread) 302static int __thread__prepare_access(struct thread *thread)
303{ 303{
304 bool initialized = false; 304 bool initialized = false;
305 int i, err = 0; 305 int err = 0;
306 306 struct maps *maps = &thread->mg->maps;
307 for (i = 0; i < MAP__NR_TYPES; ++i) { 307 struct map *map;
308 struct maps *maps = &thread->mg->maps[i];
309 struct map *map;
310 308
311 down_read(&maps->lock); 309 down_read(&maps->lock);
312 310
313 for (map = maps__first(maps); map; map = map__next(map)) { 311 for (map = maps__first(maps); map; map = map__next(map)) {
314 err = unwind__prepare_access(thread, map, &initialized); 312 err = unwind__prepare_access(thread, map, &initialized);
315 if (err || initialized) 313 if (err || initialized)
316 break; 314 break;
317 }
318
319 up_read(&maps->lock);
320 } 315 }
321 316
317 up_read(&maps->lock);
318
322 return err; 319 return err;
323} 320}
324 321
@@ -335,8 +332,6 @@ static int thread__prepare_access(struct thread *thread)
335static int thread__clone_map_groups(struct thread *thread, 332static int thread__clone_map_groups(struct thread *thread,
336 struct thread *parent) 333 struct thread *parent)
337{ 334{
338 int i;
339
340 /* This is new thread, we share map groups for process. */ 335 /* This is new thread, we share map groups for process. */
341 if (thread->pid_ == parent->pid_) 336 if (thread->pid_ == parent->pid_)
342 return thread__prepare_access(thread); 337 return thread__prepare_access(thread);
@@ -348,9 +343,8 @@ static int thread__clone_map_groups(struct thread *thread,
348 } 343 }
349 344
350 /* But this one is new process, copy maps. */ 345 /* But this one is new process, copy maps. */
351 for (i = 0; i < MAP__NR_TYPES; ++i) 346 if (map_groups__clone(thread, parent->mg) < 0)
352 if (map_groups__clone(thread, parent->mg, i) < 0) 347 return -ENOMEM;
353 return -ENOMEM;
354 348
355 return 0; 349 return 0;
356} 350}
@@ -371,8 +365,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
371 return thread__clone_map_groups(thread, parent); 365 return thread__clone_map_groups(thread, parent);
372} 366}
373 367
374void thread__find_cpumode_addr_location(struct thread *thread, 368void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
375 enum map_type type, u64 addr,
376 struct addr_location *al) 369 struct addr_location *al)
377{ 370{
378 size_t i; 371 size_t i;
@@ -384,7 +377,7 @@ void thread__find_cpumode_addr_location(struct thread *thread,
384 }; 377 };
385 378
386 for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { 379 for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
387 thread__find_addr_location(thread, cpumodes[i], type, addr, al); 380 thread__find_symbol(thread, cpumodes[i], addr, al);
388 if (al->map) 381 if (al->map)
389 break; 382 break;
390 } 383 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 14d44c3235b8..07606aa6998d 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -92,16 +92,13 @@ size_t thread__fprintf(struct thread *thread, FILE *fp);
92 92
93struct thread *thread__main_thread(struct machine *machine, struct thread *thread); 93struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
94 94
95void thread__find_addr_map(struct thread *thread, 95struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
96 u8 cpumode, enum map_type type, u64 addr, 96 struct addr_location *al);
97 struct addr_location *al);
98 97
99void thread__find_addr_location(struct thread *thread, 98struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
100 u8 cpumode, enum map_type type, u64 addr, 99 u64 addr, struct addr_location *al);
101 struct addr_location *al);
102 100
103void thread__find_cpumode_addr_location(struct thread *thread, 101void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
104 enum map_type type, u64 addr,
105 struct addr_location *al); 102 struct addr_location *al);
106 103
107static inline void *thread__priv(struct thread *thread) 104static inline void *thread__priv(struct thread *thread)
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 9892323cdd7c..9add1f72ce95 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -3,6 +3,7 @@
3#define __PERF_TOP_H 1 3#define __PERF_TOP_H 1
4 4
5#include "tool.h" 5#include "tool.h"
6#include "annotate.h"
6#include <linux/types.h> 7#include <linux/types.h>
7#include <stddef.h> 8#include <stddef.h>
8#include <stdbool.h> 9#include <stdbool.h>
@@ -16,6 +17,7 @@ struct perf_top {
16 struct perf_tool tool; 17 struct perf_tool tool;
17 struct perf_evlist *evlist; 18 struct perf_evlist *evlist;
18 struct record_opts record_opts; 19 struct record_opts record_opts;
20 struct annotation_options annotation_opts;
19 /* 21 /*
20 * Symbols will be added here in perf_event__process_sample and will 22 * Symbols will be added here in perf_event__process_sample and will
21 * get out after decayed. 23 * get out after decayed.
@@ -35,7 +37,6 @@ struct perf_top {
35 struct perf_session *session; 37 struct perf_session *session;
36 struct winsize winsize; 38 struct winsize winsize;
37 int realtime_prio; 39 int realtime_prio;
38 int sym_pcnt_filter;
39 const char *sym_filter; 40 const char *sym_filter;
40 float min_percent; 41 float min_percent;
41 unsigned int nr_threads_synthesize; 42 unsigned int nr_threads_synthesize;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d7f2113462fb..c85d0d1a65ed 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -103,11 +103,10 @@ out:
103 103
104static int record_header_files(void) 104static int record_header_files(void)
105{ 105{
106 char *path; 106 char *path = get_events_file("header_page");
107 struct stat st; 107 struct stat st;
108 int err = -EIO; 108 int err = -EIO;
109 109
110 path = get_tracing_file("events/header_page");
111 if (!path) { 110 if (!path) {
112 pr_debug("can't get tracing/events/header_page"); 111 pr_debug("can't get tracing/events/header_page");
113 return -ENOMEM; 112 return -ENOMEM;
@@ -128,9 +127,9 @@ static int record_header_files(void)
128 goto out; 127 goto out;
129 } 128 }
130 129
131 put_tracing_file(path); 130 put_events_file(path);
132 131
133 path = get_tracing_file("events/header_event"); 132 path = get_events_file("header_event");
134 if (!path) { 133 if (!path) {
135 pr_debug("can't get tracing/events/header_event"); 134 pr_debug("can't get tracing/events/header_event");
136 err = -ENOMEM; 135 err = -ENOMEM;
@@ -154,7 +153,7 @@ static int record_header_files(void)
154 153
155 err = 0; 154 err = 0;
156out: 155out:
157 put_tracing_file(path); 156 put_events_file(path);
158 return err; 157 return err;
159} 158}
160 159
@@ -243,7 +242,7 @@ static int record_ftrace_files(struct tracepoint_path *tps)
243 char *path; 242 char *path;
244 int ret; 243 int ret;
245 244
246 path = get_tracing_file("events/ftrace"); 245 path = get_events_file("ftrace");
247 if (!path) { 246 if (!path) {
248 pr_debug("can't get tracing/events/ftrace"); 247 pr_debug("can't get tracing/events/ftrace");
249 return -ENOMEM; 248 return -ENOMEM;
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 16a776371d03..1aa368603268 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -75,6 +75,7 @@ void trace_event__cleanup(struct trace_event *t)
75static struct event_format* 75static struct event_format*
76tp_format(const char *sys, const char *name) 76tp_format(const char *sys, const char *name)
77{ 77{
78 char *tp_dir = get_events_file(sys);
78 struct pevent *pevent = tevent.pevent; 79 struct pevent *pevent = tevent.pevent;
79 struct event_format *event = NULL; 80 struct event_format *event = NULL;
80 char path[PATH_MAX]; 81 char path[PATH_MAX];
@@ -82,8 +83,11 @@ tp_format(const char *sys, const char *name)
82 char *data; 83 char *data;
83 int err; 84 int err;
84 85
85 scnprintf(path, PATH_MAX, "%s/%s/%s/format", 86 if (!tp_dir)
86 tracing_events_path, sys, name); 87 return ERR_PTR(-errno);
88
89 scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
90 put_events_file(tp_dir);
87 91
88 err = filename__read_str(path, &data, &size); 92 err = filename__read_str(path, &data, &size);
89 if (err) 93 if (err)
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 7bdd239c795c..538db4e5d1e6 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -28,10 +28,11 @@ static int __report_module(struct addr_location *al, u64 ip,
28{ 28{
29 Dwfl_Module *mod; 29 Dwfl_Module *mod;
30 struct dso *dso = NULL; 30 struct dso *dso = NULL;
31 31 /*
32 thread__find_addr_location(ui->thread, 32 * Some callers will use al->sym, so we can't just use the
33 PERF_RECORD_MISC_USER, 33 * cheaper thread__find_map() here.
34 MAP__FUNCTION, ip, al); 34 */
35 thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
35 36
36 if (al->map) 37 if (al->map)
37 dso = al->map->dso; 38 dso = al->map->dso;
@@ -103,19 +104,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
103 struct addr_location al; 104 struct addr_location al;
104 ssize_t size; 105 ssize_t size;
105 106
106 thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, 107 if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
107 MAP__FUNCTION, addr, &al);
108 if (!al.map) {
109 /*
110 * We've seen cases (softice) where DWARF unwinder went
111 * through non executable mmaps, which we need to lookup
112 * in MAP__VARIABLE tree.
113 */
114 thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
115 MAP__VARIABLE, addr, &al);
116 }
117
118 if (!al.map) {
119 pr_debug("unwind: no map for %lx\n", (unsigned long)addr); 108 pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
120 return -1; 109 return -1;
121 } 110 }
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index af873044d33a..6a11bc7e6b27 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -366,19 +366,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
366static struct map *find_map(unw_word_t ip, struct unwind_info *ui) 366static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
367{ 367{
368 struct addr_location al; 368 struct addr_location al;
369 369 return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
370 thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
371 MAP__FUNCTION, ip, &al);
372 if (!al.map) {
373 /*
374 * We've seen cases (softice) where DWARF unwinder went
375 * through non executable mmaps, which we need to lookup
376 * in MAP__VARIABLE tree.
377 */
378 thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
379 MAP__VARIABLE, ip, &al);
380 }
381 return al.map;
382} 370}
383 371
384static int 372static int
@@ -586,12 +574,9 @@ static int entry(u64 ip, struct thread *thread,
586 struct unwind_entry e; 574 struct unwind_entry e;
587 struct addr_location al; 575 struct addr_location al;
588 576
589 thread__find_addr_location(thread, PERF_RECORD_MISC_USER, 577 e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
590 MAP__FUNCTION, ip, &al);
591
592 e.ip = al.addr; 578 e.ip = al.addr;
593 e.map = al.map; 579 e.map = al.map;
594 e.sym = al.sym;
595 580
596 pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", 581 pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
597 al.sym ? al.sym->name : "''", 582 al.sym ? al.sym->name : "''",
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 1019bbc5dbd8..eac5b858a371 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -38,11 +38,43 @@ void perf_set_multithreaded(void)
38} 38}
39 39
40unsigned int page_size; 40unsigned int page_size;
41int cacheline_size; 41
42#ifdef _SC_LEVEL1_DCACHE_LINESIZE
43#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
44#else
45static void cache_line_size(int *cacheline_sizep)
46{
47 if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
48 pr_debug("cannot determine cache line size");
49}
50#endif
51
52int cacheline_size(void)
53{
54 static int size;
55
56 if (!size)
57 cache_line_size(&size);
58
59 return size;
60}
42 61
43int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; 62int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
44int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; 63int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
45 64
65int sysctl__max_stack(void)
66{
67 int value;
68
69 if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
70 sysctl_perf_event_max_stack = value;
71
72 if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
73 sysctl_perf_event_max_contexts_per_stack = value;
74
75 return sysctl_perf_event_max_stack;
76}
77
46bool test_attr__enabled; 78bool test_attr__enabled;
47 79
48bool perf_host = true; 80bool perf_host = true;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c9626c206208..dc58254a2b69 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -43,7 +43,9 @@ size_t hex_width(u64 v);
43int hex2u64(const char *ptr, u64 *val); 43int hex2u64(const char *ptr, u64 *val);
44 44
45extern unsigned int page_size; 45extern unsigned int page_size;
46extern int cacheline_size; 46int __pure cacheline_size(void);
47
48int sysctl__max_stack(void);
47 49
48int fetch_kernel_version(unsigned int *puint, 50int fetch_kernel_version(unsigned int *puint,
49 char *str, size_t str_sz); 51 char *str, size_t str_sz);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 0acb1ec0e2f0..741af209b19d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -139,12 +139,10 @@ static enum dso_type machine__thread_dso_type(struct machine *machine,
139 struct thread *thread) 139 struct thread *thread)
140{ 140{
141 enum dso_type dso_type = DSO__TYPE_UNKNOWN; 141 enum dso_type dso_type = DSO__TYPE_UNKNOWN;
142 struct map *map; 142 struct map *map = map_groups__first(thread->mg);
143 struct dso *dso;
144 143
145 map = map_groups__first(thread->mg, MAP__FUNCTION);
146 for (; map ; map = map_groups__next(map)) { 144 for (; map ; map = map_groups__next(map)) {
147 dso = map->dso; 145 struct dso *dso = map->dso;
148 if (!dso || dso->long_name[0] != '/') 146 if (!dso || dso->long_name[0] != '/')
149 continue; 147 continue;
150 dso_type = dso__type(dso, machine); 148 dso_type = dso__type(dso, machine);
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index 2cccbba64418..f304be71c278 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -56,6 +56,7 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
56# to compile vs uClibc, that can be done here as well. 56# to compile vs uClibc, that can be done here as well.
57CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- 57CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
58CROSS_COMPILE ?= $(CROSS) 58CROSS_COMPILE ?= $(CROSS)
59LD = $(CC)
59HOSTCC = gcc 60HOSTCC = gcc
60 61
61# check if compiler option is supported 62# check if compiler option is supported
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9b65f052081f..9ba8a44ad2a7 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname)
104 dirname, time(NULL)); 104 dirname, time(NULL));
105 } 105 }
106 106
107 dprintf("logilename: %s\n", filename); 107 dprintf("logfilename: %s\n", filename);
108 108
109 output = fopen(filename, "w+"); 109 output = fopen(filename, "w+");
110 if (output == NULL) { 110 if (output == NULL) {
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 5b3205f16217..5b8c4956ff9a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num)
126 } 126 }
127} 127}
128 128
129#ifdef __powerpc__
130void map_power_idle_state_name(char *tmp)
131{
132 if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
133 strcpy(tmp, "stop0L");
134 else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
135 strcpy(tmp, "stop1L");
136 else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
137 strcpy(tmp, "stop2L");
138}
139#else
140void map_power_idle_state_name(char *tmp) { }
141#endif
142
129static struct cpuidle_monitor *cpuidle_register(void) 143static struct cpuidle_monitor *cpuidle_register(void)
130{ 144{
131 int num; 145 int num;
@@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
145 if (tmp == NULL) 159 if (tmp == NULL)
146 continue; 160 continue;
147 161
162 map_power_idle_state_name(tmp);
148 fix_up_intel_idle_driver_name(tmp, num); 163 fix_up_intel_idle_driver_name(tmp, num);
149 strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); 164 strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
150 free(tmp); 165 free(tmp);
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 05f953f0f0a0..051da0a7c454 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -70,36 +70,43 @@ void print_n_spaces(int n)
70 printf(" "); 70 printf(" ");
71} 71}
72 72
73/* size of s must be at least n + 1 */ 73/*s is filled with left and right spaces
74 *to make its length atleast n+1
75 */
74int fill_string_with_spaces(char *s, int n) 76int fill_string_with_spaces(char *s, int n)
75{ 77{
78 char *temp;
76 int len = strlen(s); 79 int len = strlen(s);
77 if (len > n) 80
81 if (len >= n)
78 return -1; 82 return -1;
83
84 temp = malloc(sizeof(char) * (n+1));
79 for (; len < n; len++) 85 for (; len < n; len++)
80 s[len] = ' '; 86 s[len] = ' ';
81 s[len] = '\0'; 87 s[len] = '\0';
88 snprintf(temp, n+1, " %s", s);
89 strcpy(s, temp);
90 free(temp);
82 return 0; 91 return 0;
83} 92}
84 93
94#define MAX_COL_WIDTH 6
85void print_header(int topology_depth) 95void print_header(int topology_depth)
86{ 96{
87 int unsigned mon; 97 int unsigned mon;
88 int state, need_len; 98 int state, need_len;
89 cstate_t s; 99 cstate_t s;
90 char buf[128] = ""; 100 char buf[128] = "";
91 int percent_width = 4;
92 101
93 fill_string_with_spaces(buf, topology_depth * 5 - 1); 102 fill_string_with_spaces(buf, topology_depth * 5 - 1);
94 printf("%s|", buf); 103 printf("%s|", buf);
95 104
96 for (mon = 0; mon < avail_monitors; mon++) { 105 for (mon = 0; mon < avail_monitors; mon++) {
97 need_len = monitors[mon]->hw_states_num * (percent_width + 3) 106 need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1)
98 - 1; 107 - 1;
99 if (mon != 0) { 108 if (mon != 0)
100 printf("|| "); 109 printf("||");
101 need_len--;
102 }
103 sprintf(buf, "%s", monitors[mon]->name); 110 sprintf(buf, "%s", monitors[mon]->name);
104 fill_string_with_spaces(buf, need_len); 111 fill_string_with_spaces(buf, need_len);
105 printf("%s", buf); 112 printf("%s", buf);
@@ -107,23 +114,21 @@ void print_header(int topology_depth)
107 printf("\n"); 114 printf("\n");
108 115
109 if (topology_depth > 2) 116 if (topology_depth > 2)
110 printf("PKG |"); 117 printf(" PKG|");
111 if (topology_depth > 1) 118 if (topology_depth > 1)
112 printf("CORE|"); 119 printf("CORE|");
113 if (topology_depth > 0) 120 if (topology_depth > 0)
114 printf("CPU |"); 121 printf(" CPU|");
115 122
116 for (mon = 0; mon < avail_monitors; mon++) { 123 for (mon = 0; mon < avail_monitors; mon++) {
117 if (mon != 0) 124 if (mon != 0)
118 printf("|| "); 125 printf("||");
119 else
120 printf(" ");
121 for (state = 0; state < monitors[mon]->hw_states_num; state++) { 126 for (state = 0; state < monitors[mon]->hw_states_num; state++) {
122 if (state != 0) 127 if (state != 0)
123 printf(" | "); 128 printf("|");
124 s = monitors[mon]->hw_states[state]; 129 s = monitors[mon]->hw_states[state];
125 sprintf(buf, "%s", s.name); 130 sprintf(buf, "%s", s.name);
126 fill_string_with_spaces(buf, percent_width); 131 fill_string_with_spaces(buf, MAX_COL_WIDTH);
127 printf("%s", buf); 132 printf("%s", buf);
128 } 133 }
129 printf(" "); 134 printf(" ");
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 9e43f3371fbc..2ae50b499e0a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -15,7 +15,16 @@
15 15
16#define MONITORS_MAX 20 16#define MONITORS_MAX 20
17#define MONITOR_NAME_LEN 20 17#define MONITOR_NAME_LEN 20
18
19/* CSTATE_NAME_LEN is limited by header field width defined
20 * in cpupower-monitor.c. Header field width is defined to be
21 * sum of percent width and two spaces for padding.
22 */
23#ifdef __powerpc__
24#define CSTATE_NAME_LEN 7
25#else
18#define CSTATE_NAME_LEN 5 26#define CSTATE_NAME_LEN 5
27#endif
19#define CSTATE_DESC_LEN 60 28#define CSTATE_DESC_LEN 60
20 29
21int cpu_count; 30int cpu_count;
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index abb4c38f029b..8ee626c0f6a5 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -1,4 +1,4 @@
1#!/usr/bin/python 1#!/usr/bin/python2
2# 2#
3# Tool for analyzing boot timing 3# Tool for analyzing boot timing
4# Copyright (c) 2013, Intel Corporation. 4# Copyright (c) 2013, Intel Corporation.
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..f8fcb06fd68b 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -105,7 +105,7 @@ override-dev-timeline-functions: true
105# example: [color=#CC00CC] 105# example: [color=#CC00CC]
106# 106#
107# arglist: A list of arguments from registers/stack addresses. See URL: 107# arglist: A list of arguments from registers/stack addresses. See URL:
108# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt 108# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
109# 109#
110# example: cpu=%di:s32 110# example: cpu=%di:s32
111# 111#
@@ -170,7 +170,7 @@ pm_restore_console:
170# example: [color=#CC00CC] 170# example: [color=#CC00CC]
171# 171#
172# arglist: A list of arguments from registers/stack addresses. See URL: 172# arglist: A list of arguments from registers/stack addresses. See URL:
173# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt 173# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
174# 174#
175# example: port=+36(%di):s32 175# example: port=+36(%di):s32
176# 176#
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 18baaf6300c9..070be2cf7f74 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -168,6 +168,7 @@ Create a summary page of all tests in \fIindir\fR. Creates summary.html
168in the current folder. The output page is a table of tests with 168in the current folder. The output page is a table of tests with
169suspend and resume values sorted by suspend mode, host, and kernel. 169suspend and resume values sorted by suspend mode, host, and kernel.
170Includes test averages by mode and links to the test html files. 170Includes test averages by mode and links to the test html files.
171Use -genhtml to include tests with missing html.
171.TP 172.TP
172\fB-modes\fR 173\fB-modes\fR
173List available suspend modes. 174List available suspend modes.
@@ -179,6 +180,9 @@ with any options you intend to use to see if they will work.
179\fB-fpdt\fR 180\fB-fpdt\fR
180Print out the contents of the ACPI Firmware Performance Data Table. 181Print out the contents of the ACPI Firmware Performance Data Table.
181.TP 182.TP
183\fB-battery\fR
184Print out battery status and current charge.
185.TP
182\fB-sysinfo\fR 186\fB-sysinfo\fR
183Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. 187Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
184.TP 188.TP
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 266409fb27ae..0c760478f7d7 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -1,4 +1,4 @@
1#!/usr/bin/python 1#!/usr/bin/python2
2# 2#
3# Tool for analyzing suspend/resume timing 3# Tool for analyzing suspend/resume timing
4# Copyright (c) 2013, Intel Corporation. 4# Copyright (c) 2013, Intel Corporation.
@@ -69,7 +69,7 @@ from subprocess import call, Popen, PIPE
69# store system values and test parameters 69# store system values and test parameters
70class SystemValues: 70class SystemValues:
71 title = 'SleepGraph' 71 title = 'SleepGraph'
72 version = '5.0' 72 version = '5.1'
73 ansi = False 73 ansi = False
74 rs = 0 74 rs = 0
75 display = 0 75 display = 0
@@ -240,7 +240,7 @@ class SystemValues:
240 kprobes = dict() 240 kprobes = dict()
241 timeformat = '%.3f' 241 timeformat = '%.3f'
242 cmdline = '%s %s' % \ 242 cmdline = '%s %s' % \
243 (os.path.basename(sys.argv[0]), string.join(sys.argv[1:], ' ')) 243 (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
244 def __init__(self): 244 def __init__(self):
245 self.archargs = 'args_'+platform.machine() 245 self.archargs = 'args_'+platform.machine()
246 self.hostname = platform.node() 246 self.hostname = platform.node()
@@ -917,12 +917,18 @@ class Data:
917 self.devicegroups.append([phase]) 917 self.devicegroups.append([phase])
918 self.errorinfo = {'suspend':[],'resume':[]} 918 self.errorinfo = {'suspend':[],'resume':[]}
919 def extractErrorInfo(self): 919 def extractErrorInfo(self):
920 elist = {
921 'HWERROR' : '.*\[ *Hardware Error *\].*',
922 'FWBUG' : '.*\[ *Firmware Bug *\].*',
923 'BUG' : '.*BUG.*',
924 'ERROR' : '.*ERROR.*',
925 'WARNING' : '.*WARNING.*',
926 'IRQ' : '.*genirq: .*',
927 'TASKFAIL': '.*Freezing of tasks failed.*',
928 }
920 lf = sysvals.openlog(sysvals.dmesgfile, 'r') 929 lf = sysvals.openlog(sysvals.dmesgfile, 'r')
921 i = 0 930 i = 0
922 list = [] 931 list = []
923 # sl = start line, et = error time, el = error line
924 type = 'ERROR'
925 sl = et = el = -1
926 for line in lf: 932 for line in lf:
927 i += 1 933 i += 1
928 m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) 934 m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
@@ -931,43 +937,13 @@ class Data:
931 t = float(m.group('ktime')) 937 t = float(m.group('ktime'))
932 if t < self.start or t > self.end: 938 if t < self.start or t > self.end:
933 continue 939 continue
934 if t < self.tSuspended: 940 dir = 'suspend' if t < self.tSuspended else 'resume'
935 dir = 'suspend'
936 else:
937 dir = 'resume'
938 msg = m.group('msg') 941 msg = m.group('msg')
939 if re.match('-*\[ *cut here *\]-*', msg): 942 for err in elist:
940 type = 'WARNING' 943 if re.match(elist[err], msg):
941 sl = i 944 list.append((err, dir, t, i, i))
942 elif re.match('genirq: .*', msg):
943 type = 'IRQ'
944 sl = i
945 elif re.match('BUG: .*', msg) or re.match('kernel BUG .*', msg):
946 type = 'BUG'
947 sl = i
948 elif re.match('-*\[ *end trace .*\]-*', msg) or \
949 re.match('R13: .*', msg):
950 if et >= 0 and sl >= 0:
951 list.append((type, dir, et, sl, i))
952 self.kerror = True
953 sl = et = el = -1
954 type = 'ERROR'
955 elif 'Call Trace:' in msg:
956 if el >= 0 and et >= 0:
957 list.append((type, dir, et, el, el))
958 self.kerror = True 945 self.kerror = True
959 et, el = t, i 946 break
960 if sl < 0 or type == 'BUG':
961 slval = i
962 if sl >= 0:
963 slval = sl
964 list.append((type, dir, et, slval, i))
965 self.kerror = True
966 sl = et = el = -1
967 type = 'ERROR'
968 if el >= 0 and et >= 0:
969 list.append((type, dir, et, el, el))
970 self.kerror = True
971 for e in list: 947 for e in list:
972 type, dir, t, idx1, idx2 = e 948 type, dir, t, idx1, idx2 = e
973 sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t)) 949 sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t))
@@ -2331,12 +2307,14 @@ class TestProps:
2331 sv.suspendmode = data.stamp['mode'] 2307 sv.suspendmode = data.stamp['mode']
2332 if sv.suspendmode == 'command' and sv.ftracefile != '': 2308 if sv.suspendmode == 'command' and sv.ftracefile != '':
2333 modes = ['on', 'freeze', 'standby', 'mem', 'disk'] 2309 modes = ['on', 'freeze', 'standby', 'mem', 'disk']
2334 out = Popen(['grep', 'machine_suspend', sv.ftracefile], 2310 fp = sysvals.openlog(sv.ftracefile, 'r')
2335 stderr=PIPE, stdout=PIPE).stdout.read() 2311 for line in fp:
2336 m = re.match('.* machine_suspend\[(?P<mode>.*)\]', out) 2312 m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line)
2337 if m and m.group('mode') in ['1', '2', '3', '4']: 2313 if m and m.group('mode') in ['1', '2', '3', '4']:
2338 sv.suspendmode = modes[int(m.group('mode'))] 2314 sv.suspendmode = modes[int(m.group('mode'))]
2339 data.stamp['mode'] = sv.suspendmode 2315 data.stamp['mode'] = sv.suspendmode
2316 break
2317 fp.close()
2340 m = re.match(self.cmdlinefmt, self.cmdline) 2318 m = re.match(self.cmdlinefmt, self.cmdline)
2341 if m: 2319 if m:
2342 sv.cmdline = m.group('cmd') 2320 sv.cmdline = m.group('cmd')
@@ -2413,7 +2391,7 @@ class ProcessMonitor:
2413# markers, and/or kprobes required for primary parsing. 2391# markers, and/or kprobes required for primary parsing.
2414def doesTraceLogHaveTraceEvents(): 2392def doesTraceLogHaveTraceEvents():
2415 kpcheck = ['_cal: (', '_cpu_down()'] 2393 kpcheck = ['_cal: (', '_cpu_down()']
2416 techeck = sysvals.traceevents[:] 2394 techeck = ['suspend_resume']
2417 tmcheck = ['SUSPEND START', 'RESUME COMPLETE'] 2395 tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
2418 sysvals.usekprobes = False 2396 sysvals.usekprobes = False
2419 fp = sysvals.openlog(sysvals.ftracefile, 'r') 2397 fp = sysvals.openlog(sysvals.ftracefile, 'r')
@@ -2808,7 +2786,7 @@ def parseTraceLog(live=False):
2808 # -- phase changes -- 2786 # -- phase changes --
2809 # start of kernel suspend 2787 # start of kernel suspend
2810 if(re.match('suspend_enter\[.*', t.name)): 2788 if(re.match('suspend_enter\[.*', t.name)):
2811 if(isbegin): 2789 if(isbegin and data.start == data.tKernSus):
2812 data.dmesg[phase]['start'] = t.time 2790 data.dmesg[phase]['start'] = t.time
2813 data.tKernSus = t.time 2791 data.tKernSus = t.time
2814 continue 2792 continue
@@ -3072,13 +3050,20 @@ def parseTraceLog(live=False):
3072 sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name)) 3050 sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
3073 cg.newActionFromFunction(data) 3051 cg.newActionFromFunction(data)
3074 if sysvals.suspendmode == 'command': 3052 if sysvals.suspendmode == 'command':
3075 return testdata 3053 return (testdata, '')
3076 3054
3077 # fill in any missing phases 3055 # fill in any missing phases
3056 error = []
3078 for data in testdata: 3057 for data in testdata:
3058 tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
3059 terr = ''
3079 lp = data.phases[0] 3060 lp = data.phases[0]
3080 for p in data.phases: 3061 for p in data.phases:
3081 if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0): 3062 if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
3063 if not terr:
3064 print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp)
3065 terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
3066 error.append(terr)
3082 sysvals.vprint('WARNING: phase "%s" is missing!' % p) 3067 sysvals.vprint('WARNING: phase "%s" is missing!' % p)
3083 if(data.dmesg[p]['start'] < 0): 3068 if(data.dmesg[p]['start'] < 0):
3084 data.dmesg[p]['start'] = data.dmesg[lp]['end'] 3069 data.dmesg[p]['start'] = data.dmesg[lp]['end']
@@ -3106,7 +3091,7 @@ def parseTraceLog(live=False):
3106 for j in range(i + 1, tc): 3091 for j in range(i + 1, tc):
3107 testdata[j].mergeOverlapDevices(devlist) 3092 testdata[j].mergeOverlapDevices(devlist)
3108 testdata[0].stitchTouchingThreads(testdata[1:]) 3093 testdata[0].stitchTouchingThreads(testdata[1:])
3109 return testdata 3094 return (testdata, ', '.join(error))
3110 3095
3111# Function: loadKernelLog 3096# Function: loadKernelLog
3112# Description: 3097# Description:
@@ -3173,7 +3158,7 @@ def loadKernelLog():
3173 if data: 3158 if data:
3174 testruns.append(data) 3159 testruns.append(data)
3175 if len(testruns) < 1: 3160 if len(testruns) < 1:
3176 doError(' dmesg log has no suspend/resume data: %s' \ 3161 print('ERROR: dmesg log has no suspend/resume data: %s' \
3177 % sysvals.dmesgfile) 3162 % sysvals.dmesgfile)
3178 3163
3179 # fix lines with same timestamp/function with the call and return swapped 3164 # fix lines with same timestamp/function with the call and return swapped
@@ -3521,68 +3506,144 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
3521 .summary {border:1px solid;}\n\ 3506 .summary {border:1px solid;}\n\
3522 th {border: 1px solid black;background:#222;color:white;}\n\ 3507 th {border: 1px solid black;background:#222;color:white;}\n\
3523 td {font: 16px "Times New Roman";text-align: center;}\n\ 3508 td {font: 16px "Times New Roman";text-align: center;}\n\
3524 tr.alt td {background:#ddd;}\n\ 3509 tr.head td {border: 1px solid black;background:#aaa;}\n\
3525 tr.avg td {background:#aaa;}\n\ 3510 tr.alt {background-color:#ddd;}\n\
3511 tr.notice {color:red;}\n\
3512 .minval {background-color:#BBFFBB;}\n\
3513 .medval {background-color:#BBBBFF;}\n\
3514 .maxval {background-color:#FFBBBB;}\n\
3515 .head a {color:#000;text-decoration: none;}\n\
3526 </style>\n</head>\n<body>\n' 3516 </style>\n</head>\n<body>\n'
3527 3517
3518 # extract the test data into list
3519 list = dict()
3520 tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
3521 iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
3522 num = 0
3523 lastmode = ''
3524 cnt = {'pass':0, 'fail':0, 'hang':0}
3525 for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
3526 mode = data['mode']
3527 if mode not in list:
3528 list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]}
3529 if lastmode and lastmode != mode and num > 0:
3530 for i in range(2):
3531 s = sorted(tMed[i])
3532 list[lastmode]['med'][i] = s[int(len(s)/2)]
3533 iMed[i] = tMed[i].index(list[lastmode]['med'][i])
3534 list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
3535 list[lastmode]['min'] = tMin
3536 list[lastmode]['max'] = tMax
3537 list[lastmode]['idx'] = (iMin, iMed, iMax)
3538 tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
3539 iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
3540 num = 0
3541 tVal = [float(data['suspend']), float(data['resume'])]
3542 list[mode]['data'].append([data['host'], data['kernel'],
3543 data['time'], tVal[0], tVal[1], data['url'], data['result'],
3544 data['issues']])
3545 idx = len(list[mode]['data']) - 1
3546 if data['result'] == 'pass':
3547 cnt['pass'] += 1
3548 for i in range(2):
3549 tMed[i].append(tVal[i])
3550 tAvg[i] += tVal[i]
3551 if tMin[i] == 0 or tVal[i] < tMin[i]:
3552 iMin[i] = idx
3553 tMin[i] = tVal[i]
3554 if tMax[i] == 0 or tVal[i] > tMax[i]:
3555 iMax[i] = idx
3556 tMax[i] = tVal[i]
3557 num += 1
3558 elif data['result'] == 'hang':
3559 cnt['hang'] += 1
3560 elif data['result'] == 'fail':
3561 cnt['fail'] += 1
3562 lastmode = mode
3563 if lastmode and num > 0:
3564 for i in range(2):
3565 s = sorted(tMed[i])
3566 list[lastmode]['med'][i] = s[int(len(s)/2)]
3567 iMed[i] = tMed[i].index(list[lastmode]['med'][i])
3568 list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
3569 list[lastmode]['min'] = tMin
3570 list[lastmode]['max'] = tMax
3571 list[lastmode]['idx'] = (iMin, iMed, iMax)
3572
3528 # group test header 3573 # group test header
3529 html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns)) 3574 desc = []
3575 for ilk in sorted(cnt, reverse=True):
3576 if cnt[ilk] > 0:
3577 desc.append('%d %s' % (cnt[ilk], ilk))
3578 html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc))
3530 th = '\t<th>{0}</th>\n' 3579 th = '\t<th>{0}</th>\n'
3531 td = '\t<td>{0}</td>\n' 3580 td = '\t<td>{0}</td>\n'
3581 tdh = '\t<td{1}>{0}</td>\n'
3532 tdlink = '\t<td><a href="{0}">html</a></td>\n' 3582 tdlink = '\t<td><a href="{0}">html</a></td>\n'
3533 3583
3534 # table header 3584 # table header
3535 html += '<table class="summary">\n<tr>\n' + th.format('#') +\ 3585 html += '<table class="summary">\n<tr>\n' + th.format('#') +\
3536 th.format('Mode') + th.format('Host') + th.format('Kernel') +\ 3586 th.format('Mode') + th.format('Host') + th.format('Kernel') +\
3537 th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\ 3587 th.format('Test Time') + th.format('Result') + th.format('Issues') +\
3538 th.format('Detail') + '</tr>\n' 3588 th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n'
3539 3589
3540 # test data, 1 row per test 3590 # export list into html
3541 avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\ 3591 head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
3542 '<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n' 3592 '<td colspan=8 class="sus">Suspend Avg={2} '+\
3543 sTimeAvg = rTimeAvg = 0.0 3593 '<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
3544 mode = '' 3594 '<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
3545 num = 0 3595 '<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
3546 for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): 3596 'Resume Avg={6} '+\
3547 if mode != data['mode']: 3597 '<span class=minval><a href="#r{10}min">Min={7}</a></span> '+\
3548 # test average line 3598 '<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
3549 if(num > 0): 3599 '<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
3550 sTimeAvg /= (num - 1) 3600 '</tr>\n'
3551 rTimeAvg /= (num - 1) 3601 headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n'
3552 html += avg.format('%d' % (num - 1), mode, 3602 for mode in list:
3553 '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) 3603 # header line for each suspend mode
3554 sTimeAvg = rTimeAvg = 0.0 3604 num = 0
3555 mode = data['mode'] 3605 tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\
3556 num = 1 3606 list[mode]['max'], list[mode]['med']
3557 # alternate row color 3607 count = len(list[mode]['data'])
3558 if num % 2 == 1: 3608 if 'idx' in list[mode]:
3559 html += '<tr class="alt">\n' 3609 iMin, iMed, iMax = list[mode]['idx']
3610 html += head.format('%d' % count, mode.upper(),
3611 '%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0],
3612 '%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1],
3613 mode.lower()
3614 )
3560 else: 3615 else:
3561 html += '<tr>\n' 3616 iMin = iMed = iMax = [-1, -1, -1]
3562 html += td.format("%d" % num) 3617 html += headnone.format('%d' % count, mode.upper())
3563 num += 1 3618 for d in list[mode]['data']:
3564 # basic info 3619 # row classes - alternate row color
3565 for item in ['mode', 'host', 'kernel', 'time']: 3620 rcls = ['alt'] if num % 2 == 1 else []
3566 val = "unknown" 3621 if d[6] != 'pass':
3567 if(item in data): 3622 rcls.append('notice')
3568 val = data[item] 3623 html += '<tr class="'+(' '.join(rcls))+'">\n' if len(rcls) > 0 else '<tr>\n'
3569 html += td.format(val) 3624 # figure out if the line has sus or res highlighted
3570 # suspend time 3625 idx = list[mode]['data'].index(d)
3571 sTime = float(data['suspend']) 3626 tHigh = ['', '']
3572 sTimeAvg += sTime 3627 for i in range(2):
3573 html += td.format('%.3f ms' % sTime) 3628 tag = 's%s' % mode if i == 0 else 'r%s' % mode
3574 # resume time 3629 if idx == iMin[i]:
3575 rTime = float(data['resume']) 3630 tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag
3576 rTimeAvg += rTime 3631 elif idx == iMax[i]:
3577 html += td.format('%.3f ms' % rTime) 3632 tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag
3578 # link to the output html 3633 elif idx == iMed[i]:
3579 html += tdlink.format(data['url']) + '</tr>\n' 3634 tHigh[i] = ' id="%smed" class=medval title="Median"' % tag
3580 # last test average line 3635 html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row
3581 if(num > 0): 3636 html += td.format(mode) # mode
3582 sTimeAvg /= (num - 1) 3637 html += td.format(d[0]) # host
3583 rTimeAvg /= (num - 1) 3638 html += td.format(d[1]) # kernel
3584 html += avg.format('%d' % (num - 1), mode, 3639 html += td.format(d[2]) # time
3585 '%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg) 3640 html += td.format(d[6]) # result
3641 html += td.format(d[7]) # issues
3642 html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('') # suspend
3643 html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('') # resume
3644 html += tdlink.format(d[5]) if d[5] else td.format('') # url
3645 html += '</tr>\n'
3646 num += 1
3586 3647
3587 # flush the data to file 3648 # flush the data to file
3588 hf = open(htmlfile, 'w') 3649 hf = open(htmlfile, 'w')
@@ -3607,7 +3668,7 @@ def ordinal(value):
3607# testruns: array of Data objects from parseKernelLog or parseTraceLog 3668# testruns: array of Data objects from parseKernelLog or parseTraceLog
3608# Output: 3669# Output:
3609# True if the html file was created, false if it failed 3670# True if the html file was created, false if it failed
3610def createHTML(testruns): 3671def createHTML(testruns, testfail):
3611 if len(testruns) < 1: 3672 if len(testruns) < 1:
3612 print('ERROR: Not enough test data to build a timeline') 3673 print('ERROR: Not enough test data to build a timeline')
3613 return 3674 return
@@ -3641,6 +3702,7 @@ def createHTML(testruns):
3641 '<td class="purple">{4}Firmware Resume: {2} ms</td>'\ 3702 '<td class="purple">{4}Firmware Resume: {2} ms</td>'\
3642 '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\ 3703 '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\
3643 '</tr>\n</table>\n' 3704 '</tr>\n</table>\n'
3705 html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n'
3644 3706
3645 # html format variables 3707 # html format variables
3646 scaleH = 20 3708 scaleH = 20
@@ -3708,6 +3770,9 @@ def createHTML(testruns):
3708 resume_time, testdesc, stitle, rtitle) 3770 resume_time, testdesc, stitle, rtitle)
3709 devtl.html += thtml 3771 devtl.html += thtml
3710 3772
3773 if testfail:
3774 devtl.html += html_fail.format(testfail)
3775
3711 # time scale for potentially multiple datasets 3776 # time scale for potentially multiple datasets
3712 t0 = testruns[0].start 3777 t0 = testruns[0].start
3713 tMax = testruns[-1].end 3778 tMax = testruns[-1].end
@@ -4006,6 +4071,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
4006 .blue {background:rgba(169,208,245,0.4);}\n\ 4071 .blue {background:rgba(169,208,245,0.4);}\n\
4007 .time1 {font:22px Arial;border:1px solid;}\n\ 4072 .time1 {font:22px Arial;border:1px solid;}\n\
4008 .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\ 4073 .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
4074 .testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\
4009 td {text-align:center;}\n\ 4075 td {text-align:center;}\n\
4010 r {color:#500000;font:15px Tahoma;}\n\ 4076 r {color:#500000;font:15px Tahoma;}\n\
4011 n {color:#505050;font:15px Tahoma;}\n\ 4077 n {color:#505050;font:15px Tahoma;}\n\
@@ -4927,6 +4993,25 @@ def dmidecode(mempath, fatal=False):
4927 count += 1 4993 count += 1
4928 return out 4994 return out
4929 4995
4996def getBattery():
4997 p = '/sys/class/power_supply'
4998 bat = dict()
4999 for d in os.listdir(p):
5000 type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
5001 if type != 'battery':
5002 continue
5003 for v in ['status', 'energy_now', 'capacity_now']:
5004 bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
5005 break
5006 ac = True
5007 if 'status' in bat and 'discharging' in bat['status']:
5008 ac = False
5009 charge = 0
5010 for v in ['energy_now', 'capacity_now']:
5011 if v in bat and bat[v]:
5012 charge = int(bat[v])
5013 return (ac, charge)
5014
4930# Function: getFPDT 5015# Function: getFPDT
4931# Description: 5016# Description:
4932# Read the acpi bios tables and pull out FPDT, the firmware data 5017# Read the acpi bios tables and pull out FPDT, the firmware data
@@ -5202,8 +5287,9 @@ def getArgFloat(name, args, min, max, main=True):
5202 5287
5203def processData(live=False): 5288def processData(live=False):
5204 print('PROCESSING DATA') 5289 print('PROCESSING DATA')
5290 error = ''
5205 if(sysvals.usetraceevents): 5291 if(sysvals.usetraceevents):
5206 testruns = parseTraceLog(live) 5292 testruns, error = parseTraceLog(live)
5207 if sysvals.dmesgfile: 5293 if sysvals.dmesgfile:
5208 for data in testruns: 5294 for data in testruns:
5209 data.extractErrorInfo() 5295 data.extractErrorInfo()
@@ -5220,15 +5306,18 @@ def processData(live=False):
5220 for data in testruns: 5306 for data in testruns:
5221 data.debugPrint() 5307 data.debugPrint()
5222 sys.exit() 5308 sys.exit()
5223 5309 if len(testruns) < 1:
5310 return (testruns, {'error': 'timeline generation failed'})
5224 sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile) 5311 sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
5225 createHTML(testruns) 5312 createHTML(testruns, error)
5226 print('DONE') 5313 print('DONE')
5227 data = testruns[0] 5314 data = testruns[0]
5228 stamp = data.stamp 5315 stamp = data.stamp
5229 stamp['suspend'], stamp['resume'] = data.getTimeValues() 5316 stamp['suspend'], stamp['resume'] = data.getTimeValues()
5230 if data.fwValid: 5317 if data.fwValid:
5231 stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume 5318 stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume
5319 if error:
5320 stamp['error'] = error
5232 return (testruns, stamp) 5321 return (testruns, stamp)
5233 5322
5234# Function: rerunTest 5323# Function: rerunTest
@@ -5268,58 +5357,88 @@ def runTest(n=0):
5268 sysvals.sudouser(sysvals.testdir) 5357 sysvals.sudouser(sysvals.testdir)
5269 sysvals.outputResult(stamp, n) 5358 sysvals.outputResult(stamp, n)
5270 5359
5271def find_in_html(html, strs, div=False): 5360def find_in_html(html, start, end, firstonly=True):
5272 for str in strs: 5361 n, out = 0, []
5273 l = len(str) 5362 while n < len(html):
5274 i = html.find(str) 5363 m = re.search(start, html[n:])
5275 if i >= 0: 5364 if not m:
5276 break 5365 break
5277 if i < 0: 5366 i = m.end()
5278 return '' 5367 m = re.search(end, html[n+i:])
5279 if not div: 5368 if not m:
5280 return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group() 5369 break
5281 n = html[i+l:].find('</div>') 5370 j = m.start()
5282 if n < 0: 5371 str = html[n+i:n+i+j]
5372 if end == 'ms':
5373 num = re.search(r'[-+]?\d*\.\d+|\d+', str)
5374 str = num.group() if num else 'NaN'
5375 if firstonly:
5376 return str
5377 out.append(str)
5378 n += i+j
5379 if firstonly:
5283 return '' 5380 return ''
5284 return html[i+l:i+l+n] 5381 return out
5285 5382
5286# Function: runSummary 5383# Function: runSummary
5287# Description: 5384# Description:
5288# create a summary of tests in a sub-directory 5385# create a summary of tests in a sub-directory
5289def runSummary(subdir, local=True): 5386def runSummary(subdir, local=True, genhtml=False):
5290 inpath = os.path.abspath(subdir) 5387 inpath = os.path.abspath(subdir)
5291 outpath = inpath 5388 outpath = inpath
5292 if local: 5389 if local:
5293 outpath = os.path.abspath('.') 5390 outpath = os.path.abspath('.')
5294 print('Generating a summary of folder "%s"' % inpath) 5391 print('Generating a summary of folder "%s"' % inpath)
5392 if genhtml:
5393 for dirname, dirnames, filenames in os.walk(subdir):
5394 sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
5395 for filename in filenames:
5396 if(re.match('.*_dmesg.txt', filename)):
5397 sysvals.dmesgfile = os.path.join(dirname, filename)
5398 elif(re.match('.*_ftrace.txt', filename)):
5399 sysvals.ftracefile = os.path.join(dirname, filename)
5400 sysvals.setOutputFile()
5401 if sysvals.ftracefile and sysvals.htmlfile and \
5402 not os.path.exists(sysvals.htmlfile):
5403 print('FTRACE: %s' % sysvals.ftracefile)
5404 if sysvals.dmesgfile:
5405 print('DMESG : %s' % sysvals.dmesgfile)
5406 rerunTest()
5295 testruns = [] 5407 testruns = []
5296 for dirname, dirnames, filenames in os.walk(subdir): 5408 for dirname, dirnames, filenames in os.walk(subdir):
5297 for filename in filenames: 5409 for filename in filenames:
5298 if(not re.match('.*.html', filename)): 5410 if(not re.match('.*.html', filename)):
5299 continue 5411 continue
5300 file = os.path.join(dirname, filename) 5412 file = os.path.join(dirname, filename)
5301 html = open(file, 'r').read(10000) 5413 html = open(file, 'r').read()
5302 suspend = find_in_html(html, 5414 suspend = find_in_html(html, 'Kernel Suspend', 'ms')
5303 ['Kernel Suspend: ', 'Kernel Suspend Time: ']) 5415 resume = find_in_html(html, 'Kernel Resume', 'ms')
5304 resume = find_in_html(html, 5416 line = find_in_html(html, '<div class="stamp">', '</div>')
5305 ['Kernel Resume: ', 'Kernel Resume Time: '])
5306 line = find_in_html(html, ['<div class="stamp">'], True)
5307 stmp = line.split() 5417 stmp = line.split()
5308 if not suspend or not resume or len(stmp) < 4: 5418 if not suspend or not resume or len(stmp) != 8:
5309 continue 5419 continue
5420 try:
5421 dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
5422 except:
5423 continue
5424 tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
5425 error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
5426 result = 'fail' if error else 'pass'
5427 ilist = []
5428 e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
5429 for i in list(set(e)):
5430 ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
5310 data = { 5431 data = {
5432 'mode': stmp[2],
5311 'host': stmp[0], 5433 'host': stmp[0],
5312 'kernel': stmp[1], 5434 'kernel': stmp[1],
5313 'mode': stmp[2], 5435 'time': tstr,
5314 'time': string.join(stmp[3:], ' '), 5436 'result': result,
5437 'issues': ','.join(ilist),
5315 'suspend': suspend, 5438 'suspend': suspend,
5316 'resume': resume, 5439 'resume': resume,
5317 'url': os.path.relpath(file, outpath), 5440 'url': os.path.relpath(file, outpath),
5318 } 5441 }
5319 if len(stmp) == 7:
5320 data['kernel'] = 'unknown'
5321 data['mode'] = stmp[1]
5322 data['time'] = string.join(stmp[2:], ' ')
5323 testruns.append(data) 5442 testruns.append(data)
5324 outfile = os.path.join(outpath, 'summary.html') 5443 outfile = os.path.join(outpath, 'summary.html')
5325 print('Summary file: %s' % outfile) 5444 print('Summary file: %s' % outfile)
@@ -5609,11 +5728,12 @@ def printHelp():
5609 print(' -modes List available suspend modes') 5728 print(' -modes List available suspend modes')
5610 print(' -status Test to see if the system is enabled to run this tool') 5729 print(' -status Test to see if the system is enabled to run this tool')
5611 print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') 5730 print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table')
5731 print(' -battery Print out battery info (if available)')
5612 print(' -sysinfo Print out system info extracted from BIOS') 5732 print(' -sysinfo Print out system info extracted from BIOS')
5613 print(' -devinfo Print out the pm settings of all devices which support runtime suspend') 5733 print(' -devinfo Print out the pm settings of all devices which support runtime suspend')
5614 print(' -flist Print the list of functions currently being captured in ftrace') 5734 print(' -flist Print the list of functions currently being captured in ftrace')
5615 print(' -flistall Print all functions capable of being captured in ftrace') 5735 print(' -flistall Print all functions capable of being captured in ftrace')
5616 print(' -summary directory Create a summary of all test in this dir') 5736 print(' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]')
5617 print(' [redo]') 5737 print(' [redo]')
5618 print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') 5738 print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)')
5619 print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') 5739 print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)')
@@ -5623,8 +5743,9 @@ def printHelp():
5623# ----------------- MAIN -------------------- 5743# ----------------- MAIN --------------------
5624# exec start (skipped if script is loaded as library) 5744# exec start (skipped if script is loaded as library)
5625if __name__ == '__main__': 5745if __name__ == '__main__':
5746 genhtml = False
5626 cmd = '' 5747 cmd = ''
5627 simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status'] 5748 simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery']
5628 if '-f' in sys.argv: 5749 if '-f' in sys.argv:
5629 sysvals.cgskip = sysvals.configFile('cgskip.txt') 5750 sysvals.cgskip = sysvals.configFile('cgskip.txt')
5630 # loop through the command line arguments 5751 # loop through the command line arguments
@@ -5660,6 +5781,8 @@ if __name__ == '__main__':
5660 sysvals.skiphtml = True 5781 sysvals.skiphtml = True
5661 elif(arg == '-cgdump'): 5782 elif(arg == '-cgdump'):
5662 sysvals.cgdump = True 5783 sysvals.cgdump = True
5784 elif(arg == '-genhtml'):
5785 genhtml = True
5663 elif(arg == '-addlogs'): 5786 elif(arg == '-addlogs'):
5664 sysvals.dmesglog = sysvals.ftracelog = True 5787 sysvals.dmesglog = sysvals.ftracelog = True
5665 elif(arg == '-verbose'): 5788 elif(arg == '-verbose'):
@@ -5856,6 +5979,8 @@ if __name__ == '__main__':
5856 statusCheck(True) 5979 statusCheck(True)
5857 elif(cmd == 'fpdt'): 5980 elif(cmd == 'fpdt'):
5858 getFPDT(True) 5981 getFPDT(True)
5982 elif(cmd == 'battery'):
5983 print 'AC Connect: %s\nCharge: %d' % getBattery()
5859 elif(cmd == 'sysinfo'): 5984 elif(cmd == 'sysinfo'):
5860 sysvals.printSystemInfo(True) 5985 sysvals.printSystemInfo(True)
5861 elif(cmd == 'devinfo'): 5986 elif(cmd == 'devinfo'):
@@ -5867,7 +5992,7 @@ if __name__ == '__main__':
5867 elif(cmd == 'flistall'): 5992 elif(cmd == 'flistall'):
5868 sysvals.getFtraceFilterFunctions(False) 5993 sysvals.getFtraceFilterFunctions(False)
5869 elif(cmd == 'summary'): 5994 elif(cmd == 'summary'):
5870 runSummary(sysvals.outdir, True) 5995 runSummary(sysvals.outdir, True, genhtml)
5871 sys.exit() 5996 sys.exit()
5872 5997
5873 # if instructed, re-analyze existing data files 5998 # if instructed, re-analyze existing data files
@@ -5920,7 +6045,7 @@ if __name__ == '__main__':
5920 print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count'])) 6045 print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
5921 sysvals.logmsg = '' 6046 sysvals.logmsg = ''
5922 if not sysvals.skiphtml: 6047 if not sysvals.skiphtml:
5923 runSummary(sysvals.outdir, False) 6048 runSummary(sysvals.outdir, False, False)
5924 sysvals.sudouser(sysvals.outdir) 6049 sysvals.sudouser(sysvals.outdir)
5925 else: 6050 else:
5926 if sysvals.outdir: 6051 if sysvals.outdir:
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 29f50d4cfea0..84e2b648e622 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -28,6 +28,7 @@ import subprocess
28import os 28import os
29import time 29import time
30import re 30import re
31import signal
31import sys 32import sys
32import getopt 33import getopt
33import Gnuplot 34import Gnuplot
@@ -78,11 +79,12 @@ def print_help():
78 print(' Or') 79 print(' Or')
79 print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>') 80 print(' ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
80 print(' To generate trace file, parse and plot, use (sudo required):') 81 print(' To generate trace file, parse and plot, use (sudo required):')
81 print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name>') 82 print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
82 print(' Or') 83 print(' Or')
83 print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name>') 84 print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
84 print(' Optional argument:') 85 print(' Optional argument:')
85 print(' cpus: comma separated list of CPUs') 86 print(' cpus: comma separated list of CPUs')
87 print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
86 print(' Output:') 88 print(' Output:')
87 print(' If not already present, creates a "results/test_name" folder in the current working directory with:') 89 print(' If not already present, creates a "results/test_name" folder in the current working directory with:')
88 print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.') 90 print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.')
@@ -379,7 +381,7 @@ def clear_trace_file():
379 f_handle.close() 381 f_handle.close()
380 except: 382 except:
381 print('IO error clearing trace file ') 383 print('IO error clearing trace file ')
382 quit() 384 sys.exit(2)
383 385
384def enable_trace(): 386def enable_trace():
385 """ Enable trace """ 387 """ Enable trace """
@@ -389,7 +391,7 @@ def enable_trace():
389 , 'w').write("1") 391 , 'w').write("1")
390 except: 392 except:
391 print('IO error enabling trace ') 393 print('IO error enabling trace ')
392 quit() 394 sys.exit(2)
393 395
394def disable_trace(): 396def disable_trace():
395 """ Disable trace """ 397 """ Disable trace """
@@ -399,17 +401,17 @@ def disable_trace():
399 , 'w').write("0") 401 , 'w').write("0")
400 except: 402 except:
401 print('IO error disabling trace ') 403 print('IO error disabling trace ')
402 quit() 404 sys.exit(2)
403 405
404def set_trace_buffer_size(): 406def set_trace_buffer_size():
405 """ Set trace buffer size """ 407 """ Set trace buffer size """
406 408
407 try: 409 try:
408 open('/sys/kernel/debug/tracing/buffer_size_kb' 410 with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp:
409 , 'w').write("10240") 411 fp.write(memory)
410 except: 412 except:
411 print('IO error setting trace buffer size ') 413 print('IO error setting trace buffer size ')
412 quit() 414 sys.exit(2)
413 415
414def free_trace_buffer(): 416def free_trace_buffer():
415 """ Free the trace buffer memory """ 417 """ Free the trace buffer memory """
@@ -418,8 +420,8 @@ def free_trace_buffer():
418 open('/sys/kernel/debug/tracing/buffer_size_kb' 420 open('/sys/kernel/debug/tracing/buffer_size_kb'
419 , 'w').write("1") 421 , 'w').write("1")
420 except: 422 except:
421 print('IO error setting trace buffer size ') 423 print('IO error freeing trace buffer ')
422 quit() 424 sys.exit(2)
423 425
424def read_trace_data(filename): 426def read_trace_data(filename):
425 """ Read and parse trace data """ 427 """ Read and parse trace data """
@@ -431,7 +433,7 @@ def read_trace_data(filename):
431 data = open(filename, 'r').read() 433 data = open(filename, 'r').read()
432 except: 434 except:
433 print('Error opening ', filename) 435 print('Error opening ', filename)
434 quit() 436 sys.exit(2)
435 437
436 for line in data.splitlines(): 438 for line in data.splitlines():
437 search_obj = \ 439 search_obj = \
@@ -489,10 +491,22 @@ def read_trace_data(filename):
489# Now seperate the main overall csv file into per CPU csv files. 491# Now seperate the main overall csv file into per CPU csv files.
490 split_csv() 492 split_csv()
491 493
494def signal_handler(signal, frame):
495 print(' SIGINT: Forcing cleanup before exit.')
496 if interval:
497 disable_trace()
498 clear_trace_file()
499 # Free the memory
500 free_trace_buffer()
501 sys.exit(0)
502
503signal.signal(signal.SIGINT, signal_handler)
504
492interval = "" 505interval = ""
493filename = "" 506filename = ""
494cpu_list = "" 507cpu_list = ""
495testname = "" 508testname = ""
509memory = "10240"
496graph_data_present = False; 510graph_data_present = False;
497 511
498valid1 = False 512valid1 = False
@@ -501,7 +515,7 @@ valid2 = False
501cpu_mask = zeros((MAX_CPUS,), dtype=int) 515cpu_mask = zeros((MAX_CPUS,), dtype=int)
502 516
503try: 517try:
504 opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="]) 518 opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
505except getopt.GetoptError: 519except getopt.GetoptError:
506 print_help() 520 print_help()
507 sys.exit(2) 521 sys.exit(2)
@@ -521,6 +535,8 @@ for opt, arg in opts:
521 elif opt in ("-n", "--name"): 535 elif opt in ("-n", "--name"):
522 valid2 = True 536 valid2 = True
523 testname = arg 537 testname = arg
538 elif opt in ("-m", "--memory"):
539 memory = arg
524 540
525if not (valid1 and valid2): 541if not (valid1 and valid2):
526 print_help() 542 print_help()
@@ -569,6 +585,11 @@ current_max_cpu = 0
569 585
570read_trace_data(filename) 586read_trace_data(filename)
571 587
588clear_trace_file()
589# Free the memory
590if interval:
591 free_trace_buffer()
592
572if graph_data_present == False: 593if graph_data_present == False:
573 print('No valid data to plot') 594 print('No valid data to plot')
574 sys.exit(2) 595 sys.exit(2)
@@ -593,9 +614,4 @@ for root, dirs, files in os.walk('.'):
593 for f in files: 614 for f in files:
594 fix_ownership(f) 615 fix_ownership(f)
595 616
596clear_trace_file()
597# Free the memory
598if interval:
599 free_trace_buffer()
600
601os.chdir('../../') 617os.chdir('../../')
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index a9bc914a8fe8..2ab25aa38263 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -25,4 +25,4 @@ install : turbostat
25 install -d $(DESTDIR)$(PREFIX)/bin 25 install -d $(DESTDIR)$(PREFIX)/bin
26 install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat 26 install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
27 install -d $(DESTDIR)$(PREFIX)/share/man/man8 27 install -d $(DESTDIR)$(PREFIX)/share/man/man8
28 install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 28 install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ccf2a69365cc..d39e4ff7d0bf 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -54,9 +54,12 @@ name as necessary to disambiguate it from others is necessary. Note that option
54.PP 54.PP
55\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 55\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
56.PP 56.PP
57\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. 57\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
58.PP 58.PP
59\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. 59\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
60The column name "all" can be used to enable all disabled-by-default built-in counters.
61.PP
62\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
60.PP 63.PP
61\fB--Dump\fP displays the raw counter values. 64\fB--Dump\fP displays the raw counter values.
62.PP 65.PP
@@ -64,6 +67,8 @@ name as necessary to disambiguate it from others is necessary. Note that option
64.PP 67.PP
65\fB--interval seconds\fP overrides the default 5.0 second measurement interval. 68\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
66.PP 69.PP
70\fB--num_iterations num\fP number of the measurement iterations.
71.PP
67\fB--out output_file\fP turbostat output is written to the specified output_file. 72\fB--out output_file\fP turbostat output is written to the specified output_file.
68The file is truncated if it already exists, and it is created if it does not exist. 73The file is truncated if it already exists, and it is created if it does not exist.
69.PP 74.PP
@@ -86,6 +91,8 @@ displays the statistics gathered since it was forked.
86The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. 91The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
87.SH COLUMN DESCRIPTIONS 92.SH COLUMN DESCRIPTIONS
88.nf 93.nf
94\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
95\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
89\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). 96\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
90\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. 97\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
91\fBPackage\fP processor package number -- not present on systems with a single processor package. 98\fBPackage\fP processor package number -- not present on systems with a single processor package.
@@ -262,6 +269,21 @@ CPU PRF_CTRL
262 269
263.fi 270.fi
264 271
272.SH INPUT
273
274For interval-mode, turbostat will immediately end the current interval
275when it sees a newline on standard input.
276turbostat will then start the next interval.
277Control-C will be send a SIGINT to turbostat,
278which will immediately abort the program with no further processing.
279.SH SIGNALS
280
281SIGINT will interrupt interval-mode.
282The end-of-interval data will be collected and displayed before turbostat exits.
283
284SIGUSR1 will end current interval,
285end-of-interval data will be collected and displayed before turbostat
286starts a new interval.
265.SH NOTES 287.SH NOTES
266 288
267.B "turbostat " 289.B "turbostat "
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bd9c6b31a504..4d14bbbf9b63 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -29,6 +29,7 @@
29#include <sys/types.h> 29#include <sys/types.h>
30#include <sys/wait.h> 30#include <sys/wait.h>
31#include <sys/stat.h> 31#include <sys/stat.h>
32#include <sys/select.h>
32#include <sys/resource.h> 33#include <sys/resource.h>
33#include <fcntl.h> 34#include <fcntl.h>
34#include <signal.h> 35#include <signal.h>
@@ -47,9 +48,13 @@
47char *proc_stat = "/proc/stat"; 48char *proc_stat = "/proc/stat";
48FILE *outf; 49FILE *outf;
49int *fd_percpu; 50int *fd_percpu;
51struct timeval interval_tv = {5, 0};
50struct timespec interval_ts = {5, 0}; 52struct timespec interval_ts = {5, 0};
53struct timespec one_msec = {0, 1000000};
54unsigned int num_iterations;
51unsigned int debug; 55unsigned int debug;
52unsigned int quiet; 56unsigned int quiet;
57unsigned int shown;
53unsigned int sums_need_wide_columns; 58unsigned int sums_need_wide_columns;
54unsigned int rapl_joules; 59unsigned int rapl_joules;
55unsigned int summary_only; 60unsigned int summary_only;
@@ -58,6 +63,7 @@ unsigned int dump_only;
58unsigned int do_snb_cstates; 63unsigned int do_snb_cstates;
59unsigned int do_knl_cstates; 64unsigned int do_knl_cstates;
60unsigned int do_slm_cstates; 65unsigned int do_slm_cstates;
66unsigned int do_cnl_cstates;
61unsigned int use_c1_residency_msr; 67unsigned int use_c1_residency_msr;
62unsigned int has_aperf; 68unsigned int has_aperf;
63unsigned int has_epb; 69unsigned int has_epb;
@@ -80,6 +86,8 @@ unsigned int do_rapl;
80unsigned int do_dts; 86unsigned int do_dts;
81unsigned int do_ptm; 87unsigned int do_ptm;
82unsigned long long gfx_cur_rc6_ms; 88unsigned long long gfx_cur_rc6_ms;
89unsigned long long cpuidle_cur_cpu_lpi_us;
90unsigned long long cpuidle_cur_sys_lpi_us;
83unsigned int gfx_cur_mhz; 91unsigned int gfx_cur_mhz;
84unsigned int tcc_activation_temp; 92unsigned int tcc_activation_temp;
85unsigned int tcc_activation_temp_override; 93unsigned int tcc_activation_temp_override;
@@ -87,6 +95,7 @@ double rapl_power_units, rapl_time_units;
87double rapl_dram_energy_units, rapl_energy_units; 95double rapl_dram_energy_units, rapl_energy_units;
88double rapl_joule_counter_range; 96double rapl_joule_counter_range;
89unsigned int do_core_perf_limit_reasons; 97unsigned int do_core_perf_limit_reasons;
98unsigned int has_automatic_cstate_conversion;
90unsigned int do_gfx_perf_limit_reasons; 99unsigned int do_gfx_perf_limit_reasons;
91unsigned int do_ring_perf_limit_reasons; 100unsigned int do_ring_perf_limit_reasons;
92unsigned int crystal_hz; 101unsigned int crystal_hz;
@@ -100,6 +109,7 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
100unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ 109unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
101unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ 110unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
102unsigned int has_misc_feature_control; 111unsigned int has_misc_feature_control;
112unsigned int first_counter_read = 1;
103 113
104#define RAPL_PKG (1 << 0) 114#define RAPL_PKG (1 << 0)
105 /* 0x610 MSR_PKG_POWER_LIMIT */ 115 /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -147,7 +157,9 @@ char *progname;
147#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ 157#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
148cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; 158cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
149size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; 159size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
150#define MAX_ADDED_COUNTERS 16 160#define MAX_ADDED_COUNTERS 8
161#define MAX_ADDED_THREAD_COUNTERS 24
162#define BITMASK_SIZE 32
151 163
152struct thread_data { 164struct thread_data {
153 struct timeval tv_begin; 165 struct timeval tv_begin;
@@ -159,10 +171,12 @@ struct thread_data {
159 unsigned long long irq_count; 171 unsigned long long irq_count;
160 unsigned int smi_count; 172 unsigned int smi_count;
161 unsigned int cpu_id; 173 unsigned int cpu_id;
174 unsigned int apic_id;
175 unsigned int x2apic_id;
162 unsigned int flags; 176 unsigned int flags;
163#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 177#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
164#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 178#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
165 unsigned long long counter[MAX_ADDED_COUNTERS]; 179 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
166} *thread_even, *thread_odd; 180} *thread_even, *thread_odd;
167 181
168struct core_data { 182struct core_data {
@@ -183,6 +197,8 @@ struct pkg_data {
183 unsigned long long pc8; 197 unsigned long long pc8;
184 unsigned long long pc9; 198 unsigned long long pc9;
185 unsigned long long pc10; 199 unsigned long long pc10;
200 unsigned long long cpu_lpi;
201 unsigned long long sys_lpi;
186 unsigned long long pkg_wtd_core_c0; 202 unsigned long long pkg_wtd_core_c0;
187 unsigned long long pkg_any_core_c0; 203 unsigned long long pkg_any_core_c0;
188 unsigned long long pkg_any_gfxe_c0; 204 unsigned long long pkg_any_gfxe_c0;
@@ -203,12 +219,21 @@ struct pkg_data {
203#define ODD_COUNTERS thread_odd, core_odd, package_odd 219#define ODD_COUNTERS thread_odd, core_odd, package_odd
204#define EVEN_COUNTERS thread_even, core_even, package_even 220#define EVEN_COUNTERS thread_even, core_even, package_even
205 221
206#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ 222#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
207 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ 223 ((thread_base) + \
208 topo.num_threads_per_core + \ 224 ((pkg_no) * \
209 (core_no) * topo.num_threads_per_core + (thread_no)) 225 topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
210#define GET_CORE(core_base, core_no, pkg_no) \ 226 ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
211 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 227 ((core_no) * topo.threads_per_core) + \
228 (thread_no))
229
230#define GET_CORE(core_base, core_no, node_no, pkg_no) \
231 ((core_base) + \
232 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
233 ((node_no) * topo.cores_per_node) + \
234 (core_no))
235
236
212#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 237#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
213 238
214enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; 239enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
@@ -244,14 +269,25 @@ struct system_summary {
244 struct pkg_data packages; 269 struct pkg_data packages;
245} average; 270} average;
246 271
272struct cpu_topology {
273 int physical_package_id;
274 int logical_cpu_id;
275 int physical_node_id;
276 int logical_node_id; /* 0-based count within the package */
277 int physical_core_id;
278 int thread_id;
279 cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
280} *cpus;
247 281
248struct topo_params { 282struct topo_params {
249 int num_packages; 283 int num_packages;
250 int num_cpus; 284 int num_cpus;
251 int num_cores; 285 int num_cores;
252 int max_cpu_num; 286 int max_cpu_num;
253 int num_cores_per_pkg; 287 int max_node_num;
254 int num_threads_per_core; 288 int nodes_per_pkg;
289 int cores_per_node;
290 int threads_per_core;
255} topo; 291} topo;
256 292
257struct timeval tv_even, tv_odd, tv_delta; 293struct timeval tv_even, tv_odd, tv_delta;
@@ -273,27 +309,33 @@ int cpu_is_not_present(int cpu)
273int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 309int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
274 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 310 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
275{ 311{
276 int retval, pkg_no, core_no, thread_no; 312 int retval, pkg_no, core_no, thread_no, node_no;
277 313
278 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 314 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
279 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 315 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
280 for (thread_no = 0; thread_no < 316 for (node_no = 0; node_no < topo.nodes_per_pkg;
281 topo.num_threads_per_core; ++thread_no) { 317 node_no++) {
282 struct thread_data *t; 318 for (thread_no = 0; thread_no <
283 struct core_data *c; 319 topo.threads_per_core; ++thread_no) {
284 struct pkg_data *p; 320 struct thread_data *t;
285 321 struct core_data *c;
286 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 322 struct pkg_data *p;
287 323
288 if (cpu_is_not_present(t->cpu_id)) 324 t = GET_THREAD(thread_base, thread_no,
289 continue; 325 core_no, node_no,
290 326 pkg_no);
291 c = GET_CORE(core_base, core_no, pkg_no); 327
292 p = GET_PKG(pkg_base, pkg_no); 328 if (cpu_is_not_present(t->cpu_id))
293 329 continue;
294 retval = func(t, c, p); 330
295 if (retval) 331 c = GET_CORE(core_base, core_no,
296 return retval; 332 node_no, pkg_no);
333 p = GET_PKG(pkg_base, pkg_no);
334
335 retval = func(t, c, p);
336 if (retval)
337 return retval;
338 }
297 } 339 }
298 } 340 }
299 } 341 }
@@ -342,17 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
342} 384}
343 385
344/* 386/*
345 * Each string in this array is compared in --show and --hide cmdline. 387 * This list matches the column headers, except
346 * Thus, strings that are proper sub-sets must follow their more specific peers. 388 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
389 * 2. Core and CPU are moved to the end, we can't have strings that contain them
390 * matching on them for --show and --hide.
347 */ 391 */
348struct msr_counter bic[] = { 392struct msr_counter bic[] = {
393 { 0x0, "usec" },
394 { 0x0, "Time_Of_Day_Seconds" },
349 { 0x0, "Package" }, 395 { 0x0, "Package" },
396 { 0x0, "Node" },
350 { 0x0, "Avg_MHz" }, 397 { 0x0, "Avg_MHz" },
398 { 0x0, "Busy%" },
351 { 0x0, "Bzy_MHz" }, 399 { 0x0, "Bzy_MHz" },
352 { 0x0, "TSC_MHz" }, 400 { 0x0, "TSC_MHz" },
353 { 0x0, "IRQ" }, 401 { 0x0, "IRQ" },
354 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, 402 { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
355 { 0x0, "Busy%" }, 403 { 0x0, "sysfs" },
356 { 0x0, "CPU%c1" }, 404 { 0x0, "CPU%c1" },
357 { 0x0, "CPU%c3" }, 405 { 0x0, "CPU%c3" },
358 { 0x0, "CPU%c6" }, 406 { 0x0, "CPU%c6" },
@@ -369,7 +417,9 @@ struct msr_counter bic[] = {
369 { 0x0, "Pkg%pc7" }, 417 { 0x0, "Pkg%pc7" },
370 { 0x0, "Pkg%pc8" }, 418 { 0x0, "Pkg%pc8" },
371 { 0x0, "Pkg%pc9" }, 419 { 0x0, "Pkg%pc9" },
372 { 0x0, "Pkg%pc10" }, 420 { 0x0, "Pk%pc10" },
421 { 0x0, "CPU%LPI" },
422 { 0x0, "SYS%LPI" },
373 { 0x0, "PkgWatt" }, 423 { 0x0, "PkgWatt" },
374 { 0x0, "CorWatt" }, 424 { 0x0, "CorWatt" },
375 { 0x0, "GFXWatt" }, 425 { 0x0, "GFXWatt" },
@@ -381,70 +431,80 @@ struct msr_counter bic[] = {
381 { 0x0, "Cor_J" }, 431 { 0x0, "Cor_J" },
382 { 0x0, "GFX_J" }, 432 { 0x0, "GFX_J" },
383 { 0x0, "RAM_J" }, 433 { 0x0, "RAM_J" },
384 { 0x0, "Core" },
385 { 0x0, "CPU" },
386 { 0x0, "Mod%c6" }, 434 { 0x0, "Mod%c6" },
387 { 0x0, "sysfs" },
388 { 0x0, "Totl%C0" }, 435 { 0x0, "Totl%C0" },
389 { 0x0, "Any%C0" }, 436 { 0x0, "Any%C0" },
390 { 0x0, "GFX%C0" }, 437 { 0x0, "GFX%C0" },
391 { 0x0, "CPUGFX%" }, 438 { 0x0, "CPUGFX%" },
439 { 0x0, "Core" },
440 { 0x0, "CPU" },
441 { 0x0, "APIC" },
442 { 0x0, "X2APIC" },
392}; 443};
393 444
394
395
396#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) 445#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
397#define BIC_Package (1ULL << 0) 446#define BIC_USEC (1ULL << 0)
398#define BIC_Avg_MHz (1ULL << 1) 447#define BIC_TOD (1ULL << 1)
399#define BIC_Bzy_MHz (1ULL << 2) 448#define BIC_Package (1ULL << 2)
400#define BIC_TSC_MHz (1ULL << 3) 449#define BIC_Node (1ULL << 3)
401#define BIC_IRQ (1ULL << 4) 450#define BIC_Avg_MHz (1ULL << 4)
402#define BIC_SMI (1ULL << 5) 451#define BIC_Busy (1ULL << 5)
403#define BIC_Busy (1ULL << 6) 452#define BIC_Bzy_MHz (1ULL << 6)
404#define BIC_CPU_c1 (1ULL << 7) 453#define BIC_TSC_MHz (1ULL << 7)
405#define BIC_CPU_c3 (1ULL << 8) 454#define BIC_IRQ (1ULL << 8)
406#define BIC_CPU_c6 (1ULL << 9) 455#define BIC_SMI (1ULL << 9)
407#define BIC_CPU_c7 (1ULL << 10) 456#define BIC_sysfs (1ULL << 10)
408#define BIC_ThreadC (1ULL << 11) 457#define BIC_CPU_c1 (1ULL << 11)
409#define BIC_CoreTmp (1ULL << 12) 458#define BIC_CPU_c3 (1ULL << 12)
410#define BIC_CoreCnt (1ULL << 13) 459#define BIC_CPU_c6 (1ULL << 13)
411#define BIC_PkgTmp (1ULL << 14) 460#define BIC_CPU_c7 (1ULL << 14)
412#define BIC_GFX_rc6 (1ULL << 15) 461#define BIC_ThreadC (1ULL << 15)
413#define BIC_GFXMHz (1ULL << 16) 462#define BIC_CoreTmp (1ULL << 16)
414#define BIC_Pkgpc2 (1ULL << 17) 463#define BIC_CoreCnt (1ULL << 17)
415#define BIC_Pkgpc3 (1ULL << 18) 464#define BIC_PkgTmp (1ULL << 18)
416#define BIC_Pkgpc6 (1ULL << 19) 465#define BIC_GFX_rc6 (1ULL << 19)
417#define BIC_Pkgpc7 (1ULL << 20) 466#define BIC_GFXMHz (1ULL << 20)
418#define BIC_Pkgpc8 (1ULL << 21) 467#define BIC_Pkgpc2 (1ULL << 21)
419#define BIC_Pkgpc9 (1ULL << 22) 468#define BIC_Pkgpc3 (1ULL << 22)
420#define BIC_Pkgpc10 (1ULL << 23) 469#define BIC_Pkgpc6 (1ULL << 23)
421#define BIC_PkgWatt (1ULL << 24) 470#define BIC_Pkgpc7 (1ULL << 24)
422#define BIC_CorWatt (1ULL << 25) 471#define BIC_Pkgpc8 (1ULL << 25)
423#define BIC_GFXWatt (1ULL << 26) 472#define BIC_Pkgpc9 (1ULL << 26)
424#define BIC_PkgCnt (1ULL << 27) 473#define BIC_Pkgpc10 (1ULL << 27)
425#define BIC_RAMWatt (1ULL << 28) 474#define BIC_CPU_LPI (1ULL << 28)
426#define BIC_PKG__ (1ULL << 29) 475#define BIC_SYS_LPI (1ULL << 29)
427#define BIC_RAM__ (1ULL << 30) 476#define BIC_PkgWatt (1ULL << 30)
428#define BIC_Pkg_J (1ULL << 31) 477#define BIC_CorWatt (1ULL << 31)
429#define BIC_Cor_J (1ULL << 32) 478#define BIC_GFXWatt (1ULL << 32)
430#define BIC_GFX_J (1ULL << 33) 479#define BIC_PkgCnt (1ULL << 33)
431#define BIC_RAM_J (1ULL << 34) 480#define BIC_RAMWatt (1ULL << 34)
432#define BIC_Core (1ULL << 35) 481#define BIC_PKG__ (1ULL << 35)
433#define BIC_CPU (1ULL << 36) 482#define BIC_RAM__ (1ULL << 36)
434#define BIC_Mod_c6 (1ULL << 37) 483#define BIC_Pkg_J (1ULL << 37)
435#define BIC_sysfs (1ULL << 38) 484#define BIC_Cor_J (1ULL << 38)
436#define BIC_Totl_c0 (1ULL << 39) 485#define BIC_GFX_J (1ULL << 39)
437#define BIC_Any_c0 (1ULL << 40) 486#define BIC_RAM_J (1ULL << 40)
438#define BIC_GFX_c0 (1ULL << 41) 487#define BIC_Mod_c6 (1ULL << 41)
439#define BIC_CPUGFX (1ULL << 42) 488#define BIC_Totl_c0 (1ULL << 42)
440 489#define BIC_Any_c0 (1ULL << 43)
441unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; 490#define BIC_GFX_c0 (1ULL << 44)
442unsigned long long bic_present = BIC_sysfs; 491#define BIC_CPUGFX (1ULL << 45)
492#define BIC_Core (1ULL << 46)
493#define BIC_CPU (1ULL << 47)
494#define BIC_APIC (1ULL << 48)
495#define BIC_X2APIC (1ULL << 49)
496
497#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
498
499unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
500unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
443 501
444#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) 502#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
503#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
445#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) 504#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
446#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) 505#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
447 506
507
448#define MAX_DEFERRED 16 508#define MAX_DEFERRED 16
449char *deferred_skip_names[MAX_DEFERRED]; 509char *deferred_skip_names[MAX_DEFERRED];
450int deferred_skip_index; 510int deferred_skip_index;
@@ -464,16 +524,34 @@ void help(void)
464 "when COMMAND completes.\n" 524 "when COMMAND completes.\n"
465 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 525 "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
466 "to print statistics, until interrupted.\n" 526 "to print statistics, until interrupted.\n"
467 "--add add a counter\n" 527 " -a, --add add a counter\n"
468 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" 528 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
469 "--cpu cpu-set limit output to summary plus cpu-set:\n" 529 " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
470 " {core | package | j,k,l..m,n-p }\n" 530 " {core | package | j,k,l..m,n-p }\n"
471 "--quiet skip decoding system configuration header\n" 531 " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
472 "--interval sec Override default 5-second measurement interval\n" 532 " -D, --Dump displays the raw counter values\n"
473 "--help print this help message\n" 533 " -e, --enable [all | column]\n"
474 "--list list column headers only\n" 534 " shows all or the specified disabled column\n"
475 "--out file create or truncate \"file\" for all output\n" 535 " -H, --hide [column|column,column,...]\n"
476 "--version print version information\n" 536 " hide the specified column(s)\n"
537 " -i, --interval sec.subsec\n"
538 " Override default 5-second measurement interval\n"
539 " -J, --Joules displays energy in Joules instead of Watts\n"
540 " -l, --list list column headers only\n"
541 " -n, --num_iterations num\n"
542 " number of the measurement iterations\n"
543 " -o, --out file\n"
544 " create or truncate \"file\" for all output\n"
545 " -q, --quiet skip decoding system configuration header\n"
546 " -s, --show [column|column,column,...]\n"
547 " show only the specified column(s)\n"
548 " -S, --Summary\n"
549 " limits output to 1-line system summary per interval\n"
550 " -T, --TCC temperature\n"
551 " sets the Thermal Control Circuit temperature in\n"
552 " degrees Celsius\n"
553 " -h, --help print this help message\n"
554 " -v, --version print version information\n"
477 "\n" 555 "\n"
478 "For more help, run \"man turbostat\"\n"); 556 "For more help, run \"man turbostat\"\n");
479} 557}
@@ -496,6 +574,9 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
496 if (comma) 574 if (comma)
497 *comma = '\0'; 575 *comma = '\0';
498 576
577 if (!strcmp(name_list, "all"))
578 return ~0;
579
499 for (i = 0; i < MAX_BIC; ++i) { 580 for (i = 0; i < MAX_BIC; ++i) {
500 if (!strcmp(name_list, bic[i].name)) { 581 if (!strcmp(name_list, bic[i].name)) {
501 retval |= (1ULL << i); 582 retval |= (1ULL << i);
@@ -532,14 +613,22 @@ void print_header(char *delim)
532 struct msr_counter *mp; 613 struct msr_counter *mp;
533 int printed = 0; 614 int printed = 0;
534 615
535 if (debug) 616 if (DO_BIC(BIC_USEC))
536 outp += sprintf(outp, "usec %s", delim); 617 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
618 if (DO_BIC(BIC_TOD))
619 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
537 if (DO_BIC(BIC_Package)) 620 if (DO_BIC(BIC_Package))
538 outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); 621 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
622 if (DO_BIC(BIC_Node))
623 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
539 if (DO_BIC(BIC_Core)) 624 if (DO_BIC(BIC_Core))
540 outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); 625 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
541 if (DO_BIC(BIC_CPU)) 626 if (DO_BIC(BIC_CPU))
542 outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); 627 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
628 if (DO_BIC(BIC_APIC))
629 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
630 if (DO_BIC(BIC_X2APIC))
631 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
543 if (DO_BIC(BIC_Avg_MHz)) 632 if (DO_BIC(BIC_Avg_MHz))
544 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); 633 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
545 if (DO_BIC(BIC_Busy)) 634 if (DO_BIC(BIC_Busy))
@@ -576,7 +665,7 @@ void print_header(char *delim)
576 665
577 if (DO_BIC(BIC_CPU_c1)) 666 if (DO_BIC(BIC_CPU_c1))
578 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); 667 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
579 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) 668 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
580 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); 669 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
581 if (DO_BIC(BIC_CPU_c6)) 670 if (DO_BIC(BIC_CPU_c6))
582 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); 671 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
@@ -635,6 +724,10 @@ void print_header(char *delim)
635 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); 724 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
636 if (DO_BIC(BIC_Pkgpc10)) 725 if (DO_BIC(BIC_Pkgpc10))
637 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); 726 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
727 if (DO_BIC(BIC_CPU_LPI))
728 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
729 if (DO_BIC(BIC_SYS_LPI))
730 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
638 731
639 if (do_rapl && !rapl_joules) { 732 if (do_rapl && !rapl_joules) {
640 if (DO_BIC(BIC_PkgWatt)) 733 if (DO_BIC(BIC_PkgWatt))
@@ -739,6 +832,9 @@ int dump_counters(struct thread_data *t, struct core_data *c,
739 outp += sprintf(outp, "pc8: %016llX\n", p->pc8); 832 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
740 outp += sprintf(outp, "pc9: %016llX\n", p->pc9); 833 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
741 outp += sprintf(outp, "pc10: %016llX\n", p->pc10); 834 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
835 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
836 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
837 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
742 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); 838 outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
743 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); 839 outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
744 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); 840 outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
@@ -786,7 +882,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
786 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) 882 (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
787 return 0; 883 return 0;
788 884
789 if (debug) { 885 if (DO_BIC(BIC_USEC)) {
790 /* on each row, print how many usec each timestamp took to gather */ 886 /* on each row, print how many usec each timestamp took to gather */
791 struct timeval tv; 887 struct timeval tv;
792 888
@@ -794,6 +890,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
794 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); 890 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
795 } 891 }
796 892
893 /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
894 if (DO_BIC(BIC_TOD))
895 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
896
797 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 897 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
798 898
799 tsc = t->tsc * tsc_tweak; 899 tsc = t->tsc * tsc_tweak;
@@ -802,10 +902,16 @@ int format_counters(struct thread_data *t, struct core_data *c,
802 if (t == &average.threads) { 902 if (t == &average.threads) {
803 if (DO_BIC(BIC_Package)) 903 if (DO_BIC(BIC_Package))
804 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 904 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
905 if (DO_BIC(BIC_Node))
906 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
805 if (DO_BIC(BIC_Core)) 907 if (DO_BIC(BIC_Core))
806 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 908 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
807 if (DO_BIC(BIC_CPU)) 909 if (DO_BIC(BIC_CPU))
808 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 910 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
911 if (DO_BIC(BIC_APIC))
912 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
913 if (DO_BIC(BIC_X2APIC))
914 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
809 } else { 915 } else {
810 if (DO_BIC(BIC_Package)) { 916 if (DO_BIC(BIC_Package)) {
811 if (p) 917 if (p)
@@ -813,6 +919,15 @@ int format_counters(struct thread_data *t, struct core_data *c,
813 else 919 else
814 outp += sprintf(outp, "%s-", (printed++ ? delim : "")); 920 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
815 } 921 }
922 if (DO_BIC(BIC_Node)) {
923 if (t)
924 outp += sprintf(outp, "%s%d",
925 (printed++ ? delim : ""),
926 cpus[t->cpu_id].physical_node_id);
927 else
928 outp += sprintf(outp, "%s-",
929 (printed++ ? delim : ""));
930 }
816 if (DO_BIC(BIC_Core)) { 931 if (DO_BIC(BIC_Core)) {
817 if (c) 932 if (c)
818 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); 933 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
@@ -821,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
821 } 936 }
822 if (DO_BIC(BIC_CPU)) 937 if (DO_BIC(BIC_CPU))
823 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); 938 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
939 if (DO_BIC(BIC_APIC))
940 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
941 if (DO_BIC(BIC_X2APIC))
942 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
824 } 943 }
825 944
826 if (DO_BIC(BIC_Avg_MHz)) 945 if (DO_BIC(BIC_Avg_MHz))
@@ -882,7 +1001,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
882 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1001 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
883 goto done; 1002 goto done;
884 1003
885 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) 1004 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
886 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); 1005 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
887 if (DO_BIC(BIC_CPU_c6)) 1006 if (DO_BIC(BIC_CPU_c6))
888 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); 1007 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
@@ -959,6 +1078,11 @@ int format_counters(struct thread_data *t, struct core_data *c,
959 if (DO_BIC(BIC_Pkgpc10)) 1078 if (DO_BIC(BIC_Pkgpc10))
960 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); 1079 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
961 1080
1081 if (DO_BIC(BIC_CPU_LPI))
1082 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1083 if (DO_BIC(BIC_SYS_LPI))
1084 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1085
962 /* 1086 /*
963 * If measurement interval exceeds minimum RAPL Joule Counter range, 1087 * If measurement interval exceeds minimum RAPL Joule Counter range,
964 * indicate that results are suspect by printing "**" in fraction place. 1088 * indicate that results are suspect by printing "**" in fraction place.
@@ -1006,7 +1130,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
1006 } 1130 }
1007 1131
1008done: 1132done:
1009 outp += sprintf(outp, "\n"); 1133 if (*(outp - 1) != '\n')
1134 outp += sprintf(outp, "\n");
1010 1135
1011 return 0; 1136 return 0;
1012} 1137}
@@ -1083,6 +1208,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
1083 old->pc8 = new->pc8 - old->pc8; 1208 old->pc8 = new->pc8 - old->pc8;
1084 old->pc9 = new->pc9 - old->pc9; 1209 old->pc9 = new->pc9 - old->pc9;
1085 old->pc10 = new->pc10 - old->pc10; 1210 old->pc10 = new->pc10 - old->pc10;
1211 old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1212 old->sys_lpi = new->sys_lpi - old->sys_lpi;
1086 old->pkg_temp_c = new->pkg_temp_c; 1213 old->pkg_temp_c = new->pkg_temp_c;
1087 1214
1088 /* flag an error when rc6 counter resets/wraps */ 1215 /* flag an error when rc6 counter resets/wraps */
@@ -1140,6 +1267,21 @@ delta_thread(struct thread_data *new, struct thread_data *old,
1140 int i; 1267 int i;
1141 struct msr_counter *mp; 1268 struct msr_counter *mp;
1142 1269
1270 /* we run cpuid just the 1st time, copy the results */
1271 if (DO_BIC(BIC_APIC))
1272 new->apic_id = old->apic_id;
1273 if (DO_BIC(BIC_X2APIC))
1274 new->x2apic_id = old->x2apic_id;
1275
1276 /*
1277 * the timestamps from start of measurement interval are in "old"
1278 * the timestamp from end of measurement interval are in "new"
1279 * over-write old w/ new so we can print end of interval values
1280 */
1281
1282 old->tv_begin = new->tv_begin;
1283 old->tv_end = new->tv_end;
1284
1143 old->tsc = new->tsc - old->tsc; 1285 old->tsc = new->tsc - old->tsc;
1144 1286
1145 /* check for TSC < 1 Mcycles over interval */ 1287 /* check for TSC < 1 Mcycles over interval */
@@ -1228,6 +1370,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
1228 int i; 1370 int i;
1229 struct msr_counter *mp; 1371 struct msr_counter *mp;
1230 1372
1373 t->tv_begin.tv_sec = 0;
1374 t->tv_begin.tv_usec = 0;
1375 t->tv_end.tv_sec = 0;
1376 t->tv_end.tv_usec = 0;
1377
1231 t->tsc = 0; 1378 t->tsc = 0;
1232 t->aperf = 0; 1379 t->aperf = 0;
1233 t->mperf = 0; 1380 t->mperf = 0;
@@ -1260,6 +1407,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
1260 p->pc8 = 0; 1407 p->pc8 = 0;
1261 p->pc9 = 0; 1408 p->pc9 = 0;
1262 p->pc10 = 0; 1409 p->pc10 = 0;
1410 p->cpu_lpi = 0;
1411 p->sys_lpi = 0;
1263 1412
1264 p->energy_pkg = 0; 1413 p->energy_pkg = 0;
1265 p->energy_dram = 0; 1414 p->energy_dram = 0;
@@ -1286,6 +1435,19 @@ int sum_counters(struct thread_data *t, struct core_data *c,
1286 int i; 1435 int i;
1287 struct msr_counter *mp; 1436 struct msr_counter *mp;
1288 1437
1438 /* copy un-changing apic_id's */
1439 if (DO_BIC(BIC_APIC))
1440 average.threads.apic_id = t->apic_id;
1441 if (DO_BIC(BIC_X2APIC))
1442 average.threads.x2apic_id = t->x2apic_id;
1443
1444 /* remember first tv_begin */
1445 if (average.threads.tv_begin.tv_sec == 0)
1446 average.threads.tv_begin = t->tv_begin;
1447
1448 /* remember last tv_end */
1449 average.threads.tv_end = t->tv_end;
1450
1289 average.threads.tsc += t->tsc; 1451 average.threads.tsc += t->tsc;
1290 average.threads.aperf += t->aperf; 1452 average.threads.aperf += t->aperf;
1291 average.threads.mperf += t->mperf; 1453 average.threads.mperf += t->mperf;
@@ -1341,6 +1503,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
1341 average.packages.pc9 += p->pc9; 1503 average.packages.pc9 += p->pc9;
1342 average.packages.pc10 += p->pc10; 1504 average.packages.pc10 += p->pc10;
1343 1505
1506 average.packages.cpu_lpi = p->cpu_lpi;
1507 average.packages.sys_lpi = p->sys_lpi;
1508
1344 average.packages.energy_pkg += p->energy_pkg; 1509 average.packages.energy_pkg += p->energy_pkg;
1345 average.packages.energy_dram += p->energy_dram; 1510 average.packages.energy_dram += p->energy_dram;
1346 average.packages.energy_cores += p->energy_cores; 1511 average.packages.energy_cores += p->energy_cores;
@@ -1487,7 +1652,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1487 if (get_msr(cpu, mp->msr_num, counterp)) 1652 if (get_msr(cpu, mp->msr_num, counterp))
1488 return -1; 1653 return -1;
1489 } else { 1654 } else {
1490 char path[128]; 1655 char path[128 + PATH_BYTES];
1491 1656
1492 if (mp->flags & SYSFS_PERCPU) { 1657 if (mp->flags & SYSFS_PERCPU) {
1493 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", 1658 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
@@ -1502,6 +1667,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1502 return 0; 1667 return 0;
1503} 1668}
1504 1669
1670void get_apic_id(struct thread_data *t)
1671{
1672 unsigned int eax, ebx, ecx, edx, max_level;
1673
1674 eax = ebx = ecx = edx = 0;
1675
1676 if (!genuine_intel)
1677 return;
1678
1679 __cpuid(0, max_level, ebx, ecx, edx);
1680
1681 __cpuid(1, eax, ebx, ecx, edx);
1682 t->apic_id = (ebx >> 24) & 0xf;
1683
1684 if (max_level < 0xb)
1685 return;
1686
1687 if (!DO_BIC(BIC_X2APIC))
1688 return;
1689
1690 ecx = 0;
1691 __cpuid(0xb, eax, ebx, ecx, edx);
1692 t->x2apic_id = edx;
1693
1694 if (debug && (t->apic_id != t->x2apic_id))
1695 fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
1696}
1697
1505/* 1698/*
1506 * get_counters(...) 1699 * get_counters(...)
1507 * migrate to cpu 1700 * migrate to cpu
@@ -1515,7 +1708,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1515 struct msr_counter *mp; 1708 struct msr_counter *mp;
1516 int i; 1709 int i;
1517 1710
1518
1519 gettimeofday(&t->tv_begin, (struct timezone *)NULL); 1711 gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1520 1712
1521 if (cpu_migrate(cpu)) { 1713 if (cpu_migrate(cpu)) {
@@ -1523,6 +1715,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1523 return -1; 1715 return -1;
1524 } 1716 }
1525 1717
1718 if (first_counter_read)
1719 get_apic_id(t);
1526retry: 1720retry:
1527 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 1721 t->tsc = rdtsc(); /* we are running on local CPU of interest */
1528 1722
@@ -1603,7 +1797,7 @@ retry:
1603 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1797 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1604 goto done; 1798 goto done;
1605 1799
1606 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { 1800 if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1607 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 1801 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1608 return -6; 1802 return -6;
1609 } 1803 }
@@ -1684,6 +1878,11 @@ retry:
1684 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) 1878 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1685 return -13; 1879 return -13;
1686 1880
1881 if (DO_BIC(BIC_CPU_LPI))
1882 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1883 if (DO_BIC(BIC_SYS_LPI))
1884 p->sys_lpi = cpuidle_cur_sys_lpi_us;
1885
1687 if (do_rapl & RAPL_PKG) { 1886 if (do_rapl & RAPL_PKG) {
1688 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) 1887 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1689 return -13; 1888 return -13;
@@ -1769,7 +1968,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV,
1769int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1968int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1770int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1969int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1771int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1970int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1772int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1971int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1773 1972
1774 1973
1775static void 1974static void
@@ -2071,12 +2270,9 @@ dump_nhm_cst_cfg(void)
2071 2270
2072 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); 2271 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2073 2272
2074#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
2075#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
2076
2077 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); 2273 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2078 2274
2079 fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", 2275 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2080 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 2276 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2081 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 2277 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2082 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 2278 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
@@ -2084,6 +2280,15 @@ dump_nhm_cst_cfg(void)
2084 (msr & (1 << 15)) ? "" : "UN", 2280 (msr & (1 << 15)) ? "" : "UN",
2085 (unsigned int)msr & 0xF, 2281 (unsigned int)msr & 0xF,
2086 pkg_cstate_limit_strings[pkg_cstate_limit]); 2282 pkg_cstate_limit_strings[pkg_cstate_limit]);
2283
2284#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
2285 if (has_automatic_cstate_conversion) {
2286 fprintf(outf, ", automatic c-state conversion=%s",
2287 (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2288 }
2289
2290 fprintf(outf, ")\n");
2291
2087 return; 2292 return;
2088} 2293}
2089 2294
@@ -2184,6 +2389,8 @@ void free_fd_percpu(void)
2184 2389
2185void free_all_buffers(void) 2390void free_all_buffers(void)
2186{ 2391{
2392 int i;
2393
2187 CPU_FREE(cpu_present_set); 2394 CPU_FREE(cpu_present_set);
2188 cpu_present_set = NULL; 2395 cpu_present_set = NULL;
2189 cpu_present_setsize = 0; 2396 cpu_present_setsize = 0;
@@ -2216,6 +2423,12 @@ void free_all_buffers(void)
2216 2423
2217 free(irq_column_2_cpu); 2424 free(irq_column_2_cpu);
2218 free(irqs_per_cpu); 2425 free(irqs_per_cpu);
2426
2427 for (i = 0; i <= topo.max_cpu_num; ++i) {
2428 if (cpus[i].put_ids)
2429 CPU_FREE(cpus[i].put_ids);
2430 }
2431 free(cpus);
2219} 2432}
2220 2433
2221 2434
@@ -2240,44 +2453,6 @@ int parse_int_file(const char *fmt, ...)
2240} 2453}
2241 2454
2242/* 2455/*
2243 * get_cpu_position_in_core(cpu)
2244 * return the position of the CPU among its HT siblings in the core
2245 * return -1 if the sibling is not in list
2246 */
2247int get_cpu_position_in_core(int cpu)
2248{
2249 char path[64];
2250 FILE *filep;
2251 int this_cpu;
2252 char character;
2253 int i;
2254
2255 sprintf(path,
2256 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
2257 cpu);
2258 filep = fopen(path, "r");
2259 if (filep == NULL) {
2260 perror(path);
2261 exit(1);
2262 }
2263
2264 for (i = 0; i < topo.num_threads_per_core; i++) {
2265 fscanf(filep, "%d", &this_cpu);
2266 if (this_cpu == cpu) {
2267 fclose(filep);
2268 return i;
2269 }
2270
2271 /* Account for no separator after last thread*/
2272 if (i != (topo.num_threads_per_core - 1))
2273 fscanf(filep, "%c", &character);
2274 }
2275
2276 fclose(filep);
2277 return -1;
2278}
2279
2280/*
2281 * cpu_is_first_core_in_package(cpu) 2456 * cpu_is_first_core_in_package(cpu)
2282 * return 1 if given CPU is 1st core in package 2457 * return 1 if given CPU is 1st core in package
2283 */ 2458 */
@@ -2296,35 +2471,121 @@ int get_core_id(int cpu)
2296 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 2471 return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
2297} 2472}
2298 2473
2299int get_num_ht_siblings(int cpu) 2474void set_node_data(void)
2300{ 2475{
2301 char path[80]; 2476 char path[80];
2302 FILE *filep; 2477 FILE *filep;
2303 int sib1; 2478 int pkg, node, cpu;
2304 int matches = 0;
2305 char character;
2306 char str[100];
2307 char *ch;
2308 2479
2309 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 2480 struct pkg_node_info {
2310 filep = fopen_or_die(path, "r"); 2481 int count;
2482 int min;
2483 } *pni;
2311 2484
2312 /* 2485 pni = calloc(topo.num_packages, sizeof(struct pkg_node_info));
2313 * file format: 2486 if (!pni)
2314 * A ',' separated or '-' separated set of numbers 2487 err(1, "calloc pkg_node_count");
2315 * (eg 1-2 or 1,3,4,5) 2488
2489 for (pkg = 0; pkg < topo.num_packages; pkg++)
2490 pni[pkg].min = topo.num_cpus;
2491
2492 for (node = 0; node <= topo.max_node_num; node++) {
2493 /* find the "first" cpu in the node */
2494 sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node);
2495 filep = fopen(path, "r");
2496 if (!filep)
2497 continue;
2498 fscanf(filep, "%d", &cpu);
2499 fclose(filep);
2500
2501 pkg = cpus[cpu].physical_package_id;
2502 pni[pkg].count++;
2503
2504 if (node < pni[pkg].min)
2505 pni[pkg].min = node;
2506 }
2507
2508 for (pkg = 0; pkg < topo.num_packages; pkg++)
2509 if (pni[pkg].count > topo.nodes_per_pkg)
2510 topo.nodes_per_pkg = pni[0].count;
2511
2512 /* Fake 1 node per pkg for machines that don't
2513 * expose nodes and thus avoid -nan results
2316 */ 2514 */
2317 fscanf(filep, "%d%c\n", &sib1, &character); 2515 if (topo.nodes_per_pkg == 0)
2318 fseek(filep, 0, SEEK_SET); 2516 topo.nodes_per_pkg = 1;
2319 fgets(str, 100, filep); 2517
2320 ch = strchr(str, character); 2518 for (cpu = 0; cpu < topo.num_cpus; cpu++) {
2321 while (ch != NULL) { 2519 pkg = cpus[cpu].physical_package_id;
2322 matches++; 2520 node = cpus[cpu].physical_node_id;
2323 ch = strchr(ch+1, character); 2521 cpus[cpu].logical_node_id = node - pni[pkg].min;
2522 }
2523 free(pni);
2524
2525}
2526
2527int get_physical_node_id(struct cpu_topology *thiscpu)
2528{
2529 char path[80];
2530 FILE *filep;
2531 int i;
2532 int cpu = thiscpu->logical_cpu_id;
2533
2534 for (i = 0; i <= topo.max_cpu_num; i++) {
2535 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2536 cpu, i);
2537 filep = fopen(path, "r");
2538 if (!filep)
2539 continue;
2540 fclose(filep);
2541 return i;
2324 } 2542 }
2543 return -1;
2544}
2545
2546int get_thread_siblings(struct cpu_topology *thiscpu)
2547{
2548 char path[80], character;
2549 FILE *filep;
2550 unsigned long map;
2551 int so, shift, sib_core;
2552 int cpu = thiscpu->logical_cpu_id;
2553 int offset = topo.max_cpu_num + 1;
2554 size_t size;
2555 int thread_id = 0;
2556
2557 thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2558 if (thiscpu->thread_id < 0)
2559 thiscpu->thread_id = thread_id++;
2560 if (!thiscpu->put_ids)
2561 return -1;
2325 2562
2563 size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2564 CPU_ZERO_S(size, thiscpu->put_ids);
2565
2566 sprintf(path,
2567 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2568 filep = fopen_or_die(path, "r");
2569 do {
2570 offset -= BITMASK_SIZE;
2571 fscanf(filep, "%lx%c", &map, &character);
2572 for (shift = 0; shift < BITMASK_SIZE; shift++) {
2573 if ((map >> shift) & 0x1) {
2574 so = shift + offset;
2575 sib_core = get_core_id(so);
2576 if (sib_core == thiscpu->physical_core_id) {
2577 CPU_SET_S(so, size, thiscpu->put_ids);
2578 if ((so != cpu) &&
2579 (cpus[so].thread_id < 0))
2580 cpus[so].thread_id =
2581 thread_id++;
2582 }
2583 }
2584 }
2585 } while (!strncmp(&character, ",", 1));
2326 fclose(filep); 2586 fclose(filep);
2327 return matches+1; 2587
2588 return CPU_COUNT_S(size, thiscpu->put_ids);
2328} 2589}
2329 2590
2330/* 2591/*
@@ -2339,32 +2600,42 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2339 struct thread_data *thread_base2, struct core_data *core_base2, 2600 struct thread_data *thread_base2, struct core_data *core_base2,
2340 struct pkg_data *pkg_base2) 2601 struct pkg_data *pkg_base2)
2341{ 2602{
2342 int retval, pkg_no, core_no, thread_no; 2603 int retval, pkg_no, node_no, core_no, thread_no;
2343 2604
2344 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 2605 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2345 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 2606 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2346 for (thread_no = 0; thread_no < 2607 for (core_no = 0; core_no < topo.cores_per_node;
2347 topo.num_threads_per_core; ++thread_no) { 2608 ++core_no) {
2348 struct thread_data *t, *t2; 2609 for (thread_no = 0; thread_no <
2349 struct core_data *c, *c2; 2610 topo.threads_per_core; ++thread_no) {
2350 struct pkg_data *p, *p2; 2611 struct thread_data *t, *t2;
2351 2612 struct core_data *c, *c2;
2352 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 2613 struct pkg_data *p, *p2;
2353 2614
2354 if (cpu_is_not_present(t->cpu_id)) 2615 t = GET_THREAD(thread_base, thread_no,
2355 continue; 2616 core_no, node_no,
2356 2617 pkg_no);
2357 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); 2618
2358 2619 if (cpu_is_not_present(t->cpu_id))
2359 c = GET_CORE(core_base, core_no, pkg_no); 2620 continue;
2360 c2 = GET_CORE(core_base2, core_no, pkg_no); 2621
2361 2622 t2 = GET_THREAD(thread_base2, thread_no,
2362 p = GET_PKG(pkg_base, pkg_no); 2623 core_no, node_no,
2363 p2 = GET_PKG(pkg_base2, pkg_no); 2624 pkg_no);
2364 2625
2365 retval = func(t, c, p, t2, c2, p2); 2626 c = GET_CORE(core_base, core_no,
2366 if (retval) 2627 node_no, pkg_no);
2367 return retval; 2628 c2 = GET_CORE(core_base2, core_no,
2629 node_no,
2630 pkg_no);
2631
2632 p = GET_PKG(pkg_base, pkg_no);
2633 p2 = GET_PKG(pkg_base2, pkg_no);
2634
2635 retval = func(t, c, p, t2, c2, p2);
2636 if (retval)
2637 return retval;
2638 }
2368 } 2639 }
2369 } 2640 }
2370 } 2641 }
@@ -2409,6 +2680,20 @@ void re_initialize(void)
2409 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); 2680 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2410} 2681}
2411 2682
2683void set_max_cpu_num(void)
2684{
2685 FILE *filep;
2686 unsigned long dummy;
2687
2688 topo.max_cpu_num = 0;
2689 filep = fopen_or_die(
2690 "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2691 "r");
2692 while (fscanf(filep, "%lx,", &dummy) == 1)
2693 topo.max_cpu_num += BITMASK_SIZE;
2694 fclose(filep);
2695 topo.max_cpu_num--; /* 0 based */
2696}
2412 2697
2413/* 2698/*
2414 * count_cpus() 2699 * count_cpus()
@@ -2416,10 +2701,7 @@ void re_initialize(void)
2416 */ 2701 */
2417int count_cpus(int cpu) 2702int count_cpus(int cpu)
2418{ 2703{
2419 if (topo.max_cpu_num < cpu) 2704 topo.num_cpus++;
2420 topo.max_cpu_num = cpu;
2421
2422 topo.num_cpus += 1;
2423 return 0; 2705 return 0;
2424} 2706}
2425int mark_cpu_present(int cpu) 2707int mark_cpu_present(int cpu)
@@ -2428,6 +2710,12 @@ int mark_cpu_present(int cpu)
2428 return 0; 2710 return 0;
2429} 2711}
2430 2712
2713int init_thread_id(int cpu)
2714{
2715 cpus[cpu].thread_id = -1;
2716 return 0;
2717}
2718
2431/* 2719/*
2432 * snapshot_proc_interrupts() 2720 * snapshot_proc_interrupts()
2433 * 2721 *
@@ -2542,6 +2830,52 @@ int snapshot_gfx_mhz(void)
2542} 2830}
2543 2831
2544/* 2832/*
2833 * snapshot_cpu_lpi()
2834 *
2835 * record snapshot of
2836 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2837 *
2838 * return 1 if config change requires a restart, else return 0
2839 */
2840int snapshot_cpu_lpi_us(void)
2841{
2842 FILE *fp;
2843 int retval;
2844
2845 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2846
2847 retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2848 if (retval != 1)
2849 err(1, "CPU LPI");
2850
2851 fclose(fp);
2852
2853 return 0;
2854}
2855/*
2856 * snapshot_sys_lpi()
2857 *
2858 * record snapshot of
2859 * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2860 *
2861 * return 1 if config change requires a restart, else return 0
2862 */
2863int snapshot_sys_lpi_us(void)
2864{
2865 FILE *fp;
2866 int retval;
2867
2868 fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2869
2870 retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2871 if (retval != 1)
2872 err(1, "SYS LPI");
2873
2874 fclose(fp);
2875
2876 return 0;
2877}
2878/*
2545 * snapshot /proc and /sys files 2879 * snapshot /proc and /sys files
2546 * 2880 *
2547 * return 1 if configuration restart needed, else return 0 2881 * return 1 if configuration restart needed, else return 0
@@ -2558,19 +2892,91 @@ int snapshot_proc_sysfs_files(void)
2558 if (DO_BIC(BIC_GFXMHz)) 2892 if (DO_BIC(BIC_GFXMHz))
2559 snapshot_gfx_mhz(); 2893 snapshot_gfx_mhz();
2560 2894
2895 if (DO_BIC(BIC_CPU_LPI))
2896 snapshot_cpu_lpi_us();
2897
2898 if (DO_BIC(BIC_SYS_LPI))
2899 snapshot_sys_lpi_us();
2900
2561 return 0; 2901 return 0;
2562} 2902}
2563 2903
2904int exit_requested;
2905
2906static void signal_handler (int signal)
2907{
2908 switch (signal) {
2909 case SIGINT:
2910 exit_requested = 1;
2911 if (debug)
2912 fprintf(stderr, " SIGINT\n");
2913 break;
2914 case SIGUSR1:
2915 if (debug > 1)
2916 fprintf(stderr, "SIGUSR1\n");
2917 break;
2918 }
2919 /* make sure this manually-invoked interval is at least 1ms long */
2920 nanosleep(&one_msec, NULL);
2921}
2922
2923void setup_signal_handler(void)
2924{
2925 struct sigaction sa;
2926
2927 memset(&sa, 0, sizeof(sa));
2928
2929 sa.sa_handler = &signal_handler;
2930
2931 if (sigaction(SIGINT, &sa, NULL) < 0)
2932 err(1, "sigaction SIGINT");
2933 if (sigaction(SIGUSR1, &sa, NULL) < 0)
2934 err(1, "sigaction SIGUSR1");
2935}
2936
2937void do_sleep(void)
2938{
2939 struct timeval select_timeout;
2940 fd_set readfds;
2941 int retval;
2942
2943 FD_ZERO(&readfds);
2944 FD_SET(0, &readfds);
2945
2946 if (!isatty(fileno(stdin))) {
2947 nanosleep(&interval_ts, NULL);
2948 return;
2949 }
2950
2951 select_timeout = interval_tv;
2952 retval = select(1, &readfds, NULL, NULL, &select_timeout);
2953
2954 if (retval == 1) {
2955 switch (getc(stdin)) {
2956 case 'q':
2957 exit_requested = 1;
2958 break;
2959 }
2960 /* make sure this manually-invoked interval is at least 1ms long */
2961 nanosleep(&one_msec, NULL);
2962 }
2963}
2964
2965
2564void turbostat_loop() 2966void turbostat_loop()
2565{ 2967{
2566 int retval; 2968 int retval;
2567 int restarted = 0; 2969 int restarted = 0;
2970 int done_iters = 0;
2971
2972 setup_signal_handler();
2568 2973
2569restart: 2974restart:
2570 restarted++; 2975 restarted++;
2571 2976
2572 snapshot_proc_sysfs_files(); 2977 snapshot_proc_sysfs_files();
2573 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 2978 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2979 first_counter_read = 0;
2574 if (retval < -1) { 2980 if (retval < -1) {
2575 exit(retval); 2981 exit(retval);
2576 } else if (retval == -1) { 2982 } else if (retval == -1) {
@@ -2581,6 +2987,7 @@ restart:
2581 goto restart; 2987 goto restart;
2582 } 2988 }
2583 restarted = 0; 2989 restarted = 0;
2990 done_iters = 0;
2584 gettimeofday(&tv_even, (struct timezone *)NULL); 2991 gettimeofday(&tv_even, (struct timezone *)NULL);
2585 2992
2586 while (1) { 2993 while (1) {
@@ -2588,7 +2995,7 @@ restart:
2588 re_initialize(); 2995 re_initialize();
2589 goto restart; 2996 goto restart;
2590 } 2997 }
2591 nanosleep(&interval_ts, NULL); 2998 do_sleep();
2592 if (snapshot_proc_sysfs_files()) 2999 if (snapshot_proc_sysfs_files())
2593 goto restart; 3000 goto restart;
2594 retval = for_all_cpus(get_counters, ODD_COUNTERS); 3001 retval = for_all_cpus(get_counters, ODD_COUNTERS);
@@ -2607,7 +3014,11 @@ restart:
2607 compute_average(EVEN_COUNTERS); 3014 compute_average(EVEN_COUNTERS);
2608 format_all_counters(EVEN_COUNTERS); 3015 format_all_counters(EVEN_COUNTERS);
2609 flush_output_stdout(); 3016 flush_output_stdout();
2610 nanosleep(&interval_ts, NULL); 3017 if (exit_requested)
3018 break;
3019 if (num_iterations && ++done_iters >= num_iterations)
3020 break;
3021 do_sleep();
2611 if (snapshot_proc_sysfs_files()) 3022 if (snapshot_proc_sysfs_files())
2612 goto restart; 3023 goto restart;
2613 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 3024 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
@@ -2626,6 +3037,10 @@ restart:
2626 compute_average(ODD_COUNTERS); 3037 compute_average(ODD_COUNTERS);
2627 format_all_counters(ODD_COUNTERS); 3038 format_all_counters(ODD_COUNTERS);
2628 flush_output_stdout(); 3039 flush_output_stdout();
3040 if (exit_requested)
3041 break;
3042 if (num_iterations && ++done_iters >= num_iterations)
3043 break;
2629 } 3044 }
2630} 3045}
2631 3046
@@ -2740,6 +3155,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
2740 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 3155 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2741 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 3156 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2742 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 3157 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3158 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
2743 pkg_cstate_limits = hsw_pkg_cstate_limits; 3159 pkg_cstate_limits = hsw_pkg_cstate_limits;
2744 has_misc_feature_control = 1; 3160 has_misc_feature_control = 1;
2745 break; 3161 break;
@@ -2945,6 +3361,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
2945 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 3361 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2946 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 3362 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2947 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 3363 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3364 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
2948 case INTEL_FAM6_SKYLAKE_X: /* SKX */ 3365 case INTEL_FAM6_SKYLAKE_X: /* SKX */
2949 3366
2950 case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ 3367 case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
@@ -3399,6 +3816,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3399 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 3816 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
3400 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 3817 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3401 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 3818 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3819 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3402 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; 3820 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3403 BIC_PRESENT(BIC_PKG__); 3821 BIC_PRESENT(BIC_PKG__);
3404 BIC_PRESENT(BIC_RAM__); 3822 BIC_PRESENT(BIC_RAM__);
@@ -3523,6 +3941,12 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3523 } 3941 }
3524} 3942}
3525 3943
3944void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3945{
3946 if (is_skx(family, model) || is_bdx(family, model))
3947 has_automatic_cstate_conversion = 1;
3948}
3949
3526int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 3950int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3527{ 3951{
3528 unsigned long long msr; 3952 unsigned long long msr;
@@ -3728,6 +4152,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
3728 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 4152 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
3729 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 4153 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3730 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 4154 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
4155 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3731 case INTEL_FAM6_SKYLAKE_X: /* SKX */ 4156 case INTEL_FAM6_SKYLAKE_X: /* SKX */
3732 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ 4157 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3733 case INTEL_FAM6_ATOM_GEMINI_LAKE: 4158 case INTEL_FAM6_ATOM_GEMINI_LAKE:
@@ -3761,6 +4186,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
3761 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 4186 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
3762 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 4187 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3763 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 4188 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
4189 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3764 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ 4190 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3765 case INTEL_FAM6_ATOM_GEMINI_LAKE: 4191 case INTEL_FAM6_ATOM_GEMINI_LAKE:
3766 return 1; 4192 return 1;
@@ -3786,6 +4212,7 @@ int has_skl_msrs(unsigned int family, unsigned int model)
3786 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ 4212 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
3787 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ 4213 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3788 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 4214 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
4215 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3789 return 1; 4216 return 1;
3790 } 4217 }
3791 return 0; 4218 return 0;
@@ -3815,6 +4242,19 @@ int is_knl(unsigned int family, unsigned int model)
3815 return 0; 4242 return 0;
3816} 4243}
3817 4244
4245int is_cnl(unsigned int family, unsigned int model)
4246{
4247 if (!genuine_intel)
4248 return 0;
4249
4250 switch (model) {
4251 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4252 return 1;
4253 }
4254
4255 return 0;
4256}
4257
3818unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) 4258unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
3819{ 4259{
3820 if (is_knl(family, model)) 4260 if (is_knl(family, model))
@@ -3947,7 +4387,7 @@ void decode_misc_enable_msr(void)
3947 base_cpu, msr, 4387 base_cpu, msr,
3948 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", 4388 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
3949 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", 4389 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
3950 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "", 4390 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
3951 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", 4391 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
3952 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); 4392 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
3953} 4393}
@@ -4037,7 +4477,7 @@ void process_cpuid()
4037 if (!quiet) { 4477 if (!quiet) {
4038 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 4478 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4039 max_level, family, model, stepping, family, model, stepping); 4479 max_level, family, model, stepping, family, model, stepping);
4040 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", 4480 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4041 ecx & (1 << 0) ? "SSE3" : "-", 4481 ecx & (1 << 0) ? "SSE3" : "-",
4042 ecx & (1 << 3) ? "MONITOR" : "-", 4482 ecx & (1 << 3) ? "MONITOR" : "-",
4043 ecx & (1 << 6) ? "SMX" : "-", 4483 ecx & (1 << 6) ? "SMX" : "-",
@@ -4046,6 +4486,7 @@ void process_cpuid()
4046 edx & (1 << 4) ? "TSC" : "-", 4486 edx & (1 << 4) ? "TSC" : "-",
4047 edx & (1 << 5) ? "MSR" : "-", 4487 edx & (1 << 5) ? "MSR" : "-",
4048 edx & (1 << 22) ? "ACPI-TM" : "-", 4488 edx & (1 << 22) ? "ACPI-TM" : "-",
4489 edx & (1 << 28) ? "HT" : "-",
4049 edx & (1 << 29) ? "TM" : "-"); 4490 edx & (1 << 29) ? "TM" : "-");
4050 } 4491 }
4051 4492
@@ -4152,7 +4593,6 @@ void process_cpuid()
4152 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ 4593 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
4153 crystal_hz = 24000000; /* 24.0 MHz */ 4594 crystal_hz = 24000000; /* 24.0 MHz */
4154 break; 4595 break;
4155 case INTEL_FAM6_SKYLAKE_X: /* SKX */
4156 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ 4596 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
4157 crystal_hz = 25000000; /* 25.0 MHz */ 4597 crystal_hz = 25000000; /* 25.0 MHz */
4158 break; 4598 break;
@@ -4253,6 +4693,7 @@ void process_cpuid()
4253 } 4693 }
4254 do_slm_cstates = is_slm(family, model); 4694 do_slm_cstates = is_slm(family, model);
4255 do_knl_cstates = is_knl(family, model); 4695 do_knl_cstates = is_knl(family, model);
4696 do_cnl_cstates = is_cnl(family, model);
4256 4697
4257 if (!quiet) 4698 if (!quiet)
4258 decode_misc_pwr_mgmt_msr(); 4699 decode_misc_pwr_mgmt_msr();
@@ -4262,6 +4703,7 @@ void process_cpuid()
4262 4703
4263 rapl_probe(family, model); 4704 rapl_probe(family, model);
4264 perf_limit_reasons_probe(family, model); 4705 perf_limit_reasons_probe(family, model);
4706 automatic_cstate_conversion_probe(family, model);
4265 4707
4266 if (!quiet) 4708 if (!quiet)
4267 dump_cstate_pstate_config_info(family, model); 4709 dump_cstate_pstate_config_info(family, model);
@@ -4280,13 +4722,22 @@ void process_cpuid()
4280 if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) 4722 if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4281 BIC_PRESENT(BIC_GFXMHz); 4723 BIC_PRESENT(BIC_GFXMHz);
4282 4724
4725 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4726 BIC_PRESENT(BIC_CPU_LPI);
4727 else
4728 BIC_NOT_PRESENT(BIC_CPU_LPI);
4729
4730 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4731 BIC_PRESENT(BIC_SYS_LPI);
4732 else
4733 BIC_NOT_PRESENT(BIC_SYS_LPI);
4734
4283 if (!quiet) 4735 if (!quiet)
4284 decode_misc_feature_control(); 4736 decode_misc_feature_control();
4285 4737
4286 return; 4738 return;
4287} 4739}
4288 4740
4289
4290/* 4741/*
4291 * in /dev/cpu/ return success for names that are numbers 4742 * in /dev/cpu/ return success for names that are numbers
4292 * ie. filter out ".", "..", "microcode". 4743 * ie. filter out ".", "..", "microcode".
@@ -4310,14 +4761,10 @@ void topology_probe()
4310 int max_core_id = 0; 4761 int max_core_id = 0;
4311 int max_package_id = 0; 4762 int max_package_id = 0;
4312 int max_siblings = 0; 4763 int max_siblings = 0;
4313 struct cpu_topology {
4314 int core_id;
4315 int physical_package_id;
4316 } *cpus;
4317 4764
4318 /* Initialize num_cpus, max_cpu_num */ 4765 /* Initialize num_cpus, max_cpu_num */
4766 set_max_cpu_num();
4319 topo.num_cpus = 0; 4767 topo.num_cpus = 0;
4320 topo.max_cpu_num = 0;
4321 for_all_proc_cpus(count_cpus); 4768 for_all_proc_cpus(count_cpus);
4322 if (!summary_only && topo.num_cpus > 1) 4769 if (!summary_only && topo.num_cpus > 1)
4323 BIC_PRESENT(BIC_CPU); 4770 BIC_PRESENT(BIC_CPU);
@@ -4357,6 +4804,7 @@ void topology_probe()
4357 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 4804 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4358 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 4805 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4359 4806
4807 for_all_proc_cpus(init_thread_id);
4360 4808
4361 /* 4809 /*
4362 * For online cpus 4810 * For online cpus
@@ -4370,26 +4818,45 @@ void topology_probe()
4370 fprintf(outf, "cpu%d NOT PRESENT\n", i); 4818 fprintf(outf, "cpu%d NOT PRESENT\n", i);
4371 continue; 4819 continue;
4372 } 4820 }
4373 cpus[i].core_id = get_core_id(i);
4374 if (cpus[i].core_id > max_core_id)
4375 max_core_id = cpus[i].core_id;
4376 4821
4822 cpus[i].logical_cpu_id = i;
4823
4824 /* get package information */
4377 cpus[i].physical_package_id = get_physical_package_id(i); 4825 cpus[i].physical_package_id = get_physical_package_id(i);
4378 if (cpus[i].physical_package_id > max_package_id) 4826 if (cpus[i].physical_package_id > max_package_id)
4379 max_package_id = cpus[i].physical_package_id; 4827 max_package_id = cpus[i].physical_package_id;
4380 4828
4381 siblings = get_num_ht_siblings(i); 4829 /* get numa node information */
4830 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
4831 if (cpus[i].physical_node_id > topo.max_node_num)
4832 topo.max_node_num = cpus[i].physical_node_id;
4833
4834 /* get core information */
4835 cpus[i].physical_core_id = get_core_id(i);
4836 if (cpus[i].physical_core_id > max_core_id)
4837 max_core_id = cpus[i].physical_core_id;
4838
4839 /* get thread information */
4840 siblings = get_thread_siblings(&cpus[i]);
4382 if (siblings > max_siblings) 4841 if (siblings > max_siblings)
4383 max_siblings = siblings; 4842 max_siblings = siblings;
4843 if (cpus[i].thread_id != -1)
4844 topo.num_cores++;
4845
4384 if (debug > 1) 4846 if (debug > 1)
4385 fprintf(outf, "cpu %d pkg %d core %d\n", 4847 fprintf(outf,
4386 i, cpus[i].physical_package_id, cpus[i].core_id); 4848 "cpu %d pkg %d node %d core %d thread %d\n",
4849 i, cpus[i].physical_package_id,
4850 cpus[i].physical_node_id,
4851 cpus[i].physical_core_id,
4852 cpus[i].thread_id);
4387 } 4853 }
4388 topo.num_cores_per_pkg = max_core_id + 1; 4854
4855 topo.cores_per_node = max_core_id + 1;
4389 if (debug > 1) 4856 if (debug > 1)
4390 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", 4857 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4391 max_core_id, topo.num_cores_per_pkg); 4858 max_core_id, topo.cores_per_node);
4392 if (!summary_only && topo.num_cores_per_pkg > 1) 4859 if (!summary_only && topo.cores_per_node > 1)
4393 BIC_PRESENT(BIC_Core); 4860 BIC_PRESENT(BIC_Core);
4394 4861
4395 topo.num_packages = max_package_id + 1; 4862 topo.num_packages = max_package_id + 1;
@@ -4399,33 +4866,38 @@ void topology_probe()
4399 if (!summary_only && topo.num_packages > 1) 4866 if (!summary_only && topo.num_packages > 1)
4400 BIC_PRESENT(BIC_Package); 4867 BIC_PRESENT(BIC_Package);
4401 4868
4402 topo.num_threads_per_core = max_siblings; 4869 set_node_data();
4403 if (debug > 1) 4870 if (debug > 1)
4404 fprintf(outf, "max_siblings %d\n", max_siblings); 4871 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
4872 if (!summary_only && topo.nodes_per_pkg > 1)
4873 BIC_PRESENT(BIC_Node);
4405 4874
4406 free(cpus); 4875 topo.threads_per_core = max_siblings;
4876 if (debug > 1)
4877 fprintf(outf, "max_siblings %d\n", max_siblings);
4407} 4878}
4408 4879
4409void 4880void
4410allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 4881allocate_counters(struct thread_data **t, struct core_data **c,
4882 struct pkg_data **p)
4411{ 4883{
4412 int i; 4884 int i;
4885 int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
4886 topo.num_packages;
4887 int num_threads = topo.threads_per_core * num_cores;
4413 4888
4414 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 4889 *t = calloc(num_threads, sizeof(struct thread_data));
4415 topo.num_packages, sizeof(struct thread_data));
4416 if (*t == NULL) 4890 if (*t == NULL)
4417 goto error; 4891 goto error;
4418 4892
4419 for (i = 0; i < topo.num_threads_per_core * 4893 for (i = 0; i < num_threads; i++)
4420 topo.num_cores_per_pkg * topo.num_packages; i++)
4421 (*t)[i].cpu_id = -1; 4894 (*t)[i].cpu_id = -1;
4422 4895
4423 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 4896 *c = calloc(num_cores, sizeof(struct core_data));
4424 sizeof(struct core_data));
4425 if (*c == NULL) 4897 if (*c == NULL)
4426 goto error; 4898 goto error;
4427 4899
4428 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 4900 for (i = 0; i < num_cores; i++)
4429 (*c)[i].core_id = -1; 4901 (*c)[i].core_id = -1;
4430 4902
4431 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 4903 *p = calloc(topo.num_packages, sizeof(struct pkg_data));
@@ -4442,47 +4914,46 @@ error:
4442/* 4914/*
4443 * init_counter() 4915 * init_counter()
4444 * 4916 *
4445 * set cpu_id, core_num, pkg_num
4446 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 4917 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4447 *
4448 * increment topo.num_cores when 1st core in pkg seen
4449 */ 4918 */
4450void init_counter(struct thread_data *thread_base, struct core_data *core_base, 4919void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4451 struct pkg_data *pkg_base, int thread_num, int core_num, 4920 struct pkg_data *pkg_base, int cpu_id)
4452 int pkg_num, int cpu_id)
4453{ 4921{
4922 int pkg_id = cpus[cpu_id].physical_package_id;
4923 int node_id = cpus[cpu_id].logical_node_id;
4924 int core_id = cpus[cpu_id].physical_core_id;
4925 int thread_id = cpus[cpu_id].thread_id;
4454 struct thread_data *t; 4926 struct thread_data *t;
4455 struct core_data *c; 4927 struct core_data *c;
4456 struct pkg_data *p; 4928 struct pkg_data *p;
4457 4929
4458 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); 4930
4459 c = GET_CORE(core_base, core_num, pkg_num); 4931 /* Workaround for systems where physical_node_id==-1
4460 p = GET_PKG(pkg_base, pkg_num); 4932 * and logical_node_id==(-1 - topo.num_cpus)
4933 */
4934 if (node_id < 0)
4935 node_id = 0;
4936
4937 t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4938 c = GET_CORE(core_base, core_id, node_id, pkg_id);
4939 p = GET_PKG(pkg_base, pkg_id);
4461 4940
4462 t->cpu_id = cpu_id; 4941 t->cpu_id = cpu_id;
4463 if (thread_num == 0) { 4942 if (thread_id == 0) {
4464 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; 4943 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4465 if (cpu_is_first_core_in_package(cpu_id)) 4944 if (cpu_is_first_core_in_package(cpu_id))
4466 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; 4945 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4467 } 4946 }
4468 4947
4469 c->core_id = core_num; 4948 c->core_id = core_id;
4470 p->package_id = pkg_num; 4949 p->package_id = pkg_id;
4471} 4950}
4472 4951
4473 4952
4474int initialize_counters(int cpu_id) 4953int initialize_counters(int cpu_id)
4475{ 4954{
4476 int my_thread_id, my_core_id, my_package_id; 4955 init_counter(EVEN_COUNTERS, cpu_id);
4477 4956 init_counter(ODD_COUNTERS, cpu_id);
4478 my_package_id = get_physical_package_id(cpu_id);
4479 my_core_id = get_core_id(cpu_id);
4480 my_thread_id = get_cpu_position_in_core(cpu_id);
4481 if (!my_thread_id)
4482 topo.num_cores++;
4483
4484 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4485 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4486 return 0; 4957 return 0;
4487} 4958}
4488 4959
@@ -4567,6 +5038,7 @@ int fork_it(char **argv)
4567 5038
4568 snapshot_proc_sysfs_files(); 5039 snapshot_proc_sysfs_files();
4569 status = for_all_cpus(get_counters, EVEN_COUNTERS); 5040 status = for_all_cpus(get_counters, EVEN_COUNTERS);
5041 first_counter_read = 0;
4570 if (status) 5042 if (status)
4571 exit(status); 5043 exit(status);
4572 /* clear affinity side-effect of get_counters() */ 5044 /* clear affinity side-effect of get_counters() */
@@ -4630,7 +5102,7 @@ int get_and_dump_counters(void)
4630} 5102}
4631 5103
4632void print_version() { 5104void print_version() {
4633 fprintf(outf, "turbostat version 17.06.23" 5105 fprintf(outf, "turbostat version 18.06.20"
4634 " - Len Brown <lenb@kernel.org>\n"); 5106 " - Len Brown <lenb@kernel.org>\n");
4635} 5107}
4636 5108
@@ -4661,7 +5133,7 @@ int add_counter(unsigned int msr_num, char *path, char *name,
4661 msrp->next = sys.tp; 5133 msrp->next = sys.tp;
4662 sys.tp = msrp; 5134 sys.tp = msrp;
4663 sys.added_thread_counters++; 5135 sys.added_thread_counters++;
4664 if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { 5136 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
4665 fprintf(stderr, "exceeded max %d added thread counters\n", 5137 fprintf(stderr, "exceeded max %d added thread counters\n",
4666 MAX_ADDED_COUNTERS); 5138 MAX_ADDED_COUNTERS);
4667 exit(-1); 5139 exit(-1);
@@ -4820,7 +5292,7 @@ void probe_sysfs(void)
4820 if (!DO_BIC(BIC_sysfs)) 5292 if (!DO_BIC(BIC_sysfs))
4821 return; 5293 return;
4822 5294
4823 for (state = 10; state > 0; --state) { 5295 for (state = 10; state >= 0; --state) {
4824 5296
4825 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", 5297 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4826 base_cpu, state); 5298 base_cpu, state);
@@ -4847,7 +5319,7 @@ void probe_sysfs(void)
4847 FORMAT_PERCENT, SYSFS_PERCPU); 5319 FORMAT_PERCENT, SYSFS_PERCPU);
4848 } 5320 }
4849 5321
4850 for (state = 10; state > 0; --state) { 5322 for (state = 10; state >= 0; --state) {
4851 5323
4852 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", 5324 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
4853 base_cpu, state); 5325 base_cpu, state);
@@ -4960,34 +5432,6 @@ error:
4960 exit(-1); 5432 exit(-1);
4961} 5433}
4962 5434
4963int shown;
4964/*
4965 * parse_show_hide() - process cmdline to set default counter action
4966 */
4967void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
4968{
4969 /*
4970 * --show: show only those specified
4971 * The 1st invocation will clear and replace the enabled mask
4972 * subsequent invocations can add to it.
4973 */
4974 if (new_mode == SHOW_LIST) {
4975 if (shown == 0)
4976 bic_enabled = bic_lookup(optarg, new_mode);
4977 else
4978 bic_enabled |= bic_lookup(optarg, new_mode);
4979 shown = 1;
4980
4981 return;
4982 }
4983
4984 /*
4985 * --hide: do not show those specified
4986 * multiple invocations simply clear more bits in enabled mask
4987 */
4988 bic_enabled &= ~bic_lookup(optarg, new_mode);
4989
4990}
4991 5435
4992void cmdline(int argc, char **argv) 5436void cmdline(int argc, char **argv)
4993{ 5437{
@@ -4998,7 +5442,9 @@ void cmdline(int argc, char **argv)
4998 {"cpu", required_argument, 0, 'c'}, 5442 {"cpu", required_argument, 0, 'c'},
4999 {"Dump", no_argument, 0, 'D'}, 5443 {"Dump", no_argument, 0, 'D'},
5000 {"debug", no_argument, 0, 'd'}, /* internal, not documented */ 5444 {"debug", no_argument, 0, 'd'}, /* internal, not documented */
5445 {"enable", required_argument, 0, 'e'},
5001 {"interval", required_argument, 0, 'i'}, 5446 {"interval", required_argument, 0, 'i'},
5447 {"num_iterations", required_argument, 0, 'n'},
5002 {"help", no_argument, 0, 'h'}, 5448 {"help", no_argument, 0, 'h'},
5003 {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help 5449 {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help
5004 {"Joules", no_argument, 0, 'J'}, 5450 {"Joules", no_argument, 0, 'J'},
@@ -5014,7 +5460,7 @@ void cmdline(int argc, char **argv)
5014 5460
5015 progname = argv[0]; 5461 progname = argv[0];
5016 5462
5017 while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", 5463 while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5018 long_options, &option_index)) != -1) { 5464 long_options, &option_index)) != -1) {
5019 switch (opt) { 5465 switch (opt) {
5020 case 'a': 5466 case 'a':
@@ -5026,11 +5472,20 @@ void cmdline(int argc, char **argv)
5026 case 'D': 5472 case 'D':
5027 dump_only++; 5473 dump_only++;
5028 break; 5474 break;
5475 case 'e':
5476 /* --enable specified counter */
5477 bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5478 break;
5029 case 'd': 5479 case 'd':
5030 debug++; 5480 debug++;
5481 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5031 break; 5482 break;
5032 case 'H': 5483 case 'H':
5033 parse_show_hide(optarg, HIDE_LIST); 5484 /*
5485 * --hide: do not show those specified
5486 * multiple invocations simply clear more bits in enabled mask
5487 */
5488 bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5034 break; 5489 break;
5035 case 'h': 5490 case 'h':
5036 default: 5491 default:
@@ -5046,7 +5501,8 @@ void cmdline(int argc, char **argv)
5046 exit(2); 5501 exit(2);
5047 } 5502 }
5048 5503
5049 interval_ts.tv_sec = interval; 5504 interval_tv.tv_sec = interval_ts.tv_sec = interval;
5505 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5050 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; 5506 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5051 } 5507 }
5052 break; 5508 break;
@@ -5054,6 +5510,7 @@ void cmdline(int argc, char **argv)
5054 rapl_joules++; 5510 rapl_joules++;
5055 break; 5511 break;
5056 case 'l': 5512 case 'l':
5513 ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5057 list_header_only++; 5514 list_header_only++;
5058 quiet++; 5515 quiet++;
5059 break; 5516 break;
@@ -5063,8 +5520,26 @@ void cmdline(int argc, char **argv)
5063 case 'q': 5520 case 'q':
5064 quiet = 1; 5521 quiet = 1;
5065 break; 5522 break;
5523 case 'n':
5524 num_iterations = strtod(optarg, NULL);
5525
5526 if (num_iterations <= 0) {
5527 fprintf(outf, "iterations %d should be positive number\n",
5528 num_iterations);
5529 exit(2);
5530 }
5531 break;
5066 case 's': 5532 case 's':
5067 parse_show_hide(optarg, SHOW_LIST); 5533 /*
5534 * --show: show only those specified
5535 * The 1st invocation will clear and replace the enabled mask
5536 * subsequent invocations can add to it.
5537 */
5538 if (shown == 0)
5539 bic_enabled = bic_lookup(optarg, SHOW_LIST);
5540 else
5541 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5542 shown = 1;
5068 break; 5543 break;
5069 case 'S': 5544 case 'S':
5070 summary_only++; 5545 summary_only++;
@@ -5083,7 +5558,6 @@ void cmdline(int argc, char **argv)
5083int main(int argc, char **argv) 5558int main(int argc, char **argv)
5084{ 5559{
5085 outf = stderr; 5560 outf = stderr;
5086
5087 cmdline(argc, argv); 5561 cmdline(argc, argv);
5088 5562
5089 if (!quiet) 5563 if (!quiet)
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 2447b1bbaacf..f4534fb8b951 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -24,5 +24,5 @@ install : x86_energy_perf_policy
24 install -d $(DESTDIR)$(PREFIX)/bin 24 install -d $(DESTDIR)$(PREFIX)/bin
25 install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy 25 install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
26 install -d $(DESTDIR)$(PREFIX)/share/man/man8 26 install -d $(DESTDIR)$(PREFIX)/share/man/man8
27 install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 27 install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
28 28
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 4ea385be528f..a8fb63edcf89 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -29,6 +29,8 @@
29#include "nfit_test.h" 29#include "nfit_test.h"
30#include "../watermark.h" 30#include "../watermark.h"
31 31
32#include <asm/mcsafe_test.h>
33
32/* 34/*
33 * Generate an NFIT table to describe the following topology: 35 * Generate an NFIT table to describe the following topology:
34 * 36 *
@@ -2681,6 +2683,107 @@ static struct platform_driver nfit_test_driver = {
2681 .id_table = nfit_test_id, 2683 .id_table = nfit_test_id,
2682}; 2684};
2683 2685
2686static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
2687
2688enum INJECT {
2689 INJECT_NONE,
2690 INJECT_SRC,
2691 INJECT_DST,
2692};
2693
2694static void mcsafe_test_init(char *dst, char *src, size_t size)
2695{
2696 size_t i;
2697
2698 memset(dst, 0xff, size);
2699 for (i = 0; i < size; i++)
2700 src[i] = (char) i;
2701}
2702
2703static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
2704 size_t size, unsigned long rem)
2705{
2706 size_t i;
2707
2708 for (i = 0; i < size - rem; i++)
2709 if (dst[i] != (unsigned char) i) {
2710 pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
2711 __func__, __LINE__, i, dst[i],
2712 (unsigned char) i);
2713 return false;
2714 }
2715 for (i = size - rem; i < size; i++)
2716 if (dst[i] != 0xffU) {
2717 pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
2718 __func__, __LINE__, i, dst[i]);
2719 return false;
2720 }
2721 return true;
2722}
2723
2724void mcsafe_test(void)
2725{
2726 char *inject_desc[] = { "none", "source", "destination" };
2727 enum INJECT inj;
2728
2729 if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
2730 pr_info("%s: run...\n", __func__);
2731 } else {
2732 pr_info("%s: disabled, skip.\n", __func__);
2733 return;
2734 }
2735
2736 for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
2737 int i;
2738
2739 pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
2740 for (i = 0; i < 512; i++) {
2741 unsigned long expect, rem;
2742 void *src, *dst;
2743 bool valid;
2744
2745 switch (inj) {
2746 case INJECT_NONE:
2747 mcsafe_inject_src(NULL);
2748 mcsafe_inject_dst(NULL);
2749 dst = &mcsafe_buf[2048];
2750 src = &mcsafe_buf[1024 - i];
2751 expect = 0;
2752 break;
2753 case INJECT_SRC:
2754 mcsafe_inject_src(&mcsafe_buf[1024]);
2755 mcsafe_inject_dst(NULL);
2756 dst = &mcsafe_buf[2048];
2757 src = &mcsafe_buf[1024 - i];
2758 expect = 512 - i;
2759 break;
2760 case INJECT_DST:
2761 mcsafe_inject_src(NULL);
2762 mcsafe_inject_dst(&mcsafe_buf[2048]);
2763 dst = &mcsafe_buf[2048 - i];
2764 src = &mcsafe_buf[1024];
2765 expect = 512 - i;
2766 break;
2767 }
2768
2769 mcsafe_test_init(dst, src, 512);
2770 rem = __memcpy_mcsafe(dst, src, 512);
2771 valid = mcsafe_test_validate(dst, src, 512, expect);
2772 if (rem == expect && valid)
2773 continue;
2774 pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
2775 __func__,
2776 ((unsigned long) dst) & ~PAGE_MASK,
2777 ((unsigned long ) src) & ~PAGE_MASK,
2778 512, i, rem, valid ? "valid" : "bad",
2779 expect);
2780 }
2781 }
2782
2783 mcsafe_inject_src(NULL);
2784 mcsafe_inject_dst(NULL);
2785}
2786
2684static __init int nfit_test_init(void) 2787static __init int nfit_test_init(void)
2685{ 2788{
2686 int rc, i; 2789 int rc, i;
@@ -2689,6 +2792,7 @@ static __init int nfit_test_init(void)
2689 libnvdimm_test(); 2792 libnvdimm_test();
2690 acpi_nfit_test(); 2793 acpi_nfit_test();
2691 device_dax_test(); 2794 device_dax_test();
2795 mcsafe_test();
2692 2796
2693 nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); 2797 nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
2694 2798
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index fa7ee369b3c9..db66f8a0d4be 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -17,7 +17,7 @@ ifeq ($(BUILD), 32)
17 LDFLAGS += -m32 17 LDFLAGS += -m32
18endif 18endif
19 19
20targets: mapshift $(TARGETS) 20targets: generated/map-shift.h $(TARGETS)
21 21
22main: $(OFILES) 22main: $(OFILES)
23 23
@@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c
42idr.c: ../../../lib/idr.c 42idr.c: ../../../lib/idr.c
43 sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ 43 sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
44 44
45.PHONY: mapshift 45generated/map-shift.h:
46
47mapshift:
48 @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ 46 @if ! grep -qws $(SHIFT) generated/map-shift.h; then \
49 echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ 47 echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \
50 generated/map-shift.h; \ 48 generated/map-shift.h; \
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 6c645eb77d42..ee820fcc29b0 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -252,6 +252,13 @@ void idr_checks(void)
252 idr_remove(&idr, 3); 252 idr_remove(&idr, 3);
253 idr_remove(&idr, 0); 253 idr_remove(&idr, 0);
254 254
255 assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
256 idr_remove(&idr, 1);
257 for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
258 assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
259 idr_remove(&idr, 1 << 30);
260 idr_destroy(&idr);
261
255 for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { 262 for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
256 struct item *item = item_create(i, 0); 263 struct item *item = item_create(i, 0);
257 assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); 264 assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 59245b3d587c..7bf405638b0b 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -16,6 +16,7 @@
16#include <linux/radix-tree.h> 16#include <linux/radix-tree.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <pthread.h>
19 20
20#include "test.h" 21#include "test.h"
21 22
@@ -624,6 +625,67 @@ static void multiorder_account(void)
624 item_kill_tree(&tree); 625 item_kill_tree(&tree);
625} 626}
626 627
628bool stop_iteration = false;
629
630static void *creator_func(void *ptr)
631{
632 /* 'order' is set up to ensure we have sibling entries */
633 unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
634 struct radix_tree_root *tree = ptr;
635 int i;
636
637 for (i = 0; i < 10000; i++) {
638 item_insert_order(tree, 0, order);
639 item_delete_rcu(tree, 0);
640 }
641
642 stop_iteration = true;
643 return NULL;
644}
645
646static void *iterator_func(void *ptr)
647{
648 struct radix_tree_root *tree = ptr;
649 struct radix_tree_iter iter;
650 struct item *item;
651 void **slot;
652
653 while (!stop_iteration) {
654 rcu_read_lock();
655 radix_tree_for_each_slot(slot, tree, &iter, 0) {
656 item = radix_tree_deref_slot(slot);
657
658 if (!item)
659 continue;
660 if (radix_tree_deref_retry(item)) {
661 slot = radix_tree_iter_retry(&iter);
662 continue;
663 }
664
665 item_sanity(item, iter.index);
666 }
667 rcu_read_unlock();
668 }
669 return NULL;
670}
671
672static void multiorder_iteration_race(void)
673{
674 const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
675 pthread_t worker_thread[num_threads];
676 RADIX_TREE(tree, GFP_KERNEL);
677 int i;
678
679 pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
680 for (i = 1; i < num_threads; i++)
681 pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
682
683 for (i = 0; i < num_threads; i++)
684 pthread_join(worker_thread[i], NULL);
685
686 item_kill_tree(&tree);
687}
688
627void multiorder_checks(void) 689void multiorder_checks(void)
628{ 690{
629 int i; 691 int i;
@@ -644,6 +706,7 @@ void multiorder_checks(void)
644 multiorder_join(); 706 multiorder_join();
645 multiorder_split(); 707 multiorder_split();
646 multiorder_account(); 708 multiorder_account();
709 multiorder_iteration_race();
647 710
648 radix_tree_cpu_dead(0); 711 radix_tree_cpu_dead(0);
649} 712}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 5978ab1f403d..def6015570b2 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
75 return 0; 75 return 0;
76} 76}
77 77
78static void item_free_rcu(struct rcu_head *head)
79{
80 struct item *item = container_of(head, struct item, rcu_head);
81
82 free(item);
83}
84
85int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
86{
87 struct item *item = radix_tree_delete(root, index);
88
89 if (item) {
90 item_sanity(item, index);
91 call_rcu(&item->rcu_head, item_free_rcu);
92 return 1;
93 }
94 return 0;
95}
96
78void item_check_present(struct radix_tree_root *root, unsigned long index) 97void item_check_present(struct radix_tree_root *root, unsigned long index)
79{ 98{
80 struct item *item; 99 struct item *item;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index d9c031dbeb1a..31f1d9b6f506 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -5,6 +5,7 @@
5#include <linux/rcupdate.h> 5#include <linux/rcupdate.h>
6 6
7struct item { 7struct item {
8 struct rcu_head rcu_head;
8 unsigned long index; 9 unsigned long index;
9 unsigned int order; 10 unsigned int order;
10}; 11};
@@ -12,9 +13,11 @@ struct item {
12struct item *item_create(unsigned long index, unsigned int order); 13struct item *item_create(unsigned long index, unsigned int order);
13int __item_insert(struct radix_tree_root *root, struct item *item); 14int __item_insert(struct radix_tree_root *root, struct item *item);
14int item_insert(struct radix_tree_root *root, unsigned long index); 15int item_insert(struct radix_tree_root *root, unsigned long index);
16void item_sanity(struct item *item, unsigned long index);
15int item_insert_order(struct radix_tree_root *root, unsigned long index, 17int item_insert_order(struct radix_tree_root *root, unsigned long index,
16 unsigned order); 18 unsigned order);
17int item_delete(struct radix_tree_root *root, unsigned long index); 19int item_delete(struct radix_tree_root *root, unsigned long index);
20int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
18struct item *item_lookup(struct radix_tree_root *root, unsigned long index); 21struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
19 22
20void item_check_present(struct radix_tree_root *root, unsigned long index); 23void item_check_present(struct radix_tree_root *root, unsigned long index);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 32aafa92074c..f1fe492c8e17 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -3,6 +3,7 @@ TARGETS = android
3TARGETS += bpf 3TARGETS += bpf
4TARGETS += breakpoints 4TARGETS += breakpoints
5TARGETS += capabilities 5TARGETS += capabilities
6TARGETS += cgroup
6TARGETS += cpufreq 7TARGETS += cpufreq
7TARGETS += cpu-hotplug 8TARGETS += cpu-hotplug
8TARGETS += efivarfs 9TARGETS += efivarfs
@@ -28,9 +29,12 @@ TARGETS += powerpc
28TARGETS += proc 29TARGETS += proc
29TARGETS += pstore 30TARGETS += pstore
30TARGETS += ptrace 31TARGETS += ptrace
32TARGETS += rseq
33TARGETS += rtc
31TARGETS += seccomp 34TARGETS += seccomp
32TARGETS += sigaltstack 35TARGETS += sigaltstack
33TARGETS += size 36TARGETS += size
37TARGETS += sparc64
34TARGETS += splice 38TARGETS += splice
35TARGETS += static_keys 39TARGETS += static_keys
36TARGETS += sync 40TARGETS += sync
@@ -134,7 +138,8 @@ ifdef INSTALL_PATH
134 echo "else" >> $(ALL_SCRIPT) 138 echo "else" >> $(ALL_SCRIPT)
135 echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) 139 echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
136 echo "fi" >> $(ALL_SCRIPT) 140 echo "fi" >> $(ALL_SCRIPT)
137 echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT) 141 echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
142 echo "export skip=4" >> $(ALL_SCRIPT)
138 143
139 for TARGET in $(TARGETS); do \ 144 for TARGET in $(TARGETS); do \
140 BUILD_TARGET=$$BUILD/$$TARGET; \ 145 BUILD_TARGET=$$BUILD/$$TARGET; \
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index f6304d2be90c..72c25a3cb658 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -18,10 +18,6 @@ all:
18 fi \ 18 fi \
19 done 19 done
20 20
21override define RUN_TESTS
22 @cd $(OUTPUT); ./run.sh
23endef
24
25override define INSTALL_RULE 21override define INSTALL_RULE
26 mkdir -p $(INSTALL_PATH) 22 mkdir -p $(INSTALL_PATH)
27 install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) 23 install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -33,10 +29,6 @@ override define INSTALL_RULE
33 done; 29 done;
34endef 30endef
35 31
36override define EMIT_TESTS
37 echo "./run.sh"
38endef
39
40override define CLEAN 32override define CLEAN
41 @for DIR in $(SUBDIRS); do \ 33 @for DIR in $(SUBDIRS); do \
42 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 34 BUILD_TARGET=$(OUTPUT)/$$DIR; \
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
index a1aff506f5e6..69e676cfc94e 100755
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -4,6 +4,9 @@ heapsize=4096
4TCID="ion_test.sh" 4TCID="ion_test.sh"
5errcode=0 5errcode=0
6 6
7# Kselftest framework requirement - SKIP code is 4.
8ksft_skip=4
9
7run_test() 10run_test()
8{ 11{
9 heaptype=$1 12 heaptype=$1
@@ -25,7 +28,7 @@ check_root()
25 uid=$(id -u) 28 uid=$(id -u)
26 if [ $uid -ne 0 ]; then 29 if [ $uid -ne 0 ]; then
27 echo $TCID: must be run as root >&2 30 echo $TCID: must be run as root >&2
28 exit 0 31 exit $ksft_skip
29 fi 32 fi
30} 33}
31 34
@@ -35,7 +38,7 @@ check_device()
35 if [ ! -e $DEVICE ]; then 38 if [ ! -e $DEVICE ]; then
36 echo $TCID: No $DEVICE device found >&2 39 echo $TCID: No $DEVICE device found >&2
37 echo $TCID: May be CONFIG_ION is not set >&2 40 echo $TCID: May be CONFIG_ION is not set >&2
38 exit 0 41 exit $ksft_skip
39 fi 42 fi
40} 43}
41 44
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 5e1ab2f0eb79..49938d72cf63 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -15,3 +15,7 @@ test_libbpf_open
15test_sock 15test_sock
16test_sock_addr 16test_sock_addr
17urandom_read 17urandom_read
18test_btf
19test_sockmap
20test_lirc_mode2_user
21get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315ddabbf4..7a6214e9ae58 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -10,28 +10,30 @@ ifneq ($(wildcard $(GENHDR)),)
10 GENFLAGS := -DHAVE_GENHDR 10 GENFLAGS := -DHAVE_GENHDR
11endif 11endif
12 12
13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include 13CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
14LDLIBS += -lcap -lelf -lrt -lpthread 14LDLIBS += -lcap -lelf -lrt -lpthread
15 15
16TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read 16TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
17all: $(TEST_CUSTOM_PROGS) 17all: $(TEST_CUSTOM_PROGS)
18 18
19$(TEST_CUSTOM_PROGS): urandom_read 19$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
20 20 $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
21urandom_read: urandom_read.c
22 $(CC) -o $(TEST_CUSTOM_PROGS) -static $<
23 21
24# Order correspond to 'make run_tests' order 22# Order correspond to 'make run_tests' order
25TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 23TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
26 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ 24 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
27 test_sock test_sock_addr 25 test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user
28 26
29TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 27TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
30 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 28 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
31 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 29 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
32 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ 30 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
33 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ 31 sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
34 sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o 32 sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
33 test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
34 test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
35 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
36 get_cgroup_id_kern.o
35 37
36# Order correspond to 'make run_tests' order 38# Order correspond to 'make run_tests' order
37TEST_PROGS := test_kmod.sh \ 39TEST_PROGS := test_kmod.sh \
@@ -39,10 +41,13 @@ TEST_PROGS := test_kmod.sh \
39 test_xdp_redirect.sh \ 41 test_xdp_redirect.sh \
40 test_xdp_meta.sh \ 42 test_xdp_meta.sh \
41 test_offload.py \ 43 test_offload.py \
42 test_sock_addr.sh 44 test_sock_addr.sh \
45 test_tunnel.sh \
46 test_lwt_seg6local.sh \
47 test_lirc_mode2.sh
43 48
44# Compile but not part of 'make run_tests' 49# Compile but not part of 'make run_tests'
45TEST_GEN_PROGS_EXTENDED = test_libbpf_open 50TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
46 51
47include ../lib.mk 52include ../lib.mk
48 53
@@ -55,6 +60,9 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
55$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c 60$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
56$(OUTPUT)/test_sock: cgroup_helpers.c 61$(OUTPUT)/test_sock: cgroup_helpers.c
57$(OUTPUT)/test_sock_addr: cgroup_helpers.c 62$(OUTPUT)/test_sock_addr: cgroup_helpers.c
63$(OUTPUT)/test_sockmap: cgroup_helpers.c
64$(OUTPUT)/test_progs: trace_helpers.c
65$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
58 66
59.PHONY: force 67.PHONY: force
60 68
@@ -66,6 +74,8 @@ $(BPFOBJ): force
66 74
67CLANG ?= clang 75CLANG ?= clang
68LLC ?= llc 76LLC ?= llc
77LLVM_OBJCOPY ?= llvm-objcopy
78BTF_PAHOLE ?= pahole
69 79
70PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) 80PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
71 81
@@ -77,15 +87,42 @@ else
77 CPU ?= generic 87 CPU ?= generic
78endif 88endif
79 89
90# Get Clang's default includes on this system, as opposed to those seen by
91# '-target bpf'. This fixes "missing" files on some architectures/distros,
92# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
93#
94# Use '-idirafter': Don't interfere with include mechanics except where the
95# build would have failed anyways.
96CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
97 | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
98
80CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ 99CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
100 $(CLANG_SYS_INCLUDES) \
81 -Wno-compare-distinct-pointer-types 101 -Wno-compare-distinct-pointer-types
82 102
83$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline 103$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
84$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline 104$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
85 105
106BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
107BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
108BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --version 2>&1 | grep LLVM)
109
110ifneq ($(BTF_LLC_PROBE),)
111ifneq ($(BTF_PAHOLE_PROBE),)
112ifneq ($(BTF_OBJCOPY_PROBE),)
113 CLANG_FLAGS += -g
114 LLC_FLAGS += -mattr=dwarfris
115 DWARF2BTF = y
116endif
117endif
118endif
119
86$(OUTPUT)/%.o: %.c 120$(OUTPUT)/%.o: %.c
87 $(CLANG) $(CLANG_FLAGS) \ 121 $(CLANG) $(CLANG_FLAGS) \
88 -O2 -target bpf -emit-llvm -c $< -o - | \ 122 -O2 -target bpf -emit-llvm -c $< -o - | \
89 $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ 123 $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
124ifeq ($(DWARF2BTF),y)
125 $(BTF_PAHOLE) -J $@
126endif
90 127
91EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) 128EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d8223d99f96d..f2f28b6c8915 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -75,9 +75,14 @@ static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
75 (void *) BPF_FUNC_sock_ops_cb_flags_set; 75 (void *) BPF_FUNC_sock_ops_cb_flags_set;
76static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = 76static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
77 (void *) BPF_FUNC_sk_redirect_map; 77 (void *) BPF_FUNC_sk_redirect_map;
78static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
79 (void *) BPF_FUNC_sk_redirect_hash;
78static int (*bpf_sock_map_update)(void *map, void *key, void *value, 80static int (*bpf_sock_map_update)(void *map, void *key, void *value,
79 unsigned long long flags) = 81 unsigned long long flags) =
80 (void *) BPF_FUNC_sock_map_update; 82 (void *) BPF_FUNC_sock_map_update;
83static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
84 unsigned long long flags) =
85 (void *) BPF_FUNC_sock_hash_update;
81static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, 86static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
82 void *buf, unsigned int buf_size) = 87 void *buf, unsigned int buf_size) =
83 (void *) BPF_FUNC_perf_event_read_value; 88 (void *) BPF_FUNC_perf_event_read_value;
@@ -88,6 +93,9 @@ static int (*bpf_override_return)(void *ctx, unsigned long rc) =
88 (void *) BPF_FUNC_override_return; 93 (void *) BPF_FUNC_override_return;
89static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = 94static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
90 (void *) BPF_FUNC_msg_redirect_map; 95 (void *) BPF_FUNC_msg_redirect_map;
96static int (*bpf_msg_redirect_hash)(void *ctx,
97 void *map, void *key, int flags) =
98 (void *) BPF_FUNC_msg_redirect_hash;
91static int (*bpf_msg_apply_bytes)(void *ctx, int len) = 99static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
92 (void *) BPF_FUNC_msg_apply_bytes; 100 (void *) BPF_FUNC_msg_apply_bytes;
93static int (*bpf_msg_cork_bytes)(void *ctx, int len) = 101static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
@@ -96,6 +104,35 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
96 (void *) BPF_FUNC_msg_pull_data; 104 (void *) BPF_FUNC_msg_pull_data;
97static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = 105static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
98 (void *) BPF_FUNC_bind; 106 (void *) BPF_FUNC_bind;
107static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
108 (void *) BPF_FUNC_xdp_adjust_tail;
109static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
110 int size, int flags) =
111 (void *) BPF_FUNC_skb_get_xfrm_state;
112static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
113 (void *) BPF_FUNC_get_stack;
114static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
115 int plen, __u32 flags) =
116 (void *) BPF_FUNC_fib_lookup;
117static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
118 unsigned int len) =
119 (void *) BPF_FUNC_lwt_push_encap;
120static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
121 void *from, unsigned int len) =
122 (void *) BPF_FUNC_lwt_seg6_store_bytes;
123static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
124 unsigned int param_len) =
125 (void *) BPF_FUNC_lwt_seg6_action;
126static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
127 unsigned int len) =
128 (void *) BPF_FUNC_lwt_seg6_adjust_srh;
129static int (*bpf_rc_repeat)(void *ctx) =
130 (void *) BPF_FUNC_rc_repeat;
131static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
132 unsigned long long scancode, unsigned int toggle) =
133 (void *) BPF_FUNC_rc_keydown;
134static unsigned long long (*bpf_get_current_cgroup_id)(void) =
135 (void *) BPF_FUNC_get_current_cgroup_id;
99 136
100/* llvm builtin functions that eBPF C program may use to 137/* llvm builtin functions that eBPF C program may use to
101 * emit BPF_LD_ABS and BPF_LD_IND instructions 138 * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -129,6 +166,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
129 (void *) BPF_FUNC_l3_csum_replace; 166 (void *) BPF_FUNC_l3_csum_replace;
130static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = 167static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
131 (void *) BPF_FUNC_l4_csum_replace; 168 (void *) BPF_FUNC_l4_csum_replace;
169static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
170 (void *) BPF_FUNC_csum_diff;
132static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = 171static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
133 (void *) BPF_FUNC_skb_under_cgroup; 172 (void *) BPF_FUNC_skb_under_cgroup;
134static int (*bpf_skb_change_head)(void *, int len, int flags) = 173static int (*bpf_skb_change_head)(void *, int len, int flags) =
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000000..59bf3e1a9371
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __BPF_RAND__
3#define __BPF_RAND__
4
5#include <stdint.h>
6#include <stdlib.h>
7#include <time.h>
8
9static inline uint64_t bpf_rand_mask(uint64_t mask)
10{
11 return (((uint64_t)(uint32_t)rand()) |
12 ((uint64_t)(uint32_t)rand() << 32)) & mask;
13}
14
15#define bpf_rand_ux(x, m) \
16static inline uint64_t bpf_rand_u##x(int shift) \
17{ \
18 return bpf_rand_mask((m)) << shift; \
19}
20
21bpf_rand_ux( 8, 0xffULL)
22bpf_rand_ux(16, 0xffffULL)
23bpf_rand_ux(24, 0xffffffULL)
24bpf_rand_ux(32, 0xffffffffULL)
25bpf_rand_ux(40, 0xffffffffffULL)
26bpf_rand_ux(48, 0xffffffffffffULL)
27bpf_rand_ux(56, 0xffffffffffffffULL)
28bpf_rand_ux(64, 0xffffffffffffffffULL)
29
30static inline void bpf_semi_rand_init(void)
31{
32 srand(time(NULL));
33}
34
35static inline uint64_t bpf_semi_rand_get(void)
36{
37 switch (rand() % 39) {
38 case 0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
39 case 1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
40 case 2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
41 case 3: return 0x8000000000000000ULL | bpf_rand_u32(0);
42 case 4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
43 case 5: return 0x0000000100000000ULL | bpf_rand_u24(0);
44 case 6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
45 case 7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
46 case 8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
47 case 9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
48 case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
49 case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
50 case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
51 case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
52 case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
53 case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
54 case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
55 case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
56 case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
57 case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
58 case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
59 case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
60 case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
61 case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
62 case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
63 case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
64 case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
65 case 27: return 0x0000800000000000ULL;
66 case 28: return 0x8000000000000000ULL;
67 case 29: return 0x0000000000000000ULL;
68 case 30: return 0xffffffffffffffffULL;
69 case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
70 case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
71 case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
72 case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
73 case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
74 case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
75 case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
76 default: return bpf_rand_u64(0);
77 }
78}
79
80#endif /* __BPF_RAND__ */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index f3bca3ade0f3..c87b4e052ce9 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -6,6 +6,7 @@
6#include <sys/types.h> 6#include <sys/types.h>
7#include <linux/limits.h> 7#include <linux/limits.h>
8#include <stdio.h> 8#include <stdio.h>
9#include <stdlib.h>
9#include <linux/sched.h> 10#include <linux/sched.h>
10#include <fcntl.h> 11#include <fcntl.h>
11#include <unistd.h> 12#include <unistd.h>
@@ -176,3 +177,59 @@ int create_and_get_cgroup(char *path)
176 177
177 return fd; 178 return fd;
178} 179}
180
181/**
182 * get_cgroup_id() - Get cgroup id for a particular cgroup path
183 * @path: The cgroup path, relative to the workdir, to join
184 *
185 * On success, it returns the cgroup id. On failure it returns 0,
186 * which is an invalid cgroup id.
187 * If there is a failure, it prints the error to stderr.
188 */
189unsigned long long get_cgroup_id(char *path)
190{
191 int dirfd, err, flags, mount_id, fhsize;
192 union {
193 unsigned long long cgid;
194 unsigned char raw_bytes[8];
195 } id;
196 char cgroup_workdir[PATH_MAX + 1];
197 struct file_handle *fhp, *fhp2;
198 unsigned long long ret = 0;
199
200 format_cgroup_path(cgroup_workdir, path);
201
202 dirfd = AT_FDCWD;
203 flags = 0;
204 fhsize = sizeof(*fhp);
205 fhp = calloc(1, fhsize);
206 if (!fhp) {
207 log_err("calloc");
208 return 0;
209 }
210 err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
211 if (err >= 0 || fhp->handle_bytes != 8) {
212 log_err("name_to_handle_at");
213 goto free_mem;
214 }
215
216 fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
217 fhp2 = realloc(fhp, fhsize);
218 if (!fhp2) {
219 log_err("realloc");
220 goto free_mem;
221 }
222 err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
223 fhp = fhp2;
224 if (err < 0) {
225 log_err("name_to_handle_at");
226 goto free_mem;
227 }
228
229 memcpy(id.raw_bytes, fhp->f_handle, 8);
230 ret = id.cgid;
231
232free_mem:
233 free(fhp);
234 return ret;
235}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 06485e0002b3..20a4a5dcd469 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -13,5 +13,6 @@ int create_and_get_cgroup(char *path);
13int join_cgroup(char *path); 13int join_cgroup(char *path);
14int setup_cgroup_environment(void); 14int setup_cgroup_environment(void);
15void cleanup_cgroup_environment(void); 15void cleanup_cgroup_environment(void);
16unsigned long long get_cgroup_id(char *path);
16 17
17#endif 18#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 983dd25d49f4..7eb613ffef55 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -5,3 +5,15 @@ CONFIG_BPF_EVENTS=y
5CONFIG_TEST_BPF=m 5CONFIG_TEST_BPF=m
6CONFIG_CGROUP_BPF=y 6CONFIG_CGROUP_BPF=y
7CONFIG_NETDEVSIM=m 7CONFIG_NETDEVSIM=m
8CONFIG_NET_CLS_ACT=y
9CONFIG_NET_SCH_INGRESS=y
10CONFIG_NET_IPIP=y
11CONFIG_IPV6=y
12CONFIG_NET_IPGRE_DEMUX=y
13CONFIG_NET_IPGRE=y
14CONFIG_IPV6_GRE=y
15CONFIG_CRYPTO_USER_API_HASH=m
16CONFIG_CRYPTO_HMAC=m
17CONFIG_CRYPTO_SHA256=m
18CONFIG_VXLAN=y
19CONFIG_GENEVE=y
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 000000000000..014dba10b8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,40 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <linux/bpf.h>
5#include "bpf_helpers.h"
6
7struct bpf_map_def SEC("maps") cg_ids = {
8 .type = BPF_MAP_TYPE_ARRAY,
9 .key_size = sizeof(__u32),
10 .value_size = sizeof(__u64),
11 .max_entries = 1,
12};
13
14struct bpf_map_def SEC("maps") pidmap = {
15 .type = BPF_MAP_TYPE_ARRAY,
16 .key_size = sizeof(__u32),
17 .value_size = sizeof(__u32),
18 .max_entries = 1,
19};
20
21SEC("tracepoint/syscalls/sys_enter_nanosleep")
22int trace(void *ctx)
23{
24 __u32 pid = bpf_get_current_pid_tgid();
25 __u32 key = 0, *expected_pid;
26 __u64 *val;
27
28 expected_pid = bpf_map_lookup_elem(&pidmap, &key);
29 if (!expected_pid || *expected_pid != pid)
30 return 0;
31
32 val = bpf_map_lookup_elem(&cg_ids, &key);
33 if (val)
34 *val = bpf_get_current_cgroup_id();
35
36 return 0;
37}
38
39char _license[] SEC("license") = "GPL";
40__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 000000000000..e8da7b39158d
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,149 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <errno.h>
8#include <fcntl.h>
9#include <syscall.h>
10#include <unistd.h>
11#include <linux/perf_event.h>
12#include <sys/ioctl.h>
13#include <sys/time.h>
14#include <sys/types.h>
15#include <sys/stat.h>
16
17#include <linux/bpf.h>
18#include <bpf/bpf.h>
19#include <bpf/libbpf.h>
20
21#include "cgroup_helpers.h"
22#include "bpf_rlimit.h"
23
24#define CHECK(condition, tag, format...) ({ \
25 int __ret = !!(condition); \
26 if (__ret) { \
27 printf("%s:FAIL:%s ", __func__, tag); \
28 printf(format); \
29 } else { \
30 printf("%s:PASS:%s\n", __func__, tag); \
31 } \
32 __ret; \
33})
34
35static int bpf_find_map(const char *test, struct bpf_object *obj,
36 const char *name)
37{
38 struct bpf_map *map;
39
40 map = bpf_object__find_map_by_name(obj, name);
41 if (!map)
42 return -1;
43 return bpf_map__fd(map);
44}
45
46#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
47
48int main(int argc, char **argv)
49{
50 const char *probe_name = "syscalls/sys_enter_nanosleep";
51 const char *file = "get_cgroup_id_kern.o";
52 int err, bytes, efd, prog_fd, pmu_fd;
53 int cgroup_fd, cgidmap_fd, pidmap_fd;
54 struct perf_event_attr attr = {};
55 struct bpf_object *obj;
56 __u64 kcgid = 0, ucgid;
57 __u32 key = 0, pid;
58 int exit_code = 1;
59 char buf[256];
60
61 err = setup_cgroup_environment();
62 if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
63 errno))
64 return 1;
65
66 cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
67 if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
68 cgroup_fd, errno))
69 goto cleanup_cgroup_env;
70
71 err = join_cgroup(TEST_CGROUP);
72 if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
73 goto cleanup_cgroup_env;
74
75 err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
76 if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
77 goto cleanup_cgroup_env;
78
79 cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
80 if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
81 cgidmap_fd, errno))
82 goto close_prog;
83
84 pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
85 if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
86 pidmap_fd, errno))
87 goto close_prog;
88
89 pid = getpid();
90 bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
91
92 snprintf(buf, sizeof(buf),
93 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
94 efd = open(buf, O_RDONLY, 0);
95 if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
96 goto close_prog;
97 bytes = read(efd, buf, sizeof(buf));
98 close(efd);
99 if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
100 "bytes %d errno %d\n", bytes, errno))
101 goto close_prog;
102
103 attr.config = strtol(buf, NULL, 0);
104 attr.type = PERF_TYPE_TRACEPOINT;
105 attr.sample_type = PERF_SAMPLE_RAW;
106 attr.sample_period = 1;
107 attr.wakeup_events = 1;
108
109 /* attach to this pid so the all bpf invocations will be in the
110 * cgroup associated with this pid.
111 */
112 pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
113 if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
114 errno))
115 goto close_prog;
116
117 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
118 if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
119 errno))
120 goto close_pmu;
121
122 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
123 if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
124 errno))
125 goto close_pmu;
126
127 /* trigger some syscalls */
128 sleep(1);
129
130 err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
131 if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
132 goto close_pmu;
133
134 ucgid = get_cgroup_id(TEST_CGROUP);
135 if (CHECK(kcgid != ucgid, "compare_cgroup_id",
136 "kern cgid %llx user cgid %llx", kcgid, ucgid))
137 goto close_pmu;
138
139 exit_code = 0;
140 printf("%s:PASS\n", argv[0]);
141
142close_pmu:
143 close(pmu_fd);
144close_prog:
145 bpf_object__close(obj);
146cleanup_cgroup_env:
147 cleanup_cgroup_environment();
148 return exit_code;
149}
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 000000000000..a91536b1c47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <linux/stddef.h>
5#include <linux/bpf.h>
6#include <sys/socket.h>
7
8#include "bpf_helpers.h"
9#include "bpf_endian.h"
10
11#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
12#define SRC2_IP4 0x00000000U
13#define SRC_REWRITE_IP4 0x7f000004U
14#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
15#define DST_REWRITE_IP4 0x7f000001U
16#define DST_PORT 4040
17#define DST_REWRITE_PORT4 4444
18
19int _version SEC("version") = 1;
20
21SEC("cgroup/sendmsg4")
22int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
23{
24 if (ctx->type != SOCK_DGRAM)
25 return 0;
26
27 /* Rewrite source. */
28 if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
29 ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
30 ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
31 } else {
32 /* Unexpected source. Reject sendmsg. */
33 return 0;
34 }
35
36 /* Rewrite destination. */
37 if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
38 ctx->user_port == bpf_htons(DST_PORT)) {
39 ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
40 ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
41 } else {
42 /* Unexpected source. Reject sendmsg. */
43 return 0;
44 }
45
46 return 1;
47}
48
49char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 000000000000..5aeaa284fc47
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,60 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <linux/stddef.h>
5#include <linux/bpf.h>
6#include <sys/socket.h>
7
8#include "bpf_helpers.h"
9#include "bpf_endian.h"
10
11#define SRC_REWRITE_IP6_0 0
12#define SRC_REWRITE_IP6_1 0
13#define SRC_REWRITE_IP6_2 0
14#define SRC_REWRITE_IP6_3 6
15
16#define DST_REWRITE_IP6_0 0
17#define DST_REWRITE_IP6_1 0
18#define DST_REWRITE_IP6_2 0
19#define DST_REWRITE_IP6_3 1
20
21#define DST_REWRITE_PORT6 6666
22
23int _version SEC("version") = 1;
24
25SEC("cgroup/sendmsg6")
26int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
27{
28 if (ctx->type != SOCK_DGRAM)
29 return 0;
30
31 /* Rewrite source. */
32 if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
33 ctx->msg_src_ip6[3] == bpf_htonl(0)) {
34 ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
35 ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
36 ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
37 ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
38 } else {
39 /* Unexpected source. Reject sendmsg. */
40 return 0;
41 }
42
43 /* Rewrite destination. */
44 if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) &&
45 ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) {
46 ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
47 ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
48 ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
49 ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
50
51 ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
52 } else {
53 /* Unexpected destination. Reject sendmsg. */
54 return 0;
55 }
56
57 return 1;
58}
59
60char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000000..4cd5e860c903
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
1/* SPDX-License-Identifier: GPL-2.0
2 * Copyright (c) 2018 Facebook
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8#include <linux/bpf.h>
9#include <linux/if_ether.h>
10#include "bpf_helpers.h"
11
12int _version SEC("version") = 1;
13
14SEC("xdp_adjust_tail")
15int _xdp_adjust_tail(struct xdp_md *xdp)
16{
17 void *data_end = (void *)(long)xdp->data_end;
18 void *data = (void *)(long)xdp->data;
19 int offset = 0;
20
21 if (data_end - data == 54)
22 offset = 256;
23 else
24 offset = 20;
25 if (bpf_xdp_adjust_tail(xdp, 0 - offset))
26 return XDP_DROP;
27 return XDP_TX;
28}
29
30char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000000..3619f3023088
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,2315 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3
4#include <linux/bpf.h>
5#include <linux/btf.h>
6#include <linux/err.h>
7#include <bpf/bpf.h>
8#include <sys/resource.h>
9#include <libelf.h>
10#include <gelf.h>
11#include <string.h>
12#include <stdlib.h>
13#include <stdio.h>
14#include <stdarg.h>
15#include <unistd.h>
16#include <fcntl.h>
17#include <errno.h>
18#include <bpf/libbpf.h>
19#include <bpf/btf.h>
20
21#include "bpf_rlimit.h"
22
23static uint32_t pass_cnt;
24static uint32_t error_cnt;
25static uint32_t skip_cnt;
26
27#define CHECK(condition, format...) ({ \
28 int __ret = !!(condition); \
29 if (__ret) { \
30 fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
31 fprintf(stderr, format); \
32 } \
33 __ret; \
34})
35
36static int count_result(int err)
37{
38 if (err)
39 error_cnt++;
40 else
41 pass_cnt++;
42
43 fprintf(stderr, "\n");
44 return err;
45}
46
47#define min(a, b) ((a) < (b) ? (a) : (b))
48#define __printf(a, b) __attribute__((format(printf, a, b)))
49
50__printf(1, 2)
51static int __base_pr(const char *format, ...)
52{
53 va_list args;
54 int err;
55
56 va_start(args, format);
57 err = vfprintf(stderr, format, args);
58 va_end(args);
59 return err;
60}
61
62#define BTF_INFO_ENC(kind, root, vlen) \
63 ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
64
65#define BTF_TYPE_ENC(name, info, size_or_type) \
66 (name), (info), (size_or_type)
67
68#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
69 ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
70#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
71 BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
72 BTF_INT_ENC(encoding, bits_offset, bits)
73
74#define BTF_ARRAY_ENC(type, index_type, nr_elems) \
75 (type), (index_type), (nr_elems)
76#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
77 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
78 BTF_ARRAY_ENC(type, index_type, nr_elems)
79
80#define BTF_MEMBER_ENC(name, type, bits_offset) \
81 (name), (type), (bits_offset)
82#define BTF_ENUM_ENC(name, val) (name), (val)
83
84#define BTF_TYPEDEF_ENC(name, type) \
85 BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
86
87#define BTF_PTR_ENC(name, type) \
88 BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
89
90#define BTF_END_RAW 0xdeadbeef
91#define NAME_TBD 0xdeadb33f
92
93#define MAX_NR_RAW_TYPES 1024
94#define BTF_LOG_BUF_SIZE 65535
95
96#ifndef ARRAY_SIZE
97# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
98#endif
99
100static struct args {
101 unsigned int raw_test_num;
102 unsigned int file_test_num;
103 unsigned int get_info_test_num;
104 bool raw_test;
105 bool file_test;
106 bool get_info_test;
107 bool pprint_test;
108 bool always_log;
109} args;
110
111static char btf_log_buf[BTF_LOG_BUF_SIZE];
112
113static struct btf_header hdr_tmpl = {
114 .magic = BTF_MAGIC,
115 .version = BTF_VERSION,
116 .hdr_len = sizeof(struct btf_header),
117};
118
119struct btf_raw_test {
120 const char *descr;
121 const char *str_sec;
122 const char *map_name;
123 const char *err_str;
124 __u32 raw_types[MAX_NR_RAW_TYPES];
125 __u32 str_sec_size;
126 enum bpf_map_type map_type;
127 __u32 key_size;
128 __u32 value_size;
129 __u32 key_type_id;
130 __u32 value_type_id;
131 __u32 max_entries;
132 bool btf_load_err;
133 bool map_create_err;
134 int hdr_len_delta;
135 int type_off_delta;
136 int str_off_delta;
137 int str_len_delta;
138};
139
140static struct btf_raw_test raw_tests[] = {
141/* enum E {
142 * E0,
143 * E1,
144 * };
145 *
146 * struct A {
147 * unsigned long long m;
148 * int n;
149 * char o;
150 * [3 bytes hole]
151 * int p[8];
152 * int q[4][8];
153 * enum E r;
154 * };
155 */
156{
157 .descr = "struct test #1",
158 .raw_types = {
159 /* int */
160 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
161 /* unsigned long long */
162 BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
163 /* char */
164 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
165 /* int[8] */
166 BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
167 /* struct A { */ /* [5] */
168 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
169 BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
170 BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
171 BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
172 BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
173 BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
174 BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */
175 /* } */
176 /* int[4][8] */
177 BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
178 /* enum E */ /* [7] */
179 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
180 BTF_ENUM_ENC(NAME_TBD, 0),
181 BTF_ENUM_ENC(NAME_TBD, 1),
182 BTF_END_RAW,
183 },
184 .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
185 .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
186 .map_type = BPF_MAP_TYPE_ARRAY,
187 .map_name = "struct_test1_map",
188 .key_size = sizeof(int),
189 .value_size = 180,
190 .key_type_id = 1,
191 .value_type_id = 5,
192 .max_entries = 4,
193},
194
195/* typedef struct b Struct_B;
196 *
197 * struct A {
198 * int m;
199 * struct b n[4];
200 * const Struct_B o[4];
201 * };
202 *
203 * struct B {
204 * int m;
205 * int n;
206 * };
207 */
208{
209 .descr = "struct test #2",
210 .raw_types = {
211 /* int */ /* [1] */
212 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
213 /* struct b [4] */ /* [2] */
214 BTF_TYPE_ARRAY_ENC(4, 1, 4),
215
216 /* struct A { */ /* [3] */
217 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
218 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
219 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */
220 BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
221 /* } */
222
223 /* struct B { */ /* [4] */
224 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
225 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
226 BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
227 /* } */
228
229 /* const int */ /* [5] */
230 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
231 /* typedef struct b Struct_B */ /* [6] */
232 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
233 /* const Struct_B */ /* [7] */
234 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
235 /* const Struct_B [4] */ /* [8] */
236 BTF_TYPE_ARRAY_ENC(7, 1, 4),
237 BTF_END_RAW,
238 },
239 .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
240 .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
241 .map_type = BPF_MAP_TYPE_ARRAY,
242 .map_name = "struct_test2_map",
243 .key_size = sizeof(int),
244 .value_size = 68,
245 .key_type_id = 1,
246 .value_type_id = 3,
247 .max_entries = 4,
248},
249
250/* Test member exceeds the size of struct.
251 *
252 * struct A {
253 * int m;
254 * int n;
255 * };
256 */
257{
258 .descr = "size check test #1",
259 .raw_types = {
260 /* int */ /* [1] */
261 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
262 /* struct A { */ /* [2] */
263 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
264 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
265 BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
266 /* } */
267 BTF_END_RAW,
268 },
269 .str_sec = "\0A\0m\0n",
270 .str_sec_size = sizeof("\0A\0m\0n"),
271 .map_type = BPF_MAP_TYPE_ARRAY,
272 .map_name = "size_check1_map",
273 .key_size = sizeof(int),
274 .value_size = 1,
275 .key_type_id = 1,
276 .value_type_id = 2,
277 .max_entries = 4,
278 .btf_load_err = true,
279 .err_str = "Member exceeds struct_size",
280},
281
282/* Test member exeeds the size of struct
283 *
284 * struct A {
285 * int m;
286 * int n[2];
287 * };
288 */
289{
290 .descr = "size check test #2",
291 .raw_types = {
292 /* int */ /* [1] */
293 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
294 /* int[2] */ /* [2] */
295 BTF_TYPE_ARRAY_ENC(1, 1, 2),
296 /* struct A { */ /* [3] */
297 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
298 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
299 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
300 /* } */
301 BTF_END_RAW,
302 },
303 .str_sec = "\0A\0m\0n",
304 .str_sec_size = sizeof("\0A\0m\0n"),
305 .map_type = BPF_MAP_TYPE_ARRAY,
306 .map_name = "size_check2_map",
307 .key_size = sizeof(int),
308 .value_size = 1,
309 .key_type_id = 1,
310 .value_type_id = 3,
311 .max_entries = 4,
312 .btf_load_err = true,
313 .err_str = "Member exceeds struct_size",
314},
315
316/* Test member exeeds the size of struct
317 *
318 * struct A {
319 * int m;
320 * void *n;
321 * };
322 */
323{
324 .descr = "size check test #3",
325 .raw_types = {
326 /* int */ /* [1] */
327 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
328 /* void* */ /* [2] */
329 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
330 /* struct A { */ /* [3] */
331 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
332 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
333 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
334 /* } */
335 BTF_END_RAW,
336 },
337 .str_sec = "\0A\0m\0n",
338 .str_sec_size = sizeof("\0A\0m\0n"),
339 .map_type = BPF_MAP_TYPE_ARRAY,
340 .map_name = "size_check3_map",
341 .key_size = sizeof(int),
342 .value_size = 1,
343 .key_type_id = 1,
344 .value_type_id = 3,
345 .max_entries = 4,
346 .btf_load_err = true,
347 .err_str = "Member exceeds struct_size",
348},
349
350/* Test member exceeds the size of struct
351 *
352 * enum E {
353 * E0,
354 * E1,
355 * };
356 *
357 * struct A {
358 * int m;
359 * enum E n;
360 * };
361 */
362{
363 .descr = "size check test #4",
364 .raw_types = {
365 /* int */ /* [1] */
366 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
367 /* enum E { */ /* [2] */
368 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
369 BTF_ENUM_ENC(NAME_TBD, 0),
370 BTF_ENUM_ENC(NAME_TBD, 1),
371 /* } */
372 /* struct A { */ /* [3] */
373 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
374 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
375 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
376 /* } */
377 BTF_END_RAW,
378 },
379 .str_sec = "\0E\0E0\0E1\0A\0m\0n",
380 .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
381 .map_type = BPF_MAP_TYPE_ARRAY,
382 .map_name = "size_check4_map",
383 .key_size = sizeof(int),
384 .value_size = 1,
385 .key_type_id = 1,
386 .value_type_id = 3,
387 .max_entries = 4,
388 .btf_load_err = true,
389 .err_str = "Member exceeds struct_size",
390},
391
392/* typedef const void * const_void_ptr;
393 * struct A {
394 * const_void_ptr m;
395 * };
396 */
397{
398 .descr = "void test #1",
399 .raw_types = {
400 /* int */ /* [1] */
401 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
402 /* const void */ /* [2] */
403 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
404 /* const void* */ /* [3] */
405 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
406 /* typedef const void * const_void_ptr */
407 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
408 /* struct A { */ /* [4] */
409 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
410 /* const_void_ptr m; */
411 BTF_MEMBER_ENC(NAME_TBD, 3, 0),
412 /* } */
413 BTF_END_RAW,
414 },
415 .str_sec = "\0const_void_ptr\0A\0m",
416 .str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
417 .map_type = BPF_MAP_TYPE_ARRAY,
418 .map_name = "void_test1_map",
419 .key_size = sizeof(int),
420 .value_size = sizeof(void *),
421 .key_type_id = 1,
422 .value_type_id = 4,
423 .max_entries = 4,
424},
425
426/* struct A {
427 * const void m;
428 * };
429 */
430{
431 .descr = "void test #2",
432 .raw_types = {
433 /* int */ /* [1] */
434 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
435 /* const void */ /* [2] */
436 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
437 /* struct A { */ /* [3] */
438 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
439 /* const void m; */
440 BTF_MEMBER_ENC(NAME_TBD, 2, 0),
441 /* } */
442 BTF_END_RAW,
443 },
444 .str_sec = "\0A\0m",
445 .str_sec_size = sizeof("\0A\0m"),
446 .map_type = BPF_MAP_TYPE_ARRAY,
447 .map_name = "void_test2_map",
448 .key_size = sizeof(int),
449 .value_size = sizeof(void *),
450 .key_type_id = 1,
451 .value_type_id = 3,
452 .max_entries = 4,
453 .btf_load_err = true,
454 .err_str = "Invalid member",
455},
456
457/* typedef const void * const_void_ptr;
458 * const_void_ptr[4]
459 */
460{
461 .descr = "void test #3",
462 .raw_types = {
463 /* int */ /* [1] */
464 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
465 /* const void */ /* [2] */
466 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
467 /* const void* */ /* [3] */
468 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
469 /* typedef const void * const_void_ptr */ /* [4] */
470 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
471 /* const_void_ptr[4] */ /* [5] */
472 BTF_TYPE_ARRAY_ENC(3, 1, 4),
473 BTF_END_RAW,
474 },
475 .str_sec = "\0const_void_ptr",
476 .str_sec_size = sizeof("\0const_void_ptr"),
477 .map_type = BPF_MAP_TYPE_ARRAY,
478 .map_name = "void_test3_map",
479 .key_size = sizeof(int),
480 .value_size = sizeof(void *) * 4,
481 .key_type_id = 1,
482 .value_type_id = 4,
483 .max_entries = 4,
484},
485
486/* const void[4] */
487{
488 .descr = "void test #4",
489 .raw_types = {
490 /* int */ /* [1] */
491 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
492 /* const void */ /* [2] */
493 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
494 /* const void[4] */ /* [3] */
495 BTF_TYPE_ARRAY_ENC(2, 1, 4),
496 BTF_END_RAW,
497 },
498 .str_sec = "\0A\0m",
499 .str_sec_size = sizeof("\0A\0m"),
500 .map_type = BPF_MAP_TYPE_ARRAY,
501 .map_name = "void_test4_map",
502 .key_size = sizeof(int),
503 .value_size = sizeof(void *) * 4,
504 .key_type_id = 1,
505 .value_type_id = 3,
506 .max_entries = 4,
507 .btf_load_err = true,
508 .err_str = "Invalid elem",
509},
510
511/* Array_A <------------------+
512 * elem_type == Array_B |
513 * | |
514 * | |
515 * Array_B <-------- + |
516 * elem_type == Array A --+
517 */
518{
519 .descr = "loop test #1",
520 .raw_types = {
521 /* int */ /* [1] */
522 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
523 /* Array_A */ /* [2] */
524 BTF_TYPE_ARRAY_ENC(3, 1, 8),
525 /* Array_B */ /* [3] */
526 BTF_TYPE_ARRAY_ENC(2, 1, 8),
527 BTF_END_RAW,
528 },
529 .str_sec = "",
530 .str_sec_size = sizeof(""),
531 .map_type = BPF_MAP_TYPE_ARRAY,
532 .map_name = "loop_test1_map",
533 .key_size = sizeof(int),
534 .value_size = sizeof(sizeof(int) * 8),
535 .key_type_id = 1,
536 .value_type_id = 2,
537 .max_entries = 4,
538 .btf_load_err = true,
539 .err_str = "Loop detected",
540},
541
542/* typedef is _before_ the BTF type of Array_A and Array_B
543 *
544 * typedef Array_B int_array;
545 *
546 * Array_A <------------------+
547 * elem_type == int_array |
548 * | |
549 * | |
550 * Array_B <-------- + |
551 * elem_type == Array_A --+
552 */
553{
554 .descr = "loop test #2",
555 .raw_types = {
556 /* int */
557 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
558 /* typedef Array_B int_array */
559 BTF_TYPEDEF_ENC(1, 4), /* [2] */
560 /* Array_A */
561 BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
562 /* Array_B */
563 BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
564 BTF_END_RAW,
565 },
566 .str_sec = "\0int_array\0",
567 .str_sec_size = sizeof("\0int_array"),
568 .map_type = BPF_MAP_TYPE_ARRAY,
569 .map_name = "loop_test2_map",
570 .key_size = sizeof(int),
571 .value_size = sizeof(sizeof(int) * 8),
572 .key_type_id = 1,
573 .value_type_id = 2,
574 .max_entries = 4,
575 .btf_load_err = true,
576 .err_str = "Loop detected",
577},
578
579/* Array_A <------------------+
580 * elem_type == Array_B |
581 * | |
582 * | |
583 * Array_B <-------- + |
584 * elem_type == Array_A --+
585 */
586{
587 .descr = "loop test #3",
588 .raw_types = {
589 /* int */ /* [1] */
590 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
591 /* Array_A */ /* [2] */
592 BTF_TYPE_ARRAY_ENC(3, 1, 8),
593 /* Array_B */ /* [3] */
594 BTF_TYPE_ARRAY_ENC(2, 1, 8),
595 BTF_END_RAW,
596 },
597 .str_sec = "",
598 .str_sec_size = sizeof(""),
599 .map_type = BPF_MAP_TYPE_ARRAY,
600 .map_name = "loop_test3_map",
601 .key_size = sizeof(int),
602 .value_size = sizeof(sizeof(int) * 8),
603 .key_type_id = 1,
604 .value_type_id = 2,
605 .max_entries = 4,
606 .btf_load_err = true,
607 .err_str = "Loop detected",
608},
609
610/* typedef is _between_ the BTF type of Array_A and Array_B
611 *
612 * typedef Array_B int_array;
613 *
614 * Array_A <------------------+
615 * elem_type == int_array |
616 * | |
617 * | |
618 * Array_B <-------- + |
619 * elem_type == Array_A --+
620 */
621{
622 .descr = "loop test #4",
623 .raw_types = {
624 /* int */ /* [1] */
625 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
626 /* Array_A */ /* [2] */
627 BTF_TYPE_ARRAY_ENC(3, 1, 8),
628 /* typedef Array_B int_array */ /* [3] */
629 BTF_TYPEDEF_ENC(NAME_TBD, 4),
630 /* Array_B */ /* [4] */
631 BTF_TYPE_ARRAY_ENC(2, 1, 8),
632 BTF_END_RAW,
633 },
634 .str_sec = "\0int_array\0",
635 .str_sec_size = sizeof("\0int_array"),
636 .map_type = BPF_MAP_TYPE_ARRAY,
637 .map_name = "loop_test4_map",
638 .key_size = sizeof(int),
639 .value_size = sizeof(sizeof(int) * 8),
640 .key_type_id = 1,
641 .value_type_id = 2,
642 .max_entries = 4,
643 .btf_load_err = true,
644 .err_str = "Loop detected",
645},
646
647/* typedef struct B Struct_B
648 *
649 * struct A {
650 * int x;
651 * Struct_B y;
652 * };
653 *
654 * struct B {
655 * int x;
656 * struct A y;
657 * };
658 */
659{
660 .descr = "loop test #5",
661 .raw_types = {
662 /* int */
663 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
664 /* struct A */ /* [2] */
665 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
666 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
667 BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */
668 /* typedef struct B Struct_B */
669 BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */
670 /* struct B */ /* [4] */
671 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
672 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
673 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */
674 BTF_END_RAW,
675 },
676 .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
677 .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
678 .map_type = BPF_MAP_TYPE_ARRAY,
679 .map_name = "loop_test5_map",
680 .key_size = sizeof(int),
681 .value_size = 8,
682 .key_type_id = 1,
683 .value_type_id = 2,
684 .max_entries = 4,
685 .btf_load_err = true,
686 .err_str = "Loop detected",
687},
688
689/* struct A {
690 * int x;
691 * struct A array_a[4];
692 * };
693 */
694{
695 .descr = "loop test #6",
696 .raw_types = {
697 /* int */
698 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
699 BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
700 /* struct A */ /* [3] */
701 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
702 BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
703 BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */
704 BTF_END_RAW,
705 },
706 .str_sec = "\0A\0x\0y",
707 .str_sec_size = sizeof("\0A\0x\0y"),
708 .map_type = BPF_MAP_TYPE_ARRAY,
709 .map_name = "loop_test6_map",
710 .key_size = sizeof(int),
711 .value_size = 8,
712 .key_type_id = 1,
713 .value_type_id = 2,
714 .max_entries = 4,
715 .btf_load_err = true,
716 .err_str = "Loop detected",
717},
718
719{
720 .descr = "loop test #7",
721 .raw_types = {
722 /* int */ /* [1] */
723 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
724 /* struct A { */ /* [2] */
725 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
726 /* const void *m; */
727 BTF_MEMBER_ENC(NAME_TBD, 3, 0),
728 /* CONST type_id=3 */ /* [3] */
729 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
730 /* PTR type_id=2 */ /* [4] */
731 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
732 BTF_END_RAW,
733 },
734 .str_sec = "\0A\0m",
735 .str_sec_size = sizeof("\0A\0m"),
736 .map_type = BPF_MAP_TYPE_ARRAY,
737 .map_name = "loop_test7_map",
738 .key_size = sizeof(int),
739 .value_size = sizeof(void *),
740 .key_type_id = 1,
741 .value_type_id = 2,
742 .max_entries = 4,
743 .btf_load_err = true,
744 .err_str = "Loop detected",
745},
746
747{
748 .descr = "loop test #8",
749 .raw_types = {
750 /* int */ /* [1] */
751 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
752 /* struct A { */ /* [2] */
753 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
754 /* const void *m; */
755 BTF_MEMBER_ENC(NAME_TBD, 4, 0),
756 /* struct B { */ /* [3] */
757 BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
758 /* const void *n; */
759 BTF_MEMBER_ENC(NAME_TBD, 6, 0),
760 /* CONST type_id=5 */ /* [4] */
761 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
762 /* PTR type_id=6 */ /* [5] */
763 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
764 /* CONST type_id=7 */ /* [6] */
765 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
766 /* PTR type_id=4 */ /* [7] */
767 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
768 BTF_END_RAW,
769 },
770 .str_sec = "\0A\0m\0B\0n",
771 .str_sec_size = sizeof("\0A\0m\0B\0n"),
772 .map_type = BPF_MAP_TYPE_ARRAY,
773 .map_name = "loop_test8_map",
774 .key_size = sizeof(int),
775 .value_size = sizeof(void *),
776 .key_type_id = 1,
777 .value_type_id = 2,
778 .max_entries = 4,
779 .btf_load_err = true,
780 .err_str = "Loop detected",
781},
782
783{
784 .descr = "string section does not end with null",
785 .raw_types = {
786 /* int */ /* [1] */
787 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
788 BTF_END_RAW,
789 },
790 .str_sec = "\0int",
791 .str_sec_size = sizeof("\0int") - 1,
792 .map_type = BPF_MAP_TYPE_ARRAY,
793 .map_name = "hdr_test_map",
794 .key_size = sizeof(int),
795 .value_size = sizeof(int),
796 .key_type_id = 1,
797 .value_type_id = 1,
798 .max_entries = 4,
799 .btf_load_err = true,
800 .err_str = "Invalid string section",
801},
802
803{
804 .descr = "empty string section",
805 .raw_types = {
806 /* int */ /* [1] */
807 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
808 BTF_END_RAW,
809 },
810 .str_sec = "",
811 .str_sec_size = 0,
812 .map_type = BPF_MAP_TYPE_ARRAY,
813 .map_name = "hdr_test_map",
814 .key_size = sizeof(int),
815 .value_size = sizeof(int),
816 .key_type_id = 1,
817 .value_type_id = 1,
818 .max_entries = 4,
819 .btf_load_err = true,
820 .err_str = "Invalid string section",
821},
822
823{
824 .descr = "empty type section",
825 .raw_types = {
826 BTF_END_RAW,
827 },
828 .str_sec = "\0int",
829 .str_sec_size = sizeof("\0int"),
830 .map_type = BPF_MAP_TYPE_ARRAY,
831 .map_name = "hdr_test_map",
832 .key_size = sizeof(int),
833 .value_size = sizeof(int),
834 .key_type_id = 1,
835 .value_type_id = 1,
836 .max_entries = 4,
837 .btf_load_err = true,
838 .err_str = "No type found",
839},
840
841{
842 .descr = "btf_header test. Longer hdr_len",
843 .raw_types = {
844 /* int */ /* [1] */
845 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
846 BTF_END_RAW,
847 },
848 .str_sec = "\0int",
849 .str_sec_size = sizeof("\0int"),
850 .map_type = BPF_MAP_TYPE_ARRAY,
851 .map_name = "hdr_test_map",
852 .key_size = sizeof(int),
853 .value_size = sizeof(int),
854 .key_type_id = 1,
855 .value_type_id = 1,
856 .max_entries = 4,
857 .btf_load_err = true,
858 .hdr_len_delta = 4,
859 .err_str = "Unsupported btf_header",
860},
861
862{
863 .descr = "btf_header test. Gap between hdr and type",
864 .raw_types = {
865 /* int */ /* [1] */
866 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
867 BTF_END_RAW,
868 },
869 .str_sec = "\0int",
870 .str_sec_size = sizeof("\0int"),
871 .map_type = BPF_MAP_TYPE_ARRAY,
872 .map_name = "hdr_test_map",
873 .key_size = sizeof(int),
874 .value_size = sizeof(int),
875 .key_type_id = 1,
876 .value_type_id = 1,
877 .max_entries = 4,
878 .btf_load_err = true,
879 .type_off_delta = 4,
880 .err_str = "Unsupported section found",
881},
882
883{
884 .descr = "btf_header test. Gap between type and str",
885 .raw_types = {
886 /* int */ /* [1] */
887 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
888 BTF_END_RAW,
889 },
890 .str_sec = "\0int",
891 .str_sec_size = sizeof("\0int"),
892 .map_type = BPF_MAP_TYPE_ARRAY,
893 .map_name = "hdr_test_map",
894 .key_size = sizeof(int),
895 .value_size = sizeof(int),
896 .key_type_id = 1,
897 .value_type_id = 1,
898 .max_entries = 4,
899 .btf_load_err = true,
900 .str_off_delta = 4,
901 .err_str = "Unsupported section found",
902},
903
904{
905 .descr = "btf_header test. Overlap between type and str",
906 .raw_types = {
907 /* int */ /* [1] */
908 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
909 BTF_END_RAW,
910 },
911 .str_sec = "\0int",
912 .str_sec_size = sizeof("\0int"),
913 .map_type = BPF_MAP_TYPE_ARRAY,
914 .map_name = "hdr_test_map",
915 .key_size = sizeof(int),
916 .value_size = sizeof(int),
917 .key_type_id = 1,
918 .value_type_id = 1,
919 .max_entries = 4,
920 .btf_load_err = true,
921 .str_off_delta = -4,
922 .err_str = "Section overlap found",
923},
924
925{
926 .descr = "btf_header test. Larger BTF size",
927 .raw_types = {
928 /* int */ /* [1] */
929 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
930 BTF_END_RAW,
931 },
932 .str_sec = "\0int",
933 .str_sec_size = sizeof("\0int"),
934 .map_type = BPF_MAP_TYPE_ARRAY,
935 .map_name = "hdr_test_map",
936 .key_size = sizeof(int),
937 .value_size = sizeof(int),
938 .key_type_id = 1,
939 .value_type_id = 1,
940 .max_entries = 4,
941 .btf_load_err = true,
942 .str_len_delta = -4,
943 .err_str = "Unsupported section found",
944},
945
946{
947 .descr = "btf_header test. Smaller BTF size",
948 .raw_types = {
949 /* int */ /* [1] */
950 BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
951 BTF_END_RAW,
952 },
953 .str_sec = "\0int",
954 .str_sec_size = sizeof("\0int"),
955 .map_type = BPF_MAP_TYPE_ARRAY,
956 .map_name = "hdr_test_map",
957 .key_size = sizeof(int),
958 .value_size = sizeof(int),
959 .key_type_id = 1,
960 .value_type_id = 1,
961 .max_entries = 4,
962 .btf_load_err = true,
963 .str_len_delta = 4,
964 .err_str = "Total section length too long",
965},
966
967{
968 .descr = "array test. index_type/elem_type \"int\"",
969 .raw_types = {
970 /* int */ /* [1] */
971 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
972 /* int[16] */ /* [2] */
973 BTF_TYPE_ARRAY_ENC(1, 1, 16),
974 BTF_END_RAW,
975 },
976 .str_sec = "",
977 .str_sec_size = sizeof(""),
978 .map_type = BPF_MAP_TYPE_ARRAY,
979 .map_name = "array_test_map",
980 .key_size = sizeof(int),
981 .value_size = sizeof(int),
982 .key_type_id = 1,
983 .value_type_id = 1,
984 .max_entries = 4,
985},
986
987{
988 .descr = "array test. index_type/elem_type \"const int\"",
989 .raw_types = {
990 /* int */ /* [1] */
991 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
992 /* int[16] */ /* [2] */
993 BTF_TYPE_ARRAY_ENC(3, 3, 16),
994 /* CONST type_id=1 */ /* [3] */
995 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
996 BTF_END_RAW,
997 },
998 .str_sec = "",
999 .str_sec_size = sizeof(""),
1000 .map_type = BPF_MAP_TYPE_ARRAY,
1001 .map_name = "array_test_map",
1002 .key_size = sizeof(int),
1003 .value_size = sizeof(int),
1004 .key_type_id = 1,
1005 .value_type_id = 1,
1006 .max_entries = 4,
1007},
1008
1009{
1010 .descr = "array test. index_type \"const int:31\"",
1011 .raw_types = {
1012 /* int */ /* [1] */
1013 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1014 /* int:31 */ /* [2] */
1015 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
1016 /* int[16] */ /* [3] */
1017 BTF_TYPE_ARRAY_ENC(1, 4, 16),
1018 /* CONST type_id=2 */ /* [4] */
1019 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
1020 BTF_END_RAW,
1021 },
1022 .str_sec = "",
1023 .str_sec_size = sizeof(""),
1024 .map_type = BPF_MAP_TYPE_ARRAY,
1025 .map_name = "array_test_map",
1026 .key_size = sizeof(int),
1027 .value_size = sizeof(int),
1028 .key_type_id = 1,
1029 .value_type_id = 1,
1030 .max_entries = 4,
1031 .btf_load_err = true,
1032 .err_str = "Invalid index",
1033},
1034
1035{
1036 .descr = "array test. elem_type \"const int:31\"",
1037 .raw_types = {
1038 /* int */ /* [1] */
1039 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1040 /* int:31 */ /* [2] */
1041 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
1042 /* int[16] */ /* [3] */
1043 BTF_TYPE_ARRAY_ENC(4, 1, 16),
1044 /* CONST type_id=2 */ /* [4] */
1045 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
1046 BTF_END_RAW,
1047 },
1048 .str_sec = "",
1049 .str_sec_size = sizeof(""),
1050 .map_type = BPF_MAP_TYPE_ARRAY,
1051 .map_name = "array_test_map",
1052 .key_size = sizeof(int),
1053 .value_size = sizeof(int),
1054 .key_type_id = 1,
1055 .value_type_id = 1,
1056 .max_entries = 4,
1057 .btf_load_err = true,
1058 .err_str = "Invalid array of int",
1059},
1060
1061{
1062 .descr = "array test. index_type \"void\"",
1063 .raw_types = {
1064 /* int */ /* [1] */
1065 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1066 /* int[16] */ /* [2] */
1067 BTF_TYPE_ARRAY_ENC(1, 0, 16),
1068 BTF_END_RAW,
1069 },
1070 .str_sec = "",
1071 .str_sec_size = sizeof(""),
1072 .map_type = BPF_MAP_TYPE_ARRAY,
1073 .map_name = "array_test_map",
1074 .key_size = sizeof(int),
1075 .value_size = sizeof(int),
1076 .key_type_id = 1,
1077 .value_type_id = 1,
1078 .max_entries = 4,
1079 .btf_load_err = true,
1080 .err_str = "Invalid index",
1081},
1082
1083{
1084 .descr = "array test. index_type \"const void\"",
1085 .raw_types = {
1086 /* int */ /* [1] */
1087 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1088 /* int[16] */ /* [2] */
1089 BTF_TYPE_ARRAY_ENC(1, 3, 16),
1090 /* CONST type_id=0 (void) */ /* [3] */
1091 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
1092 BTF_END_RAW,
1093 },
1094 .str_sec = "",
1095 .str_sec_size = sizeof(""),
1096 .map_type = BPF_MAP_TYPE_ARRAY,
1097 .map_name = "array_test_map",
1098 .key_size = sizeof(int),
1099 .value_size = sizeof(int),
1100 .key_type_id = 1,
1101 .value_type_id = 1,
1102 .max_entries = 4,
1103 .btf_load_err = true,
1104 .err_str = "Invalid index",
1105},
1106
1107{
1108 .descr = "array test. elem_type \"const void\"",
1109 .raw_types = {
1110 /* int */ /* [1] */
1111 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1112 /* int[16] */ /* [2] */
1113 BTF_TYPE_ARRAY_ENC(3, 1, 16),
1114 /* CONST type_id=0 (void) */ /* [3] */
1115 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
1116 BTF_END_RAW,
1117 },
1118 .str_sec = "",
1119 .str_sec_size = sizeof(""),
1120 .map_type = BPF_MAP_TYPE_ARRAY,
1121 .map_name = "array_test_map",
1122 .key_size = sizeof(int),
1123 .value_size = sizeof(int),
1124 .key_type_id = 1,
1125 .value_type_id = 1,
1126 .max_entries = 4,
1127 .btf_load_err = true,
1128 .err_str = "Invalid elem",
1129},
1130
1131{
1132 .descr = "array test. elem_type \"const void *\"",
1133 .raw_types = {
1134 /* int */ /* [1] */
1135 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1136 /* const void *[16] */ /* [2] */
1137 BTF_TYPE_ARRAY_ENC(3, 1, 16),
1138 /* CONST type_id=4 */ /* [3] */
1139 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
1140 /* void* */ /* [4] */
1141 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
1142 BTF_END_RAW,
1143 },
1144 .str_sec = "",
1145 .str_sec_size = sizeof(""),
1146 .map_type = BPF_MAP_TYPE_ARRAY,
1147 .map_name = "array_test_map",
1148 .key_size = sizeof(int),
1149 .value_size = sizeof(int),
1150 .key_type_id = 1,
1151 .value_type_id = 1,
1152 .max_entries = 4,
1153},
1154
1155{
1156 .descr = "array test. index_type \"const void *\"",
1157 .raw_types = {
1158 /* int */ /* [1] */
1159 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1160 /* const void *[16] */ /* [2] */
1161 BTF_TYPE_ARRAY_ENC(3, 3, 16),
1162 /* CONST type_id=4 */ /* [3] */
1163 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
1164 /* void* */ /* [4] */
1165 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
1166 BTF_END_RAW,
1167 },
1168 .str_sec = "",
1169 .str_sec_size = sizeof(""),
1170 .map_type = BPF_MAP_TYPE_ARRAY,
1171 .map_name = "array_test_map",
1172 .key_size = sizeof(int),
1173 .value_size = sizeof(int),
1174 .key_type_id = 1,
1175 .value_type_id = 1,
1176 .max_entries = 4,
1177 .btf_load_err = true,
1178 .err_str = "Invalid index",
1179},
1180
1181{
1182 .descr = "array test. t->size != 0\"",
1183 .raw_types = {
1184 /* int */ /* [1] */
1185 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1186 /* int[16] */ /* [2] */
1187 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1),
1188 BTF_ARRAY_ENC(1, 1, 16),
1189 BTF_END_RAW,
1190 },
1191 .str_sec = "",
1192 .str_sec_size = sizeof(""),
1193 .map_type = BPF_MAP_TYPE_ARRAY,
1194 .map_name = "array_test_map",
1195 .key_size = sizeof(int),
1196 .value_size = sizeof(int),
1197 .key_type_id = 1,
1198 .value_type_id = 1,
1199 .max_entries = 4,
1200 .btf_load_err = true,
1201 .err_str = "size != 0",
1202},
1203
1204{
1205 .descr = "int test. invalid int_data",
1206 .raw_types = {
1207 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
1208 0x10000000,
1209 BTF_END_RAW,
1210 },
1211 .str_sec = "",
1212 .str_sec_size = sizeof(""),
1213 .map_type = BPF_MAP_TYPE_ARRAY,
1214 .map_name = "array_test_map",
1215 .key_size = sizeof(int),
1216 .value_size = sizeof(int),
1217 .key_type_id = 1,
1218 .value_type_id = 1,
1219 .max_entries = 4,
1220 .btf_load_err = true,
1221 .err_str = "Invalid int_data",
1222},
1223
1224{
1225 .descr = "invalid BTF_INFO",
1226 .raw_types = {
1227 /* int */ /* [1] */
1228 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1229 BTF_TYPE_ENC(0, 0x10000000, 4),
1230 BTF_END_RAW,
1231 },
1232 .str_sec = "",
1233 .str_sec_size = sizeof(""),
1234 .map_type = BPF_MAP_TYPE_ARRAY,
1235 .map_name = "array_test_map",
1236 .key_size = sizeof(int),
1237 .value_size = sizeof(int),
1238 .key_type_id = 1,
1239 .value_type_id = 1,
1240 .max_entries = 4,
1241 .btf_load_err = true,
1242 .err_str = "Invalid btf_info",
1243},
1244
1245{
1246 .descr = "fwd test. t->type != 0\"",
1247 .raw_types = {
1248 /* int */ /* [1] */
1249 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
1250 /* fwd type */ /* [2] */
1251 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1),
1252 BTF_END_RAW,
1253 },
1254 .str_sec = "",
1255 .str_sec_size = sizeof(""),
1256 .map_type = BPF_MAP_TYPE_ARRAY,
1257 .map_name = "fwd_test_map",
1258 .key_size = sizeof(int),
1259 .value_size = sizeof(int),
1260 .key_type_id = 1,
1261 .value_type_id = 1,
1262 .max_entries = 4,
1263 .btf_load_err = true,
1264 .err_str = "type != 0",
1265},
1266
1267}; /* struct btf_raw_test raw_tests[] */
1268
/* Return a pointer to the string that follows @start inside the string
 * section bounded by @end, or NULL when no further string fits.
 */
static const char *get_next_str(const char *start, const char *end)
{
	const char *next = start + 1;

	return next < end ? next : NULL;
}
1273
1274static int get_type_sec_size(const __u32 *raw_types)
1275{
1276 int i;
1277
1278 for (i = MAX_NR_RAW_TYPES - 1;
1279 i >= 0 && raw_types[i] != BTF_END_RAW;
1280 i--)
1281 ;
1282
1283 return i < 0 ? i : i * sizeof(raw_types[0]);
1284}
1285
/*
 * Assemble a raw BTF blob in malloc()ed memory:
 * [ btf_header @hdr | type section from @raw_types | string section @str ].
 *
 * @raw_types must be terminated by BTF_END_RAW.  Each NAME_TBD
 * placeholder in it is patched with the offset of the next string in
 * @str, consumed in order.  On success the total blob size is stored
 * in *btf_size and the blob is returned (caller must free()); NULL is
 * returned on failure.
 */
static void *btf_raw_create(const struct btf_header *hdr,
			    const __u32 *raw_types,
			    const char *str,
			    unsigned int str_sec_size,
			    unsigned int *btf_size)
{
	const char *next_str = str, *end_str = str + str_sec_size;
	unsigned int size_needed, offset;
	struct btf_header *ret_hdr;
	int i, type_sec_size;
	uint32_t *ret_types;
	void *raw_btf;

	type_sec_size = get_type_sec_size(raw_types);
	if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
		return NULL;

	size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
	raw_btf = malloc(size_needed);
	if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
		return NULL;

	/* Copy header */
	memcpy(raw_btf, hdr, sizeof(*hdr));
	offset = sizeof(*hdr);

	/* Copy type section */
	ret_types = raw_btf + offset;
	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
		if (raw_types[i] == NAME_TBD) {
			/* Replace placeholder with the string-section
			 * offset of the next string.
			 */
			next_str = get_next_str(next_str, end_str);
			if (CHECK(!next_str, "Error in getting next_str")) {
				free(raw_btf);
				return NULL;
			}
			ret_types[i] = next_str - str;
			next_str += strlen(next_str);
		} else {
			ret_types[i] = raw_types[i];
		}
	}
	offset += type_sec_size;

	/* Copy string section */
	memcpy(raw_btf + offset, str, str_sec_size);

	/* Fix up section offsets/lengths in the copied header */
	ret_hdr = (struct btf_header *)raw_btf;
	ret_hdr->type_len = type_sec_size;
	ret_hdr->str_off = type_sec_size;
	ret_hdr->str_len = str_sec_size;

	*btf_size = size_needed;

	return raw_btf;
}
1341
/*
 * Run raw BTF test @test_num (1-based index into raw_tests[]):
 * build the raw blob, corrupt the header per the test's deltas, load
 * it with bpf_load_btf() and compare the outcome (and the verifier
 * error string) against the test's expectations.  When the load
 * succeeds, additionally try creating a map that references the BTF
 * key/value type ids.
 *
 * Returns 0 on pass, non-zero on failure.
 */
static int do_test_raw(unsigned int test_num)
{
	struct btf_raw_test *test = &raw_tests[test_num - 1];
	struct bpf_create_map_attr create_attr = {};
	int map_fd = -1, btf_fd = -1;
	unsigned int raw_btf_size;
	struct btf_header *hdr;
	void *raw_btf;
	int err;

	fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
	raw_btf = btf_raw_create(&hdr_tmpl,
				 test->raw_types,
				 test->str_sec,
				 test->str_sec_size,
				 &raw_btf_size);

	if (!raw_btf)
		return -1;

	hdr = raw_btf;

	/* Apply the test's header corruptions (deltas may be negative) */
	hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
	hdr->type_off = (int)hdr->type_off + test->type_off_delta;
	hdr->str_off = (int)hdr->str_off + test->str_off_delta;
	hdr->str_len = (int)hdr->str_len + test->str_len_delta;

	*btf_log_buf = '\0';
	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
			      btf_log_buf, BTF_LOG_BUF_SIZE,
			      args.always_log);
	free(raw_btf);

	/* The load must fail iff the test expects a load error */
	err = ((btf_fd == -1) != test->btf_load_err);
	if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
		  btf_fd, test->btf_load_err) ||
	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
		  "expected err_str:%s", test->err_str)) {
		err = -1;
		goto done;
	}

	/* An expected load failure is a pass; nothing more to check */
	if (err || btf_fd == -1)
		goto done;

	create_attr.name = test->map_name;
	create_attr.map_type = test->map_type;
	create_attr.key_size = test->key_size;
	create_attr.value_size = test->value_size;
	create_attr.max_entries = test->max_entries;
	create_attr.btf_fd = btf_fd;
	create_attr.btf_key_type_id = test->key_type_id;
	create_attr.btf_value_type_id = test->value_type_id;

	map_fd = bpf_create_map_xattr(&create_attr);

	/* Map creation must fail iff the test expects it to */
	err = ((map_fd == -1) != test->map_create_err);
	CHECK(err, "map_fd:%d test->map_create_err:%u",
	      map_fd, test->map_create_err);

done:
	if (!err)
		fprintf(stderr, "OK");

	if (*btf_log_buf && (err || args.always_log))
		fprintf(stderr, "\n%s", btf_log_buf);

	if (btf_fd != -1)
		close(btf_fd);
	if (map_fd != -1)
		close(map_fd);

	return err;
}
1416
1417static int test_raw(void)
1418{
1419 unsigned int i;
1420 int err = 0;
1421
1422 if (args.raw_test_num)
1423 return count_result(do_test_raw(args.raw_test_num));
1424
1425 for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
1426 err |= count_result(do_test_raw(i));
1427
1428 return err;
1429}
1430
/* Descriptor for one BPF_OBJ_GET_INFO_BY_FD test case. */
struct btf_get_info_test {
	const char *descr;	/* test name printed to stderr */
	const char *str_sec;	/* raw BTF string section */
	__u32 raw_types[MAX_NR_RAW_TYPES];	/* BTF_END_RAW terminated */
	__u32 str_sec_size;	/* byte length of str_sec (incl. NULs) */
	int btf_size_delta;	/* added to info.btf_size before GET_INFO */
	/* when set, replaces the default do_test_get_info() flow */
	int (*special_test)(unsigned int test_num);
};
1439
1440static int test_big_btf_info(unsigned int test_num);
1441static int test_btf_id(unsigned int test_num);
1442
/*
 * GET_INFO test matrix: entries either tweak the advertised user
 * buffer size (btf_size_delta) for the common do_test_get_info()
 * flow, or install a special_test callback that drives the whole
 * test itself.
 */
const struct btf_get_info_test get_info_tests[] = {
{
	.descr = "== raw_btf_size+1",
	.raw_types = {
		/* int */				/* [1] */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		BTF_END_RAW,
	},
	.str_sec = "",
	.str_sec_size = sizeof(""),
	.btf_size_delta = 1,
},
{
	.descr = "== raw_btf_size-3",
	.raw_types = {
		/* int */				/* [1] */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		BTF_END_RAW,
	},
	.str_sec = "",
	.str_sec_size = sizeof(""),
	.btf_size_delta = -3,
},
{
	.descr = "Large bpf_btf_info",
	.raw_types = {
		/* int */				/* [1] */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		BTF_END_RAW,
	},
	.str_sec = "",
	.str_sec_size = sizeof(""),
	.special_test = test_big_btf_info,
},
{
	.descr = "BTF ID",
	.raw_types = {
		/* int */				/* [1] */
		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
		/* unsigned int */			/* [2] */
		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
		BTF_END_RAW,
	},
	.str_sec = "",
	.str_sec_size = sizeof(""),
	.special_test = test_btf_id,
},
};
1491
/* Flatten a user-space pointer into the __u64 representation that
 * bpf(2) attribute fields expect.
 */
static inline __u64 ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
1496
1497static int test_big_btf_info(unsigned int test_num)
1498{
1499 const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
1500 uint8_t *raw_btf = NULL, *user_btf = NULL;
1501 unsigned int raw_btf_size;
1502 struct {
1503 struct bpf_btf_info info;
1504 uint64_t garbage;
1505 } info_garbage;
1506 struct bpf_btf_info *info;
1507 int btf_fd = -1, err;
1508 uint32_t info_len;
1509
1510 raw_btf = btf_raw_create(&hdr_tmpl,
1511 test->raw_types,
1512 test->str_sec,
1513 test->str_sec_size,
1514 &raw_btf_size);
1515
1516 if (!raw_btf)
1517 return -1;
1518
1519 *btf_log_buf = '\0';
1520
1521 user_btf = malloc(raw_btf_size);
1522 if (CHECK(!user_btf, "!user_btf")) {
1523 err = -1;
1524 goto done;
1525 }
1526
1527 btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
1528 btf_log_buf, BTF_LOG_BUF_SIZE,
1529 args.always_log);
1530 if (CHECK(btf_fd == -1, "errno:%d", errno)) {
1531 err = -1;
1532 goto done;
1533 }
1534
1535 /*
1536 * GET_INFO should error out if the userspace info
1537 * has non zero tailing bytes.
1538 */
1539 info = &info_garbage.info;
1540 memset(info, 0, sizeof(*info));
1541 info_garbage.garbage = 0xdeadbeef;
1542 info_len = sizeof(info_garbage);
1543 info->btf = ptr_to_u64(user_btf);
1544 info->btf_size = raw_btf_size;
1545
1546 err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
1547 if (CHECK(!err, "!err")) {
1548 err = -1;
1549 goto done;
1550 }
1551
1552 /*
1553 * GET_INFO should succeed even info_len is larger than
1554 * the kernel supported as long as tailing bytes are zero.
1555 * The kernel supported info len should also be returned
1556 * to userspace.
1557 */
1558 info_garbage.garbage = 0;
1559 err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
1560 if (CHECK(err || info_len != sizeof(*info),
1561 "err:%d errno:%d info_len:%u sizeof(*info):%lu",
1562 err, errno, info_len, sizeof(*info))) {
1563 err = -1;
1564 goto done;
1565 }
1566
1567 fprintf(stderr, "OK");
1568
1569done:
1570 if (*btf_log_buf && (err || args.always_log))
1571 fprintf(stderr, "\n%s", btf_log_buf);
1572
1573 free(raw_btf);
1574 free(user_btf);
1575
1576 if (btf_fd != -1)
1577 close(btf_fd);
1578
1579 return err;
1580}
1581
/*
 * Verify BTF object-id handling:
 *  - GET_INFO on a loaded BTF fd reports an id,
 *  - bpf_btf_get_fd_by_id() on that id yields the same BTF data,
 *  - a map created against the BTF carries btf_id and the key/value
 *    type ids in its bpf_map_info,
 *  - the id stays resolvable while the map holds the last reference
 *    and disappears once the map fd is closed.
 *
 * Returns 0 on pass, non-zero on failure.
 */
static int test_btf_id(unsigned int test_num)
{
	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
	struct bpf_create_map_attr create_attr = {};
	uint8_t *raw_btf = NULL, *user_btf[2] = {};
	int btf_fd[2] = {-1, -1}, map_fd = -1;
	struct bpf_map_info map_info = {};
	struct bpf_btf_info info[2] = {};
	unsigned int raw_btf_size;
	uint32_t info_len;
	int err, i, ret;

	raw_btf = btf_raw_create(&hdr_tmpl,
				 test->raw_types,
				 test->str_sec,
				 test->str_sec_size,
				 &raw_btf_size);

	if (!raw_btf)
		return -1;

	*btf_log_buf = '\0';

	/* Two user buffers: one read back via the original fd, one via
	 * the fd recovered from the id.
	 */
	for (i = 0; i < 2; i++) {
		user_btf[i] = malloc(raw_btf_size);
		if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
			err = -1;
			goto done;
		}
		info[i].btf = ptr_to_u64(user_btf[i]);
		info[i].btf_size = raw_btf_size;
	}

	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
				 btf_log_buf, BTF_LOG_BUF_SIZE,
				 args.always_log);
	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
	info_len = sizeof(info[0]);
	err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
	if (CHECK(err, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
	if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	/* Both fds must report the same id, size and raw BTF bytes */
	ret = 0;
	err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
	if (CHECK(err || info[0].id != info[1].id ||
		  info[0].btf_size != info[1].btf_size ||
		  (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
		  "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
		  err, errno, info[0].id, info[1].id,
		  info[0].btf_size, info[1].btf_size, ret)) {
		err = -1;
		goto done;
	}

	/* Test btf members in struct bpf_map_info */
	create_attr.name = "test_btf_id";
	create_attr.map_type = BPF_MAP_TYPE_ARRAY;
	create_attr.key_size = sizeof(int);
	create_attr.value_size = sizeof(unsigned int);
	create_attr.max_entries = 4;
	create_attr.btf_fd = btf_fd[0];
	create_attr.btf_key_type_id = 1;
	create_attr.btf_value_type_id = 2;

	map_fd = bpf_create_map_xattr(&create_attr);
	if (CHECK(map_fd == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	info_len = sizeof(map_info);
	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
	if (CHECK(err || map_info.btf_id != info[0].id ||
		  map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
		  map_info.btf_value_type_id)) {
		err = -1;
		goto done;
	}

	/* Drop both BTF fds; the map now holds the only reference */
	for (i = 0; i < 2; i++) {
		close(btf_fd[i]);
		btf_fd[i] = -1;
	}

	/* Test BTF ID is removed from the kernel */
	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}
	close(btf_fd[0]);
	btf_fd[0] = -1;

	/* The map holds the last ref to BTF and its btf_id */
	close(map_fd);
	map_fd = -1;
	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
	if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
		err = -1;
		goto done;
	}

	fprintf(stderr, "OK");

done:
	if (*btf_log_buf && (err || args.always_log))
		fprintf(stderr, "\n%s", btf_log_buf);

	free(raw_btf);
	if (map_fd != -1)
		close(map_fd);
	for (i = 0; i < 2; i++) {
		free(user_btf[i]);
		if (btf_fd[i] != -1)
			close(btf_fd[i]);
	}

	return err;
}
1716
1717static int do_test_get_info(unsigned int test_num)
1718{
1719 const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
1720 unsigned int raw_btf_size, user_btf_size, expected_nbytes;
1721 uint8_t *raw_btf = NULL, *user_btf = NULL;
1722 struct bpf_btf_info info = {};
1723 int btf_fd = -1, err, ret;
1724 uint32_t info_len;
1725
1726 fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
1727 test_num, test->descr);
1728
1729 if (test->special_test)
1730 return test->special_test(test_num);
1731
1732 raw_btf = btf_raw_create(&hdr_tmpl,
1733 test->raw_types,
1734 test->str_sec,
1735 test->str_sec_size,
1736 &raw_btf_size);
1737
1738 if (!raw_btf)
1739 return -1;
1740
1741 *btf_log_buf = '\0';
1742
1743 user_btf = malloc(raw_btf_size);
1744 if (CHECK(!user_btf, "!user_btf")) {
1745 err = -1;
1746 goto done;
1747 }
1748
1749 btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
1750 btf_log_buf, BTF_LOG_BUF_SIZE,
1751 args.always_log);
1752 if (CHECK(btf_fd == -1, "errno:%d", errno)) {
1753 err = -1;
1754 goto done;
1755 }
1756
1757 user_btf_size = (int)raw_btf_size + test->btf_size_delta;
1758 expected_nbytes = min(raw_btf_size, user_btf_size);
1759 if (raw_btf_size > expected_nbytes)
1760 memset(user_btf + expected_nbytes, 0xff,
1761 raw_btf_size - expected_nbytes);
1762
1763 info_len = sizeof(info);
1764 info.btf = ptr_to_u64(user_btf);
1765 info.btf_size = user_btf_size;
1766
1767 ret = 0;
1768 err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
1769 if (CHECK(err || !info.id || info_len != sizeof(info) ||
1770 info.btf_size != raw_btf_size ||
1771 (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
1772 "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
1773 err, errno, info.id, info_len, sizeof(info),
1774 raw_btf_size, info.btf_size, expected_nbytes, ret)) {
1775 err = -1;
1776 goto done;
1777 }
1778
1779 while (expected_nbytes < raw_btf_size) {
1780 fprintf(stderr, "%u...", expected_nbytes);
1781 if (CHECK(user_btf[expected_nbytes++] != 0xff,
1782 "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
1783 user_btf[expected_nbytes - 1])) {
1784 err = -1;
1785 goto done;
1786 }
1787 }
1788
1789 fprintf(stderr, "OK");
1790
1791done:
1792 if (*btf_log_buf && (err || args.always_log))
1793 fprintf(stderr, "\n%s", btf_log_buf);
1794
1795 free(raw_btf);
1796 free(user_btf);
1797
1798 if (btf_fd != -1)
1799 close(btf_fd);
1800
1801 return err;
1802}
1803
1804static int test_get_info(void)
1805{
1806 unsigned int i;
1807 int err = 0;
1808
1809 if (args.get_info_test_num)
1810 return count_result(do_test_get_info(args.get_info_test_num));
1811
1812 for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
1813 err |= count_result(do_test_get_info(i));
1814
1815 return err;
1816}
1817
/* One libbpf-based BTF file test case. */
struct btf_file_test {
	const char *file;	/* ELF object to open and load */
	/* true when the map's BTF key/value type ids are expected to
	 * be absent (i.e. both reported as 0)
	 */
	bool btf_kv_notfound;
};
1822
/* Objects fed to do_test_file(); btf_kv_notfound marks the one whose
 * map is expected to come back without BTF key/value type ids.
 */
static struct btf_file_test file_tests[] = {
{
	.file = "test_btf_haskv.o",
},
{
	.file = "test_btf_nokv.o",
	.btf_kv_notfound = true,
},
};
1832
1833static int file_has_btf_elf(const char *fn)
1834{
1835 Elf_Scn *scn = NULL;
1836 GElf_Ehdr ehdr;
1837 int elf_fd;
1838 Elf *elf;
1839 int ret;
1840
1841 if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
1842 "elf_version(EV_CURRENT) == EV_NONE"))
1843 return -1;
1844
1845 elf_fd = open(fn, O_RDONLY);
1846 if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
1847 return -1;
1848
1849 elf = elf_begin(elf_fd, ELF_C_READ, NULL);
1850 if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
1851 ret = -1;
1852 goto done;
1853 }
1854
1855 if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
1856 ret = -1;
1857 goto done;
1858 }
1859
1860 while ((scn = elf_nextscn(elf, scn))) {
1861 const char *sh_name;
1862 GElf_Shdr sh;
1863
1864 if (CHECK(gelf_getshdr(scn, &sh) != &sh,
1865 "file:%s gelf_getshdr != &sh", fn)) {
1866 ret = -1;
1867 goto done;
1868 }
1869
1870 sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
1871 if (!strcmp(sh_name, BTF_ELF_SEC)) {
1872 ret = 1;
1873 goto done;
1874 }
1875 }
1876
1877 ret = 0;
1878
1879done:
1880 close(elf_fd);
1881 elf_end(elf);
1882 return ret;
1883}
1884
/*
 * Run libbpf file test @test_num (1-based index into file_tests[]):
 * open the test ELF object, load it (the first program is forced to
 * TRACEPOINT type), and check whether the "btf_map" map got BTF
 * key/value type ids, matching the test's btf_kv_notfound flag.
 * Objects with no .BTF section are skipped, not failed.
 *
 * Returns 0 on pass/skip, non-zero on failure.
 */
static int do_test_file(unsigned int test_num)
{
	const struct btf_file_test *test = &file_tests[test_num - 1];
	struct bpf_object *obj = NULL;
	struct bpf_program *prog;
	struct bpf_map *map;
	int err;

	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
		test->file);

	err = file_has_btf_elf(test->file);
	if (err == -1)
		return err;

	if (err == 0) {
		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
		skip_cnt++;
		return 0;
	}

	obj = bpf_object__open(test->file);
	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
		return PTR_ERR(obj);

	/* bpf_object__btf_fd() returns the BTF fd or -1 */
	err = bpf_object__btf_fd(obj);
	if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
		goto done;

	prog = bpf_program__next(NULL, obj);
	if (CHECK(!prog, "Cannot find bpf_prog")) {
		err = -1;
		goto done;
	}

	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
	err = bpf_object__load(obj);
	if (CHECK(err < 0, "bpf_object__load: %d", err))
		goto done;

	map = bpf_object__find_map_by_name(obj, "btf_map");
	if (CHECK(!map, "btf_map not found")) {
		err = -1;
		goto done;
	}

	/* A key or value type id of 0 means "no BTF info"; that state
	 * must match the test's btf_kv_notfound expectation.
	 */
	err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
		!= test->btf_kv_notfound;
	if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
		  bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
		  test->btf_kv_notfound))
		goto done;

	fprintf(stderr, "OK");

done:
	bpf_object__close(obj);
	return err;
}
1944
1945static int test_file(void)
1946{
1947 unsigned int i;
1948 int err = 0;
1949
1950 if (args.file_test_num)
1951 return count_result(do_test_file(args.file_test_num));
1952
1953 for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
1954 err |= count_result(do_test_file(i));
1955
1956 return err;
1957}
1958
/* Printable names for the anonymous enum in struct pprint_mapv,
 * indexed by the enum's numeric value (0..3).
 */
const char *pprint_enum_str[] = {
	"ENUM_ZERO",
	"ENUM_ONE",
	"ENUM_TWO",
	"ENUM_THREE",
};
1965
/* Value layout for the pretty-print test map.  The BTF description in
 * pprint_test.raw_types must mirror this layout exactly, including the
 * 2-byte hole after ui16 and the three bitfields packed into one
 * 32-bit word.
 */
struct pprint_mapv {
	uint32_t ui32;
	uint16_t ui16;
	/* 2 bytes hole */
	int32_t si32;
	uint32_t unused_bits2a:2,
		bits28:28,
		unused_bits2b:2;
	union {
		uint64_t ui64;
		uint8_t ui8a[8];
	};
	enum {
		ENUM_ZERO,
		ENUM_ONE,
		ENUM_TWO,
		ENUM_THREE,
	} aenum;
};
1985
/* Raw BTF description matching struct pprint_mapv above: basic ints,
 * typedefs, an anonymous union, an anonymous enum and the struct
 * itself (type id 16), used for the map pretty-print test.
 */
static struct btf_raw_test pprint_test = {
	.descr = "BTF pretty print test #1",
	.raw_types = {
		/* unsigned char */			/* [1] */
		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
		/* unsigned short */			/* [2] */
		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
		/* unsigned int */			/* [3] */
		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
		/* int */				/* [4] */
		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
		/* unsigned long long */		/* [5] */
		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
		/* 2 bits */				/* [6] */
		BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
		/* 28 bits */				/* [7] */
		BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
		/* uint8_t[8] */			/* [8] */
		BTF_TYPE_ARRAY_ENC(9, 1, 8),
		/* typedef unsigned char uint8_t */	/* [9] */
		BTF_TYPEDEF_ENC(NAME_TBD, 1),
		/* typedef unsigned short uint16_t */	/* [10] */
		BTF_TYPEDEF_ENC(NAME_TBD, 2),
		/* typedef unsigned int uint32_t */	/* [11] */
		BTF_TYPEDEF_ENC(NAME_TBD, 3),
		/* typedef int int32_t */		/* [12] */
		BTF_TYPEDEF_ENC(NAME_TBD, 4),
		/* typedef unsigned long long uint64_t *//* [13] */
		BTF_TYPEDEF_ENC(NAME_TBD, 5),
		/* union (anon) */			/* [14] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
		BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
		BTF_MEMBER_ENC(NAME_TBD, 8, 0),	/* uint8_t ui8a[8]; */
		/* enum (anon) */			/* [15] */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
		BTF_ENUM_ENC(NAME_TBD, 0),
		BTF_ENUM_ENC(NAME_TBD, 1),
		BTF_ENUM_ENC(NAME_TBD, 2),
		BTF_ENUM_ENC(NAME_TBD, 3),
		/* struct pprint_mapv */		/* [16] */
		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 28),
		BTF_MEMBER_ENC(NAME_TBD, 11, 0),	/* uint32_t ui32 */
		BTF_MEMBER_ENC(NAME_TBD, 10, 32),	/* uint16_t ui16 */
		BTF_MEMBER_ENC(NAME_TBD, 12, 64),	/* int32_t si32 */
		BTF_MEMBER_ENC(NAME_TBD, 6, 96),	/* unused_bits2a */
		BTF_MEMBER_ENC(NAME_TBD, 7, 98),	/* bits28 */
		BTF_MEMBER_ENC(NAME_TBD, 6, 126),	/* unused_bits2b */
		BTF_MEMBER_ENC(0, 14, 128),		/* union (anon) */
		BTF_MEMBER_ENC(NAME_TBD, 15, 192),	/* aenum */
		BTF_END_RAW,
	},
	.str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
	.str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
	.map_type = BPF_MAP_TYPE_ARRAY,
	.map_name = "pprint_test",
	.key_size = sizeof(unsigned int),
	.value_size = sizeof(struct pprint_mapv),
	.key_type_id = 3,	/* unsigned int */
	.value_type_id = 16,	/* struct pprint_mapv */
	.max_entries = 128 * 1024,
};
2047
2048static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
2049{
2050 v->ui32 = i;
2051 v->si32 = -i;
2052 v->unused_bits2a = 3;
2053 v->bits28 = i;
2054 v->unused_bits2b = 3;
2055 v->ui64 = i;
2056 v->aenum = i & 0x03;
2057}
2058
/*
 * Pretty-print test: create a BTF-described ARRAY map, pin it under
 * /sys/fs/bpf, fill every slot with a known pattern (set_pprint_mapv),
 * then read the pinned file back and compare the kernel's pretty-print
 * output line by line against a locally formatted expectation.
 *
 * Returns 0 on success, -1 on any setup failure or output mismatch.
 */
static int test_pprint(void)
{
	const struct btf_raw_test *test = &pprint_test;
	struct bpf_create_map_attr create_attr = {};
	int map_fd = -1, btf_fd = -1;
	struct pprint_mapv mapv = {};
	unsigned int raw_btf_size;
	char expected_line[255];
	FILE *pin_file = NULL;
	char pin_path[255];
	size_t line_len = 0;
	char *line = NULL;
	unsigned int key;
	uint8_t *raw_btf;
	ssize_t nread;
	int err, ret;

	fprintf(stderr, "%s......", test->descr);
	/* Build the raw BTF blob from the test's type and string sections. */
	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
				 test->str_sec, test->str_sec_size,
				 &raw_btf_size);

	if (!raw_btf)
		return -1;

	*btf_log_buf = '\0';
	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
			      btf_log_buf, BTF_LOG_BUF_SIZE,
			      args.always_log);
	free(raw_btf);

	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	/* Create the map with key/value type ids referencing the loaded BTF. */
	create_attr.name = test->map_name;
	create_attr.map_type = test->map_type;
	create_attr.key_size = test->key_size;
	create_attr.value_size = test->value_size;
	create_attr.max_entries = test->max_entries;
	create_attr.btf_fd = btf_fd;
	create_attr.btf_key_type_id = test->key_type_id;
	create_attr.btf_value_type_id = test->value_type_id;

	map_fd = bpf_create_map_xattr(&create_attr);
	if (CHECK(map_fd == -1, "errno:%d", errno)) {
		err = -1;
		goto done;
	}

	ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
		       "/sys/fs/bpf", test->map_name);

	/* snprintf returning exactly sizeof(pin_path) means truncation. */
	if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
		  "/sys/fs/bpf", test->map_name)) {
		err = -1;
		goto done;
	}

	err = bpf_obj_pin(map_fd, pin_path);
	if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
		goto done;

	/* Populate every entry with a deterministic, key-derived value. */
	for (key = 0; key < test->max_entries; key++) {
		set_pprint_mapv(&mapv, key);
		bpf_map_update_elem(map_fd, &key, &mapv, 0);
	}

	pin_file = fopen(pin_path, "r");
	if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
		err = -1;
		goto done;
	}

	/* Skip lines start with '#' */
	while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
	       *line == '#')
		;

	if (CHECK(nread <= 0, "Unexpected EOF")) {
		err = -1;
		goto done;
	}

	/* Compare each remaining line against the recomputed expectation. */
	key = 0;
	do {
		ssize_t nexpected_line;

		set_pprint_mapv(&mapv, key);
		nexpected_line = snprintf(expected_line, sizeof(expected_line),
					  "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
					  key,
					  mapv.ui32, mapv.si32,
					  mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
					  mapv.ui64,
					  mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
					  mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
					  pprint_enum_str[mapv.aenum]);

		if (CHECK(nexpected_line == sizeof(expected_line),
			  "expected_line is too long")) {
			err = -1;
			goto done;
		}

		if (strcmp(expected_line, line)) {
			err = -1;
			fprintf(stderr, "unexpected pprint output\n");
			fprintf(stderr, "expected: %s", expected_line);
			fprintf(stderr, " read: %s", line);
			goto done;
		}

		nread = getline(&line, &line_len, pin_file);
	} while (++key < test->max_entries && nread > 0);

	/* All entries must have been seen... */
	if (CHECK(key < test->max_entries,
		  "Unexpected EOF. key:%u test->max_entries:%u",
		  key, test->max_entries)) {
		err = -1;
		goto done;
	}

	/* ...and nothing beyond them. */
	if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
		err = -1;
		goto done;
	}

	err = 0;

done:
	if (!err)
		fprintf(stderr, "OK");
	if (*btf_log_buf && (err || args.always_log))
		fprintf(stderr, "\n%s", btf_log_buf);
	if (btf_fd != -1)
		close(btf_fd);
	if (map_fd != -1)
		close(map_fd);
	if (pin_file)
		fclose(pin_file);
	unlink(pin_path);
	free(line);

	return err;
}
2206
/* Print command-line usage to stderr, including the valid test-number
 * range for each test category.
 */
static void usage(const char *cmd)
{
	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
		ARRAY_SIZE(file_tests));
}
2213
2214static int parse_args(int argc, char **argv)
2215{
2216 const char *optstr = "lpf:r:g:";
2217 int opt;
2218
2219 while ((opt = getopt(argc, argv, optstr)) != -1) {
2220 switch (opt) {
2221 case 'l':
2222 args.always_log = true;
2223 break;
2224 case 'f':
2225 args.file_test_num = atoi(optarg);
2226 args.file_test = true;
2227 break;
2228 case 'r':
2229 args.raw_test_num = atoi(optarg);
2230 args.raw_test = true;
2231 break;
2232 case 'g':
2233 args.get_info_test_num = atoi(optarg);
2234 args.get_info_test = true;
2235 break;
2236 case 'p':
2237 args.pprint_test = true;
2238 break;
2239 case 'h':
2240 usage(argv[0]);
2241 exit(0);
2242 default:
2243 usage(argv[0]);
2244 return -1;
2245 }
2246 }
2247
2248 if (args.raw_test_num &&
2249 (args.raw_test_num < 1 ||
2250 args.raw_test_num > ARRAY_SIZE(raw_tests))) {
2251 fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
2252 ARRAY_SIZE(raw_tests));
2253 return -1;
2254 }
2255
2256 if (args.file_test_num &&
2257 (args.file_test_num < 1 ||
2258 args.file_test_num > ARRAY_SIZE(file_tests))) {
2259 fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
2260 ARRAY_SIZE(file_tests));
2261 return -1;
2262 }
2263
2264 if (args.get_info_test_num &&
2265 (args.get_info_test_num < 1 ||
2266 args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
2267 fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
2268 ARRAY_SIZE(get_info_tests));
2269 return -1;
2270 }
2271
2272 return 0;
2273}
2274
/* Print aggregate results; pass_cnt includes skipped tests, so the
 * PASS column subtracts them out.
 */
static void print_summary(void)
{
	fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
		pass_cnt - skip_cnt, skip_cnt, error_cnt);
}
2280
/*
 * Entry point: run the test categories selected on the command line,
 * or — when none was selected — run the raw, get_info and file suites.
 * The exit status is the OR of the individual suite results.
 */
int main(int argc, char **argv)
{
	int err = 0;

	err = parse_args(argc, argv);
	if (err)
		return err;

	if (args.always_log)
		libbpf_set_print(__base_pr, __base_pr, __base_pr);

	if (args.raw_test)
		err |= test_raw();

	if (args.get_info_test)
		err |= test_get_info();

	if (args.file_test)
		err |= test_file();

	if (args.pprint_test)
		err |= count_result(test_pprint());

	/* Any explicit selection suppresses the default "run all" path. */
	if (args.raw_test || args.get_info_test || args.file_test ||
	    args.pprint_test)
		goto done;

	err |= test_raw();
	err |= test_get_info();
	err |= test_file();

done:
	print_summary();
	return err;
}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000000..8c7ca096ecf2
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,48 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3#include <linux/bpf.h>
4#include "bpf_helpers.h"
5
6int _version SEC("version") = 1;
7
8struct ipv_counts {
9 unsigned int v4;
10 unsigned int v6;
11};
12
13typedef int btf_map_key;
14typedef struct ipv_counts btf_map_value;
15btf_map_key dumm_key;
16btf_map_value dummy_value;
17
18struct bpf_map_def SEC("maps") btf_map = {
19 .type = BPF_MAP_TYPE_ARRAY,
20 .key_size = sizeof(int),
21 .value_size = sizeof(struct ipv_counts),
22 .max_entries = 4,
23};
24
25struct dummy_tracepoint_args {
26 unsigned long long pad;
27 struct sock *sock;
28};
29
SEC("dummy_tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
	struct ipv_counts *counts;
	int key = 0;

	/* Only count events that carry a socket pointer. */
	if (!arg->sock)
		return 0;

	counts = bpf_map_lookup_elem(&btf_map, &key);
	if (!counts)
		return 0;

	/* Unconditional bump of v6; the selftest exercises the map/BTF
	 * plumbing rather than address-family dispatch.
	 */
	counts->v6++;

	return 0;
}
47
48char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000000..0ed8e088eebf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2018 Facebook */
3#include <linux/bpf.h>
4#include "bpf_helpers.h"
5
6int _version SEC("version") = 1;
7
8struct ipv_counts {
9 unsigned int v4;
10 unsigned int v6;
11};
12
13struct bpf_map_def SEC("maps") btf_map = {
14 .type = BPF_MAP_TYPE_ARRAY,
15 .key_size = sizeof(int),
16 .value_size = sizeof(struct ipv_counts),
17 .max_entries = 4,
18};
19
20struct dummy_tracepoint_args {
21 unsigned long long pad;
22 struct sock *sock;
23};
24
SEC("dummy_tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
	struct ipv_counts *counts;
	int key = 0;

	/* Only count events that carry a socket pointer. */
	if (!arg->sock)
		return 0;

	counts = bpf_map_lookup_elem(&btf_map, &key);
	if (!counts)
		return 0;

	/* Unconditional bump of v6; same program as test_btf_haskv.c but
	 * the map here intentionally has no BTF key/value annotation.
	 */
	counts->v6++;

	return 0;
}
42
43char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 000000000000..f6d9f238e00a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/bpf.h>
4#include "bpf_helpers.h"
5
6/* Permit pretty deep stack traces */
7#define MAX_STACK_RAWTP 100
8struct stack_trace_t {
9 int pid;
10 int kern_stack_size;
11 int user_stack_size;
12 int user_stack_buildid_size;
13 __u64 kern_stack[MAX_STACK_RAWTP];
14 __u64 user_stack[MAX_STACK_RAWTP];
15 struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
16};
17
18struct bpf_map_def SEC("maps") perfmap = {
19 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
20 .key_size = sizeof(int),
21 .value_size = sizeof(__u32),
22 .max_entries = 2,
23};
24
25struct bpf_map_def SEC("maps") stackdata_map = {
26 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(struct stack_trace_t),
29 .max_entries = 1,
30};
31
32/* Allocate per-cpu space twice the needed. For the code below
33 * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
34 * if (usize < 0)
35 * return 0;
36 * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
37 *
38 * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
39 * verifier will complain that access "raw_data + usize"
40 * with size "max_len - usize" may be out of bound.
41 * The maximum "raw_data + usize" is "raw_data + max_len"
42 * and the maximum "max_len - usize" is "max_len", verifier
43 * concludes that the maximum buffer access range is
44 * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
45 *
46 * Doubling the to-be-used max buffer size can fix this verifier
47 * issue and avoid complicated C programming massaging.
48 * This is an acceptable workaround since there is one entry here.
49 */
50struct bpf_map_def SEC("maps") rawdata_map = {
51 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
52 .key_size = sizeof(__u32),
53 .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
54 .max_entries = 1,
55};
56
/*
 * On every sys_enter: capture the kernel stack, the user stack, and the
 * build-id-annotated user stack into stackdata_map, emit that record via
 * perfmap, then capture user+kernel stacks back-to-back into one raw
 * buffer and emit that too (exercises bpf_get_stack with a partial-length
 * second call; see the comment above rawdata_map for why it is 2x sized).
 */
SEC("tracepoint/raw_syscalls/sys_enter")
int bpf_prog1(void *ctx)
{
	int max_len, max_buildid_len, usize, ksize, total_size;
	struct stack_trace_t *data;
	void *raw_data;
	__u32 key = 0;

	data = bpf_map_lookup_elem(&stackdata_map, &key);
	if (!data)
		return 0;

	max_len = MAX_STACK_RAWTP * sizeof(__u64);
	max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
	data->pid = bpf_get_current_pid_tgid();
	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
					      max_len, 0);
	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
					      BPF_F_USER_STACK);
	data->user_stack_buildid_size = bpf_get_stack(
		ctx, data->user_stack_buildid, max_buildid_len,
		BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));

	/* write both kernel and user stacks to the same buffer */
	raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
	if (!raw_data)
		return 0;

	usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
	if (usize < 0)
		return 0;

	/* Kernel stack is appended right after the user stack. */
	ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
	if (ksize < 0)
		return 0;

	total_size = usize + ksize;
	if (total_size > 0 && total_size <= max_len)
		bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);

	return 0;
}
100
101char _license[] SEC("license") = "GPL";
102__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
new file mode 100755
index 000000000000..ce2e15e4f976
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -0,0 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Locate the lirc chardev backed by the rc-loopback driver and run the
# BPF lirc_mode2 decoder test against it.

GREEN='\033[0;92m'
RED='\033[0;31m'
NC='\033[0m' # No Color

modprobe rc-loopback

for i in /sys/class/rc/rc*
do
	if grep -q DRV_NAME=rc-loopback "$i/uevent"
	then
		LIRCDEV=$(grep DEVNAME= "$i"/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
	fi
done

# The expansion must be quoted: an unquoted empty $LIRCDEV collapses the
# test to the one-argument form `[ -n ]`, which is always true, so the
# test would previously run with an empty device path when no
# rc-loopback device was found.
if [ -n "$LIRCDEV" ];
then
	TYPE=lirc_mode2
	./test_lirc_mode2_user "$LIRCDEV"
	ret=$?
	if [ $ret -ne 0 ]; then
		echo -e ${RED}"FAIL: $TYPE"${NC}
	else
		echo -e ${GREEN}"PASS: $TYPE"${NC}
	fi
fi
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
new file mode 100644
index 000000000000..ba26855563a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
@@ -0,0 +1,23 @@
1// SPDX-License-Identifier: GPL-2.0
2// test ir decoder
3//
4// Copyright (C) 2018 Sean Young <sean@mess.org>
5
6#include <linux/bpf.h>
7#include <linux/lirc.h>
8#include "bpf_helpers.h"
9
10SEC("lirc_mode2")
11int bpf_decoder(unsigned int *sample)
12{
13 if (LIRC_IS_PULSE(*sample)) {
14 unsigned int duration = LIRC_VALUE(*sample);
15
16 if (duration & 0x10000)
17 bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
18 }
19
20 return 0;
21}
22
23char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
new file mode 100644
index 000000000000..d470d63c33db
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -0,0 +1,149 @@
1// SPDX-License-Identifier: GPL-2.0
2// test ir decoder
3//
4// Copyright (C) 2018 Sean Young <sean@mess.org>
5
6// A lirc chardev is a device representing a consumer IR (cir) device which
7// can receive infrared signals from remote control and/or transmit IR.
8//
9// IR is sent as a series of pulses and space somewhat like morse code. The
10// BPF program can decode this into scancodes so that rc-core can translate
11// this into input key codes using the rc keymap.
12//
13// This test works by sending IR over rc-loopback, so the IR is processed by
14// BPF and then decoded into scancodes. The lirc chardev must be the one
15// associated with rc-loopback, see the output of ir-keytable(1).
16//
17// The following CONFIG options must be enabled for the test to succeed:
18// CONFIG_RC_CORE=y
19// CONFIG_BPF_RAWIR_EVENT=y
20// CONFIG_RC_LOOPBACK=y
21
22// Steps:
23// 1. Open the /dev/lircN device for rc-loopback (given on command line)
24// 2. Attach bpf_lirc_mode2 program which decodes some IR.
25// 3. Send some IR to the same IR device; since it is loopback, this will
26// end up in the bpf program
27// 4. bpf program should decode IR and report keycode
28// 5. We can read keycode from same /dev/lirc device
29
30#include <linux/bpf.h>
31#include <linux/lirc.h>
32#include <errno.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <unistd.h>
37#include <poll.h>
38#include <sys/types.h>
39#include <sys/ioctl.h>
40#include <sys/stat.h>
41#include <fcntl.h>
42
43#include "bpf_util.h"
44#include <bpf/bpf.h>
45#include <bpf/libbpf.h>
46
/*
 * Userspace driver for the lirc_mode2 BPF test: load the decoder
 * program, attach it to the given lirc chardev (rc-loopback), write a
 * raw IR pulse, and verify the BPF program decoded it into the expected
 * scancode. Also exercises detach-before-attach and prog_query counts.
 *
 * Exit status: 0 pass, 1 fail, 2 usage error.
 */
int main(int argc, char **argv)
{
	struct bpf_object *obj;
	int ret, lircfd, progfd, mode;
	int testir = 0x1dead;
	u32 prog_ids[10], prog_flags[10], prog_cnt;

	if (argc != 2) {
		printf("Usage: %s /dev/lircN\n", argv[0]);
		return 2;
	}

	ret = bpf_prog_load("test_lirc_mode2_kern.o",
			    BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
	if (ret) {
		printf("Failed to load bpf program\n");
		return 1;
	}

	lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
	if (lircfd == -1) {
		printf("failed to open lirc device %s: %m\n", argv[1]);
		return 1;
	}

	/* Let's try detach it before it was ever attached */
	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
	if (ret != -1 || errno != ENOENT) {
		printf("bpf_prog_detach2 not attached should fail: %m\n");
		return 1;
	}

	/* Receive decoded scancodes rather than raw mode2 samples. */
	mode = LIRC_MODE_SCANCODE;
	if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
		printf("failed to set rec mode: %m\n");
		return 1;
	}

	/* Nothing attached yet, so the query must report zero programs. */
	prog_cnt = 10;
	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
			     &prog_cnt);
	if (ret) {
		printf("Failed to query bpf programs on lirc device: %m\n");
		return 1;
	}

	if (prog_cnt != 0) {
		printf("Expected nothing to be attached\n");
		return 1;
	}

	ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
	if (ret) {
		printf("Failed to attach bpf to lirc device: %m\n");
		return 1;
	}

	/* Write raw IR; rc-loopback feeds it straight back to the decoder. */
	ret = write(lircfd, &testir, sizeof(testir));
	if (ret != sizeof(testir)) {
		printf("Failed to send test IR message: %m\n");
		return 1;
	}

	struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
	struct lirc_scancode lsc;

	poll(&pfd, 1, 100);

	/* Read decoded IR */
	ret = read(lircfd, &lsc, sizeof(lsc));
	if (ret != sizeof(lsc)) {
		printf("Failed to read decoded IR: %m\n");
		return 1;
	}

	/* 0x1dead & 0xffff == 0xdead; rc_proto 64 comes from the decoder's
	 * bpf_rc_keydown(sample, 0x40, ...) call.
	 */
	if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
		printf("Incorrect scancode decoded\n");
		return 1;
	}

	prog_cnt = 10;
	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
			     &prog_cnt);
	if (ret) {
		printf("Failed to query bpf programs on lirc device: %m\n");
		return 1;
	}

	if (prog_cnt != 1) {
		printf("Expected one program to be attached\n");
		return 1;
	}

	/* Let's try detaching it now it is actually attached */
	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
	if (ret) {
		printf("bpf_prog_detach2: returned %m\n");
		return 1;
	}

	return 0;
}
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000000..0575751bc1bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
1#include <stddef.h>
2#include <inttypes.h>
3#include <errno.h>
4#include <linux/seg6_local.h>
5#include <linux/bpf.h>
6#include "bpf_helpers.h"
7#include "bpf_endian.h"
8
9#define bpf_printk(fmt, ...) \
10({ \
11 char ____fmt[] = fmt; \
12 bpf_trace_printk(____fmt, sizeof(____fmt), \
13 ##__VA_ARGS__); \
14})
15
16/* Packet parsing state machine helpers. */
17#define cursor_advance(_cursor, _len) \
18 ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
19
20#define SR6_FLAG_ALERT (1 << 4)
21
22#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
23 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
24#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
25 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
26#define BPF_PACKET_HEADER __attribute__((packed))
27
28struct ip6_t {
29 unsigned int ver:4;
30 unsigned int priority:8;
31 unsigned int flow_label:20;
32 unsigned short payload_len;
33 unsigned char next_header;
34 unsigned char hop_limit;
35 unsigned long long src_hi;
36 unsigned long long src_lo;
37 unsigned long long dst_hi;
38 unsigned long long dst_lo;
39} BPF_PACKET_HEADER;
40
41struct ip6_addr_t {
42 unsigned long long hi;
43 unsigned long long lo;
44} BPF_PACKET_HEADER;
45
46struct ip6_srh_t {
47 unsigned char nexthdr;
48 unsigned char hdrlen;
49 unsigned char type;
50 unsigned char segments_left;
51 unsigned char first_segment;
52 unsigned char flags;
53 unsigned short tag;
54
55 struct ip6_addr_t segments[0];
56} BPF_PACKET_HEADER;
57
58struct sr6_tlv_t {
59 unsigned char type;
60 unsigned char len;
61 unsigned char value[0];
62} BPF_PACKET_HEADER;
63
/*
 * Walk the packet and return a pointer to the IPv6 Segment Routing
 * Header, or NULL if the packet is not IPv6 or does not carry a type-4
 * routing header immediately after the IPv6 header. Every access is
 * bounds-checked against data_end to satisfy the BPF verifier.
 */
__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
{
	void *cursor, *data_end;
	struct ip6_srh_t *srh;
	struct ip6_t *ip;
	uint8_t *ipver;

	data_end = (void *)(long)skb->data_end;
	cursor = (void *)(long)skb->data;
	ipver = (uint8_t *)cursor;

	if ((void *)ipver + sizeof(*ipver) > data_end)
		return NULL;

	/* High nibble of the first byte is the IP version. */
	if ((*ipver >> 4) != 6)
		return NULL;

	ip = cursor_advance(cursor, sizeof(*ip));
	if ((void *)ip + sizeof(*ip) > data_end)
		return NULL;

	/* 43 == IPPROTO_ROUTING */
	if (ip->next_header != 43)
		return NULL;

	srh = cursor_advance(cursor, sizeof(*srh));
	if ((void *)srh + sizeof(*srh) > data_end)
		return NULL;

	/* Routing type 4 == Segment Routing Header */
	if (srh->type != 4)
		return NULL;

	return srh;
}
97
/*
 * Resize the padding TLV at pad_off from old_pad to new_pad bytes and,
 * if padding remains, rewrite it as a zeroed SR6_TLV_PADDING TLV.
 * Keeps the SRH length a multiple of 8 after TLV insertion/removal.
 * Returns 0 on success or the helper's error code.
 */
__attribute__((always_inline))
int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
		   uint32_t old_pad, uint32_t pad_off)
{
	int err;

	if (new_pad != old_pad) {
		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
					      (int) new_pad - (int) old_pad);
		if (err)
			return err;
	}

	if (new_pad > 0) {
		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
					0, 0, 0};
		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;

		pad_tlv->type = SR6_TLV_PADDING;
		/* TLV "len" excludes the 2-byte type/len header itself. */
		pad_tlv->len = new_pad - 2;

		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
					       (void *)pad_tlv_buf, new_pad);
		if (err)
			return err;
	}

	return 0;
}
127
/*
 * Scan the TLV area of the SRH to validate *tlv_off as a TLV boundary
 * and to locate any existing padding TLV.
 *
 * On return: *pad_size/*pad_off describe the padding TLV (or *pad_off
 * is the end of the scanned TLVs if none was found); a *tlv_off of -1
 * is replaced with the first free boundary. Returns -EINVAL when the
 * requested offset does not fall on a TLV boundary.
 */
__attribute__((always_inline))
int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
			  uint32_t *tlv_off, uint32_t *pad_size,
			  uint32_t *pad_off)
{
	uint32_t srh_off, cur_off;
	int offset_valid = 0;
	int err;

	srh_off = (char *)srh - (char *)(long)skb->data;
	// cur_off = end of segments, start of possible TLVs
	cur_off = srh_off + sizeof(*srh) +
		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);

	*pad_off = 0;

	// we can only go as far as ~10 TLVs due to the BPF max stack size
	#pragma clang loop unroll(full)
	for (int i = 0; i < 10; i++) {
		struct sr6_tlv_t tlv;

		if (cur_off == *tlv_off)
			offset_valid = 1;

		/* (hdrlen + 1) << 3 is the total SRH length in bytes. */
		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
			break;

		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
		if (err)
			return err;

		if (tlv.type == SR6_TLV_PADDING) {
			*pad_size = tlv.len + sizeof(tlv);
			*pad_off = cur_off;

			if (*tlv_off == srh_off) {
				*tlv_off = cur_off;
				offset_valid = 1;
			}
			break;

		} else if (tlv.type == SR6_TLV_HMAC) {
			break;
		}

		cur_off += sizeof(tlv) + tlv.len;
	} // we reached the padding or HMAC TLVs, or the end of the SRH

	if (*pad_off == 0)
		*pad_off = cur_off;

	/* -1 means "append": use the first boundary after existing TLVs. */
	if (*tlv_off == -1)
		*tlv_off = cur_off;
	else if (!offset_valid)
		return -EINVAL;

	return 0;
}
186
/*
 * Insert TLV *itlv at SRH-relative offset tlv_off (-1 == append), then
 * recompute and apply the padding needed to keep the SRH 8-byte
 * aligned. Padding and HMAC TLVs cannot be inserted this way.
 * Returns 0 on success or a negative error.
 */
__attribute__((always_inline))
int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	uint32_t partial_srh_len;
	int err;

	/* Convert SRH-relative offset to packet-relative, except for -1. */
	if (tlv_off != -1)
		tlv_off += srh_off;

	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
		return -EINVAL;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	/* Grow the SRH, then write the new TLV into the gap. */
	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
	if (err)
		return err;

	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
	if (err)
		return err;

	// the following can't be moved inside update_tlv_pad because the
	// bpf verifier has some issues with it
	pad_off += sizeof(*itlv) + itlv->len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;

	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
230
/*
 * Remove the TLV at SRH-relative offset tlv_off, then recompute and
 * apply the padding needed to keep the SRH 8-byte aligned.
 * Returns 0 on success or a negative error.
 */
__attribute__((always_inline))
int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
	       uint32_t tlv_off)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t partial_srh_len;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	struct sr6_tlv_t tlv;
	int err;

	/* Convert SRH-relative offset to packet-relative. */
	tlv_off += srh_off;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	/* Read the TLV header to learn how many bytes to shrink by. */
	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
	if (err)
		return err;

	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
	if (err)
		return err;

	pad_off -= sizeof(tlv) + tlv.len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;
	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
268
/*
 * Return 1 if the first TLV after the segment list is an Egress TLV
 * whose address value is fd00::4 (the value installed by __add_egr_x),
 * 0 otherwise or on any load failure.
 */
__attribute__((always_inline))
int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
{
	/* (first_segment + 1) << 4: each segment is a 16-byte address. */
	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
		((srh->first_segment + 1) << 4);
	struct sr6_tlv_t tlv;

	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
		return 0;

	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
		struct ip6_addr_t egr_addr;

		/* +4 skips the TLV header and 2 reserved value bytes. */
		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
			return 0;

		// check if egress TLV value is correct
		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
		    ntohll(egr_addr.lo) == 0x4)
			return 1;
	}

	return 0;
}
293
// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
// fd00::4
SEC("encap_srh")
int __encap_srh(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfd00000000000000;
	struct ip6_addr_t *seg;
	struct ip6_srh_t *srh;
	char srh_buf[72]; // room for 4 segments
	int err;

	/* Build the SRH on the stack, then push it via the LWT helper. */
	srh = (struct ip6_srh_t *)srh_buf;
	srh->nexthdr = 0;
	srh->hdrlen = 8;	/* (8 + 1) * 8 = 72 bytes total */
	srh->type = 4;
	srh->segments_left = 3;
	srh->first_segment = 3;
	srh->flags = 0;
	srh->tag = 0;

	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));

	/* Segment list is stored in reverse traversal order: the first
	 * array entry holds the last segment (fd00::4 .. fd00::1).
	 */
	#pragma clang loop unroll(full)
	for (unsigned long long lo = 0; lo < 4; lo++) {
		seg->lo = htonll(4 - lo);
		seg->hi = htonll(hi);
		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
	}

	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}
329
// Add an Egress TLV fc00::4, add the flag A,
// and apply End.X action to fc42::1
SEC("add_egr_x")
int __add_egr_x(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfc42000000000000;
	unsigned long long lo = 0x1;
	struct ip6_srh_t *srh = get_srh(skb);
	uint8_t new_flags = SR6_FLAG_ALERT;
	struct ip6_addr_t addr;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	/* Egress TLV: type 2, len 18 (2 reserved bytes + 16-byte fd00::4). */
	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};

	/* (hdrlen+1) << 3 == end of the SRH: append the TLV there. */
	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
		      (struct sr6_tlv_t *)&tlv, 20);
	if (err)
		return BPF_DROP;

	/* Set the Alert flag directly in the packet's SRH. */
	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	err = bpf_lwt_seg6_store_bytes(skb, offset,
				       (void *)&new_flags, sizeof(new_flags));
	if (err)
		return BPF_DROP;

	/* End.X: forward to layer-3 adjacency fc42::1. */
	addr.lo = htonll(lo);
	addr.hi = htonll(hi);
	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
				  (void *)&addr, sizeof(addr));
	if (err)
		return BPF_DROP;
	return BPF_REDIRECT;
}
367
// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
// simple End action
SEC("pop_egr")
int __pop_egr(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	uint16_t new_tag = bpf_htons(2442);
	uint8_t new_flags = 0;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	/* Validate what the previous hop (__add_egr_x) was supposed to do. */
	if (srh->flags != SR6_FLAG_ALERT)
		return BPF_DROP;

	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
		return BPF_DROP;

	if (!has_egr_tlv(skb, srh))
		return BPF_DROP;

	/* 8 = sizeof SRH fixed part; TLVs start after the segment list. */
	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
	if (err)
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
				     sizeof(new_flags)))
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
				     sizeof(new_tag)))
		return BPF_DROP;

	return BPF_OK;
}
406
// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
// then apply a End.T action to reach the last segment
SEC("inspect_t")
int __inspect_t(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	int table = 117;
	int err;

	if (srh == NULL)
		return BPF_DROP;

	/* Validate what the previous hop (__pop_egr) was supposed to do. */
	if (srh->flags != 0)
		return BPF_DROP;

	if (srh->tag != bpf_htons(2442))
		return BPF_DROP;

	if (srh->hdrlen != 8) // 4 segments
		return BPF_DROP;

	/* End.T: lookup the next segment in routing table 117. */
	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
				  (void *)&table, sizeof(table));

	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}
436
437char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000000..1c77994b5e71
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,140 @@
#!/bin/bash
# Connects 6 network namespaces through veths.
# Each NS may have different IPv6 global scope addresses :
# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
# fc42::1 fd00::4
#
# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
# IPv6 header with a Segment Routing Header, with segments :
# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
#
# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions :
# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
# - fd00::2 : remove the TLV, change the flags, add a tag
# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
#
# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
# Each End.BPF action will validate the operations applied on the SRH by the
# previous BPF program in the chain, otherwise the packet is dropped.
#
# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
# datagram can be read on NS6 when binding to fb00::6.

TMP_FILE="/tmp/selftest_lwt_seg6local.txt"

cleanup()
{
	if [ "$?" = "0" ]; then
		echo "selftests: test_lwt_seg6local [PASS]";
	else
		echo "selftests: test_lwt_seg6local [FAILED]";
	fi

	set +e
	ip netns del ns1 2> /dev/null
	ip netns del ns2 2> /dev/null
	ip netns del ns3 2> /dev/null
	ip netns del ns4 2> /dev/null
	ip netns del ns5 2> /dev/null
	ip netns del ns6 2> /dev/null
	rm -f "$TMP_FILE"
}

set -e

ip netns add ns1
ip netns add ns2
ip netns add ns3
ip netns add ns4
ip netns add ns5
ip netns add ns6

# Fix: SIGKILL (9) cannot be trapped, so it was dropped from the list.
trap cleanup 0 2 3 6

ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip link add veth9 type veth peer name veth10

ip link set veth1 netns ns1
ip link set veth2 netns ns2
ip link set veth3 netns ns2
ip link set veth4 netns ns3
ip link set veth5 netns ns3
ip link set veth6 netns ns4
ip link set veth7 netns ns4
ip link set veth8 netns ns5
ip link set veth9 netns ns5
ip link set veth10 netns ns6

ip netns exec ns1 ip link set dev veth1 up
ip netns exec ns2 ip link set dev veth2 up
ip netns exec ns2 ip link set dev veth3 up
ip netns exec ns3 ip link set dev veth4 up
ip netns exec ns3 ip link set dev veth5 up
ip netns exec ns4 ip link set dev veth6 up
ip netns exec ns4 ip link set dev veth7 up
ip netns exec ns5 ip link set dev veth8 up
ip netns exec ns5 ip link set dev veth9 up
ip netns exec ns6 ip link set dev veth10 up
ip netns exec ns6 ip link set dev lo up

# All link scope addresses and routes required between veths
ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link

ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21

ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link

ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4

ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
ip netns exec ns4 ip -6 addr add fc42::1 dev lo
ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87

ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8

ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo

ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null

ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null

ip netns exec ns6 nc -l -6 -u -d 7330 > "$TMP_FILE" &
# Fix: capture nc's PID immediately; relying on $! at kill time breaks if
# any other background job is ever started in between.
NC_PID=$!
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
kill -INT $NC_PID

if [[ $(< "$TMP_FILE") != "foobar" ]]; then
	exit 1
fi

exit 0
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e78aad0a68bb..be800d0e7a84 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True):
163 163
164def bpftool_prog_list(expected=None, ns=""): 164def bpftool_prog_list(expected=None, ns=""):
165 _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) 165 _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
166 # Remove the base progs
167 for p in base_progs:
168 if p in progs:
169 progs.remove(p)
166 if expected is not None: 170 if expected is not None:
167 if len(progs) != expected: 171 if len(progs) != expected:
168 fail(True, "%d BPF programs loaded, expected %d" % 172 fail(True, "%d BPF programs loaded, expected %d" %
@@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""):
171 175
172def bpftool_map_list(expected=None, ns=""): 176def bpftool_map_list(expected=None, ns=""):
173 _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) 177 _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
178 # Remove the base maps
179 for m in base_maps:
180 if m in maps:
181 maps.remove(m)
174 if expected is not None: 182 if expected is not None:
175 if len(maps) != expected: 183 if len(maps) != expected:
176 fail(True, "%d BPF maps loaded, expected %d" % 184 fail(True, "%d BPF maps loaded, expected %d" %
@@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root")
585# Check tools 593# Check tools
586ret, progs = bpftool("prog", fail=False) 594ret, progs = bpftool("prog", fail=False)
587skip(ret != 0, "bpftool not installed") 595skip(ret != 0, "bpftool not installed")
588# Check no BPF programs are loaded 596base_progs = progs
589skip(len(progs) != 0, "BPF programs already loaded on the system") 597_, base_maps = bpftool("map")
590 598
591# Check netdevsim 599# Check netdevsim
592ret, out = cmd("modprobe netdevsim", fail=False) 600ret, out = cmd("modprobe netdevsim", fail=False)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe233966..0ef68204c84b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -38,8 +38,10 @@ typedef __u16 __sum16;
38#include "bpf_util.h" 38#include "bpf_util.h"
39#include "bpf_endian.h" 39#include "bpf_endian.h"
40#include "bpf_rlimit.h" 40#include "bpf_rlimit.h"
41#include "trace_helpers.h"
41 42
42static int error_cnt, pass_cnt; 43static int error_cnt, pass_cnt;
44static bool jit_enabled;
43 45
44#define MAGIC_BYTES 123 46#define MAGIC_BYTES 123
45 47
@@ -166,6 +168,37 @@ out:
166 bpf_object__close(obj); 168 bpf_object__close(obj);
167} 169}
168 170
171static void test_xdp_adjust_tail(void)
172{
173 const char *file = "./test_adjust_tail.o";
174 struct bpf_object *obj;
175 char buf[128];
176 __u32 duration, retval, size;
177 int err, prog_fd;
178
179 err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
180 if (err) {
181 error_cnt++;
182 return;
183 }
184
185 err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
186 buf, &size, &retval, &duration);
187
188 CHECK(err || errno || retval != XDP_DROP,
189 "ipv4", "err %d errno %d retval %d size %d\n",
190 err, errno, retval, size);
191
192 err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
193 buf, &size, &retval, &duration);
194 CHECK(err || errno || retval != XDP_TX || size != 54,
195 "ipv6", "err %d errno %d retval %d size %d\n",
196 err, errno, retval, size);
197 bpf_object__close(obj);
198}
199
200
201
169#define MAGIC_VAL 0x1234 202#define MAGIC_VAL 0x1234
170#define NUM_ITER 100000 203#define NUM_ITER 100000
171#define VIP_NUM 5 204#define VIP_NUM 5
@@ -360,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr)
360 return (__u64) (unsigned long) ptr; 393 return (__u64) (unsigned long) ptr;
361} 394}
362 395
/* Report whether the BPF JIT is enabled on this host.
 *
 * Reads /proc/sys/net/core/bpf_jit_enable; any value other than '0'
 * counts as enabled.  Returns false if the sysctl cannot be read.
 */
static bool is_jit_enabled(void)
{
	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
	bool enabled = false;
	int sysctl_fd;

	/* Fix: open(2) takes the access flags as its second argument.  The
	 * old call open(path, 0, O_RDONLY) passed 0 as flags and O_RDONLY
	 * as the mode (which is ignored without O_CREAT); it only worked
	 * because O_RDONLY happens to be 0.
	 */
	sysctl_fd = open(jit_sysctl, O_RDONLY);
	if (sysctl_fd != -1) {
		char tmpc;

		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
			enabled = (tmpc != '0');
		close(sysctl_fd);
	}

	return enabled;
}
413
363static void test_bpf_obj_id(void) 414static void test_bpf_obj_id(void)
364{ 415{
365 const __u64 array_magic_value = 0xfaceb00c; 416 const __u64 array_magic_value = 0xfaceb00c;
366 const __u32 array_key = 0; 417 const __u32 array_key = 0;
367 const int nr_iters = 2; 418 const int nr_iters = 2;
368 const char *file = "./test_obj_id.o"; 419 const char *file = "./test_obj_id.o";
369 const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
370 const char *expected_prog_name = "test_obj_id"; 420 const char *expected_prog_name = "test_obj_id";
371 const char *expected_map_name = "test_map_id"; 421 const char *expected_map_name = "test_map_id";
372 const __u64 nsec_per_sec = 1000000000; 422 const __u64 nsec_per_sec = 1000000000;
@@ -383,20 +433,11 @@ static void test_bpf_obj_id(void)
383 char jited_insns[128], xlated_insns[128], zeros[128]; 433 char jited_insns[128], xlated_insns[128], zeros[128];
384 __u32 i, next_id, info_len, nr_id_found, duration = 0; 434 __u32 i, next_id, info_len, nr_id_found, duration = 0;
385 struct timespec real_time_ts, boot_time_ts; 435 struct timespec real_time_ts, boot_time_ts;
386 int sysctl_fd, jit_enabled = 0, err = 0; 436 int err = 0;
387 __u64 array_value; 437 __u64 array_value;
388 uid_t my_uid = getuid(); 438 uid_t my_uid = getuid();
389 time_t now, load_time; 439 time_t now, load_time;
390 440
391 sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
392 if (sysctl_fd != -1) {
393 char tmpc;
394
395 if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
396 jit_enabled = (tmpc != '0');
397 close(sysctl_fd);
398 }
399
400 err = bpf_prog_get_fd_by_id(0); 441 err = bpf_prog_get_fd_by_id(0);
401 CHECK(err >= 0 || errno != ENOENT, 442 CHECK(err >= 0 || errno != ENOENT,
402 "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); 443 "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
@@ -865,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd)
865 return 0; 906 return 0;
866} 907}
867 908
909static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
910{
911 __u32 key, next_key, *cur_key_p, *next_key_p;
912 char *val_buf1, *val_buf2;
913 int i, err = 0;
914
915 val_buf1 = malloc(stack_trace_len);
916 val_buf2 = malloc(stack_trace_len);
917 cur_key_p = NULL;
918 next_key_p = &key;
919 while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
920 err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
921 if (err)
922 goto out;
923 err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
924 if (err)
925 goto out;
926 for (i = 0; i < stack_trace_len; i++) {
927 if (val_buf1[i] != val_buf2[i]) {
928 err = -1;
929 goto out;
930 }
931 }
932 key = *next_key_p;
933 cur_key_p = &key;
934 next_key_p = &next_key;
935 }
936 if (errno != ENOENT)
937 err = -1;
938
939out:
940 free(val_buf1);
941 free(val_buf2);
942 return err;
943}
944
868static void test_stacktrace_map() 945static void test_stacktrace_map()
869{ 946{
870 int control_map_fd, stackid_hmap_fd, stackmap_fd; 947 int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
871 const char *file = "./test_stacktrace_map.o"; 948 const char *file = "./test_stacktrace_map.o";
872 int bytes, efd, err, pmu_fd, prog_fd; 949 int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
873 struct perf_event_attr attr = {}; 950 struct perf_event_attr attr = {};
874 __u32 key, val, duration = 0; 951 __u32 key, val, duration = 0;
875 struct bpf_object *obj; 952 struct bpf_object *obj;
@@ -925,6 +1002,10 @@ static void test_stacktrace_map()
925 if (stackmap_fd < 0) 1002 if (stackmap_fd < 0)
926 goto disable_pmu; 1003 goto disable_pmu;
927 1004
1005 stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
1006 if (stack_amap_fd < 0)
1007 goto disable_pmu;
1008
928 /* give some time for bpf program run */ 1009 /* give some time for bpf program run */
929 sleep(1); 1010 sleep(1);
930 1011
@@ -946,6 +1027,12 @@ static void test_stacktrace_map()
946 "err %d errno %d\n", err, errno)) 1027 "err %d errno %d\n", err, errno))
947 goto disable_pmu_noerr; 1028 goto disable_pmu_noerr;
948 1029
1030 stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
1031 err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
1032 if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
1033 "err %d errno %d\n", err, errno))
1034 goto disable_pmu_noerr;
1035
949 goto disable_pmu_noerr; 1036 goto disable_pmu_noerr;
950disable_pmu: 1037disable_pmu:
951 error_cnt++; 1038 error_cnt++;
@@ -1039,9 +1126,9 @@ err:
1039 1126
1040static void test_stacktrace_build_id(void) 1127static void test_stacktrace_build_id(void)
1041{ 1128{
1042 int control_map_fd, stackid_hmap_fd, stackmap_fd; 1129 int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
1043 const char *file = "./test_stacktrace_build_id.o"; 1130 const char *file = "./test_stacktrace_build_id.o";
1044 int bytes, efd, err, pmu_fd, prog_fd; 1131 int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
1045 struct perf_event_attr attr = {}; 1132 struct perf_event_attr attr = {};
1046 __u32 key, previous_key, val, duration = 0; 1133 __u32 key, previous_key, val, duration = 0;
1047 struct bpf_object *obj; 1134 struct bpf_object *obj;
@@ -1106,9 +1193,14 @@ static void test_stacktrace_build_id(void)
1106 err, errno)) 1193 err, errno))
1107 goto disable_pmu; 1194 goto disable_pmu;
1108 1195
1196 stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
1197 if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
1198 "err %d errno %d\n", err, errno))
1199 goto disable_pmu;
1200
1109 assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") 1201 assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
1110 == 0); 1202 == 0);
1111 assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); 1203 assert(system("./urandom_read") == 0);
1112 /* disable stack trace collection */ 1204 /* disable stack trace collection */
1113 key = 0; 1205 key = 0;
1114 val = 1; 1206 val = 1;
@@ -1157,8 +1249,15 @@ static void test_stacktrace_build_id(void)
1157 previous_key = key; 1249 previous_key = key;
1158 } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); 1250 } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
1159 1251
1160 CHECK(build_id_matches < 1, "build id match", 1252 if (CHECK(build_id_matches < 1, "build id match",
1161 "Didn't find expected build ID from the map"); 1253 "Didn't find expected build ID from the map\n"))
1254 goto disable_pmu;
1255
1256 stack_trace_len = PERF_MAX_STACK_DEPTH
1257 * sizeof(struct bpf_stack_build_id);
1258 err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
1259 CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
1260 "err %d errno %d\n", err, errno);
1162 1261
1163disable_pmu: 1262disable_pmu:
1164 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); 1263 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
@@ -1173,10 +1272,439 @@ out:
1173 return; 1272 return;
1174} 1273}
1175 1274
1275static void test_stacktrace_build_id_nmi(void)
1276{
1277 int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
1278 const char *file = "./test_stacktrace_build_id.o";
1279 int err, pmu_fd, prog_fd;
1280 struct perf_event_attr attr = {
1281 .sample_freq = 5000,
1282 .freq = 1,
1283 .type = PERF_TYPE_HARDWARE,
1284 .config = PERF_COUNT_HW_CPU_CYCLES,
1285 };
1286 __u32 key, previous_key, val, duration = 0;
1287 struct bpf_object *obj;
1288 char buf[256];
1289 int i, j;
1290 struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
1291 int build_id_matches = 0;
1292
1293 err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
1294 if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
1295 return;
1296
1297 pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
1298 0 /* cpu 0 */, -1 /* group id */,
1299 0 /* flags */);
1300 if (CHECK(pmu_fd < 0, "perf_event_open",
1301 "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
1302 pmu_fd, errno))
1303 goto close_prog;
1304
1305 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
1306 if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
1307 err, errno))
1308 goto close_pmu;
1309
1310 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
1311 if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
1312 err, errno))
1313 goto disable_pmu;
1314
1315 /* find map fds */
1316 control_map_fd = bpf_find_map(__func__, obj, "control_map");
1317 if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
1318 "err %d errno %d\n", err, errno))
1319 goto disable_pmu;
1320
1321 stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
1322 if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
1323 "err %d errno %d\n", err, errno))
1324 goto disable_pmu;
1325
1326 stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
1327 if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
1328 err, errno))
1329 goto disable_pmu;
1330
1331 stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
1332 if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
1333 "err %d errno %d\n", err, errno))
1334 goto disable_pmu;
1335
1336 assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
1337 == 0);
1338 assert(system("taskset 0x1 ./urandom_read 100000") == 0);
1339 /* disable stack trace collection */
1340 key = 0;
1341 val = 1;
1342 bpf_map_update_elem(control_map_fd, &key, &val, 0);
1343
1344 /* for every element in stackid_hmap, we can find a corresponding one
1345 * in stackmap, and vise versa.
1346 */
1347 err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
1348 if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
1349 "err %d errno %d\n", err, errno))
1350 goto disable_pmu;
1351
1352 err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
1353 if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
1354 "err %d errno %d\n", err, errno))
1355 goto disable_pmu;
1356
1357 err = extract_build_id(buf, 256);
1358
1359 if (CHECK(err, "get build_id with readelf",
1360 "err %d errno %d\n", err, errno))
1361 goto disable_pmu;
1362
1363 err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
1364 if (CHECK(err, "get_next_key from stackmap",
1365 "err %d, errno %d\n", err, errno))
1366 goto disable_pmu;
1367
1368 do {
1369 char build_id[64];
1370
1371 err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
1372 if (CHECK(err, "lookup_elem from stackmap",
1373 "err %d, errno %d\n", err, errno))
1374 goto disable_pmu;
1375 for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
1376 if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
1377 id_offs[i].offset != 0) {
1378 for (j = 0; j < 20; ++j)
1379 sprintf(build_id + 2 * j, "%02x",
1380 id_offs[i].build_id[j] & 0xff);
1381 if (strstr(buf, build_id) != NULL)
1382 build_id_matches = 1;
1383 }
1384 previous_key = key;
1385 } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
1386
1387 if (CHECK(build_id_matches < 1, "build id match",
1388 "Didn't find expected build ID from the map\n"))
1389 goto disable_pmu;
1390
1391 /*
1392 * We intentionally skip compare_stack_ips(). This is because we
1393 * only support one in_nmi() ips-to-build_id translation per cpu
1394 * at any time, thus stack_amap here will always fallback to
1395 * BPF_STACK_BUILD_ID_IP;
1396 */
1397
1398disable_pmu:
1399 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
1400
1401close_pmu:
1402 close(pmu_fd);
1403
1404close_prog:
1405 bpf_object__close(obj);
1406}
1407
1408#define MAX_CNT_RAWTP 10ull
1409#define MAX_STACK_RAWTP 100
1410struct get_stack_trace_t {
1411 int pid;
1412 int kern_stack_size;
1413 int user_stack_size;
1414 int user_stack_buildid_size;
1415 __u64 kern_stack[MAX_STACK_RAWTP];
1416 __u64 user_stack[MAX_STACK_RAWTP];
1417 struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
1418};
1419
1420static int get_stack_print_output(void *data, int size)
1421{
1422 bool good_kern_stack = false, good_user_stack = false;
1423 const char *nonjit_func = "___bpf_prog_run";
1424 struct get_stack_trace_t *e = data;
1425 int i, num_stack;
1426 static __u64 cnt;
1427 struct ksym *ks;
1428
1429 cnt++;
1430
1431 if (size < sizeof(struct get_stack_trace_t)) {
1432 __u64 *raw_data = data;
1433 bool found = false;
1434
1435 num_stack = size / sizeof(__u64);
1436 /* If jit is enabled, we do not have a good way to
1437 * verify the sanity of the kernel stack. So we
1438 * just assume it is good if the stack is not empty.
1439 * This could be improved in the future.
1440 */
1441 if (jit_enabled) {
1442 found = num_stack > 0;
1443 } else {
1444 for (i = 0; i < num_stack; i++) {
1445 ks = ksym_search(raw_data[i]);
1446 if (strcmp(ks->name, nonjit_func) == 0) {
1447 found = true;
1448 break;
1449 }
1450 }
1451 }
1452 if (found) {
1453 good_kern_stack = true;
1454 good_user_stack = true;
1455 }
1456 } else {
1457 num_stack = e->kern_stack_size / sizeof(__u64);
1458 if (jit_enabled) {
1459 good_kern_stack = num_stack > 0;
1460 } else {
1461 for (i = 0; i < num_stack; i++) {
1462 ks = ksym_search(e->kern_stack[i]);
1463 if (strcmp(ks->name, nonjit_func) == 0) {
1464 good_kern_stack = true;
1465 break;
1466 }
1467 }
1468 }
1469 if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
1470 good_user_stack = true;
1471 }
1472 if (!good_kern_stack || !good_user_stack)
1473 return LIBBPF_PERF_EVENT_ERROR;
1474
1475 if (cnt == MAX_CNT_RAWTP)
1476 return LIBBPF_PERF_EVENT_DONE;
1477
1478 return LIBBPF_PERF_EVENT_CONT;
1479}
1480
1481static void test_get_stack_raw_tp(void)
1482{
1483 const char *file = "./test_get_stack_rawtp.o";
1484 int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
1485 struct perf_event_attr attr = {};
1486 struct timespec tv = {0, 10};
1487 __u32 key = 0, duration = 0;
1488 struct bpf_object *obj;
1489
1490 err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
1491 if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
1492 return;
1493
1494 efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
1495 if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
1496 goto close_prog;
1497
1498 perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
1499 if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
1500 perfmap_fd, errno))
1501 goto close_prog;
1502
1503 err = load_kallsyms();
1504 if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
1505 goto close_prog;
1506
1507 attr.sample_type = PERF_SAMPLE_RAW;
1508 attr.type = PERF_TYPE_SOFTWARE;
1509 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
1510 pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
1511 -1/*group_fd*/, 0);
1512 if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
1513 errno))
1514 goto close_prog;
1515
1516 err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
1517 if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
1518 errno))
1519 goto close_prog;
1520
1521 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
1522 if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
1523 err, errno))
1524 goto close_prog;
1525
1526 err = perf_event_mmap(pmu_fd);
1527 if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
1528 goto close_prog;
1529
1530 /* trigger some syscall action */
1531 for (i = 0; i < MAX_CNT_RAWTP; i++)
1532 nanosleep(&tv, NULL);
1533
1534 err = perf_event_poller(pmu_fd, get_stack_print_output);
1535 if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
1536 goto close_prog;
1537
1538 goto close_prog_noerr;
1539close_prog:
1540 error_cnt++;
1541close_prog_noerr:
1542 bpf_object__close(obj);
1543}
1544
1545static void test_task_fd_query_rawtp(void)
1546{
1547 const char *file = "./test_get_stack_rawtp.o";
1548 __u64 probe_offset, probe_addr;
1549 __u32 len, prog_id, fd_type;
1550 struct bpf_object *obj;
1551 int efd, err, prog_fd;
1552 __u32 duration = 0;
1553 char buf[256];
1554
1555 err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
1556 if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
1557 return;
1558
1559 efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
1560 if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
1561 goto close_prog;
1562
1563 /* query (getpid(), efd) */
1564 len = sizeof(buf);
1565 err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
1566 &fd_type, &probe_offset, &probe_addr);
1567 if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
1568 errno))
1569 goto close_prog;
1570
1571 err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
1572 strcmp(buf, "sys_enter") == 0;
1573 if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
1574 fd_type, buf))
1575 goto close_prog;
1576
1577 /* test zero len */
1578 len = 0;
1579 err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
1580 &fd_type, &probe_offset, &probe_addr);
1581 if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
1582 err, errno))
1583 goto close_prog;
1584 err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
1585 len == strlen("sys_enter");
1586 if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
1587 goto close_prog;
1588
1589 /* test empty buffer */
1590 len = sizeof(buf);
1591 err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
1592 &fd_type, &probe_offset, &probe_addr);
1593 if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
1594 err, errno))
1595 goto close_prog;
1596 err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
1597 len == strlen("sys_enter");
1598 if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
1599 goto close_prog;
1600
1601 /* test smaller buffer */
1602 len = 3;
1603 err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
1604 &fd_type, &probe_offset, &probe_addr);
1605 if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
1606 "err %d errno %d\n", err, errno))
1607 goto close_prog;
1608 err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
1609 len == strlen("sys_enter") &&
1610 strcmp(buf, "sy") == 0;
1611 if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
1612 goto close_prog;
1613
1614 goto close_prog_noerr;
1615close_prog:
1616 error_cnt++;
1617close_prog_noerr:
1618 bpf_object__close(obj);
1619}
1620
1621static void test_task_fd_query_tp_core(const char *probe_name,
1622 const char *tp_name)
1623{
1624 const char *file = "./test_tracepoint.o";
1625 int err, bytes, efd, prog_fd, pmu_fd;
1626 struct perf_event_attr attr = {};
1627 __u64 probe_offset, probe_addr;
1628 __u32 len, prog_id, fd_type;
1629 struct bpf_object *obj;
1630 __u32 duration = 0;
1631 char buf[256];
1632
1633 err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
1634 if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
1635 goto close_prog;
1636
1637 snprintf(buf, sizeof(buf),
1638 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
1639 efd = open(buf, O_RDONLY, 0);
1640 if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
1641 goto close_prog;
1642 bytes = read(efd, buf, sizeof(buf));
1643 close(efd);
1644 if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
1645 "bytes %d errno %d\n", bytes, errno))
1646 goto close_prog;
1647
1648 attr.config = strtol(buf, NULL, 0);
1649 attr.type = PERF_TYPE_TRACEPOINT;
1650 attr.sample_type = PERF_SAMPLE_RAW;
1651 attr.sample_period = 1;
1652 attr.wakeup_events = 1;
1653 pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
1654 0 /* cpu 0 */, -1 /* group id */,
1655 0 /* flags */);
1656 if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
1657 goto close_pmu;
1658
1659 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
1660 if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
1661 errno))
1662 goto close_pmu;
1663
1664 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
1665 if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
1666 errno))
1667 goto close_pmu;
1668
1669 /* query (getpid(), pmu_fd) */
1670 len = sizeof(buf);
1671 err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
1672 &fd_type, &probe_offset, &probe_addr);
1673 if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
1674 errno))
1675 goto close_pmu;
1676
1677 err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
1678 if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
1679 fd_type, buf))
1680 goto close_pmu;
1681
1682 close(pmu_fd);
1683 goto close_prog_noerr;
1684
1685close_pmu:
1686 close(pmu_fd);
1687close_prog:
1688 error_cnt++;
1689close_prog_noerr:
1690 bpf_object__close(obj);
1691}
1692
/* Run the task_fd_query tracepoint check against both a scheduler
 * tracepoint and a syscall tracepoint.
 */
static void test_task_fd_query_tp(void)
{
	test_task_fd_query_tp_core("sched/sched_switch", "sched_switch");
	test_task_fd_query_tp_core("syscalls/sys_enter_read",
				   "sys_enter_read");
}
1700
1176int main(void) 1701int main(void)
1177{ 1702{
1703 jit_enabled = is_jit_enabled();
1704
1178 test_pkt_access(); 1705 test_pkt_access();
1179 test_xdp(); 1706 test_xdp();
1707 test_xdp_adjust_tail();
1180 test_l4lb_all(); 1708 test_l4lb_all();
1181 test_xdp_noinline(); 1709 test_xdp_noinline();
1182 test_tcp_estats(); 1710 test_tcp_estats();
@@ -1186,7 +1714,11 @@ int main(void)
1186 test_tp_attach_query(); 1714 test_tp_attach_query();
1187 test_stacktrace_map(); 1715 test_stacktrace_map();
1188 test_stacktrace_build_id(); 1716 test_stacktrace_build_id();
1717 test_stacktrace_build_id_nmi();
1189 test_stacktrace_map_raw_tp(); 1718 test_stacktrace_map_raw_tp();
1719 test_get_stack_raw_tp();
1720 test_task_fd_query_rawtp();
1721 test_task_fd_query_tp();
1190 1722
1191 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); 1723 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
1192 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; 1724 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2950f80ba7fb..a5e76b9219b9 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1,12 +1,16 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook 2// Copyright (c) 2018 Facebook
3 3
4#define _GNU_SOURCE
5
4#include <stdio.h> 6#include <stdio.h>
5#include <stdlib.h> 7#include <stdlib.h>
6#include <unistd.h> 8#include <unistd.h>
7 9
8#include <arpa/inet.h> 10#include <arpa/inet.h>
11#include <netinet/in.h>
9#include <sys/types.h> 12#include <sys/types.h>
13#include <sys/select.h>
10#include <sys/socket.h> 14#include <sys/socket.h>
11 15
12#include <linux/filter.h> 16#include <linux/filter.h>
@@ -17,34 +21,465 @@
17#include "cgroup_helpers.h" 21#include "cgroup_helpers.h"
18#include "bpf_rlimit.h" 22#include "bpf_rlimit.h"
19 23
24#ifndef ENOTSUPP
25# define ENOTSUPP 524
26#endif
27
28#ifndef ARRAY_SIZE
29# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
30#endif
31
20#define CG_PATH "/foo" 32#define CG_PATH "/foo"
21#define CONNECT4_PROG_PATH "./connect4_prog.o" 33#define CONNECT4_PROG_PATH "./connect4_prog.o"
22#define CONNECT6_PROG_PATH "./connect6_prog.o" 34#define CONNECT6_PROG_PATH "./connect6_prog.o"
35#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
36#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
23 37
24#define SERV4_IP "192.168.1.254" 38#define SERV4_IP "192.168.1.254"
25#define SERV4_REWRITE_IP "127.0.0.1" 39#define SERV4_REWRITE_IP "127.0.0.1"
40#define SRC4_IP "172.16.0.1"
41#define SRC4_REWRITE_IP "127.0.0.4"
26#define SERV4_PORT 4040 42#define SERV4_PORT 4040
27#define SERV4_REWRITE_PORT 4444 43#define SERV4_REWRITE_PORT 4444
28 44
29#define SERV6_IP "face:b00c:1234:5678::abcd" 45#define SERV6_IP "face:b00c:1234:5678::abcd"
30#define SERV6_REWRITE_IP "::1" 46#define SERV6_REWRITE_IP "::1"
47#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
48#define SRC6_IP "::1"
49#define SRC6_REWRITE_IP "::6"
31#define SERV6_PORT 6060 50#define SERV6_PORT 6060
32#define SERV6_REWRITE_PORT 6666 51#define SERV6_REWRITE_PORT 6666
33 52
34#define INET_NTOP_BUF 40 53#define INET_NTOP_BUF 40
35 54
36typedef int (*load_fn)(enum bpf_attach_type, const char *comment); 55struct sock_addr_test;
56
57typedef int (*load_fn)(const struct sock_addr_test *test);
37typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); 58typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
38 59
39struct program { 60char bpf_log_buf[BPF_LOG_BUF_SIZE];
40 enum bpf_attach_type type; 61
41 load_fn loadfn; 62struct sock_addr_test {
42 int fd; 63 const char *descr;
43 const char *name; 64 /* BPF prog properties */
44 enum bpf_attach_type invalid_type; 65 load_fn loadfn;
66 enum bpf_attach_type expected_attach_type;
67 enum bpf_attach_type attach_type;
68 /* Socket properties */
69 int domain;
70 int type;
71 /* IP:port pairs for BPF prog to override */
72 const char *requested_ip;
73 unsigned short requested_port;
74 const char *expected_ip;
75 unsigned short expected_port;
76 const char *expected_src_ip;
77 /* Expected test result */
78 enum {
79 LOAD_REJECT,
80 ATTACH_REJECT,
81 SYSCALL_EPERM,
82 SYSCALL_ENOTSUPP,
83 SUCCESS,
84 } expected_result;
45}; 85};
46 86
47char bpf_log_buf[BPF_LOG_BUF_SIZE]; 87static int bind4_prog_load(const struct sock_addr_test *test);
88static int bind6_prog_load(const struct sock_addr_test *test);
89static int connect4_prog_load(const struct sock_addr_test *test);
90static int connect6_prog_load(const struct sock_addr_test *test);
91static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
92static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
93static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
94static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
95static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
96static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
97
98static struct sock_addr_test tests[] = {
99 /* bind */
100 {
101 "bind4: load prog with wrong expected attach type",
102 bind4_prog_load,
103 BPF_CGROUP_INET6_BIND,
104 BPF_CGROUP_INET4_BIND,
105 AF_INET,
106 SOCK_STREAM,
107 NULL,
108 0,
109 NULL,
110 0,
111 NULL,
112 LOAD_REJECT,
113 },
114 {
115 "bind4: attach prog with wrong attach type",
116 bind4_prog_load,
117 BPF_CGROUP_INET4_BIND,
118 BPF_CGROUP_INET6_BIND,
119 AF_INET,
120 SOCK_STREAM,
121 NULL,
122 0,
123 NULL,
124 0,
125 NULL,
126 ATTACH_REJECT,
127 },
128 {
129 "bind4: rewrite IP & TCP port in",
130 bind4_prog_load,
131 BPF_CGROUP_INET4_BIND,
132 BPF_CGROUP_INET4_BIND,
133 AF_INET,
134 SOCK_STREAM,
135 SERV4_IP,
136 SERV4_PORT,
137 SERV4_REWRITE_IP,
138 SERV4_REWRITE_PORT,
139 NULL,
140 SUCCESS,
141 },
142 {
143 "bind4: rewrite IP & UDP port in",
144 bind4_prog_load,
145 BPF_CGROUP_INET4_BIND,
146 BPF_CGROUP_INET4_BIND,
147 AF_INET,
148 SOCK_DGRAM,
149 SERV4_IP,
150 SERV4_PORT,
151 SERV4_REWRITE_IP,
152 SERV4_REWRITE_PORT,
153 NULL,
154 SUCCESS,
155 },
156 {
157 "bind6: load prog with wrong expected attach type",
158 bind6_prog_load,
159 BPF_CGROUP_INET4_BIND,
160 BPF_CGROUP_INET6_BIND,
161 AF_INET6,
162 SOCK_STREAM,
163 NULL,
164 0,
165 NULL,
166 0,
167 NULL,
168 LOAD_REJECT,
169 },
170 {
171 "bind6: attach prog with wrong attach type",
172 bind6_prog_load,
173 BPF_CGROUP_INET6_BIND,
174 BPF_CGROUP_INET4_BIND,
175 AF_INET,
176 SOCK_STREAM,
177 NULL,
178 0,
179 NULL,
180 0,
181 NULL,
182 ATTACH_REJECT,
183 },
184 {
185 "bind6: rewrite IP & TCP port in",
186 bind6_prog_load,
187 BPF_CGROUP_INET6_BIND,
188 BPF_CGROUP_INET6_BIND,
189 AF_INET6,
190 SOCK_STREAM,
191 SERV6_IP,
192 SERV6_PORT,
193 SERV6_REWRITE_IP,
194 SERV6_REWRITE_PORT,
195 NULL,
196 SUCCESS,
197 },
198 {
199 "bind6: rewrite IP & UDP port in",
200 bind6_prog_load,
201 BPF_CGROUP_INET6_BIND,
202 BPF_CGROUP_INET6_BIND,
203 AF_INET6,
204 SOCK_DGRAM,
205 SERV6_IP,
206 SERV6_PORT,
207 SERV6_REWRITE_IP,
208 SERV6_REWRITE_PORT,
209 NULL,
210 SUCCESS,
211 },
212
213 /* connect */
214 {
215 "connect4: load prog with wrong expected attach type",
216 connect4_prog_load,
217 BPF_CGROUP_INET6_CONNECT,
218 BPF_CGROUP_INET4_CONNECT,
219 AF_INET,
220 SOCK_STREAM,
221 NULL,
222 0,
223 NULL,
224 0,
225 NULL,
226 LOAD_REJECT,
227 },
228 {
229 "connect4: attach prog with wrong attach type",
230 connect4_prog_load,
231 BPF_CGROUP_INET4_CONNECT,
232 BPF_CGROUP_INET6_CONNECT,
233 AF_INET,
234 SOCK_STREAM,
235 NULL,
236 0,
237 NULL,
238 0,
239 NULL,
240 ATTACH_REJECT,
241 },
242 {
243 "connect4: rewrite IP & TCP port",
244 connect4_prog_load,
245 BPF_CGROUP_INET4_CONNECT,
246 BPF_CGROUP_INET4_CONNECT,
247 AF_INET,
248 SOCK_STREAM,
249 SERV4_IP,
250 SERV4_PORT,
251 SERV4_REWRITE_IP,
252 SERV4_REWRITE_PORT,
253 SRC4_REWRITE_IP,
254 SUCCESS,
255 },
256 {
257 "connect4: rewrite IP & UDP port",
258 connect4_prog_load,
259 BPF_CGROUP_INET4_CONNECT,
260 BPF_CGROUP_INET4_CONNECT,
261 AF_INET,
262 SOCK_DGRAM,
263 SERV4_IP,
264 SERV4_PORT,
265 SERV4_REWRITE_IP,
266 SERV4_REWRITE_PORT,
267 SRC4_REWRITE_IP,
268 SUCCESS,
269 },
270 {
271 "connect6: load prog with wrong expected attach type",
272 connect6_prog_load,
273 BPF_CGROUP_INET4_CONNECT,
274 BPF_CGROUP_INET6_CONNECT,
275 AF_INET6,
276 SOCK_STREAM,
277 NULL,
278 0,
279 NULL,
280 0,
281 NULL,
282 LOAD_REJECT,
283 },
284 {
285 "connect6: attach prog with wrong attach type",
286 connect6_prog_load,
287 BPF_CGROUP_INET6_CONNECT,
288 BPF_CGROUP_INET4_CONNECT,
289 AF_INET,
290 SOCK_STREAM,
291 NULL,
292 0,
293 NULL,
294 0,
295 NULL,
296 ATTACH_REJECT,
297 },
298 {
299 "connect6: rewrite IP & TCP port",
300 connect6_prog_load,
301 BPF_CGROUP_INET6_CONNECT,
302 BPF_CGROUP_INET6_CONNECT,
303 AF_INET6,
304 SOCK_STREAM,
305 SERV6_IP,
306 SERV6_PORT,
307 SERV6_REWRITE_IP,
308 SERV6_REWRITE_PORT,
309 SRC6_REWRITE_IP,
310 SUCCESS,
311 },
312 {
313 "connect6: rewrite IP & UDP port",
314 connect6_prog_load,
315 BPF_CGROUP_INET6_CONNECT,
316 BPF_CGROUP_INET6_CONNECT,
317 AF_INET6,
318 SOCK_DGRAM,
319 SERV6_IP,
320 SERV6_PORT,
321 SERV6_REWRITE_IP,
322 SERV6_REWRITE_PORT,
323 SRC6_REWRITE_IP,
324 SUCCESS,
325 },
326
327 /* sendmsg */
328 {
329 "sendmsg4: load prog with wrong expected attach type",
330 sendmsg4_rw_asm_prog_load,
331 BPF_CGROUP_UDP6_SENDMSG,
332 BPF_CGROUP_UDP4_SENDMSG,
333 AF_INET,
334 SOCK_DGRAM,
335 NULL,
336 0,
337 NULL,
338 0,
339 NULL,
340 LOAD_REJECT,
341 },
342 {
343 "sendmsg4: attach prog with wrong attach type",
344 sendmsg4_rw_asm_prog_load,
345 BPF_CGROUP_UDP4_SENDMSG,
346 BPF_CGROUP_UDP6_SENDMSG,
347 AF_INET,
348 SOCK_DGRAM,
349 NULL,
350 0,
351 NULL,
352 0,
353 NULL,
354 ATTACH_REJECT,
355 },
356 {
357 "sendmsg4: rewrite IP & port (asm)",
358 sendmsg4_rw_asm_prog_load,
359 BPF_CGROUP_UDP4_SENDMSG,
360 BPF_CGROUP_UDP4_SENDMSG,
361 AF_INET,
362 SOCK_DGRAM,
363 SERV4_IP,
364 SERV4_PORT,
365 SERV4_REWRITE_IP,
366 SERV4_REWRITE_PORT,
367 SRC4_REWRITE_IP,
368 SUCCESS,
369 },
370 {
371 "sendmsg4: rewrite IP & port (C)",
372 sendmsg4_rw_c_prog_load,
373 BPF_CGROUP_UDP4_SENDMSG,
374 BPF_CGROUP_UDP4_SENDMSG,
375 AF_INET,
376 SOCK_DGRAM,
377 SERV4_IP,
378 SERV4_PORT,
379 SERV4_REWRITE_IP,
380 SERV4_REWRITE_PORT,
381 SRC4_REWRITE_IP,
382 SUCCESS,
383 },
384 {
385 "sendmsg4: deny call",
386 sendmsg_deny_prog_load,
387 BPF_CGROUP_UDP4_SENDMSG,
388 BPF_CGROUP_UDP4_SENDMSG,
389 AF_INET,
390 SOCK_DGRAM,
391 SERV4_IP,
392 SERV4_PORT,
393 SERV4_REWRITE_IP,
394 SERV4_REWRITE_PORT,
395 SRC4_REWRITE_IP,
396 SYSCALL_EPERM,
397 },
398 {
399 "sendmsg6: load prog with wrong expected attach type",
400 sendmsg6_rw_asm_prog_load,
401 BPF_CGROUP_UDP4_SENDMSG,
402 BPF_CGROUP_UDP6_SENDMSG,
403 AF_INET6,
404 SOCK_DGRAM,
405 NULL,
406 0,
407 NULL,
408 0,
409 NULL,
410 LOAD_REJECT,
411 },
412 {
413 "sendmsg6: attach prog with wrong attach type",
414 sendmsg6_rw_asm_prog_load,
415 BPF_CGROUP_UDP6_SENDMSG,
416 BPF_CGROUP_UDP4_SENDMSG,
417 AF_INET6,
418 SOCK_DGRAM,
419 NULL,
420 0,
421 NULL,
422 0,
423 NULL,
424 ATTACH_REJECT,
425 },
426 {
427 "sendmsg6: rewrite IP & port (asm)",
428 sendmsg6_rw_asm_prog_load,
429 BPF_CGROUP_UDP6_SENDMSG,
430 BPF_CGROUP_UDP6_SENDMSG,
431 AF_INET6,
432 SOCK_DGRAM,
433 SERV6_IP,
434 SERV6_PORT,
435 SERV6_REWRITE_IP,
436 SERV6_REWRITE_PORT,
437 SRC6_REWRITE_IP,
438 SUCCESS,
439 },
440 {
441 "sendmsg6: rewrite IP & port (C)",
442 sendmsg6_rw_c_prog_load,
443 BPF_CGROUP_UDP6_SENDMSG,
444 BPF_CGROUP_UDP6_SENDMSG,
445 AF_INET6,
446 SOCK_DGRAM,
447 SERV6_IP,
448 SERV6_PORT,
449 SERV6_REWRITE_IP,
450 SERV6_REWRITE_PORT,
451 SRC6_REWRITE_IP,
452 SUCCESS,
453 },
454 {
455 "sendmsg6: IPv4-mapped IPv6",
456 sendmsg6_rw_v4mapped_prog_load,
457 BPF_CGROUP_UDP6_SENDMSG,
458 BPF_CGROUP_UDP6_SENDMSG,
459 AF_INET6,
460 SOCK_DGRAM,
461 SERV6_IP,
462 SERV6_PORT,
463 SERV6_REWRITE_IP,
464 SERV6_REWRITE_PORT,
465 SRC6_REWRITE_IP,
466 SYSCALL_ENOTSUPP,
467 },
468 {
469 "sendmsg6: deny call",
470 sendmsg_deny_prog_load,
471 BPF_CGROUP_UDP6_SENDMSG,
472 BPF_CGROUP_UDP6_SENDMSG,
473 AF_INET6,
474 SOCK_DGRAM,
475 SERV6_IP,
476 SERV6_PORT,
477 SERV6_REWRITE_IP,
478 SERV6_REWRITE_PORT,
479 SRC6_REWRITE_IP,
480 SYSCALL_EPERM,
481 },
482};
48 483
49static int mk_sockaddr(int domain, const char *ip, unsigned short port, 484static int mk_sockaddr(int domain, const char *ip, unsigned short port,
50 struct sockaddr *addr, socklen_t addr_len) 485 struct sockaddr *addr, socklen_t addr_len)
@@ -84,25 +519,23 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port,
84 return 0; 519 return 0;
85} 520}
86 521
87static int load_insns(enum bpf_attach_type attach_type, 522static int load_insns(const struct sock_addr_test *test,
88 const struct bpf_insn *insns, size_t insns_cnt, 523 const struct bpf_insn *insns, size_t insns_cnt)
89 const char *comment)
90{ 524{
91 struct bpf_load_program_attr load_attr; 525 struct bpf_load_program_attr load_attr;
92 int ret; 526 int ret;
93 527
94 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); 528 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
95 load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 529 load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
96 load_attr.expected_attach_type = attach_type; 530 load_attr.expected_attach_type = test->expected_attach_type;
97 load_attr.insns = insns; 531 load_attr.insns = insns;
98 load_attr.insns_cnt = insns_cnt; 532 load_attr.insns_cnt = insns_cnt;
99 load_attr.license = "GPL"; 533 load_attr.license = "GPL";
100 534
101 ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); 535 ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
102 if (ret < 0 && comment) { 536 if (ret < 0 && test->expected_result != LOAD_REJECT) {
103 log_err(">>> Loading %s program error.\n" 537 log_err(">>> Loading program error.\n"
104 ">>> Output from verifier:\n%s\n-------\n", 538 ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
105 comment, bpf_log_buf);
106 } 539 }
107 540
108 return ret; 541 return ret;
@@ -119,8 +552,7 @@ static int load_insns(enum bpf_attach_type attach_type,
119 * to count jumps properly. 552 * to count jumps properly.
120 */ 553 */
121 554
122static int bind4_prog_load(enum bpf_attach_type attach_type, 555static int bind4_prog_load(const struct sock_addr_test *test)
123 const char *comment)
124{ 556{
125 union { 557 union {
126 uint8_t u4_addr8[4]; 558 uint8_t u4_addr8[4];
@@ -186,12 +618,10 @@ static int bind4_prog_load(enum bpf_attach_type attach_type,
186 BPF_EXIT_INSN(), 618 BPF_EXIT_INSN(),
187 }; 619 };
188 620
189 return load_insns(attach_type, insns, 621 return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
190 sizeof(insns) / sizeof(struct bpf_insn), comment);
191} 622}
192 623
193static int bind6_prog_load(enum bpf_attach_type attach_type, 624static int bind6_prog_load(const struct sock_addr_test *test)
194 const char *comment)
195{ 625{
196 struct sockaddr_in6 addr6_rw; 626 struct sockaddr_in6 addr6_rw;
197 struct in6_addr ip6; 627 struct in6_addr ip6;
@@ -254,13 +684,10 @@ static int bind6_prog_load(enum bpf_attach_type attach_type,
254 BPF_EXIT_INSN(), 684 BPF_EXIT_INSN(),
255 }; 685 };
256 686
257 return load_insns(attach_type, insns, 687 return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
258 sizeof(insns) / sizeof(struct bpf_insn), comment);
259} 688}
260 689
261static int connect_prog_load_path(const char *path, 690static int load_path(const struct sock_addr_test *test, const char *path)
262 enum bpf_attach_type attach_type,
263 const char *comment)
264{ 691{
265 struct bpf_prog_load_attr attr; 692 struct bpf_prog_load_attr attr;
266 struct bpf_object *obj; 693 struct bpf_object *obj;
@@ -269,75 +696,218 @@ static int connect_prog_load_path(const char *path,
269 memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); 696 memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
270 attr.file = path; 697 attr.file = path;
271 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 698 attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
272 attr.expected_attach_type = attach_type; 699 attr.expected_attach_type = test->expected_attach_type;
273 700
274 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { 701 if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
275 if (comment) 702 if (test->expected_result != LOAD_REJECT)
276 log_err(">>> Loading %s program at %s error.\n", 703 log_err(">>> Loading program (%s) error.\n", path);
277 comment, path);
278 return -1; 704 return -1;
279 } 705 }
280 706
281 return prog_fd; 707 return prog_fd;
282} 708}
283 709
284static int connect4_prog_load(enum bpf_attach_type attach_type, 710static int connect4_prog_load(const struct sock_addr_test *test)
285 const char *comment)
286{ 711{
287 return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment); 712 return load_path(test, CONNECT4_PROG_PATH);
288} 713}
289 714
290static int connect6_prog_load(enum bpf_attach_type attach_type, 715static int connect6_prog_load(const struct sock_addr_test *test)
291 const char *comment)
292{ 716{
293 return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment); 717 return load_path(test, CONNECT6_PROG_PATH);
294} 718}
295 719
296static void print_ip_port(int sockfd, info_fn fn, const char *fmt) 720static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
297{ 721{
298 char addr_buf[INET_NTOP_BUF]; 722 struct bpf_insn insns[] = {
299 struct sockaddr_storage addr; 723 /* return 0 */
300 struct sockaddr_in6 *addr6; 724 BPF_MOV64_IMM(BPF_REG_0, 0),
301 struct sockaddr_in *addr4; 725 BPF_EXIT_INSN(),
302 socklen_t addr_len; 726 };
303 unsigned short port; 727 return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
304 void *nip; 728}
305 729
306 addr_len = sizeof(struct sockaddr_storage); 730static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
307 memset(&addr, 0, addr_len); 731{
308 732 struct sockaddr_in dst4_rw_addr;
309 if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) { 733 struct in_addr src4_rw_ip;
310 if (addr.ss_family == AF_INET) { 734
311 addr4 = (struct sockaddr_in *)&addr; 735 if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
312 nip = (void *)&addr4->sin_addr; 736 log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
313 port = ntohs(addr4->sin_port); 737 return -1;
314 } else if (addr.ss_family == AF_INET6) { 738 }
315 addr6 = (struct sockaddr_in6 *)&addr; 739
316 nip = (void *)&addr6->sin6_addr; 740 if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
317 port = ntohs(addr6->sin6_port); 741 (struct sockaddr *)&dst4_rw_addr,
318 } else { 742 sizeof(dst4_rw_addr)) == -1)
319 return; 743 return -1;
320 } 744
321 const char *addr_str = 745 struct bpf_insn insns[] = {
322 inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF); 746 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
323 printf(fmt, addr_str ? addr_str : "??", port); 747
748 /* if (sk.family == AF_INET && */
749 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
750 offsetof(struct bpf_sock_addr, family)),
751 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
752
753 /* sk.type == SOCK_DGRAM) { */
754 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
755 offsetof(struct bpf_sock_addr, type)),
756 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
757
758 /* msg_src_ip4 = src4_rw_ip */
759 BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
760 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
761 offsetof(struct bpf_sock_addr, msg_src_ip4)),
762
763 /* user_ip4 = dst4_rw_addr.sin_addr */
764 BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
765 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
766 offsetof(struct bpf_sock_addr, user_ip4)),
767
768 /* user_port = dst4_rw_addr.sin_port */
769 BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
770 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
771 offsetof(struct bpf_sock_addr, user_port)),
772 /* } */
773
774 /* return 1 */
775 BPF_MOV64_IMM(BPF_REG_0, 1),
776 BPF_EXIT_INSN(),
777 };
778
779 return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
780}
781
782static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
783{
784 return load_path(test, SENDMSG4_PROG_PATH);
785}
786
787static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
788 const char *rw_dst_ip)
789{
790 struct sockaddr_in6 dst6_rw_addr;
791 struct in6_addr src6_rw_ip;
792
793 if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
794 log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
795 return -1;
796 }
797
798 if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
799 (struct sockaddr *)&dst6_rw_addr,
800 sizeof(dst6_rw_addr)) == -1)
801 return -1;
802
803 struct bpf_insn insns[] = {
804 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
805
806 /* if (sk.family == AF_INET6) { */
807 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
808 offsetof(struct bpf_sock_addr, family)),
809 BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
810
811#define STORE_IPV6_WORD_N(DST, SRC, N) \
812 BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
813 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
814 offsetof(struct bpf_sock_addr, DST[N]))
815
816#define STORE_IPV6(DST, SRC) \
817 STORE_IPV6_WORD_N(DST, SRC, 0), \
818 STORE_IPV6_WORD_N(DST, SRC, 1), \
819 STORE_IPV6_WORD_N(DST, SRC, 2), \
820 STORE_IPV6_WORD_N(DST, SRC, 3)
821
822 STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
823 STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
824
825 /* user_port = dst6_rw_addr.sin6_port */
826 BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
827 BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
828 offsetof(struct bpf_sock_addr, user_port)),
829
830 /* } */
831
832 /* return 1 */
833 BPF_MOV64_IMM(BPF_REG_0, 1),
834 BPF_EXIT_INSN(),
835 };
836
837 return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
838}
839
840static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
841{
842 return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
843}
844
845static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
846{
847 return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
848}
849
850static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
851{
852 return load_path(test, SENDMSG6_PROG_PATH);
853}
854
855static int cmp_addr(const struct sockaddr_storage *addr1,
856 const struct sockaddr_storage *addr2, int cmp_port)
857{
858 const struct sockaddr_in *four1, *four2;
859 const struct sockaddr_in6 *six1, *six2;
860
861 if (addr1->ss_family != addr2->ss_family)
862 return -1;
863
864 if (addr1->ss_family == AF_INET) {
865 four1 = (const struct sockaddr_in *)addr1;
866 four2 = (const struct sockaddr_in *)addr2;
867 return !((four1->sin_port == four2->sin_port || !cmp_port) &&
868 four1->sin_addr.s_addr == four2->sin_addr.s_addr);
869 } else if (addr1->ss_family == AF_INET6) {
870 six1 = (const struct sockaddr_in6 *)addr1;
871 six2 = (const struct sockaddr_in6 *)addr2;
872 return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
873 !memcmp(&six1->sin6_addr, &six2->sin6_addr,
874 sizeof(struct in6_addr)));
324 } 875 }
876
877 return -1;
325} 878}
326 879
327static void print_local_ip_port(int sockfd, const char *fmt) 880static int cmp_sock_addr(info_fn fn, int sock1,
881 const struct sockaddr_storage *addr2, int cmp_port)
328{ 882{
329 print_ip_port(sockfd, getsockname, fmt); 883 struct sockaddr_storage addr1;
884 socklen_t len1 = sizeof(addr1);
885
886 memset(&addr1, 0, len1);
887 if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
888 return -1;
889
890 return cmp_addr(&addr1, addr2, cmp_port);
891}
892
893static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
894{
895 return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
330} 896}
331 897
332static void print_remote_ip_port(int sockfd, const char *fmt) 898static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
333{ 899{
334 print_ip_port(sockfd, getpeername, fmt); 900 return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
901}
902
903static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
904{
905 return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
335} 906}
336 907
337static int start_server(int type, const struct sockaddr_storage *addr, 908static int start_server(int type, const struct sockaddr_storage *addr,
338 socklen_t addr_len) 909 socklen_t addr_len)
339{ 910{
340
341 int fd; 911 int fd;
342 912
343 fd = socket(addr->ss_family, type, 0); 913 fd = socket(addr->ss_family, type, 0);
@@ -358,8 +928,6 @@ static int start_server(int type, const struct sockaddr_storage *addr,
358 } 928 }
359 } 929 }
360 930
361 print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n");
362
363 goto out; 931 goto out;
364close_out: 932close_out:
365 close(fd); 933 close(fd);
@@ -372,19 +940,19 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
372 socklen_t addr_len) 940 socklen_t addr_len)
373{ 941{
374 int domain; 942 int domain;
375 int fd; 943 int fd = -1;
376 944
377 domain = addr->ss_family; 945 domain = addr->ss_family;
378 946
379 if (domain != AF_INET && domain != AF_INET6) { 947 if (domain != AF_INET && domain != AF_INET6) {
380 log_err("Unsupported address family"); 948 log_err("Unsupported address family");
381 return -1; 949 goto err;
382 } 950 }
383 951
384 fd = socket(domain, type, 0); 952 fd = socket(domain, type, 0);
385 if (fd == -1) { 953 if (fd == -1) {
386 log_err("Failed to creating client socket"); 954 log_err("Failed to create client socket");
387 return -1; 955 goto err;
388 } 956 }
389 957
390 if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) { 958 if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
@@ -392,162 +960,394 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
392 goto err; 960 goto err;
393 } 961 }
394 962
395 print_remote_ip_port(fd, "\t Actual: connect(%s, %d)"); 963 goto out;
396 print_local_ip_port(fd, " from (%s, %d)\n"); 964err:
965 close(fd);
966 fd = -1;
967out:
968 return fd;
969}
970
971int init_pktinfo(int domain, struct cmsghdr *cmsg)
972{
973 struct in6_pktinfo *pktinfo6;
974 struct in_pktinfo *pktinfo4;
975
976 if (domain == AF_INET) {
977 cmsg->cmsg_level = SOL_IP;
978 cmsg->cmsg_type = IP_PKTINFO;
979 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
980 pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
981 memset(pktinfo4, 0, sizeof(struct in_pktinfo));
982 if (inet_pton(domain, SRC4_IP,
983 (void *)&pktinfo4->ipi_spec_dst) != 1)
984 return -1;
985 } else if (domain == AF_INET6) {
986 cmsg->cmsg_level = SOL_IPV6;
987 cmsg->cmsg_type = IPV6_PKTINFO;
988 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
989 pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
990 memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
991 if (inet_pton(domain, SRC6_IP,
992 (void *)&pktinfo6->ipi6_addr) != 1)
993 return -1;
994 } else {
995 return -1;
996 }
397 997
398 return 0; 998 return 0;
999}
1000
1001static int sendmsg_to_server(const struct sockaddr_storage *addr,
1002 socklen_t addr_len, int set_cmsg, int *syscall_err)
1003{
1004 union {
1005 char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
1006 struct cmsghdr align;
1007 } control6;
1008 union {
1009 char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
1010 struct cmsghdr align;
1011 } control4;
1012 struct msghdr hdr;
1013 struct iovec iov;
1014 char data = 'a';
1015 int domain;
1016 int fd = -1;
1017
1018 domain = addr->ss_family;
1019
1020 if (domain != AF_INET && domain != AF_INET6) {
1021 log_err("Unsupported address family");
1022 goto err;
1023 }
1024
1025 fd = socket(domain, SOCK_DGRAM, 0);
1026 if (fd == -1) {
1027 log_err("Failed to create client socket");
1028 goto err;
1029 }
1030
1031 memset(&iov, 0, sizeof(iov));
1032 iov.iov_base = &data;
1033 iov.iov_len = sizeof(data);
1034
1035 memset(&hdr, 0, sizeof(hdr));
1036 hdr.msg_name = (void *)addr;
1037 hdr.msg_namelen = addr_len;
1038 hdr.msg_iov = &iov;
1039 hdr.msg_iovlen = 1;
1040
1041 if (set_cmsg) {
1042 if (domain == AF_INET) {
1043 hdr.msg_control = &control4;
1044 hdr.msg_controllen = sizeof(control4.buf);
1045 } else if (domain == AF_INET6) {
1046 hdr.msg_control = &control6;
1047 hdr.msg_controllen = sizeof(control6.buf);
1048 }
1049 if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
1050 log_err("Fail to init pktinfo");
1051 goto err;
1052 }
1053 }
1054
1055 if (sendmsg(fd, &hdr, 0) != sizeof(data)) {
1056 log_err("Fail to send message to server");
1057 *syscall_err = errno;
1058 goto err;
1059 }
1060
1061 goto out;
399err: 1062err:
400 close(fd); 1063 close(fd);
401 return -1; 1064 fd = -1;
1065out:
1066 return fd;
402} 1067}
403 1068
404static void print_test_case_num(int domain, int type) 1069static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
405{ 1070{
406 static int test_num; 1071 struct timeval tv;
407 1072 struct msghdr hdr;
408 printf("Test case #%d (%s/%s):\n", ++test_num, 1073 struct iovec iov;
409 (domain == AF_INET ? "IPv4" : 1074 char data[64];
410 domain == AF_INET6 ? "IPv6" : 1075 fd_set rfds;
411 "unknown_domain"), 1076
412 (type == SOCK_STREAM ? "TCP" : 1077 FD_ZERO(&rfds);
413 type == SOCK_DGRAM ? "UDP" : 1078 FD_SET(sockfd, &rfds);
414 "unknown_type")); 1079
1080 tv.tv_sec = 2;
1081 tv.tv_usec = 0;
1082
1083 if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
1084 !FD_ISSET(sockfd, &rfds))
1085 return -1;
1086
1087 memset(&iov, 0, sizeof(iov));
1088 iov.iov_base = data;
1089 iov.iov_len = sizeof(data);
1090
1091 memset(&hdr, 0, sizeof(hdr));
1092 hdr.msg_name = src_addr;
1093 hdr.msg_namelen = sizeof(struct sockaddr_storage);
1094 hdr.msg_iov = &iov;
1095 hdr.msg_iovlen = 1;
1096
1097 return recvmsg(sockfd, &hdr, 0);
415} 1098}
416 1099
417static int run_test_case(int domain, int type, const char *ip, 1100static int init_addrs(const struct sock_addr_test *test,
418 unsigned short port) 1101 struct sockaddr_storage *requested_addr,
1102 struct sockaddr_storage *expected_addr,
1103 struct sockaddr_storage *expected_src_addr)
419{ 1104{
420 struct sockaddr_storage addr; 1105 socklen_t addr_len = sizeof(struct sockaddr_storage);
421 socklen_t addr_len = sizeof(addr); 1106
1107 if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
1108 (struct sockaddr *)expected_addr, addr_len) == -1)
1109 goto err;
1110
1111 if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
1112 (struct sockaddr *)requested_addr, addr_len) == -1)
1113 goto err;
1114
1115 if (test->expected_src_ip &&
1116 mk_sockaddr(test->domain, test->expected_src_ip, 0,
1117 (struct sockaddr *)expected_src_addr, addr_len) == -1)
1118 goto err;
1119
1120 return 0;
1121err:
1122 return -1;
1123}
1124
1125static int run_bind_test_case(const struct sock_addr_test *test)
1126{
1127 socklen_t addr_len = sizeof(struct sockaddr_storage);
1128 struct sockaddr_storage requested_addr;
1129 struct sockaddr_storage expected_addr;
1130 int clientfd = -1;
422 int servfd = -1; 1131 int servfd = -1;
423 int err = 0; 1132 int err = 0;
424 1133
425 print_test_case_num(domain, type); 1134 if (init_addrs(test, &requested_addr, &expected_addr, NULL))
426 1135 goto err;
427 if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
428 addr_len) == -1)
429 return -1;
430 1136
431 printf("\tRequested: bind(%s, %d) ..\n", ip, port); 1137 servfd = start_server(test->type, &requested_addr, addr_len);
432 servfd = start_server(type, &addr, addr_len);
433 if (servfd == -1) 1138 if (servfd == -1)
434 goto err; 1139 goto err;
435 1140
436 printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port); 1141 if (cmp_local_addr(servfd, &expected_addr))
437 if (connect_to_server(type, &addr, addr_len)) 1142 goto err;
1143
1144 /* Try to connect to server just in case */
1145 clientfd = connect_to_server(test->type, &expected_addr, addr_len);
1146 if (clientfd == -1)
438 goto err; 1147 goto err;
439 1148
440 goto out; 1149 goto out;
441err: 1150err:
442 err = -1; 1151 err = -1;
443out: 1152out:
1153 close(clientfd);
444 close(servfd); 1154 close(servfd);
445 return err; 1155 return err;
446} 1156}
447 1157
448static void close_progs_fds(struct program *progs, size_t prog_cnt) 1158static int run_connect_test_case(const struct sock_addr_test *test)
449{ 1159{
450 size_t i; 1160 socklen_t addr_len = sizeof(struct sockaddr_storage);
1161 struct sockaddr_storage expected_src_addr;
1162 struct sockaddr_storage requested_addr;
1163 struct sockaddr_storage expected_addr;
1164 int clientfd = -1;
1165 int servfd = -1;
1166 int err = 0;
451 1167
452 for (i = 0; i < prog_cnt; ++i) { 1168 if (init_addrs(test, &requested_addr, &expected_addr,
453 close(progs[i].fd); 1169 &expected_src_addr))
454 progs[i].fd = -1; 1170 goto err;
455 } 1171
1172 /* Prepare server to connect to */
1173 servfd = start_server(test->type, &expected_addr, addr_len);
1174 if (servfd == -1)
1175 goto err;
1176
1177 clientfd = connect_to_server(test->type, &requested_addr, addr_len);
1178 if (clientfd == -1)
1179 goto err;
1180
1181 /* Make sure src and dst addrs were overridden properly */
1182 if (cmp_peer_addr(clientfd, &expected_addr))
1183 goto err;
1184
1185 if (cmp_local_ip(clientfd, &expected_src_addr))
1186 goto err;
1187
1188 goto out;
1189err:
1190 err = -1;
1191out:
1192 close(clientfd);
1193 close(servfd);
1194 return err;
456} 1195}
457 1196
458static int load_and_attach_progs(int cgfd, struct program *progs, 1197static int run_sendmsg_test_case(const struct sock_addr_test *test)
459 size_t prog_cnt)
460{ 1198{
461 size_t i; 1199 socklen_t addr_len = sizeof(struct sockaddr_storage);
462 1200 struct sockaddr_storage expected_src_addr;
463 for (i = 0; i < prog_cnt; ++i) { 1201 struct sockaddr_storage requested_addr;
464 printf("Load %s with invalid type (can pollute stderr) ", 1202 struct sockaddr_storage expected_addr;
465 progs[i].name); 1203 struct sockaddr_storage real_src_addr;
466 fflush(stdout); 1204 int clientfd = -1;
467 progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL); 1205 int servfd = -1;
468 if (progs[i].fd != -1) { 1206 int set_cmsg;
469 log_err("Load with invalid type accepted for %s", 1207 int err = 0;
470 progs[i].name); 1208
471 goto err; 1209 if (test->type != SOCK_DGRAM)
472 } 1210 goto err;
473 printf("... REJECTED\n");
474 1211
475 printf("Load %s with valid type", progs[i].name); 1212 if (init_addrs(test, &requested_addr, &expected_addr,
476 progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name); 1213 &expected_src_addr))
477 if (progs[i].fd == -1) { 1214 goto err;
478 log_err("Failed to load program %s", progs[i].name); 1215
1216 /* Prepare server to sendmsg to */
1217 servfd = start_server(test->type, &expected_addr, addr_len);
1218 if (servfd == -1)
1219 goto err;
1220
1221 for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
1222 if (clientfd >= 0)
1223 close(clientfd);
1224
1225 clientfd = sendmsg_to_server(&requested_addr, addr_len,
1226 set_cmsg, &err);
1227 if (err)
1228 goto out;
1229 else if (clientfd == -1)
479 goto err; 1230 goto err;
480 }
481 printf(" ... OK\n");
482 1231
483 printf("Attach %s with invalid type", progs[i].name); 1232 /* Try to receive message on server instead of using
484 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type, 1233 * getpeername(2) on client socket, to check that client's
485 BPF_F_ALLOW_OVERRIDE) != -1) { 1234 * destination address was rewritten properly, since
486 log_err("Attach with invalid type accepted for %s", 1235 * getpeername(2) doesn't work with unconnected datagram
487 progs[i].name); 1236 * sockets.
1237 *
1238 * Get source address from recvmsg(2) as well to make sure
1239 * source was rewritten properly: getsockname(2) can't be used
1240 * since socket is unconnected and source defined for one
1241 * specific packet may differ from the one used by default and
1242 * returned by getsockname(2).
1243 */
1244 if (recvmsg_from_client(servfd, &real_src_addr) == -1)
488 goto err; 1245 goto err;
489 }
490 printf(" ... REJECTED\n");
491 1246
492 printf("Attach %s with valid type", progs[i].name); 1247 if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
493 if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
494 BPF_F_ALLOW_OVERRIDE) == -1) {
495 log_err("Failed to attach program %s", progs[i].name);
496 goto err; 1248 goto err;
497 }
498 printf(" ... OK\n");
499 } 1249 }
500 1250
501 return 0; 1251 goto out;
502err: 1252err:
503 close_progs_fds(progs, prog_cnt); 1253 err = -1;
504 return -1; 1254out:
1255 close(clientfd);
1256 close(servfd);
1257 return err;
505} 1258}
506 1259
507static int run_domain_test(int domain, int cgfd, struct program *progs, 1260static int run_test_case(int cgfd, const struct sock_addr_test *test)
508 size_t prog_cnt, const char *ip, unsigned short port)
509{ 1261{
1262 int progfd = -1;
510 int err = 0; 1263 int err = 0;
511 1264
512 if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1) 1265 printf("Test case: %s .. ", test->descr);
1266
1267 progfd = test->loadfn(test);
1268 if (test->expected_result == LOAD_REJECT && progfd < 0)
1269 goto out;
1270 else if (test->expected_result == LOAD_REJECT || progfd < 0)
1271 goto err;
1272
1273 err = bpf_prog_attach(progfd, cgfd, test->attach_type,
1274 BPF_F_ALLOW_OVERRIDE);
1275 if (test->expected_result == ATTACH_REJECT && err) {
1276 err = 0; /* error was expected, reset it */
1277 goto out;
1278 } else if (test->expected_result == ATTACH_REJECT || err) {
513 goto err; 1279 goto err;
1280 }
514 1281
515 if (run_test_case(domain, SOCK_STREAM, ip, port) == -1) 1282 switch (test->attach_type) {
1283 case BPF_CGROUP_INET4_BIND:
1284 case BPF_CGROUP_INET6_BIND:
1285 err = run_bind_test_case(test);
1286 break;
1287 case BPF_CGROUP_INET4_CONNECT:
1288 case BPF_CGROUP_INET6_CONNECT:
1289 err = run_connect_test_case(test);
1290 break;
1291 case BPF_CGROUP_UDP4_SENDMSG:
1292 case BPF_CGROUP_UDP6_SENDMSG:
1293 err = run_sendmsg_test_case(test);
1294 break;
1295 default:
516 goto err; 1296 goto err;
1297 }
1298
1299 if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
1300 err = 0; /* error was expected, reset it */
1301 goto out;
1302 }
1303
1304 if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
1305 err = 0; /* error was expected, reset it */
1306 goto out;
1307 }
517 1308
518 if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1) 1309 if (err || test->expected_result != SUCCESS)
519 goto err; 1310 goto err;
520 1311
521 goto out; 1312 goto out;
522err: 1313err:
523 err = -1; 1314 err = -1;
524out: 1315out:
525 close_progs_fds(progs, prog_cnt); 1316 /* Detaching w/o checking return code: best effort attempt. */
1317 if (progfd != -1)
1318 bpf_prog_detach(cgfd, test->attach_type);
1319 close(progfd);
1320 printf("[%s]\n", err ? "FAIL" : "PASS");
526 return err; 1321 return err;
527} 1322}
528 1323
529static int run_test(void) 1324static int run_tests(int cgfd)
1325{
1326 int passes = 0;
1327 int fails = 0;
1328 int i;
1329
1330 for (i = 0; i < ARRAY_SIZE(tests); ++i) {
1331 if (run_test_case(cgfd, &tests[i]))
1332 ++fails;
1333 else
1334 ++passes;
1335 }
1336 printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
1337 return fails ? -1 : 0;
1338}
1339
1340int main(int argc, char **argv)
530{ 1341{
531 size_t inet6_prog_cnt;
532 size_t inet_prog_cnt;
533 int cgfd = -1; 1342 int cgfd = -1;
534 int err = 0; 1343 int err = 0;
535 1344
536 struct program inet6_progs[] = { 1345 if (argc < 2) {
537 {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6", 1346 fprintf(stderr,
538 BPF_CGROUP_INET4_BIND}, 1347 "%s has to be run via %s.sh. Skip direct run.\n",
539 {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6", 1348 argv[0], argv[0]);
540 BPF_CGROUP_INET4_CONNECT}, 1349 exit(err);
541 }; 1350 }
542 inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
543
544 struct program inet_progs[] = {
545 {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
546 BPF_CGROUP_INET6_BIND},
547 {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
548 BPF_CGROUP_INET6_CONNECT},
549 };
550 inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
551 1351
552 if (setup_cgroup_environment()) 1352 if (setup_cgroup_environment())
553 goto err; 1353 goto err;
@@ -559,12 +1359,7 @@ static int run_test(void)
559 if (join_cgroup(CG_PATH)) 1359 if (join_cgroup(CG_PATH))
560 goto err; 1360 goto err;
561 1361
562 if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP, 1362 if (run_tests(cgfd))
563 SERV4_PORT) == -1)
564 goto err;
565
566 if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
567 SERV6_IP, SERV6_PORT) == -1)
568 goto err; 1363 goto err;
569 1364
570 goto out; 1365 goto out;
@@ -573,17 +1368,5 @@ err:
573out: 1368out:
574 close(cgfd); 1369 close(cgfd);
575 cleanup_cgroup_environment(); 1370 cleanup_cgroup_environment();
576 printf(err ? "### FAIL\n" : "### SUCCESS\n");
577 return err; 1371 return err;
578} 1372}
579
580int main(int argc, char **argv)
581{
582 if (argc < 2) {
583 fprintf(stderr,
584 "%s has to be run via %s.sh. Skip direct run.\n",
585 argv[0], argv[0]);
586 exit(0);
587 }
588 return run_test();
589}
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644
index 000000000000..e6755916442a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockhash_kern.c
@@ -0,0 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
3#undef SOCKMAP
4#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
5#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000000..05c8cb71724a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1524 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3#include <stdio.h>
4#include <stdlib.h>
5#include <sys/socket.h>
6#include <sys/ioctl.h>
7#include <sys/select.h>
8#include <netinet/in.h>
9#include <arpa/inet.h>
10#include <unistd.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/ioctl.h>
14#include <stdbool.h>
15#include <signal.h>
16#include <fcntl.h>
17#include <sys/wait.h>
18#include <time.h>
19#include <sched.h>
20
21#include <sys/time.h>
22#include <sys/resource.h>
23#include <sys/types.h>
24#include <sys/sendfile.h>
25
26#include <linux/netlink.h>
27#include <linux/socket.h>
28#include <linux/sock_diag.h>
29#include <linux/bpf.h>
30#include <linux/if_link.h>
31#include <assert.h>
32#include <libgen.h>
33
34#include <getopt.h>
35
36#include <bpf/bpf.h>
37#include <bpf/libbpf.h>
38
39#include "bpf_util.h"
40#include "bpf_rlimit.h"
41#include "cgroup_helpers.h"
42
/* Loop-control flag for forever_ping_pong(); presumably cleared by
 * running_handler() on SIGINT — handler body not visible here, confirm. */
int running;
static void running_handler(int a);

/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001

/* BPF object files loaded by the test harness and the cgroup mount point. */
#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
#define CG_PATH "/sockmap"

/* global sockets: s1/s2 listeners, c1/c2 clients, p1/p2 accepted peers
 * (see sockmap_init_sockets()) */
int s1, s2, c1, c2, p1, p2;
int test_cnt;	/* running test number printed in banners */
int passed;
int failed;
int map_fd[8];	/* fds of the BPF maps from the loaded object */
struct bpf_map *maps[8];
int prog_fd[11];	/* fds of the BPF programs from the loaded object */

/* TX-message test configuration; set directly by getopt via long_options
 * flag pointers or by the test drivers (test_txmsg() etc.). */
int txmsg_pass;
int txmsg_noisy;
int txmsg_redir;
int txmsg_redir_noisy;
int txmsg_drop;
int txmsg_apply;	/* value for the apply_bytes map */
int txmsg_cork;		/* value for the cork_bytes map */
int txmsg_start;	/* start offset written to map_fd[5], key 0 */
int txmsg_end;		/* end offset written to map_fd[5], key 1 */
int txmsg_ingress;
int txmsg_skb;
74
/* Command-line options.  Entries with a non-NULL flag pointer set the
 * corresponding txmsg_* global to 1 when present; the rest are handled
 * through their short-option character in getopt_long(). */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"cgroup",	required_argument,	NULL, 'c' },
	{"rate",	required_argument,	NULL, 'r' },
	{"verbose",	no_argument,		NULL, 'v' },
	{"iov_count",	required_argument,	NULL, 'i' },
	{"length",	required_argument,	NULL, 'l' },
	{"test",	required_argument,	NULL, 't' },
	{"data_test",   no_argument,		NULL, 'd' },
	{"txmsg",		no_argument,	&txmsg_pass,  1  },
	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
	{"txmsg_apply",	required_argument,	NULL, 'a'},
	{"txmsg_cork",	required_argument,	NULL, 'k'},
	{"txmsg_start", required_argument,	NULL, 's'},
	{"txmsg_end",	required_argument,	NULL, 'e'},
	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
	{0, 0, NULL, 0 }
};
97
98static void usage(char *argv[])
99{
100 int i;
101
102 printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
103 printf(" options:\n");
104 for (i = 0; long_options[i].name != 0; i++) {
105 printf(" --%-12s", long_options[i].name);
106 if (long_options[i].flag != NULL)
107 printf(" flag (internal value:%d)\n",
108 *long_options[i].flag);
109 else
110 printf(" -%c\n", long_options[i].val);
111 }
112 printf("\n");
113}
114
115static int sockmap_init_sockets(int verbose)
116{
117 int i, err, one = 1;
118 struct sockaddr_in addr;
119 int *fds[4] = {&s1, &s2, &c1, &c2};
120
121 s1 = s2 = p1 = p2 = c1 = c2 = 0;
122
123 /* Init sockets */
124 for (i = 0; i < 4; i++) {
125 *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
126 if (*fds[i] < 0) {
127 perror("socket s1 failed()");
128 return errno;
129 }
130 }
131
132 /* Allow reuse */
133 for (i = 0; i < 2; i++) {
134 err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
135 (char *)&one, sizeof(one));
136 if (err) {
137 perror("setsockopt failed()");
138 return errno;
139 }
140 }
141
142 /* Non-blocking sockets */
143 for (i = 0; i < 2; i++) {
144 err = ioctl(*fds[i], FIONBIO, (char *)&one);
145 if (err < 0) {
146 perror("ioctl s1 failed()");
147 return errno;
148 }
149 }
150
151 /* Bind server sockets */
152 memset(&addr, 0, sizeof(struct sockaddr_in));
153 addr.sin_family = AF_INET;
154 addr.sin_addr.s_addr = inet_addr("127.0.0.1");
155
156 addr.sin_port = htons(S1_PORT);
157 err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
158 if (err < 0) {
159 perror("bind s1 failed()\n");
160 return errno;
161 }
162
163 addr.sin_port = htons(S2_PORT);
164 err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
165 if (err < 0) {
166 perror("bind s2 failed()\n");
167 return errno;
168 }
169
170 /* Listen server sockets */
171 addr.sin_port = htons(S1_PORT);
172 err = listen(s1, 32);
173 if (err < 0) {
174 perror("listen s1 failed()\n");
175 return errno;
176 }
177
178 addr.sin_port = htons(S2_PORT);
179 err = listen(s2, 32);
180 if (err < 0) {
181 perror("listen s1 failed()\n");
182 return errno;
183 }
184
185 /* Initiate Connect */
186 addr.sin_port = htons(S1_PORT);
187 err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
188 if (err < 0 && errno != EINPROGRESS) {
189 perror("connect c1 failed()\n");
190 return errno;
191 }
192
193 addr.sin_port = htons(S2_PORT);
194 err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
195 if (err < 0 && errno != EINPROGRESS) {
196 perror("connect c2 failed()\n");
197 return errno;
198 } else if (err < 0) {
199 err = 0;
200 }
201
202 /* Accept Connecrtions */
203 p1 = accept(s1, NULL, NULL);
204 if (p1 < 0) {
205 perror("accept s1 failed()\n");
206 return errno;
207 }
208
209 p2 = accept(s2, NULL, NULL);
210 if (p2 < 0) {
211 perror("accept s1 failed()\n");
212 return errno;
213 }
214
215 if (verbose) {
216 printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
217 printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
218 c1, s1, c2, s2);
219 }
220 return 0;
221}
222
/* Transfer statistics for one send/recv loop; start/end bound the timed
 * window used by sentBps()/recvdBps(). */
struct msg_stats {
	size_t bytes_sent;
	size_t bytes_recvd;
	struct timespec start;
	struct timespec end;
};

/* Per-run knobs filled in by the test drivers / command line. */
struct sockmap_options {
	int verbose;
	bool base;		/* run without attaching BPF programs */
	bool sendpage;		/* use sendfile(2) instead of sendmsg(2) */
	bool data_test;		/* verify the payload byte pattern on RX */
	bool drop_expected;	/* TX is expected to fail (drop verdict) */
	int iov_count;		/* iovecs per sendmsg */
	int iov_length;		/* bytes per iovec */
	int rate;		/* number of send iterations */
};
240
241static int msg_loop_sendpage(int fd, int iov_length, int cnt,
242 struct msg_stats *s,
243 struct sockmap_options *opt)
244{
245 bool drop = opt->drop_expected;
246 unsigned char k = 0;
247 FILE *file;
248 int i, fp;
249
250 file = fopen(".sendpage_tst.tmp", "w+");
251 for (i = 0; i < iov_length * cnt; i++, k++)
252 fwrite(&k, sizeof(char), 1, file);
253 fflush(file);
254 fseek(file, 0, SEEK_SET);
255 fclose(file);
256
257 fp = open(".sendpage_tst.tmp", O_RDONLY);
258 clock_gettime(CLOCK_MONOTONIC, &s->start);
259 for (i = 0; i < cnt; i++) {
260 int sent = sendfile(fd, fp, NULL, iov_length);
261
262 if (!drop && sent < 0) {
263 perror("send loop error:");
264 close(fp);
265 return sent;
266 } else if (drop && sent >= 0) {
267 printf("sendpage loop error expected: %i\n", sent);
268 close(fp);
269 return -EIO;
270 }
271
272 if (sent > 0)
273 s->bytes_sent += sent;
274 }
275 clock_gettime(CLOCK_MONOTONIC, &s->end);
276 close(fp);
277 return 0;
278}
279
280static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
281 struct msg_stats *s, bool tx,
282 struct sockmap_options *opt)
283{
284 struct msghdr msg = {0};
285 int err, i, flags = MSG_NOSIGNAL;
286 struct iovec *iov;
287 unsigned char k;
288 bool data_test = opt->data_test;
289 bool drop = opt->drop_expected;
290
291 iov = calloc(iov_count, sizeof(struct iovec));
292 if (!iov)
293 return errno;
294
295 k = 0;
296 for (i = 0; i < iov_count; i++) {
297 unsigned char *d = calloc(iov_length, sizeof(char));
298
299 if (!d) {
300 fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
301 goto out_errno;
302 }
303 iov[i].iov_base = d;
304 iov[i].iov_len = iov_length;
305
306 if (data_test && tx) {
307 int j;
308
309 for (j = 0; j < iov_length; j++)
310 d[j] = k++;
311 }
312 }
313
314 msg.msg_iov = iov;
315 msg.msg_iovlen = iov_count;
316 k = 0;
317
318 if (tx) {
319 clock_gettime(CLOCK_MONOTONIC, &s->start);
320 for (i = 0; i < cnt; i++) {
321 int sent = sendmsg(fd, &msg, flags);
322
323 if (!drop && sent < 0) {
324 perror("send loop error:");
325 goto out_errno;
326 } else if (drop && sent >= 0) {
327 printf("send loop error expected: %i\n", sent);
328 errno = -EIO;
329 goto out_errno;
330 }
331 if (sent > 0)
332 s->bytes_sent += sent;
333 }
334 clock_gettime(CLOCK_MONOTONIC, &s->end);
335 } else {
336 int slct, recv, max_fd = fd;
337 int fd_flags = O_NONBLOCK;
338 struct timeval timeout;
339 float total_bytes;
340 int bytes_cnt = 0;
341 int chunk_sz;
342 fd_set w;
343
344 if (opt->sendpage)
345 chunk_sz = iov_length * cnt;
346 else
347 chunk_sz = iov_length * iov_count;
348
349 fcntl(fd, fd_flags);
350 total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
351 err = clock_gettime(CLOCK_MONOTONIC, &s->start);
352 if (err < 0)
353 perror("recv start time: ");
354 while (s->bytes_recvd < total_bytes) {
355 if (txmsg_cork) {
356 timeout.tv_sec = 0;
357 timeout.tv_usec = 1000;
358 } else {
359 timeout.tv_sec = 1;
360 timeout.tv_usec = 0;
361 }
362
363 /* FD sets */
364 FD_ZERO(&w);
365 FD_SET(fd, &w);
366
367 slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
368 if (slct == -1) {
369 perror("select()");
370 clock_gettime(CLOCK_MONOTONIC, &s->end);
371 goto out_errno;
372 } else if (!slct) {
373 if (opt->verbose)
374 fprintf(stderr, "unexpected timeout\n");
375 errno = -EIO;
376 clock_gettime(CLOCK_MONOTONIC, &s->end);
377 goto out_errno;
378 }
379
380 recv = recvmsg(fd, &msg, flags);
381 if (recv < 0) {
382 if (errno != EWOULDBLOCK) {
383 clock_gettime(CLOCK_MONOTONIC, &s->end);
384 perror("recv failed()\n");
385 goto out_errno;
386 }
387 }
388
389 s->bytes_recvd += recv;
390
391 if (data_test) {
392 int j;
393
394 for (i = 0; i < msg.msg_iovlen; i++) {
395 unsigned char *d = iov[i].iov_base;
396
397 for (j = 0;
398 j < iov[i].iov_len && recv; j++) {
399 if (d[j] != k++) {
400 errno = -EIO;
401 fprintf(stderr,
402 "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
403 i, j, d[j], k - 1, d[j+1], k);
404 goto out_errno;
405 }
406 bytes_cnt++;
407 if (bytes_cnt == chunk_sz) {
408 k = 0;
409 bytes_cnt = 0;
410 }
411 recv--;
412 }
413 }
414 }
415 }
416 clock_gettime(CLOCK_MONOTONIC, &s->end);
417 }
418
419 for (i = 0; i < iov_count; i++)
420 free(iov[i].iov_base);
421 free(iov);
422 return 0;
423out_errno:
424 for (i = 0; i < iov_count; i++)
425 free(iov[i].iov_base);
426 free(iov);
427 return errno;
428}
429
430static float giga = 1000000000;
431
432static inline float sentBps(struct msg_stats s)
433{
434 return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
435}
436
437static inline float recvdBps(struct msg_stats s)
438{
439 return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
440}
441
/*
 * Fork an RX child and a TX child and run one sendmsg/sendpage transfer
 * between the global sockets: TX always writes on c1; RX reads from p1
 * (base mode, no BPF) or p2 (redirected through the sockmap).  Each
 * child exits 1 on failure; the parent reaps both and returns the first
 * non-zero exit status.
 */
static int sendmsg_test(struct sockmap_options *opt)
{
	float sent_Bps = 0, recvd_Bps = 0;
	int rx_fd, txpid, rxpid, err = 0;
	struct msg_stats s = {0};
	int iov_count = opt->iov_count;
	int iov_buf = opt->iov_length;
	int rx_status, tx_status;
	int cnt = opt->rate;

	errno = 0;

	/* base mode reads the direct peer; otherwise the redirect target */
	if (opt->base)
		rx_fd = p1;
	else
		rx_fd = p2;

	rxpid = fork();
	if (rxpid == 0) {
		/* Nothing should arrive when the verdict is drop. */
		if (opt->drop_expected)
			exit(0);

		/* sendfile TX produces one contiguous stream, not iovecs */
		if (opt->sendpage)
			iov_count = 1;
		err = msg_loop(rx_fd, iov_count, iov_buf,
			       cnt, &s, false, opt);
		if (err && opt->verbose)
			fprintf(stderr,
				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
				iov_count, iov_buf, cnt, err);
		/* Unblock any peer still waiting on these sockets. */
		shutdown(p2, SHUT_RDWR);
		shutdown(p1, SHUT_RDWR);
		if (s.end.tv_sec - s.start.tv_sec) {
			sent_Bps = sentBps(s);
			recvd_Bps = recvdBps(s);
		}
		if (opt->verbose)
			fprintf(stdout,
				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
				s.bytes_sent, sent_Bps, sent_Bps/giga,
				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
		/* cork tests legitimately leave a short tail un-received */
		if (err && txmsg_cork)
			err = 0;
		exit(err ? 1 : 0);
	} else if (rxpid == -1) {
		perror("msg_loop_rx: ");
		return errno;
	}

	txpid = fork();
	if (txpid == 0) {
		if (opt->sendpage)
			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
		else
			err = msg_loop(c1, iov_count, iov_buf,
				       cnt, &s, true, opt);

		if (err)
			fprintf(stderr,
				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
				iov_count, iov_buf, cnt, err);
		shutdown(c1, SHUT_RDWR);
		if (s.end.tv_sec - s.start.tv_sec) {
			sent_Bps = sentBps(s);
			recvd_Bps = recvdBps(s);
		}
		if (opt->verbose)
			fprintf(stdout,
				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
				s.bytes_sent, sent_Bps, sent_Bps/giga,
				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
		exit(err ? 1 : 0);
	} else if (txpid == -1) {
		perror("msg_loop_tx: ");
		return errno;
	}

	/* Reap both children; RX failure wins, else report TX status. */
	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
	assert(waitpid(txpid, &tx_status, 0) == txpid);
	if (WIFEXITED(rx_status)) {
		err = WEXITSTATUS(rx_status);
		if (err) {
			fprintf(stderr, "rx thread exited with err %d. ", err);
			goto out;
		}
	}
	if (WIFEXITED(tx_status)) {
		err = WEXITSTATUS(tx_status);
		if (err)
			fprintf(stderr, "tx thread exited with err %d. ", err);
	}
out:
	return err;
}
536
537static int forever_ping_pong(int rate, struct sockmap_options *opt)
538{
539 struct timeval timeout;
540 char buf[1024] = {0};
541 int sc;
542
543 timeout.tv_sec = 10;
544 timeout.tv_usec = 0;
545
546 /* Ping/Pong data from client to server */
547 sc = send(c1, buf, sizeof(buf), 0);
548 if (sc < 0) {
549 perror("send failed()\n");
550 return sc;
551 }
552
553 do {
554 int s, rc, i, max_fd = p2;
555 fd_set w;
556
557 /* FD sets */
558 FD_ZERO(&w);
559 FD_SET(c1, &w);
560 FD_SET(c2, &w);
561 FD_SET(p1, &w);
562 FD_SET(p2, &w);
563
564 s = select(max_fd + 1, &w, NULL, NULL, &timeout);
565 if (s == -1) {
566 perror("select()");
567 break;
568 } else if (!s) {
569 fprintf(stderr, "unexpected timeout\n");
570 break;
571 }
572
573 for (i = 0; i <= max_fd && s > 0; ++i) {
574 if (!FD_ISSET(i, &w))
575 continue;
576
577 s--;
578
579 rc = recv(i, buf, sizeof(buf), 0);
580 if (rc < 0) {
581 if (errno != EWOULDBLOCK) {
582 perror("recv failed()\n");
583 return rc;
584 }
585 }
586
587 if (rc == 0) {
588 close(i);
589 break;
590 }
591
592 sc = send(i, buf, rc, 0);
593 if (sc < 0) {
594 perror("send failed()\n");
595 return sc;
596 }
597 }
598
599 if (rate)
600 sleep(rate);
601
602 if (opt->verbose) {
603 printf(".");
604 fflush(stdout);
605
606 }
607 } while (running);
608
609 return 0;
610}
611
/* Test modes dispatched by run_options(). */
enum {
	PING_PONG,	/* echo data between the sockets until stopped */
	SENDMSG,	/* sendmsg(2) TX with BPF programs attached */
	BASE,		/* sendmsg(2) TX, no BPF setup */
	BASE_SENDPAGE,	/* sendfile(2) TX, no BPF setup */
	SENDPAGE,	/* sendfile(2) TX with BPF programs attached */
};
619
/*
 * Run one test mode end to end: attach the stream parser/verdict and
 * sock_ops programs (skipped for BASE modes), build the socket topology,
 * install the selected SK_MSG verdict program and its map parameters
 * from the txmsg_* globals, run the transfer, then detach everything,
 * zero all maps and close the sockets.  Returns 0 on success.
 */
static int run_options(struct sockmap_options *options, int cg_fd, int test)
{
	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;

	/* If base test skip BPF setup */
	if (test == BASE || test == BASE_SENDPAGE)
		goto run;

	/* Attach programs to sockmap */
	err = bpf_prog_attach(prog_fd[0], map_fd[0],
				BPF_SK_SKB_STREAM_PARSER, 0);
	if (err) {
		fprintf(stderr,
			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
			prog_fd[0], map_fd[0], err, strerror(errno));
		return err;
	}

	err = bpf_prog_attach(prog_fd[1], map_fd[0],
			      BPF_SK_SKB_STREAM_VERDICT, 0);
	if (err) {
		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
			err, strerror(errno));
		return err;
	}

	/* Attach to cgroups */
	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
	if (err) {
		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
			err, strerror(errno));
		return err;
	}

run:
	err = sockmap_init_sockets(options->verbose);
	if (err) {
		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
		goto out;
	}

	/* Select the SK_MSG verdict program matching the txmsg_* flags;
	 * 0 means no TX program for this run. */
	if (txmsg_pass)
		tx_prog_fd = prog_fd[3];
	else if (txmsg_noisy)
		tx_prog_fd = prog_fd[4];
	else if (txmsg_redir)
		tx_prog_fd = prog_fd[5];
	else if (txmsg_redir_noisy)
		tx_prog_fd = prog_fd[6];
	else if (txmsg_drop)
		tx_prog_fd = prog_fd[9];
	/* apply and cork must be last */
	else if (txmsg_apply)
		tx_prog_fd = prog_fd[7];
	else if (txmsg_cork)
		tx_prog_fd = prog_fd[8];
	else
		tx_prog_fd = 0;

	if (tx_prog_fd) {
		int redir_fd, i = 0;	/* i shadows the outer loop index */

		err = bpf_prog_attach(tx_prog_fd,
				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
		if (err) {
			fprintf(stderr,
				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
				err, strerror(errno));
			goto out;
		}

		/* c1 is the TX socket the verdict program sees at key 0 */
		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
		if (err) {
			fprintf(stderr,
				"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
				err, strerror(errno));
			goto out;
		}

		/* redirect target: the other client, or loop back to c1 */
		if (txmsg_redir || txmsg_redir_noisy)
			redir_fd = c2;
		else
			redir_fd = c1;

		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
		if (err) {
			fprintf(stderr,
				"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
				err, strerror(errno));
			goto out;
		}

		if (txmsg_apply) {
			err = bpf_map_update_elem(map_fd[3],
						  &i, &txmsg_apply, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
					err, strerror(errno));
				goto out;
			}
		}

		if (txmsg_cork) {
			err = bpf_map_update_elem(map_fd[4],
						  &i, &txmsg_cork, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
					err, strerror(errno));
				goto out;
			}
		}

		/* start/end offsets share map_fd[5] at keys 0 and 1 */
		if (txmsg_start) {
			err = bpf_map_update_elem(map_fd[5],
						  &i, &txmsg_start, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
					err, strerror(errno));
				goto out;
			}
		}

		if (txmsg_end) {
			i = 1;
			err = bpf_map_update_elem(map_fd[5],
						  &i, &txmsg_end, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
					err, strerror(errno));
				goto out;
			}
		}

		/* ingress redirect setup: errors are logged but not fatal */
		if (txmsg_ingress) {
			int in = BPF_F_INGRESS;

			i = 0;
			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
					err, strerror(errno));
			}
			i = 1;
			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
					err, strerror(errno));
			}
			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
					err, strerror(errno));
			}

			i = 2;
			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
					err, strerror(errno));
			}
		}

		if (txmsg_skb) {
			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
					p2 : p1;
			int ingress = BPF_F_INGRESS;

			i = 0;
			err = bpf_map_update_elem(map_fd[7],
						  &i, &ingress, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
					err, strerror(errno));
			}

			i = 3;
			err = bpf_map_update_elem(map_fd[0],
						  &i, &skb_fd, BPF_ANY);
			if (err) {
				fprintf(stderr,
					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
					err, strerror(errno));
			}
		}
	}

	if (txmsg_drop)
		options->drop_expected = true;

	/* Dispatch the actual transfer for the requested mode. */
	if (test == PING_PONG)
		err = forever_ping_pong(options->rate, options);
	else if (test == SENDMSG) {
		options->base = false;
		options->sendpage = false;
		err = sendmsg_test(options);
	} else if (test == SENDPAGE) {
		options->base = false;
		options->sendpage = true;
		err = sendmsg_test(options);
	} else if (test == BASE) {
		options->base = true;
		options->sendpage = false;
		err = sendmsg_test(options);
	} else if (test == BASE_SENDPAGE) {
		options->base = true;
		options->sendpage = true;
		err = sendmsg_test(options);
	} else
		fprintf(stderr, "unknown test\n");
out:
	/* Detach and zero all the maps so the next run starts clean. */
	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
	if (tx_prog_fd >= 0)
		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);

	for (i = 0; i < 8; i++) {
		key = next_key = 0;
		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
			key = next_key;
		}
	}

	close(s1);
	close(s2);
	close(p1);
	close(p2);
	close(c1);
	close(c2);
	return err;
}
864
865static char *test_to_str(int test)
866{
867 switch (test) {
868 case SENDMSG:
869 return "sendmsg";
870 case SENDPAGE:
871 return "sendpage";
872 }
873 return "unknown";
874}
875
876#define OPTSTRING 60
877static void test_options(char *options)
878{
879 char tstr[OPTSTRING];
880
881 memset(options, 0, OPTSTRING);
882
883 if (txmsg_pass)
884 strncat(options, "pass,", OPTSTRING);
885 if (txmsg_noisy)
886 strncat(options, "pass_noisy,", OPTSTRING);
887 if (txmsg_redir)
888 strncat(options, "redir,", OPTSTRING);
889 if (txmsg_redir_noisy)
890 strncat(options, "redir_noisy,", OPTSTRING);
891 if (txmsg_drop)
892 strncat(options, "drop,", OPTSTRING);
893 if (txmsg_apply) {
894 snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
895 strncat(options, tstr, OPTSTRING);
896 }
897 if (txmsg_cork) {
898 snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
899 strncat(options, tstr, OPTSTRING);
900 }
901 if (txmsg_start) {
902 snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
903 strncat(options, tstr, OPTSTRING);
904 }
905 if (txmsg_end) {
906 snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
907 strncat(options, tstr, OPTSTRING);
908 }
909 if (txmsg_ingress)
910 strncat(options, "ingress,", OPTSTRING);
911 if (txmsg_skb)
912 strncat(options, "skb,", OPTSTRING);
913}
914
915static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
916{
917 char *options = calloc(OPTSTRING, sizeof(char));
918 int err;
919
920 if (test == SENDPAGE)
921 opt->sendpage = true;
922 else
923 opt->sendpage = false;
924
925 if (txmsg_drop)
926 opt->drop_expected = true;
927 else
928 opt->drop_expected = false;
929
930 test_options(options);
931
932 fprintf(stdout,
933 "[TEST %i]: (%i, %i, %i, %s, %s): ",
934 test_cnt, opt->rate, opt->iov_count, opt->iov_length,
935 test_to_str(test), options);
936 fflush(stdout);
937 err = run_options(opt, cgrp, test);
938 fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
939 test_cnt++;
940 !err ? passed++ : failed++;
941 free(options);
942 return err;
943}
944
945static int test_exec(int cgrp, struct sockmap_options *opt)
946{
947 int err = __test_exec(cgrp, SENDMSG, opt);
948
949 if (err)
950 goto out;
951
952 err = __test_exec(cgrp, SENDPAGE, opt);
953out:
954 return err;
955}
956
957static int test_loop(int cgrp)
958{
959 struct sockmap_options opt;
960
961 int err, i, l, r;
962
963 opt.verbose = 0;
964 opt.base = false;
965 opt.sendpage = false;
966 opt.data_test = false;
967 opt.drop_expected = false;
968 opt.iov_count = 0;
969 opt.iov_length = 0;
970 opt.rate = 0;
971
972 r = 1;
973 for (i = 1; i < 100; i += 33) {
974 for (l = 1; l < 100; l += 33) {
975 opt.rate = r;
976 opt.iov_count = i;
977 opt.iov_length = l;
978 err = test_exec(cgrp, &opt);
979 if (err)
980 goto out;
981 }
982 }
983 sched_yield();
984out:
985 return err;
986}
987
988static int test_txmsg(int cgrp)
989{
990 int err;
991
992 txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
993 txmsg_apply = txmsg_cork = 0;
994 txmsg_ingress = txmsg_skb = 0;
995
996 txmsg_pass = 1;
997 err = test_loop(cgrp);
998 txmsg_pass = 0;
999 if (err)
1000 goto out;
1001
1002 txmsg_redir = 1;
1003 err = test_loop(cgrp);
1004 txmsg_redir = 0;
1005 if (err)
1006 goto out;
1007
1008 txmsg_drop = 1;
1009 err = test_loop(cgrp);
1010 txmsg_drop = 0;
1011 if (err)
1012 goto out;
1013
1014 txmsg_redir = 1;
1015 txmsg_ingress = 1;
1016 err = test_loop(cgrp);
1017 txmsg_redir = 0;
1018 txmsg_ingress = 0;
1019 if (err)
1020 goto out;
1021out:
1022 txmsg_pass = 0;
1023 txmsg_redir = 0;
1024 txmsg_drop = 0;
1025 return err;
1026}
1027
1028static int test_send(struct sockmap_options *opt, int cgrp)
1029{
1030 int err;
1031
1032 opt->iov_length = 1;
1033 opt->iov_count = 1;
1034 opt->rate = 1;
1035 err = test_exec(cgrp, opt);
1036 if (err)
1037 goto out;
1038
1039 opt->iov_length = 1;
1040 opt->iov_count = 1024;
1041 opt->rate = 1;
1042 err = test_exec(cgrp, opt);
1043 if (err)
1044 goto out;
1045
1046 opt->iov_length = 1024;
1047 opt->iov_count = 1;
1048 opt->rate = 1;
1049 err = test_exec(cgrp, opt);
1050 if (err)
1051 goto out;
1052
1053 opt->iov_length = 1;
1054 opt->iov_count = 1;
1055 opt->rate = 512;
1056 err = test_exec(cgrp, opt);
1057 if (err)
1058 goto out;
1059
1060 opt->iov_length = 256;
1061 opt->iov_count = 1024;
1062 opt->rate = 2;
1063 err = test_exec(cgrp, opt);
1064 if (err)
1065 goto out;
1066
1067 opt->rate = 100;
1068 opt->iov_count = 1;
1069 opt->iov_length = 5;
1070 err = test_exec(cgrp, opt);
1071 if (err)
1072 goto out;
1073out:
1074 sched_yield();
1075 return err;
1076}
1077
1078static int test_mixed(int cgrp)
1079{
1080 struct sockmap_options opt = {0};
1081 int err;
1082
1083 txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
1084 txmsg_apply = txmsg_cork = 0;
1085 txmsg_start = txmsg_end = 0;
1086 /* Test small and large iov_count values with pass/redir/apply/cork */
1087 txmsg_pass = 1;
1088 txmsg_redir = 0;
1089 txmsg_apply = 1;
1090 txmsg_cork = 0;
1091 err = test_send(&opt, cgrp);
1092 if (err)
1093 goto out;
1094
1095 txmsg_pass = 1;
1096 txmsg_redir = 0;
1097 txmsg_apply = 0;
1098 txmsg_cork = 1;
1099 err = test_send(&opt, cgrp);
1100 if (err)
1101 goto out;
1102
1103 txmsg_pass = 1;
1104 txmsg_redir = 0;
1105 txmsg_apply = 1;
1106 txmsg_cork = 1;
1107 err = test_send(&opt, cgrp);
1108 if (err)
1109 goto out;
1110
1111 txmsg_pass = 1;
1112 txmsg_redir = 0;
1113 txmsg_apply = 1024;
1114 txmsg_cork = 0;
1115 err = test_send(&opt, cgrp);
1116 if (err)
1117 goto out;
1118
1119 txmsg_pass = 1;
1120 txmsg_redir = 0;
1121 txmsg_apply = 0;
1122 txmsg_cork = 1024;
1123 err = test_send(&opt, cgrp);
1124 if (err)
1125 goto out;
1126
1127 txmsg_pass = 1;
1128 txmsg_redir = 0;
1129 txmsg_apply = 1024;
1130 txmsg_cork = 1024;
1131 err = test_send(&opt, cgrp);
1132 if (err)
1133 goto out;
1134
1135 txmsg_pass = 1;
1136 txmsg_redir = 0;
1137 txmsg_cork = 4096;
1138 txmsg_apply = 4096;
1139 err = test_send(&opt, cgrp);
1140 if (err)
1141 goto out;
1142
1143 txmsg_pass = 0;
1144 txmsg_redir = 1;
1145 txmsg_apply = 1;
1146 txmsg_cork = 0;
1147 err = test_send(&opt, cgrp);
1148 if (err)
1149 goto out;
1150
1151 txmsg_pass = 0;
1152 txmsg_redir = 1;
1153 txmsg_apply = 0;
1154 txmsg_cork = 1;
1155 err = test_send(&opt, cgrp);
1156 if (err)
1157 goto out;
1158
1159 txmsg_pass = 0;
1160 txmsg_redir = 1;
1161 txmsg_apply = 1024;
1162 txmsg_cork = 0;
1163 err = test_send(&opt, cgrp);
1164 if (err)
1165 goto out;
1166
1167 txmsg_pass = 0;
1168 txmsg_redir = 1;
1169 txmsg_apply = 0;
1170 txmsg_cork = 1024;
1171 err = test_send(&opt, cgrp);
1172 if (err)
1173 goto out;
1174
1175 txmsg_pass = 0;
1176 txmsg_redir = 1;
1177 txmsg_apply = 1024;
1178 txmsg_cork = 1024;
1179 err = test_send(&opt, cgrp);
1180 if (err)
1181 goto out;
1182
1183 txmsg_pass = 0;
1184 txmsg_redir = 1;
1185 txmsg_cork = 4096;
1186 txmsg_apply = 4096;
1187 err = test_send(&opt, cgrp);
1188 if (err)
1189 goto out;
1190out:
1191 return err;
1192}
1193
/* Exercise the msg_pull_data start/end API: first a basic sweep via
 * test_txmsg(), then a series of corked sends (16 x 100B = 1600B)
 * pulling windows at the front, middle, last sg entry, a single byte,
 * and several out-of-range combinations.
 *
 * NOTE(review): txmsg_cork is set to 1600 here and never cleared on
 * exit (only start/end are reset at out:) — confirm callers reset it.
 *
 * Returns 0 on success, else the first error from a sub-test.
 */
static int test_start_end(int cgrp)
{
	struct sockmap_options opt = {0};
	int err, i;

	/* Test basic start/end with lots of iov_count and iov_lengths */
	txmsg_start = 1;
	txmsg_end = 2;
	err = test_txmsg(cgrp);
	if (err)
		goto out;

	/* Test start/end with cork */
	opt.rate = 16;
	opt.iov_count = 1;
	opt.iov_length = 100;
	txmsg_cork = 1600;

	for (i = 99; i <= 1600; i += 500) {
		txmsg_start = 0;
		txmsg_end = i;
		err = test_exec(cgrp, &opt);
		if (err)
			goto out;
	}

	/* Test start/end with cork but pull data in middle */
	for (i = 199; i <= 1600; i += 500) {
		txmsg_start = 100;
		txmsg_end = i;
		err = test_exec(cgrp, &opt);
		if (err)
			goto out;
	}

	/* Test start/end with cork pulling last sg entry */
	txmsg_start = 1500;
	txmsg_end = 1600;
	err = test_exec(cgrp, &opt);
	if (err)
		goto out;

	/* Test start/end pull of single byte in last page */
	txmsg_start = 1111;
	txmsg_end = 1112;
	err = test_exec(cgrp, &opt);
	if (err)
		goto out;

	/* Test start/end with end < start */
	txmsg_start = 1111;
	txmsg_end = 0;
	err = test_exec(cgrp, &opt);
	if (err)
		goto out;

	/* Test start/end with end > data */
	txmsg_start = 0;
	txmsg_end = 1601;
	err = test_exec(cgrp, &opt);
	if (err)
		goto out;

	/* Test start/end with start > data */
	txmsg_start = 1601;
	txmsg_end = 1600;
	err = test_exec(cgrp, &opt);

out:
	txmsg_start = 0;
	txmsg_end = 0;
	sched_yield();
	return err;
}
1268
/* Names of the maps declared in the BPF object; populate_progs() looks
 * each one up by name and stores its fd at the same index in map_fd[].
 * NOTE(review): the index order appears to be relied on by map_fd[]
 * users elsewhere in this file — confirm before reordering.
 */
char *map_names[] = {
	"sock_map",
	"sock_map_txmsg",
	"sock_map_redir",
	"sock_apply_bytes",
	"sock_cork_bytes",
	"sock_pull_bytes",
	"sock_redir_flags",
	"sock_skb_opts",
};
1279
/* Expected attach type for each program in the BPF object, in the
 * order bpf_object__for_each_program() visits them (parser, verdict,
 * sockops, then the sk_msg programs). Must stay in sync with
 * prog_type[] below and the SEC() order in test_sockmap_kern.h.
 */
int prog_attach_type[] = {
	BPF_SK_SKB_STREAM_PARSER,
	BPF_SK_SKB_STREAM_VERDICT,
	BPF_CGROUP_SOCK_OPS,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
	BPF_SK_MSG_VERDICT,
};
1292
/* Program type for each program in the BPF object; index-parallel to
 * prog_attach_type[] above — keep the two arrays in sync.
 */
int prog_type[] = {
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_SK_SKB,
	BPF_PROG_TYPE_SOCK_OPS,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
	BPF_PROG_TYPE_SK_MSG,
};
1305
1306static int populate_progs(char *bpf_file)
1307{
1308 struct bpf_program *prog;
1309 struct bpf_object *obj;
1310 int i = 0;
1311 long err;
1312
1313 obj = bpf_object__open(bpf_file);
1314 err = libbpf_get_error(obj);
1315 if (err) {
1316 char err_buf[256];
1317
1318 libbpf_strerror(err, err_buf, sizeof(err_buf));
1319 printf("Unable to load eBPF objects in file '%s' : %s\n",
1320 bpf_file, err_buf);
1321 return -1;
1322 }
1323
1324 bpf_object__for_each_program(prog, obj) {
1325 bpf_program__set_type(prog, prog_type[i]);
1326 bpf_program__set_expected_attach_type(prog,
1327 prog_attach_type[i]);
1328 i++;
1329 }
1330
1331 i = bpf_object__load(obj);
1332 i = 0;
1333 bpf_object__for_each_program(prog, obj) {
1334 prog_fd[i] = bpf_program__fd(prog);
1335 i++;
1336 }
1337
1338 for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
1339 maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1340 map_fd[i] = bpf_map__fd(maps[i]);
1341 if (map_fd[i] < 0) {
1342 fprintf(stderr, "load_bpf_file: (%i) %s\n",
1343 map_fd[i], strerror(errno));
1344 return -1;
1345 }
1346 }
1347
1348 return 0;
1349}
1350
1351static int __test_suite(char *bpf_file)
1352{
1353 int cg_fd, err;
1354
1355 err = populate_progs(bpf_file);
1356 if (err < 0) {
1357 fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1358 return err;
1359 }
1360
1361 if (setup_cgroup_environment()) {
1362 fprintf(stderr, "ERROR: cgroup env failed\n");
1363 return -EINVAL;
1364 }
1365
1366 cg_fd = create_and_get_cgroup(CG_PATH);
1367 if (cg_fd < 0) {
1368 fprintf(stderr,
1369 "ERROR: (%i) open cg path failed: %s\n",
1370 cg_fd, optarg);
1371 return cg_fd;
1372 }
1373
1374 if (join_cgroup(CG_PATH)) {
1375 fprintf(stderr, "ERROR: failed to join cgroup\n");
1376 return -EINVAL;
1377 }
1378
1379 /* Tests basic commands and APIs with range of iov values */
1380 txmsg_start = txmsg_end = 0;
1381 err = test_txmsg(cg_fd);
1382 if (err)
1383 goto out;
1384
1385 /* Tests interesting combinations of APIs used together */
1386 err = test_mixed(cg_fd);
1387 if (err)
1388 goto out;
1389
1390 /* Tests pull_data API using start/end API */
1391 err = test_start_end(cg_fd);
1392 if (err)
1393 goto out;
1394
1395out:
1396 printf("Summary: %i PASSED %i FAILED\n", passed, failed);
1397 cleanup_cgroup_environment();
1398 close(cg_fd);
1399 return err;
1400}
1401
1402static int test_suite(void)
1403{
1404 int err;
1405
1406 err = __test_suite(BPF_SOCKMAP_FILENAME);
1407 if (err)
1408 goto out;
1409 err = __test_suite(BPF_SOCKHASH_FILENAME);
1410out:
1411 return err;
1412}
1413
1414int main(int argc, char **argv)
1415{
1416 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
1417 int iov_count = 1, length = 1024, rate = 1;
1418 struct sockmap_options options = {0};
1419 int opt, longindex, err, cg_fd = 0;
1420 char *bpf_file = BPF_SOCKMAP_FILENAME;
1421 int test = PING_PONG;
1422
1423 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
1424 perror("setrlimit(RLIMIT_MEMLOCK)");
1425 return 1;
1426 }
1427
1428 if (argc < 2)
1429 return test_suite();
1430
1431 while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
1432 long_options, &longindex)) != -1) {
1433 switch (opt) {
1434 case 's':
1435 txmsg_start = atoi(optarg);
1436 break;
1437 case 'e':
1438 txmsg_end = atoi(optarg);
1439 break;
1440 case 'a':
1441 txmsg_apply = atoi(optarg);
1442 break;
1443 case 'k':
1444 txmsg_cork = atoi(optarg);
1445 break;
1446 case 'c':
1447 cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1448 if (cg_fd < 0) {
1449 fprintf(stderr,
1450 "ERROR: (%i) open cg path failed: %s\n",
1451 cg_fd, optarg);
1452 return cg_fd;
1453 }
1454 break;
1455 case 'r':
1456 rate = atoi(optarg);
1457 break;
1458 case 'v':
1459 options.verbose = 1;
1460 break;
1461 case 'i':
1462 iov_count = atoi(optarg);
1463 break;
1464 case 'l':
1465 length = atoi(optarg);
1466 break;
1467 case 'd':
1468 options.data_test = true;
1469 break;
1470 case 't':
1471 if (strcmp(optarg, "ping") == 0) {
1472 test = PING_PONG;
1473 } else if (strcmp(optarg, "sendmsg") == 0) {
1474 test = SENDMSG;
1475 } else if (strcmp(optarg, "base") == 0) {
1476 test = BASE;
1477 } else if (strcmp(optarg, "base_sendpage") == 0) {
1478 test = BASE_SENDPAGE;
1479 } else if (strcmp(optarg, "sendpage") == 0) {
1480 test = SENDPAGE;
1481 } else {
1482 usage(argv);
1483 return -1;
1484 }
1485 break;
1486 case 0:
1487 break;
1488 case 'h':
1489 default:
1490 usage(argv);
1491 return -1;
1492 }
1493 }
1494
1495 if (!cg_fd) {
1496 fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
1497 argv[0]);
1498 return -1;
1499 }
1500
1501 err = populate_progs(bpf_file);
1502 if (err) {
1503 fprintf(stderr, "populate program: (%s) %s\n",
1504 bpf_file, strerror(errno));
1505 return 1;
1506 }
1507 running = 1;
1508
1509 /* catch SIGINT */
1510 signal(SIGINT, running_handler);
1511
1512 options.iov_count = iov_count;
1513 options.iov_length = length;
1514 options.rate = rate;
1515
1516 err = run_options(&options, cg_fd, test);
1517 close(cg_fd);
1518 return err;
1519}
1520
1521void running_handler(int a)
1522{
1523 running = 0;
1524}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000000..677b2ed1cc1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
3#define SOCKMAP
4#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
5#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
new file mode 100644
index 000000000000..8e8e41780bb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -0,0 +1,363 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
3#include <stddef.h>
4#include <string.h>
5#include <linux/bpf.h>
6#include <linux/if_ether.h>
7#include <linux/if_packet.h>
8#include <linux/ip.h>
9#include <linux/ipv6.h>
10#include <linux/in.h>
11#include <linux/udp.h>
12#include <linux/tcp.h>
13#include <linux/pkt_cls.h>
14#include <sys/socket.h>
15#include "bpf_helpers.h"
16#include "bpf_endian.h"
17
18/* Sockmap sample program connects a client and a backend together
19 * using cgroups.
20 *
21 * client:X <---> frontend:80 client:X <---> backend:80
22 *
23 * For simplicity we hard code values here and bind 1:1. The hard
24 * coded values are part of the setup in sockmap.sh script that
25 * is associated with this BPF program.
26 *
27 * The bpf_printk is verbose and prints information as connections
28 * are established and verdicts are decided.
29 */
30
/* printf-style debug output via bpf_trace_printk(); the format string
 * is copied into a local array because the helper requires it on the
 * BPF stack.
 */
#define bpf_printk(fmt, ...)					\
({								\
	char ____fmt[] = fmt;					\
	bpf_trace_printk(____fmt, sizeof(____fmt),		\
			##__VA_ARGS__);				\
})
37
/* Socket map/hash (type selected by TEST_MAP_TYPE) populated by the
 * sockops program and used as the sk_skb redirect target.
 */
struct bpf_map_def SEC("maps") sock_map = {
	.type = TEST_MAP_TYPE,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};

/* Socket map for the sk_msg (txmsg) programs */
struct bpf_map_def SEC("maps") sock_map_txmsg = {
	.type = TEST_MAP_TYPE,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};

/* Redirect target for the sk_msg redirect helpers */
struct bpf_map_def SEC("maps") sock_map_redir = {
	.type = TEST_MAP_TYPE,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 20,
};

/* Slot 0: byte count handed to bpf_msg_apply_bytes() when non-zero */
struct bpf_map_def SEC("maps") sock_apply_bytes = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1
};

/* Slot 0: byte count handed to bpf_msg_cork_bytes() when non-zero */
struct bpf_map_def SEC("maps") sock_cork_bytes = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1
};

/* Slots 0/1: start and end offsets for bpf_msg_pull_data() */
struct bpf_map_def SEC("maps") sock_pull_bytes = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 2
};

/* Slot 0: non-zero enables ingress redirect flags in the sk_msg progs */
struct bpf_map_def SEC("maps") sock_redir_flags = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1
};

/* Slot 0: non-zero enables flags/alternate slot in the sk_skb verdict */
struct bpf_map_def SEC("maps") sock_skb_opts = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1
};
93
94SEC("sk_skb1")
95int bpf_prog1(struct __sk_buff *skb)
96{
97 return skb->len;
98}
99
/* SK_SKB stream verdict: pick a sock_map slot from the local port
 * (10 for the :10000 listener, else 1), optionally overridden to
 * slot 3 with flags from sock_skb_opts[0], then redirect.
 *
 * NOTE(review): 'rport' is read but never used; 'flags' is __u64 but
 * printed with %i — both look like leftovers, verify before relying
 * on the trace output.
 */
SEC("sk_skb2")
int bpf_prog2(struct __sk_buff *skb)
{
	__u32 lport = skb->local_port;
	__u32 rport = skb->remote_port;
	int len, *f, ret, zero = 0;
	__u64 flags = 0;

	if (lport == 10000)
		ret = 10;
	else
		ret = 1;

	len = (__u32)skb->data_end - (__u32)skb->data;
	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
	if (f && *f) {
		ret = 3;
		flags = *f;
	}

	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
		   len, flags);
#ifdef SOCKMAP
	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
#else
	return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
#endif

}
129
/* sockops program: inserts sockets into sock_map (or sock_hash) as
 * connections are established. The passive side of port 10000 goes
 * into slot 1; the active side connecting to port 10001 goes into
 * slot 10. All other events are ignored.
 *
 * NOTE(review): locals 'index' and 'key' are declared but never used.
 */
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
	__u32 lport, rport;
	int op, err = 0, index, key, ret;


	op = (int) skops->op;

	switch (op) {
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		lport = skops->local_port;
		rport = skops->remote_port;

		/* server side of the harness listens on 10000 */
		if (lport == 10000) {
			ret = 1;
#ifdef SOCKMAP
			err = bpf_sock_map_update(skops, &sock_map, &ret,
						  BPF_NOEXIST);
#else
			err = bpf_sock_hash_update(skops, &sock_map, &ret,
						   BPF_NOEXIST);
#endif
			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
				   lport, bpf_ntohl(rport), err);
		}
		break;
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		lport = skops->local_port;
		rport = skops->remote_port;

		/* remote_port is in network byte order */
		if (bpf_ntohl(rport) == 10001) {
			ret = 10;
#ifdef SOCKMAP
			err = bpf_sock_map_update(skops, &sock_map, &ret,
						  BPF_NOEXIST);
#else
			err = bpf_sock_hash_update(skops, &sock_map, &ret,
						   BPF_NOEXIST);
#endif
			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
				   lport, bpf_ntohl(rport), err);
		}
		break;
	default:
		break;
	}

	return 0;
}
180
/* sk_msg verdict: apply/cork byte limits and pull a [start, end)
 * window into linear data when the corresponding control maps are
 * populated; always passes the message.
 */
SEC("sk_msg1")
int bpf_prog4(struct sk_msg_md *msg)
{
	int *bytes, zero = 0, one = 1;
	int *start, *end;

	/* sock_apply_bytes[0]: apply limit, if configured */
	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes)
		bpf_msg_apply_bytes(msg, *bytes);
	/* sock_cork_bytes[0]: cork limit, if configured */
	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes)
		bpf_msg_cork_bytes(msg, *bytes);
	/* sock_pull_bytes[0]/[1]: pull window start/end offsets */
	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
	if (start && end)
		bpf_msg_pull_data(msg, *start, *end, 0);
	return SK_PASS;
}
199
/* Verbose variant of bpf_prog4: same apply/cork/pull sequence, but
 * traces helper return codes and the data length before/after the
 * pull. Always passes the message.
 *
 * NOTE(review): the 'start ? *start : 0' ternaries are redundant —
 * they sit inside 'if (start && end)'.
 */
SEC("sk_msg2")
int bpf_prog5(struct sk_msg_md *msg)
{
	int err1 = -1, err2 = -1, zero = 0, one = 1;
	int *bytes, *start, *end, len1, len2;

	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes)
		err1 = bpf_msg_apply_bytes(msg, *bytes);
	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes)
		err2 = bpf_msg_cork_bytes(msg, *bytes);
	/* length of the currently-linear data region */
	len1 = (__u64)msg->data_end - (__u64)msg->data;
	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
	if (start && end) {
		int err;

		bpf_printk("sk_msg2: pull(%i:%i)\n",
			   start ? *start : 0, end ? *end : 0);
		err = bpf_msg_pull_data(msg, *start, *end, 0);
		if (err)
			bpf_printk("sk_msg2: pull_data err %i\n",
				   err);
		len2 = (__u64)msg->data_end - (__u64)msg->data;
		bpf_printk("sk_msg2: length update %i->%i\n",
			   len1, len2);
	}
	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
		   len1, err1, err2);
	return SK_PASS;
}
232
/* sk_msg verdict with redirect: after the usual apply/cork/pull
 * sequence, redirect into sock_map_redir — slot 0 by default, or
 * slot 2 with flags (e.g. ingress) when sock_redir_flags[0] is set.
 */
SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
	int *bytes, zero = 0, one = 1, key = 0;
	int *start, *end, *f;
	__u64 flags = 0;

	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes)
		bpf_msg_apply_bytes(msg, *bytes);
	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes)
		bpf_msg_cork_bytes(msg, *bytes);
	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
	if (start && end)
		bpf_msg_pull_data(msg, *start, *end, 0);
	/* sock_redir_flags[0] selects slot 2 and supplies redirect flags */
	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
	if (f && *f) {
		key = 2;
		flags = *f;
	}
#ifdef SOCKMAP
	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
#else
	return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
#endif
}
261
/* Verbose variant of bpf_prog6: apply/cork/pull with tracing, then
 * redirect into sock_map_redir and return the redirect verdict.
 *
 * NOTE(review): the trace messages are labelled "sk_msg2"/"sk_msg3"
 * although this is the sk_msg4 section — apparent copy-paste; also
 * 'flags' (__u64) is printed with %i.
 */
SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg)
{
	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
	int *f, *bytes, *start, *end, len1, len2;
	__u64 flags = 0;

	int err;
	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes)
		err1 = bpf_msg_apply_bytes(msg, *bytes);
	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes)
		err2 = bpf_msg_cork_bytes(msg, *bytes);
	len1 = (__u64)msg->data_end - (__u64)msg->data;
	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
	if (start && end) {

		bpf_printk("sk_msg2: pull(%i:%i)\n",
			   start ? *start : 0, end ? *end : 0);
		err = bpf_msg_pull_data(msg, *start, *end, 0);
		if (err)
			bpf_printk("sk_msg2: pull_data err %i\n",
				   err);
		len2 = (__u64)msg->data_end - (__u64)msg->data;
		bpf_printk("sk_msg2: length update %i->%i\n",
			   len1, len2);
	}
	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
	if (f && *f) {
		key = 2;
		flags = *f;
	}
	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
		   len1, flags, err1 ? err1 : err2);
#ifdef SOCKMAP
	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
#else
	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
#endif
	bpf_printk("sk_msg3: err %i\n", err);
	return err;
}
306
/* sk_msg verdict: pass only when an apply limit is configured and
 * bpf_msg_apply_bytes() succeeds; drop otherwise.
 *
 * NOTE(review): 'data_end' and 'data' are computed but never used.
 */
SEC("sk_msg5")
int bpf_prog8(struct sk_msg_md *msg)
{
	void *data_end = (void *)(long) msg->data_end;
	void *data = (void *)(long) msg->data;
	int ret = 0, *bytes, zero = 0;

	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes) {
		ret = bpf_msg_apply_bytes(msg, *bytes);
		if (ret)
			return SK_DROP;
	} else {
		return SK_DROP;
	}
	return SK_PASS;
}
/* sk_msg verdict: cork until the configured byte count is reached.
 * If the message already holds at least sock_cork_bytes[0] bytes it
 * passes immediately; a failed cork request drops the message.
 */
SEC("sk_msg6")
int bpf_prog9(struct sk_msg_md *msg)
{
	void *data_end = (void *)(long) msg->data_end;
	void *data = (void *)(long) msg->data;
	int ret = 0, *bytes, zero = 0;

	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes) {
		/* enough linear data already queued — no need to cork */
		if (((__u64)data_end - (__u64)data) >= *bytes)
			return SK_PASS;
		ret = bpf_msg_cork_bytes(msg, *bytes);
		if (ret)
			return SK_DROP;
	}
	return SK_PASS;
}
341
/* sk_msg verdict used for the drop tests: performs the same
 * apply/cork/pull sequence as bpf_prog4 but unconditionally drops
 * the message.
 */
SEC("sk_msg7")
int bpf_prog10(struct sk_msg_md *msg)
{
	int *bytes, zero = 0, one = 1;
	int *start, *end;

	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
	if (bytes)
		bpf_msg_apply_bytes(msg, *bytes);
	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
	if (bytes)
		bpf_msg_cork_bytes(msg, *bytes);
	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
	if (start && end)
		bpf_msg_pull_data(msg, *start, *end, 0);

	return SK_DROP;
}
361
362int _version SEC("version") = 1;
363char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
index b755bd783ce5..d86c281e957f 100644
--- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
19 .type = BPF_MAP_TYPE_HASH, 19 .type = BPF_MAP_TYPE_HASH,
20 .key_size = sizeof(__u32), 20 .key_size = sizeof(__u32),
21 .value_size = sizeof(__u32), 21 .value_size = sizeof(__u32),
22 .max_entries = 10000, 22 .max_entries = 16384,
23}; 23};
24 24
25struct bpf_map_def SEC("maps") stackmap = { 25struct bpf_map_def SEC("maps") stackmap = {
@@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = {
31 .map_flags = BPF_F_STACK_BUILD_ID, 31 .map_flags = BPF_F_STACK_BUILD_ID,
32}; 32};
33 33
34struct bpf_map_def SEC("maps") stack_amap = {
35 .type = BPF_MAP_TYPE_ARRAY,
36 .key_size = sizeof(__u32),
37 .value_size = sizeof(struct bpf_stack_build_id)
38 * PERF_MAX_STACK_DEPTH,
39 .max_entries = 128,
40};
41
34/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ 42/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
35struct random_urandom_args { 43struct random_urandom_args {
36 unsigned long long pad; 44 unsigned long long pad;
@@ -42,7 +50,10 @@ struct random_urandom_args {
42SEC("tracepoint/random/urandom_read") 50SEC("tracepoint/random/urandom_read")
43int oncpu(struct random_urandom_args *args) 51int oncpu(struct random_urandom_args *args)
44{ 52{
53 __u32 max_len = sizeof(struct bpf_stack_build_id)
54 * PERF_MAX_STACK_DEPTH;
45 __u32 key = 0, val = 0, *value_p; 55 __u32 key = 0, val = 0, *value_p;
56 void *stack_p;
46 57
47 value_p = bpf_map_lookup_elem(&control_map, &key); 58 value_p = bpf_map_lookup_elem(&control_map, &key);
48 if (value_p && *value_p) 59 if (value_p && *value_p)
@@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args)
50 61
51 /* The size of stackmap and stackid_hmap should be the same */ 62 /* The size of stackmap and stackid_hmap should be the same */
52 key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); 63 key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
53 if ((int)key >= 0) 64 if ((int)key >= 0) {
54 bpf_map_update_elem(&stackid_hmap, &key, &val, 0); 65 bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
66 stack_p = bpf_map_lookup_elem(&stack_amap, &key);
67 if (stack_p)
68 bpf_get_stack(args, stack_p, max_len,
69 BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
70 }
55 71
56 return 0; 72 return 0;
57} 73}
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
index 76d85c5d08bd..af111af7ca1a 100644
--- a/tools/testing/selftests/bpf/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
19 .type = BPF_MAP_TYPE_HASH, 19 .type = BPF_MAP_TYPE_HASH,
20 .key_size = sizeof(__u32), 20 .key_size = sizeof(__u32),
21 .value_size = sizeof(__u32), 21 .value_size = sizeof(__u32),
22 .max_entries = 10000, 22 .max_entries = 16384,
23}; 23};
24 24
25struct bpf_map_def SEC("maps") stackmap = { 25struct bpf_map_def SEC("maps") stackmap = {
26 .type = BPF_MAP_TYPE_STACK_TRACE, 26 .type = BPF_MAP_TYPE_STACK_TRACE,
27 .key_size = sizeof(__u32), 27 .key_size = sizeof(__u32),
28 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, 28 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
29 .max_entries = 10000, 29 .max_entries = 16384,
30};
31
32struct bpf_map_def SEC("maps") stack_amap = {
33 .type = BPF_MAP_TYPE_ARRAY,
34 .key_size = sizeof(__u32),
35 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
36 .max_entries = 16384,
30}; 37};
31 38
32/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ 39/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
@@ -44,7 +51,9 @@ struct sched_switch_args {
44SEC("tracepoint/sched/sched_switch") 51SEC("tracepoint/sched/sched_switch")
45int oncpu(struct sched_switch_args *ctx) 52int oncpu(struct sched_switch_args *ctx)
46{ 53{
54 __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
47 __u32 key = 0, val = 0, *value_p; 55 __u32 key = 0, val = 0, *value_p;
56 void *stack_p;
48 57
49 value_p = bpf_map_lookup_elem(&control_map, &key); 58 value_p = bpf_map_lookup_elem(&control_map, &key);
50 if (value_p && *value_p) 59 if (value_p && *value_p)
@@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx)
52 61
53 /* The size of stackmap and stackid_hmap should be the same */ 62 /* The size of stackmap and stackid_hmap should be the same */
54 key = bpf_get_stackid(ctx, &stackmap, 0); 63 key = bpf_get_stackid(ctx, &stackmap, 0);
55 if ((int)key >= 0) 64 if ((int)key >= 0) {
56 bpf_map_update_elem(&stackid_hmap, &key, &val, 0); 65 bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
66 stack_p = bpf_map_lookup_elem(&stack_amap, &key);
67 if (stack_p)
68 bpf_get_stack(ctx, stack_p, max_len, 0);
69 }
57 70
58 return 0; 71 return 0;
59} 72}
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000000..546aee3e9fb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,731 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# End-to-end eBPF tunnel test suite
5# The script tests BPF network tunnel implementation.
6#
7# Topology:
8# ---------
9# root namespace | at_ns0 namespace
10# |
11# ----------- | -----------
12# | tnl dev | | | tnl dev | (overlay network)
13# ----------- | -----------
14# metadata-mode | native-mode
15# with bpf |
16# |
17# ---------- | ----------
18# | veth1 | --------- | veth0 | (underlay network)
19# ---------- peer ----------
20#
21#
22# Device Configuration
23# --------------------
24# Root namespace with metadata-mode tunnel + BPF
25# Device names and addresses:
26# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
27# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
28#
29# Namespace at_ns0 with native tunnel
30# Device names and addresses:
31# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
32# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
33#
34#
35# End-to-end ping packet flow
36# ---------------------------
37# Most of the tests start by namespace creation, device configuration,
38# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
39# from root namespace, the following operations happen:
40# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
41# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
42# with remote_ip=172.16.1.200 and others.
43# 3) Outer tunnel header is prepended and route the packet to veth1's egress
44# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
45# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
46# 6) Forward the packet to the overlay tnl dev
47
48PING_ARG="-c 3 -w 10 -q"
49ret=0
50GREEN='\033[0;92m'
51RED='\033[0;31m'
52NC='\033[0m' # No Color
53
54config_device()
55{
56 ip netns add at_ns0
57 ip link add veth0 type veth peer name veth1
58 ip link set veth0 netns at_ns0
59 ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
60 ip netns exec at_ns0 ip link set dev veth0 up
61 ip link set dev veth1 up mtu 1500
62 ip addr add dev veth1 172.16.1.200/24
63}
64
65add_gre_tunnel()
66{
67 # at_ns0 namespace
68 ip netns exec at_ns0 \
69 ip link add dev $DEV_NS type $TYPE seq key 2 \
70 local 172.16.1.100 remote 172.16.1.200
71 ip netns exec at_ns0 ip link set dev $DEV_NS up
72 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
73
74 # root namespace
75 ip link add dev $DEV type $TYPE key 2 external
76 ip link set dev $DEV up
77 ip addr add dev $DEV 10.1.1.200/24
78}
79
80add_ip6gretap_tunnel()
81{
82
83 # assign ipv6 address
84 ip netns exec at_ns0 ip addr add ::11/96 dev veth0
85 ip netns exec at_ns0 ip link set dev veth0 up
86 ip addr add dev veth1 ::22/96
87 ip link set dev veth1 up
88
89 # at_ns0 namespace
90 ip netns exec at_ns0 \
91 ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
92 local ::11 remote ::22
93
94 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
95 ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
96 ip netns exec at_ns0 ip link set dev $DEV_NS up
97
98 # root namespace
99 ip link add dev $DEV type $TYPE external
100 ip addr add dev $DEV 10.1.1.200/24
101 ip addr add dev $DEV fc80::200/24
102 ip link set dev $DEV up
103}
104
105add_erspan_tunnel()
106{
107 # at_ns0 namespace
108 if [ "$1" == "v1" ]; then
109 ip netns exec at_ns0 \
110 ip link add dev $DEV_NS type $TYPE seq key 2 \
111 local 172.16.1.100 remote 172.16.1.200 \
112 erspan_ver 1 erspan 123
113 else
114 ip netns exec at_ns0 \
115 ip link add dev $DEV_NS type $TYPE seq key 2 \
116 local 172.16.1.100 remote 172.16.1.200 \
117 erspan_ver 2 erspan_dir egress erspan_hwid 3
118 fi
119 ip netns exec at_ns0 ip link set dev $DEV_NS up
120 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
121
122 # root namespace
123 ip link add dev $DEV type $TYPE external
124 ip link set dev $DEV up
125 ip addr add dev $DEV 10.1.1.200/24
126}
127
128add_ip6erspan_tunnel()
129{
130
131 # assign ipv6 address
132 ip netns exec at_ns0 ip addr add ::11/96 dev veth0
133 ip netns exec at_ns0 ip link set dev veth0 up
134 ip addr add dev veth1 ::22/96
135 ip link set dev veth1 up
136
137 # at_ns0 namespace
138 if [ "$1" == "v1" ]; then
139 ip netns exec at_ns0 \
140 ip link add dev $DEV_NS type $TYPE seq key 2 \
141 local ::11 remote ::22 \
142 erspan_ver 1 erspan 123
143 else
144 ip netns exec at_ns0 \
145 ip link add dev $DEV_NS type $TYPE seq key 2 \
146 local ::11 remote ::22 \
147 erspan_ver 2 erspan_dir egress erspan_hwid 7
148 fi
149 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
150 ip netns exec at_ns0 ip link set dev $DEV_NS up
151
152 # root namespace
153 ip link add dev $DEV type $TYPE external
154 ip addr add dev $DEV 10.1.1.200/24
155 ip link set dev $DEV up
156}
157
158add_vxlan_tunnel()
159{
160 # Set static ARP entry here because iptables set-mark works
161 # on L3 packet, as a result not applying to ARP packets,
162 # causing errors at get_tunnel_{key/opt}.
163
164 # at_ns0 namespace
165 ip netns exec at_ns0 \
166 ip link add dev $DEV_NS type $TYPE \
167 id 2 dstport 4789 gbp remote 172.16.1.200
168 ip netns exec at_ns0 \
169 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
170 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
171 ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
172 ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
173
174 # root namespace
175 ip link add dev $DEV type $TYPE external gbp dstport 4789
176 ip link set dev $DEV address 52:54:00:d9:02:00 up
177 ip addr add dev $DEV 10.1.1.200/24
178 arp -s 10.1.1.100 52:54:00:d9:01:00
179}
180
181add_ip6vxlan_tunnel()
182{
183 #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
184 ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
185 ip netns exec at_ns0 ip link set dev veth0 up
186 #ip -4 addr del 172.16.1.200 dev veth1
187 ip -6 addr add dev veth1 ::22/96
188 ip link set dev veth1 up
189
190 # at_ns0 namespace
191 ip netns exec at_ns0 \
192 ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
193 local ::11 remote ::22
194 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
195 ip netns exec at_ns0 ip link set dev $DEV_NS up
196
197 # root namespace
198 ip link add dev $DEV type $TYPE external dstport 4789
199 ip addr add dev $DEV 10.1.1.200/24
200 ip link set dev $DEV up
201}
202
203add_geneve_tunnel()
204{
205 # at_ns0 namespace
206 ip netns exec at_ns0 \
207 ip link add dev $DEV_NS type $TYPE \
208 id 2 dstport 6081 remote 172.16.1.200
209 ip netns exec at_ns0 ip link set dev $DEV_NS up
210 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
211
212 # root namespace
213 ip link add dev $DEV type $TYPE dstport 6081 external
214 ip link set dev $DEV up
215 ip addr add dev $DEV 10.1.1.200/24
216}
217
218add_ip6geneve_tunnel()
219{
220 ip netns exec at_ns0 ip addr add ::11/96 dev veth0
221 ip netns exec at_ns0 ip link set dev veth0 up
222 ip addr add dev veth1 ::22/96
223 ip link set dev veth1 up
224
225 # at_ns0 namespace
226 ip netns exec at_ns0 \
227 ip link add dev $DEV_NS type $TYPE id 22 \
228 remote ::22 # geneve has no local option
229 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
230 ip netns exec at_ns0 ip link set dev $DEV_NS up
231
232 # root namespace
233 ip link add dev $DEV type $TYPE external
234 ip addr add dev $DEV 10.1.1.200/24
235 ip link set dev $DEV up
236}
237
238add_ipip_tunnel()
239{
240 # at_ns0 namespace
241 ip netns exec at_ns0 \
242 ip link add dev $DEV_NS type $TYPE \
243 local 172.16.1.100 remote 172.16.1.200
244 ip netns exec at_ns0 ip link set dev $DEV_NS up
245 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
246
247 # root namespace
248 ip link add dev $DEV type $TYPE external
249 ip link set dev $DEV up
250 ip addr add dev $DEV 10.1.1.200/24
251}
252
253add_ipip6tnl_tunnel()
254{
255 ip netns exec at_ns0 ip addr add ::11/96 dev veth0
256 ip netns exec at_ns0 ip link set dev veth0 up
257 ip addr add dev veth1 ::22/96
258 ip link set dev veth1 up
259
260 # at_ns0 namespace
261 ip netns exec at_ns0 \
262 ip link add dev $DEV_NS type $TYPE \
263 local ::11 remote ::22
264 ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
265 ip netns exec at_ns0 ip link set dev $DEV_NS up
266
267 # root namespace
268 ip link add dev $DEV type $TYPE external
269 ip addr add dev $DEV 10.1.1.200/24
270 ip link set dev $DEV up
271}
272
273test_gre()
274{
275 TYPE=gretap
276 DEV_NS=gretap00
277 DEV=gretap11
278 ret=0
279
280 check $TYPE
281 config_device
282 add_gre_tunnel
283 attach_bpf $DEV gre_set_tunnel gre_get_tunnel
284 ping $PING_ARG 10.1.1.100
285 check_err $?
286 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
287 check_err $?
288 cleanup
289
290 if [ $ret -ne 0 ]; then
291 echo -e ${RED}"FAIL: $TYPE"${NC}
292 return 1
293 fi
294 echo -e ${GREEN}"PASS: $TYPE"${NC}
295}
296
297test_ip6gre()
298{
299 TYPE=ip6gre
300 DEV_NS=ip6gre00
301 DEV=ip6gre11
302 ret=0
303
304 check $TYPE
305 config_device
306 # reuse the ip6gretap function
307 add_ip6gretap_tunnel
308 attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
309 # underlay
310 ping6 $PING_ARG ::11
311 # overlay: ipv4 over ipv6
312 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
313 ping $PING_ARG 10.1.1.100
314 check_err $?
315 # overlay: ipv6 over ipv6
316 ip netns exec at_ns0 ping6 $PING_ARG fc80::200
317 check_err $?
318 cleanup
319
320 if [ $ret -ne 0 ]; then
321 echo -e ${RED}"FAIL: $TYPE"${NC}
322 return 1
323 fi
324 echo -e ${GREEN}"PASS: $TYPE"${NC}
325}
326
327test_ip6gretap()
328{
329 TYPE=ip6gretap
330 DEV_NS=ip6gretap00
331 DEV=ip6gretap11
332 ret=0
333
334 check $TYPE
335 config_device
336 add_ip6gretap_tunnel
337 attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
338 # underlay
339 ping6 $PING_ARG ::11
340 # overlay: ipv4 over ipv6
341 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
342 ping $PING_ARG 10.1.1.100
343 check_err $?
344 # overlay: ipv6 over ipv6
345 ip netns exec at_ns0 ping6 $PING_ARG fc80::200
346 check_err $?
347 cleanup
348
349 if [ $ret -ne 0 ]; then
350 echo -e ${RED}"FAIL: $TYPE"${NC}
351 return 1
352 fi
353 echo -e ${GREEN}"PASS: $TYPE"${NC}
354}
355
356test_erspan()
357{
358 TYPE=erspan
359 DEV_NS=erspan00
360 DEV=erspan11
361 ret=0
362
363 check $TYPE
364 config_device
365 add_erspan_tunnel $1
366 attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
367 ping $PING_ARG 10.1.1.100
368 check_err $?
369 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
370 check_err $?
371 cleanup
372
373 if [ $ret -ne 0 ]; then
374 echo -e ${RED}"FAIL: $TYPE"${NC}
375 return 1
376 fi
377 echo -e ${GREEN}"PASS: $TYPE"${NC}
378}
379
380test_ip6erspan()
381{
382 TYPE=ip6erspan
383 DEV_NS=ip6erspan00
384 DEV=ip6erspan11
385 ret=0
386
387 check $TYPE
388 config_device
389 add_ip6erspan_tunnel $1
390 attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
391 ping6 $PING_ARG ::11
392 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
393 check_err $?
394 cleanup
395
396 if [ $ret -ne 0 ]; then
397 echo -e ${RED}"FAIL: $TYPE"${NC}
398 return 1
399 fi
400 echo -e ${GREEN}"PASS: $TYPE"${NC}
401}
402
403test_vxlan()
404{
405 TYPE=vxlan
406 DEV_NS=vxlan00
407 DEV=vxlan11
408 ret=0
409
410 check $TYPE
411 config_device
412 add_vxlan_tunnel
413 attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
414 ping $PING_ARG 10.1.1.100
415 check_err $?
416 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
417 check_err $?
418 cleanup
419
420 if [ $ret -ne 0 ]; then
421 echo -e ${RED}"FAIL: $TYPE"${NC}
422 return 1
423 fi
424 echo -e ${GREEN}"PASS: $TYPE"${NC}
425}
426
427test_ip6vxlan()
428{
429 TYPE=vxlan
430 DEV_NS=ip6vxlan00
431 DEV=ip6vxlan11
432 ret=0
433
434 check $TYPE
435 config_device
436 add_ip6vxlan_tunnel
437 ip link set dev veth1 mtu 1500
438 attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
439 # underlay
440 ping6 $PING_ARG ::11
441 # ip4 over ip6
442 ping $PING_ARG 10.1.1.100
443 check_err $?
444 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
445 check_err $?
446 cleanup
447
448 if [ $ret -ne 0 ]; then
449 echo -e ${RED}"FAIL: ip6$TYPE"${NC}
450 return 1
451 fi
452 echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
453}
454
455test_geneve()
456{
457 TYPE=geneve
458 DEV_NS=geneve00
459 DEV=geneve11
460 ret=0
461
462 check $TYPE
463 config_device
464 add_geneve_tunnel
465 attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
466 ping $PING_ARG 10.1.1.100
467 check_err $?
468 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
469 check_err $?
470 cleanup
471
472 if [ $ret -ne 0 ]; then
473 echo -e ${RED}"FAIL: $TYPE"${NC}
474 return 1
475 fi
476 echo -e ${GREEN}"PASS: $TYPE"${NC}
477}
478
479test_ip6geneve()
480{
481 TYPE=geneve
482 DEV_NS=ip6geneve00
483 DEV=ip6geneve11
484 ret=0
485
486 check $TYPE
487 config_device
488 add_ip6geneve_tunnel
489 attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
490 ping $PING_ARG 10.1.1.100
491 check_err $?
492 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
493 check_err $?
494 cleanup
495
496 if [ $ret -ne 0 ]; then
497 echo -e ${RED}"FAIL: ip6$TYPE"${NC}
498 return 1
499 fi
500 echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
501}
502
503test_ipip()
504{
505 TYPE=ipip
506 DEV_NS=ipip00
507 DEV=ipip11
508 ret=0
509
510 check $TYPE
511 config_device
512 add_ipip_tunnel
513 ip link set dev veth1 mtu 1500
514 attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
515 ping $PING_ARG 10.1.1.100
516 check_err $?
517 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
518 check_err $?
519 cleanup
520
521 if [ $ret -ne 0 ]; then
522 echo -e ${RED}"FAIL: $TYPE"${NC}
523 return 1
524 fi
525 echo -e ${GREEN}"PASS: $TYPE"${NC}
526}
527
528test_ipip6()
529{
530 TYPE=ip6tnl
531 DEV_NS=ipip6tnl00
532 DEV=ipip6tnl11
533 ret=0
534
535 check $TYPE
536 config_device
537 add_ipip6tnl_tunnel
538 ip link set dev veth1 mtu 1500
539 attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
540 # underlay
541 ping6 $PING_ARG ::11
542 # ip4 over ip6
543 ping $PING_ARG 10.1.1.100
544 check_err $?
545 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
546 check_err $?
547 cleanup
548
549 if [ $ret -ne 0 ]; then
550 echo -e ${RED}"FAIL: $TYPE"${NC}
551 return 1
552 fi
553 echo -e ${GREEN}"PASS: $TYPE"${NC}
554}
555
556setup_xfrm_tunnel()
557{
558 auth=0x$(printf '1%.0s' {1..40})
559 enc=0x$(printf '2%.0s' {1..32})
560 spi_in_to_out=0x1
561 spi_out_to_in=0x2
562 # at_ns0 namespace
563 # at_ns0 -> root
564 ip netns exec at_ns0 \
565 ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
566 spi $spi_in_to_out reqid 1 mode tunnel \
567 auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
568 ip netns exec at_ns0 \
569 ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
570 tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
571 mode tunnel
572 # root -> at_ns0
573 ip netns exec at_ns0 \
574 ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
575 spi $spi_out_to_in reqid 2 mode tunnel \
576 auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
577 ip netns exec at_ns0 \
578 ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
579 tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
580 mode tunnel
581 # address & route
582 ip netns exec at_ns0 \
583 ip addr add dev veth0 10.1.1.100/32
584 ip netns exec at_ns0 \
585 ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
586 src 10.1.1.100
587
588 # root namespace
589 # at_ns0 -> root
590 ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
591 spi $spi_in_to_out reqid 1 mode tunnel \
592 auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
593 ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
594 tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
595 mode tunnel
596 # root -> at_ns0
597 ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
598 spi $spi_out_to_in reqid 2 mode tunnel \
599 auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
600 ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
601 tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
602 mode tunnel
603 # address & route
604 ip addr add dev veth1 10.1.1.200/32
605 ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
606}
607
608test_xfrm_tunnel()
609{
610 config_device
611 > /sys/kernel/debug/tracing/trace
612 setup_xfrm_tunnel
613 tc qdisc add dev veth1 clsact
614 tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
615 sec xfrm_get_state
616 ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
617 sleep 1
618 grep "reqid 1" /sys/kernel/debug/tracing/trace
619 check_err $?
620 grep "spi 0x1" /sys/kernel/debug/tracing/trace
621 check_err $?
622 grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
623 check_err $?
624 cleanup
625
626 if [ $ret -ne 0 ]; then
627 echo -e ${RED}"FAIL: xfrm tunnel"${NC}
628 return 1
629 fi
630 echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
631}
632
633attach_bpf()
634{
635 DEV=$1
636 SET=$2
637 GET=$3
638 tc qdisc add dev $DEV clsact
639 tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
640 tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
641}
642
643cleanup()
644{
645 ip netns delete at_ns0 2> /dev/null
646 ip link del veth1 2> /dev/null
647 ip link del ipip11 2> /dev/null
648 ip link del ipip6tnl11 2> /dev/null
649 ip link del gretap11 2> /dev/null
650 ip link del ip6gre11 2> /dev/null
651 ip link del ip6gretap11 2> /dev/null
652 ip link del vxlan11 2> /dev/null
653 ip link del ip6vxlan11 2> /dev/null
654 ip link del geneve11 2> /dev/null
655 ip link del ip6geneve11 2> /dev/null
656 ip link del erspan11 2> /dev/null
657 ip link del ip6erspan11 2> /dev/null
658 ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
659 ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
660 ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
661 ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
662}
663
664cleanup_exit()
665{
666 echo "CATCH SIGKILL or SIGINT, cleanup and exit"
667 cleanup
668 exit 0
669}
670
671check()
672{
673 ip link help 2>&1 | grep -q "\s$1\s"
674 if [ $? -ne 0 ];then
675 echo "SKIP $1: iproute2 not support"
676 cleanup
677 return 1
678 fi
679}
680
681enable_debug()
682{
683 echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
684 echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
685 echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
686 echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
687 echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
688}
689
690check_err()
691{
692 if [ $ret -eq 0 ]; then
693 ret=$1
694 fi
695}
696
697bpf_tunnel_test()
698{
699 echo "Testing GRE tunnel..."
700 test_gre
701 echo "Testing IP6GRE tunnel..."
702 test_ip6gre
703 echo "Testing IP6GRETAP tunnel..."
704 test_ip6gretap
705 echo "Testing ERSPAN tunnel..."
706 test_erspan v2
707 echo "Testing IP6ERSPAN tunnel..."
708 test_ip6erspan v2
709 echo "Testing VXLAN tunnel..."
710 test_vxlan
711 echo "Testing IP6VXLAN tunnel..."
712 test_ip6vxlan
713 echo "Testing GENEVE tunnel..."
714 test_geneve
715 echo "Testing IP6GENEVE tunnel..."
716 test_ip6geneve
717 echo "Testing IPIP tunnel..."
718 test_ipip
719 echo "Testing IPIP6 tunnel..."
720 test_ipip6
721 echo "Testing IPSec tunnel..."
722 test_xfrm_tunnel
723}
724
725trap cleanup 0 3 6
726trap cleanup_exit 2 9
727
728cleanup
729bpf_tunnel_test
730
731exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000000..504df69c83df
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2016 VMware
3 * Copyright (c) 2016 Facebook
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 */
9#include <stddef.h>
10#include <string.h>
11#include <arpa/inet.h>
12#include <linux/bpf.h>
13#include <linux/if_ether.h>
14#include <linux/if_packet.h>
15#include <linux/ip.h>
16#include <linux/ipv6.h>
17#include <linux/types.h>
18#include <linux/tcp.h>
19#include <linux/socket.h>
20#include <linux/pkt_cls.h>
21#include <linux/erspan.h>
22#include "bpf_helpers.h"
23#include "bpf_endian.h"
24
25#define ERROR(ret) do {\
26 char fmt[] = "ERROR line:%d ret:%d\n";\
27 bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
28 } while (0)
29
30int _version SEC("version") = 1;
31
32struct geneve_opt {
33 __be16 opt_class;
34 __u8 type;
35 __u8 length:5;
36 __u8 r3:1;
37 __u8 r2:1;
38 __u8 r1:1;
39 __u8 opt_data[8]; /* hard-coded to 8 byte */
40};
41
42struct vxlan_metadata {
43 __u32 gbp;
44};
45
46SEC("gre_set_tunnel")
47int _gre_set_tunnel(struct __sk_buff *skb)
48{
49 int ret;
50 struct bpf_tunnel_key key;
51
52 __builtin_memset(&key, 0x0, sizeof(key));
53 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
54 key.tunnel_id = 2;
55 key.tunnel_tos = 0;
56 key.tunnel_ttl = 64;
57
58 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
59 BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
60 if (ret < 0) {
61 ERROR(ret);
62 return TC_ACT_SHOT;
63 }
64
65 return TC_ACT_OK;
66}
67
68SEC("gre_get_tunnel")
69int _gre_get_tunnel(struct __sk_buff *skb)
70{
71 int ret;
72 struct bpf_tunnel_key key;
73 char fmt[] = "key %d remote ip 0x%x\n";
74
75 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
76 if (ret < 0) {
77 ERROR(ret);
78 return TC_ACT_SHOT;
79 }
80
81 bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
82 return TC_ACT_OK;
83}
84
85SEC("ip6gretap_set_tunnel")
86int _ip6gretap_set_tunnel(struct __sk_buff *skb)
87{
88 struct bpf_tunnel_key key;
89 int ret;
90
91 __builtin_memset(&key, 0x0, sizeof(key));
92 key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
93 key.tunnel_id = 2;
94 key.tunnel_tos = 0;
95 key.tunnel_ttl = 64;
96 key.tunnel_label = 0xabcde;
97
98 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
99 BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
100 BPF_F_SEQ_NUMBER);
101 if (ret < 0) {
102 ERROR(ret);
103 return TC_ACT_SHOT;
104 }
105
106 return TC_ACT_OK;
107}
108
109SEC("ip6gretap_get_tunnel")
110int _ip6gretap_get_tunnel(struct __sk_buff *skb)
111{
112 char fmt[] = "key %d remote ip6 ::%x label %x\n";
113 struct bpf_tunnel_key key;
114 int ret;
115
116 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
117 BPF_F_TUNINFO_IPV6);
118 if (ret < 0) {
119 ERROR(ret);
120 return TC_ACT_SHOT;
121 }
122
123 bpf_trace_printk(fmt, sizeof(fmt),
124 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
125
126 return TC_ACT_OK;
127}
128
129SEC("erspan_set_tunnel")
130int _erspan_set_tunnel(struct __sk_buff *skb)
131{
132 struct bpf_tunnel_key key;
133 struct erspan_metadata md;
134 int ret;
135
136 __builtin_memset(&key, 0x0, sizeof(key));
137 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
138 key.tunnel_id = 2;
139 key.tunnel_tos = 0;
140 key.tunnel_ttl = 64;
141
142 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
143 BPF_F_ZERO_CSUM_TX);
144 if (ret < 0) {
145 ERROR(ret);
146 return TC_ACT_SHOT;
147 }
148
149 __builtin_memset(&md, 0, sizeof(md));
150#ifdef ERSPAN_V1
151 md.version = 1;
152 md.u.index = bpf_htonl(123);
153#else
154 __u8 direction = 1;
155 __u8 hwid = 7;
156
157 md.version = 2;
158 md.u.md2.dir = direction;
159 md.u.md2.hwid = hwid & 0xf;
160 md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
161#endif
162
163 ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
164 if (ret < 0) {
165 ERROR(ret);
166 return TC_ACT_SHOT;
167 }
168
169 return TC_ACT_OK;
170}
171
172SEC("erspan_get_tunnel")
173int _erspan_get_tunnel(struct __sk_buff *skb)
174{
175 char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
176 struct bpf_tunnel_key key;
177 struct erspan_metadata md;
178 __u32 index;
179 int ret;
180
181 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
182 if (ret < 0) {
183 ERROR(ret);
184 return TC_ACT_SHOT;
185 }
186
187 ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
188 if (ret < 0) {
189 ERROR(ret);
190 return TC_ACT_SHOT;
191 }
192
193 bpf_trace_printk(fmt, sizeof(fmt),
194 key.tunnel_id, key.remote_ipv4, md.version);
195
196#ifdef ERSPAN_V1
197 char fmt2[] = "\tindex %x\n";
198
199 index = bpf_ntohl(md.u.index);
200 bpf_trace_printk(fmt2, sizeof(fmt2), index);
201#else
202 char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
203
204 bpf_trace_printk(fmt2, sizeof(fmt2),
205 md.u.md2.dir,
206 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
207 bpf_ntohl(md.u.md2.timestamp));
208#endif
209
210 return TC_ACT_OK;
211}
212
213SEC("ip4ip6erspan_set_tunnel")
214int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
215{
216 struct bpf_tunnel_key key;
217 struct erspan_metadata md;
218 int ret;
219
220 __builtin_memset(&key, 0x0, sizeof(key));
221 key.remote_ipv6[3] = bpf_htonl(0x11);
222 key.tunnel_id = 2;
223 key.tunnel_tos = 0;
224 key.tunnel_ttl = 64;
225
226 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
227 BPF_F_TUNINFO_IPV6);
228 if (ret < 0) {
229 ERROR(ret);
230 return TC_ACT_SHOT;
231 }
232
233 __builtin_memset(&md, 0, sizeof(md));
234
235#ifdef ERSPAN_V1
236 md.u.index = bpf_htonl(123);
237 md.version = 1;
238#else
239 __u8 direction = 0;
240 __u8 hwid = 17;
241
242 md.version = 2;
243 md.u.md2.dir = direction;
244 md.u.md2.hwid = hwid & 0xf;
245 md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
246#endif
247
248 ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
249 if (ret < 0) {
250 ERROR(ret);
251 return TC_ACT_SHOT;
252 }
253
254 return TC_ACT_OK;
255}
256
257SEC("ip4ip6erspan_get_tunnel")
258int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
259{
260 char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
261 struct bpf_tunnel_key key;
262 struct erspan_metadata md;
263 __u32 index;
264 int ret;
265
266 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
267 BPF_F_TUNINFO_IPV6);
268 if (ret < 0) {
269 ERROR(ret);
270 return TC_ACT_SHOT;
271 }
272
273 ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
274 if (ret < 0) {
275 ERROR(ret);
276 return TC_ACT_SHOT;
277 }
278
279 bpf_trace_printk(fmt, sizeof(fmt),
280 key.tunnel_id, key.remote_ipv4, md.version);
281
282#ifdef ERSPAN_V1
283 char fmt2[] = "\tindex %x\n";
284
285 index = bpf_ntohl(md.u.index);
286 bpf_trace_printk(fmt2, sizeof(fmt2), index);
287#else
288 char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
289
290 bpf_trace_printk(fmt2, sizeof(fmt2),
291 md.u.md2.dir,
292 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
293 bpf_ntohl(md.u.md2.timestamp));
294#endif
295
296 return TC_ACT_OK;
297}
298
299SEC("vxlan_set_tunnel")
300int _vxlan_set_tunnel(struct __sk_buff *skb)
301{
302 int ret;
303 struct bpf_tunnel_key key;
304 struct vxlan_metadata md;
305
306 __builtin_memset(&key, 0x0, sizeof(key));
307 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
308 key.tunnel_id = 2;
309 key.tunnel_tos = 0;
310 key.tunnel_ttl = 64;
311
312 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
313 BPF_F_ZERO_CSUM_TX);
314 if (ret < 0) {
315 ERROR(ret);
316 return TC_ACT_SHOT;
317 }
318
319 md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
320 ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
321 if (ret < 0) {
322 ERROR(ret);
323 return TC_ACT_SHOT;
324 }
325
326 return TC_ACT_OK;
327}
328
329SEC("vxlan_get_tunnel")
330int _vxlan_get_tunnel(struct __sk_buff *skb)
331{
332 int ret;
333 struct bpf_tunnel_key key;
334 struct vxlan_metadata md;
335 char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
336
337 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
338 if (ret < 0) {
339 ERROR(ret);
340 return TC_ACT_SHOT;
341 }
342
343 ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
344 if (ret < 0) {
345 ERROR(ret);
346 return TC_ACT_SHOT;
347 }
348
349 bpf_trace_printk(fmt, sizeof(fmt),
350 key.tunnel_id, key.remote_ipv4, md.gbp);
351
352 return TC_ACT_OK;
353}
354
355SEC("ip6vxlan_set_tunnel")
356int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
357{
358 struct bpf_tunnel_key key;
359 int ret;
360
361 __builtin_memset(&key, 0x0, sizeof(key));
362 key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
363 key.tunnel_id = 22;
364 key.tunnel_tos = 0;
365 key.tunnel_ttl = 64;
366
367 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
368 BPF_F_TUNINFO_IPV6);
369 if (ret < 0) {
370 ERROR(ret);
371 return TC_ACT_SHOT;
372 }
373
374 return TC_ACT_OK;
375}
376
377SEC("ip6vxlan_get_tunnel")
378int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
379{
380 char fmt[] = "key %d remote ip6 ::%x label %x\n";
381 struct bpf_tunnel_key key;
382 int ret;
383
384 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
385 BPF_F_TUNINFO_IPV6);
386 if (ret < 0) {
387 ERROR(ret);
388 return TC_ACT_SHOT;
389 }
390
391 bpf_trace_printk(fmt, sizeof(fmt),
392 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
393
394 return TC_ACT_OK;
395}
396
397SEC("geneve_set_tunnel")
398int _geneve_set_tunnel(struct __sk_buff *skb)
399{
400 int ret, ret2;
401 struct bpf_tunnel_key key;
402 struct geneve_opt gopt;
403
404 __builtin_memset(&key, 0x0, sizeof(key));
405 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
406 key.tunnel_id = 2;
407 key.tunnel_tos = 0;
408 key.tunnel_ttl = 64;
409
410 __builtin_memset(&gopt, 0x0, sizeof(gopt));
411 gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
412 gopt.type = 0x08;
413 gopt.r1 = 0;
414 gopt.r2 = 0;
415 gopt.r3 = 0;
416 gopt.length = 2; /* 4-byte multiple */
417 *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
418
419 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
420 BPF_F_ZERO_CSUM_TX);
421 if (ret < 0) {
422 ERROR(ret);
423 return TC_ACT_SHOT;
424 }
425
426 ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
427 if (ret < 0) {
428 ERROR(ret);
429 return TC_ACT_SHOT;
430 }
431
432 return TC_ACT_OK;
433}
434
435SEC("geneve_get_tunnel")
436int _geneve_get_tunnel(struct __sk_buff *skb)
437{
438 int ret;
439 struct bpf_tunnel_key key;
440 struct geneve_opt gopt;
441 char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
442
443 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
444 if (ret < 0) {
445 ERROR(ret);
446 return TC_ACT_SHOT;
447 }
448
449 ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
450 if (ret < 0) {
451 ERROR(ret);
452 return TC_ACT_SHOT;
453 }
454
455 bpf_trace_printk(fmt, sizeof(fmt),
456 key.tunnel_id, key.remote_ipv4, gopt.opt_class);
457 return TC_ACT_OK;
458}
459
460SEC("ip6geneve_set_tunnel")
461int _ip6geneve_set_tunnel(struct __sk_buff *skb)
462{
463 struct bpf_tunnel_key key;
464 struct geneve_opt gopt;
465 int ret;
466
467 __builtin_memset(&key, 0x0, sizeof(key));
468 key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
469 key.tunnel_id = 22;
470 key.tunnel_tos = 0;
471 key.tunnel_ttl = 64;
472
473 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
474 BPF_F_TUNINFO_IPV6);
475 if (ret < 0) {
476 ERROR(ret);
477 return TC_ACT_SHOT;
478 }
479
480 __builtin_memset(&gopt, 0x0, sizeof(gopt));
481 gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
482 gopt.type = 0x08;
483 gopt.r1 = 0;
484 gopt.r2 = 0;
485 gopt.r3 = 0;
486 gopt.length = 2; /* 4-byte multiple */
487 *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
488
489 ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
490 if (ret < 0) {
491 ERROR(ret);
492 return TC_ACT_SHOT;
493 }
494
495 return TC_ACT_OK;
496}
497
498SEC("ip6geneve_get_tunnel")
499int _ip6geneve_get_tunnel(struct __sk_buff *skb)
500{
501 char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
502 struct bpf_tunnel_key key;
503 struct geneve_opt gopt;
504 int ret;
505
506 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
507 BPF_F_TUNINFO_IPV6);
508 if (ret < 0) {
509 ERROR(ret);
510 return TC_ACT_SHOT;
511 }
512
513 ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
514 if (ret < 0) {
515 ERROR(ret);
516 return TC_ACT_SHOT;
517 }
518
519 bpf_trace_printk(fmt, sizeof(fmt),
520 key.tunnel_id, key.remote_ipv4, gopt.opt_class);
521
522 return TC_ACT_OK;
523}
524
525SEC("ipip_set_tunnel")
526int _ipip_set_tunnel(struct __sk_buff *skb)
527{
528 struct bpf_tunnel_key key = {};
529 void *data = (void *)(long)skb->data;
530 struct iphdr *iph = data;
531 struct tcphdr *tcp = data + sizeof(*iph);
532 void *data_end = (void *)(long)skb->data_end;
533 int ret;
534
535 /* single length check */
536 if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
537 ERROR(1);
538 return TC_ACT_SHOT;
539 }
540
541 key.tunnel_ttl = 64;
542 if (iph->protocol == IPPROTO_ICMP) {
543 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
544 } else {
545 if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
546 return TC_ACT_SHOT;
547
548 if (tcp->dest == bpf_htons(5200))
549 key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
550 else if (tcp->dest == bpf_htons(5201))
551 key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
552 else
553 return TC_ACT_SHOT;
554 }
555
556 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
557 if (ret < 0) {
558 ERROR(ret);
559 return TC_ACT_SHOT;
560 }
561
562 return TC_ACT_OK;
563}
564
565SEC("ipip_get_tunnel")
566int _ipip_get_tunnel(struct __sk_buff *skb)
567{
568 int ret;
569 struct bpf_tunnel_key key;
570 char fmt[] = "remote ip 0x%x\n";
571
572 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
573 if (ret < 0) {
574 ERROR(ret);
575 return TC_ACT_SHOT;
576 }
577
578 bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
579 return TC_ACT_OK;
580}
581
582SEC("ipip6_set_tunnel")
583int _ipip6_set_tunnel(struct __sk_buff *skb)
584{
585 struct bpf_tunnel_key key = {};
586 void *data = (void *)(long)skb->data;
587 struct iphdr *iph = data;
588 struct tcphdr *tcp = data + sizeof(*iph);
589 void *data_end = (void *)(long)skb->data_end;
590 int ret;
591
592 /* single length check */
593 if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
594 ERROR(1);
595 return TC_ACT_SHOT;
596 }
597
598 __builtin_memset(&key, 0x0, sizeof(key));
599 key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
600 key.tunnel_ttl = 64;
601
602 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
603 BPF_F_TUNINFO_IPV6);
604 if (ret < 0) {
605 ERROR(ret);
606 return TC_ACT_SHOT;
607 }
608
609 return TC_ACT_OK;
610}
611
612SEC("ipip6_get_tunnel")
613int _ipip6_get_tunnel(struct __sk_buff *skb)
614{
615 int ret;
616 struct bpf_tunnel_key key;
617 char fmt[] = "remote ip6 %x::%x\n";
618
619 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
620 BPF_F_TUNINFO_IPV6);
621 if (ret < 0) {
622 ERROR(ret);
623 return TC_ACT_SHOT;
624 }
625
626 bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
627 bpf_htonl(key.remote_ipv6[3]));
628 return TC_ACT_OK;
629}
630
631SEC("ip6ip6_set_tunnel")
632int _ip6ip6_set_tunnel(struct __sk_buff *skb)
633{
634 struct bpf_tunnel_key key = {};
635 void *data = (void *)(long)skb->data;
636 struct ipv6hdr *iph = data;
637 struct tcphdr *tcp = data + sizeof(*iph);
638 void *data_end = (void *)(long)skb->data_end;
639 int ret;
640
641 /* single length check */
642 if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
643 ERROR(1);
644 return TC_ACT_SHOT;
645 }
646
647 key.remote_ipv6[0] = bpf_htonl(0x2401db00);
648 key.tunnel_ttl = 64;
649
650 if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
651 key.remote_ipv6[3] = bpf_htonl(1);
652 } else {
653 if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
654 ERROR(iph->nexthdr);
655 return TC_ACT_SHOT;
656 }
657
658 if (tcp->dest == bpf_htons(5200)) {
659 key.remote_ipv6[3] = bpf_htonl(1);
660 } else if (tcp->dest == bpf_htons(5201)) {
661 key.remote_ipv6[3] = bpf_htonl(2);
662 } else {
663 ERROR(tcp->dest);
664 return TC_ACT_SHOT;
665 }
666 }
667
668 ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
669 BPF_F_TUNINFO_IPV6);
670 if (ret < 0) {
671 ERROR(ret);
672 return TC_ACT_SHOT;
673 }
674
675 return TC_ACT_OK;
676}
677
678SEC("ip6ip6_get_tunnel")
679int _ip6ip6_get_tunnel(struct __sk_buff *skb)
680{
681 int ret;
682 struct bpf_tunnel_key key;
683 char fmt[] = "remote ip6 %x::%x\n";
684
685 ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
686 BPF_F_TUNINFO_IPV6);
687 if (ret < 0) {
688 ERROR(ret);
689 return TC_ACT_SHOT;
690 }
691
692 bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
693 bpf_htonl(key.remote_ipv6[3]));
694 return TC_ACT_OK;
695}
696
697SEC("xfrm_get_state")
698int _xfrm_get_state(struct __sk_buff *skb)
699{
700 struct bpf_xfrm_state x;
701 char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
702 int ret;
703
704 ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
705 if (ret < 0)
706 return TC_ACT_OK;
707
708 bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
709 bpf_ntohl(x.remote_ipv4));
710 return TC_ACT_OK;
711}
712
713char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3e7718b1a9ae..2ecd27b670d7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -41,15 +41,16 @@
41# endif 41# endif
42#endif 42#endif
43#include "bpf_rlimit.h" 43#include "bpf_rlimit.h"
44#include "bpf_rand.h"
44#include "../../../include/linux/filter.h" 45#include "../../../include/linux/filter.h"
45 46
46#ifndef ARRAY_SIZE 47#ifndef ARRAY_SIZE
47# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 48# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
48#endif 49#endif
49 50
50#define MAX_INSNS 512 51#define MAX_INSNS BPF_MAXINSNS
51#define MAX_FIXUPS 8 52#define MAX_FIXUPS 8
52#define MAX_NR_MAPS 4 53#define MAX_NR_MAPS 7
53#define POINTER_VALUE 0xcafe4all 54#define POINTER_VALUE 0xcafe4all
54#define TEST_DATA_LEN 64 55#define TEST_DATA_LEN 64
55 56
@@ -64,7 +65,10 @@ struct bpf_test {
64 struct bpf_insn insns[MAX_INSNS]; 65 struct bpf_insn insns[MAX_INSNS];
65 int fixup_map1[MAX_FIXUPS]; 66 int fixup_map1[MAX_FIXUPS];
66 int fixup_map2[MAX_FIXUPS]; 67 int fixup_map2[MAX_FIXUPS];
67 int fixup_prog[MAX_FIXUPS]; 68 int fixup_map3[MAX_FIXUPS];
69 int fixup_map4[MAX_FIXUPS];
70 int fixup_prog1[MAX_FIXUPS];
71 int fixup_prog2[MAX_FIXUPS];
68 int fixup_map_in_map[MAX_FIXUPS]; 72 int fixup_map_in_map[MAX_FIXUPS];
69 const char *errstr; 73 const char *errstr;
70 const char *errstr_unpriv; 74 const char *errstr_unpriv;
@@ -76,6 +80,8 @@ struct bpf_test {
76 } result, result_unpriv; 80 } result, result_unpriv;
77 enum bpf_prog_type prog_type; 81 enum bpf_prog_type prog_type;
78 uint8_t flags; 82 uint8_t flags;
83 __u8 data[TEST_DATA_LEN];
84 void (*fill_helper)(struct bpf_test *self);
79}; 85};
80 86
81/* Note we want this to be 64 bit aligned so that the end of our array is 87/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -88,6 +94,91 @@ struct test_val {
88 int foo[MAX_ENTRIES]; 94 int foo[MAX_ENTRIES];
89}; 95};
90 96
97struct other_val {
98 long long foo;
99 long long bar;
100};
101
102static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
103{
104 /* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
105#define PUSH_CNT 51
106 unsigned int len = BPF_MAXINSNS;
107 struct bpf_insn *insn = self->insns;
108 int i = 0, j, k = 0;
109
110 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
111loop:
112 for (j = 0; j < PUSH_CNT; j++) {
113 insn[i++] = BPF_LD_ABS(BPF_B, 0);
114 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
115 i++;
116 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
117 insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
118 insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
119 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
120 BPF_FUNC_skb_vlan_push),
121 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
122 i++;
123 }
124
125 for (j = 0; j < PUSH_CNT; j++) {
126 insn[i++] = BPF_LD_ABS(BPF_B, 0);
127 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
128 i++;
129 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
130 insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
131 BPF_FUNC_skb_vlan_pop),
132 insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
133 i++;
134 }
135 if (++k < 5)
136 goto loop;
137
138 for (; i < len - 1; i++)
139 insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
140 insn[len - 1] = BPF_EXIT_INSN();
141}
142
143static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
144{
145 struct bpf_insn *insn = self->insns;
146 unsigned int len = BPF_MAXINSNS;
147 int i = 0;
148
149 insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
150 insn[i++] = BPF_LD_ABS(BPF_B, 0);
151 insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
152 i++;
153 while (i < len - 1)
154 insn[i++] = BPF_LD_ABS(BPF_B, 1);
155 insn[i] = BPF_EXIT_INSN();
156}
157
158static void bpf_fill_rand_ld_dw(struct bpf_test *self)
159{
160 struct bpf_insn *insn = self->insns;
161 uint64_t res = 0;
162 int i = 0;
163
164 insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
165 while (i < self->retval) {
166 uint64_t val = bpf_semi_rand_get();
167 struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
168
169 res ^= val;
170 insn[i++] = tmp[0];
171 insn[i++] = tmp[1];
172 insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
173 }
174 insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
175 insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
176 insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
177 insn[i] = BPF_EXIT_INSN();
178 res ^= (res >> 32);
179 self->retval = (uint32_t)res;
180}
181
91static struct bpf_test tests[] = { 182static struct bpf_test tests[] = {
92 { 183 {
93 "add+sub+mul", 184 "add+sub+mul",
@@ -1597,6 +1688,121 @@ static struct bpf_test tests[] = {
1597 .prog_type = BPF_PROG_TYPE_SK_SKB, 1688 .prog_type = BPF_PROG_TYPE_SK_SKB,
1598 }, 1689 },
1599 { 1690 {
1691 "valid access family in SK_MSG",
1692 .insns = {
1693 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1694 offsetof(struct sk_msg_md, family)),
1695 BPF_EXIT_INSN(),
1696 },
1697 .result = ACCEPT,
1698 .prog_type = BPF_PROG_TYPE_SK_MSG,
1699 },
1700 {
1701 "valid access remote_ip4 in SK_MSG",
1702 .insns = {
1703 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1704 offsetof(struct sk_msg_md, remote_ip4)),
1705 BPF_EXIT_INSN(),
1706 },
1707 .result = ACCEPT,
1708 .prog_type = BPF_PROG_TYPE_SK_MSG,
1709 },
1710 {
1711 "valid access local_ip4 in SK_MSG",
1712 .insns = {
1713 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1714 offsetof(struct sk_msg_md, local_ip4)),
1715 BPF_EXIT_INSN(),
1716 },
1717 .result = ACCEPT,
1718 .prog_type = BPF_PROG_TYPE_SK_MSG,
1719 },
1720 {
1721 "valid access remote_port in SK_MSG",
1722 .insns = {
1723 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1724 offsetof(struct sk_msg_md, remote_port)),
1725 BPF_EXIT_INSN(),
1726 },
1727 .result = ACCEPT,
1728 .prog_type = BPF_PROG_TYPE_SK_MSG,
1729 },
1730 {
1731 "valid access local_port in SK_MSG",
1732 .insns = {
1733 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1734 offsetof(struct sk_msg_md, local_port)),
1735 BPF_EXIT_INSN(),
1736 },
1737 .result = ACCEPT,
1738 .prog_type = BPF_PROG_TYPE_SK_MSG,
1739 },
1740 {
1741 "valid access remote_ip6 in SK_MSG",
1742 .insns = {
1743 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1744 offsetof(struct sk_msg_md, remote_ip6[0])),
1745 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1746 offsetof(struct sk_msg_md, remote_ip6[1])),
1747 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1748 offsetof(struct sk_msg_md, remote_ip6[2])),
1749 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1750 offsetof(struct sk_msg_md, remote_ip6[3])),
1751 BPF_EXIT_INSN(),
1752 },
1753 .result = ACCEPT,
1754 .prog_type = BPF_PROG_TYPE_SK_SKB,
1755 },
1756 {
1757 "valid access local_ip6 in SK_MSG",
1758 .insns = {
1759 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1760 offsetof(struct sk_msg_md, local_ip6[0])),
1761 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1762 offsetof(struct sk_msg_md, local_ip6[1])),
1763 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1764 offsetof(struct sk_msg_md, local_ip6[2])),
1765 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
1766 offsetof(struct sk_msg_md, local_ip6[3])),
1767 BPF_EXIT_INSN(),
1768 },
1769 .result = ACCEPT,
1770 .prog_type = BPF_PROG_TYPE_SK_SKB,
1771 },
1772 {
1773 "invalid 64B read of family in SK_MSG",
1774 .insns = {
1775 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
1776 offsetof(struct sk_msg_md, family)),
1777 BPF_EXIT_INSN(),
1778 },
1779 .errstr = "invalid bpf_context access",
1780 .result = REJECT,
1781 .prog_type = BPF_PROG_TYPE_SK_MSG,
1782 },
1783 {
1784 "invalid read past end of SK_MSG",
1785 .insns = {
1786 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
1787 offsetof(struct sk_msg_md, local_port) + 4),
1788 BPF_EXIT_INSN(),
1789 },
1790 .errstr = "R0 !read_ok",
1791 .result = REJECT,
1792 .prog_type = BPF_PROG_TYPE_SK_MSG,
1793 },
1794 {
1795 "invalid read offset in SK_MSG",
1796 .insns = {
1797 BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
1798 offsetof(struct sk_msg_md, family) + 1),
1799 BPF_EXIT_INSN(),
1800 },
1801 .errstr = "invalid bpf_context access",
1802 .result = REJECT,
1803 .prog_type = BPF_PROG_TYPE_SK_MSG,
1804 },
1805 {
1600 "direct packet read for SK_MSG", 1806 "direct packet read for SK_MSG",
1601 .insns = { 1807 .insns = {
1602 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 1808 BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
@@ -2565,7 +2771,7 @@ static struct bpf_test tests[] = {
2565 BPF_MOV64_IMM(BPF_REG_0, 0), 2771 BPF_MOV64_IMM(BPF_REG_0, 0),
2566 BPF_EXIT_INSN(), 2772 BPF_EXIT_INSN(),
2567 }, 2773 },
2568 .fixup_prog = { 1 }, 2774 .fixup_prog1 = { 1 },
2569 .errstr_unpriv = "R3 leaks addr into helper", 2775 .errstr_unpriv = "R3 leaks addr into helper",
2570 .result_unpriv = REJECT, 2776 .result_unpriv = REJECT,
2571 .result = ACCEPT, 2777 .result = ACCEPT,
@@ -2652,7 +2858,7 @@ static struct bpf_test tests[] = {
2652 BPF_MOV64_IMM(BPF_REG_0, 1), 2858 BPF_MOV64_IMM(BPF_REG_0, 1),
2653 BPF_EXIT_INSN(), 2859 BPF_EXIT_INSN(),
2654 }, 2860 },
2655 .fixup_prog = { 1 }, 2861 .fixup_prog1 = { 1 },
2656 .result = ACCEPT, 2862 .result = ACCEPT,
2657 .retval = 42, 2863 .retval = 42,
2658 }, 2864 },
@@ -2666,7 +2872,7 @@ static struct bpf_test tests[] = {
2666 BPF_MOV64_IMM(BPF_REG_0, 1), 2872 BPF_MOV64_IMM(BPF_REG_0, 1),
2667 BPF_EXIT_INSN(), 2873 BPF_EXIT_INSN(),
2668 }, 2874 },
2669 .fixup_prog = { 1 }, 2875 .fixup_prog1 = { 1 },
2670 .result = ACCEPT, 2876 .result = ACCEPT,
2671 .retval = 41, 2877 .retval = 41,
2672 }, 2878 },
@@ -2680,7 +2886,7 @@ static struct bpf_test tests[] = {
2680 BPF_MOV64_IMM(BPF_REG_0, 1), 2886 BPF_MOV64_IMM(BPF_REG_0, 1),
2681 BPF_EXIT_INSN(), 2887 BPF_EXIT_INSN(),
2682 }, 2888 },
2683 .fixup_prog = { 1 }, 2889 .fixup_prog1 = { 1 },
2684 .result = ACCEPT, 2890 .result = ACCEPT,
2685 .retval = 1, 2891 .retval = 1,
2686 }, 2892 },
@@ -2694,7 +2900,7 @@ static struct bpf_test tests[] = {
2694 BPF_MOV64_IMM(BPF_REG_0, 2), 2900 BPF_MOV64_IMM(BPF_REG_0, 2),
2695 BPF_EXIT_INSN(), 2901 BPF_EXIT_INSN(),
2696 }, 2902 },
2697 .fixup_prog = { 1 }, 2903 .fixup_prog1 = { 1 },
2698 .result = ACCEPT, 2904 .result = ACCEPT,
2699 .retval = 2, 2905 .retval = 2,
2700 }, 2906 },
@@ -2708,7 +2914,7 @@ static struct bpf_test tests[] = {
2708 BPF_MOV64_IMM(BPF_REG_0, 2), 2914 BPF_MOV64_IMM(BPF_REG_0, 2),
2709 BPF_EXIT_INSN(), 2915 BPF_EXIT_INSN(),
2710 }, 2916 },
2711 .fixup_prog = { 1 }, 2917 .fixup_prog1 = { 1 },
2712 .result = ACCEPT, 2918 .result = ACCEPT,
2713 .retval = 2, 2919 .retval = 2,
2714 }, 2920 },
@@ -2722,7 +2928,7 @@ static struct bpf_test tests[] = {
2722 BPF_MOV64_IMM(BPF_REG_0, 2), 2928 BPF_MOV64_IMM(BPF_REG_0, 2),
2723 BPF_EXIT_INSN(), 2929 BPF_EXIT_INSN(),
2724 }, 2930 },
2725 .fixup_prog = { 2 }, 2931 .fixup_prog1 = { 2 },
2726 .result = ACCEPT, 2932 .result = ACCEPT,
2727 .retval = 42, 2933 .retval = 42,
2728 }, 2934 },
@@ -5594,6 +5800,257 @@ static struct bpf_test tests[] = {
5594 .prog_type = BPF_PROG_TYPE_TRACEPOINT, 5800 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5595 }, 5801 },
5596 { 5802 {
5803 "map lookup helper access to map",
5804 .insns = {
5805 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5806 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5807 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5808 BPF_LD_MAP_FD(BPF_REG_1, 0),
5809 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5810 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
5811 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5812 BPF_LD_MAP_FD(BPF_REG_1, 0),
5813 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5814 BPF_EXIT_INSN(),
5815 },
5816 .fixup_map3 = { 3, 8 },
5817 .result = ACCEPT,
5818 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5819 },
5820 {
5821 "map update helper access to map",
5822 .insns = {
5823 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5824 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5825 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5826 BPF_LD_MAP_FD(BPF_REG_1, 0),
5827 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5828 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
5829 BPF_MOV64_IMM(BPF_REG_4, 0),
5830 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
5831 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5832 BPF_LD_MAP_FD(BPF_REG_1, 0),
5833 BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
5834 BPF_EXIT_INSN(),
5835 },
5836 .fixup_map3 = { 3, 10 },
5837 .result = ACCEPT,
5838 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5839 },
5840 {
5841 "map update helper access to map: wrong size",
5842 .insns = {
5843 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5844 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5845 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5846 BPF_LD_MAP_FD(BPF_REG_1, 0),
5847 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5848 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
5849 BPF_MOV64_IMM(BPF_REG_4, 0),
5850 BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
5851 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5852 BPF_LD_MAP_FD(BPF_REG_1, 0),
5853 BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
5854 BPF_EXIT_INSN(),
5855 },
5856 .fixup_map1 = { 3 },
5857 .fixup_map3 = { 10 },
5858 .result = REJECT,
5859 .errstr = "invalid access to map value, value_size=8 off=0 size=16",
5860 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5861 },
5862 {
5863 "map helper access to adjusted map (via const imm)",
5864 .insns = {
5865 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5866 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5867 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5868 BPF_LD_MAP_FD(BPF_REG_1, 0),
5869 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5870 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
5871 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5872 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
5873 offsetof(struct other_val, bar)),
5874 BPF_LD_MAP_FD(BPF_REG_1, 0),
5875 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5876 BPF_EXIT_INSN(),
5877 },
5878 .fixup_map3 = { 3, 9 },
5879 .result = ACCEPT,
5880 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5881 },
5882 {
5883 "map helper access to adjusted map (via const imm): out-of-bound 1",
5884 .insns = {
5885 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5886 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5887 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5888 BPF_LD_MAP_FD(BPF_REG_1, 0),
5889 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5890 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
5891 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5892 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
5893 sizeof(struct other_val) - 4),
5894 BPF_LD_MAP_FD(BPF_REG_1, 0),
5895 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5896 BPF_EXIT_INSN(),
5897 },
5898 .fixup_map3 = { 3, 9 },
5899 .result = REJECT,
5900 .errstr = "invalid access to map value, value_size=16 off=12 size=8",
5901 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5902 },
5903 {
5904 "map helper access to adjusted map (via const imm): out-of-bound 2",
5905 .insns = {
5906 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5907 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5908 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5909 BPF_LD_MAP_FD(BPF_REG_1, 0),
5910 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5911 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
5912 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5913 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
5914 BPF_LD_MAP_FD(BPF_REG_1, 0),
5915 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5916 BPF_EXIT_INSN(),
5917 },
5918 .fixup_map3 = { 3, 9 },
5919 .result = REJECT,
5920 .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
5921 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5922 },
5923 {
5924 "map helper access to adjusted map (via const reg)",
5925 .insns = {
5926 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5927 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5928 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5929 BPF_LD_MAP_FD(BPF_REG_1, 0),
5930 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5931 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
5932 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5933 BPF_MOV64_IMM(BPF_REG_3,
5934 offsetof(struct other_val, bar)),
5935 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
5936 BPF_LD_MAP_FD(BPF_REG_1, 0),
5937 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5938 BPF_EXIT_INSN(),
5939 },
5940 .fixup_map3 = { 3, 10 },
5941 .result = ACCEPT,
5942 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5943 },
5944 {
5945 "map helper access to adjusted map (via const reg): out-of-bound 1",
5946 .insns = {
5947 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5948 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5949 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5950 BPF_LD_MAP_FD(BPF_REG_1, 0),
5951 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5952 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
5953 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5954 BPF_MOV64_IMM(BPF_REG_3,
5955 sizeof(struct other_val) - 4),
5956 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
5957 BPF_LD_MAP_FD(BPF_REG_1, 0),
5958 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5959 BPF_EXIT_INSN(),
5960 },
5961 .fixup_map3 = { 3, 10 },
5962 .result = REJECT,
5963 .errstr = "invalid access to map value, value_size=16 off=12 size=8",
5964 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5965 },
5966 {
5967 "map helper access to adjusted map (via const reg): out-of-bound 2",
5968 .insns = {
5969 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5970 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5971 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5972 BPF_LD_MAP_FD(BPF_REG_1, 0),
5973 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5974 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
5975 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5976 BPF_MOV64_IMM(BPF_REG_3, -4),
5977 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
5978 BPF_LD_MAP_FD(BPF_REG_1, 0),
5979 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5980 BPF_EXIT_INSN(),
5981 },
5982 .fixup_map3 = { 3, 10 },
5983 .result = REJECT,
5984 .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
5985 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
5986 },
5987 {
5988 "map helper access to adjusted map (via variable)",
5989 .insns = {
5990 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
5991 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
5992 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
5993 BPF_LD_MAP_FD(BPF_REG_1, 0),
5994 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
5995 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
5996 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
5997 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
5998 BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
5999 offsetof(struct other_val, bar), 4),
6000 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
6001 BPF_LD_MAP_FD(BPF_REG_1, 0),
6002 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
6003 BPF_EXIT_INSN(),
6004 },
6005 .fixup_map3 = { 3, 11 },
6006 .result = ACCEPT,
6007 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
6008 },
6009 {
6010 "map helper access to adjusted map (via variable): no max check",
6011 .insns = {
6012 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6013 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6014 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
6015 BPF_LD_MAP_FD(BPF_REG_1, 0),
6016 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
6017 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
6018 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
6019 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
6020 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
6021 BPF_LD_MAP_FD(BPF_REG_1, 0),
6022 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
6023 BPF_EXIT_INSN(),
6024 },
6025 .fixup_map3 = { 3, 10 },
6026 .result = REJECT,
6027 .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
6028 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
6029 },
6030 {
6031 "map helper access to adjusted map (via variable): wrong max check",
6032 .insns = {
6033 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
6034 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
6035 BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
6036 BPF_LD_MAP_FD(BPF_REG_1, 0),
6037 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
6038 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
6039 BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
6040 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
6041 BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
6042 offsetof(struct other_val, bar) + 1, 4),
6043 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
6044 BPF_LD_MAP_FD(BPF_REG_1, 0),
6045 BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
6046 BPF_EXIT_INSN(),
6047 },
6048 .fixup_map3 = { 3, 11 },
6049 .result = REJECT,
6050 .errstr = "invalid access to map value, value_size=16 off=9 size=8",
6051 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
6052 },
6053 {
5597 "map element value is preserved across register spilling", 6054 "map element value is preserved across register spilling",
5598 .insns = { 6055 .insns = {
5599 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 6056 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -8190,7 +8647,7 @@ static struct bpf_test tests[] = {
8190 offsetof(struct __sk_buff, mark)), 8647 offsetof(struct __sk_buff, mark)),
8191 BPF_EXIT_INSN(), 8648 BPF_EXIT_INSN(),
8192 }, 8649 },
8193 .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", 8650 .errstr = "dereference of modified ctx ptr",
8194 .result = REJECT, 8651 .result = REJECT,
8195 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 8652 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
8196 }, 8653 },
@@ -11227,6 +11684,112 @@ static struct bpf_test tests[] = {
11227 .prog_type = BPF_PROG_TYPE_XDP, 11684 .prog_type = BPF_PROG_TYPE_XDP,
11228 }, 11685 },
11229 { 11686 {
11687 "calls: two calls returning different map pointers for lookup (hash, array)",
11688 .insns = {
11689 /* main prog */
11690 BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
11691 BPF_CALL_REL(11),
11692 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11693 BPF_CALL_REL(12),
11694 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
11695 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
11696 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
11697 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
11698 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
11699 BPF_FUNC_map_lookup_elem),
11700 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
11701 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
11702 offsetof(struct test_val, foo)),
11703 BPF_MOV64_IMM(BPF_REG_0, 1),
11704 BPF_EXIT_INSN(),
11705 /* subprog 1 */
11706 BPF_LD_MAP_FD(BPF_REG_0, 0),
11707 BPF_EXIT_INSN(),
11708 /* subprog 2 */
11709 BPF_LD_MAP_FD(BPF_REG_0, 0),
11710 BPF_EXIT_INSN(),
11711 },
11712 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
11713 .fixup_map2 = { 13 },
11714 .fixup_map4 = { 16 },
11715 .result = ACCEPT,
11716 .retval = 1,
11717 },
11718 {
11719 "calls: two calls returning different map pointers for lookup (hash, map in map)",
11720 .insns = {
11721 /* main prog */
11722 BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
11723 BPF_CALL_REL(11),
11724 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11725 BPF_CALL_REL(12),
11726 BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
11727 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
11728 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
11729 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
11730 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
11731 BPF_FUNC_map_lookup_elem),
11732 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
11733 BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
11734 offsetof(struct test_val, foo)),
11735 BPF_MOV64_IMM(BPF_REG_0, 1),
11736 BPF_EXIT_INSN(),
11737 /* subprog 1 */
11738 BPF_LD_MAP_FD(BPF_REG_0, 0),
11739 BPF_EXIT_INSN(),
11740 /* subprog 2 */
11741 BPF_LD_MAP_FD(BPF_REG_0, 0),
11742 BPF_EXIT_INSN(),
11743 },
11744 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
11745 .fixup_map_in_map = { 16 },
11746 .fixup_map4 = { 13 },
11747 .result = REJECT,
11748 .errstr = "R0 invalid mem access 'map_ptr'",
11749 },
11750 {
11751 "cond: two branches returning different map pointers for lookup (tail, tail)",
11752 .insns = {
11753 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
11754 offsetof(struct __sk_buff, mark)),
11755 BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
11756 BPF_LD_MAP_FD(BPF_REG_2, 0),
11757 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
11758 BPF_LD_MAP_FD(BPF_REG_2, 0),
11759 BPF_MOV64_IMM(BPF_REG_3, 7),
11760 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
11761 BPF_FUNC_tail_call),
11762 BPF_MOV64_IMM(BPF_REG_0, 1),
11763 BPF_EXIT_INSN(),
11764 },
11765 .fixup_prog1 = { 5 },
11766 .fixup_prog2 = { 2 },
11767 .result_unpriv = REJECT,
11768 .errstr_unpriv = "tail_call abusing map_ptr",
11769 .result = ACCEPT,
11770 .retval = 42,
11771 },
11772 {
11773 "cond: two branches returning same map pointers for lookup (tail, tail)",
11774 .insns = {
11775 BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
11776 offsetof(struct __sk_buff, mark)),
11777 BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
11778 BPF_LD_MAP_FD(BPF_REG_2, 0),
11779 BPF_JMP_IMM(BPF_JA, 0, 0, 2),
11780 BPF_LD_MAP_FD(BPF_REG_2, 0),
11781 BPF_MOV64_IMM(BPF_REG_3, 7),
11782 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
11783 BPF_FUNC_tail_call),
11784 BPF_MOV64_IMM(BPF_REG_0, 1),
11785 BPF_EXIT_INSN(),
11786 },
11787 .fixup_prog2 = { 2, 5 },
11788 .result_unpriv = ACCEPT,
11789 .result = ACCEPT,
11790 .retval = 42,
11791 },
11792 {
11230 "search pruning: all branches should be verified (nop operation)", 11793 "search pruning: all branches should be verified (nop operation)",
11231 .insns = { 11794 .insns = {
11232 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 11795 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -11423,6 +11986,334 @@ static struct bpf_test tests[] = {
11423 .errstr = "BPF_XADD stores into R2 packet", 11986 .errstr = "BPF_XADD stores into R2 packet",
11424 .prog_type = BPF_PROG_TYPE_XDP, 11987 .prog_type = BPF_PROG_TYPE_XDP,
11425 }, 11988 },
11989 {
11990 "bpf_get_stack return R0 within range",
11991 .insns = {
11992 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
11993 BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
11994 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
11995 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
11996 BPF_LD_MAP_FD(BPF_REG_1, 0),
11997 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
11998 BPF_FUNC_map_lookup_elem),
11999 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
12000 BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
12001 BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
12002 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
12003 BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
12004 BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
12005 BPF_MOV64_IMM(BPF_REG_4, 256),
12006 BPF_EMIT_CALL(BPF_FUNC_get_stack),
12007 BPF_MOV64_IMM(BPF_REG_1, 0),
12008 BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
12009 BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
12010 BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
12011 BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
12012 BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
12013 BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
12014 BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
12015 BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
12016 BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
12017 BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
12018 BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
12019 BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
12020 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
12021 BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
12022 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
12023 BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
12024 BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
12025 BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
12026 BPF_MOV64_IMM(BPF_REG_4, 0),
12027 BPF_EMIT_CALL(BPF_FUNC_get_stack),
12028 BPF_EXIT_INSN(),
12029 },
12030 .fixup_map2 = { 4 },
12031 .result = ACCEPT,
12032 .prog_type = BPF_PROG_TYPE_TRACEPOINT,
12033 },
12034 {
12035 "ld_abs: invalid op 1",
12036 .insns = {
12037 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
12038 BPF_LD_ABS(BPF_DW, 0),
12039 BPF_EXIT_INSN(),
12040 },
12041 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12042 .result = REJECT,
12043 .errstr = "unknown opcode",
12044 },
12045 {
12046 "ld_abs: invalid op 2",
12047 .insns = {
12048 BPF_MOV32_IMM(BPF_REG_0, 256),
12049 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
12050 BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
12051 BPF_EXIT_INSN(),
12052 },
12053 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12054 .result = REJECT,
12055 .errstr = "unknown opcode",
12056 },
12057 {
12058 "ld_abs: nmap reduced",
12059 .insns = {
12060 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
12061 BPF_LD_ABS(BPF_H, 12),
12062 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
12063 BPF_LD_ABS(BPF_H, 12),
12064 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
12065 BPF_MOV32_IMM(BPF_REG_0, 18),
12066 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
12067 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
12068 BPF_LD_IND(BPF_W, BPF_REG_7, 14),
12069 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
12070 BPF_MOV32_IMM(BPF_REG_0, 280971478),
12071 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
12072 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
12073 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
12074 BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
12075 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
12076 BPF_LD_ABS(BPF_H, 12),
12077 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
12078 BPF_MOV32_IMM(BPF_REG_0, 22),
12079 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
12080 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
12081 BPF_LD_IND(BPF_H, BPF_REG_7, 14),
12082 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
12083 BPF_MOV32_IMM(BPF_REG_0, 17366),
12084 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
12085 BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
12086 BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
12087 BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
12088 BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
12089 BPF_MOV32_IMM(BPF_REG_0, 256),
12090 BPF_EXIT_INSN(),
12091 BPF_MOV32_IMM(BPF_REG_0, 0),
12092 BPF_EXIT_INSN(),
12093 },
12094 .data = {
12095 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
12096 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12097 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
12098 },
12099 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12100 .result = ACCEPT,
12101 .retval = 256,
12102 },
12103 {
12104 "ld_abs: div + abs, test 1",
12105 .insns = {
12106 BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
12107 BPF_LD_ABS(BPF_B, 3),
12108 BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
12109 BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
12110 BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
12111 BPF_LD_ABS(BPF_B, 4),
12112 BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
12113 BPF_LD_IND(BPF_B, BPF_REG_8, -70),
12114 BPF_EXIT_INSN(),
12115 },
12116 .data = {
12117 10, 20, 30, 40, 50,
12118 },
12119 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12120 .result = ACCEPT,
12121 .retval = 10,
12122 },
12123 {
12124 "ld_abs: div + abs, test 2",
12125 .insns = {
12126 BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
12127 BPF_LD_ABS(BPF_B, 3),
12128 BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
12129 BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
12130 BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
12131 BPF_LD_ABS(BPF_B, 128),
12132 BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
12133 BPF_LD_IND(BPF_B, BPF_REG_8, -70),
12134 BPF_EXIT_INSN(),
12135 },
12136 .data = {
12137 10, 20, 30, 40, 50,
12138 },
12139 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12140 .result = ACCEPT,
12141 .retval = 0,
12142 },
12143 {
12144 "ld_abs: div + abs, test 3",
12145 .insns = {
12146 BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
12147 BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
12148 BPF_LD_ABS(BPF_B, 3),
12149 BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
12150 BPF_EXIT_INSN(),
12151 },
12152 .data = {
12153 10, 20, 30, 40, 50,
12154 },
12155 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12156 .result = ACCEPT,
12157 .retval = 0,
12158 },
12159 {
12160 "ld_abs: div + abs, test 4",
12161 .insns = {
12162 BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
12163 BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
12164 BPF_LD_ABS(BPF_B, 256),
12165 BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
12166 BPF_EXIT_INSN(),
12167 },
12168 .data = {
12169 10, 20, 30, 40, 50,
12170 },
12171 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12172 .result = ACCEPT,
12173 .retval = 0,
12174 },
12175 {
12176 "ld_abs: vlan + abs, test 1",
12177 .insns = { },
12178 .data = {
12179 0x34,
12180 },
12181 .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
12182 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12183 .result = ACCEPT,
12184 .retval = 0xbef,
12185 },
12186 {
12187 "ld_abs: vlan + abs, test 2",
12188 .insns = {
12189 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
12190 BPF_LD_ABS(BPF_B, 0),
12191 BPF_LD_ABS(BPF_H, 0),
12192 BPF_LD_ABS(BPF_W, 0),
12193 BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
12194 BPF_MOV64_IMM(BPF_REG_6, 0),
12195 BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
12196 BPF_MOV64_IMM(BPF_REG_2, 1),
12197 BPF_MOV64_IMM(BPF_REG_3, 2),
12198 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
12199 BPF_FUNC_skb_vlan_push),
12200 BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
12201 BPF_LD_ABS(BPF_B, 0),
12202 BPF_LD_ABS(BPF_H, 0),
12203 BPF_LD_ABS(BPF_W, 0),
12204 BPF_MOV64_IMM(BPF_REG_0, 42),
12205 BPF_EXIT_INSN(),
12206 },
12207 .data = {
12208 0x34,
12209 },
12210 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12211 .result = ACCEPT,
12212 .retval = 42,
12213 },
12214 {
12215 "ld_abs: jump around ld_abs",
12216 .insns = { },
12217 .data = {
12218 10, 11,
12219 },
12220 .fill_helper = bpf_fill_jump_around_ld_abs,
12221 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12222 .result = ACCEPT,
12223 .retval = 10,
12224 },
12225 {
12226 "ld_dw: xor semi-random 64 bit imms, test 1",
12227 .insns = { },
12228 .data = { },
12229 .fill_helper = bpf_fill_rand_ld_dw,
12230 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12231 .result = ACCEPT,
12232 .retval = 4090,
12233 },
12234 {
12235 "ld_dw: xor semi-random 64 bit imms, test 2",
12236 .insns = { },
12237 .data = { },
12238 .fill_helper = bpf_fill_rand_ld_dw,
12239 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12240 .result = ACCEPT,
12241 .retval = 2047,
12242 },
12243 {
12244 "ld_dw: xor semi-random 64 bit imms, test 3",
12245 .insns = { },
12246 .data = { },
12247 .fill_helper = bpf_fill_rand_ld_dw,
12248 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12249 .result = ACCEPT,
12250 .retval = 511,
12251 },
12252 {
12253 "ld_dw: xor semi-random 64 bit imms, test 4",
12254 .insns = { },
12255 .data = { },
12256 .fill_helper = bpf_fill_rand_ld_dw,
12257 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12258 .result = ACCEPT,
12259 .retval = 5,
12260 },
12261 {
12262 "pass unmodified ctx pointer to helper",
12263 .insns = {
12264 BPF_MOV64_IMM(BPF_REG_2, 0),
12265 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
12266 BPF_FUNC_csum_update),
12267 BPF_MOV64_IMM(BPF_REG_0, 0),
12268 BPF_EXIT_INSN(),
12269 },
12270 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12271 .result = ACCEPT,
12272 },
12273 {
12274 "pass modified ctx pointer to helper, 1",
12275 .insns = {
12276 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
12277 BPF_MOV64_IMM(BPF_REG_2, 0),
12278 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
12279 BPF_FUNC_csum_update),
12280 BPF_MOV64_IMM(BPF_REG_0, 0),
12281 BPF_EXIT_INSN(),
12282 },
12283 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12284 .result = REJECT,
12285 .errstr = "dereference of modified ctx ptr",
12286 },
12287 {
12288 "pass modified ctx pointer to helper, 2",
12289 .insns = {
12290 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
12291 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
12292 BPF_FUNC_get_socket_cookie),
12293 BPF_MOV64_IMM(BPF_REG_0, 0),
12294 BPF_EXIT_INSN(),
12295 },
12296 .result_unpriv = REJECT,
12297 .result = REJECT,
12298 .errstr_unpriv = "dereference of modified ctx ptr",
12299 .errstr = "dereference of modified ctx ptr",
12300 },
12301 {
12302 "pass modified ctx pointer to helper, 3",
12303 .insns = {
12304 BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
12305 BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
12306 BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
12307 BPF_MOV64_IMM(BPF_REG_2, 0),
12308 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
12309 BPF_FUNC_csum_update),
12310 BPF_MOV64_IMM(BPF_REG_0, 0),
12311 BPF_EXIT_INSN(),
12312 },
12313 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
12314 .result = REJECT,
12315 .errstr = "variable ctx access var_off=(0x0; 0x4)",
12316 },
11426}; 12317};
11427 12318
11428static int probe_filter_length(const struct bpf_insn *fp) 12319static int probe_filter_length(const struct bpf_insn *fp)
@@ -11435,12 +12326,13 @@ static int probe_filter_length(const struct bpf_insn *fp)
11435 return len + 1; 12326 return len + 1;
11436} 12327}
11437 12328
11438static int create_map(uint32_t size_value, uint32_t max_elem) 12329static int create_map(uint32_t type, uint32_t size_key,
12330 uint32_t size_value, uint32_t max_elem)
11439{ 12331{
11440 int fd; 12332 int fd;
11441 12333
11442 fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(long long), 12334 fd = bpf_create_map(type, size_key, size_value, max_elem,
11443 size_value, max_elem, BPF_F_NO_PREALLOC); 12335 type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
11444 if (fd < 0) 12336 if (fd < 0)
11445 printf("Failed to create hash map '%s'!\n", strerror(errno)); 12337 printf("Failed to create hash map '%s'!\n", strerror(errno));
11446 12338
@@ -11473,13 +12365,13 @@ static int create_prog_dummy2(int mfd, int idx)
11473 ARRAY_SIZE(prog), "GPL", 0, NULL, 0); 12365 ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
11474} 12366}
11475 12367
11476static int create_prog_array(void) 12368static int create_prog_array(uint32_t max_elem, int p1key)
11477{ 12369{
11478 int p1key = 0, p2key = 1; 12370 int p2key = 1;
11479 int mfd, p1fd, p2fd; 12371 int mfd, p1fd, p2fd;
11480 12372
11481 mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), 12373 mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
11482 sizeof(int), 4, 0); 12374 sizeof(int), max_elem, 0);
11483 if (mfd < 0) { 12375 if (mfd < 0) {
11484 printf("Failed to create prog array '%s'!\n", strerror(errno)); 12376 printf("Failed to create prog array '%s'!\n", strerror(errno));
11485 return -1; 12377 return -1;
@@ -11526,22 +12418,29 @@ static int create_map_in_map(void)
11526 return outer_map_fd; 12418 return outer_map_fd;
11527} 12419}
11528 12420
11529static char bpf_vlog[32768]; 12421static char bpf_vlog[UINT_MAX >> 8];
11530 12422
11531static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, 12423static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
11532 int *map_fds) 12424 int *map_fds)
11533{ 12425{
11534 int *fixup_map1 = test->fixup_map1; 12426 int *fixup_map1 = test->fixup_map1;
11535 int *fixup_map2 = test->fixup_map2; 12427 int *fixup_map2 = test->fixup_map2;
11536 int *fixup_prog = test->fixup_prog; 12428 int *fixup_map3 = test->fixup_map3;
12429 int *fixup_map4 = test->fixup_map4;
12430 int *fixup_prog1 = test->fixup_prog1;
12431 int *fixup_prog2 = test->fixup_prog2;
11537 int *fixup_map_in_map = test->fixup_map_in_map; 12432 int *fixup_map_in_map = test->fixup_map_in_map;
11538 12433
12434 if (test->fill_helper)
12435 test->fill_helper(test);
12436
11539 /* Allocating HTs with 1 elem is fine here, since we only test 12437 /* Allocating HTs with 1 elem is fine here, since we only test
11540 * for verifier and not do a runtime lookup, so the only thing 12438 * for verifier and not do a runtime lookup, so the only thing
11541 * that really matters is value size in this case. 12439 * that really matters is value size in this case.
11542 */ 12440 */
11543 if (*fixup_map1) { 12441 if (*fixup_map1) {
11544 map_fds[0] = create_map(sizeof(long long), 1); 12442 map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
12443 sizeof(long long), 1);
11545 do { 12444 do {
11546 prog[*fixup_map1].imm = map_fds[0]; 12445 prog[*fixup_map1].imm = map_fds[0];
11547 fixup_map1++; 12446 fixup_map1++;
@@ -11549,25 +12448,52 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
11549 } 12448 }
11550 12449
11551 if (*fixup_map2) { 12450 if (*fixup_map2) {
11552 map_fds[1] = create_map(sizeof(struct test_val), 1); 12451 map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
12452 sizeof(struct test_val), 1);
11553 do { 12453 do {
11554 prog[*fixup_map2].imm = map_fds[1]; 12454 prog[*fixup_map2].imm = map_fds[1];
11555 fixup_map2++; 12455 fixup_map2++;
11556 } while (*fixup_map2); 12456 } while (*fixup_map2);
11557 } 12457 }
11558 12458
11559 if (*fixup_prog) { 12459 if (*fixup_map3) {
11560 map_fds[2] = create_prog_array(); 12460 map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
12461 sizeof(struct other_val), 1);
12462 do {
12463 prog[*fixup_map3].imm = map_fds[2];
12464 fixup_map3++;
12465 } while (*fixup_map3);
12466 }
12467
12468 if (*fixup_map4) {
12469 map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
12470 sizeof(struct test_val), 1);
11561 do { 12471 do {
11562 prog[*fixup_prog].imm = map_fds[2]; 12472 prog[*fixup_map4].imm = map_fds[3];
11563 fixup_prog++; 12473 fixup_map4++;
11564 } while (*fixup_prog); 12474 } while (*fixup_map4);
12475 }
12476
12477 if (*fixup_prog1) {
12478 map_fds[4] = create_prog_array(4, 0);
12479 do {
12480 prog[*fixup_prog1].imm = map_fds[4];
12481 fixup_prog1++;
12482 } while (*fixup_prog1);
12483 }
12484
12485 if (*fixup_prog2) {
12486 map_fds[5] = create_prog_array(8, 7);
12487 do {
12488 prog[*fixup_prog2].imm = map_fds[5];
12489 fixup_prog2++;
12490 } while (*fixup_prog2);
11565 } 12491 }
11566 12492
11567 if (*fixup_map_in_map) { 12493 if (*fixup_map_in_map) {
11568 map_fds[3] = create_map_in_map(); 12494 map_fds[6] = create_map_in_map();
11569 do { 12495 do {
11570 prog[*fixup_map_in_map].imm = map_fds[3]; 12496 prog[*fixup_map_in_map].imm = map_fds[6];
11571 fixup_map_in_map++; 12497 fixup_map_in_map++;
11572 } while (*fixup_map_in_map); 12498 } while (*fixup_map_in_map);
11573 } 12499 }
@@ -11577,10 +12503,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
11577 int *passes, int *errors) 12503 int *passes, int *errors)
11578{ 12504{
11579 int fd_prog, expected_ret, reject_from_alignment; 12505 int fd_prog, expected_ret, reject_from_alignment;
12506 int prog_len, prog_type = test->prog_type;
11580 struct bpf_insn *prog = test->insns; 12507 struct bpf_insn *prog = test->insns;
11581 int prog_len = probe_filter_length(prog);
11582 char data_in[TEST_DATA_LEN] = {};
11583 int prog_type = test->prog_type;
11584 int map_fds[MAX_NR_MAPS]; 12508 int map_fds[MAX_NR_MAPS];
11585 const char *expected_err; 12509 const char *expected_err;
11586 uint32_t retval; 12510 uint32_t retval;
@@ -11590,6 +12514,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
11590 map_fds[i] = -1; 12514 map_fds[i] = -1;
11591 12515
11592 do_test_fixup(test, prog, map_fds); 12516 do_test_fixup(test, prog, map_fds);
12517 prog_len = probe_filter_length(prog);
11593 12518
11594 fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, 12519 fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
11595 prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, 12520 prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -11629,8 +12554,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
11629 } 12554 }
11630 12555
11631 if (fd_prog >= 0) { 12556 if (fd_prog >= 0) {
11632 err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), 12557 err = bpf_prog_test_run(fd_prog, 1, test->data,
11633 NULL, NULL, &retval, NULL); 12558 sizeof(test->data), NULL, NULL,
12559 &retval, NULL);
11634 if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { 12560 if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
11635 printf("Unexpected bpf_prog_test_run error\n"); 12561 printf("Unexpected bpf_prog_test_run error\n");
11636 goto fail_log; 12562 goto fail_log;
@@ -11713,6 +12639,11 @@ static void get_unpriv_disabled()
11713 FILE *fd; 12639 FILE *fd;
11714 12640
11715 fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); 12641 fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
12642 if (!fd) {
12643 perror("fopen /proc/sys/"UNPRIV_SYSCTL);
12644 unpriv_disabled = true;
12645 return;
12646 }
11716 if (fgets(buf, 2, fd) == buf && atoi(buf)) 12647 if (fgets(buf, 2, fd) == buf && atoi(buf))
11717 unpriv_disabled = true; 12648 unpriv_disabled = true;
11718 fclose(fd); 12649 fclose(fd);
@@ -11783,5 +12714,6 @@ int main(int argc, char **argv)
11783 return EXIT_FAILURE; 12714 return EXIT_FAILURE;
11784 } 12715 }
11785 12716
12717 bpf_semi_rand_init();
11786 return do_test(unpriv, from, to); 12718 return do_test(unpriv, from, to);
11787} 12719}
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644
index 000000000000..3868dcb63420
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -0,0 +1,165 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5#include <assert.h>
6#include <errno.h>
7#include <poll.h>
8#include <unistd.h>
9#include <linux/perf_event.h>
10#include <sys/mman.h>
11#include "trace_helpers.h"
12
13#define MAX_SYMS 300000
14static struct ksym syms[MAX_SYMS];
15static int sym_cnt;
16
17static int ksym_cmp(const void *p1, const void *p2)
18{
19 return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
20}
21
22int load_kallsyms(void)
23{
24 FILE *f = fopen("/proc/kallsyms", "r");
25 char func[256], buf[256];
26 char symbol;
27 void *addr;
28 int i = 0;
29
30 if (!f)
31 return -ENOENT;
32
33 while (!feof(f)) {
34 if (!fgets(buf, sizeof(buf), f))
35 break;
36 if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
37 break;
38 if (!addr)
39 continue;
40 syms[i].addr = (long) addr;
41 syms[i].name = strdup(func);
42 i++;
43 }
44 sym_cnt = i;
45 qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
46 return 0;
47}
48
49struct ksym *ksym_search(long key)
50{
51 int start = 0, end = sym_cnt;
52 int result;
53
54 while (start < end) {
55 size_t mid = start + (end - start) / 2;
56
57 result = key - syms[mid].addr;
58 if (result < 0)
59 end = mid;
60 else if (result > 0)
61 start = mid + 1;
62 else
63 return &syms[mid];
64 }
65
66 if (start >= 1 && syms[start - 1].addr < key &&
67 key < syms[start].addr)
68 /* valid ksym */
69 return &syms[start - 1];
70
71 /* out of range. return _stext */
72 return &syms[0];
73}
74
75long ksym_get_addr(const char *name)
76{
77 int i;
78
79 for (i = 0; i < sym_cnt; i++) {
80 if (strcmp(syms[i].name, name) == 0)
81 return syms[i].addr;
82 }
83
84 return 0;
85}
86
87static int page_size;
88static int page_cnt = 8;
89static struct perf_event_mmap_page *header;
90
91int perf_event_mmap(int fd)
92{
93 void *base;
94 int mmap_size;
95
96 page_size = getpagesize();
97 mmap_size = page_size * (page_cnt + 1);
98
99 base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
100 if (base == MAP_FAILED) {
101 printf("mmap err\n");
102 return -1;
103 }
104
105 header = base;
106 return 0;
107}
108
109static int perf_event_poll(int fd)
110{
111 struct pollfd pfd = { .fd = fd, .events = POLLIN };
112
113 return poll(&pfd, 1, 1000);
114}
115
116struct perf_event_sample {
117 struct perf_event_header header;
118 __u32 size;
119 char data[];
120};
121
122static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
123{
124 struct perf_event_sample *e = event;
125 perf_event_print_fn fn = priv;
126 int ret;
127
128 if (e->header.type == PERF_RECORD_SAMPLE) {
129 ret = fn(e->data, e->size);
130 if (ret != LIBBPF_PERF_EVENT_CONT)
131 return ret;
132 } else if (e->header.type == PERF_RECORD_LOST) {
133 struct {
134 struct perf_event_header header;
135 __u64 id;
136 __u64 lost;
137 } *lost = (void *) e;
138 printf("lost %lld events\n", lost->lost);
139 } else {
140 printf("unknown event type=%d size=%d\n",
141 e->header.type, e->header.size);
142 }
143
144 return LIBBPF_PERF_EVENT_CONT;
145}
146
147int perf_event_poller(int fd, perf_event_print_fn output_fn)
148{
149 enum bpf_perf_event_ret ret;
150 void *buf = NULL;
151 size_t len = 0;
152
153 for (;;) {
154 perf_event_poll(fd);
155 ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
156 page_size, &buf, &len,
157 bpf_perf_event_print,
158 output_fn);
159 if (ret != LIBBPF_PERF_EVENT_CONT)
160 break;
161 }
162 free(buf);
163
164 return ret;
165}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644
index 000000000000..3b4bcf7f5084
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -0,0 +1,21 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __TRACE_HELPER_H
3#define __TRACE_HELPER_H
4
5#include <libbpf.h>
6
7struct ksym {
8 long addr;
9 char *name;
10};
11
12int load_kallsyms(void);
13struct ksym *ksym_search(long key);
14long ksym_get_addr(const char *name);
15
16typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
17
18int perf_event_mmap(int fd);
19/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
20int perf_event_poller(int fd, perf_event_print_fn output_fn);
21#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 4acfdebf36fa..9de8b7cb4e6d 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -6,15 +6,21 @@
6#include <stdlib.h> 6#include <stdlib.h>
7 7
8#define BUF_SIZE 256 8#define BUF_SIZE 256
9int main(void) 9
10int main(int argc, char *argv[])
10{ 11{
11 int fd = open("/dev/urandom", O_RDONLY); 12 int fd = open("/dev/urandom", O_RDONLY);
12 int i; 13 int i;
13 char buf[BUF_SIZE]; 14 char buf[BUF_SIZE];
15 int count = 4;
14 16
15 if (fd < 0) 17 if (fd < 0)
16 return 1; 18 return 1;
17 for (i = 0; i < 4; ++i) 19
20 if (argc == 2)
21 count = atoi(argv[1]);
22
23 for (i = 0; i < count; ++i)
18 read(fd, buf, BUF_SIZE); 24 read(fd, buf, BUF_SIZE);
19 25
20 close(fd); 26 close(fd);
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 3fece06e9f64..f82dcc1f8841 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -143,10 +143,14 @@ void suspend(void)
143 int err; 143 int err;
144 struct itimerspec spec = {}; 144 struct itimerspec spec = {};
145 145
146 if (getuid() != 0)
147 ksft_exit_skip("Please run the test as root - Exiting.\n");
148
146 power_state_fd = open("/sys/power/state", O_RDWR); 149 power_state_fd = open("/sys/power/state", O_RDWR);
147 if (power_state_fd < 0) 150 if (power_state_fd < 0)
148 ksft_exit_fail_msg( 151 ksft_exit_fail_msg(
149 "open(\"/sys/power/state\") failed (is this test running as root?)\n"); 152 "open(\"/sys/power/state\") failed %s)\n",
153 strerror(errno));
150 154
151 timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); 155 timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
152 if (timerfd < 0) 156 if (timerfd < 0)
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000000..f7a31392eb2f
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,10 @@
1# SPDX-License-Identifier: GPL-2.0
2CFLAGS += -Wall
3
4all:
5
6TEST_GEN_PROGS = test_memcontrol
7
8include ../lib.mk
9
10$(OUTPUT)/test_memcontrol: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000000..1e9e3c470561
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,331 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#define _GNU_SOURCE
4
5#include <errno.h>
6#include <fcntl.h>
7#include <linux/limits.h>
8#include <signal.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/stat.h>
13#include <sys/types.h>
14#include <sys/wait.h>
15#include <unistd.h>
16
17#include "cgroup_util.h"
18
19static ssize_t read_text(const char *path, char *buf, size_t max_len)
20{
21 ssize_t len;
22 int fd;
23
24 fd = open(path, O_RDONLY);
25 if (fd < 0)
26 return fd;
27
28 len = read(fd, buf, max_len - 1);
29 if (len < 0)
30 goto out;
31
32 buf[len] = 0;
33out:
34 close(fd);
35 return len;
36}
37
38static ssize_t write_text(const char *path, char *buf, ssize_t len)
39{
40 int fd;
41
42 fd = open(path, O_WRONLY | O_APPEND);
43 if (fd < 0)
44 return fd;
45
46 len = write(fd, buf, len);
47 if (len < 0) {
48 close(fd);
49 return len;
50 }
51
52 close(fd);
53
54 return len;
55}
56
57char *cg_name(const char *root, const char *name)
58{
59 size_t len = strlen(root) + strlen(name) + 2;
60 char *ret = malloc(len);
61
62 snprintf(ret, len, "%s/%s", root, name);
63
64 return ret;
65}
66
67char *cg_name_indexed(const char *root, const char *name, int index)
68{
69 size_t len = strlen(root) + strlen(name) + 10;
70 char *ret = malloc(len);
71
72 snprintf(ret, len, "%s/%s_%d", root, name, index);
73
74 return ret;
75}
76
77int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
78{
79 char path[PATH_MAX];
80
81 snprintf(path, sizeof(path), "%s/%s", cgroup, control);
82
83 if (read_text(path, buf, len) >= 0)
84 return 0;
85
86 return -1;
87}
88
89int cg_read_strcmp(const char *cgroup, const char *control,
90 const char *expected)
91{
92 size_t size = strlen(expected) + 1;
93 char *buf;
94
95 buf = malloc(size);
96 if (!buf)
97 return -1;
98
99 if (cg_read(cgroup, control, buf, size))
100 return -1;
101
102 return strcmp(expected, buf);
103}
104
105int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
106{
107 char buf[PAGE_SIZE];
108
109 if (cg_read(cgroup, control, buf, sizeof(buf)))
110 return -1;
111
112 return strstr(buf, needle) ? 0 : -1;
113}
114
115long cg_read_long(const char *cgroup, const char *control)
116{
117 char buf[128];
118
119 if (cg_read(cgroup, control, buf, sizeof(buf)))
120 return -1;
121
122 return atol(buf);
123}
124
125long cg_read_key_long(const char *cgroup, const char *control, const char *key)
126{
127 char buf[PAGE_SIZE];
128 char *ptr;
129
130 if (cg_read(cgroup, control, buf, sizeof(buf)))
131 return -1;
132
133 ptr = strstr(buf, key);
134 if (!ptr)
135 return -1;
136
137 return atol(ptr + strlen(key));
138}
139
140int cg_write(const char *cgroup, const char *control, char *buf)
141{
142 char path[PATH_MAX];
143 ssize_t len = strlen(buf);
144
145 snprintf(path, sizeof(path), "%s/%s", cgroup, control);
146
147 if (write_text(path, buf, len) == len)
148 return 0;
149
150 return -1;
151}
152
153int cg_find_unified_root(char *root, size_t len)
154{
155 char buf[10 * PAGE_SIZE];
156 char *fs, *mount, *type;
157 const char delim[] = "\n\t ";
158
159 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
160 return -1;
161
162 /*
163 * Example:
164 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
165 */
166 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
167 mount = strtok(NULL, delim);
168 type = strtok(NULL, delim);
169 strtok(NULL, delim);
170 strtok(NULL, delim);
171 strtok(NULL, delim);
172
173 if (strcmp(fs, "cgroup") == 0 &&
174 strcmp(type, "cgroup2") == 0) {
175 strncpy(root, mount, len);
176 return 0;
177 }
178 }
179
180 return -1;
181}
182
183int cg_create(const char *cgroup)
184{
185 return mkdir(cgroup, 0644);
186}
187
188static int cg_killall(const char *cgroup)
189{
190 char buf[PAGE_SIZE];
191 char *ptr = buf;
192
193 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
194 return -1;
195
196 while (ptr < buf + sizeof(buf)) {
197 int pid = strtol(ptr, &ptr, 10);
198
199 if (pid == 0)
200 break;
201 if (*ptr)
202 ptr++;
203 else
204 break;
205 if (kill(pid, SIGKILL))
206 return -1;
207 }
208
209 return 0;
210}
211
212int cg_destroy(const char *cgroup)
213{
214 int ret;
215
216retry:
217 ret = rmdir(cgroup);
218 if (ret && errno == EBUSY) {
219 ret = cg_killall(cgroup);
220 if (ret)
221 return ret;
222 usleep(100);
223 goto retry;
224 }
225
226 if (ret && errno == ENOENT)
227 ret = 0;
228
229 return ret;
230}
231
232int cg_run(const char *cgroup,
233 int (*fn)(const char *cgroup, void *arg),
234 void *arg)
235{
236 int pid, retcode;
237
238 pid = fork();
239 if (pid < 0) {
240 return pid;
241 } else if (pid == 0) {
242 char buf[64];
243
244 snprintf(buf, sizeof(buf), "%d", getpid());
245 if (cg_write(cgroup, "cgroup.procs", buf))
246 exit(EXIT_FAILURE);
247 exit(fn(cgroup, arg));
248 } else {
249 waitpid(pid, &retcode, 0);
250 if (WIFEXITED(retcode))
251 return WEXITSTATUS(retcode);
252 else
253 return -1;
254 }
255}
256
257int cg_run_nowait(const char *cgroup,
258 int (*fn)(const char *cgroup, void *arg),
259 void *arg)
260{
261 int pid;
262
263 pid = fork();
264 if (pid == 0) {
265 char buf[64];
266
267 snprintf(buf, sizeof(buf), "%d", getpid());
268 if (cg_write(cgroup, "cgroup.procs", buf))
269 exit(EXIT_FAILURE);
270 exit(fn(cgroup, arg));
271 }
272
273 return pid;
274}
275
276int get_temp_fd(void)
277{
278 return open(".", O_TMPFILE | O_RDWR | O_EXCL);
279}
280
281int alloc_pagecache(int fd, size_t size)
282{
283 char buf[PAGE_SIZE];
284 struct stat st;
285 int i;
286
287 if (fstat(fd, &st))
288 goto cleanup;
289
290 size += st.st_size;
291
292 if (ftruncate(fd, size))
293 goto cleanup;
294
295 for (i = 0; i < size; i += sizeof(buf))
296 read(fd, buf, sizeof(buf));
297
298 return 0;
299
300cleanup:
301 return -1;
302}
303
304int alloc_anon(const char *cgroup, void *arg)
305{
306 size_t size = (unsigned long)arg;
307 char *buf, *ptr;
308
309 buf = malloc(size);
310 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
311 *ptr = 0;
312
313 free(buf);
314 return 0;
315}
316
317int is_swap_enabled(void)
318{
319 char buf[PAGE_SIZE];
320 const char delim[] = "\n";
321 int cnt = 0;
322 char *line;
323
324 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
325 return -1;
326
327 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
328 cnt++;
329
330 return cnt > 1;
331}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000000..fe82a297d4e0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <stdlib.h>

/* Assumed page size; not probed with sysconf() — TODO confirm on non-4K-page arches. */
#define PAGE_SIZE 4096

/* Number of elements of a statically sized array (do not use on pointers). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

8#define MB(x) (x << 20)
9
/*
 * Checks if two given values differ by less than err% of their sum.
 * Uses labs(): the arguments are long, and abs() would silently
 * truncate differences wider than an int.
 */
static inline int values_close(long a, long b, int err)
{
	return labs(a - b) <= (a + b) / 100 * err;
}
17
/* Cgroup v2 hierarchy discovery and naming. */
extern int cg_find_unified_root(char *root, size_t len);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
/* Cgroup lifetime management. */
extern int cg_create(const char *cgroup);
extern int cg_destroy(const char *cgroup);
/* Control-file accessors: read raw/compared/substring/numeric values. */
extern int cg_read(const char *cgroup, const char *control,
		   char *buf, size_t len);
extern int cg_read_strcmp(const char *cgroup, const char *control,
			  const char *expected);
extern int cg_read_strstr(const char *cgroup, const char *control,
			  const char *needle);
extern long cg_read_long(const char *cgroup, const char *control);
long cg_read_key_long(const char *cgroup, const char *control, const char *key);
extern int cg_write(const char *cgroup, const char *control, char *buf);
/* Run fn(cgroup, arg) in a forked child attached to the cgroup,
 * either waiting for its exit status or returning its pid at once. */
extern int cg_run(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg);
extern int cg_run_nowait(const char *cgroup,
			 int (*fn)(const char *cgroup, void *arg),
			 void *arg);
/* Memory-population helpers shared by the memcg tests. */
extern int get_temp_fd(void);
extern int alloc_pagecache(int fd, size_t size);
extern int alloc_anon(const char *cgroup, void *arg);
extern int is_swap_enabled(void);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000000..cf0bddc9d271
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1015 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <fcntl.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9#include <sys/stat.h>
10#include <sys/types.h>
11#include <unistd.h>
12#include <sys/socket.h>
13#include <sys/wait.h>
14#include <arpa/inet.h>
15#include <netinet/in.h>
16#include <netdb.h>
17#include <errno.h>
18
19#include "../kselftest.h"
20#include "cgroup_util.h"
21
22/*
23 * This test creates two nested cgroups with and without enabling
24 * the memory controller.
25 */
26static int test_memcg_subtree_control(const char *root)
27{
28 char *parent, *child, *parent2, *child2;
29 int ret = KSFT_FAIL;
30 char buf[PAGE_SIZE];
31
32 /* Create two nested cgroups with the memory controller enabled */
33 parent = cg_name(root, "memcg_test_0");
34 child = cg_name(root, "memcg_test_0/memcg_test_1");
35 if (!parent || !child)
36 goto cleanup;
37
38 if (cg_create(parent))
39 goto cleanup;
40
41 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
42 goto cleanup;
43
44 if (cg_create(child))
45 goto cleanup;
46
47 if (cg_read_strstr(child, "cgroup.controllers", "memory"))
48 goto cleanup;
49
50 /* Create two nested cgroups without enabling memory controller */
51 parent2 = cg_name(root, "memcg_test_1");
52 child2 = cg_name(root, "memcg_test_1/memcg_test_1");
53 if (!parent2 || !child2)
54 goto cleanup;
55
56 if (cg_create(parent2))
57 goto cleanup;
58
59 if (cg_create(child2))
60 goto cleanup;
61
62 if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
63 goto cleanup;
64
65 if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
66 goto cleanup;
67
68 ret = KSFT_PASS;
69
70cleanup:
71 cg_destroy(child);
72 cg_destroy(parent);
73 free(parent);
74 free(child);
75
76 cg_destroy(child2);
77 cg_destroy(parent2);
78 free(parent2);
79 free(child2);
80
81 return ret;
82}
83
84static int alloc_anon_50M_check(const char *cgroup, void *arg)
85{
86 size_t size = MB(50);
87 char *buf, *ptr;
88 long anon, current;
89 int ret = -1;
90
91 buf = malloc(size);
92 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
93 *ptr = 0;
94
95 current = cg_read_long(cgroup, "memory.current");
96 if (current < size)
97 goto cleanup;
98
99 if (!values_close(size, current, 3))
100 goto cleanup;
101
102 anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
103 if (anon < 0)
104 goto cleanup;
105
106 if (!values_close(anon, current, 3))
107 goto cleanup;
108
109 ret = 0;
110cleanup:
111 free(buf);
112 return ret;
113}
114
/*
 * cg_run() payload: populates 50M of pagecache through a temporary
 * file, then checks that memory.current covers it and that the "file "
 * counter of memory.stat is within 10% of memory.current.
 */
static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
{
	size_t size = MB(50);
	int ret = -1;
	long current, file;
	int fd;

	fd = get_temp_fd();
	if (fd < 0)
		return -1;

	if (alloc_pagecache(fd, size))
		goto cleanup;

	current = cg_read_long(cgroup, "memory.current");
	/*
	 * Compare as long: a read error (-1) must fail here instead of
	 * wrapping to a huge unsigned value against a size_t.
	 */
	if (current < (long)size)
		goto cleanup;

	file = cg_read_key_long(cgroup, "memory.stat", "file ");
	if (file < 0)
		goto cleanup;

	if (!values_close(file, current, 10))
		goto cleanup;

	ret = 0;

cleanup:
	close(fd);
	return ret;
}
146
147/*
148 * This test create a memory cgroup, allocates
149 * some anonymous memory and some pagecache
150 * and check memory.current and some memory.stat values.
151 */
152static int test_memcg_current(const char *root)
153{
154 int ret = KSFT_FAIL;
155 long current;
156 char *memcg;
157
158 memcg = cg_name(root, "memcg_test");
159 if (!memcg)
160 goto cleanup;
161
162 if (cg_create(memcg))
163 goto cleanup;
164
165 current = cg_read_long(memcg, "memory.current");
166 if (current != 0)
167 goto cleanup;
168
169 if (cg_run(memcg, alloc_anon_50M_check, NULL))
170 goto cleanup;
171
172 if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
173 goto cleanup;
174
175 ret = KSFT_PASS;
176
177cleanup:
178 cg_destroy(memcg);
179 free(memcg);
180
181 return ret;
182}
183
/* cg_run() payload: populate 50M of pagecache through the fd passed via @arg. */
static int alloc_pagecache_50M(const char *cgroup, void *arg)
{
	int pagecache_fd = (long)arg;

	return alloc_pagecache(pagecache_fd, MB(50));
}
190
/*
 * cg_run_nowait() payload: populates 50M of pagecache through the fd
 * passed via @arg, then stays alive until the parent that forked it
 * exits, keeping the cgroup populated in the meantime.
 */
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
	int fd = (long)arg;
	int ppid = getppid();

	if (alloc_pagecache(fd, MB(50)))
		return -1;

	/* When the parent dies we get re-parented and getppid() changes. */
	while (getppid() == ppid)
		sleep(1);

	return 0;
}
204
/*
 * First, this test creates the following hierarchy:
 * A       memory.min = 50M,  memory.max = 200M
 * A/B     memory.min = 50M,  memory.current = 50M
 * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 500M, memory.current = 0
 * A/B/F   memory.min = 0,    memory.current = 50M
 *
 * Usages are pagecache, but the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available, and checks
 * that memory.min protects pagecache even
 * in this case.
 */
static int test_memcg_min(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};	/* A, A/B and A/G from the diagram */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	long c[4];			/* memory.current of each child */
	int i, attempts;
	int fd;				/* shared backing file for pagecache */

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/*
	 * Skip when memory.min can't be read as 0 — presumably the file
	 * is absent on kernels without memory.min support; TODO confirm
	 * cg_read_long() returns nonzero for a missing file.
	 */
	if (cg_read_long(parent[0], "memory.min")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim has to target pagecache. */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* children[2] is E in the diagram: it stays empty. */
		if (i == 2)
			continue;

		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
			      (void *)(long)fd);
	}

	if (cg_write(parent[0], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.min", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.min", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.min", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.min", "500M"))
		goto cleanup;
	if (cg_write(children[3], "memory.min", "0"))
		goto cleanup;

	/* Wait (bounded) for the background children to fault in ~150M. */
	attempts = 0;
	while (!values_close(cg_read_long(parent[1], "memory.current"),
			     MB(150), 3)) {
		if (attempts++ > 5)
			break;
		sleep(1);
	}

	/* Apply pressure from A/G; should succeed within what's reclaimable. */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	/* A/B must have been reclaimed down to its protected 50M. */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	if (!values_close(c[2], 0, 1))
		goto cleanup;

	/* More than the unprotected memory: this allocation must fail. */
	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
		goto cleanup;

	/* ... and memory.min must still hold A/B at ~50M. */
	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	/* Tear down leaves before parents; skip names never created. */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
361
/*
 * First, this test creates the following hierarchy:
 * A       memory.low = 50M,  memory.max = 200M
 * A/B     memory.low = 50M,  memory.current = 50M
 * A/B/C   memory.low = 75M,  memory.current = 50M
 * A/B/D   memory.low = 25M,  memory.current = 50M
 * A/B/E   memory.low = 500M, memory.current = 0
 * A/B/F   memory.low = 0,    memory.current = 50M
 *
 * Usages are pagecache.
 * Then it creates A/G and creates a significant
 * memory pressure in it.
 *
 * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
 * A/B/C  memory.current ~= 33M
 * A/B/D  memory.current ~= 17M
 * A/B/E  memory.current ~= 0
 *
 * After that it tries to allocate more than there is
 * unprotected memory in A available,
 * and checks low and oom events in memory.events.
 */
static int test_memcg_low(const char *root)
{
	int ret = KSFT_FAIL;
	char *parent[3] = {NULL};	/* A, A/B and A/G from the diagram */
	char *children[4] = {NULL};	/* A/B/{C,D,E,F} */
	long low, oom;
	long c[4];			/* memory.current of each child */
	int i;
	int fd;				/* shared backing file for pagecache */

	fd = get_temp_fd();
	if (fd < 0)
		goto cleanup;

	parent[0] = cg_name(root, "memcg_test_0");
	if (!parent[0])
		goto cleanup;

	parent[1] = cg_name(parent[0], "memcg_test_1");
	if (!parent[1])
		goto cleanup;

	parent[2] = cg_name(parent[0], "memcg_test_2");
	if (!parent[2])
		goto cleanup;

	if (cg_create(parent[0]))
		goto cleanup;

	/* memory.low must exist and start at 0. */
	if (cg_read_long(parent[0], "memory.low"))
		goto cleanup;

	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_write(parent[0], "memory.max", "200M"))
		goto cleanup;

	/* Disable swap so reclaim has to target pagecache. */
	if (cg_write(parent[0], "memory.swap.max", "0"))
		goto cleanup;

	if (cg_create(parent[1]))
		goto cleanup;

	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
		goto cleanup;

	if (cg_create(parent[2]))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
		if (!children[i])
			goto cleanup;

		if (cg_create(children[i]))
			goto cleanup;

		/* children[2] is E in the diagram: it stays empty. */
		if (i == 2)
			continue;

		/* Unlike test_memcg_min(), populate synchronously. */
		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
			goto cleanup;
	}

	if (cg_write(parent[0], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(parent[1], "memory.low", "50M"))
		goto cleanup;
	if (cg_write(children[0], "memory.low", "75M"))
		goto cleanup;
	if (cg_write(children[1], "memory.low", "25M"))
		goto cleanup;
	if (cg_write(children[2], "memory.low", "500M"))
		goto cleanup;
	if (cg_write(children[3], "memory.low", "0"))
		goto cleanup;

	/* Apply pressure from A/G within the reclaimable budget. */
	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
		goto cleanup;

	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++)
		c[i] = cg_read_long(children[i], "memory.current");

	if (!values_close(c[0], MB(33), 10))
		goto cleanup;

	if (!values_close(c[1], MB(17), 10))
		goto cleanup;

	if (!values_close(c[2], 0, 1))
		goto cleanup;

	/*
	 * memory.low is a soft guarantee: unlike memory.min, exceeding
	 * the unprotected budget must still succeed (low is breached,
	 * not enforced).
	 */
	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
		fprintf(stderr,
			"memory.low prevents from allocating anon memory\n");
		goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		oom = cg_read_key_long(children[i], "memory.events", "oom ");
		low = cg_read_key_long(children[i], "memory.events", "low ");

		/* Nobody should have been OOM-killed. */
		if (oom)
			goto cleanup;
		/* C and D were protected and reclaimed: low events expected. */
		if (i < 2 && low <= 0)
			goto cleanup;
		/* E (empty) and F (low=0) must not record low events. */
		if (i >= 2 && low)
			goto cleanup;
	}

	ret = KSFT_PASS;

cleanup:
	/* Tear down leaves before parents; skip names never created. */
	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
		if (!children[i])
			continue;

		cg_destroy(children[i]);
		free(children[i]);
	}

	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
		if (!parent[i])
			continue;

		cg_destroy(parent[i]);
		free(parent[i]);
	}
	close(fd);
	return ret;
}
520
/*
 * cg_run() payload: tries to populate 50M of pagecache inside a cgroup
 * that is expected to be capped (by memory.high or memory.max) at 30M,
 * then verifies memory.current landed in the (29M, 30M] window.
 */
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
	long usage;
	int tmp_fd, ret = -1;

	tmp_fd = get_temp_fd();
	if (tmp_fd < 0)
		return -1;

	if (alloc_pagecache(tmp_fd, MB(50)))
		goto cleanup;

	usage = cg_read_long(cgroup, "memory.current");
	if (usage <= MB(29) || usage > MB(30))
		goto cleanup;

	ret = 0;

cleanup:
	close(tmp_fd);
	return ret;
}
546
/*
 * This test checks that memory.high limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_high(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long high;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.high must default to "max". */
	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.high", "30M"))
		goto cleanup;

	/* memory.high throttles but doesn't kill: 100M anon must succeed. */
	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	/* Negated: the 50M pagecache check must FAIL under a 30M high. */
	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Breaching the limit must have recorded high events. */
	high = cg_read_key_long(memcg, "memory.events", "high ");
	if (high <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
595
/*
 * This test checks that memory.max limits the amount of
 * memory which can be consumed by either anonymous memory
 * or pagecache.
 */
static int test_memcg_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long current, max;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* memory.max must default to "max". */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "0"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
		goto cleanup;

	/* Usage must be nonzero but never above the 30M hard limit. */
	current = cg_read_long(memcg, "memory.current");
	if (current > MB(30) || !current)
		goto cleanup;

	/* Hitting the limit must have recorded max events. */
	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
646
647static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
648{
649 long mem_max = (long)arg;
650 size_t size = MB(50);
651 char *buf, *ptr;
652 long mem_current, swap_current;
653 int ret = -1;
654
655 buf = malloc(size);
656 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
657 *ptr = 0;
658
659 mem_current = cg_read_long(cgroup, "memory.current");
660 if (!mem_current || !values_close(mem_current, mem_max, 3))
661 goto cleanup;
662
663 swap_current = cg_read_long(cgroup, "memory.swap.current");
664 if (!swap_current ||
665 !values_close(mem_current + swap_current, size, 3))
666 goto cleanup;
667
668 ret = 0;
669cleanup:
670 free(buf);
671 return ret;
672}
673
/*
 * This test checks that memory.swap.max limits the amount of
 * anonymous memory which can be swapped out.
 */
static int test_memcg_swap_max(const char *root)
{
	int ret = KSFT_FAIL;
	char *memcg;
	long max;

	if (!is_swap_enabled())
		return KSFT_SKIP;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Skip if memory.swap.current can't be read as 0 — presumably
	 * the kernel lacks swap accounting support; TODO confirm. */
	if (cg_read_long(memcg, "memory.swap.current")) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	/* Both limits must default to "max". */
	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
		goto cleanup;

	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
		goto cleanup;

	if (cg_write(memcg, "memory.swap.max", "30M"))
		goto cleanup;

	if (cg_write(memcg, "memory.max", "30M"))
		goto cleanup;

	/* Should be killed by OOM killer */
	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
		goto cleanup;

	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
		goto cleanup;

	/* 50M anon under 30M mem + 30M swap limits must now fit. */
	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
		goto cleanup;

	max = cg_read_key_long(memcg, "memory.events", "max ");
	if (max <= 0)
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
736
737/*
738 * This test disables swapping and tries to allocate anonymous memory
739 * up to OOM. Then it checks for oom and oom_kill events in
740 * memory.events.
741 */
742static int test_memcg_oom_events(const char *root)
743{
744 int ret = KSFT_FAIL;
745 char *memcg;
746
747 memcg = cg_name(root, "memcg_test");
748 if (!memcg)
749 goto cleanup;
750
751 if (cg_create(memcg))
752 goto cleanup;
753
754 if (cg_write(memcg, "memory.max", "30M"))
755 goto cleanup;
756
757 if (cg_write(memcg, "memory.swap.max", "0"))
758 goto cleanup;
759
760 if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
761 goto cleanup;
762
763 if (cg_read_strcmp(memcg, "cgroup.procs", ""))
764 goto cleanup;
765
766 if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
767 goto cleanup;
768
769 if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
770 goto cleanup;
771
772 ret = KSFT_PASS;
773
774cleanup:
775 cg_destroy(memcg);
776 free(memcg);
777
778 return ret;
779}
780
/* Arguments handed to the forked tcp_server() via cg_run_nowait(). */
struct tcp_server_args {
	unsigned short port;	/* TCP port the server should bind to */
	int ctl[2];		/* pipe: server reports bind status on ctl[1] */
};
785
/*
 * cg_run_nowait() payload: a one-shot TCP server.
 *
 * Binds an IPv6 socket to args->port, reports the bind outcome (0 or
 * errno) to the parent through the ctl pipe, accepts a single client
 * and writes to it until the client closes the connection.
 *
 * Returns 0 when the client hung up on us (ECONNRESET), -1 on any
 * other failure.
 */
static int tcp_server(const char *cgroup, void *arg)
{
	struct tcp_server_args *srv_args = arg;
	struct sockaddr_in6 saddr = { 0 };
	socklen_t slen = sizeof(saddr);
	int sk, client_sk, ctl_fd, yes = 1, ret = -1;

	/* The read end of the control pipe belongs to the parent. */
	close(srv_args->ctl[0]);
	ctl_fd = srv_args->ctl[1];

	saddr.sin6_family = AF_INET6;
	saddr.sin6_addr = in6addr_any;
	saddr.sin6_port = htons(srv_args->port);

	sk = socket(AF_INET6, SOCK_STREAM, 0);
	if (sk < 0)
		return ret;

	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
		goto cleanup;

	/* On bind failure pass errno (e.g. EADDRINUSE) to the parent. */
	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
		write(ctl_fd, &errno, sizeof(errno));
		goto cleanup;
	}

	if (listen(sk, 1))
		goto cleanup;

	/* Report success (0) so the parent can start the client. */
	ret = 0;
	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
		ret = -1;
		goto cleanup;
	}

	client_sk = accept(sk, NULL, NULL);
	if (client_sk < 0)
		goto cleanup;

	ret = -1;
	for (;;) {
		uint8_t buf[0x100000];	/* 1M chunks; contents don't matter */

		/* Keep the socket buffers full until the client hangs up. */
		if (write(client_sk, buf, sizeof(buf)) <= 0) {
			if (errno == ECONNRESET)
				ret = 0;
			break;
		}
	}

	close(client_sk);

cleanup:
	close(sk);
	return ret;
}
842
/*
 * Connects to localhost:@port and keeps reading from the server while
 * checking, between reads, that the cgroup's memory.current and the
 * "sock " counter of memory.stat converge.
 *
 * Returns KSFT_PASS once the two counters get within 10% of each
 * other, KSFT_FAIL (or a getaddrinfo() error code) otherwise.
 */
static int tcp_client(const char *cgroup, unsigned short port)
{
	const char server[] = "localhost";
	struct addrinfo *ai;
	char servport[6];
	int retries = 0x10; /* nice round number */
	int sk, ret;

	/*
	 * %hu, not %hd: port is unsigned short and can exceed 32767
	 * (the caller draws from 1000..60999); %hd would format such
	 * ports as negative numbers and break the getaddrinfo() lookup.
	 */
	snprintf(servport, sizeof(servport), "%hu", port);
	ret = getaddrinfo(server, servport, NULL, &ai);
	if (ret)
		return ret;

	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
	if (sk < 0)
		goto free_ainfo;

	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
	if (ret < 0)
		goto close_sk;

	ret = KSFT_FAIL;
	while (retries--) {
		uint8_t buf[0x100000];	/* 1M chunks, matching the server */
		long current, sock;

		if (read(sk, buf, sizeof(buf)) <= 0)
			goto close_sk;

		current = cg_read_long(cgroup, "memory.current");
		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");

		if (current < 0 || sock < 0)
			goto close_sk;

		/* Socket memory can't exceed total charged memory. */
		if (current < sock)
			goto close_sk;

		if (values_close(current, sock, 10)) {
			ret = KSFT_PASS;
			break;
		}
	}

close_sk:
	close(sk);
free_ainfo:
	freeaddrinfo(ai);
	return ret;
}
893
/*
 * This test checks socket memory accounting.
 * The test forks a TCP server listens on a random port between 1000
 * and 61000. Once it gets a client connection, it starts writing to
 * its socket.
 * The TCP client interleaves reads from the socket with check whether
 * memory.current and memory.stat.sock are similar.
 */
static int test_memcg_sock(const char *root)
{
	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
	unsigned short port;
	char *memcg;

	memcg = cg_name(root, "memcg_test");
	if (!memcg)
		goto cleanup;

	if (cg_create(memcg))
		goto cleanup;

	/* Retry with fresh random ports until the server binds. */
	while (bind_retries--) {
		struct tcp_server_args args;

		if (pipe(args.ctl))
			goto cleanup;

		port = args.port = 1000 + rand() % 60000;

		pid = cg_run_nowait(memcg, tcp_server, &args);
		if (pid < 0)
			goto cleanup;

		/* Block until the server reports its bind status. */
		close(args.ctl[1]);
		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
			goto cleanup;
		close(args.ctl[0]);

		if (!err)
			break;
		if (err != EADDRINUSE)
			goto cleanup;

		/* Port was taken: reap this server and try another. */
		waitpid(pid, NULL, 0);
	}

	/* err is always set here: the loop body runs at least once. */
	if (err == EADDRINUSE) {
		ret = KSFT_SKIP;
		goto cleanup;
	}

	if (tcp_client(memcg, port) != KSFT_PASS)
		goto cleanup;

	waitpid(pid, &err, 0);
	if (WEXITSTATUS(err))
		goto cleanup;

	if (cg_read_long(memcg, "memory.current") < 0)
		goto cleanup;

	/* With all sockets closed, the "sock " counter must be 0 again. */
	if (cg_read_key_long(memcg, "memory.stat", "sock "))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(memcg);
	free(memcg);

	return ret;
}
966
/* Expands to a { function, "function" } pair for the table below. */
#define T(x) { x, #x }
struct memcg_test {
	int (*fn)(const char *root);	/* test entry point, gets the cgroup v2 root */
	const char *name;		/* human-readable name for ksft output */
} tests[] = {
	T(test_memcg_subtree_control),
	T(test_memcg_current),
	T(test_memcg_min),
	T(test_memcg_low),
	T(test_memcg_high),
	T(test_memcg_max),
	T(test_memcg_oom_events),
	T(test_memcg_swap_max),
	T(test_memcg_sock),
};
#undef T
983
984int main(int argc, char **argv)
985{
986 char root[PATH_MAX];
987 int i, ret = EXIT_SUCCESS;
988
989 if (cg_find_unified_root(root, sizeof(root)))
990 ksft_exit_skip("cgroup v2 isn't mounted\n");
991
992 /*
993 * Check that memory controller is available:
994 * memory is listed in cgroup.controllers
995 */
996 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
997 ksft_exit_skip("memory controller isn't available\n");
998
999 for (i = 0; i < ARRAY_SIZE(tests); i++) {
1000 switch (tests[i].fn(root)) {
1001 case KSFT_PASS:
1002 ksft_test_result_pass("%s\n", tests[i].name);
1003 break;
1004 case KSFT_SKIP:
1005 ksft_test_result_skip("%s\n", tests[i].name);
1006 break;
1007 default:
1008 ret = EXIT_FAILURE;
1009 ksft_test_result_fail("%s\n", tests[i].name);
1010 break;
1011 }
1012 }
1013
1014 return ret;
1015}
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index f3a8933c1275..bab13dd025a6 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -2,6 +2,8 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4SYSFS= 4SYSFS=
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
5 7
6prerequisite() 8prerequisite()
7{ 9{
@@ -9,7 +11,7 @@ prerequisite()
9 11
10 if [ $UID != 0 ]; then 12 if [ $UID != 0 ]; then
11 echo $msg must be run as root >&2 13 echo $msg must be run as root >&2
12 exit 0 14 exit $ksft_skip
13 fi 15 fi
14 16
15 taskset -p 01 $$ 17 taskset -p 01 $$
@@ -18,12 +20,12 @@ prerequisite()
18 20
19 if [ ! -d "$SYSFS" ]; then 21 if [ ! -d "$SYSFS" ]; then
20 echo $msg sysfs is not mounted >&2 22 echo $msg sysfs is not mounted >&2
21 exit 0 23 exit $ksft_skip
22 fi 24 fi
23 25
24 if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then 26 if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
25 echo $msg cpu hotplug is not supported >&2 27 echo $msg cpu hotplug is not supported >&2
26 exit 0 28 exit $ksft_skip
27 fi 29 fi
28 30
29 echo "CPU online/offline summary:" 31 echo "CPU online/offline summary:"
@@ -32,7 +34,7 @@ prerequisite()
32 34
33 if [[ "$online_cpus" = "$online_max" ]]; then 35 if [[ "$online_cpus" = "$online_max" ]]; then
34 echo "$msg: since there is only one cpu: $online_cpus" 36 echo "$msg: since there is only one cpu: $online_cpus"
35 exit 0 37 exit $ksft_skip
36 fi 38 fi
37 39
38 echo -e "\t Cpus in online state: $online_cpus" 40 echo -e "\t Cpus in online state: $online_cpus"
@@ -237,12 +239,12 @@ prerequisite_extra()
237 239
238 if [ ! -d "$DEBUGFS" ]; then 240 if [ ! -d "$DEBUGFS" ]; then
239 echo $msg debugfs is not mounted >&2 241 echo $msg debugfs is not mounted >&2
240 exit 0 242 exit $ksft_skip
241 fi 243 fi
242 244
243 if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then 245 if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
244 echo $msg cpu-notifier-error-inject module is not available >&2 246 echo $msg cpu-notifier-error-inject module is not available >&2
245 exit 0 247 exit $ksft_skip
246 fi 248 fi
247} 249}
248 250
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index d83922de9d89..31f8c9a76c5f 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -13,6 +13,9 @@ SYSFS=
13CPUROOT= 13CPUROOT=
14CPUFREQROOT= 14CPUFREQROOT=
15 15
16# Kselftest framework requirement - SKIP code is 4.
17ksft_skip=4
18
16helpme() 19helpme()
17{ 20{
18 printf "Usage: $0 [-h] [-todg args] 21 printf "Usage: $0 [-h] [-todg args]
@@ -38,7 +41,7 @@ prerequisite()
38 41
39 if [ $UID != 0 ]; then 42 if [ $UID != 0 ]; then
40 echo $msg must be run as root >&2 43 echo $msg must be run as root >&2
41 exit 2 44 exit $ksft_skip
42 fi 45 fi
43 46
44 taskset -p 01 $$ 47 taskset -p 01 $$
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000000..1893d0f59ad7
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,198 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Exercises the usbip (USB over IP) stack end to end on localhost:
# loads the modules, binds/exports a device, attaches/detaches it and
# checks the expected messages along the way.

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }

# -b: bus id of the device to export; -p: path to the built usbip tools.
while getopts "h:b:p:" arg; do
	case "${arg}" in
		h)
			usage
			;;
		b)
			busid=${OPTARG}
			;;
		p)
			tools_path=${OPTARG}
			;;
		*)
			usage
			;;
	esac
done
shift $((OPTIND-1))

if [ -z "${busid}" ]; then
	usage
fi

echo "Running USB over IP Testing on $busid";

test_end_msg="End of USB over IP Testing on $busid"

# Prerequisites: root privileges and both kernel modules.
if [ $UID != 0 ]; then
	echo "Please run usbip_test as root [SKIP]"
	echo $test_end_msg
	exit $ksft_skip
fi

echo "Load usbip_host module"
if ! /sbin/modprobe -q -n usbip_host; then
	echo "usbip_test: module usbip_host is not found [SKIP]"
	echo $test_end_msg
	exit $ksft_skip
fi

if /sbin/modprobe -q usbip_host; then
	# NOTE(review): removing test_bitmap here looks unrelated to usbip —
	# presumably leftover from a copied template; confirm intent.
	/sbin/modprobe -q -r test_bitmap
	echo "usbip_test: module usbip_host is loaded [OK]"
else
	echo "usbip_test: module usbip_host failed to load [FAIL]"
	echo $test_end_msg
	exit 1
fi

echo "Load vhci_hcd module"
if /sbin/modprobe -q vhci_hcd; then
	# NOTE(review): same unrelated test_bitmap removal as above.
	/sbin/modprobe -q -r test_bitmap
	echo "usbip_test: module vhci_hcd is loaded [OK]"
else
	echo "usbip_test: module vhci_hcd failed to load [FAIL]"
	echo $test_end_msg
	exit 1
fi
echo "=============================================================="

cd $tools_path;

if [ ! -f src/usbip ]; then
	echo "Please build usbip tools"
	echo $test_end_msg
	exit $ksft_skip
fi

# Phase 1: inspect the initial state.
echo "Expect to see export-able devices";
src/usbip list -l;
echo "=============================================================="

echo "Run lsusb to see all usb devices"
lsusb -t;
echo "=============================================================="

# Start the usbip daemon in the background.
src/usbipd -D;

echo "Get exported devices from localhost - expect to see none";
src/usbip list -r localhost;
echo "=============================================================="

# Phase 2: bind/unbind cycle, including repeated operations that
# should produce "already bound" / "no devices" style messages.
echo "bind devices";
src/usbip bind -b $busid;
echo "=============================================================="

echo "Run lsusb - bound devices should be under usbip_host control"
lsusb -t;
echo "=============================================================="

echo "bind devices - expect already bound messages"
src/usbip bind -b $busid;
echo "=============================================================="

echo "Get exported devices from localhost - expect to see exported devices";
src/usbip list -r localhost;
echo "=============================================================="

echo "unbind devices";
src/usbip unbind -b $busid;
echo "=============================================================="

echo "Run lsusb - bound devices should be rebound to original drivers"
lsusb -t;
echo "=============================================================="

echo "unbind devices - expect no devices bound message";
src/usbip unbind -b $busid;
echo "=============================================================="

echo "Get exported devices from localhost - expect to see none";
src/usbip list -r localhost;
echo "=============================================================="

echo "List imported devices - expect to see none";
src/usbip port;
echo "=============================================================="

echo "Import devices from localhost - should fail with no devices"
src/usbip attach -r localhost -b $busid;
echo "=============================================================="

# Phase 3: attach/detach cycle against the local daemon.
echo "bind devices";
src/usbip bind -b $busid;
echo "=============================================================="

echo "List imported devices - expect to see exported devices";
src/usbip list -r localhost;
echo "=============================================================="

echo "List imported devices - expect to see none";
src/usbip port;
echo "=============================================================="

echo "Import devices from localhost - should work"
src/usbip attach -r localhost -b $busid;
echo "=============================================================="

echo "List imported devices - expect to see imported devices";
src/usbip port;
echo "=============================================================="

echo "Import devices from localhost - expect already imported messages"
src/usbip attach -r localhost -b $busid;
echo "=============================================================="

echo "Un-import devices";
src/usbip detach -p 00;
src/usbip detach -p 01;
echo "=============================================================="

echo "List imported devices - expect to see none";
src/usbip port;
echo "=============================================================="

echo "Un-import devices - expect no devices to detach messages";
src/usbip detach -p 00;
src/usbip detach -p 01;
echo "=============================================================="

echo "Detach invalid port tests - expect invalid port error message";
src/usbip detach -p 100;
echo "=============================================================="

echo "Expect to see export-able devices";
src/usbip list -l;
echo "=============================================================="

# Phase 4: error paths with the module removed.
echo "Remove usbip_host module";
rmmod usbip_host;

echo "Run lsusb - bound devices should be rebound to original drivers"
lsusb -t;
echo "=============================================================="

echo "Run bind without usbip_host - expect fail"
src/usbip bind -b $busid;
echo "=============================================================="

echo "Run lsusb - devices that failed to bind aren't bound to any driver"
lsusb -t;
echo "=============================================================="

echo "modprobe usbip_host - does it work?"
/sbin/modprobe usbip_host
echo "Should see -busid- is not in match_busid table... skip! dmesg"
echo "=============================================================="
dmesg | grep "is not in match_busid table"
echo "=============================================================="

echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index c6d5790575ae..a47029a799d2 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -4,18 +4,21 @@
4efivarfs_mount=/sys/firmware/efi/efivars 4efivarfs_mount=/sys/firmware/efi/efivars
5test_guid=210be57c-9849-4fc7-a635-e6382d1aec27 5test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
6 6
7# Kselftest framework requirement - SKIP code is 4.
8ksft_skip=4
9
7check_prereqs() 10check_prereqs()
8{ 11{
9 local msg="skip all tests:" 12 local msg="skip all tests:"
10 13
11 if [ $UID != 0 ]; then 14 if [ $UID != 0 ]; then
12 echo $msg must be run as root >&2 15 echo $msg must be run as root >&2
13 exit 0 16 exit $ksft_skip
14 fi 17 fi
15 18
16 if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then 19 if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
17 echo $msg efivarfs is not mounted on $efivarfs_mount >&2 20 echo $msg efivarfs is not mounted on $efivarfs_mount >&2
18 exit 0 21 exit $ksft_skip
19 fi 22 fi
20} 23}
21 24
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67cd4597db2b..47cbf54d0801 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -20,6 +20,8 @@
20#include <string.h> 20#include <string.h>
21#include <unistd.h> 21#include <unistd.h>
22 22
23#include "../kselftest.h"
24
23static char longpath[2 * PATH_MAX] = ""; 25static char longpath[2 * PATH_MAX] = "";
24static char *envp[] = { "IN_TEST=yes", NULL, NULL }; 26static char *envp[] = { "IN_TEST=yes", NULL, NULL };
25static char *argv[] = { "execveat", "99", NULL }; 27static char *argv[] = { "execveat", "99", NULL };
@@ -249,8 +251,8 @@ static int run_tests(void)
249 errno = 0; 251 errno = 0;
250 execveat_(-1, NULL, NULL, NULL, 0); 252 execveat_(-1, NULL, NULL, NULL, 0);
251 if (errno == ENOSYS) { 253 if (errno == ENOSYS) {
252 printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); 254 ksft_exit_skip(
253 return 1; 255 "ENOSYS calling execveat - no kernel support?\n");
254 } 256 }
255 257
256 /* Change file position to confirm it doesn't affect anything */ 258 /* Change file position to confirm it doesn't affect anything */
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 5c7d7001ad37..129880fb42d3 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,5 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2 2
3CFLAGS += -I../../../../usr/include/
3TEST_GEN_PROGS := devpts_pts 4TEST_GEN_PROGS := devpts_pts
4TEST_GEN_PROGS_EXTENDED := dnotify_test 5TEST_GEN_PROGS_EXTENDED := dnotify_test
5 6
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b9055e974289..b1fc9b916ace 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -8,9 +8,10 @@
8#include <stdlib.h> 8#include <stdlib.h>
9#include <string.h> 9#include <string.h>
10#include <unistd.h> 10#include <unistd.h>
11#include <sys/ioctl.h> 11#include <asm/ioctls.h>
12#include <sys/mount.h> 12#include <sys/mount.h>
13#include <sys/wait.h> 13#include <sys/wait.h>
14#include "../kselftest.h"
14 15
15static bool terminal_dup2(int duplicate, int original) 16static bool terminal_dup2(int duplicate, int original)
16{ 17{
@@ -125,10 +126,12 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
125 if (errno == EINVAL) { 126 if (errno == EINVAL) {
126 fprintf(stderr, "TIOCGPTPEER is not supported. " 127 fprintf(stderr, "TIOCGPTPEER is not supported. "
127 "Skipping test.\n"); 128 "Skipping test.\n");
128 fret = EXIT_SUCCESS; 129 fret = KSFT_SKIP;
130 } else {
131 fprintf(stderr,
132 "Failed to perform TIOCGPTPEER ioctl\n");
133 fret = EXIT_FAILURE;
129 } 134 }
130
131 fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n");
132 goto do_cleanup; 135 goto do_cleanup;
133 } 136 }
134 137
@@ -279,9 +282,9 @@ int main(int argc, char *argv[])
279 int ret; 282 int ret;
280 283
281 if (!isatty(STDIN_FILENO)) { 284 if (!isatty(STDIN_FILENO)) {
282 fprintf(stderr, "Standard input file desciptor is not attached " 285 fprintf(stderr, "Standard input file descriptor is not attached "
283 "to a terminal. Skipping test\n"); 286 "to a terminal. Skipping test\n");
284 exit(EXIT_FAILURE); 287 exit(KSFT_SKIP);
285 } 288 }
286 289
287 ret = unshare(CLONE_NEWNS); 290 ret = unshare(CLONE_NEWNS);
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 8e2e34a2ca69..70d18be46af5 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -74,7 +74,7 @@ load_fw_custom()
74{ 74{
75 if [ ! -e "$DIR"/trigger_custom_fallback ]; then 75 if [ ! -e "$DIR"/trigger_custom_fallback ]; then
76 echo "$0: custom fallback trigger not present, ignoring test" >&2 76 echo "$0: custom fallback trigger not present, ignoring test" >&2
77 return 1 77 exit $ksft_skip
78 fi 78 fi
79 79
80 local name="$1" 80 local name="$1"
@@ -107,7 +107,7 @@ load_fw_custom_cancel()
107{ 107{
108 if [ ! -e "$DIR"/trigger_custom_fallback ]; then 108 if [ ! -e "$DIR"/trigger_custom_fallback ]; then
109 echo "$0: canceling custom fallback trigger not present, ignoring test" >&2 109 echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
110 return 1 110 exit $ksft_skip
111 fi 111 fi
112 112
113 local name="$1" 113 local name="$1"
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 6452d2129cd9..a4320c4b44dc 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -30,6 +30,7 @@ fi
30 30
31if [ ! -e "$DIR"/trigger_async_request ]; then 31if [ ! -e "$DIR"/trigger_async_request ]; then
32 echo "$0: empty filename: async trigger not present, ignoring test" >&2 32 echo "$0: empty filename: async trigger not present, ignoring test" >&2
33 exit $ksft_skip
33else 34else
34 if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then 35 if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
35 echo "$0: empty filename should not succeed (async)" >&2 36 echo "$0: empty filename should not succeed (async)" >&2
@@ -69,6 +70,7 @@ fi
69# Try the asynchronous version too 70# Try the asynchronous version too
70if [ ! -e "$DIR"/trigger_async_request ]; then 71if [ ! -e "$DIR"/trigger_async_request ]; then
71 echo "$0: firmware loading: async trigger not present, ignoring test" >&2 72 echo "$0: firmware loading: async trigger not present, ignoring test" >&2
73 exit $ksft_skip
72else 74else
73 if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then 75 if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
74 echo "$0: could not trigger async request" >&2 76 echo "$0: could not trigger async request" >&2
@@ -89,7 +91,7 @@ test_config_present()
89{ 91{
90 if [ ! -f $DIR/reset ]; then 92 if [ ! -f $DIR/reset ]; then
91 echo "Configuration triggers not present, ignoring test" 93 echo "Configuration triggers not present, ignoring test"
92 exit 0 94 exit $ksft_skip
93 fi 95 fi
94} 96}
95 97
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 962d7f4ac627..6c5f1b2ffb74 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,11 +9,14 @@ DIR=/sys/devices/virtual/misc/test_firmware
9PROC_CONFIG="/proc/config.gz" 9PROC_CONFIG="/proc/config.gz"
10TEST_DIR=$(dirname $0) 10TEST_DIR=$(dirname $0)
11 11
12# Kselftest framework requirement - SKIP code is 4.
13ksft_skip=4
14
12print_reqs_exit() 15print_reqs_exit()
13{ 16{
14 echo "You must have the following enabled in your kernel:" >&2 17 echo "You must have the following enabled in your kernel:" >&2
15 cat $TEST_DIR/config >&2 18 cat $TEST_DIR/config >&2
16 exit 1 19 exit $ksft_skip
17} 20}
18 21
19test_modprobe() 22test_modprobe()
@@ -88,7 +91,7 @@ verify_reqs()
88 if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then 91 if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
89 if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then 92 if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
90 echo "usermode helper disabled so ignoring test" 93 echo "usermode helper disabled so ignoring test"
91 exit 0 94 exit $ksft_skip
92 fi 95 fi
93 fi 96 fi
94} 97}
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 2a4f16fc9819..e4645d5e3126 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -15,14 +15,29 @@ reset_tracer() { # reset the current tracer
15 echo nop > current_tracer 15 echo nop > current_tracer
16} 16}
17 17
18reset_trigger() { # reset all current setting triggers 18reset_trigger_file() {
19 grep -v ^# events/*/*/trigger | 19 # remove action triggers first
20 grep -H ':on[^:]*(' $@ |
21 while read line; do
22 cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
23 file=`echo $line | cut -f1 -d:`
24 echo "!$cmd" >> $file
25 done
26 grep -Hv ^# $@ |
20 while read line; do 27 while read line; do
21 cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "` 28 cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
22 echo "!$cmd" > `echo $line | cut -f1 -d:` 29 file=`echo $line | cut -f1 -d:`
30 echo "!$cmd" > $file
23 done 31 done
24} 32}
25 33
34reset_trigger() { # reset all current setting triggers
35 if [ -d events/synthetic ]; then
36 reset_trigger_file events/synthetic/*/trigger
37 fi
38 reset_trigger_file events/*/*/trigger
39}
40
26reset_events_filter() { # reset all current setting filters 41reset_events_filter() { # reset all current setting filters
27 grep -v ^none events/*/*/filter | 42 grep -v ^none events/*/*/filter |
28 while read line; do 43 while read line; do
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 5ba73035e1d9..a0002563e9ee 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -24,6 +24,14 @@ arm*)
24 ARG2=%r1 24 ARG2=%r1
25 OFFS=4 25 OFFS=4
26;; 26;;
27ppc64*)
28 ARG2=%r4
29 OFFS=8
30;;
31ppc*)
32 ARG2=%r4
33 OFFS=4
34;;
27*) 35*)
28 echo "Please implement other architecture here" 36 echo "Please implement other architecture here"
29 exit_untested 37 exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 231bcd2c4eb5..d026ff4e562f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -34,6 +34,13 @@ arm*)
34 GOODREG=%r0 34 GOODREG=%r0
35 BADREG=%ax 35 BADREG=%ax
36;; 36;;
37ppc*)
38 GOODREG=%r3
39 BADREG=%msr
40;;
41*)
42 echo "Please implement other architecture here"
43 exit_untested
37esac 44esac
38 45
39test_goodarg() # Good-args 46test_goodarg() # Good-args
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 000000000000..2acbfe2c0c0c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,49 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3# description: trace_marker trigger - test histogram trigger
4# flags: instance
5
6do_reset() {
7 reset_trigger
8 echo > set_event
9 clear_trace
10}
11
12fail() { #msg
13 do_reset
14 echo $1
15 exit_fail
16}
17
18if [ ! -f set_event ]; then
19 echo "event tracing is not supported"
20 exit_unsupported
21fi
22
23if [ ! -d events/ftrace/print ]; then
24 echo "event trace_marker is not supported"
25 exit_unsupported
26fi
27
28if [ ! -f events/ftrace/print/trigger ]; then
29 echo "event trigger is not supported"
30 exit_unsupported
31fi
32
33if [ ! -f events/ftrace/print/hist ]; then
34 echo "hist trigger is not supported"
35 exit_unsupported
36fi
37
38do_reset
39
40echo "Test histogram trace_marker tigger"
41
42echo 'hist:keys=common_pid' > events/ftrace/print/trigger
43for i in `seq 1 10` ; do echo "hello" > trace_marker; done
44grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
45 fail "hist trigger did not trigger correct times on trace_marker"
46
47do_reset
48
49exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 000000000000..6748e8cb42d0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,74 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3# description: trace_marker trigger - test snapshot trigger
4# flags: instance
5
6do_reset() {
7 reset_trigger
8 echo > set_event
9 echo 0 > snapshot
10 clear_trace
11}
12
13fail() { #msg
14 do_reset
15 echo $1
16 exit_fail
17}
18
19if [ ! -f set_event ]; then
20 echo "event tracing is not supported"
21 exit_unsupported
22fi
23
24if [ ! -f snapshot ]; then
25 echo "snapshot is not supported"
26 exit_unsupported
27fi
28
29if [ ! -d events/ftrace/print ]; then
30 echo "event trace_marker is not supported"
31 exit_unsupported
32fi
33
34if [ ! -f events/ftrace/print/trigger ]; then
35 echo "event trigger is not supported"
36 exit_unsupported
37fi
38
39test_trace() {
40 file=$1
41 x=$2
42
43 cat $file | while read line; do
44 comment=`echo $line | sed -e 's/^#//'`
45 if [ "$line" != "$comment" ]; then
46 continue
47 fi
48 echo "testing $line for >$x<"
49 match=`echo $line | sed -e "s/>$x<//"`
50 if [ "$line" == "$match" ]; then
51 fail "$line does not have >$x< in it"
52 fi
53 let x=$x+2
54 done
55}
56
57do_reset
58
59echo "Test snapshot trace_marker tigger"
60
61echo 'snapshot' > events/ftrace/print/trigger
62
63# make sure the snapshot is allocated
64
65grep -q 'Snapshot is allocated' snapshot
66
67for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
68
69test_trace trace 1
70test_trace snapshot 2
71
72do_reset
73
74exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 000000000000..0a69c5d1cda8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,68 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3# description: trace_marker trigger - test histogram with synthetic event against kernel event
4# flags:
5
6do_reset() {
7 reset_trigger
8 echo > set_event
9 echo > synthetic_events
10 clear_trace
11}
12
13fail() { #msg
14 do_reset
15 echo $1
16 exit_fail
17}
18
19if [ ! -f set_event ]; then
20 echo "event tracing is not supported"
21 exit_unsupported
22fi
23
24if [ ! -f synthetic_events ]; then
25 echo "synthetic events not supported"
26 exit_unsupported
27fi
28
29if [ ! -d events/ftrace/print ]; then
30 echo "event trace_marker is not supported"
31 exit_unsupported
32fi
33
34if [ ! -d events/sched/sched_waking ]; then
35 echo "event sched_waking is not supported"
36 exit_unsupported
37fi
38
39if [ ! -f events/ftrace/print/trigger ]; then
40 echo "event trigger is not supported"
41 exit_unsupported
42fi
43
44if [ ! -f events/ftrace/print/hist ]; then
45 echo "hist trigger is not supported"
46 exit_unsupported
47fi
48
49do_reset
50
51echo "Test histogram kernel event to trace_marker latency histogram trigger"
52
53echo 'latency u64 lat' > synthetic_events
54echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
55echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
56echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
57sleep 1
58echo "hello" > trace_marker
59
60grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
61 fail "hist trigger did not trigger correct times on trace_marker"
62
63grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
64 fail "hist trigger did not trigger "
65
66do_reset
67
68exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 000000000000..3666dd6ab02a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,66 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3# description: trace_marker trigger - test histogram with synthetic event
4# flags:
5
6do_reset() {
7 reset_trigger
8 echo > set_event
9 echo > synthetic_events
10 clear_trace
11}
12
13fail() { #msg
14 do_reset
15 echo $1
16 exit_fail
17}
18
19if [ ! -f set_event ]; then
20 echo "event tracing is not supported"
21 exit_unsupported
22fi
23
24if [ ! -f synthetic_events ]; then
25 echo "synthetic events not supported"
26 exit_unsupported
27fi
28
29if [ ! -d events/ftrace/print ]; then
30 echo "event trace_marker is not supported"
31 exit_unsupported
32fi
33
34if [ ! -f events/ftrace/print/trigger ]; then
35 echo "event trigger is not supported"
36 exit_unsupported
37fi
38
39if [ ! -f events/ftrace/print/hist ]; then
40 echo "hist trigger is not supported"
41 exit_unsupported
42fi
43
44do_reset
45
46echo "Test histogram trace_marker to trace_marker latency histogram trigger"
47
48echo 'latency u64 lat' > synthetic_events
49echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
50echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
51echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
52echo -n "start" > trace_marker
53echo -n "end" > trace_marker
54
55cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
56
57if [ $cnt -ne 2 ]; then
58 fail "hist trace_marker trigger did not trigger correctly"
59fi
60
61grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
62 fail "hist trigger did not trigger "
63
64do_reset
65
66exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 8497a376ef9d..12631f0076a1 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -17,14 +17,6 @@ all:
17 fi \ 17 fi \
18 done 18 done
19 19
20override define RUN_TESTS
21 @export KSFT_TAP_LEVEL=`echo 1`;
22 @echo "TAP version 13";
23 @echo "selftests: futex";
24 @echo "========================================";
25 @cd $(OUTPUT); ./run.sh
26endef
27
28override define INSTALL_RULE 20override define INSTALL_RULE
29 mkdir -p $(INSTALL_PATH) 21 mkdir -p $(INSTALL_PATH)
30 install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) 22 install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -36,10 +28,6 @@ override define INSTALL_RULE
36 done; 28 done;
37endef 29endef
38 30
39override define EMIT_TESTS
40 echo "./run.sh"
41endef
42
43override define CLEAN 31override define CLEAN
44 @for DIR in $(SUBDIRS); do \ 32 @for DIR in $(SUBDIRS); do \
45 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 33 BUILD_TARGET=$(OUTPUT)/$$DIR; \
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 183fb932edbd..7f35b9880485 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -2,10 +2,11 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4#exit status 4#exit status
5#1: run as non-root user 5#1: Internal error
6#2: sysfs/debugfs not mount 6#2: sysfs/debugfs not mount
7#3: insert module fail when gpio-mockup is a module. 7#3: insert module fail when gpio-mockup is a module.
8#4: other reason. 8#4: Skip test including run as non-root user.
9#5: other reason.
9 10
10SYSFS= 11SYSFS=
11GPIO_SYSFS= 12GPIO_SYSFS=
@@ -15,6 +16,9 @@ GPIO_DEBUGFS=
15dev_type= 16dev_type=
16module= 17module=
17 18
19# Kselftest framework requirement - SKIP code is 4.
20ksft_skip=4
21
18usage() 22usage()
19{ 23{
20 echo "Usage:" 24 echo "Usage:"
@@ -34,7 +38,7 @@ prerequisite()
34 msg="skip all tests:" 38 msg="skip all tests:"
35 if [ $UID != 0 ]; then 39 if [ $UID != 0 ]; then
36 echo $msg must be run as root >&2 40 echo $msg must be run as root >&2
37 exit 1 41 exit $ksft_skip
38 fi 42 fi
39 SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` 43 SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
40 if [ ! -d "$SYSFS" ]; then 44 if [ ! -d "$SYSFS" ]; then
@@ -73,7 +77,7 @@ remove_module()
73die() 77die()
74{ 78{
75 remove_module 79 remove_module
76 exit 4 80 exit 5
77} 81}
78 82
79test_chips() 83test_chips()
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index d21edea9c560..f6cd03a87493 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -9,6 +9,8 @@
9#include <sys/timeb.h> 9#include <sys/timeb.h>
10#include <sched.h> 10#include <sched.h>
11#include <errno.h> 11#include <errno.h>
12#include <string.h>
13#include "../kselftest.h"
12 14
13void usage(char *name) { 15void usage(char *name) {
14 printf ("Usage: %s cpunum\n", name); 16 printf ("Usage: %s cpunum\n", name);
@@ -41,8 +43,8 @@ int main(int argc, char **argv) {
41 fd = open(msr_file_name, O_RDONLY); 43 fd = open(msr_file_name, O_RDONLY);
42 44
43 if (fd == -1) { 45 if (fd == -1) {
44 perror("Failed to open"); 46 printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
45 return 1; 47 return KSFT_SKIP;
46 } 48 }
47 49
48 CPU_ZERO(&cpuset); 50 CPU_ZERO(&cpuset);
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index c670359becc6..e7008f614ad7 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -30,9 +30,18 @@
30 30
31EVALUATE_ONLY=0 31EVALUATE_ONLY=0
32 32
33# Kselftest framework requirement - SKIP code is 4.
34ksft_skip=4
35
33if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then 36if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
34 echo "$0 # Skipped: Test can only run on x86 architectures." 37 echo "$0 # Skipped: Test can only run on x86 architectures."
35 exit 0 38 exit $ksft_skip
39fi
40
41msg="skip all tests:"
42if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
43 echo $msg please run this as root >&2
44 exit $ksft_skip
36fi 45fi
37 46
38max_cpus=$(($(nproc)-1)) 47max_cpus=$(($(nproc)-1))
@@ -48,11 +57,12 @@ function run_test () {
48 57
49 echo "sleeping for 5 seconds" 58 echo "sleeping for 5 seconds"
50 sleep 5 59 sleep 5
51 num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l) 60 grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
52 if [ $num_freqs -le 2 ]; then 61 num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
53 cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1 62 if [ $num_freqs -ge 2 ]; then
63 tail -n 1 /tmp/result.freqs > /tmp/result.$1
54 else 64 else
55 cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1 65 cp /tmp/result.freqs /tmp/result.$1
56 fi 66 fi
57 ./msr 0 >> /tmp/result.$1 67 ./msr 0 >> /tmp/result.$1
58 68
@@ -82,32 +92,37 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
82max_freq=$(($_max_freq / 1000)) 92max_freq=$(($_max_freq / 1000))
83 93
84 94
85for freq in `seq $max_freq -100 $min_freq` 95[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
86do 96do
87 echo "Setting maximum frequency to $freq" 97 echo "Setting maximum frequency to $freq"
88 cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null 98 cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
89 [ $EVALUATE_ONLY -eq 0 ] && run_test $freq 99 run_test $freq
90done 100done
91 101
92echo "==============================================================================" 102[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
93 103
104echo "========================================================================"
94echo "The marketing frequency of the cpu is $mkt_freq MHz" 105echo "The marketing frequency of the cpu is $mkt_freq MHz"
95echo "The maximum frequency of the cpu is $max_freq MHz" 106echo "The maximum frequency of the cpu is $max_freq MHz"
96echo "The minimum frequency of the cpu is $min_freq MHz" 107echo "The minimum frequency of the cpu is $min_freq MHz"
97 108
98cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
99
100# make a pretty table 109# make a pretty table
101echo "Target Actual Difference MSR(0x199) max_perf_pct" 110echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
102for freq in `seq $max_freq -100 $min_freq` 111for freq in `seq $max_freq -100 $min_freq`
103do 112do
104 result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ') 113 result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
105 msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ') 114 msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
106 max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' ) 115 max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
107 if [ $result_freq -eq $freq ]; then 116 cat >> /tmp/result.tab << EOF
108 echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))" 117$freq
109 else 118$result_freq
110 echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" 119$((result_freq - freq))
111 fi 120$msr
121$((max_perf_pct * max_freq))
122EOF
112done 123done
124
125# print the table
126pr -aTt -5 < /tmp/result.tab
127
113exit 0 128exit 0
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index ee9382bdfadc..dac927e82336 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -196,10 +196,9 @@ int main(int argc, char **argv)
196 int msg, pid, err; 196 int msg, pid, err;
197 struct msgque_data msgque; 197 struct msgque_data msgque;
198 198
199 if (getuid() != 0) { 199 if (getuid() != 0)
200 printf("Please run the test as root - Exiting.\n"); 200 return ksft_exit_skip(
201 return ksft_exit_fail(); 201 "Please run the test as root - Exiting.\n");
202 }
203 202
204 msgque.key = ftok(argv[0], 822155650); 203 msgque.key = ftok(argv[0], 822155650);
205 if (msgque.key == -1) { 204 if (msgque.key == -1) {
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 7956ea3be667..0a76314b4414 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -62,13 +62,16 @@ ALL_TESTS="$ALL_TESTS 0007:5:1"
62ALL_TESTS="$ALL_TESTS 0008:150:1" 62ALL_TESTS="$ALL_TESTS 0008:150:1"
63ALL_TESTS="$ALL_TESTS 0009:150:1" 63ALL_TESTS="$ALL_TESTS 0009:150:1"
64 64
65# Kselftest framework requirement - SKIP code is 4.
66ksft_skip=4
67
65test_modprobe() 68test_modprobe()
66{ 69{
67 if [ ! -d $DIR ]; then 70 if [ ! -d $DIR ]; then
68 echo "$0: $DIR not present" >&2 71 echo "$0: $DIR not present" >&2
69 echo "You must have the following enabled in your kernel:" >&2 72 echo "You must have the following enabled in your kernel:" >&2
70 cat $TEST_DIR/config >&2 73 cat $TEST_DIR/config >&2
71 exit 1 74 exit $ksft_skip
72 fi 75 fi
73} 76}
74 77
@@ -105,12 +108,12 @@ test_reqs()
105{ 108{
106 if ! which modprobe 2> /dev/null > /dev/null; then 109 if ! which modprobe 2> /dev/null > /dev/null; then
107 echo "$0: You need modprobe installed" >&2 110 echo "$0: You need modprobe installed" >&2
108 exit 1 111 exit $ksft_skip
109 fi 112 fi
110 113
111 if ! which kmod 2> /dev/null > /dev/null; then 114 if ! which kmod 2> /dev/null > /dev/null; then
112 echo "$0: You need kmod installed" >&2 115 echo "$0: You need kmod installed" >&2
113 exit 1 116 exit $ksft_skip
114 fi 117 fi
115 118
116 # kmod 19 has a bad bug where it returns 0 when modprobe 119 # kmod 19 has a bad bug where it returns 0 when modprobe
@@ -124,13 +127,13 @@ test_reqs()
124 echo "$0: You need at least kmod 20" >&2 127 echo "$0: You need at least kmod 20" >&2
125 echo "kmod <= 19 is buggy, for details see:" >&2 128 echo "kmod <= 19 is buggy, for details see:" >&2
126 echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2 129 echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
127 exit 1 130 exit $ksft_skip
128 fi 131 fi
129 132
130 uid=$(id -u) 133 uid=$(id -u)
131 if [ $uid -ne 0 ]; then 134 if [ $uid -ne 0 ]; then
132 echo $msg must be run as root >&2 135 echo $msg must be run as root >&2
133 exit 0 136 exit $ksft_skip
134 fi 137 fi
135} 138}
136 139
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1b9d8ecdebce..15e6b75fc3a5 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -20,7 +20,7 @@
20#define KSFT_XFAIL 2 20#define KSFT_XFAIL 2
21#define KSFT_XPASS 3 21#define KSFT_XPASS 3
22/* Treat skip as pass */ 22/* Treat skip as pass */
23#define KSFT_SKIP KSFT_PASS 23#define KSFT_SKIP 4
24 24
25/* counters */ 25/* counters */
26struct ksft_count { 26struct ksft_count {
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000000..63fc1ab9248f
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,3 @@
1set_sregs_test
2sync_regs_test
3vmx_tsc_adjust_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 2ddcc96ae456..d9d00319b07c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -15,7 +15,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
15 15
16INSTALL_HDR_PATH = $(top_srcdir)/usr 16INSTALL_HDR_PATH = $(top_srcdir)/usr
17LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ 17LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
18CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) 18CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
19 19
20# After inclusion, $(OUTPUT) is defined and 20# After inclusion, $(OUTPUT) is defined and
21# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ 21# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 7ab98e41324f..ac53730b30aa 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -19,6 +19,7 @@
19#include <errno.h> 19#include <errno.h>
20#include <unistd.h> 20#include <unistd.h>
21#include <fcntl.h> 21#include <fcntl.h>
22#include "kselftest.h"
22 23
23ssize_t test_write(int fd, const void *buf, size_t count); 24ssize_t test_write(int fd, const void *buf, size_t count);
24ssize_t test_read(int fd, void *buf, size_t count); 25ssize_t test_read(int fd, void *buf, size_t count);
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index c9f5b7d4ce38..cd01144d27c8 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,6 +13,8 @@
13#include <execinfo.h> 13#include <execinfo.h>
14#include <sys/syscall.h> 14#include <sys/syscall.h>
15 15
16#include "../../kselftest.h"
17
16/* Dumps the current stack trace to stderr. */ 18/* Dumps the current stack trace to stderr. */
17static void __attribute__((noinline)) test_dump_stack(void); 19static void __attribute__((noinline)) test_dump_stack(void);
18static void test_dump_stack(void) 20static void test_dump_stack(void)
@@ -70,8 +72,9 @@ test_assert(bool exp, const char *exp_str,
70 72
71 fprintf(stderr, "==== Test Assertion Failure ====\n" 73 fprintf(stderr, "==== Test Assertion Failure ====\n"
72 " %s:%u: %s\n" 74 " %s:%u: %s\n"
73 " pid=%d tid=%d\n", 75 " pid=%d tid=%d - %s\n",
74 file, line, exp_str, getpid(), gettid()); 76 file, line, exp_str, getpid(), gettid(),
77 strerror(errno));
75 test_dump_stack(); 78 test_dump_stack();
76 if (fmt) { 79 if (fmt) {
77 fputs(" ", stderr); 80 fputs(" ", stderr);
@@ -80,6 +83,8 @@ test_assert(bool exp, const char *exp_str,
80 } 83 }
81 va_end(ap); 84 va_end(ap);
82 85
86 if (errno == EACCES)
87 ksft_exit_skip("Access denied - Exiting.\n");
83 exit(254); 88 exit(254);
84 } 89 }
85 90
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 2cedfda181d4..37e2a787d2fc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -50,8 +50,8 @@ int kvm_check_cap(long cap)
50 int kvm_fd; 50 int kvm_fd;
51 51
52 kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 52 kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
53 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 53 if (kvm_fd < 0)
54 KVM_DEV_PATH, kvm_fd, errno); 54 exit(KSFT_SKIP);
55 55
56 ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); 56 ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
57 TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" 57 TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
@@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
91 91
92 vm->mode = mode; 92 vm->mode = mode;
93 kvm_fd = open(KVM_DEV_PATH, perm); 93 kvm_fd = open(KVM_DEV_PATH, perm);
94 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 94 if (kvm_fd < 0)
95 KVM_DEV_PATH, kvm_fd, errno); 95 exit(KSFT_SKIP);
96 96
97 /* Create VM. */ 97 /* Create VM. */
98 vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); 98 vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
@@ -418,8 +418,8 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
418 418
419 cpuid = allocate_kvm_cpuid2(); 419 cpuid = allocate_kvm_cpuid2();
420 kvm_fd = open(KVM_DEV_PATH, O_RDONLY); 420 kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
421 TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", 421 if (kvm_fd < 0)
422 KVM_DEV_PATH, kvm_fd, errno); 422 exit(KSFT_SKIP);
423 423
424 ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); 424 ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
425 TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", 425 TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -675,8 +675,8 @@ static int vcpu_mmap_sz(void)
675 int dev_fd, ret; 675 int dev_fd, ret;
676 676
677 dev_fd = open(KVM_DEV_PATH, O_RDONLY); 677 dev_fd = open(KVM_DEV_PATH, O_RDONLY);
678 TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", 678 if (dev_fd < 0)
679 __func__, KVM_DEV_PATH, dev_fd, errno); 679 exit(KSFT_SKIP);
680 680
681 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 681 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
682 TEST_ASSERT(ret >= sizeof(struct kvm_run), 682 TEST_ASSERT(ret >= sizeof(struct kvm_run),
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
index 428e9473f5e2..eae1ece3c31b 100644
--- a/tools/testing/selftests/kvm/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
85{ 85{
86} 86}
87 87
88#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
89#define INVALID_SYNC_FIELD 0x80000000
90
88int main(int argc, char *argv[]) 91int main(int argc, char *argv[])
89{ 92{
90 struct kvm_vm *vm; 93 struct kvm_vm *vm;
@@ -98,9 +101,14 @@ int main(int argc, char *argv[])
98 setbuf(stdout, NULL); 101 setbuf(stdout, NULL);
99 102
100 cap = kvm_check_cap(KVM_CAP_SYNC_REGS); 103 cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
101 TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, 104 if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
102 "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", 105 fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
103 cap, KVM_SYNC_X86_VALID_FIELDS); 106 exit(KSFT_SKIP);
107 }
108 if ((cap & INVALID_SYNC_FIELD) != 0) {
109 fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
110 exit(KSFT_SKIP);
111 }
104 112
105 /* Create VM */ 113 /* Create VM */
106 vm = vm_create_default(VCPU_ID, guest_code); 114 vm = vm_create_default(VCPU_ID, guest_code);
@@ -108,7 +116,14 @@ int main(int argc, char *argv[])
108 run = vcpu_state(vm, VCPU_ID); 116 run = vcpu_state(vm, VCPU_ID);
109 117
110 /* Request reading invalid register set from VCPU. */ 118 /* Request reading invalid register set from VCPU. */
111 run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; 119 run->kvm_valid_regs = INVALID_SYNC_FIELD;
120 rv = _vcpu_run(vm, VCPU_ID);
121 TEST_ASSERT(rv < 0 && errno == EINVAL,
122 "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
123 rv);
124 vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
125
126 run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
112 rv = _vcpu_run(vm, VCPU_ID); 127 rv = _vcpu_run(vm, VCPU_ID);
113 TEST_ASSERT(rv < 0 && errno == EINVAL, 128 TEST_ASSERT(rv < 0 && errno == EINVAL,
114 "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", 129 "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
@@ -116,7 +131,14 @@ int main(int argc, char *argv[])
116 vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; 131 vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
117 132
118 /* Request setting invalid register set into VCPU. */ 133 /* Request setting invalid register set into VCPU. */
119 run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; 134 run->kvm_dirty_regs = INVALID_SYNC_FIELD;
135 rv = _vcpu_run(vm, VCPU_ID);
136 TEST_ASSERT(rv < 0 && errno == EINVAL,
137 "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
138 rv);
139 vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
140
141 run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
120 rv = _vcpu_run(vm, VCPU_ID); 142 rv = _vcpu_run(vm, VCPU_ID);
121 TEST_ASSERT(rv < 0 && errno == EINVAL, 143 TEST_ASSERT(rv < 0 && errno == EINVAL,
122 "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", 144 "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
@@ -125,7 +147,7 @@ int main(int argc, char *argv[])
125 147
126 /* Request and verify all valid register sets. */ 148 /* Request and verify all valid register sets. */
127 /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ 149 /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
128 run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; 150 run->kvm_valid_regs = TEST_SYNC_FIELDS;
129 rv = _vcpu_run(vm, VCPU_ID); 151 rv = _vcpu_run(vm, VCPU_ID);
130 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 152 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
131 "Unexpected exit reason: %u (%s),\n", 153 "Unexpected exit reason: %u (%s),\n",
@@ -146,7 +168,7 @@ int main(int argc, char *argv[])
146 run->s.regs.sregs.apic_base = 1 << 11; 168 run->s.regs.sregs.apic_base = 1 << 11;
147 /* TODO run->s.regs.events.XYZ = ABC; */ 169 /* TODO run->s.regs.events.XYZ = ABC; */
148 170
149 run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; 171 run->kvm_valid_regs = TEST_SYNC_FIELDS;
150 run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; 172 run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
151 rv = _vcpu_run(vm, VCPU_ID); 173 rv = _vcpu_run(vm, VCPU_ID);
152 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 174 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -172,7 +194,7 @@ int main(int argc, char *argv[])
172 /* Clear kvm_dirty_regs bits, verify new s.regs values are 194 /* Clear kvm_dirty_regs bits, verify new s.regs values are
173 * overwritten with existing guest values. 195 * overwritten with existing guest values.
174 */ 196 */
175 run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; 197 run->kvm_valid_regs = TEST_SYNC_FIELDS;
176 run->kvm_dirty_regs = 0; 198 run->kvm_dirty_regs = 0;
177 run->s.regs.regs.r11 = 0xDEADBEEF; 199 run->s.regs.regs.r11 = 0xDEADBEEF;
178 rv = _vcpu_run(vm, VCPU_ID); 200 rv = _vcpu_run(vm, VCPU_ID);
@@ -211,7 +233,7 @@ int main(int argc, char *argv[])
211 * with kvm_sync_regs values. 233 * with kvm_sync_regs values.
212 */ 234 */
213 run->kvm_valid_regs = 0; 235 run->kvm_valid_regs = 0;
214 run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; 236 run->kvm_dirty_regs = TEST_SYNC_FIELDS;
215 run->s.regs.regs.r11 = 0xBBBB; 237 run->s.regs.regs.r11 = 0xBBBB;
216 rv = _vcpu_run(vm, VCPU_ID); 238 rv = _vcpu_run(vm, VCPU_ID);
217 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 239 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
index 8f7f62093add..d7cb7944a42e 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -28,6 +28,8 @@
28#include <string.h> 28#include <string.h>
29#include <sys/ioctl.h> 29#include <sys/ioctl.h>
30 30
31#include "../kselftest.h"
32
31#ifndef MSR_IA32_TSC_ADJUST 33#ifndef MSR_IA32_TSC_ADJUST
32#define MSR_IA32_TSC_ADJUST 0x3b 34#define MSR_IA32_TSC_ADJUST 0x3b
33#endif 35#endif
@@ -189,8 +191,8 @@ int main(int argc, char *argv[])
189 struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); 191 struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
190 192
191 if (!(entry->ecx & CPUID_VMX)) { 193 if (!(entry->ecx & CPUID_VMX)) {
192 printf("nested VMX not enabled, skipping test"); 194 fprintf(stderr, "nested VMX not enabled, skipping test\n");
193 return 0; 195 exit(KSFT_SKIP);
194 } 196 }
195 197
196 vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); 198 vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 195e9d4739a9..17ab36605a8e 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -19,25 +19,43 @@ TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
19all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) 19all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
20 20
21.ONESHELL: 21.ONESHELL:
22define RUN_TEST_PRINT_RESULT
23 TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
24 echo $$TEST_HDR_MSG; \
25 echo "========================================"; \
26 if [ ! -x $$TEST ]; then \
27 echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
28 echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
29 else \
30 cd `dirname $$TEST` > /dev/null; \
31 if [ "X$(summary)" != "X" ]; then \
32 (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
33 echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
34 (if [ $$? -eq $$skip ]; then \
35 echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
36 else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
37 fi;) \
38 else \
39 (./$$BASENAME_TEST && \
40 echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
41 (if [ $$? -eq $$skip ]; then \
42 echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
43 else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
44 fi;) \
45 fi; \
46 cd - > /dev/null; \
47 fi;
48endef
49
22define RUN_TESTS 50define RUN_TESTS
23 @export KSFT_TAP_LEVEL=`echo 1`; 51 @export KSFT_TAP_LEVEL=`echo 1`; \
24 @test_num=`echo 0`; 52 test_num=`echo 0`; \
25 @echo "TAP version 13"; 53 skip=`echo 4`; \
26 @for TEST in $(1); do \ 54 echo "TAP version 13"; \
55 for TEST in $(1); do \
27 BASENAME_TEST=`basename $$TEST`; \ 56 BASENAME_TEST=`basename $$TEST`; \
28 test_num=`echo $$test_num+1 | bc`; \ 57 test_num=`echo $$test_num+1 | bc`; \
29 echo "selftests: $$BASENAME_TEST"; \ 58 $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \
30 echo "========================================"; \
31 if [ ! -x $$TEST ]; then \
32 echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
33 echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
34 else \
35 if [ "X$(summary)" != "X" ]; then \
36 cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
37 else \
38 cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
39 fi; \
40 fi; \
41 done; 59 done;
42endef 60endef
43 61
@@ -76,9 +94,18 @@ else
76endif 94endif
77 95
78define EMIT_TESTS 96define EMIT_TESTS
79 @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ 97 @test_num=`echo 0`; \
98 for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
80 BASENAME_TEST=`basename $$TEST`; \ 99 BASENAME_TEST=`basename $$TEST`; \
81 echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ 100 test_num=`echo $$test_num+1 | bc`; \
101 TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
102 echo "echo $$TEST_HDR_MSG"; \
103 if [ ! -x $$TEST ]; then \
104 echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \
105 echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
106 else
107 echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
108 fi; \
82 done; 109 done;
83endef 110endef
84 111
@@ -106,6 +133,9 @@ COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
106LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) 133LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
107endif 134endif
108 135
136# Selftest makefiles can override those targets by setting
137# OVERRIDE_TARGETS = 1.
138ifeq ($(OVERRIDE_TARGETS),)
109$(OUTPUT)/%:%.c 139$(OUTPUT)/%:%.c
110 $(LINK.c) $^ $(LDLIBS) -o $@ 140 $(LINK.c) $^ $(LDLIBS) -o $@
111 141
@@ -114,5 +144,6 @@ $(OUTPUT)/%.o:%.S
114 144
115$(OUTPUT)/%:%.S 145$(OUTPUT)/%:%.S
116 $(LINK.S) $^ $(LDLIBS) -o $@ 146 $(LINK.S) $^ $(LDLIBS) -o $@
147endif
117 148
118.PHONY: run_tests all clean install emit_tests 149.PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 08360060ab14..70d5711e3ac8 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@
3# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" 3# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
4all: 4all:
5 5
6TEST_PROGS := printf.sh bitmap.sh 6TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
7 7
8include ../lib.mk 8include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 4dee4d2a8bbe..5a90006d1aea 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,9 +1,13 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3
4# Kselftest framework requirement - SKIP code is 4.
5ksft_skip=4
6
3# Runs bitmap infrastructure tests using test_bitmap kernel module 7# Runs bitmap infrastructure tests using test_bitmap kernel module
4if ! /sbin/modprobe -q -n test_bitmap; then 8if ! /sbin/modprobe -q -n test_bitmap; then
5 echo "bitmap: [SKIP]" 9 echo "bitmap: module test_bitmap is not found [SKIP]"
6 exit 77 10 exit $ksft_skip
7fi 11fi
8 12
9if /sbin/modprobe -q test_bitmap; then 13if /sbin/modprobe -q test_bitmap; then
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index b363994e5e11..78e7483c8d60 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -2,9 +2,12 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# Checks fast/slow prime_number generation for inconsistencies 3# Checks fast/slow prime_number generation for inconsistencies
4 4
5if ! /sbin/modprobe -q -r prime_numbers; then 5# Kselftest framework requirement - SKIP code is 4.
6 echo "prime_numbers: [SKIP]" 6ksft_skip=4
7 exit 77 7
8if ! /sbin/modprobe -q -n prime_numbers; then
9 echo "prime_numbers: module prime_numbers is not found [SKIP]"
10 exit $ksft_skip
8fi 11fi
9 12
10if /sbin/modprobe -q prime_numbers selftest=65536; then 13if /sbin/modprobe -q prime_numbers selftest=65536; then
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 0c37377fd7d4..45a23e2d64ad 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,9 +1,13 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# Runs printf infrastructure using test_printf kernel module 3# Runs printf infrastructure using test_printf kernel module
4
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
7
4if ! /sbin/modprobe -q -n test_printf; then 8if ! /sbin/modprobe -q -n test_printf; then
5 echo "printf: [SKIP]" 9 echo "printf: module test_printf is not found [SKIP]"
6 exit 77 10 exit $ksft_skip
7fi 11fi
8 12
9if /sbin/modprobe -q test_printf; then 13if /sbin/modprobe -q test_printf; then
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000000..6e7761ab3536
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
1# SPDX-License-Identifier: GPL-2.0
2#
3# Makefile for locking/ww_mutx selftests
4
5# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
6all:
7
8TEST_PROGS := ww_mutex.sh
9
10include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
index 2c3d6b1878c2..91e4ac7566af 100644..100755
--- a/tools/testing/selftests/locking/ww_mutex.sh
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -1,6 +1,14 @@
1#!/bin/sh 1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3
4# Kselftest framework requirement - SKIP code is 4.
5ksft_skip=4
6
3# Runs API tests for struct ww_mutex (Wait/Wound mutexes) 7# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
8if ! /sbin/modprobe -q -n test-ww_mutex; then
9 echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
10 exit $ksft_skip
11fi
4 12
5if /sbin/modprobe -q test-ww_mutex; then 13if /sbin/modprobe -q test-ww_mutex; then
6 /sbin/modprobe -q -r test-ww_mutex 14 /sbin/modprobe -q -r test-ww_mutex
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index c82cec2497de..60826d7d37d4 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,5 +1,6 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2#
3CFLAGS += -I../ -I../../../../usr/include/
2TEST_GEN_PROGS := media_device_test media_device_open video_device_test 4TEST_GEN_PROGS := media_device_test media_device_open video_device_test
3all: $(TEST_GEN_PROGS)
4 5
5include ../lib.mk 6include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index a5ce5434bafd..93183a37b133 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,6 +34,8 @@
34#include <sys/stat.h> 34#include <sys/stat.h>
35#include <linux/media.h> 35#include <linux/media.h>
36 36
37#include "../kselftest.h"
38
37int main(int argc, char **argv) 39int main(int argc, char **argv)
38{ 40{
39 int opt; 41 int opt;
@@ -61,10 +63,8 @@ int main(int argc, char **argv)
61 } 63 }
62 } 64 }
63 65
64 if (getuid() != 0) { 66 if (getuid() != 0)
65 printf("Please run the test as root - Exiting.\n"); 67 ksft_exit_skip("Please run the test as root - Exiting.\n");
66 exit(-1);
67 }
68 68
69 /* Open Media device and keep it open */ 69 /* Open Media device and keep it open */
70 fd = open(media_device, O_RDWR); 70 fd = open(media_device, O_RDWR);
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 421a367e4bb3..4b9953359e40 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,6 +39,8 @@
39#include <time.h> 39#include <time.h>
40#include <linux/media.h> 40#include <linux/media.h>
41 41
42#include "../kselftest.h"
43
42int main(int argc, char **argv) 44int main(int argc, char **argv)
43{ 45{
44 int opt; 46 int opt;
@@ -66,10 +68,8 @@ int main(int argc, char **argv)
66 } 68 }
67 } 69 }
68 70
69 if (getuid() != 0) { 71 if (getuid() != 0)
70 printf("Please run the test as root - Exiting.\n"); 72 ksft_exit_skip("Please run the test as root - Exiting.\n");
71 exit(-1);
72 }
73 73
74 /* Generate random number of interations */ 74 /* Generate random number of interations */
75 srand((unsigned int) time(NULL)); 75 srand((unsigned int) time(NULL));
@@ -88,7 +88,7 @@ int main(int argc, char **argv)
88 "other Oops in the dmesg. Enable KaSan kernel\n" 88 "other Oops in the dmesg. Enable KaSan kernel\n"
89 "config option for use-after-free error detection.\n\n"); 89 "config option for use-after-free error detection.\n\n");
90 90
91 printf("Running test for %d iternations\n", count); 91 printf("Running test for %d iterations\n", count);
92 92
93 while (count > 0) { 93 while (count > 0) {
94 ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi); 94 ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index 22bffd55a523..6793f8ecc8e7 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -293,10 +293,9 @@ static int test_membarrier_query(void)
293 } 293 }
294 ksft_exit_fail_msg("sys_membarrier() failed\n"); 294 ksft_exit_fail_msg("sys_membarrier() failed\n");
295 } 295 }
296 if (!(ret & MEMBARRIER_CMD_GLOBAL)) { 296 if (!(ret & MEMBARRIER_CMD_GLOBAL))
297 ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); 297 ksft_exit_skip(
298 ksft_exit_fail_msg("sys_membarrier is not supported.\n"); 298 "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
299 }
300 299
301 ksft_test_result_pass("sys_membarrier available\n"); 300 ksft_test_result_pass("sys_membarrier available\n");
302 return 0; 301 return 0;
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 0862e6f47a38..53a848109f7b 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,9 +4,9 @@ CFLAGS += -I../../../../include/uapi/
4CFLAGS += -I../../../../include/ 4CFLAGS += -I../../../../include/
5CFLAGS += -I../../../../usr/include/ 5CFLAGS += -I../../../../usr/include/
6 6
7TEST_PROGS := run_tests.sh 7TEST_GEN_PROGS := memfd_test
8TEST_FILES := run_fuse_test.sh 8TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
9TEST_GEN_FILES := memfd_test fuse_mnt fuse_test 9TEST_GEN_FILES := fuse_mnt fuse_test
10 10
11fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) 11fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
12 12
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
index c2d41ed81b24..fb633eeb0290 100755
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -1,11 +1,8 @@
1#!/bin/bash 1#!/bin/bash
2# please run as root 2# please run as root
3 3
4# 4# Kselftest framework requirement - SKIP code is 4.
5# Normal tests requiring no special resources 5ksft_skip=4
6#
7./run_fuse_test.sh
8./memfd_test
9 6
10# 7#
11# To test memfd_create with hugetlbfs, there needs to be hpages_test 8# To test memfd_create with hugetlbfs, there needs to be hpages_test
@@ -29,12 +26,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
29 nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` 26 nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
30 hpages_needed=`expr $hpages_test - $freepgs` 27 hpages_needed=`expr $hpages_test - $freepgs`
31 28
29 if [ $UID != 0 ]; then
30 echo "Please run memfd with hugetlbfs test as root"
31 exit $ksft_skip
32 fi
33
32 echo 3 > /proc/sys/vm/drop_caches 34 echo 3 > /proc/sys/vm/drop_caches
33 echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages 35 echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
34 if [ $? -ne 0 ]; then
35 echo "Please run this test as root"
36 exit 1
37 fi
38 while read name size unit; do 36 while read name size unit; do
39 if [ "$name" = "HugePages_Free:" ]; then 37 if [ "$name" = "HugePages_Free:" ]; then
40 freepgs=$size 38 freepgs=$size
@@ -53,7 +51,7 @@ if [ $freepgs -lt $hpages_test ]; then
53 fi 51 fi
54 printf "Not enough huge pages available (%d < %d)\n" \ 52 printf "Not enough huge pages available (%d < %d)\n" \
55 $freepgs $needpgs 53 $freepgs $needpgs
56 exit 1 54 exit $ksft_skip
57fi 55fi
58 56
59# 57#
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 686da510f989..e0a625e34f40 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,11 +4,8 @@ all:
4include ../lib.mk 4include ../lib.mk
5 5
6TEST_PROGS := mem-on-off-test.sh 6TEST_PROGS := mem-on-off-test.sh
7override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
8
9override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
10 7
11run_full_test: 8run_full_test:
12 @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" 9 @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
13 10
14clean: 11clean:
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index ae2c790d0880..b37585e6aa38 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -3,30 +3,33 @@
3 3
4SYSFS= 4SYSFS=
5 5
6# Kselftest framework requirement - SKIP code is 4.
7ksft_skip=4
8
6prerequisite() 9prerequisite()
7{ 10{
8 msg="skip all tests:" 11 msg="skip all tests:"
9 12
10 if [ $UID != 0 ]; then 13 if [ $UID != 0 ]; then
11 echo $msg must be run as root >&2 14 echo $msg must be run as root >&2
12 exit 0 15 exit $ksft_skip
13 fi 16 fi
14 17
15 SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` 18 SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
16 19
17 if [ ! -d "$SYSFS" ]; then 20 if [ ! -d "$SYSFS" ]; then
18 echo $msg sysfs is not mounted >&2 21 echo $msg sysfs is not mounted >&2
19 exit 0 22 exit $ksft_skip
20 fi 23 fi
21 24
22 if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then 25 if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
23 echo $msg memory hotplug is not supported >&2 26 echo $msg memory hotplug is not supported >&2
24 exit 0 27 exit $ksft_skip
25 fi 28 fi
26 29
27 if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then 30 if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
28 echo $msg no hot-pluggable memory >&2 31 echo $msg no hot-pluggable memory >&2
29 exit 0 32 exit $ksft_skip
30 fi 33 fi
31} 34}
32 35
@@ -133,7 +136,8 @@ offline_memory_expect_fail()
133 136
134error=-12 137error=-12
135priority=0 138priority=0
136ratio=10 139# Run with default of ratio=2 for Kselftest run
140ratio=2
137retval=0 141retval=0
138 142
139while getopts e:hp:r: opt; do 143while getopts e:hp:r: opt; do
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index e094f71c6dbc..026890744215 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,15 +3,7 @@
3CFLAGS = -Wall \ 3CFLAGS = -Wall \
4 -O2 4 -O2
5 5
6TEST_GEN_PROGS := unprivileged-remount-test 6TEST_PROGS := run_tests.sh
7TEST_GEN_FILES := unprivileged-remount-test
7 8
8include ../lib.mk 9include ../lib.mk
9
10override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
11 then \
12 ./unprivileged-remount-test ; \
13 else \
14 echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
15 fi
16override EMIT_TESTS := echo "$(RUN_TESTS)"
17
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000000..4ab8f507dcba
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3# Kselftest framework requirement - SKIP code is 4.
4ksft_skip=4
5
6# Run mount selftests
7if [ -f /proc/self/uid_map ] ; then
8 ./unprivileged-remount-test ;
9else
10 echo "WARN: No /proc/self/uid_map exist, test skipped." ;
11 exit $ksft_skip
12fi
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 743d3f9e5918..8a58055fc1f5 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,17 +1,7 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2CFLAGS += -O2 2CFLAGS += -O2
3LDLIBS = -lrt -lpthread -lpopt 3LDLIBS = -lrt -lpthread -lpopt
4
4TEST_GEN_PROGS := mq_open_tests mq_perf_tests 5TEST_GEN_PROGS := mq_open_tests mq_perf_tests
5 6
6include ../lib.mk 7include ../lib.mk
7
8override define RUN_TESTS
9 @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
10 @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
11endef
12
13override define EMIT_TESTS
14 echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
15 echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
16endef
17
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index e0a74bd207a5..9403ac01ba11 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,6 +33,8 @@
33#include <mqueue.h> 33#include <mqueue.h>
34#include <error.h> 34#include <error.h>
35 35
36#include "../kselftest.h"
37
36static char *usage = 38static char *usage =
37"Usage:\n" 39"Usage:\n"
38" %s path\n" 40" %s path\n"
@@ -53,6 +55,7 @@ int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
53int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize; 55int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
54FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize; 56FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
55char *queue_path; 57char *queue_path;
58char *default_queue_path = "/test1";
56mqd_t queue = -1; 59mqd_t queue = -1;
57 60
58static inline void __set(FILE *stream, int value, char *err_msg); 61static inline void __set(FILE *stream, int value, char *err_msg);
@@ -238,35 +241,33 @@ int main(int argc, char *argv[])
238 struct mq_attr attr, result; 241 struct mq_attr attr, result;
239 242
240 if (argc != 2) { 243 if (argc != 2) {
241 fprintf(stderr, "Must pass a valid queue name\n\n"); 244 printf("Using Default queue path - %s\n", default_queue_path);
242 fprintf(stderr, usage, argv[0]); 245 queue_path = default_queue_path;
243 exit(1); 246 } else {
244 }
245 247
246 /* 248 /*
247 * Although we can create a msg queue with a non-absolute path name, 249 * Although we can create a msg queue with a non-absolute path name,
248 * unlink will fail. So, if the name doesn't start with a /, add one 250 * unlink will fail. So, if the name doesn't start with a /, add one
249 * when we save it. 251 * when we save it.
250 */ 252 */
251 if (*argv[1] == '/') 253 if (*argv[1] == '/')
252 queue_path = strdup(argv[1]); 254 queue_path = strdup(argv[1]);
253 else { 255 else {
254 queue_path = malloc(strlen(argv[1]) + 2); 256 queue_path = malloc(strlen(argv[1]) + 2);
255 if (!queue_path) { 257 if (!queue_path) {
256 perror("malloc()"); 258 perror("malloc()");
257 exit(1); 259 exit(1);
260 }
261 queue_path[0] = '/';
262 queue_path[1] = 0;
263 strcat(queue_path, argv[1]);
258 } 264 }
259 queue_path[0] = '/';
260 queue_path[1] = 0;
261 strcat(queue_path, argv[1]);
262 } 265 }
263 266
264 if (getuid() != 0) { 267 if (getuid() != 0)
265 fprintf(stderr, "Not running as root, but almost all tests " 268 ksft_exit_skip("Not running as root, but almost all tests "
266 "require root in order to modify\nsystem settings. " 269 "require root in order to modify\nsystem settings. "
267 "Exiting.\n"); 270 "Exiting.\n");
268 exit(1);
269 }
270 271
271 /* Find out what files there are for us to make tweaks in */ 272 /* Find out what files there are for us to make tweaks in */
272 def_msgs = fopen(DEF_MSGS, "r+"); 273 def_msgs = fopen(DEF_MSGS, "r+");
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 8188f72de93c..b019e0b8221c 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -39,6 +39,8 @@
39#include <popt.h> 39#include <popt.h>
40#include <error.h> 40#include <error.h>
41 41
42#include "../kselftest.h"
43
42static char *usage = 44static char *usage =
43"Usage:\n" 45"Usage:\n"
44" %s [-c #[,#..] -f] path\n" 46" %s [-c #[,#..] -f] path\n"
@@ -626,12 +628,10 @@ int main(int argc, char *argv[])
626 cpus_to_pin[0] = cpus_online - 1; 628 cpus_to_pin[0] = cpus_online - 1;
627 } 629 }
628 630
629 if (getuid() != 0) { 631 if (getuid() != 0)
630 fprintf(stderr, "Not running as root, but almost all tests " 632 ksft_exit_skip("Not running as root, but almost all tests "
631 "require root in order to modify\nsystem settings. " 633 "require root in order to modify\nsystem settings. "
632 "Exiting.\n"); 634 "Exiting.\n");
633 exit(1);
634 }
635 635
636 max_msgs = fopen(MAX_MSGS, "r+"); 636 max_msgs = fopen(MAX_MSGS, "r+");
637 max_msgsize = fopen(MAX_MSGSIZE, "r+"); 637 max_msgsize = fopen(MAX_MSGSIZE, "r+");
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c612d6e38c62..128e548aa377 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,9 +1,14 @@
1msg_zerocopy 1msg_zerocopy
2socket 2socket
3psock_fanout 3psock_fanout
4psock_snd
4psock_tpacket 5psock_tpacket
5reuseport_bpf 6reuseport_bpf
6reuseport_bpf_cpu 7reuseport_bpf_cpu
7reuseport_bpf_numa 8reuseport_bpf_numa
8reuseport_dualstack 9reuseport_dualstack
9reuseaddr_conflict 10reuseaddr_conflict
11tcp_mmap
12udpgso
13udpgso_bench_rx
14udpgso_bench_tx
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8f1e13d2e547..663e11e85727 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,12 +5,18 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
5CFLAGS += -I../../../../usr/include/ 5CFLAGS += -I../../../../usr/include/
6 6
7TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh 7TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
8TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh 8TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
9TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
10TEST_PROGS_EXTENDED := in_netns.sh
9TEST_GEN_FILES = socket 11TEST_GEN_FILES = socket
10TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy 12TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
13TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
14TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
11TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa 15TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
12TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict 16TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
13 17
14include ../lib.mk 18include ../lib.mk
15 19
16$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma 20$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
21$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
22$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6a75a3ea44ad..7ba089b33e8b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y
7CONFIG_IPV6=y 7CONFIG_IPV6=y
8CONFIG_IPV6_MULTIPLE_TABLES=y 8CONFIG_IPV6_MULTIPLE_TABLES=y
9CONFIG_VETH=y 9CONFIG_VETH=y
10CONFIG_INET_XFRM_MODE_TUNNEL=y
11CONFIG_NET_IPVTI=y
12CONFIG_INET6_XFRM_MODE_TUNNEL=y
13CONFIG_IPV6_VTI=y
14CONFIG_DUMMY=y
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000000..d4cfb6a7a086
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,248 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test is for checking IPv4 and IPv6 FIB rules API
5
6ret=0
7
8PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
9IP="ip -netns testns"
10
11RTABLE=100
12GW_IP4=192.51.100.2
13SRC_IP=192.51.100.3
14GW_IP6=2001:db8:1::2
15SRC_IP6=2001:db8:1::3
16
17DEV_ADDR=192.51.100.1
18DEV=dummy0
19
20log_test()
21{
22 local rc=$1
23 local expected=$2
24 local msg="$3"
25
26 if [ ${rc} -eq ${expected} ]; then
27 nsuccess=$((nsuccess+1))
28 printf "\n TEST: %-50s [ OK ]\n" "${msg}"
29 else
30 nfail=$((nfail+1))
31 printf "\n TEST: %-50s [FAIL]\n" "${msg}"
32 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
33 echo
34 echo "hit enter to continue, 'q' to quit"
35 read a
36 [ "$a" = "q" ] && exit 1
37 fi
38 fi
39}
40
41log_section()
42{
43 echo
44 echo "######################################################################"
45 echo "TEST SECTION: $*"
46 echo "######################################################################"
47}
48
49setup()
50{
51 set -e
52 ip netns add testns
53 $IP link set dev lo up
54
55 $IP link add dummy0 type dummy
56 $IP link set dev dummy0 up
57 $IP address add 198.51.100.1/24 dev dummy0
58 $IP -6 address add 2001:db8:1::1/64 dev dummy0
59
60 set +e
61}
62
63cleanup()
64{
65 $IP link del dev dummy0 &> /dev/null
66 ip netns del testns
67}
68
69fib_check_iproute_support()
70{
71 ip rule help 2>&1 | grep -q $1
72 if [ $? -ne 0 ]; then
73 echo "SKIP: iproute2 iprule too old, missing $1 match"
74 return 1
75 fi
76
77 ip route get help 2>&1 | grep -q $2
78 if [ $? -ne 0 ]; then
79 echo "SKIP: iproute2 get route too old, missing $2 match"
80 return 1
81 fi
82
83 return 0
84}
85
86fib_rule6_del()
87{
88 $IP -6 rule del $1
89 log_test $? 0 "rule6 del $1"
90}
91
92fib_rule6_del_by_pref()
93{
94 pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
95 $IP -6 rule del pref $pref
96}
97
98fib_rule6_test_match_n_redirect()
99{
100 local match="$1"
101 local getmatch="$2"
102
103 $IP -6 rule add $match table $RTABLE
104 $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
105 log_test $? 0 "rule6 check: $1"
106
107 fib_rule6_del_by_pref "$match"
108 log_test $? 0 "rule6 del by pref: $match"
109}
110
111fib_rule6_test()
112{
113 # setup the fib rule redirect route
114 $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
115
116 match="oif $DEV"
117 fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
118
119 match="from $SRC_IP6 iif $DEV"
120 fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
121
122 match="tos 0x10"
123 fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
124
125 match="fwmark 0x64"
126 getmatch="mark 0x64"
127 fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
128
129 fib_check_iproute_support "uidrange" "uid"
130 if [ $? -eq 0 ]; then
131 match="uidrange 100-100"
132 getmatch="uid 100"
133 fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
134 fi
135
136 fib_check_iproute_support "sport" "sport"
137 if [ $? -eq 0 ]; then
138 match="sport 666 dport 777"
139 fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
140 fi
141
142 fib_check_iproute_support "ipproto" "ipproto"
143 if [ $? -eq 0 ]; then
144 match="ipproto tcp"
145 fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
146 fi
147
148 fib_check_iproute_support "ipproto" "ipproto"
149 if [ $? -eq 0 ]; then
150 match="ipproto icmp"
151 fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
152 fi
153}
154
155fib_rule4_del()
156{
157 $IP rule del $1
158 log_test $? 0 "del $1"
159}
160
161fib_rule4_del_by_pref()
162{
163 pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
164 $IP rule del pref $pref
165}
166
167fib_rule4_test_match_n_redirect()
168{
169 local match="$1"
170 local getmatch="$2"
171
172 $IP rule add $match table $RTABLE
173 $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
174 log_test $? 0 "rule4 check: $1"
175
176 fib_rule4_del_by_pref "$match"
177 log_test $? 0 "rule4 del by pref: $match"
178}
179
180fib_rule4_test()
181{
182 # setup the fib rule redirect route
183 $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
184
185 match="oif $DEV"
186 fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
187
188 match="from $SRC_IP iif $DEV"
189 fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
190
191 match="tos 0x10"
192 fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
193
194 match="fwmark 0x64"
195 getmatch="mark 0x64"
196 fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
197
198 fib_check_iproute_support "uidrange" "uid"
199 if [ $? -eq 0 ]; then
200 match="uidrange 100-100"
201 getmatch="uid 100"
202 fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
203 fi
204
205 fib_check_iproute_support "sport" "sport"
206 if [ $? -eq 0 ]; then
207 match="sport 666 dport 777"
208 fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
209 fi
210
211 fib_check_iproute_support "ipproto" "ipproto"
212 if [ $? -eq 0 ]; then
213 match="ipproto tcp"
214 fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
215 fi
216
217 fib_check_iproute_support "ipproto" "ipproto"
218 if [ $? -eq 0 ]; then
219 match="ipproto icmp"
220 fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
221 fi
222}
223
224run_fibrule_tests()
225{
226 log_section "IPv4 fib rule"
227 fib_rule4_test
228 log_section "IPv6 fib rule"
229 fib_rule6_test
230}
231
232if [ "$(id -u)" -ne 0 ];then
233 echo "SKIP: Need root privileges"
234 exit 0
235fi
236
237if [ ! -x "$(command -v ip)" ]; then
238 echo "SKIP: Could not run test without ip tool"
239 exit 0
240fi
241
242# start clean
243cleanup &> /dev/null
244setup
245run_fibrule_tests
246cleanup
247
248exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 9164e60d4b66..78245d60d8bc 100755..100644
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -5,9 +5,14 @@
5# different events. 5# different events.
6 6
7ret=0 7ret=0
8 8# Kselftest framework requirement - SKIP code is 4.
9VERBOSE=${VERBOSE:=0} 9ksft_skip=4
10PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} 10
11# all tests in this script. Can be overridden with -t option
12TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
13VERBOSE=0
14PAUSE_ON_FAIL=no
15PAUSE=no
11IP="ip -netns testns" 16IP="ip -netns testns"
12 17
13log_test() 18log_test()
@@ -18,8 +23,10 @@ log_test()
18 23
19 if [ ${rc} -eq ${expected} ]; then 24 if [ ${rc} -eq ${expected} ]; then
20 printf " TEST: %-60s [ OK ]\n" "${msg}" 25 printf " TEST: %-60s [ OK ]\n" "${msg}"
26 nsuccess=$((nsuccess+1))
21 else 27 else
22 ret=1 28 ret=1
29 nfail=$((nfail+1))
23 printf " TEST: %-60s [FAIL]\n" "${msg}" 30 printf " TEST: %-60s [FAIL]\n" "${msg}"
24 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then 31 if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
25 echo 32 echo
@@ -28,6 +35,13 @@ log_test()
28 [ "$a" = "q" ] && exit 1 35 [ "$a" = "q" ] && exit 1
29 fi 36 fi
30 fi 37 fi
38
39 if [ "${PAUSE}" = "yes" ]; then
40 echo
41 echo "hit enter to continue, 'q' to quit"
42 read a
43 [ "$a" = "q" ] && exit 1
44 fi
31} 45}
32 46
33setup() 47setup()
@@ -563,39 +577,863 @@ fib_nexthop_test()
563} 577}
564 578
565################################################################################ 579################################################################################
566# 580# Tests on route add and replace
581
582run_cmd()
583{
584 local cmd="$1"
585 local out
586 local stderr="2>/dev/null"
587
588 if [ "$VERBOSE" = "1" ]; then
589 printf " COMMAND: $cmd\n"
590 stderr=
591 fi
592
593 out=$(eval $cmd $stderr)
594 rc=$?
595 if [ "$VERBOSE" = "1" -a -n "$out" ]; then
596 echo " $out"
597 fi
598
599 [ "$VERBOSE" = "1" ] && echo
600
601 return $rc
602}
603
604# add route for a prefix, flushing any existing routes first
605# expected to be the first step of a test
606add_route6()
607{
608 local pfx="$1"
609 local nh="$2"
610 local out
611
612 if [ "$VERBOSE" = "1" ]; then
613 echo
614 echo " ##################################################"
615 echo
616 fi
617
618 run_cmd "$IP -6 ro flush ${pfx}"
619 [ $? -ne 0 ] && exit 1
620
621 out=$($IP -6 ro ls match ${pfx})
622 if [ -n "$out" ]; then
623 echo "Failed to flush routes for prefix used for tests."
624 exit 1
625 fi
626
627 run_cmd "$IP -6 ro add ${pfx} ${nh}"
628 if [ $? -ne 0 ]; then
629 echo "Failed to add initial route for test."
630 exit 1
631 fi
632}
633
634# add initial route - used in replace route tests
635add_initial_route6()
636{
637 add_route6 "2001:db8:104::/64" "$1"
638}
639
640check_route6()
641{
642 local pfx="2001:db8:104::/64"
643 local expected="$1"
644 local out
645 local rc=0
646
647 out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
648 [ "${out}" = "${expected}" ] && return 0
649
650 if [ -z "${out}" ]; then
651 if [ "$VERBOSE" = "1" ]; then
652 printf "\nNo route entry found\n"
653 printf "Expected:\n"
654 printf " ${expected}\n"
655 fi
656 return 1
657 fi
658
659 # tricky way to convert output to 1-line without ip's
660 # messy '\'; this drops all extra white space
661 out=$(echo ${out})
662 if [ "${out}" != "${expected}" ]; then
663 rc=1
664 if [ "${VERBOSE}" = "1" ]; then
665 printf " Unexpected route entry. Have:\n"
666 printf " ${out}\n"
667 printf " Expected:\n"
668 printf " ${expected}\n\n"
669 fi
670 fi
671
672 return $rc
673}
674
675route_cleanup()
676{
677 $IP li del red 2>/dev/null
678 $IP li del dummy1 2>/dev/null
679 $IP li del veth1 2>/dev/null
680 $IP li del veth3 2>/dev/null
681
682 cleanup &> /dev/null
683}
684
685route_setup()
686{
687 route_cleanup
688 setup
689
690 [ "${VERBOSE}" = "1" ] && set -x
691 set -e
692
693 $IP li add red up type vrf table 101
694 $IP li add veth1 type veth peer name veth2
695 $IP li add veth3 type veth peer name veth4
696
697 $IP li set veth1 up
698 $IP li set veth3 up
699 $IP li set veth2 vrf red up
700 $IP li set veth4 vrf red up
701 $IP li add dummy1 type dummy
702 $IP li set dummy1 vrf red up
703
704 $IP -6 addr add 2001:db8:101::1/64 dev veth1
705 $IP -6 addr add 2001:db8:101::2/64 dev veth2
706 $IP -6 addr add 2001:db8:103::1/64 dev veth3
707 $IP -6 addr add 2001:db8:103::2/64 dev veth4
708 $IP -6 addr add 2001:db8:104::1/64 dev dummy1
709
710 $IP addr add 172.16.101.1/24 dev veth1
711 $IP addr add 172.16.101.2/24 dev veth2
712 $IP addr add 172.16.103.1/24 dev veth3
713 $IP addr add 172.16.103.2/24 dev veth4
714 $IP addr add 172.16.104.1/24 dev dummy1
715
716 set +ex
717}
718
719# assumption is that basic add of a single path route works
720# otherwise just adding an address on an interface is broken
721ipv6_rt_add()
722{
723 local rc
724
725 echo
726 echo "IPv6 route add / append tests"
727
728 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
729 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
730 run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
731 log_test $? 2 "Attempt to add duplicate route - gw"
732
733 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
734 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
735 run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
736 log_test $? 2 "Attempt to add duplicate route - dev only"
737
738 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
739 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
740 run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
741 log_test $? 2 "Attempt to add duplicate route - reject route"
742
743 # iproute2 prepend only sets NLM_F_CREATE
744 # - adds a new route; does NOT convert existing route to ECMP
745 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
746 run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
747 check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
748 log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"
749
750 # route append with same prefix adds a new route
751 # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
752 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
753 run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
754 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
755 log_test $? 0 "Append nexthop to existing route - gw"
756
757 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
758 run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
759 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
760 log_test $? 0 "Append nexthop to existing route - dev only"
761
762 # multipath route can not have a nexthop that is a reject route
763 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
764 run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
765 log_test $? 2 "Append nexthop to existing route - reject route"
766
767 # reject route can not be converted to multipath route
768 run_cmd "$IP -6 ro flush 2001:db8:104::/64"
769 run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
770 run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
771 log_test $? 2 "Append nexthop to existing reject route - gw"
772
773 run_cmd "$IP -6 ro flush 2001:db8:104::/64"
774 run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
775 run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
776 log_test $? 2 "Append nexthop to existing reject route - dev only"
777
778 # insert mpath directly
779 add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
780 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
781 log_test $? 0 "Add multipath route"
782
783 add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
784 run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
785 log_test $? 2 "Attempt to add duplicate multipath route"
786
787 # insert of a second route without append but different metric
788 add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
789 run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
790 rc=$?
791 if [ $rc -eq 0 ]; then
792 run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
793 rc=$?
794 fi
795 log_test $rc 0 "Route add with different metrics"
796
797 run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
798 rc=$?
799 if [ $rc -eq 0 ]; then
800 check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
801 rc=$?
802 fi
803 log_test $rc 0 "Route delete with metric"
804}
805
806ipv6_rt_replace_single()
807{
808 # single path with single path
809 #
810 add_initial_route6 "via 2001:db8:101::2"
811 run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
812 check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
813 log_test $? 0 "Single path with single path"
814
815 # single path with multipath
816 #
817 add_initial_route6 "nexthop via 2001:db8:101::2"
818 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
819 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
820 log_test $? 0 "Single path with multipath"
821
822 # single path with reject
823 #
824 add_initial_route6 "nexthop via 2001:db8:101::2"
825 run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
826 check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
827 log_test $? 0 "Single path with reject route"
828
829 # single path with single path using MULTIPATH attribute
830 #
831 add_initial_route6 "via 2001:db8:101::2"
832 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
833 check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
834 log_test $? 0 "Single path with single path via multipath attribute"
835
836 # route replace fails - invalid nexthop
837 add_initial_route6 "via 2001:db8:101::2"
838 run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
839 if [ $? -eq 0 ]; then
840 # previous command is expected to fail so if it returns 0
841 # that means the test failed.
842 log_test 0 1 "Invalid nexthop"
843 else
844 check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
845 log_test $? 0 "Invalid nexthop"
846 fi
847
848 # replace non-existent route
849 # - note use of change versus replace since ip adds NLM_F_CREATE
850 # for replace
851 add_initial_route6 "via 2001:db8:101::2"
852 run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
853 log_test $? 2 "Single path - replace of non-existent route"
854}
855
856ipv6_rt_replace_mpath()
857{
858 # multipath with multipath
859 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
860 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
861 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
862 log_test $? 0 "Multipath with multipath"
863
864 # multipath with single
865 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
866 run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
867 check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
868 log_test $? 0 "Multipath with single path"
869
870 # multipath with single
871 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
872 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
873 check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
874 log_test $? 0 "Multipath with single path via multipath attribute"
875
876 # multipath with reject
877 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
878 run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
879 check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
880 log_test $? 0 "Multipath with reject route"
881
882 # route replace fails - invalid nexthop 1
883 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
884 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
885 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
886 log_test $? 0 "Multipath - invalid first nexthop"
887
888 # route replace fails - invalid nexthop 2
889 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
890 run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
891 check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
892 log_test $? 0 "Multipath - invalid second nexthop"
893
894 # multipath non-existent route
895 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
896 run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
897 log_test $? 2 "Multipath - replace of non-existent route"
898}
899
900ipv6_rt_replace()
901{
902 echo
903 echo "IPv6 route replace tests"
904
905 ipv6_rt_replace_single
906 ipv6_rt_replace_mpath
907}
908
909ipv6_route_test()
910{
911 route_setup
912
913 ipv6_rt_add
914 ipv6_rt_replace
915
916 route_cleanup
917}
918
919ip_addr_metric_check()
920{
921 ip addr help 2>&1 | grep -q metric
922 if [ $? -ne 0 ]; then
923 echo "iproute2 command does not support metric for addresses. Skipping test"
924 return 1
925 fi
926
927 return 0
928}
929
930ipv6_addr_metric_test()
931{
932 local rc
933
934 echo
935 echo "IPv6 prefix route tests"
936
937 ip_addr_metric_check || return 1
938
939 setup
940
941 set -e
942 $IP li add dummy1 type dummy
943 $IP li add dummy2 type dummy
944 $IP li set dummy1 up
945 $IP li set dummy2 up
946
947 # default entry is metric 256
948 run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
949 run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
950 set +e
951
952 check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
953 log_test $? 0 "Default metric"
954
955 set -e
956 run_cmd "$IP -6 addr flush dev dummy1"
957 run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
958 set +e
959
960 check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
961 log_test $? 0 "User specified metric on first device"
962
963 set -e
964 run_cmd "$IP -6 addr flush dev dummy2"
965 run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
966 set +e
967
968 check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
969 log_test $? 0 "User specified metric on second device"
567 970
568fib_test() 971 run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
972 rc=$?
973 if [ $rc -eq 0 ]; then
974 check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
975 rc=$?
976 fi
977 log_test $rc 0 "Delete of address on first device"
978
979 run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
980 rc=$?
981 if [ $rc -eq 0 ]; then
982 check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
983 rc=$?
984 fi
985 log_test $rc 0 "Modify metric of address"
986
987 # verify prefix route removed on down
988 run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
989 run_cmd "$IP li set dev dummy2 down"
990 rc=$?
991 if [ $rc -eq 0 ]; then
992 check_route6 ""
993 rc=$?
994 fi
995 log_test $rc 0 "Prefix route removed on link down"
996
997 # verify prefix route re-inserted with assigned metric
998 run_cmd "$IP li set dev dummy2 up"
999 rc=$?
1000 if [ $rc -eq 0 ]; then
1001 check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
1002 rc=$?
1003 fi
1004 log_test $rc 0 "Prefix route with metric on link up"
1005
1006 $IP li del dummy1
1007 $IP li del dummy2
1008 cleanup
1009}
1010
1011# add route for a prefix, flushing any existing routes first
1012# expected to be the first step of a test
1013add_route()
1014{
1015 local pfx="$1"
1016 local nh="$2"
1017 local out
1018
1019 if [ "$VERBOSE" = "1" ]; then
1020 echo
1021 echo " ##################################################"
1022 echo
1023 fi
1024
1025 run_cmd "$IP ro flush ${pfx}"
1026 [ $? -ne 0 ] && exit 1
1027
1028 out=$($IP ro ls match ${pfx})
1029 if [ -n "$out" ]; then
1030 echo "Failed to flush routes for prefix used for tests."
1031 exit 1
1032 fi
1033
1034 run_cmd "$IP ro add ${pfx} ${nh}"
1035 if [ $? -ne 0 ]; then
1036 echo "Failed to add initial route for test."
1037 exit 1
1038 fi
1039}
1040
1041# add initial route - used in replace route tests
1042add_initial_route()
569{ 1043{
570 if [ -n "$TEST" ]; then 1044 add_route "172.16.104.0/24" "$1"
571 eval $TEST 1045}
1046
1047check_route()
1048{
1049 local pfx="172.16.104.0/24"
1050 local expected="$1"
1051 local out
1052 local rc=0
1053
1054 out=$($IP ro ls match ${pfx})
1055 [ "${out}" = "${expected}" ] && return 0
1056
1057 if [ -z "${out}" ]; then
1058 if [ "$VERBOSE" = "1" ]; then
1059 printf "\nNo route entry found\n"
1060 printf "Expected:\n"
1061 printf " ${expected}\n"
1062 fi
1063 return 1
1064 fi
1065
1066 # tricky way to convert output to 1-line without ip's
1067 # messy '\'; this drops all extra white space
1068 out=$(echo ${out})
1069 if [ "${out}" != "${expected}" ]; then
1070 rc=1
1071 if [ "${VERBOSE}" = "1" ]; then
1072 printf " Unexpected route entry. Have:\n"
1073 printf " ${out}\n"
1074 printf " Expected:\n"
1075 printf " ${expected}\n\n"
1076 fi
1077 fi
1078
1079 return $rc
1080}
1081
1082# assumption is that basic add of a single path route works
1083# otherwise just adding an address on an interface is broken
1084ipv4_rt_add()
1085{
1086 local rc
1087
1088 echo
1089 echo "IPv4 route add / append tests"
1090
1091 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
1092 add_route "172.16.104.0/24" "via 172.16.101.2"
1093 run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
1094 log_test $? 2 "Attempt to add duplicate route - gw"
1095
1096 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
1097 add_route "172.16.104.0/24" "via 172.16.101.2"
1098 run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
1099 log_test $? 2 "Attempt to add duplicate route - dev only"
1100
1101 # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
1102 add_route "172.16.104.0/24" "via 172.16.101.2"
1103 run_cmd "$IP ro add unreachable 172.16.104.0/24"
1104 log_test $? 2 "Attempt to add duplicate route - reject route"
1105
1106 # iproute2 prepend only sets NLM_F_CREATE
1107 # - adds a new route; does NOT convert existing route to ECMP
1108 add_route "172.16.104.0/24" "via 172.16.101.2"
1109 run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
1110 check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
1111 log_test $? 0 "Add new nexthop for existing prefix"
1112
1113 # route append with same prefix adds a new route
1114 # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
1115 add_route "172.16.104.0/24" "via 172.16.101.2"
1116 run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
1117 check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
1118 log_test $? 0 "Append nexthop to existing route - gw"
1119
1120 add_route "172.16.104.0/24" "via 172.16.101.2"
1121 run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
1122 check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
1123 log_test $? 0 "Append nexthop to existing route - dev only"
1124
1125 add_route "172.16.104.0/24" "via 172.16.101.2"
1126 run_cmd "$IP ro append unreachable 172.16.104.0/24"
1127 check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
1128 log_test $? 0 "Append nexthop to existing route - reject route"
1129
1130 run_cmd "$IP ro flush 172.16.104.0/24"
1131 run_cmd "$IP ro add unreachable 172.16.104.0/24"
1132 run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
1133 check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
1134 log_test $? 0 "Append nexthop to existing reject route - gw"
1135
1136 run_cmd "$IP ro flush 172.16.104.0/24"
1137 run_cmd "$IP ro add unreachable 172.16.104.0/24"
1138 run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
1139 check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
1140 log_test $? 0 "Append nexthop to existing reject route - dev only"
1141
1142 # insert mpath directly
1143 add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1144 check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
1145 log_test $? 0 "add multipath route"
1146
1147 add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1148 run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1149 log_test $? 2 "Attempt to add duplicate multipath route"
1150
1151 # insert of a second route without append but different metric
1152 add_route "172.16.104.0/24" "via 172.16.101.2"
1153 run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
1154 rc=$?
1155 if [ $rc -eq 0 ]; then
1156 run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
1157 rc=$?
1158 fi
1159 log_test $rc 0 "Route add with different metrics"
1160
1161 run_cmd "$IP ro del 172.16.104.0/24 metric 512"
1162 rc=$?
1163 if [ $rc -eq 0 ]; then
1164 check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
1165 rc=$?
1166 fi
1167 log_test $rc 0 "Route delete with metric"
1168}
1169
1170ipv4_rt_replace_single()
1171{
1172 # single path with single path
1173 #
1174 add_initial_route "via 172.16.101.2"
1175 run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
1176 check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
1177 log_test $? 0 "Single path with single path"
1178
1179 # single path with multipath
1180 #
1181 add_initial_route "nexthop via 172.16.101.2"
1182 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
1183 check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
1184 log_test $? 0 "Single path with multipath"
1185
1186 # single path with reject
1187 #
1188 add_initial_route "nexthop via 172.16.101.2"
1189 run_cmd "$IP ro replace unreachable 172.16.104.0/24"
1190 check_route "unreachable 172.16.104.0/24"
1191 log_test $? 0 "Single path with reject route"
1192
1193 # single path with single path using MULTIPATH attribute
1194 #
1195 add_initial_route "via 172.16.101.2"
1196 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
1197 check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
1198 log_test $? 0 "Single path with single path via multipath attribute"
1199
1200 # route replace fails - invalid nexthop
1201 add_initial_route "via 172.16.101.2"
1202 run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
1203 if [ $? -eq 0 ]; then
1204 # previous command is expected to fail so if it returns 0
1205 # that means the test failed.
1206 log_test 0 1 "Invalid nexthop"
572 else 1207 else
573 fib_unreg_test 1208 check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
574 fib_down_test 1209 log_test $? 0 "Invalid nexthop"
575 fib_carrier_test 1210 fi
576 fib_nexthop_test 1211
1212 # replace non-existent route
1213 # - note use of change versus replace since ip adds NLM_F_CREATE
1214 # for replace
1215 add_initial_route "via 172.16.101.2"
1216 run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
1217 log_test $? 2 "Single path - replace of non-existent route"
1218}
1219
1220ipv4_rt_replace_mpath()
1221{
1222 # multipath with multipath
1223 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1224 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
1225 check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
1226 log_test $? 0 "Multipath with multipath"
1227
1228 # multipath with single
1229 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1230 run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
1231 check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
1232 log_test $? 0 "Multipath with single path"
1233
1234 # multipath with single
1235 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1236 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
1237 check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
1238 log_test $? 0 "Multipath with single path via multipath attribute"
1239
1240 # multipath with reject
1241 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1242 run_cmd "$IP ro replace unreachable 172.16.104.0/24"
1243 check_route "unreachable 172.16.104.0/24"
1244 log_test $? 0 "Multipath with reject route"
1245
1246 # route replace fails - invalid nexthop 1
1247 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1248 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
1249 check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
1250 log_test $? 0 "Multipath - invalid first nexthop"
1251
1252 # route replace fails - invalid nexthop 2
1253 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1254 run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
1255 check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
1256 log_test $? 0 "Multipath - invalid second nexthop"
1257
1258 # multipath non-existent route
1259 add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
1260 run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
1261 log_test $? 2 "Multipath - replace of non-existent route"
1262}
1263
1264ipv4_rt_replace()
1265{
1266 echo
1267 echo "IPv4 route replace tests"
1268
1269 ipv4_rt_replace_single
1270 ipv4_rt_replace_mpath
1271}
1272
1273ipv4_route_test()
1274{
1275 route_setup
1276
1277 ipv4_rt_add
1278 ipv4_rt_replace
1279
1280 route_cleanup
1281}
1282
1283ipv4_addr_metric_test()
1284{
1285 local rc
1286
1287 echo
1288 echo "IPv4 prefix route tests"
1289
1290 ip_addr_metric_check || return 1
1291
1292 setup
1293
1294 set -e
1295 $IP li add dummy1 type dummy
1296 $IP li add dummy2 type dummy
1297 $IP li set dummy1 up
1298 $IP li set dummy2 up
1299
1300 # default entry is metric 256
1301 run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
1302 run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
1303 set +e
1304
1305 check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
1306 log_test $? 0 "Default metric"
1307
1308 set -e
1309 run_cmd "$IP addr flush dev dummy1"
1310 run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
1311 set +e
1312
1313 check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
1314 log_test $? 0 "User specified metric on first device"
1315
1316 set -e
1317 run_cmd "$IP addr flush dev dummy2"
1318 run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
1319 set +e
1320
1321 check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
1322 log_test $? 0 "User specified metric on second device"
1323
1324 run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
1325 rc=$?
1326 if [ $rc -eq 0 ]; then
1327 check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
1328 rc=$?
1329 fi
1330 log_test $rc 0 "Delete of address on first device"
1331
1332 run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
1333 rc=$?
1334 if [ $rc -eq 0 ]; then
1335 check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
1336 rc=$?
1337 fi
1338 log_test $rc 0 "Modify metric of address"
1339
1340 # verify prefix route removed on down
1341 run_cmd "$IP li set dev dummy2 down"
1342 rc=$?
1343 if [ $rc -eq 0 ]; then
1344 check_route ""
1345 rc=$?
1346 fi
1347 log_test $rc 0 "Prefix route removed on link down"
1348
1349 # verify prefix route re-inserted with assigned metric
1350 run_cmd "$IP li set dev dummy2 up"
1351 rc=$?
1352 if [ $rc -eq 0 ]; then
1353 check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
1354 rc=$?
577 fi 1355 fi
1356 log_test $rc 0 "Prefix route with metric on link up"
1357
1358 $IP li del dummy1
1359 $IP li del dummy2
1360 cleanup
1361}
1362
1363################################################################################
1364# usage
1365
1366usage()
1367{
1368 cat <<EOF
1369usage: ${0##*/} OPTS
1370
1371 -t <test> Test(s) to run (default: all)
1372 (options: $TESTS)
1373 -p Pause on fail
1374 -P Pause after each test before cleanup
1375 -v verbose mode (show commands and output)
1376EOF
578} 1377}
579 1378
1379################################################################################
1380# main
1381
1382while getopts :t:pPhv o
1383do
1384 case $o in
1385 t) TESTS=$OPTARG;;
1386 p) PAUSE_ON_FAIL=yes;;
1387 P) PAUSE=yes;;
1388 v) VERBOSE=$(($VERBOSE + 1));;
1389 h) usage; exit 0;;
1390 *) usage; exit 1;;
1391 esac
1392done
1393
1394PEER_CMD="ip netns exec ${PEER_NS}"
1395
1396# make sure we don't pause twice
1397[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
1398
580if [ "$(id -u)" -ne 0 ];then 1399if [ "$(id -u)" -ne 0 ];then
581 echo "SKIP: Need root privileges" 1400 echo "SKIP: Need root privileges"
582 exit 0 1401 exit $ksft_skip;
583fi 1402fi
584 1403
585if [ ! -x "$(command -v ip)" ]; then 1404if [ ! -x "$(command -v ip)" ]; then
586 echo "SKIP: Could not run test without ip tool" 1405 echo "SKIP: Could not run test without ip tool"
587 exit 0 1406 exit $ksft_skip
588fi 1407fi
589 1408
590ip route help 2>&1 | grep -q fibmatch 1409ip route help 2>&1 | grep -q fibmatch
591if [ $? -ne 0 ]; then 1410if [ $? -ne 0 ]; then
592 echo "SKIP: iproute2 too old, missing fibmatch" 1411 echo "SKIP: iproute2 too old, missing fibmatch"
593 exit 0 1412 exit $ksft_skip
594fi 1413fi
595 1414
596# start clean 1415# start clean
597cleanup &> /dev/null 1416cleanup &> /dev/null
598 1417
599fib_test 1418for t in $TESTS
1419do
1420 case $t in
1421 fib_unreg_test|unregister) fib_unreg_test;;
1422 fib_down_test|down) fib_down_test;;
1423 fib_carrier_test|carrier) fib_carrier_test;;
1424 fib_nexthop_test|nexthop) fib_nexthop_test;;
1425 ipv6_route_test|ipv6_rt) ipv6_route_test;;
1426 ipv4_route_test|ipv4_rt) ipv4_route_test;;
1427 ipv6_addr_metric) ipv6_addr_metric_test;;
1428 ipv4_addr_metric) ipv4_addr_metric_test;;
1429
1430 help) echo "Test names: $TESTS"; exit 0;;
1431 esac
1432done
1433
1434if [ "$TESTS" != "none" ]; then
1435 printf "\nTests passed: %3d\n" ${nsuccess}
1436 printf "Tests failed: %3d\n" ${nfail}
1437fi
600 1438
601exit $ret 1439exit $ret
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 75d922438bc9..d8313d0438b7 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
4NUM_NETIFS=4 5NUM_NETIFS=4
5CHECK_TC="yes" 6CHECK_TC="yes"
6source lib.sh 7source lib.sh
@@ -75,14 +76,31 @@ cleanup()
75 vrf_cleanup 76 vrf_cleanup
76} 77}
77 78
79ping_ipv4()
80{
81 ping_test $h1 192.0.2.2
82}
83
84ping_ipv6()
85{
86 ping6_test $h1 2001:db8:1::2
87}
88
89learning()
90{
91 learning_test "br0" $swp1 $h1 $h2
92}
93
94flooding()
95{
96 flood_test $swp2 $h1 $h2
97}
98
78trap cleanup EXIT 99trap cleanup EXIT
79 100
80setup_prepare 101setup_prepare
81setup_wait 102setup_wait
82 103
83ping_test $h1 192.0.2.2 104tests_run
84ping6_test $h1 2001:db8:1::2
85learning_test "br0" $swp1 $h1 $h2
86flood_test $swp2 $h1 $h2
87 105
88exit $EXIT_STATUS 106exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1cddf06f691d..c15c6c85c984 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
4NUM_NETIFS=4 5NUM_NETIFS=4
5source lib.sh 6source lib.sh
6 7
@@ -73,14 +74,31 @@ cleanup()
73 vrf_cleanup 74 vrf_cleanup
74} 75}
75 76
77ping_ipv4()
78{
79 ping_test $h1 192.0.2.2
80}
81
82ping_ipv6()
83{
84 ping6_test $h1 2001:db8:1::2
85}
86
87learning()
88{
89 learning_test "br0" $swp1 $h1 $h2
90}
91
92flooding()
93{
94 flood_test $swp2 $h1 $h2
95}
96
76trap cleanup EXIT 97trap cleanup EXIT
77 98
78setup_prepare 99setup_prepare
79setup_wait 100setup_wait
80 101
81ping_test $h1 192.0.2.2 102tests_run
82ping6_test $h1 2001:db8:1::2
83learning_test "br0" $swp1 $h1 $h2
84flood_test $swp2 $h1 $h2
85 103
86exit $EXIT_STATUS 104exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1ac6c62271f3..7b18a53aa556 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -321,6 +321,50 @@ simple_if_fini()
321 vrf_destroy $vrf_name 321 vrf_destroy $vrf_name
322} 322}
323 323
324tunnel_create()
325{
326 local name=$1; shift
327 local type=$1; shift
328 local local=$1; shift
329 local remote=$1; shift
330
331 ip link add name $name type $type \
332 local $local remote $remote "$@"
333 ip link set dev $name up
334}
335
336tunnel_destroy()
337{
338 local name=$1; shift
339
340 ip link del dev $name
341}
342
343vlan_create()
344{
345 local if_name=$1; shift
346 local vid=$1; shift
347 local vrf=$1; shift
348 local ips=("${@}")
349 local name=$if_name.$vid
350
351 ip link add name $name link $if_name type vlan id $vid
352 if [ "$vrf" != "" ]; then
353 ip link set dev $name master $vrf
354 fi
355 ip link set dev $name up
356 __addr_add_del $name add "${ips[@]}"
357}
358
359vlan_destroy()
360{
361 local if_name=$1; shift
362 local vid=$1; shift
363 local name=$if_name.$vid
364
365 ip link del dev $name
366}
367
324master_name_get() 368master_name_get()
325{ 369{
326 local if_name=$1 370 local if_name=$1
@@ -335,6 +379,15 @@ link_stats_tx_packets_get()
335 ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' 379 ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
336} 380}
337 381
382tc_rule_stats_get()
383{
384 local dev=$1; shift
385 local pref=$1; shift
386
387 tc -j -s filter show dev $dev ingress pref $pref |
388 jq '.[1].options.actions[].stats.packets'
389}
390
338mac_get() 391mac_get()
339{ 392{
340 local if_name=$1 393 local if_name=$1
@@ -353,19 +406,33 @@ bridge_ageing_time_get()
353 echo $((ageing_time / 100)) 406 echo $((ageing_time / 100))
354} 407}
355 408
356forwarding_enable() 409declare -A SYSCTL_ORIG
410sysctl_set()
411{
412 local key=$1; shift
413 local value=$1; shift
414
415 SYSCTL_ORIG[$key]=$(sysctl -n $key)
416 sysctl -qw $key=$value
417}
418
419sysctl_restore()
357{ 420{
358 ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) 421 local key=$1; shift
359 ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
360 422
361 sysctl -q -w net.ipv4.conf.all.forwarding=1 423 sysctl -qw $key=${SYSCTL_ORIG["$key"]}
362 sysctl -q -w net.ipv6.conf.all.forwarding=1 424}
425
426forwarding_enable()
427{
428 sysctl_set net.ipv4.conf.all.forwarding 1
429 sysctl_set net.ipv6.conf.all.forwarding 1
363} 430}
364 431
365forwarding_restore() 432forwarding_restore()
366{ 433{
367 sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd 434 sysctl_restore net.ipv6.conf.all.forwarding
368 sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd 435 sysctl_restore net.ipv4.conf.all.forwarding
369} 436}
370 437
371tc_offload_check() 438tc_offload_check()
@@ -381,6 +448,115 @@ tc_offload_check()
381 return 0 448 return 0
382} 449}
383 450
451trap_install()
452{
453 local dev=$1; shift
454 local direction=$1; shift
455
456 # For slow-path testing, we need to install a trap to get to
457 # slow path the packets that would otherwise be switched in HW.
458 tc filter add dev $dev $direction pref 1 flower skip_sw action trap
459}
460
461trap_uninstall()
462{
463 local dev=$1; shift
464 local direction=$1; shift
465
466 tc filter del dev $dev $direction pref 1 flower skip_sw
467}
468
469slow_path_trap_install()
470{
471 if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
472 trap_install "$@"
473 fi
474}
475
476slow_path_trap_uninstall()
477{
478 if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
479 trap_uninstall "$@"
480 fi
481}
482
483__icmp_capture_add_del()
484{
485 local add_del=$1; shift
486 local pref=$1; shift
487 local vsuf=$1; shift
488 local tundev=$1; shift
489 local filter=$1; shift
490
491 tc filter $add_del dev "$tundev" ingress \
492 proto ip$vsuf pref $pref \
493 flower ip_proto icmp$vsuf $filter \
494 action pass
495}
496
497icmp_capture_install()
498{
499 __icmp_capture_add_del add 100 "" "$@"
500}
501
502icmp_capture_uninstall()
503{
504 __icmp_capture_add_del del 100 "" "$@"
505}
506
507icmp6_capture_install()
508{
509 __icmp_capture_add_del add 100 v6 "$@"
510}
511
512icmp6_capture_uninstall()
513{
514 __icmp_capture_add_del del 100 v6 "$@"
515}
516
517__vlan_capture_add_del()
518{
519 local add_del=$1; shift
520 local pref=$1; shift
521 local dev=$1; shift
522 local filter=$1; shift
523
524 tc filter $add_del dev "$dev" ingress \
525 proto 802.1q pref $pref \
526 flower $filter \
527 action pass
528}
529
530vlan_capture_install()
531{
532 __vlan_capture_add_del add 100 "$@"
533}
534
535vlan_capture_uninstall()
536{
537 __vlan_capture_add_del del 100 "$@"
538}
539
540matchall_sink_create()
541{
542 local dev=$1; shift
543
544 tc qdisc add dev $dev clsact
545 tc filter add dev $dev ingress \
546 pref 10000 \
547 matchall \
548 action drop
549}
550
551tests_run()
552{
553 local current_test
554
555 for current_test in ${TESTS:-$ALL_TESTS}; do
556 $current_test
557 done
558}
559
384############################################################################## 560##############################################################################
385# Tests 561# Tests
386 562
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000000..e6fd7a18c655
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,159 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test for "tc action mirred egress mirror" when the device to mirror to is a
8# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
9# and another gretap / ip6gretap netdevice is then capable of decapsulating the
10# traffic. Test that the payload is what is expected (ICMP ping request or
11# reply, depending on test).
12
13ALL_TESTS="
14 test_gretap
15 test_ip6gretap
16 test_gretap_mac
17 test_ip6gretap_mac
18 test_two_spans
19"
20
21NUM_NETIFS=6
22source lib.sh
23source mirror_lib.sh
24source mirror_gre_lib.sh
25source mirror_gre_topo_lib.sh
26
27setup_prepare()
28{
29 h1=${NETIFS[p1]}
30 swp1=${NETIFS[p2]}
31
32 swp2=${NETIFS[p3]}
33 h2=${NETIFS[p4]}
34
35 swp3=${NETIFS[p5]}
36 h3=${NETIFS[p6]}
37
38 vrf_prepare
39 mirror_gre_topo_create
40
41 ip address add dev $swp3 192.0.2.129/28
42 ip address add dev $h3 192.0.2.130/28
43
44 ip address add dev $swp3 2001:db8:2::1/64
45 ip address add dev $h3 2001:db8:2::2/64
46}
47
48cleanup()
49{
50 pre_cleanup
51
52 ip address del dev $h3 2001:db8:2::2/64
53 ip address del dev $swp3 2001:db8:2::1/64
54
55 ip address del dev $h3 192.0.2.130/28
56 ip address del dev $swp3 192.0.2.129/28
57
58 mirror_gre_topo_destroy
59 vrf_cleanup
60}
61
62test_span_gre_mac()
63{
64 local tundev=$1; shift
65 local direction=$1; shift
66 local prot=$1; shift
67 local what=$1; shift
68
69 local swp3mac=$(mac_get $swp3)
70 local h3mac=$(mac_get $h3)
71
72 RET=0
73
74 mirror_install $swp1 $direction $tundev "matchall $tcflags"
75 tc filter add dev $h3 ingress pref 77 prot $prot \
76 flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
77 action pass
78
79 mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
80
81 tc filter del dev $h3 ingress pref 77
82 mirror_uninstall $swp1 $direction
83
84 log_test "$direction $what: envelope MAC ($tcflags)"
85}
86
87test_two_spans()
88{
89 RET=0
90
91 mirror_install $swp1 ingress gt4 "matchall $tcflags"
92 mirror_install $swp1 egress gt6 "matchall $tcflags"
93 quick_test_span_gre_dir gt4 ingress
94 quick_test_span_gre_dir gt6 egress
95
96 mirror_uninstall $swp1 ingress
97 fail_test_span_gre_dir gt4 ingress
98 quick_test_span_gre_dir gt6 egress
99
100 mirror_install $swp1 ingress gt4 "matchall $tcflags"
101 mirror_uninstall $swp1 egress
102 quick_test_span_gre_dir gt4 ingress
103 fail_test_span_gre_dir gt6 egress
104
105 mirror_uninstall $swp1 ingress
106 log_test "two simultaneously configured mirrors ($tcflags)"
107}
108
109test_gretap()
110{
111 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
112 full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
113}
114
115test_ip6gretap()
116{
117 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
118 full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
119}
120
121test_gretap_mac()
122{
123 test_span_gre_mac gt4 ingress ip "mirror to gretap"
124 test_span_gre_mac gt4 egress ip "mirror to gretap"
125}
126
127test_ip6gretap_mac()
128{
129 test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
130 test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
131}
132
133test_all()
134{
135 slow_path_trap_install $swp1 ingress
136 slow_path_trap_install $swp1 egress
137
138 tests_run
139
140 slow_path_trap_uninstall $swp1 egress
141 slow_path_trap_uninstall $swp1 ingress
142}
143
144trap cleanup EXIT
145
146setup_prepare
147setup_wait
148
149tcflags="skip_hw"
150test_all
151
152if ! tc_offload_check; then
153 echo "WARN: Could not test offloaded functionality"
154else
155 tcflags="skip_sw"
156 test_all
157fi
158
159exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000000..360ca133bead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# +---------------------+ +---------------------+
5# | H1 | | H2 |
6# | + $h1 | | $h2 + |
7# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
8# +-----|---------------+ +---------------|-----+
9# | |
10# +-----|-------------------------------------------------------------|-----+
11# | SW o--> mirror | |
12# | +---|-------------------------------------------------------------|---+ |
13# | | + $swp1 BR $swp2 + | |
14# | +---------------------------------------------------------------------+ |
15# | |
16# | +---------------------------------------------------------------------+ |
17# | | OL + gt6 (ip6gretap) + gt4 (gretap) | |
18# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | |
19# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | |
20# | | : ttl=100 : ttl=100 | |
21# | | : tos=inherit : tos=inherit | |
22# | +-------------------------:--|-------------------:--|-----------------+ |
23# | : | : | |
24# | +-------------------------:--|-------------------:--|-----------------+ |
25# | | UL : |,---------------------' | |
26# | | + $swp3 : || : | |
27# | | | 192.0.2.129/28 : vv : | |
28# | | | 2001:db8:2::1/64 : + ul (dummy) : | |
29# | +---|---------------------:----------------------:--------------------+ |
30# +-----|---------------------:----------------------:----------------------+
31# | : :
32# +-----|---------------------:----------------------:----------------------+
33# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
34# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
35# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
36# | ttl=100 ttl=100 |
37# | tos=inherit tos=inherit |
38# | |
39# +-------------------------------------------------------------------------+
40#
41# This tests mirroring to gretap and ip6gretap configured in an overlay /
42# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
43# the encapsulated packed should be routed.
44
45ALL_TESTS="
46 test_gretap
47 test_ip6gretap
48"
49
50NUM_NETIFS=6
51source lib.sh
52source mirror_lib.sh
53source mirror_gre_lib.sh
54
55h1_create()
56{
57 simple_if_init $h1 192.0.2.1/28
58}
59
60h1_destroy()
61{
62 simple_if_fini $h1 192.0.2.1/28
63}
64
65h2_create()
66{
67 simple_if_init $h2 192.0.2.2/28
68}
69
70h2_destroy()
71{
72 simple_if_fini $h2 192.0.2.2/28
73}
74
75h3_create()
76{
77 simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
78
79 tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
80 ip link set h3-gt4 vrf v$h3
81 matchall_sink_create h3-gt4
82
83 tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
84 ip link set h3-gt6 vrf v$h3
85 matchall_sink_create h3-gt6
86}
87
88h3_destroy()
89{
90 tunnel_destroy h3-gt6
91 tunnel_destroy h3-gt4
92
93 simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
94}
95
96switch_create()
97{
98 # Bridge between H1 and H2.
99
100 ip link add name br1 type bridge vlan_filtering 1
101 ip link set dev br1 up
102
103 ip link set dev $swp1 master br1
104 ip link set dev $swp1 up
105
106 ip link set dev $swp2 master br1
107 ip link set dev $swp2 up
108
109 tc qdisc add dev $swp1 clsact
110
111 # Underlay.
112
113 simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
114
115 ip link add name ul type dummy
116 ip link set dev ul master v$swp3
117 ip link set dev ul up
118
119 # Overlay.
120
121 vrf_create vrf-ol
122 ip link set dev vrf-ol up
123
124 tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
125 ttl 100 tos inherit dev ul
126 ip link set dev gt4 master vrf-ol
127 ip link set dev gt4 up
128
129 tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
130 ttl 100 tos inherit dev ul allow-localremote
131 ip link set dev gt6 master vrf-ol
132 ip link set dev gt6 up
133}
134
135switch_destroy()
136{
137 vrf_destroy vrf-ol
138
139 tunnel_destroy gt6
140 tunnel_destroy gt4
141
142 simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
143
144 ip link del dev ul
145
146 tc qdisc del dev $swp1 clsact
147
148 ip link set dev $swp1 down
149 ip link set dev $swp2 down
150 ip link del dev br1
151}
152
153setup_prepare()
154{
155 h1=${NETIFS[p1]}
156 swp1=${NETIFS[p2]}
157
158 swp2=${NETIFS[p3]}
159 h2=${NETIFS[p4]}
160
161 swp3=${NETIFS[p5]}
162 h3=${NETIFS[p6]}
163
164 vrf_prepare
165
166 h1_create
167 h2_create
168 h3_create
169
170 switch_create
171}
172
173cleanup()
174{
175 pre_cleanup
176
177 switch_destroy
178
179 h3_destroy
180 h2_destroy
181 h1_destroy
182
183 vrf_cleanup
184}
185
186test_gretap()
187{
188 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
189 full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL"
190}
191
192test_ip6gretap()
193{
194 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
195 full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
196}
197
198test_all()
199{
200 RET=0
201
202 slow_path_trap_install $swp1 ingress
203 slow_path_trap_install $swp1 egress
204
205 tests_run
206
207 slow_path_trap_uninstall $swp1 egress
208 slow_path_trap_uninstall $swp1 ingress
209}
210
211trap cleanup EXIT
212
213setup_prepare
214setup_wait
215
216tcflags="skip_hw"
217test_all
218
219if ! tc_offload_check; then
220 echo "WARN: Could not test offloaded functionality"
221else
222 tcflags="skip_sw"
223 test_all
224fi
225
226exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000000..3bb4c2ba7b14
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,121 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test for "tc action mirred egress mirror" when the underlay route points at a
8# bridge device without vlan filtering (802.1d). The device attached to that
9# bridge is a VLAN.
10
11ALL_TESTS="
12 test_gretap
13 test_ip6gretap
14 test_gretap_stp
15 test_ip6gretap_stp
16"
17
18NUM_NETIFS=6
19source lib.sh
20source mirror_lib.sh
21source mirror_gre_lib.sh
22source mirror_gre_topo_lib.sh
23
24setup_prepare()
25{
26 h1=${NETIFS[p1]}
27 swp1=${NETIFS[p2]}
28
29 swp2=${NETIFS[p3]}
30 h2=${NETIFS[p4]}
31
32 swp3=${NETIFS[p5]}
33 h3=${NETIFS[p6]}
34
35 vrf_prepare
36 mirror_gre_topo_create
37
38 ip link add name br2 type bridge vlan_filtering 0
39 ip link set dev br2 up
40
41 vlan_create $swp3 555
42
43 ip link set dev $swp3.555 master br2
44 ip route add 192.0.2.130/32 dev br2
45 ip -6 route add 2001:db8:2::2/128 dev br2
46
47 ip address add dev br2 192.0.2.129/32
48 ip address add dev br2 2001:db8:2::1/128
49
50 vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
51}
52
53cleanup()
54{
55 pre_cleanup
56
57 vlan_destroy $h3 555
58 ip link del dev br2
59 vlan_destroy $swp3 555
60
61 mirror_gre_topo_destroy
62 vrf_cleanup
63}
64
65test_vlan_match()
66{
67 local tundev=$1; shift
68 local vlan_match=$1; shift
69 local what=$1; shift
70
71 full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
72 full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
73}
74
75test_gretap()
76{
77 test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
78}
79
80test_ip6gretap()
81{
82 test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
83}
84
85test_gretap_stp()
86{
87 full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
88}
89
90test_ip6gretap_stp()
91{
92 full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
93}
94
95test_all()
96{
97 slow_path_trap_install $swp1 ingress
98 slow_path_trap_install $swp1 egress
99
100 tests_run
101
102 slow_path_trap_uninstall $swp1 egress
103 slow_path_trap_uninstall $swp1 ingress
104}
105
106trap cleanup EXIT
107
108setup_prepare
109setup_wait
110
111tcflags="skip_hw"
112test_all
113
114if ! tc_offload_check; then
115 echo "WARN: Could not test offloaded functionality"
116else
117 tcflags="skip_sw"
118 test_all
119fi
120
121exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000000..aa29d46186a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,278 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test how mirrors to gretap and ip6gretap react to changes to relevant
8# configuration.
9
10ALL_TESTS="
11 test_ttl
12 test_tun_up
13 test_egress_up
14 test_remote_ip
15 test_tun_del
16 test_route_del
17"
18
19NUM_NETIFS=6
20source lib.sh
21source mirror_lib.sh
22source mirror_gre_lib.sh
23source mirror_gre_topo_lib.sh
24
# Map the six topology ports, build the shared gretap topology, and put
# IPv4/IPv6 underlay addresses on the $swp3 <-> $h3 mirror egress link.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	vrf_prepare
	mirror_gre_topo_create

	# This test downs $swp3, which deletes the configured IPv6 address
	# unless this sysctl is set.
	sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1

	ip address add dev $swp3 192.0.2.129/28
	ip address add dev $h3 192.0.2.130/28

	ip address add dev $swp3 2001:db8:2::1/64
	ip address add dev $h3 2001:db8:2::2/64
}
49
# Undo setup_prepare in reverse order, including restoring the
# keep_addr_on_down sysctl.
cleanup()
{
	pre_cleanup

	ip address del dev $h3 2001:db8:2::2/64
	ip address del dev $swp3 2001:db8:2::1/64

	ip address del dev $h3 192.0.2.130/28
	ip address del dev $swp3 192.0.2.129/28

	sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down

	mirror_gre_topo_destroy
	vrf_cleanup
}
65
# Verify that changing the tunnel TTL is reflected in mirrored packets.
# A flower filter on $h3 counts only packets with ip_ttl 50: at the
# default TTL 100 nothing matches (expect 0), after switching the tunnel
# to ttl 50 all 10 mirrored packets match.
test_span_gre_ttl()
{
	local tundev=$1; shift
	local type=$1; shift
	local prot=$1; shift
	local what=$1; shift

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	tc filter add dev $h3 ingress pref 77 prot $prot \
		flower ip_ttl 50 action pass

	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0

	ip link set dev $tundev type $type ttl 50
	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10

	# Restore the default TTL for subsequent tests.
	ip link set dev $tundev type $type ttl 100
	tc filter del dev $h3 ingress pref 77
	mirror_uninstall $swp1 ingress

	log_test "$what: TTL change ($tcflags)"
}
90
# Verify that mirroring does not work while the tunnel device is down,
# and resumes after it is brought back up.
test_span_gre_tun_up()
{
	local tundev=$1; shift
	local what=$1; shift

	RET=0

	ip link set dev $tundev down
	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	fail_test_span_gre_dir $tundev ingress

	ip link set dev $tundev up

	quick_test_span_gre_dir $tundev ingress
	mirror_uninstall $swp1 ingress

	log_test "$what: tunnel down/up ($tcflags)"
}
109
# Verify that mirroring fails while the underlay egress device ($swp3)
# is down and resumes once it is up and the neighbor re-resolves.
test_span_gre_egress_up()
{
	local tundev=$1; shift
	local remote_ip=$1; shift
	local what=$1; shift

	RET=0

	ip link set dev $swp3 down
	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	fail_test_span_gre_dir $tundev ingress

	# After setting the device up, wait for neighbor to get resolved so
	# that we can expect mirroring to work.
	ip link set dev $swp3 up
	until ip neigh sh dev $swp3 $remote_ip nud reachable | grep -q ^; do
		sleep 1
	done

	quick_test_span_gre_dir $tundev ingress
	mirror_uninstall $swp1 ingress

	log_test "$what: egress down/up ($tcflags)"
}
140
# Verify that mirroring fails when the tunnel points at a wrong remote
# address and starts working after the correct remote is restored.
test_span_gre_remote_ip()
{
	local tundev=$1; shift
	local type=$1; shift
	local correct_ip=$1; shift
	local wrong_ip=$1; shift
	local what=$1; shift

	RET=0

	ip link set dev $tundev type $type remote $wrong_ip
	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	fail_test_span_gre_dir $tundev ingress

	ip link set dev $tundev type $type remote $correct_ip
	quick_test_span_gre_dir $tundev ingress
	mirror_uninstall $swp1 ingress

	log_test "$what: remote address change ($tcflags)"
}
161
# Verify behavior across deletion and re-creation of the tunnel device:
# mirroring must stop when the device is gone, and a freshly installed
# mirror on the re-created device must work again.
test_span_gre_tun_del()
{
	local tundev=$1; shift
	local type=$1; shift
	local flags=$1; shift
	local local_ip=$1; shift
	local remote_ip=$1; shift
	local what=$1; shift

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress
	ip link del dev $tundev
	fail_test_span_gre_dir $tundev ingress

	tunnel_create $tundev $type $local_ip $remote_ip \
		      ttl 100 tos inherit $flags

	# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
	# and verify it works for the follow-up tests.
	mirror_uninstall $swp1 ingress
	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress
	mirror_uninstall $swp1 ingress

	log_test "$what: tunnel deleted ($tcflags)"
}
190
# Verify that removing the underlay route stops mirroring and that
# re-adding it restores mirroring.
test_span_gre_route_del()
{
	local tundev=$1; shift
	local edev=$1; shift
	local route=$1; shift
	local what=$1; shift

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress

	ip route del $route dev $edev
	fail_test_span_gre_dir $tundev ingress

	ip route add $route dev $edev
	quick_test_span_gre_dir $tundev ingress

	mirror_uninstall $swp1 ingress

	log_test "$what: underlay route removal ($tcflags)"
}
213
# ALL_TESTS entry points: each runs the corresponding scenario against
# both the gretap (gt4) and ip6gretap (gt6) tunnels.

test_ttl()
{
	test_span_gre_ttl gt4 gretap ip "mirror to gretap"
	test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
}

test_tun_up()
{
	test_span_gre_tun_up gt4 "mirror to gretap"
	test_span_gre_tun_up gt6 "mirror to ip6gretap"
}

test_egress_up()
{
	test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
	test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
}

test_remote_ip()
{
	test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
	test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
}

test_tun_del()
{
	test_span_gre_tun_del gt4 gretap "" \
			      192.0.2.129 192.0.2.130 "mirror to gretap"
	test_span_gre_tun_del gt6 ip6gretap allow-localremote \
			      2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
}

test_route_del()
{
	test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
	test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
}
251
# Run every test in ALL_TESTS with slow-path traps installed on $swp1
# for both directions; remove them afterwards.
test_all()
{
	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
262
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000000..12914f40612d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
8# interfaces on H1 and H2 have two addresses each. Flower match on one of the
9# addresses is configured with mirror action. It is expected that when pinging
10# this address, mirroring takes place, whereas when pinging the other one,
11# there's no mirroring.
12
13ALL_TESTS="
14 test_gretap
15 test_ip6gretap
16"
17
18NUM_NETIFS=6
19source lib.sh
20source mirror_lib.sh
21source mirror_gre_lib.sh
22source mirror_gre_topo_lib.sh
23
# Map the topology ports, build the shared gretap topology, address the
# mirror egress link, and add a second address pair on H1/H2 for the
# flower filters to match on.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	vrf_prepare
	mirror_gre_topo_create

	ip address add dev $swp3 192.0.2.129/28
	ip address add dev $h3 192.0.2.130/28

	ip address add dev $swp3 2001:db8:2::1/64
	ip address add dev $h3 2001:db8:2::2/64

	# Secondary addresses: pings to these are what the ACL matches.
	ip address add dev $h1 192.0.2.3/28
	ip address add dev $h2 192.0.2.4/28
}
47
# Undo setup_prepare in reverse order.
cleanup()
{
	pre_cleanup

	ip address del dev $h2 192.0.2.4/28
	ip address del dev $h1 192.0.2.3/28

	ip address del dev $h3 2001:db8:2::2/64
	ip address del dev $swp3 2001:db8:2::1/64

	ip address del dev $h3 192.0.2.130/28
	ip address del dev $swp3 192.0.2.129/28

	mirror_gre_topo_destroy
	vrf_cleanup
}
64
# Like test_span_gre_dir_ips / fail_test_span_gre_dir_ips, but pinging
# between the secondary (flower-matched) addresses 192.0.2.3/.4.
test_span_gre_dir_acl()
{
	test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
}

fail_test_span_gre_dir_acl()
{
	fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
}
74
# Install a flower-based mirror matching on $match_dip and verify that
# only traffic to that address is mirrored: pings between the primary
# addresses must not be mirrored, pings to $match_dip must be.
full_test_span_gre_dir_acl()
{
	local tundev=$1; shift
	local direction=$1; shift
	local forward_type=$1; shift
	local backward_type=$1; shift
	local match_dip=$1; shift
	local what=$1; shift

	mirror_install $swp1 $direction $tundev \
		       "protocol ip flower $tcflags dst_ip $match_dip"
	# Primary addresses do not match the ACL, so no mirroring expected.
	fail_test_span_gre_dir $tundev $direction
	test_span_gre_dir_acl "$tundev" "$direction" \
			      "$forward_type" "$backward_type"
	mirror_uninstall $swp1 $direction

	# Test lack of mirroring after ACL mirror is uninstalled.
	fail_test_span_gre_dir_acl "$tundev" "$direction"

	log_test "$direction $what ($tcflags)"
}
96
test_gretap()
{
	# Ingress matches traffic to H2's ACL address, egress to H1's.
	full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
	full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
}

test_ip6gretap()
{
	full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
	full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
}
108
# Run every test in ALL_TESTS with slow-path traps installed on $swp1.
test_all()
{
	RET=0

	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
121
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000000..619b469365be
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
1# SPDX-License-Identifier: GPL-2.0
2
3source mirror_lib.sh
4
# Expect all 10 mirrored packets to arrive at the H3-side peer tunnel.
quick_test_span_gre_dir_ips()
{
	local tunnel=$1; shift

	do_test_span_dir_ips 10 "h3-$tunnel" "$@"
}
11
# Expect no mirrored packets at the H3-side peer tunnel.
fail_test_span_gre_dir_ips()
{
	local tunnel=$1; shift

	do_test_span_dir_ips 0 "h3-$tunnel" "$@"
}
18
# Full directionality check at the H3-side peer of the given tunnel.
test_span_gre_dir_ips()
{
	local tundev=$1; shift

	test_span_dir_ips h3-$tundev "$@"
}
25
# Install a matchall mirror on $swp1, verify mirroring in the given
# direction between $ip1 and $ip2, uninstall, and log the result.
full_test_span_gre_dir_ips()
{
	local tundev=$1; shift
	local direction=$1; shift
	local forward_type=$1; shift
	local backward_type=$1; shift
	local what=$1; shift
	local ip1=$1; shift
	local ip2=$1; shift

	RET=0

	mirror_install $swp1 $direction $tundev "matchall $tcflags"
	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
			  "$backward_type" "$ip1" "$ip2"
	mirror_uninstall $swp1 $direction

	log_test "$direction $what ($tcflags)"
}
45
# Like full_test_span_gre_dir_ips, but additionally verify that the
# mirrored packets arriving at $h3 are 802.1q-tagged as described by
# $vlan_match and carry GRE (IP protocol 0x2f).
full_test_span_gre_dir_vlan_ips()
{
	local tundev=$1; shift
	local direction=$1; shift
	local vlan_match=$1; shift
	local forward_type=$1; shift
	local backward_type=$1; shift
	local what=$1; shift
	local ip1=$1; shift
	local ip2=$1; shift

	RET=0

	mirror_install $swp1 $direction $tundev "matchall $tcflags"

	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
			  "$backward_type" "$ip1" "$ip2"

	tc filter add dev $h3 ingress pref 77 prot 802.1q \
		flower $vlan_match ip_proto 0x2f \
		action pass
	mirror_test v$h1 $ip1 $ip2 $h3 77 10
	tc filter del dev $h3 ingress pref 77

	mirror_uninstall $swp1 $direction

	log_test "$direction $what ($tcflags)"
}
74
# Convenience wrappers using the standard H1/H2 end-station addresses.

quick_test_span_gre_dir()
{
	quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
}

fail_test_span_gre_dir()
{
	fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
}

test_span_gre_dir()
{
	test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
}

full_test_span_gre_dir()
{
	full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
}

full_test_span_gre_dir_vlan()
{
	full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
}
99
# Verify that mirroring stops while the bridge port $nbpdev is in STP
# "disabled" state and resumes when it is "forwarding" again. The sleeps
# give the state change time to take effect.
full_test_span_gre_stp_ips()
{
	local tundev=$1; shift
	local nbpdev=$1; shift
	local what=$1; shift
	local ip1=$1; shift
	local ip2=$1; shift
	local h3mac=$(mac_get $h3)

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2

	bridge link set dev $nbpdev state disabled
	sleep 1
	fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2

	bridge link set dev $nbpdev state forwarding
	sleep 1
	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2

	mirror_uninstall $swp1 ingress

	log_test "$what: STP state ($tcflags)"
}
126
# STP test wrapper using the standard H1/H2 end-station addresses.
full_test_span_gre_stp()
{
	full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000000..fc0508e40fca
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
8# the tunnel remote address has invalid address at the time that the mirroring
9# is set up. Later on, the neighbor is deleted and it is expected to be
10# reinitialized using the usual ARP process, and the mirroring offload updated.
11
12ALL_TESTS="
13 test_gretap
14 test_ip6gretap
15"
16
17NUM_NETIFS=6
18source lib.sh
19source mirror_lib.sh
20source mirror_gre_lib.sh
21source mirror_gre_topo_lib.sh
22
# Map the topology ports, build the shared gretap topology, and address
# the $swp3 <-> $h3 mirror egress link.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	vrf_prepare
	mirror_gre_topo_create

	ip address add dev $swp3 192.0.2.129/28
	ip address add dev $h3 192.0.2.130/28

	ip address add dev $swp3 2001:db8:2::1/64
	ip address add dev $h3 2001:db8:2::2/64
}
43
# Undo setup_prepare in reverse order.
cleanup()
{
	pre_cleanup

	ip address del dev $h3 2001:db8:2::2/64
	ip address del dev $swp3 2001:db8:2::1/64

	ip address del dev $h3 192.0.2.130/28
	ip address del dev $swp3 192.0.2.129/28

	mirror_gre_topo_destroy
	vrf_cleanup
}
57
# Verify mirroring with a poisoned neighbor entry for the tunnel remote:
# with a bogus lladdr installed, no mirrored traffic must arrive; after
# deleting the entry, resolution happens anew and mirroring resumes.
test_span_gre_neigh()
{
	local addr=$1; shift
	local tundev=$1; shift
	local direction=$1; shift
	local what=$1; shift

	RET=0

	ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
	mirror_install $swp1 $direction $tundev "matchall $tcflags"
	# NOTE(review): "ingress" is hardcoded below while $direction varies;
	# the direction argument appears unused by do_test_span_dir_ips --
	# confirm against mirror_lib.sh.
	fail_test_span_gre_dir $tundev ingress
	ip neigh del dev $swp3 $addr
	quick_test_span_gre_dir $tundev ingress
	mirror_uninstall $swp1 $direction

	log_test "$direction $what: neighbor change ($tcflags)"
}
76
test_gretap()
{
	# Exercise both mirror directions against the IPv4 tunnel remote.
	test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
	test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
}

test_ip6gretap()
{
	test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
	test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
}
88
# Run every test in ALL_TESTS with slow-path traps installed on $swp1.
test_all()
{
	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
99
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000000..8fa681eb90e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,127 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
8# is reachable through a next-hop route (as opposed to directly-attached route).
9
10ALL_TESTS="
11 test_gretap
12 test_ip6gretap
13"
14
15NUM_NETIFS=6
16source lib.sh
17source mirror_lib.sh
18source mirror_gre_lib.sh
19source mirror_gre_topo_lib.sh
20
# Build a next-hop underlay: tunnel local/remote addresses live on the
# tunnel devices themselves, reachable via the $swp3/$h3 transit subnet.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	# Asymmetric paths would otherwise be dropped by reverse-path
	# filtering -- presumably why rp_filter is disabled here; confirm.
	sysctl_set net.ipv4.conf.all.rp_filter 0
	sysctl_set net.ipv4.conf.$h3.rp_filter 0

	vrf_prepare
	mirror_gre_topo_create

	ip address add dev $swp3 192.0.2.161/28
	ip address add dev $h3 192.0.2.162/28
	ip address add dev gt4 192.0.2.129/32
	ip address add dev h3-gt4 192.0.2.130/32

	# IPv6 route can't be added after address. Such routes are rejected due
	# to the gateway address having been configured on the local system. It
	# works the other way around though.
	ip address add dev $swp3 2001:db8:4::1/64
	ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
	ip address add dev $h3 2001:db8:4::2/64
	ip address add dev gt6 2001:db8:2::1
	ip address add dev h3-gt6 2001:db8:2::2
}
52
# Undo setup_prepare in reverse order and restore the rp_filter sysctls.
cleanup()
{
	pre_cleanup

	ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
	ip address del dev $h3 2001:db8:4::2/64
	ip address del dev $swp3 2001:db8:4::1/64

	ip address del dev $h3 192.0.2.162/28
	ip address del dev $swp3 192.0.2.161/28

	mirror_gre_topo_destroy
	vrf_cleanup

	sysctl_restore net.ipv4.conf.$h3.rp_filter
	sysctl_restore net.ipv4.conf.all.rp_filter
}
70
test_gretap()
{
	RET=0
	mirror_install $swp1 ingress gt4 "matchall $tcflags"

	# For IPv4, test that there's no mirroring without the route directing
	# the traffic to tunnel remote address. Then add it and test that
	# mirroring starts. For IPv6 we can't test this due to the limitation
	# that routes for locally-specified IPv6 addresses can't be added.
	fail_test_span_gre_dir gt4 ingress

	ip route add 192.0.2.130/32 via 192.0.2.162
	quick_test_span_gre_dir gt4 ingress
	ip route del 192.0.2.130/32 via 192.0.2.162

	mirror_uninstall $swp1 ingress
	log_test "mirror to gre with next-hop remote ($tcflags)"
}
89
# IPv6 variant: the next-hop route is set up in setup_prepare (see the
# ordering constraint there), so only the positive case is tested.
test_ip6gretap()
{
	RET=0

	mirror_install $swp1 ingress gt6 "matchall $tcflags"
	quick_test_span_gre_dir gt6 ingress
	mirror_uninstall $swp1 ingress

	log_test "mirror to ip6gre with next-hop remote ($tcflags)"
}
100
# Run every test in ALL_TESTS with slow-path traps installed on $swp1.
test_all()
{
	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
111
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000000..253419564708
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
1# SPDX-License-Identifier: GPL-2.0
2
3# This is the standard topology for testing mirroring to gretap and ip6gretap
4# netdevices. The tests that use it tweak it in one way or another--importantly,
5# $swp3 and $h3 need to have addresses set up.
6#
7# +---------------------+ +---------------------+
8# | H1 | | H2 |
9# | + $h1 | | $h2 + |
10# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
11# +-----|---------------+ +---------------|-----+
12# | |
13# +-----|-------------------------------------------------------------|-----+
14# | SW o--> mirror | |
15# | +---|-------------------------------------------------------------|---+ |
16# | | + $swp1 BR $swp2 + | |
17# | +---------------------------------------------------------------------+ |
18# | |
19# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) |
20# | | : loc=2001:db8:2::1 : loc=192.0.2.129 |
21# | | : rem=2001:db8:2::2 : rem=192.0.2.130 |
22# | | : ttl=100 : ttl=100 |
23# | | : tos=inherit : tos=inherit |
24# | | : : |
25# +-----|---------------------:----------------------:----------------------+
26# | : :
27# +-----|---------------------:----------------------:----------------------+
28# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
29# | loc=2001:db8:2::2 loc=192.0.2.130 |
30# | rem=2001:db8:2::1 rem=192.0.2.129 |
31# | ttl=100 ttl=100 |
32# | tos=inherit tos=inherit |
33# | |
34# +-------------------------------------------------------------------------+
35
36source mirror_topo_lib.sh
37
# Create H3's receive side: peer gretap/ip6gretap tunnels enslaved to
# H3's VRF, each with a matchall capture filter to count mirrored packets.
mirror_gre_topo_h3_create()
{
	mirror_topo_h3_create

	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
	ip link set h3-gt4 vrf v$h3
	matchall_sink_create h3-gt4

	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
	ip link set h3-gt6 vrf v$h3
	matchall_sink_create h3-gt6
}
50
# Tear down H3's peer tunnels in reverse order of creation.
mirror_gre_topo_h3_destroy()
{
	tunnel_destroy h3-gt6
	tunnel_destroy h3-gt4

	mirror_topo_h3_destroy
}
58
# Create the switch-side gt4/gt6 mirror-target tunnels (see the diagram
# at the top of this file for addresses).
mirror_gre_topo_switch_create()
{
	mirror_topo_switch_create

	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
		      ttl 100 tos inherit

	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
		      ttl 100 tos inherit allow-localremote
}
69
# Tear down the switch-side tunnels in reverse order of creation.
mirror_gre_topo_switch_destroy()
{
	tunnel_destroy gt6
	tunnel_destroy gt4

	mirror_topo_switch_destroy
}
77
# Build / tear down the full standard gretap mirroring topology.

mirror_gre_topo_create()
{
	mirror_topo_h1_create
	mirror_topo_h2_create
	mirror_gre_topo_h3_create

	mirror_gre_topo_switch_create
}

mirror_gre_topo_destroy()
{
	mirror_gre_topo_switch_destroy

	mirror_gre_topo_h3_destroy
	mirror_topo_h2_destroy
	mirror_topo_h1_destroy
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000000..88cecdb9a861
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
8# whose underlay route points at a vlan device.
9
10ALL_TESTS="
11 test_gretap
12"
13
14NUM_NETIFS=6
15source lib.sh
16source mirror_lib.sh
17source mirror_gre_lib.sh
18source mirror_gre_topo_lib.sh
19
# Build the topology and put the tunnel underlay on VLAN 555 devices:
# $swp3.555 carries the underlay routes, $h3.555 terminates them in
# H3's VRF.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	vrf_prepare
	mirror_gre_topo_create

	ip link add name $swp3.555 link $swp3 type vlan id 555
	ip address add dev $swp3.555 192.0.2.129/32
	ip address add dev $swp3.555 2001:db8:2::1/128
	ip link set dev $swp3.555 up

	ip route add 192.0.2.130/32 dev $swp3.555
	ip -6 route add 2001:db8:2::2/128 dev $swp3.555

	ip link add name $h3.555 link $h3 type vlan id 555
	ip link set dev $h3.555 master v$h3
	ip address add dev $h3.555 192.0.2.130/28
	ip address add dev $h3.555 2001:db8:2::2/64
	ip link set dev $h3.555 up
}
48
# Undo setup_prepare; deleting the VLAN devices also removes their
# addresses and routes.
cleanup()
{
	pre_cleanup

	ip link del dev $h3.555
	ip link del dev $swp3.555

	mirror_gre_topo_destroy
	vrf_cleanup
}
59
test_gretap()
{
	# Ingress (echo request, type 8) and egress (echo reply, type 0).
	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
65
# Run every test in ALL_TESTS with slow-path traps installed on $swp1.
test_all()
{
	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
76
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000000..5dbc7a08f4bd
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,270 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing gretap. See
5# mirror_gre_topo_lib.sh for more details.
6#
7# Test for "tc action mirred egress mirror" when the underlay route points at a
8# vlan device on top of a bridge device with vlan filtering (802.1q).
9
10ALL_TESTS="
11 test_gretap
12 test_ip6gretap
13 test_gretap_forbidden_cpu
14 test_ip6gretap_forbidden_cpu
15 test_gretap_forbidden_egress
16 test_ip6gretap_forbidden_egress
17 test_gretap_untagged_egress
18 test_ip6gretap_untagged_egress
19 test_gretap_fdb_roaming
20 test_ip6gretap_fdb_roaming
21 test_gretap_stp
22 test_ip6gretap_stp
23"
24
25NUM_NETIFS=6
26source lib.sh
27source mirror_lib.sh
28source mirror_gre_lib.sh
29source mirror_gre_topo_lib.sh
30
# Build the topology with the tunnel underlay routed via br1.555, a VLAN
# upper of the 802.1q bridge; $swp3 and $swp2 are bridge ports carrying
# VID 555, and H3 terminates the VLAN in its VRF.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	vrf_prepare
	mirror_gre_topo_create

	vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
	bridge vlan add dev br1 vid 555 self
	ip route rep 192.0.2.130/32 dev br1.555
	ip -6 route rep 2001:db8:2::2/128 dev br1.555

	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64

	ip link set dev $swp3 master br1
	bridge vlan add dev $swp3 vid 555
	bridge vlan add dev $swp2 vid 555
}
56
# Undo setup_prepare: release the bridge ports, then the VLAN uppers,
# then the shared topology.
cleanup()
{
	pre_cleanup

	ip link set dev $swp2 nomaster
	ip link set dev $swp3 nomaster
	vlan_destroy $h3 555
	vlan_destroy br1 555

	mirror_gre_topo_destroy
	vrf_cleanup
}
69
# Exercise VLAN-matched mirroring in both directions; expected ICMP type
# counts swap between ingress (8/0) and egress (0/8).
test_vlan_match()
{
	local tundev=$1; shift
	local vlan_match=$1; shift
	local what=$1; shift

	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
}

test_gretap()
{
	test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
}

test_ip6gretap()
{
	test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
}
89
# Verify that mirroring stops while VID 555 is forbidden on the bridge
# device itself (the "self"/CPU entry) and resumes when it is restored.
test_span_gre_forbidden_cpu()
{
	local tundev=$1; shift
	local what=$1; shift

	RET=0

	# Run the pass-test first, to prime neighbor table.
	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress

	# Now forbid the VLAN at the bridge and see it fail.
	bridge vlan del dev br1 vid 555 self
	sleep 1
	fail_test_span_gre_dir $tundev ingress

	bridge vlan add dev br1 vid 555 self
	sleep 1
	quick_test_span_gre_dir $tundev ingress

	mirror_uninstall $swp1 ingress

	log_test "$what: vlan forbidden at a bridge ($tcflags)"
}
114
test_gretap_forbidden_cpu()
{
	test_span_gre_forbidden_cpu gt4 "mirror to gretap"
}

test_ip6gretap_forbidden_cpu()
{
	test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
}
124
# Verify that mirroring stops while VID 555 is forbidden on the egress
# bridge port $swp3 and resumes when it is re-added.
test_span_gre_forbidden_egress()
{
	local tundev=$1; shift
	local what=$1; shift

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress

	bridge vlan del dev $swp3 vid 555
	sleep 1
	fail_test_span_gre_dir $tundev ingress

	bridge vlan add dev $swp3 vid 555
	# Re-prime FDB
	arping -I br1.555 192.0.2.130 -fqc 1
	sleep 1
	quick_test_span_gre_dir $tundev ingress

	mirror_uninstall $swp1 ingress

	log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
}
149
test_gretap_forbidden_egress()
{
	test_span_gre_forbidden_egress gt4 "mirror to gretap"
}

test_ip6gretap_forbidden_egress()
{
	test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
}
159
# Verify egress tagging: with VID 555 "pvid untagged" on $swp3, mirrored
# GRE traffic still arrives but the H3-side VLAN capture stops matching;
# restoring tagged egress brings the VLAN capture back.
test_span_gre_untagged_egress()
{
	local tundev=$1; shift
	local what=$1; shift

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"

	quick_test_span_gre_dir $tundev ingress
	quick_test_span_vlan_dir $h3 555 ingress

	bridge vlan add dev $swp3 vid 555 pvid untagged
	sleep 1
	quick_test_span_gre_dir $tundev ingress
	fail_test_span_vlan_dir $h3 555 ingress

	bridge vlan add dev $swp3 vid 555
	sleep 1
	quick_test_span_gre_dir $tundev ingress
	quick_test_span_vlan_dir $h3 555 ingress

	mirror_uninstall $swp1 ingress

	log_test "$what: vlan untagged at a bridge egress ($tcflags)"
}
186
test_gretap_untagged_egress()
{
	test_span_gre_untagged_egress gt4 "mirror to gretap"
}

test_ip6gretap_untagged_egress()
{
	test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
}
196
# Verify FDB roaming: move H3's MAC to the wrong bridge port ($swp2) and
# check mirroring fails; after removing the bogus entry and re-priming
# the FDB, mirroring works again.
test_span_gre_fdb_roaming()
{
	local tundev=$1; shift
	local what=$1; shift
	local h3mac=$(mac_get $h3)

	RET=0

	mirror_install $swp1 ingress $tundev "matchall $tcflags"
	quick_test_span_gre_dir $tundev ingress

	bridge fdb del dev $swp3 $h3mac vlan 555 master
	bridge fdb add dev $swp2 $h3mac vlan 555 master
	sleep 1
	fail_test_span_gre_dir $tundev ingress

	bridge fdb del dev $swp2 $h3mac vlan 555 master
	# Re-prime FDB
	arping -I br1.555 192.0.2.130 -fqc 1
	sleep 1
	quick_test_span_gre_dir $tundev ingress

	mirror_uninstall $swp1 ingress

	log_test "$what: MAC roaming ($tcflags)"
}
223
test_gretap_fdb_roaming()
{
	test_span_gre_fdb_roaming gt4 "mirror to gretap"
}

test_ip6gretap_fdb_roaming()
{
	test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
}

test_gretap_stp()
{
	# $swp3 is the bridge port whose STP state gets toggled.
	full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
}

test_ip6gretap_stp()
{
	full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
}
243
# Run every test in ALL_TESTS with slow-path traps installed on $swp1.
test_all()
{
	slow_path_trap_install $swp1 ingress
	slow_path_trap_install $swp1 egress

	tests_run

	slow_path_trap_uninstall $swp1 egress
	slow_path_trap_uninstall $swp1 ingress
}
254
trap cleanup EXIT

setup_prepare
setup_wait

# First pass: force the software datapath (skip_hw).
tcflags="skip_hw"
test_all

# Second pass: hardware offload (skip_sw), only if the driver supports it.
if ! tc_offload_check; then
	echo "WARN: Could not test offloaded functionality"
else
	tcflags="skip_sw"
	test_all
fi

exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000000..d36dc26c6c51
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
1# SPDX-License-Identifier: GPL-2.0
2
3mirror_install()
4{
5 local from_dev=$1; shift
6 local direction=$1; shift
7 local to_dev=$1; shift
8 local filter=$1; shift
9
10 tc filter add dev $from_dev $direction \
11 pref 1000 $filter \
12 action mirred egress mirror dev $to_dev
13}
14
15mirror_uninstall()
16{
17 local from_dev=$1; shift
18 local direction=$1; shift
19
20 tc filter del dev $swp1 $direction pref 1000
21}
22
23mirror_test()
24{
25 local vrf_name=$1; shift
26 local sip=$1; shift
27 local dip=$1; shift
28 local dev=$1; shift
29 local pref=$1; shift
30 local expect=$1; shift
31
32 local t0=$(tc_rule_stats_get $dev $pref)
33 ip vrf exec $vrf_name \
34 ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
35 local t1=$(tc_rule_stats_get $dev $pref)
36 local delta=$((t1 - t0))
37 # Tolerate a couple stray extra packets.
38 ((expect <= delta && delta <= expect + 2))
39 check_err $? "Expected to capture $expect packets, got $delta."
40}
41
42do_test_span_dir_ips()
43{
44 local expect=$1; shift
45 local dev=$1; shift
46 local direction=$1; shift
47 local ip1=$1; shift
48 local ip2=$1; shift
49
50 icmp_capture_install $dev
51 mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
52 mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
53 icmp_capture_uninstall $dev
54}
55
56quick_test_span_dir_ips()
57{
58 do_test_span_dir_ips 10 "$@"
59}
60
61fail_test_span_dir_ips()
62{
63 do_test_span_dir_ips 0 "$@"
64}
65
66test_span_dir_ips()
67{
68 local dev=$1; shift
69 local direction=$1; shift
70 local forward_type=$1; shift
71 local backward_type=$1; shift
72 local ip1=$1; shift
73 local ip2=$1; shift
74
75 quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
76
77 icmp_capture_install $dev "type $forward_type"
78 mirror_test v$h1 $ip1 $ip2 $dev 100 10
79 icmp_capture_uninstall $dev
80
81 icmp_capture_install $dev "type $backward_type"
82 mirror_test v$h2 $ip2 $ip1 $dev 100 10
83 icmp_capture_uninstall $dev
84}
85
86fail_test_span_dir()
87{
88 fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
89}
90
91test_span_dir()
92{
93 test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
94}
95
96do_test_span_vlan_dir_ips()
97{
98 local expect=$1; shift
99 local dev=$1; shift
100 local vid=$1; shift
101 local direction=$1; shift
102 local ip1=$1; shift
103 local ip2=$1; shift
104
105 # Install the capture as skip_hw to avoid double-counting of packets.
106 # The traffic is meant for local box anyway, so will be trapped to
107 # kernel.
108 vlan_capture_install $dev "skip_hw vlan_id $vid"
109 mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
110 mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
111 vlan_capture_uninstall $dev
112}
113
114quick_test_span_vlan_dir_ips()
115{
116 do_test_span_vlan_dir_ips 10 "$@"
117}
118
119fail_test_span_vlan_dir_ips()
120{
121 do_test_span_vlan_dir_ips 0 "$@"
122}
123
124quick_test_span_vlan_dir()
125{
126 quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
127}
128
129fail_test_span_vlan_dir()
130{
131 fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
132}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000000..04979e5962e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
1# SPDX-License-Identifier: GPL-2.0
2
3# This is the standard topology for testing mirroring. The tests that use it
4# tweak it in one way or another--typically add more devices to the topology.
5#
6# +---------------------+ +---------------------+
7# | H1 | | H2 |
8# | + $h1 | | $h2 + |
9# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
10# +-----|---------------+ +---------------|-----+
11# | |
12# +-----|-------------------------------------------------------------|-----+
13# | SW o--> mirror | |
14# | +---|-------------------------------------------------------------|---+ |
15# | | + $swp1 BR $swp2 + | |
16# | +---------------------------------------------------------------------+ |
17# | |
18# | + $swp3 |
19# +-----|-------------------------------------------------------------------+
20# |
21# +-----|-------------------------------------------------------------------+
22# | H3 + $h3 |
23# | |
24# +-------------------------------------------------------------------------+
25
26mirror_topo_h1_create()
27{
28 simple_if_init $h1 192.0.2.1/28
29}
30
31mirror_topo_h1_destroy()
32{
33 simple_if_fini $h1 192.0.2.1/28
34}
35
36mirror_topo_h2_create()
37{
38 simple_if_init $h2 192.0.2.2/28
39}
40
41mirror_topo_h2_destroy()
42{
43 simple_if_fini $h2 192.0.2.2/28
44}
45
46mirror_topo_h3_create()
47{
48 simple_if_init $h3
49 tc qdisc add dev $h3 clsact
50}
51
52mirror_topo_h3_destroy()
53{
54 tc qdisc del dev $h3 clsact
55 simple_if_fini $h3
56}
57
58mirror_topo_switch_create()
59{
60 ip link set dev $swp3 up
61
62 ip link add name br1 type bridge vlan_filtering 1
63 ip link set dev br1 up
64
65 ip link set dev $swp1 master br1
66 ip link set dev $swp1 up
67
68 ip link set dev $swp2 master br1
69 ip link set dev $swp2 up
70
71 tc qdisc add dev $swp1 clsact
72}
73
74mirror_topo_switch_destroy()
75{
76 tc qdisc del dev $swp1 clsact
77
78 ip link set dev $swp1 down
79 ip link set dev $swp2 down
80 ip link del dev br1
81
82 ip link set dev $swp3 down
83}
84
85mirror_topo_create()
86{
87 mirror_topo_h1_create
88 mirror_topo_h2_create
89 mirror_topo_h3_create
90
91 mirror_topo_switch_create
92}
93
94mirror_topo_destroy()
95{
96 mirror_topo_switch_destroy
97
98 mirror_topo_h3_destroy
99 mirror_topo_h2_destroy
100 mirror_topo_h1_destroy
101}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000000..9ab2ce77b332
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
4# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
5# for more details.
6#
7# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
8
9ALL_TESTS="
10 test_vlan
11 test_tagged_vlan
12"
13
14NUM_NETIFS=6
15source lib.sh
16source mirror_lib.sh
17source mirror_topo_lib.sh
18
19setup_prepare()
20{
21 h1=${NETIFS[p1]}
22 swp1=${NETIFS[p2]}
23
24 swp2=${NETIFS[p3]}
25 h2=${NETIFS[p4]}
26
27 swp3=${NETIFS[p5]}
28 h3=${NETIFS[p6]}
29
30 vrf_prepare
31 mirror_topo_create
32
33 vlan_create $swp3 555
34
35 vlan_create $h3 555 v$h3
36 matchall_sink_create $h3.555
37
38 vlan_create $h1 111 v$h1 192.0.2.17/28
39 bridge vlan add dev $swp1 vid 111
40
41 vlan_create $h2 111 v$h2 192.0.2.18/28
42 bridge vlan add dev $swp2 vid 111
43}
44
45cleanup()
46{
47 pre_cleanup
48
49 vlan_destroy $h2 111
50 vlan_destroy $h1 111
51 vlan_destroy $h3 555
52 vlan_destroy $swp3 555
53
54 mirror_topo_destroy
55 vrf_cleanup
56}
57
58test_vlan_dir()
59{
60 local direction=$1; shift
61 local forward_type=$1; shift
62 local backward_type=$1; shift
63
64 RET=0
65
66 mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
67 test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
68 mirror_uninstall $swp1 $direction
69
70 log_test "$direction mirror to vlan ($tcflags)"
71}
72
73test_vlan()
74{
75 test_vlan_dir ingress 8 0
76 test_vlan_dir egress 0 8
77}
78
79test_tagged_vlan_dir()
80{
81 local direction=$1; shift
82 local forward_type=$1; shift
83 local backward_type=$1; shift
84
85 RET=0
86
87 mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
88 do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
89 192.0.2.17 192.0.2.18
90 do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
91 192.0.2.17 192.0.2.18
92 mirror_uninstall $swp1 $direction
93
94 log_test "$direction mirror tagged to vlan ($tcflags)"
95}
96
97test_tagged_vlan()
98{
99 test_tagged_vlan_dir ingress 8 0
100 test_tagged_vlan_dir egress 0 8
101}
102
103test_all()
104{
105 slow_path_trap_install $swp1 ingress
106 slow_path_trap_install $swp1 egress
107 trap_install $h3 ingress
108
109 tests_run
110
111 trap_uninstall $h3 ingress
112 slow_path_trap_uninstall $swp1 egress
113 slow_path_trap_uninstall $swp1 ingress
114}
115
116trap cleanup EXIT
117
118setup_prepare
119setup_wait
120
121tcflags="skip_hw"
122test_all
123
124if ! tc_offload_check; then
125 echo "WARN: Could not test offloaded functionality"
126else
127 tcflags="skip_sw"
128 test_all
129fi
130
131exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index cc6a14abfa87..a75cb51cc5bd 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="ping_ipv4 ping_ipv6"
4NUM_NETIFS=4 5NUM_NETIFS=4
5source lib.sh 6source lib.sh
6 7
@@ -114,12 +115,21 @@ cleanup()
114 vrf_cleanup 115 vrf_cleanup
115} 116}
116 117
118ping_ipv4()
119{
120 ping_test $h1 198.51.100.2
121}
122
123ping_ipv6()
124{
125 ping6_test $h1 2001:db8:2::2
126}
127
117trap cleanup EXIT 128trap cleanup EXIT
118 129
119setup_prepare 130setup_prepare
120setup_wait 131setup_wait
121 132
122ping_test $h1 198.51.100.2 133tests_run
123ping6_test $h1 2001:db8:2::2
124 134
125exit $EXIT_STATUS 135exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 3bc351008db6..8b6d0fb6d604 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
4NUM_NETIFS=8 5NUM_NETIFS=8
5source lib.sh 6source lib.sh
6 7
@@ -191,7 +192,7 @@ multipath_eval()
191 diff=$(echo $weights_ratio - $packets_ratio | bc -l) 192 diff=$(echo $weights_ratio - $packets_ratio | bc -l)
192 diff=${diff#-} 193 diff=${diff#-}
193 194
194 test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 195 test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
195 check_err $? "Too large discrepancy between expected and measured ratios" 196 check_err $? "Too large discrepancy between expected and measured ratios"
196 log_test "$desc" 197 log_test "$desc"
197 log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" 198 log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
@@ -204,13 +205,11 @@ multipath4_test()
204 local weight_rp13=$3 205 local weight_rp13=$3
205 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 206 local t0_rp12 t0_rp13 t1_rp12 t1_rp13
206 local packets_rp12 packets_rp13 207 local packets_rp12 packets_rp13
207 local hash_policy
208 208
209 # Transmit multiple flows from h1 to h2 and make sure they are 209 # Transmit multiple flows from h1 to h2 and make sure they are
210 # distributed between both multipath links (rp12 and rp13) 210 # distributed between both multipath links (rp12 and rp13)
211 # according to the configured weights. 211 # according to the configured weights.
212 hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) 212 sysctl_set net.ipv4.fib_multipath_hash_policy 1
213 sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
214 ip route replace 198.51.100.0/24 vrf vrf-r1 \ 213 ip route replace 198.51.100.0/24 vrf vrf-r1 \
215 nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ 214 nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
216 nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 215 nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +231,7 @@ multipath4_test()
232 ip route replace 198.51.100.0/24 vrf vrf-r1 \ 231 ip route replace 198.51.100.0/24 vrf vrf-r1 \
233 nexthop via 169.254.2.22 dev $rp12 \ 232 nexthop via 169.254.2.22 dev $rp12 \
234 nexthop via 169.254.3.23 dev $rp13 233 nexthop via 169.254.3.23 dev $rp13
235 sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy 234 sysctl_restore net.ipv4.fib_multipath_hash_policy
236} 235}
237 236
238multipath6_l4_test() 237multipath6_l4_test()
@@ -242,13 +241,11 @@ multipath6_l4_test()
242 local weight_rp13=$3 241 local weight_rp13=$3
243 local t0_rp12 t0_rp13 t1_rp12 t1_rp13 242 local t0_rp12 t0_rp13 t1_rp12 t1_rp13
244 local packets_rp12 packets_rp13 243 local packets_rp12 packets_rp13
245 local hash_policy
246 244
247 # Transmit multiple flows from h1 to h2 and make sure they are 245 # Transmit multiple flows from h1 to h2 and make sure they are
248 # distributed between both multipath links (rp12 and rp13) 246 # distributed between both multipath links (rp12 and rp13)
249 # according to the configured weights. 247 # according to the configured weights.
250 hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) 248 sysctl_set net.ipv6.fib_multipath_hash_policy 1
251 sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
252 249
253 ip route replace 2001:db8:2::/64 vrf vrf-r1 \ 250 ip route replace 2001:db8:2::/64 vrf vrf-r1 \
254 nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ 251 nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +268,7 @@ multipath6_l4_test()
271 nexthop via fe80:2::22 dev $rp12 \ 268 nexthop via fe80:2::22 dev $rp12 \
272 nexthop via fe80:3::23 dev $rp13 269 nexthop via fe80:3::23 dev $rp13
273 270
274 sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy 271 sysctl_restore net.ipv6.fib_multipath_hash_policy
275} 272}
276 273
277multipath6_test() 274multipath6_test()
@@ -364,13 +361,21 @@ cleanup()
364 vrf_cleanup 361 vrf_cleanup
365} 362}
366 363
364ping_ipv4()
365{
366 ping_test $h1 198.51.100.2
367}
368
369ping_ipv6()
370{
371 ping6_test $h1 2001:db8:2::2
372}
373
367trap cleanup EXIT 374trap cleanup EXIT
368 375
369setup_prepare 376setup_prepare
370setup_wait 377setup_wait
371 378
372ping_test $h1 198.51.100.2 379tests_run
373ping6_test $h1 2001:db8:2::2
374multipath_test
375 380
376exit $EXIT_STATUS 381exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 3a6385ebd5d0..813d02d1939d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -1,6 +1,8 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
5 mirred_egress_mirror_test gact_trap_test"
4NUM_NETIFS=4 6NUM_NETIFS=4
5source tc_common.sh 7source tc_common.sh
6source lib.sh 8source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
111{ 113{
112 RET=0 114 RET=0
113 115
116 if [[ "$tcflags" != "skip_sw" ]]; then
117 return 0;
118 fi
119
114 tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ 120 tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
115 skip_hw dst_ip 192.0.2.2 action drop 121 skip_hw dst_ip 192.0.2.2 action drop
116 tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ 122 tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
179 ip link set $swp1 address $swp1origmac 185 ip link set $swp1 address $swp1origmac
180} 186}
181 187
188mirred_egress_redirect_test()
189{
190 mirred_egress_test "redirect"
191}
192
193mirred_egress_mirror_test()
194{
195 mirred_egress_test "mirror"
196}
197
182trap cleanup EXIT 198trap cleanup EXIT
183 199
184setup_prepare 200setup_prepare
185setup_wait 201setup_wait
186 202
187gact_drop_and_ok_test 203tests_run
188mirred_egress_test "redirect"
189mirred_egress_test "mirror"
190 204
191tc_offload_check 205tc_offload_check
192if [[ $? -ne 0 ]]; then 206if [[ $? -ne 0 ]]; then
193 log_info "Could not test offloaded functionality" 207 log_info "Could not test offloaded functionality"
194else 208else
195 tcflags="skip_sw" 209 tcflags="skip_sw"
196 gact_drop_and_ok_test 210 tests_run
197 mirred_egress_test "redirect"
198 mirred_egress_test "mirror"
199 gact_trap_test
200fi 211fi
201 212
202exit $EXIT_STATUS 213exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2fd15226974b..d2c783e94df3 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
4NUM_NETIFS=2 5NUM_NETIFS=2
5source tc_common.sh 6source tc_common.sh
6source lib.sh 7source lib.sh
@@ -107,16 +108,14 @@ trap cleanup EXIT
107setup_prepare 108setup_prepare
108setup_wait 109setup_wait
109 110
110unreachable_chain_test 111tests_run
111gact_goto_chain_test
112 112
113tc_offload_check 113tc_offload_check
114if [[ $? -ne 0 ]]; then 114if [[ $? -ne 0 ]]; then
115 log_info "Could not test offloaded functionality" 115 log_info "Could not test offloaded functionality"
116else 116else
117 tcflags="skip_sw" 117 tcflags="skip_sw"
118 unreachable_chain_test 118 tests_run
119 gact_goto_chain_test
120fi 119fi
121 120
122exit $EXIT_STATUS 121exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..20d1077e5a3d 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
5 match_src_ip_test match_ip_flags_test"
4NUM_NETIFS=2 6NUM_NETIFS=2
5source tc_common.sh 7source tc_common.sh
6source lib.sh 8source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
149 log_test "src_ip match ($tcflags)" 151 log_test "src_ip match ($tcflags)"
150} 152}
151 153
154match_ip_flags_test()
155{
156 RET=0
157
158 tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
159 $tcflags ip_flags frag action continue
160 tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
161 $tcflags ip_flags firstfrag action continue
162 tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
163 $tcflags ip_flags nofirstfrag action continue
164 tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
165 $tcflags ip_flags nofrag action drop
166
167 $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
168 -t ip "frag=0" -q
169
170 tc_check_packets "dev $h2 ingress" 101 1
171 check_fail $? "Matched on wrong frag filter (nofrag)"
172
173 tc_check_packets "dev $h2 ingress" 102 1
174 check_fail $? "Matched on wrong firstfrag filter (nofrag)"
175
176 tc_check_packets "dev $h2 ingress" 103 1
177 check_err $? "Did not match on nofirstfrag filter (nofrag) "
178
179 tc_check_packets "dev $h2 ingress" 104 1
180 check_err $? "Did not match on nofrag filter (nofrag)"
181
182 $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
183 -t ip "frag=0,mf" -q
184
185 tc_check_packets "dev $h2 ingress" 101 1
186 check_err $? "Did not match on frag filter (1stfrag)"
187
188 tc_check_packets "dev $h2 ingress" 102 1
189 check_err $? "Did not match fistfrag filter (1stfrag)"
190
191 tc_check_packets "dev $h2 ingress" 103 1
192 check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
193
194 tc_check_packets "dev $h2 ingress" 104 1
195 check_err $? "Match on wrong nofrag filter (1stfrag)"
196
197 $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
198 -t ip "frag=256,mf" -q
199 $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
200 -t ip "frag=256" -q
201
202 tc_check_packets "dev $h2 ingress" 101 3
203 check_err $? "Did not match on frag filter (no1stfrag)"
204
205 tc_check_packets "dev $h2 ingress" 102 1
206 check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
207
208 tc_check_packets "dev $h2 ingress" 103 3
209 check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
210
211 tc_check_packets "dev $h2 ingress" 104 1
212 check_err $? "Matched on nofrag filter (no1stfrag)"
213
214 tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
215 tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
216 tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
217 tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
218
219 log_test "ip_flags match ($tcflags)"
220}
221
152setup_prepare() 222setup_prepare()
153{ 223{
154 h1=${NETIFS[p1]} 224 h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
177setup_prepare 247setup_prepare
178setup_wait 248setup_wait
179 249
180match_dst_mac_test 250tests_run
181match_src_mac_test
182match_dst_ip_test
183match_src_ip_test
184 251
185tc_offload_check 252tc_offload_check
186if [[ $? -ne 0 ]]; then 253if [[ $? -ne 0 ]]; then
187 log_info "Could not test offloaded functionality" 254 log_info "Could not test offloaded functionality"
188else 255else
189 tcflags="skip_sw" 256 tcflags="skip_sw"
190 match_dst_mac_test 257 tests_run
191 match_src_mac_test
192 match_dst_ip_test
193 match_src_ip_test
194fi 258fi
195 259
196exit $EXIT_STATUS 260exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 077b98048ef4..b5b917203815 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,6 +1,7 @@
1#!/bin/bash 1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4ALL_TESTS="shared_block_test"
4NUM_NETIFS=4 5NUM_NETIFS=4
5source tc_common.sh 6source tc_common.sh
6source lib.sh 7source lib.sh
@@ -109,14 +110,14 @@ trap cleanup EXIT
109setup_prepare 110setup_prepare
110setup_wait 111setup_wait
111 112
112shared_block_test 113tests_run
113 114
114tc_offload_check 115tc_offload_check
115if [[ $? -ne 0 ]]; then 116if [[ $? -ne 0 ]]; then
116 log_info "Could not test offloaded functionality" 117 log_info "Could not test offloaded functionality"
117else 118else
118 tcflags="skip_sw" 119 tcflags="skip_sw"
119 shared_block_test 120 tests_run
120fi 121fi
121 122
122exit $EXIT_STATUS 123exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index d571d213418d..c43c6debda06 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -21,6 +21,14 @@ readonly DADDR6='fd::2'
21 21
22readonly path_sysctl_mem="net.core.optmem_max" 22readonly path_sysctl_mem="net.core.optmem_max"
23 23
24# No arguments: automated test
25if [[ "$#" -eq "0" ]]; then
26 $0 4 tcp -t 1
27 $0 6 tcp -t 1
28 echo "OK. All tests passed"
29 exit 0
30fi
31
24# Argument parsing 32# Argument parsing
25if [[ "$#" -lt "2" ]]; then 33if [[ "$#" -lt "2" ]]; then
26 echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" 34 echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 903679e0ff31..e3afcb424710 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -8,6 +8,9 @@
8# if not they probably have failed earlier in the boot process and their logged error will be catched by another test 8# if not they probably have failed earlier in the boot process and their logged error will be catched by another test
9# 9#
10 10
11# Kselftest framework requirement - SKIP code is 4.
12ksft_skip=4
13
11# this function will try to up the interface 14# this function will try to up the interface
12# if already up, nothing done 15# if already up, nothing done
13# arg1: network interface name 16# arg1: network interface name
@@ -18,7 +21,7 @@ kci_net_start()
18 ip link show "$netdev" |grep -q UP 21 ip link show "$netdev" |grep -q UP
19 if [ $? -eq 0 ];then 22 if [ $? -eq 0 ];then
20 echo "SKIP: $netdev: interface already up" 23 echo "SKIP: $netdev: interface already up"
21 return 0 24 return $ksft_skip
22 fi 25 fi
23 26
24 ip link set "$netdev" up 27 ip link set "$netdev" up
@@ -61,12 +64,12 @@ kci_net_setup()
61 ip address show "$netdev" |grep '^[[:space:]]*inet' 64 ip address show "$netdev" |grep '^[[:space:]]*inet'
62 if [ $? -eq 0 ];then 65 if [ $? -eq 0 ];then
63 echo "SKIP: $netdev: already have an IP" 66 echo "SKIP: $netdev: already have an IP"
64 return 0 67 return $ksft_skip
65 fi 68 fi
66 69
67 # TODO what ipaddr to set ? DHCP ? 70 # TODO what ipaddr to set ? DHCP ?
68 echo "SKIP: $netdev: set IP address" 71 echo "SKIP: $netdev: set IP address"
69 return 0 72 return $ksft_skip
70} 73}
71 74
72# test an ethtool command 75# test an ethtool command
@@ -84,6 +87,7 @@ kci_netdev_ethtool_test()
84 if [ $ret -ne 0 ];then 87 if [ $ret -ne 0 ];then
85 if [ $ret -eq "$1" ];then 88 if [ $ret -eq "$1" ];then
86 echo "SKIP: $netdev: ethtool $2 not supported" 89 echo "SKIP: $netdev: ethtool $2 not supported"
90 return $ksft_skip
87 else 91 else
88 echo "FAIL: $netdev: ethtool $2" 92 echo "FAIL: $netdev: ethtool $2"
89 return 1 93 return 1
@@ -104,7 +108,7 @@ kci_netdev_ethtool()
104 ethtool --version 2>/dev/null >/dev/null 108 ethtool --version 2>/dev/null >/dev/null
105 if [ $? -ne 0 ];then 109 if [ $? -ne 0 ];then
106 echo "SKIP: ethtool not present" 110 echo "SKIP: ethtool not present"
107 return 1 111 return $ksft_skip
108 fi 112 fi
109 113
110 TMP_ETHTOOL_FEATURES="$(mktemp)" 114 TMP_ETHTOOL_FEATURES="$(mktemp)"
@@ -176,13 +180,13 @@ kci_test_netdev()
176#check for needed privileges 180#check for needed privileges
177if [ "$(id -u)" -ne 0 ];then 181if [ "$(id -u)" -ne 0 ];then
178 echo "SKIP: Need root privileges" 182 echo "SKIP: Need root privileges"
179 exit 0 183 exit $ksft_skip
180fi 184fi
181 185
182ip link show 2>/dev/null >/dev/null 186ip link show 2>/dev/null >/dev/null
183if [ $? -ne 0 ];then 187if [ $? -ne 0 ];then
184 echo "SKIP: Could not run test without the ip tool" 188 echo "SKIP: Could not run test without the ip tool"
185 exit 0 189 exit $ksft_skip
186fi 190fi
187 191
188TMP_LIST_NETDEV="$(mktemp)" 192TMP_LIST_NETDEV="$(mktemp)"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 1e428781a625..f8cc38afffa2 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -43,6 +43,9 @@
43# that MTU is properly calculated instead when MTU is not configured from 43# that MTU is properly calculated instead when MTU is not configured from
44# userspace 44# userspace
45 45
46# Kselftest framework requirement - SKIP code is 4.
47ksft_skip=4
48
46tests=" 49tests="
47 pmtu_vti6_exception vti6: PMTU exceptions 50 pmtu_vti6_exception vti6: PMTU exceptions
48 pmtu_vti4_exception vti4: PMTU exceptions 51 pmtu_vti4_exception vti4: PMTU exceptions
@@ -162,7 +165,7 @@ setup_xfrm6() {
162} 165}
163 166
164setup() { 167setup() {
165 [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1 168 [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
166 169
167 cleanup_done=0 170 cleanup_done=0
168 for arg do 171 for arg do
@@ -368,7 +371,7 @@ test_pmtu_vti6_link_add_mtu() {
368 371
369 fail=0 372 fail=0
370 373
371 min=1280 374 min=68 # vti6 can carry IPv4 packets too
372 max=$((65535 - 40)) 375 max=$((65535 - 40))
373 # Check invalid values first 376 # Check invalid values first
374 for v in $((min - 1)) $((max + 1)); do 377 for v in $((min - 1)) $((max + 1)); do
@@ -384,7 +387,7 @@ test_pmtu_vti6_link_add_mtu() {
384 done 387 done
385 388
386 # Now check valid values 389 # Now check valid values
387 for v in 1280 1300 $((65535 - 40)); do 390 for v in 68 1280 1300 $((65535 - 40)); do
388 ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 391 ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
389 mtu="$(link_get_mtu "${ns_a}" vti6_a)" 392 mtu="$(link_get_mtu "${ns_a}" vti6_a)"
390 ${ns_a} ip link del vti6_a 393 ${ns_a} ip link del vti6_a
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000000..7d15e10a9fb6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4
5#include <arpa/inet.h>
6#include <errno.h>
7#include <error.h>
8#include <fcntl.h>
9#include <limits.h>
10#include <linux/filter.h>
11#include <linux/bpf.h>
12#include <linux/if_packet.h>
13#include <linux/if_vlan.h>
14#include <linux/virtio_net.h>
15#include <net/if.h>
16#include <net/ethernet.h>
17#include <netinet/ip.h>
18#include <netinet/udp.h>
19#include <poll.h>
20#include <sched.h>
21#include <stdbool.h>
22#include <stdint.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/mman.h>
27#include <sys/socket.h>
28#include <sys/stat.h>
29#include <sys/types.h>
30#include <unistd.h>
31
32#include "psock_lib.h"
33
34static bool cfg_use_bind;
35static bool cfg_use_csum_off;
36static bool cfg_use_csum_off_bad;
37static bool cfg_use_dgram;
38static bool cfg_use_gso;
39static bool cfg_use_qdisc_bypass;
40static bool cfg_use_vlan;
41static bool cfg_use_vnet;
42
43static char *cfg_ifname = "lo";
44static int cfg_mtu = 1500;
45static int cfg_payload_len = DATA_LEN;
46static int cfg_truncate_len = INT_MAX;
47static uint16_t cfg_port = 8000;
48
49/* test sending up to max mtu + 1 */
50#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
51
52static char tbuf[TEST_SZ], rbuf[TEST_SZ];
53
54static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
55{
56 unsigned long sum = 0;
57 int i;
58
59 for (i = 0; i < num_u16; i++)
60 sum += start[i];
61
62 return sum;
63}
64
65static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
66 unsigned long sum)
67{
68 sum += add_csum_hword(start, num_u16);
69
70 while (sum >> 16)
71 sum = (sum & 0xffff) + (sum >> 16);
72
73 return ~sum;
74}
75
76static int build_vnet_header(void *header)
77{
78 struct virtio_net_hdr *vh = header;
79
80 vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
81
82 if (cfg_use_csum_off) {
83 vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
84 vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
85 vh->csum_offset = __builtin_offsetof(struct udphdr, check);
86
87 /* position check field exactly one byte beyond end of packet */
88 if (cfg_use_csum_off_bad)
89 vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
90 vh->csum_offset - 1;
91 }
92
93 if (cfg_use_gso) {
94 vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
95 vh->gso_size = cfg_mtu - sizeof(struct iphdr);
96 }
97
98 return sizeof(*vh);
99}
100
101static int build_eth_header(void *header)
102{
103 struct ethhdr *eth = header;
104
105 if (cfg_use_vlan) {
106 uint16_t *tag = header + ETH_HLEN;
107
108 eth->h_proto = htons(ETH_P_8021Q);
109 tag[1] = htons(ETH_P_IP);
110 return ETH_HLEN + 4;
111 }
112
113 eth->h_proto = htons(ETH_P_IP);
114 return ETH_HLEN;
115}
116
117static int build_ipv4_header(void *header, int payload_len)
118{
119 struct iphdr *iph = header;
120
121 iph->ihl = 5;
122 iph->version = 4;
123 iph->ttl = 8;
124 iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
125 iph->id = htons(1337);
126 iph->protocol = IPPROTO_UDP;
127 iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
128 iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
129 iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
130
131 return iph->ihl << 2;
132}
133
134static int build_udp_header(void *header, int payload_len)
135{
136 const int alen = sizeof(uint32_t);
137 struct udphdr *udph = header;
138 int len = sizeof(*udph) + payload_len;
139
140 udph->source = htons(9);
141 udph->dest = htons(cfg_port);
142 udph->len = htons(len);
143
144 if (cfg_use_csum_off)
145 udph->check = build_ip_csum(header - (2 * alen), alen,
146 htons(IPPROTO_UDP) + udph->len);
147 else
148 udph->check = 0;
149
150 return sizeof(*udph);
151}
152
153static int build_packet(int payload_len)
154{
155 int off = 0;
156
157 off += build_vnet_header(tbuf);
158 off += build_eth_header(tbuf + off);
159 off += build_ipv4_header(tbuf + off, payload_len);
160 off += build_udp_header(tbuf + off, payload_len);
161
162 if (off + payload_len > sizeof(tbuf))
163 error(1, 0, "payload length exceeds max");
164
165 memset(tbuf + off, DATA_CHAR, payload_len);
166
167 return off + payload_len;
168}
169
170static void do_bind(int fd)
171{
172 struct sockaddr_ll laddr = {0};
173
174 laddr.sll_family = AF_PACKET;
175 laddr.sll_protocol = htons(ETH_P_IP);
176 laddr.sll_ifindex = if_nametoindex(cfg_ifname);
177 if (!laddr.sll_ifindex)
178 error(1, errno, "if_nametoindex");
179
180 if (bind(fd, (void *)&laddr, sizeof(laddr)))
181 error(1, errno, "bind");
182}
183
184static void do_send(int fd, char *buf, int len)
185{
186 int ret;
187
188 if (!cfg_use_vnet) {
189 buf += sizeof(struct virtio_net_hdr);
190 len -= sizeof(struct virtio_net_hdr);
191 }
192 if (cfg_use_dgram) {
193 buf += ETH_HLEN;
194 len -= ETH_HLEN;
195 }
196
197 if (cfg_use_bind) {
198 ret = write(fd, buf, len);
199 } else {
200 struct sockaddr_ll laddr = {0};
201
202 laddr.sll_protocol = htons(ETH_P_IP);
203 laddr.sll_ifindex = if_nametoindex(cfg_ifname);
204 if (!laddr.sll_ifindex)
205 error(1, errno, "if_nametoindex");
206
207 ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
208 }
209
210 if (ret == -1)
211 error(1, errno, "write");
212 if (ret != len)
213 error(1, 0, "write: %u %u", ret, len);
214
215 fprintf(stderr, "tx: %u\n", ret);
216}
217
218static int do_tx(void)
219{
220 const int one = 1;
221 int fd, len;
222
223 fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
224 if (fd == -1)
225 error(1, errno, "socket t");
226
227 if (cfg_use_bind)
228 do_bind(fd);
229
230 if (cfg_use_qdisc_bypass &&
231 setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
232 error(1, errno, "setsockopt qdisc bypass");
233
234 if (cfg_use_vnet &&
235 setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
236 error(1, errno, "setsockopt vnet");
237
238 len = build_packet(cfg_payload_len);
239
240 if (cfg_truncate_len < len)
241 len = cfg_truncate_len;
242
243 do_send(fd, tbuf, len);
244
245 if (close(fd))
246 error(1, errno, "close t");
247
248 return len;
249}
250
251static int setup_rx(void)
252{
253 struct timeval tv = { .tv_usec = 100 * 1000 };
254 struct sockaddr_in raddr = {0};
255 int fd;
256
257 fd = socket(PF_INET, SOCK_DGRAM, 0);
258 if (fd == -1)
259 error(1, errno, "socket r");
260
261 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
262 error(1, errno, "setsockopt rcv timeout");
263
264 raddr.sin_family = AF_INET;
265 raddr.sin_port = htons(cfg_port);
266 raddr.sin_addr.s_addr = htonl(INADDR_ANY);
267
268 if (bind(fd, (void *)&raddr, sizeof(raddr)))
269 error(1, errno, "bind r");
270
271 return fd;
272}
273
274static void do_rx(int fd, int expected_len, char *expected)
275{
276 int ret;
277
278 ret = recv(fd, rbuf, sizeof(rbuf), 0);
279 if (ret == -1)
280 error(1, errno, "recv");
281 if (ret != expected_len)
282 error(1, 0, "recv: %u != %u", ret, expected_len);
283
284 if (memcmp(rbuf, expected, ret))
285 error(1, 0, "recv: data mismatch");
286
287 fprintf(stderr, "rx: %u\n", ret);
288}
289
290static int setup_sniffer(void)
291{
292 struct timeval tv = { .tv_usec = 100 * 1000 };
293 int fd;
294
295 fd = socket(PF_PACKET, SOCK_RAW, 0);
296 if (fd == -1)
297 error(1, errno, "socket p");
298
299 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
300 error(1, errno, "setsockopt rcv timeout");
301
302 pair_udp_setfilter(fd);
303 do_bind(fd);
304
305 return fd;
306}
307
308static void parse_opts(int argc, char **argv)
309{
310 int c;
311
312 while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
313 switch (c) {
314 case 'b':
315 cfg_use_bind = true;
316 break;
317 case 'c':
318 cfg_use_csum_off = true;
319 break;
320 case 'C':
321 cfg_use_csum_off_bad = true;
322 break;
323 case 'd':
324 cfg_use_dgram = true;
325 break;
326 case 'g':
327 cfg_use_gso = true;
328 break;
329 case 'l':
330 cfg_payload_len = strtoul(optarg, NULL, 0);
331 break;
332 case 'q':
333 cfg_use_qdisc_bypass = true;
334 break;
335 case 't':
336 cfg_truncate_len = strtoul(optarg, NULL, 0);
337 break;
338 case 'v':
339 cfg_use_vnet = true;
340 break;
341 case 'V':
342 cfg_use_vlan = true;
343 break;
344 default:
345 error(1, 0, "%s: parse error", argv[0]);
346 }
347 }
348
349 if (cfg_use_vlan && cfg_use_dgram)
350 error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
351
352 if (cfg_use_csum_off && !cfg_use_vnet)
353 error(1, 0, "option csum offload (-c) requires vnet (-v)");
354
355 if (cfg_use_csum_off_bad && !cfg_use_csum_off)
356 error(1, 0, "option csum bad (-C) requires csum offload (-c)");
357
358 if (cfg_use_gso && !cfg_use_csum_off)
359 error(1, 0, "option gso (-g) requires csum offload (-c)");
360}
361
362static void run_test(void)
363{
364 int fdr, fds, total_len;
365
366 fdr = setup_rx();
367 fds = setup_sniffer();
368
369 total_len = do_tx();
370
371 /* BPF filter accepts only this length, vlan changes MAC */
372 if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
373 do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
374 tbuf + sizeof(struct virtio_net_hdr));
375
376 do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
377
378 if (close(fds))
379 error(1, errno, "close s");
380 if (close(fdr))
381 error(1, errno, "close r");
382}
383
384int main(int argc, char **argv)
385{
386 parse_opts(argc, argv);
387
388 if (system("ip link set dev lo mtu 1500"))
389 error(1, errno, "ip link set mtu");
390 if (system("ip addr add dev lo 172.17.0.1/24"))
391 error(1, errno, "ip addr add");
392
393 run_test();
394
395 fprintf(stderr, "OK\n\n");
396 return 0;
397}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000000..6331d91b86a6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Run a series of packet socket send regression tests
5
6set -e
7
8readonly mtu=1500
9readonly iphlen=20
10readonly udphlen=8
11
12readonly vnet_hlen=10
13readonly eth_hlen=14
14
15readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
16readonly mss_exceeds="$((${mss} + 1))"
17
18readonly max_mtu=65535
19readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
20readonly max_mss_exceeds="$((${max_mss} + 1))"
21
22# functional checks (not a full cross-product)
23
24echo "dgram"
25./in_netns.sh ./psock_snd -d
26
27echo "dgram bind"
28./in_netns.sh ./psock_snd -d -b
29
30echo "raw"
31./in_netns.sh ./psock_snd
32
33echo "raw bind"
34./in_netns.sh ./psock_snd -b
35
36echo "raw qdisc bypass"
37./in_netns.sh ./psock_snd -q
38
39echo "raw vlan"
40./in_netns.sh ./psock_snd -V
41
42echo "raw vnet hdr"
43./in_netns.sh ./psock_snd -v
44
45echo "raw csum_off"
46./in_netns.sh ./psock_snd -v -c
47
48echo "raw csum_off with bad offset (fails)"
49(! ./in_netns.sh ./psock_snd -v -c -C)
50
51
52# bounds check: send {max, max + 1, min, min - 1} lengths
53
54echo "raw min size"
55./in_netns.sh ./psock_snd -l 0
56
57echo "raw mtu size"
58./in_netns.sh ./psock_snd -l "${mss}"
59
60echo "raw mtu size + 1 (fails)"
61(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
62
63# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
64#
65# echo "raw vlan mtu size"
66# ./in_netns.sh ./psock_snd -V -l "${mss}"
67
68echo "raw vlan mtu size + 1 (fails)"
69(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
70
71echo "dgram mtu size"
72./in_netns.sh ./psock_snd -d -l "${mss}"
73
74echo "dgram mtu size + 1 (fails)"
75(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
76
77echo "raw truncate hlen (fails: does not arrive)"
78(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
79
80echo "raw truncate hlen - 1 (fails: EINVAL)"
81(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
82
83
84# gso checks: implies -l, because with gso len must exceed gso_size
85
86echo "raw gso min size"
87./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
88
89echo "raw gso min size - 1 (fails)"
90(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
91
92echo "raw gso max size"
93./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
94
95echo "raw gso max size + 1 (fails)"
96(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
97
98echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 7f6cd9fdacf3..7ec4fa4d55dc 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -60,6 +60,8 @@
60 60
61#include "psock_lib.h" 61#include "psock_lib.h"
62 62
63#include "../kselftest.h"
64
63#ifndef bug_on 65#ifndef bug_on
64# define bug_on(cond) assert(!(cond)) 66# define bug_on(cond) assert(!(cond))
65#endif 67#endif
@@ -825,7 +827,7 @@ static int test_tpacket(int version, int type)
825 fprintf(stderr, "test: skip %s %s since user and kernel " 827 fprintf(stderr, "test: skip %s %s since user and kernel "
826 "space have different bit width\n", 828 "space have different bit width\n",
827 tpacket_str[version], type_str[type]); 829 tpacket_str[version], type_str[type]);
828 return 0; 830 return KSFT_SKIP;
829 } 831 }
830 832
831 sock = pfsocket(version); 833 sock = pfsocket(version);
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index 365c32e84189..c9f478b40996 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,6 +23,8 @@
23#include <unistd.h> 23#include <unistd.h>
24#include <numa.h> 24#include <numa.h>
25 25
26#include "../kselftest.h"
27
26static const int PORT = 8888; 28static const int PORT = 8888;
27 29
28static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) 30static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
@@ -229,7 +231,7 @@ int main(void)
229 int *rcv_fd, nodes; 231 int *rcv_fd, nodes;
230 232
231 if (numa_available() < 0) 233 if (numa_available() < 0)
232 error(1, errno, "no numa api support"); 234 ksft_exit_skip("no numa api support\n");
233 235
234 nodes = numa_max_node() + 1; 236 nodes = numa_max_node() + 1;
235 237
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index e6f485235435..0d7a44fa30af 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -7,6 +7,9 @@
7devdummy="test-dummy0" 7devdummy="test-dummy0"
8ret=0 8ret=0
9 9
10# Kselftest framework requirement - SKIP code is 4.
11ksft_skip=4
12
10# set global exit status, but never reset nonzero one. 13# set global exit status, but never reset nonzero one.
11check_err() 14check_err()
12{ 15{
@@ -333,7 +336,7 @@ kci_test_vrf()
333 ip link show type vrf 2>/dev/null 336 ip link show type vrf 2>/dev/null
334 if [ $? -ne 0 ]; then 337 if [ $? -ne 0 ]; then
335 echo "SKIP: vrf: iproute2 too old" 338 echo "SKIP: vrf: iproute2 too old"
336 return 0 339 return $ksft_skip
337 fi 340 fi
338 341
339 ip link add "$vrfname" type vrf table 10 342 ip link add "$vrfname" type vrf table 10
@@ -409,7 +412,7 @@ kci_test_encap_fou()
409 ip fou help 2>&1 |grep -q 'Usage: ip fou' 412 ip fou help 2>&1 |grep -q 'Usage: ip fou'
410 if [ $? -ne 0 ];then 413 if [ $? -ne 0 ];then
411 echo "SKIP: fou: iproute2 too old" 414 echo "SKIP: fou: iproute2 too old"
412 return 1 415 return $ksft_skip
413 fi 416 fi
414 417
415 ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null 418 ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
@@ -444,7 +447,7 @@ kci_test_encap()
444 ip netns add "$testns" 447 ip netns add "$testns"
445 if [ $? -ne 0 ]; then 448 if [ $? -ne 0 ]; then
446 echo "SKIP encap tests: cannot add net namespace $testns" 449 echo "SKIP encap tests: cannot add net namespace $testns"
447 return 1 450 return $ksft_skip
448 fi 451 fi
449 452
450 ip netns exec "$testns" ip link set lo up 453 ip netns exec "$testns" ip link set lo up
@@ -469,7 +472,7 @@ kci_test_macsec()
469 ip macsec help 2>&1 | grep -q "^Usage: ip macsec" 472 ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
470 if [ $? -ne 0 ]; then 473 if [ $? -ne 0 ]; then
471 echo "SKIP: macsec: iproute2 too old" 474 echo "SKIP: macsec: iproute2 too old"
472 return 0 475 return $ksft_skip
473 fi 476 fi
474 477
475 ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on 478 ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
@@ -502,6 +505,108 @@ kci_test_macsec()
502 echo "PASS: macsec" 505 echo "PASS: macsec"
503} 506}
504 507
508#-------------------------------------------------------------------
509# Example commands
510# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
511# spi 0x07 mode transport reqid 0x07 replay-window 32 \
512# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
513# sel src 14.0.0.52/24 dst 14.0.0.70/24
514# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
515# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
516# spi 0x07 mode transport reqid 0x07
517#
518# Subcommands not tested
519# ip x s update
520# ip x s allocspi
521# ip x s deleteall
522# ip x p update
523# ip x p deleteall
524# ip x p set
525#-------------------------------------------------------------------
526kci_test_ipsec()
527{
528 srcip="14.0.0.52"
529 dstip="14.0.0.70"
530 algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
531
532 # flush to be sure there's nothing configured
533 ip x s flush ; ip x p flush
534 check_err $?
535
536 # start the monitor in the background
537 tmpfile=`mktemp ipsectestXXX`
538 ip x m > $tmpfile &
539 mpid=$!
540 sleep 0.2
541
542 ipsecid="proto esp src $srcip dst $dstip spi 0x07"
543 ip x s add $ipsecid \
544 mode transport reqid 0x07 replay-window 32 \
545 $algo sel src $srcip/24 dst $dstip/24
546 check_err $?
547
548 lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
549 test $lines -eq 2
550 check_err $?
551
552 ip x s count | grep -q "SAD count 1"
553 check_err $?
554
555 lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
556 test $lines -eq 2
557 check_err $?
558
559 ip x s delete $ipsecid
560 check_err $?
561
562 lines=`ip x s list | wc -l`
563 test $lines -eq 0
564 check_err $?
565
566 ipsecsel="dir out src $srcip/24 dst $dstip/24"
567 ip x p add $ipsecsel \
568 tmpl proto esp src $srcip dst $dstip \
569 spi 0x07 mode transport reqid 0x07
570 check_err $?
571
572 lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
573 test $lines -eq 2
574 check_err $?
575
576 ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
577 check_err $?
578
579 lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
580 test $lines -eq 2
581 check_err $?
582
583 ip x p delete $ipsecsel
584 check_err $?
585
586 lines=`ip x p list | wc -l`
587 test $lines -eq 0
588 check_err $?
589
590 # check the monitor results
591 kill $mpid
592 lines=`wc -l $tmpfile | cut "-d " -f1`
593 test $lines -eq 20
594 check_err $?
595 rm -rf $tmpfile
596
597 # clean up any leftovers
598 ip x s flush
599 check_err $?
600 ip x p flush
601 check_err $?
602
603 if [ $ret -ne 0 ]; then
604 echo "FAIL: ipsec"
605 return 1
606 fi
607 echo "PASS: ipsec"
608}
609
505kci_test_gretap() 610kci_test_gretap()
506{ 611{
507 testns="testns" 612 testns="testns"
@@ -511,14 +616,14 @@ kci_test_gretap()
511 ip netns add "$testns" 616 ip netns add "$testns"
512 if [ $? -ne 0 ]; then 617 if [ $? -ne 0 ]; then
513 echo "SKIP gretap tests: cannot add net namespace $testns" 618 echo "SKIP gretap tests: cannot add net namespace $testns"
514 return 1 619 return $ksft_skip
515 fi 620 fi
516 621
517 ip link help gretap 2>&1 | grep -q "^Usage:" 622 ip link help gretap 2>&1 | grep -q "^Usage:"
518 if [ $? -ne 0 ];then 623 if [ $? -ne 0 ];then
519 echo "SKIP: gretap: iproute2 too old" 624 echo "SKIP: gretap: iproute2 too old"
520 ip netns del "$testns" 625 ip netns del "$testns"
521 return 1 626 return $ksft_skip
522 fi 627 fi
523 628
524 # test native tunnel 629 # test native tunnel
@@ -561,14 +666,14 @@ kci_test_ip6gretap()
561 ip netns add "$testns" 666 ip netns add "$testns"
562 if [ $? -ne 0 ]; then 667 if [ $? -ne 0 ]; then
563 echo "SKIP ip6gretap tests: cannot add net namespace $testns" 668 echo "SKIP ip6gretap tests: cannot add net namespace $testns"
564 return 1 669 return $ksft_skip
565 fi 670 fi
566 671
567 ip link help ip6gretap 2>&1 | grep -q "^Usage:" 672 ip link help ip6gretap 2>&1 | grep -q "^Usage:"
568 if [ $? -ne 0 ];then 673 if [ $? -ne 0 ];then
569 echo "SKIP: ip6gretap: iproute2 too old" 674 echo "SKIP: ip6gretap: iproute2 too old"
570 ip netns del "$testns" 675 ip netns del "$testns"
571 return 1 676 return $ksft_skip
572 fi 677 fi
573 678
574 # test native tunnel 679 # test native tunnel
@@ -611,13 +716,13 @@ kci_test_erspan()
611 ip link help erspan 2>&1 | grep -q "^Usage:" 716 ip link help erspan 2>&1 | grep -q "^Usage:"
612 if [ $? -ne 0 ];then 717 if [ $? -ne 0 ];then
613 echo "SKIP: erspan: iproute2 too old" 718 echo "SKIP: erspan: iproute2 too old"
614 return 1 719 return $ksft_skip
615 fi 720 fi
616 721
617 ip netns add "$testns" 722 ip netns add "$testns"
618 if [ $? -ne 0 ]; then 723 if [ $? -ne 0 ]; then
619 echo "SKIP erspan tests: cannot add net namespace $testns" 724 echo "SKIP erspan tests: cannot add net namespace $testns"
620 return 1 725 return $ksft_skip
621 fi 726 fi
622 727
623 # test native tunnel erspan v1 728 # test native tunnel erspan v1
@@ -676,13 +781,13 @@ kci_test_ip6erspan()
676 ip link help ip6erspan 2>&1 | grep -q "^Usage:" 781 ip link help ip6erspan 2>&1 | grep -q "^Usage:"
677 if [ $? -ne 0 ];then 782 if [ $? -ne 0 ];then
678 echo "SKIP: ip6erspan: iproute2 too old" 783 echo "SKIP: ip6erspan: iproute2 too old"
679 return 1 784 return $ksft_skip
680 fi 785 fi
681 786
682 ip netns add "$testns" 787 ip netns add "$testns"
683 if [ $? -ne 0 ]; then 788 if [ $? -ne 0 ]; then
684 echo "SKIP ip6erspan tests: cannot add net namespace $testns" 789 echo "SKIP ip6erspan tests: cannot add net namespace $testns"
685 return 1 790 return $ksft_skip
686 fi 791 fi
687 792
688 # test native tunnel ip6erspan v1 793 # test native tunnel ip6erspan v1
@@ -755,6 +860,7 @@ kci_test_rtnl()
755 kci_test_vrf 860 kci_test_vrf
756 kci_test_encap 861 kci_test_encap
757 kci_test_macsec 862 kci_test_macsec
863 kci_test_ipsec
758 864
759 kci_del_dummy 865 kci_del_dummy
760} 866}
@@ -762,14 +868,14 @@ kci_test_rtnl()
762#check for needed privileges 868#check for needed privileges
763if [ "$(id -u)" -ne 0 ];then 869if [ "$(id -u)" -ne 0 ];then
764 echo "SKIP: Need root privileges" 870 echo "SKIP: Need root privileges"
765 exit 0 871 exit $ksft_skip
766fi 872fi
767 873
768for x in ip tc;do 874for x in ip tc;do
769 $x -Version 2>/dev/null >/dev/null 875 $x -Version 2>/dev/null >/dev/null
770 if [ $? -ne 0 ];then 876 if [ $? -ne 0 ];then
771 echo "SKIP: Could not run test without the $x tool" 877 echo "SKIP: Could not run test without the $x tool"
772 exit 0 878 exit $ksft_skip
773 fi 879 fi
774done 880done
775 881
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000000..d044b29ddabc
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
1/*
2 * Copyright 2018 Google Inc.
3 * Author: Soheil Hassas Yeganeh (soheil@google.com)
4 *
5 * Simple example on how to use TCP_INQ and TCP_CM_INQ.
6 *
7 * License (GPLv2):
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
16 * more details.
17 */
18#define _GNU_SOURCE
19
20#include <error.h>
21#include <netinet/in.h>
22#include <netinet/tcp.h>
23#include <pthread.h>
24#include <stdio.h>
25#include <errno.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/socket.h>
29#include <unistd.h>
30
31#ifndef TCP_INQ
32#define TCP_INQ 36
33#endif
34
35#ifndef TCP_CM_INQ
36#define TCP_CM_INQ TCP_INQ
37#endif
38
39#define BUF_SIZE 8192
40#define CMSG_SIZE 32
41
42static int family = AF_INET6;
43static socklen_t addr_len = sizeof(struct sockaddr_in6);
44static int port = 4974;
45
46static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
47{
48 struct sockaddr_in6 *addr6 = (void *) sockaddr;
49 struct sockaddr_in *addr4 = (void *) sockaddr;
50
51 switch (family) {
52 case PF_INET:
53 memset(addr4, 0, sizeof(*addr4));
54 addr4->sin_family = AF_INET;
55 addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
56 addr4->sin_port = htons(port);
57 break;
58 case PF_INET6:
59 memset(addr6, 0, sizeof(*addr6));
60 addr6->sin6_family = AF_INET6;
61 addr6->sin6_addr = in6addr_loopback;
62 addr6->sin6_port = htons(port);
63 break;
64 default:
65 error(1, 0, "illegal family");
66 }
67}
68
69void *start_server(void *arg)
70{
71 int server_fd = (int)(unsigned long)arg;
72 struct sockaddr_in addr;
73 socklen_t addrlen = sizeof(addr);
74 char *buf;
75 int fd;
76 int r;
77
78 buf = malloc(BUF_SIZE);
79
80 for (;;) {
81 fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
82 if (fd == -1) {
83 perror("accept");
84 break;
85 }
86 do {
87 r = send(fd, buf, BUF_SIZE, 0);
88 } while (r < 0 && errno == EINTR);
89 if (r < 0)
90 perror("send");
91 if (r != BUF_SIZE)
92 fprintf(stderr, "can only send %d bytes\n", r);
93 /* TCP_INQ can overestimate in-queue by one byte if we send
94 * the FIN packet. Sleep for 1 second, so that the client
95 * likely invoked recvmsg().
96 */
97 sleep(1);
98 close(fd);
99 }
100
101 free(buf);
102 close(server_fd);
103 pthread_exit(0);
104}
105
106int main(int argc, char *argv[])
107{
108 struct sockaddr_storage listen_addr, addr;
109 int c, one = 1, inq = -1;
110 pthread_t server_thread;
111 char cmsgbuf[CMSG_SIZE];
112 struct iovec iov[1];
113 struct cmsghdr *cm;
114 struct msghdr msg;
115 int server_fd, fd;
116 char *buf;
117
118 while ((c = getopt(argc, argv, "46p:")) != -1) {
119 switch (c) {
120 case '4':
121 family = PF_INET;
122 addr_len = sizeof(struct sockaddr_in);
123 break;
124 case '6':
125 family = PF_INET6;
126 addr_len = sizeof(struct sockaddr_in6);
127 break;
128 case 'p':
129 port = atoi(optarg);
130 break;
131 }
132 }
133
134 server_fd = socket(family, SOCK_STREAM, 0);
135 if (server_fd < 0)
136 error(1, errno, "server socket");
137 setup_loopback_addr(family, &listen_addr);
138 if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
139 &one, sizeof(one)) != 0)
140 error(1, errno, "setsockopt(SO_REUSEADDR)");
141 if (bind(server_fd, (const struct sockaddr *)&listen_addr,
142 addr_len) == -1)
143 error(1, errno, "bind");
144 if (listen(server_fd, 128) == -1)
145 error(1, errno, "listen");
146 if (pthread_create(&server_thread, NULL, start_server,
147 (void *)(unsigned long)server_fd) != 0)
148 error(1, errno, "pthread_create");
149
150 fd = socket(family, SOCK_STREAM, 0);
151 if (fd < 0)
152 error(1, errno, "client socket");
153 setup_loopback_addr(family, &addr);
154 if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
155 error(1, errno, "connect");
156 if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
157 error(1, errno, "setsockopt(TCP_INQ)");
158
159 msg.msg_name = NULL;
160 msg.msg_namelen = 0;
161 msg.msg_iov = iov;
162 msg.msg_iovlen = 1;
163 msg.msg_control = cmsgbuf;
164 msg.msg_controllen = sizeof(cmsgbuf);
165 msg.msg_flags = 0;
166
167 buf = malloc(BUF_SIZE);
168 iov[0].iov_base = buf;
169 iov[0].iov_len = BUF_SIZE / 2;
170
171 if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
172 error(1, errno, "recvmsg");
173 if (msg.msg_flags & MSG_CTRUNC)
174 error(1, 0, "control message is truncated");
175
176 for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
177 if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
178 inq = *((int *) CMSG_DATA(cm));
179
180 if (inq != BUF_SIZE - iov[0].iov_len) {
181 fprintf(stderr, "unexpected inq: %d\n", inq);
182 exit(1);
183 }
184
185 printf("PASSED\n");
186 free(buf);
187 close(fd);
188 return 0;
189}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000000..77f762780199
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
1/*
2 * Copyright 2018 Google Inc.
3 * Author: Eric Dumazet (edumazet@google.com)
4 *
5 * Reference program demonstrating tcp mmap() usage,
6 * and SO_RCVLOWAT hints for receiver.
7 *
8 * Note : NIC with header split is needed to use mmap() on TCP :
9 * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
10 *
11 * How to use on loopback interface :
12 *
13 * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
14 * tcp_mmap -s -z &
15 * tcp_mmap -H ::1 -z
16 *
17 * Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
18 * (4096 : page size on x86, 12: TCP TS option length)
19 * tcp_mmap -s -z -M $((4096+12)) &
20 * tcp_mmap -H ::1 -z -M $((4096+12))
21 *
22 * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
23 * We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
24 *
25 * $ ./tcp_mmap -s & # Without mmap()
26 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
27 * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
28 * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
29 * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
30 * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
31 * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
32 * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
33 * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
34 * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
35 * $ kill %1 # kill tcp_mmap server
36 *
37 * $ ./tcp_mmap -s -z & # With mmap()
38 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
39 * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
40 * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
41 * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
42 * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
43 * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
44 * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
45 * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
46 * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
47 *
48 * License (GPLv2):
49 *
50 * This program is free software; you can redistribute it and/or modify it
51 * under the terms and conditions of the GNU General Public License,
52 * version 2, as published by the Free Software Foundation.
53 *
54 * This program is distributed in the hope it will be useful, but WITHOUT
55 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
56 * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
57 * more details.
58 *
59 * You should have received a copy of the GNU General Public License along with
60 * this program; if not, write to the Free Software Foundation, Inc.,
61 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
62 */
63#define _GNU_SOURCE
64#include <pthread.h>
65#include <sys/types.h>
66#include <fcntl.h>
67#include <error.h>
68#include <sys/socket.h>
69#include <sys/mman.h>
70#include <sys/resource.h>
71#include <unistd.h>
72#include <string.h>
73#include <stdlib.h>
74#include <stdio.h>
75#include <errno.h>
76#include <time.h>
77#include <sys/time.h>
78#include <netinet/in.h>
79#include <arpa/inet.h>
80#include <poll.h>
81#include <linux/tcp.h>
82#include <assert.h>
83
84#ifndef MSG_ZEROCOPY
85#define MSG_ZEROCOPY 0x4000000
86#endif
87
88#define FILE_SZ (1UL << 35)
89static int cfg_family = AF_INET6;
90static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
91static int cfg_port = 8787;
92
93static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */
94static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */
95static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
96static int xflg; /* hash received data (simple xor) (-h option) */
97static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
98
99static int chunk_size = 512*1024;
100
101unsigned long htotal;
102
/* Prefetch the cache line containing @x into the cache (x86-64 only;
 * a no-op on other architectures).  Pure performance hint used by
 * hash_zone() to stay ahead of the XOR loop.
 */
static inline void prefetch(const void *x)
{
#if defined(__x86_64__)
	asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
#endif
}
109
110void hash_zone(void *zone, unsigned int length)
111{
112 unsigned long temp = htotal;
113
114 while (length >= 8*sizeof(long)) {
115 prefetch(zone + 384);
116 temp ^= *(unsigned long *)zone;
117 temp ^= *(unsigned long *)(zone + sizeof(long));
118 temp ^= *(unsigned long *)(zone + 2*sizeof(long));
119 temp ^= *(unsigned long *)(zone + 3*sizeof(long));
120 temp ^= *(unsigned long *)(zone + 4*sizeof(long));
121 temp ^= *(unsigned long *)(zone + 5*sizeof(long));
122 temp ^= *(unsigned long *)(zone + 6*sizeof(long));
123 temp ^= *(unsigned long *)(zone + 7*sizeof(long));
124 zone += 8*sizeof(long);
125 length -= 8*sizeof(long);
126 }
127 while (length >= 1) {
128 temp ^= *(unsigned char *)zone;
129 zone += 1;
130 length--;
131 }
132 htotal = temp;
133}
134
/* Per-connection receiver thread.  @arg carries the accepted socket fd
 * (smuggled through the void* as an unsigned long).  Drains the socket
 * either via the TCP_ZEROCOPY_RECEIVE getsockopt into an mmap()ed
 * window (when zflg is set) or via plain read(), optionally hashing the
 * payload (xflg), then prints throughput and CPU-usage statistics.
 */
void *child_thread(void *arg)
{
	unsigned long total_mmap = 0, total = 0;
	struct tcp_zerocopy_receive zc;
	unsigned long delta_usec;
	int flags = MAP_SHARED;
	struct timeval t0, t1;
	char *buffer = NULL;
	void *addr = NULL;
	double throughput;
	struct rusage ru;
	int lu, fd;

	fd = (int)(unsigned long)arg;

	gettimeofday(&t0, NULL);

	/* non-blocking reads; the poll() below paces the loop */
	fcntl(fd, F_SETFL, O_NDELAY);
	buffer = malloc(chunk_size);
	if (!buffer) {
		perror("malloc");
		goto error;
	}
	if (zflg) {
		/* window the kernel remaps received payload pages into */
		addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
		if (addr == (void *)-1)
			zflg = 0; /* mmap failed: fall back to copy mode */
	}
	while (1) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
		int sub;

		poll(&pfd, 1, 10000);
		if (zflg) {
			socklen_t zc_len = sizeof(zc);
			int res;

			zc.address = (__u64)addr;
			zc.length = chunk_size;
			zc.recv_skip_hint = 0;
			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
					 &zc, &zc_len);
			if (res == -1)
				break;

			if (zc.length) {
				/* zc.length bytes were mapped, no copy */
				assert(zc.length <= chunk_size);
				total_mmap += zc.length;
				if (xflg)
					hash_zone(addr, zc.length);
				total += zc.length;
			}
			if (zc.recv_skip_hint) {
				/* non-mappable bytes: drain with read() */
				assert(zc.recv_skip_hint <= chunk_size);
				lu = read(fd, buffer, zc.recv_skip_hint);
				if (lu > 0) {
					if (xflg)
						hash_zone(buffer, lu);
					total += lu;
				}
			}
			continue;
		}
		/* copy mode: fill one chunk at a time via read() */
		sub = 0;
		while (sub < chunk_size) {
			lu = read(fd, buffer + sub, chunk_size - sub);
			if (lu == 0)
				goto end; /* peer closed: transfer done */
			if (lu < 0)
				break;
			if (xflg)
				hash_zone(buffer + sub, lu);
			total += lu;
			sub += lu;
		}
	}
end:
	gettimeofday(&t1, NULL);
	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;

	throughput = 0;
	if (delta_usec)
		throughput = total * 8.0 / (double)delta_usec / 1000.0;
	getrusage(RUSAGE_THREAD, &ru);
	/* only report meaningful transfers (> 1 MB) */
	if (total > 1024*1024) {
		unsigned long total_usec;
		unsigned long mb = total >> 20;
		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
		       " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
				total / (1024.0 * 1024.0),
				100.0*total_mmap/total,
				(double)delta_usec / 1000000.0,
				throughput,
				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
				(double)total_usec/mb,
				ru.ru_nvcsw);
	}
error:
	free(buffer);
	close(fd);
	if (zflg)
		munmap(addr, chunk_size);
	pthread_exit(0);
}
242
243static void apply_rcvsnd_buf(int fd)
244{
245 if (rcvbuf && setsockopt(fd, SOL_SOCKET,
246 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
247 perror("setsockopt SO_RCVBUF");
248 }
249
250 if (sndbuf && setsockopt(fd, SOL_SOCKET,
251 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
252 perror("setsockopt SO_SNDBUF");
253 }
254}
255
256
257static void setup_sockaddr(int domain, const char *str_addr,
258 struct sockaddr_storage *sockaddr)
259{
260 struct sockaddr_in6 *addr6 = (void *) sockaddr;
261 struct sockaddr_in *addr4 = (void *) sockaddr;
262
263 switch (domain) {
264 case PF_INET:
265 memset(addr4, 0, sizeof(*addr4));
266 addr4->sin_family = AF_INET;
267 addr4->sin_port = htons(cfg_port);
268 if (str_addr &&
269 inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
270 error(1, 0, "ipv4 parse error: %s", str_addr);
271 break;
272 case PF_INET6:
273 memset(addr6, 0, sizeof(*addr6));
274 addr6->sin6_family = AF_INET6;
275 addr6->sin6_port = htons(cfg_port);
276 if (str_addr &&
277 inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
278 error(1, 0, "ipv6 parse error: %s", str_addr);
279 break;
280 default:
281 error(1, 0, "illegal domain");
282 }
283}
284
/* Server accept loop: configure SO_RCVLOWAT on the listener (so that
 * poll() in the children only wakes once a full chunk is available;
 * presumably inherited by accepted sockets — NOTE(review): confirm
 * inheritance semantics) plus the optional rcv/snd buffer sizes, then
 * spawn one detachable child_thread per accepted connection.
 * Never returns.
 */
static void do_accept(int fdlisten)
{
	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
		       &chunk_size, sizeof(chunk_size)) == -1) {
		perror("setsockopt SO_RCVLOWAT");
	}

	apply_rcvsnd_buf(fdlisten);

	while (1) {
		struct sockaddr_in addr;
		socklen_t addrlen = sizeof(addr);
		pthread_t th;
		int fd, res;

		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
		if (fd == -1) {
			perror("accept");
			continue;
		}
		res = pthread_create(&th, NULL, child_thread,
				     (void *)(unsigned long)fd);
		if (res) {
			/* pthread_create returns the error, not errno */
			errno = res;
			perror("pthread_create");
			close(fd);
		}
	}
}
314
315int main(int argc, char *argv[])
316{
317 struct sockaddr_storage listenaddr, addr;
318 unsigned int max_pacing_rate = 0;
319 unsigned long total = 0;
320 char *host = NULL;
321 int fd, c, on = 1;
322 char *buffer;
323 int sflg = 0;
324 int mss = 0;
325
326 while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
327 switch (c) {
328 case '4':
329 cfg_family = PF_INET;
330 cfg_alen = sizeof(struct sockaddr_in);
331 break;
332 case '6':
333 cfg_family = PF_INET6;
334 cfg_alen = sizeof(struct sockaddr_in6);
335 break;
336 case 'p':
337 cfg_port = atoi(optarg);
338 break;
339 case 'H':
340 host = optarg;
341 break;
342 case 's': /* server : listen for incoming connections */
343 sflg++;
344 break;
345 case 'r':
346 rcvbuf = atoi(optarg);
347 break;
348 case 'w':
349 sndbuf = atoi(optarg);
350 break;
351 case 'z':
352 zflg = 1;
353 break;
354 case 'M':
355 mss = atoi(optarg);
356 break;
357 case 'x':
358 xflg = 1;
359 break;
360 case 'k':
361 keepflag = 1;
362 break;
363 case 'P':
364 max_pacing_rate = atoi(optarg) ;
365 break;
366 default:
367 exit(1);
368 }
369 }
370 if (sflg) {
371 int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
372
373 if (fdlisten == -1) {
374 perror("socket");
375 exit(1);
376 }
377 apply_rcvsnd_buf(fdlisten);
378 setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
379
380 setup_sockaddr(cfg_family, host, &listenaddr);
381
382 if (mss &&
383 setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
384 &mss, sizeof(mss)) == -1) {
385 perror("setsockopt TCP_MAXSEG");
386 exit(1);
387 }
388 if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
389 perror("bind");
390 exit(1);
391 }
392 if (listen(fdlisten, 128) == -1) {
393 perror("listen");
394 exit(1);
395 }
396 do_accept(fdlisten);
397 }
398 buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
399 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
400 if (buffer == (char *)-1) {
401 perror("mmap");
402 exit(1);
403 }
404
405 fd = socket(AF_INET6, SOCK_STREAM, 0);
406 if (fd == -1) {
407 perror("socket");
408 exit(1);
409 }
410 apply_rcvsnd_buf(fd);
411
412 setup_sockaddr(cfg_family, host, &addr);
413
414 if (mss &&
415 setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
416 perror("setsockopt TCP_MAXSEG");
417 exit(1);
418 }
419 if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
420 perror("connect");
421 exit(1);
422 }
423 if (max_pacing_rate &&
424 setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
425 &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
426 perror("setsockopt SO_MAX_PACING_RATE");
427
428 if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
429 &on, sizeof(on)) == -1) {
430 perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
431 zflg = 0;
432 }
433 while (total < FILE_SZ) {
434 long wr = FILE_SZ - total;
435
436 if (wr > chunk_size)
437 wr = chunk_size;
438 /* Note : we just want to fill the pipe with 0 bytes */
439 wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
440 if (wr <= 0)
441 break;
442 total += wr;
443 }
444 close(fd);
445 munmap(buffer, chunk_size);
446 return 0;
447}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000000..e279051bc631
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,693 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4
5#include <stddef.h>
6#include <arpa/inet.h>
7#include <error.h>
8#include <errno.h>
9#include <net/if.h>
10#include <linux/in.h>
11#include <linux/netlink.h>
12#include <linux/rtnetlink.h>
13#include <netinet/if_ether.h>
14#include <netinet/ip.h>
15#include <netinet/ip6.h>
16#include <netinet/udp.h>
17#include <stdbool.h>
18#include <stdlib.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/ioctl.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/time.h>
26#include <sys/types.h>
27#include <unistd.h>
28
29#ifndef ETH_MAX_MTU
30#define ETH_MAX_MTU 0xFFFFU
31#endif
32
33#ifndef UDP_SEGMENT
34#define UDP_SEGMENT 103
35#endif
36
37#ifndef UDP_MAX_SEGMENTS
38#define UDP_MAX_SEGMENTS (1 << 6UL)
39#endif
40
41#define CONST_MTU_TEST 1500
42
43#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
44#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
45
46#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
47#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
48
49#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
50#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
51
52static bool cfg_do_ipv4;
53static bool cfg_do_ipv6;
54static bool cfg_do_connected;
55static bool cfg_do_connectionless;
56static bool cfg_do_msgmore;
57static bool cfg_do_setsockopt;
58static int cfg_specific_test_id = -1;
59
60static const char cfg_ifname[] = "lo";
61static unsigned short cfg_port = 9000;
62
63static char buf[ETH_MAX_MTU];
64
65struct testcase {
66 int tlen; /* send() buffer size, may exceed mss */
67 bool tfail; /* send() call is expected to fail */
68 int gso_len; /* mss after applying gso */
69 int r_num_mss; /* recv(): number of calls of full mss */
70 int r_len_last; /* recv(): size of last non-mss dgram, if any */
71};
72
73const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
74const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
75
76struct testcase testcases_v4[] = {
77 {
78 /* no GSO: send a single byte */
79 .tlen = 1,
80 .r_len_last = 1,
81 },
82 {
83 /* no GSO: send a single MSS */
84 .tlen = CONST_MSS_V4,
85 .r_num_mss = 1,
86 },
87 {
88 /* no GSO: send a single MSS + 1B: fail */
89 .tlen = CONST_MSS_V4 + 1,
90 .tfail = true,
91 },
92 {
93 /* send a single MSS: will fail with GSO, because the segment
94 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
95 */
96 .tlen = CONST_MSS_V4,
97 .gso_len = CONST_MSS_V4,
98 .tfail = true,
99 .r_num_mss = 1,
100 },
101 {
102 /* send a single MSS + 1B */
103 .tlen = CONST_MSS_V4 + 1,
104 .gso_len = CONST_MSS_V4,
105 .r_num_mss = 1,
106 .r_len_last = 1,
107 },
108 {
109 /* send exactly 2 MSS */
110 .tlen = CONST_MSS_V4 * 2,
111 .gso_len = CONST_MSS_V4,
112 .r_num_mss = 2,
113 },
114 {
115 /* send 2 MSS + 1B */
116 .tlen = (CONST_MSS_V4 * 2) + 1,
117 .gso_len = CONST_MSS_V4,
118 .r_num_mss = 2,
119 .r_len_last = 1,
120 },
121 {
122 /* send MAX segs */
123 .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
124 .gso_len = CONST_MSS_V4,
125 .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
126 },
127
128 {
129 /* send MAX bytes */
130 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
131 .gso_len = CONST_MSS_V4,
132 .r_num_mss = CONST_MAX_SEGS_V4,
133 .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
134 (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
135 },
136 {
137 /* send MAX + 1: fail */
138 .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
139 .gso_len = CONST_MSS_V4,
140 .tfail = true,
141 },
142 {
143 /* send a single 1B MSS: will fail, see single MSS above */
144 .tlen = 1,
145 .gso_len = 1,
146 .tfail = true,
147 .r_num_mss = 1,
148 },
149 {
150 /* send 2 1B segments */
151 .tlen = 2,
152 .gso_len = 1,
153 .r_num_mss = 2,
154 },
155 {
156 /* send 2B + 2B + 1B segments */
157 .tlen = 5,
158 .gso_len = 2,
159 .r_num_mss = 2,
160 .r_len_last = 1,
161 },
162 {
163 /* send max number of min sized segments */
164 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
165 .gso_len = 1,
166 .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
167 },
168 {
169 /* send max number + 1 of min sized segments: fail */
170 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
171 .gso_len = 1,
172 .tfail = true,
173 },
174 {
175 /* EOL */
176 }
177};
178
179#ifndef IP6_MAX_MTU
180#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
181#endif
182
183struct testcase testcases_v6[] = {
184 {
185 /* no GSO: send a single byte */
186 .tlen = 1,
187 .r_len_last = 1,
188 },
189 {
190 /* no GSO: send a single MSS */
191 .tlen = CONST_MSS_V6,
192 .r_num_mss = 1,
193 },
194 {
195 /* no GSO: send a single MSS + 1B: fail */
196 .tlen = CONST_MSS_V6 + 1,
197 .tfail = true,
198 },
199 {
200 /* send a single MSS: will fail with GSO, because the segment
201 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
202 */
203 .tlen = CONST_MSS_V6,
204 .gso_len = CONST_MSS_V6,
205 .tfail = true,
206 .r_num_mss = 1,
207 },
208 {
209 /* send a single MSS + 1B */
210 .tlen = CONST_MSS_V6 + 1,
211 .gso_len = CONST_MSS_V6,
212 .r_num_mss = 1,
213 .r_len_last = 1,
214 },
215 {
216 /* send exactly 2 MSS */
217 .tlen = CONST_MSS_V6 * 2,
218 .gso_len = CONST_MSS_V6,
219 .r_num_mss = 2,
220 },
221 {
222 /* send 2 MSS + 1B */
223 .tlen = (CONST_MSS_V6 * 2) + 1,
224 .gso_len = CONST_MSS_V6,
225 .r_num_mss = 2,
226 .r_len_last = 1,
227 },
228 {
229 /* send MAX segs */
230 .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
231 .gso_len = CONST_MSS_V6,
232 .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
233 },
234
235 {
236 /* send MAX bytes */
237 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
238 .gso_len = CONST_MSS_V6,
239 .r_num_mss = CONST_MAX_SEGS_V6,
240 .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
241 (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
242 },
243 {
244 /* send MAX + 1: fail */
245 .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
246 .gso_len = CONST_MSS_V6,
247 .tfail = true,
248 },
249 {
250 /* send a single 1B MSS: will fail, see single MSS above */
251 .tlen = 1,
252 .gso_len = 1,
253 .tfail = true,
254 .r_num_mss = 1,
255 },
256 {
257 /* send 2 1B segments */
258 .tlen = 2,
259 .gso_len = 1,
260 .r_num_mss = 2,
261 },
262 {
263 /* send 2B + 2B + 1B segments */
264 .tlen = 5,
265 .gso_len = 2,
266 .r_num_mss = 2,
267 .r_len_last = 1,
268 },
269 {
270 /* send max number of min sized segments */
271 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
272 .gso_len = 1,
273 .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
274 },
275 {
276 /* send max number + 1 of min sized segments: fail */
277 .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
278 .gso_len = 1,
279 .tfail = true,
280 },
281 {
282 /* EOL */
283 }
284};
285
286static unsigned int get_device_mtu(int fd, const char *ifname)
287{
288 struct ifreq ifr;
289
290 memset(&ifr, 0, sizeof(ifr));
291
292 strcpy(ifr.ifr_name, ifname);
293
294 if (ioctl(fd, SIOCGIFMTU, &ifr))
295 error(1, errno, "ioctl get mtu");
296
297 return ifr.ifr_mtu;
298}
299
300static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
301{
302 struct ifreq ifr;
303
304 memset(&ifr, 0, sizeof(ifr));
305
306 ifr.ifr_mtu = mtu;
307 strcpy(ifr.ifr_name, ifname);
308
309 if (ioctl(fd, SIOCSIFMTU, &ifr))
310 error(1, errno, "ioctl set mtu");
311}
312
313static void set_device_mtu(int fd, int mtu)
314{
315 int val;
316
317 val = get_device_mtu(fd, cfg_ifname);
318 fprintf(stderr, "device mtu (orig): %u\n", val);
319
320 __set_device_mtu(fd, cfg_ifname, mtu);
321 val = get_device_mtu(fd, cfg_ifname);
322 if (val != mtu)
323 error(1, 0, "unable to set device mtu to %u\n", val);
324
325 fprintf(stderr, "device mtu (test): %u\n", val);
326}
327
/* Enable strict path-MTU discovery (DF bit / no fragmentation) on @fd,
 * for the socket's address family.  Exits on failure.
 */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}
345
/* Read the kernel's cached path MTU for the connected socket @fd and
 * log it to stderr.  Exits on getsockopt failure.
 */
static unsigned int get_path_mtu(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU : IPV6_MTU;
	unsigned int mtu;
	socklen_t len = sizeof(mtu);

	if (getsockopt(fd, level, name, &mtu, &len))
		error(1, errno, "getsockopt mtu");

	fprintf(stderr, "path mtu (read): %u\n", mtu);
	return mtu;
}
365
366/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32")
 *
 * Hand-packs a single RTM_NEWROUTE netlink message:
 *   nlmsghdr | rtmsg | RTA_DST | RTA_OIF | RTA_METRICS { RTAX_MTU }
 * for the fixed test destination (addr4 or addr6) and sends it on a
 * NETLINK_ROUTE socket.  All offsets are tracked manually in @off, so
 * the attribute order below must match the buffer sizing in @data.
 */
static void set_route_mtu(int mtu, bool is_ipv4)
{
	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
	struct nlmsghdr *nh;
	struct rtattr *rta;
	struct rtmsg *rt;
	/* worst-case (ipv6) message size, attribute by attribute */
	char data[NLMSG_ALIGN(sizeof(*nh)) +
		  NLMSG_ALIGN(sizeof(*rt)) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
	int fd, ret, alen, off = 0;

	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		error(1, errno, "socket netlink");

	memset(data, 0, sizeof(data));

	nh = (void *)data;
	nh->nlmsg_type = RTM_NEWROUTE;
	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
	off += NLMSG_ALIGN(sizeof(*nh));

	rt = (void *)(data + off);
	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
	rt->rtm_table = RT_TABLE_MAIN;
	rt->rtm_dst_len = alen << 3;	/* host route: full prefix, in bits */
	rt->rtm_protocol = RTPROT_BOOT;
	rt->rtm_scope = RT_SCOPE_UNIVERSE;
	rt->rtm_type = RTN_UNICAST;
	off += NLMSG_ALIGN(sizeof(*rt));

	rta = (void *)(data + off);
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(alen);
	if (is_ipv4)
		memcpy(RTA_DATA(rta), &addr4, alen);
	else
		memcpy(RTA_DATA(rta), &addr6, alen);
	off += NLMSG_ALIGN(rta->rta_len);

	rta = (void *)(data + off);
	rta->rta_type = RTA_OIF;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
	off += NLMSG_ALIGN(rta->rta_len);

	/* MTU is a subtype in a metrics type */
	rta = (void *)(data + off);
	rta->rta_type = RTA_METRICS;
	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
	off += NLMSG_ALIGN(rta->rta_len);

	/* now fill MTU subtype. Note that it fits within above rta_len */
	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
	rta->rta_type = RTAX_MTU;
	rta->rta_len = RTA_LENGTH(sizeof(int));
	*((int *)(RTA_DATA(rta))) = mtu;

	nh->nlmsg_len = off;

	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
	if (ret != off)
		error(1, errno, "send netlink: %uB != %uB\n", ret, off);

	if (close(fd))
		error(1, errno, "close netlink");

	fprintf(stderr, "route mtu (test): %u\n", mtu);
}
440
/* Send one msghdr on @fd.  Returns false on the "expected" failures
 * the tests deliberately provoke (EMSGSIZE/ENOMEM/EINVAL), exits on
 * any other error, a short send, or unexpected returned msg_flags;
 * returns true on a complete send.
 *
 * Fix: the short-send check compared a signed int against size_t
 * (iov_len) and printed it with %lu; ret is known >= 0 here, so cast
 * it and use %zu for the size_t argument.
 */
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
	int ret;

	ret = sendmsg(fd, msg, flags);
	if (ret == -1 &&
	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
		return false;
	if (ret == -1)
		error(1, errno, "sendmsg");
	if ((size_t)ret != msg->msg_iov->iov_len)
		error(1, 0, "sendto: %d != %zu", ret, msg->msg_iov->iov_len);
	if (msg->msg_flags)
		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);

	return true;
}
458
/* Send @len bytes from the global buf to @addr on @fd, requesting GSO
 * segmentation into @gso_len-sized datagrams via a UDP_SEGMENT cmsg
 * (unless -s selected the setsockopt path, configured by the caller).
 * With -m (cfg_do_msgmore) the payload is split into a 1-byte MSG_MORE
 * send followed by the remainder.  Returns __send_one()'s verdict for
 * the final send.
 */
static bool send_one(int fd, int len, int gso_len,
		     struct sockaddr *addr, socklen_t alen)
{
	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	struct cmsghdr *cm;

	iov.iov_base = buf;
	iov.iov_len = len;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_name = addr;
	msg.msg_namelen = alen;

	if (gso_len && !cfg_do_setsockopt) {
		/* per-call segment size via SOL_UDP/UDP_SEGMENT cmsg */
		msg.msg_control = control;
		msg.msg_controllen = sizeof(control);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_level = SOL_UDP;
		cm->cmsg_type = UDP_SEGMENT;
		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
		*((uint16_t *) CMSG_DATA(cm)) = gso_len;
	}

	/* If MSG_MORE, send 1 byte followed by remainder */
	if (cfg_do_msgmore && len > 1) {
		iov.iov_len = 1;
		if (!__send_one(fd, &msg, MSG_MORE))
			error(1, 0, "send 1B failed");

		iov.iov_base++;
		iov.iov_len = len - 1;
	}

	return __send_one(fd, &msg, 0);
}
499
500static int recv_one(int fd, int flags)
501{
502 int ret;
503
504 ret = recv(fd, buf, sizeof(buf), flags);
505 if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
506 return 0;
507 if (ret == -1)
508 error(1, errno, "recv");
509
510 return ret;
511}
512
/* Execute one testcase: send @test->tlen bytes from fdt with the
 * requested gso_len (via setsockopt when -s), verify the send outcome
 * matches test->tfail, then receive on fdr and check the datagram
 * pattern: r_num_mss full-MSS datagrams, an optional short trailer of
 * r_len_last bytes, and nothing more.  Any mismatch is fatal.
 */
static void run_one(struct testcase *test, int fdt, int fdr,
		    struct sockaddr *addr, socklen_t alen)
{
	int i, ret, val, mss;
	bool sent;

	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
			addr->sa_family == AF_INET ? 4 : 6,
			test->tlen, test->gso_len,
			test->tfail ? "(fail)" : "");

	val = test->gso_len;
	if (cfg_do_setsockopt) {
		/* socket-wide segment size instead of per-call cmsg */
		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
			error(1, errno, "setsockopt udp segment");
	}

	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
	if (sent && test->tfail)
		error(1, 0, "send succeeded while expecting failure");
	if (!sent && !test->tfail)
		error(1, 0, "send failed while expecting success");
	if (!sent)
		return;

	/* without GSO the whole payload arrives as one datagram of tlen,
	 * so the expected "mss" is the family's constant MSS
	 */
	if (test->gso_len)
		mss = test->gso_len;
	else
		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;


	/* Recv all full MSS datagrams */
	for (i = 0; i < test->r_num_mss; i++) {
		ret = recv_one(fdr, 0);
		if (ret != mss)
			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
	}

	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
	if (test->r_len_last) {
		ret = recv_one(fdr, 0);
		if (ret != test->r_len_last)
			error(1, 0, "recv.%d: %d != %d (last)",
			      i, ret, test->r_len_last);
	}

	/* Verify received all data */
	ret = recv_one(fdr, MSG_DONTWAIT);
	if (ret)
		error(1, 0, "recv: unexpected datagram");
}
564
565static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
566{
567 struct testcase *tests, *test;
568
569 tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
570
571 for (test = tests; test->tlen; test++) {
572 /* if a specific test is given, then skip all others */
573 if (cfg_specific_test_id == -1 ||
574 cfg_specific_test_id == test - tests)
575 run_one(test, fdt, fdr, addr, alen);
576 }
577}
578
/* Drive one family's test matrix: create a bound receiver (fdr, with a
 * short receive timeout so failures don't hang) and a sender (fdt with
 * strict PMTU discovery so fragmentation cannot mask GSO failures),
 * then run the connectionless (-C) and/or connected (-c) passes.  The
 * connected pass raises the device MTU and pins a lower route MTU so
 * the path MTU, not the device MTU, governs segmentation.
 */
static void run_test(struct sockaddr *addr, socklen_t alen)
{
	struct timeval tv = { .tv_usec = 100 * 1000 };
	int fdr, fdt, val;

	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdr == -1)
		error(1, errno, "socket r");

	if (bind(fdr, addr, alen))
		error(1, errno, "bind");

	/* Have tests fail quickly instead of hang */
	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		error(1, errno, "setsockopt rcv timeout");

	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
	if (fdt == -1)
		error(1, errno, "socket t");

	/* Do not fragment these datagrams: only succeed if GSO works */
	set_pmtu_discover(fdt, addr->sa_family == AF_INET);

	if (cfg_do_connectionless) {
		set_device_mtu(fdt, CONST_MTU_TEST);
		run_all(fdt, fdr, addr, alen);
	}

	if (cfg_do_connected) {
		set_device_mtu(fdt, CONST_MTU_TEST + 100);
		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);

		if (connect(fdt, addr, alen))
			error(1, errno, "connect");

		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
		if (val != CONST_MTU_TEST)
			error(1, 0, "bad path mtu %u\n", val);

		run_all(fdt, fdr, addr, 0 /* use connected addr */);
	}

	if (close(fdt))
		error(1, errno, "close t");
	if (close(fdr))
		error(1, errno, "close r");
}
626
627static void run_test_v4(void)
628{
629 struct sockaddr_in addr = {0};
630
631 addr.sin_family = AF_INET;
632 addr.sin_port = htons(cfg_port);
633 addr.sin_addr = addr4;
634
635 run_test((void *)&addr, sizeof(addr));
636}
637
638static void run_test_v6(void)
639{
640 struct sockaddr_in6 addr = {0};
641
642 addr.sin6_family = AF_INET6;
643 addr.sin6_port = htons(cfg_port);
644 addr.sin6_addr = addr6;
645
646 run_test((void *)&addr, sizeof(addr));
647}
648
/* Parse command-line flags into the cfg_* globals:
 *   -4/-6  enable the ipv4/ipv6 passes     -c  connected mode
 *   -C     connectionless mode             -m  split sends with MSG_MORE
 *   -s     use setsockopt(UDP_SEGMENT) instead of a per-call cmsg
 *   -t N   run only testcase index N
 */
static void parse_opts(int argc, char **argv)
{
	int c;

	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
		switch (c) {
		case '4':
			cfg_do_ipv4 = true;
			break;
		case '6':
			cfg_do_ipv6 = true;
			break;
		case 'c':
			cfg_do_connected = true;
			break;
		case 'C':
			cfg_do_connectionless = true;
			break;
		case 'm':
			cfg_do_msgmore = true;
			break;
		case 's':
			cfg_do_setsockopt = true;
			break;
		case 't':
			cfg_specific_test_id = strtoul(optarg, NULL, 0);
			break;
		default:
			error(1, 0, "%s: parse error", argv[0]);
		}
	}
}
681
/* Entry point: parse options, run the selected family matrices, and
 * print "OK" if every enabled testcase passed (any failure already
 * exited via error()).
 */
int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	if (cfg_do_ipv4)
		run_test_v4();
	if (cfg_do_ipv6)
		run_test_v6();

	fprintf(stderr, "OK\n");
	return 0;
}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000000..fec24f584fe9
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Run a series of udpgso regression tests
5
6echo "ipv4 cmsg"
7./in_netns.sh ./udpgso -4 -C
8
9echo "ipv4 setsockopt"
10./in_netns.sh ./udpgso -4 -C -s
11
12echo "ipv6 cmsg"
13./in_netns.sh ./udpgso -6 -C
14
15echo "ipv6 setsockopt"
16./in_netns.sh ./udpgso -6 -C -s
17
18echo "ipv4 connected"
19./in_netns.sh ./udpgso -4 -c
20
21# blocked on 2nd loopback address
22# echo "ipv6 connected"
23# ./in_netns.sh ./udpgso -6 -c
24
25echo "ipv4 msg_more"
26./in_netns.sh ./udpgso -4 -C -m
27
28echo "ipv6 msg_more"
29./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..792fa4d0285e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Run a series of udpgso benchmarks
5
# Send SIGHUP (kill -1) to every background job so the rx processes
# print their final stats and exit; installed as an EXIT trap so the
# receivers are cleaned up even when the tx side fails.
wake_children() {
	local -r jobs="$(jobs -p)"

	if [[ "${jobs}" != "" ]]; then
		kill -1 ${jobs} 2>/dev/null
	fi
}
trap wake_children EXIT
14
# Run one benchmark: start a UDP receiver and a TCP receiver (-t) in
# the background, then run the sender in the foreground with the
# caller's arguments.
run_one() {
	local -r args=$@

	./udpgso_bench_rx &
	./udpgso_bench_rx -t &

	./udpgso_bench_tx ${args}
}
23
# Re-exec this script inside a fresh network namespace; the
# "__subprocess" marker tells the inner invocation to call run_one.
run_in_netns() {
	local -r args=$@

	./in_netns.sh $0 __subprocess ${args}
}
29
# UDP benchmark variants: plain, with GSO (-S), and GSO + zerocopy (-z).
run_udp() {
	local -r args=$@

	echo "udp"
	run_in_netns ${args}

	echo "udp gso"
	run_in_netns ${args} -S

	echo "udp gso zerocopy"
	run_in_netns ${args} -S -z
}
42
# TCP baseline variants (-t): plain and with zerocopy (-z), used as a
# reference point for the UDP GSO numbers.
run_tcp() {
	local -r args=$@

	echo "tcp"
	run_in_netns ${args} -t

	echo "tcp zerocopy"
	run_in_netns ${args} -t -z
}
52
# Run the full benchmark matrix: TCP and UDP variants over IPv4 and
# IPv6 loopback.
#
# Fix: the ipv6 TCP pass previously reused ${ipv4_args}, so the "ipv6"
# tcp numbers were actually measured over IPv4; it now uses
# ${ipv6_args} like the UDP pass.
run_all() {
	local -r core_args="-l 4"
	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
	local -r ipv6_args="${core_args} -6 -D ::1"

	echo "ipv4"
	run_tcp "${ipv4_args}"
	run_udp "${ipv4_args}"

	echo "ipv6"
	run_tcp "${ipv6_args}"
	run_udp "${ipv6_args}"
}
66
# Entry dispatch: no args -> run the full matrix; "__subprocess" ->
# already inside the namespace, run one rx/tx pair; anything else ->
# wrap the given args in a fresh netns.
if [[ $# -eq 0 ]]; then
	run_all
elif [[ $1 == "__subprocess" ]]; then
	shift
	run_one $@
else
	run_in_netns $@
fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
33
static int cfg_port = 8000;		/* port to bind (-p) */
static bool cfg_tcp;			/* TCP instead of UDP (-t) */
static bool cfg_verify;			/* verify payload pattern (-v) */

/* NOTE(review): written from a signal handler and polled in loops;
 * strictly should be volatile sig_atomic_t — confirm intent. */
static bool interrupted;
static unsigned long packets, bytes;	/* per-report-interval counters */
40
41static void sigint_handler(int signum)
42{
43 if (signum == SIGINT)
44 interrupted = true;
45}
46
47static unsigned long gettimeofday_ms(void)
48{
49 struct timeval tv;
50
51 gettimeofday(&tv, NULL);
52 return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
53}
54
55static void do_poll(int fd)
56{
57 struct pollfd pfd;
58 int ret;
59
60 pfd.events = POLLIN;
61 pfd.revents = 0;
62 pfd.fd = fd;
63
64 do {
65 ret = poll(&pfd, 1, 10);
66 if (ret == -1)
67 error(1, errno, "poll");
68 if (ret == 0)
69 continue;
70 if (pfd.revents != POLLIN)
71 error(1, errno, "poll: 0x%x expected 0x%x\n",
72 pfd.revents, POLLIN);
73 } while (!ret && !interrupted);
74}
75
76static int do_socket(bool do_tcp)
77{
78 struct sockaddr_in6 addr = {0};
79 int fd, val;
80
81 fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
82 if (fd == -1)
83 error(1, errno, "socket");
84
85 val = 1 << 21;
86 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
87 error(1, errno, "setsockopt rcvbuf");
88 val = 1;
89 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
90 error(1, errno, "setsockopt reuseport");
91
92 addr.sin6_family = PF_INET6;
93 addr.sin6_port = htons(cfg_port);
94 addr.sin6_addr = in6addr_any;
95 if (bind(fd, (void *) &addr, sizeof(addr)))
96 error(1, errno, "bind");
97
98 if (do_tcp) {
99 int accept_fd = fd;
100
101 if (listen(accept_fd, 1))
102 error(1, errno, "listen");
103
104 do_poll(accept_fd);
105
106 fd = accept(accept_fd, NULL, NULL);
107 if (fd == -1)
108 error(1, errno, "accept");
109 if (close(accept_fd))
110 error(1, errno, "close accept fd");
111 }
112
113 return fd;
114}
115
/* Flush all outstanding bytes for the tcp receive queue */
static void do_flush_tcp(int fd)
{
	int ret;

	while (true) {
		/* MSG_TRUNC flushes up to len bytes */
		ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
		if (ret == -1 && errno == EAGAIN)
			return;		/* queue fully drained */
		if (ret == -1)
			error(1, errno, "flush");
		if (ret == 0) {
			/* client detached */
			exit(0);
		}

		/* Feed the per-second throughput report in do_recv(). */
		packets++;
		bytes += ret;
	}

}
138
/* Map bytes outside 'a'..'z' to '.' so diagnostic output stays printable. */
static char sanitized_char(char val)
{
	if (val < 'a' || val > 'z')
		return '.';

	return val;
}
143
/*
 * Verify that a datagram payload cycles through 'a'..'z', starting from
 * whatever letter its first byte holds. Exits with an error on mismatch.
 */
static void do_verify_udp(const char *data, int len)
{
	char expected = data[0];
	int i;

	if (expected < 'a' || expected > 'z')
		error(1, 0, "data initial byte out of range");

	for (i = 1; i < len; i++) {
		expected = (expected == 'z') ? 'a' : expected + 1;

		if (data[i] != expected)
			error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
			      i, len,
			      sanitized_char(data[i]), data[i],
			      sanitized_char(expected), expected);
	}
}
166
/* Flush all outstanding datagrams. Verify first few bytes of each. */
static void do_flush_udp(int fd)
{
	static char rbuf[ETH_DATA_LEN];
	int ret, len, budget = 256;

	/* With -v, read real payload for verification; otherwise read zero
	 * bytes and rely on MSG_TRUNC for the true datagram length. */
	len = cfg_verify ? sizeof(rbuf) : 0;
	while (budget--) {
		/* MSG_TRUNC will make return value full datagram length */
		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
		if (ret == -1 && errno == EAGAIN)
			return;
		if (ret == -1)
			error(1, errno, "recv");
		if (len) {
			if (ret == 0)
				error(1, errno, "recv: 0 byte datagram\n");

			/* NOTE(review): ret is the full datagram length;
			 * if it exceeds sizeof(rbuf) the verify would read
			 * unreceived bytes — assumes datagrams fit
			 * ETH_DATA_LEN, confirm against the sender. */
			do_verify_udp(rbuf, ret);
		}

		packets++;
		bytes += ret;
	}
}
192
/* Print usage to stderr and exit(1). */
static void usage(const char *filepath)
{
	error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
}
197
198static void parse_opts(int argc, char **argv)
199{
200 int c;
201
202 while ((c = getopt(argc, argv, "ptv")) != -1) {
203 switch (c) {
204 case 'p':
205 cfg_port = htons(strtoul(optarg, NULL, 0));
206 break;
207 case 't':
208 cfg_tcp = true;
209 break;
210 case 'v':
211 cfg_verify = true;
212 break;
213 }
214 }
215
216 if (optind != argc)
217 usage(argv[0]);
218
219 if (cfg_tcp && cfg_verify)
220 error(1, 0, "TODO: implement verify mode for tcp");
221}
222
/*
 * Main receive loop: create the socket, then poll and drain it until
 * SIGINT. Prints a throughput line roughly once per second whenever
 * traffic arrived in that window.
 */
static void do_recv(void)
{
	unsigned long tnow, treport;
	int fd;

	fd = do_socket(cfg_tcp);

	treport = gettimeofday_ms() + 1000;
	do {
		do_poll(fd);

		if (cfg_tcp)
			do_flush_tcp(fd);
		else
			do_flush_udp(fd);

		tnow = gettimeofday_ms();
		if (tnow > treport) {
			if (packets)
				fprintf(stderr,
					"%s rx: %6lu MB/s %8lu calls/s\n",
					cfg_tcp ? "tcp" : "udp",
					bytes >> 20, packets);
			/* reset counters for the next ~1s window */
			bytes = packets = 0;
			treport = tnow + 1000;
		}

	} while (!interrupted);

	if (close(fd))
		error(1, errno, "close");
}
255
int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	/* SIGINT flips `interrupted` so do_recv() can exit cleanly. */
	signal(SIGINT, sigint_handler);

	do_recv();

	return 0;
}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000000..e821564053cf
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4
5#include <arpa/inet.h>
6#include <errno.h>
7#include <error.h>
8#include <netinet/if_ether.h>
9#include <netinet/in.h>
10#include <netinet/ip.h>
11#include <netinet/ip6.h>
12#include <netinet/udp.h>
13#include <poll.h>
14#include <sched.h>
15#include <signal.h>
16#include <stdbool.h>
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <sys/socket.h>
21#include <sys/time.h>
22#include <sys/types.h>
23#include <unistd.h>
24
/* Fallbacks for toolchains whose uapi headers predate these symbols. */
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU 0xFFFFU
#endif

#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103
#endif

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif

#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif

/* Number of pre-filled send buffers to rotate through with -c. */
#define NUM_PKT 100

static bool cfg_cache_trash;		/* -c: rotate buffers to cool cache */
static int cfg_cpu = -1;		/* -C: cpu to pin to (-1 = none) */
static int cfg_connected = true;	/* cleared by -u (unconnected udp) */
static int cfg_family = PF_UNSPEC;	/* -4 / -6 */
static uint16_t cfg_mss;		/* derived in parse_opts() */
static int cfg_payload_len = (1472 * 42);	/* -s: bytes per message */
static int cfg_port = 8000;		/* -p */
static int cfg_runtime_ms = -1;		/* -l secs (-1 = until SIGINT) */
static bool cfg_segment;		/* -S: UDP_SEGMENT (GSO) */
static bool cfg_sendmmsg;		/* -m */
static bool cfg_tcp;			/* -t */
static bool cfg_zerocopy;		/* -z: MSG_ZEROCOPY */

static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;

/* NOTE(review): set from a signal handler; strictly should be
 * volatile sig_atomic_t — confirm intent. */
static bool interrupted;
static char buf[NUM_PKT][ETH_MAX_MTU];
61
62static void sigint_handler(int signum)
63{
64 if (signum == SIGINT)
65 interrupted = true;
66}
67
68static unsigned long gettimeofday_ms(void)
69{
70 struct timeval tv;
71
72 gettimeofday(&tv, NULL);
73 return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
74}
75
/*
 * Pin the current process to a single CPU.
 *
 * Returns 0 on success; exits on failure.
 */
static int set_cpu(int cpu)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (sched_setaffinity(0, sizeof(mask), &mask))
		/* report errno — the original passed 0, losing the cause */
		error(1, errno, "setaffinity %d", cpu);

	return 0;
}
87
88static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
89{
90 struct sockaddr_in6 *addr6 = (void *) sockaddr;
91 struct sockaddr_in *addr4 = (void *) sockaddr;
92
93 switch (domain) {
94 case PF_INET:
95 addr4->sin_family = AF_INET;
96 addr4->sin_port = htons(cfg_port);
97 if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
98 error(1, 0, "ipv4 parse error: %s", str_addr);
99 break;
100 case PF_INET6:
101 addr6->sin6_family = AF_INET6;
102 addr6->sin6_port = htons(cfg_port);
103 if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
104 error(1, 0, "ipv6 parse error: %s", str_addr);
105 break;
106 default:
107 error(1, 0, "illegal domain");
108 }
109}
110
/* Drain every zerocopy completion notification from the error queue. */
static void flush_zerocopy(int fd)
{
	struct msghdr msg = {0};	/* flush */

	for (;;) {
		int ret = recvmsg(fd, &msg, MSG_ERRQUEUE);

		if (ret == -1 && errno == EAGAIN)
			break;
		if (ret == -1)
			error(1, errno, "errqueue");
		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
		msg.msg_flags = 0;
	}
}
127
128static int send_tcp(int fd, char *data)
129{
130 int ret, done = 0, count = 0;
131
132 while (done < cfg_payload_len) {
133 ret = send(fd, data + done, cfg_payload_len - done,
134 cfg_zerocopy ? MSG_ZEROCOPY : 0);
135 if (ret == -1)
136 error(1, errno, "write");
137
138 done += ret;
139 count++;
140 }
141
142 return count;
143}
144
/*
 * Send one payload as a train of <= cfg_mss-byte sendto() calls (no GSO).
 * Returns the number of system calls made.
 */
static int send_udp(int fd, char *data)
{
	int ret, total_len, len, count = 0;

	total_len = cfg_payload_len;

	while (total_len) {
		len = total_len < cfg_mss ? total_len : cfg_mss;

		/* Connected sockets must not pass a destination address. */
		ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
			     cfg_connected ? NULL : (void *)&cfg_dst_addr,
			     cfg_connected ? 0 : cfg_alen);
		if (ret == -1)
			error(1, errno, "write");
		if (ret != len)
			error(1, errno, "write: %uB != %uB\n", ret, len);

		total_len -= len;
		count++;
	}

	return count;
}
168
/*
 * Send one payload as a single sendmmsg() call of <= cfg_mss chunks.
 * Returns the number of messages the kernel reports as sent.
 */
static int send_udp_sendmmsg(int fd, char *data)
{
	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
	struct mmsghdr mmsgs[max_nr_msg];
	struct iovec iov[max_nr_msg];
	unsigned int off = 0, left;
	int i = 0, ret;

	memset(mmsgs, 0, sizeof(mmsgs));

	/* Slice the payload into one iovec per message. */
	left = cfg_payload_len;
	while (left) {
		if (i == max_nr_msg)
			error(1, 0, "sendmmsg: exceeds max_nr_msg");

		iov[i].iov_base = data + off;
		iov[i].iov_len = cfg_mss < left ? cfg_mss : left;

		mmsgs[i].msg_hdr.msg_iov = iov + i;
		mmsgs[i].msg_hdr.msg_iovlen = 1;

		off += iov[i].iov_len;
		left -= iov[i].iov_len;
		i++;
	}

	/* NOTE(review): a short send (ret < i) is not retried here. */
	ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
	if (ret == -1)
		error(1, errno, "sendmmsg");

	return ret;
}
201
202static void send_udp_segment_cmsg(struct cmsghdr *cm)
203{
204 uint16_t *valp;
205
206 cm->cmsg_level = SOL_UDP;
207 cm->cmsg_type = UDP_SEGMENT;
208 cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
209 valp = (void *)CMSG_DATA(cm);
210 *valp = cfg_mss;
211}
212
/*
 * Send the entire payload in one sendmsg() carrying a UDP_SEGMENT cmsg,
 * letting the kernel segment it (UDP GSO). Returns 1 (one message).
 */
static int send_udp_segment(int fd, char *data)
{
	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
	struct msghdr msg = {0};
	struct iovec iov = {0};
	int ret;

	iov.iov_base = data;
	iov.iov_len = cfg_payload_len;

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);
	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));

	msg.msg_name = (void *)&cfg_dst_addr;
	msg.msg_namelen = cfg_alen;

	ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
	if (ret == -1)
		error(1, errno, "sendmsg");
	/* signed/unsigned compare is safe: ret is non-negative here */
	if (ret != iov.iov_len)
		error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);

	return 1;
}
241
/* Print usage to stderr and exit(1). */
static void usage(const char *filepath)
{
	error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
		    filepath);
}
247
/*
 * Parse command-line flags into the cfg_* globals and derive cfg_mss.
 *
 * Note: -D is resolved via setup_sockaddr(cfg_family, ...), so -4/-6
 * must precede -D on the command line.
 */
static void parse_opts(int argc, char **argv)
{
	int max_len, hdrlen;
	int c;

	while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
		switch (c) {
		case '4':
			if (cfg_family != PF_UNSPEC)
				error(1, 0, "Pass one of -4 or -6");
			cfg_family = PF_INET;
			cfg_alen = sizeof(struct sockaddr_in);
			break;
		case '6':
			if (cfg_family != PF_UNSPEC)
				error(1, 0, "Pass one of -4 or -6");
			cfg_family = PF_INET6;
			cfg_alen = sizeof(struct sockaddr_in6);
			break;
		case 'c':
			cfg_cache_trash = true;
			break;
		case 'C':
			cfg_cpu = strtol(optarg, NULL, 0);
			break;
		case 'D':
			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
			break;
		case 'l':
			cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
			break;
		case 'm':
			cfg_sendmmsg = true;
			break;
		case 'p':
			cfg_port = strtoul(optarg, NULL, 0);
			break;
		case 's':
			cfg_payload_len = strtoul(optarg, NULL, 0);
			break;
		case 'S':
			cfg_segment = true;
			break;
		case 't':
			cfg_tcp = true;
			break;
		case 'u':
			cfg_connected = false;
			break;
		case 'z':
			cfg_zerocopy = true;
			break;
		}
	}

	if (optind != argc)
		usage(argv[0]);

	if (cfg_family == PF_UNSPEC)
		error(1, 0, "must pass one of -4 or -6");
	if (cfg_tcp && !cfg_connected)
		error(1, 0, "connectionless tcp makes no sense");
	if (cfg_segment && cfg_sendmmsg)
		error(1, 0, "cannot combine segment offload and sendmmsg");

	/* cfg_mss: link MTU minus IP + UDP headers; max_len caps -s. */
	if (cfg_family == PF_INET)
		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
	else
		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);

	cfg_mss = ETH_DATA_LEN - hdrlen;
	max_len = ETH_MAX_MTU - hdrlen;

	if (cfg_payload_len > max_len)
		error(1, 0, "payload length %u exceeds max %u",
		      cfg_payload_len, max_len);
}
325
/* Enable strict path MTU discovery (sets the DF bit) on fd. */
static void set_pmtu_discover(int fd, bool is_ipv4)
{
	int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	if (setsockopt(fd, level, name, &val, sizeof(val)))
		error(1, errno, "setsockopt path mtu");
}
343
344int main(int argc, char **argv)
345{
346 unsigned long num_msgs, num_sends;
347 unsigned long tnow, treport, tstop;
348 int fd, i, val;
349
350 parse_opts(argc, argv);
351
352 if (cfg_cpu > 0)
353 set_cpu(cfg_cpu);
354
355 for (i = 0; i < sizeof(buf[0]); i++)
356 buf[0][i] = 'a' + (i % 26);
357 for (i = 1; i < NUM_PKT; i++)
358 memcpy(buf[i], buf[0], sizeof(buf[0]));
359
360 signal(SIGINT, sigint_handler);
361
362 fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
363 if (fd == -1)
364 error(1, errno, "socket");
365
366 if (cfg_zerocopy) {
367 val = 1;
368 if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
369 error(1, errno, "setsockopt zerocopy");
370 }
371
372 if (cfg_connected &&
373 connect(fd, (void *)&cfg_dst_addr, cfg_alen))
374 error(1, errno, "connect");
375
376 if (cfg_segment)
377 set_pmtu_discover(fd, cfg_family == PF_INET);
378
379 num_msgs = num_sends = 0;
380 tnow = gettimeofday_ms();
381 tstop = tnow + cfg_runtime_ms;
382 treport = tnow + 1000;
383
384 i = 0;
385 do {
386 if (cfg_tcp)
387 num_sends += send_tcp(fd, buf[i]);
388 else if (cfg_segment)
389 num_sends += send_udp_segment(fd, buf[i]);
390 else if (cfg_sendmmsg)
391 num_sends += send_udp_sendmmsg(fd, buf[i]);
392 else
393 num_sends += send_udp(fd, buf[i]);
394 num_msgs++;
395
396 if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
397 flush_zerocopy(fd);
398
399 tnow = gettimeofday_ms();
400 if (tnow > treport) {
401 fprintf(stderr,
402 "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
403 cfg_tcp ? "tcp" : "udp",
404 (num_msgs * cfg_payload_len) >> 20,
405 num_sends, num_msgs);
406 num_msgs = num_sends = 0;
407 treport = tnow + 1000;
408 }
409
410 /* cold cache when writing buffer */
411 if (cfg_cache_trash)
412 i = ++i < NUM_PKT ? i : 0;
413
414 } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
415
416 if (close(fd))
417 error(1, errno, "close");
418
419 return 0;
420}
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index f6b1338730db..201b598558b9 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,7 +17,6 @@ SUB_DIRS = alignment \
17 benchmarks \ 17 benchmarks \
18 cache_shape \ 18 cache_shape \
19 copyloops \ 19 copyloops \
20 context_switch \
21 dscr \ 20 dscr \
22 mm \ 21 mm \
23 pmu \ 22 pmu \
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 1d980e3d7039..9d383073b7ad 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -3,3 +3,4 @@ copy_first_unaligned
3paste_unaligned 3paste_unaligned
4paste_last_unaligned 4paste_last_unaligned
5copy_paste_unaligned_common 5copy_paste_unaligned_common
6alignment_handler
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
index 3c9c144192be..c14b0fc1edde 100644
--- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -6,8 +6,11 @@
6 * Copyright 2018, Anton Blanchard, IBM Corp. 6 * Copyright 2018, Anton Blanchard, IBM Corp.
7 */ 7 */
8 8
9void _exit(int); 9#define _GNU_SOURCE
10#include <unistd.h>
11#include <sys/syscall.h>
12
10void _start(void) 13void _start(void)
11{ 14{
12 _exit(0); 15 syscall(SYS_exit, 0);
13} 16}
diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore
deleted file mode 100644
index c1431af7b51c..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
1cp_abort
diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile
deleted file mode 100644
index e9351bb4285d..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1TEST_GEN_PROGS := cp_abort
2
3include ../../lib.mk
4
5$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c
deleted file mode 100644
index 5a5b55afda0e..000000000000
--- a/tools/testing/selftests/powerpc/context_switch/cp_abort.c
+++ /dev/null
@@ -1,110 +0,0 @@
1/*
2 * Adapted from Anton Blanchard's context switch microbenchmark.
3 *
4 * Copyright 2009, Anton Blanchard, IBM Corporation.
5 * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * This program tests the copy paste abort functionality of a P9
13 * (or later) by setting up two processes on the same CPU, one
14 * which executes the copy instruction and the other which
15 * executes paste.
16 *
17 * The paste instruction should never succeed, as the cp_abort
18 * instruction is called by the kernel during a context switch.
19 *
20 */
21
22#define _GNU_SOURCE
23
24#include <stdio.h>
25#include <unistd.h>
26#include <stdlib.h>
27#include "utils.h"
28#include <sched.h>
29
30#define READ_FD 0
31#define WRITE_FD 1
32
33#define NUM_LOOPS 1000
34
35/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
36#define PASTE(RA, RB, L, RC) \
37 .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
38
39int paste(void *i)
40{
41 int cr;
42
43 asm volatile(str(PASTE(0, %1, 1, 1))";"
44 "mfcr %0;"
45 : "=r" (cr)
46 : "b" (i)
47 : "memory"
48 );
49 return cr;
50}
51
52/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
53#define COPY(RA, RB, L) \
54 .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
55
56void copy(void *i)
57{
58 asm volatile(str(COPY(0, %0, 1))";"
59 :
60 : "b" (i)
61 : "memory"
62 );
63}
64
65int test_cp_abort(void)
66{
67 /* 128 bytes for a full cache line */
68 char buf[128] __cacheline_aligned;
69 cpu_set_t cpuset;
70 int fd1[2], fd2[2], pid;
71 char c;
72
73 /* only run this test on a P9 or later */
74 SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
75
76 /*
77 * Run both processes on the same CPU, so that copy is more likely
78 * to leak into a paste.
79 */
80 CPU_ZERO(&cpuset);
81 CPU_SET(pick_online_cpu(), &cpuset);
82 FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset));
83
84 FAIL_IF(pipe(fd1) || pipe(fd2));
85
86 pid = fork();
87 FAIL_IF(pid < 0);
88
89 if (!pid) {
90 for (int i = 0; i < NUM_LOOPS; i++) {
91 FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1);
92 FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1);
93 /* A paste succeeds if CR0 EQ bit is set */
94 FAIL_IF(paste(buf) & 0x20000000);
95 }
96 } else {
97 for (int i = 0; i < NUM_LOOPS; i++) {
98 FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1);
99 copy(buf);
100 FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1));
101 }
102 }
103 return 0;
104
105}
106
107int main(int argc, char *argv[])
108{
109 return test_harness(test_cp_abort, "cp_abort");
110}
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 4afdebcce4cd..7f348c059bc2 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -54,6 +54,7 @@
54#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ 54#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */
55#define SPRN_DSCR 0x03 /* Data Stream Control Register */ 55#define SPRN_DSCR 0x03 /* Data Stream Control Register */
56#define SPRN_PPR 896 /* Program Priority Register */ 56#define SPRN_PPR 896 /* Program Priority Register */
57#define SPRN_AMR 13 /* Authority Mask Register - problem state */
57 58
58/* TEXASR register bits */ 59/* TEXASR register bits */
59#define TEXASR_FC 0xFE00000000000000 60#define TEXASR_FC 0xFE00000000000000
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 349acfafc95b..07ec449a2767 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -8,3 +8,5 @@ ptrace-vsx
8ptrace-tm-vsx 8ptrace-tm-vsx
9ptrace-tm-spd-vsx 9ptrace-tm-spd-vsx
10ptrace-tm-spr 10ptrace-tm-spr
11ptrace-hwbreak
12perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 480305266504..28f5b781a553 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,7 +1,8 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ 2TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
3 ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ 3 ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
4 ptrace-tm-spd-vsx ptrace-tm-spr 4 ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
5 perf-hwbreak
5 6
6include ../../lib.mk 7include ../../lib.mk
7 8
@@ -9,6 +10,9 @@ all: $(TEST_PROGS)
9 10
10CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie 11CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
11 12
13ptrace-pkey core-pkey: child.h
14ptrace-pkey core-pkey: LDLIBS += -pthread
15
12$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h 16$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
13 17
14clean: 18clean:
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000000..d7275b7b33dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Helper functions to sync execution between parent and child processes.
4 *
5 * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
6 */
7#include <stdio.h>
8#include <stdbool.h>
9#include <semaphore.h>
10
11/*
12 * Information in a shared memory location for synchronization between child and
13 * parent.
14 */
15struct child_sync {
16 /* The parent waits on this semaphore. */
17 sem_t sem_parent;
18
19 /* If true, the child should give up as well. */
20 bool parent_gave_up;
21
22 /* The child waits on this semaphore. */
23 sem_t sem_child;
24
25 /* If true, the parent should give up as well. */
26 bool child_gave_up;
27};
28
/* Child-side assert: report, flag child_gave_up, wake the parent, bail. */
#define CHILD_FAIL_IF(x, sync)						\
	do {								\
		if (x) {						\
			fprintf(stderr,					\
				"[FAIL] Test FAILED on line %d\n", __LINE__); \
			(sync)->child_gave_up = true;			\
			prod_parent(sync);				\
			return 1;					\
		}							\
	} while (0)

/* Parent-side assert: report, flag parent_gave_up, wake the child, bail. */
#define PARENT_FAIL_IF(x, sync)						\
	do {								\
		if (x) {						\
			fprintf(stderr,					\
				"[FAIL] Test FAILED on line %d\n", __LINE__); \
			(sync)->parent_gave_up = true;			\
			prod_child(sync);				\
			return 1;					\
		}							\
	} while (0)

/* Skip the whole test (ENODEV/EINVAL = feature unsupported), after
 * releasing the child so it does not block forever. */
#define PARENT_SKIP_IF_UNSUPPORTED(x, sync)				\
	do {								\
		if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) {  \
			(sync)->parent_gave_up = true;			\
			prod_child(sync);				\
			SKIP_IF(1);					\
		}							\
	} while (0)
59
60int init_child_sync(struct child_sync *sync)
61{
62 int ret;
63
64 ret = sem_init(&sync->sem_parent, 1, 0);
65 if (ret) {
66 perror("Semaphore initialization failed");
67 return 1;
68 }
69
70 ret = sem_init(&sync->sem_child, 1, 0);
71 if (ret) {
72 perror("Semaphore initialization failed");
73 return 1;
74 }
75
76 return 0;
77}
78
/* Tear down both semaphores created by init_child_sync(). */
void destroy_child_sync(struct child_sync *sync)
{
	sem_destroy(&sync->sem_parent);
	sem_destroy(&sync->sem_child);
}
84
85int wait_child(struct child_sync *sync)
86{
87 int ret;
88
89 /* Wait until the child prods us. */
90 ret = sem_wait(&sync->sem_parent);
91 if (ret) {
92 perror("Error waiting for child");
93 return 1;
94 }
95
96 return sync->child_gave_up;
97}
98
99int prod_child(struct child_sync *sync)
100{
101 int ret;
102
103 /* Unblock the child now. */
104 ret = sem_post(&sync->sem_child);
105 if (ret) {
106 perror("Error prodding child");
107 return 1;
108 }
109
110 return 0;
111}
112
113int wait_parent(struct child_sync *sync)
114{
115 int ret;
116
117 /* Wait until the parent prods us. */
118 ret = sem_wait(&sync->sem_child);
119 if (ret) {
120 perror("Error waiting for parent");
121 return 1;
122 }
123
124 return sync->parent_gave_up;
125}
126
127int prod_parent(struct child_sync *sync)
128{
129 int ret;
130
131 /* Unblock the parent now. */
132 ret = sem_post(&sync->sem_parent);
133 if (ret) {
134 perror("Error prodding parent");
135 return 1;
136 }
137
138 return 0;
139}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000000..36bc312b1f5c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,461 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Ptrace test for Memory Protection Key registers
4 *
5 * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
6 * Copyright (C) 2018 IBM Corporation.
7 */
8#include <limits.h>
9#include <linux/kernel.h>
10#include <sys/mman.h>
11#include <sys/types.h>
12#include <sys/stat.h>
13#include <sys/time.h>
14#include <sys/resource.h>
15#include <fcntl.h>
16#include <unistd.h>
17#include "ptrace.h"
18#include "child.h"
19
20#ifndef __NR_pkey_alloc
21#define __NR_pkey_alloc 384
22#endif
23
24#ifndef __NR_pkey_free
25#define __NR_pkey_free 385
26#endif
27
28#ifndef NT_PPC_PKEY
29#define NT_PPC_PKEY 0x110
30#endif
31
32#ifndef PKEY_DISABLE_EXECUTE
33#define PKEY_DISABLE_EXECUTE 0x4
34#endif
35
36#define AMR_BITS_PER_PKEY 2
37#define PKEY_REG_BITS (sizeof(u64) * 8)
38#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
39
40#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */
41
42static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
43
44static const char user_write[] = "[User Write (Running)]";
45static const char core_read_running[] = "[Core Read (Running)]";
46
47/* Information shared between the parent and the child. */
48struct shared_info {
49 struct child_sync child_sync;
50
51 /* AMR value the parent expects to read in the core file. */
52 unsigned long amr;
53
54 /* IAMR value the parent expects to read in the core file. */
55 unsigned long iamr;
56
57 /* UAMOR value the parent expects to read in the core file. */
58 unsigned long uamor;
59
60 /* When the child crashed. */
61 time_t core_time;
62};
63
/* Raw syscall wrapper: pkey_alloc(2) may be missing from older libc. */
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
{
	return syscall(__NR_pkey_alloc, flags, init_access_rights);
}
68
/* Raw syscall wrapper: pkey_free(2) may be missing from older libc. */
static int sys_pkey_free(int pkey)
{
	return syscall(__NR_pkey_free, pkey);
}
73
74static int increase_core_file_limit(void)
75{
76 struct rlimit rlim;
77 int ret;
78
79 ret = getrlimit(RLIMIT_CORE, &rlim);
80 FAIL_IF(ret);
81
82 if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
83 rlim.rlim_cur = CORE_FILE_LIMIT;
84
85 if (rlim.rlim_max != RLIM_INFINITY &&
86 rlim.rlim_max < CORE_FILE_LIMIT)
87 rlim.rlim_max = CORE_FILE_LIMIT;
88
89 ret = setrlimit(RLIMIT_CORE, &rlim);
90 FAIL_IF(ret);
91 }
92
93 ret = getrlimit(RLIMIT_FSIZE, &rlim);
94 FAIL_IF(ret);
95
96 if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
97 rlim.rlim_cur = CORE_FILE_LIMIT;
98
99 if (rlim.rlim_max != RLIM_INFINITY &&
100 rlim.rlim_max < CORE_FILE_LIMIT)
101 rlim.rlim_max = CORE_FILE_LIMIT;
102
103 ret = setrlimit(RLIMIT_FSIZE, &rlim);
104 FAIL_IF(ret);
105 }
106
107 return TEST_PASS;
108}
109
/*
 * Child process: allocate pkeys, record the AMR/IAMR/UAMOR values the
 * parent should later find in the core file, then deliberately segfault
 * so the kernel dumps core.
 */
static int child(struct shared_info *info)
{
	bool disable_execute = true;
	int pkey1, pkey2, pkey3;
	int *ptr, ret;

	/* Wait until parent fills out the initial register values. */
	ret = wait_parent(&info->child_sync);
	if (ret)
		return ret;

	ret = increase_core_file_limit();
	FAIL_IF(ret);

	/* Get some pkeys so that we can change their bits in the AMR. */
	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
	if (pkey1 < 0) {
		/* No execute-disable support; fall back to a plain key. */
		pkey1 = sys_pkey_alloc(0, 0);
		FAIL_IF(pkey1 < 0);

		disable_execute = false;
	}

	pkey2 = sys_pkey_alloc(0, 0);
	FAIL_IF(pkey2 < 0);

	pkey3 = sys_pkey_alloc(0, 0);
	FAIL_IF(pkey3 < 0);

	/* AMR: pkey1 fully disabled (3), pkey2 write-disabled (2). */
	info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);

	if (disable_execute)
		info->iamr |= 1ul << pkeyshift(pkey1);

	info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);

	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
	       user_write, info->amr, pkey1, pkey2, pkey3);

	mtspr(SPRN_AMR, info->amr);

	/*
	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
	 * permissions after a key is freed.
	 */
	sys_pkey_free(pkey3);

	info->core_time = time(NULL);

	/* Crash. */
	ptr = 0;
	*ptr = 1;

	/* Shouldn't get here. */
	FAIL_IF(true);

	return TEST_FAIL;
}
168
/* Return file size if filename exists and pass sanity check, or zero if not. */
static off_t try_core_file(const char *filename, struct shared_info *info,
			   pid_t pid)
{
	struct stat buf;
	int ret;

	/* NOTE(review): pid is unused, and the comment above promises zero
	 * on failure while TEST_FAIL is returned — confirm callers treat
	 * TEST_FAIL as "no usable core file". */
	ret = stat(filename, &buf);
	if (ret == -1)
		return TEST_FAIL;

	/* Make sure we're not using a stale core file. */
	return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
}
183
/* Step to the following ELF note: header plus 4-byte-aligned name/desc. */
static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
{
	size_t skip = sizeof(*nhdr) +
		      __ALIGN_KERNEL(nhdr->n_namesz, 4) +
		      __ALIGN_KERNEL(nhdr->n_descsz, 4);

	return (void *) nhdr + skip;
}
190
/*
 * Parse the core file in memory: validate the ELF header, locate the
 * PT_NOTE segment and its NT_PPC_PKEY note, and compare the dumped
 * AMR/IAMR/UAMOR against the values the child recorded in *info.
 */
static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
			   off_t core_size)
{
	unsigned long *regs;
	Elf64_Phdr *phdr;
	Elf64_Nhdr *nhdr;
	size_t phdr_size;
	void *p = ehdr, *note;
	int ret;

	ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
	FAIL_IF(ret);

	FAIL_IF(ehdr->e_type != ET_CORE);
	FAIL_IF(ehdr->e_machine != EM_PPC64);
	FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);

	/*
	 * e_phnum is at most 65535 so calculating the size of the
	 * program header cannot overflow.
	 */
	phdr_size = sizeof(*phdr) * ehdr->e_phnum;

	/* Sanity check the program header table location. */
	FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
	FAIL_IF(ehdr->e_phoff + phdr_size > core_size);

	/* Find the PT_NOTE segment. */
	for (phdr = p + ehdr->e_phoff;
	     (void *) phdr < p + ehdr->e_phoff + phdr_size;
	     phdr += ehdr->e_phentsize)
		if (phdr->p_type == PT_NOTE)
			break;

	FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);

	/* Find the NT_PPC_PKEY note. */
	for (nhdr = p + phdr->p_offset;
	     (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
	     nhdr = next_note(nhdr))
		if (nhdr->n_type == NT_PPC_PKEY)
			break;

	FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
	FAIL_IF(nhdr->n_descsz == 0);

	/* The note payload starts after the header and padded name. */
	p = nhdr;
	note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);

	/* Payload layout: regs[0]=AMR, regs[1]=IAMR, regs[2]=UAMOR. */
	regs = (unsigned long *) note;

	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
	       core_read_running, regs[0], regs[1], regs[2]);

	FAIL_IF(regs[0] != info->amr);
	FAIL_IF(regs[1] != info->iamr);
	FAIL_IF(regs[2] != info->uamor);

	return TEST_PASS;
}
251
252static int parent(struct shared_info *info, pid_t pid)
253{
254 char *filenames, *filename[3];
255 int fd, i, ret, status;
256 unsigned long regs[3];
257 off_t core_size;
258 void *core;
259
260 /*
261 * Get the initial values for AMR, IAMR and UAMOR and communicate them
262 * to the child.
263 */
264 ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
265 PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
266 PARENT_FAIL_IF(ret, &info->child_sync);
267
268 info->amr = regs[0];
269 info->iamr = regs[1];
270 info->uamor = regs[2];
271
272 /* Wake up child so that it can set itself up. */
273 ret = prod_child(&info->child_sync);
274 PARENT_FAIL_IF(ret, &info->child_sync);
275
276 ret = wait(&status);
277 if (ret != pid) {
278 printf("Child's exit status not captured\n");
279 return TEST_FAIL;
280 } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
281 printf("Child didn't dump core\n");
282 return TEST_FAIL;
283 }
284
285 /* Construct array of core file names to try. */
286
287 filename[0] = filenames = malloc(PATH_MAX);
288 if (!filenames) {
289 perror("Error allocating memory");
290 return TEST_FAIL;
291 }
292
293 ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
294 if (ret < 0 || ret >= PATH_MAX) {
295 ret = TEST_FAIL;
296 goto out;
297 }
298
299 filename[1] = filename[0] + ret + 1;
300 ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
301 if (ret < 0 || ret >= PATH_MAX - ret - 1) {
302 ret = TEST_FAIL;
303 goto out;
304 }
305 filename[2] = "core";
306
307 for (i = 0; i < 3; i++) {
308 core_size = try_core_file(filename[i], info, pid);
309 if (core_size != TEST_FAIL)
310 break;
311 }
312
313 if (i == 3) {
314 printf("Couldn't find core file\n");
315 ret = TEST_FAIL;
316 goto out;
317 }
318
319 fd = open(filename[i], O_RDONLY);
320 if (fd == -1) {
321 perror("Error opening core file");
322 ret = TEST_FAIL;
323 goto out;
324 }
325
326 core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
327 if (core == (void *) -1) {
328 perror("Error mmaping core file");
329 ret = TEST_FAIL;
330 goto out;
331 }
332
333 ret = check_core_file(info, core, core_size);
334
335 munmap(core, core_size);
336 close(fd);
337 unlink(filename[i]);
338
339 out:
340 free(filenames);
341
342 return ret;
343}
344
345static int write_core_pattern(const char *core_pattern)
346{
347 size_t len = strlen(core_pattern), ret;
348 FILE *f;
349
350 f = fopen(core_pattern_file, "w");
351 if (!f) {
352 perror("Error writing to core_pattern file");
353 return TEST_FAIL;
354 }
355
356 ret = fwrite(core_pattern, 1, len, f);
357 fclose(f);
358 if (ret != len) {
359 perror("Error writing to core_pattern file");
360 return TEST_FAIL;
361 }
362
363 return TEST_PASS;
364}
365
366static int setup_core_pattern(char **core_pattern_, bool *changed_)
367{
368 FILE *f;
369 char *core_pattern;
370 int ret;
371
372 core_pattern = malloc(PATH_MAX);
373 if (!core_pattern) {
374 perror("Error allocating memory");
375 return TEST_FAIL;
376 }
377
378 f = fopen(core_pattern_file, "r");
379 if (!f) {
380 perror("Error opening core_pattern file");
381 ret = TEST_FAIL;
382 goto out;
383 }
384
385 ret = fread(core_pattern, 1, PATH_MAX, f);
386 fclose(f);
387 if (!ret) {
388 perror("Error reading core_pattern file");
389 ret = TEST_FAIL;
390 goto out;
391 }
392
393 /* Check whether we can predict the name of the core file. */
394 if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
395 *changed_ = false;
396 else {
397 ret = write_core_pattern("core-pkey.%p");
398 if (ret)
399 goto out;
400
401 *changed_ = true;
402 }
403
404 *core_pattern_ = core_pattern;
405 ret = TEST_PASS;
406
407 out:
408 if (ret)
409 free(core_pattern);
410
411 return ret;
412}
413
414static int core_pkey(void)
415{
416 char *core_pattern;
417 bool changed_core_pattern;
418 struct shared_info *info;
419 int shm_id;
420 int ret;
421 pid_t pid;
422
423 ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
424 if (ret)
425 return ret;
426
427 shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
428 info = shmat(shm_id, NULL, 0);
429
430 ret = init_child_sync(&info->child_sync);
431 if (ret)
432 return ret;
433
434 pid = fork();
435 if (pid < 0) {
436 perror("fork() failed");
437 ret = TEST_FAIL;
438 } else if (pid == 0)
439 ret = child(info);
440 else
441 ret = parent(info, pid);
442
443 shmdt(info);
444
445 if (pid) {
446 destroy_child_sync(&info->child_sync);
447 shmctl(shm_id, IPC_RMID, NULL);
448
449 if (changed_core_pattern)
450 write_core_pattern(core_pattern);
451 }
452
453 free(core_pattern);
454
455 return ret;
456}
457
458int main(int argc, char *argv[])
459{
460 return test_harness(core_pkey, "core_pkey");
461}
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 000000000000..60df0b5e628a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,195 @@
1/*
2 * perf events self profiling example test case for hw breakpoints.
3 *
4 * This tests perf PERF_TYPE_BREAKPOINT parameters
5 * 1) tests all variants of the break on read/write flags
6 * 2) tests exclude_user == 0 and 1
7 * 3) test array matches (if DAWR is supported))
8 * 4) test different numbers of breakpoints matches
9 *
10 * Configure this breakpoint, then read and write the data a number of
11 * times. Then check the output count from perf is as expected.
12 *
13 * Based on:
14 * http://ozlabs.org/~anton/junkcode/perf_events_example1.c
15 *
16 * Copyright (C) 2018 Michael Neuling, IBM Corporation.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24#include <unistd.h>
25#include <assert.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <sys/ioctl.h>
30#include <elf.h>
31#include <pthread.h>
32#include <sys/syscall.h>
33#include <linux/perf_event.h>
34#include <linux/hw_breakpoint.h>
35#include "utils.h"
36
37#define MAX_LOOPS 10000
38
39#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
40
41static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
42 int cpu, int group_fd,
43 unsigned long flags)
44{
45 attr->size = sizeof(*attr);
46 return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
47}
48
/*
 * Try to install (and immediately tear down) a read hw breakpoint of the
 * given length.  Returns true if the kernel accepted it.
 */
static inline bool breakpoint_test(int len)
{
	struct perf_event_attr attr;
	int event_fd;

	/* setup counters */
	memset(&attr, 0, sizeof(attr));
	attr.disabled = 1;
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_R;
	/* bp_addr can point anywhere but needs to be aligned */
	attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
	attr.bp_len = len;

	event_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
	if (event_fd < 0)
		return false;

	close(event_fd);

	return true;
}
68
/* True if the kernel lets us create a plain word-sized hw breakpoint. */
static inline bool perf_breakpoint_supported(void)
{
	const int word_len = 4;

	return breakpoint_test(word_len);
}
73
74static inline bool dawr_supported(void)
75{
76 return breakpoint_test(DAWR_LENGTH_MAX);
77}
78
79static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
80{
81 int i,j;
82 struct perf_event_attr attr;
83 size_t res;
84 unsigned long long breaks, needed;
85 int readint;
86 int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
87 int *readintalign;
88 volatile int *ptr;
89 int break_fd;
90 int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
91 volatile int *k;
92
93 /* align to 0x400 boundary as required by DAWR */
94 readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
95 0xfffffffffffff800);
96
97 ptr = &readint;
98 if (arraytest)
99 ptr = &readintalign[0];
100
101 /* setup counters */
102 memset(&attr, 0, sizeof(attr));
103 attr.disabled = 1;
104 attr.type = PERF_TYPE_BREAKPOINT;
105 attr.bp_type = readwriteflag;
106 attr.bp_addr = (__u64)ptr;
107 attr.bp_len = sizeof(int);
108 if (arraytest)
109 attr.bp_len = DAWR_LENGTH_MAX;
110 attr.exclude_user = exclude_user;
111 break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
112 if (break_fd < 0) {
113 perror("sys_perf_event_open");
114 exit(1);
115 }
116
117 /* start counters */
118 ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
119
120 /* Test a bunch of reads and writes */
121 k = &readint;
122 for (i = 0; i < loop_num; i++) {
123 if (arraytest)
124 k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
125
126 j = *k;
127 *k = j;
128 }
129
130 /* stop counters */
131 ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
132
133 /* read and check counters */
134 res = read(break_fd, &breaks, sizeof(unsigned long long));
135 assert(res == sizeof(unsigned long long));
136 /* we read and write each loop, so subtract the ones we are counting */
137 needed = 0;
138 if (readwriteflag & HW_BREAKPOINT_R)
139 needed += loop_num;
140 if (readwriteflag & HW_BREAKPOINT_W)
141 needed += loop_num;
142 needed = needed * (1 - exclude_user);
143 printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
144 (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
145 if (breaks != needed) {
146 printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
147 (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
148 return 1;
149 }
150 close(break_fd);
151
152 return 0;
153}
154
/*
 * Run the full breakpoint matrix: all read/write flag combinations,
 * with and without exclude_user, plus the DAWR array variant when
 * the hardware supports it.  Returns 0 on success, nonzero otherwise.
 */
static int runtest(void)
{
	int rwflag;
	int exclude_user;
	int ret;
	/*
	 * Probe for the DAWR once up front: dawr_supported() opens and
	 * closes a perf event, so re-probing on every loop iteration is
	 * wasted work — the answer cannot change mid-run.
	 */
	bool have_dawr = dawr_supported();

	/*
	 * perf defines rwflag as two bits read and write and at least
	 * one must be set. So range 1-3.
	 */
	for (rwflag = 1 ; rwflag < 4; rwflag++) {
		for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
			ret = runtestsingle(rwflag, exclude_user, 0);
			if (ret)
				return ret;

			/* if we have the dawr, we can do an array test */
			if (!have_dawr)
				continue;
			ret = runtestsingle(rwflag, exclude_user, 1);
			if (ret)
				return ret;
		}
	}
	return 0;
}
181
182
/* Test entry: seed the RNG, require hw breakpoint support, run the matrix. */
static int perf_hwbreak(void)
{
	srand(time(NULL));

	SKIP_IF(!perf_breakpoint_supported());

	return runtest();
}
191
192int main(int argc, char *argv[], char **envp)
193{
194 return test_harness(perf_hwbreak, "perf_hwbreak");
195}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 000000000000..3066d310f32b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,342 @@
1// SPDX-License-Identifier: GPL-2.0+
2
3/*
4 * Ptrace test for hw breakpoints
5 *
6 * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
7 *
8 * This test forks and the parent then traces the child doing various
9 * types of ptrace enabled breakpoints
10 *
11 * Copyright (C) 2018 Michael Neuling, IBM Corporation.
12 */
13
14#include <sys/ptrace.h>
15#include <unistd.h>
16#include <stddef.h>
17#include <sys/user.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <signal.h>
21#include <sys/types.h>
22#include <sys/wait.h>
23#include "ptrace.h"
24
25/* Breakpoint access modes */
26enum {
27 BP_X = 1,
28 BP_RW = 2,
29 BP_W = 4,
30};
31
32static pid_t child_pid;
33static struct ppc_debug_info dbginfo;
34
35static void get_dbginfo(void)
36{
37 int ret;
38
39 ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
40 if (ret) {
41 perror("Can't get breakpoint info\n");
42 exit(-1);
43 }
44}
45
46static bool hwbreak_present(void)
47{
48 return (dbginfo.num_data_bps != 0);
49}
50
51static bool dawr_present(void)
52{
53 return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
54}
55
56static void set_breakpoint_addr(void *addr)
57{
58 int ret;
59
60 ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
61 if (ret) {
62 perror("Can't set breakpoint addr\n");
63 exit(-1);
64 }
65}
66
67static int set_hwbreakpoint_addr(void *addr, int range)
68{
69 int ret;
70
71 struct ppc_hw_breakpoint info;
72
73 info.version = 1;
74 info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
75 info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
76 if (range > 0)
77 info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
78 info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
79 info.addr = (__u64)addr;
80 info.addr2 = (__u64)addr + range;
81 info.condition_value = 0;
82
83 ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
84 if (ret < 0) {
85 perror("Can't set breakpoint\n");
86 exit(-1);
87 }
88 return ret;
89}
90
91static int del_hwbreakpoint_addr(int watchpoint_handle)
92{
93 int ret;
94
95 ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
96 if (ret < 0) {
97 perror("Can't delete hw breakpoint\n");
98 exit(-1);
99 }
100 return ret;
101}
102
103#define DAWR_LENGTH_MAX 512
104
105/* Dummy variables to test read/write accesses */
106static unsigned long long
107 dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
108 __attribute__((aligned(512)));
109static unsigned long long *dummy_var = dummy_array;
110
111static void write_var(int len)
112{
113 long long *plval;
114 char *pcval;
115 short *psval;
116 int *pival;
117
118 switch (len) {
119 case 1:
120 pcval = (char *)dummy_var;
121 *pcval = 0xff;
122 break;
123 case 2:
124 psval = (short *)dummy_var;
125 *psval = 0xffff;
126 break;
127 case 4:
128 pival = (int *)dummy_var;
129 *pival = 0xffffffff;
130 break;
131 case 8:
132 plval = (long long *)dummy_var;
133 *plval = 0xffffffffffffffffLL;
134 break;
135 }
136}
137
138static void read_var(int len)
139{
140 char cval __attribute__((unused));
141 short sval __attribute__((unused));
142 int ival __attribute__((unused));
143 long long lval __attribute__((unused));
144
145 switch (len) {
146 case 1:
147 cval = *(char *)dummy_var;
148 break;
149 case 2:
150 sval = *(short *)dummy_var;
151 break;
152 case 4:
153 ival = *(int *)dummy_var;
154 break;
155 case 8:
156 lval = *(long long *)dummy_var;
157 break;
158 }
159}
160
/*
 * Child body: do the r/w accesses that trigger the breakpoints the
 * parent installs, then repeat them after the parent has removed the
 * breakpoints (those accesses must NOT trap).
 */
static void trigger_tests(void)
{
	int len, ret;

	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
	if (ret) {
		/* perror() appends its own newline; no '\n' in the message. */
		perror("Can't be traced?");
		return;
	}

	/* Wake up father so that it sets up the first test */
	kill(getpid(), SIGUSR1);

	/* Test write watchpoints */
	for (len = 1; len <= sizeof(long); len <<= 1)
		write_var(len);

	/* Test read/write watchpoints (on read accesses) */
	for (len = 1; len <= sizeof(long); len <<= 1)
		read_var(len);

	/* Test when breakpoint is unset */

	/* Test write watchpoints */
	for (len = 1; len <= sizeof(long); len <<= 1)
		write_var(len);

	/* Test read/write watchpoints (on read accesses) */
	for (len = 1; len <= sizeof(long); len <<= 1)
		read_var(len);
}
196
/*
 * Reap the child's next stop and report whether it was the expected
 * SIGTRAP from a breakpoint hit.
 */
static void check_success(const char *msg)
{
	int status;
	const char *result = "Failed";

	/* Wait for the child to SIGTRAP */
	wait(&status);

	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
		result = "Child process hit the breakpoint";

	printf("%s Result: [%s]\n", msg, result);
}
213
/*
 * Exercise one watchpoint configuration three ways: via the legacy
 * PTRACE_SET_DEBUGREG (DABR-style) interface, via an exact
 * PPC_PTRACE_SETHWDEBUG breakpoint, and via a ranged breakpoint
 * (DAWR-sized when @dawr).  After arming each one the child is
 * continued and check_success() reports whether it trapped.
 *
 * @len is only used in the log messages here; the watch width comes
 * from the breakpoint encoding itself.
 */
static void launch_watchpoints(char *buf, int mode, int len,
			       struct ppc_debug_info *dbginfo, bool dawr)
{
	const char *mode_str;
	unsigned long data = (unsigned long)(dummy_var);
	int wh, range;

	/* Low three bits of the DABR value are control flags; clear them. */
	data &= ~0x7UL;

	if (mode == BP_W) {
		data |= (1UL << 1);	/* watch writes only */
		mode_str = "write";
	} else {
		data |= (1UL << 0);	/* watch reads as well... */
		data |= (1UL << 1);	/* ...and writes */
		mode_str = "read";
	}

	/* Set DABR_TRANSLATION bit */
	data |= (1UL << 2);

	/* use PTRACE_SET_DEBUGREG breakpoints */
	set_breakpoint_addr((void *)data);
	ptrace(PTRACE_CONT, child_pid, NULL, 0);
	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
	check_success(buf);
	/* Unregister hw brkpoint */
	set_breakpoint_addr(NULL);

	data = (data & ~7); /* remove dabr control bits */

	/* use PPC_PTRACE_SETHWDEBUG breakpoint */
	if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
		return; /* not supported */
	wh = set_hwbreakpoint_addr((void *)data, 0);
	ptrace(PTRACE_CONT, child_pid, NULL, 0);
	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
	check_success(buf);
	/* Unregister hw brkpoint */
	del_hwbreakpoint_addr(wh);

	/* try a wider range */
	range = 8;
	if (dawr)
		/* Widest range that fits the DAWR window from this address. */
		range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
	wh = set_hwbreakpoint_addr((void *)data, range);
	ptrace(PTRACE_CONT, child_pid, NULL, 0);
	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
	check_success(buf);
	/* Unregister hw brkpoint */
	del_hwbreakpoint_addr(wh);
}
266
267/* Set the breakpoints and check the child successfully trigger them */
268static int launch_tests(bool dawr)
269{
270 char buf[1024];
271 int len, i, status;
272
273 struct ppc_debug_info dbginfo;
274
275 i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
276 if (i) {
277 perror("Can't set breakpoint info\n");
278 exit(-1);
279 }
280 if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
281 printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
282
283 /* Write watchpoint */
284 for (len = 1; len <= sizeof(long); len <<= 1)
285 launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
286
287 /* Read-Write watchpoint */
288 for (len = 1; len <= sizeof(long); len <<= 1)
289 launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
290
291 ptrace(PTRACE_CONT, child_pid, NULL, 0);
292
293 /*
294 * Now we have unregistered the breakpoint, access by child
295 * should not cause SIGTRAP.
296 */
297
298 wait(&status);
299
300 if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
301 printf("FAIL: Child process hit the breakpoint, which is not expected\n");
302 ptrace(PTRACE_CONT, child_pid, NULL, 0);
303 return TEST_FAIL;
304 }
305
306 if (WIFEXITED(status))
307 printf("Child exited normally\n");
308
309 return TEST_PASS;
310}
311
312static int ptrace_hwbreak(void)
313{
314 pid_t pid;
315 int ret;
316 bool dawr;
317
318 pid = fork();
319 if (!pid) {
320 trigger_tests();
321 return 0;
322 }
323
324 wait(NULL);
325
326 child_pid = pid;
327
328 get_dbginfo();
329 SKIP_IF(!hwbreak_present());
330 dawr = dawr_present();
331
332 ret = launch_tests(dawr);
333
334 wait(NULL);
335
336 return ret;
337}
338
339int main(int argc, char **argv, char **envp)
340{
341 return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
342}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000000..5cf631f792cc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,327 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Ptrace test for Memory Protection Key registers
4 *
5 * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
6 * Copyright (C) 2018 IBM Corporation.
7 */
8#include "ptrace.h"
9#include "child.h"
10
11#ifndef __NR_pkey_alloc
12#define __NR_pkey_alloc 384
13#endif
14
15#ifndef __NR_pkey_free
16#define __NR_pkey_free 385
17#endif
18
19#ifndef NT_PPC_PKEY
20#define NT_PPC_PKEY 0x110
21#endif
22
23#ifndef PKEY_DISABLE_EXECUTE
24#define PKEY_DISABLE_EXECUTE 0x4
25#endif
26
27#define AMR_BITS_PER_PKEY 2
28#define PKEY_REG_BITS (sizeof(u64) * 8)
29#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
30
31static const char user_read[] = "[User Read (Running)]";
32static const char user_write[] = "[User Write (Running)]";
33static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
34static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
35
36/* Information shared between the parent and the child. */
37struct shared_info {
38 struct child_sync child_sync;
39
40 /* AMR value the parent expects to read from the child. */
41 unsigned long amr1;
42
43 /* AMR value the parent is expected to write to the child. */
44 unsigned long amr2;
45
46 /* AMR value that ptrace should refuse to write to the child. */
47 unsigned long amr3;
48
49 /* IAMR value the parent expects to read from the child. */
50 unsigned long expected_iamr;
51
52 /* UAMOR value the parent expects to read from the child. */
53 unsigned long expected_uamor;
54
55 /*
56 * IAMR and UAMOR values that ptrace should refuse to write to the child
57 * (even though they're valid ones) because userspace doesn't have
58 * access to those registers.
59 */
60 unsigned long new_iamr;
61 unsigned long new_uamor;
62};
63
/* pkey_alloc(2) wrapper (no glibc wrapper is assumed to exist). */
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
{
	long rc = syscall(__NR_pkey_alloc, flags, init_access_rights);

	return (int) rc;
}
68
/* pkey_free(2) wrapper. */
static int sys_pkey_free(int pkey)
{
	long rc = syscall(__NR_pkey_free, pkey);

	return (int) rc;
}
73
/*
 * Child side of the ptrace pkey test: allocate pkeys, publish the AMR/
 * IAMR/UAMOR values the parent should observe or write (via the shared
 * memory block), then after each parent ptrace write verify that the
 * AMR holds exactly the one valid value the parent wrote (amr2).
 *
 * Returns TEST_PASS, or fails via CHILD_FAIL_IF (which also releases
 * the parent).  The parent/child handshake order below must mirror
 * parent() exactly.
 */
static int child(struct shared_info *info)
{
	unsigned long reg;
	bool disable_execute = true;
	int pkey1, pkey2, pkey3;
	int ret;

	/* Wait until parent fills out the initial register values. */
	ret = wait_parent(&info->child_sync);
	if (ret)
		return ret;

	/* Get some pkeys so that we can change their bits in the AMR. */
	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
	if (pkey1 < 0) {
		/* Execute-deny not supported; retry without it. */
		pkey1 = sys_pkey_alloc(0, 0);
		CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);

		disable_execute = false;
	}

	pkey2 = sys_pkey_alloc(0, 0);
	CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);

	pkey3 = sys_pkey_alloc(0, 0);
	CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);

	/* amr1/amr2 are valid values; amr3 additionally touches pkey3's bits. */
	info->amr1 |= 3ul << pkeyshift(pkey1);
	info->amr2 |= 3ul << pkeyshift(pkey2);
	info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);

	if (disable_execute)
		info->expected_iamr |= 1ul << pkeyshift(pkey1);

	info->expected_uamor |= 3ul << pkeyshift(pkey1) |
				3ul << pkeyshift(pkey2);
	info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
	info->new_uamor |= 3ul << pkeyshift(pkey1);

	/*
	 * We won't use pkey3. We just want a plausible but invalid key to test
	 * whether ptrace will let us write to AMR bits we are not supposed to.
	 *
	 * This also tests whether the kernel restores the UAMOR permissions
	 * after a key is freed.
	 */
	sys_pkey_free(pkey3);

	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
	       user_write, info->amr1, pkey1, pkey2, pkey3);

	mtspr(SPRN_AMR, info->amr1);

	/* Wait for parent to read our AMR value and write a new one. */
	ret = prod_parent(&info->child_sync);
	CHILD_FAIL_IF(ret, &info->child_sync);

	ret = wait_parent(&info->child_sync);
	if (ret)
		return ret;

	/* The parent should have installed amr2 via ptrace by now. */
	reg = mfspr(SPRN_AMR);

	printf("%-30s AMR: %016lx\n", user_read, reg);

	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);

	/*
	 * Wait for parent to try to write an invalid AMR value.
	 */
	ret = prod_parent(&info->child_sync);
	CHILD_FAIL_IF(ret, &info->child_sync);

	ret = wait_parent(&info->child_sync);
	if (ret)
		return ret;

	/* The invalid write must not have taken effect. */
	reg = mfspr(SPRN_AMR);

	printf("%-30s AMR: %016lx\n", user_read, reg);

	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);

	/*
	 * Wait for parent to try to write an IAMR and a UAMOR value. We can't
	 * verify them, but we can verify that the AMR didn't change.
	 */
	ret = prod_parent(&info->child_sync);
	CHILD_FAIL_IF(ret, &info->child_sync);

	ret = wait_parent(&info->child_sync);
	if (ret)
		return ret;

	reg = mfspr(SPRN_AMR);

	printf("%-30s AMR: %016lx\n", user_read, reg);

	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);

	/* Now let parent know that we are finished. */

	ret = prod_parent(&info->child_sync);
	CHILD_FAIL_IF(ret, &info->child_sync);

	return TEST_PASS;
}
181
/*
 * Parent side of the ptrace pkey test: read the child's pkey registers,
 * write valid and invalid values back via PTRACE_SETREGSET, and verify
 * that only the valid write (amr2) is visible to the child and that
 * IAMR/UAMOR writes are rejected.  Handshake order must mirror child().
 */
static int parent(struct shared_info *info, pid_t pid)
{
	unsigned long regs[3];
	int ret, status;

	/*
	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
	 * to the child.
	 */
	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
	PARENT_FAIL_IF(ret, &info->child_sync);

	/* regset layout: regs[0]=AMR, regs[1]=IAMR, regs[2]=UAMOR. */
	info->amr1 = info->amr2 = info->amr3 = regs[0];
	info->expected_iamr = info->new_iamr = regs[1];
	info->expected_uamor = info->new_uamor = regs[2];

	/* Wake up child so that it can set itself up. */
	ret = prod_child(&info->child_sync);
	PARENT_FAIL_IF(ret, &info->child_sync);

	ret = wait_child(&info->child_sync);
	if (ret)
		return ret;

	/* Verify that we can read the pkey registers from the child. */
	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
	PARENT_FAIL_IF(ret, &info->child_sync);

	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
	       ptrace_read_running, regs[0], regs[1], regs[2]);

	PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);

	/* Write valid AMR value in child. */
	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
	PARENT_FAIL_IF(ret, &info->child_sync);

	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);

	/* Wake up child so that it can verify it changed. */
	ret = prod_child(&info->child_sync);
	PARENT_FAIL_IF(ret, &info->child_sync);

	ret = wait_child(&info->child_sync);
	if (ret)
		return ret;

	/* Write invalid AMR value in child. */
	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
	PARENT_FAIL_IF(ret, &info->child_sync);

	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);

	/* Wake up child so that it can verify it didn't change. */
	ret = prod_child(&info->child_sync);
	PARENT_FAIL_IF(ret, &info->child_sync);

	ret = wait_child(&info->child_sync);
	if (ret)
		return ret;

	/* Try to write to IAMR. Note the inverted check: it must FAIL. */
	regs[0] = info->amr1;
	regs[1] = info->new_iamr;
	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
	PARENT_FAIL_IF(!ret, &info->child_sync);

	printf("%-30s AMR: %016lx IAMR: %016lx\n",
	       ptrace_write_running, regs[0], regs[1]);

	/* Try to write to IAMR and UAMOR. This must fail too. */
	regs[2] = info->new_uamor;
	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
	PARENT_FAIL_IF(!ret, &info->child_sync);

	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
	       ptrace_write_running, regs[0], regs[1], regs[2]);

	/* Verify that all registers still have their expected values. */
	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
	PARENT_FAIL_IF(ret, &info->child_sync);

	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
	       ptrace_read_running, regs[0], regs[1], regs[2]);

	PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);

	/* Wake up child so that it can verify AMR didn't change and wrap up. */
	ret = prod_child(&info->child_sync);
	PARENT_FAIL_IF(ret, &info->child_sync);

	ret = wait(&status);
	if (ret != pid) {
		printf("Child's exit status not captured\n");
		/*
		 * NOTE(review): a lost exit status is reported as TEST_PASS
		 * here (core-pkey.c's parent() treats it as TEST_FAIL) —
		 * confirm this asymmetry is intentional.
		 */
		ret = TEST_PASS;
	} else if (!WIFEXITED(status)) {
		printf("Child exited abnormally\n");
		ret = TEST_FAIL;
	} else
		ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;

	return ret;
}
290
291static int ptrace_pkey(void)
292{
293 struct shared_info *info;
294 int shm_id;
295 int ret;
296 pid_t pid;
297
298 shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
299 info = shmat(shm_id, NULL, 0);
300
301 ret = init_child_sync(&info->child_sync);
302 if (ret)
303 return ret;
304
305 pid = fork();
306 if (pid < 0) {
307 perror("fork() failed");
308 ret = TEST_FAIL;
309 } else if (pid == 0)
310 ret = child(info);
311 else
312 ret = parent(info, pid);
313
314 shmdt(info);
315
316 if (pid) {
317 destroy_child_sync(&info->child_sync);
318 shmctl(shm_id, IPC_RMID, NULL);
319 }
320
321 return ret;
322}
323
324int main(int argc, char *argv[])
325{
326 return test_harness(ptrace_pkey, "ptrace_pkey");
327}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 19fb825270a1..34201cfa8335 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -102,6 +102,44 @@ int cont_trace(pid_t child)
102 return TEST_PASS; 102 return TEST_PASS;
103} 103}
104 104
/*
 * Read @n unsigned longs of regset @type from @child via
 * PTRACE_GETREGSET, attaching before and detaching after the access.
 *
 * Returns TEST_PASS on success, TEST_FAIL (via FAIL_IF) if the
 * attach/detach fails, or the raw ptrace() result on read failure.
 *
 * NOTE(review): on ptrace() failure this returns without calling
 * stop_trace(), leaving the child attached — confirm this is intended.
 */
int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
		     int n)
{
	struct iovec iov;
	long ret;

	FAIL_IF(start_trace(child));

	iov.iov_base = regs;
	iov.iov_len = n * sizeof(unsigned long);

	ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
	if (ret)
		return ret;

	FAIL_IF(stop_trace(child));

	return TEST_PASS;
}
124
/*
 * Write @n unsigned longs to regset @type of @child via
 * PTRACE_SETREGSET, attaching before and detaching after the access.
 *
 * Returns the raw ptrace() result (0 on success, -1 on error), or
 * TEST_FAIL via FAIL_IF if attach/detach fails.  Callers rely on a
 * nonzero return to detect writes the kernel rejected.
 */
long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
		       int n)
{
	struct iovec iov;
	long ret;

	FAIL_IF(start_trace(child));

	iov.iov_base = regs;
	iov.iov_len = n * sizeof(unsigned long);

	ret = ptrace(PTRACE_SETREGSET, child, type, &iov);

	FAIL_IF(stop_trace(child));

	return ret;
}
142
105/* TAR, PPR, DSCR */ 143/* TAR, PPR, DSCR */
106int show_tar_registers(pid_t child, unsigned long *out) 144int show_tar_registers(pid_t child, unsigned long *out)
107{ 145{
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index bb90d4b79524..c3ee8393dae8 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -14,3 +14,4 @@ tm-signal-context-chk-vsx
14tm-vmx-unavail 14tm-vmx-unavail
15tm-unavailable 15tm-unavailable
16tm-trap 16tm-trap
17tm-sigreturn
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 6c16f77c722c..74e5912e9f2e 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -1,3 +1,6 @@
1/fd-001-lookup
2/fd-002-posix-eq
3/fd-003-kthread
1/proc-loadavg-001 4/proc-loadavg-001
2/proc-self-map-files-001 5/proc-self-map-files-001
3/proc-self-map-files-002 6/proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index dbb87e56264c..db310eedc268 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,6 +1,9 @@
1CFLAGS += -Wall -O2 1CFLAGS += -Wall -O2 -Wno-unused-function
2 2
3TEST_GEN_PROGS := 3TEST_GEN_PROGS :=
4TEST_GEN_PROGS += fd-001-lookup
5TEST_GEN_PROGS += fd-002-posix-eq
6TEST_GEN_PROGS += fd-003-kthread
4TEST_GEN_PROGS += proc-loadavg-001 7TEST_GEN_PROGS += proc-loadavg-001
5TEST_GEN_PROGS += proc-self-map-files-001 8TEST_GEN_PROGS += proc-self-map-files-001
6TEST_GEN_PROGS += proc-self-map-files-002 9TEST_GEN_PROGS += proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000000..a2010dfb2110
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test /proc/*/fd lookup.
17#define _GNU_SOURCE
18#undef NDEBUG
19#include <assert.h>
20#include <dirent.h>
21#include <errno.h>
22#include <limits.h>
23#include <sched.h>
24#include <stdio.h>
25#include <unistd.h>
26#include <sys/types.h>
27#include <sys/stat.h>
28#include <fcntl.h>
29
30#include "proc.h"
31
32/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
33static void test_lookup_pass(const char *pathname)
34{
35 struct stat st;
36 ssize_t rv;
37
38 memset(&st, 0, sizeof(struct stat));
39 rv = lstat(pathname, &st);
40 assert(rv == 0);
41 assert(S_ISLNK(st.st_mode));
42}
43
44static void test_lookup_fail(const char *pathname)
45{
46 struct stat st;
47 ssize_t rv;
48
49 rv = lstat(pathname, &st);
50 assert(rv == -1 && errno == ENOENT);
51}
52
53static void test_lookup(unsigned int fd)
54{
55 char buf[64];
56 unsigned int c;
57 unsigned int u;
58 int i;
59
60 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
61 test_lookup_pass(buf);
62
63 /* leading junk */
64 for (c = 1; c <= 255; c++) {
65 if (c == '/')
66 continue;
67 snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
68 test_lookup_fail(buf);
69 }
70
71 /* trailing junk */
72 for (c = 1; c <= 255; c++) {
73 if (c == '/')
74 continue;
75 snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
76 test_lookup_fail(buf);
77 }
78
79 for (i = INT_MIN; i < INT_MIN + 1024; i++) {
80 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
81 test_lookup_fail(buf);
82 }
83 for (i = -1024; i < 0; i++) {
84 snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
85 test_lookup_fail(buf);
86 }
87 for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
88 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
89 test_lookup_fail(buf);
90 }
91 for (u = UINT_MAX - 1024; u != 0; u++) {
92 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
93 test_lookup_fail(buf);
94 }
95
96
97}
98
99int main(void)
100{
101 struct dirent *de;
102 unsigned int fd, target_fd;
103
104 if (unshare(CLONE_FILES) == -1)
105 return 1;
106
107 /* Wipe fdtable. */
108 do {
109 DIR *d;
110
111 d = opendir("/proc/self/fd");
112 if (!d)
113 return 1;
114
115 de = xreaddir(d);
116 assert(de->d_type == DT_DIR);
117 assert(streq(de->d_name, "."));
118
119 de = xreaddir(d);
120 assert(de->d_type == DT_DIR);
121 assert(streq(de->d_name, ".."));
122next:
123 de = xreaddir(d);
124 if (de) {
125 unsigned long long fd_ull;
126 unsigned int fd;
127 char *end;
128
129 assert(de->d_type == DT_LNK);
130
131 fd_ull = xstrtoull(de->d_name, &end);
132 assert(*end == '\0');
133 assert(fd_ull == (unsigned int)fd_ull);
134
135 fd = fd_ull;
136 if (fd == dirfd(d))
137 goto next;
138 close(fd);
139 }
140
141 closedir(d);
142 } while (de);
143
144 /* Now fdtable is clean. */
145
146 fd = open("/", O_PATH|O_DIRECTORY);
147 assert(fd == 0);
148 test_lookup(fd);
149 close(fd);
150
151 /* Clean again! */
152
153 fd = open("/", O_PATH|O_DIRECTORY);
154 assert(fd == 0);
155 /* Default RLIMIT_NOFILE-1 */
156 target_fd = 1023;
157 while (target_fd > 0) {
158 if (dup2(fd, target_fd) == target_fd)
159 break;
160 target_fd /= 2;
161 }
162 assert(target_fd > 0);
163 close(fd);
164 test_lookup(target_fd);
165 close(target_fd);
166
167 return 0;
168}
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000000..417322ca9c53
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test that open(/proc/*/fd/*) opens the same file.
17#undef NDEBUG
18#include <assert.h>
19#include <stdio.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <fcntl.h>
23#include <unistd.h>
24
25int main(void)
26{
27 int fd0, fd1, fd2;
28 struct stat st0, st1, st2;
29 char buf[64];
30 int rv;
31
32 fd0 = open("/", O_DIRECTORY|O_RDONLY);
33 assert(fd0 >= 0);
34
35 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
36 fd1 = open(buf, O_RDONLY);
37 assert(fd1 >= 0);
38
39 snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
40 fd2 = open(buf, O_RDONLY);
41 assert(fd2 >= 0);
42
43 rv = fstat(fd0, &st0);
44 assert(rv == 0);
45 rv = fstat(fd1, &st1);
46 assert(rv == 0);
47 rv = fstat(fd2, &st2);
48 assert(rv == 0);
49
50 assert(st0.st_dev == st1.st_dev);
51 assert(st0.st_ino == st1.st_ino);
52
53 assert(st0.st_dev == st2.st_dev);
54 assert(st0.st_ino == st2.st_ino);
55
56 return 0;
57}
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000000..1d659d55368c
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
1/*
2 * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16// Test that /proc/$KERNEL_THREAD/fd/ is empty.
17#define _GNU_SOURCE
18#undef NDEBUG
19#include <sys/syscall.h>
20#include <assert.h>
21#include <dirent.h>
22#include <limits.h>
23#include <stdio.h>
24#include <string.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <fcntl.h>
28#include <unistd.h>
29
30#include "proc.h"
31
32#define PF_KHTREAD 0x00200000
33
34/*
35 * Test for kernel threadness atomically with openat().
36 *
37 * Return /proc/$PID/fd descriptor if process is kernel thread.
38 * Return -1 if a process is userspace process.
39 */
40static int kernel_thread_fd(unsigned int pid)
41{
42 unsigned int flags = 0;
43 char buf[4096];
44 int dir_fd, fd;
45 ssize_t rv;
46
47 snprintf(buf, sizeof(buf), "/proc/%u", pid);
48 dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
49 if (dir_fd == -1)
50 return -1;
51
52 /*
53 * Believe it or not, struct task_struct::flags is directly exposed
54 * to userspace!
55 */
56 fd = openat(dir_fd, "stat", O_RDONLY);
57 if (fd == -1) {
58 close(dir_fd);
59 return -1;
60 }
61 rv = read(fd, buf, sizeof(buf));
62 close(fd);
63 if (0 < rv && rv <= sizeof(buf)) {
64 unsigned long long flags_ull;
65 char *p, *end;
66 int i;
67
68 assert(buf[rv - 1] == '\n');
69 buf[rv - 1] = '\0';
70
71 /* Search backwards: ->comm can contain whitespace and ')'. */
72 for (i = 0; i < 43; i++) {
73 p = strrchr(buf, ' ');
74 assert(p);
75 *p = '\0';
76 }
77
78 p = strrchr(buf, ' ');
79 assert(p);
80
81 flags_ull = xstrtoull(p + 1, &end);
82 assert(*end == '\0');
83 assert(flags_ull == (unsigned int)flags_ull);
84
85 flags = flags_ull;
86 }
87
88 fd = -1;
89 if (flags & PF_KHTREAD) {
90 fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
91 }
92 close(dir_fd);
93 return fd;
94}
95
96static void test_readdir(int fd)
97{
98 DIR *d;
99 struct dirent *de;
100
101 d = fdopendir(fd);
102 assert(d);
103
104 de = xreaddir(d);
105 assert(streq(de->d_name, "."));
106 assert(de->d_type == DT_DIR);
107
108 de = xreaddir(d);
109 assert(streq(de->d_name, ".."));
110 assert(de->d_type == DT_DIR);
111
112 de = xreaddir(d);
113 assert(!de);
114}
115
116static inline int sys_statx(int dirfd, const char *pathname, int flags,
117 unsigned int mask, void *stx)
118{
119 return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
120}
121
122static void test_lookup_fail(int fd, const char *pathname)
123{
124 char stx[256] __attribute__((aligned(8)));
125 int rv;
126
127 rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
128 assert(rv == -1 && errno == ENOENT);
129}
130
131static void test_lookup(int fd)
132{
133 char buf[64];
134 unsigned int u;
135 int i;
136
137 for (i = INT_MIN; i < INT_MIN + 1024; i++) {
138 snprintf(buf, sizeof(buf), "%d", i);
139 test_lookup_fail(fd, buf);
140 }
141 for (i = -1024; i < 1024; i++) {
142 snprintf(buf, sizeof(buf), "%d", i);
143 test_lookup_fail(fd, buf);
144 }
145 for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
146 snprintf(buf, sizeof(buf), "%u", u);
147 test_lookup_fail(fd, buf);
148 }
149 for (u = UINT_MAX - 1024; u != 0; u++) {
150 snprintf(buf, sizeof(buf), "%u", u);
151 test_lookup_fail(fd, buf);
152 }
153}
154
155int main(void)
156{
157 unsigned int pid;
158 int fd;
159
160 /*
161 * In theory this will loop indefinitely if kernel threads are exiled
162 * from /proc.
163 *
164 * Start with kthreadd.
165 */
166 pid = 2;
167 while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
168 pid++;
169 }
170 /* EACCES if run as non-root. */
171 if (pid >= 1024)
172 return 1;
173
174 test_readdir(fd);
175 test_lookup(fd);
176
177 return 0;
178}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
index 0e464b50e9d9..dc6a42b1d6b0 100644
--- a/tools/testing/selftests/proc/proc-uptime.h
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -20,21 +20,7 @@
20#include <stdlib.h> 20#include <stdlib.h>
21#include <unistd.h> 21#include <unistd.h>
22 22
23static unsigned long long xstrtoull(const char *p, char **end) 23#include "proc.h"
24{
25 if (*p == '0') {
26 *end = (char *)p + 1;
27 return 0;
28 } else if ('1' <= *p && *p <= '9') {
29 unsigned long long val;
30
31 errno = 0;
32 val = strtoull(p, end, 10);
33 assert(errno == 0);
34 return val;
35 } else
36 assert(0);
37}
38 24
39static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) 25static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
40{ 26{
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000000..4e178166fd84
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,39 @@
1#pragma once
2#undef NDEBUG
3#include <assert.h>
4#include <dirent.h>
5#include <errno.h>
6#include <stdbool.h>
7#include <stdlib.h>
8#include <string.h>
9
10static inline bool streq(const char *s1, const char *s2)
11{
12 return strcmp(s1, s2) == 0;
13}
14
15static unsigned long long xstrtoull(const char *p, char **end)
16{
17 if (*p == '0') {
18 *end = (char *)p + 1;
19 return 0;
20 } else if ('1' <= *p && *p <= '9') {
21 unsigned long long val;
22
23 errno = 0;
24 val = strtoull(p, end, 10);
25 assert(errno == 0);
26 return val;
27 } else
28 assert(0);
29}
30
31static struct dirent *xreaddir(DIR *d)
32{
33 struct dirent *de;
34
35 errno = 0;
36 de = readdir(d);
37 assert(de || errno == 0);
38 return de;
39}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
index 1e73c2232097..563e752e6eba 100644
--- a/tools/testing/selftests/proc/read.c
+++ b/tools/testing/selftests/proc/read.c
@@ -31,22 +31,7 @@
31#include <fcntl.h> 31#include <fcntl.h>
32#include <unistd.h> 32#include <unistd.h>
33 33
34static inline bool streq(const char *s1, const char *s2) 34#include "proc.h"
35{
36 return strcmp(s1, s2) == 0;
37}
38
39static struct dirent *xreaddir(DIR *d)
40{
41 struct dirent *de;
42
43 errno = 0;
44 de = readdir(d);
45 if (!de && errno != 0) {
46 exit(1);
47 }
48 return de;
49}
50 35
51static void f_reg(DIR *d, const char *filename) 36static void f_reg(DIR *d, const char *filename)
52{ 37{
diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
index 6ccb154cb4aa..22f8df1ad7d4 100755
--- a/tools/testing/selftests/pstore/pstore_post_reboot_tests
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -7,13 +7,16 @@
7# 7#
8# Released under the terms of the GPL v2. 8# Released under the terms of the GPL v2.
9 9
10# Kselftest framework requirement - SKIP code is 4.
11ksft_skip=4
12
10. ./common_tests 13. ./common_tests
11 14
12if [ -e $REBOOT_FLAG ]; then 15if [ -e $REBOOT_FLAG ]; then
13 rm $REBOOT_FLAG 16 rm $REBOOT_FLAG
14else 17else
15 prlog "pstore_crash_test has not been executed yet. we skip further tests." 18 prlog "pstore_crash_test has not been executed yet. we skip further tests."
16 exit 0 19 exit $ksft_skip
17fi 20fi
18 21
19prlog -n "Mounting pstore filesystem ... " 22prlog -n "Mounting pstore filesystem ... "
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
1#!/bin/sh
2#
3# Invoke a text editor on all console.log files for all runs with diagnostics,
4# that is, on all such files having a console.log.diags counterpart.
5# Note that both console.log.diags and console.log are passed to the
6# editor (currently defaulting to "vi"), allowing the user to get an
7# idea of what to search for in the console.log file.
8#
9# Usage: kvm-find-errors.sh directory
10#
11# The "directory" above should end with the date/time directory, for example,
12# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
13
14rundir="${1}"
15if test -z "$rundir" -o ! -d "$rundir"
16then
17 echo Usage: $0 directory
18fi
19editor=${EDITOR-vi}
20
21# Find builds with errors
22files=
23for i in ${rundir}/*/Make.out
24do
25 if egrep -q "error:|warning:" < $i
26 then
27 egrep "error:|warning:" < $i > $i.diags
28 files="$files $i.diags $i"
29 fi
30done
31if test -n "$files"
32then
33 $editor $files
34else
35 echo No build errors.
36fi
37if grep -q -e "--buildonly" < ${rundir}/log
38then
39 echo Build-only run, no console logs to check.
40fi
41
42# Find console logs with errors
43files=
44for i in ${rundir}/*/console.log
45do
46 if test -r $i.diags
47 then
48 files="$files $i.diags $i"
49 fi
50done
51if test -n "$files"
52then
53 $editor $files
54else
55 echo No errors in console logs.
56fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..477ecb1293ab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
34 34
35configfile=`echo $i | sed -e 's/^.*\///'` 35configfile=`echo $i | sed -e 's/^.*\///'`
36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` 36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
37stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
38 tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
39 awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
40 tr -d '\012\015'`"
37if test -z "$ngps" 41if test -z "$ngps"
38then 42then
39 echo "$configfile -------" 43 echo "$configfile ------- " $stopstate
40else 44else
41 title="$configfile ------- $ngps grace periods" 45 title="$configfile ------- $ngps GPs"
42 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` 46 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
43 if test -z "$dur" 47 if test -z "$dur"
44 then 48 then
@@ -46,9 +50,9 @@ else
46 else 50 else
47 ngpsps=`awk -v ngps=$ngps -v dur=$dur ' 51 ngpsps=`awk -v ngps=$ngps -v dur=$dur '
48 BEGIN { print ngps / dur }' < /dev/null` 52 BEGIN { print ngps / dur }' < /dev/null`
49 title="$title ($ngpsps per second)" 53 title="$title ($ngpsps/s)"
50 fi 54 fi
51 echo $title 55 echo $title $stopstate
52 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` 56 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 if test -z "$nclosecalls" 57 if test -z "$nclosecalls"
54 then 58 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c27e97824163 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@ do
48 cat $i/Make.oldconfig.err 48 cat $i/Make.oldconfig.err
49 fi 49 fi
50 parse-build.sh $i/Make.out $configfile 50 parse-build.sh $i/Make.out $configfile
51 if test "$TORTURE_SUITE" != rcuperf
52 then
53 parse-torture.sh $i/console.log $configfile
54 fi
55 parse-console.sh $i/console.log $configfile 51 parse-console.sh $i/console.log $configfile
56 if test -r $i/Warnings 52 if test -r $i/Warnings
57 then 53 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..c5b0f94341d9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@ then
267 echo Unknown PID, cannot kill qemu command 267 echo Unknown PID, cannot kill qemu command
268fi 268fi
269 269
270parse-torture.sh $resdir/console.log $title
271parse-console.sh $resdir/console.log $title 270parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..17293436f551 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
24# 24#
25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
26 26
27T=${TMPDIR-/tmp}/parse-console.sh.$$
27file="$1" 28file="$1"
28title="$2" 29title="$2"
29 30
31trap 'rm -f $T.seq $T.diags' 0
32
30. functions.sh 33. functions.sh
31 34
35# Check for presence and readability of console output file
36if test -f "$file" -a -r "$file"
37then
38 :
39else
40 echo $title unreadable console output file: $file
41 exit 1
42fi
32if grep -Pq '\x00' < $file 43if grep -Pq '\x00' < $file
33then 44then
34 print_warning Console output contains nul bytes, old qemu still running? 45 print_warning Console output contains nul bytes, old qemu still running?
35fi 46fi
36egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags 47cat /dev/null > $file.diags
37if test -s $1.diags 48
49# Check for proper termination, except that rcuperf runs don't indicate this.
50if test "$TORTURE_SUITE" != rcuperf
38then 51then
39 print_warning Assertion failure in $file $title 52 # check for abject failure
40 # cat $1.diags 53
54 if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
55 then
56 nerrs=`grep --binary-files=text '!!!' $file |
57 tail -1 |
58 awk '
59 {
60 for (i=NF-8;i<=NF;i++)
61 sum+=$i;
62 }
63 END { print sum }'`
64 print_bug $title FAILURE, $nerrs instances
65 exit
66 fi
67
68 grep --binary-files=text 'torture:.*ver:' $file |
69 egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
70 sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
71 awk '
72 BEGIN {
73 ver = 0;
74 badseq = 0;
75 }
76
77 {
78 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
79 badseqno1 = ver;
80 badseqno2 = $5;
81 badseqnr = NR;
82 badseq = 1;
83 }
84 ver = $5
85 }
86
87 END {
88 if (badseq) {
89 if (badseqno1 == badseqno2 && badseqno2 == ver)
90 print "GP HANG at " ver " torture stat " badseqnr;
91 else
92 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
93 }
94 }' > $T.seq
95
96 if grep -q SUCCESS $file
97 then
98 if test -s $T.seq
99 then
100 print_warning $title `cat $T.seq`
101 echo " " $file
102 exit 2
103 fi
104 else
105 if grep -q "_HOTPLUG:" $file
106 then
107 print_warning HOTPLUG FAILURES $title `cat $T.seq`
108 echo " " $file
109 exit 3
110 fi
111 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
112 if test -s $T.seq
113 then
114 print_warning $title `cat $T.seq`
115 fi
116 exit 2
117 fi
118fi | tee -a $file.diags
119
120egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
121grep -v 'ODEBUG: ' |
122grep -v 'Warning: unable to open an initial console' > $T.diags
123if test -s $T.diags
124then
125 print_warning "Assertion failure in $file $title"
126 # cat $T.diags
41 summary="" 127 summary=""
42 n_badness=`grep -c Badness $1` 128 n_badness=`grep -c Badness $file`
43 if test "$n_badness" -ne 0 129 if test "$n_badness" -ne 0
44 then 130 then
45 summary="$summary Badness: $n_badness" 131 summary="$summary Badness: $n_badness"
46 fi 132 fi
47 n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` 133 n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
48 if test "$n_warn" -ne 0 134 if test "$n_warn" -ne 0
49 then 135 then
50 summary="$summary Warnings: $n_warn" 136 summary="$summary Warnings: $n_warn"
51 fi 137 fi
52 n_bugs=`egrep -c 'BUG|Oops:' $1` 138 n_bugs=`egrep -c 'BUG|Oops:' $file`
53 if test "$n_bugs" -ne 0 139 if test "$n_bugs" -ne 0
54 then 140 then
55 summary="$summary Bugs: $n_bugs" 141 summary="$summary Bugs: $n_bugs"
56 fi 142 fi
57 n_calltrace=`grep -c 'Call Trace:' $1` 143 n_calltrace=`grep -c 'Call Trace:' $file`
58 if test "$n_calltrace" -ne 0 144 if test "$n_calltrace" -ne 0
59 then 145 then
60 summary="$summary Call Traces: $n_calltrace" 146 summary="$summary Call Traces: $n_calltrace"
61 fi 147 fi
62 n_lockdep=`grep -c =========== $1` 148 n_lockdep=`grep -c =========== $file`
63 if test "$n_badness" -ne 0 149 if test "$n_badness" -ne 0
64 then 150 then
65 summary="$summary lockdep: $n_badness" 151 summary="$summary lockdep: $n_badness"
66 fi 152 fi
67 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` 153 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
68 if test "$n_stalls" -ne 0 154 if test "$n_stalls" -ne 0
69 then 155 then
70 summary="$summary Stalls: $n_stalls" 156 summary="$summary Stalls: $n_stalls"
71 fi 157 fi
72 n_starves=`grep -c 'rcu_.*kthread starved for' $1` 158 n_starves=`grep -c 'rcu_.*kthread starved for' $file`
73 if test "$n_starves" -ne 0 159 if test "$n_starves" -ne 0
74 then 160 then
75 summary="$summary Starves: $n_starves" 161 summary="$summary Starves: $n_starves"
76 fi 162 fi
77 print_warning Summary: $summary 163 print_warning Summary: $summary
78else 164 cat $T.diags >> $file.diags
79 rm $1.diags 165fi
166if ! test -s $file.diags
167then
168 rm -f $file.diags
80fi 169fi
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
1#!/bin/bash
2#
3# Check the console output from a torture run for goodness.
4# The "file" is a pathname on the local system, and "title" is
5# a text string for error-message purposes.
6#
7# The file must contain torture output, but can be interspersed
8# with other dmesg text, as in console-log output.
9#
10# Usage: parse-torture.sh file title
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, you can access it online at
24# http://www.gnu.org/licenses/gpl-2.0.html.
25#
26# Copyright (C) IBM Corporation, 2011
27#
28# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
29
30T=${TMPDIR-/tmp}/parse-torture.sh.$$
31file="$1"
32title="$2"
33
34trap 'rm -f $T.seq' 0
35
36. functions.sh
37
38# check for presence of torture output file.
39
40if test -f "$file" -a -r "$file"
41then
42 :
43else
44 echo $title unreadable torture output file: $file
45 exit 1
46fi
47
48# check for abject failure
49
50if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
51then
52 nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 print_bug $title FAILURE, $nerrs instances
54 echo " " $url
55 exit
56fi
57
58grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
59awk '
60BEGIN {
61 ver = 0;
62 badseq = 0;
63 }
64
65 {
66 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
67 badseqno1 = ver;
68 badseqno2 = $5;
69 badseqnr = NR;
70 badseq = 1;
71 }
72 ver = $5
73 }
74
75END {
76 if (badseq) {
77 if (badseqno1 == badseqno2 && badseqno2 == ver)
78 print "GP HANG at " ver " torture stat " badseqnr;
79 else
80 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
81 }
82 }' > $T.seq
83
84if grep -q SUCCESS $file
85then
86 if test -s $T.seq
87 then
88 print_warning $title $title `cat $T.seq`
89 echo " " $file
90 exit 2
91 fi
92else
93 if grep -q "_HOTPLUG:" $file
94 then
95 print_warning HOTPLUG FAILURES $title `cat $T.seq`
96 echo " " $file
97 exit 3
98 fi
99 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
100 if test -s $T.seq
101 then
102 print_warning $title `cat $T.seq`
103 fi
104 exit 2
105fi
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
new file mode 100644
index 000000000000..cc610da7e369
--- /dev/null
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -0,0 +1,6 @@
1basic_percpu_ops_test
2basic_test
3basic_rseq_op_test
4param_test
5param_test_benchmark
6param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 000000000000..c30c52e1d0d2
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,30 @@
1# SPDX-License-Identifier: GPL-2.0+ OR MIT
2CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
3LDLIBS += -lpthread
4
5# Own dependencies because we only want to build against 1st prerequisite, but
6# still track changes to header files and depend on shared object.
7OVERRIDE_TARGETS = 1
8
9TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
10 param_test_benchmark param_test_compare_twice
11
12TEST_GEN_PROGS_EXTENDED = librseq.so
13
14TEST_PROGS = run_param_test.sh
15
16include ../lib.mk
17
18$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
19 $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
20
21$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
22 $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
23
24$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
25 rseq.h rseq-*.h
26 $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
27
28$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
29 rseq.h rseq-*.h
30 $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000000..eb3f6db36d36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
1// SPDX-License-Identifier: LGPL-2.1
2#define _GNU_SOURCE
3#include <assert.h>
4#include <pthread.h>
5#include <sched.h>
6#include <stdint.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <stddef.h>
11
12#include "rseq.h"
13
14#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
15
16struct percpu_lock_entry {
17 intptr_t v;
18} __attribute__((aligned(128)));
19
20struct percpu_lock {
21 struct percpu_lock_entry c[CPU_SETSIZE];
22};
23
24struct test_data_entry {
25 intptr_t count;
26} __attribute__((aligned(128)));
27
28struct spinlock_test_data {
29 struct percpu_lock lock;
30 struct test_data_entry c[CPU_SETSIZE];
31 int reps;
32};
33
34struct percpu_list_node {
35 intptr_t data;
36 struct percpu_list_node *next;
37};
38
39struct percpu_list_entry {
40 struct percpu_list_node *head;
41} __attribute__((aligned(128)));
42
43struct percpu_list {
44 struct percpu_list_entry c[CPU_SETSIZE];
45};
46
47/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
48int rseq_this_cpu_lock(struct percpu_lock *lock)
49{
50 int cpu;
51
52 for (;;) {
53 int ret;
54
55 cpu = rseq_cpu_start();
56 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
57 0, 1, cpu);
58 if (rseq_likely(!ret))
59 break;
60 /* Retry if comparison fails or rseq aborts. */
61 }
62 /*
63 * Acquire semantic when taking lock after control dependency.
64 * Matches rseq_smp_store_release().
65 */
66 rseq_smp_acquire__after_ctrl_dep();
67 return cpu;
68}
69
70void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
71{
72 assert(lock->c[cpu].v == 1);
73 /*
74 * Release lock, with release semantic. Matches
75 * rseq_smp_acquire__after_ctrl_dep().
76 */
77 rseq_smp_store_release(&lock->c[cpu].v, 0);
78}
79
80void *test_percpu_spinlock_thread(void *arg)
81{
82 struct spinlock_test_data *data = arg;
83 int i, cpu;
84
85 if (rseq_register_current_thread()) {
86 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
87 errno, strerror(errno));
88 abort();
89 }
90 for (i = 0; i < data->reps; i++) {
91 cpu = rseq_this_cpu_lock(&data->lock);
92 data->c[cpu].count++;
93 rseq_percpu_unlock(&data->lock, cpu);
94 }
95 if (rseq_unregister_current_thread()) {
96 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
97 errno, strerror(errno));
98 abort();
99 }
100
101 return NULL;
102}
103
104/*
105 * A simple test which implements a sharded counter using a per-cpu
106 * lock. Obviously real applications might prefer to simply use a
107 * per-cpu increment; however, this is reasonable for a test and the
108 * lock can be extended to synchronize more complicated operations.
109 */
110void test_percpu_spinlock(void)
111{
112 const int num_threads = 200;
113 int i;
114 uint64_t sum;
115 pthread_t test_threads[num_threads];
116 struct spinlock_test_data data;
117
118 memset(&data, 0, sizeof(data));
119 data.reps = 5000;
120
121 for (i = 0; i < num_threads; i++)
122 pthread_create(&test_threads[i], NULL,
123 test_percpu_spinlock_thread, &data);
124
125 for (i = 0; i < num_threads; i++)
126 pthread_join(test_threads[i], NULL);
127
128 sum = 0;
129 for (i = 0; i < CPU_SETSIZE; i++)
130 sum += data.c[i].count;
131
132 assert(sum == (uint64_t)data.reps * num_threads);
133}
134
135void this_cpu_list_push(struct percpu_list *list,
136 struct percpu_list_node *node,
137 int *_cpu)
138{
139 int cpu;
140
141 for (;;) {
142 intptr_t *targetptr, newval, expect;
143 int ret;
144
145 cpu = rseq_cpu_start();
146 /* Load list->c[cpu].head with single-copy atomicity. */
147 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
148 newval = (intptr_t)node;
149 targetptr = (intptr_t *)&list->c[cpu].head;
150 node->next = (struct percpu_list_node *)expect;
151 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
152 if (rseq_likely(!ret))
153 break;
154 /* Retry if comparison fails or rseq aborts. */
155 }
156 if (_cpu)
157 *_cpu = cpu;
158}
159
160/*
161 * Unlike a traditional lock-less linked list; the availability of a
162 * rseq primitive allows us to implement pop without concerns over
163 * ABA-type races.
164 */
165struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
166 int *_cpu)
167{
168 for (;;) {
169 struct percpu_list_node *head;
170 intptr_t *targetptr, expectnot, *load;
171 off_t offset;
172 int ret, cpu;
173
174 cpu = rseq_cpu_start();
175 targetptr = (intptr_t *)&list->c[cpu].head;
176 expectnot = (intptr_t)NULL;
177 offset = offsetof(struct percpu_list_node, next);
178 load = (intptr_t *)&head;
179 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
180 offset, load, cpu);
181 if (rseq_likely(!ret)) {
182 if (_cpu)
183 *_cpu = cpu;
184 return head;
185 }
186 if (ret > 0)
187 return NULL;
188 /* Retry if rseq aborts. */
189 }
190}
191
192/*
193 * __percpu_list_pop is not safe against concurrent accesses. Should
194 * only be used on lists that are not concurrently modified.
195 */
196struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
197{
198 struct percpu_list_node *node;
199
200 node = list->c[cpu].head;
201 if (!node)
202 return NULL;
203 list->c[cpu].head = node->next;
204 return node;
205}
206
207void *test_percpu_list_thread(void *arg)
208{
209 int i;
210 struct percpu_list *list = (struct percpu_list *)arg;
211
212 if (rseq_register_current_thread()) {
213 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
214 errno, strerror(errno));
215 abort();
216 }
217
218 for (i = 0; i < 100000; i++) {
219 struct percpu_list_node *node;
220
221 node = this_cpu_list_pop(list, NULL);
222 sched_yield(); /* encourage shuffling */
223 if (node)
224 this_cpu_list_push(list, node, NULL);
225 }
226
227 if (rseq_unregister_current_thread()) {
228 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
229 errno, strerror(errno));
230 abort();
231 }
232
233 return NULL;
234}
235
236/* Simultaneous modification to a per-cpu linked list from many threads. */
237void test_percpu_list(void)
238{
239 int i, j;
240 uint64_t sum = 0, expected_sum = 0;
241 struct percpu_list list;
242 pthread_t test_threads[200];
243 cpu_set_t allowed_cpus;
244
245 memset(&list, 0, sizeof(list));
246
247 /* Generate list entries for every usable cpu. */
248 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
249 for (i = 0; i < CPU_SETSIZE; i++) {
250 if (!CPU_ISSET(i, &allowed_cpus))
251 continue;
252 for (j = 1; j <= 100; j++) {
253 struct percpu_list_node *node;
254
255 expected_sum += j;
256
257 node = malloc(sizeof(*node));
258 assert(node);
259 node->data = j;
260 node->next = list.c[i].head;
261 list.c[i].head = node;
262 }
263 }
264
265 for (i = 0; i < 200; i++)
266 pthread_create(&test_threads[i], NULL,
267 test_percpu_list_thread, &list);
268
269 for (i = 0; i < 200; i++)
270 pthread_join(test_threads[i], NULL);
271
272 for (i = 0; i < CPU_SETSIZE; i++) {
273 struct percpu_list_node *node;
274
275 if (!CPU_ISSET(i, &allowed_cpus))
276 continue;
277
278 while ((node = __percpu_list_pop(&list, i))) {
279 sum += node->data;
280 free(node);
281 }
282 }
283
284 /*
285 * All entries should now be accounted for (unless some external
286 * actor is interfering with our allowed affinity while this
287 * test is running).
288 */
289 assert(sum == expected_sum);
290}
291
292int main(int argc, char **argv)
293{
294 if (rseq_register_current_thread()) {
295 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
296 errno, strerror(errno));
297 goto error;
298 }
299 printf("spinlock\n");
300 test_percpu_spinlock();
301 printf("percpu_list\n");
302 test_percpu_list();
303 if (rseq_unregister_current_thread()) {
304 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
305 errno, strerror(errno));
306 goto error;
307 }
308 return 0;
309
310error:
311 return -1;
312}
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
new file mode 100644
index 000000000000..d8efbfb89193
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -0,0 +1,56 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * Basic test coverage for critical regions and rseq_current_cpu().
4 */
5
6#define _GNU_SOURCE
7#include <assert.h>
8#include <sched.h>
9#include <signal.h>
10#include <stdio.h>
11#include <string.h>
12#include <sys/time.h>
13
14#include "rseq.h"
15
16void test_cpu_pointer(void)
17{
18 cpu_set_t affinity, test_affinity;
19 int i;
20
21 sched_getaffinity(0, sizeof(affinity), &affinity);
22 CPU_ZERO(&test_affinity);
23 for (i = 0; i < CPU_SETSIZE; i++) {
24 if (CPU_ISSET(i, &affinity)) {
25 CPU_SET(i, &test_affinity);
26 sched_setaffinity(0, sizeof(test_affinity),
27 &test_affinity);
28 assert(sched_getcpu() == i);
29 assert(rseq_current_cpu() == i);
30 assert(rseq_current_cpu_raw() == i);
31 assert(rseq_cpu_start() == i);
32 CPU_CLR(i, &test_affinity);
33 }
34 }
35 sched_setaffinity(0, sizeof(affinity), &affinity);
36}
37
38int main(int argc, char **argv)
39{
40 if (rseq_register_current_thread()) {
41 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
42 errno, strerror(errno));
43 goto init_thread_error;
44 }
45 printf("testing current cpu\n");
46 test_cpu_pointer();
47 if (rseq_unregister_current_thread()) {
48 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
49 errno, strerror(errno));
50 goto init_thread_error;
51 }
52 return 0;
53
54init_thread_error:
55 return -1;
56}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
new file mode 100644
index 000000000000..615252331813
--- /dev/null
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -0,0 +1,1284 @@
1// SPDX-License-Identifier: LGPL-2.1
2#define _GNU_SOURCE
3#include <assert.h>
4#include <pthread.h>
5#include <sched.h>
6#include <stdint.h>
7#include <stdio.h>
8#include <stdlib.h>
9#include <string.h>
10#include <syscall.h>
11#include <unistd.h>
12#include <poll.h>
13#include <sys/types.h>
14#include <signal.h>
15#include <errno.h>
16#include <stddef.h>
17
18static inline pid_t gettid(void)
19{
20 return syscall(__NR_gettid);
21}
22
23#define NR_INJECT 9
24static int loop_cnt[NR_INJECT + 1];
25
26static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
27static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
28static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
29static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
30static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
31static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
32
33static int opt_modulo, verbose;
34
35static int opt_yield, opt_signal, opt_sleep,
36 opt_disable_rseq, opt_threads = 200,
37 opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
38
39#ifndef RSEQ_SKIP_FASTPATH
40static long long opt_reps = 5000;
41#else
42static long long opt_reps = 100;
43#endif
44
45static __thread __attribute__((tls_model("initial-exec")))
46unsigned int signals_delivered;
47
48#ifndef BENCHMARK
49
50static __thread __attribute__((tls_model("initial-exec"), unused))
51unsigned int yield_mod_cnt, nr_abort;
52
53#define printf_verbose(fmt, ...) \
54 do { \
55 if (verbose) \
56 printf(fmt, ## __VA_ARGS__); \
57 } while (0)
58
59#if defined(__x86_64__) || defined(__i386__)
60
61#define INJECT_ASM_REG "eax"
62
63#define RSEQ_INJECT_CLOBBER \
64 , INJECT_ASM_REG
65
66#ifdef __i386__
67
68#define RSEQ_INJECT_ASM(n) \
69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
71 "jz 333f\n\t" \
72 "222:\n\t" \
73 "dec %%" INJECT_ASM_REG "\n\t" \
74 "jnz 222b\n\t" \
75 "333:\n\t"
76
77#elif defined(__x86_64__)
78
79#define RSEQ_INJECT_ASM(n) \
80 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \
81 "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
82 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
83 "jz 333f\n\t" \
84 "222:\n\t" \
85 "dec %%" INJECT_ASM_REG "\n\t" \
86 "jnz 222b\n\t" \
87 "333:\n\t"
88
89#else
90#error "Unsupported architecture"
91#endif
92
93#elif defined(__ARMEL__)
94
95#define RSEQ_INJECT_INPUT \
96 , [loop_cnt_1]"m"(loop_cnt[1]) \
97 , [loop_cnt_2]"m"(loop_cnt[2]) \
98 , [loop_cnt_3]"m"(loop_cnt[3]) \
99 , [loop_cnt_4]"m"(loop_cnt[4]) \
100 , [loop_cnt_5]"m"(loop_cnt[5]) \
101 , [loop_cnt_6]"m"(loop_cnt[6])
102
103#define INJECT_ASM_REG "r4"
104
105#define RSEQ_INJECT_CLOBBER \
106 , INJECT_ASM_REG
107
108#define RSEQ_INJECT_ASM(n) \
109 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
110 "cmp " INJECT_ASM_REG ", #0\n\t" \
111 "beq 333f\n\t" \
112 "222:\n\t" \
113 "subs " INJECT_ASM_REG ", #1\n\t" \
114 "bne 222b\n\t" \
115 "333:\n\t"
116
117#elif __PPC__
118
119#define RSEQ_INJECT_INPUT \
120 , [loop_cnt_1]"m"(loop_cnt[1]) \
121 , [loop_cnt_2]"m"(loop_cnt[2]) \
122 , [loop_cnt_3]"m"(loop_cnt[3]) \
123 , [loop_cnt_4]"m"(loop_cnt[4]) \
124 , [loop_cnt_5]"m"(loop_cnt[5]) \
125 , [loop_cnt_6]"m"(loop_cnt[6])
126
127#define INJECT_ASM_REG "r18"
128
129#define RSEQ_INJECT_CLOBBER \
130 , INJECT_ASM_REG
131
132#define RSEQ_INJECT_ASM(n) \
133 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
134 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
135 "beq 333f\n\t" \
136 "222:\n\t" \
137 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
138 "bne 222b\n\t" \
139 "333:\n\t"
140
141#elif defined(__mips__)
142
143#define RSEQ_INJECT_INPUT \
144 , [loop_cnt_1]"m"(loop_cnt[1]) \
145 , [loop_cnt_2]"m"(loop_cnt[2]) \
146 , [loop_cnt_3]"m"(loop_cnt[3]) \
147 , [loop_cnt_4]"m"(loop_cnt[4]) \
148 , [loop_cnt_5]"m"(loop_cnt[5]) \
149 , [loop_cnt_6]"m"(loop_cnt[6])
150
151#define INJECT_ASM_REG "$5"
152
153#define RSEQ_INJECT_CLOBBER \
154 , INJECT_ASM_REG
155
156#define RSEQ_INJECT_ASM(n) \
157 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
158 "beqz " INJECT_ASM_REG ", 333f\n\t" \
159 "222:\n\t" \
160 "addiu " INJECT_ASM_REG ", -1\n\t" \
161 "bnez " INJECT_ASM_REG ", 222b\n\t" \
162 "333:\n\t"
163
164#else
165#error unsupported target
166#endif
167
168#define RSEQ_INJECT_FAILED \
169 nr_abort++;
170
171#define RSEQ_INJECT_C(n) \
172{ \
173 int loc_i, loc_nr_loops = loop_cnt[n]; \
174 \
175 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
176 rseq_barrier(); \
177 } \
178 if (loc_nr_loops == -1 && opt_modulo) { \
179 if (yield_mod_cnt == opt_modulo - 1) { \
180 if (opt_sleep > 0) \
181 poll(NULL, 0, opt_sleep); \
182 if (opt_yield) \
183 sched_yield(); \
184 if (opt_signal) \
185 raise(SIGUSR1); \
186 yield_mod_cnt = 0; \
187 } else { \
188 yield_mod_cnt++; \
189 } \
190 } \
191}
192
193#else
194
195#define printf_verbose(fmt, ...)
196
197#endif /* BENCHMARK */
198
199#include "rseq.h"
200
201struct percpu_lock_entry {
202 intptr_t v;
203} __attribute__((aligned(128)));
204
205struct percpu_lock {
206 struct percpu_lock_entry c[CPU_SETSIZE];
207};
208
209struct test_data_entry {
210 intptr_t count;
211} __attribute__((aligned(128)));
212
213struct spinlock_test_data {
214 struct percpu_lock lock;
215 struct test_data_entry c[CPU_SETSIZE];
216};
217
218struct spinlock_thread_test_data {
219 struct spinlock_test_data *data;
220 long long reps;
221 int reg;
222};
223
224struct inc_test_data {
225 struct test_data_entry c[CPU_SETSIZE];
226};
227
228struct inc_thread_test_data {
229 struct inc_test_data *data;
230 long long reps;
231 int reg;
232};
233
234struct percpu_list_node {
235 intptr_t data;
236 struct percpu_list_node *next;
237};
238
239struct percpu_list_entry {
240 struct percpu_list_node *head;
241} __attribute__((aligned(128)));
242
243struct percpu_list {
244 struct percpu_list_entry c[CPU_SETSIZE];
245};
246
247#define BUFFER_ITEM_PER_CPU 100
248
249struct percpu_buffer_node {
250 intptr_t data;
251};
252
253struct percpu_buffer_entry {
254 intptr_t offset;
255 intptr_t buflen;
256 struct percpu_buffer_node **array;
257} __attribute__((aligned(128)));
258
259struct percpu_buffer {
260 struct percpu_buffer_entry c[CPU_SETSIZE];
261};
262
263#define MEMCPY_BUFFER_ITEM_PER_CPU 100
264
265struct percpu_memcpy_buffer_node {
266 intptr_t data1;
267 uint64_t data2;
268};
269
270struct percpu_memcpy_buffer_entry {
271 intptr_t offset;
272 intptr_t buflen;
273 struct percpu_memcpy_buffer_node *array;
274} __attribute__((aligned(128)));
275
276struct percpu_memcpy_buffer {
277 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
278};
279
280/* A simple percpu spinlock. Grabs lock on current cpu. */
281static int rseq_this_cpu_lock(struct percpu_lock *lock)
282{
283 int cpu;
284
285 for (;;) {
286 int ret;
287
288 cpu = rseq_cpu_start();
289 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
290 0, 1, cpu);
291 if (rseq_likely(!ret))
292 break;
293 /* Retry if comparison fails or rseq aborts. */
294 }
295 /*
296 * Acquire semantic when taking lock after control dependency.
297 * Matches rseq_smp_store_release().
298 */
299 rseq_smp_acquire__after_ctrl_dep();
300 return cpu;
301}
302
303static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
304{
305 assert(lock->c[cpu].v == 1);
306 /*
307 * Release lock, with release semantic. Matches
308 * rseq_smp_acquire__after_ctrl_dep().
309 */
310 rseq_smp_store_release(&lock->c[cpu].v, 0);
311}
312
313void *test_percpu_spinlock_thread(void *arg)
314{
315 struct spinlock_thread_test_data *thread_data = arg;
316 struct spinlock_test_data *data = thread_data->data;
317 long long i, reps;
318
319 if (!opt_disable_rseq && thread_data->reg &&
320 rseq_register_current_thread())
321 abort();
322 reps = thread_data->reps;
323 for (i = 0; i < reps; i++) {
324 int cpu = rseq_cpu_start();
325
326 cpu = rseq_this_cpu_lock(&data->lock);
327 data->c[cpu].count++;
328 rseq_percpu_unlock(&data->lock, cpu);
329#ifndef BENCHMARK
330 if (i != 0 && !(i % (reps / 10)))
331 printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
332#endif
333 }
334 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
335 (int) gettid(), nr_abort, signals_delivered);
336 if (!opt_disable_rseq && thread_data->reg &&
337 rseq_unregister_current_thread())
338 abort();
339 return NULL;
340}
341
342/*
343 * A simple test which implements a sharded counter using a per-cpu
344 * lock. Obviously real applications might prefer to simply use a
345 * per-cpu increment; however, this is reasonable for a test and the
346 * lock can be extended to synchronize more complicated operations.
347 */
348void test_percpu_spinlock(void)
349{
350 const int num_threads = opt_threads;
351 int i, ret;
352 uint64_t sum;
353 pthread_t test_threads[num_threads];
354 struct spinlock_test_data data;
355 struct spinlock_thread_test_data thread_data[num_threads];
356
357 memset(&data, 0, sizeof(data));
358 for (i = 0; i < num_threads; i++) {
359 thread_data[i].reps = opt_reps;
360 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
361 thread_data[i].reg = 1;
362 else
363 thread_data[i].reg = 0;
364 thread_data[i].data = &data;
365 ret = pthread_create(&test_threads[i], NULL,
366 test_percpu_spinlock_thread,
367 &thread_data[i]);
368 if (ret) {
369 errno = ret;
370 perror("pthread_create");
371 abort();
372 }
373 }
374
375 for (i = 0; i < num_threads; i++) {
376 ret = pthread_join(test_threads[i], NULL);
377 if (ret) {
378 errno = ret;
379 perror("pthread_join");
380 abort();
381 }
382 }
383
384 sum = 0;
385 for (i = 0; i < CPU_SETSIZE; i++)
386 sum += data.c[i].count;
387
388 assert(sum == (uint64_t)opt_reps * num_threads);
389}
390
391void *test_percpu_inc_thread(void *arg)
392{
393 struct inc_thread_test_data *thread_data = arg;
394 struct inc_test_data *data = thread_data->data;
395 long long i, reps;
396
397 if (!opt_disable_rseq && thread_data->reg &&
398 rseq_register_current_thread())
399 abort();
400 reps = thread_data->reps;
401 for (i = 0; i < reps; i++) {
402 int ret;
403
404 do {
405 int cpu;
406
407 cpu = rseq_cpu_start();
408 ret = rseq_addv(&data->c[cpu].count, 1, cpu);
409 } while (rseq_unlikely(ret));
410#ifndef BENCHMARK
411 if (i != 0 && !(i % (reps / 10)))
412 printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
413#endif
414 }
415 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
416 (int) gettid(), nr_abort, signals_delivered);
417 if (!opt_disable_rseq && thread_data->reg &&
418 rseq_unregister_current_thread())
419 abort();
420 return NULL;
421}
422
423void test_percpu_inc(void)
424{
425 const int num_threads = opt_threads;
426 int i, ret;
427 uint64_t sum;
428 pthread_t test_threads[num_threads];
429 struct inc_test_data data;
430 struct inc_thread_test_data thread_data[num_threads];
431
432 memset(&data, 0, sizeof(data));
433 for (i = 0; i < num_threads; i++) {
434 thread_data[i].reps = opt_reps;
435 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
436 thread_data[i].reg = 1;
437 else
438 thread_data[i].reg = 0;
439 thread_data[i].data = &data;
440 ret = pthread_create(&test_threads[i], NULL,
441 test_percpu_inc_thread,
442 &thread_data[i]);
443 if (ret) {
444 errno = ret;
445 perror("pthread_create");
446 abort();
447 }
448 }
449
450 for (i = 0; i < num_threads; i++) {
451 ret = pthread_join(test_threads[i], NULL);
452 if (ret) {
453 errno = ret;
454 perror("pthread_join");
455 abort();
456 }
457 }
458
459 sum = 0;
460 for (i = 0; i < CPU_SETSIZE; i++)
461 sum += data.c[i].count;
462
463 assert(sum == (uint64_t)opt_reps * num_threads);
464}
465
466void this_cpu_list_push(struct percpu_list *list,
467 struct percpu_list_node *node,
468 int *_cpu)
469{
470 int cpu;
471
472 for (;;) {
473 intptr_t *targetptr, newval, expect;
474 int ret;
475
476 cpu = rseq_cpu_start();
477 /* Load list->c[cpu].head with single-copy atomicity. */
478 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
479 newval = (intptr_t)node;
480 targetptr = (intptr_t *)&list->c[cpu].head;
481 node->next = (struct percpu_list_node *)expect;
482 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
483 if (rseq_likely(!ret))
484 break;
485 /* Retry if comparison fails or rseq aborts. */
486 }
487 if (_cpu)
488 *_cpu = cpu;
489}
490
491/*
492 * Unlike a traditional lock-less linked list; the availability of a
493 * rseq primitive allows us to implement pop without concerns over
494 * ABA-type races.
495 */
496struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
497 int *_cpu)
498{
499 struct percpu_list_node *node = NULL;
500 int cpu;
501
502 for (;;) {
503 struct percpu_list_node *head;
504 intptr_t *targetptr, expectnot, *load;
505 off_t offset;
506 int ret;
507
508 cpu = rseq_cpu_start();
509 targetptr = (intptr_t *)&list->c[cpu].head;
510 expectnot = (intptr_t)NULL;
511 offset = offsetof(struct percpu_list_node, next);
512 load = (intptr_t *)&head;
513 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
514 offset, load, cpu);
515 if (rseq_likely(!ret)) {
516 node = head;
517 break;
518 }
519 if (ret > 0)
520 break;
521 /* Retry if rseq aborts. */
522 }
523 if (_cpu)
524 *_cpu = cpu;
525 return node;
526}
527
528/*
529 * __percpu_list_pop is not safe against concurrent accesses. Should
530 * only be used on lists that are not concurrently modified.
531 */
532struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
533{
534 struct percpu_list_node *node;
535
536 node = list->c[cpu].head;
537 if (!node)
538 return NULL;
539 list->c[cpu].head = node->next;
540 return node;
541}
542
543void *test_percpu_list_thread(void *arg)
544{
545 long long i, reps;
546 struct percpu_list *list = (struct percpu_list *)arg;
547
548 if (!opt_disable_rseq && rseq_register_current_thread())
549 abort();
550
551 reps = opt_reps;
552 for (i = 0; i < reps; i++) {
553 struct percpu_list_node *node;
554
555 node = this_cpu_list_pop(list, NULL);
556 if (opt_yield)
557 sched_yield(); /* encourage shuffling */
558 if (node)
559 this_cpu_list_push(list, node, NULL);
560 }
561
562 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
563 (int) gettid(), nr_abort, signals_delivered);
564 if (!opt_disable_rseq && rseq_unregister_current_thread())
565 abort();
566
567 return NULL;
568}
569
570/* Simultaneous modification to a per-cpu linked list from many threads. */
571void test_percpu_list(void)
572{
573 const int num_threads = opt_threads;
574 int i, j, ret;
575 uint64_t sum = 0, expected_sum = 0;
576 struct percpu_list list;
577 pthread_t test_threads[num_threads];
578 cpu_set_t allowed_cpus;
579
580 memset(&list, 0, sizeof(list));
581
582 /* Generate list entries for every usable cpu. */
583 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
584 for (i = 0; i < CPU_SETSIZE; i++) {
585 if (!CPU_ISSET(i, &allowed_cpus))
586 continue;
587 for (j = 1; j <= 100; j++) {
588 struct percpu_list_node *node;
589
590 expected_sum += j;
591
592 node = malloc(sizeof(*node));
593 assert(node);
594 node->data = j;
595 node->next = list.c[i].head;
596 list.c[i].head = node;
597 }
598 }
599
600 for (i = 0; i < num_threads; i++) {
601 ret = pthread_create(&test_threads[i], NULL,
602 test_percpu_list_thread, &list);
603 if (ret) {
604 errno = ret;
605 perror("pthread_create");
606 abort();
607 }
608 }
609
610 for (i = 0; i < num_threads; i++) {
611 ret = pthread_join(test_threads[i], NULL);
612 if (ret) {
613 errno = ret;
614 perror("pthread_join");
615 abort();
616 }
617 }
618
619 for (i = 0; i < CPU_SETSIZE; i++) {
620 struct percpu_list_node *node;
621
622 if (!CPU_ISSET(i, &allowed_cpus))
623 continue;
624
625 while ((node = __percpu_list_pop(&list, i))) {
626 sum += node->data;
627 free(node);
628 }
629 }
630
631 /*
632 * All entries should now be accounted for (unless some external
633 * actor is interfering with our allowed affinity while this
634 * test is running).
635 */
636 assert(sum == expected_sum);
637}
638
639bool this_cpu_buffer_push(struct percpu_buffer *buffer,
640 struct percpu_buffer_node *node,
641 int *_cpu)
642{
643 bool result = false;
644 int cpu;
645
646 for (;;) {
647 intptr_t *targetptr_spec, newval_spec;
648 intptr_t *targetptr_final, newval_final;
649 intptr_t offset;
650 int ret;
651
652 cpu = rseq_cpu_start();
653 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
654 if (offset == buffer->c[cpu].buflen)
655 break;
656 newval_spec = (intptr_t)node;
657 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
658 newval_final = offset + 1;
659 targetptr_final = &buffer->c[cpu].offset;
660 if (opt_mb)
661 ret = rseq_cmpeqv_trystorev_storev_release(
662 targetptr_final, offset, targetptr_spec,
663 newval_spec, newval_final, cpu);
664 else
665 ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
666 offset, targetptr_spec, newval_spec,
667 newval_final, cpu);
668 if (rseq_likely(!ret)) {
669 result = true;
670 break;
671 }
672 /* Retry if comparison fails or rseq aborts. */
673 }
674 if (_cpu)
675 *_cpu = cpu;
676 return result;
677}
678
679struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
680 int *_cpu)
681{
682 struct percpu_buffer_node *head;
683 int cpu;
684
685 for (;;) {
686 intptr_t *targetptr, newval;
687 intptr_t offset;
688 int ret;
689
690 cpu = rseq_cpu_start();
691 /* Load offset with single-copy atomicity. */
692 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
693 if (offset == 0) {
694 head = NULL;
695 break;
696 }
697 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
698 newval = offset - 1;
699 targetptr = (intptr_t *)&buffer->c[cpu].offset;
700 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
701 (intptr_t *)&buffer->c[cpu].array[offset - 1],
702 (intptr_t)head, newval, cpu);
703 if (rseq_likely(!ret))
704 break;
705 /* Retry if comparison fails or rseq aborts. */
706 }
707 if (_cpu)
708 *_cpu = cpu;
709 return head;
710}
711
712/*
713 * __percpu_buffer_pop is not safe against concurrent accesses. Should
714 * only be used on buffers that are not concurrently modified.
715 */
716struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
717 int cpu)
718{
719 struct percpu_buffer_node *head;
720 intptr_t offset;
721
722 offset = buffer->c[cpu].offset;
723 if (offset == 0)
724 return NULL;
725 head = buffer->c[cpu].array[offset - 1];
726 buffer->c[cpu].offset = offset - 1;
727 return head;
728}
729
730void *test_percpu_buffer_thread(void *arg)
731{
732 long long i, reps;
733 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
734
735 if (!opt_disable_rseq && rseq_register_current_thread())
736 abort();
737
738 reps = opt_reps;
739 for (i = 0; i < reps; i++) {
740 struct percpu_buffer_node *node;
741
742 node = this_cpu_buffer_pop(buffer, NULL);
743 if (opt_yield)
744 sched_yield(); /* encourage shuffling */
745 if (node) {
746 if (!this_cpu_buffer_push(buffer, node, NULL)) {
747 /* Should increase buffer size. */
748 abort();
749 }
750 }
751 }
752
753 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
754 (int) gettid(), nr_abort, signals_delivered);
755 if (!opt_disable_rseq && rseq_unregister_current_thread())
756 abort();
757
758 return NULL;
759}
760
761/* Simultaneous modification to a per-cpu buffer from many threads. */
762void test_percpu_buffer(void)
763{
764 const int num_threads = opt_threads;
765 int i, j, ret;
766 uint64_t sum = 0, expected_sum = 0;
767 struct percpu_buffer buffer;
768 pthread_t test_threads[num_threads];
769 cpu_set_t allowed_cpus;
770
771 memset(&buffer, 0, sizeof(buffer));
772
773 /* Generate list entries for every usable cpu. */
774 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
775 for (i = 0; i < CPU_SETSIZE; i++) {
776 if (!CPU_ISSET(i, &allowed_cpus))
777 continue;
778 /* Worse-case is every item in same CPU. */
779 buffer.c[i].array =
780 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
781 BUFFER_ITEM_PER_CPU);
782 assert(buffer.c[i].array);
783 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
784 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
785 struct percpu_buffer_node *node;
786
787 expected_sum += j;
788
789 /*
790 * We could theoretically put the word-sized
791 * "data" directly in the buffer. However, we
792 * want to model objects that would not fit
793 * within a single word, so allocate an object
794 * for each node.
795 */
796 node = malloc(sizeof(*node));
797 assert(node);
798 node->data = j;
799 buffer.c[i].array[j - 1] = node;
800 buffer.c[i].offset++;
801 }
802 }
803
804 for (i = 0; i < num_threads; i++) {
805 ret = pthread_create(&test_threads[i], NULL,
806 test_percpu_buffer_thread, &buffer);
807 if (ret) {
808 errno = ret;
809 perror("pthread_create");
810 abort();
811 }
812 }
813
814 for (i = 0; i < num_threads; i++) {
815 ret = pthread_join(test_threads[i], NULL);
816 if (ret) {
817 errno = ret;
818 perror("pthread_join");
819 abort();
820 }
821 }
822
823 for (i = 0; i < CPU_SETSIZE; i++) {
824 struct percpu_buffer_node *node;
825
826 if (!CPU_ISSET(i, &allowed_cpus))
827 continue;
828
829 while ((node = __percpu_buffer_pop(&buffer, i))) {
830 sum += node->data;
831 free(node);
832 }
833 free(buffer.c[i].array);
834 }
835
836 /*
837 * All entries should now be accounted for (unless some external
838 * actor is interfering with our allowed affinity while this
839 * test is running).
840 */
841 assert(sum == expected_sum);
842}
843
/*
 * Push one node (by value) onto the current CPU's memcpy-based per-cpu
 * buffer, using an rseq compare+memcpy+store sequence that retries on
 * migration/abort. Returns true on success, false if that CPU's buffer
 * is full. If @_cpu is non-NULL, the CPU the final attempt ran on is
 * stored there.
 */
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;	/* Buffer full. */
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		/* -M selects the store-release (memory barrier) variant. */
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
887
/*
 * Pop one node (copied out into *@item) from the current CPU's
 * memcpy-based per-cpu buffer, using an rseq compare+memcpy+store
 * sequence that retries on migration/abort. Returns true on success,
 * false if that CPU's buffer is empty. If @_cpu is non-NULL, the CPU
 * the final attempt ran on is stored there.
 */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;	/* Buffer empty. */
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
925
926/*
927 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
928 * only be used on buffers that are not concurrently modified.
929 */
930bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
931 struct percpu_memcpy_buffer_node *item,
932 int cpu)
933{
934 intptr_t offset;
935
936 offset = buffer->c[cpu].offset;
937 if (offset == 0)
938 return false;
939 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
940 buffer->c[cpu].offset = offset - 1;
941 return true;
942}
943
944void *test_percpu_memcpy_buffer_thread(void *arg)
945{
946 long long i, reps;
947 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
948
949 if (!opt_disable_rseq && rseq_register_current_thread())
950 abort();
951
952 reps = opt_reps;
953 for (i = 0; i < reps; i++) {
954 struct percpu_memcpy_buffer_node item;
955 bool result;
956
957 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
958 if (opt_yield)
959 sched_yield(); /* encourage shuffling */
960 if (result) {
961 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
962 /* Should increase buffer size. */
963 abort();
964 }
965 }
966 }
967
968 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
969 (int) gettid(), nr_abort, signals_delivered);
970 if (!opt_disable_rseq && rseq_unregister_current_thread())
971 abort();
972
973 return NULL;
974}
975
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worse-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			/* Each node contributes data1 (j) + data2 (j+1). */
			expected_sum += 2 * j + 1;

			/*
			 * We want to model objects that would not fit
			 * within a single word, so each node carries
			 * two words which are copied into and out of
			 * the buffer by value (memcpy).
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	/* Launch worker threads that pop/push nodes concurrently. */
	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	/* Single-threaded now: drain every buffer and tally the data. */
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
1055
1056static void test_signal_interrupt_handler(int signo)
1057{
1058 signals_delivered++;
1059}
1060
1061static int set_signal_handler(void)
1062{
1063 int ret = 0;
1064 struct sigaction sa;
1065 sigset_t sigset;
1066
1067 ret = sigemptyset(&sigset);
1068 if (ret < 0) {
1069 perror("sigemptyset");
1070 return ret;
1071 }
1072
1073 sa.sa_handler = test_signal_interrupt_handler;
1074 sa.sa_mask = sigset;
1075 sa.sa_flags = 0;
1076 ret = sigaction(SIGUSR1, &sa, NULL);
1077 if (ret < 0) {
1078 perror("sigaction");
1079 return ret;
1080 }
1081
1082 printf_verbose("Signal handler set for SIGUSR1\n");
1083
1084 return ret;
1085}
1086
/* Print the command-line help text to stdout. */
static void show_usage(int argc, char **argv)
{
	/* One entry per option line; printed in order, newline appended. */
	static const char * const option_lines[] = {
		"  [-1 loops] Number of loops for delay injection 1",
		"  [-2 loops] Number of loops for delay injection 2",
		"  [-3 loops] Number of loops for delay injection 3",
		"  [-4 loops] Number of loops for delay injection 4",
		"  [-5 loops] Number of loops for delay injection 5",
		"  [-6 loops] Number of loops for delay injection 6",
		"  [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)",
		"  [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)",
		"  [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)",
		"  [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)",
		"  [-y] Yield",
		"  [-k] Kill thread with signal",
		"  [-s S] S: =0: disabled (default), >0: sleep time (ms)",
		"  [-t N] Number of threads (default 200)",
		"  [-r N] Number of repetitions per thread (default 5000)",
		"  [-d] Disable rseq system call (no initialization)",
		"  [-D M] Disable rseq for each M threads",
		"  [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement",
		"  [-M] Push into buffer and memcpy buffer with memory barriers.",
		"  [-v] Verbose output.",
		"  [-h] Show this help.",
	};
	size_t k;

	printf("Usage : %s <OPTIONS>\n", argv[0]);
	printf("OPTIONS:\n");
	for (k = 0; k < sizeof(option_lines) / sizeof(option_lines[0]); k++)
		puts(option_lines[k]);
	printf("\n");
}
1115
/*
 * Entry point: parse command-line options, install the SIGUSR1 handler,
 * register the main thread with rseq (unless -d), run the test selected
 * with -T, then unregister. Returns 0 on success (or -h), -1 on
 * usage/setup error.
 */
int main(int argc, char **argv)
{
	int i;

	/* Hand-rolled option parser; non-dash arguments are skipped. */
	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			/* -<N> loops: delay-injection loop count for point N. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			/* NOTE(review): atol() silently yields 0 on bad input. */
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			/* -m N: yield/sleep/kill every modulo N. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			/* -s S: sleep S ms at injection points. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			/* Deliver SIGUSR1 to threads during the test. */
			opt_signal = 1;
			break;
		case 'd':
			/* Skip rseq registration entirely. */
			opt_disable_rseq = 1;
			break;
		case 'D':
			/* -D M: disable rseq for each M threads. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			/* -t N: number of worker threads. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			/* -r N: repetitions per thread. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			/* -T test: select which data structure to exercise. */
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			/* Use store-release (memory barrier) push variants. */
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	/*
	 * Copy loop counts 1-6 into individual variables.
	 * NOTE(review): presumably read by the delay-injection macros;
	 * confirm whether counts 7-9 are consumed via loop_cnt[] directly.
	 */
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	/* Register the main thread; unregistered symmetrically below. */
	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
new file mode 100644
index 000000000000..3cea19877227
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -0,0 +1,716 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * rseq-arm.h
4 *
5 * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 */
7
/* Signature placed after the abort-handler table entry (see RSEQ_SIG word
 * emitted by __RSEQ_ASM_DEFINE_ABORT below). */
#define RSEQ_SIG 0x53053053

/* ARM barriers: "dmb" serves as full, read and write memory barrier. */
#define rseq_smp_mb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")

/* Load-acquire: plain load followed by a full barrier. */
#define rseq_smp_load_acquire(p)					\
__extension__ ({							\
	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
	rseq_smp_mb();							\
	____p1;								\
})

#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()

/* Store-release: full barrier followed by a plain store. */
#define rseq_smp_store_release(p, v)					\
do {									\
	rseq_smp_mb();							\
	RSEQ_WRITE_ONCE(*p, v);						\
} while (0)

#ifdef RSEQ_SKIP_FASTPATH
#include "rseq-skip.h"
#else /* !RSEQ_SKIP_FASTPATH */

/*
 * Emit a struct rseq_cs descriptor (version, flags, start_ip,
 * post_commit_offset, abort_ip — 32-bit values padded to 64-bit slots)
 * into the __rseq_table section.
 */
#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip,		\
				post_commit_offset, abort_ip)		\
		".pushsection __rseq_table, \"aw\"\n\t"			\
		".balign 32\n\t"					\
		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
		".popsection\n\t"

#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip)	\
	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip,			\
				(post_commit_ip - start_ip), abort_ip)

/* Point __rseq_abi.rseq_cs at the table entry: enters the critical section. */
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
		RSEQ_INJECT_ASM(1)					\
		"adr r0, " __rseq_str(cs_label) "\n\t"			\
		"str r0, %[" __rseq_str(rseq_cs) "]\n\t"		\
		__rseq_str(label) ":\n\t"

/* Branch to @label if the current cpu_id does not match @cpu_id. */
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
		RSEQ_INJECT_ASM(2)					\
		"ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t"		\
		"cmp %[" __rseq_str(cpu_id) "], r0\n\t"			\
		"bne " __rseq_str(label) "\n\t"

/*
 * Inline abort handler: an in-text table entry followed by the RSEQ_SIG
 * signature word, then teardown code and a branch to the C abort label.
 */
#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
				abort_label, version, flags,		\
				start_ip, post_commit_offset, abort_ip)	\
		".balign 32\n\t"					\
		__rseq_str(table_label) ":\n\t"				\
		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
		".word " __rseq_str(RSEQ_SIG) "\n\t"			\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"b %l[" __rseq_str(abort_label) "]\n\t"

#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
			      start_ip, post_commit_ip, abort_ip)	\
	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
				abort_label, 0x0, 0x0, start_ip,	\
				(post_commit_ip - start_ip), abort_ip)

/* Compare-failure exit: teardown code, then branch to the C cmpfail label. */
#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"b %l[" __rseq_str(cmpfail_label) "]\n\t"

#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
81
/*
 * On CPU @cpu: if *v == expect, store newv into *v, all within one rseq
 * critical section. Returns 0 on commit, 1 if *v != expect (cmpfail),
 * -1 on rseq abort (migration/preemption/signal).
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[error2]\n\t"
#endif
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
142
/*
 * On CPU @cpu: if *v != expectnot, save *v into *load, then store the
 * value found at address (*v + voffp) into *v (list-pop style update).
 * Returns 0 on commit, 1 if *v == expectnot (cmpfail), -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
			       off_t voffp, intptr_t *load, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expectnot], r0\n\t"
		"beq %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"ldr r0, %[v]\n\t"
		"cmp %[expectnot], r0\n\t"
		"beq %l[error2]\n\t"
#endif
		"str r0, %[load]\n\t"
		"add r0, %[voffp]\n\t"
		"ldr r0, [r0]\n\t"
		/* final store */
		"str r0, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v] "m" (*v),
		  [expectnot] "r" (expectnot),
		  [voffp] "Ir" (voffp),
		  [load] "m" (*load)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
209
/*
 * On CPU @cpu: add @count to *v within an rseq critical section.
 * Returns 0 on commit, -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
#endif
		"ldr r0, %[v]\n\t"
		"add r0, %[count]\n\t"
		/* final store */
		"str r0, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(4)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  [v] "m" (*v),
		  [count] "Ir" (count)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort
#ifdef RSEQ_COMPARE_TWICE
		  , error1
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
#endif
}
259
/*
 * On CPU @cpu: if *v == expect, store newv2 into *v2, then store newv
 * into *v (the commit point), all within one rseq critical section.
 * Returns 0 on commit, 1 if *v != expect (cmpfail), -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
				 intptr_t *v2, intptr_t newv2,
				 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[error2]\n\t"
#endif
		/* try store */
		"str %[newv2], %[v2]\n\t"
		RSEQ_INJECT_ASM(5)
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2] "m" (*v2),
		  [newv2] "r" (newv2),
		  /* final store input */
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
329
/*
 * Same as rseq_cmpeqv_trystorev_storev(), but a "dmb" barrier before the
 * final store gives the commit store release semantics.
 * Returns 0 on commit, 1 if *v != expect (cmpfail), -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
					 intptr_t *v2, intptr_t newv2,
					 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[error2]\n\t"
#endif
		/* try store */
		"str %[newv2], %[v2]\n\t"
		RSEQ_INJECT_ASM(5)
		"dmb\n\t" /* full mb provides store-release */
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2] "m" (*v2),
		  [newv2] "r" (newv2),
		  /* final store input */
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
400
/*
 * On CPU @cpu: if *v == expect AND *v2 == expect2, store newv into *v,
 * all within one rseq critical section.
 * Returns 0 on commit, 1 if either comparison fails (cmpfail), -1 on
 * rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
			      intptr_t *v2, intptr_t expect2,
			      intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
		"ldr r0, %[v2]\n\t"
		"cmp %[expect2], r0\n\t"
		"bne %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne %l[error2]\n\t"
		"ldr r0, %[v2]\n\t"
		"cmp %[expect2], r0\n\t"
		"bne %l[error3]\n\t"
#endif
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		"b 5f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
		"5:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* cmp2 input */
		  [v2] "m" (*v2),
		  [expect2] "r" (expect2),
		  /* final store input */
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv)
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2, error3
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("1st expected value comparison failed");
error3:
	rseq_bug("2nd expected value comparison failed");
#endif
}
476
/*
 * On CPU @cpu: if *v == expect, copy @len bytes from @src to @dst (byte
 * copy loop), then store newv into *v (the commit point), all within one
 * rseq critical section. src/dst/len are spilled to a stack scratch area
 * because the copy loop modifies those registers and they must be
 * restored on every exit path.
 * Returns 0 on commit, 1 if *v != expect (cmpfail), -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
				 void *dst, void *src, size_t len,
				 intptr_t newv, int cpu)
{
	uint32_t rseq_scratch[3];

	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		"str %[src], %[rseq_scratch0]\n\t"
		"str %[dst], %[rseq_scratch1]\n\t"
		"str %[len], %[rseq_scratch2]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne 5f\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne 7f\n\t"
#endif
		/* try memcpy */
		"cmp %[len], #0\n\t" \
		"beq 333f\n\t" \
		"222:\n\t" \
		"ldrb %%r0, [%[src]]\n\t" \
		"strb %%r0, [%[dst]]\n\t" \
		"adds %[src], #1\n\t" \
		"adds %[dst], #1\n\t" \
		"subs %[len], #1\n\t" \
		"bne 222b\n\t" \
		"333:\n\t" \
		RSEQ_INJECT_ASM(5)
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		/* teardown */
		"ldr %[len], %[rseq_scratch2]\n\t"
		"ldr %[dst], %[rseq_scratch1]\n\t"
		"ldr %[src], %[rseq_scratch0]\n\t"
		"b 8f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4,
				      /* teardown */
				      "ldr %[len], %[rseq_scratch2]\n\t"
				      "ldr %[dst], %[rseq_scratch1]\n\t"
				      "ldr %[src], %[rseq_scratch0]\n\t",
				      abort, 1b, 2b, 4f)
		RSEQ_ASM_DEFINE_CMPFAIL(5,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					cmpfail)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_DEFINE_CMPFAIL(6,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					error1)
		RSEQ_ASM_DEFINE_CMPFAIL(7,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					error2)
#endif
		"8:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv),
		  /* try memcpy input */
		  [dst] "r" (dst),
		  [src] "r" (src),
		  [len] "r" (len),
		  [rseq_scratch0] "m" (rseq_scratch[0]),
		  [rseq_scratch1] "m" (rseq_scratch[1]),
		  [rseq_scratch2] "m" (rseq_scratch[2])
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_workaround_gcc_asm_size_guess();
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_workaround_gcc_asm_size_guess();
	rseq_bug("expected value comparison failed");
#endif
}
595
/*
 * Same as rseq_cmpeqv_trymemcpy_storev(), but a "dmb" barrier before the
 * final store gives the commit store release semantics.
 * Returns 0 on commit, 1 if *v != expect (cmpfail), -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
					 void *dst, void *src, size_t len,
					 intptr_t newv, int cpu)
{
	uint32_t rseq_scratch[3];

	RSEQ_INJECT_C(9)

	rseq_workaround_gcc_asm_size_guess();
	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
		"str %[src], %[rseq_scratch0]\n\t"
		"str %[dst], %[rseq_scratch1]\n\t"
		"str %[len], %[rseq_scratch2]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne 5f\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
		"ldr r0, %[v]\n\t"
		"cmp %[expect], r0\n\t"
		"bne 7f\n\t"
#endif
		/* try memcpy */
		"cmp %[len], #0\n\t" \
		"beq 333f\n\t" \
		"222:\n\t" \
		"ldrb %%r0, [%[src]]\n\t" \
		"strb %%r0, [%[dst]]\n\t" \
		"adds %[src], #1\n\t" \
		"adds %[dst], #1\n\t" \
		"subs %[len], #1\n\t" \
		"bne 222b\n\t" \
		"333:\n\t" \
		RSEQ_INJECT_ASM(5)
		"dmb\n\t" /* full mb provides store-release */
		/* final store */
		"str %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		/* teardown */
		"ldr %[len], %[rseq_scratch2]\n\t"
		"ldr %[dst], %[rseq_scratch1]\n\t"
		"ldr %[src], %[rseq_scratch0]\n\t"
		"b 8f\n\t"
		RSEQ_ASM_DEFINE_ABORT(3, 4,
				      /* teardown */
				      "ldr %[len], %[rseq_scratch2]\n\t"
				      "ldr %[dst], %[rseq_scratch1]\n\t"
				      "ldr %[src], %[rseq_scratch0]\n\t",
				      abort, 1b, 2b, 4f)
		RSEQ_ASM_DEFINE_CMPFAIL(5,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					cmpfail)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_DEFINE_CMPFAIL(6,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					error1)
		RSEQ_ASM_DEFINE_CMPFAIL(7,
					/* teardown */
					"ldr %[len], %[rseq_scratch2]\n\t"
					"ldr %[dst], %[rseq_scratch1]\n\t"
					"ldr %[src], %[rseq_scratch0]\n\t",
					error2)
#endif
		"8:\n\t"
		: /* gcc asm goto does not allow outputs */
		: [cpu_id] "r" (cpu),
		  [current_cpu_id] "m" (__rseq_abi.cpu_id),
		  [rseq_cs] "m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v] "m" (*v),
		  [expect] "r" (expect),
		  [newv] "r" (newv),
		  /* try memcpy input */
		  [dst] "r" (dst),
		  [src] "r" (src),
		  [len] "r" (len),
		  [rseq_scratch0] "m" (rseq_scratch[0]),
		  [rseq_scratch1] "m" (rseq_scratch[1]),
		  [rseq_scratch2] "m" (rseq_scratch[2])
		  RSEQ_INJECT_INPUT
		: "r0", "memory", "cc"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		  , error1, error2
#endif
	);
	rseq_workaround_gcc_asm_size_guess();
	return 0;
abort:
	rseq_workaround_gcc_asm_size_guess();
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	rseq_workaround_gcc_asm_size_guess();
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_workaround_gcc_asm_size_guess();
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_workaround_gcc_asm_size_guess();
	rseq_bug("expected value comparison failed");
#endif
}
715
716#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000000..7f48ecf46994
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * Author: Paul Burton <paul.burton@mips.com>
4 * (C) Copyright 2018 MIPS Tech LLC
5 *
6 * Based on rseq-arm.h:
7 * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
8 */
9
10#define RSEQ_SIG 0x53053053
11
12#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory")
13#define rseq_smp_rmb() rseq_smp_mb()
14#define rseq_smp_wmb() rseq_smp_mb()
15
16#define rseq_smp_load_acquire(p) \
17__extension__ ({ \
18 __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
19 rseq_smp_mb(); \
20 ____p1; \
21})
22
23#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
24
25#define rseq_smp_store_release(p, v) \
26do { \
27 rseq_smp_mb(); \
28 RSEQ_WRITE_ONCE(*p, v); \
29} while (0)
30
31#ifdef RSEQ_SKIP_FASTPATH
32#include "rseq-skip.h"
33#else /* !RSEQ_SKIP_FASTPATH */
34
35#if _MIPS_SZLONG == 64
36# define LONG ".dword"
37# define LONG_LA "dla"
38# define LONG_L "ld"
39# define LONG_S "sd"
40# define LONG_ADDI "daddiu"
41# define U32_U64_PAD(x) x
42#elif _MIPS_SZLONG == 32
43# define LONG ".word"
44# define LONG_LA "la"
45# define LONG_L "lw"
46# define LONG_S "sw"
47# define LONG_ADDI "addiu"
48# ifdef __BIG_ENDIAN
49# define U32_U64_PAD(x) "0x0, " x
50# else
51# define U32_U64_PAD(x) x ", 0x0"
52# endif
53#else
54# error unsupported _MIPS_SZLONG
55#endif
56
57#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
58 post_commit_offset, abort_ip) \
59 ".pushsection __rseq_table, \"aw\"\n\t" \
60 ".balign 32\n\t" \
61 ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
62 LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
63 LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
64 LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
65 ".popsection\n\t"
66
67#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
68 __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
69 (post_commit_ip - start_ip), abort_ip)
70
71#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
72 RSEQ_INJECT_ASM(1) \
73 LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
74 LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
75 __rseq_str(label) ":\n\t"
76
77#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
78 RSEQ_INJECT_ASM(2) \
79 "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
80 "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
81
82#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
83 abort_label, version, flags, \
84 start_ip, post_commit_offset, abort_ip) \
85 ".balign 32\n\t" \
86 __rseq_str(table_label) ":\n\t" \
87 ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
88 LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
89 LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
90 LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
91 ".word " __rseq_str(RSEQ_SIG) "\n\t" \
92 __rseq_str(label) ":\n\t" \
93 teardown \
94 "b %l[" __rseq_str(abort_label) "]\n\t"
95
96#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
97 start_ip, post_commit_ip, abort_ip) \
98 __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
99 abort_label, 0x0, 0x0, start_ip, \
100 (post_commit_ip - start_ip), abort_ip)
101
102#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
103 __rseq_str(label) ":\n\t" \
104 teardown \
105 "b %l[" __rseq_str(cmpfail_label) "]\n\t"
106
107#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
108
109static inline __attribute__((always_inline))
110int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
111{
112 RSEQ_INJECT_C(9)
113
114 rseq_workaround_gcc_asm_size_guess();
115 __asm__ __volatile__ goto (
116 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
117 /* Start rseq by storing table entry pointer into rseq_cs. */
118 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
119 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
120 RSEQ_INJECT_ASM(3)
121 LONG_L " $4, %[v]\n\t"
122 "bne $4, %[expect], %l[cmpfail]\n\t"
123 RSEQ_INJECT_ASM(4)
124#ifdef RSEQ_COMPARE_TWICE
125 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
126 LONG_L " $4, %[v]\n\t"
127 "bne $4, %[expect], %l[error2]\n\t"
128#endif
129 /* final store */
130 LONG_S " %[newv], %[v]\n\t"
131 "2:\n\t"
132 RSEQ_INJECT_ASM(5)
133 "b 5f\n\t"
134 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
135 "5:\n\t"
136 : /* gcc asm goto does not allow outputs */
137 : [cpu_id] "r" (cpu),
138 [current_cpu_id] "m" (__rseq_abi.cpu_id),
139 [rseq_cs] "m" (__rseq_abi.rseq_cs),
140 [v] "m" (*v),
141 [expect] "r" (expect),
142 [newv] "r" (newv)
143 RSEQ_INJECT_INPUT
144 : "$4", "memory"
145 RSEQ_INJECT_CLOBBER
146 : abort, cmpfail
147#ifdef RSEQ_COMPARE_TWICE
148 , error1, error2
149#endif
150 );
151 rseq_workaround_gcc_asm_size_guess();
152 return 0;
153abort:
154 rseq_workaround_gcc_asm_size_guess();
155 RSEQ_INJECT_FAILED
156 return -1;
157cmpfail:
158 rseq_workaround_gcc_asm_size_guess();
159 return 1;
160#ifdef RSEQ_COMPARE_TWICE
161error1:
162 rseq_bug("cpu_id comparison failed");
163error2:
164 rseq_bug("expected value comparison failed");
165#endif
166}
167
168static inline __attribute__((always_inline))
169int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
170 off_t voffp, intptr_t *load, int cpu)
171{
172 RSEQ_INJECT_C(9)
173
174 rseq_workaround_gcc_asm_size_guess();
175 __asm__ __volatile__ goto (
176 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
177 /* Start rseq by storing table entry pointer into rseq_cs. */
178 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
179 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
180 RSEQ_INJECT_ASM(3)
181 LONG_L " $4, %[v]\n\t"
182 "beq $4, %[expectnot], %l[cmpfail]\n\t"
183 RSEQ_INJECT_ASM(4)
184#ifdef RSEQ_COMPARE_TWICE
185 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
186 LONG_L " $4, %[v]\n\t"
187 "beq $4, %[expectnot], %l[error2]\n\t"
188#endif
189 LONG_S " $4, %[load]\n\t"
190 LONG_ADDI " $4, %[voffp]\n\t"
191 LONG_L " $4, 0($4)\n\t"
192 /* final store */
193 LONG_S " $4, %[v]\n\t"
194 "2:\n\t"
195 RSEQ_INJECT_ASM(5)
196 "b 5f\n\t"
197 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
198 "5:\n\t"
199 : /* gcc asm goto does not allow outputs */
200 : [cpu_id] "r" (cpu),
201 [current_cpu_id] "m" (__rseq_abi.cpu_id),
202 [rseq_cs] "m" (__rseq_abi.rseq_cs),
203 /* final store input */
204 [v] "m" (*v),
205 [expectnot] "r" (expectnot),
206 [voffp] "Ir" (voffp),
207 [load] "m" (*load)
208 RSEQ_INJECT_INPUT
209 : "$4", "memory"
210 RSEQ_INJECT_CLOBBER
211 : abort, cmpfail
212#ifdef RSEQ_COMPARE_TWICE
213 , error1, error2
214#endif
215 );
216 rseq_workaround_gcc_asm_size_guess();
217 return 0;
218abort:
219 rseq_workaround_gcc_asm_size_guess();
220 RSEQ_INJECT_FAILED
221 return -1;
222cmpfail:
223 rseq_workaround_gcc_asm_size_guess();
224 return 1;
225#ifdef RSEQ_COMPARE_TWICE
226error1:
227 rseq_bug("cpu_id comparison failed");
228error2:
229 rseq_bug("expected value comparison failed");
230#endif
231}
232
233static inline __attribute__((always_inline))
234int rseq_addv(intptr_t *v, intptr_t count, int cpu)
235{
236 RSEQ_INJECT_C(9)
237
238 rseq_workaround_gcc_asm_size_guess();
239 __asm__ __volatile__ goto (
240 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
241 /* Start rseq by storing table entry pointer into rseq_cs. */
242 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
243 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
244 RSEQ_INJECT_ASM(3)
245#ifdef RSEQ_COMPARE_TWICE
246 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
247#endif
248 LONG_L " $4, %[v]\n\t"
249 LONG_ADDI " $4, %[count]\n\t"
250 /* final store */
251 LONG_S " $4, %[v]\n\t"
252 "2:\n\t"
253 RSEQ_INJECT_ASM(4)
254 "b 5f\n\t"
255 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
256 "5:\n\t"
257 : /* gcc asm goto does not allow outputs */
258 : [cpu_id] "r" (cpu),
259 [current_cpu_id] "m" (__rseq_abi.cpu_id),
260 [rseq_cs] "m" (__rseq_abi.rseq_cs),
261 [v] "m" (*v),
262 [count] "Ir" (count)
263 RSEQ_INJECT_INPUT
264 : "$4", "memory"
265 RSEQ_INJECT_CLOBBER
266 : abort
267#ifdef RSEQ_COMPARE_TWICE
268 , error1
269#endif
270 );
271 rseq_workaround_gcc_asm_size_guess();
272 return 0;
273abort:
274 rseq_workaround_gcc_asm_size_guess();
275 RSEQ_INJECT_FAILED
276 return -1;
277#ifdef RSEQ_COMPARE_TWICE
278error1:
279 rseq_bug("cpu_id comparison failed");
280#endif
281}
282
283static inline __attribute__((always_inline))
284int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
285 intptr_t *v2, intptr_t newv2,
286 intptr_t newv, int cpu)
287{
288 RSEQ_INJECT_C(9)
289
290 rseq_workaround_gcc_asm_size_guess();
291 __asm__ __volatile__ goto (
292 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
293 /* Start rseq by storing table entry pointer into rseq_cs. */
294 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
295 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
296 RSEQ_INJECT_ASM(3)
297 LONG_L " $4, %[v]\n\t"
298 "bne $4, %[expect], %l[cmpfail]\n\t"
299 RSEQ_INJECT_ASM(4)
300#ifdef RSEQ_COMPARE_TWICE
301 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
302 LONG_L " $4, %[v]\n\t"
303 "bne $4, %[expect], %l[error2]\n\t"
304#endif
305 /* try store */
306 LONG_S " %[newv2], %[v2]\n\t"
307 RSEQ_INJECT_ASM(5)
308 /* final store */
309 LONG_S " %[newv], %[v]\n\t"
310 "2:\n\t"
311 RSEQ_INJECT_ASM(6)
312 "b 5f\n\t"
313 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
314 "5:\n\t"
315 : /* gcc asm goto does not allow outputs */
316 : [cpu_id] "r" (cpu),
317 [current_cpu_id] "m" (__rseq_abi.cpu_id),
318 [rseq_cs] "m" (__rseq_abi.rseq_cs),
319 /* try store input */
320 [v2] "m" (*v2),
321 [newv2] "r" (newv2),
322 /* final store input */
323 [v] "m" (*v),
324 [expect] "r" (expect),
325 [newv] "r" (newv)
326 RSEQ_INJECT_INPUT
327 : "$4", "memory"
328 RSEQ_INJECT_CLOBBER
329 : abort, cmpfail
330#ifdef RSEQ_COMPARE_TWICE
331 , error1, error2
332#endif
333 );
334 rseq_workaround_gcc_asm_size_guess();
335 return 0;
336abort:
337 rseq_workaround_gcc_asm_size_guess();
338 RSEQ_INJECT_FAILED
339 return -1;
340cmpfail:
341 rseq_workaround_gcc_asm_size_guess();
342 return 1;
343#ifdef RSEQ_COMPARE_TWICE
344error1:
345 rseq_bug("cpu_id comparison failed");
346error2:
347 rseq_bug("expected value comparison failed");
348#endif
349}
350
351static inline __attribute__((always_inline))
352int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
353 intptr_t *v2, intptr_t newv2,
354 intptr_t newv, int cpu)
355{
356 RSEQ_INJECT_C(9)
357
358 rseq_workaround_gcc_asm_size_guess();
359 __asm__ __volatile__ goto (
360 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
361 /* Start rseq by storing table entry pointer into rseq_cs. */
362 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
363 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
364 RSEQ_INJECT_ASM(3)
365 LONG_L " $4, %[v]\n\t"
366 "bne $4, %[expect], %l[cmpfail]\n\t"
367 RSEQ_INJECT_ASM(4)
368#ifdef RSEQ_COMPARE_TWICE
369 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
370 LONG_L " $4, %[v]\n\t"
371 "bne $4, %[expect], %l[error2]\n\t"
372#endif
373 /* try store */
374 LONG_S " %[newv2], %[v2]\n\t"
375 RSEQ_INJECT_ASM(5)
376 "sync\n\t" /* full sync provides store-release */
377 /* final store */
378 LONG_S " %[newv], %[v]\n\t"
379 "2:\n\t"
380 RSEQ_INJECT_ASM(6)
381 "b 5f\n\t"
382 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
383 "5:\n\t"
384 : /* gcc asm goto does not allow outputs */
385 : [cpu_id] "r" (cpu),
386 [current_cpu_id] "m" (__rseq_abi.cpu_id),
387 [rseq_cs] "m" (__rseq_abi.rseq_cs),
388 /* try store input */
389 [v2] "m" (*v2),
390 [newv2] "r" (newv2),
391 /* final store input */
392 [v] "m" (*v),
393 [expect] "r" (expect),
394 [newv] "r" (newv)
395 RSEQ_INJECT_INPUT
396 : "$4", "memory"
397 RSEQ_INJECT_CLOBBER
398 : abort, cmpfail
399#ifdef RSEQ_COMPARE_TWICE
400 , error1, error2
401#endif
402 );
403 rseq_workaround_gcc_asm_size_guess();
404 return 0;
405abort:
406 rseq_workaround_gcc_asm_size_guess();
407 RSEQ_INJECT_FAILED
408 return -1;
409cmpfail:
410 rseq_workaround_gcc_asm_size_guess();
411 return 1;
412#ifdef RSEQ_COMPARE_TWICE
413error1:
414 rseq_bug("cpu_id comparison failed");
415error2:
416 rseq_bug("expected value comparison failed");
417#endif
418}
419
420static inline __attribute__((always_inline))
421int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
422 intptr_t *v2, intptr_t expect2,
423 intptr_t newv, int cpu)
424{
425 RSEQ_INJECT_C(9)
426
427 rseq_workaround_gcc_asm_size_guess();
428 __asm__ __volatile__ goto (
429 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
430 /* Start rseq by storing table entry pointer into rseq_cs. */
431 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
432 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
433 RSEQ_INJECT_ASM(3)
434 LONG_L " $4, %[v]\n\t"
435 "bne $4, %[expect], %l[cmpfail]\n\t"
436 RSEQ_INJECT_ASM(4)
437 LONG_L " $4, %[v2]\n\t"
438 "bne $4, %[expect2], %l[cmpfail]\n\t"
439 RSEQ_INJECT_ASM(5)
440#ifdef RSEQ_COMPARE_TWICE
441 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
442 LONG_L " $4, %[v]\n\t"
443 "bne $4, %[expect], %l[error2]\n\t"
444 LONG_L " $4, %[v2]\n\t"
445 "bne $4, %[expect2], %l[error3]\n\t"
446#endif
447 /* final store */
448 LONG_S " %[newv], %[v]\n\t"
449 "2:\n\t"
450 RSEQ_INJECT_ASM(6)
451 "b 5f\n\t"
452 RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
453 "5:\n\t"
454 : /* gcc asm goto does not allow outputs */
455 : [cpu_id] "r" (cpu),
456 [current_cpu_id] "m" (__rseq_abi.cpu_id),
457 [rseq_cs] "m" (__rseq_abi.rseq_cs),
458 /* cmp2 input */
459 [v2] "m" (*v2),
460 [expect2] "r" (expect2),
461 /* final store input */
462 [v] "m" (*v),
463 [expect] "r" (expect),
464 [newv] "r" (newv)
465 RSEQ_INJECT_INPUT
466 : "$4", "memory"
467 RSEQ_INJECT_CLOBBER
468 : abort, cmpfail
469#ifdef RSEQ_COMPARE_TWICE
470 , error1, error2, error3
471#endif
472 );
473 rseq_workaround_gcc_asm_size_guess();
474 return 0;
475abort:
476 rseq_workaround_gcc_asm_size_guess();
477 RSEQ_INJECT_FAILED
478 return -1;
479cmpfail:
480 rseq_workaround_gcc_asm_size_guess();
481 return 1;
482#ifdef RSEQ_COMPARE_TWICE
483error1:
484 rseq_bug("cpu_id comparison failed");
485error2:
486 rseq_bug("1st expected value comparison failed");
487error3:
488 rseq_bug("2nd expected value comparison failed");
489#endif
490}
491
492static inline __attribute__((always_inline))
493int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
494 void *dst, void *src, size_t len,
495 intptr_t newv, int cpu)
496{
497 uintptr_t rseq_scratch[3];
498
499 RSEQ_INJECT_C(9)
500
501 rseq_workaround_gcc_asm_size_guess();
502 __asm__ __volatile__ goto (
503 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
504 LONG_S " %[src], %[rseq_scratch0]\n\t"
505 LONG_S " %[dst], %[rseq_scratch1]\n\t"
506 LONG_S " %[len], %[rseq_scratch2]\n\t"
507 /* Start rseq by storing table entry pointer into rseq_cs. */
508 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
509 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
510 RSEQ_INJECT_ASM(3)
511 LONG_L " $4, %[v]\n\t"
512 "bne $4, %[expect], 5f\n\t"
513 RSEQ_INJECT_ASM(4)
514#ifdef RSEQ_COMPARE_TWICE
515 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
516 LONG_L " $4, %[v]\n\t"
517 "bne $4, %[expect], 7f\n\t"
518#endif
519 /* try memcpy */
520 "beqz %[len], 333f\n\t" \
521 "222:\n\t" \
522 "lb $4, 0(%[src])\n\t" \
523 "sb $4, 0(%[dst])\n\t" \
524 LONG_ADDI " %[src], 1\n\t" \
525 LONG_ADDI " %[dst], 1\n\t" \
526 LONG_ADDI " %[len], -1\n\t" \
527 "bnez %[len], 222b\n\t" \
528 "333:\n\t" \
529 RSEQ_INJECT_ASM(5)
530 /* final store */
531 LONG_S " %[newv], %[v]\n\t"
532 "2:\n\t"
533 RSEQ_INJECT_ASM(6)
534 /* teardown */
535 LONG_L " %[len], %[rseq_scratch2]\n\t"
536 LONG_L " %[dst], %[rseq_scratch1]\n\t"
537 LONG_L " %[src], %[rseq_scratch0]\n\t"
538 "b 8f\n\t"
539 RSEQ_ASM_DEFINE_ABORT(3, 4,
540 /* teardown */
541 LONG_L " %[len], %[rseq_scratch2]\n\t"
542 LONG_L " %[dst], %[rseq_scratch1]\n\t"
543 LONG_L " %[src], %[rseq_scratch0]\n\t",
544 abort, 1b, 2b, 4f)
545 RSEQ_ASM_DEFINE_CMPFAIL(5,
546 /* teardown */
547 LONG_L " %[len], %[rseq_scratch2]\n\t"
548 LONG_L " %[dst], %[rseq_scratch1]\n\t"
549 LONG_L " %[src], %[rseq_scratch0]\n\t",
550 cmpfail)
551#ifdef RSEQ_COMPARE_TWICE
552 RSEQ_ASM_DEFINE_CMPFAIL(6,
553 /* teardown */
554 LONG_L " %[len], %[rseq_scratch2]\n\t"
555 LONG_L " %[dst], %[rseq_scratch1]\n\t"
556 LONG_L " %[src], %[rseq_scratch0]\n\t",
557 error1)
558 RSEQ_ASM_DEFINE_CMPFAIL(7,
559 /* teardown */
560 LONG_L " %[len], %[rseq_scratch2]\n\t"
561 LONG_L " %[dst], %[rseq_scratch1]\n\t"
562 LONG_L " %[src], %[rseq_scratch0]\n\t",
563 error2)
564#endif
565 "8:\n\t"
566 : /* gcc asm goto does not allow outputs */
567 : [cpu_id] "r" (cpu),
568 [current_cpu_id] "m" (__rseq_abi.cpu_id),
569 [rseq_cs] "m" (__rseq_abi.rseq_cs),
570 /* final store input */
571 [v] "m" (*v),
572 [expect] "r" (expect),
573 [newv] "r" (newv),
574 /* try memcpy input */
575 [dst] "r" (dst),
576 [src] "r" (src),
577 [len] "r" (len),
578 [rseq_scratch0] "m" (rseq_scratch[0]),
579 [rseq_scratch1] "m" (rseq_scratch[1]),
580 [rseq_scratch2] "m" (rseq_scratch[2])
581 RSEQ_INJECT_INPUT
582 : "$4", "memory"
583 RSEQ_INJECT_CLOBBER
584 : abort, cmpfail
585#ifdef RSEQ_COMPARE_TWICE
586 , error1, error2
587#endif
588 );
589 rseq_workaround_gcc_asm_size_guess();
590 return 0;
591abort:
592 rseq_workaround_gcc_asm_size_guess();
593 RSEQ_INJECT_FAILED
594 return -1;
595cmpfail:
596 rseq_workaround_gcc_asm_size_guess();
597 return 1;
598#ifdef RSEQ_COMPARE_TWICE
599error1:
600 rseq_workaround_gcc_asm_size_guess();
601 rseq_bug("cpu_id comparison failed");
602error2:
603 rseq_workaround_gcc_asm_size_guess();
604 rseq_bug("expected value comparison failed");
605#endif
606}
607
608static inline __attribute__((always_inline))
609int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
610 void *dst, void *src, size_t len,
611 intptr_t newv, int cpu)
612{
613 uintptr_t rseq_scratch[3];
614
615 RSEQ_INJECT_C(9)
616
617 rseq_workaround_gcc_asm_size_guess();
618 __asm__ __volatile__ goto (
619 RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
620 LONG_S " %[src], %[rseq_scratch0]\n\t"
621 LONG_S " %[dst], %[rseq_scratch1]\n\t"
622 LONG_S " %[len], %[rseq_scratch2]\n\t"
623 /* Start rseq by storing table entry pointer into rseq_cs. */
624 RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
625 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
626 RSEQ_INJECT_ASM(3)
627 LONG_L " $4, %[v]\n\t"
628 "bne $4, %[expect], 5f\n\t"
629 RSEQ_INJECT_ASM(4)
630#ifdef RSEQ_COMPARE_TWICE
631 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
632 LONG_L " $4, %[v]\n\t"
633 "bne $4, %[expect], 7f\n\t"
634#endif
635 /* try memcpy */
636 "beqz %[len], 333f\n\t" \
637 "222:\n\t" \
638 "lb $4, 0(%[src])\n\t" \
639 "sb $4, 0(%[dst])\n\t" \
640 LONG_ADDI " %[src], 1\n\t" \
641 LONG_ADDI " %[dst], 1\n\t" \
642 LONG_ADDI " %[len], -1\n\t" \
643 "bnez %[len], 222b\n\t" \
644 "333:\n\t" \
645 RSEQ_INJECT_ASM(5)
646 "sync\n\t" /* full sync provides store-release */
647 /* final store */
648 LONG_S " %[newv], %[v]\n\t"
649 "2:\n\t"
650 RSEQ_INJECT_ASM(6)
651 /* teardown */
652 LONG_L " %[len], %[rseq_scratch2]\n\t"
653 LONG_L " %[dst], %[rseq_scratch1]\n\t"
654 LONG_L " %[src], %[rseq_scratch0]\n\t"
655 "b 8f\n\t"
656 RSEQ_ASM_DEFINE_ABORT(3, 4,
657 /* teardown */
658 LONG_L " %[len], %[rseq_scratch2]\n\t"
659 LONG_L " %[dst], %[rseq_scratch1]\n\t"
660 LONG_L " %[src], %[rseq_scratch0]\n\t",
661 abort, 1b, 2b, 4f)
662 RSEQ_ASM_DEFINE_CMPFAIL(5,
663 /* teardown */
664 LONG_L " %[len], %[rseq_scratch2]\n\t"
665 LONG_L " %[dst], %[rseq_scratch1]\n\t"
666 LONG_L " %[src], %[rseq_scratch0]\n\t",
667 cmpfail)
668#ifdef RSEQ_COMPARE_TWICE
669 RSEQ_ASM_DEFINE_CMPFAIL(6,
670 /* teardown */
671 LONG_L " %[len], %[rseq_scratch2]\n\t"
672 LONG_L " %[dst], %[rseq_scratch1]\n\t"
673 LONG_L " %[src], %[rseq_scratch0]\n\t",
674 error1)
675 RSEQ_ASM_DEFINE_CMPFAIL(7,
676 /* teardown */
677 LONG_L " %[len], %[rseq_scratch2]\n\t"
678 LONG_L " %[dst], %[rseq_scratch1]\n\t"
679 LONG_L " %[src], %[rseq_scratch0]\n\t",
680 error2)
681#endif
682 "8:\n\t"
683 : /* gcc asm goto does not allow outputs */
684 : [cpu_id] "r" (cpu),
685 [current_cpu_id] "m" (__rseq_abi.cpu_id),
686 [rseq_cs] "m" (__rseq_abi.rseq_cs),
687 /* final store input */
688 [v] "m" (*v),
689 [expect] "r" (expect),
690 [newv] "r" (newv),
691 /* try memcpy input */
692 [dst] "r" (dst),
693 [src] "r" (src),
694 [len] "r" (len),
695 [rseq_scratch0] "m" (rseq_scratch[0]),
696 [rseq_scratch1] "m" (rseq_scratch[1]),
697 [rseq_scratch2] "m" (rseq_scratch[2])
698 RSEQ_INJECT_INPUT
699 : "$4", "memory"
700 RSEQ_INJECT_CLOBBER
701 : abort, cmpfail
702#ifdef RSEQ_COMPARE_TWICE
703 , error1, error2
704#endif
705 );
706 rseq_workaround_gcc_asm_size_guess();
707 return 0;
708abort:
709 rseq_workaround_gcc_asm_size_guess();
710 RSEQ_INJECT_FAILED
711 return -1;
712cmpfail:
713 rseq_workaround_gcc_asm_size_guess();
714 return 1;
715#ifdef RSEQ_COMPARE_TWICE
716error1:
717 rseq_workaround_gcc_asm_size_guess();
718 rseq_bug("cpu_id comparison failed");
719error2:
720 rseq_workaround_gcc_asm_size_guess();
721 rseq_bug("expected value comparison failed");
722#endif
723}
724
725#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
new file mode 100644
index 000000000000..52630c9f42be
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -0,0 +1,671 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * rseq-ppc.h
4 *
5 * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
7 */
8
9#define RSEQ_SIG 0x53053053
10
11#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc")
12#define rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc")
13#define rseq_smp_rmb() rseq_smp_lwsync()
14#define rseq_smp_wmb() rseq_smp_lwsync()
15
16#define rseq_smp_load_acquire(p) \
17__extension__ ({ \
18 __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
19 rseq_smp_lwsync(); \
20 ____p1; \
21})
22
23#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_lwsync()
24
25#define rseq_smp_store_release(p, v) \
26do { \
27 rseq_smp_lwsync(); \
28 RSEQ_WRITE_ONCE(*p, v); \
29} while (0)
30
31#ifdef RSEQ_SKIP_FASTPATH
32#include "rseq-skip.h"
33#else /* !RSEQ_SKIP_FASTPATH */
34
35/*
36 * The __rseq_table section can be used by debuggers to better handle
37 * single-stepping through the restartable critical sections.
38 */
39
40#ifdef __PPC64__
41
42#define STORE_WORD "std "
43#define LOAD_WORD "ld "
44#define LOADX_WORD "ldx "
45#define CMP_WORD "cmpd "
46
47#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
48 start_ip, post_commit_offset, abort_ip) \
49 ".pushsection __rseq_table, \"aw\"\n\t" \
50 ".balign 32\n\t" \
51 __rseq_str(label) ":\n\t" \
52 ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
53 ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
54 ".popsection\n\t"
55
56#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
57 RSEQ_INJECT_ASM(1) \
58 "lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t" \
59 "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t" \
60 "rldicr %%r17, %%r17, 32, 31\n\t" \
61 "oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t" \
62 "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
63 "std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
64 __rseq_str(label) ":\n\t"
65
66#else /* #ifdef __PPC64__ */
67
68#define STORE_WORD "stw "
69#define LOAD_WORD "lwz "
70#define LOADX_WORD "lwzx "
71#define CMP_WORD "cmpw "
72
73#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
74 start_ip, post_commit_offset, abort_ip) \
75 ".pushsection __rseq_table, \"aw\"\n\t" \
76 ".balign 32\n\t" \
77 __rseq_str(label) ":\n\t" \
78 ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
79 /* 32-bit only supported on BE */ \
80 ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
81 ".popsection\n\t"
82
83#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
84 RSEQ_INJECT_ASM(1) \
85 "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
86 "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
87 "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
88 __rseq_str(label) ":\n\t"
89
90#endif /* #ifdef __PPC64__ */
91
92#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
93 __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
94 (post_commit_ip - start_ip), abort_ip)
95
96#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
97 RSEQ_INJECT_ASM(2) \
98 "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
99 "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
100 "bne- cr7, " __rseq_str(label) "\n\t"
101
102#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
103 ".pushsection __rseq_failure, \"ax\"\n\t" \
104 ".long " __rseq_str(RSEQ_SIG) "\n\t" \
105 __rseq_str(label) ":\n\t" \
106 "b %l[" __rseq_str(abort_label) "]\n\t" \
107 ".popsection\n\t"
108
109/*
110 * RSEQ_ASM_OPs: asm operations for rseq
111 * RSEQ_ASM_OP_R_*: has hard-code registers in it
112 * RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
113 */
114#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
115 LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
116 CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
117 "bne- cr7, " __rseq_str(label) "\n\t"
118
119#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
120 LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
121 CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
122 "beq- cr7, " __rseq_str(label) "\n\t"
123
124#define RSEQ_ASM_OP_STORE(value, var) \
125 STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
126
127/* Load @var to r17 */
128#define RSEQ_ASM_OP_R_LOAD(var) \
129 LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
130
131/* Store r17 to @var */
132#define RSEQ_ASM_OP_R_STORE(var) \
133 STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
134
135/* Add @count to r17 */
136#define RSEQ_ASM_OP_R_ADD(count) \
137 "add %%r17, %[" __rseq_str(count) "], %%r17\n\t"
138
139/* Load (r17 + voffp) to r17 */
140#define RSEQ_ASM_OP_R_LOADX(voffp) \
141 LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
142
143/* TODO: implement a faster memcpy. */
144#define RSEQ_ASM_OP_R_MEMCPY() \
145 "cmpdi %%r19, 0\n\t" \
146 "beq 333f\n\t" \
147 "addi %%r20, %%r20, -1\n\t" \
148 "addi %%r21, %%r21, -1\n\t" \
149 "222:\n\t" \
150 "lbzu %%r18, 1(%%r20)\n\t" \
151 "stbu %%r18, 1(%%r21)\n\t" \
152 "addi %%r19, %%r19, -1\n\t" \
153 "cmpdi %%r19, 0\n\t" \
154 "bne 222b\n\t" \
155 "333:\n\t" \
156
157#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
158 STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
159 __rseq_str(post_commit_label) ":\n\t"
160
161#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
162 STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
163 __rseq_str(post_commit_label) ":\n\t"
164
165static inline __attribute__((always_inline))
166int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
167{
168 RSEQ_INJECT_C(9)
169
170 __asm__ __volatile__ goto (
171 RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
172 /* Start rseq by storing table entry pointer into rseq_cs. */
173 RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
174 /* cmp cpuid */
175 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
176 RSEQ_INJECT_ASM(3)
177 /* cmp @v equal to @expect */
178 RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
179 RSEQ_INJECT_ASM(4)
180#ifdef RSEQ_COMPARE_TWICE
181 /* cmp cpuid */
182 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
183 /* cmp @v equal to @expect */
184 RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
185#endif
186 /* final store */
187 RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
188 RSEQ_INJECT_ASM(5)
189 RSEQ_ASM_DEFINE_ABORT(4, abort)
190 : /* gcc asm goto does not allow outputs */
191 : [cpu_id] "r" (cpu),
192 [current_cpu_id] "m" (__rseq_abi.cpu_id),
193 [rseq_cs] "m" (__rseq_abi.rseq_cs),
194 [v] "m" (*v),
195 [expect] "r" (expect),
196 [newv] "r" (newv)
197 RSEQ_INJECT_INPUT
198 : "memory", "cc", "r17"
199 RSEQ_INJECT_CLOBBER
200 : abort, cmpfail
201#ifdef RSEQ_COMPARE_TWICE
202 , error1, error2
203#endif
204 );
205 return 0;
206abort:
207 RSEQ_INJECT_FAILED
208 return -1;
209cmpfail:
210 return 1;
211#ifdef RSEQ_COMPARE_TWICE
212error1:
213 rseq_bug("cpu_id comparison failed");
214error2:
215 rseq_bug("expected value comparison failed");
216#endif
217}
218
219static inline __attribute__((always_inline))
220int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
221 off_t voffp, intptr_t *load, int cpu)
222{
223 RSEQ_INJECT_C(9)
224
225 __asm__ __volatile__ goto (
226 RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
227 /* Start rseq by storing table entry pointer into rseq_cs. */
228 RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
229 /* cmp cpuid */
230 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
231 RSEQ_INJECT_ASM(3)
232 /* cmp @v not equal to @expectnot */
233 RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
234 RSEQ_INJECT_ASM(4)
235#ifdef RSEQ_COMPARE_TWICE
236 /* cmp cpuid */
237 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
238 /* cmp @v not equal to @expectnot */
239 RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
240#endif
241 /* load the value of @v */
242 RSEQ_ASM_OP_R_LOAD(v)
243 /* store it in @load */
244 RSEQ_ASM_OP_R_STORE(load)
245 /* dereference voffp(v) */
246 RSEQ_ASM_OP_R_LOADX(voffp)
247 /* final store the value at voffp(v) */
248 RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
249 RSEQ_INJECT_ASM(5)
250 RSEQ_ASM_DEFINE_ABORT(4, abort)
251 : /* gcc asm goto does not allow outputs */
252 : [cpu_id] "r" (cpu),
253 [current_cpu_id] "m" (__rseq_abi.cpu_id),
254 [rseq_cs] "m" (__rseq_abi.rseq_cs),
255 /* final store input */
256 [v] "m" (*v),
257 [expectnot] "r" (expectnot),
258 [voffp] "b" (voffp),
259 [load] "m" (*load)
260 RSEQ_INJECT_INPUT
261 : "memory", "cc", "r17"
262 RSEQ_INJECT_CLOBBER
263 : abort, cmpfail
264#ifdef RSEQ_COMPARE_TWICE
265 , error1, error2
266#endif
267 );
268 return 0;
269abort:
270 RSEQ_INJECT_FAILED
271 return -1;
272cmpfail:
273 return 1;
274#ifdef RSEQ_COMPARE_TWICE
275error1:
276 rseq_bug("cpu_id comparison failed");
277error2:
278 rseq_bug("expected value comparison failed");
279#endif
280}
281
282static inline __attribute__((always_inline))
283int rseq_addv(intptr_t *v, intptr_t count, int cpu)
284{
285 RSEQ_INJECT_C(9)
286
287 __asm__ __volatile__ goto (
288 RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
289 /* Start rseq by storing table entry pointer into rseq_cs. */
290 RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
291 /* cmp cpuid */
292 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
293 RSEQ_INJECT_ASM(3)
294#ifdef RSEQ_COMPARE_TWICE
295 /* cmp cpuid */
296 RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
297#endif
298 /* load the value of @v */
299 RSEQ_ASM_OP_R_LOAD(v)
300 /* add @count to it */
301 RSEQ_ASM_OP_R_ADD(count)
302 /* final store */
303 RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
304 RSEQ_INJECT_ASM(4)
305 RSEQ_ASM_DEFINE_ABORT(4, abort)
306 : /* gcc asm goto does not allow outputs */
307 : [cpu_id] "r" (cpu),
308 [current_cpu_id] "m" (__rseq_abi.cpu_id),
309 [rseq_cs] "m" (__rseq_abi.rseq_cs),
310 /* final store input */
311 [v] "m" (*v),
312 [count] "r" (count)
313 RSEQ_INJECT_INPUT
314 : "memory", "cc", "r17"
315 RSEQ_INJECT_CLOBBER
316 : abort
317#ifdef RSEQ_COMPARE_TWICE
318 , error1
319#endif
320 );
321 return 0;
322abort:
323 RSEQ_INJECT_FAILED
324 return -1;
325#ifdef RSEQ_COMPARE_TWICE
326error1:
327 rseq_bug("cpu_id comparison failed");
328#endif
329}
330
/*
 * If *@v == @expect while bound to @cpu, store @newv2 to *@v2, then commit
 * @newv to *@v — all within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
				 intptr_t *v2, intptr_t newv2,
				 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
#endif
		/* try store */
		RSEQ_ASM_OP_STORE(newv2, v2)
		RSEQ_INJECT_ASM(5)
		/* final store */
		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2]			"m" (*v2),
		  [newv2]		"r" (newv2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		  RSEQ_INJECT_INPUT
		: "memory", "cc", "r17"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
393
/*
 * Release-ordered variant of rseq_cmpeqv_trystorev_storev(): identical
 * sequence, plus an lwsync before the final store so the @newv2 store is
 * ordered before the commit of @newv.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
					 intptr_t *v2, intptr_t newv2,
					 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
#endif
		/* try store */
		RSEQ_ASM_OP_STORE(newv2, v2)
		RSEQ_INJECT_ASM(5)
		/* for 'release': order prior store before the final store */
		"lwsync\n\t"
		/* final store */
		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2]			"m" (*v2),
		  [newv2]		"r" (newv2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		  RSEQ_INJECT_INPUT
		: "memory", "cc", "r17"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
458
/*
 * If *@v == @expect AND *@v2 == @expect2 while bound to @cpu, commit @newv
 * to *@v within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when either comparison fails.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
			      intptr_t *v2, intptr_t expect2,
			      intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
		RSEQ_INJECT_ASM(4)
		/* cmp @v2 equal to @expect2 */
		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
		RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
		/* cmp @v2 equal to @expect2 */
		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
#endif
		/* final store */
		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* cmp2 input */
		  [v2]			"m" (*v2),
		  [expect2]		"r" (expect2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		  RSEQ_INJECT_INPUT
		: "memory", "cc", "r17"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2, error3
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("1st expected value comparison failed");
error3:
	rseq_bug("2nd expected value comparison failed");
#endif
}
525
/*
 * If *@v == @expect while bound to @cpu, copy @len bytes from @src to
 * @dst, then commit @newv to *@v — all within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
				 void *dst, void *src, size_t len,
				 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* setup for memcpy: stage len/src/dst in r19-r21 */
		"mr %%r19, %[len]\n\t"
		"mr %%r20, %[src]\n\t"
		"mr %%r21, %[dst]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
#endif
		/* try memcpy */
		RSEQ_ASM_OP_R_MEMCPY()
		RSEQ_INJECT_ASM(5)
		/* final store */
		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
		RSEQ_INJECT_ASM(6)
		/* teardown */
		RSEQ_ASM_DEFINE_ABORT(4, abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv),
		  /* try memcpy input */
		  [dst]			"r" (dst),
		  [src]			"r" (src),
		  [len]			"r" (len)
		  RSEQ_INJECT_INPUT
		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
594
/*
 * Release-ordered variant of rseq_cmpeqv_trymemcpy_storev(): identical
 * sequence, plus an lwsync before the final store so the copied bytes are
 * ordered before the commit of @newv.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
					 void *dst, void *src, size_t len,
					 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* setup for memcpy: stage len/src/dst in r19-r21 */
		"mr %%r19, %[len]\n\t"
		"mr %%r20, %[src]\n\t"
		"mr %%r21, %[dst]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		/* cmp cpuid */
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		/* cmp @v equal to @expect */
		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
#endif
		/* try memcpy */
		RSEQ_ASM_OP_R_MEMCPY()
		RSEQ_INJECT_ASM(5)
		/* for 'release': order copied bytes before the final store */
		"lwsync\n\t"
		/* final store */
		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
		RSEQ_INJECT_ASM(6)
		/* teardown */
		RSEQ_ASM_DEFINE_ABORT(4, abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv),
		  /* try memcpy input */
		  [dst]			"r" (dst),
		  [src]			"r" (src),
		  [len]			"r" (len)
		  RSEQ_INJECT_INPUT
		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
665
666#undef STORE_WORD
667#undef LOAD_WORD
668#undef LOADX_WORD
669#undef CMP_WORD
670
671#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
new file mode 100644
index 000000000000..72750b5905a9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -0,0 +1,65 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * rseq-skip.h
4 *
5 * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 */
7
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1) and leave *@v untouched.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)newv;
	(void)cpu;
	return -1;
}
13
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1) without touching *@v or *@load.
 */
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
			       off_t voffp, intptr_t *load, int cpu)
{
	(void)v;
	(void)expectnot;
	(void)voffp;
	(void)load;
	(void)cpu;
	return -1;
}
20
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1) without modifying *@v.
 */
static inline __attribute__((always_inline))
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
	(void)v;
	(void)count;
	(void)cpu;
	return -1;
}
26
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1); neither *@v nor *@v2 is written.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
				 intptr_t *v2, intptr_t newv2,
				 intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)v2;
	(void)newv2;
	(void)newv;
	(void)cpu;
	return -1;
}
34
/*
 * RSEQ_SKIP_FASTPATH stub (release variant): fast path compiled out;
 * always report failure (-1); neither *@v nor *@v2 is written.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
					 intptr_t *v2, intptr_t newv2,
					 intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)v2;
	(void)newv2;
	(void)newv;
	(void)cpu;
	return -1;
}
42
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1) and perform no stores.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
			      intptr_t *v2, intptr_t expect2,
			      intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)v2;
	(void)expect2;
	(void)newv;
	(void)cpu;
	return -1;
}
50
/*
 * RSEQ_SKIP_FASTPATH stub: fast path compiled out; always report
 * failure (-1); no bytes are copied and *@v is untouched.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
				 void *dst, void *src, size_t len,
				 intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)dst;
	(void)src;
	(void)len;
	(void)newv;
	(void)cpu;
	return -1;
}
58
/*
 * RSEQ_SKIP_FASTPATH stub (release variant): fast path compiled out;
 * always report failure (-1); no bytes are copied and *@v is untouched.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
					 void *dst, void *src, size_t len,
					 intptr_t newv, int cpu)
{
	(void)v;
	(void)expect;
	(void)dst;
	(void)src;
	(void)len;
	(void)newv;
	(void)cpu;
	return -1;
}
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
new file mode 100644
index 000000000000..089410a314e9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -0,0 +1,1132 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * rseq-x86.h
4 *
5 * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 */
7
8#include <stdint.h>
9
10#define RSEQ_SIG 0x53053053
11
12#ifdef __x86_64__
13
/* Full barrier: locked addl on a dead stack slot below the red zone. */
#define rseq_smp_mb()	\
	__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
/* x86-64: loads/stores are not reordered with like ops; compiler barrier suffices. */
#define rseq_smp_rmb()	rseq_barrier()
#define rseq_smp_wmb()	rseq_barrier()

/* Load-acquire: plain load followed by a compiler barrier. */
#define rseq_smp_load_acquire(p)					\
__extension__ ({							\
	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
	rseq_barrier();							\
	____p1;								\
})

#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()

/* Store-release: compiler barrier followed by plain store. */
#define rseq_smp_store_release(p, v)					\
do {									\
	rseq_barrier();							\
	RSEQ_WRITE_ONCE(*p, v);						\
} while (0)
33
34#ifdef RSEQ_SKIP_FASTPATH
35#include "rseq-skip.h"
36#else /* !RSEQ_SKIP_FASTPATH */
37
/*
 * Emit a 32-byte-aligned rseq_cs descriptor { version, flags, start_ip,
 * post_commit_offset, abort_ip } into the __rseq_table section.
 */
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
			start_ip, post_commit_offset, abort_ip)		\
		".pushsection __rseq_table, \"aw\"\n\t"			\
		".balign 32\n\t"					\
		__rseq_str(label) ":\n\t"				\
		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
		".popsection\n\t"

/* Convenience wrapper: derive post_commit_offset from the commit ip. */
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
				(post_commit_ip - start_ip), abort_ip)

/*
 * Arm the critical section: load the descriptor address rip-relative via
 * rax, store it into __rseq_abi.rseq_cs, then emit the start label.
 */
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
		RSEQ_INJECT_ASM(1)					\
		"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t"	\
		"movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t"		\
		__rseq_str(label) ":\n\t"

/* Branch to @label when the current cpu number differs from @cpu_id. */
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
		RSEQ_INJECT_ASM(2)					\
		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
		"jnz " __rseq_str(label) "\n\t"

/*
 * Out-of-line abort handler in __rseq_failure, preceded by the RSEQ_SIG
 * signature bytes; runs @teardown then jumps to the C @abort_label.
 */
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
		".pushsection __rseq_failure, \"ax\"\n\t"		\
		/* Disassembler-friendly signature: nopl <sig>(%rip). */\
		".byte 0x0f, 0x1f, 0x05\n\t"				\
		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
		".popsection\n\t"

/* Out-of-line compare-failure path: runs @teardown then jumps to @cmpfail_label. */
#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
		".pushsection __rseq_failure, \"ax\"\n\t"		\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
		".popsection\n\t"
78
/*
 * If *@v == @expect while bound to @cpu, store @newv to *@v within one
 * restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* cmp @v equal to @expect */
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
#endif
		/* final store */
		"movq %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "rax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
130
/*
 * Compare @v against @expectnot. When it does _not_ match, load @v
 * into @load, and store the content of *@v + voffp into @v.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v == @expectnot.
 */
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
			       off_t voffp, intptr_t *load, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* load *@v into rbx and bail out if it matches @expectnot */
		"movq %[v], %%rbx\n\t"
		"cmpq %%rbx, %[expectnot]\n\t"
		"je %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"movq %[v], %%rbx\n\t"
		"cmpq %%rbx, %[expectnot]\n\t"
		"je %l[error2]\n\t"
#endif
		/* *@load = old *@v; then dereference old *@v + @voffp */
		"movq %%rbx, %[load]\n\t"
		"addq %[voffp], %%rbx\n\t"
		"movq (%%rbx), %%rbx\n\t"
		/* final store */
		"movq %%rbx, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expectnot]		"r" (expectnot),
		  [voffp]		"er" (voffp),
		  [load]		"m" (*load)
		: "memory", "cc", "rax", "rbx"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
194
/*
 * Per-CPU add: add @count to *@v as a restartable sequence bound to @cpu.
 * Returns 0 on success, -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
#endif
		/* final store */
		"addq %[count], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(4)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [count]		"er" (count)
		: "memory", "cc", "rax"
		  RSEQ_INJECT_CLOBBER
		: abort
#ifdef RSEQ_COMPARE_TWICE
		, error1
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
#endif
}
237
/*
 * If *@v == @expect while bound to @cpu, store @newv2 to *@v2, then commit
 * @newv to *@v within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
				 intptr_t *v2, intptr_t newv2,
				 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
#endif
		/* try store */
		"movq %[newv2], %[v2]\n\t"
		RSEQ_INJECT_ASM(5)
		/* final store */
		"movq %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2]			"m" (*v2),
		  [newv2]		"r" (newv2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "rax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
298
/*
 * x86-64 is TSO: plain stores already have release semantics, so the
 * release variant just delegates to the regular sequence.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
					 intptr_t *v2, intptr_t newv2,
					 intptr_t newv, int cpu)
{
	return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
}
307
/*
 * If *@v == @expect AND *@v2 == @expect2 while bound to @cpu, commit @newv
 * to *@v within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when either comparison fails.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
			      intptr_t *v2, intptr_t expect2,
			      intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
		"cmpq %[v2], %[expect2]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpq %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
		"cmpq %[v2], %[expect2]\n\t"
		"jnz %l[error3]\n\t"
#endif
		/* final store */
		"movq %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* cmp2 input */
		  [v2]			"m" (*v2),
		  [expect2]		"r" (expect2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "rax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2, error3
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("1st expected value comparison failed");
error3:
	rseq_bug("2nd expected value comparison failed");
#endif
}
372
/*
 * If *@v == @expect while bound to @cpu, copy @len bytes from @src to
 * @dst, then commit @newv to *@v within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 *
 * src/dst/len are mutated by the byte-copy loop, so they are spilled to
 * rseq_scratch[] on entry and restored on every exit path (teardown).
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
				 void *dst, void *src, size_t len,
				 intptr_t newv, int cpu)
{
	uint64_t rseq_scratch[3];

	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Spill src/dst/len so the copy loop may modify them. */
		"movq %[src], %[rseq_scratch0]\n\t"
		"movq %[dst], %[rseq_scratch1]\n\t"
		"movq %[len], %[rseq_scratch2]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpq %[v], %[expect]\n\t"
		"jnz 5f\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
		"cmpq %[v], %[expect]\n\t"
		"jnz 7f\n\t"
#endif
		/* try memcpy: simple byte-at-a-time copy loop */
		"test %[len], %[len]\n\t" \
		"jz 333f\n\t" \
		"222:\n\t" \
		"movb (%[src]), %%al\n\t" \
		"movb %%al, (%[dst])\n\t" \
		"inc %[src]\n\t" \
		"inc %[dst]\n\t" \
		"dec %[len]\n\t" \
		"jnz 222b\n\t" \
		"333:\n\t" \
		RSEQ_INJECT_ASM(5)
		/* final store */
		"movq %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		/* teardown */
		"movq %[rseq_scratch2], %[len]\n\t"
		"movq %[rseq_scratch1], %[dst]\n\t"
		"movq %[rseq_scratch0], %[src]\n\t"
		RSEQ_ASM_DEFINE_ABORT(4,
			"movq %[rseq_scratch2], %[len]\n\t"
			"movq %[rseq_scratch1], %[dst]\n\t"
			"movq %[rseq_scratch0], %[src]\n\t",
			abort)
		RSEQ_ASM_DEFINE_CMPFAIL(5,
			"movq %[rseq_scratch2], %[len]\n\t"
			"movq %[rseq_scratch1], %[dst]\n\t"
			"movq %[rseq_scratch0], %[src]\n\t",
			cmpfail)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_DEFINE_CMPFAIL(6,
			"movq %[rseq_scratch2], %[len]\n\t"
			"movq %[rseq_scratch1], %[dst]\n\t"
			"movq %[rseq_scratch0], %[src]\n\t",
			error1)
		RSEQ_ASM_DEFINE_CMPFAIL(7,
			"movq %[rseq_scratch2], %[len]\n\t"
			"movq %[rseq_scratch1], %[dst]\n\t"
			"movq %[rseq_scratch0], %[src]\n\t",
			error2)
#endif
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv),
		  /* try memcpy input */
		  [dst]			"r" (dst),
		  [src]			"r" (src),
		  [len]			"r" (len),
		  [rseq_scratch0]	"m" (rseq_scratch[0]),
		  [rseq_scratch1]	"m" (rseq_scratch[1]),
		  [rseq_scratch2]	"m" (rseq_scratch[2])
		: "memory", "cc", "rax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
476
/*
 * x86-64 is TSO: plain stores already have release semantics, so the
 * release variant just delegates to the regular sequence.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
					 void *dst, void *src, size_t len,
					 intptr_t newv, int cpu)
{
	return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
					    newv, cpu);
}
486
487#endif /* !RSEQ_SKIP_FASTPATH */
488
489#elif __i386__
490
/*
 * i386: all three barriers use a full locked operation on a dead stack
 * slot (older 32-bit CPUs may reorder more than TSO permits).
 */
#define rseq_smp_mb()	\
	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
#define rseq_smp_rmb()	\
	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
#define rseq_smp_wmb()	\
	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")

/* Load-acquire: plain load followed by a full barrier. */
#define rseq_smp_load_acquire(p)					\
__extension__ ({							\
	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
	rseq_smp_mb();							\
	____p1;								\
})

#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()

/* Store-release: full barrier followed by plain store. */
#define rseq_smp_store_release(p, v)					\
do {									\
	rseq_smp_mb();							\
	RSEQ_WRITE_ONCE(*p, v);						\
} while (0)
512
513#ifdef RSEQ_SKIP_FASTPATH
514#include "rseq-skip.h"
515#else /* !RSEQ_SKIP_FASTPATH */
516
/*
 * Use eax as scratch register and take memory operands as input to
 * lessen register pressure. Especially needed when compiling in O0.
 */

/*
 * Emit a 32-byte-aligned rseq_cs descriptor into __rseq_table.  32-bit
 * variant: each 64-bit ip field is emitted as a .long pair with a 0x0
 * upper half.
 */
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
			start_ip, post_commit_offset, abort_ip)		\
		".pushsection __rseq_table, \"aw\"\n\t"			\
		".balign 32\n\t"					\
		__rseq_str(label) ":\n\t"				\
		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
		".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
		".popsection\n\t"

/* Convenience wrapper: derive post_commit_offset from the commit ip. */
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
				(post_commit_ip - start_ip), abort_ip)

/*
 * Arm the critical section: store the descriptor address (absolute,
 * 32-bit) into __rseq_abi.rseq_cs, then emit the start label.
 */
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
		RSEQ_INJECT_ASM(1)					\
		"movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t"	\
		__rseq_str(label) ":\n\t"

/* Branch to @label when the current cpu number differs from @cpu_id. */
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
		RSEQ_INJECT_ASM(2)					\
		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
		"jnz " __rseq_str(label) "\n\t"

/*
 * Out-of-line abort handler in __rseq_failure, preceded by the RSEQ_SIG
 * signature bytes; runs @teardown then jumps to the C @abort_label.
 */
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
		".pushsection __rseq_failure, \"ax\"\n\t"		\
		/* Disassembler-friendly signature: nopl <sig>. */	\
		".byte 0x0f, 0x1f, 0x05\n\t"				\
		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
		".popsection\n\t"

/* Out-of-line compare-failure path: runs @teardown then jumps to @cmpfail_label. */
#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
		".pushsection __rseq_failure, \"ax\"\n\t"		\
		__rseq_str(label) ":\n\t"				\
		teardown						\
		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
		".popsection\n\t"
560
/*
 * If *@v == @expect while bound to @cpu, store @newv to *@v within one
 * restartable sequence (i386 variant).
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
#endif
		/* final store */
		"movl %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
612
/*
 * Compare @v against @expectnot. When it does _not_ match, load @v
 * into @load, and store the content of *@v + voffp into @v.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v == @expectnot.
 */
static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
			       off_t voffp, intptr_t *load, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		/* load *@v into ebx and bail out if it matches @expectnot */
		"movl %[v], %%ebx\n\t"
		"cmpl %%ebx, %[expectnot]\n\t"
		"je %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"movl %[v], %%ebx\n\t"
		"cmpl %%ebx, %[expectnot]\n\t"
		"je %l[error2]\n\t"
#endif
		/* *@load = old *@v; then dereference old *@v + @voffp */
		"movl %%ebx, %[load]\n\t"
		"addl %[voffp], %%ebx\n\t"
		"movl (%%ebx), %%ebx\n\t"
		/* final store */
		"movl %%ebx, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(5)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expectnot]		"r" (expectnot),
		  [voffp]		"ir" (voffp),
		  [load]		"m" (*load)
		: "memory", "cc", "eax", "ebx"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
676
/*
 * Per-CPU add: add @count to *@v as a restartable sequence bound to @cpu
 * (i386 variant).  Returns 0 on success, -1 on rseq abort.
 */
static inline __attribute__((always_inline))
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
#endif
		/* final store */
		"addl %[count], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(4)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [count]		"ir" (count)
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort
#ifdef RSEQ_COMPARE_TWICE
		, error1
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
#endif
}
719
/*
 * If *@v == @expect while bound to @cpu, store @newv2 to *@v2 (staged via
 * eax, since @newv2 is a memory operand on i386), then commit @newv to *@v
 * within one restartable sequence.
 * Returns 0 on success, -1 on rseq abort, 1 when *@v != @expect.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
				 intptr_t *v2, intptr_t newv2,
				 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
#endif
		/* try store */
		"movl %[newv2], %%eax\n\t"
		"movl %%eax, %[v2]\n\t"
		RSEQ_INJECT_ASM(5)
		/* final store */
		"movl %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2]			"m" (*v2),
		  [newv2]		"m" (newv2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
	);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
781
/*
 * Same as rseq_cmpeqv_trystorev_storev(), but with release semantics for
 * the final store: the "lock; addl $0,-128(%esp)" is a locked no-op used
 * as a full memory barrier between the store to *@v2 and the committing
 * store to *@v.
 *
 * %eax is needed for the expect comparison here, so @expect is passed as
 * a memory operand ("m") instead of a register, unlike the non-release
 * variant.
 *
 * Returns 0 on success, 1 on compare failure, -1 on abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
					 intptr_t *v2, intptr_t newv2,
					 intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"movl %[expect], %%eax\n\t"
		"cmpl %[v], %%eax\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"movl %[expect], %%eax\n\t"
		"cmpl %[v], %%eax\n\t"
		"jnz %l[error2]\n\t"
#endif
		/* try store */
		"movl %[newv2], %[v2]\n\t"
		RSEQ_INJECT_ASM(5)
		/* full barrier => release semantics for the final store */
		"lock; addl $0,-128(%%esp)\n\t"
		/* final store */
		"movl %[newv], %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* try store input */
		  [v2]			"m" (*v2),
		  [newv2]		"r" (newv2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"m" (expect),
		  [newv]		"r" (newv)
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
		);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif

}
846
/*
 * Commit @newv into *@v only if *@v == @expect AND *@v2 == @expect2 and
 * the thread is still on @cpu.  *@v2 is only compared, never written.
 *
 * Returns 0 on success, 1 if either comparison fails (cmpfail), -1 on
 * abort (sequence was restarted; caller should retry).
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
			      intptr_t *v2, intptr_t expect2,
			      intptr_t newv, int cpu)
{
	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(4)
		"cmpl %[expect2], %[v2]\n\t"
		"jnz %l[cmpfail]\n\t"
		RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
		"cmpl %[v], %[expect]\n\t"
		"jnz %l[error2]\n\t"
		"cmpl %[expect2], %[v2]\n\t"
		"jnz %l[error3]\n\t"
#endif
		"movl %[newv], %%eax\n\t"
		/* final store */
		"movl %%eax, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* cmp2 input */
		  [v2]			"m" (*v2),
		  [expect2]		"r" (expect2),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"r" (expect),
		  [newv]		"m" (newv)
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2, error3
#endif
		);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("1st expected value comparison failed");
error3:
	rseq_bug("2nd expected value comparison failed");
#endif
}
912
/* TODO: implement a faster memcpy. */
/*
 * If *@v == @expect and the thread is still on @cpu, copy @len bytes from
 * @src to @dst (byte at a time), then commit @newv into *@v.
 *
 * x86-32 is register-starved, so @src/@dst/@len are spilled into
 * @rseq_scratch[] on entry and restored on every exit path (commit,
 * abort, cmpfail, error1/error2) — the restore in the abort/cmpfail
 * handlers keeps the operands reusable when the sequence restarts.
 *
 * Returns 0 on success, 1 on compare failure, -1 on abort.
 *
 * NOTE(review): the trailing backslashes on the memcpy lines are harmless
 * line splices, apparently left over from a macro-ized version.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
				 void *dst, void *src, size_t len,
				 intptr_t newv, int cpu)
{
	uint32_t rseq_scratch[3];

	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		"movl %[src], %[rseq_scratch0]\n\t"
		"movl %[dst], %[rseq_scratch1]\n\t"
		"movl %[len], %[rseq_scratch2]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"movl %[expect], %%eax\n\t"
		"cmpl %%eax, %[v]\n\t"
		"jnz 5f\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
		"movl %[expect], %%eax\n\t"
		"cmpl %%eax, %[v]\n\t"
		"jnz 7f\n\t"
#endif
		/* try memcpy */
		"test %[len], %[len]\n\t" \
		"jz 333f\n\t" \
		"222:\n\t" \
		"movb (%[src]), %%al\n\t" \
		"movb %%al, (%[dst])\n\t" \
		"inc %[src]\n\t" \
		"inc %[dst]\n\t" \
		"dec %[len]\n\t" \
		"jnz 222b\n\t" \
		"333:\n\t" \
		RSEQ_INJECT_ASM(5)
		"movl %[newv], %%eax\n\t"
		/* final store */
		"movl %%eax, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		/* teardown */
		"movl %[rseq_scratch2], %[len]\n\t"
		"movl %[rseq_scratch1], %[dst]\n\t"
		"movl %[rseq_scratch0], %[src]\n\t"
		RSEQ_ASM_DEFINE_ABORT(4,
				      "movl %[rseq_scratch2], %[len]\n\t"
				      "movl %[rseq_scratch1], %[dst]\n\t"
				      "movl %[rseq_scratch0], %[src]\n\t",
				      abort)
		RSEQ_ASM_DEFINE_CMPFAIL(5,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					cmpfail)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_DEFINE_CMPFAIL(6,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					error1)
		RSEQ_ASM_DEFINE_CMPFAIL(7,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					error2)
#endif
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"m" (expect),
		  [newv]		"m" (newv),
		  /* try memcpy input */
		  [dst]			"r" (dst),
		  [src]			"r" (src),
		  [len]			"r" (len),
		  [rseq_scratch0]	"m" (rseq_scratch[0]),
		  [rseq_scratch1]	"m" (rseq_scratch[1]),
		  [rseq_scratch2]	"m" (rseq_scratch[2])
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
		);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
1020
/* TODO: implement a faster memcpy. */
/*
 * Same as rseq_cmpeqv_trymemcpy_storev(), with release semantics for the
 * final store: the "lock; addl $0,-128(%esp)" locked no-op acts as a full
 * memory barrier between the memcpy and the committing store to *@v.
 *
 * Returns 0 on success, 1 on compare failure, -1 on abort.
 */
static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
					 void *dst, void *src, size_t len,
					 intptr_t newv, int cpu)
{
	uint32_t rseq_scratch[3];

	RSEQ_INJECT_C(9)

	__asm__ __volatile__ goto (
		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
		"movl %[src], %[rseq_scratch0]\n\t"
		"movl %[dst], %[rseq_scratch1]\n\t"
		"movl %[len], %[rseq_scratch2]\n\t"
		/* Start rseq by storing table entry pointer into rseq_cs. */
		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
		RSEQ_INJECT_ASM(3)
		"movl %[expect], %%eax\n\t"
		"cmpl %%eax, %[v]\n\t"
		"jnz 5f\n\t"
		RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
		"movl %[expect], %%eax\n\t"
		"cmpl %%eax, %[v]\n\t"
		"jnz 7f\n\t"
#endif
		/* try memcpy */
		"test %[len], %[len]\n\t" \
		"jz 333f\n\t" \
		"222:\n\t" \
		"movb (%[src]), %%al\n\t" \
		"movb %%al, (%[dst])\n\t" \
		"inc %[src]\n\t" \
		"inc %[dst]\n\t" \
		"dec %[len]\n\t" \
		"jnz 222b\n\t" \
		"333:\n\t" \
		RSEQ_INJECT_ASM(5)
		/* full barrier => release semantics for the final store */
		"lock; addl $0,-128(%%esp)\n\t"
		"movl %[newv], %%eax\n\t"
		/* final store */
		"movl %%eax, %[v]\n\t"
		"2:\n\t"
		RSEQ_INJECT_ASM(6)
		/* teardown */
		"movl %[rseq_scratch2], %[len]\n\t"
		"movl %[rseq_scratch1], %[dst]\n\t"
		"movl %[rseq_scratch0], %[src]\n\t"
		RSEQ_ASM_DEFINE_ABORT(4,
				      "movl %[rseq_scratch2], %[len]\n\t"
				      "movl %[rseq_scratch1], %[dst]\n\t"
				      "movl %[rseq_scratch0], %[src]\n\t",
				      abort)
		RSEQ_ASM_DEFINE_CMPFAIL(5,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					cmpfail)
#ifdef RSEQ_COMPARE_TWICE
		RSEQ_ASM_DEFINE_CMPFAIL(6,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					error1)
		RSEQ_ASM_DEFINE_CMPFAIL(7,
					"movl %[rseq_scratch2], %[len]\n\t"
					"movl %[rseq_scratch1], %[dst]\n\t"
					"movl %[rseq_scratch0], %[src]\n\t",
					error2)
#endif
		: /* gcc asm goto does not allow outputs */
		: [cpu_id]		"r" (cpu),
		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
		  /* final store input */
		  [v]			"m" (*v),
		  [expect]		"m" (expect),
		  [newv]		"m" (newv),
		  /* try memcpy input */
		  [dst]			"r" (dst),
		  [src]			"r" (src),
		  [len]			"r" (len),
		  [rseq_scratch0]	"m" (rseq_scratch[0]),
		  [rseq_scratch1]	"m" (rseq_scratch[1]),
		  [rseq_scratch2]	"m" (rseq_scratch[2])
		: "memory", "cc", "eax"
		  RSEQ_INJECT_CLOBBER
		: abort, cmpfail
#ifdef RSEQ_COMPARE_TWICE
		, error1, error2
#endif
		);
	return 0;
abort:
	RSEQ_INJECT_FAILED
	return -1;
cmpfail:
	return 1;
#ifdef RSEQ_COMPARE_TWICE
error1:
	rseq_bug("cpu_id comparison failed");
error2:
	rseq_bug("expected value comparison failed");
#endif
}
1129
1130#endif /* !RSEQ_SKIP_FASTPATH */
1131
1132#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 000000000000..4847e97ed049
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,117 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * rseq.c
4 *
5 * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; only
10 * version 2.1 of the License.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 */
17
18#define _GNU_SOURCE
19#include <errno.h>
20#include <sched.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25#include <syscall.h>
26#include <assert.h>
27#include <signal.h>
28
29#include "rseq.h"
30
31#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
32
33__attribute__((tls_model("initial-exec"))) __thread
34volatile struct rseq __rseq_abi = {
35 .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
36};
37
38static __attribute__((tls_model("initial-exec"))) __thread
39volatile int refcount;
40
41static void signal_off_save(sigset_t *oldset)
42{
43 sigset_t set;
44 int ret;
45
46 sigfillset(&set);
47 ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
48 if (ret)
49 abort();
50}
51
52static void signal_restore(sigset_t oldset)
53{
54 int ret;
55
56 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
57 if (ret)
58 abort();
59}
60
/*
 * Thin wrapper around the rseq(2) system call.  Returns 0 on success,
 * -1 with errno set on error.  NOTE(review): __NR_rseq must come from the
 * kernel headers in use; struct rseq is from <linux/rseq.h>.
 */
static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
		    int flags, uint32_t sig)
{
	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}
66
/*
 * Register the current thread's __rseq_abi area with the kernel.
 *
 * Registration is refcounted per thread, so nested callers are harmless:
 * only the first call issues the syscall.  All signals are blocked across
 * the refcount/syscall window so a signal handler cannot observe a
 * half-registered state.
 *
 * Returns 0 on success, -1 on error.  On any failure other than EBUSY
 * (already registered by someone else), cpu_id is set to -2 so
 * rseq_current_cpu_raw() reports "initialization failed" from then on.
 */
int rseq_register_current_thread(void)
{
	int rc, ret = 0;
	sigset_t oldset;

	signal_off_save(&oldset);
	if (refcount++)
		goto end;
	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
	if (!rc) {
		assert(rseq_current_cpu_raw() >= 0);
		goto end;
	}
	if (errno != EBUSY)
		__rseq_abi.cpu_id = -2;	/* permanent init failure marker */
	ret = -1;
	refcount--;	/* undo the optimistic increment */
end:
	signal_restore(oldset);
	return ret;
}
88
/*
 * Drop one reference on the current thread's rseq registration and, when
 * the count reaches zero, unregister __rseq_abi from the kernel.  Signals
 * are blocked across the window, mirroring
 * rseq_register_current_thread().
 *
 * Returns 0 on success, -1 on error.  NOTE(review): on syscall failure
 * the refcount stays decremented, so register/unregister pairing is
 * assumed to be balanced by callers.
 */
int rseq_unregister_current_thread(void)
{
	int rc, ret = 0;
	sigset_t oldset;

	signal_off_save(&oldset);
	if (--refcount)
		goto end;
	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
		      RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
	if (!rc)
		goto end;
	ret = -1;
end:
	signal_restore(oldset);
	return ret;
}
106
/*
 * Fallback used when __rseq_abi.cpu_id is not usable: ask the kernel for
 * the current CPU via sched_getcpu().  Aborts the process if even that
 * fails, since every caller needs a valid CPU number.
 */
int32_t rseq_fallback_current_cpu(void)
{
	int32_t current_cpu = sched_getcpu();

	if (current_cpu < 0) {
		perror("sched_getcpu()");
		abort();
	}
	return current_cpu;
}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 000000000000..a4684112676c
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,149 @@
1/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
2/*
3 * rseq.h
4 *
5 * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
6 */
7
8#ifndef RSEQ_H
9#define RSEQ_H
10
11#include <stdint.h>
12#include <stdbool.h>
13#include <pthread.h>
14#include <signal.h>
15#include <sched.h>
16#include <errno.h>
17#include <stdio.h>
18#include <stdlib.h>
19#include <sched.h>
20#include <linux/rseq.h>
21
22/*
23 * Empty code injection macros, override when testing.
24 * It is important to consider that the ASM injection macros need to be
25 * fully reentrant (e.g. do not modify the stack).
26 */
27#ifndef RSEQ_INJECT_ASM
28#define RSEQ_INJECT_ASM(n)
29#endif
30
31#ifndef RSEQ_INJECT_C
32#define RSEQ_INJECT_C(n)
33#endif
34
35#ifndef RSEQ_INJECT_INPUT
36#define RSEQ_INJECT_INPUT
37#endif
38
39#ifndef RSEQ_INJECT_CLOBBER
40#define RSEQ_INJECT_CLOBBER
41#endif
42
43#ifndef RSEQ_INJECT_FAILED
44#define RSEQ_INJECT_FAILED
45#endif
46
47extern __thread volatile struct rseq __rseq_abi;
48
49#define rseq_likely(x) __builtin_expect(!!(x), 1)
50#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
51#define rseq_barrier() __asm__ __volatile__("" : : : "memory")
52
53#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
54#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
55#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x)
56
57#define __rseq_str_1(x) #x
58#define __rseq_str(x) __rseq_str_1(x)
59
60#define rseq_log(fmt, args...) \
61 fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \
62 ## args, __func__)
63
64#define rseq_bug(fmt, args...) \
65 do { \
66 rseq_log(fmt, ##args); \
67 abort(); \
68 } while (0)
69
70#if defined(__x86_64__) || defined(__i386__)
71#include <rseq-x86.h>
72#elif defined(__ARMEL__)
73#include <rseq-arm.h>
74#elif defined(__PPC__)
75#include <rseq-ppc.h>
76#elif defined(__mips__)
77#include <rseq-mips.h>
78#else
79#error unsupported target
80#endif
81
82/*
83 * Register rseq for the current thread. This needs to be called once
84 * by any thread which uses restartable sequences, before they start
85 * using restartable sequences, to ensure restartable sequences
86 * succeed. A restartable sequence executed from a non-registered
87 * thread will always fail.
88 */
89int rseq_register_current_thread(void);
90
91/*
92 * Unregister rseq for current thread.
93 */
94int rseq_unregister_current_thread(void);
95
96/*
97 * Restartable sequence fallback for reading the current CPU number.
98 */
99int32_t rseq_fallback_current_cpu(void);
100
101/*
102 * Values returned can be either the current CPU number, -1 (rseq is
103 * uninitialized), or -2 (rseq initialization has failed).
104 */
105static inline int32_t rseq_current_cpu_raw(void)
106{
107 return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
108}
109
110/*
111 * Returns a possible CPU number, which is typically the current CPU.
112 * The returned CPU number can be used to prepare for an rseq critical
113 * section, which will confirm whether the cpu number is indeed the
114 * current one, and whether rseq is initialized.
115 *
116 * The CPU number returned by rseq_cpu_start should always be validated
117 * by passing it to a rseq asm sequence, or by comparing it to the
118 * return value of rseq_current_cpu_raw() if the rseq asm sequence
119 * does not need to be invoked.
120 */
121static inline uint32_t rseq_cpu_start(void)
122{
123 return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
124}
125
/*
 * Return the current CPU number, falling back to sched_getcpu() (via
 * rseq_fallback_current_cpu()) when rseq is uninitialized or failed to
 * initialize for this thread.
 */
static inline uint32_t rseq_current_cpu(void)
{
	int32_t raw = rseq_current_cpu_raw();

	return rseq_unlikely(raw < 0) ? rseq_fallback_current_cpu() : raw;
}
135
136/*
137 * rseq_prepare_unload() should be invoked by each thread using rseq_finish*()
138 * at least once between their last rseq_finish*() and library unload of the
139 * library defining the rseq critical section (struct rseq_cs). This also
140 * applies to use of rseq in code generated by JIT: rseq_prepare_unload()
141 * should be invoked at least once by each thread using rseq_finish*() before
142 * reclaim of the memory holding the struct rseq_cs.
143 */
144static inline void rseq_prepare_unload(void)
145{
146 __rseq_abi.rseq_cs = 0;
147}
148
#endif /* RSEQ_H */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
new file mode 100755
index 000000000000..3acd6d75ff9f
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -0,0 +1,121 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0+ or MIT
3
4EXTRA_ARGS=${@}
5
6OLDIFS="$IFS"
7IFS=$'\n'
8TEST_LIST=(
9 "-T s"
10 "-T l"
11 "-T b"
12 "-T b -M"
13 "-T m"
14 "-T m -M"
15 "-T i"
16)
17
18TEST_NAME=(
19 "spinlock"
20 "list"
21 "buffer"
22 "buffer with barrier"
23 "memcpy"
24 "memcpy with barrier"
25 "increment"
26)
27IFS="$OLDIFS"
28
29REPS=1000
30SLOW_REPS=100
31
# Run every configured test case once with ./param_test and once with
# ./param_test_compare_twice, forwarding both the function's arguments and
# the global EXTRA_ARGS.  Exits the whole script on the first failure.
function do_tests()
{
	local idx
	for ((idx = 0; idx < ${#TEST_LIST[@]}; idx++)); do
		echo "Running test ${TEST_NAME[$idx]}"
		./param_test ${TEST_LIST[$idx]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
		echo "Running compare-twice test ${TEST_NAME[$idx]}"
		./param_test_compare_twice ${TEST_LIST[$idx]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
	done
}
43
44echo "Default parameters"
45do_tests
46
47echo "Loop injection: 10000 loops"
48
49OLDIFS="$IFS"
50IFS=$'\n'
51INJECT_LIST=(
52 "1"
53 "2"
54 "3"
55 "4"
56 "5"
57 "6"
58 "7"
59 "8"
60 "9"
61)
62IFS="$OLDIFS"
63
64NR_LOOPS=10000
65
66i=0
67while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
68 echo "Injecting at <${INJECT_LIST[$i]}>"
69 do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
70 let "i++"
71done
72NR_LOOPS=
73
# Re-run the whole suite with the "blocking" injection points (7-9)
# enabled in infinite-loop mode (NR_LOOPS=-1).  Extra arguments are
# forwarded to do_tests so callers can add yield/kill/sleep behaviour.
function inject_blocking()
{
	OLDIFS="$IFS"
	IFS=$'\n'
	INJECT_LIST=(
		"7"
		"8"
		"9"
	)
	IFS="$OLDIFS"

	NR_LOOPS=-1

	for ((i = 0; i < ${#INJECT_LIST[@]}; i++)); do
		echo "Injecting at <${INJECT_LIST[$i]}>"
		do_tests -${INJECT_LIST[i]} -1 ${@}
	done
	NR_LOOPS=
}
95
96echo "Yield injection (25%)"
97inject_blocking -m 4 -y
98
99echo "Yield injection (50%)"
100inject_blocking -m 2 -y
101
102echo "Yield injection (100%)"
103inject_blocking -m 1 -y
104
105echo "Kill injection (25%)"
106inject_blocking -m 4 -k
107
108echo "Kill injection (50%)"
109inject_blocking -m 2 -k
110
111echo "Kill injection (100%)"
112inject_blocking -m 1 -k
113
114echo "Sleep injection (1ms, 25%)"
115inject_blocking -m 4 -s 1
116
117echo "Sleep injection (1ms, 50%)"
118inject_blocking -m 2 -s 1
119
120echo "Sleep injection (1ms, 100%)"
121inject_blocking -m 1 -s 1
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000000..d0ad44f6294a
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
1rtctest
2setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000000..de9c8566672a
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
1# SPDX-License-Identifier: GPL-2.0
2CFLAGS += -O3 -Wl,-no-as-needed -Wall
3LDFLAGS += -lrt -lpthread -lm
4
5TEST_GEN_PROGS = rtctest
6
7TEST_GEN_PROGS_EXTENDED = setdate
8
9include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000000..e20b017e7073
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,238 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Real Time Clock Driver Test Program
4 *
5 * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
6 */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <linux/rtc.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <sys/ioctl.h>
14#include <sys/time.h>
15#include <sys/types.h>
16#include <time.h>
17#include <unistd.h>
18
19#include "../kselftest_harness.h"
20
21#define NUM_UIE 3
22#define ALARM_DELTA 3
23
/* Default RTC device node; may be overridden by argv[1] in main(). */
static char *rtc_file = "/dev/rtc0";

/* Per-test state: an open read-only fd on the RTC character device. */
FIXTURE(rtc) {
	int fd;
};

/* Open the RTC device before each test; fail the test if it is absent. */
FIXTURE_SETUP(rtc) {
	self->fd = open(rtc_file, O_RDONLY);
	ASSERT_NE(-1, self->fd);
}

FIXTURE_TEARDOWN(rtc) {
	close(self->fd);
}
38
/* Read and log the current RTC date/time; fails only if the ioctl does. */
TEST_F(rtc, date_read) {
	int rc;
	struct rtc_time rtc_tm;

	/* Read the RTC time/date */
	rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
	ASSERT_NE(-1, rc);

	TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
	       rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
	       rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
}
51
/*
 * Enable RTC update interrupts and perform NUM_UIE blocking reads, one
 * per 1-second update interrupt.  Skips (without failing) when the driver
 * reports EINVAL, i.e. update IRQs are unsupported.
 */
TEST_F(rtc, uie_read) {
	int i, rc, irq = 0;
	unsigned long data;

	/* Turn on update interrupts */
	rc = ioctl(self->fd, RTC_UIE_ON, 0);
	if (rc == -1) {
		ASSERT_EQ(EINVAL, errno);
		TH_LOG("skip update IRQs not supported.");
		return;
	}

	for (i = 0; i < NUM_UIE; i++) {
		/* This read will block */
		rc = read(self->fd, &data, sizeof(data));
		ASSERT_NE(-1, rc);
		irq++;
	}

	EXPECT_EQ(NUM_UIE, irq);

	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
	ASSERT_NE(-1, rc);
}
76
/*
 * Same as uie_read, but wait for each update interrupt with select()
 * (2-second timeout) so the subsequent read is guaranteed not to block.
 * Skips when update IRQs are unsupported (EINVAL).
 */
TEST_F(rtc, uie_select) {
	int i, rc, irq = 0;
	unsigned long data;

	/* Turn on update interrupts */
	rc = ioctl(self->fd, RTC_UIE_ON, 0);
	if (rc == -1) {
		ASSERT_EQ(EINVAL, errno);
		TH_LOG("skip update IRQs not supported.");
		return;
	}

	for (i = 0; i < NUM_UIE; i++) {
		struct timeval tv = { .tv_sec = 2 };
		fd_set readfds;

		FD_ZERO(&readfds);
		FD_SET(self->fd, &readfds);
		/* The select will wait until an RTC interrupt happens. */
		rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
		ASSERT_NE(-1, rc);
		ASSERT_NE(0, rc);

		/* This read won't block */
		rc = read(self->fd, &data, sizeof(unsigned long));
		ASSERT_NE(-1, rc);
		irq++;
	}

	EXPECT_EQ(NUM_UIE, irq);

	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
	ASSERT_NE(-1, rc);
}
111
112TEST_F(rtc, alarm_alm_set) {
113 struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
114 unsigned long data;
115 struct rtc_time tm;
116 fd_set readfds;
117 time_t secs, new;
118 int rc;
119
120 rc = ioctl(self->fd, RTC_RD_TIME, &tm);
121 ASSERT_NE(-1, rc);
122
123 secs = timegm((struct tm *)&tm) + ALARM_DELTA;
124 gmtime_r(&secs, (struct tm *)&tm);
125
126 rc = ioctl(self->fd, RTC_ALM_SET, &tm);
127 if (rc == -1) {
128 ASSERT_EQ(EINVAL, errno);
129 TH_LOG("skip alarms are not supported.");
130 return;
131 }
132
133 rc = ioctl(self->fd, RTC_ALM_READ, &tm);
134 ASSERT_NE(-1, rc);
135
136 TH_LOG("Alarm time now set to %02d:%02d:%02d.",
137 tm.tm_hour, tm.tm_min, tm.tm_sec);
138
139 /* Enable alarm interrupts */
140 rc = ioctl(self->fd, RTC_AIE_ON, 0);
141 ASSERT_NE(-1, rc);
142
143 FD_ZERO(&readfds);
144 FD_SET(self->fd, &readfds);
145
146 rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
147 ASSERT_NE(-1, rc);
148 EXPECT_NE(0, rc);
149
150 /* Disable alarm interrupts */
151 rc = ioctl(self->fd, RTC_AIE_OFF, 0);
152 ASSERT_NE(-1, rc);
153
154 if (rc == 0)
155 return;
156
157 rc = read(self->fd, &data, sizeof(unsigned long));
158 ASSERT_NE(-1, rc);
159 TH_LOG("data: %lx", data);
160
161 rc = ioctl(self->fd, RTC_RD_TIME, &tm);
162 ASSERT_NE(-1, rc);
163
164 new = timegm((struct tm *)&tm);
165 ASSERT_EQ(new, secs);
166}
167
/*
 * Program an alarm ALARM_DELTA seconds ahead using the modern
 * RTC_WKALM_SET interface (which carries its own enable flag, so no
 * separate RTC_AIE_ON is needed), wait for it to fire, and verify the
 * RTC time at expiry.  Skips when alarms are unsupported (EINVAL).
 */
TEST_F(rtc, alarm_wkalm_set) {
	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
	struct rtc_wkalrm alarm = { 0 };
	struct rtc_time tm;
	unsigned long data;
	fd_set readfds;
	time_t secs, new;
	int rc;

	rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
	ASSERT_NE(-1, rc);

	/* Alarm time is computed in UTC, the RTC's own time base. */
	secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
	gmtime_r(&secs, (struct tm *)&alarm.time);

	alarm.enabled = 1;

	rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
	if (rc == -1) {
		ASSERT_EQ(EINVAL, errno);
		TH_LOG("skip alarms are not supported.");
		return;
	}

	rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
	ASSERT_NE(-1, rc);

	TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
	       alarm.time.tm_mday, alarm.time.tm_mon + 1,
	       alarm.time.tm_year + 1900, alarm.time.tm_hour,
	       alarm.time.tm_min, alarm.time.tm_sec);

	FD_ZERO(&readfds);
	FD_SET(self->fd, &readfds);

	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
	ASSERT_NE(-1, rc);
	EXPECT_NE(0, rc);

	rc = read(self->fd, &data, sizeof(unsigned long));
	ASSERT_NE(-1, rc);

	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
	ASSERT_NE(-1, rc);

	new = timegm((struct tm *)&tm);
	ASSERT_EQ(new, secs);
}
216
/*
 * kselftest_harness hook: runs as the last constructor, and if no earlier
 * constructor claimed the ordering yet, records that constructors execute
 * in backward order on this build.  NOTE(review): __constructor_order and
 * _CONSTRUCTOR_ORDER_BACKWARD are provided by kselftest_harness.h.
 */
static void __attribute__((constructor))
__constructor_order_last(void)
{
	if (!__constructor_order)
		__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
}
223
224int main(int argc, char **argv)
225{
226 switch (argc) {
227 case 2:
228 rtc_file = argv[1];
229 /* FALLTHROUGH */
230 case 1:
231 break;
232 default:
233 fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
234 return 1;
235 }
236
237 return test_harness_run(argc, argv);
238}
diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c
index 2cb78489eca4..2cb78489eca4 100644
--- a/tools/testing/selftests/timers/rtctest_setdate.c
+++ b/tools/testing/selftests/rtc/setdate.c
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 168c66d74fc5..e1473234968d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -134,11 +134,15 @@ struct seccomp_data {
134#endif 134#endif
135 135
136#ifndef SECCOMP_FILTER_FLAG_TSYNC 136#ifndef SECCOMP_FILTER_FLAG_TSYNC
137#define SECCOMP_FILTER_FLAG_TSYNC 1 137#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
138#endif 138#endif
139 139
140#ifndef SECCOMP_FILTER_FLAG_LOG 140#ifndef SECCOMP_FILTER_FLAG_LOG
141#define SECCOMP_FILTER_FLAG_LOG 2 141#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
142#endif
143
144#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
145#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
142#endif 146#endif
143 147
144#ifndef PTRACE_SECCOMP_GET_METADATA 148#ifndef PTRACE_SECCOMP_GET_METADATA
@@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock)
2072TEST(detect_seccomp_filter_flags) 2076TEST(detect_seccomp_filter_flags)
2073{ 2077{
2074 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2078 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2075 SECCOMP_FILTER_FLAG_LOG }; 2079 SECCOMP_FILTER_FLAG_LOG,
2080 SECCOMP_FILTER_FLAG_SPEC_ALLOW };
2076 unsigned int flag, all_flags; 2081 unsigned int flag, all_flags;
2077 int i; 2082 int i;
2078 long ret; 2083 long ret;
2079 2084
2080 /* Test detection of known-good filter flags */ 2085 /* Test detection of known-good filter flags */
2081 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2086 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2087 int bits = 0;
2088
2082 flag = flags[i]; 2089 flag = flags[i];
2090 /* Make sure the flag is a single bit! */
2091 while (flag) {
2092 if (flag & 0x1)
2093 bits ++;
2094 flag >>= 1;
2095 }
2096 ASSERT_EQ(1, bits);
2097 flag = flags[i];
2098
2083 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2099 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2084 ASSERT_NE(ENOSYS, errno) { 2100 ASSERT_NE(ENOSYS, errno) {
2085 TH_LOG("Kernel does not support seccomp syscall!"); 2101 TH_LOG("Kernel does not support seccomp syscall!");
diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
new file mode 100644
index 000000000000..a19531dba4dc
--- /dev/null
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -0,0 +1,50 @@
1# SPDX-License-Identifier: GPL-2.0
2uname_M := $(shell uname -m 2>/dev/null || echo not)
3ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
4
5ifneq ($(ARCH),sparc64)
6nothing:
7.PHONY: all clean run_tests install
8.SILENT:
9else
10
11SUBDIRS := drivers
12
13TEST_PROGS := run.sh
14
15
16.PHONY: all clean
17
18include ../lib.mk
19
20all:
21 @for DIR in $(SUBDIRS); do \
22 BUILD_TARGET=$(OUTPUT)/$$DIR; \
23 mkdir $$BUILD_TARGET -p; \
24 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
25 #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
26 TEST=$$DIR"_test.sh"; \
27 if [ -e $$DIR/$$TEST ]; then \
28 rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
29 fi \
30 done
31
32override define INSTALL_RULE
33 mkdir -p $(INSTALL_PATH)
34 install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
35
36 @for SUBDIR in $(SUBDIRS); do \
37 BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
38 mkdir $$BUILD_TARGET -p; \
39 $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
40 done;
41endef
42
43override define CLEAN
44 @for DIR in $(SUBDIRS); do \
45 BUILD_TARGET=$(OUTPUT)/$$DIR; \
46 mkdir $$BUILD_TARGET -p; \
47 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
48 done
49endef
50endif
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
new file mode 100644
index 000000000000..90e835ed74e6
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -0,0 +1 @@
adi-test
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
new file mode 100644
index 000000000000..deb0df415565
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -0,0 +1,15 @@
1# SPDX-License-Identifier: GPL-2.0
2INCLUDEDIR := -I.
3CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
4
5TEST_GEN_FILES := adi-test
6
7all: $(TEST_GEN_FILES)
8
9$(TEST_GEN_FILES): adi-test.c
10
11TEST_PROGS := drivers_test.sh
12
13include ../../lib.mk
14
15$(OUTPUT)/adi-test: adi-test.c
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
new file mode 100644
index 000000000000..95d93c6a88a5
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -0,0 +1,721 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * selftest for sparc64's privileged ADI driver
4 *
5 * Author: Tom Hromatka <tom.hromatka@oracle.com>
6 */
7#include <linux/kernel.h>
8#include <errno.h>
9#include <fcntl.h>
10#include <stdarg.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <sys/syscall.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <unistd.h>
18
19#include "../../kselftest.h"
20
/* Verbosity bit flags accepted by debug_print(); any subset may be
 * enabled at once via the DEBUG mask below.
 */
#define DEBUG_LEVEL_1_BIT	(0x0001)
#define DEBUG_LEVEL_2_BIT	(0x0002)
#define DEBUG_LEVEL_3_BIT	(0x0004)
#define DEBUG_LEVEL_4_BIT	(0x0008)
#define DEBUG_TIMING_BIT	(0x1000)

#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif

/* bit mask of enabled bits to print (compile-time; only L1 by default) */
#define DEBUG 0x0001

/* Convenience wrappers: each forwards to debug_print() with its level
 * bit, so output appears only when that bit is set in DEBUG.
 */
#define DEBUG_PRINT_L1(...)	debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__)
#define DEBUG_PRINT_L2(...)	debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__)
#define DEBUG_PRINT_L3(...)	debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__)
#define DEBUG_PRINT_L4(...)	debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__)
#define DEBUG_PRINT_T(...)	debug_print(DEBUG_TIMING_BIT, __VA_ARGS__)
39
40static void debug_print(int level, const char *s, ...)
41{
42 va_list args;
43
44 va_start(args, s);
45
46 if (DEBUG & level)
47 vfprintf(stdout, s, args);
48 va_end(args);
49}
50
#ifndef min
/* Parenthesize the result operands as well as the comparands, so
 * expansions of compound arguments keep their intended grouping.
 * (Arguments are still evaluated twice — don't pass side effects.)
 */
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif

/* Log a test's return value at L1 and return it to the harness. */
#define RETURN_FROM_TEST(_ret) \
	do { \
		DEBUG_PRINT_L1( \
			"\tTest %s returned %d\n", __func__, _ret); \
		return _ret; \
	} while (0)

/* One ADI version byte covers a 64-byte block; versions are 4 bits. */
#define ADI_BLKSZ	64
#define ADI_MAX_VERSION	15

/* Report a failed step with its location, then jump to the test's
 * common "out" label.
 */
#define TEST_STEP_FAILURE(_ret) \
	do { \
		fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \
			_ret, __func__, __LINE__); \
		goto out; \
	} while (0)

/* Read the sparc64 %tick cycle counter into _x (used for timing). */
#define RDTICK(_x) \
	asm volatile(" rd  %%tick, %0\n" : "=r" (_x))
74
/*
 * Return a pseudo-random ADI version tag in [0, ADI_MAX_VERSION],
 * derived from the low bits of the sparc64 %tick cycle counter.
 */
static int random_version(void)
{
	long tick;

	RDTICK(tick);

	return tick % (ADI_MAX_VERSION + 1);
}
83
/* Capacity of the recorded "System RAM" range table below. */
#define MAX_RANGES_SUPPORTED 5
/* Search key for RAM lines in /proc/iomem; the trailing newline
 * anchors the match to the end of a line.
 */
static const char system_ram_str[] = "System RAM\n";
/* Number of valid entries in start_addr[]/end_addr[], set by
 * build_memory_map().
 */
static int range_count;
static unsigned long long int start_addr[MAX_RANGES_SUPPORTED];
static unsigned long long int end_addr[MAX_RANGES_SUPPORTED];
89
/*
 * Per-syscall timing statistics, accumulated by update_stats() and
 * reported at the end of the run.
 */
struct stats {
	char name[16];		/* syscall name for the report */
	unsigned long total;	/* sum of measured tick deltas */
	unsigned long count;	/* number of measurements folded in */
	unsigned long bytes;	/* total byte count credited */
};

static struct stats read_stats = {
	.name = "read", .total = 0, .count = 0, .bytes = 0};
static struct stats pread_stats = {
	.name = "pread", .total = 0, .count = 0, .bytes = 0};
static struct stats write_stats = {
	.name = "write", .total = 0, .count = 0, .bytes = 0};
static struct stats pwrite_stats = {
	.name = "pwrite", .total = 0, .count = 0, .bytes = 0};
static struct stats seek_stats = {
	.name = "seek", .total = 0, .count = 0, .bytes = 0};

/* Fold one measurement (tick delta + bytes moved) into ustats. */
static void update_stats(struct stats * const ustats,
			 unsigned long measurement, unsigned long bytes)
{
	ustats->count++;
	ustats->total += measurement;
	ustats->bytes += bytes;
}
115
116static void print_ustats(const struct stats * const ustats)
117{
118 DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n",
119 ustats->name, ustats->count,
120 (float)ustats->total / (float)ustats->count,
121 (float)ustats->bytes / (float)ustats->count);
122}
123
124static void print_stats(void)
125{
126 DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n"
127 "\tCount\t(ticks)\t(bytes)\n"
128 "-------------------------------\n");
129
130 print_ustats(&read_stats);
131 print_ustats(&pread_stats);
132 print_ustats(&write_stats);
133 print_ustats(&pwrite_stats);
134 print_ustats(&seek_stats);
135}
136
137static int build_memory_map(void)
138{
139 char line[256];
140 FILE *fp;
141 int i;
142
143 range_count = 0;
144
145 fp = fopen("/proc/iomem", "r");
146 if (!fp) {
147 fprintf(stderr, "/proc/iomem: error %d: %s\n",
148 errno, strerror(errno));
149 return -errno;
150 }
151
152 while (fgets(line, sizeof(line), fp) != 0) {
153 if (strstr(line, system_ram_str)) {
154 char *dash, *end_ptr;
155
156 /* Given a line like this:
157 * d0400000-10ffaffff : System RAM
158 * replace the "-" with a space
159 */
160 dash = strstr(line, "-");
161 dash[0] = 0x20;
162
163 start_addr[range_count] = strtoull(line, &end_ptr, 16);
164 end_addr[range_count] = strtoull(end_ptr, NULL, 16);
165 range_count++;
166 }
167 }
168
169 fclose(fp);
170
171 DEBUG_PRINT_L1("RAM Ranges\n");
172 for (i = 0; i < range_count; i++)
173 DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n",
174 i, start_addr[i], end_addr[i]);
175
176 if (range_count == 0) {
177 fprintf(stderr, "No valid address ranges found. Error.\n");
178 return -1;
179 }
180
181 return 0;
182}
183
184static int read_adi(int fd, unsigned char *buf, int buf_sz)
185{
186 int ret, bytes_read = 0;
187 long start, end, elapsed_time = 0;
188
189 do {
190 RDTICK(start);
191 ret = read(fd, buf + bytes_read, buf_sz - bytes_read);
192 RDTICK(end);
193 if (ret < 0)
194 return -errno;
195
196 elapsed_time += end - start;
197 update_stats(&read_stats, elapsed_time, buf_sz);
198 bytes_read += ret;
199
200 } while (bytes_read < buf_sz);
201
202 DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time);
203 DEBUG_PRINT_L3("\tRead %d bytes\n", bytes_read);
204
205 return bytes_read;
206}
207
208static int pread_adi(int fd, unsigned char *buf,
209 int buf_sz, unsigned long offset)
210{
211 int ret, i, bytes_read = 0;
212 unsigned long cur_offset;
213 long start, end, elapsed_time = 0;
214
215 cur_offset = offset;
216 do {
217 RDTICK(start);
218 ret = pread(fd, buf + bytes_read, buf_sz - bytes_read,
219 cur_offset);
220 RDTICK(end);
221 if (ret < 0)
222 return -errno;
223
224 elapsed_time += end - start;
225 update_stats(&pread_stats, elapsed_time, buf_sz);
226 bytes_read += ret;
227 cur_offset += ret;
228
229 } while (bytes_read < buf_sz);
230
231 DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time);
232 DEBUG_PRINT_L3("\tRead %d bytes starting at offset 0x%lx\n",
233 bytes_read, offset);
234 for (i = 0; i < bytes_read; i++)
235 DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]);
236
237 return bytes_read;
238}
239
240static int write_adi(int fd, const unsigned char * const buf, int buf_sz)
241{
242 int ret, bytes_written = 0;
243 long start, end, elapsed_time = 0;
244
245 do {
246 RDTICK(start);
247 ret = write(fd, buf + bytes_written, buf_sz - bytes_written);
248 RDTICK(end);
249 if (ret < 0)
250 return -errno;
251
252 elapsed_time += (end - start);
253 update_stats(&write_stats, elapsed_time, buf_sz);
254 bytes_written += ret;
255 } while (bytes_written < buf_sz);
256
257 DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time);
258 DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz);
259
260 return bytes_written;
261}
262
263static int pwrite_adi(int fd, const unsigned char * const buf,
264 int buf_sz, unsigned long offset)
265{
266 int ret, bytes_written = 0;
267 unsigned long cur_offset;
268 long start, end, elapsed_time = 0;
269
270 cur_offset = offset;
271
272 do {
273 RDTICK(start);
274 ret = pwrite(fd, buf + bytes_written,
275 buf_sz - bytes_written, cur_offset);
276 RDTICK(end);
277 if (ret < 0) {
278 fprintf(stderr, "pwrite(): error %d: %s\n",
279 errno, strerror(errno));
280 return -errno;
281 }
282
283 elapsed_time += (end - start);
284 update_stats(&pwrite_stats, elapsed_time, buf_sz);
285 bytes_written += ret;
286 cur_offset += ret;
287
288 } while (bytes_written < buf_sz);
289
290 DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time);
291 DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n",
292 bytes_written, buf_sz, offset);
293
294 return bytes_written;
295}
296
297static off_t seek_adi(int fd, off_t offset, int whence)
298{
299 long start, end;
300 off_t ret;
301
302 RDTICK(start);
303 ret = lseek(fd, offset, whence);
304 RDTICK(end);
305 DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret);
306 if (ret < 0)
307 goto out;
308
309 DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start);
310 update_stats(&seek_stats, end - start, 0);
311
312out:
313 (void)lseek(fd, 0, SEEK_END);
314 return ret;
315}
316
317static int test0_prpw_aligned_1byte(int fd)
318{
319 /* somewhat arbitrarily chosen address */
320 unsigned long paddr =
321 (end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1);
322 unsigned char version[1], expected_version;
323 loff_t offset;
324 int ret;
325
326 version[0] = random_version();
327 expected_version = version[0];
328
329 offset = paddr / ADI_BLKSZ;
330
331 ret = pwrite_adi(fd, version, sizeof(version), offset);
332 if (ret != sizeof(version))
333 TEST_STEP_FAILURE(ret);
334
335 ret = pread_adi(fd, version, sizeof(version), offset);
336 if (ret != sizeof(version))
337 TEST_STEP_FAILURE(ret);
338
339 if (expected_version != version[0]) {
340 DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
341 expected_version, version[0]);
342 TEST_STEP_FAILURE(-expected_version);
343 }
344
345 ret = 0;
346out:
347 RETURN_FROM_TEST(ret);
348}
349
350#define TEST1_VERSION_SZ 4096
351static int test1_prpw_aligned_4096bytes(int fd)
352{
353 /* somewhat arbitrarily chosen address */
354 unsigned long paddr =
355 (end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1);
356 unsigned char version[TEST1_VERSION_SZ],
357 expected_version[TEST1_VERSION_SZ];
358 loff_t offset;
359 int ret, i;
360
361 for (i = 0; i < TEST1_VERSION_SZ; i++) {
362 version[i] = random_version();
363 expected_version[i] = version[i];
364 }
365
366 offset = paddr / ADI_BLKSZ;
367
368 ret = pwrite_adi(fd, version, sizeof(version), offset);
369 if (ret != sizeof(version))
370 TEST_STEP_FAILURE(ret);
371
372 ret = pread_adi(fd, version, sizeof(version), offset);
373 if (ret != sizeof(version))
374 TEST_STEP_FAILURE(ret);
375
376 for (i = 0; i < TEST1_VERSION_SZ; i++) {
377 if (expected_version[i] != version[i]) {
378 DEBUG_PRINT_L2(
379 "\tExpected version %d but read version %d\n",
380 expected_version, version[0]);
381 TEST_STEP_FAILURE(-expected_version[i]);
382 }
383 }
384
385 ret = 0;
386out:
387 RETURN_FROM_TEST(ret);
388}
389
390#define TEST2_VERSION_SZ 10327
391static int test2_prpw_aligned_10327bytes(int fd)
392{
393 /* somewhat arbitrarily chosen address */
394 unsigned long paddr =
395 (start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1);
396 unsigned char version[TEST2_VERSION_SZ],
397 expected_version[TEST2_VERSION_SZ];
398 loff_t offset;
399 int ret, i;
400
401 for (i = 0; i < TEST2_VERSION_SZ; i++) {
402 version[i] = random_version();
403 expected_version[i] = version[i];
404 }
405
406 offset = paddr / ADI_BLKSZ;
407
408 ret = pwrite_adi(fd, version, sizeof(version), offset);
409 if (ret != sizeof(version))
410 TEST_STEP_FAILURE(ret);
411
412 ret = pread_adi(fd, version, sizeof(version), offset);
413 if (ret != sizeof(version))
414 TEST_STEP_FAILURE(ret);
415
416 for (i = 0; i < TEST2_VERSION_SZ; i++) {
417 if (expected_version[i] != version[i]) {
418 DEBUG_PRINT_L2(
419 "\tExpected version %d but read version %d\n",
420 expected_version, version[0]);
421 TEST_STEP_FAILURE(-expected_version[i]);
422 }
423 }
424
425 ret = 0;
426out:
427 RETURN_FROM_TEST(ret);
428}
429
430#define TEST3_VERSION_SZ 12541
431static int test3_prpw_unaligned_12541bytes(int fd)
432{
433 /* somewhat arbitrarily chosen address */
434 unsigned long paddr =
435 ((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17;
436 unsigned char version[TEST3_VERSION_SZ],
437 expected_version[TEST3_VERSION_SZ];
438 loff_t offset;
439 int ret, i;
440
441 for (i = 0; i < TEST3_VERSION_SZ; i++) {
442 version[i] = random_version();
443 expected_version[i] = version[i];
444 }
445
446 offset = paddr / ADI_BLKSZ;
447
448 ret = pwrite_adi(fd, version, sizeof(version), offset);
449 if (ret != sizeof(version))
450 TEST_STEP_FAILURE(ret);
451
452 ret = pread_adi(fd, version, sizeof(version), offset);
453 if (ret != sizeof(version))
454 TEST_STEP_FAILURE(ret);
455
456 for (i = 0; i < TEST3_VERSION_SZ; i++) {
457 if (expected_version[i] != version[i]) {
458 DEBUG_PRINT_L2(
459 "\tExpected version %d but read version %d\n",
460 expected_version, version[0]);
461 TEST_STEP_FAILURE(-expected_version[i]);
462 }
463 }
464
465 ret = 0;
466out:
467 RETURN_FROM_TEST(ret);
468}
469
/*
 * Exercise the driver's lseek() handling: an absolute seek, a repeated
 * absolute seek to the same offset (which this test expects to fail
 * with EINVAL — driver-specific behavior), and relative seeks forward
 * and backward.
 */
static int test4_lseek(int fd)
{
#define OFFSET_ADD		(0x100)
/* Large unsigned constant: added to the current position it wraps and
 * acts as a backwards displacement for the SEEK_CUR test below.
 * NOTE(review): relies on unsigned wraparound converted back to off_t —
 * implementation-defined; confirm intent.
 */
#define OFFSET_SUBTRACT		(0xFFFFFFF000000000)

	off_t offset_out, offset_in;
	int ret;


	offset_in = 0x123456789abcdef0;
	offset_out = seek_adi(fd, offset_in, SEEK_SET);
	if (offset_out != offset_in) {
		ret = -1;
		TEST_STEP_FAILURE(ret);
	}

	/* seek to the current offset. this should return EINVAL */
	offset_out = seek_adi(fd, offset_in, SEEK_SET);
	if (offset_out < 0 && errno == EINVAL)
		DEBUG_PRINT_L2(
			"\tSEEK_SET failed as designed. Not an error\n");
	else {
		ret = -2;
		TEST_STEP_FAILURE(ret);
	}
	/* NOTE(review): this errno check assumes seek_adi()'s trailing
	 * lseek(fd, 0, SEEK_END) did not overwrite errno — confirm.
	 */

	offset_out = seek_adi(fd, 0, SEEK_CUR);
	if (offset_out != offset_in) {
		ret = -3;
		TEST_STEP_FAILURE(ret);
	}

	offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR);
	if (offset_out != (offset_in + OFFSET_ADD)) {
		ret = -4;
		TEST_STEP_FAILURE(ret);
	}

	offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR);
	if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) {
		ret = -5;
		TEST_STEP_FAILURE(ret);
	}

	ret = 0;
out:
	RETURN_FROM_TEST(ret);
}
518
519static int test5_rw_aligned_1byte(int fd)
520{
521 /* somewhat arbitrarily chosen address */
522 unsigned long paddr =
523 (end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1);
524 unsigned char version, expected_version;
525 loff_t offset;
526 off_t oret;
527 int ret;
528
529 offset = paddr / ADI_BLKSZ;
530 version = expected_version = random_version();
531
532 oret = seek_adi(fd, offset, SEEK_SET);
533 if (oret != offset) {
534 ret = -1;
535 TEST_STEP_FAILURE(ret);
536 }
537
538 ret = write_adi(fd, &version, sizeof(version));
539 if (ret != sizeof(version))
540 TEST_STEP_FAILURE(ret);
541
542 oret = seek_adi(fd, offset, SEEK_SET);
543 if (oret != offset) {
544 ret = -1;
545 TEST_STEP_FAILURE(ret);
546 }
547
548 ret = read_adi(fd, &version, sizeof(version));
549 if (ret != sizeof(version))
550 TEST_STEP_FAILURE(ret);
551
552 if (expected_version != version) {
553 DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
554 expected_version, version);
555 TEST_STEP_FAILURE(-expected_version);
556 }
557
558 ret = 0;
559out:
560 RETURN_FROM_TEST(ret);
561}
562
563#define TEST6_VERSION_SZ 9434
564static int test6_rw_aligned_9434bytes(int fd)
565{
566 /* somewhat arbitrarily chosen address */
567 unsigned long paddr =
568 (end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1);
569 unsigned char version[TEST6_VERSION_SZ],
570 expected_version[TEST6_VERSION_SZ];
571 loff_t offset;
572 off_t oret;
573 int ret, i;
574
575 offset = paddr / ADI_BLKSZ;
576 for (i = 0; i < TEST6_VERSION_SZ; i++)
577 version[i] = expected_version[i] = random_version();
578
579 oret = seek_adi(fd, offset, SEEK_SET);
580 if (oret != offset) {
581 ret = -1;
582 TEST_STEP_FAILURE(ret);
583 }
584
585 ret = write_adi(fd, version, sizeof(version));
586 if (ret != sizeof(version))
587 TEST_STEP_FAILURE(ret);
588
589 memset(version, 0, TEST6_VERSION_SZ);
590
591 oret = seek_adi(fd, offset, SEEK_SET);
592 if (oret != offset) {
593 ret = -1;
594 TEST_STEP_FAILURE(ret);
595 }
596
597 ret = read_adi(fd, version, sizeof(version));
598 if (ret != sizeof(version))
599 TEST_STEP_FAILURE(ret);
600
601 for (i = 0; i < TEST6_VERSION_SZ; i++) {
602 if (expected_version[i] != version[i]) {
603 DEBUG_PRINT_L2(
604 "\tExpected version %d but read version %d\n",
605 expected_version[i], version[i]);
606 TEST_STEP_FAILURE(-expected_version[i]);
607 }
608 }
609
610 ret = 0;
611out:
612 RETURN_FROM_TEST(ret);
613}
614
615#define TEST7_VERSION_SZ 14963
616static int test7_rw_aligned_14963bytes(int fd)
617{
618 /* somewhat arbitrarily chosen address */
619 unsigned long paddr =
620 ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39;
621 unsigned char version[TEST7_VERSION_SZ],
622 expected_version[TEST7_VERSION_SZ];
623 loff_t offset;
624 off_t oret;
625 int ret, i;
626
627 offset = paddr / ADI_BLKSZ;
628 for (i = 0; i < TEST7_VERSION_SZ; i++) {
629 version[i] = random_version();
630 expected_version[i] = version[i];
631 }
632
633 oret = seek_adi(fd, offset, SEEK_SET);
634 if (oret != offset) {
635 ret = -1;
636 TEST_STEP_FAILURE(ret);
637 }
638
639 ret = write_adi(fd, version, sizeof(version));
640 if (ret != sizeof(version))
641 TEST_STEP_FAILURE(ret);
642
643 memset(version, 0, TEST7_VERSION_SZ);
644
645 oret = seek_adi(fd, offset, SEEK_SET);
646 if (oret != offset) {
647 ret = -1;
648 TEST_STEP_FAILURE(ret);
649 }
650
651 ret = read_adi(fd, version, sizeof(version));
652 if (ret != sizeof(version))
653 TEST_STEP_FAILURE(ret);
654
655 for (i = 0; i < TEST7_VERSION_SZ; i++) {
656 if (expected_version[i] != version[i]) {
657 DEBUG_PRINT_L2(
658 "\tExpected version %d but read version %d\n",
659 expected_version[i], version[i]);
660 TEST_STEP_FAILURE(-expected_version[i]);
661 }
662
663 paddr += ADI_BLKSZ;
664 }
665
666 ret = 0;
667out:
668 RETURN_FROM_TEST(ret);
669}
670
/* Table of test entry points, executed in order by main(). */
static int (*tests[])(int fd) = {
	test0_prpw_aligned_1byte,
	test1_prpw_aligned_4096bytes,
	test2_prpw_aligned_10327bytes,
	test3_prpw_unaligned_12541bytes,
	test4_lseek,
	test5_rw_aligned_1byte,
	test6_rw_aligned_9434bytes,
	test7_rw_aligned_14963bytes,
};
/* number of entries in tests[] */
#define TEST_COUNT	ARRAY_SIZE(tests)
682
/*
 * Entry point: map the System RAM ranges, open the privileged ADI
 * driver, run every test in tests[], print timing stats, and report
 * results through the kselftest harness.
 */
int main(int argc, char *argv[])
{
	int fd, ret, test;

	ret = build_memory_map();
	if (ret < 0)
		return ret;	/* NOTE(review): negative value reaches the
				 * shell modulo 256; harness only checks
				 * non-zero, so this appears benign — confirm.
				 */

	fd = open("/dev/adi", O_RDWR);
	if (fd < 0) {
		fprintf(stderr, "open: error %d: %s\n",
			errno, strerror(errno));
		return -errno;
	}

	for (test = 0; test < TEST_COUNT; test++) {
		DEBUG_PRINT_L1("Running test #%d\n", test);

		ret = (*tests[test])(fd);
		if (ret != 0)
			ksft_test_result_fail("Test #%d failed: error %d\n",
					      test, ret);
		else
			ksft_test_result_pass("Test #%d passed\n", test);
	}

	print_stats();
	close(fd);

	/* ksft_exit_*() terminate the process with the right exit code. */
	if (ksft_get_fail_cnt() > 0)
		ksft_exit_fail();
	else
		ksft_exit_pass();

	/* it's impossible to get here, but the compiler throws a warning
	 * about control reaching the end of non-void function. bah.
	 */
	return 0;
}
diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
new file mode 100755
index 000000000000..6d08273b7532
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
@@ -0,0 +1,30 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Load the adi driver (from the build tree if present, else via
# modprobe), run the adi-test binary, and exit non-zero on any failure.

SRC_TREE=../../../../

test_run()
{
	if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then
		insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null
		if [ $? -ne 0 ]; then
			rc=1
		fi
	else
		# Use modprobe dry run to check for missing adi module
		if ! /sbin/modprobe -q -n adi; then
			echo "adi: [SKIP]"
		elif /sbin/modprobe -q adi; then
			echo "adi: ok"
		else
			echo "adi: [FAIL]"
			rc=1
		fi
	fi
	# Propagate the test binary's result; previously its exit status
	# was discarded and the script could exit 0 on a failing run.
	./adi-test || rc=1
	rmmod adi 2> /dev/null
}

rc=0
test_run
exit $rc
diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh
new file mode 100755
index 000000000000..38ad61f9328e
--- /dev/null
+++ b/tools/testing/selftests/sparc64/run.sh
@@ -0,0 +1,3 @@
#!/bin/sh

# Run the drivers selftest from its own directory; the subshell's exit
# status (i.e. drivers_test.sh's) becomes this script's exit status.
(cd drivers; ./drivers_test.sh)
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index 24cff498b31a..fc9f8cde7d42 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -2,6 +2,19 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# Runs static keys kernel module tests 3# Runs static keys kernel module tests
4 4
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
7
8if ! /sbin/modprobe -q -n test_static_key_base; then
9 echo "static_key: module test_static_key_base is not found [SKIP]"
10 exit $ksft_skip
11fi
12
13if ! /sbin/modprobe -q -n test_static_keys; then
14 echo "static_key: module test_static_keys is not found [SKIP]"
15 exit $ksft_skip
16fi
17
5if /sbin/modprobe -q test_static_key_base; then 18if /sbin/modprobe -q test_static_key_base; then
6 if /sbin/modprobe -q test_static_keys; then 19 if /sbin/modprobe -q test_static_keys; then
7 echo "static_key: ok" 20 echo "static_key: ok"
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000000..1ab7e8130db2
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
1CONFIG_STAGING=y
2CONFIG_ANDROID=y
3CONFIG_SYNC=y
4CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index ec232c3cfcaa..584eb8ea780a 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -14,6 +14,9 @@
14 14
15# This performs a series tests against the proc sysctl interface. 15# This performs a series tests against the proc sysctl interface.
16 16
17# Kselftest framework requirement - SKIP code is 4.
18ksft_skip=4
19
17TEST_NAME="sysctl" 20TEST_NAME="sysctl"
18TEST_DRIVER="test_${TEST_NAME}" 21TEST_DRIVER="test_${TEST_NAME}"
19TEST_DIR=$(dirname $0) 22TEST_DIR=$(dirname $0)
@@ -41,7 +44,7 @@ test_modprobe()
41 echo "$0: $DIR not present" >&2 44 echo "$0: $DIR not present" >&2
42 echo "You must have the following enabled in your kernel:" >&2 45 echo "You must have the following enabled in your kernel:" >&2
43 cat $TEST_DIR/config >&2 46 cat $TEST_DIR/config >&2
44 exit 1 47 exit $ksft_skip
45 fi 48 fi
46} 49}
47 50
@@ -98,28 +101,30 @@ test_reqs()
98 uid=$(id -u) 101 uid=$(id -u)
99 if [ $uid -ne 0 ]; then 102 if [ $uid -ne 0 ]; then
100 echo $msg must be run as root >&2 103 echo $msg must be run as root >&2
101 exit 0 104 exit $ksft_skip
102 fi 105 fi
103 106
104 if ! which perl 2> /dev/null > /dev/null; then 107 if ! which perl 2> /dev/null > /dev/null; then
105 echo "$0: You need perl installed" 108 echo "$0: You need perl installed"
106 exit 1 109 exit $ksft_skip
107 fi 110 fi
108 if ! which getconf 2> /dev/null > /dev/null; then 111 if ! which getconf 2> /dev/null > /dev/null; then
109 echo "$0: You need getconf installed" 112 echo "$0: You need getconf installed"
110 exit 1 113 exit $ksft_skip
111 fi 114 fi
112 if ! which diff 2> /dev/null > /dev/null; then 115 if ! which diff 2> /dev/null > /dev/null; then
113 echo "$0: You need diff installed" 116 echo "$0: You need diff installed"
114 exit 1 117 exit $ksft_skip
115 fi 118 fi
116} 119}
117 120
118function load_req_mod() 121function load_req_mod()
119{ 122{
120 trap "test_modprobe" EXIT
121
122 if [ ! -d $DIR ]; then 123 if [ ! -d $DIR ]; then
124 if ! modprobe -q -n $TEST_DRIVER; then
125 echo "$0: module $TEST_DRIVER not found [SKIP]"
126 exit $ksft_skip
127 fi
123 modprobe $TEST_DRIVER 128 modprobe $TEST_DRIVER
124 if [ $? -ne 0 ]; then 129 if [ $? -ne 0 ]; then
125 exit 130 exit
@@ -765,6 +770,7 @@ function parse_args()
765test_reqs 770test_reqs
766allow_user_defaults 771allow_user_defaults
767check_production_sysctl_writes_strict 772check_production_sysctl_writes_strict
773test_modprobe
768load_req_mod 774load_req_mod
769 775
770trap "test_finish" EXIT 776trap "test_finish" EXIT
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 5b012f4981d4..6f289a49e5ec 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -66,7 +66,7 @@
66 "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", 66 "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
67 "expExitCode": "0", 67 "expExitCode": "0",
68 "verifyCmd": "$TC action get action bpf index 667", 68 "verifyCmd": "$TC action get action bpf index 667",
69 "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", 69 "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
70 "matchCount": "1", 70 "matchCount": "1",
71 "teardown": [ 71 "teardown": [
72 "$TC action flush action bpf", 72 "$TC action flush action bpf",
@@ -92,10 +92,15 @@
92 "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", 92 "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
93 "expExitCode": "255", 93 "expExitCode": "255",
94 "verifyCmd": "$TC action get action bpf index 667", 94 "verifyCmd": "$TC action get action bpf index 667",
95 "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", 95 "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
96 "matchCount": "0", 96 "matchCount": "0",
97 "teardown": [ 97 "teardown": [
98 "$TC action flush action bpf", 98 [
99 "$TC action flush action bpf",
100 0,
101 1,
102 255
103 ],
99 "rm -f _c.o" 104 "rm -f _c.o"
100 ] 105 ]
101 }, 106 },
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index 93cf8fea8ae7..3a2f51fc7fd4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -398,13 +398,83 @@
398 255 398 255
399 ] 399 ]
400 ], 400 ],
401 "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args", 401 "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
402 "expExitCode": "255", 402 "expExitCode": "0",
403 "verifyCmd": "$TC actions ls action csum", 403 "verifyCmd": "$TC actions ls action csum",
404 "matchPattern": "^[ \t]+index [0-9]* ref", 404 "matchPattern": "^[ \t]+index [0-9]* ref",
405 "matchCount": "32", 405 "matchCount": "32",
406 "teardown": [ 406 "teardown": [
407 "$TC actions flush action csum" 407 "$TC actions flush action csum"
408 ] 408 ]
409 },
410 {
411 "id": "b4e9",
412 "name": "Delete batch of 32 csum actions",
413 "category": [
414 "actions",
415 "csum"
416 ],
417 "setup": [
418 [
419 "$TC actions flush action csum",
420 0,
421 1,
422 255
423 ],
424 "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
425 ],
426 "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
427 "expExitCode": "0",
428 "verifyCmd": "$TC actions list action csum",
429 "matchPattern": "^[ \t]+index [0-9]+ ref",
430 "matchCount": "0",
431 "teardown": []
432 },
433 {
434 "id": "0015",
435 "name": "Add batch of 32 csum tcp actions with large cookies",
436 "category": [
437 "actions",
438 "csum"
439 ],
440 "setup": [
441 [
442 "$TC actions flush action csum",
443 0,
444 1,
445 255
446 ]
447 ],
448 "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
449 "expExitCode": "0",
450 "verifyCmd": "$TC actions ls action csum",
451 "matchPattern": "^[ \t]+index [0-9]* ref",
452 "matchCount": "32",
453 "teardown": [
454 "$TC actions flush action csum"
455 ]
456 },
457 {
458 "id": "989e",
459 "name": "Delete batch of 32 csum actions with large cookies",
460 "category": [
461 "actions",
462 "csum"
463 ],
464 "setup": [
465 [
466 "$TC actions flush action csum",
467 0,
468 1,
469 255
470 ],
471 "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
472 ],
473 "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
474 "expExitCode": "0",
475 "verifyCmd": "$TC actions list action csum",
476 "matchPattern": "^[ \t]+index [0-9]+ ref",
477 "matchCount": "0",
478 "teardown": []
409 } 479 }
410] 480]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index 9f34f0753969..637ea0219617 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1,7 +1,7 @@
1[ 1[
2 { 2 {
3 "id": "a568", 3 "id": "7682",
4 "name": "Add action with ife type", 4 "name": "Create valid ife encode action with mark and pass control",
5 "category": [ 5 "category": [
6 "actions", 6 "actions",
7 "ife" 7 "ife"
@@ -12,21 +12,400 @@
12 0, 12 0,
13 1, 13 1,
14 255 14 255
15 ], 15 ]
16 "$TC actions add action ife encode type 0xDEAD index 1"
17 ], 16 ],
18 "cmdUnderTest": "$TC actions get action ife index 1", 17 "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
18 "expExitCode": "0",
19 "verifyCmd": "$TC actions get action ife index 2",
20 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2",
21 "matchCount": "1",
22 "teardown": [
23 "$TC actions flush action ife"
24 ]
25 },
26 {
27 "id": "ef47",
28 "name": "Create valid ife encode action with mark and pipe control",
29 "category": [
30 "actions",
31 "ife"
32 ],
33 "setup": [
34 [
35 "$TC actions flush action ife",
36 0,
37 1,
38 255
39 ]
40 ],
41 "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
42 "expExitCode": "0",
43 "verifyCmd": "$TC actions get action ife index 2",
44 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2",
45 "matchCount": "1",
46 "teardown": [
47 "$TC actions flush action ife"
48 ]
49 },
50 {
51 "id": "df43",
52 "name": "Create valid ife encode action with mark and continue control",
53 "category": [
54 "actions",
55 "ife"
56 ],
57 "setup": [
58 [
59 "$TC actions flush action ife",
60 0,
61 1,
62 255
63 ]
64 ],
65 "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
66 "expExitCode": "0",
67 "verifyCmd": "$TC actions get action ife index 2",
68 "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2",
69 "matchCount": "1",
70 "teardown": [
71 "$TC actions flush action ife"
72 ]
73 },
74 {
75 "id": "e4cf",
76 "name": "Create valid ife encode action with mark and drop control",
77 "category": [
78 "actions",
79 "ife"
80 ],
81 "setup": [
82 [
83 "$TC actions flush action ife",
84 0,
85 1,
86 255
87 ]
88 ],
89 "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
90 "expExitCode": "0",
91 "verifyCmd": "$TC actions get action ife index 2",
92 "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2",
93 "matchCount": "1",
94 "teardown": [
95 "$TC actions flush action ife"
96 ]
97 },
98 {
99 "id": "ccba",
100 "name": "Create valid ife encode action with mark and reclassify control",
101 "category": [
102 "actions",
103 "ife"
104 ],
105 "setup": [
106 [
107 "$TC actions flush action ife",
108 0,
109 1,
110 255
111 ]
112 ],
113 "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
114 "expExitCode": "0",
115 "verifyCmd": "$TC actions get action ife index 2",
116 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2",
117 "matchCount": "1",
118 "teardown": [
119 "$TC actions flush action ife"
120 ]
121 },
122 {
123 "id": "a1cf",
124 "name": "Create valid ife encode action with mark and jump control",
125 "category": [
126 "actions",
127 "ife"
128 ],
129 "setup": [
130 [
131 "$TC actions flush action ife",
132 0,
133 1,
134 255
135 ]
136 ],
137 "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
138 "expExitCode": "0",
139 "verifyCmd": "$TC actions get action ife index 2",
140 "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2",
141 "matchCount": "1",
142 "teardown": [
143 "$TC actions flush action ife"
144 ]
145 },
146 {
147 "id": "cb3d",
148 "name": "Create valid ife encode action with mark value at 32-bit maximum",
149 "category": [
150 "actions",
151 "ife"
152 ],
153 "setup": [
154 [
155 "$TC actions flush action ife",
156 0,
157 1,
158 255
159 ]
160 ],
161 "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
162 "expExitCode": "0",
163 "verifyCmd": "$TC actions get action ife index 90",
164 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90",
165 "matchCount": "1",
166 "teardown": [
167 "$TC actions flush action ife"
168 ]
169 },
170 {
171 "id": "1efb",
172 "name": "Create ife encode action with mark value exceeding 32-bit maximum",
173 "category": [
174 "actions",
175 "ife"
176 ],
177 "setup": [
178 [
179 "$TC actions flush action ife",
180 0,
181 1,
182 255
183 ]
184 ],
185 "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
186 "expExitCode": "255",
187 "verifyCmd": "$TC actions get action ife index 90",
188 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90",
189 "matchCount": "0",
190 "teardown": []
191 },
192 {
193 "id": "95ed",
194 "name": "Create valid ife encode action with prio and pass control",
195 "category": [
196 "actions",
197 "ife"
198 ],
199 "setup": [
200 [
201 "$TC actions flush action ife",
202 0,
203 1,
204 255
205 ]
206 ],
207 "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
208 "expExitCode": "0",
209 "verifyCmd": "$TC actions get action ife index 9",
210 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9",
211 "matchCount": "1",
212 "teardown": [
213 "$TC actions flush action ife"
214 ]
215 },
216 {
217 "id": "aa17",
218 "name": "Create valid ife encode action with prio and pipe control",
219 "category": [
220 "actions",
221 "ife"
222 ],
223 "setup": [
224 [
225 "$TC actions flush action ife",
226 0,
227 1,
228 255
229 ]
230 ],
231 "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
232 "expExitCode": "0",
233 "verifyCmd": "$TC actions get action ife index 9",
234 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9",
235 "matchCount": "1",
236 "teardown": [
237 "$TC actions flush action ife"
238 ]
239 },
240 {
241 "id": "74c7",
242 "name": "Create valid ife encode action with prio and continue control",
243 "category": [
244 "actions",
245 "ife"
246 ],
247 "setup": [
248 [
249 "$TC actions flush action ife",
250 0,
251 1,
252 255
253 ]
254 ],
255 "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
256 "expExitCode": "0",
257 "verifyCmd": "$TC actions get action ife index 9",
258 "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9",
259 "matchCount": "1",
260 "teardown": [
261 "$TC actions flush action ife"
262 ]
263 },
264 {
265 "id": "7a97",
266 "name": "Create valid ife encode action with prio and drop control",
267 "category": [
268 "actions",
269 "ife"
270 ],
271 "setup": [
272 [
273 "$TC actions flush action ife",
274 0,
275 1,
276 255
277 ]
278 ],
279 "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
280 "expExitCode": "0",
281 "verifyCmd": "$TC actions get action ife index 9",
282 "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9",
283 "matchCount": "1",
284 "teardown": [
285 "$TC actions flush action ife"
286 ]
287 },
288 {
289 "id": "f66b",
290 "name": "Create valid ife encode action with prio and reclassify control",
291 "category": [
292 "actions",
293 "ife"
294 ],
295 "setup": [
296 [
297 "$TC actions flush action ife",
298 0,
299 1,
300 255
301 ]
302 ],
303 "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
304 "expExitCode": "0",
305 "verifyCmd": "$TC actions get action ife index 9",
306 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9",
307 "matchCount": "1",
308 "teardown": [
309 "$TC actions flush action ife"
310 ]
311 },
312 {
313 "id": "3056",
314 "name": "Create valid ife encode action with prio and jump control",
315 "category": [
316 "actions",
317 "ife"
318 ],
319 "setup": [
320 [
321 "$TC actions flush action ife",
322 0,
323 1,
324 255
325 ]
326 ],
327 "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
328 "expExitCode": "0",
329 "verifyCmd": "$TC actions get action ife index 9",
330 "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9",
331 "matchCount": "1",
332 "teardown": [
333 "$TC actions flush action ife"
334 ]
335 },
336 {
337 "id": "7dd3",
338 "name": "Create valid ife encode action with prio value at 32-bit maximum",
339 "category": [
340 "actions",
341 "ife"
342 ],
343 "setup": [
344 [
345 "$TC actions flush action ife",
346 0,
347 1,
348 255
349 ]
350 ],
351 "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
352 "expExitCode": "0",
353 "verifyCmd": "$TC actions get action ife index 99",
354 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99",
355 "matchCount": "1",
356 "teardown": [
357 "$TC actions flush action ife"
358 ]
359 },
360 {
361 "id": "2ca1",
362 "name": "Create ife encode action with prio value exceeding 32-bit maximum",
363 "category": [
364 "actions",
365 "ife"
366 ],
367 "setup": [
368 [
369 "$TC actions flush action ife",
370 0,
371 1,
372 255
373 ]
374 ],
375 "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
376 "expExitCode": "255",
377 "verifyCmd": "$TC actions get action ife index 99",
378 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99",
379 "matchCount": "0",
380 "teardown": []
381 },
382 {
383 "id": "05bb",
384 "name": "Create valid ife encode action with tcindex and pass control",
385 "category": [
386 "actions",
387 "ife"
388 ],
389 "setup": [
390 [
391 "$TC actions flush action ife",
392 0,
393 1,
394 255
395 ]
396 ],
397 "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
19 "expExitCode": "0", 398 "expExitCode": "0",
20 "verifyCmd": "$TC actions get action ife index 1", 399 "verifyCmd": "$TC actions get action ife index 1",
21 "matchPattern": "type 0xDEAD", 400 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1",
22 "matchCount": "1", 401 "matchCount": "1",
23 "teardown": [ 402 "teardown": [
24 "$TC actions flush action ife" 403 "$TC actions flush action ife"
25 ] 404 ]
26 }, 405 },
27 { 406 {
28 "id": "b983", 407 "id": "ce65",
29 "name": "Add action without ife type", 408 "name": "Create valid ife encode action with tcindex and pipe control",
30 "category": [ 409 "category": [
31 "actions", 410 "actions",
32 "ife" 411 "ife"
@@ -37,16 +416,649 @@
37 0, 416 0,
38 1, 417 1,
39 255 418 255
40 ], 419 ]
41 "$TC actions add action ife encode index 1"
42 ], 420 ],
43 "cmdUnderTest": "$TC actions get action ife index 1", 421 "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
44 "expExitCode": "0", 422 "expExitCode": "0",
45 "verifyCmd": "$TC actions get action ife index 1", 423 "verifyCmd": "$TC actions get action ife index 1",
46 "matchPattern": "type 0xED3E", 424 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1",
47 "matchCount": "1", 425 "matchCount": "1",
48 "teardown": [ 426 "teardown": [
49 "$TC actions flush action ife" 427 "$TC actions flush action ife"
50 ] 428 ]
429 },
430 {
431 "id": "09cd",
432 "name": "Create valid ife encode action with tcindex and continue control",
433 "category": [
434 "actions",
435 "ife"
436 ],
437 "setup": [
438 [
439 "$TC actions flush action ife",
440 0,
441 1,
442 255
443 ]
444 ],
445 "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
446 "expExitCode": "0",
447 "verifyCmd": "$TC actions get action ife index 1",
448 "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
449 "matchCount": "1",
450 "teardown": [
451 "$TC actions flush action ife"
452 ]
453 },
454 {
455 "id": "8eb5",
456 "name": "Create valid ife encode action with tcindex and continue control",
457 "category": [
458 "actions",
459 "ife"
460 ],
461 "setup": [
462 [
463 "$TC actions flush action ife",
464 0,
465 1,
466 255
467 ]
468 ],
469 "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
470 "expExitCode": "0",
471 "verifyCmd": "$TC actions get action ife index 1",
472 "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1",
473 "matchCount": "1",
474 "teardown": [
475 "$TC actions flush action ife"
476 ]
477 },
478 {
479 "id": "451a",
480 "name": "Create valid ife encode action with tcindex and drop control",
481 "category": [
482 "actions",
483 "ife"
484 ],
485 "setup": [
486 [
487 "$TC actions flush action ife",
488 0,
489 1,
490 255
491 ]
492 ],
493 "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
494 "expExitCode": "0",
495 "verifyCmd": "$TC actions get action ife index 77",
496 "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77",
497 "matchCount": "1",
498 "teardown": [
499 "$TC actions flush action ife"
500 ]
501 },
502 {
503 "id": "d76c",
504 "name": "Create valid ife encode action with tcindex and reclassify control",
505 "category": [
506 "actions",
507 "ife"
508 ],
509 "setup": [
510 [
511 "$TC actions flush action ife",
512 0,
513 1,
514 255
515 ]
516 ],
517 "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
518 "expExitCode": "0",
519 "verifyCmd": "$TC actions get action ife index 77",
520 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77",
521 "matchCount": "1",
522 "teardown": [
523 "$TC actions flush action ife"
524 ]
525 },
526 {
527 "id": "e731",
528 "name": "Create valid ife encode action with tcindex and jump control",
529 "category": [
530 "actions",
531 "ife"
532 ],
533 "setup": [
534 [
535 "$TC actions flush action ife",
536 0,
537 1,
538 255
539 ]
540 ],
541 "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
542 "expExitCode": "0",
543 "verifyCmd": "$TC actions get action ife index 77",
544 "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77",
545 "matchCount": "1",
546 "teardown": [
547 "$TC actions flush action ife"
548 ]
549 },
550 {
551 "id": "b7b8",
552 "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
553 "category": [
554 "actions",
555 "ife"
556 ],
557 "setup": [
558 [
559 "$TC actions flush action ife",
560 0,
561 1,
562 255
563 ]
564 ],
565 "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
566 "expExitCode": "0",
567 "verifyCmd": "$TC actions get action ife index 1",
568 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1",
569 "matchCount": "1",
570 "teardown": [
571 "$TC actions flush action ife"
572 ]
573 },
574 {
575 "id": "d0d8",
576 "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
577 "category": [
578 "actions",
579 "ife"
580 ],
581 "setup": [
582 [
583 "$TC actions flush action ife",
584 0,
585 1,
586 255
587 ]
588 ],
589 "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
590 "expExitCode": "255",
591 "verifyCmd": "$TC actions get action ife index 1",
592 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1",
593 "matchCount": "0",
594 "teardown": []
595 },
596 {
597 "id": "2a9c",
598 "name": "Create valid ife encode action with mac src parameter",
599 "category": [
600 "actions",
601 "ife"
602 ],
603 "setup": [
604 [
605 "$TC actions flush action ife",
606 0,
607 1,
608 255
609 ]
610 ],
611 "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
612 "expExitCode": "0",
613 "verifyCmd": "$TC actions get action ife index 1",
614 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1",
615 "matchCount": "1",
616 "teardown": [
617 "$TC actions flush action ife"
618 ]
619 },
620 {
621 "id": "cf5c",
622 "name": "Create valid ife encode action with mac dst parameter",
623 "category": [
624 "actions",
625 "ife"
626 ],
627 "setup": [
628 [
629 "$TC actions flush action ife",
630 0,
631 1,
632 255
633 ]
634 ],
635 "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
636 "expExitCode": "0",
637 "verifyCmd": "$TC actions get action ife index 1",
638 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
639 "matchCount": "1",
640 "teardown": [
641 "$TC actions flush action ife"
642 ]
643 },
644 {
645 "id": "2353",
646 "name": "Create valid ife encode action with mac src and mac dst parameters",
647 "category": [
648 "actions",
649 "ife"
650 ],
651 "setup": [
652 [
653 "$TC actions flush action ife",
654 0,
655 1,
656 255
657 ]
658 ],
659 "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
660 "expExitCode": "0",
661 "verifyCmd": "$TC actions get action ife index 11",
662 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
663 "matchCount": "1",
664 "teardown": [
665 "$TC actions flush action ife"
666 ]
667 },
668 {
669 "id": "552c",
670 "name": "Create valid ife encode action with mark and type parameters",
671 "category": [
672 "actions",
673 "ife"
674 ],
675 "setup": [
676 [
677 "$TC actions flush action ife",
678 0,
679 1,
680 255
681 ]
682 ],
683 "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
684 "expExitCode": "0",
685 "verifyCmd": "$TC actions get action ife index 1",
686 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1",
687 "matchCount": "1",
688 "teardown": [
689 "$TC actions flush action ife"
690 ]
691 },
692 {
693 "id": "0421",
694 "name": "Create valid ife encode action with prio and type parameters",
695 "category": [
696 "actions",
697 "ife"
698 ],
699 "setup": [
700 [
701 "$TC actions flush action ife",
702 0,
703 1,
704 255
705 ]
706 ],
707 "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
708 "expExitCode": "0",
709 "verifyCmd": "$TC actions get action ife index 21",
710 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21",
711 "matchCount": "1",
712 "teardown": [
713 "$TC actions flush action ife"
714 ]
715 },
716 {
717 "id": "4017",
718 "name": "Create valid ife encode action with tcindex and type parameters",
719 "category": [
720 "actions",
721 "ife"
722 ],
723 "setup": [
724 [
725 "$TC actions flush action ife",
726 0,
727 1,
728 255
729 ]
730 ],
731 "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
732 "expExitCode": "0",
733 "verifyCmd": "$TC actions get action ife index 21",
734 "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21",
735 "matchCount": "1",
736 "teardown": [
737 "$TC actions flush action ife"
738 ]
739 },
740 {
741 "id": "fac3",
742 "name": "Create valid ife encode action with index at 32-bit maximnum",
743 "category": [
744 "actions",
745 "ife"
746 ],
747 "setup": [
748 [
749 "$TC actions flush action ife",
750 0,
751 1,
752 255
753 ]
754 ],
755 "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
756 "expExitCode": "0",
757 "verifyCmd": "$TC actions get action ife index 4294967295",
758 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295",
759 "matchCount": "1",
760 "teardown": [
761 "$TC actions flush action ife"
762 ]
763 },
764 {
765 "id": "7c25",
766 "name": "Create valid ife decode action with pass control",
767 "category": [
768 "actions",
769 "ife"
770 ],
771 "setup": [
772 [
773 "$TC actions flush action ife",
774 0,
775 1,
776 255
777 ]
778 ],
779 "cmdUnderTest": "$TC actions add action ife decode pass index 1",
780 "expExitCode": "0",
781 "verifyCmd": "$TC actions get action ife index 1",
782 "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
783 "matchCount": "1",
784 "teardown": [
785 "$TC actions flush action ife"
786 ]
787 },
788 {
789 "id": "dccb",
790 "name": "Create valid ife decode action with pipe control",
791 "category": [
792 "actions",
793 "ife"
794 ],
795 "setup": [
796 [
797 "$TC actions flush action ife",
798 0,
799 1,
800 255
801 ]
802 ],
803 "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
804 "expExitCode": "0",
805 "verifyCmd": "$TC actions get action ife index 1",
806 "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
807 "matchCount": "1",
808 "teardown": [
809 "$TC actions flush action ife"
810 ]
811 },
812 {
813 "id": "7bb9",
814 "name": "Create valid ife decode action with continue control",
815 "category": [
816 "actions",
817 "ife"
818 ],
819 "setup": [
820 [
821 "$TC actions flush action ife",
822 0,
823 1,
824 255
825 ]
826 ],
827 "cmdUnderTest": "$TC actions add action ife decode continue index 1",
828 "expExitCode": "0",
829 "verifyCmd": "$TC actions get action ife index 1",
830 "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
831 "matchCount": "1",
832 "teardown": [
833 "$TC actions flush action ife"
834 ]
835 },
836 {
837 "id": "d9ad",
838 "name": "Create valid ife decode action with drop control",
839 "category": [
840 "actions",
841 "ife"
842 ],
843 "setup": [
844 [
845 "$TC actions flush action ife",
846 0,
847 1,
848 255
849 ]
850 ],
851 "cmdUnderTest": "$TC actions add action ife decode drop index 1",
852 "expExitCode": "0",
853 "verifyCmd": "$TC actions get action ife index 1",
854 "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
855 "matchCount": "1",
856 "teardown": [
857 "$TC actions flush action ife"
858 ]
859 },
860 {
861 "id": "219f",
862 "name": "Create valid ife decode action with reclassify control",
863 "category": [
864 "actions",
865 "ife"
866 ],
867 "setup": [
868 [
869 "$TC actions flush action ife",
870 0,
871 1,
872 255
873 ]
874 ],
875 "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
876 "expExitCode": "0",
877 "verifyCmd": "$TC actions get action ife index 1",
878 "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
879 "matchCount": "1",
880 "teardown": [
881 "$TC actions flush action ife"
882 ]
883 },
884 {
885 "id": "8f44",
886 "name": "Create valid ife decode action with jump control",
887 "category": [
888 "actions",
889 "ife"
890 ],
891 "setup": [
892 [
893 "$TC actions flush action ife",
894 0,
895 1,
896 255
897 ]
898 ],
899 "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
900 "expExitCode": "0",
901 "verifyCmd": "$TC actions get action ife index 1",
902 "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1",
903 "matchCount": "1",
904 "teardown": [
905 "$TC actions flush action ife"
906 ]
907 },
908 {
909 "id": "56cf",
910 "name": "Create ife encode action with index exceeding 32-bit maximum",
911 "category": [
912 "actions",
913 "ife"
914 ],
915 "setup": [
916 [
917 "$TC actions flush action ife",
918 0,
919 1,
920 255
921 ]
922 ],
923 "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
924 "expExitCode": "255",
925 "verifyCmd": "$TC actions get action ife index 4294967295999",
926 "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999",
927 "matchCount": "0",
928 "teardown": []
929 },
930 {
931 "id": "ee94",
932 "name": "Create ife encode action with invalid control",
933 "category": [
934 "actions",
935 "ife"
936 ],
937 "setup": [
938 [
939 "$TC actions flush action ife",
940 0,
941 1,
942 255
943 ]
944 ],
945 "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
946 "expExitCode": "255",
947 "verifyCmd": "$TC actions get action ife index 4",
948 "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4",
949 "matchCount": "0",
950 "teardown": []
951 },
952 {
953 "id": "b330",
954 "name": "Create ife encode action with cookie",
955 "category": [
956 "actions",
957 "ife"
958 ],
959 "setup": [
960 [
961 "$TC actions flush action ife",
962 0,
963 1,
964 255
965 ]
966 ],
967 "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
968 "expExitCode": "0",
969 "verifyCmd": "$TC actions get action ife index 4",
970 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
971 "matchCount": "1",
972 "teardown": [
973 "$TC actions flush action ife"
974 ]
975 },
976 {
977 "id": "bbc0",
978 "name": "Create ife encode action with invalid argument",
979 "category": [
980 "actions",
981 "ife"
982 ],
983 "setup": [
984 [
985 "$TC actions flush action ife",
986 0,
987 1,
988 255
989 ]
990 ],
991 "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
992 "expExitCode": "255",
993 "verifyCmd": "$TC actions get action ife index 4",
994 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4",
995 "matchCount": "0",
996 "teardown": []
997 },
998 {
999 "id": "d54a",
1000 "name": "Create ife encode action with invalid type argument",
1001 "category": [
1002 "actions",
1003 "ife"
1004 ],
1005 "setup": [
1006 [
1007 "$TC actions flush action ife",
1008 0,
1009 1,
1010 255
1011 ]
1012 ],
1013 "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
1014 "expExitCode": "255",
1015 "verifyCmd": "$TC actions get action ife index 4",
1016 "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4",
1017 "matchCount": "0",
1018 "teardown": []
1019 },
1020 {
1021 "id": "7ee0",
1022 "name": "Create ife encode action with invalid mac src argument",
1023 "category": [
1024 "actions",
1025 "ife"
1026 ],
1027 "setup": [
1028 [
1029 "$TC actions flush action ife",
1030 0,
1031 1,
1032 255
1033 ]
1034 ],
1035 "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
1036 "expExitCode": "255",
1037 "verifyCmd": "$TC actions get action ife index 4",
1038 "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
1039 "matchCount": "0",
1040 "teardown": []
1041 },
1042 {
1043 "id": "0a7d",
1044 "name": "Create ife encode action with invalid mac dst argument",
1045 "category": [
1046 "actions",
1047 "ife"
1048 ],
1049 "setup": [
1050 [
1051 "$TC actions flush action ife",
1052 0,
1053 1,
1054 255
1055 ]
1056 ],
1057 "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
1058 "expExitCode": "255",
1059 "verifyCmd": "$TC actions get action ife index 4",
1060 "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
1061 "matchCount": "0",
1062 "teardown": []
51 } 1063 }
52] 1064]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 443c9b3c8664..6e4edfae1799 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -340,7 +340,7 @@
340 }, 340 },
341 { 341 {
342 "id": "8b69", 342 "id": "8b69",
343 "name": "Add mirred mirror action with maximum index", 343 "name": "Add mirred mirror action with index at 32-bit maximum",
344 "category": [ 344 "category": [
345 "actions", 345 "actions",
346 "mirred" 346 "mirred"
@@ -363,6 +363,28 @@
363 ] 363 ]
364 }, 364 },
365 { 365 {
366 "id": "3f66",
367 "name": "Add mirred mirror action with index exceeding 32-bit maximum",
368 "category": [
369 "actions",
370 "mirred"
371 ],
372 "setup": [
373 [
374 "$TC actions flush action mirred",
375 0,
376 1,
377 255
378 ]
379 ],
380 "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
381 "expExitCode": "255",
382 "verifyCmd": "$TC actions get action mirred index 429496729555",
383 "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
384 "matchCount": "0",
385 "teardown": []
386 },
387 {
366 "id": "a70e", 388 "id": "a70e",
367 "name": "Delete mirred mirror action", 389 "name": "Delete mirred mirror action",
368 "category": [ 390 "category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 38d85a1d7492..f03763d81617 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -401,11 +401,11 @@
401 ], 401 ],
402 "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295", 402 "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
403 "expExitCode": "0", 403 "expExitCode": "0",
404 "verifyCmd": "$TC actions get action mirred index 4294967295", 404 "verifyCmd": "$TC actions get action police index 4294967295",
405 "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb", 405 "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
406 "matchCount": "1", 406 "matchCount": "1",
407 "teardown": [ 407 "teardown": [
408 "$TC actions flush action mirred" 408 "$TC actions flush action police"
409 ] 409 ]
410 }, 410 },
411 { 411 {
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000000..3aca33c00039
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,588 @@
1[
2 {
3 "id": "9784",
4 "name": "Add valid sample action with mandatory arguments",
5 "category": [
6 "actions",
7 "sample"
8 ],
9 "setup": [
10 [
11 "$TC actions flush action sample",
12 0,
13 1,
14 255
15 ]
16 ],
17 "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
18 "expExitCode": "0",
19 "verifyCmd": "$TC actions get action sample index 2",
20 "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
21 "matchCount": "1",
22 "teardown": [
23 "$TC actions flush action sample"
24 ]
25 },
26 {
27 "id": "5c91",
28 "name": "Add valid sample action with mandatory arguments and continue control action",
29 "category": [
30 "actions",
31 "sample"
32 ],
33 "setup": [
34 [
35 "$TC actions flush action sample",
36 0,
37 1,
38 255
39 ]
40 ],
41 "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
42 "expExitCode": "0",
43 "verifyCmd": "$TC actions get action sample index 2",
44 "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
45 "matchCount": "1",
46 "teardown": [
47 "$TC actions flush action sample"
48 ]
49 },
50 {
51 "id": "334b",
52 "name": "Add valid sample action with mandatory arguments and drop control action",
53 "category": [
54 "actions",
55 "sample"
56 ],
57 "setup": [
58 [
59 "$TC actions flush action sample",
60 0,
61 1,
62 255
63 ]
64 ],
65 "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
66 "expExitCode": "0",
67 "verifyCmd": "$TC actions list action sample",
68 "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
69 "matchCount": "1",
70 "teardown": [
71 "$TC actions flush action sample"
72 ]
73 },
74 {
75 "id": "da69",
76 "name": "Add valid sample action with mandatory arguments and reclassify control action",
77 "category": [
78 "actions",
79 "sample"
80 ],
81 "setup": [
82 [
83 "$TC actions flush action sample",
84 0,
85 1,
86 255
87 ]
88 ],
89 "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
90 "expExitCode": "0",
91 "verifyCmd": "$TC actions list action sample",
92 "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
93 "matchCount": "1",
94 "teardown": [
95 "$TC actions flush action sample"
96 ]
97 },
98 {
99 "id": "13ce",
100 "name": "Add valid sample action with mandatory arguments and pipe control action",
101 "category": [
102 "actions",
103 "sample"
104 ],
105 "setup": [
106 [
107 "$TC actions flush action sample",
108 0,
109 1,
110 255
111 ]
112 ],
113 "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
114 "expExitCode": "0",
115 "verifyCmd": "$TC actions list action sample",
116 "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
117 "matchCount": "1",
118 "teardown": [
119 "$TC actions flush action sample"
120 ]
121 },
122 {
123 "id": "1886",
124 "name": "Add valid sample action with mandatory arguments and jump control action",
125 "category": [
126 "actions",
127 "sample"
128 ],
129 "setup": [
130 [
131 "$TC actions flush action sample",
132 0,
133 1,
134 255
135 ]
136 ],
137 "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
138 "expExitCode": "0",
139 "verifyCmd": "$TC actions get action sample index 200",
140 "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
141 "matchCount": "1",
142 "teardown": [
143 "$TC actions flush action sample"
144 ]
145 },
146 {
147 "id": "b6d4",
148 "name": "Add sample action with mandatory arguments and invalid control action",
149 "category": [
150 "actions",
151 "sample"
152 ],
153 "setup": [
154 [
155 "$TC actions flush action sample",
156 0,
157 1,
158 255
159 ]
160 ],
161 "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
162 "expExitCode": "255",
163 "verifyCmd": "$TC actions list action sample",
164 "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
165 "matchCount": "0",
166 "teardown": []
167 },
168 {
169 "id": "a874",
170 "name": "Add invalid sample action without mandatory arguments",
171 "category": [
172 "actions",
173 "sample"
174 ],
175 "setup": [
176 [
177 "$TC actions flush action sample",
178 0,
179 1,
180 255
181 ]
182 ],
183 "cmdUnderTest": "$TC actions add action sample index 1",
184 "expExitCode": "255",
185 "verifyCmd": "$TC actions list action sample",
186 "matchPattern": "action order [0-9]+: sample.*index 1 ref",
187 "matchCount": "0",
188 "teardown": []
189 },
190 {
191 "id": "ac01",
192 "name": "Add invalid sample action without mandatory argument rate",
193 "category": [
194 "actions",
195 "sample"
196 ],
197 "setup": [
198 [
199 "$TC actions flush action sample",
200 0,
201 1,
202 255
203 ]
204 ],
205 "cmdUnderTest": "$TC actions add action sample group 10 index 1",
206 "expExitCode": "255",
207 "verifyCmd": "$TC actions list action sample",
208 "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
209 "matchCount": "0",
210 "teardown": []
211 },
212 {
213 "id": "4203",
214 "name": "Add invalid sample action without mandatory argument group",
215 "category": [
216 "actions",
217 "sample"
218 ],
219 "setup": [
220 [
221 "$TC actions flush action sample",
222 0,
223 1,
224 255
225 ]
226 ],
227 "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
228 "expExitCode": "255",
229 "verifyCmd": "$TC actions get action sample index 10",
230 "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
231 "matchCount": "0",
232 "teardown": []
233 },
234 {
235 "id": "14a7",
236 "name": "Add invalid sample action without mandatory argument group",
237 "category": [
238 "actions",
239 "sample"
240 ],
241 "setup": [
242 [
243 "$TC actions flush action sample",
244 0,
245 1,
246 255
247 ]
248 ],
249 "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
250 "expExitCode": "255",
251 "verifyCmd": "$TC actions get action sample index 10",
252 "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
253 "matchCount": "0",
254 "teardown": []
255 },
256 {
257 "id": "8f2e",
258 "name": "Add valid sample action with trunc argument",
259 "category": [
260 "actions",
261 "sample"
262 ],
263 "setup": [
264 [
265 "$TC actions flush action sample",
266 0,
267 1,
268 255
269 ]
270 ],
271 "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
272 "expExitCode": "0",
273 "verifyCmd": "$TC actions get action sample index 10",
274 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
275 "matchCount": "1",
276 "teardown": [
277 "$TC actions flush action sample"
278 ]
279 },
280 {
281 "id": "45f8",
282 "name": "Add sample action with maximum rate argument",
283 "category": [
284 "actions",
285 "sample"
286 ],
287 "setup": [
288 [
289 "$TC actions flush action sample",
290 0,
291 1,
292 255
293 ]
294 ],
295 "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
296 "expExitCode": "0",
297 "verifyCmd": "$TC actions get action sample index 10",
298 "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
299 "matchCount": "1",
300 "teardown": [
301 "$TC actions flush action sample"
302 ]
303 },
304 {
305 "id": "ad0c",
306 "name": "Add sample action with maximum trunc argument",
307 "category": [
308 "actions",
309 "sample"
310 ],
311 "setup": [
312 [
313 "$TC actions flush action sample",
314 0,
315 1,
316 255
317 ]
318 ],
319 "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
320 "expExitCode": "0",
321 "verifyCmd": "$TC actions get action sample index 10",
322 "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
323 "matchCount": "1",
324 "teardown": [
325 "$TC actions flush action sample"
326 ]
327 },
328 {
329 "id": "83a9",
330 "name": "Add sample action with maximum group argument",
331 "category": [
332 "actions",
333 "sample"
334 ],
335 "setup": [
336 [
337 "$TC actions flush action sample",
338 0,
339 1,
340 255
341 ]
342 ],
343 "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
344 "expExitCode": "0",
345 "verifyCmd": "$TC actions get action sample index 1",
346 "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
347 "matchCount": "1",
348 "teardown": [
349 "$TC actions flush action sample"
350 ]
351 },
352 {
353 "id": "ed27",
354 "name": "Add sample action with invalid rate argument",
355 "category": [
356 "actions",
357 "sample"
358 ],
359 "setup": [
360 [
361 "$TC actions flush action sample",
362 0,
363 1,
364 255
365 ]
366 ],
367 "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
368 "expExitCode": "255",
369 "verifyCmd": "$TC actions get action sample index 10",
370 "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
371 "matchCount": "0",
372 "teardown": []
373 },
374 {
375 "id": "2eae",
376 "name": "Add sample action with invalid group argument",
377 "category": [
378 "actions",
379 "sample"
380 ],
381 "setup": [
382 [
383 "$TC actions flush action sample",
384 0,
385 1,
386 255
387 ]
388 ],
389 "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
390 "expExitCode": "255",
391 "verifyCmd": "$TC actions get action sample index 1",
392 "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
393 "matchCount": "0",
394 "teardown": []
395 },
396 {
397 "id": "6ff3",
398 "name": "Add sample action with invalid trunc size",
399 "category": [
400 "actions",
401 "sample"
402 ],
403 "setup": [
404 [
405 "$TC actions flush action sample",
406 0,
407 1,
408 255
409 ]
410 ],
411 "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
412 "expExitCode": "255",
413 "verifyCmd": "$TC actions get action sample index 11",
414 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
415 "matchCount": "0",
416 "teardown": []
417 },
418 {
419 "id": "2b2a",
420 "name": "Add sample action with invalid index",
421 "category": [
422 "actions",
423 "sample"
424 ],
425 "setup": [
426 [
427 "$TC actions flush action sample",
428 0,
429 1,
430 255
431 ]
432 ],
433 "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
434 "expExitCode": "255",
435 "verifyCmd": "$TC actions get action sample index 5294967299",
436 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
437 "matchCount": "0",
438 "teardown": []
439 },
440 {
441 "id": "dee2",
442 "name": "Add sample action with maximum allowed index",
443 "category": [
444 "actions",
445 "sample"
446 ],
447 "setup": [
448 [
449 "$TC actions flush action sample",
450 0,
451 1,
452 255
453 ]
454 ],
455 "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
456 "expExitCode": "0",
457 "verifyCmd": "$TC actions get action sample index 4294967295",
458 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
459 "matchCount": "1",
460 "teardown": [
461 "$TC actions flush action sample"
462 ]
463 },
464 {
465 "id": "560e",
466 "name": "Add sample action with cookie",
467 "category": [
468 "actions",
469 "sample"
470 ],
471 "setup": [
472 [
473 "$TC actions flush action sample",
474 0,
475 1,
476 255
477 ]
478 ],
479 "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
480 "expExitCode": "0",
481 "verifyCmd": "$TC actions get action sample index 45",
482 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
483 "matchCount": "1",
484 "teardown": [
485 "$TC actions flush action sample"
486 ]
487 },
488 {
489 "id": "704a",
490 "name": "Replace existing sample action with new rate argument",
491 "category": [
492 "actions",
493 "sample"
494 ],
495 "setup": [
496 [
497 "$TC actions flush action sample",
498 0,
499 1,
500 255
501 ],
502 "$TC actions add action sample rate 1024 group 4 index 4"
503 ],
504 "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
505 "expExitCode": "0",
506 "verifyCmd": "$TC actions list action sample",
507 "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
508 "matchCount": "1",
509 "teardown": [
510 "$TC actions flush action sample"
511 ]
512 },
513 {
514 "id": "60eb",
515 "name": "Replace existing sample action with new group argument",
516 "category": [
517 "actions",
518 "sample"
519 ],
520 "setup": [
521 [
522 "$TC actions flush action sample",
523 0,
524 1,
525 255
526 ],
527 "$TC actions add action sample rate 1024 group 4 index 4"
528 ],
529 "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
530 "expExitCode": "0",
531 "verifyCmd": "$TC actions list action sample",
532 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
533 "matchCount": "1",
534 "teardown": [
535 "$TC actions flush action sample"
536 ]
537 },
538 {
539 "id": "2cce",
540 "name": "Replace existing sample action with new trunc argument",
541 "category": [
542 "actions",
543 "sample"
544 ],
545 "setup": [
546 [
547 "$TC actions flush action sample",
548 0,
549 1,
550 255
551 ],
552 "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
553 ],
554 "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
555 "expExitCode": "0",
556 "verifyCmd": "$TC actions list action sample",
557 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
558 "matchCount": "1",
559 "teardown": [
560 "$TC actions flush action sample"
561 ]
562 },
563 {
564 "id": "59d1",
565 "name": "Replace existing sample action with new control argument",
566 "category": [
567 "actions",
568 "sample"
569 ],
570 "setup": [
571 [
572 "$TC actions flush action sample",
573 0,
574 1,
575 255
576 ],
577 "$TC actions add action sample rate 1024 group 4 reclassify index 4"
578 ],
579 "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
580 "expExitCode": "0",
581 "verifyCmd": "$TC actions list action sample",
582 "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
583 "matchCount": "1",
584 "teardown": [
585 "$TC actions flush action sample"
586 ]
587 }
588]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 4510ddfa6e54..69ea09eefffc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -1,7 +1,7 @@
1[ 1[
2 { 2 {
3 "id": "6f5a", 3 "id": "6f5a",
4 "name": "Add vlan pop action", 4 "name": "Add vlan pop action with pipe opcode",
5 "category": [ 5 "category": [
6 "actions", 6 "actions",
7 "vlan" 7 "vlan"
@@ -14,18 +14,18 @@
14 255 14 255
15 ] 15 ]
16 ], 16 ],
17 "cmdUnderTest": "$TC actions add action vlan pop index 8", 17 "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
18 "expExitCode": "0", 18 "expExitCode": "0",
19 "verifyCmd": "$TC actions list action vlan", 19 "verifyCmd": "$TC actions list action vlan",
20 "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref", 20 "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
21 "matchCount": "1", 21 "matchCount": "1",
22 "teardown": [ 22 "teardown": [
23 "$TC actions flush action vlan" 23 "$TC actions flush action vlan"
24 ] 24 ]
25 }, 25 },
26 { 26 {
27 "id": "ee6f", 27 "id": "df35",
28 "name": "Add vlan pop action with large index", 28 "name": "Add vlan pop action with pass opcode",
29 "category": [ 29 "category": [
30 "actions", 30 "actions",
31 "vlan" 31 "vlan"
@@ -38,10 +38,82 @@
38 255 38 255
39 ] 39 ]
40 ], 40 ],
41 "cmdUnderTest": "$TC actions add action vlan pop index 4294967295", 41 "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
42 "expExitCode": "0", 42 "expExitCode": "0",
43 "verifyCmd": "$TC actions list action vlan", 43 "verifyCmd": "$TC actions get action vlan index 8",
44 "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref", 44 "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
45 "matchCount": "1",
46 "teardown": [
47 "$TC actions flush action vlan"
48 ]
49 },
50 {
51 "id": "b0d4",
52 "name": "Add vlan pop action with drop opcode",
53 "category": [
54 "actions",
55 "vlan"
56 ],
57 "setup": [
58 [
59 "$TC actions flush action vlan",
60 0,
61 1,
62 255
63 ]
64 ],
65 "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
66 "expExitCode": "0",
67 "verifyCmd": "$TC actions get action vlan index 8",
68 "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
69 "matchCount": "1",
70 "teardown": [
71 "$TC actions flush action vlan"
72 ]
73 },
74 {
75 "id": "95ee",
76 "name": "Add vlan pop action with reclassify opcode",
77 "category": [
78 "actions",
79 "vlan"
80 ],
81 "setup": [
82 [
83 "$TC actions flush action vlan",
84 0,
85 1,
86 255
87 ]
88 ],
89 "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
90 "expExitCode": "0",
91 "verifyCmd": "$TC actions get action vlan index 8",
92 "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
93 "matchCount": "1",
94 "teardown": [
95 "$TC actions flush action vlan"
96 ]
97 },
98 {
99 "id": "0283",
100 "name": "Add vlan pop action with continue opcode",
101 "category": [
102 "actions",
103 "vlan"
104 ],
105 "setup": [
106 [
107 "$TC actions flush action vlan",
108 0,
109 1,
110 255
111 ]
112 ],
113 "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
114 "expExitCode": "0",
115 "verifyCmd": "$TC actions get action vlan index 8",
116 "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
45 "matchCount": "1", 117 "matchCount": "1",
46 "teardown": [ 118 "teardown": [
47 "$TC actions flush action vlan" 119 "$TC actions flush action vlan"
@@ -96,6 +168,74 @@
96 ] 168 ]
97 }, 169 },
98 { 170 {
171 "id": "a178",
172 "name": "Add vlan pop action with invalid opcode",
173 "category": [
174 "actions",
175 "vlan"
176 ],
177 "setup": [
178 [
179 "$TC actions flush action vlan",
180 0,
181 1,
182 255
183 ]
184 ],
185 "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
186 "expExitCode": "255",
187 "verifyCmd": "$TC actions list action vlan",
188 "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
189 "matchCount": "0",
190 "teardown": []
191 },
192 {
193 "id": "ee6f",
194 "name": "Add vlan pop action with index at 32-bit maximum",
195 "category": [
196 "actions",
197 "vlan"
198 ],
199 "setup": [
200 [
201 "$TC actions flush action vlan",
202 0,
203 1,
204 255
205 ]
206 ],
207 "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
208 "expExitCode": "0",
209 "verifyCmd": "$TC actions list action vlan",
210 "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
211 "matchCount": "1",
212 "teardown": [
213 "$TC actions flush action vlan"
214 ]
215 },
216 {
217 "id": "0dfa",
218 "name": "Add vlan pop action with index exceeding 32-bit maximum",
219 "category": [
220 "actions",
221 "vlan"
222 ],
223 "setup": [
224 [
225 "$TC actions flush action vlan",
226 0,
227 1,
228 255
229 ]
230 ],
231 "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
232 "expExitCode": "255",
233 "verifyCmd": "$TC actions get action vlan index 429496729599",
 234            "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 429496729599",
235 "matchCount": "0",
236 "teardown": []
237 },
238 {
99 "id": "2b91", 239 "id": "2b91",
100 "name": "Add vlan invalid action", 240 "name": "Add vlan invalid action",
101 "category": [ 241 "category": [
@@ -115,13 +255,11 @@
115 "verifyCmd": "$TC actions list action vlan", 255 "verifyCmd": "$TC actions list action vlan",
116 "matchPattern": "action order [0-9]+: vlan.*bad_mode", 256 "matchPattern": "action order [0-9]+: vlan.*bad_mode",
117 "matchCount": "0", 257 "matchCount": "0",
118 "teardown": [ 258 "teardown": []
119 "$TC actions flush action vlan"
120 ]
121 }, 259 },
122 { 260 {
123 "id": "57fc", 261 "id": "57fc",
124 "name": "Add vlan action with invalid protocol type", 262 "name": "Add vlan push action with invalid protocol type",
125 "category": [ 263 "category": [
126 "actions", 264 "actions",
127 "vlan" 265 "vlan"
@@ -139,9 +277,7 @@
139 "verifyCmd": "$TC actions list action vlan", 277 "verifyCmd": "$TC actions list action vlan",
140 "matchPattern": "action order [0-9]+: vlan.*push", 278 "matchPattern": "action order [0-9]+: vlan.*push",
141 "matchCount": "0", 279 "matchCount": "0",
142 "teardown": [ 280 "teardown": []
143 "$TC actions flush action vlan"
144 ]
145 }, 281 },
146 { 282 {
147 "id": "3989", 283 "id": "3989",
@@ -216,6 +352,30 @@
216 ] 352 ]
217 }, 353 },
218 { 354 {
355 "id": "1f4b",
356 "name": "Add vlan push action with maximum 12-bit vlan ID",
357 "category": [
358 "actions",
359 "vlan"
360 ],
361 "setup": [
362 [
363 "$TC actions flush action vlan",
364 0,
365 1,
366 255
367 ]
368 ],
369 "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
370 "expExitCode": "0",
371 "verifyCmd": "$TC actions get action vlan index 1",
372 "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
373 "matchCount": "1",
374 "teardown": [
375 "$TC actions flush action vlan"
376 ]
377 },
378 {
219 "id": "1f7b", 379 "id": "1f7b",
220 "name": "Add vlan push action with invalid vlan ID", 380 "name": "Add vlan push action with invalid vlan ID",
221 "category": [ 381 "category": [
@@ -240,6 +400,30 @@
240 ] 400 ]
241 }, 401 },
242 { 402 {
403 "id": "fe40",
404 "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
405 "category": [
406 "actions",
407 "vlan"
408 ],
409 "setup": [
410 [
411 "$TC actions flush action vlan",
412 0,
413 1,
414 255
415 ]
416 ],
417 "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
418 "expExitCode": "0",
419 "verifyCmd": "$TC actions get action vlan index 1",
420 "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
421 "matchCount": "1",
422 "teardown": [
423 "$TC actions flush action vlan"
424 ]
425 },
426 {
243 "id": "5d02", 427 "id": "5d02",
244 "name": "Add vlan push action with invalid IEEE 802.1p priority", 428 "name": "Add vlan push action with invalid IEEE 802.1p priority",
245 "category": [ 429 "category": [
@@ -259,9 +443,7 @@
259 "verifyCmd": "$TC actions list action vlan", 443 "verifyCmd": "$TC actions list action vlan",
260 "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref", 444 "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
261 "matchCount": "0", 445 "matchCount": "0",
262 "teardown": [ 446 "teardown": []
263 "$TC actions flush action vlan"
264 ]
265 }, 447 },
266 { 448 {
267 "id": "6812", 449 "id": "6812",
@@ -312,6 +494,106 @@
312 ] 494 ]
313 }, 495 },
314 { 496 {
497 "id": "3deb",
498 "name": "Replace existing vlan push action with new ID",
499 "category": [
500 "actions",
501 "vlan"
502 ],
503 "setup": [
504 [
505 "$TC actions flush action vlan",
506 0,
507 1,
508 255
509 ],
510 "$TC actions add action vlan push id 500 pipe index 12"
511 ],
512 "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
513 "expExitCode": "0",
514 "verifyCmd": "$TC actions get action vlan index 12",
515 "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
516 "matchCount": "1",
517 "teardown": [
518 "$TC actions flush action vlan"
519 ]
520 },
521 {
522 "id": "9e76",
523 "name": "Replace existing vlan push action with new protocol",
524 "category": [
525 "actions",
526 "vlan"
527 ],
528 "setup": [
529 [
530 "$TC actions flush action vlan",
531 0,
532 1,
533 255
534 ],
535 "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
536 ],
537 "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
538 "expExitCode": "0",
539 "verifyCmd": "$TC actions get action vlan index 1",
540 "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
541 "matchCount": "1",
542 "teardown": [
543 "$TC actions flush action vlan"
544 ]
545 },
546 {
547 "id": "ede4",
548 "name": "Replace existing vlan push action with new priority",
549 "category": [
550 "actions",
551 "vlan"
552 ],
553 "setup": [
554 [
555 "$TC actions flush action vlan",
556 0,
557 1,
558 255
559 ],
560 "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
561 ],
562 "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
563 "expExitCode": "0",
564 "verifyCmd": "$TC actions get action vlan index 1",
565 "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
566 "matchCount": "1",
567 "teardown": [
568 "$TC actions flush action vlan"
569 ]
570 },
571 {
572 "id": "d413",
573 "name": "Replace existing vlan pop action with new cookie",
574 "category": [
575 "actions",
576 "vlan"
577 ],
578 "setup": [
579 [
580 "$TC actions flush action vlan",
581 0,
582 1,
583 255
584 ],
585 "$TC actions add action vlan pop continue index 1 cookie 22334455"
586 ],
587 "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
588 "expExitCode": "0",
589 "verifyCmd": "$TC actions get action vlan index 1",
590 "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
591 "matchCount": "1",
592 "teardown": [
593 "$TC actions flush action vlan"
594 ]
595 },
596 {
315 "id": "83a4", 597 "id": "83a4",
316 "name": "Delete vlan pop action", 598 "name": "Delete vlan pop action",
317 "category": [ 599 "category": [
@@ -385,7 +667,7 @@
385 }, 667 },
386 { 668 {
387 "id": "1d78", 669 "id": "1d78",
388 "name": "Add vlan action with cookie", 670 "name": "Add vlan push action with cookie",
389 "category": [ 671 "category": [
390 "actions", 672 "actions",
391 "vlan" 673 "vlan"
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 2c8ac8416299..32a9eadb2d4e 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -9,7 +9,7 @@ nanosleep
9nsleep-lat 9nsleep-lat
10posix_timers 10posix_timers
11raw_skew 11raw_skew
12rtctest 12rtcpie
13set-2038 13set-2038
14set-tai 14set-tai
15set-timer-lat 15set-timer-lat
@@ -19,4 +19,3 @@ valid-adjtimex
19adjtick 19adjtick
20set-tz 20set-tz
21freq-step 21freq-step
22rtctest_setdate
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 3496680981f2..c02683cfb6c9 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -5,13 +5,13 @@ LDFLAGS += -lrt -lpthread -lm
5# these are all "safe" tests that don't modify 5# these are all "safe" tests that don't modify
6# system time or require escalated privileges 6# system time or require escalated privileges
7TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ 7TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
8 inconsistency-check raw_skew threadtest rtctest 8 inconsistency-check raw_skew threadtest rtcpie
9 9
10DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ 10DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
11 skew_consistency clocksource-switch freq-step leap-a-day \ 11 skew_consistency clocksource-switch freq-step leap-a-day \
12 leapcrash set-tai set-2038 set-tz 12 leapcrash set-tai set-2038 set-tz
13 13
14TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate 14TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
15 15
16 16
17include ../lib.mk 17include ../lib.mk
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000000..47b5bad1b393
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,134 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Real Time Clock Periodic Interrupt test program
4 *
5 * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
6 * events"), PIE are completely handled using hrtimers, without actually using
7 * any underlying hardware RTC.
8 *
9 */
10
11#include <stdio.h>
12#include <linux/rtc.h>
13#include <sys/ioctl.h>
14#include <sys/time.h>
15#include <sys/types.h>
16#include <fcntl.h>
17#include <unistd.h>
18#include <stdlib.h>
19#include <errno.h>
20
21/*
22 * This expects the new RTC class driver framework, working with
23 * clocks that will often not be clones of what the PC-AT had.
24 * Use the command line to specify another RTC if you need one.
25 */
/* Default RTC character device; override with argv[1]. */
static const char default_rtc[] = "/dev/rtc0";

/*
 * Exercise the RTC periodic-interrupt (PIE) path: for each rate from
 * 2Hz up to 64Hz, enable PIE, block in read() for 20 interrupts, and
 * fail if any interrupt arrives more than 10% later than its nominal
 * period.  (128Hz and above are root-only, so they are not tested.)
 *
 * argv[1] may name an alternate RTC device.  Returns 0 on success or
 * when the device has no (or a fixed) periodic IRQ; exits with errno
 * on device errors, 1 on usage error, -1 on a timing failure.
 */
int main(int argc, char **argv)
{
	int i, fd, retval, irqcount = 0;
	unsigned long tmp, data, old_pie_rate;
	const char *rtc = default_rtc;
	struct timeval start, end, diff;

	switch (argc) {
	case 2:
		rtc = argv[1];
		/* FALLTHROUGH */
	case 1:
		break;
	default:
		/* Fix: this program is rtcpie, and no "[d]" flag is handled. */
		fprintf(stderr, "usage: rtcpie [rtcdev]\n");
		return 1;
	}

	fd = open(rtc, O_RDONLY);

	if (fd == -1) {
		perror(rtc);
		exit(errno);
	}

	/* Read periodic IRQ rate so it can be restored at "done". */
	retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
	if (retval == -1) {
		/* not all RTCs support periodic IRQs */
		if (errno == EINVAL) {
			fprintf(stderr, "\nNo periodic IRQ support\n");
			goto done;
		}
		perror("RTC_IRQP_READ ioctl");
		exit(errno);
	}
	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);

	fprintf(stderr, "Counting 20 interrupts at:");
	fflush(stderr);

	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
	for (tmp = 2; tmp <= 64; tmp *= 2) {

		retval = ioctl(fd, RTC_IRQP_SET, tmp);
		if (retval == -1) {
			/* not all RTCs can change their periodic IRQ rate */
			if (errno == EINVAL) {
				fprintf(stderr,
					"\n...Periodic IRQ rate is fixed\n");
				goto done;
			}
			perror("RTC_IRQP_SET ioctl");
			exit(errno);
		}

		fprintf(stderr, "\n%ldHz:\t", tmp);
		fflush(stderr);

		/* Enable periodic interrupts */
		retval = ioctl(fd, RTC_PIE_ON, 0);
		if (retval == -1) {
			perror("RTC_PIE_ON ioctl");
			exit(errno);
		}

		for (i = 1; i < 21; i++) {
			gettimeofday(&start, NULL);
			/* This blocks until the next periodic interrupt */
			retval = read(fd, &data, sizeof(unsigned long));
			if (retval == -1) {
				perror("read");
				exit(errno);
			}
			gettimeofday(&end, NULL);
			timersub(&end, &start, &diff);
			/* Allow 10% slack over the nominal 1/tmp s period. */
			if (diff.tv_sec > 0 ||
			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
				       diff.tv_sec, diff.tv_usec,
				       (1000000L / tmp));
				/* Fix: all output goes to stderr, so flush stderr. */
				fflush(stderr);
				exit(-1);
			}

			fprintf(stderr, " %d", i);
			fflush(stderr);
			irqcount++;
		}

		/* Disable periodic interrupts */
		retval = ioctl(fd, RTC_PIE_OFF, 0);
		if (retval == -1) {
			perror("RTC_PIE_OFF ioctl");
			exit(errno);
		}
	}

done:
	/* Best-effort: restore the IRQ rate found at startup. */
	ioctl(fd, RTC_IRQP_SET, old_pie_rate);

	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");

	close(fd);

	return 0;
}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
deleted file mode 100644
index 411eff625e66..000000000000
--- a/tools/testing/selftests/timers/rtctest.c
+++ /dev/null
@@ -1,403 +0,0 @@
1/*
2 * Real Time Clock Driver Test/Example Program
3 *
4 * Compile with:
5 * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
6 *
7 * Copyright (C) 1996, Paul Gortmaker.
8 *
9 * Released under the GNU General Public License, version 2,
10 * included herein by reference.
11 *
12 */
13
14#include <stdio.h>
15#include <linux/rtc.h>
16#include <sys/ioctl.h>
17#include <sys/time.h>
18#include <sys/types.h>
19#include <fcntl.h>
20#include <unistd.h>
21#include <stdlib.h>
22#include <errno.h>
23
24#ifndef ARRAY_SIZE
25# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
26#endif
27
28/*
29 * This expects the new RTC class driver framework, working with
30 * clocks that will often not be clones of what the PC-AT had.
31 * Use the command line to specify another RTC if you need one.
32 */
static const char default_rtc[] = "/dev/rtc0";

/*
 * Boundary dates near historic time-representation rollover points
 * (1970 epoch, 2038 signed 32-bit time_t, the 2100 century boundary,
 * 2106 unsigned 32-bit time_t, 2262 signed 64-bit nanoseconds — see
 * the inline comments). Each cutoff is bracketed by a day on either
 * side where applicable so both sides of the rollover get exercised.
 * struct rtc_time conventions apply: tm_year is years since 1900,
 * tm_mon is 0-based; unset fields default to zero.
 */
static struct rtc_time cutoff_dates[] = {
	{
		.tm_year = 70, /* 1970 -1900 */
		.tm_mday = 1,
	},
	/* signed time_t 19/01/2038 3:14:08 */
	{
		.tm_year = 138,
		.tm_mday = 19,
	},
	{
		.tm_year = 138,
		.tm_mday = 20,
	},
	{
		.tm_year = 199, /* 2099 -1900 */
		.tm_mday = 1,
	},
	{
		.tm_year = 200, /* 2100 -1900 */
		.tm_mday = 1,
	},
	/* unsigned time_t 07/02/2106 7:28:15*/
	{
		.tm_year = 205,
		.tm_mon = 1,
		.tm_mday = 7,
	},
	{
		.tm_year = 206,
		.tm_mon = 1,
		.tm_mday = 8,
	},
	/* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
	{
		.tm_year = 362,
		.tm_mon = 3,
		.tm_mday = 12,
	},
	{
		.tm_year = 362, /* 2262 -1900 */
		.tm_mon = 3,
		.tm_mday = 13,
	},
};
80
/*
 * Compare two RTC timestamps for "close enough" equality: the date,
 * hour and minute must match exactly, and b may be at most one second
 * ahead of a (reading the clock back can land in the next second).
 * Returns 0 when they match, 1 otherwise.
 */
static int compare_dates(struct rtc_time *a, struct rtc_time *b)
{
	if (a->tm_year != b->tm_year)
		return 1;
	if (a->tm_mon != b->tm_mon)
		return 1;
	if (a->tm_mday != b->tm_mday)
		return 1;
	if (a->tm_hour != b->tm_hour)
		return 1;
	if (a->tm_min != b->tm_min)
		return 1;

	return (b->tm_sec - a->tm_sec) > 1;
}
93
94int main(int argc, char **argv)
95{
96 int i, fd, retval, irqcount = 0, dangerous = 0;
97 unsigned long tmp, data;
98 struct rtc_time rtc_tm;
99 const char *rtc = default_rtc;
100 struct timeval start, end, diff;
101
102 switch (argc) {
103 case 3:
104 if (*argv[2] == 'd')
105 dangerous = 1;
106 case 2:
107 rtc = argv[1];
108 /* FALLTHROUGH */
109 case 1:
110 break;
111 default:
112 fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
113 return 1;
114 }
115
116 fd = open(rtc, O_RDONLY);
117
118 if (fd == -1) {
119 perror(rtc);
120 exit(errno);
121 }
122
123 fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
124
125 /* Turn on update interrupts (one per second) */
126 retval = ioctl(fd, RTC_UIE_ON, 0);
127 if (retval == -1) {
128 if (errno == EINVAL) {
129 fprintf(stderr,
130 "\n...Update IRQs not supported.\n");
131 goto test_READ;
132 }
133 perror("RTC_UIE_ON ioctl");
134 exit(errno);
135 }
136
137 fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
138 rtc);
139 fflush(stderr);
140 for (i=1; i<6; i++) {
141 /* This read will block */
142 retval = read(fd, &data, sizeof(unsigned long));
143 if (retval == -1) {
144 perror("read");
145 exit(errno);
146 }
147 fprintf(stderr, " %d",i);
148 fflush(stderr);
149 irqcount++;
150 }
151
152 fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
153 fflush(stderr);
154 for (i=1; i<6; i++) {
155 struct timeval tv = {5, 0}; /* 5 second timeout on select */
156 fd_set readfds;
157
158 FD_ZERO(&readfds);
159 FD_SET(fd, &readfds);
160 /* The select will wait until an RTC interrupt happens. */
161 retval = select(fd+1, &readfds, NULL, NULL, &tv);
162 if (retval == -1) {
163 perror("select");
164 exit(errno);
165 }
166 /* This read won't block unlike the select-less case above. */
167 retval = read(fd, &data, sizeof(unsigned long));
168 if (retval == -1) {
169 perror("read");
170 exit(errno);
171 }
172 fprintf(stderr, " %d",i);
173 fflush(stderr);
174 irqcount++;
175 }
176
177 /* Turn off update interrupts */
178 retval = ioctl(fd, RTC_UIE_OFF, 0);
179 if (retval == -1) {
180 perror("RTC_UIE_OFF ioctl");
181 exit(errno);
182 }
183
184test_READ:
185 /* Read the RTC time/date */
186 retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
187 if (retval == -1) {
188 perror("RTC_RD_TIME ioctl");
189 exit(errno);
190 }
191
192 fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
193 rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
194 rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
195
196 /* Set the alarm to 5 sec in the future, and check for rollover */
197 rtc_tm.tm_sec += 5;
198 if (rtc_tm.tm_sec >= 60) {
199 rtc_tm.tm_sec %= 60;
200 rtc_tm.tm_min++;
201 }
202 if (rtc_tm.tm_min == 60) {
203 rtc_tm.tm_min = 0;
204 rtc_tm.tm_hour++;
205 }
206 if (rtc_tm.tm_hour == 24)
207 rtc_tm.tm_hour = 0;
208
209 retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
210 if (retval == -1) {
211 if (errno == EINVAL) {
212 fprintf(stderr,
213 "\n...Alarm IRQs not supported.\n");
214 goto test_PIE;
215 }
216
217 perror("RTC_ALM_SET ioctl");
218 exit(errno);
219 }
220
221 /* Read the current alarm settings */
222 retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
223 if (retval == -1) {
224 if (errno == EINVAL) {
225 fprintf(stderr,
226 "\n...EINVAL reading current alarm setting.\n");
227 goto test_PIE;
228 }
229 perror("RTC_ALM_READ ioctl");
230 exit(errno);
231 }
232
233 fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
234 rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
235
236 /* Enable alarm interrupts */
237 retval = ioctl(fd, RTC_AIE_ON, 0);
238 if (retval == -1) {
239 if (errno == EINVAL || errno == EIO) {
240 fprintf(stderr,
241 "\n...Alarm IRQs not supported.\n");
242 goto test_PIE;
243 }
244
245 perror("RTC_AIE_ON ioctl");
246 exit(errno);
247 }
248
249 fprintf(stderr, "Waiting 5 seconds for alarm...");
250 fflush(stderr);
251 /* This blocks until the alarm ring causes an interrupt */
252 retval = read(fd, &data, sizeof(unsigned long));
253 if (retval == -1) {
254 perror("read");
255 exit(errno);
256 }
257 irqcount++;
258 fprintf(stderr, " okay. Alarm rang.\n");
259
260 /* Disable alarm interrupts */
261 retval = ioctl(fd, RTC_AIE_OFF, 0);
262 if (retval == -1) {
263 perror("RTC_AIE_OFF ioctl");
264 exit(errno);
265 }
266
267test_PIE:
268 /* Read periodic IRQ rate */
269 retval = ioctl(fd, RTC_IRQP_READ, &tmp);
270 if (retval == -1) {
271 /* not all RTCs support periodic IRQs */
272 if (errno == EINVAL) {
273 fprintf(stderr, "\nNo periodic IRQ support\n");
274 goto test_DATE;
275 }
276 perror("RTC_IRQP_READ ioctl");
277 exit(errno);
278 }
279 fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
280
281 fprintf(stderr, "Counting 20 interrupts at:");
282 fflush(stderr);
283
284 /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
285 for (tmp=2; tmp<=64; tmp*=2) {
286
287 retval = ioctl(fd, RTC_IRQP_SET, tmp);
288 if (retval == -1) {
289 /* not all RTCs can change their periodic IRQ rate */
290 if (errno == EINVAL) {
291 fprintf(stderr,
292 "\n...Periodic IRQ rate is fixed\n");
293 goto test_DATE;
294 }
295 perror("RTC_IRQP_SET ioctl");
296 exit(errno);
297 }
298
299 fprintf(stderr, "\n%ldHz:\t", tmp);
300 fflush(stderr);
301
302 /* Enable periodic interrupts */
303 retval = ioctl(fd, RTC_PIE_ON, 0);
304 if (retval == -1) {
305 perror("RTC_PIE_ON ioctl");
306 exit(errno);
307 }
308
309 for (i=1; i<21; i++) {
310 gettimeofday(&start, NULL);
311 /* This blocks */
312 retval = read(fd, &data, sizeof(unsigned long));
313 if (retval == -1) {
314 perror("read");
315 exit(errno);
316 }
317 gettimeofday(&end, NULL);
318 timersub(&end, &start, &diff);
319 if (diff.tv_sec > 0 ||
320 diff.tv_usec > ((1000000L / tmp) * 1.10)) {
321 fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
322 diff.tv_sec, diff.tv_usec,
323 (1000000L / tmp));
324 fflush(stdout);
325 exit(-1);
326 }
327
328 fprintf(stderr, " %d",i);
329 fflush(stderr);
330 irqcount++;
331 }
332
333 /* Disable periodic interrupts */
334 retval = ioctl(fd, RTC_PIE_OFF, 0);
335 if (retval == -1) {
336 perror("RTC_PIE_OFF ioctl");
337 exit(errno);
338 }
339 }
340
341test_DATE:
342 if (!dangerous)
343 goto done;
344
345 fprintf(stderr, "\nTesting problematic dates\n");
346
347 for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
348 struct rtc_time current;
349
350 /* Write the new date in RTC */
351 retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
352 if (retval == -1) {
353 perror("RTC_SET_TIME ioctl");
354 close(fd);
355 exit(errno);
356 }
357
358 /* Read back */
359 retval = ioctl(fd, RTC_RD_TIME, &current);
360 if (retval == -1) {
361 perror("RTC_RD_TIME ioctl");
362 exit(errno);
363 }
364
365 if(compare_dates(&cutoff_dates[i], &current)) {
366 fprintf(stderr,"Setting date %d failed\n",
367 cutoff_dates[i].tm_year + 1900);
368 goto done;
369 }
370
371 cutoff_dates[i].tm_sec += 5;
372
373 /* Write the new alarm in RTC */
374 retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
375 if (retval == -1) {
376 perror("RTC_ALM_SET ioctl");
377 close(fd);
378 exit(errno);
379 }
380
381 /* Read back */
382 retval = ioctl(fd, RTC_ALM_READ, &current);
383 if (retval == -1) {
384 perror("RTC_ALM_READ ioctl");
385 exit(errno);
386 }
387
388 if(compare_dates(&cutoff_dates[i], &current)) {
389 fprintf(stderr,"Setting alarm %d failed\n",
390 cutoff_dates[i].tm_year + 1900);
391 goto done;
392 }
393
394 fprintf(stderr, "Setting year %d is OK \n",
395 cutoff_dates[i].tm_year + 1900);
396 }
397done:
398 fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
399
400 close(fd);
401
402 return 0;
403}
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644
index 000000000000..f7baa9aa2932
--- /dev/null
+++ b/tools/testing/selftests/uevent/Makefile
@@ -0,0 +1,17 @@
1# SPDX-License-Identifier: GPL-2.0
2all:
3
4include ../lib.mk
5
6.PHONY: all clean
7
8BINARIES := uevent_filtering
9CFLAGS += -Wl,-no-as-needed -Wall
10
11uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
12 $(CC) $(CFLAGS) $< -o $@
13
14TEST_PROGS += $(BINARIES)
15EXTRA_CLEAN := $(BINARIES)
16
17all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644
index 000000000000..1038f4515be8
--- /dev/null
+++ b/tools/testing/selftests/uevent/config
@@ -0,0 +1,2 @@
1CONFIG_USER_NS=y
2CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644
index 000000000000..f83391aa42cf
--- /dev/null
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -0,0 +1,486 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#define _GNU_SOURCE
4#include <errno.h>
5#include <fcntl.h>
6#include <linux/netlink.h>
7#include <signal.h>
8#include <stdbool.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/prctl.h>
13#include <sys/socket.h>
14#include <sched.h>
15#include <sys/eventfd.h>
16#include <sys/stat.h>
17#include <sys/syscall.h>
18#include <sys/types.h>
19#include <sys/wait.h>
20#include <unistd.h>
21
22#include "../kselftest.h"
23#include "../kselftest_harness.h"
24
25#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
26#define __UEVENT_BUFFER_SIZE (2048 * 2)
27#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
28#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
29#define __UEVENT_LISTEN_ALL -1
30
/*
 * read(2) wrapper that transparently retries when the call is
 * interrupted by a signal (EINTR). Returns whatever read() returns:
 * the byte count, 0 on EOF, or -1 with errno set on real errors.
 */
ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
42
/*
 * write(2) wrapper that transparently retries when the call is
 * interrupted by a signal (EINTR). Returns whatever write() returns:
 * the byte count written, or -1 with errno set on real errors.
 */
ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
54
/*
 * Reap the given child, retrying across EINTR and spurious wakeups.
 * Returns 0 when the child terminated normally with exit status 0,
 * -1 on waitpid failure or any other kind of child termination.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		pid_t waited = waitpid(pid, &status, 0);

		if (waited == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (waited == pid)
			break;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;

	return 0;
}
76
77static int uevent_listener(unsigned long post_flags, bool expect_uevent,
78 int sync_fd)
79{
80 int sk_fd, ret;
81 socklen_t sk_addr_len;
82 int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
83 uint64_t sync_add = 1;
84 struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
85 char buf[__UEVENT_BUFFER_SIZE] = { 0 };
86 struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
87 char control[CMSG_SPACE(sizeof(struct ucred))];
88 struct msghdr hdr = {
89 &rcv_addr, sizeof(rcv_addr), &iov, 1,
90 control, sizeof(control), 0,
91 };
92
93 sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
94 NETLINK_KOBJECT_UEVENT);
95 if (sk_fd < 0) {
96 fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
97 return -1;
98 }
99
100 ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
101 sizeof(rcv_buf_sz));
102 if (ret < 0) {
103 fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
104 goto on_error;
105 }
106
107 sk_addr.nl_family = AF_NETLINK;
108 sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
109
110 sk_addr_len = sizeof(sk_addr);
111 ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
112 if (ret < 0) {
113 fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
114 goto on_error;
115 }
116
117 ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
118 if (ret < 0) {
119 fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
120 goto on_error;
121 }
122
123 if ((size_t)sk_addr_len != sizeof(sk_addr)) {
124 fprintf(stderr, "Invalid socket address size\n");
125 goto on_error;
126 }
127
128 if (post_flags & CLONE_NEWUSER) {
129 ret = unshare(CLONE_NEWUSER);
130 if (ret < 0) {
131 fprintf(stderr,
132 "%s - Failed to unshare user namespace\n",
133 strerror(errno));
134 goto on_error;
135 }
136 }
137
138 if (post_flags & CLONE_NEWNET) {
139 ret = unshare(CLONE_NEWNET);
140 if (ret < 0) {
141 fprintf(stderr,
142 "%s - Failed to unshare network namespace\n",
143 strerror(errno));
144 goto on_error;
145 }
146 }
147
148 ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
149 close(sync_fd);
150 if (ret != sizeof(sync_add)) {
151 fprintf(stderr, "Failed to synchronize with parent process\n");
152 goto on_error;
153 }
154
155 fret = 0;
156 for (;;) {
157 ssize_t r;
158
159 r = recvmsg(sk_fd, &hdr, 0);
160 if (r <= 0) {
161 fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
162 ret = -1;
163 break;
164 }
165
166 /* ignore libudev messages */
167 if (memcmp(buf, "libudev", 8) == 0)
168 continue;
169
170 /* ignore uevents we didn't trigger */
171 if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
172 continue;
173
174 if (!expect_uevent) {
175 fprintf(stderr, "Received unexpected uevent:\n");
176 ret = -1;
177 }
178
179 if (TH_LOG_ENABLED) {
180 /* If logging is enabled dump the received uevent. */
181 (void)write_nointr(STDERR_FILENO, buf, r);
182 (void)write_nointr(STDERR_FILENO, "\n", 1);
183 }
184
185 break;
186 }
187
188on_error:
189 close(sk_fd);
190
191 return fret;
192}
193
194int trigger_uevent(unsigned int times)
195{
196 int fd, ret;
197 unsigned int i;
198
199 fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
200 if (fd < 0) {
201 if (errno != ENOENT)
202 return -EINVAL;
203
204 return -1;
205 }
206
207 for (i = 0; i < times; i++) {
208 ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
209 if (ret < 0) {
210 fprintf(stderr, "Failed to trigger uevent\n");
211 break;
212 }
213 }
214 close(fd);
215
216 return ret;
217}
218
/*
 * Arrange for this process to receive SIGKILL when its parent dies
 * (PR_SET_PDEATHSIG). If the parent already exited before the prctl
 * took effect (we were reparented to init, ppid == 1), kill ourselves
 * immediately to close the race. Returns 0 on success, -1 on failure.
 */
int set_death_signal(void)
{
	int rc = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);

	/* Check whether we have been orphaned. */
	if (getppid() == 1)
		rc = kill(getpid(), SIGKILL);

	return rc < 0 ? -1 : 0;
}
240
/*
 * Run one uevent-filtering scenario:
 *  - block SIGCHLD so the parent can sigtimedwait() for the child,
 *  - fork a child that unshares pre_flags namespaces BEFORE the uevent
 *    socket exists, then runs uevent_listener() (which applies
 *    post_flags AFTER the socket exists),
 *  - wait on sync_fd until the listener is ready, trigger uevents,
 *  - give the child 2 seconds; on timeout the child is still blocked
 *    in recvmsg(), so SIGTERM (handled as success in signal_handler)
 *    is sent when silence was expected, SIGUSR1 (failure) otherwise.
 *
 * Returns 0 on success, -1 on failure.
 */
static int do_test(unsigned long pre_flags, unsigned long post_flags,
		   bool expect_uevent, int sync_fd)
{
	int ret;
	uint64_t wait_val;
	pid_t pid;
	sigset_t mask;
	sigset_t orig_mask;
	struct timespec timeout;

	sigemptyset(&mask);
	sigaddset(&mask, SIGCHLD);

	/* NOTE(review): orig_mask is saved but never restored, so SIGCHLD
	 * stays blocked after this call returns. Harmless here because
	 * every subsequent call blocks it again — confirm if reused.
	 */
	ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
	if (ret < 0) {
		fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
		return -1;
	}

	pid = fork();
	if (pid < 0) {
		fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
		return -1;
	}

	if (pid == 0) {
		/* Make sure that we go away when our parent dies. */
		ret = set_death_signal();
		if (ret < 0) {
			fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
			_exit(EXIT_FAILURE);
		}

		/* pre_flags namespaces: unshared before the listener
		 * socket is created, so the socket is owned by the new
		 * namespaces.
		 */
		if (pre_flags & CLONE_NEWUSER) {
			ret = unshare(CLONE_NEWUSER);
			if (ret < 0) {
				fprintf(stderr,
					"%s - Failed to unshare user namespace\n",
					strerror(errno));
				_exit(EXIT_FAILURE);
			}
		}

		if (pre_flags & CLONE_NEWNET) {
			ret = unshare(CLONE_NEWNET);
			if (ret < 0) {
				fprintf(stderr,
					"%s - Failed to unshare network namespace\n",
					strerror(errno));
				_exit(EXIT_FAILURE);
			}
		}

		if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
			_exit(EXIT_FAILURE);

		_exit(EXIT_SUCCESS);
	}

	/* Parent: wait until the listener signals readiness via the
	 * eventfd before triggering any uevents.
	 */
	ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
	if (ret != sizeof(wait_val)) {
		fprintf(stderr, "Failed to synchronize with child process\n");
		_exit(EXIT_FAILURE);
	}

	/* Trigger 10 uevents to account for the case where the kernel might
	 * drop some.
	 */
	ret = trigger_uevent(10);
	if (ret < 0)
		fprintf(stderr, "Failed triggering uevents\n");

	/* Wait for 2 seconds before considering this failed. This should be
	 * plenty of time for the kernel to deliver the uevent even under heavy
	 * load.
	 */
	timeout.tv_sec = 2;
	timeout.tv_nsec = 0;

again:
	ret = sigtimedwait(&mask, NULL, &timeout);
	if (ret < 0) {
		if (errno == EINTR)
			goto again;

		/* Timed out: the child never exited on its own. */
		if (!expect_uevent)
			ret = kill(pid, SIGTERM); /* success */
		else
			ret = kill(pid, SIGUSR1); /* error */
		if (ret < 0)
			return -1;
	}

	ret = wait_for_pid(pid);
	if (ret < 0)
		return -1;

	return ret;
}
340
/*
 * Child-side handler: do_test() sends SIGTERM when the child's silence
 * was the expected outcome (exit success); any other signal delivered
 * here means the scenario failed.
 */
static void signal_handler(int sig)
{
	_exit(sig == SIGTERM ? EXIT_SUCCESS : EXIT_FAILURE);
}
348
/*
 * kselftest-harness entry point. Requires root and the presence of
 * /sys/devices/virtual/mem/full/uevent; otherwise the test is skipped
 * (KSFT_SKIP). Each do_test() call exercises one combination of
 * namespace unsharing before/after the listener socket is created; the
 * expected delivery behavior is documented above each call.
 */
TEST(uevent_filtering)
{
	int ret, sync_fd;
	struct sigaction act;

	if (geteuid()) {
		TH_LOG("Uevent filtering tests require root privileges. Skipping test");
		_exit(KSFT_SKIP);
	}

	ret = access(__DEV_FULL, F_OK);
	EXPECT_EQ(0, ret) {
		if (errno == ENOENT) {
			TH_LOG(__DEV_FULL " does not exist. Skipping test");
			_exit(KSFT_SKIP);
		}

		_exit(KSFT_FAIL);
	}

	/* Install the SIGTERM handler before forking: children inherit it,
	 * and do_test() uses SIGTERM to mean "expected silence, exit ok".
	 */
	act.sa_handler = signal_handler;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	ret = sigaction(SIGTERM, &act, NULL);
	ASSERT_EQ(0, ret);

	/* eventfd the children use to signal listener readiness. */
	sync_fd = eventfd(0, EFD_CLOEXEC);
	ASSERT_GE(sync_fd, 0);

	/*
	 * Setup:
	 * - Open uevent listening socket in initial network namespace owned by
	 *   initial user namespace.
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(0, 0, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - Open uevent listening socket in non-initial network namespace
	 *   owned by initial user namespace.
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - unshare user namespace
	 * - Open uevent listening socket in initial network namespace
	 *   owned by initial user namespace.
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - Open uevent listening socket in non-initial network namespace
	 *   owned by non-initial user namespace.
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives no uevent
	 */
	ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - Open uevent listening socket in initial network namespace
	 *   owned by initial user namespace.
	 * - unshare network namespace
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(0, CLONE_NEWNET, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - Open uevent listening socket in initial network namespace
	 *   owned by initial user namespace.
	 * - unshare user namespace
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

	/*
	 * Setup:
	 * - Open uevent listening socket in initial network namespace
	 *   owned by initial user namespace.
	 * - unshare user namespace
	 * - unshare network namespace
	 * - Trigger uevent in initial network namespace owned by initial user
	 *   namespace.
	 * Expected Result:
	 * - uevent listening socket receives uevent
	 */
	ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
	ASSERT_EQ(0, ret) {
		goto do_cleanup;
	}

do_cleanup:
	close(sync_fd);
}
485
486TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
index d60506fc77f8..f9b31a57439b 100755
--- a/tools/testing/selftests/user/test_user_copy.sh
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -2,6 +2,13 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# Runs copy_to/from_user infrastructure using test_user_copy kernel module 3# Runs copy_to/from_user infrastructure using test_user_copy kernel module
4 4
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
7
8if ! /sbin/modprobe -q -n test_user_copy; then
9 echo "user: module test_user_copy is not found [SKIP]"
10 exit $ksft_skip
11fi
5if /sbin/modprobe -q test_user_copy; then 12if /sbin/modprobe -q test_user_copy; then
6 /sbin/modprobe -q -r test_user_copy 13 /sbin/modprobe -q -r test_user_copy
7 echo "user_copy: ok" 14 echo "user_copy: ok"
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index 1097f04e4d80..bcec71250873 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -16,6 +16,8 @@
16#include <unistd.h> 16#include <unistd.h>
17#include <string.h> 17#include <string.h>
18 18
19#include "../kselftest.h"
20
19#define MAP_SIZE 1048576 21#define MAP_SIZE 1048576
20 22
21struct map_list { 23struct map_list {
@@ -169,7 +171,7 @@ int main(int argc, char **argv)
169 printf("Either the sysctl compact_unevictable_allowed is not\n" 171 printf("Either the sysctl compact_unevictable_allowed is not\n"
170 "set to 1 or couldn't read the proc file.\n" 172 "set to 1 or couldn't read the proc file.\n"
171 "Skipping the test\n"); 173 "Skipping the test\n");
172 return 0; 174 return KSFT_SKIP;
173 } 175 }
174 176
175 lim.rlim_cur = RLIM_INFINITY; 177 lim.rlim_cur = RLIM_INFINITY;
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
index 4997b9222cfa..637b6d0ac0d0 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -9,6 +9,8 @@
9#include <stdbool.h> 9#include <stdbool.h>
10#include "mlock2.h" 10#include "mlock2.h"
11 11
12#include "../kselftest.h"
13
12struct vm_boundaries { 14struct vm_boundaries {
13 unsigned long start; 15 unsigned long start;
14 unsigned long end; 16 unsigned long end;
@@ -303,7 +305,7 @@ static int test_mlock_lock()
303 if (mlock2_(map, 2 * page_size, 0)) { 305 if (mlock2_(map, 2 * page_size, 0)) {
304 if (errno == ENOSYS) { 306 if (errno == ENOSYS) {
305 printf("Cannot call new mlock family, skipping test\n"); 307 printf("Cannot call new mlock family, skipping test\n");
306 _exit(0); 308 _exit(KSFT_SKIP);
307 } 309 }
308 perror("mlock2(0)"); 310 perror("mlock2(0)");
309 goto unmap; 311 goto unmap;
@@ -412,7 +414,7 @@ static int test_mlock_onfault()
412 if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { 414 if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
413 if (errno == ENOSYS) { 415 if (errno == ENOSYS) {
414 printf("Cannot call new mlock family, skipping test\n"); 416 printf("Cannot call new mlock family, skipping test\n");
415 _exit(0); 417 _exit(KSFT_SKIP);
416 } 418 }
417 perror("mlock2(MLOCK_ONFAULT)"); 419 perror("mlock2(MLOCK_ONFAULT)");
418 goto unmap; 420 goto unmap;
@@ -425,7 +427,7 @@ static int test_mlock_onfault()
425 if (munlock(map, 2 * page_size)) { 427 if (munlock(map, 2 * page_size)) {
426 if (errno == ENOSYS) { 428 if (errno == ENOSYS) {
427 printf("Cannot call new mlock family, skipping test\n"); 429 printf("Cannot call new mlock family, skipping test\n");
428 _exit(0); 430 _exit(KSFT_SKIP);
429 } 431 }
430 perror("munlock()"); 432 perror("munlock()");
431 goto unmap; 433 goto unmap;
@@ -457,7 +459,7 @@ static int test_lock_onfault_of_present()
457 if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { 459 if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
458 if (errno == ENOSYS) { 460 if (errno == ENOSYS) {
459 printf("Cannot call new mlock family, skipping test\n"); 461 printf("Cannot call new mlock family, skipping test\n");
460 _exit(0); 462 _exit(KSFT_SKIP);
461 } 463 }
462 perror("mlock2(MLOCK_ONFAULT)"); 464 perror("mlock2(MLOCK_ONFAULT)");
463 goto unmap; 465 goto unmap;
@@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock)
583 if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { 585 if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
584 if (errno == ENOSYS) { 586 if (errno == ENOSYS) {
585 printf("Cannot call new mlock family, skipping test\n"); 587 printf("Cannot call new mlock family, skipping test\n");
586 _exit(0); 588 _exit(KSFT_SKIP);
587 } 589 }
588 perror("mlock(ONFAULT)\n"); 590 perror("mlock(ONFAULT)\n");
589 goto out; 591 goto out;
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 22d564673830..88cbe5575f0c 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -2,6 +2,9 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3#please run as root 3#please run as root
4 4
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
7
5mnt=./huge 8mnt=./huge
6exitcode=0 9exitcode=0
7 10
@@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
36 echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages 39 echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
37 if [ $? -ne 0 ]; then 40 if [ $? -ne 0 ]; then
38 echo "Please run this test as root" 41 echo "Please run this test as root"
39 exit 1 42 exit $ksft_skip
40 fi 43 fi
41 while read name size unit; do 44 while read name size unit; do
42 if [ "$name" = "HugePages_Free:" ]; then 45 if [ "$name" = "HugePages_Free:" ]; then
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index de2f9ec8a87f..7b8171e3128a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -69,6 +69,8 @@
69#include <setjmp.h> 69#include <setjmp.h>
70#include <stdbool.h> 70#include <stdbool.h>
71 71
72#include "../kselftest.h"
73
72#ifdef __NR_userfaultfd 74#ifdef __NR_userfaultfd
73 75
74static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; 76static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
@@ -1322,7 +1324,7 @@ int main(int argc, char **argv)
1322int main(void) 1324int main(void)
1323{ 1325{
1324 printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); 1326 printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
1325 return 0; 1327 return KSFT_SKIP;
1326} 1328}
1327 1329
1328#endif /* __NR_userfaultfd */ 1330#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d744991c0f4f..186520198de7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -8,10 +8,11 @@ include ../lib.mk
8UNAME_M := $(shell uname -m) 8UNAME_M := $(shell uname -m)
9CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) 9CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
10CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) 10CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
11CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
11 12
12TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ 13TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
13 check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ 14 check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
14 protection_keys test_vdso test_vsyscall 15 protection_keys test_vdso test_vsyscall mov_ss_trap
15TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ 16TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
16 test_FCMOV test_FCOMI test_FISTTP \ 17 test_FCMOV test_FCOMI test_FISTTP \
17 vdso_restorer 18 vdso_restorer
@@ -31,7 +32,12 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
31BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) 32BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
32BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) 33BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
33 34
34CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie 35CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
36
37# call32_from_64 in thunks.S uses absolute addresses.
38ifeq ($(CAN_BUILD_WITH_NOPIE),1)
39CFLAGS += -no-pie
40endif
35 41
36define gen-target-rule-32 42define gen-target-rule-32
37$(1) $(1)_32: $(OUTPUT)/$(1)_32 43$(1) $(1)_32: $(OUTPUT)/$(1)_32
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000000..3c3a022654f3
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
4 *
5 * This does MOV SS from a watchpointed address followed by various
6 * types of kernel entries. A MOV SS that hits a watchpoint will queue
7 * up a #DB trap but will not actually deliver that trap. The trap
8 * will be delivered after the next instruction instead. The CPU's logic
9 * seems to be:
10 *
11 * - Any fault: drop the pending #DB trap.
12 * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
13 * deliver #DB.
14 * - ICEBP: enter the kernel but do not deliver the watchpoint trap
15 * - breakpoint: only one #DB is delivered (phew!)
16 *
17 * There are plenty of ways for a kernel to handle this incorrectly. This
18 * test tries to exercise all the cases.
19 *
20 * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
21 */
22#define _GNU_SOURCE
23
24#include <stdlib.h>
25#include <sys/ptrace.h>
26#include <sys/types.h>
27#include <sys/wait.h>
28#include <sys/user.h>
29#include <sys/syscall.h>
30#include <unistd.h>
31#include <errno.h>
32#include <stddef.h>
33#include <stdio.h>
34#include <err.h>
35#include <string.h>
36#include <setjmp.h>
37#include <sys/prctl.h>
38
39#define X86_EFLAGS_RF (1UL << 16)
40
41#if __x86_64__
42# define REG_IP REG_RIP
43#else
44# define REG_IP REG_EIP
45#endif
46
47unsigned short ss;
48extern unsigned char breakpoint_insn[];
49sigjmp_buf jmpbuf;
50static unsigned char altstack_data[SIGSTKSZ];
51
52static void enable_watchpoint(void)
53{
54 pid_t parent = getpid();
55 int status;
56
57 pid_t child = fork();
58 if (child < 0)
59 err(1, "fork");
60
61 if (child) {
62 if (waitpid(child, &status, 0) != child)
63 err(1, "waitpid for child");
64 } else {
65 unsigned long dr0, dr1, dr7;
66
67 dr0 = (unsigned long)&ss;
68 dr1 = (unsigned long)breakpoint_insn;
69 dr7 = ((1UL << 1) | /* G0 */
70 (3UL << 16) | /* RW0 = read or write */
71 (1UL << 18) | /* LEN0 = 2 bytes */
72 (1UL << 3)); /* G1, RW1 = insn */
73
74 if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
75 err(1, "PTRACE_ATTACH");
76
77 if (waitpid(parent, &status, 0) != parent)
78 err(1, "waitpid for child");
79
80 if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
81 err(1, "PTRACE_POKEUSER DR0");
82
83 if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
84 err(1, "PTRACE_POKEUSER DR1");
85
86 if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
87 err(1, "PTRACE_POKEUSER DR7");
88
89 printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
90
91 if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
92 err(1, "PTRACE_DETACH");
93
94 exit(0);
95 }
96}
97
98static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
99 int flags)
100{
101 struct sigaction sa;
102 memset(&sa, 0, sizeof(sa));
103 sa.sa_sigaction = handler;
104 sa.sa_flags = SA_SIGINFO | flags;
105 sigemptyset(&sa.sa_mask);
106 if (sigaction(sig, &sa, 0))
107 err(1, "sigaction");
108}
109
110static char const * const signames[] = {
111 [SIGSEGV] = "SIGSEGV",
112 [SIGBUS] = "SIBGUS",
113 [SIGTRAP] = "SIGTRAP",
114 [SIGILL] = "SIGILL",
115};
116
117static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
118{
119 ucontext_t *ctx = ctx_void;
120
121 printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
122 (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
123 !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
124}
125
126static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
127{
128 ucontext_t *ctx = ctx_void;
129
130 printf("\tGot %s with RIP=%lx\n", signames[sig],
131 (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
132}
133
134static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
135{
136 ucontext_t *ctx = ctx_void;
137
138 printf("\tGot %s with RIP=%lx\n", signames[sig],
139 (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
140
141 siglongjmp(jmpbuf, 1);
142}
143
144int main()
145{
146 unsigned long nr;
147
148 asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
149 printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
150
151 if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
152 printf("\tPR_SET_PTRACER_ANY succeeded\n");
153
154 printf("\tSet up a watchpoint\n");
155 sethandler(SIGTRAP, sigtrap, 0);
156 enable_watchpoint();
157
158 printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
159 asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
160
161 printf("[RUN]\tMOV SS; INT3\n");
162 asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
163
164 printf("[RUN]\tMOV SS; INT 3\n");
165 asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
166
167 printf("[RUN]\tMOV SS; CS CS INT3\n");
168 asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
169
170 printf("[RUN]\tMOV SS; CSx14 INT3\n");
171 asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
172
173 printf("[RUN]\tMOV SS; INT 4\n");
174 sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
175 asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
176
177#ifdef __i386__
178 printf("[RUN]\tMOV SS; INTO\n");
179 sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
180 nr = -1;
181 asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
182 : [tmp] "+r" (nr) : [ss] "m" (ss));
183#endif
184
185 if (sigsetjmp(jmpbuf, 1) == 0) {
186 printf("[RUN]\tMOV SS; ICEBP\n");
187
188 /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
189 sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
190
191 asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
192 }
193
194 if (sigsetjmp(jmpbuf, 1) == 0) {
195 printf("[RUN]\tMOV SS; CLI\n");
196 sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
197 asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
198 }
199
200 if (sigsetjmp(jmpbuf, 1) == 0) {
201 printf("[RUN]\tMOV SS; #PF\n");
202 sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
203 asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
204 : [tmp] "=r" (nr) : [ss] "m" (ss));
205 }
206
207 /*
208 * INT $1: if #DB has DPL=3 and there isn't special handling,
209 * then the kernel will die.
210 */
211 if (sigsetjmp(jmpbuf, 1) == 0) {
212 printf("[RUN]\tMOV SS; INT 1\n");
213 sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
214 asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
215 }
216
217#ifdef __x86_64__
218 /*
219 * In principle, we should test 32-bit SYSCALL as well, but
220 * the calling convention is so unpredictable that it's
221 * not obviously worth the effort.
222 */
223 if (sigsetjmp(jmpbuf, 1) == 0) {
224 printf("[RUN]\tMOV SS; SYSCALL\n");
225 sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
226 nr = SYS_getpid;
227 /*
228 * Toggle the high bit of RSP to make it noncanonical to
229 * strengthen this test on non-SMAP systems.
230 */
231 asm volatile ("btc $63, %%rsp\n\t"
232 "mov %[ss], %%ss; syscall\n\t"
233 "btc $63, %%rsp"
234 : "+a" (nr) : [ss] "m" (ss)
235 : "rcx"
236#ifdef __x86_64__
237 , "r11"
238#endif
239 );
240 }
241#endif
242
243 printf("[RUN]\tMOV SS; breakpointed NOP\n");
244 asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
245
246 /*
247 * Invoking SYSENTER directly breaks all the rules. Just handle
248 * the SIGSEGV.
249 */
250 if (sigsetjmp(jmpbuf, 1) == 0) {
251 printf("[RUN]\tMOV SS; SYSENTER\n");
252 stack_t stack = {
253 .ss_sp = altstack_data,
254 .ss_size = SIGSTKSZ,
255 };
256 if (sigaltstack(&stack, NULL) != 0)
257 err(1, "sigaltstack");
258 sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
259 nr = SYS_getpid;
260 asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
261 : [ss] "m" (ss) : "flags", "rcx"
262#ifdef __x86_64__
263 , "r11"
264#endif
265 );
266
267 /* We're unreachable here. SYSENTER forgets RIP. */
268 }
269
270 if (sigsetjmp(jmpbuf, 1) == 0) {
271 printf("[RUN]\tMOV SS; INT $0x80\n");
272 sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
273 nr = 20; /* compat getpid */
274 asm volatile ("mov %[ss], %%ss; int $0x80"
275 : "+a" (nr) : [ss] "m" (ss)
276 : "flags"
277#ifdef __x86_64__
278 , "r8", "r9", "r10", "r11"
279#endif
280 );
281 }
282
283 printf("[OK]\tI aten't dead\n");
284 return 0;
285}
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index 9c0325e1ea68..50f7e9272481 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
368uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ 368uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
369unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; 369unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
370 370
371/* Failed address bound checks: */
372#ifndef SEGV_BNDERR
373# define SEGV_BNDERR 3
374#endif
375
371/* 376/*
372 * The kernel is supposed to provide some information about the bounds 377 * The kernel is supposed to provide some information about the bounds
373 * exception in the siginfo. It should match what we have in the bounds 378 * exception in the siginfo. It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
419 br_count++; 424 br_count++;
420 dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); 425 dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
421 426
422#define SEGV_BNDERR 3 /* failed address bound checks */
423
424 dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", 427 dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
425 status, ip, br_reason); 428 status, ip, br_reason);
426 dprintf2("si_signo: %d\n", si->si_signo); 429 dprintf2("si_signo: %d\n", si->si_signo);
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index b3cb7670e026..254e5436bdd9 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
26{ 26{
27 va_list ap; 27 va_list ap;
28 28
29 va_start(ap, format);
30 if (!dprint_in_signal) { 29 if (!dprint_in_signal) {
30 va_start(ap, format);
31 vprintf(format, ap); 31 vprintf(format, ap);
32 va_end(ap);
32 } else { 33 } else {
33 int ret; 34 int ret;
34 int len = vsnprintf(dprint_in_signal_buffer,
35 DPRINT_IN_SIGNAL_BUF_SIZE,
36 format, ap);
37 /* 35 /*
38 * len is amount that would have been printed, 36 * No printf() functions are signal-safe.
39 * but actual write is truncated at BUF_SIZE. 37 * They deadlock easily. Write the format
38 * string to get some output, even if
39 * incomplete.
40 */ 40 */
41 if (len > DPRINT_IN_SIGNAL_BUF_SIZE) 41 ret = write(1, format, strlen(format));
42 len = DPRINT_IN_SIGNAL_BUF_SIZE;
43 ret = write(1, dprint_in_signal_buffer, len);
44 if (ret < 0) 42 if (ret < 0)
45 abort(); 43 exit(1);
46 } 44 }
47 va_end(ap);
48} 45}
49#define dprintf_level(level, args...) do { \ 46#define dprintf_level(level, args...) do { \
50 if (level <= DEBUG_LEVEL) \ 47 if (level <= DEBUG_LEVEL) \
51 sigsafe_printf(args); \ 48 sigsafe_printf(args); \
52 fflush(NULL); \
53} while (0) 49} while (0)
54#define dprintf0(args...) dprintf_level(0, args) 50#define dprintf0(args...) dprintf_level(0, args)
55#define dprintf1(args...) dprintf_level(1, args) 51#define dprintf1(args...) dprintf_level(1, args)
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index f15aa5a76fe3..460b4bdf4c1e 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -72,10 +72,9 @@ extern void abort_hooks(void);
72 test_nr, iteration_nr); \ 72 test_nr, iteration_nr); \
73 dprintf0("errno at assert: %d", errno); \ 73 dprintf0("errno at assert: %d", errno); \
74 abort_hooks(); \ 74 abort_hooks(); \
75 assert(condition); \ 75 exit(__LINE__); \
76 } \ 76 } \
77} while (0) 77} while (0)
78#define raw_assert(cond) assert(cond)
79 78
80void cat_into_file(char *str, char *file) 79void cat_into_file(char *str, char *file)
81{ 80{
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
87 * these need to be raw because they are called under 86 * these need to be raw because they are called under
88 * pkey_assert() 87 * pkey_assert()
89 */ 88 */
90 raw_assert(fd >= 0); 89 if (fd < 0) {
90 fprintf(stderr, "error opening '%s'\n", str);
91 perror("error: ");
92 exit(__LINE__);
93 }
94
91 ret = write(fd, str, strlen(str)); 95 ret = write(fd, str, strlen(str));
92 if (ret != strlen(str)) { 96 if (ret != strlen(str)) {
93 perror("write to file failed"); 97 perror("write to file failed");
94 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 98 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
95 raw_assert(0); 99 exit(__LINE__);
96 } 100 }
97 close(fd); 101 close(fd);
98} 102}
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
191#ifdef __i386__ 195#ifdef __i386__
192 196
193#ifndef SYS_mprotect_key 197#ifndef SYS_mprotect_key
194# define SYS_mprotect_key 380 198# define SYS_mprotect_key 380
195#endif 199#endif
200
196#ifndef SYS_pkey_alloc 201#ifndef SYS_pkey_alloc
197# define SYS_pkey_alloc 381 202# define SYS_pkey_alloc 381
198# define SYS_pkey_free 382 203# define SYS_pkey_free 382
199#endif 204#endif
200#define REG_IP_IDX REG_EIP 205
201#define si_pkey_offset 0x14 206#define REG_IP_IDX REG_EIP
207#define si_pkey_offset 0x14
202 208
203#else 209#else
204 210
205#ifndef SYS_mprotect_key 211#ifndef SYS_mprotect_key
206# define SYS_mprotect_key 329 212# define SYS_mprotect_key 329
207#endif 213#endif
214
208#ifndef SYS_pkey_alloc 215#ifndef SYS_pkey_alloc
209# define SYS_pkey_alloc 330 216# define SYS_pkey_alloc 330
210# define SYS_pkey_free 331 217# define SYS_pkey_free 331
211#endif 218#endif
212#define REG_IP_IDX REG_RIP 219
213#define si_pkey_offset 0x20 220#define REG_IP_IDX REG_RIP
221#define si_pkey_offset 0x20
214 222
215#endif 223#endif
216 224
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
225 } 233 }
226} 234}
227 235
228#define SEGV_BNDERR 3 /* failed address bound checks */ 236/* Failed address bound checks: */
229#define SEGV_PKUERR 4 237#ifndef SEGV_BNDERR
238# define SEGV_BNDERR 3
239#endif
240
241#ifndef SEGV_PKUERR
242# define SEGV_PKUERR 4
243#endif
230 244
231static char *si_code_str(int si_code) 245static char *si_code_str(int si_code)
232{ 246{
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
289 dump_mem(pkru_ptr - 128, 256); 303 dump_mem(pkru_ptr - 128, 256);
290 pkey_assert(*pkru_ptr); 304 pkey_assert(*pkru_ptr);
291 305
292 si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
293 dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
294 dump_mem(si_pkey_ptr - 8, 24);
295 siginfo_pkey = *si_pkey_ptr;
296 pkey_assert(siginfo_pkey < NR_PKEYS);
297 last_si_pkey = siginfo_pkey;
298
299 if ((si->si_code == SEGV_MAPERR) || 306 if ((si->si_code == SEGV_MAPERR) ||
300 (si->si_code == SEGV_ACCERR) || 307 (si->si_code == SEGV_ACCERR) ||
301 (si->si_code == SEGV_BNDERR)) { 308 (si->si_code == SEGV_BNDERR)) {
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
303 exit(4); 310 exit(4);
304 } 311 }
305 312
313 si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
314 dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
315 dump_mem((u8 *)si_pkey_ptr - 8, 24);
316 siginfo_pkey = *si_pkey_ptr;
317 pkey_assert(siginfo_pkey < NR_PKEYS);
318 last_si_pkey = siginfo_pkey;
319
306 dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); 320 dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
307 /* need __rdpkru() version so we do not do shadow_pkru checking */ 321 /* need __rdpkru() version so we do not do shadow_pkru checking */
308 dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); 322 dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
311 dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); 325 dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
312 pkru_faults++; 326 pkru_faults++;
313 dprintf1("<<<<==================================================\n"); 327 dprintf1("<<<<==================================================\n");
314 return;
315 if (trapno == 14) {
316 fprintf(stderr,
317 "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
318 trapno, ip);
319 fprintf(stderr, "si_addr %p\n", si->si_addr);
320 fprintf(stderr, "REG_ERR: %lx\n",
321 (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
322 exit(1);
323 } else {
324 fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
325 fprintf(stderr, "si_addr %p\n", si->si_addr);
326 fprintf(stderr, "REG_ERR: %lx\n",
327 (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
328 exit(2);
329 }
330 dprint_in_signal = 0; 328 dprint_in_signal = 0;
331} 329}
332 330
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
393 return forkret; 391 return forkret;
394} 392}
395 393
396#define PKEY_DISABLE_ACCESS 0x1 394#ifndef PKEY_DISABLE_ACCESS
397#define PKEY_DISABLE_WRITE 0x2 395# define PKEY_DISABLE_ACCESS 0x1
396#endif
397
398#ifndef PKEY_DISABLE_WRITE
399# define PKEY_DISABLE_WRITE 0x2
400#endif
398 401
399u32 pkey_get(int pkey, unsigned long flags) 402static u32 hw_pkey_get(int pkey, unsigned long flags)
400{ 403{
401 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 404 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
402 u32 pkru = __rdpkru(); 405 u32 pkru = __rdpkru();
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
418 return masked_pkru; 421 return masked_pkru;
419} 422}
420 423
421int pkey_set(int pkey, unsigned long rights, unsigned long flags) 424static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
422{ 425{
423 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 426 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
424 u32 old_pkru = __rdpkru(); 427 u32 old_pkru = __rdpkru();
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
452 pkey, flags); 455 pkey, flags);
453 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 456 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
454 457
455 pkey_rights = pkey_get(pkey, syscall_flags); 458 pkey_rights = hw_pkey_get(pkey, syscall_flags);
456 459
457 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 460 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
458 pkey, pkey, pkey_rights); 461 pkey, pkey, pkey_rights);
459 pkey_assert(pkey_rights >= 0); 462 pkey_assert(pkey_rights >= 0);
460 463
461 pkey_rights |= flags; 464 pkey_rights |= flags;
462 465
463 ret = pkey_set(pkey, pkey_rights, syscall_flags); 466 ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
464 assert(!ret); 467 assert(!ret);
465 /*pkru and flags have the same format */ 468 /*pkru and flags have the same format */
466 shadow_pkru |= flags << (pkey * 2); 469 shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
468 471
469 pkey_assert(ret >= 0); 472 pkey_assert(ret >= 0);
470 473
471 pkey_rights = pkey_get(pkey, syscall_flags); 474 pkey_rights = hw_pkey_get(pkey, syscall_flags);
472 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 475 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
473 pkey, pkey, pkey_rights); 476 pkey, pkey, pkey_rights);
474 477
475 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 478 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
483{ 486{
484 unsigned long syscall_flags = 0; 487 unsigned long syscall_flags = 0;
485 int ret; 488 int ret;
486 int pkey_rights = pkey_get(pkey, syscall_flags); 489 int pkey_rights = hw_pkey_get(pkey, syscall_flags);
487 u32 orig_pkru = rdpkru(); 490 u32 orig_pkru = rdpkru();
488 491
489 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 492 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
490 493
491 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 494 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
492 pkey, pkey, pkey_rights); 495 pkey, pkey, pkey_rights);
493 pkey_assert(pkey_rights >= 0); 496 pkey_assert(pkey_rights >= 0);
494 497
495 pkey_rights |= flags; 498 pkey_rights |= flags;
496 499
497 ret = pkey_set(pkey, pkey_rights, 0); 500 ret = hw_pkey_set(pkey, pkey_rights, 0);
498 /* pkru and flags have the same format */ 501 /* pkru and flags have the same format */
499 shadow_pkru &= ~(flags << (pkey * 2)); 502 shadow_pkru &= ~(flags << (pkey * 2));
500 pkey_assert(ret >= 0); 503 pkey_assert(ret >= 0);
501 504
502 pkey_rights = pkey_get(pkey, syscall_flags); 505 pkey_rights = hw_pkey_get(pkey, syscall_flags);
503 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 506 dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
504 pkey, pkey, pkey_rights); 507 pkey, pkey, pkey_rights);
505 508
506 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 509 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
674struct pkey_malloc_record { 677struct pkey_malloc_record {
675 void *ptr; 678 void *ptr;
676 long size; 679 long size;
680 int prot;
677}; 681};
678struct pkey_malloc_record *pkey_malloc_records; 682struct pkey_malloc_record *pkey_malloc_records;
683struct pkey_malloc_record *pkey_last_malloc_record;
679long nr_pkey_malloc_records; 684long nr_pkey_malloc_records;
680void record_pkey_malloc(void *ptr, long size) 685void record_pkey_malloc(void *ptr, long size, int prot)
681{ 686{
682 long i; 687 long i;
683 struct pkey_malloc_record *rec = NULL; 688 struct pkey_malloc_record *rec = NULL;
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
709 (int)(rec - pkey_malloc_records), rec, ptr, size); 714 (int)(rec - pkey_malloc_records), rec, ptr, size);
710 rec->ptr = ptr; 715 rec->ptr = ptr;
711 rec->size = size; 716 rec->size = size;
717 rec->prot = prot;
718 pkey_last_malloc_record = rec;
712 nr_pkey_malloc_records++; 719 nr_pkey_malloc_records++;
713} 720}
714 721
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
753 pkey_assert(ptr != (void *)-1); 760 pkey_assert(ptr != (void *)-1);
754 ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 761 ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
755 pkey_assert(!ret); 762 pkey_assert(!ret);
756 record_pkey_malloc(ptr, size); 763 record_pkey_malloc(ptr, size, prot);
757 rdpkru(); 764 rdpkru();
758 765
759 dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 766 dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
774 size = ALIGN_UP(size, HPAGE_SIZE * 2); 781 size = ALIGN_UP(size, HPAGE_SIZE * 2);
775 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 782 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
776 pkey_assert(ptr != (void *)-1); 783 pkey_assert(ptr != (void *)-1);
777 record_pkey_malloc(ptr, size); 784 record_pkey_malloc(ptr, size, prot);
778 mprotect_pkey(ptr, size, prot, pkey); 785 mprotect_pkey(ptr, size, prot, pkey);
779 786
780 dprintf1("unaligned ptr: %p\n", ptr); 787 dprintf1("unaligned ptr: %p\n", ptr);
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
847 pkey_assert(ptr != (void *)-1); 854 pkey_assert(ptr != (void *)-1);
848 mprotect_pkey(ptr, size, prot, pkey); 855 mprotect_pkey(ptr, size, prot, pkey);
849 856
850 record_pkey_malloc(ptr, size); 857 record_pkey_malloc(ptr, size, prot);
851 858
852 dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 859 dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
853 return ptr; 860 return ptr;
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
869 876
870 mprotect_pkey(ptr, size, prot, pkey); 877 mprotect_pkey(ptr, size, prot, pkey);
871 878
872 record_pkey_malloc(ptr, size); 879 record_pkey_malloc(ptr, size, prot);
873 880
874 dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 881 dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
875 close(fd); 882 close(fd);
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
918} 925}
919 926
920int last_pkru_faults; 927int last_pkru_faults;
928#define UNKNOWN_PKEY -2
921void expected_pk_fault(int pkey) 929void expected_pk_fault(int pkey)
922{ 930{
923 dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", 931 dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
924 __func__, last_pkru_faults, pkru_faults); 932 __func__, last_pkru_faults, pkru_faults);
925 dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 933 dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
926 pkey_assert(last_pkru_faults + 1 == pkru_faults); 934 pkey_assert(last_pkru_faults + 1 == pkru_faults);
927 pkey_assert(last_si_pkey == pkey); 935
936 /*
937 * For exec-only memory, we do not know the pkey in
938 * advance, so skip this check.
939 */
940 if (pkey != UNKNOWN_PKEY)
941 pkey_assert(last_si_pkey == pkey);
942
928 /* 943 /*
929 * The signal handler shold have cleared out PKRU to let the 944 * The signal handler shold have cleared out PKRU to let the
930 * test program continue. We now have to restore it. 945 * test program continue. We now have to restore it.
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
939 last_si_pkey = -1; 954 last_si_pkey = -1;
940} 955}
941 956
942void do_not_expect_pk_fault(void) 957#define do_not_expect_pk_fault(msg) do { \
943{ 958 if (last_pkru_faults != pkru_faults) \
944 pkey_assert(last_pkru_faults == pkru_faults); 959 dprintf0("unexpected PK fault: %s\n", msg); \
945} 960 pkey_assert(last_pkru_faults == pkru_faults); \
961} while (0)
946 962
947int test_fds[10] = { -1 }; 963int test_fds[10] = { -1 };
948int nr_test_fds; 964int nr_test_fds;
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1151 pkey_assert(i < NR_PKEYS*2); 1167 pkey_assert(i < NR_PKEYS*2);
1152 1168
1153 /* 1169 /*
1154 * There are 16 pkeys supported in hardware. One is taken 1170 * There are 16 pkeys supported in hardware. Three are
1155 * up for the default (0) and another can be taken up by 1171 * allocated by the time we get here:
1156 * an execute-only mapping. Ensure that we can allocate 1172 * 1. The default key (0)
1157 * at least 14 (16-2). 1173 * 2. One possibly consumed by an execute-only mapping.
1174 * 3. One allocated by the test code and passed in via
1175 * 'pkey' to this function.
1176 * Ensure that we can allocate at least another 13 (16-3).
1158 */ 1177 */
1159 pkey_assert(i >= NR_PKEYS-2); 1178 pkey_assert(i >= NR_PKEYS-3);
1160 1179
1161 for (i = 0; i < nr_allocated_pkeys; i++) { 1180 for (i = 0; i < nr_allocated_pkeys; i++) {
1162 err = sys_pkey_free(allocated_pkeys[i]); 1181 err = sys_pkey_free(allocated_pkeys[i]);
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1165 } 1184 }
1166} 1185}
1167 1186
1187/*
1188 * pkey 0 is special. It is allocated by default, so you do not
1189 * have to call pkey_alloc() to use it first. Make sure that it
1190 * is usable.
1191 */
1192void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
1193{
1194 long size;
1195 int prot;
1196
1197 assert(pkey_last_malloc_record);
1198 size = pkey_last_malloc_record->size;
1199 /*
1200 * This is a bit of a hack. But mprotect() requires
1201 * huge-page-aligned sizes when operating on hugetlbfs.
1202 * So, make sure that we use something that's a multiple
1203 * of a huge page when we can.
1204 */
1205 if (size >= HPAGE_SIZE)
1206 size = HPAGE_SIZE;
1207 prot = pkey_last_malloc_record->prot;
1208
1209 /* Use pkey 0 */
1210 mprotect_pkey(ptr, size, prot, 0);
1211
1212 /* Make sure that we can set it back to the original pkey. */
1213 mprotect_pkey(ptr, size, prot, pkey);
1214}
1215
1168void test_ptrace_of_child(int *ptr, u16 pkey) 1216void test_ptrace_of_child(int *ptr, u16 pkey)
1169{ 1217{
1170 __attribute__((__unused__)) int peek_result; 1218 __attribute__((__unused__)) int peek_result;
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
1228 pkey_assert(ret != -1); 1276 pkey_assert(ret != -1);
1229 /* Now access from the current task, and expect NO exception: */ 1277 /* Now access from the current task, and expect NO exception: */
1230 peek_result = read_ptr(plain_ptr); 1278 peek_result = read_ptr(plain_ptr);
1231 do_not_expect_pk_fault(); 1279 do_not_expect_pk_fault("read plain pointer after ptrace");
1232 1280
1233 ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1281 ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
1234 pkey_assert(ret != -1); 1282 pkey_assert(ret != -1);
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
1241 free(plain_ptr_unaligned); 1289 free(plain_ptr_unaligned);
1242} 1290}
1243 1291
1244void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1292void *get_pointer_to_instructions(void)
1245{ 1293{
1246 void *p1; 1294 void *p1;
1247 int scratch;
1248 int ptr_contents;
1249 int ret;
1250 1295
1251 p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 1296 p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
1252 dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1297 dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
1256 /* Point 'p1' at the *second* page of the function: */ 1301 /* Point 'p1' at the *second* page of the function: */
1257 p1 += PAGE_SIZE; 1302 p1 += PAGE_SIZE;
1258 1303
1304 /*
1305 * Try to ensure we fault this in on next touch to ensure
1306 * we get an instruction fault as opposed to a data one
1307 */
1259 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1308 madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1309
1310 return p1;
1311}
1312
1313void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
1314{
1315 void *p1;
1316 int scratch;
1317 int ptr_contents;
1318 int ret;
1319
1320 p1 = get_pointer_to_instructions();
1260 lots_o_noops_around_write(&scratch); 1321 lots_o_noops_around_write(&scratch);
1261 ptr_contents = read_ptr(p1); 1322 ptr_contents = read_ptr(p1);
1262 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1323 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
1272 */ 1333 */
1273 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1334 madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1274 lots_o_noops_around_write(&scratch); 1335 lots_o_noops_around_write(&scratch);
1275 do_not_expect_pk_fault(); 1336 do_not_expect_pk_fault("executing on PROT_EXEC memory");
1276 ptr_contents = read_ptr(p1); 1337 ptr_contents = read_ptr(p1);
1277 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1338 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1278 expected_pk_fault(pkey); 1339 expected_pk_fault(pkey);
1279} 1340}
1280 1341
1342void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
1343{
1344 void *p1;
1345 int scratch;
1346 int ptr_contents;
1347 int ret;
1348
1349 dprintf1("%s() start\n", __func__);
1350
1351 p1 = get_pointer_to_instructions();
1352 lots_o_noops_around_write(&scratch);
1353 ptr_contents = read_ptr(p1);
1354 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1355
1356 /* Use a *normal* mprotect(), not mprotect_pkey(): */
1357 ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
1358 pkey_assert(!ret);
1359
1360 dprintf2("pkru: %x\n", rdpkru());
1361
1362 /* Make sure this is an *instruction* fault */
1363 madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1364 lots_o_noops_around_write(&scratch);
1365 do_not_expect_pk_fault("executing on PROT_EXEC memory");
1366 ptr_contents = read_ptr(p1);
1367 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1368 expected_pk_fault(UNKNOWN_PKEY);
1369
1370 /*
1371 * Put the memory back to non-PROT_EXEC. Should clear the
1372 * exec-only pkey off the VMA and allow it to be readable
1373 * again. Go to PROT_NONE first to check for a kernel bug
1374 * that did not clear the pkey when doing PROT_NONE.
1375 */
1376 ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
1377 pkey_assert(!ret);
1378
1379 ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
1380 pkey_assert(!ret);
1381 ptr_contents = read_ptr(p1);
1382 do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
1383}
1384
1281void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1385void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
1282{ 1386{
1283 int size = PAGE_SIZE; 1387 int size = PAGE_SIZE;
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
1302 test_kernel_gup_of_access_disabled_region, 1406 test_kernel_gup_of_access_disabled_region,
1303 test_kernel_gup_write_to_write_disabled_region, 1407 test_kernel_gup_write_to_write_disabled_region,
1304 test_executing_on_unreadable_memory, 1408 test_executing_on_unreadable_memory,
1409 test_implicit_mprotect_exec_only_memory,
1410 test_mprotect_with_pkey_0,
1305 test_ptrace_of_child, 1411 test_ptrace_of_child,
1306 test_pkey_syscalls_on_non_allocated_pkey, 1412 test_pkey_syscalls_on_non_allocated_pkey,
1307 test_pkey_syscalls_bad_args, 1413 test_pkey_syscalls_bad_args,
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000000..46a447163b93
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
1/* Trivial program to check that compilation with certain flags is working. */
2
3#include <stdio.h>
4
5int
6main(void)
7{
8 puts("");
9 return 0;
10}
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 754de7da426a..232e958ec454 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,6 +2,9 @@
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3TCID="zram.sh" 3TCID="zram.sh"
4 4
5# Kselftest framework requirement - SKIP code is 4.
6ksft_skip=4
7
5. ./zram_lib.sh 8. ./zram_lib.sh
6 9
7run_zram () { 10run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
24else 27else
25 echo "$TCID : No zram.ko module or /dev/zram0 device file not found" 28 echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
26 echo "$TCID : CONFIG_ZRAM is not set" 29 echo "$TCID : CONFIG_ZRAM is not set"
27 exit 1 30 exit $ksft_skip
28fi 31fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
18dev_makeswap=-1 18dev_makeswap=-1
19dev_mounted=-1 19dev_mounted=-1
20 20
21# Kselftest framework requirement - SKIP code is 4.
22ksft_skip=4
23
21trap INT 24trap INT
22 25
23check_prereqs() 26check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
27 30
28 if [ $uid -ne 0 ]; then 31 if [ $uid -ne 0 ]; then
29 echo $msg must be run as root >&2 32 echo $msg must be run as root >&2
30 exit 0 33 exit $ksft_skip
31 fi 34 fi
32} 35}
33 36
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index c9c81614a66a..4204359c9fee 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -135,11 +135,11 @@ static int refresh_imported_device_list(void)
135 return 0; 135 return 0;
136} 136}
137 137
138static int get_nports(void) 138static int get_nports(struct udev_device *hc_device)
139{ 139{
140 const char *attr_nports; 140 const char *attr_nports;
141 141
142 attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports"); 142 attr_nports = udev_device_get_sysattr_value(hc_device, "nports");
143 if (!attr_nports) { 143 if (!attr_nports) {
144 err("udev_device_get_sysattr_value nports failed"); 144 err("udev_device_get_sysattr_value nports failed");
145 return -1; 145 return -1;
@@ -242,35 +242,41 @@ static int read_record(int rhport, char *host, unsigned long host_len,
242 242
243int usbip_vhci_driver_open(void) 243int usbip_vhci_driver_open(void)
244{ 244{
245 int nports;
246 struct udev_device *hc_device;
247
245 udev_context = udev_new(); 248 udev_context = udev_new();
246 if (!udev_context) { 249 if (!udev_context) {
247 err("udev_new failed"); 250 err("udev_new failed");
248 return -1; 251 return -1;
249 } 252 }
250 253
251 vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver));
252
253 /* will be freed in usbip_driver_close() */ 254 /* will be freed in usbip_driver_close() */
254 vhci_driver->hc_device = 255 hc_device =
255 udev_device_new_from_subsystem_sysname(udev_context, 256 udev_device_new_from_subsystem_sysname(udev_context,
256 USBIP_VHCI_BUS_TYPE, 257 USBIP_VHCI_BUS_TYPE,
257 USBIP_VHCI_DEVICE_NAME); 258 USBIP_VHCI_DEVICE_NAME);
258 if (!vhci_driver->hc_device) { 259 if (!hc_device) {
259 err("udev_device_new_from_subsystem_sysname failed"); 260 err("udev_device_new_from_subsystem_sysname failed");
260 goto err; 261 goto err;
261 } 262 }
262 263
263 vhci_driver->nports = get_nports(); 264 nports = get_nports(hc_device);
264 dbg("available ports: %d", vhci_driver->nports); 265 if (nports <= 0) {
265
266 if (vhci_driver->nports <= 0) {
267 err("no available ports"); 266 err("no available ports");
268 goto err; 267 goto err;
269 } else if (vhci_driver->nports > MAXNPORT) { 268 }
270 err("port number exceeds %d", MAXNPORT); 269 dbg("available ports: %d", nports);
270
271 vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) +
272 nports * sizeof(struct usbip_imported_device));
273 if (!vhci_driver) {
274 err("vhci_driver allocation failed");
271 goto err; 275 goto err;
272 } 276 }
273 277
278 vhci_driver->nports = nports;
279 vhci_driver->hc_device = hc_device;
274 vhci_driver->ncontrollers = get_ncontrollers(); 280 vhci_driver->ncontrollers = get_ncontrollers();
275 dbg("available controllers: %d", vhci_driver->ncontrollers); 281 dbg("available controllers: %d", vhci_driver->ncontrollers);
276 282
@@ -285,7 +291,7 @@ int usbip_vhci_driver_open(void)
285 return 0; 291 return 0;
286 292
287err: 293err:
288 udev_device_unref(vhci_driver->hc_device); 294 udev_device_unref(hc_device);
289 295
290 if (vhci_driver) 296 if (vhci_driver)
291 free(vhci_driver); 297 free(vhci_driver);
diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h
index 418b404d5121..6c9aca216705 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.h
+++ b/tools/usb/usbip/libsrc/vhci_driver.h
@@ -13,7 +13,6 @@
13 13
14#define USBIP_VHCI_BUS_TYPE "platform" 14#define USBIP_VHCI_BUS_TYPE "platform"
15#define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0" 15#define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0"
16#define MAXNPORT 128
17 16
18enum hub_speed { 17enum hub_speed {
19 HUB_SPEED_HIGH = 0, 18 HUB_SPEED_HIGH = 0,
@@ -41,7 +40,7 @@ struct usbip_vhci_driver {
41 40
42 int ncontrollers; 41 int ncontrollers;
43 int nports; 42 int nports;
44 struct usbip_imported_device idev[MAXNPORT]; 43 struct usbip_imported_device idev[];
45}; 44};
46 45
47 46
diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index 9db9d21bb2ec..777f7286a0c5 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -43,9 +43,12 @@ void usbip_detach_usage(void)
43 43
44static int detach_port(char *port) 44static int detach_port(char *port)
45{ 45{
46 int ret; 46 int ret = 0;
47 uint8_t portnum; 47 uint8_t portnum;
48 char path[PATH_MAX+1]; 48 char path[PATH_MAX+1];
49 int i;
50 struct usbip_imported_device *idev;
51 int found = 0;
49 52
50 unsigned int port_len = strlen(port); 53 unsigned int port_len = strlen(port);
51 54
@@ -55,27 +58,48 @@ static int detach_port(char *port)
55 return -1; 58 return -1;
56 } 59 }
57 60
58 /* check max port */
59
60 portnum = atoi(port); 61 portnum = atoi(port);
61 62
62 /* remove the port state file */ 63 ret = usbip_vhci_driver_open();
64 if (ret < 0) {
65 err("open vhci_driver");
66 return -1;
67 }
68
69 /* check for invalid port */
70 for (i = 0; i < vhci_driver->nports; i++) {
71 idev = &vhci_driver->idev[i];
72
73 if (idev->port == portnum) {
74 found = 1;
75 if (idev->status != VDEV_ST_NULL)
76 break;
77 info("Port %d is already detached!\n", idev->port);
78 goto call_driver_close;
79 }
80 }
63 81
82 if (!found) {
83 err("Invalid port %s > maxports %d",
84 port, vhci_driver->nports);
85 goto call_driver_close;
86 }
87
88 /* remove the port state file */
64 snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum); 89 snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum);
65 90
66 remove(path); 91 remove(path);
67 rmdir(VHCI_STATE_PATH); 92 rmdir(VHCI_STATE_PATH);
68 93
69 ret = usbip_vhci_driver_open(); 94 ret = usbip_vhci_detach_device(portnum);
70 if (ret < 0) { 95 if (ret < 0) {
71 err("open vhci_driver"); 96 ret = -1;
72 return -1; 97 err("Port %d detach request failed!\n", portnum);
98 goto call_driver_close;
73 } 99 }
100 info("Port %d is now detached!\n", portnum);
74 101
75 ret = usbip_vhci_detach_device(portnum); 102call_driver_close:
76 if (ret < 0)
77 return -1;
78
79 usbip_vhci_driver_close(); 103 usbip_vhci_driver_close();
80 104
81 return ret; 105 return ret;
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 1571e24e9494..f91aeb5fe571 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -6,8 +6,6 @@
6# error Virtio userspace code does not support CONFIG_HAS_DMA 6# error Virtio userspace code does not support CONFIG_HAS_DMA
7#endif 7#endif
8 8
9#define PCI_DMA_BUS_IS_PHYS 1
10
11enum dma_data_direction { 9enum dma_data_direction {
12 DMA_BIDIRECTIONAL = 0, 10 DMA_BIDIRECTIONAL = 0,
13 DMA_TO_DEVICE = 1, 11 DMA_TO_DEVICE = 1,
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index a8783f48f77f..cce853dca691 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -131,6 +131,7 @@ static const char * const page_flag_names[] = {
131 [KPF_KSM] = "x:ksm", 131 [KPF_KSM] = "x:ksm",
132 [KPF_THP] = "t:thp", 132 [KPF_THP] = "t:thp",
133 [KPF_BALLOON] = "o:balloon", 133 [KPF_BALLOON] = "o:balloon",
134 [KPF_PGTABLE] = "g:pgtable",
134 [KPF_ZERO_PAGE] = "z:zero_page", 135 [KPF_ZERO_PAGE] = "z:zero_page",
135 [KPF_IDLE] = "i:idle_page", 136 [KPF_IDLE] = "i:idle_page",
136 137