diff options
| author | Ravi Bangoria <ravi.bangoria@linux.ibm.com> | 2019-01-29 08:24:12 -0500 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2019-02-04 09:32:14 -0500 |
| commit | f0fabf9c897327abd39018aefb5029aff8c7e133 (patch) | |
| tree | 7f438664f6584e3acb442a6a12150a468db03869 | |
| parent | 489338a717a0dfbbd5a3fabccf172b78f0ac9015 (diff) | |
perf mem/c2c: Fix perf_mem_events to support powerpc
PowerPC hardware does not have a builtin latency filter (--ldlat) for
the "mem-load" event and perf_mem_events by default includes
"/ldlat=30/" which is causing a failure on PowerPC. Refactor the code to
support "perf mem/c2c" on PowerPC.
This patch depends on kernel side changes done by Madhavan:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2018-December/182596.html
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Dick Fowles <fowles@inreach.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20190129132412.771-1-ravi.bangoria@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
| -rw-r--r-- | tools/perf/Documentation/perf-c2c.txt | 16 | ||||
| -rw-r--r-- | tools/perf/Documentation/perf-mem.txt | 2 | ||||
| -rw-r--r-- | tools/perf/arch/powerpc/util/Build | 1 | ||||
| -rw-r--r-- | tools/perf/arch/powerpc/util/mem-events.c | 11 | ||||
| -rw-r--r-- | tools/perf/util/mem-events.c | 2 |
5 files changed, 26 insertions, 6 deletions
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 095aebdc5bb7..e6150f21267d 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt | |||
| @@ -19,8 +19,11 @@ C2C stands for Cache To Cache. | |||
| 19 | The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows | 19 | The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows |
| 20 | you to track down the cacheline contentions. | 20 | you to track down the cacheline contentions. |
| 21 | 21 | ||
| 22 | The tool is based on x86's load latency and precise store facility events | 22 | On x86, the tool is based on load latency and precise store facility events |
| 23 | provided by Intel CPUs. These events provide: | 23 | provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling |
| 24 | with thresholding feature. | ||
| 25 | |||
| 26 | These events provide: | ||
| 24 | - memory address of the access | 27 | - memory address of the access |
| 25 | - type of the access (load and store details) | 28 | - type of the access (load and store details) |
| 26 | - latency (in cycles) of the load access | 29 | - latency (in cycles) of the load access |
| @@ -46,7 +49,7 @@ RECORD OPTIONS | |||
| 46 | 49 | ||
| 47 | -l:: | 50 | -l:: |
| 48 | --ldlat:: | 51 | --ldlat:: |
| 49 | Configure mem-loads latency. | 52 | Configure mem-loads latency. (x86 only) |
| 50 | 53 | ||
| 51 | -k:: | 54 | -k:: |
| 52 | --all-kernel:: | 55 | --all-kernel:: |
| @@ -119,11 +122,16 @@ Following perf record options are configured by default: | |||
| 119 | -W,-d,--phys-data,--sample-cpu | 122 | -W,-d,--phys-data,--sample-cpu |
| 120 | 123 | ||
| 121 | Unless specified otherwise with '-e' option, following events are monitored by | 124 | Unless specified otherwise with '-e' option, following events are monitored by |
| 122 | default: | 125 | default on x86: |
| 123 | 126 | ||
| 124 | cpu/mem-loads,ldlat=30/P | 127 | cpu/mem-loads,ldlat=30/P |
| 125 | cpu/mem-stores/P | 128 | cpu/mem-stores/P |
| 126 | 129 | ||
| 130 | and following on PowerPC: | ||
| 131 | |||
| 132 | cpu/mem-loads/ | ||
| 133 | cpu/mem-stores/ | ||
| 134 | |||
| 127 | User can pass any 'perf record' option behind '--' mark, like (to enable | 135 | User can pass any 'perf record' option behind '--' mark, like (to enable |
| 128 | callchains and system wide monitoring): | 136 | callchains and system wide monitoring): |
| 129 | 137 | ||
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index f8d2167cf3e7..199ea0f0a6c0 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt | |||
| @@ -82,7 +82,7 @@ RECORD OPTIONS | |||
| 82 | Be more verbose (show counter open errors, etc) | 82 | Be more verbose (show counter open errors, etc) |
| 83 | 83 | ||
| 84 | --ldlat <n>:: | 84 | --ldlat <n>:: |
| 85 | Specify desired latency for loads event. | 85 | Specify desired latency for loads event. (x86 only) |
| 86 | 86 | ||
| 87 | In addition, for report all perf report options are valid, and for record | 87 | In addition, for report all perf report options are valid, and for record |
| 88 | all perf record options. | 88 | all perf record options. |
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 2e6595310420..ba98bd006488 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build | |||
| @@ -2,6 +2,7 @@ libperf-y += header.o | |||
| 2 | libperf-y += sym-handling.o | 2 | libperf-y += sym-handling.o |
| 3 | libperf-y += kvm-stat.o | 3 | libperf-y += kvm-stat.o |
| 4 | libperf-y += perf_regs.o | 4 | libperf-y += perf_regs.o |
| 5 | libperf-y += mem-events.o | ||
| 5 | 6 | ||
| 6 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 7 | libperf-$(CONFIG_DWARF) += dwarf-regs.o |
| 7 | libperf-$(CONFIG_DWARF) += skip-callchain-idx.o | 8 | libperf-$(CONFIG_DWARF) += skip-callchain-idx.o |
diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c new file mode 100644 index 000000000000..d08311f04e95 --- /dev/null +++ b/tools/perf/arch/powerpc/util/mem-events.c | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #include "mem-events.h" | ||
| 3 | |||
| 4 | /* PowerPC does not support 'ldlat' parameter. */ | ||
| 5 | char *perf_mem_events__name(int i) | ||
| 6 | { | ||
| 7 | if (i == PERF_MEM_EVENTS__LOAD) | ||
| 8 | return (char *) "cpu/mem-loads/"; | ||
| 9 | |||
| 10 | return (char *) "cpu/mem-stores/"; | ||
| 11 | } | ||
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 93f74d8d3cdd..42c3e5a229d2 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c | |||
| @@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { | |||
| 28 | static char mem_loads_name[100]; | 28 | static char mem_loads_name[100]; |
| 29 | static bool mem_loads_name__init; | 29 | static bool mem_loads_name__init; |
| 30 | 30 | ||
| 31 | char *perf_mem_events__name(int i) | 31 | char * __weak perf_mem_events__name(int i) |
| 32 | { | 32 | { |
| 33 | if (i == PERF_MEM_EVENTS__LOAD) { | 33 | if (i == PERF_MEM_EVENTS__LOAD) { |
| 34 | if (!mem_loads_name__init) { | 34 | if (!mem_loads_name__init) { |
