aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-05-20 02:36:09 -0400
committerIngo Molnar <mingo@kernel.org>2014-05-20 02:36:09 -0400
commit6480c56130ba073df84d57d61062ec4118b10bbe (patch)
tree219d542060729e5ed5d4d13c30d73cf9cf5c2bb0 /tools/perf
parent722e76e60f2775c21b087ff12c5e678cf0ebcaaf (diff)
parent97eac381b113932bd7bd4a5c3c68b18e9ff7a2a0 (diff)
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf into perf/core
Pull perf/core improvements and fixes from Jiri Olsa: * Add libdw DWARF post unwind support for ARM (Jean Pihet) * Consolidate types.h for ARM and ARM64 (Jean Pihet) * Fix possible null pointer dereference in session.c (Masanari Iida) * Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang) * Remove nr_state_machine_bugs in perf latency (Dongsheng Yang) * Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra) Signed-off-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Makefile.perf2
-rw-r--r--tools/perf/arch/arm/Makefile7
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h7
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S58
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c36
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h2
-rw-r--r--tools/perf/builtin-sched.c32
-rw-r--r--tools/perf/config/Makefile4
-rw-r--r--tools/perf/tests/builtin-test.c2
-rw-r--r--tools/perf/tests/evsel-tp-sched.c3
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/util/session.c5
13 files changed, 187 insertions, 33 deletions
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2baf61cec7ff..dea2d633c374 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -411,7 +411,7 @@ LIB_OBJS += $(OUTPUT)tests/code-reading.o
411LIB_OBJS += $(OUTPUT)tests/sample-parsing.o 411LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
412LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o 412LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
413ifndef NO_DWARF_UNWIND 413ifndef NO_DWARF_UNWIND
414ifeq ($(ARCH),x86) 414ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
415LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o 415LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
416endif 416endif
417endif 417endif
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 67e9b3d38e89..09d62153d384 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -5,3 +5,10 @@ endif
5ifndef NO_LIBUNWIND 5ifndef NO_LIBUNWIND
6LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o 6LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
7endif 7endif
8ifndef NO_LIBDW_DWARF_UNWIND
9LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
10endif
11ifndef NO_DWARF_UNWIND
12LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
13LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
14endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index 2a1cfde66b69..f619c9c5a4bf 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -2,10 +2,15 @@
2#define ARCH_PERF_REGS_H 2#define ARCH_PERF_REGS_H
3 3
4#include <stdlib.h> 4#include <stdlib.h>
5#include "../../util/types.h" 5#include <linux/types.h>
6#include <asm/perf_regs.h> 6#include <asm/perf_regs.h>
7 7
8void perf_regs_load(u64 *regs);
9
8#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1) 10#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
11#define PERF_REGS_MAX PERF_REG_ARM_MAX
12#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
13
9#define PERF_REG_IP PERF_REG_ARM_PC 14#define PERF_REG_IP PERF_REG_ARM_PC
10#define PERF_REG_SP PERF_REG_ARM_SP 15#define PERF_REG_SP PERF_REG_ARM_SP
11 16
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 000000000000..9f870d27cb39
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
1#include <string.h>
2#include "perf_regs.h"
3#include "thread.h"
4#include "map.h"
5#include "event.h"
6#include "tests/tests.h"
7
8#define STACK_SIZE 8192
9
10static int sample_ustack(struct perf_sample *sample,
11 struct thread *thread, u64 *regs)
12{
13 struct stack_dump *stack = &sample->user_stack;
14 struct map *map;
15 unsigned long sp;
16 u64 stack_size, *buf;
17
18 buf = malloc(STACK_SIZE);
19 if (!buf) {
20 pr_debug("failed to allocate sample uregs data\n");
21 return -1;
22 }
23
24 sp = (unsigned long) regs[PERF_REG_ARM_SP];
25
26 map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
27 if (!map) {
28 pr_debug("failed to get stack map\n");
29 free(buf);
30 return -1;
31 }
32
33 stack_size = map->end - sp;
34 stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
35
36 memcpy(buf, (void *) sp, stack_size);
37 stack->data = (char *) buf;
38 stack->size = stack_size;
39 return 0;
40}
41
42int test__arch_unwind_sample(struct perf_sample *sample,
43 struct thread *thread)
44{
45 struct regs_dump *regs = &sample->user_regs;
46 u64 *buf;
47
48 buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
49 if (!buf) {
50 pr_debug("failed to allocate sample uregs data\n");
51 return -1;
52 }
53
54 perf_regs_load(buf);
55 regs->abi = PERF_SAMPLE_REGS_ABI;
56 regs->regs = buf;
57 regs->mask = PERF_REGS_MASK;
58
59 return sample_ustack(sample, thread, buf);
60}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 000000000000..e09e983946fe
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
1#include <linux/linkage.h>
2
3#define R0 0x00
4#define R1 0x08
5#define R2 0x10
6#define R3 0x18
7#define R4 0x20
8#define R5 0x28
9#define R6 0x30
10#define R7 0x38
11#define R8 0x40
12#define R9 0x48
13#define SL 0x50
14#define FP 0x58
15#define IP 0x60
16#define SP 0x68
17#define LR 0x70
18#define PC 0x78
19
20/*
21 * Implementation of void perf_regs_load(u64 *regs);
22 *
23 * This function fills in the 'regs' buffer from the actual register values,
24 * in the way the perf built-in unwinding test expects them:
25 * - the PC at the time of the call to this function. Since this function
26 * is called using a bl instruction, the PC value is taken from LR.
27 * The built-in unwinding test then unwinds the call stack from the dwarf
28 * information in unwind__get_entries.
29 *
30 * Notes:
31 * - the 8 bytes stride in the registers offsets comes from the fact
32 * that the registers are stored in an u64 array (u64 *regs),
33 * - the regs buffer needs to be zeroed before the call to this function,
34 * in this case using a calloc in dwarf-unwind.c.
35 */
36
/*
 * save_reg - store one 32-bit register into its slot of the buffer that
 * r0 points to. Slots are 8 bytes apart (see the offsets defined above);
 * only the low word of each slot is written.
 */
.macro save_reg reg:req, slot:req
	str	\reg, [r0, #\slot]
.endm

	.text
	.type	perf_regs_load, %function
ENTRY(perf_regs_load)
	/* r0 is both the buffer pointer and the first value saved. */
	save_reg r0, R0
	save_reg r1, R1
	save_reg r2, R2
	save_reg r3, R3
	save_reg r4, R4
	save_reg r5, R5
	save_reg r6, R6
	save_reg r7, R7
	save_reg r8, R8
	save_reg r9, R9
	save_reg sl, SL
	save_reg fp, FP
	save_reg ip, IP
	save_reg sp, SP
	save_reg lr, LR
	/* Record lr as the PC so that the call to this function is skipped. */
	save_reg lr, PC
	mov	pc, lr
ENDPROC(perf_regs_load)

.purgem save_reg
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 000000000000..b4176c60117a
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
1#include <elfutils/libdwfl.h>
2#include "../../util/unwind-libdw.h"
3#include "../../util/perf_regs.h"
4
5bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
6{
7 struct unwind_info *ui = arg;
8 struct regs_dump *user_regs = &ui->sample->user_regs;
9 Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
10
11#define REG(r) ({ \
12 Dwarf_Word val = 0; \
13 perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
14 val; \
15})
16
17 dwarf_regs[0] = REG(R0);
18 dwarf_regs[1] = REG(R1);
19 dwarf_regs[2] = REG(R2);
20 dwarf_regs[3] = REG(R3);
21 dwarf_regs[4] = REG(R4);
22 dwarf_regs[5] = REG(R5);
23 dwarf_regs[6] = REG(R6);
24 dwarf_regs[7] = REG(R7);
25 dwarf_regs[8] = REG(R8);
26 dwarf_regs[9] = REG(R9);
27 dwarf_regs[10] = REG(R10);
28 dwarf_regs[11] = REG(FP);
29 dwarf_regs[12] = REG(IP);
30 dwarf_regs[13] = REG(SP);
31 dwarf_regs[14] = REG(LR);
32 dwarf_regs[15] = REG(PC);
33
34 return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
35 dwarf_regs);
36}
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index 23595467402d..e9441b9e2a30 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -2,7 +2,7 @@
2#define ARCH_PERF_REGS_H 2#define ARCH_PERF_REGS_H
3 3
4#include <stdlib.h> 4#include <stdlib.h>
5#include "../../util/types.h" 5#include <linux/types.h>
6#include <asm/perf_regs.h> 6#include <asm/perf_regs.h>
7 7
8#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1) 8#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 2579215f5743..d7176830b9b2 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -149,7 +149,6 @@ struct perf_sched {
149 unsigned long nr_runs; 149 unsigned long nr_runs;
150 unsigned long nr_timestamps; 150 unsigned long nr_timestamps;
151 unsigned long nr_unordered_timestamps; 151 unsigned long nr_unordered_timestamps;
152 unsigned long nr_state_machine_bugs;
153 unsigned long nr_context_switch_bugs; 152 unsigned long nr_context_switch_bugs;
154 unsigned long nr_events; 153 unsigned long nr_events;
155 unsigned long nr_lost_chunks; 154 unsigned long nr_lost_chunks;
@@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
1007 struct perf_sample *sample, 1006 struct perf_sample *sample,
1008 struct machine *machine) 1007 struct machine *machine)
1009{ 1008{
1010 const u32 pid = perf_evsel__intval(evsel, sample, "pid"), 1009 const u32 pid = perf_evsel__intval(evsel, sample, "pid");
1011 success = perf_evsel__intval(evsel, sample, "success");
1012 struct work_atoms *atoms; 1010 struct work_atoms *atoms;
1013 struct work_atom *atom; 1011 struct work_atom *atom;
1014 struct thread *wakee; 1012 struct thread *wakee;
1015 u64 timestamp = sample->time; 1013 u64 timestamp = sample->time;
1016 1014
1017 /* Note for later, it may be interesting to observe the failing cases */
1018 if (!success)
1019 return 0;
1020
1021 wakee = machine__findnew_thread(machine, 0, pid); 1015 wakee = machine__findnew_thread(machine, 0, pid);
1022 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); 1016 atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
1023 if (!atoms) { 1017 if (!atoms) {
@@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
1037 atom = list_entry(atoms->work_list.prev, struct work_atom, list); 1031 atom = list_entry(atoms->work_list.prev, struct work_atom, list);
1038 1032
1039 /* 1033 /*
1034 * As we do not guarantee the wakeup event happens when
1035 * task is out of run queue, also may happen when task is
1036 * on run queue and wakeup only change ->state to TASK_RUNNING,
1037 * then we should not set the ->wake_up_time when wake up a
1038 * task which is on run queue.
1039 *
1040 * You WILL be missing events if you've recorded only 1040 * You WILL be missing events if you've recorded only
1041 * one CPU, or are only looking at only one, so don't 1041 * one CPU, or are only looking at only one, so don't
1042 * make useless noise. 1042 * skip in this case.
1043 */ 1043 */
1044 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) 1044 if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
1045 sched->nr_state_machine_bugs++; 1045 return 0;
1046 1046
1047 sched->nr_timestamps++; 1047 sched->nr_timestamps++;
1048 if (atom->sched_out_time > timestamp) { 1048 if (atom->sched_out_time > timestamp) {
@@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
1266static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, 1266static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
1267 struct perf_sample *sample, struct machine *machine) 1267 struct perf_sample *sample, struct machine *machine)
1268{ 1268{
1269 const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), 1269 const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
1270 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); 1270 struct thread *sched_in;
1271 struct thread *sched_out __maybe_unused, *sched_in;
1272 int new_shortname; 1271 int new_shortname;
1273 u64 timestamp0, timestamp = sample->time; 1272 u64 timestamp0, timestamp = sample->time;
1274 s64 delta; 1273 s64 delta;
@@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
1291 return -1; 1290 return -1;
1292 } 1291 }
1293 1292
1294 sched_out = machine__findnew_thread(machine, 0, prev_pid);
1295 sched_in = machine__findnew_thread(machine, 0, next_pid); 1293 sched_in = machine__findnew_thread(machine, 0, next_pid);
1296 1294
1297 sched->curr_thread[this_cpu] = sched_in; 1295 sched->curr_thread[this_cpu] = sched_in;
@@ -1501,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
1501 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0, 1499 (double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
1502 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks); 1500 sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
1503 } 1501 }
1504 if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
1505 printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
1506 (double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
1507 sched->nr_state_machine_bugs, sched->nr_timestamps);
1508 if (sched->nr_lost_events)
1509 printf(" (due to lost events?)");
1510 printf("\n");
1511 }
1512 if (sched->nr_context_switch_bugs && sched->nr_timestamps) { 1502 if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
1513 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)", 1503 printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
1514 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0, 1504 (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index f2edc593a7a7..729bbdf5cec7 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -40,11 +40,11 @@ ifeq ($(ARCH),arm64)
40 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 40 LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
41endif 41endif
42 42
43# So far there's only x86 libdw unwind support merged in perf. 43# So far there's only x86 and arm libdw unwind support merged in perf.
44# Disable it on all other architectures in case libdw unwind 44# Disable it on all other architectures in case libdw unwind
45# support is detected in system. Add supported architectures 45# support is detected in system. Add supported architectures
46# to the check. 46# to the check.
47ifneq ($(ARCH),x86) 47ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
48 NO_LIBDW_DWARF_UNWIND := 1 48 NO_LIBDW_DWARF_UNWIND := 1
49endif 49endif
50 50
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 0d5afaf72944..5e0764b09317 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -115,7 +115,7 @@ static struct test {
115 .desc = "Test parsing with no sample_id_all bit set", 115 .desc = "Test parsing with no sample_id_all bit set",
116 .func = test__parse_no_sample_id_all, 116 .func = test__parse_no_sample_id_all,
117 }, 117 },
118#if defined(__x86_64__) || defined(__i386__) 118#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
119#ifdef HAVE_DWARF_UNWIND_SUPPORT 119#ifdef HAVE_DWARF_UNWIND_SUPPORT
120 { 120 {
121 .desc = "Test dwarf unwind", 121 .desc = "Test dwarf unwind",
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 4774f7fbb758..35d7fdb2328d 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
74 if (perf_evsel__test_field(evsel, "prio", 4, true)) 74 if (perf_evsel__test_field(evsel, "prio", 4, true))
75 ret = -1; 75 ret = -1;
76 76
77 if (perf_evsel__test_field(evsel, "success", 4, true))
78 ret = -1;
79
80 if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) 77 if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
81 ret = -1; 78 ret = -1;
82 79
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index a9d7cb019f9e..8f91fb051ef1 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -45,7 +45,7 @@ int test__hists_filter(void);
45int test__mmap_thread_lookup(void); 45int test__mmap_thread_lookup(void);
46int test__thread_mg_share(void); 46int test__thread_mg_share(void);
47 47
48#if defined(__x86_64__) || defined(__i386__) 48#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
49#ifdef HAVE_DWARF_UNWIND_SUPPORT 49#ifdef HAVE_DWARF_UNWIND_SUPPORT
50struct thread; 50struct thread;
51struct perf_sample; 51struct perf_sample;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 55960f22233c..64a186edc7be 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1625,13 +1625,14 @@ out_delete_map:
1625void perf_session__fprintf_info(struct perf_session *session, FILE *fp, 1625void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
1626 bool full) 1626 bool full)
1627{ 1627{
1628 int fd = perf_data_file__fd(session->file);
1629 struct stat st; 1628 struct stat st;
1630 int ret; 1629 int fd, ret;
1631 1630
1632 if (session == NULL || fp == NULL) 1631 if (session == NULL || fp == NULL)
1633 return; 1632 return;
1634 1633
1634 fd = perf_data_file__fd(session->file);
1635
1635 ret = fstat(fd, &st); 1636 ret = fstat(fd, &st);
1636 if (ret == -1) 1637 if (ret == -1)
1637 return; 1638 return;