author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2009-07-22 12:05:48 -0400
committer Peter Zijlstra <a.p.zijlstra@chello.nl>  2009-07-22 12:05:48 -0400
commit    1d2f37945d1b3a14086c5ea802486778b635cf97 (patch)
tree      b40a1a596a29acc1511f661c27f284dd06b0bc9d
parent    1483b19f8f5e8ad0c8816de368b099322dad4db5 (diff)
parent    f1c6a58121f9846ac665b0fbd3cbab90ce8bcbac (diff)
Merge commit 'tip/perfcounters/core' into perf-counters-for-linus
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c        | 255
-rw-r--r--  kernel/perf_counter.c                     |  36
-rw-r--r--  tools/perf/Documentation/perf-report.txt  |  15
-rw-r--r--  tools/perf/builtin-report.c               | 227
-rw-r--r--  tools/perf/perf.h                         |   8
-rw-r--r--  tools/perf/util/include/linux/kernel.h    |   8
-rw-r--r--  tools/perf/util/strlist.c                 |  20
-rw-r--r--  tools/perf/util/strlist.h                 |  11
-rw-r--r--  tools/perf/util/symbol.c                  | 117
-rw-r--r--  tools/perf/util/symbol.h                  |   1
10 files changed, 556 insertions(+), 142 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 36c3dc7b8991..7e346d4bc0fb 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -66,6 +66,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 };

 /*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+        [PERF_COUNT_HW_CPU_CYCLES]              = 0x0079,
+        [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
+        [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x0000,
+        [PERF_COUNT_HW_CACHE_MISSES]            = 0x0000,
+        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
+        [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
+        [PERF_COUNT_HW_BUS_CYCLES]              = 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+        return p6_perfmon_event_map[event];
+}
+
+/*
+ * Counter setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_COUNTER                  0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
+#define P6_EVNTSEL_INV_MASK             0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK         0xFF000000ULL
+
+#define P6_EVNTSEL_MASK                 \
+        (P6_EVNTSEL_EVENT_MASK |        \
+         P6_EVNTSEL_UNIT_MASK  |        \
+         P6_EVNTSEL_EDGE_MASK  |        \
+         P6_EVNTSEL_INV_MASK   |        \
+         P6_EVNTSEL_COUNTER_MASK)
+
+        return event & P6_EVNTSEL_MASK;
+}
+
+
+/*
  * Intel PerfMon v3. Used on Core2 and later.
  */
 static const u64 intel_perfmon_event_map[] =
@@ -666,6 +712,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 {
        struct perf_counter_attr *attr = &counter->attr;
        struct hw_perf_counter *hwc = &counter->hw;
+       u64 config;
        int err;

        if (!x86_pmu_initialized())
@@ -718,14 +765,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)

        if (attr->config >= x86_pmu.max_events)
                return -EINVAL;
+
        /*
         * The generic map:
         */
-       hwc->config |= x86_pmu.event_map(attr->config);
+       config = x86_pmu.event_map(attr->config);
+
+       if (config == 0)
+               return -ENOENT;
+
+       if (config == -1LL)
+               return -EINVAL;
+
+       hwc->config |= config;

        return 0;
 }

+static void p6_pmu_disable_all(void)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       u64 val;
+
+       if (!cpuc->enabled)
+               return;
+
+       cpuc->enabled = 0;
+       barrier();
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 static void intel_pmu_disable_all(void)
 {
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +840,23 @@ void hw_perf_disable(void)
        return x86_pmu.disable_all();
 }

+static void p6_pmu_enable_all(void)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       unsigned long val;
+
+       if (cpuc->enabled)
+               return;
+
+       cpuc->enabled = 1;
+       barrier();
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 static void intel_pmu_enable_all(void)
 {
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -784,13 +874,13 @@ static void amd_pmu_enable_all(void)
        barrier();

        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct perf_counter *counter = cpuc->counters[idx];
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
-               rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-               if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
-                       continue;
+
+               val = counter->hw.config;
                val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
                wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
        }
@@ -819,16 +909,13 @@ static inline void intel_pmu_ack_status(u64 ack)

 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
-       int err;
-       err = checking_wrmsrl(hwc->config_base + idx,
+       (void)checking_wrmsrl(hwc->config_base + idx,
                              hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }

 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
-       int err;
-       err = checking_wrmsrl(hwc->config_base + idx,
-                             hwc->config);
+       (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
 }

 static inline void
@@ -836,13 +923,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
 {
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
-       int err;

        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
-       err = checking_wrmsrl(hwc->config_base, ctrl_val);
+       (void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       u64 val = P6_NOP_COUNTER;
+
+       if (cpuc->enabled)
+               val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+       (void)checking_wrmsrl(hwc->config_base + idx, val);
 }

 static inline void
@@ -943,6 +1041,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }

+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       u64 val;
+
+       val = hwc->config;
+       if (cpuc->enabled)
+               val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+       (void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+
 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -959,8 +1070,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)

        if (cpuc->enabled)
                x86_pmu_enable_counter(hwc, idx);
-       else
-               x86_pmu_disable_counter(hwc, idx);
 }

966static int 1075static int
@@ -1176,6 +1285,49 @@ static void intel_pmu_reset(void)
        local_irq_restore(flags);
 }

+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_counters *cpuc;
+       struct perf_counter *counter;
+       struct hw_perf_counter *hwc;
+       int idx, handled = 0;
+       u64 val;
+
+       data.regs = regs;
+       data.addr = 0;
+
+       cpuc = &__get_cpu_var(cpu_hw_counters);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               counter = cpuc->counters[idx];
+               hwc = &counter->hw;
+
+               val = x86_perf_counter_update(counter, hwc, idx);
+               if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+                       continue;
+
+               /*
+                * counter overflow
+                */
+               handled = 1;
+               data.period = counter->hw.last_period;
+
+               if (!x86_perf_counter_set_period(counter, hwc, idx))
+                       continue;
+
+               if (perf_counter_overflow(counter, 1, &data))
+                       p6_pmu_disable_counter(hwc, idx);
+       }
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       return handled;
+}

 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1337,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
        struct perf_sample_data data;
        struct cpu_hw_counters *cpuc;
-       int bit, cpu, loops;
+       int bit, loops;
        u64 ack, status;

        data.regs = regs;
        data.addr = 0;

-       cpu = smp_processor_id();
-       cpuc = &per_cpu(cpu_hw_counters, cpu);
+       cpuc = &__get_cpu_var(cpu_hw_counters);

        perf_disable();
        status = intel_pmu_get_status();
@@ -1249,14 +1400,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
        struct cpu_hw_counters *cpuc;
        struct perf_counter *counter;
        struct hw_perf_counter *hwc;
-       int cpu, idx, handled = 0;
+       int idx, handled = 0;
        u64 val;

        data.regs = regs;
        data.addr = 0;

-       cpu = smp_processor_id();
-       cpuc = &per_cpu(cpu_hw_counters, cpu);
+       cpuc = &__get_cpu_var(cpu_hw_counters);

        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                if (!test_bit(idx, cpuc->active_mask))
@@ -1353,6 +1503,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
        .priority               = 1
 };

+static struct x86_pmu p6_pmu = {
+       .name           = "p6",
+       .handle_irq     = p6_pmu_handle_irq,
+       .disable_all    = p6_pmu_disable_all,
+       .enable_all     = p6_pmu_enable_all,
+       .enable         = p6_pmu_enable_counter,
+       .disable        = p6_pmu_disable_counter,
+       .eventsel       = MSR_P6_EVNTSEL0,
+       .perfctr        = MSR_P6_PERFCTR0,
+       .event_map      = p6_pmu_event_map,
+       .raw_event      = p6_pmu_raw_event,
+       .max_events     = ARRAY_SIZE(p6_perfmon_event_map),
+       .max_period     = (1ULL << 31) - 1,
+       .version        = 0,
+       .num_counters   = 2,
+       /*
+        * Counters have 40 bits implemented. However they are designed such
+        * that bits [32-39] are sign extensions of bit 31. As such the
+        * effective width of a counter for P6-like PMU is 32 bits only.
+        *
+        * See IA-32 Intel Architecture Software developer manual Vol 3B
+        */
+       .counter_bits   = 32,
+       .counter_mask   = (1ULL << 32) - 1,
+};
+
 static struct x86_pmu intel_pmu = {
        .name           = "Intel",
        .handle_irq     = intel_pmu_handle_irq,
@@ -1392,6 +1568,39 @@ static struct x86_pmu amd_pmu = {
        .max_period     = (1ULL << 47) - 1,
 };

+static int p6_pmu_init(void)
+{
+       int high, low;
+
+       switch (boot_cpu_data.x86_model) {
+       case 1:
+       case 3:  /* Pentium Pro */
+       case 5:
+       case 6:  /* Pentium II */
+       case 7:
+       case 8:
+       case 11: /* Pentium III */
+               break;
+       case 9:
+       case 13:
+               /* Pentium M */
+               break;
+       default:
+               pr_cont("unsupported p6 CPU model %d ",
+                       boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+
+       if (!cpu_has_apic) {
+               pr_info("no Local APIC, try rebooting with lapic");
+               return -ENODEV;
+       }
+
+       x86_pmu = p6_pmu;
+
+       return 0;
+}
+
 static int intel_pmu_init(void)
 {
        union cpuid10_edx edx;
@@ -1400,8 +1609,14 @@ static int intel_pmu_init(void)
        unsigned int ebx;
        int version;

-       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+               /* check for P6 processor family */
+               if (boot_cpu_data.x86 == 6) {
+                       return p6_pmu_init();
+               } else {
                return -ENODEV;
+               }
+       }

        /*
         * Check whether the Architectural PerfMon supports
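
A note on the counter_bits = 32 choice in p6_pmu above: bits 32-39 of a P6 counter only mirror bit 31, so the usable value lives in the low 32 bits, and p6_pmu_handle_irq() treats a cleared top bit as overflow. Below is a minimal userspace sketch (hypothetical values; not part of this patch) of the mask-and-shift technique that x86_perf_counter_update relies on to keep wrap-around deltas correct at such a truncated width:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const int counter_bits = 32;
        const uint64_t counter_mask = (1ULL << 32) - 1;

        /* a 40-bit MSR reading: bits 32-39 are copies of bit 31 */
        uint64_t raw  = 0xFF80001234ULL;
        uint64_t prev = 0x7FFFF000;     /* previous masked reading */
        uint64_t now  = raw & counter_mask;

        /* shift the unsigned difference up so the counter's top bit
         * becomes the sign bit, then shift back down arithmetically;
         * the delta stays correct even when the 32-bit value wraps */
        uint64_t diff = (now - prev) << (64 - counter_bits);
        int64_t delta = (int64_t)diff >> (64 - counter_bits);

        printf("now=%#llx delta=%lld\n",
               (unsigned long long)now, (long long)delta);
        return 0;
}
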
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index f7a8ab9576e4..5c6fae4f43d8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -146,6 +146,14 @@ static void put_ctx(struct perf_counter_context *ctx)
        }
 }

+static void unclone_ctx(struct perf_counter_context *ctx)
+{
+       if (ctx->parent_ctx) {
+               put_ctx(ctx->parent_ctx);
+               ctx->parent_ctx = NULL;
+       }
+}
+
 /*
  * Get the perf_counter_context for a task and lock it.
  * This has to cope with with the fact that until it is locked,
@@ -1463,10 +1471,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
        /*
         * Unclone this context if we enabled any counter.
         */
-       if (enabled && ctx->parent_ctx) {
-               put_ctx(ctx->parent_ctx);
-               ctx->parent_ctx = NULL;
-       }
+       if (enabled)
+               unclone_ctx(ctx);

        spin_unlock(&ctx->lock);

@@ -1526,7 +1532,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,

 static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 {
-       struct perf_counter_context *parent_ctx;
        struct perf_counter_context *ctx;
        struct perf_cpu_context *cpuctx;
        struct task_struct *task;
@@ -1586,11 +1591,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
 retry:
        ctx = perf_lock_task_context(task, &flags);
        if (ctx) {
-               parent_ctx = ctx->parent_ctx;
-               if (parent_ctx) {
-                       put_ctx(parent_ctx);
-                       ctx->parent_ctx = NULL; /* no longer a clone */
-               }
+               unclone_ctx(ctx);
                spin_unlock_irqrestore(&ctx->lock, flags);
        }

@@ -4262,15 +4263,12 @@ void perf_counter_exit_task(struct task_struct *child)
         */
        spin_lock(&child_ctx->lock);
        child->perf_counter_ctxp = NULL;
-       if (child_ctx->parent_ctx) {
-               /*
-                * This context is a clone; unclone it so it can't get
-                * swapped to another process while we're removing all
-                * the counters from it.
-                */
-               put_ctx(child_ctx->parent_ctx);
-               child_ctx->parent_ctx = NULL;
-       }
+       /*
+        * If this context is a clone; unclone it so it can't get
+        * swapped to another process while we're removing all
+        * the counters from it.
+        */
+       unclone_ctx(child_ctx);
        spin_unlock(&child_ctx->lock);
        local_irq_restore(flags);

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8aa3f8c88707..e72e93110782 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -24,6 +24,9 @@ OPTIONS
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
        file://filename entries.
+-n::
+--show-nr-samples::
+       Show the number of samples for each symbol.
 -C::
 --comms=::
        Only consider symbols in these comms. CSV that understands
@@ -33,6 +36,18 @@ OPTIONS
        Only consider these symbols. CSV that understands
        file://filename entries.

+-w::
+--column-widths=::
+       Force each column width to the provided list, for large terminal
+       readability.
+
+-t::
+--field-separator=::
+       Use a special separator character and don't pad with spaces, replacing
+       all occurrences of this separator in symbol names (and other output)
+       with a '.' character, so that it is the only non-valid separator.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4b980cce7055..a118bc77286d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,8 +33,10 @@ static char *vmlinux = NULL;

 static char default_sort_order[] = "comm,dso";
 static char *sort_order = default_sort_order;
-static char *dso_list_str, *comm_list_str, *sym_list_str;
+static char *dso_list_str, *comm_list_str, *sym_list_str,
+            *col_width_list_str;
 static struct strlist *dso_list, *comm_list, *sym_list;
+static char *field_sep;

 static int input;
 static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -49,6 +51,7 @@ static int verbose;
 static int modules;

 static int full_paths;
+static int show_nr_samples;

 static unsigned long page_size;
 static unsigned long mmap_window = 32;
@@ -129,6 +132,33 @@ typedef union event_union {
        struct read_event read;
 } event_t;

+static int repsep_fprintf(FILE *fp, const char *fmt, ...)
+{
+       int n;
+       va_list ap;
+
+       va_start(ap, fmt);
+       if (!field_sep)
+               n = vfprintf(fp, fmt, ap);
+       else {
+               char *bf = NULL;
+               n = vasprintf(&bf, fmt, ap);
+               if (n > 0) {
+                       char *sep = bf;
+                       while (1) {
+                               sep = strchr(sep, *field_sep);
+                               if (sep == NULL)
+                                       break;
+                               *sep = '.';
+                       }
+               }
+               fputs(bf, fp);
+               free(bf);
+       }
+       va_end(ap);
+       return n;
+}
+
 static LIST_HEAD(dsos);
 static struct dso *kernel_dso;
 static struct dso *vdso;
@@ -360,12 +390,28 @@ static struct thread *thread__new(pid_t pid)
        return self;
 }

+static unsigned int dsos__col_width,
+                    comms__col_width,
+                    threads__col_width;
+
 static int thread__set_comm(struct thread *self, const char *comm)
 {
        if (self->comm)
                free(self->comm);
        self->comm = strdup(comm);
-       return self->comm ? 0 : -ENOMEM;
+       if (!self->comm)
+               return -ENOMEM;
+
+       if (!col_width_list_str && !field_sep &&
+           (!comm_list || strlist__has_entry(comm_list, comm))) {
+               unsigned int slen = strlen(comm);
+               if (slen > comms__col_width) {
+                       comms__col_width = slen;
+                       threads__col_width = slen + 6;
+               }
+       }
+
+       return 0;
 }

 static size_t thread__fprintf(struct thread *self, FILE *fp)
@@ -536,7 +582,9 @@ struct sort_entry {

        int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
        int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-       size_t  (*print)(FILE *fp, struct hist_entry *);
+       size_t  (*print)(FILE *fp, struct hist_entry *, unsigned int width);
+       unsigned int *width;
+       bool    elide;
 };

 static int64_t cmp_null(void *l, void *r)
@@ -558,15 +606,17 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 }

 static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self)
+sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width)
 {
-       return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
+       return repsep_fprintf(fp, "%*s:%5d", width - 6,
+                             self->thread->comm ?: "", self->thread->pid);
 }

 static struct sort_entry sort_thread = {
-       .header = " Command: Pid",
+       .header = "Command: Pid",
        .cmp    = sort__thread_cmp,
        .print  = sort__thread_print,
+       .width  = &threads__col_width,
 };

 /* --sort comm */
@@ -590,16 +640,17 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 }

 static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self)
+sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
 {
-       return fprintf(fp, "%16s", self->thread->comm);
+       return repsep_fprintf(fp, "%*s", width, self->thread->comm);
 }

 static struct sort_entry sort_comm = {
-       .header         = " Command",
+       .header         = "Command",
        .cmp            = sort__comm_cmp,
        .collapse       = sort__comm_collapse,
        .print          = sort__comm_print,
+       .width          = &comms__col_width,
 };

 /* --sort dso */
@@ -617,18 +668,19 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 }

 static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self)
+sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
 {
        if (self->dso)
-               return fprintf(fp, "%-25s", self->dso->name);
+               return repsep_fprintf(fp, "%-*s", width, self->dso->name);

-       return fprintf(fp, "%016llx         ", (u64)self->ip);
+       return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
 }

 static struct sort_entry sort_dso = {
-       .header = "Shared Object ",
+       .header = "Shared Object",
        .cmp    = sort__dso_cmp,
        .print  = sort__dso_print,
+       .width  = &dsos__col_width,
 };

 /* --sort symbol */
@@ -648,22 +700,22 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 }

 static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self)
+sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
 {
        size_t ret = 0;

        if (verbose)
-               ret += fprintf(fp, "%#018llx ", (u64)self->ip);
+               ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip);

+       ret += repsep_fprintf(fp, "[%c] ", self->level);
        if (self->sym) {
-               ret += fprintf(fp, "[%c] %s",
-                       self->dso == kernel_dso ? 'k' :
-                       self->dso == hypervisor_dso ? 'h' : '.', self->sym->name);
+               ret += repsep_fprintf(fp, "%s", self->sym->name);

                if (self->sym->module)
-                       ret += fprintf(fp, "\t[%s]", self->sym->module->name);
+                       ret += repsep_fprintf(fp, "\t[%s]",
+                                             self->sym->module->name);
        } else {
-               ret += fprintf(fp, "%#016llx", (u64)self->ip);
+               ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
        }

        return ret;
@@ -690,19 +742,19 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
 }

 static size_t
-sort__parent_print(FILE *fp, struct hist_entry *self)
+sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
 {
-       size_t ret = 0;
-
-       ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]");
-
-       return ret;
+       return repsep_fprintf(fp, "%-*s", width,
+                             self->parent ? self->parent->name : "[other]");
 }

+static unsigned int parent_symbol__col_width;
+
 static struct sort_entry sort_parent = {
-       .header = "Parent symbol ",
+       .header = "Parent symbol",
        .cmp    = sort__parent_cmp,
        .print  = sort__parent_print,
+       .width  = &parent_symbol__col_width,
 };

 static int sort__need_collapse = 0;
@@ -967,17 +1019,25 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
                return 0;

        if (total_samples)
-               ret = percent_color_fprintf(fp, " %6.2f%%",
-                               (self->count * 100.0) / total_samples);
+               ret = percent_color_fprintf(fp,
+                                           field_sep ? "%.2f" : " %6.2f%%",
+                                           (self->count * 100.0) / total_samples);
        else
-               ret = fprintf(fp, "%12Ld ", self->count);
+               ret = fprintf(fp, field_sep ? "%lld" : "%12lld ", self->count);
+
+       if (show_nr_samples) {
+               if (field_sep)
+                       fprintf(fp, "%c%lld", *field_sep, self->count);
+               else
+                       fprintf(fp, "%11lld", self->count);
+       }

        list_for_each_entry(se, &hist_entry__sort_list, list) {
-               if (exclude_other && (se == &sort_parent))
+               if (se->elide)
                        continue;

-               fprintf(fp, "  ");
-               ret += se->print(fp, self);
+               fprintf(fp, "%s", field_sep ?: "  ");
+               ret += se->print(fp, self, se->width ? *se->width : 0);
        }

        ret += fprintf(fp, "\n");
@@ -992,6 +1052,18 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
  *
  */

+static void dso__calc_col_width(struct dso *self)
+{
+       if (!col_width_list_str && !field_sep &&
+           (!dso_list || strlist__has_entry(dso_list, self->name))) {
+               unsigned int slen = strlen(self->name);
+               if (slen > dsos__col_width)
+                       dsos__col_width = slen;
+       }
+
+       self->slen_calculated = 1;
+}
+
 static struct symbol *
 resolve_symbol(struct thread *thread, struct map **mapp,
                struct dso **dsop, u64 *ipp)
@@ -1011,6 +1083,14 @@ resolve_symbol(struct thread *thread, struct map **mapp,

        map = thread__find_map(thread, ip);
        if (map != NULL) {
+               /*
+                * We have to do this here as we may have a dso
+                * with no symbol hit that has a name longer than
+                * the ones with symbols sampled.
+                */
+               if (!sort_dso.elide && !map->dso->slen_calculated)
+                       dso__calc_col_width(map->dso);
+
                if (mapp)
                        *mapp = map;
 got_map:
@@ -1282,35 +1362,67 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
        struct sort_entry *se;
        struct rb_node *nd;
        size_t ret = 0;
+       unsigned int width;
+       char *col_width = col_width_list_str;

-       fprintf(fp, "\n");
-       fprintf(fp, "#\n");
-       fprintf(fp, "# (%Ld samples)\n", (u64)total_samples);
+       fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
        fprintf(fp, "#\n");

        fprintf(fp, "# Overhead");
+       if (show_nr_samples) {
+               if (field_sep)
+                       fprintf(fp, "%cSamples", *field_sep);
+               else
+                       fputs("  Samples ", fp);
+       }
        list_for_each_entry(se, &hist_entry__sort_list, list) {
-               if (exclude_other && (se == &sort_parent))
+               if (se->elide)
+                       continue;
+               if (field_sep) {
+                       fprintf(fp, "%c%s", *field_sep, se->header);
                        continue;
-               fprintf(fp, "  %s", se->header);
+               }
+               width = strlen(se->header);
+               if (se->width) {
+                       if (col_width_list_str) {
+                               if (col_width) {
+                                       *se->width = atoi(col_width);
+                                       col_width = strchr(col_width, ',');
+                                       if (col_width)
+                                               ++col_width;
+                               }
+                       }
+                       width = *se->width = max(*se->width, width);
+               }
+               fprintf(fp, "  %*s", width, se->header);
        }
        fprintf(fp, "\n");

+       if (field_sep)
+               goto print_entries;
+
        fprintf(fp, "# ........");
+       if (show_nr_samples)
+               fprintf(fp, " ..........");
        list_for_each_entry(se, &hist_entry__sort_list, list) {
                unsigned int i;

-               if (exclude_other && (se == &sort_parent))
+               if (se->elide)
                        continue;

                fprintf(fp, "  ");
-               for (i = 0; i < strlen(se->header); i++)
+               if (se->width)
+                       width = *se->width;
+               else
+                       width = strlen(se->header);
+               for (i = 0; i < width; i++)
                        fprintf(fp, ".");
        }
        fprintf(fp, "\n");

        fprintf(fp, "#\n");

+print_entries:
        for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
                pos = rb_entry(nd, struct hist_entry, rb_node);
                ret += hist_entry__fprintf(fp, pos, total_samples);
@@ -1883,6 +1995,8 @@ static const struct option options[] = {
        OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
        OPT_BOOLEAN('m', "modules", &modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
+       OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
+                   "Show a column with the number of samples"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
                   "sort by key(s): pid, comm, dso, symbol, parent"),
        OPT_BOOLEAN('P', "full-paths", &full_paths,
@@ -1900,6 +2014,12 @@ static const struct option options[] = {
1900 "only consider symbols in these comms"), 2014 "only consider symbols in these comms"),
1901 OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", 2015 OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]",
1902 "only consider these symbols"), 2016 "only consider these symbols"),
2017 OPT_STRING('w', "column-widths", &col_width_list_str,
2018 "width[,width...]",
2019 "don't try to adjust column width, use these fixed values"),
2020 OPT_STRING('t', "field-separator", &field_sep, "separator",
2021 "separator for columns, no spaces will be added between "
2022 "columns '.' is reserved."),
1903 OPT_END() 2023 OPT_END()
1904}; 2024};
1905 2025
@@ -1919,7 +2039,8 @@ static void setup_sorting(void)
 }

 static void setup_list(struct strlist **list, const char *list_str,
-                      const char *list_name)
+                      struct sort_entry *se, const char *list_name,
+                      FILE *fp)
 {
        if (list_str) {
                *list = strlist__new(true, list_str);
@@ -1928,6 +2049,11 @@ static void setup_list(struct strlist **list, const char *list_str,
                                list_name);
                        exit(129);
                }
+               if (strlist__nr_entries(*list) == 1) {
+                       fprintf(fp, "# %s: %s\n", list_name,
+                               strlist__entry(*list, 0)->s);
+                       se->elide = true;
+               }
        }
 }

@@ -1941,9 +2067,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)

        setup_sorting();

-       if (parent_pattern != default_parent_pattern)
+       if (parent_pattern != default_parent_pattern) {
                sort_dimension__add("parent");
-       else
+               sort_parent.elide = 1;
+       } else
                exclude_other = 0;

        /*
@@ -1952,11 +2079,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        if (argc)
                usage_with_options(report_usage, options);

-       setup_list(&dso_list, dso_list_str, "dso");
-       setup_list(&comm_list, comm_list_str, "comm");
-       setup_list(&sym_list, sym_list_str, "symbol");
-
        setup_pager();

+       setup_list(&dso_list, dso_list_str, &sort_dso, "dso", stdout);
+       setup_list(&comm_list, comm_list_str, &sort_comm, "comm", stdout);
+       setup_list(&sym_list, sym_list_str, &sort_sym, "symbol", stdout);
+
+       if (field_sep && *field_sep == '.') {
+               fputs("'.' is the only non valid --field-separator argument\n",
+                     stderr);
+               exit(129);
+       }
+
        return __cmd_report();
 }
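
The repsep_fprintf() helper introduced above keeps --field-separator output unambiguous by rewriting every occurrence of the separator character inside a field to '.'. A standalone sketch of just that escaping step (escape_sep is an illustrative name, not from the patch):

#include <stdio.h>
#include <string.h>

static void escape_sep(char *s, int sep)
{
        /* rewrite each occurrence of the separator to '.' in place */
        for (char *p = strchr(s, sep); p != NULL; p = strchr(p + 1, sep))
                *p = '.';
}

int main(void)
{
        char sym[] = "operator,new";    /* a symbol containing the separator */

        escape_sep(sym, ',');
        printf("%s,%s\n", "libstdc++.so", sym); /* libstdc++.so,operator.new */
        return 0;
}
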
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 27887c916439..53bb9550def9 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,7 +1,13 @@
 #ifndef _PERF_PERF_H
 #define _PERF_PERF_H

-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__)
+#include "../../arch/x86/include/asm/unistd.h"
+#define rmb()          asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define cpu_relax()    asm volatile("rep; nop" ::: "memory");
+#endif
+
+#if defined(__x86_64__)
 #include "../../arch/x86/include/asm/unistd.h"
 #define rmb()          asm volatile("lfence" ::: "memory")
 #define cpu_relax()    asm volatile("rep; nop" ::: "memory");
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index 99c1b3d1edd9..a6b87390cb52 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -18,4 +18,12 @@
        (type *)((char *)__mptr - offsetof(type, member)); })
 #endif

+#ifndef max
+#define max(x, y) ({                            \
+       typeof(x) _max1 = (x);                   \
+       typeof(y) _max2 = (y);                   \
+       (void) (&_max1 == &_max2);               \
+       _max1 > _max2 ? _max1 : _max2; })
+#endif
+
 #endif
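
The max() macro added above follows the kernel's pattern: the typed locals evaluate each argument exactly once, and the (void) (&_max1 == &_max2) pointer comparison makes the compiler warn when the two arguments have different types. A small sketch of the behaviour (GCC or Clang, since it relies on typeof and statement expressions):

#include <stdio.h>

#define max(x, y) ({                    \
        typeof(x) _max1 = (x);          \
        typeof(y) _max2 = (y);          \
        (void) (&_max1 == &_max2);      \
        _max1 > _max2 ? _max1 : _max2; })

int main(void)
{
        unsigned int a = 3, b = 7;

        printf("%u\n", max(a, b));      /* prints 7; each arg read once */
        /* max(a, 1L) would warn: comparison of distinct pointer types */
        return 0;
}
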
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 025a78edfffe..7ad38171dc2b 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -64,6 +64,7 @@ int strlist__add(struct strlist *self, const char *new_entry)

        rb_link_node(&sn->rb_node, parent, p);
        rb_insert_color(&sn->rb_node, &self->entries);
+       ++self->nr_entries;

        return 0;
 }
@@ -155,8 +156,9 @@ struct strlist *strlist__new(bool dupstr, const char *slist)
        struct strlist *self = malloc(sizeof(*self));

        if (self != NULL) {
                self->entries = RB_ROOT;
                self->dupstr = dupstr;
+               self->nr_entries = 0;
                if (slist && strlist__parse_list(self, slist) != 0)
                        goto out_error;
        }
@@ -182,3 +184,17 @@ void strlist__delete(struct strlist *self)
                free(self);
        }
 }
+
+struct str_node *strlist__entry(const struct strlist *self, unsigned int idx)
+{
+       struct rb_node *nd;
+
+       for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
+               struct str_node *pos = rb_entry(nd, struct str_node, rb_node);
+
+               if (!idx--)
+                       return pos;
+       }
+
+       return NULL;
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index 2fdcfee87586..921818e44a54 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -11,7 +11,8 @@ struct str_node {

 struct strlist {
        struct rb_root entries;
-       bool dupstr;
+       unsigned int nr_entries;
+       bool dupstr;
 };

 struct strlist *strlist__new(bool dupstr, const char *slist);
@@ -21,11 +22,17 @@ void strlist__remove(struct strlist *self, struct str_node *sn);
 int strlist__load(struct strlist *self, const char *filename);
 int strlist__add(struct strlist *self, const char *str);

+struct str_node *strlist__entry(const struct strlist *self, unsigned int idx);
 bool strlist__has_entry(struct strlist *self, const char *entry);

 static inline bool strlist__empty(const struct strlist *self)
 {
-       return rb_first(&self->entries) == NULL;
+       return self->nr_entries == 0;
+}
+
+static inline unsigned int strlist__nr_entries(const struct strlist *self)
+{
+       return self->nr_entries;
 }

 int strlist__parse_list(struct strlist *self, const char *s);
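
With nr_entries maintained on every strlist__add(), strlist__empty() drops from an rb-tree probe to an O(1) field read, and builtin-report.c can cheaply spot the single-entry case that elides a whole column. A hypothetical usage sketch (compiles inside tools/perf, where util/strlist.h is on the include path):

#include <stdio.h>
#include "util/strlist.h"       /* from the perf source tree */

int main(void)
{
        struct strlist *list = strlist__new(true, "libc.so.6");

        if (list == NULL)
                return 1;

        /* a one-entry filter list: print it once and elide the column */
        if (strlist__nr_entries(list) == 1)
                printf("# dso: %s\n", strlist__entry(list, 0)->s);

        strlist__delete(list);
        return 0;
}
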
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 4683b67b5ee4..f40266b4845d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -65,6 +65,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
                self->syms = RB_ROOT;
                self->sym_priv_size = sym_priv_size;
                self->find_symbol = dso__find_symbol;
+               self->slen_calculated = 0;
        }

        return self;
@@ -373,36 +374,61 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
             idx < nr_entries; \
             ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))

-static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
-                                       GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym,
-                                       GElf_Shdr *shdr_dynsym,
-                                       size_t dynsym_idx, int verbose)
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, that
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ */
+static int dso__synthesize_plt_symbols(struct dso *self, int verbose)
 {
        uint32_t nr_rel_entries, idx;
        GElf_Sym sym;
        u64 plt_offset;
        GElf_Shdr shdr_plt;
        struct symbol *f;
-       GElf_Shdr shdr_rel_plt;
+       GElf_Shdr shdr_rel_plt, shdr_dynsym;
        Elf_Data *reldata, *syms, *symstrs;
-       Elf_Scn *scn_plt_rel, *scn_symstrs;
+       Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+       size_t dynsym_idx;
+       GElf_Ehdr ehdr;
        char sympltname[1024];
-       int nr = 0, symidx;
+       Elf *elf;
+       int nr = 0, symidx, fd, err = 0;
+
+       fd = open(self->name, O_RDONLY);
+       if (fd < 0)
+               goto out;
+
+       elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+       if (elf == NULL)
+               goto out_close;
+
+       if (gelf_getehdr(elf, &ehdr) == NULL)
+               goto out_elf_end;
+
+       scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym,
+                                        ".dynsym", &dynsym_idx);
+       if (scn_dynsym == NULL)
+               goto out_elf_end;

-       scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+       scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
                                          ".rela.plt", NULL);
        if (scn_plt_rel == NULL) {
-               scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt,
+               scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
                                                  ".rel.plt", NULL);
                if (scn_plt_rel == NULL)
-                       return 0;
+                       goto out_elf_end;
        }

+       err = -1;
+
        if (shdr_rel_plt.sh_link != dynsym_idx)
-               return 0;
+               goto out_elf_end;

-       if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL)
-               return 0;
+       if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+               goto out_elf_end;

        /*
         * Fetch the relocation section to find the indexes to the GOT
@@ -410,19 +436,19 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
         */
        reldata = elf_getdata(scn_plt_rel, NULL);
        if (reldata == NULL)
-               return -1;
+               goto out_elf_end;

        syms = elf_getdata(scn_dynsym, NULL);
        if (syms == NULL)
-               return -1;
+               goto out_elf_end;

-       scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link);
+       scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
        if (scn_symstrs == NULL)
-               return -1;
+               goto out_elf_end;

        symstrs = elf_getdata(scn_symstrs, NULL);
        if (symstrs == NULL)
-               return -1;
+               goto out_elf_end;

        nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
        plt_offset = shdr_plt.sh_offset;
@@ -441,7 +467,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
                        f = symbol__new(plt_offset, shdr_plt.sh_entsize,
                                        sympltname, self->sym_priv_size, 0, verbose);
                        if (!f)
-                               return -1;
+                               goto out_elf_end;

                        dso__insert_symbol(self, f);
                        ++nr;
@@ -459,19 +485,25 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf,
                        f = symbol__new(plt_offset, shdr_plt.sh_entsize,
                                        sympltname, self->sym_priv_size, 0, verbose);
                        if (!f)
-                               return -1;
+                               goto out_elf_end;

                        dso__insert_symbol(self, f);
                        ++nr;
                }
-       } else {
-               /*
-                * TODO: There are still one more shdr_rel_plt.sh_type
-                * I have to investigate, but probably should be ignored.
-                */
        }

-       return nr;
+       err = 0;
+out_elf_end:
+       elf_end(elf);
+out_close:
+       close(fd);
+
+       if (err == 0)
+               return nr;
+out:
+       fprintf(stderr, "%s: problems reading %s PLT info.\n",
+               __func__, self->name);
+       return 0;
 }

 static int dso__load_sym(struct dso *self, int fd, const char *name,
@@ -485,9 +517,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
        GElf_Shdr shdr;
        Elf_Data *syms;
        GElf_Sym sym;
-       Elf_Scn *sec, *sec_dynsym, *sec_strndx;
+       Elf_Scn *sec, *sec_strndx;
        Elf *elf;
-       size_t dynsym_idx;
        int nr = 0;

        elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
@@ -504,32 +535,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
                goto out_elf_end;
        }

-       /*
-        * We need to check if we have a .dynsym, so that we can handle the
-        * .plt, synthesizing its symbols, that aren't on the symtabs (be it
-        * .dynsym or .symtab)
-        */
-       sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr,
-                                        ".dynsym", &dynsym_idx);
-       if (sec_dynsym != NULL) {
-               nr = dso__synthesize_plt_symbols(self, elf, &ehdr,
-                                                sec_dynsym, &shdr,
-                                                dynsym_idx, verbose);
-               if (nr < 0)
-                       goto out_elf_end;
-       }
-
-       /*
-        * But if we have a full .symtab (that is a superset of .dynsym) we
-        * should add the symbols not in the .dynsyn
-        */
        sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL);
        if (sec == NULL) {
-               if (sec_dynsym == NULL)
+               sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL);
+               if (sec == NULL)
                        goto out_elf_end;
-
-               sec = sec_dynsym;
-               gelf_getshdr(sec, &shdr);
        }

        syms = elf_getdata(sec, NULL);
@@ -668,6 +678,11 @@ more:
        if (!ret)
                goto more;

+       if (ret > 0) {
+               int nr_plt = dso__synthesize_plt_symbols(self, verbose);
+               if (nr_plt > 0)
+                       ret += nr_plt;
+       }
 out:
        free(name);
        return ret;
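
dso__synthesize_plt_symbols() now opens the DSO itself and finds .dynsym before walking the PLT relocations. For readers new to libelf, here is a minimal standalone sketch of that section lookup, under the assumption of an elfutils-style libelf providing elf_getshdrstrndx() (link with -lelf):

#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc < 2 || elf_version(EV_CURRENT) == EV_NONE)
                return 1;

        int fd = open(argv[1], O_RDONLY);
        Elf *elf = elf_begin(fd, ELF_C_READ, NULL);
        size_t shstrndx;
        Elf_Scn *scn = NULL;

        if (elf == NULL || elf_getshdrstrndx(elf, &shstrndx) != 0)
                return 1;

        /* walk the section headers looking for .dynsym by name */
        while ((scn = elf_nextscn(elf, scn)) != NULL) {
                GElf_Shdr shdr;
                const char *name;

                if (gelf_getshdr(scn, &shdr) == NULL)
                        continue;
                name = elf_strptr(elf, shstrndx, shdr.sh_name);
                if (name && strcmp(name, ".dynsym") == 0 && shdr.sh_entsize)
                        printf(".dynsym: %zu symbols\n",
                               (size_t)(shdr.sh_size / shdr.sh_entsize));
        }

        elf_end(elf);
        close(fd);
        return 0;
}
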
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 7918cffb23cd..2f92b21c712d 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -25,6 +25,7 @@ struct dso {
        struct symbol *(*find_symbol)(struct dso *, u64 ip);
        unsigned int sym_priv_size;
        unsigned char adjust_symbols;
+       unsigned char slen_calculated;
        char name[0];
 };
