author		Linus Torvalds <torvalds@linux-foundation.org>	2012-06-08 12:14:46 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-08 12:14:46 -0400
commit		106544d81d88069c2df66ebdee42a4ba8fcd25e9 (patch)
tree		9a6233100699c28fafde9eaa1751de7ddc173f58
parent		03d8f5408235bfd2781142458e0c0671530e74e7 (diff)
parent		db0dc75d6403b6663c0eab4c6ccb672eb9b2ed72 (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "A bit larger than what I'd wish for - half of it is due to hw driver
  updates for Intel Ivy Bridge, whose details were recently released;
  cycles:pp should work there now too, amongst other things (but we
  are generally making exceptions for hardware enablement of this
  type).

  There are also callchain fixes in it, responding to mostly
  theoretical (but valid) concerns.  The tooling side sports perf.data
  endianness/portability fixes which did not make it for the merge
  window - and various other fixes as well."
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
perf/x86: Check user address explicitly in copy_from_user_nmi()
perf/x86: Check if user fp is valid
perf: Limit callchains to 127
perf/x86: Allow multiple stacks
perf/x86: Update SNB PEBS constraints
perf/x86: Enable/Add IvyBridge hardware support
perf/x86: Implement cycles:p for SNB/IVB
perf/x86: Fix Intel shared extra MSR allocation
x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix
perf: Remove duplicate invocation on perf_event_for_each
perf uprobes: Remove unnecessary check before strlist__delete
perf symbols: Check for valid dso before creating map
perf evsel: Fix 32 bit values endianity swap for sample_id_all header
perf session: Handle endianity swap on sample_id_all header data
perf symbols: Handle different endians properly during symbol load
perf evlist: Pass third argument to ioctl explicitly
perf tools: Update ioctl documentation for PERF_IOC_FLAG_GROUP
perf tools: Make --version show kernel version instead of pull req tag
perf tools: Check if callchain is corrupted
perf callchain: Make callchain cursors TLS
...
29 files changed, 357 insertions(+), 120 deletions(-)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 04cd6882308e..e1f3a17034fc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -33,9 +33,8 @@
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
 #define user_addr_max() (current_thread_info()->addr_limit.seg)
 #define __addr_ok(addr)						\
-	((unsigned long __force)(addr) <			\
-	 (current_thread_info()->addr_limit.seg))
+	((unsigned long __force)(addr) < user_addr_max())
 
 /*
  * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
 
-#define __range_not_ok(addr, size)				\
+#define __range_not_ok(addr, size, limit)			\
 ({								\
 	unsigned long flag, roksum;				\
 	__chk_user_ptr(addr);					\
 	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"	\
 	    : "=&r" (flag), "=r" (roksum)			\
 	    : "1" (addr), "g" ((long)(size)),			\
-	      "rm" (current_thread_info()->addr_limit.seg));	\
+	      "rm" (limit));					\
 	flag;							\
 })
 
@@ -77,7 +76,8 @@
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+#define access_ok(type, addr, size) \
+	(likely(__range_not_ok(addr, size, user_addr_max()) == 0))
 
 /*
  * The exception table consists of pairs of addresses relative to the
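
The asm in __range_not_ok() packs an overflow-safe comparison into four instructions; a plain-C sketch of the same predicate (an illustration, not the kernel's code) makes the new limit parameter's role explicit:

	/* Sketch of what the add/sbb/cmp/sbb sequence computes: the range
	 * [addr, addr + size) is rejected if the addition wraps around or
	 * if its end runs past the supplied limit (user_addr_max() for
	 * access_ok(), TASK_SIZE for the perf NMI paths). */
	static inline unsigned long range_not_ok(unsigned long addr,
						 unsigned long size,
						 unsigned long limit)
	{
		unsigned long sum = addr + size;

		return sum < addr || sum > limit; /* carry, or past the limit */
	}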
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..c4706cf9c011 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
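
valid_user_frame() replaces two slightly different ad-hoc tests (fp < compat_ptr(regs->sp) and (unsigned long)fp < regs->sp). A sketch of the property the new test enforces, assuming the usual meaning of TASK_SIZE as the top of the user address range:

	/* Sketch only -- the real helper delegates to __range_not_ok().
	 * The saved frame must lie entirely below TASK_SIZE, so a
	 * corrupted frame pointer aimed at kernel memory stops the walk
	 * instead of being dereferenced from NMI context. */
	static inline int frame_is_plausible(unsigned long fp, unsigned long size,
					     unsigned long task_size)
	{
		return fp + size >= fp && fp + size <= task_size;
	}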
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 		intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
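
The raw event encoding that X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16) denotes can be assembled by hand from the architectural PERFEVTSEL field layout. The sketch below (a standalone illustration, not kernel code) prints the value a user would pass as a raw event for the SNB/IVB cycles:p alias:

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		/* PERFEVTSEL layout: event select in bits 0-7, umask in
		 * bits 8-15, INV at bit 23, CMASK in bits 24-31. */
		uint64_t event = 0xc2, umask = 0x01, inv = 1, cmask = 16;
		uint64_t config = event | (umask << 8) | (inv << 23) | (cmask << 24);

		/* UOPS_RETIRED.ALL, inverted, counter-mask 16: fires every
		 * cycle, but through a PEBS-capable event. */
		printf("0x%" PRIx64 "\n", config); /* prints 0x108001c2 */
		return 0;
	}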
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index f61ee67ec00f..677b1ed184c9 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 
 #include <asm/word-at-a-time.h>
+#include <linux/sched.h>
 
 /*
  * best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	void *map;
 	int ret;
 
+	if (__range_not_ok(from, n, TASK_SIZE))
+		return len;
+
 	do {
 		ret = __get_user_pages_fast(addr, 1, 0, &page);
 		if (!ret)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 819137904428..5d7e51f3fd28 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -28,7 +28,7 @@
 # - (66): the last prefix is 0x66
 # - (F3): the last prefix is 0xF3
 # - (F2): the last prefix is 0xF2
-#
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
 
 Table: one byte opcode
 Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
 b5: LGS Gv,Mp
 b6: MOVZX Gv,Eb
 b7: MOVZX Gv,Ew
-b8: JMPE | POPCNT Gv,Ev (F3)
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
 b9: Grp10 (1A)
 ba: Grp8 Ev,Ib (1A)
 bb: BTC Ev,Gv
-bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
-bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
 be: MOVSX Gv,Eb
 bf: MOVSX Gv,Ew
 # 0x0f 0xc0-0xcf
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 5f6a5b6c3a15..ddcf39b1a18d 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -66,9 +66,10 @@ BEGIN {
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO
 
-	lprefix1_expr = "\\(66\\)"
+	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
-	lprefix3_expr = "\\(F2\\)"
+	lprefix3_expr = "\\((F2|!F3)\\)"
+	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4
 
 # All opcodes starting with lower-case 'v' or with (v1) superscript
@@ -333,13 +334,16 @@ function convert_operands(count,opnd,       i,j,imm,mod)
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix2_expr)) {
+		}
+		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix3_expr)) {
+		}
+		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
-		} else {
+		}
+		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..45db49f64bb4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,6 +555,8 @@ enum perf_event_type {
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
+#define PERF_MAX_STACK_DEPTH		127
+
 enum perf_callchain_context {
 	PERF_CONTEXT_HV			= (__u64)-32,
 	PERF_CONTEXT_KERNEL		= (__u64)-128,
@@ -609,8 +611,6 @@ struct perf_guest_info_callbacks {
 #include <linux/sysfs.h>
 #include <asm/local.h>
 
-#define PERF_MAX_STACK_DEPTH		255
-
 struct perf_callchain_entry {
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5b06cbbf6931..f85c0154b333 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event,
 	event = event->group_leader;
 
 	perf_event_for_each_child(event, func);
-	func(event);
 	list_for_each_entry(sibling, &event->sibling_list, group_entry)
 		perf_event_for_each_child(sibling, func);
 	mutex_unlock(&ctx->mutex);
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5476bc0a1eac..b4b572e8c100 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,6 @@
 tools/perf
+tools/scripts
+tools/lib/traceevent
 include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8c767c6bca91..25249f76329d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if (symbol_conf.use_callchain) {
 		err = callchain_append(he->callchain,
-				       &evsel->hists.callchain_cursor,
+				       &callchain_cursor,
 				       sample->period);
 		if (err)
 			return err;
@@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 	 * so we don't allocated the extra space needed because the stdio
 	 * code will not use it.
	 */
-	if (al->sym != NULL && use_browser > 0) {
+	if (he->ms.sym != NULL && use_browser > 0) {
 		struct annotation *notes = symbol__annotation(he->ms.sym);
 
 		assert(evsel != NULL);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 62ae30d34fa6..262589991ea4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1129,7 +1129,7 @@ static int add_default_attributes(void)
 		return 0;
 
 	if (!evsel_list->nr_entries) {
-		if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0)
+		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
 			return -1;
 	}
 
@@ -1139,21 +1139,21 @@ static int add_default_attributes(void)
 		return 0;
 
 	/* Append detailed run extra attributes: */
-	if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0)
+	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 2)
 		return 0;
 
 	/* Append very detailed run extra attributes: */
-	if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0)
+	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
 		return -1;
 
 	if (detailed_run < 3)
 		return 0;
 
 	/* Append very, very detailed run extra attributes: */
-	return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs);
+	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
 int cmd_stat(int argc, const char **argv, const char *prefix __used)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 871b540293e1..6bb0277b7dfe 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	}
 
 	if (symbol_conf.use_callchain) {
-		err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
+		err = callchain_append(he->callchain, &callchain_cursor,
 				       sample->period);
 		if (err)
 			return;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index bd0bb1b1279b..67e5d0cace85 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via
 prctl.  When a counter is disabled, it doesn't count or generate
 events but does continue to exist and maintain its count value.
 
-An individual counter or counter group can be enabled with
+An individual counter can be enabled with
 
-	ioctl(fd, PERF_EVENT_IOC_ENABLE);
+	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 
 or disabled with
 
-	ioctl(fd, PERF_EVENT_IOC_DISABLE);
+	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 
+For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument.
 Enabling or disabling the leader of a group enables or disables the
 whole group; that is, while the group leader is disabled, none of the
 counters in the group will count.  Enabling or disabling a member of a
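
A short usage sketch of the documented calling convention (leader_fd is a hypothetical group-leader file descriptor obtained from perf_event_open()):

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* Enable every counter in a group through its leader. */
	static int group_enable(int leader_fd)
	{
		return ioctl(leader_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	}

	/* Disable a single counter; the third argument is 0, not omitted. */
	static int counter_disable(int fd)
	{
		return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	}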
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 4deea6aaf927..34b1c46eaf42 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx,
 		"q/ESC/CTRL+C  Exit\n\n"
 		"->            Go to target\n"
 		"<-            Exit\n"
-		"h             Cycle thru hottest instructions\n"
+		"H             Cycle thru hottest instructions\n"
 		"j             Toggle showing jump to target arrows\n"
 		"J             Toggle showing number of jump sources on targets\n"
 		"n             Search next string\n"
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index ad73300f7bac..95264f304179 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -12,7 +12,7 @@ LF='
 # First check if there is a .git to get the version from git describe
 # otherwise try to get the version from the kernel makefile
 if test -d ../../.git -o -f ../../.git &&
-	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+	VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) &&
 	case "$VN" in
 	*$LF*) (exit 1) ;;
 	v[0-9]*)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f7106a8d9a4..3a6bff47614f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,6 +18,8 @@
 #include "util.h"
 #include "callchain.h"
 
+__thread struct callchain_cursor callchain_cursor;
+
 bool ip_callchain__valid(struct ip_callchain *chain,
 			 const union perf_event *event)
 {
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7f9c0f1ae3a9..3bdb407f9cd9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -76,6 +76,8 @@ struct callchain_cursor {
 	struct callchain_cursor_node	*curr;
 };
 
+extern __thread struct callchain_cursor callchain_cursor;
+
 static inline void callchain_init(struct callchain_root *root)
 {
 	INIT_LIST_HEAD(&root->node.siblings);
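
Declaring the cursor __thread gives each thread its own instance, which is what lets the per-hists cursor be dropped elsewhere in this series. A minimal standalone illustration of the storage class (not perf code):

	#include <pthread.h>
	#include <stdio.h>

	static __thread int counter;	/* one copy per thread, like callchain_cursor */

	static void *worker(void *arg)
	{
		counter++;	/* touches only this thread's copy */
		printf("thread %ld sees counter=%d\n", (long)arg, counter); /* always 1 */
		return NULL;
	}

	int main(void)
	{
		pthread_t t1, t2;

		pthread_create(&t1, NULL, worker, (void *)1L);
		pthread_create(&t2, NULL, worker, (void *)2L);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		return 0;
	}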
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4ac5f5ae4ce9..7400fb3fc50c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -159,6 +159,17 @@ out_delete_partial_list:
 	return -1;
 }
 
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+				     struct perf_event_attr *attrs, size_t nr_attrs)
+{
+	size_t i;
+
+	for (i = 0; i < nr_attrs; i++)
+		event_attr_init(attrs + i);
+
+	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
+}
+
 static int trace_event__id(const char *evname)
 {
 	char *filename, *colon;
@@ -263,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
 			for (thread = 0; thread < evlist->threads->nr; thread++)
-				ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE);
+				ioctl(FD(pos, cpu, thread),
+				      PERF_EVENT_IOC_DISABLE, 0);
 		}
 	}
 }
@@ -276,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
 			for (thread = 0; thread < evlist->threads->nr; thread++)
-				ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE);
+				ioctl(FD(pos, cpu, thread),
+				      PERF_EVENT_IOC_ENABLE, 0);
 		}
 	}
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 58abb63ac13a..989bee9624c2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
 int perf_evlist__add_default(struct perf_evlist *evlist);
 int perf_evlist__add_attrs(struct perf_evlist *evlist,
 			   struct perf_event_attr *attrs, size_t nr_attrs);
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+				     struct perf_event_attr *attrs, size_t nr_attrs);
 int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
 				 const char *tracepoints[], size_t nr_tracepoints);
 int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
@@ -62,6 +64,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
 
 #define perf_evlist__add_attrs_array(evlist, array) \
 	perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array))
+#define perf_evlist__add_default_attrs(evlist, array) \
+	__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
 
 #define perf_evlist__add_tracepoints_array(evlist, array) \
 	perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array))
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 91d19138f3ec..9f6cebd798ee 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
 }
 
 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
-				       struct perf_sample *sample)
+				       struct perf_sample *sample,
+				       bool swapped)
 {
 	const u64 *array = event->sample.array;
+	union u64_swap u;
 
 	array += ((event->header.size -
 		   sizeof(event->header)) / sizeof(u64)) - 1;
 
 	if (type & PERF_SAMPLE_CPU) {
-		u32 *p = (u32 *)array;
-		sample->cpu = *p;
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+		}
+
+		sample->cpu = u.val32[0];
 		array--;
 	}
 
@@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
 	}
 
 	if (type & PERF_SAMPLE_TID) {
-		u32 *p = (u32 *)array;
-		sample->pid = p[0];
-		sample->tid = p[1];
+		u.val64 = *array;
+		if (swapped) {
+			/* undo swap of u64, then swap on individual u32s */
+			u.val64 = bswap_64(u.val64);
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+		}
+
+		sample->pid = u.val32[0];
+		sample->tid = u.val32[1];
 	}
 
 	return 0;
@@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
 	if (event->header.type != PERF_RECORD_SAMPLE) {
 		if (!sample_id_all)
 			return 0;
-		return perf_event__parse_id_sample(event, type, data);
+		return perf_event__parse_id_sample(event, type, data, swapped);
 	}
 
 	array = event->sample.array;
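
The "undo swap of u64, then swap on individual u32s" dance exists because the reader byte-swaps the sample payload in u64 units, which both reverses each u32's bytes and exchanges the two u32s' positions within the word. A standalone sketch of the fixup:

	#include <byteswap.h>
	#include <stdint.h>

	union u64_swap {
		uint64_t val64;
		uint32_t val32[2];
	};

	/* After a blanket bswap_64() over the stream, a packed pair of u32s
	 * (e.g. pid/tid or cpu/res) has its members in the right slots only
	 * once each u32 is swapped back individually. */
	static void fixup_u32_pair(union u64_swap *u)
	{
		u->val64 = bswap_64(u->val64);		/* undo the u64 swap */
		u->val32[0] = bswap_32(u->val32[0]);	/* re-swap each half */
		u->val32[1] = bswap_32(u->val32[1]);
	}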
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1293b5ebea4d..514e2a4b367d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he)
  * collapse the histogram
  */
 
-static bool hists__collapse_insert_entry(struct hists *hists,
+static bool hists__collapse_insert_entry(struct hists *hists __used,
 					 struct rb_root *root,
 					 struct hist_entry *he)
 {
@@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists,
 		iter->period += he->period;
 		iter->nr_events += he->nr_events;
 		if (symbol_conf.use_callchain) {
-			callchain_cursor_reset(&hists->callchain_cursor);
-			callchain_merge(&hists->callchain_cursor, iter->callchain,
+			callchain_cursor_reset(&callchain_cursor);
+			callchain_merge(&callchain_cursor,
+					iter->callchain,
 					he->callchain);
 		}
 		hist_entry__free(he);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index cfc64e293f90..34bb556d6219 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -67,8 +67,6 @@ struct hists {
 	struct events_stats	stats;
 	u64			event_stream;
 	u16			col_len[HISTC_NR_COLS];
-	/* Best would be to reuse the session callchain cursor */
-	struct callchain_cursor	callchain_cursor;
 };
 
 struct hist_entry *__hists__add_entry(struct hists *self,
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 1915de20dcac..3322b8446e89 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -57,6 +57,10 @@ void setup_pager(void)
 	}
 	if (!pager)
 		pager = getenv("PAGER");
+	if (!pager) {
+		if (!access("/usr/bin/pager", X_OK))
+			pager = "/usr/bin/pager";
+	}
 	if (!pager)
 		pager = "less";
 	else if (!*pager || !strcmp(pager, "cat"))
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 59dccc98b554..0dda25d82d06 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist)
 
 error:
 	if (kfd >= 0) {
-		if (namelist)
-			strlist__delete(namelist);
-
+		strlist__delete(namelist);
 		close(kfd);
 	}
 
 	if (ufd >= 0) {
-		if (unamelist)
-			strlist__delete(unamelist);
-
+		strlist__delete(unamelist);
 		close(ufd);
 	}
 
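
The dropped NULL checks rely on strlist__delete() tolerating a NULL list, the same convention as free(). A sketch of the assumed guard inside the destructor (illustration only, not the actual implementation):

	/* Assumed shape of the destructor that makes the callers' NULL
	 * checks redundant. */
	void strlist__delete(struct strlist *slist)
	{
		if (slist == NULL)
			return;
		/* ... free all entries, then the list itself ... */
	}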
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 93d355d27109..2600916efa83 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self, | |||
288 | return bi; | 288 | return bi; |
289 | } | 289 | } |
290 | 290 | ||
291 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | 291 | int machine__resolve_callchain(struct machine *self, |
292 | struct perf_evsel *evsel __used, | ||
292 | struct thread *thread, | 293 | struct thread *thread, |
293 | struct ip_callchain *chain, | 294 | struct ip_callchain *chain, |
294 | struct symbol **parent) | 295 | struct symbol **parent) |
@@ -297,7 +298,12 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | |||
297 | unsigned int i; | 298 | unsigned int i; |
298 | int err; | 299 | int err; |
299 | 300 | ||
300 | callchain_cursor_reset(&evsel->hists.callchain_cursor); | 301 | callchain_cursor_reset(&callchain_cursor); |
302 | |||
303 | if (chain->nr > PERF_MAX_STACK_DEPTH) { | ||
304 | pr_warning("corrupted callchain. skipping...\n"); | ||
305 | return 0; | ||
306 | } | ||
301 | 307 | ||
302 | for (i = 0; i < chain->nr; i++) { | 308 | for (i = 0; i < chain->nr; i++) { |
303 | u64 ip; | 309 | u64 ip; |
@@ -317,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | |||
317 | case PERF_CONTEXT_USER: | 323 | case PERF_CONTEXT_USER: |
318 | cpumode = PERF_RECORD_MISC_USER; break; | 324 | cpumode = PERF_RECORD_MISC_USER; break; |
319 | default: | 325 | default: |
320 | break; | 326 | pr_debug("invalid callchain context: " |
327 | "%"PRId64"\n", (s64) ip); | ||
328 | /* | ||
329 | * It seems the callchain is corrupted. | ||
330 | * Discard all. | ||
331 | */ | ||
332 | callchain_cursor_reset(&callchain_cursor); | ||
333 | return 0; | ||
321 | } | 334 | } |
322 | continue; | 335 | continue; |
323 | } | 336 | } |
@@ -333,7 +346,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | |||
333 | break; | 346 | break; |
334 | } | 347 | } |
335 | 348 | ||
336 | err = callchain_cursor_append(&evsel->hists.callchain_cursor, | 349 | err = callchain_cursor_append(&callchain_cursor, |
337 | ip, al.map, al.sym); | 350 | ip, al.map, al.sym); |
338 | if (err) | 351 | if (err) |
339 | return err; | 352 | return err; |
@@ -441,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size) | |||
441 | } | 454 | } |
442 | } | 455 | } |
443 | 456 | ||
444 | static void perf_event__all64_swap(union perf_event *event) | 457 | static void swap_sample_id_all(union perf_event *event, void *data) |
458 | { | ||
459 | void *end = (void *) event + event->header.size; | ||
460 | int size = end - data; | ||
461 | |||
462 | BUG_ON(size % sizeof(u64)); | ||
463 | mem_bswap_64(data, size); | ||
464 | } | ||
465 | |||
466 | static void perf_event__all64_swap(union perf_event *event, | ||
467 | bool sample_id_all __used) | ||
445 | { | 468 | { |
446 | struct perf_event_header *hdr = &event->header; | 469 | struct perf_event_header *hdr = &event->header; |
447 | mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); | 470 | mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); |
448 | } | 471 | } |
449 | 472 | ||
450 | static void perf_event__comm_swap(union perf_event *event) | 473 | static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) |
451 | { | 474 | { |
452 | event->comm.pid = bswap_32(event->comm.pid); | 475 | event->comm.pid = bswap_32(event->comm.pid); |
453 | event->comm.tid = bswap_32(event->comm.tid); | 476 | event->comm.tid = bswap_32(event->comm.tid); |
477 | |||
478 | if (sample_id_all) { | ||
479 | void *data = &event->comm.comm; | ||
480 | |||
481 | data += ALIGN(strlen(data) + 1, sizeof(u64)); | ||
482 | swap_sample_id_all(event, data); | ||
483 | } | ||
454 | } | 484 | } |
455 | 485 | ||
456 | static void perf_event__mmap_swap(union perf_event *event) | 486 | static void perf_event__mmap_swap(union perf_event *event, |
487 | bool sample_id_all) | ||
457 | { | 488 | { |
458 | event->mmap.pid = bswap_32(event->mmap.pid); | 489 | event->mmap.pid = bswap_32(event->mmap.pid); |
459 | event->mmap.tid = bswap_32(event->mmap.tid); | 490 | event->mmap.tid = bswap_32(event->mmap.tid); |
460 | event->mmap.start = bswap_64(event->mmap.start); | 491 | event->mmap.start = bswap_64(event->mmap.start); |
461 | event->mmap.len = bswap_64(event->mmap.len); | 492 | event->mmap.len = bswap_64(event->mmap.len); |
462 | event->mmap.pgoff = bswap_64(event->mmap.pgoff); | 493 | event->mmap.pgoff = bswap_64(event->mmap.pgoff); |
494 | |||
495 | if (sample_id_all) { | ||
496 | void *data = &event->mmap.filename; | ||
497 | |||
498 | data += ALIGN(strlen(data) + 1, sizeof(u64)); | ||
499 | swap_sample_id_all(event, data); | ||
500 | } | ||
463 | } | 501 | } |
464 | 502 | ||
465 | static void perf_event__task_swap(union perf_event *event) | 503 | static void perf_event__task_swap(union perf_event *event, bool sample_id_all) |
466 | { | 504 | { |
467 | event->fork.pid = bswap_32(event->fork.pid); | 505 | event->fork.pid = bswap_32(event->fork.pid); |
468 | event->fork.tid = bswap_32(event->fork.tid); | 506 | event->fork.tid = bswap_32(event->fork.tid); |
469 | event->fork.ppid = bswap_32(event->fork.ppid); | 507 | event->fork.ppid = bswap_32(event->fork.ppid); |
470 | event->fork.ptid = bswap_32(event->fork.ptid); | 508 | event->fork.ptid = bswap_32(event->fork.ptid); |
471 | event->fork.time = bswap_64(event->fork.time); | 509 | event->fork.time = bswap_64(event->fork.time); |
510 | |||
511 | if (sample_id_all) | ||
512 | swap_sample_id_all(event, &event->fork + 1); | ||
472 | } | 513 | } |
473 | 514 | ||
474 | static void perf_event__read_swap(union perf_event *event) | 515 | static void perf_event__read_swap(union perf_event *event, bool sample_id_all) |
475 | { | 516 | { |
476 | event->read.pid = bswap_32(event->read.pid); | 517 | event->read.pid = bswap_32(event->read.pid); |
477 | event->read.tid = bswap_32(event->read.tid); | 518 | event->read.tid = bswap_32(event->read.tid); |
@@ -479,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event) | |||
479 | event->read.time_enabled = bswap_64(event->read.time_enabled); | 520 | event->read.time_enabled = bswap_64(event->read.time_enabled); |
480 | event->read.time_running = bswap_64(event->read.time_running); | 521 | event->read.time_running = bswap_64(event->read.time_running); |
481 | event->read.id = bswap_64(event->read.id); | 522 | event->read.id = bswap_64(event->read.id); |
523 | |||
524 | if (sample_id_all) | ||
525 | swap_sample_id_all(event, &event->read + 1); | ||
482 | } | 526 | } |
483 | 527 | ||
484 | static u8 revbyte(u8 b) | 528 | static u8 revbyte(u8 b) |
@@ -530,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr) | |||
530 | swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); | 574 | swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); |
531 | } | 575 | } |
532 | 576 | ||
533 | static void perf_event__hdr_attr_swap(union perf_event *event) | 577 | static void perf_event__hdr_attr_swap(union perf_event *event, |
578 | bool sample_id_all __used) | ||
534 | { | 579 | { |
535 | size_t size; | 580 | size_t size; |
536 | 581 | ||
@@ -541,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) | |||
541 | mem_bswap_64(event->attr.id, size); | 586 | mem_bswap_64(event->attr.id, size); |
542 | } | 587 | } |
543 | 588 | ||
544 | static void perf_event__event_type_swap(union perf_event *event) | 589 | static void perf_event__event_type_swap(union perf_event *event, |
590 | bool sample_id_all __used) | ||
545 | { | 591 | { |
546 | event->event_type.event_type.event_id = | 592 | event->event_type.event_type.event_id = |
547 | bswap_64(event->event_type.event_type.event_id); | 593 | bswap_64(event->event_type.event_type.event_id); |
548 | } | 594 | } |
549 | 595 | ||
550 | static void perf_event__tracing_data_swap(union perf_event *event) | 596 | static void perf_event__tracing_data_swap(union perf_event *event, |
597 | bool sample_id_all __used) | ||
551 | { | 598 | { |
552 | event->tracing_data.size = bswap_32(event->tracing_data.size); | 599 | event->tracing_data.size = bswap_32(event->tracing_data.size); |
553 | } | 600 | } |
554 | 601 | ||
555 | typedef void (*perf_event__swap_op)(union perf_event *event); | 602 | typedef void (*perf_event__swap_op)(union perf_event *event, |
603 | bool sample_id_all); | ||
556 | 604 | ||
557 | static perf_event__swap_op perf_event__swap_ops[] = { | 605 | static perf_event__swap_op perf_event__swap_ops[] = { |
558 | [PERF_RECORD_MMAP] = perf_event__mmap_swap, | 606 | [PERF_RECORD_MMAP] = perf_event__mmap_swap, |
@@ -986,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union | |||
986 | } | 1034 | } |
987 | } | 1035 | } |
988 | 1036 | ||
1037 | static void event_swap(union perf_event *event, bool sample_id_all) | ||
1038 | { | ||
1039 | perf_event__swap_op swap; | ||
1040 | |||
1041 | swap = perf_event__swap_ops[event->header.type]; | ||
1042 | if (swap) | ||
1043 | swap(event, sample_id_all); | ||
1044 | } | ||
1045 | |||
989 | static int perf_session__process_event(struct perf_session *session, | 1046 | static int perf_session__process_event(struct perf_session *session, |
990 | union perf_event *event, | 1047 | union perf_event *event, |
991 | struct perf_tool *tool, | 1048 | struct perf_tool *tool, |
@@ -994,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session, | |||
994 | struct perf_sample sample; | 1051 | struct perf_sample sample; |
995 | int ret; | 1052 | int ret; |
996 | 1053 | ||
997 | if (session->header.needs_swap && | 1054 | if (session->header.needs_swap) |
998 | perf_event__swap_ops[event->header.type]) | 1055 | event_swap(event, session->sample_id_all); |
999 | perf_event__swap_ops[event->header.type](event); | ||
1000 | 1056 | ||
1001 | if (event->header.type >= PERF_RECORD_HEADER_MAX) | 1057 | if (event->header.type >= PERF_RECORD_HEADER_MAX) |
1002 | return -EINVAL; | 1058 | return -EINVAL; |
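One ordering quirk worth flagging: event_swap() indexes perf_event__swap_ops[] with event->header.type before the `>= PERF_RECORD_HEADER_MAX` sanity check a couple of lines below. The pre-patch code had the same ordering, so the refactor changes nothing here; it just makes the call site read as a plain two-liner.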
@@ -1428,7 +1484,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1428 | int print_sym, int print_dso, int print_symoffset) | 1484 | int print_sym, int print_dso, int print_symoffset) |
1429 | { | 1485 | { |
1430 | struct addr_location al; | 1486 | struct addr_location al; |
1431 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; | ||
1432 | struct callchain_cursor_node *node; | 1487 | struct callchain_cursor_node *node; |
1433 | 1488 | ||
1434 | if (perf_event__preprocess_sample(event, machine, &al, sample, | 1489 | if (perf_event__preprocess_sample(event, machine, &al, sample, |
@@ -1446,10 +1501,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1446 | error("Failed to resolve callchain. Skipping\n"); | 1501 | error("Failed to resolve callchain. Skipping\n"); |
1447 | return; | 1502 | return; |
1448 | } | 1503 | } |
1449 | callchain_cursor_commit(cursor); | 1504 | callchain_cursor_commit(&callchain_cursor); |
1450 | 1505 | ||
1451 | while (1) { | 1506 | while (1) { |
1452 | node = callchain_cursor_current(cursor); | 1507 | node = callchain_cursor_current(&callchain_cursor); |
1453 | if (!node) | 1508 | if (!node) |
1454 | break; | 1509 | break; |
1455 | 1510 | ||
@@ -1460,12 +1515,12 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1460 | } | 1515 | } |
1461 | if (print_dso) { | 1516 | if (print_dso) { |
1462 | printf(" ("); | 1517 | printf(" ("); |
1463 | map__fprintf_dsoname(al.map, stdout); | 1518 | map__fprintf_dsoname(node->map, stdout); |
1464 | printf(")"); | 1519 | printf(")"); |
1465 | } | 1520 | } |
1466 | printf("\n"); | 1521 | printf("\n"); |
1467 | 1522 | ||
1468 | callchain_cursor_advance(cursor); | 1523 | callchain_cursor_advance(&callchain_cursor); |
1469 | } | 1524 | } |
1470 | 1525 | ||
1471 | } else { | 1526 | } else { |
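Two distinct fixes land in this function. First, the per-evsel cursor is dropped in favor of the one shared callchain_cursor (made a thread-local variable elsewhere in this series), so resolution and printing agree on which cursor they walk. Second, the DSO name is now taken from node->map — the map of the frame currently being printed — rather than al.map, the map of the sampled IP; previously every interior frame was labeled with the leaf frame's DSO. The commit/current/advance protocol itself is easy to mimic; a self-contained sketch over a hypothetical linked node list:

#include <stdio.h>

/* Hypothetical, simplified stand-ins for the real cursor types. */
struct node {
        unsigned long long ip;
        const char *dsoname;    /* stands in for node->map's dso */
        struct node *next;
};

struct cursor {
        struct node *first;
        struct node *curr;
};

/* Rewind to the first node (cf. callchain_cursor_commit()). */
static void cursor_commit(struct cursor *c)   { c->curr = c->first; }

/* NULL once the walk is done (cf. callchain_cursor_current()). */
static struct node *cursor_current(struct cursor *c) { return c->curr; }

static void cursor_advance(struct cursor *c)  { c->curr = c->curr->next; }

static void print_chain(struct cursor *c)
{
        cursor_commit(c);
        while (1) {
                struct node *node = cursor_current(c);

                if (!node)
                        break;
                /* Use the frame's own map/dso, not the sampled IP's. */
                printf("\t%16llx (%s)\n", node->ip, node->dsoname);
                cursor_advance(c);
        }
}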
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e2ba8858f3e1..3e2e5ea0f03f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -323,6 +323,7 @@ struct dso *dso__new(const char *name) | |||
323 | dso->sorted_by_name = 0; | 323 | dso->sorted_by_name = 0; |
324 | dso->has_build_id = 0; | 324 | dso->has_build_id = 0; |
325 | dso->kernel = DSO_TYPE_USER; | 325 | dso->kernel = DSO_TYPE_USER; |
326 | dso->needs_swap = DSO_SWAP__UNSET; | ||
326 | INIT_LIST_HEAD(&dso->node); | 327 | INIT_LIST_HEAD(&dso->node); |
327 | } | 328 | } |
328 | 329 | ||
@@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) | |||
1156 | return -1; | 1157 | return -1; |
1157 | } | 1158 | } |
1158 | 1159 | ||
1160 | static int dso__swap_init(struct dso *dso, unsigned char eidata) | ||
1161 | { | ||
1162 | static unsigned int const endian = 1; | ||
1163 | |||
1164 | dso->needs_swap = DSO_SWAP__NO; | ||
1165 | |||
1166 | switch (eidata) { | ||
1167 | case ELFDATA2LSB: | ||
1168 | /* We are big endian, DSO is little endian. */ | ||
1169 | if (*(unsigned char const *)&endian != 1) | ||
1170 | dso->needs_swap = DSO_SWAP__YES; | ||
1171 | break; | ||
1172 | |||
1173 | case ELFDATA2MSB: | ||
1174 | /* We are little endian, DSO is big endian. */ | ||
1175 | if (*(unsigned char const *)&endian != 0) | ||
1176 | dso->needs_swap = DSO_SWAP__YES; | ||
1177 | break; | ||
1178 | |||
1179 | default: | ||
1180 | pr_err("unrecognized DSO data encoding %d\n", eidata); | ||
1181 | return -EINVAL; | ||
1182 | } | ||
1183 | |||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
1159 | static int dso__load_sym(struct dso *dso, struct map *map, const char *name, | 1187 | static int dso__load_sym(struct dso *dso, struct map *map, const char *name, |
1160 | int fd, symbol_filter_t filter, int kmodule, | 1188 | int fd, symbol_filter_t filter, int kmodule, |
1161 | int want_symtab) | 1189 | int want_symtab) |
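dso__swap_init() resolves needs_swap by comparing the object's declared encoding against the host's, and it learns the host's byte order with a classic run-time probe: store the integer 1 and look at its lowest-addressed byte, which is 1 on little-endian and 0 on big-endian machines. The probe in isolation, as a runnable sketch:

#include <stdio.h>

/* Run-time byte-order probe: inspect the lowest-addressed byte of a
 * known integer. Yields 1 on little-endian hosts, 0 on big-endian. */
static int host_is_little_endian(void)
{
        static const unsigned int endian = 1;

        return *(const unsigned char *)&endian == 1;
}

int main(void)
{
        /* A swap is needed only when host and object disagree, e.g.
         * a big-endian DSO analyzed on an x86 box. */
        printf("host is %s-endian\n",
               host_is_little_endian() ? "little" : "big");
        return 0;
}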
@@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, | |||
1187 | goto out_elf_end; | 1215 | goto out_elf_end; |
1188 | } | 1216 | } |
1189 | 1217 | ||
1218 | if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) | ||
1219 | goto out_elf_end; | ||
1220 | |||
1190 | /* Always reject images with a mismatched build-id: */ | 1221 | /* Always reject images with a mismatched build-id: */ |
1191 | if (dso->has_build_id) { | 1222 | if (dso->has_build_id) { |
1192 | u8 build_id[BUILD_ID_SIZE]; | 1223 | u8 build_id[BUILD_ID_SIZE]; |
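The eidata argument is the ELF identification byte e_ident[EI_DATA] (ELFDATA2LSB or ELFDATA2MSB) from the header the surrounding function has already parsed into ehdr. For reference, fetching that byte with libelf in a standalone program would look roughly like the following — a hedged sketch with error handling pared down:

#include <elf.h>
#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        GElf_Ehdr ehdr;
        Elf *elf;
        int fd;

        if (argc != 2 || elf_version(EV_CURRENT) == EV_NONE)
                return 1;

        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;

        elf = elf_begin(fd, ELF_C_READ, NULL);
        if (elf && gelf_getehdr(elf, &ehdr))
                printf("EI_DATA = %d (%s)\n", ehdr.e_ident[EI_DATA],
                       ehdr.e_ident[EI_DATA] == ELFDATA2LSB ? "LSB" :
                       ehdr.e_ident[EI_DATA] == ELFDATA2MSB ? "MSB" :
                       "unknown");
        elf_end(elf);
        close(fd);
        return 0;
}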
@@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, | |||
1272 | if (opdsec && sym.st_shndx == opdidx) { | 1303 | if (opdsec && sym.st_shndx == opdidx) { |
1273 | u32 offset = sym.st_value - opdshdr.sh_addr; | 1304 | u32 offset = sym.st_value - opdshdr.sh_addr; |
1274 | u64 *opd = opddata->d_buf + offset; | 1305 | u64 *opd = opddata->d_buf + offset; |
1275 | sym.st_value = *opd; | 1306 | sym.st_value = DSO__SWAP(dso, u64, *opd); |
1276 | sym.st_shndx = elf_addr_to_index(elf, sym.st_value); | 1307 | sym.st_shndx = elf_addr_to_index(elf, sym.st_value); |
1277 | } | 1308 | } |
1278 | 1309 | ||
@@ -2786,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, | |||
2786 | 2817 | ||
2787 | struct map *dso__new_map(const char *name) | 2818 | struct map *dso__new_map(const char *name) |
2788 | { | 2819 | { |
2820 | struct map *map = NULL; | ||
2789 | struct dso *dso = dso__new(name); | 2821 | struct dso *dso = dso__new(name); |
2790 | struct map *map = map__new2(0, dso, MAP__FUNCTION); | 2822 | |
2823 | if (dso) | ||
2824 | map = map__new2(0, dso, MAP__FUNCTION); | ||
2791 | 2825 | ||
2792 | return map; | 2826 | return map; |
2793 | } | 2827 | } |
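The dso__new_map() hunk is a straightforward NULL-safety fix: map is preinitialized to NULL and map__new2() is only attempted once dso__new() is known to have succeeded, so an allocation failure now surfaces as the NULL return callers already have to handle instead of handing map__new2() a NULL dso.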
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5649d63798cb..af0752b1aca1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
10 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
11 | #include <stdio.h> | 11 | #include <stdio.h> |
12 | #include <byteswap.h> | ||
12 | 13 | ||
13 | #ifdef HAVE_CPLUS_DEMANGLE | 14 | #ifdef HAVE_CPLUS_DEMANGLE |
14 | extern char *cplus_demangle(const char *, int); | 15 | extern char *cplus_demangle(const char *, int); |
@@ -160,11 +161,18 @@ enum dso_kernel_type { | |||
160 | DSO_TYPE_GUEST_KERNEL | 161 | DSO_TYPE_GUEST_KERNEL |
161 | }; | 162 | }; |
162 | 163 | ||
164 | enum dso_swap_type { | ||
165 | DSO_SWAP__UNSET, | ||
166 | DSO_SWAP__NO, | ||
167 | DSO_SWAP__YES, | ||
168 | }; | ||
169 | |||
163 | struct dso { | 170 | struct dso { |
164 | struct list_head node; | 171 | struct list_head node; |
165 | struct rb_root symbols[MAP__NR_TYPES]; | 172 | struct rb_root symbols[MAP__NR_TYPES]; |
166 | struct rb_root symbol_names[MAP__NR_TYPES]; | 173 | struct rb_root symbol_names[MAP__NR_TYPES]; |
167 | enum dso_kernel_type kernel; | 174 | enum dso_kernel_type kernel; |
175 | enum dso_swap_type needs_swap; | ||
168 | u8 adjust_symbols:1; | 176 | u8 adjust_symbols:1; |
169 | u8 has_build_id:1; | 177 | u8 has_build_id:1; |
170 | u8 hit:1; | 178 | u8 hit:1; |
@@ -182,6 +190,28 @@ struct dso { | |||
182 | char name[0]; | 190 | char name[0]; |
183 | }; | 191 | }; |
184 | 192 | ||
193 | #define DSO__SWAP(dso, type, val) \ | ||
194 | ({ \ | ||
195 | type ____r = val; \ | ||
196 | BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ | ||
197 | if (dso->needs_swap == DSO_SWAP__YES) { \ | ||
198 | switch (sizeof(____r)) { \ | ||
199 | case 2: \ | ||
200 | ____r = bswap_16(val); \ | ||
201 | break; \ | ||
202 | case 4: \ | ||
203 | ____r = bswap_32(val); \ | ||
204 | break; \ | ||
205 | case 8: \ | ||
206 | ____r = bswap_64(val); \ | ||
207 | break; \ | ||
208 | default: \ | ||
209 | BUG_ON(1); \ | ||
210 | } \ | ||
211 | } \ | ||
212 | ____r; \ | ||
213 | }) | ||
214 | |||
185 | struct dso *dso__new(const char *name); | 215 | struct dso *dso__new(const char *name); |
186 | void dso__delete(struct dso *dso); | 216 | void dso__delete(struct dso *dso); |
187 | 217 | ||
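DSO__SWAP is a GNU statement expression whose switch on sizeof(____r) collapses at compile time, so the one macro serves u16, u32 and u64 fields alike — the .opd hunk above uses it on 64-bit PPC64 function-descriptor entries. The tri-state enum earns its keep through the BUG_ON: DSO_SWAP__UNSET distinguishes "dso__swap_init() never ran" from a legitimate DSO_SWAP__NO, turning use-before-init into a loud failure instead of silently unswapped symbols. The same idiom, shrunk to a standalone sketch with the dso plumbing replaced by a plain flag:

#include <assert.h>
#include <byteswap.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as DSO__SWAP, minus the dso: a GNU statement expression
 * whose size dispatch is resolved from sizeof(type) at compile time. */
#define COND_SWAP(needs_swap, type, val)                        \
({                                                              \
        type ____r = (val);                                     \
        if (needs_swap) {                                       \
                switch (sizeof(____r)) {                        \
                case 2: ____r = bswap_16(val); break;           \
                case 4: ____r = bswap_32(val); break;           \
                case 8: ____r = bswap_64(val); break;           \
                default: assert(0);                             \
                }                                               \
        }                                                       \
        ____r;                                                  \
})

int main(void)
{
        uint64_t opd = 0x0102030405060708ULL;

        /* Mirrors the .opd fix above: descriptor entries read from a
         * cross-endian DSO must be swapped before use. */
        printf("%016llx\n",
               (unsigned long long)COND_SWAP(1, uint64_t, opd));
        return 0;
}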