aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/uaccess.h12
-rw-r--r--arch/x86/kernel/cpu/perf_event.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c145
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c9
-rw-r--r--arch/x86/lib/usercopy.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt8
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk14
-rw-r--r--include/linux/perf_event.h4
-rw-r--r--kernel/events/core.c1
-rw-r--r--tools/perf/MANIFEST2
-rw-r--r--tools/perf/builtin-report.c4
-rw-r--r--tools/perf/builtin-stat.c8
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/design.txt7
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/callchain.h2
-rw-r--r--tools/perf/util/evlist.c17
-rw-r--r--tools/perf/util/evlist.h4
-rw-r--r--tools/perf/util/evsel.c29
-rw-r--r--tools/perf/util/hist.c7
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/pager.c4
-rw-r--r--tools/perf/util/probe-event.c8
-rw-r--r--tools/perf/util/session.c97
-rw-r--r--tools/perf/util/symbol.c38
-rw-r--r--tools/perf/util/symbol.h30
29 files changed, 357 insertions, 120 deletions
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 04cd6882308e..e1f3a17034fc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -33,9 +33,8 @@
33#define segment_eq(a, b) ((a).seg == (b).seg) 33#define segment_eq(a, b) ((a).seg == (b).seg)
34 34
35#define user_addr_max() (current_thread_info()->addr_limit.seg) 35#define user_addr_max() (current_thread_info()->addr_limit.seg)
36#define __addr_ok(addr) \ 36#define __addr_ok(addr) \
37 ((unsigned long __force)(addr) < \ 37 ((unsigned long __force)(addr) < user_addr_max())
38 (current_thread_info()->addr_limit.seg))
39 38
40/* 39/*
41 * Test whether a block of memory is a valid user space address. 40 * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
47 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... 46 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
48 */ 47 */
49 48
50#define __range_not_ok(addr, size) \ 49#define __range_not_ok(addr, size, limit) \
51({ \ 50({ \
52 unsigned long flag, roksum; \ 51 unsigned long flag, roksum; \
53 __chk_user_ptr(addr); \ 52 __chk_user_ptr(addr); \
54 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ 53 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \
55 : "=&r" (flag), "=r" (roksum) \ 54 : "=&r" (flag), "=r" (roksum) \
56 : "1" (addr), "g" ((long)(size)), \ 55 : "1" (addr), "g" ((long)(size)), \
57 "rm" (current_thread_info()->addr_limit.seg)); \ 56 "rm" (limit)); \
58 flag; \ 57 flag; \
59}) 58})
60 59
@@ -77,7 +76,8 @@
77 * checks that the pointer is in the user space range - after calling 76 * checks that the pointer is in the user space range - after calling
78 * this function, memory access functions may still return -EFAULT. 77 * this function, memory access functions may still return -EFAULT.
79 */ 78 */
80#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) 79#define access_ok(type, addr, size) \
80 (likely(__range_not_ok(addr, size, user_addr_max()) == 0))
81 81
82/* 82/*
83 * The exception table consists of pairs of addresses relative to the 83 * The exception table consists of pairs of addresses relative to the
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..c4706cf9c011 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
1496 if (!cpuc->shared_regs) 1496 if (!cpuc->shared_regs)
1497 goto error; 1497 goto error;
1498 } 1498 }
1499 cpuc->is_fake = 1;
1499 return cpuc; 1500 return cpuc;
1500error: 1501error:
1501 free_fake_cpuc(cpuc); 1502 free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1756 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 1757 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1757} 1758}
1758 1759
1760static inline int
1761valid_user_frame(const void __user *fp, unsigned long size)
1762{
1763 return (__range_not_ok(fp, size, TASK_SIZE) == 0);
1764}
1765
1759#ifdef CONFIG_COMPAT 1766#ifdef CONFIG_COMPAT
1760 1767
1761#include <asm/compat.h> 1768#include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1780 if (bytes != sizeof(frame)) 1787 if (bytes != sizeof(frame))
1781 break; 1788 break;
1782 1789
1783 if (fp < compat_ptr(regs->sp)) 1790 if (!valid_user_frame(fp, sizeof(frame)))
1784 break; 1791 break;
1785 1792
1786 perf_callchain_store(entry, frame.return_address); 1793 perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1826 if (bytes != sizeof(frame)) 1833 if (bytes != sizeof(frame))
1827 break; 1834 break;
1828 1835
1829 if ((unsigned long)fp < regs->sp) 1836 if (!valid_user_frame(fp, sizeof(frame)))
1830 break; 1837 break;
1831 1838
1832 perf_callchain_store(entry, frame.return_address); 1839 perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
117 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 117 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
118 118
119 unsigned int group_flag; 119 unsigned int group_flag;
120 int is_fake;
120 121
121 /* 122 /*
122 * Intel DebugStore bits 123 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
364 int pebs_record_size; 365 int pebs_record_size;
365 void (*drain_pebs)(struct pt_regs *regs); 366 void (*drain_pebs)(struct pt_regs *regs);
366 struct event_constraint *pebs_constraints; 367 struct event_constraint *pebs_constraints;
368 void (*pebs_aliases)(struct perf_event *event);
367 369
368 /* 370 /*
369 * Intel LBR 371 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
1119 return NULL; 1119 return NULL;
1120} 1120}
1121 1121
1122static bool intel_try_alt_er(struct perf_event *event, int orig_idx) 1122static int intel_alt_er(int idx)
1123{ 1123{
1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) 1124 if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1125 return false; 1125 return idx;
1126 1126
1127 if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { 1127 if (idx == EXTRA_REG_RSP_0)
1128 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1128 return EXTRA_REG_RSP_1;
1129 event->hw.config |= 0x01bb; 1129
1130 event->hw.extra_reg.idx = EXTRA_REG_RSP_1; 1130 if (idx == EXTRA_REG_RSP_1)
1131 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 1131 return EXTRA_REG_RSP_0;
1132 } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { 1132
1133 return idx;
1134}
1135
1136static void intel_fixup_er(struct perf_event *event, int idx)
1137{
1138 event->hw.extra_reg.idx = idx;
1139
1140 if (idx == EXTRA_REG_RSP_0) {
1133 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 1141 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1134 event->hw.config |= 0x01b7; 1142 event->hw.config |= 0x01b7;
1135 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1136 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; 1143 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1144 } else if (idx == EXTRA_REG_RSP_1) {
1145 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1146 event->hw.config |= 0x01bb;
1147 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1137 } 1148 }
1138
1139 if (event->hw.extra_reg.idx == orig_idx)
1140 return false;
1141
1142 return true;
1143} 1149}
1144 1150
1145/* 1151/*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1157 struct event_constraint *c = &emptyconstraint; 1163 struct event_constraint *c = &emptyconstraint;
1158 struct er_account *era; 1164 struct er_account *era;
1159 unsigned long flags; 1165 unsigned long flags;
1160 int orig_idx = reg->idx; 1166 int idx = reg->idx;
1161 1167
1162 /* already allocated shared msr */ 1168 /*
1163 if (reg->alloc) 1169 * reg->alloc can be set due to existing state, so for fake cpuc we
1170 * need to ignore this, otherwise we might fail to allocate proper fake
1171 * state for this extra reg constraint. Also see the comment below.
1172 */
1173 if (reg->alloc && !cpuc->is_fake)
1164 return NULL; /* call x86_get_event_constraint() */ 1174 return NULL; /* call x86_get_event_constraint() */
1165 1175
1166again: 1176again:
1167 era = &cpuc->shared_regs->regs[reg->idx]; 1177 era = &cpuc->shared_regs->regs[idx];
1168 /* 1178 /*
1169 * we use spin_lock_irqsave() to avoid lockdep issues when 1179 * we use spin_lock_irqsave() to avoid lockdep issues when
1170 * passing a fake cpuc 1180 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
1173 1183
1174 if (!atomic_read(&era->ref) || era->config == reg->config) { 1184 if (!atomic_read(&era->ref) || era->config == reg->config) {
1175 1185
1186 /*
1187 * If its a fake cpuc -- as per validate_{group,event}() we
1188 * shouldn't touch event state and we can avoid doing so
1189 * since both will only call get_event_constraints() once
1190 * on each event, this avoids the need for reg->alloc.
1191 *
1192 * Not doing the ER fixup will only result in era->reg being
1193 * wrong, but since we won't actually try and program hardware
1194 * this isn't a problem either.
1195 */
1196 if (!cpuc->is_fake) {
1197 if (idx != reg->idx)
1198 intel_fixup_er(event, idx);
1199
1200 /*
1201 * x86_schedule_events() can call get_event_constraints()
1202 * multiple times on events in the case of incremental
1203 * scheduling(). reg->alloc ensures we only do the ER
1204 * allocation once.
1205 */
1206 reg->alloc = 1;
1207 }
1208
1176 /* lock in msr value */ 1209 /* lock in msr value */
1177 era->config = reg->config; 1210 era->config = reg->config;
1178 era->reg = reg->reg; 1211 era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
1180 /* one more user */ 1213 /* one more user */
1181 atomic_inc(&era->ref); 1214 atomic_inc(&era->ref);
1182 1215
1183 /* no need to reallocate during incremental event scheduling */
1184 reg->alloc = 1;
1185
1186 /* 1216 /*
1187 * need to call x86_get_event_constraint() 1217 * need to call x86_get_event_constraint()
1188 * to check if associated event has constraints 1218 * to check if associated event has constraints
1189 */ 1219 */
1190 c = NULL; 1220 c = NULL;
1191 } else if (intel_try_alt_er(event, orig_idx)) { 1221 } else {
1192 raw_spin_unlock_irqrestore(&era->lock, flags); 1222 idx = intel_alt_er(idx);
1193 goto again; 1223 if (idx != reg->idx) {
1224 raw_spin_unlock_irqrestore(&era->lock, flags);
1225 goto again;
1226 }
1194 } 1227 }
1195 raw_spin_unlock_irqrestore(&era->lock, flags); 1228 raw_spin_unlock_irqrestore(&era->lock, flags);
1196 1229
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1204 struct er_account *era; 1237 struct er_account *era;
1205 1238
1206 /* 1239 /*
1207 * only put constraint if extra reg was actually 1240 * Only put constraint if extra reg was actually allocated. Also takes
1208 * allocated. Also takes care of event which do 1241 * care of event which do not use an extra shared reg.
1209 * not use an extra shared reg 1242 *
1243 * Also, if this is a fake cpuc we shouldn't touch any event state
1244 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
1245 * either since it'll be thrown out.
1210 */ 1246 */
1211 if (!reg->alloc) 1247 if (!reg->alloc || cpuc->is_fake)
1212 return; 1248 return;
1213 1249
1214 era = &cpuc->shared_regs->regs[reg->idx]; 1250 era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1300 intel_put_shared_regs_event_constraints(cpuc, event); 1336 intel_put_shared_regs_event_constraints(cpuc, event);
1301} 1337}
1302 1338
1303static int intel_pmu_hw_config(struct perf_event *event) 1339static void intel_pebs_aliases_core2(struct perf_event *event)
1304{ 1340{
1305 int ret = x86_pmu_hw_config(event); 1341 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1306
1307 if (ret)
1308 return ret;
1309
1310 if (event->attr.precise_ip &&
1311 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1312 /* 1342 /*
1313 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 1343 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1314 * (0x003c) so that we can use it with PEBS. 1344 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
1329 */ 1359 */
1330 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 1360 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
1331 1361
1362 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1363 event->hw.config = alt_config;
1364 }
1365}
1366
1367static void intel_pebs_aliases_snb(struct perf_event *event)
1368{
1369 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1370 /*
1371 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1372 * (0x003c) so that we can use it with PEBS.
1373 *
1374 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1375 * PEBS capable. However we can use UOPS_RETIRED.ALL
1376 * (0x01c2), which is a PEBS capable event, to get the same
1377 * count.
1378 *
1379 * UOPS_RETIRED.ALL counts the number of cycles that retires
1380 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
1381 * larger than the maximum number of micro-ops that can be
1382 * retired per cycle (4) and then inverting the condition, we
1383 * count all cycles that retire 16 or less micro-ops, which
1384 * is every cycle.
1385 *
1386 * Thereby we gain a PEBS capable cycle counter.
1387 */
1388 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
1332 1389
1333 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 1390 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1334 event->hw.config = alt_config; 1391 event->hw.config = alt_config;
1335 } 1392 }
1393}
1394
1395static int intel_pmu_hw_config(struct perf_event *event)
1396{
1397 int ret = x86_pmu_hw_config(event);
1398
1399 if (ret)
1400 return ret;
1401
1402 if (event->attr.precise_ip && x86_pmu.pebs_aliases)
1403 x86_pmu.pebs_aliases(event);
1336 1404
1337 if (intel_pmu_needs_lbr_smpl(event)) { 1405 if (intel_pmu_needs_lbr_smpl(event)) {
1338 ret = intel_pmu_setup_lbr_filter(event); 1406 ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1607 .max_period = (1ULL << 31) - 1, 1675 .max_period = (1ULL << 31) - 1,
1608 .get_event_constraints = intel_get_event_constraints, 1676 .get_event_constraints = intel_get_event_constraints,
1609 .put_event_constraints = intel_put_event_constraints, 1677 .put_event_constraints = intel_put_event_constraints,
1678 .pebs_aliases = intel_pebs_aliases_core2,
1610 1679
1611 .format_attrs = intel_arch3_formats_attr, 1680 .format_attrs = intel_arch3_formats_attr,
1612 1681
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
1840 break; 1909 break;
1841 1910
1842 case 42: /* SandyBridge */ 1911 case 42: /* SandyBridge */
1843 x86_add_quirk(intel_sandybridge_quirk);
1844 case 45: /* SandyBridge, "Romely-EP" */ 1912 case 45: /* SandyBridge, "Romely-EP" */
1913 x86_add_quirk(intel_sandybridge_quirk);
1914 case 58: /* IvyBridge */
1845 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1915 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1846 sizeof(hw_cache_event_ids)); 1916 sizeof(hw_cache_event_ids));
1847 1917
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
1849 1919
1850 x86_pmu.event_constraints = intel_snb_event_constraints; 1920 x86_pmu.event_constraints = intel_snb_event_constraints;
1851 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 1921 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1922 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
1852 x86_pmu.extra_regs = intel_snb_extra_regs; 1923 x86_pmu.extra_regs = intel_snb_extra_regs;
1853 /* all extra regs are per-cpu when HT is on */ 1924 /* all extra regs are per-cpu when HT is on */
1854 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1925 x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
400 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ 400 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
401 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ 401 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
402 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ 402 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
403 INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ 403 INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
404 INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
405 INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
406 INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
407 INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
408 INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
409 INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
410 INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
411 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 404 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
412 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 405 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
413 INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ 406 INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index f61ee67ec00f..677b1ed184c9 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10#include <asm/word-at-a-time.h> 10#include <asm/word-at-a-time.h>
11#include <linux/sched.h>
11 12
12/* 13/*
13 * best effort, GUP based copy_from_user() that is NMI-safe 14 * best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
21 void *map; 22 void *map;
22 int ret; 23 int ret;
23 24
25 if (__range_not_ok(from, n, TASK_SIZE) == 0)
26 return len;
27
24 do { 28 do {
25 ret = __get_user_pages_fast(addr, 1, 0, &page); 29 ret = __get_user_pages_fast(addr, 1, 0, &page);
26 if (!ret) 30 if (!ret)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 819137904428..5d7e51f3fd28 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -28,7 +28,7 @@
28# - (66): the last prefix is 0x66 28# - (66): the last prefix is 0x66
29# - (F3): the last prefix is 0xF3 29# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2 30# - (F2): the last prefix is 0xF2
31# 31# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
32 32
33Table: one byte opcode 33Table: one byte opcode
34Referrer: 34Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
515b5: LGS Gv,Mp 515b5: LGS Gv,Mp
516b6: MOVZX Gv,Eb 516b6: MOVZX Gv,Eb
517b7: MOVZX Gv,Ew 517b7: MOVZX Gv,Ew
518b8: JMPE | POPCNT Gv,Ev (F3) 518b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
519b9: Grp10 (1A) 519b9: Grp10 (1A)
520ba: Grp8 Ev,Ib (1A) 520ba: Grp8 Ev,Ib (1A)
521bb: BTC Ev,Gv 521bb: BTC Ev,Gv
522bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) 522bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
523bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) 523bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
524be: MOVSX Gv,Eb 524be: MOVSX Gv,Eb
525bf: MOVSX Gv,Ew 525bf: MOVSX Gv,Ew
526# 0x0f 0xc0-0xcf 526# 0x0f 0xc0-0xcf
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 5f6a5b6c3a15..ddcf39b1a18d 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -66,9 +66,10 @@ BEGIN {
66 rex_expr = "^REX(\\.[XRWB]+)*" 66 rex_expr = "^REX(\\.[XRWB]+)*"
67 fpu_expr = "^ESC" # TODO 67 fpu_expr = "^ESC" # TODO
68 68
69 lprefix1_expr = "\\(66\\)" 69 lprefix1_expr = "\\((66|!F3)\\)"
70 lprefix2_expr = "\\(F3\\)" 70 lprefix2_expr = "\\(F3\\)"
71 lprefix3_expr = "\\(F2\\)" 71 lprefix3_expr = "\\((F2|!F3)\\)"
72 lprefix_expr = "\\((66|F2|F3)\\)"
72 max_lprefix = 4 73 max_lprefix = 4
73 74
74 # All opcodes starting with lower-case 'v' or with (v1) superscript 75 # All opcodes starting with lower-case 'v' or with (v1) superscript
@@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod)
333 if (match(ext, lprefix1_expr)) { 334 if (match(ext, lprefix1_expr)) {
334 lptable1[idx] = add_flags(lptable1[idx],flags) 335 lptable1[idx] = add_flags(lptable1[idx],flags)
335 variant = "INAT_VARIANT" 336 variant = "INAT_VARIANT"
336 } else if (match(ext, lprefix2_expr)) { 337 }
338 if (match(ext, lprefix2_expr)) {
337 lptable2[idx] = add_flags(lptable2[idx],flags) 339 lptable2[idx] = add_flags(lptable2[idx],flags)
338 variant = "INAT_VARIANT" 340 variant = "INAT_VARIANT"
339 } else if (match(ext, lprefix3_expr)) { 341 }
342 if (match(ext, lprefix3_expr)) {
340 lptable3[idx] = add_flags(lptable3[idx],flags) 343 lptable3[idx] = add_flags(lptable3[idx],flags)
341 variant = "INAT_VARIANT" 344 variant = "INAT_VARIANT"
342 } else { 345 }
346 if (!match(ext, lprefix_expr)){
343 table[idx] = add_flags(table[idx],flags) 347 table[idx] = add_flags(table[idx],flags)
344 } 348 }
345 } 349 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..45db49f64bb4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,6 +555,8 @@ enum perf_event_type {
555 PERF_RECORD_MAX, /* non-ABI */ 555 PERF_RECORD_MAX, /* non-ABI */
556}; 556};
557 557
558#define PERF_MAX_STACK_DEPTH 127
559
558enum perf_callchain_context { 560enum perf_callchain_context {
559 PERF_CONTEXT_HV = (__u64)-32, 561 PERF_CONTEXT_HV = (__u64)-32,
560 PERF_CONTEXT_KERNEL = (__u64)-128, 562 PERF_CONTEXT_KERNEL = (__u64)-128,
@@ -609,8 +611,6 @@ struct perf_guest_info_callbacks {
609#include <linux/sysfs.h> 611#include <linux/sysfs.h>
610#include <asm/local.h> 612#include <asm/local.h>
611 613
612#define PERF_MAX_STACK_DEPTH 255
613
614struct perf_callchain_entry { 614struct perf_callchain_entry {
615 __u64 nr; 615 __u64 nr;
616 __u64 ip[PERF_MAX_STACK_DEPTH]; 616 __u64 ip[PERF_MAX_STACK_DEPTH];
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5b06cbbf6931..f85c0154b333 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event,
3181 event = event->group_leader; 3181 event = event->group_leader;
3182 3182
3183 perf_event_for_each_child(event, func); 3183 perf_event_for_each_child(event, func);
3184 func(event);
3185 list_for_each_entry(sibling, &event->sibling_list, group_entry) 3184 list_for_each_entry(sibling, &event->sibling_list, group_entry)
3186 perf_event_for_each_child(sibling, func); 3185 perf_event_for_each_child(sibling, func);
3187 mutex_unlock(&ctx->mutex); 3186 mutex_unlock(&ctx->mutex);
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5476bc0a1eac..b4b572e8c100 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,6 @@
1tools/perf 1tools/perf
2tools/scripts
3tools/lib/traceevent
2include/linux/const.h 4include/linux/const.h
3include/linux/perf_event.h 5include/linux/perf_event.h
4include/linux/rbtree.h 6include/linux/rbtree.h
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8c767c6bca91..25249f76329d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
152 152
153 if (symbol_conf.use_callchain) { 153 if (symbol_conf.use_callchain) {
154 err = callchain_append(he->callchain, 154 err = callchain_append(he->callchain,
155 &evsel->hists.callchain_cursor, 155 &callchain_cursor,
156 sample->period); 156 sample->period);
157 if (err) 157 if (err)
158 return err; 158 return err;
@@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
162 * so we don't allocated the extra space needed because the stdio 162 * so we don't allocated the extra space needed because the stdio
163 * code will not use it. 163 * code will not use it.
164 */ 164 */
165 if (al->sym != NULL && use_browser > 0) { 165 if (he->ms.sym != NULL && use_browser > 0) {
166 struct annotation *notes = symbol__annotation(he->ms.sym); 166 struct annotation *notes = symbol__annotation(he->ms.sym);
167 167
168 assert(evsel != NULL); 168 assert(evsel != NULL);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 62ae30d34fa6..262589991ea4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1129,7 +1129,7 @@ static int add_default_attributes(void)
1129 return 0; 1129 return 0;
1130 1130
1131 if (!evsel_list->nr_entries) { 1131 if (!evsel_list->nr_entries) {
1132 if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) 1132 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1133 return -1; 1133 return -1;
1134 } 1134 }
1135 1135
@@ -1139,21 +1139,21 @@ static int add_default_attributes(void)
1139 return 0; 1139 return 0;
1140 1140
1141 /* Append detailed run extra attributes: */ 1141 /* Append detailed run extra attributes: */
1142 if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) 1142 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1143 return -1; 1143 return -1;
1144 1144
1145 if (detailed_run < 2) 1145 if (detailed_run < 2)
1146 return 0; 1146 return 0;
1147 1147
1148 /* Append very detailed run extra attributes: */ 1148 /* Append very detailed run extra attributes: */
1149 if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) 1149 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1150 return -1; 1150 return -1;
1151 1151
1152 if (detailed_run < 3) 1152 if (detailed_run < 3)
1153 return 0; 1153 return 0;
1154 1154
1155 /* Append very, very detailed run extra attributes: */ 1155 /* Append very, very detailed run extra attributes: */
1156 return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); 1156 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1157} 1157}
1158 1158
1159int cmd_stat(int argc, const char **argv, const char *prefix __used) 1159int cmd_stat(int argc, const char **argv, const char *prefix __used)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 871b540293e1..6bb0277b7dfe 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
787 } 787 }
788 788
789 if (symbol_conf.use_callchain) { 789 if (symbol_conf.use_callchain) {
790 err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, 790 err = callchain_append(he->callchain, &callchain_cursor,
791 sample->period); 791 sample->period);
792 if (err) 792 if (err)
793 return; 793 return;
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index bd0bb1b1279b..67e5d0cace85 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via
409prctl. When a counter is disabled, it doesn't count or generate 409prctl. When a counter is disabled, it doesn't count or generate
410events but does continue to exist and maintain its count value. 410events but does continue to exist and maintain its count value.
411 411
412An individual counter or counter group can be enabled with 412An individual counter can be enabled with
413 413
414 ioctl(fd, PERF_EVENT_IOC_ENABLE); 414 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
415 415
416or disabled with 416or disabled with
417 417
418 ioctl(fd, PERF_EVENT_IOC_DISABLE); 418 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
419 419
420For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument.
420Enabling or disabling the leader of a group enables or disables the 421Enabling or disabling the leader of a group enables or disables the
421whole group; that is, while the group leader is disabled, none of the 422whole group; that is, while the group leader is disabled, none of the
422counters in the group will count. Enabling or disabling a member of a 423counters in the group will count. Enabling or disabling a member of a
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 4deea6aaf927..34b1c46eaf42 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx,
668 "q/ESC/CTRL+C Exit\n\n" 668 "q/ESC/CTRL+C Exit\n\n"
669 "-> Go to target\n" 669 "-> Go to target\n"
670 "<- Exit\n" 670 "<- Exit\n"
671 "h Cycle thru hottest instructions\n" 671 "H Cycle thru hottest instructions\n"
672 "j Toggle showing jump to target arrows\n" 672 "j Toggle showing jump to target arrows\n"
673 "J Toggle showing number of jump sources on targets\n" 673 "J Toggle showing number of jump sources on targets\n"
674 "n Search next string\n" 674 "n Search next string\n"
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index ad73300f7bac..95264f304179 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -12,7 +12,7 @@ LF='
12# First check if there is a .git to get the version from git describe 12# First check if there is a .git to get the version from git describe
13# otherwise try to get the version from the kernel makefile 13# otherwise try to get the version from the kernel makefile
14if test -d ../../.git -o -f ../../.git && 14if test -d ../../.git -o -f ../../.git &&
15 VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && 15 VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) &&
16 case "$VN" in 16 case "$VN" in
17 *$LF*) (exit 1) ;; 17 *$LF*) (exit 1) ;;
18 v[0-9]*) 18 v[0-9]*)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9f7106a8d9a4..3a6bff47614f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,6 +18,8 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21__thread struct callchain_cursor callchain_cursor;
22
21bool ip_callchain__valid(struct ip_callchain *chain, 23bool ip_callchain__valid(struct ip_callchain *chain,
22 const union perf_event *event) 24 const union perf_event *event)
23{ 25{
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7f9c0f1ae3a9..3bdb407f9cd9 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -76,6 +76,8 @@ struct callchain_cursor {
76 struct callchain_cursor_node *curr; 76 struct callchain_cursor_node *curr;
77}; 77};
78 78
79extern __thread struct callchain_cursor callchain_cursor;
80
79static inline void callchain_init(struct callchain_root *root) 81static inline void callchain_init(struct callchain_root *root)
80{ 82{
81 INIT_LIST_HEAD(&root->node.siblings); 83 INIT_LIST_HEAD(&root->node.siblings);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4ac5f5ae4ce9..7400fb3fc50c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -159,6 +159,17 @@ out_delete_partial_list:
159 return -1; 159 return -1;
160} 160}
161 161
162int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
163 struct perf_event_attr *attrs, size_t nr_attrs)
164{
165 size_t i;
166
167 for (i = 0; i < nr_attrs; i++)
168 event_attr_init(attrs + i);
169
170 return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
171}
172
162static int trace_event__id(const char *evname) 173static int trace_event__id(const char *evname)
163{ 174{
164 char *filename, *colon; 175 char *filename, *colon;
@@ -263,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
263 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { 274 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
264 list_for_each_entry(pos, &evlist->entries, node) { 275 list_for_each_entry(pos, &evlist->entries, node) {
265 for (thread = 0; thread < evlist->threads->nr; thread++) 276 for (thread = 0; thread < evlist->threads->nr; thread++)
266 ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); 277 ioctl(FD(pos, cpu, thread),
278 PERF_EVENT_IOC_DISABLE, 0);
267 } 279 }
268 } 280 }
269} 281}
@@ -276,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist)
276 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { 288 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
277 list_for_each_entry(pos, &evlist->entries, node) { 289 list_for_each_entry(pos, &evlist->entries, node) {
278 for (thread = 0; thread < evlist->threads->nr; thread++) 290 for (thread = 0; thread < evlist->threads->nr; thread++)
279 ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); 291 ioctl(FD(pos, cpu, thread),
292 PERF_EVENT_IOC_ENABLE, 0);
280 } 293 }
281 } 294 }
282} 295}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 58abb63ac13a..989bee9624c2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
54int perf_evlist__add_default(struct perf_evlist *evlist); 54int perf_evlist__add_default(struct perf_evlist *evlist);
55int perf_evlist__add_attrs(struct perf_evlist *evlist, 55int perf_evlist__add_attrs(struct perf_evlist *evlist,
56 struct perf_event_attr *attrs, size_t nr_attrs); 56 struct perf_event_attr *attrs, size_t nr_attrs);
57int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
58 struct perf_event_attr *attrs, size_t nr_attrs);
57int perf_evlist__add_tracepoints(struct perf_evlist *evlist, 59int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
58 const char *tracepoints[], size_t nr_tracepoints); 60 const char *tracepoints[], size_t nr_tracepoints);
59int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, 61int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
@@ -62,6 +64,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
62 64
63#define perf_evlist__add_attrs_array(evlist, array) \ 65#define perf_evlist__add_attrs_array(evlist, array) \
64 perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) 66 perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array))
67#define perf_evlist__add_default_attrs(evlist, array) \
68 __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
65 69
66#define perf_evlist__add_tracepoints_array(evlist, array) \ 70#define perf_evlist__add_tracepoints_array(evlist, array) \
67 perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) 71 perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array))
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 91d19138f3ec..9f6cebd798ee 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel,
494} 494}
495 495
496static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 496static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
497 struct perf_sample *sample) 497 struct perf_sample *sample,
498 bool swapped)
498{ 499{
499 const u64 *array = event->sample.array; 500 const u64 *array = event->sample.array;
501 union u64_swap u;
500 502
501 array += ((event->header.size - 503 array += ((event->header.size -
502 sizeof(event->header)) / sizeof(u64)) - 1; 504 sizeof(event->header)) / sizeof(u64)) - 1;
503 505
504 if (type & PERF_SAMPLE_CPU) { 506 if (type & PERF_SAMPLE_CPU) {
505 u32 *p = (u32 *)array; 507 u.val64 = *array;
506 sample->cpu = *p; 508 if (swapped) {
509 /* undo swap of u64, then swap on individual u32s */
510 u.val64 = bswap_64(u.val64);
511 u.val32[0] = bswap_32(u.val32[0]);
512 }
513
514 sample->cpu = u.val32[0];
507 array--; 515 array--;
508 } 516 }
509 517
@@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
523 } 531 }
524 532
525 if (type & PERF_SAMPLE_TID) { 533 if (type & PERF_SAMPLE_TID) {
526 u32 *p = (u32 *)array; 534 u.val64 = *array;
527 sample->pid = p[0]; 535 if (swapped) {
528 sample->tid = p[1]; 536 /* undo swap of u64, then swap on individual u32s */
537 u.val64 = bswap_64(u.val64);
538 u.val32[0] = bswap_32(u.val32[0]);
539 u.val32[1] = bswap_32(u.val32[1]);
540 }
541
542 sample->pid = u.val32[0];
543 sample->tid = u.val32[1];
529 } 544 }
530 545
531 return 0; 546 return 0;
@@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
562 if (event->header.type != PERF_RECORD_SAMPLE) { 577 if (event->header.type != PERF_RECORD_SAMPLE) {
563 if (!sample_id_all) 578 if (!sample_id_all)
564 return 0; 579 return 0;
565 return perf_event__parse_id_sample(event, type, data); 580 return perf_event__parse_id_sample(event, type, data, swapped);
566 } 581 }
567 582
568 array = event->sample.array; 583 array = event->sample.array;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 1293b5ebea4d..514e2a4b367d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he)
378 * collapse the histogram 378 * collapse the histogram
379 */ 379 */
380 380
381static bool hists__collapse_insert_entry(struct hists *hists, 381static bool hists__collapse_insert_entry(struct hists *hists __used,
382 struct rb_root *root, 382 struct rb_root *root,
383 struct hist_entry *he) 383 struct hist_entry *he)
384{ 384{
@@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists,
397 iter->period += he->period; 397 iter->period += he->period;
398 iter->nr_events += he->nr_events; 398 iter->nr_events += he->nr_events;
399 if (symbol_conf.use_callchain) { 399 if (symbol_conf.use_callchain) {
400 callchain_cursor_reset(&hists->callchain_cursor); 400 callchain_cursor_reset(&callchain_cursor);
401 callchain_merge(&hists->callchain_cursor, iter->callchain, 401 callchain_merge(&callchain_cursor,
402 iter->callchain,
402 he->callchain); 403 he->callchain);
403 } 404 }
404 hist_entry__free(he); 405 hist_entry__free(he);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index cfc64e293f90..34bb556d6219 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -67,8 +67,6 @@ struct hists {
67 struct events_stats stats; 67 struct events_stats stats;
68 u64 event_stream; 68 u64 event_stream;
69 u16 col_len[HISTC_NR_COLS]; 69 u16 col_len[HISTC_NR_COLS];
70 /* Best would be to reuse the session callchain cursor */
71 struct callchain_cursor callchain_cursor;
72}; 70};
73 71
74struct hist_entry *__hists__add_entry(struct hists *self, 72struct hist_entry *__hists__add_entry(struct hists *self,
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 1915de20dcac..3322b8446e89 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -57,6 +57,10 @@ void setup_pager(void)
57 } 57 }
58 if (!pager) 58 if (!pager)
59 pager = getenv("PAGER"); 59 pager = getenv("PAGER");
60 if (!pager) {
61 if (!access("/usr/bin/pager", X_OK))
62 pager = "/usr/bin/pager";
63 }
60 if (!pager) 64 if (!pager)
61 pager = "less"; 65 pager = "less";
62 else if (!*pager || !strcmp(pager, "cat")) 66 else if (!*pager || !strcmp(pager, "cat"))
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 59dccc98b554..0dda25d82d06 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist)
2164 2164
2165error: 2165error:
2166 if (kfd >= 0) { 2166 if (kfd >= 0) {
2167 if (namelist) 2167 strlist__delete(namelist);
2168 strlist__delete(namelist);
2169
2170 close(kfd); 2168 close(kfd);
2171 } 2169 }
2172 2170
2173 if (ufd >= 0) { 2171 if (ufd >= 0) {
2174 if (unamelist) 2172 strlist__delete(unamelist);
2175 strlist__delete(unamelist);
2176
2177 close(ufd); 2173 close(ufd);
2178 } 2174 }
2179 2175
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 93d355d27109..2600916efa83 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self,
288 return bi; 288 return bi;
289} 289}
290 290
291int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, 291int machine__resolve_callchain(struct machine *self,
292 struct perf_evsel *evsel __used,
292 struct thread *thread, 293 struct thread *thread,
293 struct ip_callchain *chain, 294 struct ip_callchain *chain,
294 struct symbol **parent) 295 struct symbol **parent)
@@ -297,7 +298,12 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
297 unsigned int i; 298 unsigned int i;
298 int err; 299 int err;
299 300
300 callchain_cursor_reset(&evsel->hists.callchain_cursor); 301 callchain_cursor_reset(&callchain_cursor);
302
303 if (chain->nr > PERF_MAX_STACK_DEPTH) {
304 pr_warning("corrupted callchain. skipping...\n");
305 return 0;
306 }
301 307
302 for (i = 0; i < chain->nr; i++) { 308 for (i = 0; i < chain->nr; i++) {
303 u64 ip; 309 u64 ip;
@@ -317,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
317 case PERF_CONTEXT_USER: 323 case PERF_CONTEXT_USER:
318 cpumode = PERF_RECORD_MISC_USER; break; 324 cpumode = PERF_RECORD_MISC_USER; break;
319 default: 325 default:
320 break; 326 pr_debug("invalid callchain context: "
327 "%"PRId64"\n", (s64) ip);
328 /*
329 * It seems the callchain is corrupted.
330 * Discard all.
331 */
332 callchain_cursor_reset(&callchain_cursor);
333 return 0;
321 } 334 }
322 continue; 335 continue;
323 } 336 }
@@ -333,7 +346,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
333 break; 346 break;
334 } 347 }
335 348
336 err = callchain_cursor_append(&evsel->hists.callchain_cursor, 349 err = callchain_cursor_append(&callchain_cursor,
337 ip, al.map, al.sym); 350 ip, al.map, al.sym);
338 if (err) 351 if (err)
339 return err; 352 return err;
@@ -441,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size)
441 } 454 }
442} 455}
443 456
444static void perf_event__all64_swap(union perf_event *event) 457static void swap_sample_id_all(union perf_event *event, void *data)
458{
459 void *end = (void *) event + event->header.size;
460 int size = end - data;
461
462 BUG_ON(size % sizeof(u64));
463 mem_bswap_64(data, size);
464}
465
466static void perf_event__all64_swap(union perf_event *event,
467 bool sample_id_all __used)
445{ 468{
446 struct perf_event_header *hdr = &event->header; 469 struct perf_event_header *hdr = &event->header;
447 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); 470 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
448} 471}
449 472
450static void perf_event__comm_swap(union perf_event *event) 473static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
451{ 474{
452 event->comm.pid = bswap_32(event->comm.pid); 475 event->comm.pid = bswap_32(event->comm.pid);
453 event->comm.tid = bswap_32(event->comm.tid); 476 event->comm.tid = bswap_32(event->comm.tid);
477
478 if (sample_id_all) {
479 void *data = &event->comm.comm;
480
481 data += ALIGN(strlen(data) + 1, sizeof(u64));
482 swap_sample_id_all(event, data);
483 }
454} 484}
455 485
456static void perf_event__mmap_swap(union perf_event *event) 486static void perf_event__mmap_swap(union perf_event *event,
487 bool sample_id_all)
457{ 488{
458 event->mmap.pid = bswap_32(event->mmap.pid); 489 event->mmap.pid = bswap_32(event->mmap.pid);
459 event->mmap.tid = bswap_32(event->mmap.tid); 490 event->mmap.tid = bswap_32(event->mmap.tid);
460 event->mmap.start = bswap_64(event->mmap.start); 491 event->mmap.start = bswap_64(event->mmap.start);
461 event->mmap.len = bswap_64(event->mmap.len); 492 event->mmap.len = bswap_64(event->mmap.len);
462 event->mmap.pgoff = bswap_64(event->mmap.pgoff); 493 event->mmap.pgoff = bswap_64(event->mmap.pgoff);
494
495 if (sample_id_all) {
496 void *data = &event->mmap.filename;
497
498 data += ALIGN(strlen(data) + 1, sizeof(u64));
499 swap_sample_id_all(event, data);
500 }
463} 501}
464 502
465static void perf_event__task_swap(union perf_event *event) 503static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
466{ 504{
467 event->fork.pid = bswap_32(event->fork.pid); 505 event->fork.pid = bswap_32(event->fork.pid);
468 event->fork.tid = bswap_32(event->fork.tid); 506 event->fork.tid = bswap_32(event->fork.tid);
469 event->fork.ppid = bswap_32(event->fork.ppid); 507 event->fork.ppid = bswap_32(event->fork.ppid);
470 event->fork.ptid = bswap_32(event->fork.ptid); 508 event->fork.ptid = bswap_32(event->fork.ptid);
471 event->fork.time = bswap_64(event->fork.time); 509 event->fork.time = bswap_64(event->fork.time);
510
511 if (sample_id_all)
512 swap_sample_id_all(event, &event->fork + 1);
472} 513}
473 514
474static void perf_event__read_swap(union perf_event *event) 515static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
475{ 516{
476 event->read.pid = bswap_32(event->read.pid); 517 event->read.pid = bswap_32(event->read.pid);
477 event->read.tid = bswap_32(event->read.tid); 518 event->read.tid = bswap_32(event->read.tid);
@@ -479,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event)
479 event->read.time_enabled = bswap_64(event->read.time_enabled); 520 event->read.time_enabled = bswap_64(event->read.time_enabled);
480 event->read.time_running = bswap_64(event->read.time_running); 521 event->read.time_running = bswap_64(event->read.time_running);
481 event->read.id = bswap_64(event->read.id); 522 event->read.id = bswap_64(event->read.id);
523
524 if (sample_id_all)
525 swap_sample_id_all(event, &event->read + 1);
482} 526}
483 527
484static u8 revbyte(u8 b) 528static u8 revbyte(u8 b)
@@ -530,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
530 swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); 574 swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
531} 575}
532 576
533static void perf_event__hdr_attr_swap(union perf_event *event) 577static void perf_event__hdr_attr_swap(union perf_event *event,
578 bool sample_id_all __used)
534{ 579{
535 size_t size; 580 size_t size;
536 581
@@ -541,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event)
541 mem_bswap_64(event->attr.id, size); 586 mem_bswap_64(event->attr.id, size);
542} 587}
543 588
544static void perf_event__event_type_swap(union perf_event *event) 589static void perf_event__event_type_swap(union perf_event *event,
590 bool sample_id_all __used)
545{ 591{
546 event->event_type.event_type.event_id = 592 event->event_type.event_type.event_id =
547 bswap_64(event->event_type.event_type.event_id); 593 bswap_64(event->event_type.event_type.event_id);
548} 594}
549 595
550static void perf_event__tracing_data_swap(union perf_event *event) 596static void perf_event__tracing_data_swap(union perf_event *event,
597 bool sample_id_all __used)
551{ 598{
552 event->tracing_data.size = bswap_32(event->tracing_data.size); 599 event->tracing_data.size = bswap_32(event->tracing_data.size);
553} 600}
554 601
555typedef void (*perf_event__swap_op)(union perf_event *event); 602typedef void (*perf_event__swap_op)(union perf_event *event,
603 bool sample_id_all);
556 604
557static perf_event__swap_op perf_event__swap_ops[] = { 605static perf_event__swap_op perf_event__swap_ops[] = {
558 [PERF_RECORD_MMAP] = perf_event__mmap_swap, 606 [PERF_RECORD_MMAP] = perf_event__mmap_swap,
@@ -986,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union
986 } 1034 }
987} 1035}
988 1036
1037static void event_swap(union perf_event *event, bool sample_id_all)
1038{
1039 perf_event__swap_op swap;
1040
1041 swap = perf_event__swap_ops[event->header.type];
1042 if (swap)
1043 swap(event, sample_id_all);
1044}
1045
989static int perf_session__process_event(struct perf_session *session, 1046static int perf_session__process_event(struct perf_session *session,
990 union perf_event *event, 1047 union perf_event *event,
991 struct perf_tool *tool, 1048 struct perf_tool *tool,
@@ -994,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session,
994 struct perf_sample sample; 1051 struct perf_sample sample;
995 int ret; 1052 int ret;
996 1053
997 if (session->header.needs_swap && 1054 if (session->header.needs_swap)
998 perf_event__swap_ops[event->header.type]) 1055 event_swap(event, session->sample_id_all);
999 perf_event__swap_ops[event->header.type](event);
1000 1056
1001 if (event->header.type >= PERF_RECORD_HEADER_MAX) 1057 if (event->header.type >= PERF_RECORD_HEADER_MAX)
1002 return -EINVAL; 1058 return -EINVAL;
@@ -1428,7 +1484,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1428 int print_sym, int print_dso, int print_symoffset) 1484 int print_sym, int print_dso, int print_symoffset)
1429{ 1485{
1430 struct addr_location al; 1486 struct addr_location al;
1431 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
1432 struct callchain_cursor_node *node; 1487 struct callchain_cursor_node *node;
1433 1488
1434 if (perf_event__preprocess_sample(event, machine, &al, sample, 1489 if (perf_event__preprocess_sample(event, machine, &al, sample,
@@ -1446,10 +1501,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1446 error("Failed to resolve callchain. Skipping\n"); 1501 error("Failed to resolve callchain. Skipping\n");
1447 return; 1502 return;
1448 } 1503 }
1449 callchain_cursor_commit(cursor); 1504 callchain_cursor_commit(&callchain_cursor);
1450 1505
1451 while (1) { 1506 while (1) {
1452 node = callchain_cursor_current(cursor); 1507 node = callchain_cursor_current(&callchain_cursor);
1453 if (!node) 1508 if (!node)
1454 break; 1509 break;
1455 1510
@@ -1460,12 +1515,12 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1460 } 1515 }
1461 if (print_dso) { 1516 if (print_dso) {
1462 printf(" ("); 1517 printf(" (");
1463 map__fprintf_dsoname(al.map, stdout); 1518 map__fprintf_dsoname(node->map, stdout);
1464 printf(")"); 1519 printf(")");
1465 } 1520 }
1466 printf("\n"); 1521 printf("\n");
1467 1522
1468 callchain_cursor_advance(cursor); 1523 callchain_cursor_advance(&callchain_cursor);
1469 } 1524 }
1470 1525
1471 } else { 1526 } else {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e2ba8858f3e1..3e2e5ea0f03f 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -323,6 +323,7 @@ struct dso *dso__new(const char *name)
323 dso->sorted_by_name = 0; 323 dso->sorted_by_name = 0;
324 dso->has_build_id = 0; 324 dso->has_build_id = 0;
325 dso->kernel = DSO_TYPE_USER; 325 dso->kernel = DSO_TYPE_USER;
326 dso->needs_swap = DSO_SWAP__UNSET;
326 INIT_LIST_HEAD(&dso->node); 327 INIT_LIST_HEAD(&dso->node);
327 } 328 }
328 329
@@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
1156 return -1; 1157 return -1;
1157} 1158}
1158 1159
1160static int dso__swap_init(struct dso *dso, unsigned char eidata)
1161{
1162 static unsigned int const endian = 1;
1163
1164 dso->needs_swap = DSO_SWAP__NO;
1165
1166 switch (eidata) {
1167 case ELFDATA2LSB:
1168 /* We are big endian, DSO is little endian. */
1169 if (*(unsigned char const *)&endian != 1)
1170 dso->needs_swap = DSO_SWAP__YES;
1171 break;
1172
1173 case ELFDATA2MSB:
1174 /* We are little endian, DSO is big endian. */
1175 if (*(unsigned char const *)&endian != 0)
1176 dso->needs_swap = DSO_SWAP__YES;
1177 break;
1178
1179 default:
1180 pr_err("unrecognized DSO data encoding %d\n", eidata);
1181 return -EINVAL;
1182 }
1183
1184 return 0;
1185}
1186
1159static int dso__load_sym(struct dso *dso, struct map *map, const char *name, 1187static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1160 int fd, symbol_filter_t filter, int kmodule, 1188 int fd, symbol_filter_t filter, int kmodule,
1161 int want_symtab) 1189 int want_symtab)
@@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1187 goto out_elf_end; 1215 goto out_elf_end;
1188 } 1216 }
1189 1217
1218 if (dso__swap_init(dso, ehdr.e_ident[EI_DATA]))
1219 goto out_elf_end;
1220
1190 /* Always reject images with a mismatched build-id: */ 1221 /* Always reject images with a mismatched build-id: */
1191 if (dso->has_build_id) { 1222 if (dso->has_build_id) {
1192 u8 build_id[BUILD_ID_SIZE]; 1223 u8 build_id[BUILD_ID_SIZE];
@@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name,
1272 if (opdsec && sym.st_shndx == opdidx) { 1303 if (opdsec && sym.st_shndx == opdidx) {
1273 u32 offset = sym.st_value - opdshdr.sh_addr; 1304 u32 offset = sym.st_value - opdshdr.sh_addr;
1274 u64 *opd = opddata->d_buf + offset; 1305 u64 *opd = opddata->d_buf + offset;
1275 sym.st_value = *opd; 1306 sym.st_value = DSO__SWAP(dso, u64, *opd);
1276 sym.st_shndx = elf_addr_to_index(elf, sym.st_value); 1307 sym.st_shndx = elf_addr_to_index(elf, sym.st_value);
1277 } 1308 }
1278 1309
@@ -2786,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
2786 2817
2787struct map *dso__new_map(const char *name) 2818struct map *dso__new_map(const char *name)
2788{ 2819{
2820 struct map *map = NULL;
2789 struct dso *dso = dso__new(name); 2821 struct dso *dso = dso__new(name);
2790 struct map *map = map__new2(0, dso, MAP__FUNCTION); 2822
2823 if (dso)
2824 map = map__new2(0, dso, MAP__FUNCTION);
2791 2825
2792 return map; 2826 return map;
2793} 2827}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5649d63798cb..af0752b1aca1 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,6 +9,7 @@
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/rbtree.h> 10#include <linux/rbtree.h>
11#include <stdio.h> 11#include <stdio.h>
12#include <byteswap.h>
12 13
13#ifdef HAVE_CPLUS_DEMANGLE 14#ifdef HAVE_CPLUS_DEMANGLE
14extern char *cplus_demangle(const char *, int); 15extern char *cplus_demangle(const char *, int);
@@ -160,11 +161,18 @@ enum dso_kernel_type {
160 DSO_TYPE_GUEST_KERNEL 161 DSO_TYPE_GUEST_KERNEL
161}; 162};
162 163
164enum dso_swap_type {
165 DSO_SWAP__UNSET,
166 DSO_SWAP__NO,
167 DSO_SWAP__YES,
168};
169
163struct dso { 170struct dso {
164 struct list_head node; 171 struct list_head node;
165 struct rb_root symbols[MAP__NR_TYPES]; 172 struct rb_root symbols[MAP__NR_TYPES];
166 struct rb_root symbol_names[MAP__NR_TYPES]; 173 struct rb_root symbol_names[MAP__NR_TYPES];
167 enum dso_kernel_type kernel; 174 enum dso_kernel_type kernel;
175 enum dso_swap_type needs_swap;
168 u8 adjust_symbols:1; 176 u8 adjust_symbols:1;
169 u8 has_build_id:1; 177 u8 has_build_id:1;
170 u8 hit:1; 178 u8 hit:1;
@@ -182,6 +190,28 @@ struct dso {
182 char name[0]; 190 char name[0];
183}; 191};
184 192
193#define DSO__SWAP(dso, type, val) \
194({ \
195 type ____r = val; \
196 BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \
197 if (dso->needs_swap == DSO_SWAP__YES) { \
198 switch (sizeof(____r)) { \
199 case 2: \
200 ____r = bswap_16(val); \
201 break; \
202 case 4: \
203 ____r = bswap_32(val); \
204 break; \
205 case 8: \
206 ____r = bswap_64(val); \
207 break; \
208 default: \
209 BUG_ON(1); \
210 } \
211 } \
212 ____r; \
213})
214
185struct dso *dso__new(const char *name); 215struct dso *dso__new(const char *name);
186void dso__delete(struct dso *dso); 216void dso__delete(struct dso *dso);
187 217