about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-27 12:51:39 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-27 12:51:39 -0400
commit: e3be4266d3488cbbaddf7fcc661f4473db341e46 (patch)
tree: aeb3dd2c23a3b7a862dd06d262b56aae0bb3a739
parent: 73f479b243fe71a0fa82d21a21ac25d8932b88d5 (diff)
parent: 2530e39947d80901e8c56f1c1950437eb9f48354 (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner:
 "Another pile of fixes for perf:

   - Plug overflows and races in the core code

   - Sanitize the flow of the perf syscall so we error out before
     handling the more complex and hard to undo setups

   - Improve and fix Broadwell and Skylake hardware support

   - Revert a fix which broke what it tried to fix in perf tools

   - A couple of smaller fixes in various places of perf tools"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix copying of /proc/kcore
  perf intel-pt: Remove no_force_psb from documentation
  perf probe: Use existing routine to look for a kernel module by dso->short_name
  perf/x86: Change test_aperfmperf() and test_intel() to static
  tools lib traceevent: Fix string handling in heterogeneous arch environments
  perf record: Avoid infinite loop at buildid processing with no samples
  perf: Fix races in computing the header sizes
  perf: Fix u16 overflows
  perf: Restructure perf syscall point of no return
  perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask
  perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake
  perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific
  perf tools: Bool functions shouldn't return -1
  tools build: Add test for presence of __get_cpuid() gcc builtin
  tools build: Add test for presence of numa_num_possible_cpus() in libnuma
  Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum"
  perf stat: Fix per-pkg event reporting bug
-rw-r--r-- arch/x86/include/asm/msr-index.h | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 1
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 17
-rw-r--r-- arch/x86/kernel/cpu/perf_event_msr.c | 4
-rw-r--r-- kernel/events/core.c | 114
-rw-r--r-- tools/build/Makefile.feature | 8
-rw-r--r-- tools/build/feature/Makefile | 10
-rw-r--r-- tools/build/feature/test-all.c | 10
-rw-r--r-- tools/build/feature/test-get_cpuid.c | 7
-rw-r--r-- tools/build/feature/test-numa_num_possible_cpus.c | 6
-rw-r--r-- tools/lib/traceevent/event-parse.c | 23
-rw-r--r-- tools/perf/Documentation/intel-pt.txt | 15
-rw-r--r-- tools/perf/config/Makefile | 20
-rw-r--r-- tools/perf/util/probe-event.c | 13
-rw-r--r-- tools/perf/util/session.c | 5
-rw-r--r-- tools/perf/util/stat.c | 16
-rw-r--r-- tools/perf/util/symbol-elf.c | 37
-rw-r--r-- tools/perf/util/util.c | 2
18 files changed, 214 insertions, 96 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b98b471a3b7e..b8c14bb7fc8f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -141,6 +141,8 @@
141#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) 141#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
142#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) 142#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
143 143
144#define MSR_PEBS_FRONTEND 0x000003f7
145
144#define MSR_IA32_POWER_CTL 0x000001fc 146#define MSR_IA32_POWER_CTL 0x000001fc
145 147
146#define MSR_IA32_MC0_CTL 0x00000400 148#define MSR_IA32_MC0_CTL 0x00000400
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 5edf6d868fc1..165be83a7fa4 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -47,6 +47,7 @@ enum extra_reg_type {
47 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ 47 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
48 EXTRA_REG_LBR = 2, /* lbr_select */ 48 EXTRA_REG_LBR = 2, /* lbr_select */
49 EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ 49 EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
50 EXTRA_REG_FE = 4, /* fe_* */
50 51
51 EXTRA_REG_MAX /* number of entries needed */ 52 EXTRA_REG_MAX /* number of entries needed */
52}; 53};
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3fefebfbdf4b..f63360be2238 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
205 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), 205 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
206 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), 206 INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
207 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), 207 INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
208 /*
209 * Note the low 8 bits eventsel code is not a continuous field, containing
210 * some #GPing bits. These are masked out.
211 */
212 INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
208 EVENT_EXTRA_END 213 EVENT_EXTRA_END
209}; 214};
210 215
@@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
250 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 255 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
251 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ 256 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
252 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ 257 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
253 INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ 258 INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
254 EVENT_CONSTRAINT_END 259 EVENT_CONSTRAINT_END
255}; 260};
256 261
@@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
2891 2896
2892PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 2897PMU_FORMAT_ATTR(ldlat, "config1:0-15");
2893 2898
2899PMU_FORMAT_ATTR(frontend, "config1:0-23");
2900
2894static struct attribute *intel_arch3_formats_attr[] = { 2901static struct attribute *intel_arch3_formats_attr[] = {
2895 &format_attr_event.attr, 2902 &format_attr_event.attr,
2896 &format_attr_umask.attr, 2903 &format_attr_umask.attr,
@@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = {
2907 NULL, 2914 NULL,
2908}; 2915};
2909 2916
2917static struct attribute *skl_format_attr[] = {
2918 &format_attr_frontend.attr,
2919 NULL,
2920};
2921
2910static __initconst const struct x86_pmu core_pmu = { 2922static __initconst const struct x86_pmu core_pmu = {
2911 .name = "core", 2923 .name = "core",
2912 .handle_irq = x86_pmu_handle_irq, 2924 .handle_irq = x86_pmu_handle_irq,
@@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void)
3516 3528
3517 x86_pmu.hw_config = hsw_hw_config; 3529 x86_pmu.hw_config = hsw_hw_config;
3518 x86_pmu.get_event_constraints = hsw_get_event_constraints; 3530 x86_pmu.get_event_constraints = hsw_get_event_constraints;
3519 x86_pmu.cpu_events = hsw_events_attrs; 3531 x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
3532 skl_format_attr);
3520 WARN_ON(!x86_pmu.format_attrs); 3533 WARN_ON(!x86_pmu.format_attrs);
3521 x86_pmu.cpu_events = hsw_events_attrs; 3534 x86_pmu.cpu_events = hsw_events_attrs;
3522 pr_cont("Skylake events, "); 3535 pr_cont("Skylake events, ");
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index 086b12eae794..f32ac13934f2 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -10,12 +10,12 @@ enum perf_msr_id {
10 PERF_MSR_EVENT_MAX, 10 PERF_MSR_EVENT_MAX,
11}; 11};
12 12
13bool test_aperfmperf(int idx) 13static bool test_aperfmperf(int idx)
14{ 14{
15 return boot_cpu_has(X86_FEATURE_APERFMPERF); 15 return boot_cpu_has(X86_FEATURE_APERFMPERF);
16} 16}
17 17
18bool test_intel(int idx) 18static bool test_intel(int idx)
19{ 19{
20 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 20 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
21 boot_cpu_data.x86 != 6) 21 boot_cpu_data.x86 != 6)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69c4299..b11756f9b6dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
1243 PERF_EVENT_STATE_INACTIVE; 1243 PERF_EVENT_STATE_INACTIVE;
1244} 1244}
1245 1245
1246/* 1246static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
1247 * Called at perf_event creation and when events are attached/detached from a
1248 * group.
1249 */
1250static void perf_event__read_size(struct perf_event *event)
1251{ 1247{
1252 int entry = sizeof(u64); /* value */ 1248 int entry = sizeof(u64); /* value */
1253 int size = 0; 1249 int size = 0;
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event)
1263 entry += sizeof(u64); 1259 entry += sizeof(u64);
1264 1260
1265 if (event->attr.read_format & PERF_FORMAT_GROUP) { 1261 if (event->attr.read_format & PERF_FORMAT_GROUP) {
1266 nr += event->group_leader->nr_siblings; 1262 nr += nr_siblings;
1267 size += sizeof(u64); 1263 size += sizeof(u64);
1268 } 1264 }
1269 1265
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event)
1271 event->read_size = size; 1267 event->read_size = size;
1272} 1268}
1273 1269
1274static void perf_event__header_size(struct perf_event *event) 1270static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
1275{ 1271{
1276 struct perf_sample_data *data; 1272 struct perf_sample_data *data;
1277 u64 sample_type = event->attr.sample_type;
1278 u16 size = 0; 1273 u16 size = 0;
1279 1274
1280 perf_event__read_size(event);
1281
1282 if (sample_type & PERF_SAMPLE_IP) 1275 if (sample_type & PERF_SAMPLE_IP)
1283 size += sizeof(data->ip); 1276 size += sizeof(data->ip);
1284 1277
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
1303 event->header_size = size; 1296 event->header_size = size;
1304} 1297}
1305 1298
1299/*
1300 * Called at perf_event creation and when events are attached/detached from a
1301 * group.
1302 */
1303static void perf_event__header_size(struct perf_event *event)
1304{
1305 __perf_event_read_size(event,
1306 event->group_leader->nr_siblings);
1307 __perf_event_header_size(event, event->attr.sample_type);
1308}
1309
1306static void perf_event__id_header_size(struct perf_event *event) 1310static void perf_event__id_header_size(struct perf_event *event)
1307{ 1311{
1308 struct perf_sample_data *data; 1312 struct perf_sample_data *data;
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
1330 event->id_header_size = size; 1334 event->id_header_size = size;
1331} 1335}
1332 1336
1337static bool perf_event_validate_size(struct perf_event *event)
1338{
1339 /*
1340 * The values computed here will be over-written when we actually
1341 * attach the event.
1342 */
1343 __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
1344 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
1345 perf_event__id_header_size(event);
1346
1347 /*
1348 * Sum the lot; should not exceed the 64k limit we have on records.
1349 * Conservative limit to allow for callchains and other variable fields.
1350 */
1351 if (event->read_size + event->header_size +
1352 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
1353 return false;
1354
1355 return true;
1356}
1357
1333static void perf_group_attach(struct perf_event *event) 1358static void perf_group_attach(struct perf_event *event)
1334{ 1359{
1335 struct perf_event *group_leader = event->group_leader, *pos; 1360 struct perf_event *group_leader = event->group_leader, *pos;
@@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open,
8297 8322
8298 if (move_group) { 8323 if (move_group) {
8299 gctx = group_leader->ctx; 8324 gctx = group_leader->ctx;
8325 mutex_lock_double(&gctx->mutex, &ctx->mutex);
8326 } else {
8327 mutex_lock(&ctx->mutex);
8328 }
8300 8329
8330 if (!perf_event_validate_size(event)) {
8331 err = -E2BIG;
8332 goto err_locked;
8333 }
8334
8335 /*
8336 * Must be under the same ctx::mutex as perf_install_in_context(),
8337 * because we need to serialize with concurrent event creation.
8338 */
8339 if (!exclusive_event_installable(event, ctx)) {
8340 /* exclusive and group stuff are assumed mutually exclusive */
8341 WARN_ON_ONCE(move_group);
8342
8343 err = -EBUSY;
8344 goto err_locked;
8345 }
8346
8347 WARN_ON_ONCE(ctx->parent_ctx);
8348
8349 if (move_group) {
8301 /* 8350 /*
8302 * See perf_event_ctx_lock() for comments on the details 8351 * See perf_event_ctx_lock() for comments on the details
8303 * of swizzling perf_event::ctx. 8352 * of swizzling perf_event::ctx.
8304 */ 8353 */
8305 mutex_lock_double(&gctx->mutex, &ctx->mutex);
8306
8307 perf_remove_from_context(group_leader, false); 8354 perf_remove_from_context(group_leader, false);
8308 8355
8309 list_for_each_entry(sibling, &group_leader->sibling_list, 8356 list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open,
8311 perf_remove_from_context(sibling, false); 8358 perf_remove_from_context(sibling, false);
8312 put_ctx(gctx); 8359 put_ctx(gctx);
8313 } 8360 }
8314 } else {
8315 mutex_lock(&ctx->mutex);
8316 }
8317 8361
8318 WARN_ON_ONCE(ctx->parent_ctx);
8319
8320 if (move_group) {
8321 /* 8362 /*
8322 * Wait for everybody to stop referencing the events through 8363 * Wait for everybody to stop referencing the events through
8323 * the old lists, before installing it on new lists. 8364 * the old lists, before installing it on new lists.
@@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open,
8349 perf_event__state_init(group_leader); 8390 perf_event__state_init(group_leader);
8350 perf_install_in_context(ctx, group_leader, group_leader->cpu); 8391 perf_install_in_context(ctx, group_leader, group_leader->cpu);
8351 get_ctx(ctx); 8392 get_ctx(ctx);
8352 }
8353 8393
8354 if (!exclusive_event_installable(event, ctx)) { 8394 /*
8355 err = -EBUSY; 8395 * Now that all events are installed in @ctx, nothing
8356 mutex_unlock(&ctx->mutex); 8396 * references @gctx anymore, so drop the last reference we have
8357 fput(event_file); 8397 * on it.
8358 goto err_context; 8398 */
8399 put_ctx(gctx);
8359 } 8400 }
8360 8401
8402 /*
8403 * Precalculate sample_data sizes; do while holding ctx::mutex such
8404 * that we're serialized against further additions and before
8405 * perf_install_in_context() which is the point the event is active and
8406 * can use these values.
8407 */
8408 perf_event__header_size(event);
8409 perf_event__id_header_size(event);
8410
8361 perf_install_in_context(ctx, event, event->cpu); 8411 perf_install_in_context(ctx, event, event->cpu);
8362 perf_unpin_context(ctx); 8412 perf_unpin_context(ctx);
8363 8413
8364 if (move_group) { 8414 if (move_group)
8365 mutex_unlock(&gctx->mutex); 8415 mutex_unlock(&gctx->mutex);
8366 put_ctx(gctx);
8367 }
8368 mutex_unlock(&ctx->mutex); 8416 mutex_unlock(&ctx->mutex);
8369 8417
8370 put_online_cpus(); 8418 put_online_cpus();
@@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open,
8376 mutex_unlock(&current->perf_event_mutex); 8424 mutex_unlock(&current->perf_event_mutex);
8377 8425
8378 /* 8426 /*
8379 * Precalculate sample_data sizes
8380 */
8381 perf_event__header_size(event);
8382 perf_event__id_header_size(event);
8383
8384 /*
8385 * Drop the reference on the group_event after placing the 8427 * Drop the reference on the group_event after placing the
8386 * new event on the sibling_list. This ensures destruction 8428 * new event on the sibling_list. This ensures destruction
8387 * of the group leader will find the pointer to itself in 8429 * of the group leader will find the pointer to itself in
@@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open,
8391 fd_install(event_fd, event_file); 8433 fd_install(event_fd, event_file);
8392 return event_fd; 8434 return event_fd;
8393 8435
8436err_locked:
8437 if (move_group)
8438 mutex_unlock(&gctx->mutex);
8439 mutex_unlock(&ctx->mutex);
8440/* err_file: */
8441 fput(event_file);
8394err_context: 8442err_context:
8395 perf_unpin_context(ctx); 8443 perf_unpin_context(ctx);
8396 put_ctx(ctx); 8444 put_ctx(ctx);
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 2975632d51e2..c8fe6d177119 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -41,6 +41,7 @@ FEATURE_TESTS ?= \
41 libelf-getphdrnum \ 41 libelf-getphdrnum \
42 libelf-mmap \ 42 libelf-mmap \
43 libnuma \ 43 libnuma \
44 numa_num_possible_cpus \
44 libperl \ 45 libperl \
45 libpython \ 46 libpython \
46 libpython-version \ 47 libpython-version \
@@ -51,7 +52,8 @@ FEATURE_TESTS ?= \
51 timerfd \ 52 timerfd \
52 libdw-dwarf-unwind \ 53 libdw-dwarf-unwind \
53 zlib \ 54 zlib \
54 lzma 55 lzma \
56 get_cpuid
55 57
56FEATURE_DISPLAY ?= \ 58FEATURE_DISPLAY ?= \
57 dwarf \ 59 dwarf \
@@ -61,13 +63,15 @@ FEATURE_DISPLAY ?= \
61 libbfd \ 63 libbfd \
62 libelf \ 64 libelf \
63 libnuma \ 65 libnuma \
66 numa_num_possible_cpus \
64 libperl \ 67 libperl \
65 libpython \ 68 libpython \
66 libslang \ 69 libslang \
67 libunwind \ 70 libunwind \
68 libdw-dwarf-unwind \ 71 libdw-dwarf-unwind \
69 zlib \ 72 zlib \
70 lzma 73 lzma \
74 get_cpuid
71 75
72# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. 76# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
73# If in the future we need per-feature checks/flags for features not 77# If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 74ca42093d70..e43a2971bf56 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -19,6 +19,7 @@ FILES= \
19 test-libelf-getphdrnum.bin \ 19 test-libelf-getphdrnum.bin \
20 test-libelf-mmap.bin \ 20 test-libelf-mmap.bin \
21 test-libnuma.bin \ 21 test-libnuma.bin \
22 test-numa_num_possible_cpus.bin \
22 test-libperl.bin \ 23 test-libperl.bin \
23 test-libpython.bin \ 24 test-libpython.bin \
24 test-libpython-version.bin \ 25 test-libpython-version.bin \
@@ -34,7 +35,8 @@ FILES= \
34 test-compile-x32.bin \ 35 test-compile-x32.bin \
35 test-zlib.bin \ 36 test-zlib.bin \
36 test-lzma.bin \ 37 test-lzma.bin \
37 test-bpf.bin 38 test-bpf.bin \
39 test-get_cpuid.bin
38 40
39CC := $(CROSS_COMPILE)gcc -MD 41CC := $(CROSS_COMPILE)gcc -MD
40PKG_CONFIG := $(CROSS_COMPILE)pkg-config 42PKG_CONFIG := $(CROSS_COMPILE)pkg-config
@@ -87,6 +89,9 @@ test-libelf-getphdrnum.bin:
87test-libnuma.bin: 89test-libnuma.bin:
88 $(BUILD) -lnuma 90 $(BUILD) -lnuma
89 91
92test-numa_num_possible_cpus.bin:
93 $(BUILD) -lnuma
94
90test-libunwind.bin: 95test-libunwind.bin:
91 $(BUILD) -lelf 96 $(BUILD) -lelf
92 97
@@ -162,6 +167,9 @@ test-zlib.bin:
162test-lzma.bin: 167test-lzma.bin:
163 $(BUILD) -llzma 168 $(BUILD) -llzma
164 169
170test-get_cpuid.bin:
171 $(BUILD)
172
165test-bpf.bin: 173test-bpf.bin:
166 $(BUILD) 174 $(BUILD)
167 175
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 84689a67814a..33cf6f20bd4e 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -77,6 +77,10 @@
77# include "test-libnuma.c" 77# include "test-libnuma.c"
78#undef main 78#undef main
79 79
80#define main main_test_numa_num_possible_cpus
81# include "test-numa_num_possible_cpus.c"
82#undef main
83
80#define main main_test_timerfd 84#define main main_test_timerfd
81# include "test-timerfd.c" 85# include "test-timerfd.c"
82#undef main 86#undef main
@@ -117,6 +121,10 @@
117# include "test-lzma.c" 121# include "test-lzma.c"
118#undef main 122#undef main
119 123
124#define main main_test_get_cpuid
125# include "test-get_cpuid.c"
126#undef main
127
120int main(int argc, char *argv[]) 128int main(int argc, char *argv[])
121{ 129{
122 main_test_libpython(); 130 main_test_libpython();
@@ -136,6 +144,7 @@ int main(int argc, char *argv[])
136 main_test_libbfd(); 144 main_test_libbfd();
137 main_test_backtrace(); 145 main_test_backtrace();
138 main_test_libnuma(); 146 main_test_libnuma();
147 main_test_numa_num_possible_cpus();
139 main_test_timerfd(); 148 main_test_timerfd();
140 main_test_stackprotector_all(); 149 main_test_stackprotector_all();
141 main_test_libdw_dwarf_unwind(); 150 main_test_libdw_dwarf_unwind();
@@ -143,6 +152,7 @@ int main(int argc, char *argv[])
143 main_test_zlib(); 152 main_test_zlib();
144 main_test_pthread_attr_setaffinity_np(); 153 main_test_pthread_attr_setaffinity_np();
145 main_test_lzma(); 154 main_test_lzma();
155 main_test_get_cpuid();
146 156
147 return 0; 157 return 0;
148} 158}
diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c
new file mode 100644
index 000000000000..d7a2c407130d
--- /dev/null
+++ b/tools/build/feature/test-get_cpuid.c
@@ -0,0 +1,7 @@
1#include <cpuid.h>
2
3int main(void)
4{
5 unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
6 return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
7}
diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c
new file mode 100644
index 000000000000..2606e94b0659
--- /dev/null
+++ b/tools/build/feature/test-numa_num_possible_cpus.c
@@ -0,0 +1,6 @@
1#include <numa.h>
2
3int main(void)
4{
5 return numa_num_possible_cpus();
6}
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 4d885934b919..cf42b090477b 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3795,7 +3795,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
3795 struct format_field *field; 3795 struct format_field *field;
3796 struct printk_map *printk; 3796 struct printk_map *printk;
3797 long long val, fval; 3797 long long val, fval;
3798 unsigned long addr; 3798 unsigned long long addr;
3799 char *str; 3799 char *str;
3800 unsigned char *hex; 3800 unsigned char *hex;
3801 int print; 3801 int print;
@@ -3828,13 +3828,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
3828 */ 3828 */
3829 if (!(field->flags & FIELD_IS_ARRAY) && 3829 if (!(field->flags & FIELD_IS_ARRAY) &&
3830 field->size == pevent->long_size) { 3830 field->size == pevent->long_size) {
3831 addr = *(unsigned long *)(data + field->offset); 3831
3832 /* Handle heterogeneous recording and processing
3833 * architectures
3834 *
3835 * CASE I:
3836 * Traces recorded on 32-bit devices (32-bit
3837 * addressing) and processed on 64-bit devices:
3838 * In this case, only 32 bits should be read.
3839 *
3840 * CASE II:
3841 * Traces recorded on 64 bit devices and processed
3842 * on 32-bit devices:
3843 * In this case, 64 bits must be read.
3844 */
3845 addr = (pevent->long_size == 8) ?
3846 *(unsigned long long *)(data + field->offset) :
3847 (unsigned long long)*(unsigned int *)(data + field->offset);
3848
3832 /* Check if it matches a print format */ 3849 /* Check if it matches a print format */
3833 printk = find_printk(pevent, addr); 3850 printk = find_printk(pevent, addr);
3834 if (printk) 3851 if (printk)
3835 trace_seq_puts(s, printk->printk); 3852 trace_seq_puts(s, printk->printk);
3836 else 3853 else
3837 trace_seq_printf(s, "%lx", addr); 3854 trace_seq_printf(s, "%llx", addr);
3838 break; 3855 break;
3839 } 3856 }
3840 str = malloc(len + 1); 3857 str = malloc(len + 1);
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 4a0501d7a3b4..c94c9de3173e 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -364,21 +364,6 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
364 364
365 CYC packets are not requested by default. 365 CYC packets are not requested by default.
366 366
367no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR.
368
369 It stops the driver resetting the byte count to zero whenever
370 enabling the trace (for example on context switches) which in
371 turn results in no PSB being forced. However some processors
372 will produce a PSB anyway.
373
374 In any case, there is still a PSB when the trace is enabled for
375 the first time.
376
377 no_force_psb can be used to slightly decrease the trace size but
378 may make it harder for the decoder to recover from errors.
379
380 no_force_psb is not selected by default.
381
382 367
383new snapshot option 368new snapshot option
384------------------- 369-------------------
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 827557fc7511..38a08539f4bf 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -573,9 +573,14 @@ ifndef NO_LIBNUMA
573 msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); 573 msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev);
574 NO_LIBNUMA := 1 574 NO_LIBNUMA := 1
575 else 575 else
576 CFLAGS += -DHAVE_LIBNUMA_SUPPORT 576 ifeq ($(feature-numa_num_possible_cpus), 0)
577 EXTLIBS += -lnuma 577 msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8);
578 $(call detected,CONFIG_NUMA) 578 NO_LIBNUMA := 1
579 else
580 CFLAGS += -DHAVE_LIBNUMA_SUPPORT
581 EXTLIBS += -lnuma
582 $(call detected,CONFIG_NUMA)
583 endif
579 endif 584 endif
580endif 585endif
581 586
@@ -621,8 +626,13 @@ ifdef LIBBABELTRACE
621endif 626endif
622 627
623ifndef NO_AUXTRACE 628ifndef NO_AUXTRACE
624 $(call detected,CONFIG_AUXTRACE) 629 ifeq ($(feature-get_cpuid), 0)
625 CFLAGS += -DHAVE_AUXTRACE_SUPPORT 630 msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
631 NO_AUXTRACE := 1
632 else
633 $(call detected,CONFIG_AUXTRACE)
634 CFLAGS += -DHAVE_AUXTRACE_SUPPORT
635 endif
626endif 636endif
627 637
628# Among the variables below, these: 638# Among the variables below, these:
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eb5f18b75402..c6f9af78f6f5 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -270,12 +270,13 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
270 int ret = 0; 270 int ret = 0;
271 271
272 if (module) { 272 if (module) {
273 list_for_each_entry(dso, &host_machine->dsos.head, node) { 273 char module_name[128];
274 if (!dso->kernel) 274
275 continue; 275 snprintf(module_name, sizeof(module_name), "[%s]", module);
276 if (strncmp(dso->short_name + 1, module, 276 map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
277 dso->short_name_len - 2) == 0) 277 if (map) {
278 goto found; 278 dso = map->dso;
279 goto found;
279 } 280 }
280 pr_debug("Failed to find module %s.\n", module); 281 pr_debug("Failed to find module %s.\n", module);
281 return -ENOENT; 282 return -ENOENT;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8a4537ee9bc3..fc3f7c922f99 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1580,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session,
1580 file_offset = page_offset; 1580 file_offset = page_offset;
1581 head = data_offset - page_offset; 1581 head = data_offset - page_offset;
1582 1582
1583 if (data_size && (data_offset + data_size < file_size)) 1583 if (data_size == 0)
1584 goto out;
1585
1586 if (data_offset + data_size < file_size)
1584 file_size = data_offset + data_size; 1587 file_size = data_offset + data_size;
1585 1588
1586 ui_progress__init(&prog, file_size, "Processing events..."); 1589 ui_progress__init(&prog, file_size, "Processing events...");
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 415c359de465..2d065d065b67 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter)
196 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); 196 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
197} 197}
198 198
199static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) 199static int check_per_pkg(struct perf_evsel *counter,
200 struct perf_counts_values *vals, int cpu, bool *skip)
200{ 201{
201 unsigned long *mask = counter->per_pkg_mask; 202 unsigned long *mask = counter->per_pkg_mask;
202 struct cpu_map *cpus = perf_evsel__cpus(counter); 203 struct cpu_map *cpus = perf_evsel__cpus(counter);
@@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
218 counter->per_pkg_mask = mask; 219 counter->per_pkg_mask = mask;
219 } 220 }
220 221
222 /*
223 * we do not consider an event that has not run as a good
224 * instance to mark a package as used (skip=1). Otherwise
225 * we may run into a situation where the first CPU in a package
226 * is not running anything, yet the second is, and this function
227 * would mark the package as used after the first CPU and would
228 * not read the values from the second CPU.
229 */
230 if (!(vals->run && vals->ena))
231 return 0;
232
221 s = cpu_map__get_socket(cpus, cpu); 233 s = cpu_map__get_socket(cpus, cpu);
222 if (s < 0) 234 if (s < 0)
223 return -1; 235 return -1;
@@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
235 static struct perf_counts_values zero; 247 static struct perf_counts_values zero;
236 bool skip = false; 248 bool skip = false;
237 249
238 if (check_per_pkg(evsel, cpu, &skip)) { 250 if (check_per_pkg(evsel, count, cpu, &skip)) {
239 pr_err("failed to read per-pkg counter\n"); 251 pr_err("failed to read per-pkg counter\n");
240 return -1; 252 return -1;
241 } 253 }
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 53bb5f59ec58..475d88d0a1c9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
38#endif 38#endif
39 39
40#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT 40#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
41int elf_getphdrnum(Elf *elf, size_t *dst) 41static int elf_getphdrnum(Elf *elf, size_t *dst)
42{ 42{
43 GElf_Ehdr gehdr; 43 GElf_Ehdr gehdr;
44 GElf_Ehdr *ehdr; 44 GElf_Ehdr *ehdr;
@@ -1271,8 +1271,6 @@ out_close:
1271static int kcore__init(struct kcore *kcore, char *filename, int elfclass, 1271static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
1272 bool temp) 1272 bool temp)
1273{ 1273{
1274 GElf_Ehdr *ehdr;
1275
1276 kcore->elfclass = elfclass; 1274 kcore->elfclass = elfclass;
1277 1275
1278 if (temp) 1276 if (temp)
@@ -1289,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
1289 if (!gelf_newehdr(kcore->elf, elfclass)) 1287 if (!gelf_newehdr(kcore->elf, elfclass))
1290 goto out_end; 1288 goto out_end;
1291 1289
1292 ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); 1290 memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
1293 if (!ehdr)
1294 goto out_end;
1295 1291
1296 return 0; 1292 return 0;
1297 1293
@@ -1348,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
1348static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, 1344static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
1349 u64 addr, u64 len) 1345 u64 addr, u64 len)
1350{ 1346{
1351 GElf_Phdr gphdr; 1347 GElf_Phdr phdr = {
1352 GElf_Phdr *phdr; 1348 .p_type = PT_LOAD,
1353 1349 .p_flags = PF_R | PF_W | PF_X,
1354 phdr = gelf_getphdr(kcore->elf, idx, &gphdr); 1350 .p_offset = offset,
1355 if (!phdr) 1351 .p_vaddr = addr,
1356 return -1; 1352 .p_paddr = 0,
1357 1353 .p_filesz = len,
1358 phdr->p_type = PT_LOAD; 1354 .p_memsz = len,
1359 phdr->p_flags = PF_R | PF_W | PF_X; 1355 .p_align = page_size,
1360 phdr->p_offset = offset; 1356 };
1361 phdr->p_vaddr = addr; 1357
1362 phdr->p_paddr = 0; 1358 if (!gelf_update_phdr(kcore->elf, idx, &phdr))
1363 phdr->p_filesz = len;
1364 phdr->p_memsz = len;
1365 phdr->p_align = page_size;
1366
1367 if (!gelf_update_phdr(kcore->elf, idx, phdr))
1368 return -1; 1359 return -1;
1369 1360
1370 return 0; 1361 return 0;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 7acafb3c5592..c2cd9bf2348b 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -709,7 +709,7 @@ bool find_process(const char *name)
709 709
710 dir = opendir(procfs__mountpoint()); 710 dir = opendir(procfs__mountpoint());
711 if (!dir) 711 if (!dir)
712 return -1; 712 return false;
713 713
714 /* Walk through the directory. */ 714 /* Walk through the directory. */
715 while (ret && (d = readdir(dir)) != NULL) { 715 while (ret && (d = readdir(dir)) != NULL) {