diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-27 12:51:39 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-27 12:51:39 -0400 |
commit | e3be4266d3488cbbaddf7fcc661f4473db341e46 (patch) | |
tree | aeb3dd2c23a3b7a862dd06d262b56aae0bb3a739 | |
parent | 73f479b243fe71a0fa82d21a21ac25d8932b88d5 (diff) | |
parent | 2530e39947d80901e8c56f1c1950437eb9f48354 (diff) |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner:
"Another pile of fixes for perf:
- Plug overflows and races in the core code
- Sanitize the flow of the perf syscall so we error out before
handling the more complex and hard to undo setups
- Improve and fix Broadwell and Skylake hardware support
- Revert a fix which broke what it tried to fix in perf tools
- A couple of smaller fixes in various places of perf tools"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf tools: Fix copying of /proc/kcore
perf intel-pt: Remove no_force_psb from documentation
perf probe: Use existing routine to look for a kernel module by dso->short_name
perf/x86: Change test_aperfmperf() and test_intel() to static
tools lib traceevent: Fix string handling in heterogeneous arch environments
perf record: Avoid infinite loop at buildid processing with no samples
perf: Fix races in computing the header sizes
perf: Fix u16 overflows
perf: Restructure perf syscall point of no return
perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask
perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake
perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific
perf tools: Bool functions shouldn't return -1
tools build: Add test for presence of __get_cpuid() gcc builtin
tools build: Add test for presence of numa_num_possible_cpus() in libnuma
Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum"
perf stat: Fix per-pkg event reporting bug
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_msr.c | 4 | ||||
-rw-r--r-- | kernel/events/core.c | 114 | ||||
-rw-r--r-- | tools/build/Makefile.feature | 8 | ||||
-rw-r--r-- | tools/build/feature/Makefile | 10 | ||||
-rw-r--r-- | tools/build/feature/test-all.c | 10 | ||||
-rw-r--r-- | tools/build/feature/test-get_cpuid.c | 7 | ||||
-rw-r--r-- | tools/build/feature/test-numa_num_possible_cpus.c | 6 | ||||
-rw-r--r-- | tools/lib/traceevent/event-parse.c | 23 | ||||
-rw-r--r-- | tools/perf/Documentation/intel-pt.txt | 15 | ||||
-rw-r--r-- | tools/perf/config/Makefile | 20 | ||||
-rw-r--r-- | tools/perf/util/probe-event.c | 13 | ||||
-rw-r--r-- | tools/perf/util/session.c | 5 | ||||
-rw-r--r-- | tools/perf/util/stat.c | 16 | ||||
-rw-r--r-- | tools/perf/util/symbol-elf.c | 37 | ||||
-rw-r--r-- | tools/perf/util/util.c | 2 |
18 files changed, 214 insertions, 96 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b98b471a3b7e..b8c14bb7fc8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -141,6 +141,8 @@ | |||
141 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | 141 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) |
142 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | 142 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) |
143 | 143 | ||
144 | #define MSR_PEBS_FRONTEND 0x000003f7 | ||
145 | |||
144 | #define MSR_IA32_POWER_CTL 0x000001fc | 146 | #define MSR_IA32_POWER_CTL 0x000001fc |
145 | 147 | ||
146 | #define MSR_IA32_MC0_CTL 0x00000400 | 148 | #define MSR_IA32_MC0_CTL 0x00000400 |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..165be83a7fa4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -47,6 +47,7 @@ enum extra_reg_type { | |||
47 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | 47 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
48 | EXTRA_REG_LBR = 2, /* lbr_select */ | 48 | EXTRA_REG_LBR = 2, /* lbr_select */ |
49 | EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ | 49 | EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ |
50 | EXTRA_REG_FE = 4, /* fe_* */ | ||
50 | 51 | ||
51 | EXTRA_REG_MAX /* number of entries needed */ | 52 | EXTRA_REG_MAX /* number of entries needed */ |
52 | }; | 53 | }; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3fefebfbdf4b..f63360be2238 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { | |||
205 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), | 205 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
206 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), | 206 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), |
207 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), | 207 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
208 | /* | ||
209 | * Note the low 8 bits eventsel code is not a continuous field, containing | ||
210 | * some #GPing bits. These are masked out. | ||
211 | */ | ||
212 | INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), | ||
208 | EVENT_EXTRA_END | 213 | EVENT_EXTRA_END |
209 | }; | 214 | }; |
210 | 215 | ||
@@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = { | |||
250 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | 255 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
251 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ | 256 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
252 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ | 257 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ |
253 | INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ | 258 | INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ |
254 | EVENT_CONSTRAINT_END | 259 | EVENT_CONSTRAINT_END |
255 | }; | 260 | }; |
256 | 261 | ||
@@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); | |||
2891 | 2896 | ||
2892 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); | 2897 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); |
2893 | 2898 | ||
2899 | PMU_FORMAT_ATTR(frontend, "config1:0-23"); | ||
2900 | |||
2894 | static struct attribute *intel_arch3_formats_attr[] = { | 2901 | static struct attribute *intel_arch3_formats_attr[] = { |
2895 | &format_attr_event.attr, | 2902 | &format_attr_event.attr, |
2896 | &format_attr_umask.attr, | 2903 | &format_attr_umask.attr, |
@@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = { | |||
2907 | NULL, | 2914 | NULL, |
2908 | }; | 2915 | }; |
2909 | 2916 | ||
2917 | static struct attribute *skl_format_attr[] = { | ||
2918 | &format_attr_frontend.attr, | ||
2919 | NULL, | ||
2920 | }; | ||
2921 | |||
2910 | static __initconst const struct x86_pmu core_pmu = { | 2922 | static __initconst const struct x86_pmu core_pmu = { |
2911 | .name = "core", | 2923 | .name = "core", |
2912 | .handle_irq = x86_pmu_handle_irq, | 2924 | .handle_irq = x86_pmu_handle_irq, |
@@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void) | |||
3516 | 3528 | ||
3517 | x86_pmu.hw_config = hsw_hw_config; | 3529 | x86_pmu.hw_config = hsw_hw_config; |
3518 | x86_pmu.get_event_constraints = hsw_get_event_constraints; | 3530 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
3519 | x86_pmu.cpu_events = hsw_events_attrs; | 3531 | x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, |
3532 | skl_format_attr); | ||
3520 | WARN_ON(!x86_pmu.format_attrs); | 3533 | WARN_ON(!x86_pmu.format_attrs); |
3521 | x86_pmu.cpu_events = hsw_events_attrs; | 3534 | x86_pmu.cpu_events = hsw_events_attrs; |
3522 | pr_cont("Skylake events, "); | 3535 | pr_cont("Skylake events, "); |
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index 086b12eae794..f32ac13934f2 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c | |||
@@ -10,12 +10,12 @@ enum perf_msr_id { | |||
10 | PERF_MSR_EVENT_MAX, | 10 | PERF_MSR_EVENT_MAX, |
11 | }; | 11 | }; |
12 | 12 | ||
13 | bool test_aperfmperf(int idx) | 13 | static bool test_aperfmperf(int idx) |
14 | { | 14 | { |
15 | return boot_cpu_has(X86_FEATURE_APERFMPERF); | 15 | return boot_cpu_has(X86_FEATURE_APERFMPERF); |
16 | } | 16 | } |
17 | 17 | ||
18 | bool test_intel(int idx) | 18 | static bool test_intel(int idx) |
19 | { | 19 | { |
20 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || | 20 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || |
21 | boot_cpu_data.x86 != 6) | 21 | boot_cpu_data.x86 != 6) |
diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..b11756f9b6dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) | |||
1243 | PERF_EVENT_STATE_INACTIVE; | 1243 | PERF_EVENT_STATE_INACTIVE; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | /* | 1246 | static void __perf_event_read_size(struct perf_event *event, int nr_siblings) |
1247 | * Called at perf_event creation and when events are attached/detached from a | ||
1248 | * group. | ||
1249 | */ | ||
1250 | static void perf_event__read_size(struct perf_event *event) | ||
1251 | { | 1247 | { |
1252 | int entry = sizeof(u64); /* value */ | 1248 | int entry = sizeof(u64); /* value */ |
1253 | int size = 0; | 1249 | int size = 0; |
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) | |||
1263 | entry += sizeof(u64); | 1259 | entry += sizeof(u64); |
1264 | 1260 | ||
1265 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | 1261 | if (event->attr.read_format & PERF_FORMAT_GROUP) { |
1266 | nr += event->group_leader->nr_siblings; | 1262 | nr += nr_siblings; |
1267 | size += sizeof(u64); | 1263 | size += sizeof(u64); |
1268 | } | 1264 | } |
1269 | 1265 | ||
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) | |||
1271 | event->read_size = size; | 1267 | event->read_size = size; |
1272 | } | 1268 | } |
1273 | 1269 | ||
1274 | static void perf_event__header_size(struct perf_event *event) | 1270 | static void __perf_event_header_size(struct perf_event *event, u64 sample_type) |
1275 | { | 1271 | { |
1276 | struct perf_sample_data *data; | 1272 | struct perf_sample_data *data; |
1277 | u64 sample_type = event->attr.sample_type; | ||
1278 | u16 size = 0; | 1273 | u16 size = 0; |
1279 | 1274 | ||
1280 | perf_event__read_size(event); | ||
1281 | |||
1282 | if (sample_type & PERF_SAMPLE_IP) | 1275 | if (sample_type & PERF_SAMPLE_IP) |
1283 | size += sizeof(data->ip); | 1276 | size += sizeof(data->ip); |
1284 | 1277 | ||
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) | |||
1303 | event->header_size = size; | 1296 | event->header_size = size; |
1304 | } | 1297 | } |
1305 | 1298 | ||
1299 | /* | ||
1300 | * Called at perf_event creation and when events are attached/detached from a | ||
1301 | * group. | ||
1302 | */ | ||
1303 | static void perf_event__header_size(struct perf_event *event) | ||
1304 | { | ||
1305 | __perf_event_read_size(event, | ||
1306 | event->group_leader->nr_siblings); | ||
1307 | __perf_event_header_size(event, event->attr.sample_type); | ||
1308 | } | ||
1309 | |||
1306 | static void perf_event__id_header_size(struct perf_event *event) | 1310 | static void perf_event__id_header_size(struct perf_event *event) |
1307 | { | 1311 | { |
1308 | struct perf_sample_data *data; | 1312 | struct perf_sample_data *data; |
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) | |||
1330 | event->id_header_size = size; | 1334 | event->id_header_size = size; |
1331 | } | 1335 | } |
1332 | 1336 | ||
1337 | static bool perf_event_validate_size(struct perf_event *event) | ||
1338 | { | ||
1339 | /* | ||
1340 | * The values computed here will be over-written when we actually | ||
1341 | * attach the event. | ||
1342 | */ | ||
1343 | __perf_event_read_size(event, event->group_leader->nr_siblings + 1); | ||
1344 | __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); | ||
1345 | perf_event__id_header_size(event); | ||
1346 | |||
1347 | /* | ||
1348 | * Sum the lot; should not exceed the 64k limit we have on records. | ||
1349 | * Conservative limit to allow for callchains and other variable fields. | ||
1350 | */ | ||
1351 | if (event->read_size + event->header_size + | ||
1352 | event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) | ||
1353 | return false; | ||
1354 | |||
1355 | return true; | ||
1356 | } | ||
1357 | |||
1333 | static void perf_group_attach(struct perf_event *event) | 1358 | static void perf_group_attach(struct perf_event *event) |
1334 | { | 1359 | { |
1335 | struct perf_event *group_leader = event->group_leader, *pos; | 1360 | struct perf_event *group_leader = event->group_leader, *pos; |
@@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8297 | 8322 | ||
8298 | if (move_group) { | 8323 | if (move_group) { |
8299 | gctx = group_leader->ctx; | 8324 | gctx = group_leader->ctx; |
8325 | mutex_lock_double(&gctx->mutex, &ctx->mutex); | ||
8326 | } else { | ||
8327 | mutex_lock(&ctx->mutex); | ||
8328 | } | ||
8300 | 8329 | ||
8330 | if (!perf_event_validate_size(event)) { | ||
8331 | err = -E2BIG; | ||
8332 | goto err_locked; | ||
8333 | } | ||
8334 | |||
8335 | /* | ||
8336 | * Must be under the same ctx::mutex as perf_install_in_context(), | ||
8337 | * because we need to serialize with concurrent event creation. | ||
8338 | */ | ||
8339 | if (!exclusive_event_installable(event, ctx)) { | ||
8340 | /* exclusive and group stuff are assumed mutually exclusive */ | ||
8341 | WARN_ON_ONCE(move_group); | ||
8342 | |||
8343 | err = -EBUSY; | ||
8344 | goto err_locked; | ||
8345 | } | ||
8346 | |||
8347 | WARN_ON_ONCE(ctx->parent_ctx); | ||
8348 | |||
8349 | if (move_group) { | ||
8301 | /* | 8350 | /* |
8302 | * See perf_event_ctx_lock() for comments on the details | 8351 | * See perf_event_ctx_lock() for comments on the details |
8303 | * of swizzling perf_event::ctx. | 8352 | * of swizzling perf_event::ctx. |
8304 | */ | 8353 | */ |
8305 | mutex_lock_double(&gctx->mutex, &ctx->mutex); | ||
8306 | |||
8307 | perf_remove_from_context(group_leader, false); | 8354 | perf_remove_from_context(group_leader, false); |
8308 | 8355 | ||
8309 | list_for_each_entry(sibling, &group_leader->sibling_list, | 8356 | list_for_each_entry(sibling, &group_leader->sibling_list, |
@@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8311 | perf_remove_from_context(sibling, false); | 8358 | perf_remove_from_context(sibling, false); |
8312 | put_ctx(gctx); | 8359 | put_ctx(gctx); |
8313 | } | 8360 | } |
8314 | } else { | ||
8315 | mutex_lock(&ctx->mutex); | ||
8316 | } | ||
8317 | 8361 | ||
8318 | WARN_ON_ONCE(ctx->parent_ctx); | ||
8319 | |||
8320 | if (move_group) { | ||
8321 | /* | 8362 | /* |
8322 | * Wait for everybody to stop referencing the events through | 8363 | * Wait for everybody to stop referencing the events through |
8323 | * the old lists, before installing it on new lists. | 8364 | * the old lists, before installing it on new lists. |
@@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8349 | perf_event__state_init(group_leader); | 8390 | perf_event__state_init(group_leader); |
8350 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | 8391 | perf_install_in_context(ctx, group_leader, group_leader->cpu); |
8351 | get_ctx(ctx); | 8392 | get_ctx(ctx); |
8352 | } | ||
8353 | 8393 | ||
8354 | if (!exclusive_event_installable(event, ctx)) { | 8394 | /* |
8355 | err = -EBUSY; | 8395 | * Now that all events are installed in @ctx, nothing |
8356 | mutex_unlock(&ctx->mutex); | 8396 | * references @gctx anymore, so drop the last reference we have |
8357 | fput(event_file); | 8397 | * on it. |
8358 | goto err_context; | 8398 | */ |
8399 | put_ctx(gctx); | ||
8359 | } | 8400 | } |
8360 | 8401 | ||
8402 | /* | ||
8403 | * Precalculate sample_data sizes; do while holding ctx::mutex such | ||
8404 | * that we're serialized against further additions and before | ||
8405 | * perf_install_in_context() which is the point the event is active and | ||
8406 | * can use these values. | ||
8407 | */ | ||
8408 | perf_event__header_size(event); | ||
8409 | perf_event__id_header_size(event); | ||
8410 | |||
8361 | perf_install_in_context(ctx, event, event->cpu); | 8411 | perf_install_in_context(ctx, event, event->cpu); |
8362 | perf_unpin_context(ctx); | 8412 | perf_unpin_context(ctx); |
8363 | 8413 | ||
8364 | if (move_group) { | 8414 | if (move_group) |
8365 | mutex_unlock(&gctx->mutex); | 8415 | mutex_unlock(&gctx->mutex); |
8366 | put_ctx(gctx); | ||
8367 | } | ||
8368 | mutex_unlock(&ctx->mutex); | 8416 | mutex_unlock(&ctx->mutex); |
8369 | 8417 | ||
8370 | put_online_cpus(); | 8418 | put_online_cpus(); |
@@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8376 | mutex_unlock(¤t->perf_event_mutex); | 8424 | mutex_unlock(¤t->perf_event_mutex); |
8377 | 8425 | ||
8378 | /* | 8426 | /* |
8379 | * Precalculate sample_data sizes | ||
8380 | */ | ||
8381 | perf_event__header_size(event); | ||
8382 | perf_event__id_header_size(event); | ||
8383 | |||
8384 | /* | ||
8385 | * Drop the reference on the group_event after placing the | 8427 | * Drop the reference on the group_event after placing the |
8386 | * new event on the sibling_list. This ensures destruction | 8428 | * new event on the sibling_list. This ensures destruction |
8387 | * of the group leader will find the pointer to itself in | 8429 | * of the group leader will find the pointer to itself in |
@@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8391 | fd_install(event_fd, event_file); | 8433 | fd_install(event_fd, event_file); |
8392 | return event_fd; | 8434 | return event_fd; |
8393 | 8435 | ||
8436 | err_locked: | ||
8437 | if (move_group) | ||
8438 | mutex_unlock(&gctx->mutex); | ||
8439 | mutex_unlock(&ctx->mutex); | ||
8440 | /* err_file: */ | ||
8441 | fput(event_file); | ||
8394 | err_context: | 8442 | err_context: |
8395 | perf_unpin_context(ctx); | 8443 | perf_unpin_context(ctx); |
8396 | put_ctx(ctx); | 8444 | put_ctx(ctx); |
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 2975632d51e2..c8fe6d177119 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature | |||
@@ -41,6 +41,7 @@ FEATURE_TESTS ?= \ | |||
41 | libelf-getphdrnum \ | 41 | libelf-getphdrnum \ |
42 | libelf-mmap \ | 42 | libelf-mmap \ |
43 | libnuma \ | 43 | libnuma \ |
44 | numa_num_possible_cpus \ | ||
44 | libperl \ | 45 | libperl \ |
45 | libpython \ | 46 | libpython \ |
46 | libpython-version \ | 47 | libpython-version \ |
@@ -51,7 +52,8 @@ FEATURE_TESTS ?= \ | |||
51 | timerfd \ | 52 | timerfd \ |
52 | libdw-dwarf-unwind \ | 53 | libdw-dwarf-unwind \ |
53 | zlib \ | 54 | zlib \ |
54 | lzma | 55 | lzma \ |
56 | get_cpuid | ||
55 | 57 | ||
56 | FEATURE_DISPLAY ?= \ | 58 | FEATURE_DISPLAY ?= \ |
57 | dwarf \ | 59 | dwarf \ |
@@ -61,13 +63,15 @@ FEATURE_DISPLAY ?= \ | |||
61 | libbfd \ | 63 | libbfd \ |
62 | libelf \ | 64 | libelf \ |
63 | libnuma \ | 65 | libnuma \ |
66 | numa_num_possible_cpus \ | ||
64 | libperl \ | 67 | libperl \ |
65 | libpython \ | 68 | libpython \ |
66 | libslang \ | 69 | libslang \ |
67 | libunwind \ | 70 | libunwind \ |
68 | libdw-dwarf-unwind \ | 71 | libdw-dwarf-unwind \ |
69 | zlib \ | 72 | zlib \ |
70 | lzma | 73 | lzma \ |
74 | get_cpuid | ||
71 | 75 | ||
72 | # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. | 76 | # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. |
73 | # If in the future we need per-feature checks/flags for features not | 77 | # If in the future we need per-feature checks/flags for features not |
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 74ca42093d70..e43a2971bf56 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile | |||
@@ -19,6 +19,7 @@ FILES= \ | |||
19 | test-libelf-getphdrnum.bin \ | 19 | test-libelf-getphdrnum.bin \ |
20 | test-libelf-mmap.bin \ | 20 | test-libelf-mmap.bin \ |
21 | test-libnuma.bin \ | 21 | test-libnuma.bin \ |
22 | test-numa_num_possible_cpus.bin \ | ||
22 | test-libperl.bin \ | 23 | test-libperl.bin \ |
23 | test-libpython.bin \ | 24 | test-libpython.bin \ |
24 | test-libpython-version.bin \ | 25 | test-libpython-version.bin \ |
@@ -34,7 +35,8 @@ FILES= \ | |||
34 | test-compile-x32.bin \ | 35 | test-compile-x32.bin \ |
35 | test-zlib.bin \ | 36 | test-zlib.bin \ |
36 | test-lzma.bin \ | 37 | test-lzma.bin \ |
37 | test-bpf.bin | 38 | test-bpf.bin \ |
39 | test-get_cpuid.bin | ||
38 | 40 | ||
39 | CC := $(CROSS_COMPILE)gcc -MD | 41 | CC := $(CROSS_COMPILE)gcc -MD |
40 | PKG_CONFIG := $(CROSS_COMPILE)pkg-config | 42 | PKG_CONFIG := $(CROSS_COMPILE)pkg-config |
@@ -87,6 +89,9 @@ test-libelf-getphdrnum.bin: | |||
87 | test-libnuma.bin: | 89 | test-libnuma.bin: |
88 | $(BUILD) -lnuma | 90 | $(BUILD) -lnuma |
89 | 91 | ||
92 | test-numa_num_possible_cpus.bin: | ||
93 | $(BUILD) -lnuma | ||
94 | |||
90 | test-libunwind.bin: | 95 | test-libunwind.bin: |
91 | $(BUILD) -lelf | 96 | $(BUILD) -lelf |
92 | 97 | ||
@@ -162,6 +167,9 @@ test-zlib.bin: | |||
162 | test-lzma.bin: | 167 | test-lzma.bin: |
163 | $(BUILD) -llzma | 168 | $(BUILD) -llzma |
164 | 169 | ||
170 | test-get_cpuid.bin: | ||
171 | $(BUILD) | ||
172 | |||
165 | test-bpf.bin: | 173 | test-bpf.bin: |
166 | $(BUILD) | 174 | $(BUILD) |
167 | 175 | ||
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 84689a67814a..33cf6f20bd4e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c | |||
@@ -77,6 +77,10 @@ | |||
77 | # include "test-libnuma.c" | 77 | # include "test-libnuma.c" |
78 | #undef main | 78 | #undef main |
79 | 79 | ||
80 | #define main main_test_numa_num_possible_cpus | ||
81 | # include "test-numa_num_possible_cpus.c" | ||
82 | #undef main | ||
83 | |||
80 | #define main main_test_timerfd | 84 | #define main main_test_timerfd |
81 | # include "test-timerfd.c" | 85 | # include "test-timerfd.c" |
82 | #undef main | 86 | #undef main |
@@ -117,6 +121,10 @@ | |||
117 | # include "test-lzma.c" | 121 | # include "test-lzma.c" |
118 | #undef main | 122 | #undef main |
119 | 123 | ||
124 | #define main main_test_get_cpuid | ||
125 | # include "test-get_cpuid.c" | ||
126 | #undef main | ||
127 | |||
120 | int main(int argc, char *argv[]) | 128 | int main(int argc, char *argv[]) |
121 | { | 129 | { |
122 | main_test_libpython(); | 130 | main_test_libpython(); |
@@ -136,6 +144,7 @@ int main(int argc, char *argv[]) | |||
136 | main_test_libbfd(); | 144 | main_test_libbfd(); |
137 | main_test_backtrace(); | 145 | main_test_backtrace(); |
138 | main_test_libnuma(); | 146 | main_test_libnuma(); |
147 | main_test_numa_num_possible_cpus(); | ||
139 | main_test_timerfd(); | 148 | main_test_timerfd(); |
140 | main_test_stackprotector_all(); | 149 | main_test_stackprotector_all(); |
141 | main_test_libdw_dwarf_unwind(); | 150 | main_test_libdw_dwarf_unwind(); |
@@ -143,6 +152,7 @@ int main(int argc, char *argv[]) | |||
143 | main_test_zlib(); | 152 | main_test_zlib(); |
144 | main_test_pthread_attr_setaffinity_np(); | 153 | main_test_pthread_attr_setaffinity_np(); |
145 | main_test_lzma(); | 154 | main_test_lzma(); |
155 | main_test_get_cpuid(); | ||
146 | 156 | ||
147 | return 0; | 157 | return 0; |
148 | } | 158 | } |
diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c new file mode 100644 index 000000000000..d7a2c407130d --- /dev/null +++ b/tools/build/feature/test-get_cpuid.c | |||
@@ -0,0 +1,7 @@ | |||
1 | #include <cpuid.h> | ||
2 | |||
3 | int main(void) | ||
4 | { | ||
5 | unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; | ||
6 | return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); | ||
7 | } | ||
diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c new file mode 100644 index 000000000000..2606e94b0659 --- /dev/null +++ b/tools/build/feature/test-numa_num_possible_cpus.c | |||
@@ -0,0 +1,6 @@ | |||
1 | #include <numa.h> | ||
2 | |||
3 | int main(void) | ||
4 | { | ||
5 | return numa_num_possible_cpus(); | ||
6 | } | ||
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 4d885934b919..cf42b090477b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c | |||
@@ -3795,7 +3795,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, | |||
3795 | struct format_field *field; | 3795 | struct format_field *field; |
3796 | struct printk_map *printk; | 3796 | struct printk_map *printk; |
3797 | long long val, fval; | 3797 | long long val, fval; |
3798 | unsigned long addr; | 3798 | unsigned long long addr; |
3799 | char *str; | 3799 | char *str; |
3800 | unsigned char *hex; | 3800 | unsigned char *hex; |
3801 | int print; | 3801 | int print; |
@@ -3828,13 +3828,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, | |||
3828 | */ | 3828 | */ |
3829 | if (!(field->flags & FIELD_IS_ARRAY) && | 3829 | if (!(field->flags & FIELD_IS_ARRAY) && |
3830 | field->size == pevent->long_size) { | 3830 | field->size == pevent->long_size) { |
3831 | addr = *(unsigned long *)(data + field->offset); | 3831 | |
3832 | /* Handle heterogeneous recording and processing | ||
3833 | * architectures | ||
3834 | * | ||
3835 | * CASE I: | ||
3836 | * Traces recorded on 32-bit devices (32-bit | ||
3837 | * addressing) and processed on 64-bit devices: | ||
3838 | * In this case, only 32 bits should be read. | ||
3839 | * | ||
3840 | * CASE II: | ||
3841 | * Traces recorded on 64 bit devices and processed | ||
3842 | * on 32-bit devices: | ||
3843 | * In this case, 64 bits must be read. | ||
3844 | */ | ||
3845 | addr = (pevent->long_size == 8) ? | ||
3846 | *(unsigned long long *)(data + field->offset) : | ||
3847 | (unsigned long long)*(unsigned int *)(data + field->offset); | ||
3848 | |||
3832 | /* Check if it matches a print format */ | 3849 | /* Check if it matches a print format */ |
3833 | printk = find_printk(pevent, addr); | 3850 | printk = find_printk(pevent, addr); |
3834 | if (printk) | 3851 | if (printk) |
3835 | trace_seq_puts(s, printk->printk); | 3852 | trace_seq_puts(s, printk->printk); |
3836 | else | 3853 | else |
3837 | trace_seq_printf(s, "%lx", addr); | 3854 | trace_seq_printf(s, "%llx", addr); |
3838 | break; | 3855 | break; |
3839 | } | 3856 | } |
3840 | str = malloc(len + 1); | 3857 | str = malloc(len + 1); |
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 4a0501d7a3b4..c94c9de3173e 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt | |||
@@ -364,21 +364,6 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc | |||
364 | 364 | ||
365 | CYC packets are not requested by default. | 365 | CYC packets are not requested by default. |
366 | 366 | ||
367 | no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. | ||
368 | |||
369 | It stops the driver resetting the byte count to zero whenever | ||
370 | enabling the trace (for example on context switches) which in | ||
371 | turn results in no PSB being forced. However some processors | ||
372 | will produce a PSB anyway. | ||
373 | |||
374 | In any case, there is still a PSB when the trace is enabled for | ||
375 | the first time. | ||
376 | |||
377 | no_force_psb can be used to slightly decrease the trace size but | ||
378 | may make it harder for the decoder to recover from errors. | ||
379 | |||
380 | no_force_psb is not selected by default. | ||
381 | |||
382 | 367 | ||
383 | new snapshot option | 368 | new snapshot option |
384 | ------------------- | 369 | ------------------- |
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 827557fc7511..38a08539f4bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile | |||
@@ -573,9 +573,14 @@ ifndef NO_LIBNUMA | |||
573 | msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); | 573 | msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); |
574 | NO_LIBNUMA := 1 | 574 | NO_LIBNUMA := 1 |
575 | else | 575 | else |
576 | CFLAGS += -DHAVE_LIBNUMA_SUPPORT | 576 | ifeq ($(feature-numa_num_possible_cpus), 0) |
577 | EXTLIBS += -lnuma | 577 | msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); |
578 | $(call detected,CONFIG_NUMA) | 578 | NO_LIBNUMA := 1 |
579 | else | ||
580 | CFLAGS += -DHAVE_LIBNUMA_SUPPORT | ||
581 | EXTLIBS += -lnuma | ||
582 | $(call detected,CONFIG_NUMA) | ||
583 | endif | ||
579 | endif | 584 | endif |
580 | endif | 585 | endif |
581 | 586 | ||
@@ -621,8 +626,13 @@ ifdef LIBBABELTRACE | |||
621 | endif | 626 | endif |
622 | 627 | ||
623 | ifndef NO_AUXTRACE | 628 | ifndef NO_AUXTRACE |
624 | $(call detected,CONFIG_AUXTRACE) | 629 | ifeq ($(feature-get_cpuid), 0) |
625 | CFLAGS += -DHAVE_AUXTRACE_SUPPORT | 630 | msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); |
631 | NO_AUXTRACE := 1 | ||
632 | else | ||
633 | $(call detected,CONFIG_AUXTRACE) | ||
634 | CFLAGS += -DHAVE_AUXTRACE_SUPPORT | ||
635 | endif | ||
626 | endif | 636 | endif |
627 | 637 | ||
628 | # Among the variables below, these: | 638 | # Among the variables below, these: |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eb5f18b75402..c6f9af78f6f5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -270,12 +270,13 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) | |||
270 | int ret = 0; | 270 | int ret = 0; |
271 | 271 | ||
272 | if (module) { | 272 | if (module) { |
273 | list_for_each_entry(dso, &host_machine->dsos.head, node) { | 273 | char module_name[128]; |
274 | if (!dso->kernel) | 274 | |
275 | continue; | 275 | snprintf(module_name, sizeof(module_name), "[%s]", module); |
276 | if (strncmp(dso->short_name + 1, module, | 276 | map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); |
277 | dso->short_name_len - 2) == 0) | 277 | if (map) { |
278 | goto found; | 278 | dso = map->dso; |
279 | goto found; | ||
279 | } | 280 | } |
280 | pr_debug("Failed to find module %s.\n", module); | 281 | pr_debug("Failed to find module %s.\n", module); |
281 | return -ENOENT; | 282 | return -ENOENT; |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8a4537ee9bc3..fc3f7c922f99 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -1580,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session, | |||
1580 | file_offset = page_offset; | 1580 | file_offset = page_offset; |
1581 | head = data_offset - page_offset; | 1581 | head = data_offset - page_offset; |
1582 | 1582 | ||
1583 | if (data_size && (data_offset + data_size < file_size)) | 1583 | if (data_size == 0) |
1584 | goto out; | ||
1585 | |||
1586 | if (data_offset + data_size < file_size) | ||
1584 | file_size = data_offset + data_size; | 1587 | file_size = data_offset + data_size; |
1585 | 1588 | ||
1586 | ui_progress__init(&prog, file_size, "Processing events..."); | 1589 | ui_progress__init(&prog, file_size, "Processing events..."); |
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 415c359de465..2d065d065b67 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter) | |||
196 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | 196 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); |
197 | } | 197 | } |
198 | 198 | ||
199 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | 199 | static int check_per_pkg(struct perf_evsel *counter, |
200 | struct perf_counts_values *vals, int cpu, bool *skip) | ||
200 | { | 201 | { |
201 | unsigned long *mask = counter->per_pkg_mask; | 202 | unsigned long *mask = counter->per_pkg_mask; |
202 | struct cpu_map *cpus = perf_evsel__cpus(counter); | 203 | struct cpu_map *cpus = perf_evsel__cpus(counter); |
@@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | |||
218 | counter->per_pkg_mask = mask; | 219 | counter->per_pkg_mask = mask; |
219 | } | 220 | } |
220 | 221 | ||
222 | /* | ||
223 | * we do not consider an event that has not run as a good | ||
224 | * instance to mark a package as used (skip=1). Otherwise | ||
225 | * we may run into a situation where the first CPU in a package | ||
226 | * is not running anything, yet the second is, and this function | ||
227 | * would mark the package as used after the first CPU and would | ||
228 | * not read the values from the second CPU. | ||
229 | */ | ||
230 | if (!(vals->run && vals->ena)) | ||
231 | return 0; | ||
232 | |||
221 | s = cpu_map__get_socket(cpus, cpu); | 233 | s = cpu_map__get_socket(cpus, cpu); |
222 | if (s < 0) | 234 | if (s < 0) |
223 | return -1; | 235 | return -1; |
@@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel | |||
235 | static struct perf_counts_values zero; | 247 | static struct perf_counts_values zero; |
236 | bool skip = false; | 248 | bool skip = false; |
237 | 249 | ||
238 | if (check_per_pkg(evsel, cpu, &skip)) { | 250 | if (check_per_pkg(evsel, count, cpu, &skip)) { |
239 | pr_err("failed to read per-pkg counter\n"); | 251 | pr_err("failed to read per-pkg counter\n"); |
240 | return -1; | 252 | return -1; |
241 | } | 253 | } |
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 53bb5f59ec58..475d88d0a1c9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
@@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, | |||
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT | 40 | #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT |
41 | int elf_getphdrnum(Elf *elf, size_t *dst) | 41 | static int elf_getphdrnum(Elf *elf, size_t *dst) |
42 | { | 42 | { |
43 | GElf_Ehdr gehdr; | 43 | GElf_Ehdr gehdr; |
44 | GElf_Ehdr *ehdr; | 44 | GElf_Ehdr *ehdr; |
@@ -1271,8 +1271,6 @@ out_close: | |||
1271 | static int kcore__init(struct kcore *kcore, char *filename, int elfclass, | 1271 | static int kcore__init(struct kcore *kcore, char *filename, int elfclass, |
1272 | bool temp) | 1272 | bool temp) |
1273 | { | 1273 | { |
1274 | GElf_Ehdr *ehdr; | ||
1275 | |||
1276 | kcore->elfclass = elfclass; | 1274 | kcore->elfclass = elfclass; |
1277 | 1275 | ||
1278 | if (temp) | 1276 | if (temp) |
@@ -1289,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, | |||
1289 | if (!gelf_newehdr(kcore->elf, elfclass)) | 1287 | if (!gelf_newehdr(kcore->elf, elfclass)) |
1290 | goto out_end; | 1288 | goto out_end; |
1291 | 1289 | ||
1292 | ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); | 1290 | memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); |
1293 | if (!ehdr) | ||
1294 | goto out_end; | ||
1295 | 1291 | ||
1296 | return 0; | 1292 | return 0; |
1297 | 1293 | ||
@@ -1348,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) | |||
1348 | static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, | 1344 | static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, |
1349 | u64 addr, u64 len) | 1345 | u64 addr, u64 len) |
1350 | { | 1346 | { |
1351 | GElf_Phdr gphdr; | 1347 | GElf_Phdr phdr = { |
1352 | GElf_Phdr *phdr; | 1348 | .p_type = PT_LOAD, |
1353 | 1349 | .p_flags = PF_R | PF_W | PF_X, | |
1354 | phdr = gelf_getphdr(kcore->elf, idx, &gphdr); | 1350 | .p_offset = offset, |
1355 | if (!phdr) | 1351 | .p_vaddr = addr, |
1356 | return -1; | 1352 | .p_paddr = 0, |
1357 | 1353 | .p_filesz = len, | |
1358 | phdr->p_type = PT_LOAD; | 1354 | .p_memsz = len, |
1359 | phdr->p_flags = PF_R | PF_W | PF_X; | 1355 | .p_align = page_size, |
1360 | phdr->p_offset = offset; | 1356 | }; |
1361 | phdr->p_vaddr = addr; | 1357 | |
1362 | phdr->p_paddr = 0; | 1358 | if (!gelf_update_phdr(kcore->elf, idx, &phdr)) |
1363 | phdr->p_filesz = len; | ||
1364 | phdr->p_memsz = len; | ||
1365 | phdr->p_align = page_size; | ||
1366 | |||
1367 | if (!gelf_update_phdr(kcore->elf, idx, phdr)) | ||
1368 | return -1; | 1359 | return -1; |
1369 | 1360 | ||
1370 | return 0; | 1361 | return 0; |
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7acafb3c5592..c2cd9bf2348b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
@@ -709,7 +709,7 @@ bool find_process(const char *name) | |||
709 | 709 | ||
710 | dir = opendir(procfs__mountpoint()); | 710 | dir = opendir(procfs__mountpoint()); |
711 | if (!dir) | 711 | if (!dir) |
712 | return -1; | 712 | return false; |
713 | 713 | ||
714 | /* Walk through the directory. */ | 714 | /* Walk through the directory. */ |
715 | while (ret && (d = readdir(dir)) != NULL) { | 715 | while (ret && (d = readdir(dir)) != NULL) { |