author    Linus Torvalds <torvalds@linux-foundation.org>  2017-01-15 14:37:43 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-01-15 14:37:43 -0500
commit    79078c53baabee12dfefb0cfe00ca94cb2c35570
tree      c8586ca3e125d757756b1b9a020615dcdbb09d0c
parent    255e6140fa76ec9d0e24f201427e7e9a9573f681
parent    18e7a45af91acdde99d3aa1372cc40e1f8142f7b
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
"Misc race fixes uncovered by fuzzing efforts, a Sparse fix, two PMU
driver fixes, plus miscellaneous tooling fixes"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86: Reject non sampling events with precise_ip
perf/x86/intel: Account interrupts for PEBS errors
perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race
perf/core: Fix sys_perf_event_open() vs. hotplug
perf/x86/intel: Use ULL constant to prevent undefined shift behaviour
perf/x86/intel/uncore: Fix hardcoded socket 0 assumption in the Haswell init code
perf/x86: Set pmu->module in Intel PMU modules
perf probe: Fix to probe on gcc generated symbols for offline kernel
perf probe: Fix --funcs to show correct symbols for offline module
perf symbols: Robustify reading of build-id from sysfs
perf tools: Install tools/lib/traceevent plugins with install-bin
tools lib traceevent: Fix prev/next_prio for deadline tasks
perf record: Fix --switch-output documentation and comment
perf record: Make __record_options static
tools lib subcmd: Add OPT_STRING_OPTARG_SET option
perf probe: Fix to get correct modname from elf header
samples/bpf trace_output_user: Remove duplicate sys/ioctl.h include
samples/bpf sock_example: Avoid getting ethhdr from two includes
perf sched timehist: Show total scheduling time
 arch/x86/events/core.c                     |   4
 arch/x86/events/intel/core.c               |   2
 arch/x86/events/intel/cstate.c             |   2
 arch/x86/events/intel/ds.c                 |   6
 arch/x86/events/intel/rapl.c               |   1
 arch/x86/events/intel/uncore.c             |   1
 arch/x86/events/intel/uncore_snbep.c       |   2
 include/linux/perf_event.h                 |   1
 kernel/events/core.c                       | 175
 samples/bpf/sock_example.h                 |   2
 samples/bpf/trace_output_user.c            |   1
 tools/lib/subcmd/parse-options.c           |   3
 tools/lib/subcmd/parse-options.h           |   5
 tools/lib/traceevent/plugin_sched_switch.c |   4
 tools/perf/Documentation/perf-record.txt   |   4
 tools/perf/Makefile.perf                   |   4
 tools/perf/builtin-record.c                |   4
 tools/perf/builtin-sched.c                 |  17
 tools/perf/util/probe-event.c              | 105
 tools/perf/util/symbol-elf.c               |   6
 20 files changed, 257 insertions(+), 92 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 019c5887b698..1635c0c8df23 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+
+		/* There's no sense in having PEBS for non sampling events: */
+		if (!is_sampling_event(event))
+			return -EINVAL;
 	}
 	/*
 	 * check that PEBS LBR correction does not conflict with
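For context on the hunk above: a PEBS request (attr.precise_ip > 0) only makes sense for a sampling event, so the check now fails the open instead of creating an event whose PEBS machinery can never fire. A minimal userspace sketch of the now-rejected configuration (values are illustrative, error handling elided):

#include <linux/perf_event.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.precise_ip = 2;	/* request PEBS */
	/* attr.sample_period left 0, so this is not a sampling event */

	long fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)	/* EINVAL after this fix; EOPNOTSUPP where PEBS is absent */
		perror("perf_event_open");
	return 0;
}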
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 86138267b68a..d611cab214a6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void)
 		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
 		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
 
 	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
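The one-character fix matters because the literal 1 is a 32-bit int: once the shift count reaches 32, the shift is undefined behaviour in C, and on x86 it typically wraps modulo 32. Since intel_ctrl is a 64-bit mask, the constant must be ULL. A standalone demo (the first shift is deliberately the buggy form):

#include <stdio.h>

int main(void)
{
	int n = 32;	/* hypothetical counter count */

	unsigned long long bad  = (1 << n) - 1;		/* int shift: UB for n >= 32 */
	unsigned long long good = (1ULL << n) - 1;	/* 64-bit shift: well defined */

	printf("bad=%llx good=%llx\n", bad, good);
	return 0;
}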
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index fec8a461bdef..1076c9a77292 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static struct pmu cstate_pkg_pmu = {
@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static const struct cstate_model nhm_cstates __initconst = {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index bd34124449b0..17c3564d087a 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
 	rapl_pmus->pmu.start		= rapl_pmu_event_start;
 	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	rapl_pmus->pmu.module		= THIS_MODULE;
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 97c246f84dea..8c4ccdc3a3f3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 			.start		= uncore_pmu_event_start,
 			.stop		= uncore_pmu_event_stop,
 			.read		= uncore_pmu_event_read,
+			.module		= THIS_MODULE,
 		};
 	} else {
 		pmu->pmu = *pmu->type->pmu;
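The .module = THIS_MODULE lines in the cstate, rapl and uncore diffs above matter because perf core pins the PMU's owning module for the lifetime of each event; with .module left NULL these modular drivers could be unloaded while events were still live. This is a from-memory sketch of the relevant perf-core logic at the time, not the exact call site:

/* sketch: what perf core does with pmu->module when an event is created */
static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
{
	int ret;

	if (!try_module_get(pmu->module))	/* pin the PMU driver module */
		return -ENODEV;

	event->pmu = pmu;
	ret = pmu->event_init(event);
	if (ret)
		module_put(pmu->module);	/* dropped again on failure */

	return ret;
}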
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index e6832be714bc..dae2fedc1601 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
 
 void hswep_uncore_cpu_init(void)
 {
-	int pkg = topology_phys_to_logical_pkg(0);
+	int pkg = boot_cpu_data.logical_proc_id;
 
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509fab8c..110b38a58493 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2249,7 +2249,7 @@ static int __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
-			ret = -ESRCH;
-			goto unlock;
-		}
+		reprogram = (ctx->task == current);
 
 		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
 		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+		if (task_curr(ctx->task) && !reprogram) {
+			ret = -ESRCH;
+			goto unlock;
+		}
 
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ again:
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
@@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 		perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
@@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);
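The crux of the move_group fix above is the lock-and-revalidate loop in __perf_event_ctx_lock_double(): the group leader's ctx pointer can change while we sleep on the mutexes, so after locking we re-check it and start over if it moved. The helper it builds on, mutex_lock_double(), is a pre-existing static function in kernel/events/core.c that avoids ABBA deadlocks by always locking the lower address first; from memory it looks roughly like this:

/* sketch, from memory of kernel/events/core.c at the time */
static void mutex_lock_double(struct mutex *a, struct mutex *b)
{
	if (b < a)
		swap(a, b);	/* impose a global lock order by address */

	mutex_lock(a);
	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}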
diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h
index 09f7fe7e5fd7..d8014065d479 100644
--- a/samples/bpf/sock_example.h
+++ b/samples/bpf/sock_example.h
@@ -4,7 +4,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <net/ethernet.h>
+#include <linux/if_ether.h>
 #include <net/if.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index f4fa6af22def..ccca1e348017 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <fcntl.h>
 #include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
 #include <errno.h>
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 3284bb14ae78..8aad81151d50 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
 		else
 			err = get_arg(p, opt, flags, (const char **)opt->value);
 
+		if (opt->set)
+			*(bool *)opt->set = true;
+
 		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
 		if (opt->flags & PARSE_OPT_NOEMPTY) {
 			const char *val = *(const char **)opt->value;
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 8866ac438b34..11c3be3bcce7 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -137,6 +137,11 @@ struct option {
 	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
 	  .value = check_vtype(v, const char **), (a), .help = (h), \
 	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+	{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+	  .set = check_vtype(os, bool *)}
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
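The new macro pairs the string value with a bool that records whether the option appeared at all, which matters for options with an optional argument: the string alone cannot distinguish "not given" from "given with the default". A hypothetical usage sketch (the option name and variables are invented for illustration):

/* hypothetical: distinguish "--trace" absent, bare, and with an argument */
static const char *trace_spec;	/* stays at the default if the flag is bare */
static bool trace_set;		/* becomes true whenever the flag appears   */

static const struct option options[] = {
	OPT_STRING_OPTARG_SET('t', "trace", &trace_spec, &trace_set,
			      "spec", "enable tracing", "all"),
	OPT_END()
};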
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index f1ce60065258..ec30c2fcbac0 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
 		trace_seq_printf(s, "%lld ", val);
 
 	if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, "[%lld] ", val);
+		trace_seq_printf(s, "[%d] ", (int) val);
 
 	if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
 		write_state(s, val);
@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
 		trace_seq_printf(s, "%lld", val);
 
 	if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%lld]", val);
+		trace_seq_printf(s, " [%d]", (int) val);
 
 	return 0;
 }
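The cast fixes how deadline tasks display: their prio is -1, but the 32-bit field value arrives zero-extended into a 64-bit container, so %lld prints 4294967295 instead of -1. A small demo of the two formattings:

#include <stdio.h>

int main(void)
{
	/* a 32-bit prio of -1, zero-extended into the 64-bit field value */
	unsigned long long val = 0xffffffffULL;

	printf("[%lld]\n", (long long)val);	/* [4294967295] -- the old bug  */
	printf("[%d]\n", (int)val);		/* [-1] -- the fixed output     */
	return 0;
}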
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 27fc3617c6a4..5054d9147f0f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
 particular perf.data snapshot should be kept or not.
 
 Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+  --switch-output --no-no-buildid --no-no-buildid-cache
 
 --dry-run::
 Parse options then exit. --dry-run can be used to detect errors in cmdline
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 8fc24824705e..8bb16aa9d661 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -704,9 +704,9 @@ install-tests: all install-gtk
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
 		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
-install-bin: install-tools install-tests
+install-bin: install-tools install-tests install-traceevent-plugins
 
-install: install-bin try-install-man install-traceevent-plugins
+install: install-bin try-install-man
 
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 74d6a035133a..4ec10e9427d9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1405,7 +1405,7 @@ static bool dry_run;
  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
  * using pipes, etc.
  */
-struct option __record_options[] = {
+static struct option __record_options[] = {
 	OPT_CALLBACK('e', "event", &record.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),
@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 	 * overhead. Still generate buildid if they are required
 	 * explicitly using
 	 *
-	 *  perf record --signal-trigger --no-no-buildid \
+	 *  perf record --switch-output --no-no-buildid \
 	 *              --no-no-buildid-cache
 	 *
 	 * Following code equals to:
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
 	u64		skipped_samples;
 	const char	*time_str;
 	struct perf_time_interval ptime;
+	struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 		timehist_print_sample(sched, sample, &al, thread, t);
 
 out:
+	if (sched->hist_time.start == 0 && t >= ptime->start)
+		sched->hist_time.start = t;
+	if (ptime->end == 0 || t <= ptime->end)
+		sched->hist_time.end = t;
+
 	if (tr) {
 		/* time of this sched_switch event becomes last time task seen */
 		tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 	struct thread *t;
 	struct thread_runtime *r;
 	int i;
+	u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
 	memset(&totals, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			totals.sched_count += r->run_stats.n;
 			printf("    CPU %2d idle for ", i);
 			print_sched_time(r->total_run_time, 6);
-			printf("  msec\n");
+			printf("  msec  (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
 		} else
 			printf("    CPU %2d idle entire time window\n", i);
 	}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\n"
 	       "    Total number of unique tasks: %" PRIu64 "\n"
-	       "Total number of context switches: %" PRIu64 "\n"
-	       "           Total run time (msec): ",
+	       "Total number of context switches: %" PRIu64 "\n",
 	       totals.task_count, totals.sched_count);
 
+	printf("           Total run time (msec): ");
 	print_sched_time(totals.total_run_time, 2);
 	printf("\n");
+
+	printf("    Total scheduling time (msec): ");
+	print_sched_time(hist_time, 2);
+	printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,
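The new summary line divides each CPU's idle time by the measured window, hist_time, the span between the first and last sched events inside the requested time range. A toy calculation with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* hypothetical timestamps in nanoseconds */
	unsigned long long hist_start = 1000000000ULL;
	unsigned long long hist_end   = 11000000000ULL;
	unsigned long long idle_time  = 2500000000ULL;	/* one CPU's idle time */

	unsigned long long hist_time = hist_end - hist_start;

	printf("  msec  (%6.2f%%)\n", 100.0 * idle_time / hist_time);	/* 25.00% */
	return 0;
}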
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d281ae2b54e8..4a57c8a60bd9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__findnew_module_map(host_machine, 0, module);
+		return dso__new_map(module);
 
 	if (!module)
 		module = "kernel";
@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0 &&
 		    module[pos->dso->short_name_len - 2] == '\0') {
+			map__get(pos);
 			return pos;
 		}
 	}
@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
 	return kernel_get_module_map(target);
 }
 
-static void put_target_map(struct map *map, bool user)
-{
-	if (map && user) {
-		/* Only the user map needs to be released */
-		map__put(map);
-	}
-}
-
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
 	char *ptr1, *ptr2, *exec_copy;
@@ -268,21 +260,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
 }
 
 /*
- * NOTE:
- * '.gnu.linkonce.this_module' section of kernel module elf directly
- * maps to 'struct module' from linux/module.h. This section contains
- * actual module name which will be used by kernel after loading it.
- * But, we cannot use 'struct module' here since linux/module.h is not
- * exposed to user-space. Offset of 'name' has remained same from long
- * time, so hardcoding it here.
- */
-#ifdef __LP64__
-#define MOD_NAME_OFFSET 24
-#else
-#define MOD_NAME_OFFSET 12
-#endif
-
-/*
  * @module can be module name of module file path. In case of path,
  * inspect elf and find out what is actual module name.
  * Caller has to free mod_name after using it.
@@ -296,6 +273,7 @@ static char *find_module_name(const char *module)
 	Elf_Data *data;
 	Elf_Scn *sec;
 	char *mod_name = NULL;
+	int name_offset;
 
 	fd = open(module, O_RDONLY);
 	if (fd < 0)
@@ -317,7 +295,21 @@ static char *find_module_name(const char *module)
 	if (!data || !data->d_buf)
 		goto ret_err;
 
-	mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+
+	mod_name = strdup((char *)data->d_buf + name_offset);
 
 ret_err:
 	elf_end(elf);
@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 	}
 
 out:
-	put_target_map(map, uprobes);
+	map__put(map);
 	return ret;
 
 }
@@ -618,6 +610,51 @@ error:
 	return ret ? : -ENOENT;
 }
 
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct symbol *sym;
+	struct map *map;
+	unsigned long stext = 0;
+	u64 addr;
+	int i;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		addr = tevs[i].point.address + tevs[i].point.offset - stext;
+		sym = map__find_symbol(map, addr);
+		if (!sym)
+			continue;
+		if (!strcmp(sym->name, tevs[i].point.symbol))
+			continue;
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = strdup(sym->name);
+		tevs[i].point.offset = addr - sym->start;
+	}
+	map__put(map);
+
+	return 0;
+}
+
 static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
 				       int ntevs, const char *exec)
 {
@@ -679,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 
 	/* Skip post process if the target is an offline kernel */
 	if (symbol_conf.ignore_vmlinux_buildid)
-		return 0;
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
 
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {
@@ -2869,7 +2907,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	}
 
 out:
-	put_target_map(map, pev->uprobes);
+	map__put(map);
 	free(syms);
 	return ret;
 
@@ -3362,10 +3400,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 		return ret;
 
 	/* Get a symbol map */
-	if (user)
-		map = dso__new_map(target);
-	else
-		map = kernel_get_module_map(target);
+	map = get_target_map(target, user);
 	if (!map) {
 		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
 		return -EINVAL;
@@ -3397,9 +3432,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 	}
 
 end:
-	if (user) {
-		map__put(map);
-	}
+	map__put(map);
 	exit_probe_symbol_maps();
 
 	return ret;
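On the module-name fix in find_module_name() above: the offset of name inside struct module depends on pointer size, because the members before it (an enum plus a struct list_head, i.e. two pointers) occupy 12 bytes on 32-bit and 24 bytes on 64-bit targets, which is why the code now picks the offset from the ELF class rather than from the build host's __LP64__. A standalone illustration with a stand-in struct (the real layout comes from linux/module.h):

#include <stddef.h>
#include <stdio.h>

/* stand-in for the leading members of struct module at the time */
struct modlike {
	int state;				/* enum module_state */
	struct { void *next, *prev; } list;	/* struct list_head  */
	char name[64];
};

int main(void)
{
	/* 12 on ILP32, 24 on LP64 (4-byte enum padded to pointer alignment) */
	printf("name offset = %zu\n", offsetof(struct modlike, name));
	return 0;
}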
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 99400b0e8f2a..adbc6c02c3aa 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
 				break;
 		} else {
 			int n = namesz + descsz;
+
+			if (n > (int)sizeof(bf)) {
+				n = sizeof(bf);
+				pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+					 __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+			}
 			if (read(fd, bf, n) != n)
 				break;
 		}
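The robustification above guards against note sizes that come straight from the file: an ELF note is a fixed header followed by a name and a descriptor, both 4-byte padded, and a malformed note can claim sizes far larger than the scratch buffer. A minimal sketch of the clamp pattern, with a hypothetical buffer size:

#include <elf.h>
#include <stdio.h>
#include <unistd.h>

static int read_note_payload(int fd, const Elf32_Nhdr *nhdr)
{
	char bf[4096];	/* hypothetical scratch buffer */
	int n = nhdr->n_namesz + nhdr->n_descsz;

	if (n > (int)sizeof(bf)) {	/* never trust on-disk sizes */
		n = sizeof(bf);
		fprintf(stderr, "truncating note: n_namesz=%u n_descsz=%u\n",
			nhdr->n_namesz, nhdr->n_descsz);
	}
	return read(fd, bf, n) == n ? 0 : -1;
}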
