author     Linus Torvalds <torvalds@linux-foundation.org>   2014-03-31 14:13:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-03-31 14:13:25 -0400
commit     8c292f11744297dfb3a69f4a0bccbe4a6417b50d
tree       f1a89560de25a69b697d459a9b5cf2e738038d9f /kernel
parent     d31605dc8a63f1df28443ddb3560b1079417af92
parent     538592ff0b008237ae88f5ce5fb1247127dc3ce5
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar:
"Main changes:
Kernel side changes:
- Add SNB/IVB/HSW client uncore memory controller support (Stephane
Eranian)
- Fix various x86/P4 PMU driver bugs (Don Zickus)
Tooling, user visible changes:
- Add several futex 'perf bench' microbenchmarks (Davidlohr Bueso)
- Speed up thread map generation (Don Zickus)
- Introduce 'perf kvm --list-cmds' command line option for use by
scripts (Ramkumar Ramachandra)
- Print the evsel name in the annotate stdio output, prep work for
supporting annotation output for multiple events, not just the
first one (Arnaldo Carvalho de Melo)
- Allow setting preferred callchain method in .perfconfig (Jiri Olsa)
- Show in what binaries/modules 'perf probe's are set (Masami
Hiramatsu)
- Support distro-style debuginfo for uprobe in 'perf probe' (Masami
Hiramatsu)
Tooling, internal changes and fixes:
- Use tid in mmap/mmap2 events to find maps (Don Zickus)
- Record the reason for filtering an address_location (Namhyung Kim)
- Apply all filters to an addr_location (Namhyung Kim)
- Merge al->filtered with hist_entry->filtered in report/hists
(Namhyung Kim)
- Fix memory leak when synthesizing thread records (Namhyung Kim)
- Use ui__has_annotation() in 'report' (Namhyung Kim)
- hists browser refactorings to reuse code across UIs (Namhyung Kim)
- Add support for the new DWARF unwinder library in elfutils (Jiri
Olsa)
- Fix build race in the generation of bison files (Jiri Olsa)
- Further streamline the feature detection display, trimming it to
show just the libraries detected; using VF=1 gives more verbose
output, showing the less interesting feature checks as well (Jiri
Olsa).
- Check compatible symtab type before loading dso (Namhyung Kim)
- Check return value of filename__read_debuglink() (Stephane Eranian)
- Move some hashing and fs related code from tools/perf/util/ to
tools/lib/ so that it can be used by other utilities living under
tools/ (Borislav Petkov)
- Prepare DWARF unwinding code for using an elfutils alternative
unwinding library (Jiri Olsa)
- Fix DWARF unwind max_stack processing (Jiri Olsa)
- Add dwarf unwind 'perf test' entry (Jiri Olsa)
- 'perf probe' improvements including memory leak fixes, sharing the
intlist class with other tools, uprobes/kprobes code sharing and
use of ref_reloc_sym (Masami Hiramatsu)
- Shorten sample symbol resolving by adding cpumode to struct
addr_location (Arnaldo Carvalho de Melo)
- Fix synthesizing mmaps for threads (Don Zickus)
- Fix invalid output on event group stdio report (Namhyung Kim)
- Fixup header alignment in 'perf sched latency' output (Ramkumar
Ramachandra)
- Fix off-by-one error in 'perf timechart record' argv handling
(Ramkumar Ramachandra)
Tooling, cleanups:
- Remove unused thread__find_map function (Jiri Olsa)
- Remove unused simple_strtoul() function (Ramkumar Ramachandra)
Tooling, documentation updates:
- Update function names in debug messages (Ramkumar Ramachandra)
- Update some code references in design.txt (Ramkumar Ramachandra)
- Clarify load-latency information in the 'perf mem' docs (Andi
Kleen)
- Clarify x86 register naming in 'perf probe' docs (Andi Kleen)"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (96 commits)
perf tools: Remove unused simple_strtoul() function
perf tools: Update some code references in design.txt
perf evsel: Update function names in debug messages
perf tools: Remove thread__find_map function
perf annotate: Print the evsel name in the stdio output
perf report: Use ui__has_annotation()
perf tools: Fix memory leak when synthesizing thread records
perf tools: Use tid in mmap/mmap2 events to find maps
perf report: Merge al->filtered with hist_entry->filtered
perf symbols: Apply all filters to an addr_location
perf symbols: Record the reason for filtering an address_location
perf sched: Fixup header alignment in 'latency' output
perf timechart: Fix off-by-one error in 'record' argv handling
perf machine: Factor machine__find_thread to take tid argument
perf tools: Speed up thread map generation
perf kvm: introduce --list-cmds for use by scripts
perf ui hists: Pass evsel to hpp->header/width functions explicitly
perf symbols: Introduce thread__find_cpumode_addr_location
perf session: Change header.misc dump from decimal to hex
perf ui/tui: Reuse generic __hpp__fmt() code
...
Diffstat (limited to 'kernel')
 kernel/events/core.c            | 39 ++++++++++++++++++++++++++++-----------
 kernel/irq_work.c               |  6 ++++--
 kernel/trace/trace_event_perf.c | 22 +++++++++++++++++++---
 3 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fa0b2d4ad83c..661951ab8ae7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -231,11 +231,29 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 #define NR_ACCUMULATED_SAMPLES 128
 static DEFINE_PER_CPU(u64, running_sample_length);
 
-void perf_sample_event_took(u64 sample_len_ns)
+static void perf_duration_warn(struct irq_work *w)
 {
+	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
 	u64 avg_local_sample_len;
 	u64 local_samples_len;
+
+	local_samples_len = __get_cpu_var(running_sample_length);
+	avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;
+
+	printk_ratelimited(KERN_WARNING
+			"perf interrupt took too long (%lld > %lld), lowering "
+			"kernel.perf_event_max_sample_rate to %d\n",
+			avg_local_sample_len, allowed_ns >> 1,
+			sysctl_perf_event_sample_rate);
+}
+
+static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
+
+void perf_sample_event_took(u64 sample_len_ns)
+{
 	u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns);
+	u64 avg_local_sample_len;
+	u64 local_samples_len;
 
 	if (allowed_ns == 0)
 		return;
@@ -263,13 +281,14 @@ void perf_sample_event_took(u64 sample_len_ns)
 	sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 
-	printk_ratelimited(KERN_WARNING
-			"perf samples too long (%lld > %lld), lowering "
-			"kernel.perf_event_max_sample_rate to %d\n",
-			avg_local_sample_len, allowed_ns,
-			sysctl_perf_event_sample_rate);
-
 	update_perf_cpu_limits();
+
+	if (!irq_work_queue(&perf_duration_work)) {
+		early_printk("perf interrupt took too long (%lld > %lld), lowering "
+			     "kernel.perf_event_max_sample_rate to %d\n",
+			     avg_local_sample_len, allowed_ns >> 1,
+			     sysctl_perf_event_sample_rate);
+	}
 }
 
 static atomic64_t perf_event_id;
@@ -1714,7 +1733,7 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group = NULL;
-	struct pmu *pmu = group_event->pmu;
+	struct pmu *pmu = ctx->pmu;
 	u64 now = ctx->time;
 	bool simulate = false;
 
@@ -2563,8 +2582,6 @@ static void perf_branch_stack_sched_in(struct task_struct *prev,
 		if (cpuctx->ctx.nr_branch_stack > 0
 		    && pmu->flush_branch_stack) {
 
-			pmu = cpuctx->ctx.pmu;
-
 			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 
 			perf_pmu_disable(pmu);
@@ -6294,7 +6311,7 @@ static int perf_event_idx_default(struct perf_event *event)
  * Ensures all contexts with the same task_ctx_nr have the same
  * pmu_cpu_context too.
  */
-static void *find_pmu_context(int ctxn)
+static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
 {
 	struct pmu *pmu;
 
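The core.c change above exists because perf_sample_event_took() can run in NMI context, where printk() is not safe; the warning is therefore deferred to IRQ-work context, with early_printk() as a fallback when the work item is already pending. A minimal sketch of the same deferral pattern in isolation (hypothetical names my_warn/my_work/hot_path_event; only DEFINE_IRQ_WORK() and irq_work_queue() come from the code above):

    #include <linux/irq_work.h>
    #include <linux/printk.h>

    /* Deferred callback: runs later in IRQ context, where printk() is safe. */
    static void my_warn(struct irq_work *w)
    {
    	pr_warn_ratelimited("deferred diagnostic\n");
    }

    static DEFINE_IRQ_WORK(my_work, my_warn);

    /* Called from an NMI-like hot path where printk() must be avoided. */
    static void hot_path_event(void)
    {
    	if (!irq_work_queue(&my_work)) {
    		/* Work already pending: a warning is on its way;
    		 * core.c additionally falls back to early_printk() here. */
    	}
    }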
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 55fcce6065cf..a82170e2fa78 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -61,11 +61,11 @@ void __weak arch_irq_work_raise(void)
  *
  * Can be re-enqueued while the callback is still in progress.
  */
-void irq_work_queue(struct irq_work *work)
+bool irq_work_queue(struct irq_work *work)
 {
 	/* Only queue if not already pending */
 	if (!irq_work_claim(work))
-		return;
+		return false;
 
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
@@ -83,6 +83,8 @@ void irq_work_queue(struct irq_work *work)
 	}
 
 	preempt_enable();
+
+	return true;
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
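irq_work_queue() previously returned void; with this change a caller can tell whether the work was newly queued (true) or was already claimed and still pending (false), in which case the request coalesces with the pending run. A hedged sketch of a caller using the new contract (hypothetical names; not part of this commit):

    #include <linux/atomic.h>
    #include <linux/irq_work.h>

    static atomic_t newly_queued = ATOMIC_INIT(0);
    static atomic_t coalesced = ATOMIC_INIT(0);

    static void flush_stats(struct irq_work *work)
    {
    	/* drain whatever state the hot path accumulated */
    }

    static DEFINE_IRQ_WORK(stats_work, flush_stats);

    static void hot_path_kick(void)
    {
    	if (irq_work_queue(&stats_work))
    		atomic_inc(&newly_queued);	/* we queued it; an IPI may be raised */
    	else
    		atomic_inc(&coalesced);		/* already pending; the queued run covers us */
    }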
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index e854f420e033..c894614de14d 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -31,9 +31,25 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 	}
 
 	/* The ftrace function trace is allowed only for root. */
-	if (ftrace_event_is_function(tp_event) &&
-	    perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	if (ftrace_event_is_function(tp_event)) {
+		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		/*
+		 * We don't allow user space callchains for function trace
+		 * event, due to issues with page faults while tracing page
+		 * fault handler and its overall trickiness nature.
+		 */
+		if (!p_event->attr.exclude_callchain_user)
+			return -EINVAL;
+
+		/*
+		 * Same reason to disable user stack dump as for user space
+		 * callchains above.
+		 */
+		if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
+			return -EINVAL;
+	}
 
 	/* No tracing, just counting, so no obvious leak */
 	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
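The trace_event_perf.c hunk tightens perf_trace_event_perm() for function trace events: besides the existing CAP_SYS_ADMIN check, opening one now fails with -EINVAL unless user-space callchains are excluded, and PERF_SAMPLE_STACK_USER is refused outright. A hedged user-space sketch of an opener that satisfies the new checks (illustrative only; obtaining the ftrace:function event id from tracefs is assumed and not shown):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* config: the id of the ftrace:function trace event, e.g. read from
     * /sys/kernel/debug/tracing/events/ftrace/function/id (assumed). */
    static int open_function_trace(unsigned long long config)
    {
    	struct perf_event_attr attr;

    	memset(&attr, 0, sizeof(attr));
    	attr.size = sizeof(attr);
    	attr.type = PERF_TYPE_TRACEPOINT;
    	attr.config = config;
    	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
    	attr.exclude_callchain_user = 1;	/* required, or the kernel returns -EINVAL */
    	/* PERF_SAMPLE_STACK_USER must stay clear for the same reason. */

    	/* Needs CAP_SYS_ADMIN when perf_event_paranoid restricts raw tracepoints. */
    	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }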