author    Linus Torvalds <torvalds@linux-foundation.org>  2014-06-03 16:18:00 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-06-03 16:18:00 -0400
commit    3d521f9151dacab566904d1f57dcb3e7080cdd8f (patch)
tree      160d15ff955541c6ca27a69c8291a0269f105bb3 /kernel
parent    776edb59317ada867dfcddde40b55648beeb0078 (diff)
parent    e450f90e8c7d0bf70519223c1b848446ae63f313 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next
Pull perf updates from Ingo Molnar:
 "The tooling changes maintained by Jiri Olsa until Arnaldo is on vacation:

  User visible changes:
   - Add -F option for specifying output fields (Namhyung Kim)
   - Propagate exit status of a command line workload for record command (Namhyung Kim)
   - Use tid for finding thread (Namhyung Kim)
   - Clarify the output of perf sched map plus small sched command fixes (Dongsheng Yang)
   - Wire up perf_regs and unwind support for ARM64 (Jean Pihet)
   - Factor hists statistics counts processing which in turn also fixes several bugs in TUI report command (Namhyung Kim)
   - Add --percentage option to control absolute/relative percentage output (Namhyung Kim)
   - Add --list-cmds to 'kmem', 'mem', 'lock' and 'sched', for use by completion scripts (Ramkumar Ramachandra)

  Development/infrastructure changes and fixes:
   - Android related fixes for pager and map dso resolving (Michael Lentine)
   - Add libdw DWARF post unwind support for ARM (Jean Pihet)
   - Consolidate types.h for ARM and ARM64 (Jean Pihet)
   - Fix possible null pointer dereference in session.c (Masanari Iida)
   - Cleanup, remove unused variables in map_switch_event() (Dongsheng Yang)
   - Remove nr_state_machine_bugs in perf latency (Dongsheng Yang)
   - Remove usage of trace_sched_wakeup(.success) (Peter Zijlstra)
   - Cleanups for perf.h header (Jiri Olsa)
   - Consolidate types.h and export.h within tools (Borislav Petkov)
   - Move u64_swap union to its single user's header, evsel.h (Borislav Petkov)
   - Fix for s390 to properly parse tracepoints plus test code (Alexander Yarygin)
   - Handle EINTR error for readn/writen (Namhyung Kim)
   - Add a test case for hists filtering (Namhyung Kim)
   - Share map_groups among threads of the same group (Arnaldo Carvalho de Melo, Jiri Olsa)
   - Making some code (cpu node map and report parse callchain callback) global to be usable by upcoming changes (Don Zickus)
   - Fix pmu object compilation error (Jiri Olsa)

  Kernel side changes:
   - intrusive uprobes fixes from Oleg Nesterov.  Since the interface is admin-only, and the bug only affects user-space ("any probed jmp/call can kill the application"), we queued these fixes via the development tree, as a special exception.
   - more fuzzer motivated race fixes and related refactoring and robustization.
   - allow PMU drivers to be built as modules.  (No actual module yet, because the x86 Intel uncore module wasn't ready in time for this)"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (114 commits)
  perf tools: Add automatic remapping of Android libraries
  perf tools: Add cat as fallback pager
  perf tests: Add a testcase for histogram output sorting
  perf tests: Factor out print_hists_*()
  perf tools: Introduce reset_output_field()
  perf tools: Get rid of obsolete hist_entry__sort_list
  perf hists: Reset width of output fields with header length
  perf tools: Skip elided sort entries
  perf top: Add --fields option to specify output fields
  perf report/tui: Fix a bug when --fields/sort is given
  perf tools: Add ->sort() member to struct sort_entry
  perf report: Add -F option to specify output fields
  perf tools: Call perf_hpp__init() before setting up GUI browsers
  perf tools: Consolidate management of default sort orders
  perf tools: Allow hpp fields to be sort keys
  perf ui: Get rid of callback from __hpp__fmt()
  perf tools: Consolidate output field handling to hpp format routines
  perf tools: Use hpp formats to sort final output
  perf tools: Support event grouping in hpp ->sort()
  perf tools: Use hpp formats to sort hist entries
  ...
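A note on the "allow PMU drivers to be built as modules" item above: the kernel/events/core.c diff below exports perf_pmu_register()/perf_pmu_unregister() and makes the core take a reference on pmu->module while events use a PMU. A minimal, hypothetical sketch of what such a modular PMU driver could look like (the "example" name and the no-op callbacks are invented for illustration; no such driver is part of this merge):

#include <linux/module.h>
#include <linux/perf_event.h>

/* Hypothetical skeleton: the callbacks do nothing; a real PMU programs counters here. */
static int example_event_init(struct perf_event *event)
{
	return -ENOENT;		/* "not my event": let other PMUs claim it */
}
static int example_add(struct perf_event *event, int flags)   { return 0; }
static void example_del(struct perf_event *event, int flags)  { }
static void example_start(struct perf_event *event, int flags){ }
static void example_stop(struct perf_event *event, int flags) { }
static void example_read(struct perf_event *event)            { }

static struct pmu example_pmu = {
	.module		= THIS_MODULE,	/* lets perf_init_event() pin us via try_module_get() */
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= example_event_init,
	.add		= example_add,
	.del		= example_del,
	.start		= example_start,
	.stop		= example_stop,
	.read		= example_read,
};

static int __init example_pmu_init(void)
{
	/* type -1: let the core allocate a dynamic PMU type id */
	return perf_pmu_register(&example_pmu, "example", -1);
}

static void __exit example_pmu_exit(void)
{
	perf_pmu_unregister(&example_pmu);
}

module_init(example_pmu_init);
module_exit(example_pmu_exit);
MODULE_LICENSE("GPL");

Setting .module = THIS_MODULE is what lets the try_module_get()/module_put() pairing added in this merge keep the module loaded for as long as an event references the PMU.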
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c      117
-rw-r--r--  kernel/events/uprobes.c    31
-rw-r--r--  kernel/hrtimer.c            1
3 files changed, 80 insertions, 69 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397..689237a0c5e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
 
 #include "internal.h"
 
@@ -1677,6 +1678,8 @@ event_sched_in(struct perf_event *event,
 	u64 tstamp = perf_event_time(event);
 	int ret = 0;
 
+	lockdep_assert_held(&ctx->lock);
+
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
@@ -3244,9 +3247,13 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
+	if (event->pmu)
+		module_put(event->pmu->module);
+
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
-static void free_event(struct perf_event *event)
+
+static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
 
@@ -3267,42 +3274,31 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-
 	__free_event(event);
 }
 
-int perf_event_release_kernel(struct perf_event *event)
+/*
+ * Used to free events which have a known refcount of 1, such as in error paths
+ * where the event isn't exposed yet and inherited events.
+ */
+static void free_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx = event->ctx;
-
-	WARN_ON_ONCE(ctx->parent_ctx);
-	/*
-	 * There are two ways this annotation is useful:
-	 *
-	 *  1) there is a lock recursion from perf_event_exit_task
-	 *     see the comment there.
-	 *
-	 *  2) there is a lock-inversion with mmap_sem through
-	 *     perf_event_read_group(), which takes faults while
-	 *     holding ctx->mutex, however this is called after
-	 *     the last filedesc died, so there is no possibility
-	 *     to trigger the AB-BA case.
-	 */
-	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-	perf_remove_from_context(event, true);
-	mutex_unlock(&ctx->mutex);
-
-	free_event(event);
+	if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
+				"unexpected event refcount: %ld; ptr=%p\n",
+				atomic_long_read(&event->refcount), event)) {
+		/* leak to avoid use-after-free */
+		return;
+	}
 
-	return 0;
+	_free_event(event);
 }
-EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
 /*
  * Called when the last reference to the file is gone.
  */
 static void put_event(struct perf_event *event)
 {
+	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *owner;
 
 	if (!atomic_long_dec_and_test(&event->refcount))
@@ -3341,9 +3337,33 @@ static void put_event(struct perf_event *event)
 		put_task_struct(owner);
 	}
 
-	perf_event_release_kernel(event);
+	WARN_ON_ONCE(ctx->parent_ctx);
+	/*
+	 * There are two ways this annotation is useful:
+	 *
+	 *  1) there is a lock recursion from perf_event_exit_task
+	 *     see the comment there.
+	 *
+	 *  2) there is a lock-inversion with mmap_sem through
+	 *     perf_event_read_group(), which takes faults while
+	 *     holding ctx->mutex, however this is called after
+	 *     the last filedesc died, so there is no possibility
+	 *     to trigger the AB-BA case.
+	 */
+	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
+	perf_remove_from_context(event, true);
+	mutex_unlock(&ctx->mutex);
+
+	_free_event(event);
 }
 
+int perf_event_release_kernel(struct perf_event *event)
+{
+	put_event(event);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
 static int perf_release(struct inode *inode, struct file *file)
 {
 	put_event(file->private_data);
@@ -6578,6 +6598,7 @@ free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6599,6 +6620,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
 struct pmu *perf_init_event(struct perf_event *event)
 {
@@ -6612,6 +6634,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (ret)
@@ -6620,6 +6646,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (!ret)
@@ -6798,6 +6828,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
+	module_put(pmu->module);
 err_ns:
 	if (event->ns)
 		put_pid_ns(event->ns);
@@ -7067,20 +7098,26 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (task && group_leader &&
+	    group_leader->attr.inherit != attr.inherit) {
+		err = -EINVAL;
+		goto err_task;
+	}
+
 	get_online_cpus();
 
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
-		goto err_task;
+		goto err_cpus;
 	}
 
 	if (flags & PERF_FLAG_PID_CGROUP) {
 		err = perf_cgroup_connect(pid, event, &attr, group_leader);
 		if (err) {
 			__free_event(event);
-			goto err_task;
+			goto err_cpus;
 		}
 	}
 
@@ -7242,8 +7279,9 @@ err_context:
 	put_ctx(ctx);
 err_alloc:
 	free_event(event);
-err_task:
+err_cpus:
 	put_online_cpus();
+err_task:
 	if (task)
 		put_task_struct(task);
 err_group_fd:
@@ -7379,7 +7417,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 			       struct perf_event_context *child_ctx,
 			       struct task_struct *child)
 {
-	perf_remove_from_context(child_event, !!child_event->parent);
+	perf_remove_from_context(child_event, true);
 
 	/*
 	 * It can happen that the parent exits first, and has events
@@ -7394,7 +7432,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
-	struct perf_event *child_event, *tmp;
+	struct perf_event *child_event;
 	struct perf_event_context *child_ctx;
 	unsigned long flags;
 
@@ -7448,24 +7486,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 */
 	mutex_lock(&child_ctx->mutex);
 
-again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
-				 group_entry)
-		__perf_event_exit_task(child_event, child_ctx, child);
-
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
-				 group_entry)
+	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
-	/*
-	 * If the last event was a group event, it will have appended all
-	 * its siblings to the list, but we obtained 'tmp' before that which
-	 * will still point to the list head terminating the iteration.
-	 */
-	if (!list_empty(&child_ctx->pinned_groups) ||
-	    !list_empty(&child_ctx->flexible_groups))
-		goto again;
-
 	mutex_unlock(&child_ctx->mutex);
 
 	put_ctx(child_ctx);
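The perf_event_open() hunk above now rejects a group member whose attr.inherit setting disagrees with its group leader's when the events are bound to a task. For illustration only, a hypothetical user-space sketch of the case that now returns EINVAL (the counter choices are placeholders, and sys_perf_event_open() is just the usual raw-syscall wrapper since glibc provides none):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			       int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr leader, member;
	int leader_fd, member_fd;

	memset(&leader, 0, sizeof(leader));
	leader.size = sizeof(leader);
	leader.type = PERF_TYPE_HARDWARE;
	leader.config = PERF_COUNT_HW_CPU_CYCLES;
	leader.inherit = 1;			/* count in child tasks too */

	member = leader;
	member.config = PERF_COUNT_HW_INSTRUCTIONS;
	member.inherit = 0;			/* disagrees with the leader */

	leader_fd = sys_perf_event_open(&leader, 0 /* current task */, -1, -1, 0);
	if (leader_fd < 0) {
		perror("leader perf_event_open");
		return 1;
	}

	member_fd = sys_perf_event_open(&member, 0, -1, leader_fd, 0);
	if (member_fd < 0)
		perror("member perf_event_open");	/* EINVAL after this change */
	return 0;
}

With matching inherit flags on leader and member, the second call succeeds as before.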
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369d..d1edc5e6fd03 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,8 +60,6 @@ static struct percpu_rw_semaphore dup_mmap_sem;
 
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
-/* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	1
 
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
@@ -491,12 +489,9 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
-
 	/* a uprobe exists for this inode:offset combination */
 	if (cur_uprobe) {
 		kfree(uprobe);
@@ -1628,20 +1623,6 @@ bool uprobe_deny_signal(void)
 	return true;
 }
 
-/*
- * Avoid singlestepping the original instruction if the original instruction
- * is a NOP or can be emulated.
- */
-static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
-		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
-			return true;
-		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-	}
-	return false;
-}
-
 static void mmf_recalc_uprobes(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
@@ -1868,13 +1849,13 @@ static void handle_swbp(struct pt_regs *regs)
 
 	handler_chain(uprobe, regs);
 
-	if (can_skip_sstep(uprobe, regs))
+	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
 		goto out;
 
 	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
 
-	/* can_skip_sstep() succeeded, or restart if can't singlestep */
+	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
 out:
 	put_uprobe(uprobe);
 }
@@ -1886,10 +1867,11 @@ out:
 static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 {
 	struct uprobe *uprobe;
+	int err = 0;
 
 	uprobe = utask->active_uprobe;
 	if (utask->state == UTASK_SSTEP_ACK)
-		arch_uprobe_post_xol(&uprobe->arch, regs);
+		err = arch_uprobe_post_xol(&uprobe->arch, regs);
 	else if (utask->state == UTASK_SSTEP_TRAPPED)
 		arch_uprobe_abort_xol(&uprobe->arch, regs);
 	else
@@ -1903,6 +1885,11 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
 	spin_lock_irq(&current->sighand->siglock);
 	recalc_sigpending(); /* see uprobe_deny_signal() */
 	spin_unlock_irq(&current->sighand->siglock);
+
+	if (unlikely(err)) {
+		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
+		force_sig_info(SIGILL, SEND_SIG_FORCED, current);
+	}
 }
 
 /*
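With UPROBE_SKIP_SSTEP gone, handle_swbp() now asks the architecture on every hit whether the original instruction can be emulated instead of being single-stepped out of line. Purely to illustrate that hook's contract, a hypothetical architecture's arch_uprobe_skip_sstep() might look roughly like this (the insn[] field layout and the 0x90 NOP opcode are assumptions loosely modelled on x86; none of this code is part of the merge):

#include <linux/uprobes.h>
#include <linux/ptrace.h>

/*
 * Hypothetical sketch: skip the XOL single-step when the original
 * instruction is a single-byte NOP that can be "executed" simply by
 * advancing the instruction pointer past it.
 */
bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	if (auprobe->insn[0] == 0x90) {		/* assumed NOP encoding */
		instruction_pointer_set(regs, instruction_pointer(regs) + 1);
		return true;			/* emulated; no single-step needed */
	}
	return false;				/* fall back to the pre_ssout() path */
}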
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e0501fe7140d..3ab28993f6e0 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1039,6 +1039,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(__hrtimer_start_range_ns);
 
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
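The hunk above exports __hrtimer_start_range_ns(), the low-level helper behind hrtimer_start()/hrtimer_start_range_ns(), making that path reachable from modular code. For context only, a hypothetical module-side periodic hrtimer looks roughly like this (the names and the 1 ms period are invented, and it uses the ordinary wrappers rather than the newly exported helper):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/module.h>

static struct hrtimer example_timer;

/* Fires every 1 ms; a sampling driver would read counters or queue work here. */
static enum hrtimer_restart example_timer_fn(struct hrtimer *timer)
{
	hrtimer_forward_now(timer, ms_to_ktime(1));
	return HRTIMER_RESTART;
}

static int __init example_init(void)
{
	hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	example_timer.function = example_timer_fn;
	hrtimer_start(&example_timer, ms_to_ktime(1), HRTIMER_MODE_REL);
	return 0;
}

static void __exit example_exit(void)
{
	hrtimer_cancel(&example_timer);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");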