diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 22:18:49 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 22:18:49 -0400 |
| commit | 3737a12761636ebde0f09ef49daebb8eed18cc8a (patch) | |
| tree | 965057f4bccd97049f8c0140f8670c5d4278ca3e /kernel/events | |
| parent | c29deef32e3699e40da3e9e82267610de04e6b54 (diff) | |
| parent | 82b897782d10fcc4930c9d4a15b175348fdd2871 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull more perf updates from Ingo Molnar:
"A second round of perf updates:
- wide reaching kprobes sanitization and robustization, with the hope
of fixing all 'probe this function crashes the kernel' bugs, by
Masami Hiramatsu.
- uprobes updates from Oleg Nesterov: tmpfs support, corner case
fixes and robustization work.
- perf tooling updates and fixes from Jiri Olsa, Namhyung Kim, Arnaldo
et al:
* Add support to accumulate hist periods (Namhyung Kim)
* various fixes, refactorings and enhancements"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
perf: Differentiate exec() and non-exec() comm events
perf: Fix perf_event_comm() vs. exec() assumption
uprobes/x86: Rename arch_uprobe->def to ->defparam, minor comment updates
perf/documentation: Add description for conditional branch filter
perf/x86: Add conditional branch filtering support
perf/tool: Add conditional branch filter 'cond' to perf record
perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND'
uprobes: Teach copy_insn() to support tmpfs
uprobes: Shift ->readpage check from __copy_insn() to uprobe_register()
perf/x86: Use common PMU interrupt disabled code
perf/ARM: Use common PMU interrupt disabled code
perf: Disable sampled events if no PMU interrupt
perf: Fix use after free in perf_remove_from_context()
perf tools: Fix 'make help' message error
perf record: Fix poll return value propagation
perf tools: Move elide bool into perf_hpp_fmt struct
perf tools: Remove elide setup for SORT_MODE__MEMORY mode
perf tools: Fix "==" into "=" in ui_browser__warning assignment
perf tools: Allow overriding sysfs and proc finding with env var
perf tools: Consider header files outside perf directory in tags target
...
Diffstat (limited to 'kernel/events')
| -rw-r--r-- | kernel/events/core.c | 43 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 52 |
2 files changed, 60 insertions, 35 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 24d35cc38e42..5fa58e4cffac 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -2974,6 +2974,22 @@ out: | |||
| 2974 | local_irq_restore(flags); | 2974 | local_irq_restore(flags); |
| 2975 | } | 2975 | } |
| 2976 | 2976 | ||
| 2977 | void perf_event_exec(void) | ||
| 2978 | { | ||
| 2979 | struct perf_event_context *ctx; | ||
| 2980 | int ctxn; | ||
| 2981 | |||
| 2982 | rcu_read_lock(); | ||
| 2983 | for_each_task_context_nr(ctxn) { | ||
| 2984 | ctx = current->perf_event_ctxp[ctxn]; | ||
| 2985 | if (!ctx) | ||
| 2986 | continue; | ||
| 2987 | |||
| 2988 | perf_event_enable_on_exec(ctx); | ||
| 2989 | } | ||
| 2990 | rcu_read_unlock(); | ||
| 2991 | } | ||
| 2992 | |||
| 2977 | /* | 2993 | /* |
| 2978 | * Cross CPU call to read the hardware event | 2994 | * Cross CPU call to read the hardware event |
| 2979 | */ | 2995 | */ |
| @@ -5075,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
| 5075 | NULL); | 5091 | NULL); |
| 5076 | } | 5092 | } |
| 5077 | 5093 | ||
| 5078 | void perf_event_comm(struct task_struct *task) | 5094 | void perf_event_comm(struct task_struct *task, bool exec) |
| 5079 | { | 5095 | { |
| 5080 | struct perf_comm_event comm_event; | 5096 | struct perf_comm_event comm_event; |
| 5081 | struct perf_event_context *ctx; | ||
| 5082 | int ctxn; | ||
| 5083 | |||
| 5084 | rcu_read_lock(); | ||
| 5085 | for_each_task_context_nr(ctxn) { | ||
| 5086 | ctx = task->perf_event_ctxp[ctxn]; | ||
| 5087 | if (!ctx) | ||
| 5088 | continue; | ||
| 5089 | |||
| 5090 | perf_event_enable_on_exec(ctx); | ||
| 5091 | } | ||
| 5092 | rcu_read_unlock(); | ||
| 5093 | 5097 | ||
| 5094 | if (!atomic_read(&nr_comm_events)) | 5098 | if (!atomic_read(&nr_comm_events)) |
| 5095 | return; | 5099 | return; |
| @@ -5101,7 +5105,7 @@ void perf_event_comm(struct task_struct *task) | |||
| 5101 | .event_id = { | 5105 | .event_id = { |
| 5102 | .header = { | 5106 | .header = { |
| 5103 | .type = PERF_RECORD_COMM, | 5107 | .type = PERF_RECORD_COMM, |
| 5104 | .misc = 0, | 5108 | .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0, |
| 5105 | /* .size */ | 5109 | /* .size */ |
| 5106 | }, | 5110 | }, |
| 5107 | /* .pid */ | 5111 | /* .pid */ |
| @@ -7122,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7122 | } | 7126 | } |
| 7123 | } | 7127 | } |
| 7124 | 7128 | ||
| 7129 | if (is_sampling_event(event)) { | ||
| 7130 | if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { | ||
| 7131 | err = -ENOTSUPP; | ||
| 7132 | goto err_alloc; | ||
| 7133 | } | ||
| 7134 | } | ||
| 7135 | |||
| 7125 | account_event(event); | 7136 | account_event(event); |
| 7126 | 7137 | ||
| 7127 | /* | 7138 | /* |
| @@ -7433,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event, | |||
| 7433 | 7444 | ||
| 7434 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | 7445 | static void perf_event_exit_task_context(struct task_struct *child, int ctxn) |
| 7435 | { | 7446 | { |
| 7436 | struct perf_event *child_event; | 7447 | struct perf_event *child_event, *next; |
| 7437 | struct perf_event_context *child_ctx; | 7448 | struct perf_event_context *child_ctx; |
| 7438 | unsigned long flags; | 7449 | unsigned long flags; |
| 7439 | 7450 | ||
| @@ -7487,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
| 7487 | */ | 7498 | */ |
| 7488 | mutex_lock(&child_ctx->mutex); | 7499 | mutex_lock(&child_ctx->mutex); |
| 7489 | 7500 | ||
| 7490 | list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) | 7501 | list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) |
| 7491 | __perf_event_exit_task(child_event, child_ctx, child); | 7502 | __perf_event_exit_task(child_event, child_ctx, child); |
| 7492 | 7503 | ||
| 7493 | mutex_unlock(&child_ctx->mutex); | 7504 | mutex_unlock(&child_ctx->mutex); |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index adcd76a96839..c445e392e93f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include "../../mm/internal.h" /* munlock_vma_page */ | 36 | #include "../../mm/internal.h" /* munlock_vma_page */ |
| 37 | #include <linux/percpu-rwsem.h> | 37 | #include <linux/percpu-rwsem.h> |
| 38 | #include <linux/task_work.h> | 38 | #include <linux/task_work.h> |
| 39 | #include <linux/shmem_fs.h> | ||
| 39 | 40 | ||
| 40 | #include <linux/uprobes.h> | 41 | #include <linux/uprobes.h> |
| 41 | 42 | ||
| @@ -127,7 +128,7 @@ struct xol_area { | |||
| 127 | */ | 128 | */ |
| 128 | static bool valid_vma(struct vm_area_struct *vma, bool is_register) | 129 | static bool valid_vma(struct vm_area_struct *vma, bool is_register) |
| 129 | { | 130 | { |
| 130 | vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; | 131 | vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; |
| 131 | 132 | ||
| 132 | if (is_register) | 133 | if (is_register) |
| 133 | flags |= VM_WRITE; | 134 | flags |= VM_WRITE; |
| @@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t | |||
| 279 | * supported by that architecture then we need to modify is_trap_at_addr and | 280 | * supported by that architecture then we need to modify is_trap_at_addr and |
| 280 | * uprobe_write_opcode accordingly. This would never be a problem for archs | 281 | * uprobe_write_opcode accordingly. This would never be a problem for archs |
| 281 | * that have fixed length instructions. | 282 | * that have fixed length instructions. |
| 282 | */ | 283 | * |
| 283 | |||
| 284 | /* | ||
| 285 | * uprobe_write_opcode - write the opcode at a given virtual address. | 284 | * uprobe_write_opcode - write the opcode at a given virtual address. |
| 286 | * @mm: the probed process address space. | 285 | * @mm: the probed process address space. |
| 287 | * @vaddr: the virtual address to store the opcode. | 286 | * @vaddr: the virtual address to store the opcode. |
| 288 | * @opcode: opcode to be written at @vaddr. | 287 | * @opcode: opcode to be written at @vaddr. |
| 289 | * | 288 | * |
| 290 | * Called with mm->mmap_sem held (for read and with a reference to | 289 | * Called with mm->mmap_sem held for write. |
| 291 | * mm). | ||
| 292 | * | ||
| 293 | * For mm @mm, write the opcode at @vaddr. | ||
| 294 | * Return 0 (success) or a negative errno. | 290 | * Return 0 (success) or a negative errno. |
| 295 | */ | 291 | */ |
| 296 | int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, | 292 | int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, |
| @@ -310,21 +306,25 @@ retry: | |||
| 310 | if (ret <= 0) | 306 | if (ret <= 0) |
| 311 | goto put_old; | 307 | goto put_old; |
| 312 | 308 | ||
| 309 | ret = anon_vma_prepare(vma); | ||
| 310 | if (ret) | ||
| 311 | goto put_old; | ||
| 312 | |||
| 313 | ret = -ENOMEM; | 313 | ret = -ENOMEM; |
| 314 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); | 314 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); |
| 315 | if (!new_page) | 315 | if (!new_page) |
| 316 | goto put_old; | 316 | goto put_old; |
| 317 | 317 | ||
| 318 | __SetPageUptodate(new_page); | 318 | if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) |
| 319 | goto put_new; | ||
| 319 | 320 | ||
| 321 | __SetPageUptodate(new_page); | ||
| 320 | copy_highpage(new_page, old_page); | 322 | copy_highpage(new_page, old_page); |
| 321 | copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); | 323 | copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); |
| 322 | 324 | ||
| 323 | ret = anon_vma_prepare(vma); | ||
| 324 | if (ret) | ||
| 325 | goto put_new; | ||
| 326 | |||
| 327 | ret = __replace_page(vma, vaddr, old_page, new_page); | 325 | ret = __replace_page(vma, vaddr, old_page, new_page); |
| 326 | if (ret) | ||
| 327 | mem_cgroup_uncharge_page(new_page); | ||
| 328 | 328 | ||
| 329 | put_new: | 329 | put_new: |
| 330 | page_cache_release(new_page); | 330 | page_cache_release(new_page); |
| @@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, | |||
| 537 | void *insn, int nbytes, loff_t offset) | 537 | void *insn, int nbytes, loff_t offset) |
| 538 | { | 538 | { |
| 539 | struct page *page; | 539 | struct page *page; |
| 540 | |||
| 541 | if (!mapping->a_ops->readpage) | ||
| 542 | return -EIO; | ||
| 543 | /* | 540 | /* |
| 544 | * Ensure that the page that has the original instruction is | 541 | * Ensure that the page that has the original instruction is populated |
| 545 | * populated and in page-cache. | 542 | * and in page-cache. If ->readpage == NULL it must be shmem_mapping(), |
| 543 | * see uprobe_register(). | ||
| 546 | */ | 544 | */ |
| 547 | page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); | 545 | if (mapping->a_ops->readpage) |
| 546 | page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); | ||
| 547 | else | ||
| 548 | page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT); | ||
| 548 | if (IS_ERR(page)) | 549 | if (IS_ERR(page)) |
| 549 | return PTR_ERR(page); | 550 | return PTR_ERR(page); |
| 550 | 551 | ||
| @@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * | |||
| 880 | if (!uc->handler && !uc->ret_handler) | 881 | if (!uc->handler && !uc->ret_handler) |
| 881 | return -EINVAL; | 882 | return -EINVAL; |
| 882 | 883 | ||
| 884 | /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ | ||
| 885 | if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping)) | ||
| 886 | return -EIO; | ||
| 883 | /* Racy, just to catch the obvious mistakes */ | 887 | /* Racy, just to catch the obvious mistakes */ |
| 884 | if (offset > i_size_read(inode)) | 888 | if (offset > i_size_read(inode)) |
| 885 | return -EINVAL; | 889 | return -EINVAL; |
| @@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) | |||
| 1361 | return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; | 1365 | return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; |
| 1362 | } | 1366 | } |
| 1363 | 1367 | ||
| 1368 | unsigned long uprobe_get_trap_addr(struct pt_regs *regs) | ||
| 1369 | { | ||
| 1370 | struct uprobe_task *utask = current->utask; | ||
| 1371 | |||
| 1372 | if (unlikely(utask && utask->active_uprobe)) | ||
| 1373 | return utask->vaddr; | ||
| 1374 | |||
| 1375 | return instruction_pointer(regs); | ||
| 1376 | } | ||
| 1377 | |||
| 1364 | /* | 1378 | /* |
| 1365 | * Called with no locks held. | 1379 | * Called with no locks held. |
| 1366 | * Called in context of a exiting or a exec-ing thread. | 1380 | * Called in context of a exiting or a exec-ing thread. |
