author     Linus Torvalds <torvalds@linux-foundation.org>   2012-07-22 14:10:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-07-22 14:10:36 -0400
commit     2eafeb6a415864bc4c59df79151cf40f6ac74b9e
tree       331ee730275276aebbda5dd278a97c941067d5fd /kernel
parent     16d286e656250859946786de0df0fb01f8f241bc
parent     6e0f17be0361444862637e8986c8c1a3b3f8dcf8
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes from Ingo Molnar:
"- kernel side:
- Intel uncore PMU support for Nehalem and Sandy Bridge CPUs, we
support both the events available via the MSR and via the PCI
access space.
- various uprobes cleanups and restructurings
- PMU driver quirks by microcode version and required x86 microcode
loader cleanups/robustization
- various tracing robustness updates
- static keys: remove obsolete static_branch()
- tooling side:
- GTK browser improvements
- perf report browser: support screenshots to file
- more automated tests
- perf kvm improvements
- perf bench refinements
- build environment improvements
- pipe mode improvements
- libtraceevent updates, we have now hopefully merged most bits with
the out of tree forked code base
... and many other goodies."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (138 commits)
tracing: Check for allocation failure in __tracing_open()
perf/x86: Fix intel_perfmon_event_map formatting
jump label: Remove static_branch()
tracepoint: Use static_key_false(), since static_branch() is deprecated
perf/x86: Uncore filter support for SandyBridge-EP
perf/x86: Detect number of instances of uncore CBox
perf/x86: Fix event constraint for SandyBridge-EP C-Box
perf/x86: Use 0xff as pseudo code for fixed uncore event
perf/x86: Save a few bytes in 'struct x86_pmu'
perf/x86: Add a microcode revision check for SNB-PEBS
perf/x86: Improve debug output in check_hw_exists()
perf/x86/amd: Unify AMD's generic and family 15h pmus
perf/x86: Move Intel specific code to intel_pmu_init()
perf/x86: Rename Intel specific macros
perf/x86: Fix USER/KERNEL tagging of samples
perf tools: Split event symbols arrays to hw and sw parts
perf tools: Split out PE_VALUE_SYM parsing token to SW and HW tokens
perf tools: Add empty rule for new line in event syntax parsing
perf test: Use ARRAY_SIZE in parse events tests
tools lib traceevent: Cleanup realloc use
...
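
Several of the commits above retire the deprecated static_branch() in favor of static_key_false(). The following is a rough, hedged sketch of the pattern being migrated to; the key name and do_trace() are invented for illustration and are not code from this merge:

    #include <linux/jump_label.h>

    /* Hypothetical key and trace hook, for illustration only. */
    static struct static_key my_trace_key = STATIC_KEY_INIT_FALSE;

    static void do_trace(void)
    {
            /* ... emit a trace record ... */
    }

    void maybe_trace(void)
    {
            /*
             * Was: if (static_branch(&my_trace_key)) do_trace();
             * static_key_false() compiles to a patchable no-op branch that
             * only takes the slow path once the key is enabled at runtime.
             */
            if (static_key_false(&my_trace_key))
                    do_trace();
    }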
Diffstat (limited to 'kernel')

 kernel/events/core.c                 |  49
 kernel/events/uprobes.c              | 461
 kernel/trace/ftrace.c                |   8
 kernel/trace/ring_buffer.c           |   4
 kernel/trace/trace.c                 |  33
 kernel/trace/trace.h                 |   8
 kernel/trace/trace_functions_graph.c |   2
 kernel/trace/trace_output.c          |   2

 8 files changed, 313 insertions(+), 254 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d7d71d6ec972..f1cf0edeb39a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx,
         lockdep_assert_held(&ctx->mutex);
 
         event->ctx = ctx;
+        if (event->cpu != -1)
+                event->cpu = cpu;
 
         if (!task) {
                 /*
@@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open,
                 }
         }
 
+        get_online_cpus();
+
         event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
                                  NULL, NULL);
         if (IS_ERR(event)) {
@@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open,
         /*
          * Get the target context (task or percpu):
          */
-        ctx = find_get_context(pmu, task, cpu);
+        ctx = find_get_context(pmu, task, event->cpu);
         if (IS_ERR(ctx)) {
                 err = PTR_ERR(ctx);
                 goto err_alloc;
@@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open,
         mutex_lock(&ctx->mutex);
 
         if (move_group) {
-                perf_install_in_context(ctx, group_leader, cpu);
+                synchronize_rcu();
+                perf_install_in_context(ctx, group_leader, event->cpu);
                 get_ctx(ctx);
                 list_for_each_entry(sibling, &group_leader->sibling_list,
                                     group_entry) {
-                        perf_install_in_context(ctx, sibling, cpu);
+                        perf_install_in_context(ctx, sibling, event->cpu);
                         get_ctx(ctx);
                 }
         }
 
-        perf_install_in_context(ctx, event, cpu);
+        perf_install_in_context(ctx, event, event->cpu);
         ++ctx->generation;
         perf_unpin_context(ctx);
         mutex_unlock(&ctx->mutex);
 
+        put_online_cpus();
+
         event->owner = current;
 
         mutex_lock(&current->perf_event_mutex);
@@ -6419,6 +6426,7 @@ err_context:
 err_alloc:
         free_event(event);
 err_task:
+        put_online_cpus();
         if (task)
                 put_task_struct(task);
 err_group_fd:
@@ -6479,6 +6487,39 @@ err:
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+        struct perf_event_context *src_ctx;
+        struct perf_event_context *dst_ctx;
+        struct perf_event *event, *tmp;
+        LIST_HEAD(events);
+
+        src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+        dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+        mutex_lock(&src_ctx->mutex);
+        list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+                                 event_entry) {
+                perf_remove_from_context(event);
+                put_ctx(src_ctx);
+                list_add(&event->event_entry, &events);
+        }
+        mutex_unlock(&src_ctx->mutex);
+
+        synchronize_rcu();
+
+        mutex_lock(&dst_ctx->mutex);
+        list_for_each_entry_safe(event, tmp, &events, event_entry) {
+                list_del(&event->event_entry);
+                if (event->state >= PERF_EVENT_STATE_OFF)
+                        event->state = PERF_EVENT_STATE_INACTIVE;
+                perf_install_in_context(dst_ctx, event, dst_cpu);
+                get_ctx(dst_ctx);
+        }
+        mutex_unlock(&dst_ctx->mutex);
+}
+EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
 static void sync_child_event(struct perf_event *child_event,
                                struct task_struct *child)
 {
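
The perf_pmu_migrate_context() helper added above is exported so that an uncore-style PMU driver can keep per-package events alive when the CPU collecting them goes offline. A hedged sketch of a caller follows; my_uncore_cpu_down() and find_new_target() are invented names for illustration, not code from this merge:

    #include <linux/perf_event.h>

    /*
     * Sketch only: on CPU offline, move all events from the dying CPU to
     * another CPU that can reach the same (e.g. per-package) counters.
     */
    static void my_uncore_cpu_down(struct pmu *pmu, int dying_cpu)
    {
            int target = find_new_target(dying_cpu);  /* hypothetical helper */

            if (target >= 0)
                    perf_pmu_migrate_context(pmu, dying_cpu, target);
    }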
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d80fe8..f93532748bca 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -38,13 +38,29 @@
 #define UINSNS_PER_PAGE        (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
 #define MAX_UPROBE_XOL_SLOTS   UINSNS_PER_PAGE
 
-static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_SPINLOCK(uprobes_treelock);       /* serialize rbtree access */
 
 #define UPROBES_HASH_SZ 13
 
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consequtively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
@@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasnt recommended.
- */
-struct vma_info {
-        struct list_head probe_list;
-        struct mm_struct *mm;
-        loff_t vaddr;
-};
-
 struct uprobe {
         struct rb_node rb_node;         /* node in the rb tree */
         atomic_t ref;
@@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
         if (!is_register)
                 return true;
 
-        if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+        if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
+                                == (VM_READ|VM_EXEC))
                 return true;
 
         return false;
@@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
 static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
 {
         struct mm_struct *mm = vma->vm_mm;
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *ptep;
-        spinlock_t *ptl;
         unsigned long addr;
-        int err = -EFAULT;
+        spinlock_t *ptl;
+        pte_t *ptep;
 
         addr = page_address_in_vma(page, vma);
         if (addr == -EFAULT)
-                goto out;
-
-        pgd = pgd_offset(mm, addr);
-        if (!pgd_present(*pgd))
-                goto out;
-
-        pud = pud_offset(pgd, addr);
-        if (!pud_present(*pud))
-                goto out;
-
-        pmd = pmd_offset(pud, addr);
-        if (!pmd_present(*pmd))
-                goto out;
+                return -EFAULT;
 
-        ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+        ptep = page_check_address(page, mm, addr, &ptl, 0);
         if (!ptep)
-                goto out;
+                return -EAGAIN;
 
         get_page(kpage);
         page_add_new_anon_rmap(kpage, vma, addr);
@@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
         try_to_free_swap(page);
         put_page(page);
         pte_unmap_unlock(ptep, ptl);
-        err = 0;
 
-out:
-        return err;
+        return 0;
 }
 
 /**
@@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
         void *vaddr_old, *vaddr_new;
         struct vm_area_struct *vma;
         struct uprobe *uprobe;
-        loff_t addr;
         int ret;
-
+retry:
         /* Read the page with vaddr into memory */
         ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
         if (ret <= 0)
@@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
         if (mapping != vma->vm_file->f_mapping)
                 goto put_out;
 
-        addr = vma_address(vma, uprobe->offset);
-        if (vaddr != (unsigned long)addr)
-                goto put_out;
-
         ret = -ENOMEM;
         new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
         if (!new_page)
@@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
         vaddr_new = kmap_atomic(new_page);
 
         memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-
-        /* poke the new insn in, ASSUMES we don't cross page boundary */
-        vaddr &= ~PAGE_MASK;
-        BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-        memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+        memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
 
         kunmap_atomic(vaddr_new);
         kunmap_atomic(vaddr_old);
@@ -291,6 +270,8 @@ unlock_out:
 put_out:
         put_page(old_page);
 
+        if (unlikely(ret == -EAGAIN))
+                goto retry;
         return ret;
 }
 
@@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
         void *vaddr_new;
         int ret;
 
-        ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+        ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
         if (ret <= 0)
                 return ret;
 
@@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
         uprobe_opcode_t opcode;
         int result;
 
+        if (current->mm == mm) {
+                pagefault_disable();
+                result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
+                                                        sizeof(opcode));
+                pagefault_enable();
+
+                if (likely(result == 0))
+                        goto out;
+        }
+
         result = read_opcode(mm, vaddr, &opcode);
         if (result)
                 return result;
-
+out:
         if (is_swbp_insn(&opcode))
                 return 1;
 
@@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
         int result;
-
+        /*
+         * See the comment near uprobes_hash().
+         */
         result = is_swbp_at_addr(mm, vaddr);
         if (result == 1)
                 return -EEXIST;
@@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
         uprobe->inode = igrab(inode);
         uprobe->offset = offset;
         init_rwsem(&uprobe->consumer_rwsem);
-        INIT_LIST_HEAD(&uprobe->pending_list);
 
         /* add to uprobes_tree, sorted on inode:offset */
         cur_uprobe = insert_uprobe(uprobe);
@@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 }
 
 static int
-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
-                        unsigned long nbytes, unsigned long offset)
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+                        unsigned long nbytes, loff_t offset)
 {
-        struct file *filp = vma->vm_file;
         struct page *page;
         void *vaddr;
-        unsigned long off1;
-        unsigned long idx;
+        unsigned long off;
+        pgoff_t idx;
 
         if (!filp)
                 return -EINVAL;
 
-        idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
-        off1 = offset &= ~PAGE_MASK;
+        if (!mapping->a_ops->readpage)
+                return -EIO;
+
+        idx = offset >> PAGE_CACHE_SHIFT;
+        off = offset & ~PAGE_MASK;
 
         /*
          * Ensure that the page that has the original instruction is
@@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
                 return PTR_ERR(page);
 
         vaddr = kmap_atomic(page);
-        memcpy(insn, vaddr + off1, nbytes);
+        memcpy(insn, vaddr + off, nbytes);
         kunmap_atomic(vaddr);
         page_cache_release(page);
 
         return 0;
 }
 
-static int
-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
         struct address_space *mapping;
         unsigned long nbytes;
         int bytes;
 
-        addr &= ~PAGE_MASK;
-        nbytes = PAGE_SIZE - addr;
+        nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
         mapping = uprobe->inode->i_mapping;
 
         /* Instruction at end of binary; copy only available bytes */
@@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
 
         /* Instruction at the page-boundary; copy bytes in second page */
         if (nbytes < bytes) {
-                if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
-                                        bytes - nbytes, uprobe->offset + nbytes))
-                        return -ENOMEM;
-
+                int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+                                bytes - nbytes, uprobe->offset + nbytes);
+                if (err)
+                        return err;
                 bytes = nbytes;
         }
-        return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+        return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 /*
@@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
  */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
-                        struct vm_area_struct *vma, loff_t vaddr)
+                        struct vm_area_struct *vma, unsigned long vaddr)
 {
-        unsigned long addr;
         int ret;
 
         /*
@@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
         if (!uprobe->consumers)
                 return -EEXIST;
 
-        addr = (unsigned long)vaddr;
-
         if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-                ret = copy_insn(uprobe, vma, addr);
+                ret = copy_insn(uprobe, vma->vm_file);
                 if (ret)
                         return ret;
 
                 if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-                        return -EEXIST;
+                        return -ENOTSUPP;
 
-                ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
+                ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
                 if (ret)
                         return ret;
 
+                /* write_opcode() assumes we don't cross page boundary */
+                BUG_ON((uprobe->offset & ~PAGE_MASK) +
+                                UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
                 uprobe->flags |= UPROBE_COPY_INSN;
         }
 
@@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
          * Hence increment before and decrement on failure.
          */
         atomic_inc(&mm->uprobes_state.count);
-        ret = set_swbp(&uprobe->arch, mm, addr);
+        ret = set_swbp(&uprobe->arch, mm, vaddr);
         if (ret)
                 atomic_dec(&mm->uprobes_state.count);
 
@@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 }
 
 static void
-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-        if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+        if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
                 atomic_dec(&mm->uprobes_state.count);
 }
 
 /*
- * There could be threads that have hit the breakpoint and are entering the
- * notifier code and trying to acquire the uprobes_treelock. The thread
- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
- * race with these threads and might acquire the uprobes_treelock compared
- * to some of the breakpoint hit threads. In such a case, the breakpoint
- * hit threads will not find the uprobe. The current unregistering thread
- * waits till all other threads have hit a breakpoint, to acquire the
- * uprobes_treelock before the uprobe is removed from the rbtree.
+ * There could be threads that have already hit the breakpoint. They
+ * will recheck the current insn and restart if find_uprobe() fails.
+ * See find_active_uprobe().
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
         unsigned long flags;
 
-        synchronize_srcu(&uprobes_srcu);
         spin_lock_irqsave(&uprobes_treelock, flags);
         rb_erase(&uprobe->rb_node, &uprobes_tree);
         spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe)
         atomic_dec(&uprobe_events);
 }
 
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
-                        struct vma_info *vi, loff_t offset, bool is_register)
+struct map_info {
+        struct map_info *next;
+        struct mm_struct *mm;
+        unsigned long vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
+{
+        struct map_info *next = info->next;
+        kfree(info);
+        return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 {
+        unsigned long pgoff = offset >> PAGE_SHIFT;
         struct prio_tree_iter iter;
         struct vm_area_struct *vma;
-        struct vma_info *tmpvi;
-        unsigned long pgoff;
-        int existing_vma;
-        loff_t vaddr;
-
-        pgoff = offset >> PAGE_SHIFT;
+        struct map_info *curr = NULL;
+        struct map_info *prev = NULL;
+        struct map_info *info;
+        int more = 0;
 
+ again:
+        mutex_lock(&mapping->i_mmap_mutex);
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                 if (!valid_vma(vma, is_register))
                         continue;
 
-                existing_vma = 0;
-                vaddr = vma_address(vma, offset);
-
-                list_for_each_entry(tmpvi, head, probe_list) {
-                        if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
-                                existing_vma = 1;
-                                break;
-                        }
+                if (!prev && !more) {
+                        /*
+                         * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+                         * reclaim. This is optimistic, no harm done if it fails.
+                         */
+                        prev = kmalloc(sizeof(struct map_info),
+                                        GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+                        if (prev)
+                                prev->next = NULL;
                 }
-
-                /*
-                 * Another vma needs a probe to be installed. However skip
-                 * installing the probe if the vma is about to be unlinked.
-                 */
-                if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
-                        vi->mm = vma->vm_mm;
-                        vi->vaddr = vaddr;
-                        list_add(&vi->probe_list, head);
-
-                        return vi;
+                if (!prev) {
+                        more++;
+                        continue;
                 }
-        }
 
-        return NULL;
-}
-
-/*
- * Iterate in the rmap prio tree and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
-                        loff_t offset, bool is_register)
-{
-        struct vma_info *vi, *retvi;
+                if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+                        continue;
 
-        vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
-        if (!vi)
-                return ERR_PTR(-ENOMEM);
+                info = prev;
+                prev = prev->next;
+                info->next = curr;
+                curr = info;
 
-        mutex_lock(&mapping->i_mmap_mutex);
-        retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
+                info->mm = vma->vm_mm;
+                info->vaddr = vma_address(vma, offset);
+        }
         mutex_unlock(&mapping->i_mmap_mutex);
 
-        if (!retvi)
-                kfree(vi);
+        if (!more)
+                goto out;
+
+        prev = curr;
+        while (curr) {
+                mmput(curr->mm);
+                curr = curr->next;
+        }
 
-        return retvi;
+        do {
+                info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+                if (!info) {
+                        curr = ERR_PTR(-ENOMEM);
+                        goto out;
+                }
+                info->next = prev;
+                prev = info;
+        } while (--more);
+
+        goto again;
+ out:
+        while (prev)
+                prev = free_map_info(prev);
+        return curr;
 }
 
 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-        struct list_head try_list;
-        struct vm_area_struct *vma;
-        struct address_space *mapping;
-        struct vma_info *vi, *tmpvi;
-        struct mm_struct *mm;
-        loff_t vaddr;
-        int ret;
+        struct map_info *info;
+        int err = 0;
 
-        mapping = uprobe->inode->i_mapping;
-        INIT_LIST_HEAD(&try_list);
+        info = build_map_info(uprobe->inode->i_mapping,
+                                        uprobe->offset, is_register);
+        if (IS_ERR(info))
+                return PTR_ERR(info);
 
-        ret = 0;
+        while (info) {
+                struct mm_struct *mm = info->mm;
+                struct vm_area_struct *vma;
 
-        for (;;) {
-                vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
-                if (!vi)
-                        break;
+                if (err)
+                        goto free;
 
-                if (IS_ERR(vi)) {
-                        ret = PTR_ERR(vi);
-                        break;
-                }
+                down_write(&mm->mmap_sem);
+                vma = find_vma(mm, (unsigned long)info->vaddr);
+                if (!vma || !valid_vma(vma, is_register))
+                        goto unlock;
 
-                mm = vi->mm;
-                down_read(&mm->mmap_sem);
-                vma = find_vma(mm, (unsigned long)vi->vaddr);
-                if (!vma || !valid_vma(vma, is_register)) {
-                        list_del(&vi->probe_list);
-                        kfree(vi);
-                        up_read(&mm->mmap_sem);
-                        mmput(mm);
-                        continue;
-                }
-                vaddr = vma_address(vma, uprobe->offset);
                 if (vma->vm_file->f_mapping->host != uprobe->inode ||
-                    vaddr != vi->vaddr) {
-                        list_del(&vi->probe_list);
-                        kfree(vi);
-                        up_read(&mm->mmap_sem);
-                        mmput(mm);
-                        continue;
-                }
-
-                if (is_register)
-                        ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
-                else
-                        remove_breakpoint(uprobe, mm, vi->vaddr);
+                    vma_address(vma, uprobe->offset) != info->vaddr)
+                        goto unlock;
 
-                up_read(&mm->mmap_sem);
-                mmput(mm);
                 if (is_register) {
-                        if (ret && ret == -EEXIST)
-                                ret = 0;
-                        if (ret)
-                                break;
+                        err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+                        /*
+                         * We can race against uprobe_mmap(), see the
+                         * comment near uprobe_hash().
+                         */
+                        if (err == -EEXIST)
+                                err = 0;
+                } else {
+                        remove_breakpoint(uprobe, mm, info->vaddr);
                 }
+ unlock:
+                up_write(&mm->mmap_sem);
+ free:
+                mmput(mm);
+                info = free_map_info(info);
         }
 
-        list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
-                list_del(&vi->probe_list);
-                kfree(vi);
-        }
-
-        return ret;
+        return err;
 }
 
 static int __uprobe_register(struct uprobe *uprobe)
@@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
 int uprobe_mmap(struct vm_area_struct *vma)
 {
         struct list_head tmp_list;
-        struct uprobe *uprobe, *u;
+        struct uprobe *uprobe;
         struct inode *inode;
         int ret, count;
 
@@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
         ret = 0;
         count = 0;
 
-        list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-                loff_t vaddr;
-
-                list_del(&uprobe->pending_list);
+        list_for_each_entry(uprobe, &tmp_list, pending_list) {
                 if (!ret) {
-                        vaddr = vma_address(vma, uprobe->offset);
+                        loff_t vaddr = vma_address(vma, uprobe->offset);
 
                         if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
                                 put_uprobe(uprobe);
@@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma)
                         }
 
                         ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-
-                        /* Ignore double add: */
+                        /*
+                         * We can race against uprobe_register(), see the
+                         * comment near uprobe_hash().
+                         */
                         if (ret == -EEXIST) {
                                 ret = 0;
 
@@ -1115,7 +1097,7 @@
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
         struct list_head tmp_list;
-        struct uprobe *uprobe, *u;
+        struct uprobe *uprobe;
         struct inode *inode;
 
         if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
@@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
         mutex_lock(uprobes_mmap_hash(inode));
         build_probe_list(inode, &tmp_list);
 
-        list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-                loff_t vaddr;
-
-                list_del(&uprobe->pending_list);
-                vaddr = vma_address(vma, uprobe->offset);
+        list_for_each_entry(uprobe, &tmp_list, pending_list) {
+                loff_t vaddr = vma_address(vma, uprobe->offset);
 
                 if (vaddr >= start && vaddr < end) {
                         /*
@@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t)
 {
         struct uprobe_task *utask = t->utask;
 
-        if (t->uprobe_srcu_id != -1)
-                srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
-
         if (!utask)
                 return;
 
@@ -1398,7 +1374,6 @@
 void uprobe_copy_process(struct task_struct *t)
 {
         t->utask = NULL;
-        t->uprobe_srcu_id = -1;
 }
 
 /*
@@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void)
         if (unlikely(!utask))
                 return NULL;
 
-        utask->active_uprobe = NULL;
         current->utask = utask;
         return utask;
 }
@@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
         return false;
 }
 
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
+{
+        struct mm_struct *mm = current->mm;
+        struct uprobe *uprobe = NULL;
+        struct vm_area_struct *vma;
+
+        down_read(&mm->mmap_sem);
+        vma = find_vma(mm, bp_vaddr);
+        if (vma && vma->vm_start <= bp_vaddr) {
+                if (valid_vma(vma, false)) {
+                        struct inode *inode;
+                        loff_t offset;
+
+                        inode = vma->vm_file->f_mapping->host;
+                        offset = bp_vaddr - vma->vm_start;
+                        offset += (vma->vm_pgoff << PAGE_SHIFT);
+                        uprobe = find_uprobe(inode, offset);
+                }
+
+                if (!uprobe)
+                        *is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+        } else {
+                *is_swbp = -EFAULT;
+        }
+        up_read(&mm->mmap_sem);
+
+        return uprobe;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
-        struct vm_area_struct *vma;
         struct uprobe_task *utask;
         struct uprobe *uprobe;
-        struct mm_struct *mm;
         unsigned long bp_vaddr;
+        int uninitialized_var(is_swbp);
 
-        uprobe = NULL;
         bp_vaddr = uprobe_get_swbp_addr(regs);
-        mm = current->mm;
-        down_read(&mm->mmap_sem);
-        vma = find_vma(mm, bp_vaddr);
-
-        if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
-                struct inode *inode;
-                loff_t offset;
-
-                inode = vma->vm_file->f_mapping->host;
-                offset = bp_vaddr - vma->vm_start;
-                offset += (vma->vm_pgoff << PAGE_SHIFT);
-                uprobe = find_uprobe(inode, offset);
-        }
-
-        srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
-        current->uprobe_srcu_id = -1;
-        up_read(&mm->mmap_sem);
+        uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
         if (!uprobe) {
-                /* No matching uprobe; signal SIGTRAP. */
-                send_sig(SIGTRAP, current, 0);
+                if (is_swbp > 0) {
+                        /* No matching uprobe; signal SIGTRAP. */
+                        send_sig(SIGTRAP, current, 0);
+                } else {
+                        /*
+                         * Either we raced with uprobe_unregister() or we can't
+                         * access this memory. The latter is only possible if
+                         * another thread plays with our ->mm. In both cases
+                         * we can simply restart. If this vma was unmapped we
+                         * can pretend this insn was not executed yet and get
+                         * the (correct) SIGSEGV after restart.
+                         */
+                        instruction_pointer_set(regs, bp_vaddr);
+                }
                 return;
         }
 
@@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
         utask->state = UTASK_BP_HIT;
 
         set_thread_flag(TIF_UPROBE);
-        current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
 
         return 1;
 }
@@ -1655,7 +1651,6 @@ static int __init init_uprobes(void)
                 mutex_init(&uprobes_mutex[i]);
                 mutex_init(&uprobes_mmap_mutex[i]);
         }
-        init_srcu_struct(&uprobes_srcu);
 
         return register_die_notifier(&uprobe_exception_nb);
 }
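
For orientation, the consumer-facing uprobes API that these internals serve looks like the following. This is a minimal, hedged sketch; the names are illustrative and error handling is elided:

    #include <linux/uprobes.h>

    /* Runs in the context of whichever task hits the breakpoint. */
    static int my_uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs)
    {
            return 0;
    }

    static struct uprobe_consumer my_consumer = {
            .handler = my_uprobe_handler,
    };

    /*
     * Install a probe at 'offset' within the file backing 'inode';
     * register_for_each_vma() above then walks every mm that maps it.
     */
    static int my_probe_install(struct inode *inode, loff_t offset)
    {
            return uprobe_register(inode, offset, &my_consumer);
    }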
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663d86c8..b4f20fba09fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-        if (ftrace_disabled)
+        if (unlikely(ftrace_disabled))
                 return -ENODEV;
 
         if (FTRACE_WARN_ON(ops == &global_ops))
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops)
 
         mutex_lock(&ftrace_lock);
 
-        if (unlikely(ftrace_disabled))
-                goto out_unlock;
-
         ret = __register_ftrace_function(ops);
         if (!ret)
                 ret = ftrace_startup(ops, 0);
 
-
- out_unlock:
         mutex_unlock(&ftrace_lock);
+
         return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
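
The register_ftrace_function() hunk works because __register_ftrace_function() already performs the ftrace_disabled check, now marked unlikely(). For context, here is a hedged sketch of the registration path from a caller's side; the callback name is invented, and the two-argument callback signature is the one this kernel generation used:

    #include <linux/ftrace.h>

    /* Called on entry to every traced kernel function. */
    static void my_trace_callback(unsigned long ip, unsigned long parent_ip)
    {
    }

    static struct ftrace_ops my_ops = {
            .func = my_trace_callback,
    };

    static int __init my_tracer_init(void)
    {
            return register_ftrace_function(&my_ops);
    }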
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f765465bffe4..49491fa7daa2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
         if (cpu_buffer->commit_page == cpu_buffer->reader_page)
                 goto out;
 
+        /* Don't bother swapping if the ring buffer is empty */
+        if (rb_num_of_entries(cpu_buffer) == 0)
+                goto out;
+
         /*
          * Reset the reader page to size zero.
          */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a7fa0702be1c..a120f98c4112 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type)
                 current_trace = saved_tracer;
                 if (ret) {
                         printk(KERN_CONT "FAILED!\n");
+                        /* Add the warning after printing 'FAILED' */
+                        WARN_ON(1);
                         goto out;
                 }
                 /* Only reset on passing, to avoid touching corrupted buffers */
@@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
 
 static void trace_iterator_increment(struct trace_iterator *iter)
 {
+        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
+
         iter->idx++;
-        if (iter->buffer_iter[iter->cpu])
-                ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+        if (buf_iter)
+                ring_buffer_read(buf_iter, NULL);
 }
 
 static struct trace_entry *
@@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
                 unsigned long *lost_events)
 {
         struct ring_buffer_event *event;
-        struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
+        struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
 
         if (buf_iter)
                 event = ring_buffer_iter_peek(buf_iter, ts);
@@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 
         tr->data[cpu]->skipped_entries = 0;
 
-        if (!iter->buffer_iter[cpu])
+        buf_iter = trace_buffer_iter(iter, cpu);
+        if (!buf_iter)
                 return;
 
-        buf_iter = iter->buffer_iter[cpu];
         ring_buffer_iter_reset(buf_iter);
 
         /*
@@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 
 int trace_empty(struct trace_iterator *iter)
 {
+        struct ring_buffer_iter *buf_iter;
         int cpu;
 
         /* If we are looking at one CPU buffer, only check that one */
         if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
                 cpu = iter->cpu_file;
-                if (iter->buffer_iter[cpu]) {
-                        if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+                buf_iter = trace_buffer_iter(iter, cpu);
+                if (buf_iter) {
+                        if (!ring_buffer_iter_empty(buf_iter))
                                 return 0;
                 } else {
                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter)
         }
 
         for_each_tracing_cpu(cpu) {
-                if (iter->buffer_iter[cpu]) {
-                        if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+                buf_iter = trace_buffer_iter(iter, cpu);
+                if (buf_iter) {
+                        if (!ring_buffer_iter_empty(buf_iter))
                                 return 0;
                 } else {
                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file)
         if (!iter)
                 return ERR_PTR(-ENOMEM);
 
+        iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+                                    GFP_KERNEL);
+        if (!iter->buffer_iter)
+                goto release;
+
         /*
          * We make a copy of the current tracer to avoid concurrent
          * changes on it while we are reading.
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file)
  fail:
         mutex_unlock(&trace_types_lock);
         kfree(iter->trace);
+        kfree(iter->buffer_iter);
+release:
         seq_release_private(inode, file);
         return ERR_PTR(-ENOMEM);
 }
@@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file)
         mutex_destroy(&iter->mutex);
         free_cpumask_var(iter->started);
         kfree(iter->trace);
+        kfree(iter->buffer_iter);
         seq_release_private(inode, file);
         return 0;
 }
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5aec220d2de0..55e1f7f0db12 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -317,6 +317,14 @@ struct tracer {
 
 #define TRACE_PIPE_ALL_CPU      -1
 
+static inline struct ring_buffer_iter *
+trace_buffer_iter(struct trace_iterator *iter, int cpu)
+{
+        if (iter->buffer_iter && iter->buffer_iter[cpu])
+                return iter->buffer_iter[cpu];
+        return NULL;
+}
+
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a7d2a4c653d8..ce27c8ba8d31 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,
                 next = &data->ret;
         } else {
 
-                ring_iter = iter->buffer_iter[iter->cpu];
+                ring_iter = trace_buffer_iter(iter, iter->cpu);
 
                 /* First peek to compare current entry and the next one */
                 if (ring_iter)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df611a0e76c5..123b189c732c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1325,4 +1325,4 @@ __init static int init_events(void)
 
         return 0;
 }
-device_initcall(init_events);
+early_initcall(init_events);
