| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-22 14:10:36 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-22 14:10:36 -0400 |
| commit | 2eafeb6a415864bc4c59df79151cf40f6ac74b9e (patch) | |
| tree | 331ee730275276aebbda5dd278a97c941067d5fd /kernel | |
| parent | 16d286e656250859946786de0df0fb01f8f241bc (diff) | |
| parent | 6e0f17be0361444862637e8986c8c1a3b3f8dcf8 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes from Ingo Molnar:
"- kernel side:
- Intel uncore PMU support for Nehalem and Sandy Bridge CPUs, we
support both the events available via the MSR and via the PCI
access space.
- various uprobes cleanups and restructurings
- PMU driver quirks by microcode version and required x86 microcode
loader cleanups/robustization
- various tracing robustness updates
- static keys: remove obsolete static_branch()
- tooling side:
- GTK browser improvements
- perf report browser: support screenshots to file
- more automated tests
- perf kvm improvements
- perf bench refinements
- build environment improvements
- pipe mode improvements
- libtraceevent updates, we have now hopefully merged most bits with
the out of tree forked code base
... and many other goodies."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (138 commits)
tracing: Check for allocation failure in __tracing_open()
perf/x86: Fix intel_perfmon_event_map formatting
jump label: Remove static_branch()
tracepoint: Use static_key_false(), since static_branch() is deprecated
perf/x86: Uncore filter support for SandyBridge-EP
perf/x86: Detect number of instances of uncore CBox
perf/x86: Fix event constraint for SandyBridge-EP C-Box
perf/x86: Use 0xff as pseudo code for fixed uncore event
perf/x86: Save a few bytes in 'struct x86_pmu'
perf/x86: Add a microcode revision check for SNB-PEBS
perf/x86: Improve debug output in check_hw_exists()
perf/x86/amd: Unify AMD's generic and family 15h pmus
perf/x86: Move Intel specific code to intel_pmu_init()
perf/x86: Rename Intel specific macros
perf/x86: Fix USER/KERNEL tagging of samples
perf tools: Split event symbols arrays to hw and sw parts
perf tools: Split out PE_VALUE_SYM parsing token to SW and HW tokens
perf tools: Add empty rule for new line in event syntax parsing
perf test: Use ARRAY_SIZE in parse events tests
tools lib traceevent: Cleanup realloc use
...
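The static-keys item above ("remove obsolete static_branch()") and the two jump-label commits in the shortlog change the API that jump-label users call on the fast path. As a rough illustration only — the key name, the slow-path hook, and the enable/disable helpers below are hypothetical, not taken from this merge — the replacement pattern with the static_key API looks roughly like this:

```c
#include <linux/jump_label.h>

/* Branch is disabled (patched to a NOP) by default. */
static struct static_key my_tracing_key = STATIC_KEY_INIT_FALSE;

static inline void hot_path(void)
{
	/* Was: if (static_branch(&my_tracing_key)) ... -- now removed. */
	if (static_key_false(&my_tracing_key))
		do_slow_tracing_hook();	/* hypothetical slow path */
}

/* Control path: flip the branch at runtime. */
void my_tracing_enable(void)
{
	static_key_slow_inc(&my_tracing_key);
}

void my_tracing_disable(void)
{
	static_key_slow_dec(&my_tracing_key);
}
```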
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c                 |  49
-rw-r--r--   kernel/events/uprobes.c              | 461
-rw-r--r--   kernel/trace/ftrace.c                |   8
-rw-r--r--   kernel/trace/ring_buffer.c           |   4
-rw-r--r--   kernel/trace/trace.c                 |  33
-rw-r--r--   kernel/trace/trace.h                 |   8
-rw-r--r--   kernel/trace/trace_functions_graph.c |   2
-rw-r--r--   kernel/trace/trace_output.c          |   2

8 files changed, 313 insertions, 254 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d7d71d6ec972..f1cf0edeb39a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx, | |||
1645 | lockdep_assert_held(&ctx->mutex); | 1645 | lockdep_assert_held(&ctx->mutex); |
1646 | 1646 | ||
1647 | event->ctx = ctx; | 1647 | event->ctx = ctx; |
1648 | if (event->cpu != -1) | ||
1649 | event->cpu = cpu; | ||
1648 | 1650 | ||
1649 | if (!task) { | 1651 | if (!task) { |
1650 | /* | 1652 | /* |
@@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6252 | } | 6254 | } |
6253 | } | 6255 | } |
6254 | 6256 | ||
6257 | get_online_cpus(); | ||
6258 | |||
6255 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, | 6259 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
6256 | NULL, NULL); | 6260 | NULL, NULL); |
6257 | if (IS_ERR(event)) { | 6261 | if (IS_ERR(event)) { |
@@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6304 | /* | 6308 | /* |
6305 | * Get the target context (task or percpu): | 6309 | * Get the target context (task or percpu): |
6306 | */ | 6310 | */ |
6307 | ctx = find_get_context(pmu, task, cpu); | 6311 | ctx = find_get_context(pmu, task, event->cpu); |
6308 | if (IS_ERR(ctx)) { | 6312 | if (IS_ERR(ctx)) { |
6309 | err = PTR_ERR(ctx); | 6313 | err = PTR_ERR(ctx); |
6310 | goto err_alloc; | 6314 | goto err_alloc; |
@@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6377 | mutex_lock(&ctx->mutex); | 6381 | mutex_lock(&ctx->mutex); |
6378 | 6382 | ||
6379 | if (move_group) { | 6383 | if (move_group) { |
6380 | perf_install_in_context(ctx, group_leader, cpu); | 6384 | synchronize_rcu(); |
6385 | perf_install_in_context(ctx, group_leader, event->cpu); | ||
6381 | get_ctx(ctx); | 6386 | get_ctx(ctx); |
6382 | list_for_each_entry(sibling, &group_leader->sibling_list, | 6387 | list_for_each_entry(sibling, &group_leader->sibling_list, |
6383 | group_entry) { | 6388 | group_entry) { |
6384 | perf_install_in_context(ctx, sibling, cpu); | 6389 | perf_install_in_context(ctx, sibling, event->cpu); |
6385 | get_ctx(ctx); | 6390 | get_ctx(ctx); |
6386 | } | 6391 | } |
6387 | } | 6392 | } |
6388 | 6393 | ||
6389 | perf_install_in_context(ctx, event, cpu); | 6394 | perf_install_in_context(ctx, event, event->cpu); |
6390 | ++ctx->generation; | 6395 | ++ctx->generation; |
6391 | perf_unpin_context(ctx); | 6396 | perf_unpin_context(ctx); |
6392 | mutex_unlock(&ctx->mutex); | 6397 | mutex_unlock(&ctx->mutex); |
6393 | 6398 | ||
6399 | put_online_cpus(); | ||
6400 | |||
6394 | event->owner = current; | 6401 | event->owner = current; |
6395 | 6402 | ||
6396 | mutex_lock(¤t->perf_event_mutex); | 6403 | mutex_lock(¤t->perf_event_mutex); |
@@ -6419,6 +6426,7 @@ err_context: | |||
6419 | err_alloc: | 6426 | err_alloc: |
6420 | free_event(event); | 6427 | free_event(event); |
6421 | err_task: | 6428 | err_task: |
6429 | put_online_cpus(); | ||
6422 | if (task) | 6430 | if (task) |
6423 | put_task_struct(task); | 6431 | put_task_struct(task); |
6424 | err_group_fd: | 6432 | err_group_fd: |
@@ -6479,6 +6487,39 @@ err: | |||
6479 | } | 6487 | } |
6480 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); | 6488 | EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); |
6481 | 6489 | ||
6490 | void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | ||
6491 | { | ||
6492 | struct perf_event_context *src_ctx; | ||
6493 | struct perf_event_context *dst_ctx; | ||
6494 | struct perf_event *event, *tmp; | ||
6495 | LIST_HEAD(events); | ||
6496 | |||
6497 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; | ||
6498 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; | ||
6499 | |||
6500 | mutex_lock(&src_ctx->mutex); | ||
6501 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, | ||
6502 | event_entry) { | ||
6503 | perf_remove_from_context(event); | ||
6504 | put_ctx(src_ctx); | ||
6505 | list_add(&event->event_entry, &events); | ||
6506 | } | ||
6507 | mutex_unlock(&src_ctx->mutex); | ||
6508 | |||
6509 | synchronize_rcu(); | ||
6510 | |||
6511 | mutex_lock(&dst_ctx->mutex); | ||
6512 | list_for_each_entry_safe(event, tmp, &events, event_entry) { | ||
6513 | list_del(&event->event_entry); | ||
6514 | if (event->state >= PERF_EVENT_STATE_OFF) | ||
6515 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
6516 | perf_install_in_context(dst_ctx, event, dst_cpu); | ||
6517 | get_ctx(dst_ctx); | ||
6518 | } | ||
6519 | mutex_unlock(&dst_ctx->mutex); | ||
6520 | } | ||
6521 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); | ||
6522 | |||
6482 | static void sync_child_event(struct perf_event *child_event, | 6523 | static void sync_child_event(struct perf_event *child_event, |
6483 | struct task_struct *child) | 6524 | struct task_struct *child) |
6484 | { | 6525 | { |
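The newly exported perf_pmu_migrate_context() above is what allows an uncore-style PMU driver to move its per-CPU events when the CPU that was collecting them goes offline. A minimal, hypothetical hotplug-notifier sketch follows; the pmu variable and the helper that picks a surviving CPU in the same package are illustrative names, not part of this commit:

```c
/*
 * Sketch only: assumes an uncore-style driver with one 'struct pmu'
 * (my_uncore_pmu) whose events are bound to a single CPU per package.
 */
static int my_uncore_cpu_notifier(struct notifier_block *nb,
				  unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;
	int target;

	if ((action & ~CPU_TASKS_FROZEN) != CPU_DOWN_PREPARE)
		return NOTIFY_OK;

	/* Pick another online CPU in the same package to take over. */
	target = my_uncore_pick_survivor(cpu);	/* hypothetical helper */
	if (target >= 0)
		perf_pmu_migrate_context(&my_uncore_pmu, cpu, target);

	return NOTIFY_OK;
}
```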
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d80fe8..f93532748bca 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -38,13 +38,29 @@ | |||
38 | #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) | 38 | #define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) |
39 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE | 39 | #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE |
40 | 40 | ||
41 | static struct srcu_struct uprobes_srcu; | ||
42 | static struct rb_root uprobes_tree = RB_ROOT; | 41 | static struct rb_root uprobes_tree = RB_ROOT; |
43 | 42 | ||
44 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ | 43 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ |
45 | 44 | ||
46 | #define UPROBES_HASH_SZ 13 | 45 | #define UPROBES_HASH_SZ 13 |
47 | 46 | ||
47 | /* | ||
48 | * We need separate register/unregister and mmap/munmap lock hashes because | ||
49 | * of mmap_sem nesting. | ||
50 | * | ||
51 | * uprobe_register() needs to install probes on (potentially) all processes | ||
52 | * and thus needs to acquire multiple mmap_sems (consecutively, not | ||
53 | * concurrently), whereas uprobe_mmap() is called while holding mmap_sem | ||
54 | * for the particular process doing the mmap. | ||
55 | * | ||
56 | * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem | ||
57 | * because of lock order against i_mmap_mutex. This means there's a hole in | ||
58 | * the register vma iteration where a mmap() can happen. | ||
59 | * | ||
60 | * Thus uprobe_register() can race with uprobe_mmap() and we can try and | ||
61 | * install a probe where one is already installed. | ||
62 | */ | ||
63 | |||
48 | /* serialize (un)register */ | 64 | /* serialize (un)register */ |
49 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; | 65 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; |
50 | 66 | ||
@@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | |||
61 | */ | 77 | */ |
62 | static atomic_t uprobe_events = ATOMIC_INIT(0); | 78 | static atomic_t uprobe_events = ATOMIC_INIT(0); |
63 | 79 | ||
64 | /* | ||
65 | * Maintain a temporary per vma info that can be used to search if a vma | ||
66 | * has already been handled. This structure is introduced since extending | ||
67 | * vm_area_struct wasnt recommended. | ||
68 | */ | ||
69 | struct vma_info { | ||
70 | struct list_head probe_list; | ||
71 | struct mm_struct *mm; | ||
72 | loff_t vaddr; | ||
73 | }; | ||
74 | |||
75 | struct uprobe { | 80 | struct uprobe { |
76 | struct rb_node rb_node; /* node in the rb tree */ | 81 | struct rb_node rb_node; /* node in the rb tree */ |
77 | atomic_t ref; | 82 | atomic_t ref; |
@@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) | |||
100 | if (!is_register) | 105 | if (!is_register) |
101 | return true; | 106 | return true; |
102 | 107 | ||
103 | if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) | 108 | if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) |
109 | == (VM_READ|VM_EXEC)) | ||
104 | return true; | 110 | return true; |
105 | 111 | ||
106 | return false; | 112 | return false; |
@@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) | |||
129 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) | 135 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) |
130 | { | 136 | { |
131 | struct mm_struct *mm = vma->vm_mm; | 137 | struct mm_struct *mm = vma->vm_mm; |
132 | pgd_t *pgd; | ||
133 | pud_t *pud; | ||
134 | pmd_t *pmd; | ||
135 | pte_t *ptep; | ||
136 | spinlock_t *ptl; | ||
137 | unsigned long addr; | 138 | unsigned long addr; |
138 | int err = -EFAULT; | 139 | spinlock_t *ptl; |
140 | pte_t *ptep; | ||
139 | 141 | ||
140 | addr = page_address_in_vma(page, vma); | 142 | addr = page_address_in_vma(page, vma); |
141 | if (addr == -EFAULT) | 143 | if (addr == -EFAULT) |
142 | goto out; | 144 | return -EFAULT; |
143 | |||
144 | pgd = pgd_offset(mm, addr); | ||
145 | if (!pgd_present(*pgd)) | ||
146 | goto out; | ||
147 | |||
148 | pud = pud_offset(pgd, addr); | ||
149 | if (!pud_present(*pud)) | ||
150 | goto out; | ||
151 | |||
152 | pmd = pmd_offset(pud, addr); | ||
153 | if (!pmd_present(*pmd)) | ||
154 | goto out; | ||
155 | 145 | ||
156 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); | 146 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
157 | if (!ptep) | 147 | if (!ptep) |
158 | goto out; | 148 | return -EAGAIN; |
159 | 149 | ||
160 | get_page(kpage); | 150 | get_page(kpage); |
161 | page_add_new_anon_rmap(kpage, vma, addr); | 151 | page_add_new_anon_rmap(kpage, vma, addr); |
@@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct | |||
174 | try_to_free_swap(page); | 164 | try_to_free_swap(page); |
175 | put_page(page); | 165 | put_page(page); |
176 | pte_unmap_unlock(ptep, ptl); | 166 | pte_unmap_unlock(ptep, ptl); |
177 | err = 0; | ||
178 | 167 | ||
179 | out: | 168 | return 0; |
180 | return err; | ||
181 | } | 169 | } |
182 | 170 | ||
183 | /** | 171 | /** |
@@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
222 | void *vaddr_old, *vaddr_new; | 210 | void *vaddr_old, *vaddr_new; |
223 | struct vm_area_struct *vma; | 211 | struct vm_area_struct *vma; |
224 | struct uprobe *uprobe; | 212 | struct uprobe *uprobe; |
225 | loff_t addr; | ||
226 | int ret; | 213 | int ret; |
227 | 214 | retry: | |
228 | /* Read the page with vaddr into memory */ | 215 | /* Read the page with vaddr into memory */ |
229 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); | 216 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); |
230 | if (ret <= 0) | 217 | if (ret <= 0) |
@@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
246 | if (mapping != vma->vm_file->f_mapping) | 233 | if (mapping != vma->vm_file->f_mapping) |
247 | goto put_out; | 234 | goto put_out; |
248 | 235 | ||
249 | addr = vma_address(vma, uprobe->offset); | ||
250 | if (vaddr != (unsigned long)addr) | ||
251 | goto put_out; | ||
252 | |||
253 | ret = -ENOMEM; | 236 | ret = -ENOMEM; |
254 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); | 237 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); |
255 | if (!new_page) | 238 | if (!new_page) |
@@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
267 | vaddr_new = kmap_atomic(new_page); | 250 | vaddr_new = kmap_atomic(new_page); |
268 | 251 | ||
269 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); | 252 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); |
270 | 253 | memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); | |
271 | /* poke the new insn in, ASSUMES we don't cross page boundary */ | ||
272 | vaddr &= ~PAGE_MASK; | ||
273 | BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | ||
274 | memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); | ||
275 | 254 | ||
276 | kunmap_atomic(vaddr_new); | 255 | kunmap_atomic(vaddr_new); |
277 | kunmap_atomic(vaddr_old); | 256 | kunmap_atomic(vaddr_old); |
@@ -291,6 +270,8 @@ unlock_out: | |||
291 | put_out: | 270 | put_out: |
292 | put_page(old_page); | 271 | put_page(old_page); |
293 | 272 | ||
273 | if (unlikely(ret == -EAGAIN)) | ||
274 | goto retry; | ||
294 | return ret; | 275 | return ret; |
295 | } | 276 | } |
296 | 277 | ||
@@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ | |||
312 | void *vaddr_new; | 293 | void *vaddr_new; |
313 | int ret; | 294 | int ret; |
314 | 295 | ||
315 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL); | 296 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); |
316 | if (ret <= 0) | 297 | if (ret <= 0) |
317 | return ret; | 298 | return ret; |
318 | 299 | ||
@@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | |||
333 | uprobe_opcode_t opcode; | 314 | uprobe_opcode_t opcode; |
334 | int result; | 315 | int result; |
335 | 316 | ||
317 | if (current->mm == mm) { | ||
318 | pagefault_disable(); | ||
319 | result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, | ||
320 | sizeof(opcode)); | ||
321 | pagefault_enable(); | ||
322 | |||
323 | if (likely(result == 0)) | ||
324 | goto out; | ||
325 | } | ||
326 | |||
336 | result = read_opcode(mm, vaddr, &opcode); | 327 | result = read_opcode(mm, vaddr, &opcode); |
337 | if (result) | 328 | if (result) |
338 | return result; | 329 | return result; |
339 | 330 | out: | |
340 | if (is_swbp_insn(&opcode)) | 331 | if (is_swbp_insn(&opcode)) |
341 | return 1; | 332 | return 1; |
342 | 333 | ||
@@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | |||
355 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) | 346 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) |
356 | { | 347 | { |
357 | int result; | 348 | int result; |
358 | 349 | /* | |
350 | * See the comment near uprobes_hash(). | ||
351 | */ | ||
359 | result = is_swbp_at_addr(mm, vaddr); | 352 | result = is_swbp_at_addr(mm, vaddr); |
360 | if (result == 1) | 353 | if (result == 1) |
361 | return -EEXIST; | 354 | return -EEXIST; |
@@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) | |||
520 | uprobe->inode = igrab(inode); | 513 | uprobe->inode = igrab(inode); |
521 | uprobe->offset = offset; | 514 | uprobe->offset = offset; |
522 | init_rwsem(&uprobe->consumer_rwsem); | 515 | init_rwsem(&uprobe->consumer_rwsem); |
523 | INIT_LIST_HEAD(&uprobe->pending_list); | ||
524 | 516 | ||
525 | /* add to uprobes_tree, sorted on inode:offset */ | 517 | /* add to uprobes_tree, sorted on inode:offset */ |
526 | cur_uprobe = insert_uprobe(uprobe); | 518 | cur_uprobe = insert_uprobe(uprobe); |
@@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) | |||
588 | } | 580 | } |
589 | 581 | ||
590 | static int | 582 | static int |
591 | __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, | 583 | __copy_insn(struct address_space *mapping, struct file *filp, char *insn, |
592 | unsigned long nbytes, unsigned long offset) | 584 | unsigned long nbytes, loff_t offset) |
593 | { | 585 | { |
594 | struct file *filp = vma->vm_file; | ||
595 | struct page *page; | 586 | struct page *page; |
596 | void *vaddr; | 587 | void *vaddr; |
597 | unsigned long off1; | 588 | unsigned long off; |
598 | unsigned long idx; | 589 | pgoff_t idx; |
599 | 590 | ||
600 | if (!filp) | 591 | if (!filp) |
601 | return -EINVAL; | 592 | return -EINVAL; |
602 | 593 | ||
603 | idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); | 594 | if (!mapping->a_ops->readpage) |
604 | off1 = offset &= ~PAGE_MASK; | 595 | return -EIO; |
596 | |||
597 | idx = offset >> PAGE_CACHE_SHIFT; | ||
598 | off = offset & ~PAGE_MASK; | ||
605 | 599 | ||
606 | /* | 600 | /* |
607 | * Ensure that the page that has the original instruction is | 601 | * Ensure that the page that has the original instruction is |
@@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins | |||
612 | return PTR_ERR(page); | 606 | return PTR_ERR(page); |
613 | 607 | ||
614 | vaddr = kmap_atomic(page); | 608 | vaddr = kmap_atomic(page); |
615 | memcpy(insn, vaddr + off1, nbytes); | 609 | memcpy(insn, vaddr + off, nbytes); |
616 | kunmap_atomic(vaddr); | 610 | kunmap_atomic(vaddr); |
617 | page_cache_release(page); | 611 | page_cache_release(page); |
618 | 612 | ||
619 | return 0; | 613 | return 0; |
620 | } | 614 | } |
621 | 615 | ||
622 | static int | 616 | static int copy_insn(struct uprobe *uprobe, struct file *filp) |
623 | copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | ||
624 | { | 617 | { |
625 | struct address_space *mapping; | 618 | struct address_space *mapping; |
626 | unsigned long nbytes; | 619 | unsigned long nbytes; |
627 | int bytes; | 620 | int bytes; |
628 | 621 | ||
629 | addr &= ~PAGE_MASK; | 622 | nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); |
630 | nbytes = PAGE_SIZE - addr; | ||
631 | mapping = uprobe->inode->i_mapping; | 623 | mapping = uprobe->inode->i_mapping; |
632 | 624 | ||
633 | /* Instruction at end of binary; copy only available bytes */ | 625 | /* Instruction at end of binary; copy only available bytes */ |
@@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | |||
638 | 630 | ||
639 | /* Instruction at the page-boundary; copy bytes in second page */ | 631 | /* Instruction at the page-boundary; copy bytes in second page */ |
640 | if (nbytes < bytes) { | 632 | if (nbytes < bytes) { |
641 | if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, | 633 | int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, |
642 | bytes - nbytes, uprobe->offset + nbytes)) | 634 | bytes - nbytes, uprobe->offset + nbytes); |
643 | return -ENOMEM; | 635 | if (err) |
644 | 636 | return err; | |
645 | bytes = nbytes; | 637 | bytes = nbytes; |
646 | } | 638 | } |
647 | return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); | 639 | return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); |
648 | } | 640 | } |
649 | 641 | ||
650 | /* | 642 | /* |
@@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | |||
672 | */ | 664 | */ |
673 | static int | 665 | static int |
674 | install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | 666 | install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, |
675 | struct vm_area_struct *vma, loff_t vaddr) | 667 | struct vm_area_struct *vma, unsigned long vaddr) |
676 | { | 668 | { |
677 | unsigned long addr; | ||
678 | int ret; | 669 | int ret; |
679 | 670 | ||
680 | /* | 671 | /* |
@@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
687 | if (!uprobe->consumers) | 678 | if (!uprobe->consumers) |
688 | return -EEXIST; | 679 | return -EEXIST; |
689 | 680 | ||
690 | addr = (unsigned long)vaddr; | ||
691 | |||
692 | if (!(uprobe->flags & UPROBE_COPY_INSN)) { | 681 | if (!(uprobe->flags & UPROBE_COPY_INSN)) { |
693 | ret = copy_insn(uprobe, vma, addr); | 682 | ret = copy_insn(uprobe, vma->vm_file); |
694 | if (ret) | 683 | if (ret) |
695 | return ret; | 684 | return ret; |
696 | 685 | ||
697 | if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) | 686 | if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) |
698 | return -EEXIST; | 687 | return -ENOTSUPP; |
699 | 688 | ||
700 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm); | 689 | ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); |
701 | if (ret) | 690 | if (ret) |
702 | return ret; | 691 | return ret; |
703 | 692 | ||
693 | /* write_opcode() assumes we don't cross page boundary */ | ||
694 | BUG_ON((uprobe->offset & ~PAGE_MASK) + | ||
695 | UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | ||
696 | |||
704 | uprobe->flags |= UPROBE_COPY_INSN; | 697 | uprobe->flags |= UPROBE_COPY_INSN; |
705 | } | 698 | } |
706 | 699 | ||
@@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
713 | * Hence increment before and decrement on failure. | 706 | * Hence increment before and decrement on failure. |
714 | */ | 707 | */ |
715 | atomic_inc(&mm->uprobes_state.count); | 708 | atomic_inc(&mm->uprobes_state.count); |
716 | ret = set_swbp(&uprobe->arch, mm, addr); | 709 | ret = set_swbp(&uprobe->arch, mm, vaddr); |
717 | if (ret) | 710 | if (ret) |
718 | atomic_dec(&mm->uprobes_state.count); | 711 | atomic_dec(&mm->uprobes_state.count); |
719 | 712 | ||
@@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, | |||
721 | } | 714 | } |
722 | 715 | ||
723 | static void | 716 | static void |
724 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) | 717 | remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) |
725 | { | 718 | { |
726 | if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) | 719 | if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) |
727 | atomic_dec(&mm->uprobes_state.count); | 720 | atomic_dec(&mm->uprobes_state.count); |
728 | } | 721 | } |
729 | 722 | ||
730 | /* | 723 | /* |
731 | * There could be threads that have hit the breakpoint and are entering the | 724 | * There could be threads that have already hit the breakpoint. They |
732 | * notifier code and trying to acquire the uprobes_treelock. The thread | 725 | * will recheck the current insn and restart if find_uprobe() fails. |
733 | * calling delete_uprobe() that is removing the uprobe from the rb_tree can | 726 | * See find_active_uprobe(). |
734 | * race with these threads and might acquire the uprobes_treelock compared | ||
735 | * to some of the breakpoint hit threads. In such a case, the breakpoint | ||
736 | * hit threads will not find the uprobe. The current unregistering thread | ||
737 | * waits till all other threads have hit a breakpoint, to acquire the | ||
738 | * uprobes_treelock before the uprobe is removed from the rbtree. | ||
739 | */ | 727 | */ |
740 | static void delete_uprobe(struct uprobe *uprobe) | 728 | static void delete_uprobe(struct uprobe *uprobe) |
741 | { | 729 | { |
742 | unsigned long flags; | 730 | unsigned long flags; |
743 | 731 | ||
744 | synchronize_srcu(&uprobes_srcu); | ||
745 | spin_lock_irqsave(&uprobes_treelock, flags); | 732 | spin_lock_irqsave(&uprobes_treelock, flags); |
746 | rb_erase(&uprobe->rb_node, &uprobes_tree); | 733 | rb_erase(&uprobe->rb_node, &uprobes_tree); |
747 | spin_unlock_irqrestore(&uprobes_treelock, flags); | 734 | spin_unlock_irqrestore(&uprobes_treelock, flags); |
@@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe) | |||
750 | atomic_dec(&uprobe_events); | 737 | atomic_dec(&uprobe_events); |
751 | } | 738 | } |
752 | 739 | ||
753 | static struct vma_info * | 740 | struct map_info { |
754 | __find_next_vma_info(struct address_space *mapping, struct list_head *head, | 741 | struct map_info *next; |
755 | struct vma_info *vi, loff_t offset, bool is_register) | 742 | struct mm_struct *mm; |
743 | unsigned long vaddr; | ||
744 | }; | ||
745 | |||
746 | static inline struct map_info *free_map_info(struct map_info *info) | ||
747 | { | ||
748 | struct map_info *next = info->next; | ||
749 | kfree(info); | ||
750 | return next; | ||
751 | } | ||
752 | |||
753 | static struct map_info * | ||
754 | build_map_info(struct address_space *mapping, loff_t offset, bool is_register) | ||
756 | { | 755 | { |
756 | unsigned long pgoff = offset >> PAGE_SHIFT; | ||
757 | struct prio_tree_iter iter; | 757 | struct prio_tree_iter iter; |
758 | struct vm_area_struct *vma; | 758 | struct vm_area_struct *vma; |
759 | struct vma_info *tmpvi; | 759 | struct map_info *curr = NULL; |
760 | unsigned long pgoff; | 760 | struct map_info *prev = NULL; |
761 | int existing_vma; | 761 | struct map_info *info; |
762 | loff_t vaddr; | 762 | int more = 0; |
763 | |||
764 | pgoff = offset >> PAGE_SHIFT; | ||
765 | 763 | ||
764 | again: | ||
765 | mutex_lock(&mapping->i_mmap_mutex); | ||
766 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 766 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
767 | if (!valid_vma(vma, is_register)) | 767 | if (!valid_vma(vma, is_register)) |
768 | continue; | 768 | continue; |
769 | 769 | ||
770 | existing_vma = 0; | 770 | if (!prev && !more) { |
771 | vaddr = vma_address(vma, offset); | 771 | /* |
772 | 772 | * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through | |
773 | list_for_each_entry(tmpvi, head, probe_list) { | 773 | * reclaim. This is optimistic, no harm done if it fails. |
774 | if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { | 774 | */ |
775 | existing_vma = 1; | 775 | prev = kmalloc(sizeof(struct map_info), |
776 | break; | 776 | GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); |
777 | } | 777 | if (prev) |
778 | prev->next = NULL; | ||
778 | } | 779 | } |
779 | 780 | if (!prev) { | |
780 | /* | 781 | more++; |
781 | * Another vma needs a probe to be installed. However skip | 782 | continue; |
782 | * installing the probe if the vma is about to be unlinked. | ||
783 | */ | ||
784 | if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { | ||
785 | vi->mm = vma->vm_mm; | ||
786 | vi->vaddr = vaddr; | ||
787 | list_add(&vi->probe_list, head); | ||
788 | |||
789 | return vi; | ||
790 | } | 783 | } |
791 | } | ||
792 | 784 | ||
793 | return NULL; | 785 | if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) |
794 | } | 786 | continue; |
795 | |||
796 | /* | ||
797 | * Iterate in the rmap prio tree and find a vma where a probe has not | ||
798 | * yet been inserted. | ||
799 | */ | ||
800 | static struct vma_info * | ||
801 | find_next_vma_info(struct address_space *mapping, struct list_head *head, | ||
802 | loff_t offset, bool is_register) | ||
803 | { | ||
804 | struct vma_info *vi, *retvi; | ||
805 | 787 | ||
806 | vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); | 788 | info = prev; |
807 | if (!vi) | 789 | prev = prev->next; |
808 | return ERR_PTR(-ENOMEM); | 790 | info->next = curr; |
791 | curr = info; | ||
809 | 792 | ||
810 | mutex_lock(&mapping->i_mmap_mutex); | 793 | info->mm = vma->vm_mm; |
811 | retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); | 794 | info->vaddr = vma_address(vma, offset); |
795 | } | ||
812 | mutex_unlock(&mapping->i_mmap_mutex); | 796 | mutex_unlock(&mapping->i_mmap_mutex); |
813 | 797 | ||
814 | if (!retvi) | 798 | if (!more) |
815 | kfree(vi); | 799 | goto out; |
800 | |||
801 | prev = curr; | ||
802 | while (curr) { | ||
803 | mmput(curr->mm); | ||
804 | curr = curr->next; | ||
805 | } | ||
816 | 806 | ||
817 | return retvi; | 807 | do { |
808 | info = kmalloc(sizeof(struct map_info), GFP_KERNEL); | ||
809 | if (!info) { | ||
810 | curr = ERR_PTR(-ENOMEM); | ||
811 | goto out; | ||
812 | } | ||
813 | info->next = prev; | ||
814 | prev = info; | ||
815 | } while (--more); | ||
816 | |||
817 | goto again; | ||
818 | out: | ||
819 | while (prev) | ||
820 | prev = free_map_info(prev); | ||
821 | return curr; | ||
818 | } | 822 | } |
819 | 823 | ||
820 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) | 824 | static int register_for_each_vma(struct uprobe *uprobe, bool is_register) |
821 | { | 825 | { |
822 | struct list_head try_list; | 826 | struct map_info *info; |
823 | struct vm_area_struct *vma; | 827 | int err = 0; |
824 | struct address_space *mapping; | ||
825 | struct vma_info *vi, *tmpvi; | ||
826 | struct mm_struct *mm; | ||
827 | loff_t vaddr; | ||
828 | int ret; | ||
829 | 828 | ||
830 | mapping = uprobe->inode->i_mapping; | 829 | info = build_map_info(uprobe->inode->i_mapping, |
831 | INIT_LIST_HEAD(&try_list); | 830 | uprobe->offset, is_register); |
831 | if (IS_ERR(info)) | ||
832 | return PTR_ERR(info); | ||
832 | 833 | ||
833 | ret = 0; | 834 | while (info) { |
835 | struct mm_struct *mm = info->mm; | ||
836 | struct vm_area_struct *vma; | ||
834 | 837 | ||
835 | for (;;) { | 838 | if (err) |
836 | vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); | 839 | goto free; |
837 | if (!vi) | ||
838 | break; | ||
839 | 840 | ||
840 | if (IS_ERR(vi)) { | 841 | down_write(&mm->mmap_sem); |
841 | ret = PTR_ERR(vi); | 842 | vma = find_vma(mm, (unsigned long)info->vaddr); |
842 | break; | 843 | if (!vma || !valid_vma(vma, is_register)) |
843 | } | 844 | goto unlock; |
844 | 845 | ||
845 | mm = vi->mm; | ||
846 | down_read(&mm->mmap_sem); | ||
847 | vma = find_vma(mm, (unsigned long)vi->vaddr); | ||
848 | if (!vma || !valid_vma(vma, is_register)) { | ||
849 | list_del(&vi->probe_list); | ||
850 | kfree(vi); | ||
851 | up_read(&mm->mmap_sem); | ||
852 | mmput(mm); | ||
853 | continue; | ||
854 | } | ||
855 | vaddr = vma_address(vma, uprobe->offset); | ||
856 | if (vma->vm_file->f_mapping->host != uprobe->inode || | 846 | if (vma->vm_file->f_mapping->host != uprobe->inode || |
857 | vaddr != vi->vaddr) { | 847 | vma_address(vma, uprobe->offset) != info->vaddr) |
858 | list_del(&vi->probe_list); | 848 | goto unlock; |
859 | kfree(vi); | ||
860 | up_read(&mm->mmap_sem); | ||
861 | mmput(mm); | ||
862 | continue; | ||
863 | } | ||
864 | |||
865 | if (is_register) | ||
866 | ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); | ||
867 | else | ||
868 | remove_breakpoint(uprobe, mm, vi->vaddr); | ||
869 | 849 | ||
870 | up_read(&mm->mmap_sem); | ||
871 | mmput(mm); | ||
872 | if (is_register) { | 850 | if (is_register) { |
873 | if (ret && ret == -EEXIST) | 851 | err = install_breakpoint(uprobe, mm, vma, info->vaddr); |
874 | ret = 0; | 852 | /* |
875 | if (ret) | 853 | * We can race against uprobe_mmap(), see the |
876 | break; | 854 | * comment near uprobe_hash(). |
855 | */ | ||
856 | if (err == -EEXIST) | ||
857 | err = 0; | ||
858 | } else { | ||
859 | remove_breakpoint(uprobe, mm, info->vaddr); | ||
877 | } | 860 | } |
861 | unlock: | ||
862 | up_write(&mm->mmap_sem); | ||
863 | free: | ||
864 | mmput(mm); | ||
865 | info = free_map_info(info); | ||
878 | } | 866 | } |
879 | 867 | ||
880 | list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { | 868 | return err; |
881 | list_del(&vi->probe_list); | ||
882 | kfree(vi); | ||
883 | } | ||
884 | |||
885 | return ret; | ||
886 | } | 869 | } |
887 | 870 | ||
888 | static int __uprobe_register(struct uprobe *uprobe) | 871 | static int __uprobe_register(struct uprobe *uprobe) |
@@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) | |||
1048 | int uprobe_mmap(struct vm_area_struct *vma) | 1031 | int uprobe_mmap(struct vm_area_struct *vma) |
1049 | { | 1032 | { |
1050 | struct list_head tmp_list; | 1033 | struct list_head tmp_list; |
1051 | struct uprobe *uprobe, *u; | 1034 | struct uprobe *uprobe; |
1052 | struct inode *inode; | 1035 | struct inode *inode; |
1053 | int ret, count; | 1036 | int ret, count; |
1054 | 1037 | ||
@@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1066 | ret = 0; | 1049 | ret = 0; |
1067 | count = 0; | 1050 | count = 0; |
1068 | 1051 | ||
1069 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { | 1052 | list_for_each_entry(uprobe, &tmp_list, pending_list) { |
1070 | loff_t vaddr; | ||
1071 | |||
1072 | list_del(&uprobe->pending_list); | ||
1073 | if (!ret) { | 1053 | if (!ret) { |
1074 | vaddr = vma_address(vma, uprobe->offset); | 1054 | loff_t vaddr = vma_address(vma, uprobe->offset); |
1075 | 1055 | ||
1076 | if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { | 1056 | if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { |
1077 | put_uprobe(uprobe); | 1057 | put_uprobe(uprobe); |
@@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1079 | } | 1059 | } |
1080 | 1060 | ||
1081 | ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); | 1061 | ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); |
1082 | 1062 | /* | |
1083 | /* Ignore double add: */ | 1063 | * We can race against uprobe_register(), see the |
1064 | * comment near uprobe_hash(). | ||
1065 | */ | ||
1084 | if (ret == -EEXIST) { | 1066 | if (ret == -EEXIST) { |
1085 | ret = 0; | 1067 | ret = 0; |
1086 | 1068 | ||
@@ -1115,7 +1097,7 @@ int uprobe_mmap(struct vm_area_struct *vma) | |||
1115 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) | 1097 | void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) |
1116 | { | 1098 | { |
1117 | struct list_head tmp_list; | 1099 | struct list_head tmp_list; |
1118 | struct uprobe *uprobe, *u; | 1100 | struct uprobe *uprobe; |
1119 | struct inode *inode; | 1101 | struct inode *inode; |
1120 | 1102 | ||
1121 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) | 1103 | if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) |
@@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon | |||
1132 | mutex_lock(uprobes_mmap_hash(inode)); | 1114 | mutex_lock(uprobes_mmap_hash(inode)); |
1133 | build_probe_list(inode, &tmp_list); | 1115 | build_probe_list(inode, &tmp_list); |
1134 | 1116 | ||
1135 | list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { | 1117 | list_for_each_entry(uprobe, &tmp_list, pending_list) { |
1136 | loff_t vaddr; | 1118 | loff_t vaddr = vma_address(vma, uprobe->offset); |
1137 | |||
1138 | list_del(&uprobe->pending_list); | ||
1139 | vaddr = vma_address(vma, uprobe->offset); | ||
1140 | 1119 | ||
1141 | if (vaddr >= start && vaddr < end) { | 1120 | if (vaddr >= start && vaddr < end) { |
1142 | /* | 1121 | /* |
@@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
1378 | { | 1357 | { |
1379 | struct uprobe_task *utask = t->utask; | 1358 | struct uprobe_task *utask = t->utask; |
1380 | 1359 | ||
1381 | if (t->uprobe_srcu_id != -1) | ||
1382 | srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id); | ||
1383 | |||
1384 | if (!utask) | 1360 | if (!utask) |
1385 | return; | 1361 | return; |
1386 | 1362 | ||
@@ -1398,7 +1374,6 @@ void uprobe_free_utask(struct task_struct *t) | |||
1398 | void uprobe_copy_process(struct task_struct *t) | 1374 | void uprobe_copy_process(struct task_struct *t) |
1399 | { | 1375 | { |
1400 | t->utask = NULL; | 1376 | t->utask = NULL; |
1401 | t->uprobe_srcu_id = -1; | ||
1402 | } | 1377 | } |
1403 | 1378 | ||
1404 | /* | 1379 | /* |
@@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void) | |||
1417 | if (unlikely(!utask)) | 1392 | if (unlikely(!utask)) |
1418 | return NULL; | 1393 | return NULL; |
1419 | 1394 | ||
1420 | utask->active_uprobe = NULL; | ||
1421 | current->utask = utask; | 1395 | current->utask = utask; |
1422 | return utask; | 1396 | return utask; |
1423 | } | 1397 | } |
@@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs) | |||
1479 | return false; | 1453 | return false; |
1480 | } | 1454 | } |
1481 | 1455 | ||
1456 | static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) | ||
1457 | { | ||
1458 | struct mm_struct *mm = current->mm; | ||
1459 | struct uprobe *uprobe = NULL; | ||
1460 | struct vm_area_struct *vma; | ||
1461 | |||
1462 | down_read(&mm->mmap_sem); | ||
1463 | vma = find_vma(mm, bp_vaddr); | ||
1464 | if (vma && vma->vm_start <= bp_vaddr) { | ||
1465 | if (valid_vma(vma, false)) { | ||
1466 | struct inode *inode; | ||
1467 | loff_t offset; | ||
1468 | |||
1469 | inode = vma->vm_file->f_mapping->host; | ||
1470 | offset = bp_vaddr - vma->vm_start; | ||
1471 | offset += (vma->vm_pgoff << PAGE_SHIFT); | ||
1472 | uprobe = find_uprobe(inode, offset); | ||
1473 | } | ||
1474 | |||
1475 | if (!uprobe) | ||
1476 | *is_swbp = is_swbp_at_addr(mm, bp_vaddr); | ||
1477 | } else { | ||
1478 | *is_swbp = -EFAULT; | ||
1479 | } | ||
1480 | up_read(&mm->mmap_sem); | ||
1481 | |||
1482 | return uprobe; | ||
1483 | } | ||
1484 | |||
1482 | /* | 1485 | /* |
1483 | * Run handler and ask thread to singlestep. | 1486 | * Run handler and ask thread to singlestep. |
1484 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. | 1487 | * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. |
1485 | */ | 1488 | */ |
1486 | static void handle_swbp(struct pt_regs *regs) | 1489 | static void handle_swbp(struct pt_regs *regs) |
1487 | { | 1490 | { |
1488 | struct vm_area_struct *vma; | ||
1489 | struct uprobe_task *utask; | 1491 | struct uprobe_task *utask; |
1490 | struct uprobe *uprobe; | 1492 | struct uprobe *uprobe; |
1491 | struct mm_struct *mm; | ||
1492 | unsigned long bp_vaddr; | 1493 | unsigned long bp_vaddr; |
1494 | int uninitialized_var(is_swbp); | ||
1493 | 1495 | ||
1494 | uprobe = NULL; | ||
1495 | bp_vaddr = uprobe_get_swbp_addr(regs); | 1496 | bp_vaddr = uprobe_get_swbp_addr(regs); |
1496 | mm = current->mm; | 1497 | uprobe = find_active_uprobe(bp_vaddr, &is_swbp); |
1497 | down_read(&mm->mmap_sem); | ||
1498 | vma = find_vma(mm, bp_vaddr); | ||
1499 | |||
1500 | if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) { | ||
1501 | struct inode *inode; | ||
1502 | loff_t offset; | ||
1503 | |||
1504 | inode = vma->vm_file->f_mapping->host; | ||
1505 | offset = bp_vaddr - vma->vm_start; | ||
1506 | offset += (vma->vm_pgoff << PAGE_SHIFT); | ||
1507 | uprobe = find_uprobe(inode, offset); | ||
1508 | } | ||
1509 | |||
1510 | srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id); | ||
1511 | current->uprobe_srcu_id = -1; | ||
1512 | up_read(&mm->mmap_sem); | ||
1513 | 1498 | ||
1514 | if (!uprobe) { | 1499 | if (!uprobe) { |
1515 | /* No matching uprobe; signal SIGTRAP. */ | 1500 | if (is_swbp > 0) { |
1516 | send_sig(SIGTRAP, current, 0); | 1501 | /* No matching uprobe; signal SIGTRAP. */ |
1502 | send_sig(SIGTRAP, current, 0); | ||
1503 | } else { | ||
1504 | /* | ||
1505 | * Either we raced with uprobe_unregister() or we can't | ||
1506 | * access this memory. The latter is only possible if | ||
1507 | * another thread plays with our ->mm. In both cases | ||
1508 | * we can simply restart. If this vma was unmapped we | ||
1509 | * can pretend this insn was not executed yet and get | ||
1510 | * the (correct) SIGSEGV after restart. | ||
1511 | */ | ||
1512 | instruction_pointer_set(regs, bp_vaddr); | ||
1513 | } | ||
1517 | return; | 1514 | return; |
1518 | } | 1515 | } |
1519 | 1516 | ||
@@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) | |||
1620 | utask->state = UTASK_BP_HIT; | 1617 | utask->state = UTASK_BP_HIT; |
1621 | 1618 | ||
1622 | set_thread_flag(TIF_UPROBE); | 1619 | set_thread_flag(TIF_UPROBE); |
1623 | current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu); | ||
1624 | 1620 | ||
1625 | return 1; | 1621 | return 1; |
1626 | } | 1622 | } |
@@ -1655,7 +1651,6 @@ static int __init init_uprobes(void) | |||
1655 | mutex_init(&uprobes_mutex[i]); | 1651 | mutex_init(&uprobes_mutex[i]); |
1656 | mutex_init(&uprobes_mmap_mutex[i]); | 1652 | mutex_init(&uprobes_mmap_mutex[i]); |
1657 | } | 1653 | } |
1658 | init_srcu_struct(&uprobes_srcu); | ||
1659 | 1654 | ||
1660 | return register_die_notifier(&uprobe_exception_nb); | 1655 | return register_die_notifier(&uprobe_exception_nb); |
1661 | } | 1656 | } |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663d86c8..b4f20fba09fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, | |||
312 | 312 | ||
313 | static int __register_ftrace_function(struct ftrace_ops *ops) | 313 | static int __register_ftrace_function(struct ftrace_ops *ops) |
314 | { | 314 | { |
315 | if (ftrace_disabled) | 315 | if (unlikely(ftrace_disabled)) |
316 | return -ENODEV; | 316 | return -ENODEV; |
317 | 317 | ||
318 | if (FTRACE_WARN_ON(ops == &global_ops)) | 318 | if (FTRACE_WARN_ON(ops == &global_ops)) |
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
4299 | 4299 | ||
4300 | mutex_lock(&ftrace_lock); | 4300 | mutex_lock(&ftrace_lock); |
4301 | 4301 | ||
4302 | if (unlikely(ftrace_disabled)) | ||
4303 | goto out_unlock; | ||
4304 | |||
4305 | ret = __register_ftrace_function(ops); | 4302 | ret = __register_ftrace_function(ops); |
4306 | if (!ret) | 4303 | if (!ret) |
4307 | ret = ftrace_startup(ops, 0); | 4304 | ret = ftrace_startup(ops, 0); |
4308 | 4305 | ||
4309 | |||
4310 | out_unlock: | ||
4311 | mutex_unlock(&ftrace_lock); | 4306 | mutex_unlock(&ftrace_lock); |
4307 | |||
4312 | return ret; | 4308 | return ret; |
4313 | } | 4309 | } |
4314 | EXPORT_SYMBOL_GPL(register_ftrace_function); | 4310 | EXPORT_SYMBOL_GPL(register_ftrace_function); |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f765465bffe4..49491fa7daa2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
3239 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) | 3239 | if (cpu_buffer->commit_page == cpu_buffer->reader_page) |
3240 | goto out; | 3240 | goto out; |
3241 | 3241 | ||
3242 | /* Don't bother swapping if the ring buffer is empty */ | ||
3243 | if (rb_num_of_entries(cpu_buffer) == 0) | ||
3244 | goto out; | ||
3245 | |||
3242 | /* | 3246 | /* |
3243 | * Reset the reader page to size zero. | 3247 | * Reset the reader page to size zero. |
3244 | */ | 3248 | */ |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a7fa0702be1c..a120f98c4112 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type) | |||
830 | current_trace = saved_tracer; | 830 | current_trace = saved_tracer; |
831 | if (ret) { | 831 | if (ret) { |
832 | printk(KERN_CONT "FAILED!\n"); | 832 | printk(KERN_CONT "FAILED!\n"); |
833 | /* Add the warning after printing 'FAILED' */ | ||
834 | WARN_ON(1); | ||
833 | goto out; | 835 | goto out; |
834 | } | 836 | } |
835 | /* Only reset on passing, to avoid touching corrupted buffers */ | 837 | /* Only reset on passing, to avoid touching corrupted buffers */ |
@@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk); | |||
1708 | 1710 | ||
1709 | static void trace_iterator_increment(struct trace_iterator *iter) | 1711 | static void trace_iterator_increment(struct trace_iterator *iter) |
1710 | { | 1712 | { |
1713 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); | ||
1714 | |||
1711 | iter->idx++; | 1715 | iter->idx++; |
1712 | if (iter->buffer_iter[iter->cpu]) | 1716 | if (buf_iter) |
1713 | ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); | 1717 | ring_buffer_read(buf_iter, NULL); |
1714 | } | 1718 | } |
1715 | 1719 | ||
1716 | static struct trace_entry * | 1720 | static struct trace_entry * |
@@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, | |||
1718 | unsigned long *lost_events) | 1722 | unsigned long *lost_events) |
1719 | { | 1723 | { |
1720 | struct ring_buffer_event *event; | 1724 | struct ring_buffer_event *event; |
1721 | struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; | 1725 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); |
1722 | 1726 | ||
1723 | if (buf_iter) | 1727 | if (buf_iter) |
1724 | event = ring_buffer_iter_peek(buf_iter, ts); | 1728 | event = ring_buffer_iter_peek(buf_iter, ts); |
@@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) | |||
1856 | 1860 | ||
1857 | tr->data[cpu]->skipped_entries = 0; | 1861 | tr->data[cpu]->skipped_entries = 0; |
1858 | 1862 | ||
1859 | if (!iter->buffer_iter[cpu]) | 1863 | buf_iter = trace_buffer_iter(iter, cpu); |
1864 | if (!buf_iter) | ||
1860 | return; | 1865 | return; |
1861 | 1866 | ||
1862 | buf_iter = iter->buffer_iter[cpu]; | ||
1863 | ring_buffer_iter_reset(buf_iter); | 1867 | ring_buffer_iter_reset(buf_iter); |
1864 | 1868 | ||
1865 | /* | 1869 | /* |
@@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | |||
2205 | 2209 | ||
2206 | int trace_empty(struct trace_iterator *iter) | 2210 | int trace_empty(struct trace_iterator *iter) |
2207 | { | 2211 | { |
2212 | struct ring_buffer_iter *buf_iter; | ||
2208 | int cpu; | 2213 | int cpu; |
2209 | 2214 | ||
2210 | /* If we are looking at one CPU buffer, only check that one */ | 2215 | /* If we are looking at one CPU buffer, only check that one */ |
2211 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { | 2216 | if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { |
2212 | cpu = iter->cpu_file; | 2217 | cpu = iter->cpu_file; |
2213 | if (iter->buffer_iter[cpu]) { | 2218 | buf_iter = trace_buffer_iter(iter, cpu); |
2214 | if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) | 2219 | if (buf_iter) { |
2220 | if (!ring_buffer_iter_empty(buf_iter)) | ||
2215 | return 0; | 2221 | return 0; |
2216 | } else { | 2222 | } else { |
2217 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2223 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
@@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter) | |||
2221 | } | 2227 | } |
2222 | 2228 | ||
2223 | for_each_tracing_cpu(cpu) { | 2229 | for_each_tracing_cpu(cpu) { |
2224 | if (iter->buffer_iter[cpu]) { | 2230 | buf_iter = trace_buffer_iter(iter, cpu); |
2225 | if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) | 2231 | if (buf_iter) { |
2232 | if (!ring_buffer_iter_empty(buf_iter)) | ||
2226 | return 0; | 2233 | return 0; |
2227 | } else { | 2234 | } else { |
2228 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) | 2235 | if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) |
@@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2381 | if (!iter) | 2388 | if (!iter) |
2382 | return ERR_PTR(-ENOMEM); | 2389 | return ERR_PTR(-ENOMEM); |
2383 | 2390 | ||
2391 | iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(), | ||
2392 | GFP_KERNEL); | ||
2393 | if (!iter->buffer_iter) | ||
2394 | goto release; | ||
2395 | |||
2384 | /* | 2396 | /* |
2385 | * We make a copy of the current tracer to avoid concurrent | 2397 | * We make a copy of the current tracer to avoid concurrent |
2386 | * changes on it while we are reading. | 2398 | * changes on it while we are reading. |
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file) | |||
2441 | fail: | 2453 | fail: |
2442 | mutex_unlock(&trace_types_lock); | 2454 | mutex_unlock(&trace_types_lock); |
2443 | kfree(iter->trace); | 2455 | kfree(iter->trace); |
2456 | kfree(iter->buffer_iter); | ||
2457 | release: | ||
2444 | seq_release_private(inode, file); | 2458 | seq_release_private(inode, file); |
2445 | return ERR_PTR(-ENOMEM); | 2459 | return ERR_PTR(-ENOMEM); |
2446 | } | 2460 | } |
@@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
2481 | mutex_destroy(&iter->mutex); | 2495 | mutex_destroy(&iter->mutex); |
2482 | free_cpumask_var(iter->started); | 2496 | free_cpumask_var(iter->started); |
2483 | kfree(iter->trace); | 2497 | kfree(iter->trace); |
2498 | kfree(iter->buffer_iter); | ||
2484 | seq_release_private(inode, file); | 2499 | seq_release_private(inode, file); |
2485 | return 0; | 2500 | return 0; |
2486 | } | 2501 | } |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5aec220d2de0..55e1f7f0db12 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -317,6 +317,14 @@ struct tracer { | |||
317 | 317 | ||
318 | #define TRACE_PIPE_ALL_CPU -1 | 318 | #define TRACE_PIPE_ALL_CPU -1 |
319 | 319 | ||
320 | static inline struct ring_buffer_iter * | ||
321 | trace_buffer_iter(struct trace_iterator *iter, int cpu) | ||
322 | { | ||
323 | if (iter->buffer_iter && iter->buffer_iter[cpu]) | ||
324 | return iter->buffer_iter[cpu]; | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
320 | int tracer_init(struct tracer *t, struct trace_array *tr); | 328 | int tracer_init(struct tracer *t, struct trace_array *tr); |
321 | int tracing_is_enabled(void); | 329 | int tracing_is_enabled(void); |
322 | void trace_wake_up(void); | 330 | void trace_wake_up(void); |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a7d2a4c653d8..ce27c8ba8d31 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
538 | next = &data->ret; | 538 | next = &data->ret; |
539 | } else { | 539 | } else { |
540 | 540 | ||
541 | ring_iter = iter->buffer_iter[iter->cpu]; | 541 | ring_iter = trace_buffer_iter(iter, iter->cpu); |
542 | 542 | ||
543 | /* First peek to compare current entry and the next one */ | 543 | /* First peek to compare current entry and the next one */ |
544 | if (ring_iter) | 544 | if (ring_iter) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df611a0e76c5..123b189c732c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1325,4 +1325,4 @@ __init static int init_events(void) | |||
1325 | 1325 | ||
1326 | return 0; | 1326 | return 0; |
1327 | } | 1327 | } |
1328 | device_initcall(init_events); | 1328 | early_initcall(init_events); |