author    Linus Torvalds <torvalds@linux-foundation.org>  2012-07-22 14:10:36 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-07-22 14:10:36 -0400
commit    2eafeb6a415864bc4c59df79151cf40f6ac74b9e (patch)
tree      331ee730275276aebbda5dd278a97c941067d5fd /kernel
parent    16d286e656250859946786de0df0fb01f8f241bc (diff)
parent    6e0f17be0361444862637e8986c8c1a3b3f8dcf8 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes from Ingo Molnar:

 "- kernel side:

    - Intel uncore PMU support for Nehalem and Sandy Bridge CPUs, we
      support both the events available via the MSR and via the PCI
      access space.

    - various uprobes cleanups and restructurings

    - PMU driver quirks by microcode version and required x86 microcode
      loader cleanups/robustization

    - various tracing robustness updates

    - static keys: remove obsolete static_branch()

  - tooling side:

    - GTK browser improvements

    - perf report browser: support screenshots to file

    - more automated tests

    - perf kvm improvements

    - perf bench refinements

    - build environment improvements

    - pipe mode improvements

    - libtraceevent updates, we have now hopefully merged most bits with
      the out of tree forked code base

  ... and many other goodies."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (138 commits)
  tracing: Check for allocation failure in __tracing_open()
  perf/x86: Fix intel_perfmon_event_map formatting
  jump label: Remove static_branch()
  tracepoint: Use static_key_false(), since static_branch() is deprecated
  perf/x86: Uncore filter support for SandyBridge-EP
  perf/x86: Detect number of instances of uncore CBox
  perf/x86: Fix event constraint for SandyBridge-EP C-Box
  perf/x86: Use 0xff as pseudo code for fixed uncore event
  perf/x86: Save a few bytes in 'struct x86_pmu'
  perf/x86: Add a microcode revision check for SNB-PEBS
  perf/x86: Improve debug output in check_hw_exists()
  perf/x86/amd: Unify AMD's generic and family 15h pmus
  perf/x86: Move Intel specific code to intel_pmu_init()
  perf/x86: Rename Intel specific macros
  perf/x86: Fix USER/KERNEL tagging of samples
  perf tools: Split event symbols arrays to hw and sw parts
  perf tools: Split out PE_VALUE_SYM parsing token to SW and HW tokens
  perf tools: Add empty rule for new line in event syntax parsing
  perf test: Use ARRAY_SIZE in parse events tests
  tools lib traceevent: Cleanup realloc use
  ...
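The "static keys" item above refers to the jump-label API cleanup completed in this cycle: the deprecated static_branch() test is removed, and callers such as tracepoints now use static_key_false() on a struct static_key. A minimal sketch of the replacement pattern, using the 3.5-era jump-label API (static_key_false() and static_key_slow_inc() are real; the tracing_key example itself is hypothetical):

#include <linux/jump_label.h>

static struct static_key tracing_key = STATIC_KEY_INIT_FALSE;

static void hot_path(void)
{
	/* formerly: if (static_branch(&tracing_key)) ... */
	if (static_key_false(&tracing_key)) {	/* patched to a jump when enabled */
		/* rarely-enabled slow path */
	}
}

static void tracing_on(void)
{
	static_key_slow_inc(&tracing_key);	/* patches all test sites to branch */
}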
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c                 |  49
-rw-r--r--  kernel/events/uprobes.c              | 461
-rw-r--r--  kernel/trace/ftrace.c                |   8
-rw-r--r--  kernel/trace/ring_buffer.c           |   4
-rw-r--r--  kernel/trace/trace.c                 |  33
-rw-r--r--  kernel/trace/trace.h                 |   8
-rw-r--r--  kernel/trace/trace_functions_graph.c |   2
-rw-r--r--  kernel/trace/trace_output.c          |   2
8 files changed, 313 insertions(+), 254 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d7d71d6ec972..f1cf0edeb39a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1645,6 +1645,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 	lockdep_assert_held(&ctx->mutex);
 
 	event->ctx = ctx;
+	if (event->cpu != -1)
+		event->cpu = cpu;
 
 	if (!task) {
 		/*
@@ -6252,6 +6254,8 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	get_online_cpus();
+
 	event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
 				 NULL, NULL);
 	if (IS_ERR(event)) {
@@ -6304,7 +6308,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	/*
 	 * Get the target context (task or percpu):
 	 */
-	ctx = find_get_context(pmu, task, cpu);
+	ctx = find_get_context(pmu, task, event->cpu);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err_alloc;
@@ -6377,20 +6381,23 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
-		perf_install_in_context(ctx, group_leader, cpu);
+		synchronize_rcu();
+		perf_install_in_context(ctx, group_leader, event->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_install_in_context(ctx, sibling, cpu);
+			perf_install_in_context(ctx, sibling, event->cpu);
 			get_ctx(ctx);
 		}
 	}
 
-	perf_install_in_context(ctx, event, cpu);
+	perf_install_in_context(ctx, event, event->cpu);
 	++ctx->generation;
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
+	put_online_cpus();
+
 	event->owner = current;
 
 	mutex_lock(&current->perf_event_mutex);
@@ -6419,6 +6426,7 @@ err_context:
 err_alloc:
 	free_event(event);
 err_task:
+	put_online_cpus();
 	if (task)
 		put_task_struct(task);
 err_group_fd:
@@ -6479,6 +6487,39 @@ err:
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+	struct perf_event_context *src_ctx;
+	struct perf_event_context *dst_ctx;
+	struct perf_event *event, *tmp;
+	LIST_HEAD(events);
+
+	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+	mutex_lock(&src_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+				 event_entry) {
+		perf_remove_from_context(event);
+		put_ctx(src_ctx);
+		list_add(&event->event_entry, &events);
+	}
+	mutex_unlock(&src_ctx->mutex);
+
+	synchronize_rcu();
+
+	mutex_lock(&dst_ctx->mutex);
+	list_for_each_entry_safe(event, tmp, &events, event_entry) {
+		list_del(&event->event_entry);
+		if (event->state >= PERF_EVENT_STATE_OFF)
+			event->state = PERF_EVENT_STATE_INACTIVE;
+		perf_install_in_context(dst_ctx, event, dst_cpu);
+		get_ctx(dst_ctx);
+	}
+	mutex_unlock(&dst_ctx->mutex);
+}
+EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
 static void sync_child_event(struct perf_event *child_event,
 			       struct task_struct *child)
 {
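The new perf_pmu_migrate_context() helper exists for uncore-style PMUs whose events belong to a package rather than a task: when the CPU hosting a package's events goes offline, the driver re-homes them to a sibling. A hypothetical hotplug handler might use it like this (my_uncore_pmu and my_uncore_cpu_mask are illustrative stand-ins, not kernel symbols; the topology/cpumask helpers are real):

#include <linux/cpumask.h>
#include <linux/perf_event.h>
#include <linux/topology.h>

static struct pmu my_uncore_pmu;	/* assumed registered elsewhere */
static cpumask_t my_uncore_cpu_mask;	/* one managing CPU per package */

static void my_uncore_cpu_exit(int cpu)
{
	int target;

	if (!cpumask_test_and_clear_cpu(cpu, &my_uncore_cpu_mask))
		return;

	/* pick another online CPU in the same package, if any */
	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
	if (target >= nr_cpu_ids)
		return;

	cpumask_set_cpu(target, &my_uncore_cpu_mask);
	/* re-home all events from the dying CPU's context */
	perf_pmu_migrate_context(&my_uncore_pmu, cpu, target);
}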
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 985be4d80fe8..f93532748bca 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -38,13 +38,29 @@
 #define UINSNS_PER_PAGE		(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
 #define MAX_UPROBE_XOL_SLOTS	UINSNS_PER_PAGE
 
-static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
 
+/*
+ * We need separate register/unregister and mmap/munmap lock hashes because
+ * of mmap_sem nesting.
+ *
+ * uprobe_register() needs to install probes on (potentially) all processes
+ * and thus needs to acquire multiple mmap_sems (consecutively, not
+ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
+ * for the particular process doing the mmap.
+ *
+ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
+ * because of lock order against i_mmap_mutex. This means there's a hole in
+ * the register vma iteration where a mmap() can happen.
+ *
+ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
+ * install a probe where one is already installed.
+ */
+
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 
@@ -61,17 +77,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasn't recommended.
- */
-struct vma_info {
-	struct list_head	probe_list;
-	struct mm_struct	*mm;
-	loff_t			vaddr;
-};
-
 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
@@ -100,7 +105,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
 	if (!is_register)
 		return true;
 
-	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
+				== (VM_READ|VM_EXEC))
 		return true;
 
 	return false;
@@ -129,33 +135,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
 static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
-	spinlock_t *ptl;
 	unsigned long addr;
-	int err = -EFAULT;
+	spinlock_t *ptl;
+	pte_t *ptep;
 
 	addr = page_address_in_vma(page, vma);
 	if (addr == -EFAULT)
-		goto out;
-
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		return -EFAULT;
 
-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
-		goto out;
+		return -EAGAIN;
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
@@ -174,10 +164,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
 	try_to_free_swap(page);
 	put_page(page);
 	pte_unmap_unlock(ptep, ptl);
-	err = 0;
 
-out:
-	return err;
+	return 0;
 }
 
 /**
@@ -222,9 +210,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	void *vaddr_old, *vaddr_new;
 	struct vm_area_struct *vma;
 	struct uprobe *uprobe;
-	loff_t addr;
 	int ret;
-
+retry:
 	/* Read the page with vaddr into memory */
 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
 	if (ret <= 0)
@@ -246,10 +233,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (mapping != vma->vm_file->f_mapping)
 		goto put_out;
 
-	addr = vma_address(vma, uprobe->offset);
-	if (vaddr != (unsigned long)addr)
-		goto put_out;
-
 	ret = -ENOMEM;
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 	if (!new_page)
@@ -267,11 +250,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	vaddr_new = kmap_atomic(new_page);
 
 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
-
-	/* poke the new insn in, ASSUMES we don't cross page boundary */
-	vaddr &= ~PAGE_MASK;
-	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	kunmap_atomic(vaddr_new);
 	kunmap_atomic(vaddr_old);
@@ -291,6 +270,8 @@ unlock_out:
 put_out:
 	put_page(old_page);
 
+	if (unlikely(ret == -EAGAIN))
+		goto retry;
 	return ret;
 }
 
@@ -312,7 +293,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
 	void *vaddr_new;
 	int ret;
 
-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
 	if (ret <= 0)
 		return ret;
 
@@ -333,10 +314,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	uprobe_opcode_t opcode;
 	int result;
 
+	if (current->mm == mm) {
+		pagefault_disable();
+		result = __copy_from_user_inatomic(&opcode, (void __user *)vaddr,
+							sizeof(opcode));
+		pagefault_enable();
+
+		if (likely(result == 0))
+			goto out;
+	}
+
 	result = read_opcode(mm, vaddr, &opcode);
 	if (result)
 		return result;
-
+ out:
 	if (is_swbp_insn(&opcode))
 		return 1;
 
@@ -355,7 +346,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
 	int result;
-
+	/*
+	 * See the comment near uprobes_hash().
+	 */
 	result = is_swbp_at_addr(mm, vaddr);
 	if (result == 1)
 		return -EEXIST;
@@ -520,7 +513,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
 	init_rwsem(&uprobe->consumer_rwsem);
-	INIT_LIST_HEAD(&uprobe->pending_list);
 
 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -588,20 +580,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
 }
 
 static int
-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
-			unsigned long nbytes, unsigned long offset)
+__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
+			unsigned long nbytes, loff_t offset)
 {
-	struct file *filp = vma->vm_file;
 	struct page *page;
 	void *vaddr;
-	unsigned long off1;
-	unsigned long idx;
+	unsigned long off;
+	pgoff_t idx;
 
 	if (!filp)
 		return -EINVAL;
 
-	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
-	off1 = offset &= ~PAGE_MASK;
+	if (!mapping->a_ops->readpage)
+		return -EIO;
+
+	idx = offset >> PAGE_CACHE_SHIFT;
+	off = offset & ~PAGE_MASK;
 
 	/*
 	 * Ensure that the page that has the original instruction is
@@ -612,22 +606,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
 		return PTR_ERR(page);
 
 	vaddr = kmap_atomic(page);
-	memcpy(insn, vaddr + off1, nbytes);
+	memcpy(insn, vaddr + off, nbytes);
 	kunmap_atomic(vaddr);
 	page_cache_release(page);
 
 	return 0;
 }
 
-static int
-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+static int copy_insn(struct uprobe *uprobe, struct file *filp)
 {
 	struct address_space *mapping;
 	unsigned long nbytes;
 	int bytes;
 
-	addr &= ~PAGE_MASK;
-	nbytes = PAGE_SIZE - addr;
+	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
 	mapping = uprobe->inode->i_mapping;
 
 	/* Instruction at end of binary; copy only available bytes */
@@ -638,13 +630,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
 
 	/* Instruction at the page-boundary; copy bytes in second page */
 	if (nbytes < bytes) {
-		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
-				bytes - nbytes, uprobe->offset + nbytes))
-			return -ENOMEM;
-
+		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
+				bytes - nbytes, uprobe->offset + nbytes);
+		if (err)
+			return err;
 		bytes = nbytes;
 	}
-	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
 }
 
 /*
@@ -672,9 +664,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
  */
 static int
 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
-			struct vm_area_struct *vma, loff_t vaddr)
+			struct vm_area_struct *vma, unsigned long vaddr)
 {
-	unsigned long addr;
 	int ret;
 
 	/*
@@ -687,20 +678,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	if (!uprobe->consumers)
 		return -EEXIST;
 
-	addr = (unsigned long)vaddr;
-
 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
-		ret = copy_insn(uprobe, vma, addr);
+		ret = copy_insn(uprobe, vma->vm_file);
 		if (ret)
 			return ret;
 
 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
-			return -EEXIST;
+			return -ENOTSUPP;
 
-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
+		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
 		if (ret)
 			return ret;
 
+		/* write_opcode() assumes we don't cross page boundary */
+		BUG_ON((uprobe->offset & ~PAGE_MASK) +
+				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+
 		uprobe->flags |= UPROBE_COPY_INSN;
 	}
 
@@ -713,7 +706,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	 * Hence increment before and decrement on failure.
 	 */
 	atomic_inc(&mm->uprobes_state.count);
-	ret = set_swbp(&uprobe->arch, mm, addr);
+	ret = set_swbp(&uprobe->arch, mm, vaddr);
 	if (ret)
 		atomic_dec(&mm->uprobes_state.count);
 
@@ -721,27 +714,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 }
 
 static void
-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
+	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
 		atomic_dec(&mm->uprobes_state.count);
 }
 
 /*
- * There could be threads that have hit the breakpoint and are entering the
- * notifier code and trying to acquire the uprobes_treelock. The thread
- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
- * race with these threads and might acquire the uprobes_treelock compared
- * to some of the breakpoint hit threads. In such a case, the breakpoint
- * hit threads will not find the uprobe. The current unregistering thread
- * waits till all other threads have hit a breakpoint, to acquire the
- * uprobes_treelock before the uprobe is removed from the rbtree.
+ * There could be threads that have already hit the breakpoint. They
+ * will recheck the current insn and restart if find_uprobe() fails.
+ * See find_active_uprobe().
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
 	unsigned long flags;
 
-	synchronize_srcu(&uprobes_srcu);
 	spin_lock_irqsave(&uprobes_treelock, flags);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock_irqrestore(&uprobes_treelock, flags);
@@ -750,139 +737,135 @@ static void delete_uprobe(struct uprobe *uprobe)
 	atomic_dec(&uprobe_events);
 }
 
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			struct vma_info *vi, loff_t offset, bool is_register)
+struct map_info {
+	struct map_info *next;
+	struct mm_struct *mm;
+	unsigned long vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
+{
+	struct map_info *next = info->next;
+	kfree(info);
+	return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 {
+	unsigned long pgoff = offset >> PAGE_SHIFT;
 	struct prio_tree_iter iter;
 	struct vm_area_struct *vma;
-	struct vma_info *tmpvi;
-	unsigned long pgoff;
-	int existing_vma;
-	loff_t vaddr;
-
-	pgoff = offset >> PAGE_SHIFT;
-
+	struct map_info *curr = NULL;
+	struct map_info *prev = NULL;
+	struct map_info *info;
+	int more = 0;
+
+ again:
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
-		existing_vma = 0;
-		vaddr = vma_address(vma, offset);
-
-		list_for_each_entry(tmpvi, head, probe_list) {
-			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
-				existing_vma = 1;
-				break;
-			}
-		}
-
-		/*
-		 * Another vma needs a probe to be installed. However skip
-		 * installing the probe if the vma is about to be unlinked.
-		 */
-		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
-			vi->mm = vma->vm_mm;
-			vi->vaddr = vaddr;
-			list_add(&vi->probe_list, head);
-
-			return vi;
-		}
-	}
+		if (!prev && !more) {
+			/*
+			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
+			 * reclaim. This is optimistic, no harm done if it fails.
+			 */
+			prev = kmalloc(sizeof(struct map_info),
+					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (prev)
+				prev->next = NULL;
+		}
+		if (!prev) {
+			more++;
+			continue;
+		}
 
-	return NULL;
-}
-
-/*
- * Iterate in the rmap prio tree and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
-			loff_t offset, bool is_register)
-{
-	struct vma_info *vi, *retvi;
+		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+			continue;
 
-	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
-	if (!vi)
-		return ERR_PTR(-ENOMEM);
+		info = prev;
+		prev = prev->next;
+		info->next = curr;
+		curr = info;
 
-	mutex_lock(&mapping->i_mmap_mutex);
-	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
+		info->mm = vma->vm_mm;
+		info->vaddr = vma_address(vma, offset);
+	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
-	if (!retvi)
-		kfree(vi);
+	if (!more)
+		goto out;
+
+	prev = curr;
+	while (curr) {
+		mmput(curr->mm);
+		curr = curr->next;
+	}
 
-	return retvi;
+	do {
+		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+		if (!info) {
+			curr = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+		info->next = prev;
+		prev = info;
+	} while (--more);
+
+	goto again;
+ out:
+	while (prev)
+		prev = free_map_info(prev);
+	return curr;
 }
 
 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 {
-	struct list_head try_list;
-	struct vm_area_struct *vma;
-	struct address_space *mapping;
-	struct vma_info *vi, *tmpvi;
-	struct mm_struct *mm;
-	loff_t vaddr;
-	int ret;
+	struct map_info *info;
+	int err = 0;
 
-	mapping = uprobe->inode->i_mapping;
-	INIT_LIST_HEAD(&try_list);
+	info = build_map_info(uprobe->inode->i_mapping,
+					uprobe->offset, is_register);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
 
-	ret = 0;
+	while (info) {
+		struct mm_struct *mm = info->mm;
+		struct vm_area_struct *vma;
 
-	for (;;) {
-		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
-		if (!vi)
-			break;
+		if (err)
+			goto free;
 
-		if (IS_ERR(vi)) {
-			ret = PTR_ERR(vi);
-			break;
-		}
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, (unsigned long)info->vaddr);
+		if (!vma || !valid_vma(vma, is_register))
+			goto unlock;
 
-		mm = vi->mm;
-		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, (unsigned long)vi->vaddr);
-		if (!vma || !valid_vma(vma, is_register)) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
-		vaddr = vma_address(vma, uprobe->offset);
 		if (vma->vm_file->f_mapping->host != uprobe->inode ||
-		    vaddr != vi->vaddr) {
-			list_del(&vi->probe_list);
-			kfree(vi);
-			up_read(&mm->mmap_sem);
-			mmput(mm);
-			continue;
-		}
-
-		if (is_register)
-			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
-		else
-			remove_breakpoint(uprobe, mm, vi->vaddr);
+		    vma_address(vma, uprobe->offset) != info->vaddr)
+			goto unlock;
 
-		up_read(&mm->mmap_sem);
-		mmput(mm);
 		if (is_register) {
-			if (ret && ret == -EEXIST)
-				ret = 0;
-			if (ret)
-				break;
+			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+			/*
+			 * We can race against uprobe_mmap(), see the
+			 * comment near uprobe_hash().
+			 */
+			if (err == -EEXIST)
+				err = 0;
+		} else {
+			remove_breakpoint(uprobe, mm, info->vaddr);
 		}
+ unlock:
+		up_write(&mm->mmap_sem);
+ free:
+		mmput(mm);
+		info = free_map_info(info);
 	}
 
-	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
-		list_del(&vi->probe_list);
-		kfree(vi);
-	}
-
-	return ret;
+	return err;
 }
 
 static int __uprobe_register(struct uprobe *uprobe)
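Worth pausing on build_map_info() above: it cannot block in kmalloc(GFP_KERNEL) while holding i_mmap_mutex, because reclaim could recurse into that same lock. So it allocates optimistically with GFP_NOWAIT under the lock, counts the misses in 'more', drops the lock, tops up with sleeping allocations, and rescans. A distilled sketch of the refill half of that pattern (struct rec, pool_get() and pool_refill() are stand-in names, not kernel symbols):

#include <linux/slab.h>

struct rec { struct rec *next; };

/* Pop one preallocated record; NULL means the caller must count a miss. */
static struct rec *pool_get(struct rec **pool)
{
	struct rec *r = *pool;

	if (r)
		*pool = r->next;
	return r;
}

/* Refill with 'n' records; called only after the mutex is dropped. */
static int pool_refill(struct rec **pool, int n)
{
	while (n--) {
		struct rec *r = kmalloc(sizeof(*r), GFP_KERNEL);

		if (!r)
			return -ENOMEM;
		r->next = *pool;
		*pool = r;
	}
	return 0;
}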
@@ -1048,7 +1031,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
 int uprobe_mmap(struct vm_area_struct *vma)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 	int ret, count;
 
@@ -1066,12 +1049,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	ret = 0;
 	count = 0;
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
 		if (!ret) {
-			vaddr = vma_address(vma, uprobe->offset);
+			loff_t vaddr = vma_address(vma, uprobe->offset);
 
 			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
 				put_uprobe(uprobe);
@@ -1079,8 +1059,10 @@ int uprobe_mmap(struct vm_area_struct *vma)
 			}
 
 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
-
-			/* Ignore double add: */
+			/*
+			 * We can race against uprobe_register(), see the
+			 * comment near uprobe_hash().
+			 */
 			if (ret == -EEXIST) {
 				ret = 0;
 
@@ -1115,7 +1097,7 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	struct list_head tmp_list;
-	struct uprobe *uprobe, *u;
+	struct uprobe *uprobe;
 	struct inode *inode;
 
 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
@@ -1132,11 +1114,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 	mutex_lock(uprobes_mmap_hash(inode));
 	build_probe_list(inode, &tmp_list);
 
-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		loff_t vaddr;
-
-		list_del(&uprobe->pending_list);
-		vaddr = vma_address(vma, uprobe->offset);
+	list_for_each_entry(uprobe, &tmp_list, pending_list) {
+		loff_t vaddr = vma_address(vma, uprobe->offset);
 
 		if (vaddr >= start && vaddr < end) {
 			/*
@@ -1378,9 +1357,6 @@ void uprobe_free_utask(struct task_struct *t)
 {
 	struct uprobe_task *utask = t->utask;
 
-	if (t->uprobe_srcu_id != -1)
-		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
-
 	if (!utask)
 		return;
 
@@ -1398,7 +1374,6 @@ void uprobe_free_utask(struct task_struct *t)
 void uprobe_copy_process(struct task_struct *t)
 {
 	t->utask = NULL;
-	t->uprobe_srcu_id = -1;
 }
 
 /*
@@ -1417,7 +1392,6 @@ static struct uprobe_task *add_utask(void)
 	if (unlikely(!utask))
 		return NULL;
 
-	utask->active_uprobe = NULL;
 	current->utask = utask;
 	return utask;
 }
@@ -1479,41 +1453,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
 	return false;
 }
 
+static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
+{
+	struct mm_struct *mm = current->mm;
+	struct uprobe *uprobe = NULL;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, bp_vaddr);
+	if (vma && vma->vm_start <= bp_vaddr) {
+		if (valid_vma(vma, false)) {
+			struct inode *inode;
+			loff_t offset;
+
+			inode = vma->vm_file->f_mapping->host;
+			offset = bp_vaddr - vma->vm_start;
+			offset += (vma->vm_pgoff << PAGE_SHIFT);
+			uprobe = find_uprobe(inode, offset);
+		}
+
+		if (!uprobe)
+			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
+	} else {
+		*is_swbp = -EFAULT;
+	}
+	up_read(&mm->mmap_sem);
+
+	return uprobe;
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
-	struct vm_area_struct *vma;
 	struct uprobe_task *utask;
 	struct uprobe *uprobe;
-	struct mm_struct *mm;
 	unsigned long bp_vaddr;
+	int uninitialized_var(is_swbp);
 
-	uprobe = NULL;
 	bp_vaddr = uprobe_get_swbp_addr(regs);
-	mm = current->mm;
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, bp_vaddr);
-
-	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
-		struct inode *inode;
-		loff_t offset;
-
-		inode = vma->vm_file->f_mapping->host;
-		offset = bp_vaddr - vma->vm_start;
-		offset += (vma->vm_pgoff << PAGE_SHIFT);
-		uprobe = find_uprobe(inode, offset);
-	}
-
-	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
-	current->uprobe_srcu_id = -1;
-	up_read(&mm->mmap_sem);
+	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
 
 	if (!uprobe) {
-		/* No matching uprobe; signal SIGTRAP. */
-		send_sig(SIGTRAP, current, 0);
+		if (is_swbp > 0) {
+			/* No matching uprobe; signal SIGTRAP. */
+			send_sig(SIGTRAP, current, 0);
+		} else {
+			/*
+			 * Either we raced with uprobe_unregister() or we can't
+			 * access this memory. The latter is only possible if
+			 * another thread plays with our ->mm. In both cases
+			 * we can simply restart. If this vma was unmapped we
+			 * can pretend this insn was not executed yet and get
+			 * the (correct) SIGSEGV after restart.
+			 */
+			instruction_pointer_set(regs, bp_vaddr);
+		}
 		return;
 	}
 
@@ -1620,7 +1617,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
 	utask->state = UTASK_BP_HIT;
 
 	set_thread_flag(TIF_UPROBE);
-	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
 
 	return 1;
 }
@@ -1655,7 +1651,6 @@ static int __init init_uprobes(void)
 		mutex_init(&uprobes_mutex[i]);
 		mutex_init(&uprobes_mmap_mutex[i]);
 	}
-	init_srcu_struct(&uprobes_srcu);
 
 	return register_die_notifier(&uprobe_exception_nb);
 }
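Two idioms in the uprobes rework above deserve a note. First, is_swbp_at_addr() now probes the current task's memory locklessly: with page faults disabled, __copy_from_user_inatomic() either succeeds immediately or fails without sleeping, and only the failure case falls back to the get_user_pages()-based read_opcode(). A self-contained sketch of that fast path (peek_user_word() is a made-up name; the pagefault_disable()/__copy_from_user_inatomic() pairing is the real kernel idiom):

#include <linux/sched.h>
#include <linux/uaccess.h>

static int peek_user_word(struct mm_struct *mm, unsigned long vaddr, u32 *val)
{
	int ret = -EFAULT;

	if (current->mm == mm) {
		pagefault_disable();	/* faults now fail fast instead of sleeping */
		ret = __copy_from_user_inatomic(val, (void __user *)vaddr,
						sizeof(*val));
		pagefault_enable();
	}
	return ret;	/* non-zero: caller takes the sleeping slow path */
}

Second, the SRCU removal works because handle_swbp() can now restart: if find_active_uprobe() loses the race with uprobe_unregister(), it rewinds the instruction pointer to bp_vaddr and re-executes the original instruction, so no grace period needs to be held across a breakpoint hit.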
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a008663d86c8..b4f20fba09fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list,
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	if (ftrace_disabled)
+	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
 	if (FTRACE_WARN_ON(ops == &global_ops))
@@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops)
 
 	mutex_lock(&ftrace_lock);
 
-	if (unlikely(ftrace_disabled))
-		goto out_unlock;
-
 	ret = __register_ftrace_function(ops);
 	if (!ret)
 		ret = ftrace_startup(ops, 0);
 
-
- out_unlock:
 	mutex_unlock(&ftrace_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_ftrace_function);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f765465bffe4..49491fa7daa2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3239,6 +3239,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	if (cpu_buffer->commit_page == cpu_buffer->reader_page)
 		goto out;
 
+	/* Don't bother swapping if the ring buffer is empty */
+	if (rb_num_of_entries(cpu_buffer) == 0)
+		goto out;
+
 	/*
 	 * Reset the reader page to size zero.
 	 */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a7fa0702be1c..a120f98c4112 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -830,6 +830,8 @@ int register_tracer(struct tracer *type)
 		current_trace = saved_tracer;
 		if (ret) {
 			printk(KERN_CONT "FAILED!\n");
+			/* Add the warning after printing 'FAILED' */
+			WARN_ON(1);
 			goto out;
 		}
 		/* Only reset on passing, to avoid touching corrupted buffers */
@@ -1708,9 +1710,11 @@ EXPORT_SYMBOL_GPL(trace_vprintk);
 
 static void trace_iterator_increment(struct trace_iterator *iter)
 {
+	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
+
 	iter->idx++;
-	if (iter->buffer_iter[iter->cpu])
-		ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
+	if (buf_iter)
+		ring_buffer_read(buf_iter, NULL);
 }
 
 static struct trace_entry *
@@ -1718,7 +1722,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 		unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
-	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
+	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
 
 	if (buf_iter)
 		event = ring_buffer_iter_peek(buf_iter, ts);
@@ -1856,10 +1860,10 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
 
 	tr->data[cpu]->skipped_entries = 0;
 
-	if (!iter->buffer_iter[cpu])
+	buf_iter = trace_buffer_iter(iter, cpu);
+	if (!buf_iter)
 		return;
 
-	buf_iter = iter->buffer_iter[cpu];
 	ring_buffer_iter_reset(buf_iter);
 
 	/*
@@ -2205,13 +2209,15 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 
 int trace_empty(struct trace_iterator *iter)
 {
+	struct ring_buffer_iter *buf_iter;
 	int cpu;
 
 	/* If we are looking at one CPU buffer, only check that one */
 	if (iter->cpu_file != TRACE_PIPE_ALL_CPU) {
 		cpu = iter->cpu_file;
-		if (iter->buffer_iter[cpu]) {
-			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+		buf_iter = trace_buffer_iter(iter, cpu);
+		if (buf_iter) {
+			if (!ring_buffer_iter_empty(buf_iter))
 				return 0;
 		} else {
 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2221,8 +2227,9 @@ int trace_empty(struct trace_iterator *iter)
 	}
 
 	for_each_tracing_cpu(cpu) {
-		if (iter->buffer_iter[cpu]) {
-			if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
+		buf_iter = trace_buffer_iter(iter, cpu);
+		if (buf_iter) {
+			if (!ring_buffer_iter_empty(buf_iter))
 				return 0;
 		} else {
 			if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
@@ -2381,6 +2388,11 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!iter)
 		return ERR_PTR(-ENOMEM);
 
+	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+				    GFP_KERNEL);
+	if (!iter->buffer_iter)
+		goto release;
+
 	/*
 	 * We make a copy of the current tracer to avoid concurrent
 	 * changes on it while we are reading.
@@ -2441,6 +2453,8 @@ __tracing_open(struct inode *inode, struct file *file)
 fail:
 	mutex_unlock(&trace_types_lock);
 	kfree(iter->trace);
+	kfree(iter->buffer_iter);
+release:
 	seq_release_private(inode, file);
 	return ERR_PTR(-ENOMEM);
 }
@@ -2481,6 +2495,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 	mutex_destroy(&iter->mutex);
 	free_cpumask_var(iter->started);
 	kfree(iter->trace);
+	kfree(iter->buffer_iter);
 	seq_release_private(inode, file);
 	return 0;
 }
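The allocation added to __tracing_open() above comes with a second unwind label, the usual kernel error-path shape: each allocation failure jumps to the label that frees exactly what was set up before it, so the labels read in reverse order of construction. A generic sketch of that idiom (struct foo, its fields, and foo_open() are illustrative, not kernel symbols):

#include <linux/cpumask.h>
#include <linux/slab.h>

struct foo {
	int *per_cpu_data;
	int *scratch;
};

static struct foo *foo_open(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	f->per_cpu_data = kzalloc(sizeof(*f->per_cpu_data) * num_possible_cpus(),
				  GFP_KERNEL);
	if (!f->per_cpu_data)
		goto release;		/* only 'f' exists so far */

	f->scratch = kzalloc(sizeof(*f->scratch), GFP_KERNEL);
	if (!f->scratch)
		goto fail;		/* per_cpu_data must be freed too */

	return f;
 fail:
	kfree(f->per_cpu_data);
 release:
	kfree(f);
	return NULL;
}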
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5aec220d2de0..55e1f7f0db12 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -317,6 +317,14 @@ struct tracer {
 
 #define TRACE_PIPE_ALL_CPU	-1
 
+static inline struct ring_buffer_iter *
+trace_buffer_iter(struct trace_iterator *iter, int cpu)
+{
+	if (iter->buffer_iter && iter->buffer_iter[cpu])
+		return iter->buffer_iter[cpu];
+	return NULL;
+}
+
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
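With iter->buffer_iter now a dynamically allocated array, the accessor above makes a missing array and a missing per-CPU iterator look the same to callers. The pattern used throughout trace.c then becomes (a sketch distilled from trace_empty() in this series; cpu_has_buffered_entries() is a made-up wrapper, the calls inside are real):

static bool cpu_has_buffered_entries(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter)	/* snapshot iterator present: read through it */
		return !ring_buffer_iter_empty(buf_iter);

	/* otherwise fall back to the live per-CPU ring buffer */
	return !ring_buffer_empty_cpu(iter->tr->buffer, cpu);
}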
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a7d2a4c653d8..ce27c8ba8d31 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -538,7 +538,7 @@ get_return_for_leaf(struct trace_iterator *iter,
 		next = &data->ret;
 	} else {
 
-		ring_iter = iter->buffer_iter[iter->cpu];
+		ring_iter = trace_buffer_iter(iter, iter->cpu);
 
 		/* First peek to compare current entry and the next one */
 		if (ring_iter)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df611a0e76c5..123b189c732c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1325,4 +1325,4 @@ __init static int init_events(void)
 
 	return 0;
 }
-device_initcall(init_events);
+early_initcall(init_events);