Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                        |    4
-rw-r--r--  kernel/exit.c                          |    7
-rw-r--r--  kernel/extable.c                       |   16
-rw-r--r--  kernel/fork.c                          |    5
-rw-r--r--  kernel/futex.c                         |  290
-rw-r--r--  kernel/kthread.c                       |    3
-rw-r--r--  kernel/lockdep.c                       |   37
-rw-r--r--  kernel/lockdep_proc.c                  |   28
-rw-r--r--  kernel/marker.c                        |  192
-rw-r--r--  kernel/module.c                        |   11
-rw-r--r--  kernel/mutex.c                         |   10
-rw-r--r--  kernel/notifier.c                      |    8
-rw-r--r--  kernel/posix-cpu-timers.c              |   10
-rw-r--r--  kernel/power/disk.c                    |   13
-rw-r--r--  kernel/power/main.c                    |    5
-rw-r--r--  kernel/profile.c                       |    2
-rw-r--r--  kernel/rcuclassic.c                    |    4
-rw-r--r--  kernel/sched.c                         |   10
-rw-r--r--  kernel/signal.c                        |    2
-rw-r--r--  kernel/softlockup.c                    |    2
-rw-r--r--  kernel/sys.c                           |    2
-rw-r--r--  kernel/sysctl.c                        |   10
-rw-r--r--  kernel/trace/Kconfig                   |   76
-rw-r--r--  kernel/trace/Makefile                  |    7
-rw-r--r--  kernel/trace/ftrace.c                  |  359
-rw-r--r--  kernel/trace/ring_buffer.c             |  377
-rw-r--r--  kernel/trace/trace.c                   |  793
-rw-r--r--  kernel/trace/trace.h                   |  192
-rw-r--r--  kernel/trace/trace_boot.c              |  166
-rw-r--r--  kernel/trace/trace_branch.c            |  341
-rw-r--r--  kernel/trace/trace_functions.c         |   18
-rw-r--r--  kernel/trace/trace_functions_return.c  |   98
-rw-r--r--  kernel/trace/trace_irqsoff.c           |   61
-rw-r--r--  kernel/trace/trace_mmiotrace.c         |   41
-rw-r--r--  kernel/trace/trace_nop.c               |   65
-rw-r--r--  kernel/trace/trace_sched_switch.c      |  106
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      |   70
-rw-r--r--  kernel/trace/trace_selftest.c          |  173
-rw-r--r--  kernel/trace/trace_stack.c             |   32
-rw-r--r--  kernel/trace/trace_sysprof.c           |   19
-rw-r--r--  kernel/tracepoint.c                    |  295
41 files changed, 2938 insertions(+), 1022 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 6a212b842d86..010ccb311166 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -20,6 +20,10 @@ CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 endif
+ifdef CONFIG_FUNCTION_RET_TRACER
+CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
+CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
+endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
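
The hunk above drops -pg from extable.o and module.o because the new function-return tracer itself calls __kernel_text_address() and __module_text_address(); compiling those helpers with mcount hooks would let the tracer recurse into itself. A hedged sketch of the per-function equivalent used elsewhere in this series (the helper name here is hypothetical):

	/* notrace opts a single function out of mcount instrumentation,
	 * the same effect removing -pg (above) has on a whole object file. */
	static notrace int tracer_internal_helper(unsigned long addr)
	{
		return core_kernel_text(addr);	/* must not be traced itself */
	}
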
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f7..61ba5b4b10cf 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,10 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+DEFINE_TRACE(sched_process_free);
+DEFINE_TRACE(sched_process_exit);
+DEFINE_TRACE(sched_process_wait);
+
 static void exit_mm(struct task_struct * tsk);
 
 static inline int task_detached(struct task_struct *p)
@@ -1123,7 +1127,6 @@ NORET_TYPE void do_exit(long code)
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
-
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return". */
@@ -1321,10 +1324,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	 * group, which consolidates times for all threads in the
 	 * group including the group leader.
 	 */
+	thread_group_cputime(p, &cputime);
 	spin_lock_irq(&p->parent->sighand->siglock);
 	psig = p->parent->signal;
 	sig = p->signal;
-	thread_group_cputime(p, &cputime);
 	psig->cutime =
 		cputime_add(psig->cutime,
 			    cputime_add(cputime.utime,
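
The wait_task_zombie() hunk hoists thread_group_cputime() out of the siglock critical section: the summation is done first, and only the accumulated results are published under the lock, shrinking the locked region. A minimal sketch of that compute-then-publish pattern, with the surrounding code elided:

	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);		/* no siglock held */
	spin_lock_irq(&p->parent->sighand->siglock);
	psig->cutime = cputime_add(psig->cutime, cputime.utime);
	spin_unlock_irq(&p->parent->sighand->siglock);
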
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
 		return 1;
 	return module_text_address(addr) != NULL;
 }
+
+/*
+ * On some architectures (PPC64, IA64) function pointers
+ * are actually only tokens to some data that then holds the
+ * real function address. As a result, to find if a function
+ * pointer is part of the kernel text, we need to do some
+ * special dereferencing first.
+ */
+int func_ptr_is_kernel_text(void *ptr)
+{
+	unsigned long addr;
+	addr = (unsigned long) dereference_function_descriptor(ptr);
+	if (core_kernel_text(addr))
+		return 1;
+	return module_text_address(addr) != NULL;
+}
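
func_ptr_is_kernel_text() exists because, as the comment notes, a C function pointer on PPC64 or IA64 refers to a descriptor rather than to the code itself. A hedged usage sketch (install_callback is hypothetical, not part of this patch):

	/* Reject callbacks whose dereferenced address is outside
	 * core kernel text and module text. */
	static int install_callback(void (*cb)(void))
	{
		if (!func_ptr_is_kernel_text(cb))
			return -EINVAL;
		cb();
		return 0;
	}
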
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a372a0e206f..d6e1a3205f62 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -47,6 +47,7 @@
 #include <linux/mount.h>
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
@@ -80,6 +81,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
 	int cpu;
@@ -137,6 +140,7 @@ void free_task(struct task_struct *tsk)
 	prop_local_destroy_single(&tsk->dirties);
 	free_thread_info(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
+	ftrace_retfunc_exit_task(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -1267,6 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
+	ftrace_retfunc_init_task(p);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	return p;
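
The DEFINE_TRACE() lines here and in exit.c/kthread.c pair with DECLARE_TRACE() in the headers: each tracepoint now needs exactly one out-of-line definition. A minimal sketch of the pattern, assuming the tracepoint API of this series (fork_tail is a hypothetical call site):

	#include <trace/sched.h>	/* DECLARE_TRACE(sched_process_fork, ...) */

	DEFINE_TRACE(sched_process_fork);	/* one definition per tracepoint */

	static void fork_tail(struct task_struct *parent, struct task_struct *p)
	{
		trace_sched_process_fork(parent, p);	/* nop unless probed */
	}
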
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514b..e10c5c8786a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -123,24 +123,6 @@ struct futex_hash_bucket {
 static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
 
 /*
- * Take mm->mmap_sem, when futex is shared
- */
-static inline void futex_lock_mm(struct rw_semaphore *fshared)
-{
-	if (fshared)
-		down_read(fshared);
-}
-
-/*
- * Release mm->mmap_sem, when the futex is shared
- */
-static inline void futex_unlock_mm(struct rw_semaphore *fshared)
-{
-	if (fshared)
-		up_read(fshared);
-}
-
-/*
  * We hash on the keys returned from get_futex_key (see below).
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 	    && key1->both.offset == key2->both.offset);
 }
 
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ *
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		atomic_inc(&key->shared.inode->i_count);
+		break;
+	case FUT_OFF_MMSHARED:
+		atomic_inc(&key->private.mm->mm_count);
+		break;
+	}
+}
+
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		iput(key->shared.inode);
+		break;
+	case FUT_OFF_MMSHARED:
+		mmdrop(key->private.mm);
+		break;
+	}
+}
+
 /**
  * get_futex_key - Get parameters which are the keys for a futex.
  * @uaddr: virtual address of the futex
@@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
  * For other futexes, it points to &current->mm->mmap_sem and
  * caller must have taken the reader lock. but NOT any spinlocks.
  */
-static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
-			 union futex_key *key)
+static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
 	struct page *page;
 	int err;
 
@@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
 			return -EFAULT;
 		key->private.mm = mm;
 		key->private.address = address;
+		get_futex_key_refs(key);
 		return 0;
 	}
-	/*
-	 * The futex is hashed differently depending on whether
-	 * it's in a shared or private mapping.  So check vma first.
-	 */
-	vma = find_extend_vma(mm, address);
-	if (unlikely(!vma))
-		return -EFAULT;
 
-	/*
-	 * Permissions.
-	 */
-	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+again:
+	err = get_user_pages_fast(address, 1, 0, &page);
+	if (err < 0)
+		return err;
+
+	lock_page(page);
+	if (!page->mapping) {
+		unlock_page(page);
+		put_page(page);
+		goto again;
+	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
-	 * the object not the particular process.  Therefore we use
-	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-	 * mappings of _writable_ handles.
+	 * the object not the particular process.
 	 */
-	if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-		key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+	if (PageAnon(page)) {
+		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
-		return 0;
+	} else {
+		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+		key->shared.inode = page->mapping->host;
+		key->shared.pgoff = page->index;
 	}
 
-	/*
-	 * Linear file mappings are also simple.
-	 */
-	key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-	key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-		key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-				     + vma->vm_pgoff);
-		return 0;
-	}
+	get_futex_key_refs(key);
 
-	/*
-	 * We could walk the page table to read the non-linear
-	 * pte, and get the page index without fetching the page
-	 * from swap.  But that's a lot of code to duplicate here
-	 * for a rare case, so we simply fetch the page.
-	 */
-	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-	if (err >= 0) {
-		key->shared.pgoff =
-			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-		put_page(page);
-		return 0;
-	}
-	return err;
-}
-
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
-	if (key->both.ptr == NULL)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-	case FUT_OFF_INODE:
-		atomic_inc(&key->shared.inode->i_count);
-		break;
-	case FUT_OFF_MMSHARED:
-		atomic_inc(&key->private.mm->mm_count);
-		break;
-	}
+	unlock_page(page);
+	put_page(page);
+	return 0;
 }
 
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held.
- */
-static void drop_futex_key_refs(union futex_key *key)
+static inline
+void put_futex_key(int fshared, union futex_key *key)
 {
-	if (!key->both.ptr)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-	case FUT_OFF_INODE:
-		iput(key->shared.inode);
-		break;
-	case FUT_OFF_MMSHARED:
-		mmdrop(key->private.mm);
-		break;
-	}
+	drop_futex_key_refs(key);
 }
 
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
 
 /*
  * Fault handling.
- * if fshared is non NULL, current->mm->mmap_sem is already held
  */
-static int futex_handle_fault(unsigned long address,
-			      struct rw_semaphore *fshared, int attempt)
+static int futex_handle_fault(unsigned long address, int attempt)
 {
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
@@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
 	if (attempt > 2)
 		return ret;
 
-	if (!fshared)
-		down_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
 	if (vma && address >= vma->vm_start &&
 	    (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
 			current->min_flt++;
 		}
 	}
-	if (!fshared)
-		up_read(&mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return ret;
 }
 
@@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
 	/* pi_mutex gets initialized later */
 	pi_state->owner = NULL;
 	atomic_set(&pi_state->refcount, 1);
+	pi_state->key = FUTEX_KEY_INIT;
 
 	current->pi_state_cache = pi_state;
 
@@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
 	struct futex_hash_bucket *hb;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 
 	if (!futex_cmpxchg_enabled)
 		return;
@@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
-static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
-		      int nr_wake, u32 bitset)
+static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
 
 	if (!bitset)
 		return -EINVAL;
 
-	futex_lock_mm(fshared);
-
 	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
 		goto out;
@@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	spin_unlock(&hb->lock);
 out:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &key);
 	return ret;
 }
 
@@ -769,19 +732,16 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
-	      u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	      int nr_wake, int nr_wake2, int op)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head;
 	struct futex_q *this, *next;
 	int ret, op_ret, attempt = 0;
 
 retryfull:
-	futex_lock_mm(fshared);
-
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
@@ -826,18 +786,12 @@ retry:
 		 */
 		if (attempt++) {
 			ret = futex_handle_fault((unsigned long)uaddr2,
-						 fshared, attempt);
+						 attempt);
 			if (ret)
 				goto out;
 			goto retry;
 		}
 
-		/*
-		 * If we would have faulted, release mmap_sem,
-		 * fault it in and start all over again.
-		 */
-		futex_unlock_mm(fshared);
-
 		ret = get_user(dummy, uaddr2);
 		if (ret)
 			return ret;
@@ -873,7 +827,8 @@ retry:
 	if (hb1 != hb2)
 		spin_unlock(&hb2->lock);
 out:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 
 	return ret;
 }
@@ -882,19 +837,16 @@ out:
  * Requeue all waiters hashed on one physical page to another
  * physical page.
  */
-static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
-			 u32 __user *uaddr2,
+static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 			 int nr_wake, int nr_requeue, u32 *cmpval)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
 	int ret, drop_count = 0;
 
  retry:
-	futex_lock_mm(fshared);
-
 	ret = get_futex_key(uaddr1, fshared, &key1);
 	if (unlikely(ret != 0))
 		goto out;
@@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 		if (hb1 != hb2)
 			spin_unlock(&hb2->lock);
 
-		/*
-		 * If we would have faulted, release mmap_sem, fault
-		 * it in and start all over again.
-		 */
-		futex_unlock_mm(fshared);
-
 		ret = get_user(curval, uaddr1);
 
 		if (!ret)
@@ -974,7 +920,8 @@ out_unlock:
 		drop_futex_key_refs(&key1);
 
 out:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 	return ret;
 }
 
@@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *newowner,
-				struct rw_semaphore *fshared)
+				struct task_struct *newowner, int fshared)
 {
 	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1122,7 @@ retry:
 handle_fault:
 	spin_unlock(q->lock_ptr);
 
-	ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
+	ret = futex_handle_fault((unsigned long)uaddr, attempt++);
 
 	spin_lock(q->lock_ptr);
 
@@ -1200,7 +1146,7 @@ handle_fault:
 
 static long futex_wait_restart(struct restart_block *restart);
 
-static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_wait(u32 __user *uaddr, int fshared,
 		      u32 val, ktime_t *abs_time, u32 bitset)
 {
 	struct task_struct *curr = current;
@@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	q.pi_state = NULL;
 	q.bitset = bitset;
  retry:
-	futex_lock_mm(fshared);
-
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	if (unlikely(ret)) {
 		queue_unlock(&q, hb);
 
-		/*
-		 * If we would have faulted, release mmap_sem, fault it in and
-		 * start all over again.
-		 */
-		futex_unlock_mm(fshared);
-
 		ret = get_user(uval, uaddr);
 
 		if (!ret)
@@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_me(&q, hb);
 
 	/*
-	 * Now the futex is queued and we have checked the data, we
-	 * don't want to hold mmap_sem while we sleep.
-	 */
-	futex_unlock_mm(fshared);
-
-	/*
 	 * There might have been scheduling since the queue_me(), as we
 	 * cannot hold a spinlock across the get_user() in case it
 	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &q.key);
 	return ret;
 }
 
@@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 static long futex_wait_restart(struct restart_block *restart)
 {
 	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-	struct rw_semaphore *fshared = NULL;
+	int fshared = 0;
 	ktime_t t;
 
 	t.tv64 = restart->futex.time;
 	restart->fn = do_no_restart_syscall;
 	if (restart->futex.flags & FLAGS_SHARED)
-		fshared = &current->mm->mmap_sem;
+		fshared = 1;
 	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
 				restart->futex.bitset);
 }
@@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+static int futex_lock_pi(u32 __user *uaddr, int fshared,
 			 int detect, ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	q.pi_state = NULL;
  retry:
-	futex_lock_mm(fshared);
-
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		 * exit to complete.
 		 */
 		queue_unlock(&q, hb);
-		futex_unlock_mm(fshared);
 		cond_resched();
 		goto retry;
 
@@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 */
 	queue_me(&q, hb);
 
-	/*
-	 * Now the futex is queued and we have checked the data, we
-	 * don't want to hold mmap_sem while we sleep.
-	 */
-	futex_unlock_mm(fshared);
-
 	WARN_ON(!q.pi_state);
 	/*
 	 * Block on the PI mutex:
@@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		ret = ret ? 0 : -EWOULDBLOCK;
 	}
 
-	futex_lock_mm(fshared);
 	spin_lock(q.lock_ptr);
 
 	if (!ret) {
@@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	/* Unqueue and drop the lock */
 	unqueue_me_pi(&q);
-	futex_unlock_mm(fshared);
 
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
@@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &q.key);
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
 	return ret;
@@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
 	if (attempt++) {
-		ret = futex_handle_fault((unsigned long)uaddr, fshared,
-					 attempt);
+		ret = futex_handle_fault((unsigned long)uaddr, attempt);
 		if (ret)
 			goto out_release_sem;
 		goto retry_unlocked;
 	}
 
-	futex_unlock_mm(fshared);
-
 	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
@@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
+static int futex_unlock_pi(u32 __user *uaddr, int fshared)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	u32 uval;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret, attempt = 0;
 
 retry:
@@ -1685,10 +1605,6 @@ retry:
 	 */
 	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
 		return -EPERM;
-	/*
-	 * First take all the futex related locks:
-	 */
-	futex_lock_mm(fshared);
 
 	ret = get_futex_key(uaddr, fshared, &key);
 	if (unlikely(ret != 0))
@@ -1747,7 +1663,7 @@ retry_unlocked:
 out_unlock:
 	spin_unlock(&hb->lock);
 out:
-	futex_unlock_mm(fshared);
+	put_futex_key(fshared, &key);
 
 	return ret;
 
@@ -1763,16 +1679,13 @@ pi_faulted:
 	spin_unlock(&hb->lock);
 
 	if (attempt++) {
-		ret = futex_handle_fault((unsigned long)uaddr, fshared,
-					 attempt);
+		ret = futex_handle_fault((unsigned long)uaddr, attempt);
 		if (ret)
 			goto out;
 		uval = 0;
 		goto retry_unlocked;
 	}
 
-	futex_unlock_mm(fshared);
-
 	ret = get_user(uval, uaddr);
 	if (!ret && (uval != -EFAULT))
 		goto retry;
@@ -1898,8 +1811,7 @@ retry:
 		 * PI futexes happens in exit_pi_state():
 		 */
 		if (!pi && (uval & FUTEX_WAITERS))
-			futex_wake(uaddr, &curr->mm->mmap_sem, 1,
-				   FUTEX_BITSET_MATCH_ANY);
+			futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
 	}
 	return 0;
 }
@@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 {
 	int ret = -ENOSYS;
 	int cmd = op & FUTEX_CMD_MASK;
-	struct rw_semaphore *fshared = NULL;
+	int fshared = 0;
 
 	if (!(op & FUTEX_PRIVATE_FLAG))
-		fshared = &current->mm->mmap_sem;
+		fshared = 1;
 
 	switch (cmd) {
 	case FUTEX_WAIT:
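
With mmap_sem out of the picture, every futex operation above follows the same key lifecycle built from get_futex_key()/put_futex_key(). A hedged skeleton of that pattern (futex_op_skeleton is hypothetical):

	static int futex_op_skeleton(u32 __user *uaddr, int fshared)
	{
		union futex_key key = FUTEX_KEY_INIT;
		int ret;

		ret = get_futex_key(uaddr, fshared, &key); /* pins mm or inode */
		if (ret)
			return ret;
		/* ... hash the key, lock the bucket, wake/requeue/wait ... */
		put_futex_key(fshared, &key);		/* drops the pinned ref */
		return 0;
	}
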
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0a..4fbc456f393d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
+DEFINE_TRACE(sched_kthread_stop);
+DEFINE_TRACE(sched_kthread_stop_ret);
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 06e157119d2b..e4bdda8dcf04 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
 
-static int lock_contention_point(struct lock_class *class, unsigned long ip)
+static int lock_point(unsigned long points[], unsigned long ip)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
-		if (class->contention_point[i] == 0) {
-			class->contention_point[i] = ip;
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		if (points[i] == 0) {
+			points[i] = ip;
 			break;
 		}
-		if (class->contention_point[i] == ip)
+		if (points[i] == ip)
 			break;
 	}
 
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 		for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
 			stats.contention_point[i] += pcs->contention_point[i];
 
+		for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+			stats.contending_point[i] += pcs->contending_point[i];
+
 		lock_time_add(&pcs->read_waittime, &stats.read_waittime);
 		lock_time_add(&pcs->write_waittime, &stats.write_waittime);
 
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
 		memset(cpu_stats, 0, sizeof(struct lock_class_stats));
 	}
 	memset(class->contention_point, 0, sizeof(class->contention_point));
+	memset(class->contending_point, 0, sizeof(class->contending_point));
 }
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -2999,7 +3003,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	struct held_lock *hlock, *prev_hlock;
 	struct lock_class_stats *stats;
 	unsigned int depth;
-	int i, point;
+	int i, contention_point, contending_point;
 
 	depth = curr->lockdep_depth;
 	if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3023,18 +3027,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 found_it:
 	hlock->waittime_stamp = sched_clock();
 
-	point = lock_contention_point(hlock_class(hlock), ip);
+	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+	contending_point = lock_point(hlock_class(hlock)->contending_point,
+				      lock->ip);
 
 	stats = get_lock_stats(hlock_class(hlock));
-	if (point < ARRAY_SIZE(stats->contention_point))
-		stats->contention_point[point]++;
+	if (contention_point < LOCKSTAT_POINTS)
+		stats->contention_point[contention_point]++;
+	if (contending_point < LOCKSTAT_POINTS)
+		stats->contending_point[contending_point]++;
 	if (lock->cpu != smp_processor_id())
 		stats->bounces[bounce_contended + !!hlock->read]++;
 	put_lock_stats(stats);
 }
 
 static void
-__lock_acquired(struct lockdep_map *lock)
+__lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
@@ -3083,6 +3091,7 @@ found_it:
 	put_lock_stats(stats);
 
 	lock->cpu = cpu;
+	lock->ip = ip;
 }
 
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3104,7 +3113,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-void lock_acquired(struct lockdep_map *lock)
+void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
@@ -3117,7 +3126,7 @@ void lock_acquired(struct lockdep_map *lock)
 	raw_local_irq_save(flags);
 	check_flags(flags);
 	current->lockdep_recursion = 1;
-	__lock_acquired(lock);
+	__lock_acquired(lock, ip);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
@@ -3276,10 +3285,10 @@ void __init lockdep_info(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
 
 	printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
 	printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
 	printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
 	printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
 	printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
 	printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
 	printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);
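
lock_acquired() now takes the caller's instruction pointer and stores it in lock->ip, so __lock_contended() can attribute a contention both to the site that blocked (contention_point) and to the site that last successfully took the lock (contending_point). Roughly how a LOCK_CONTENDED-style wrapper threads the ip through after this change (a sketch, not the exact macro):

	#define LOCK_CONTENDED_SKETCH(_lock, try, lock)			\
	do {								\
		if (!try(_lock)) {					\
			lock_contended(&(_lock)->dep_map, _RET_IP_);	\
			lock(_lock);					\
		}							\
		lock_acquired(&(_lock)->dep_map, _RET_IP_);		\
	} while (0)
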
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf9c7dd..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
 
 static void snprint_time(char *buf, size_t bufsiz, s64 nr)
 {
-	unsigned long rem;
+	s64 div;
+	s32 rem;
 
 	nr += 5; /* for display rounding */
-	rem = do_div(nr, 1000); /* XXX: do_div_signed */
-	snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
+	div = div_s64_rem(nr, 1000, &rem);
+	snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
 }
 
480static void seq_time(struct seq_file *m, s64 time) 481static void seq_time(struct seq_file *m, s64 time)
@@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
556 if (stats->read_holdtime.nr) 557 if (stats->read_holdtime.nr)
557 namelen += 2; 558 namelen += 2;
558 559
559 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 560 for (i = 0; i < LOCKSTAT_POINTS; i++) {
560 char sym[KSYM_SYMBOL_LEN]; 561 char sym[KSYM_SYMBOL_LEN];
561 char ip[32]; 562 char ip[32];
562 563
@@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 				stats->contention_point[i],
 				ip, sym);
 	}
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		char sym[KSYM_SYMBOL_LEN];
+		char ip[32];
+
+		if (class->contending_point[i] == 0)
+			break;
+
+		if (!i)
+			seq_line(m, '-', 40-namelen, namelen);
+
+		sprint_symbol(sym, class->contending_point[i]);
+		snprintf(ip, sizeof(ip), "[<%p>]",
+				(void *)class->contending_point[i]);
+		seq_printf(m, "%40s %14lu %29s %s\n", name,
+				stats->contending_point[i],
+				ip, sym);
+	}
 	if (i) {
 		seq_puts(m, "\n");
 		seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 
 static void seq_header(struct seq_file *m)
 {
-	seq_printf(m, "lock_stat version 0.2\n");
+	seq_printf(m, "lock_stat version 0.3\n");
 	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
 	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
 			"%14s %14s\n",
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..ea54f2647868 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
  */
 #define MARKER_HASH_BITS 6
 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
 
 /*
  * Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
 	void *oldptr;
 	int rcu_pending;
 	unsigned char ptype:1;
+	unsigned char format_allocated:1;
 	char name[0];	/* Contains name'\0'format'\0' */
 };
 
-static struct hlist_head marker_table[MARKER_TABLE_SIZE];
-
 /**
  * __mark_empty_function - Empty probe callback
  * @probe_private: probe private data
@@ -81,7 +81,7 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
  * though the function pointer change and the marker enabling are two distinct
  * operations that modifies the execution flow of preemptible code.
  */
-void __mark_empty_function(void *probe_private, void *call_private,
+notrace void __mark_empty_function(void *probe_private, void *call_private,
 		const char *fmt, va_list *args)
 {
 }
@@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
  * need to put a full smp_rmb() in this branch. This is why we do not use
  * rcu_dereference() for the pointer read.
  */
-void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
+notrace void marker_probe_cb(const struct marker *mdata,
+		void *call_private, ...)
 {
 	va_list args;
 	char ptype;
@@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 	 * sure the teardown of the callbacks can be done correctly when they
 	 * are in modules and they insure RCU read coherency.
 	 */
-	rcu_read_lock_sched();
+	rcu_read_lock_sched_notrace();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
 			va_end(args);
 		}
 	}
-	rcu_read_unlock_sched();
+	rcu_read_unlock_sched_notrace();
 }
 EXPORT_SYMBOL_GPL(marker_probe_cb);
 
@@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
  *
  * Should be connected to markers "MARK_NOARGS".
  */
-void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
+static notrace void marker_probe_cb_noarg(const struct marker *mdata,
+		void *call_private, ...)
 {
 	va_list args;	/* not initialized */
 	char ptype;
 
-	rcu_read_lock_sched();
+	rcu_read_lock_sched_notrace();
 	ptype = mdata->ptype;
 	if (likely(!ptype)) {
 		marker_probe_func *func;
@@ -195,9 +197,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
 			multi[i].func(multi[i].probe_private, call_private,
 				mdata->format, &args);
 		}
-	rcu_read_unlock_sched();
+	rcu_read_unlock_sched_notrace();
 }
-EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
 
 static void free_old_closure(struct rcu_head *head)
 {
@@ -416,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
 	e->single.probe_private = NULL;
 	e->multi = NULL;
 	e->ptype = 0;
+	e->format_allocated = 0;
 	e->refcount = 0;
 	e->rcu_pending = 0;
 	hlist_add_head(&e->hlist, head);
@@ -447,6 +449,8 @@ static int remove_marker(const char *name)
 	if (e->single.func != __mark_empty_function)
 		return -EBUSY;
 	hlist_del(&e->hlist);
+	if (e->format_allocated)
+		kfree(e->format);
 	/* Make sure the call_rcu has been executed */
 	if (e->rcu_pending)
 		rcu_barrier_sched();
@@ -457,57 +461,34 @@ static int remove_marker(const char *name)
 /*
  * Set the mark_entry format to the format found in the element.
  */
-static int marker_set_format(struct marker_entry **entry, const char *format)
+static int marker_set_format(struct marker_entry *entry, const char *format)
 {
-	struct marker_entry *e;
-	size_t name_len = strlen((*entry)->name) + 1;
-	size_t format_len = strlen(format) + 1;
-
-
-	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
-			GFP_KERNEL);
-	if (!e)
+	entry->format = kstrdup(format, GFP_KERNEL);
+	if (!entry->format)
 		return -ENOMEM;
-	memcpy(&e->name[0], (*entry)->name, name_len);
-	e->format = &e->name[name_len];
-	memcpy(e->format, format, format_len);
-	if (strcmp(e->format, MARK_NOARGS) == 0)
-		e->call = marker_probe_cb_noarg;
-	else
-		e->call = marker_probe_cb;
-	e->single = (*entry)->single;
-	e->multi = (*entry)->multi;
-	e->ptype = (*entry)->ptype;
-	e->refcount = (*entry)->refcount;
-	e->rcu_pending = 0;
-	hlist_add_before(&e->hlist, &(*entry)->hlist);
-	hlist_del(&(*entry)->hlist);
-	/* Make sure the call_rcu has been executed */
-	if ((*entry)->rcu_pending)
-		rcu_barrier_sched();
-	kfree(*entry);
-	*entry = e;
+	entry->format_allocated = 1;
+
 	trace_mark(core_marker_format, "name %s format %s",
-			e->name, e->format);
+			entry->name, entry->format);
 	return 0;
 }
 
 /*
  * Sets the probe callback corresponding to one marker.
  */
-static int set_marker(struct marker_entry **entry, struct marker *elem,
+static int set_marker(struct marker_entry *entry, struct marker *elem,
 		int active)
 {
-	int ret;
-	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+	int ret = 0;
+	WARN_ON(strcmp(entry->name, elem->name) != 0);
 
-	if ((*entry)->format) {
-		if (strcmp((*entry)->format, elem->format) != 0) {
+	if (entry->format) {
+		if (strcmp(entry->format, elem->format) != 0) {
 			printk(KERN_NOTICE
 				"Format mismatch for probe %s "
 				"(%s), marker (%s)\n",
-				(*entry)->name,
-				(*entry)->format,
+				entry->name,
+				entry->format,
 				elem->format);
 			return -EPERM;
 		}
@@ -523,37 +504,67 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
523 * pass from a "safe" callback (with argument) to an "unsafe" 504 * pass from a "safe" callback (with argument) to an "unsafe"
524 * callback (does not set arguments). 505 * callback (does not set arguments).
525 */ 506 */
526 elem->call = (*entry)->call; 507 elem->call = entry->call;
527 /* 508 /*
528 * Sanity check : 509 * Sanity check :
529 * We only update the single probe private data when the ptr is 510 * We only update the single probe private data when the ptr is
530 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) 511 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
531 */ 512 */
532 WARN_ON(elem->single.func != __mark_empty_function 513 WARN_ON(elem->single.func != __mark_empty_function
533 && elem->single.probe_private 514 && elem->single.probe_private != entry->single.probe_private
534 != (*entry)->single.probe_private && 515 && !elem->ptype);
535 !elem->ptype); 516 elem->single.probe_private = entry->single.probe_private;
536 elem->single.probe_private = (*entry)->single.probe_private;
537 /* 517 /*
538 * Make sure the private data is valid when we update the 518 * Make sure the private data is valid when we update the
539 * single probe ptr. 519 * single probe ptr.
540 */ 520 */
541 smp_wmb(); 521 smp_wmb();
542 elem->single.func = (*entry)->single.func; 522 elem->single.func = entry->single.func;
543 /* 523 /*
544 * We also make sure that the new probe callbacks array is consistent 524 * We also make sure that the new probe callbacks array is consistent
545 * before setting a pointer to it. 525 * before setting a pointer to it.
546 */ 526 */
547 rcu_assign_pointer(elem->multi, (*entry)->multi); 527 rcu_assign_pointer(elem->multi, entry->multi);
548 /* 528 /*
549 * Update the function or multi probe array pointer before setting the 529 * Update the function or multi probe array pointer before setting the
550 * ptype. 530 * ptype.
551 */ 531 */
552 smp_wmb(); 532 smp_wmb();
553 elem->ptype = (*entry)->ptype; 533 elem->ptype = entry->ptype;
534
535 if (elem->tp_name && (active ^ elem->state)) {
536 WARN_ON(!elem->tp_cb);
537 /*
538 * It is ok to directly call the probe registration because type
539 * checking has been done in the __trace_mark_tp() macro.
540 */
541
542 if (active) {
543 /*
544 * try_module_get should always succeed because we hold
545 * lock_module() to get the tp_cb address.
546 */
547 ret = try_module_get(__module_text_address(
548 (unsigned long)elem->tp_cb));
549 BUG_ON(!ret);
550 ret = tracepoint_probe_register_noupdate(
551 elem->tp_name,
552 elem->tp_cb);
553 } else {
554 ret = tracepoint_probe_unregister_noupdate(
555 elem->tp_name,
556 elem->tp_cb);
557 /*
558 * tracepoint_probe_update_all() must be called
559 * before the module containing tp_cb is unloaded.
560 */
561 module_put(__module_text_address(
562 (unsigned long)elem->tp_cb));
563 }
564 }
554 elem->state = active; 565 elem->state = active;
555 566
556 return 0; 567 return ret;
557} 568}
558 569
559/* 570/*
@@ -564,7 +575,24 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
  */
 static void disable_marker(struct marker *elem)
 {
+	int ret;
+
 	/* leave "call" as is. It is known statically. */
+	if (elem->tp_name && elem->state) {
+		WARN_ON(!elem->tp_cb);
+		/*
+		 * It is ok to directly call the probe registration because type
+		 * checking has been done in the __trace_mark_tp() macro.
+		 */
+		ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
+			elem->tp_cb);
+		WARN_ON(ret);
+		/*
+		 * tracepoint_probe_update_all() must be called
+		 * before the module containing tp_cb is unloaded.
+		 */
+		module_put(__module_text_address((unsigned long)elem->tp_cb));
+	}
 	elem->state = 0;
 	elem->single.func = __mark_empty_function;
 	/* Update the function before setting the ptype */
@@ -594,8 +622,7 @@ void marker_update_probe_range(struct marker *begin,
 	for (iter = begin; iter < end; iter++) {
 		mark_entry = get_marker(iter->name);
 		if (mark_entry) {
-			set_marker(&mark_entry, iter,
-				   !!mark_entry->refcount);
+			set_marker(mark_entry, iter, !!mark_entry->refcount);
 			/*
 			 * ignore error, continue
 			 */
@@ -629,6 +656,7 @@ static void marker_update_probes(void)
 	marker_update_probe_range(__start___markers, __stop___markers);
 	/* Markers in modules. */
 	module_update_markers();
+	tracepoint_probe_update_all();
 }
 
 /**
@@ -657,7 +685,7 @@ int marker_probe_register(const char *name, const char *format,
 		ret = PTR_ERR(entry);
 	} else if (format) {
 		if (!entry->format)
-			ret = marker_set_format(&entry, format);
+			ret = marker_set_format(entry, format);
 		else if (strcmp(entry->format, format))
 			ret = -EPERM;
 	}
@@ -676,10 +704,11 @@ int marker_probe_register(const char *name, const char *format,
 		goto end;
 	}
 	mutex_unlock(&markers_mutex);
-	marker_update_probes();		/* may update entry */
+	marker_update_probes();
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
-	WARN_ON(!entry);
+	if (!entry)
+		goto end;
 	if (entry->rcu_pending)
 		rcu_barrier_sched();
 	entry->oldptr = old;
@@ -720,7 +749,7 @@ int marker_probe_unregister(const char *name,
 		rcu_barrier_sched();
 	old = marker_entry_remove_probe(entry, probe, probe_private);
 	mutex_unlock(&markers_mutex);
-	marker_update_probes();		/* may update entry */
+	marker_update_probes();
 	mutex_lock(&markers_mutex);
 	entry = get_marker(name);
 	if (!entry)
@@ -801,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
801 rcu_barrier_sched(); 830 rcu_barrier_sched();
802 old = marker_entry_remove_probe(entry, NULL, probe_private); 831 old = marker_entry_remove_probe(entry, NULL, probe_private);
803 mutex_unlock(&markers_mutex); 832 mutex_unlock(&markers_mutex);
804 marker_update_probes(); /* may update entry */ 833 marker_update_probes();
805 mutex_lock(&markers_mutex); 834 mutex_lock(&markers_mutex);
806 entry = get_marker_from_private_data(probe, probe_private); 835 entry = get_marker_from_private_data(probe, probe_private);
807 WARN_ON(!entry); 836 if (!entry)
837 goto end;
808 if (entry->rcu_pending) 838 if (entry->rcu_pending)
809 rcu_barrier_sched(); 839 rcu_barrier_sched();
810 entry->oldptr = old; 840 entry->oldptr = old;
@@ -848,8 +878,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
848 if (!e->ptype) { 878 if (!e->ptype) {
849 if (num == 0 && e->single.func == probe) 879 if (num == 0 && e->single.func == probe)
850 return e->single.probe_private; 880 return e->single.probe_private;
851 else
852 break;
853 } else { 881 } else {
854 struct marker_probe_closure *closure; 882 struct marker_probe_closure *closure;
855 int match = 0; 883 int match = 0;
@@ -861,8 +889,42 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
861 return closure[i].probe_private; 889 return closure[i].probe_private;
862 } 890 }
863 } 891 }
892 break;
864 } 893 }
865 } 894 }
866 return ERR_PTR(-ENOENT); 895 return ERR_PTR(-ENOENT);
867} 896}
868EXPORT_SYMBOL_GPL(marker_get_private_data); 897EXPORT_SYMBOL_GPL(marker_get_private_data);
898
899#ifdef CONFIG_MODULES
900
901int marker_module_notify(struct notifier_block *self,
902 unsigned long val, void *data)
903{
904 struct module *mod = data;
905
906 switch (val) {
907 case MODULE_STATE_COMING:
908 marker_update_probe_range(mod->markers,
909 mod->markers + mod->num_markers);
910 break;
911 case MODULE_STATE_GOING:
912 marker_update_probe_range(mod->markers,
913 mod->markers + mod->num_markers);
914 break;
915 }
916 return 0;
917}
918
919struct notifier_block marker_module_nb = {
920 .notifier_call = marker_module_notify,
921 .priority = 0,
922};
923
924static int init_markers(void)
925{
926 return register_module_notifier(&marker_module_nb);
927}
928__initcall(init_markers);
929
930#endif /* CONFIG_MODULES */
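
The registration path above leans on a "noupdate + update_all" split: marker_update_probes() touches many markers, and tracepoint_probe_unregister_noupdate() only queues each change so that one tracepoint_probe_update_all() pass can publish the whole batch. A minimal userspace sketch of that batching idea follows; the function names, the fixed-size tables, and MAX_PROBES are illustrative stand-ins, not the kernel API.

#include <stdio.h>

#define MAX_PROBES 8

/* queued-but-not-live registrations, mirroring the *_noupdate calls */
static void (*pending[MAX_PROBES])(void);
static int npending;

/* probes that actually fire, published in one batch */
static void (*active[MAX_PROBES])(void);
static int nactive;

static void probe_register_noupdate(void (*fn)(void))
{
	pending[npending++] = fn;	/* queued, nothing fires yet */
}

static void probe_update_all(void)
{
	/* one publish step, however many registrations were queued */
	while (npending > 0)
		active[nactive++] = pending[--npending];
}

static void my_probe(void)
{
	puts("probe fired");
}

int main(void)
{
	probe_register_noupdate(my_probe);
	/* nothing is live yet: nactive == 0 */
	probe_update_all();
	for (int i = 0; i < nactive; i++)
		active[i]();
	return 0;
}
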
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c20..89bcf7c1327d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod,
2184 struct mod_debug *debug; 2184 struct mod_debug *debug;
2185 unsigned int num_debug; 2185 unsigned int num_debug;
2186 2186
2187#ifdef CONFIG_MARKERS
2188 marker_update_probe_range(mod->markers,
2189 mod->markers + mod->num_markers);
2190#endif
2191 debug = section_objs(hdr, sechdrs, secstrings, "__verbose", 2187 debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
2192 sizeof(*debug), &num_debug); 2188 sizeof(*debug), &num_debug);
2193 dynamic_printk_setup(debug, num_debug); 2189 dynamic_printk_setup(debug, num_debug);
2194
2195#ifdef CONFIG_TRACEPOINTS
2196 tracepoint_update_probe_range(mod->tracepoints,
2197 mod->tracepoints + mod->num_tracepoints);
2198#endif
2199 } 2190 }
2200 2191
2201 /* sechdrs[0].sh_size is always zero */ 2192 /* sechdrs[0].sh_size is always zero */
2202 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", 2193 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2203 sizeof(*mseg), &num_mcount); 2194 sizeof(*mseg), &num_mcount);
2204 ftrace_init_module(mseg, mseg + num_mcount); 2195 ftrace_init_module(mod, mseg, mseg + num_mcount);
2205 2196
2206 err = module_finalize(hdr, sechdrs, mod); 2197 err = module_finalize(hdr, sechdrs, mod);
2207 if (err < 0) 2198 if (err < 0)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..4f45d4b658ef 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
59 * We also put the fastpath first in the kernel image, to make sure the 59 * We also put the fastpath first in the kernel image, to make sure the
60 * branch is predicted by the CPU as default-untaken. 60 * branch is predicted by the CPU as default-untaken.
61 */ 61 */
62static void noinline __sched 62static __used noinline void __sched
63__mutex_lock_slowpath(atomic_t *lock_count); 63__mutex_lock_slowpath(atomic_t *lock_count);
64 64
65/*** 65/***
@@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
96EXPORT_SYMBOL(mutex_lock); 96EXPORT_SYMBOL(mutex_lock);
97#endif 97#endif
98 98
99static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 99static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
100 100
101/*** 101/***
102 * mutex_unlock - release the mutex 102 * mutex_unlock - release the mutex
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
184 } 184 }
185 185
186done: 186done:
187 lock_acquired(&lock->dep_map); 187 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 188 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 189 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
190 debug_mutex_set_owner(lock, task_thread_info(task)); 190 debug_mutex_set_owner(lock, task_thread_info(task));
@@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
268/* 268/*
269 * Release the lock, slowpath: 269 * Release the lock, slowpath:
270 */ 270 */
271static noinline void 271static __used noinline void
272__mutex_unlock_slowpath(atomic_t *lock_count) 272__mutex_unlock_slowpath(atomic_t *lock_count)
273{ 273{
274 __mutex_unlock_common_slowpath(lock_count, 1); 274 __mutex_unlock_common_slowpath(lock_count, 1);
@@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
313} 313}
314EXPORT_SYMBOL(mutex_lock_killable); 314EXPORT_SYMBOL(mutex_lock_killable);
315 315
316static noinline void __sched 316static __used noinline void __sched
317__mutex_lock_slowpath(atomic_t *lock_count) 317__mutex_lock_slowpath(atomic_t *lock_count)
318{ 318{
319 struct mutex *lock = container_of(lock_count, struct mutex, count); 319 struct mutex *lock = container_of(lock_count, struct mutex, count);
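
The __used annotations above matter because these slowpaths are reached only from the arch fastpath's inline assembly: with no visible C caller, the optimizer may discard a static noinline function outright and the link then fails. A minimal sketch of the failure mode, assuming x86 for the asm mnemonic:

/* Build with: gcc -O2 -c used_sketch.c && nm used_sketch.o
 * Without the "used" attribute, GCC is free to drop slowpath(): its only
 * reference is a name inside an asm template, invisible to the optimizer. */
static __attribute__((used, noinline)) void slowpath(void)
{
	/* the contended path would live here */
}

void fastpath(void)
{
	/* stand-in for the mutex fastpath: the call target is spelled out
	 * in assembly, so the compiler sees no C-level use of slowpath() */
	asm volatile("call slowpath");
}
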
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4282c0a40a57..61d5aa5eced3 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
82 82
83 while (nb && nr_to_call) { 83 while (nb && nr_to_call) {
84 next_nb = rcu_dereference(nb->next); 84 next_nb = rcu_dereference(nb->next);
85
86#ifdef CONFIG_DEBUG_NOTIFIERS
87 if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
88 WARN(1, "Invalid notifier called!");
89 nb = next_nb;
90 continue;
91 }
92#endif
85 ret = nb->notifier_call(nb, val, v); 93 ret = nb->notifier_call(nb, val, v);
86 94
87 if (nr_calls) 95 if (nr_calls)
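
The CONFIG_DEBUG_NOTIFIERS hunk guards against a classic failure: a notifier_block owned by a module that unloaded without unregistering leaves a callback pointer into freed text. Below is a userspace model of the chain walk with a stand-in predicate for func_ptr_is_kernel_text(); the types are simplified and ptr_is_valid_text() is illustrative only.

#include <stdio.h>
#include <stddef.h>

struct notifier_block {
	int (*notifier_call)(struct notifier_block *, unsigned long, void *);
	struct notifier_block *next;
};

/* stand-in for func_ptr_is_kernel_text(); here only NULL is "bad" */
static int ptr_is_valid_text(void *p)
{
	return p != NULL;
}

static int notifier_call_chain(struct notifier_block *nb,
			       unsigned long val, void *v)
{
	int ret = 0;

	while (nb) {
		struct notifier_block *next = nb->next;	/* fetch first */

		if (!ptr_is_valid_text((void *)nb->notifier_call)) {
			fprintf(stderr, "Invalid notifier, skipping!\n");
			nb = next;
			continue;
		}
		ret = nb->notifier_call(nb, val, v);
		nb = next;
	}
	return ret;
}

static int hello(struct notifier_block *self, unsigned long val, void *v)
{
	printf("notified: %lu\n", val);
	return 0;
}

int main(void)
{
	struct notifier_block bad = { NULL, NULL };	/* stale callback */
	struct notifier_block good = { hello, &bad };

	return notifier_call_chain(&good, 1, NULL);
}
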
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 895337b16a24..3f4377e0aa04 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -58,21 +58,21 @@ void thread_group_cputime(
58 struct task_struct *tsk, 58 struct task_struct *tsk,
59 struct task_cputime *times) 59 struct task_cputime *times)
60{ 60{
61 struct signal_struct *sig; 61 struct task_cputime *totals, *tot;
62 int i; 62 int i;
63 struct task_cputime *tot;
64 63
65 sig = tsk->signal; 64 totals = tsk->signal->cputime.totals;
66 if (unlikely(!sig) || !sig->cputime.totals) { 65 if (!totals) {
67 times->utime = tsk->utime; 66 times->utime = tsk->utime;
68 times->stime = tsk->stime; 67 times->stime = tsk->stime;
69 times->sum_exec_runtime = tsk->se.sum_exec_runtime; 68 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
70 return; 69 return;
71 } 70 }
71
72 times->stime = times->utime = cputime_zero; 72 times->stime = times->utime = cputime_zero;
73 times->sum_exec_runtime = 0; 73 times->sum_exec_runtime = 0;
74 for_each_possible_cpu(i) { 74 for_each_possible_cpu(i) {
75 tot = per_cpu_ptr(tsk->signal->cputime.totals, i); 75 tot = per_cpu_ptr(totals, i);
76 times->utime = cputime_add(times->utime, tot->utime); 76 times->utime = cputime_add(times->utime, tot->utime);
77 times->stime = cputime_add(times->stime, tot->stime); 77 times->stime = cputime_add(times->stime, tot->stime);
78 times->sum_exec_runtime += tot->sum_exec_runtime; 78 times->sum_exec_runtime += tot->sum_exec_runtime;
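
The rewritten thread_group_cputime() fetches the shared per-CPU totals pointer once, falls back to the task's own counters when group accounting is off, and otherwise sums each CPU's contribution. A userspace sketch of that accumulation; NCPUS and the reduced field set are illustrative stand-ins for the kernel's per-CPU machinery.

#include <stdio.h>

#define NCPUS 4	/* illustrative; the kernel iterates possible CPUs */

struct cputime { unsigned long utime, stime; };

static void sum_cputime(const struct cputime *totals /* per-CPU array */,
			struct cputime *out)
{
	out->utime = out->stime = 0;
	if (!totals)		/* no group accounting: caller falls back */
		return;
	for (int cpu = 0; cpu < NCPUS; cpu++) {
		out->utime += totals[cpu].utime;
		out->stime += totals[cpu].stime;
	}
}

int main(void)
{
	struct cputime per_cpu[NCPUS] = { {1, 2}, {3, 4}, {5, 6}, {7, 8} };
	struct cputime t;

	sum_cputime(per_cpu, &t);
	printf("utime=%lu stime=%lu\n", t.utime, t.stime);
	return 0;
}
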
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index c9d74083746f..f77d3819ef57 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,7 +22,6 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -257,7 +256,7 @@ static int create_image(int platform_mode)
257 256
258int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
259{ 258{
260 int error, ftrace_save; 259 int error;
261 260
262 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
263 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -269,7 +268,6 @@ int hibernation_snapshot(int platform_mode)
269 goto Close; 268 goto Close;
270 269
271 suspend_console(); 270 suspend_console();
272 ftrace_save = __ftrace_enabled_save();
273 error = device_suspend(PMSG_FREEZE); 271 error = device_suspend(PMSG_FREEZE);
274 if (error) 272 if (error)
275 goto Recover_platform; 273 goto Recover_platform;
@@ -299,7 +297,6 @@ int hibernation_snapshot(int platform_mode)
299 Resume_devices: 297 Resume_devices:
300 device_resume(in_suspend ? 298 device_resume(in_suspend ?
301 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 299 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
302 __ftrace_enabled_restore(ftrace_save);
303 resume_console(); 300 resume_console();
304 Close: 301 Close:
305 platform_end(platform_mode); 302 platform_end(platform_mode);
@@ -370,11 +367,10 @@ static int resume_target_kernel(void)
370 367
371int hibernation_restore(int platform_mode) 368int hibernation_restore(int platform_mode)
372{ 369{
373 int error, ftrace_save; 370 int error;
374 371
375 pm_prepare_console(); 372 pm_prepare_console();
376 suspend_console(); 373 suspend_console();
377 ftrace_save = __ftrace_enabled_save();
378 error = device_suspend(PMSG_QUIESCE); 374 error = device_suspend(PMSG_QUIESCE);
379 if (error) 375 if (error)
380 goto Finish; 376 goto Finish;
@@ -389,7 +385,6 @@ int hibernation_restore(int platform_mode)
389 platform_restore_cleanup(platform_mode); 385 platform_restore_cleanup(platform_mode);
390 device_resume(PMSG_RECOVER); 386 device_resume(PMSG_RECOVER);
391 Finish: 387 Finish:
392 __ftrace_enabled_restore(ftrace_save);
393 resume_console(); 388 resume_console();
394 pm_restore_console(); 389 pm_restore_console();
395 return error; 390 return error;
@@ -402,7 +397,7 @@ int hibernation_restore(int platform_mode)
402 397
403int hibernation_platform_enter(void) 398int hibernation_platform_enter(void)
404{ 399{
405 int error, ftrace_save; 400 int error;
406 401
407 if (!hibernation_ops) 402 if (!hibernation_ops)
408 return -ENOSYS; 403 return -ENOSYS;
@@ -417,7 +412,6 @@ int hibernation_platform_enter(void)
417 goto Close; 412 goto Close;
418 413
419 suspend_console(); 414 suspend_console();
420 ftrace_save = __ftrace_enabled_save();
421 error = device_suspend(PMSG_HIBERNATE); 415 error = device_suspend(PMSG_HIBERNATE);
422 if (error) { 416 if (error) {
423 if (hibernation_ops->recover) 417 if (hibernation_ops->recover)
@@ -452,7 +446,6 @@ int hibernation_platform_enter(void)
452 hibernation_ops->finish(); 446 hibernation_ops->finish();
453 Resume_devices: 447 Resume_devices:
454 device_resume(PMSG_RESTORE); 448 device_resume(PMSG_RESTORE);
455 __ftrace_enabled_restore(ftrace_save);
456 resume_console(); 449 resume_console();
457 Close: 450 Close:
458 hibernation_ops->end(); 451 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b8f7ce9473e8..613f16941b85 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -22,7 +22,6 @@
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/vmstat.h> 23#include <linux/vmstat.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/ftrace.h>
26 25
27#include "power.h" 26#include "power.h"
28 27
@@ -317,7 +316,7 @@ static int suspend_enter(suspend_state_t state)
317 */ 316 */
318int suspend_devices_and_enter(suspend_state_t state) 317int suspend_devices_and_enter(suspend_state_t state)
319{ 318{
320 int error, ftrace_save; 319 int error;
321 320
322 if (!suspend_ops) 321 if (!suspend_ops)
323 return -ENOSYS; 322 return -ENOSYS;
@@ -328,7 +327,6 @@ int suspend_devices_and_enter(suspend_state_t state)
328 goto Close; 327 goto Close;
329 } 328 }
330 suspend_console(); 329 suspend_console();
331 ftrace_save = __ftrace_enabled_save();
332 suspend_test_start(); 330 suspend_test_start();
333 error = device_suspend(PMSG_SUSPEND); 331 error = device_suspend(PMSG_SUSPEND);
334 if (error) { 332 if (error) {
@@ -360,7 +358,6 @@ int suspend_devices_and_enter(suspend_state_t state)
360 suspend_test_start(); 358 suspend_test_start();
361 device_resume(PMSG_RESUME); 359 device_resume(PMSG_RESUME);
362 suspend_test_finish("resume devices"); 360 suspend_test_finish("resume devices");
363 __ftrace_enabled_restore(ftrace_save);
364 resume_console(); 361 resume_console();
365 Close: 362 Close:
366 if (suspend_ops->end) 363 if (suspend_ops->end)
diff --git a/kernel/profile.c b/kernel/profile.c
index 5b7d1ac7124c..7f93a5042d3b 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
544}; 544};
545 545
546#ifdef CONFIG_SMP 546#ifdef CONFIG_SMP
547static inline void profile_nop(void *unused) 547static void profile_nop(void *unused)
548{ 548{
549} 549}
550 550
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..e503a002f330 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
191 191
192 /* OK, time to rat on our buddy... */ 192 /* OK, time to rat on our buddy... */
193 193
194 printk(KERN_ERR "RCU detected CPU stalls:"); 194 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
195 for_each_possible_cpu(cpu) { 195 for_each_possible_cpu(cpu) {
196 if (cpu_isset(cpu, rcp->cpumask)) 196 if (cpu_isset(cpu, rcp->cpumask))
197 printk(" %d", cpu); 197 printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
204{ 204{
205 unsigned long flags; 205 unsigned long flags;
206 206
207 printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", 207 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
208 smp_processor_id(), jiffies, 208 smp_processor_id(), jiffies,
209 jiffies - rcp->gp_start); 209 jiffies - rcp->gp_start);
210 dump_stack(); 210 dump_stack();
diff --git a/kernel/sched.c b/kernel/sched.c
index 338340a3fb89..bb827651558e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
118 */ 118 */
119#define RUNTIME_INF ((u64)~0ULL) 119#define RUNTIME_INF ((u64)~0ULL)
120 120
121DEFINE_TRACE(sched_wait_task);
122DEFINE_TRACE(sched_wakeup);
123DEFINE_TRACE(sched_wakeup_new);
124DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task);
126
121#ifdef CONFIG_SMP 127#ifdef CONFIG_SMP
122/* 128/*
123 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) 129 * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
@@ -4171,7 +4177,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
4171 4177
4172 if (p == rq->idle) { 4178 if (p == rq->idle) {
4173 p->stime = cputime_add(p->stime, steal); 4179 p->stime = cputime_add(p->stime, steal);
4174 account_group_system_time(p, steal);
4175 if (atomic_read(&rq->nr_iowait) > 0) 4180 if (atomic_read(&rq->nr_iowait) > 0)
4176 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4181 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4177 else 4182 else
@@ -4307,7 +4312,7 @@ void __kprobes sub_preempt_count(int val)
4307 /* 4312 /*
4308 * Underflow? 4313 * Underflow?
4309 */ 4314 */
4310 if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) 4315 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
4311 return; 4316 return;
4312 /* 4317 /*
4313 * Is the spinlock portion underflowing? 4318 * Is the spinlock portion underflowing?
@@ -5864,6 +5869,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5864 * The idle tasks have their own, simple scheduling class: 5869 * The idle tasks have their own, simple scheduling class:
5865 */ 5870 */
5866 idle->sched_class = &idle_sched_class; 5871 idle->sched_class = &idle_sched_class;
5872 ftrace_retfunc_init_task(idle);
5867} 5873}
5868 5874
5869/* 5875/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc654455..e9afe63da24b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
41 41
42static struct kmem_cache *sigqueue_cachep; 42static struct kmem_cache *sigqueue_cachep;
43 43
44DEFINE_TRACE(sched_signal_send);
45
44static void __user *sig_handler(struct task_struct *t, int sig) 46static void __user *sig_handler(struct task_struct *t, int sig)
45{ 47{
46 return t->sighand->action[sig - 1].sa.sa_handler; 48 return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 3953e4aed733..884e6cd2769c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
164/* 164/*
165 * Zero means infinite timeout - no checking done: 165 * Zero means infinite timeout - no checking done:
166 */ 166 */
167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; 167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
168 168
169unsigned long __read_mostly sysctl_hung_task_warnings = 10; 169unsigned long __read_mostly sysctl_hung_task_warnings = 10;
170 170
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d16..5fc3a0cfb994 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
858 struct task_cputime cputime; 858 struct task_cputime cputime;
859 cputime_t cutime, cstime; 859 cputime_t cutime, cstime;
860 860
861 spin_lock_irq(&current->sighand->siglock);
862 thread_group_cputime(current, &cputime); 861 thread_group_cputime(current, &cputime);
862 spin_lock_irq(&current->sighand->siglock);
863 cutime = current->signal->cutime; 863 cutime = current->signal->cutime;
864 cstime = current->signal->cstime; 864 cstime = current->signal->cstime;
865 spin_unlock_irq(&current->sighand->siglock); 865 spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..65d4a9ba79e4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
484 .proc_handler = &ftrace_enable_sysctl, 484 .proc_handler = &ftrace_enable_sysctl,
485 }, 485 },
486#endif 486#endif
487#ifdef CONFIG_TRACING
488 {
489 .ctl_name = CTL_UNNUMBERED,
490 .procname = "ftrace_dump_on_oops",
491 .data = &ftrace_dump_on_oops,
492 .maxlen = sizeof(int),
493 .mode = 0644,
494 .proc_handler = &proc_dointvec,
495 },
496#endif
487#ifdef CONFIG_MODULES 497#ifdef CONFIG_MODULES
488 { 498 {
489 .ctl_name = KERN_MODPROBE, 499 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..9cbf7761f498 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,12 +3,25 @@
3# select HAVE_FUNCTION_TRACER: 3# select HAVE_FUNCTION_TRACER:
4# 4#
5 5
6config USER_STACKTRACE_SUPPORT
7 bool
8
6config NOP_TRACER 9config NOP_TRACER
7 bool 10 bool
8 11
9config HAVE_FUNCTION_TRACER 12config HAVE_FUNCTION_TRACER
10 bool 13 bool
11 14
15config HAVE_FUNCTION_RET_TRACER
16 bool
17
18config HAVE_FUNCTION_TRACE_MCOUNT_TEST
19 bool
20 help
21 This gets selected when the arch tests the function_trace_stop
22 variable at the mcount call site. Otherwise, this variable
23 is tested by the called function.
24
12config HAVE_DYNAMIC_FTRACE 25config HAVE_DYNAMIC_FTRACE
13 bool 26 bool
14 27
@@ -47,6 +60,16 @@ config FUNCTION_TRACER
47 (the bootup default), then the overhead of the instructions is very 60 (the bootup default), then the overhead of the instructions is very
48 small and not measurable even in micro-benchmarks. 61 small and not measurable even in micro-benchmarks.
49 62
63config FUNCTION_RET_TRACER
 64 bool "Kernel Function Return Tracer"
65 depends on HAVE_FUNCTION_RET_TRACER
66 depends on FUNCTION_TRACER
67 help
68 Enable the kernel to trace a function at its return.
 69 Its first purpose is to trace the duration of functions.
70 This is done by setting the current return address on the thread
71 info structure of the current task.
72
50config IRQSOFF_TRACER 73config IRQSOFF_TRACER
51 bool "Interrupts-off Latency Tracer" 74 bool "Interrupts-off Latency Tracer"
52 default n 75 default n
@@ -138,6 +161,59 @@ config BOOT_TRACER
138 selected, because the self-tests are an initcall as well and that 161 selected, because the self-tests are an initcall as well and that
139 would invalidate the boot trace. ) 162 would invalidate the boot trace. )
140 163
164config TRACE_BRANCH_PROFILING
165 bool "Trace likely/unlikely profiler"
166 depends on DEBUG_KERNEL
167 select TRACING
168 help
 169 This tracer profiles all the likely and unlikely macros
170 in the kernel. It will display the results in:
171
172 /debugfs/tracing/profile_annotated_branch
173
 174 Note: this will add a significant overhead; only turn this
175 on if you need to profile the system's use of these macros.
176
177 Say N if unsure.
178
179config PROFILE_ALL_BRANCHES
180 bool "Profile all if conditionals"
181 depends on TRACE_BRANCH_PROFILING
182 help
 183 This tracer profiles all branch conditions. Every if ()
 184 in the kernel is recorded, along with whether it hit or missed.
185 The results will be displayed in:
186
187 /debugfs/tracing/profile_branch
188
 189 This configuration, when enabled, will impose a significant
 190 overhead on the system. It should only be enabled when the
 191 system is to be analyzed.
192
193 Say N if unsure.
194
195config TRACING_BRANCHES
196 bool
197 help
198 Selected by tracers that will trace the likely and unlikely
199 conditions. This prevents the tracers themselves from being
200 profiled. Profiling the tracing infrastructure can only happen
201 when the likelys and unlikelys are not being traced.
202
203config BRANCH_TRACER
204 bool "Trace likely/unlikely instances"
205 depends on TRACE_BRANCH_PROFILING
206 select TRACING_BRANCHES
207 help
208 This traces the events of likely and unlikely condition
209 calls in the kernel. The difference between this and the
210 "Trace likely/unlikely profiler" is that this is not a
211 histogram of the callers, but actually places the calling
212 events into a running trace buffer to see when and where the
213 events happened, as well as their results.
214
215 Say N if unsure.
216
141config STACK_TRACER 217config STACK_TRACER
142 bool "Trace max stack" 218 bool "Trace max stack"
143 depends on HAVE_FUNCTION_TRACER 219 depends on HAVE_FUNCTION_TRACER
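
What TRACE_BRANCH_PROFILING boils down to: every annotated condition is funneled through a helper that tallies whether the programmer's prediction matched reality. A compilable userspace approximation is below; my_likely(), profile_true(), and the single bstat slot are illustrative (the kernel keeps one record per call site, emitted into a dedicated section).

#include <stdio.h>

struct branch_stat {
	const char *file;
	int line;
	long correct, incorrect;
};

static struct branch_stat bstat = { __FILE__, 0, 0, 0 };

static int profile_true(int cond, struct branch_stat *st, int line)
{
	st->line = line;
	if (cond)
		st->correct++;		/* likely() predicted true and it was */
	else
		st->incorrect++;
	return cond;
}

#define my_likely(x) __builtin_expect(profile_true(!!(x), &bstat, __LINE__), 1)

int main(void)
{
	for (int i = 0; i < 100; i++)
		if (my_likely(i != 50))	/* wrong exactly once */
			continue;
	printf("%s:%d correct=%ld incorrect=%ld\n",
	       bstat.file, bstat.line, bstat.correct, bstat.incorrect);
	return 0;
}
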
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..1a8c9259dc69 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13# If unlikely tracing is enabled, do not trace these files
14ifdef CONFIG_TRACING_BRANCHES
15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
16endif
17
13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 20
@@ -24,5 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o 29obj-$(CONFIG_STACK_TRACER) += trace_stack.o
25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 30obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
27 34
28libftrace-y := ftrace.o 35libftrace-y := ftrace.o
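
The -DDISABLE_BRANCH_PROFILING added above is how the tracer exempts its own objects: with the macro defined, the annotations collapse back to bare __builtin_expect() and nothing in the trace path recurses into the profiler. A compilable model of that switch; profile_cond() and the counter are illustrative stand-ins.

#include <stdio.h>

static long evaluations;

static int profile_cond(int c)
{
	evaluations++;			/* the instrumented variant counts */
	return c;
}

#ifdef DISABLE_BRANCH_PROFILING
# define likely(x)	__builtin_expect(!!(x), 1)
#else
# define likely(x)	__builtin_expect(profile_cond(!!(x)), 1)
#endif

int main(void)
{
	for (int i = 0; i < 10; i++)
		if (likely(i < 9))
			continue;
	/* gcc demo.c -> 10; gcc -DDISABLE_BRANCH_PROFILING demo.c -> 0 */
	printf("instrumented evaluations: %ld\n", evaluations);
	return 0;
}
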
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78db083390f0..53042f118f23 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,12 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* Quick disabling of function tracer. */
51int function_trace_stop;
52
53/* By default, current tracing type is normal tracing. */
54enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER;
55
50/* 56/*
51 * ftrace_disabled is set when an anomaly is discovered. 57 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 58 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +69,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 69
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 70static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 71ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
72ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
66 73
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 74static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 75{
@@ -88,8 +95,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 95void clear_ftrace_function(void)
89{ 96{
90 ftrace_trace_function = ftrace_stub; 97 ftrace_trace_function = ftrace_stub;
98 __ftrace_trace_function = ftrace_stub;
91} 99}
92 100
101#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
102/*
103 * For those archs that do not test ftrace_trace_stop in their
104 * mcount call site, we need to do it from C.
105 */
106static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
107{
108 if (function_trace_stop)
109 return;
110
111 __ftrace_trace_function(ip, parent_ip);
112}
113#endif
114
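
The #ifndef block just above supplies, in C, the test that some architectures perform in their mcount stub. A userspace model of the indirection, with the stop flag gating the chained call; the names mirror the patch but the surrounding scaffolding is illustrative.

#include <stdio.h>

static int function_trace_stop;

static void real_tracer(unsigned long ip, unsigned long parent_ip)
{
	printf("trace %lx <- %lx\n", ip, parent_ip);
}

static void (*chained_tracer)(unsigned long, unsigned long) = real_tracer;

/* the C fallback: check the stop flag, then chain to the real tracer */
static void trace_test_stop(unsigned long ip, unsigned long parent_ip)
{
	if (function_trace_stop)
		return;
	chained_tracer(ip, parent_ip);
}

int main(void)
{
	trace_test_stop(0x1000, 0x2000);	/* traced */
	function_trace_stop = 1;
	trace_test_stop(0x3000, 0x4000);	/* suppressed */
	return 0;
}
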
93static int __register_ftrace_function(struct ftrace_ops *ops) 115static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 116{
95 /* should not be called from interrupt context */ 117 /* should not be called from interrupt context */
@@ -110,10 +132,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
110 * For one func, simply call it directly. 132 * For one func, simply call it directly.
111 * For more than one func, call the chain. 133 * For more than one func, call the chain.
112 */ 134 */
135#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
113 if (ops->next == &ftrace_list_end) 136 if (ops->next == &ftrace_list_end)
114 ftrace_trace_function = ops->func; 137 ftrace_trace_function = ops->func;
115 else 138 else
116 ftrace_trace_function = ftrace_list_func; 139 ftrace_trace_function = ftrace_list_func;
140#else
141 if (ops->next == &ftrace_list_end)
142 __ftrace_trace_function = ops->func;
143 else
144 __ftrace_trace_function = ftrace_list_func;
145 ftrace_trace_function = ftrace_test_stop_func;
146#endif
117 } 147 }
118 148
119 spin_unlock(&ftrace_lock); 149 spin_unlock(&ftrace_lock);
@@ -152,8 +182,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152 182
153 if (ftrace_enabled) { 183 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 184 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 185 if (ftrace_list->next == &ftrace_list_end)
156 ftrace_list->next == &ftrace_list_end)
157 ftrace_trace_function = ftrace_list->func; 186 ftrace_trace_function = ftrace_list->func;
158 } 187 }
159 188
@@ -308,7 +337,7 @@ ftrace_record_ip(unsigned long ip)
308{ 337{
309 struct dyn_ftrace *rec; 338 struct dyn_ftrace *rec;
310 339
311 if (!ftrace_enabled || ftrace_disabled) 340 if (ftrace_disabled)
312 return NULL; 341 return NULL;
313 342
314 rec = ftrace_alloc_dyn_node(ip); 343 rec = ftrace_alloc_dyn_node(ip);
@@ -322,14 +351,58 @@ ftrace_record_ip(unsigned long ip)
322 return rec; 351 return rec;
323} 352}
324 353
325#define FTRACE_ADDR ((long)(ftrace_caller)) 354static void print_ip_ins(const char *fmt, unsigned char *p)
355{
356 int i;
357
358 printk(KERN_CONT "%s", fmt);
359
360 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
361 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
362}
363
364static void ftrace_bug(int failed, unsigned long ip)
365{
366 switch (failed) {
367 case -EFAULT:
368 FTRACE_WARN_ON_ONCE(1);
369 pr_info("ftrace faulted on modifying ");
370 print_ip_sym(ip);
371 break;
372 case -EINVAL:
373 FTRACE_WARN_ON_ONCE(1);
374 pr_info("ftrace failed to modify ");
375 print_ip_sym(ip);
376 print_ip_ins(" actual: ", (unsigned char *)ip);
377 printk(KERN_CONT "\n");
378 break;
379 case -EPERM:
380 FTRACE_WARN_ON_ONCE(1);
381 pr_info("ftrace faulted on writing ");
382 print_ip_sym(ip);
383 break;
384 default:
385 FTRACE_WARN_ON_ONCE(1);
386 pr_info("ftrace faulted on unknown error ");
387 print_ip_sym(ip);
388 }
389}
390
326 391
327static int 392static int
328__ftrace_replace_code(struct dyn_ftrace *rec, 393__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
329 unsigned char *nop, int enable)
330{ 394{
331 unsigned long ip, fl; 395 unsigned long ip, fl;
332 unsigned char *call, *old, *new; 396 unsigned long ftrace_addr;
397
398#ifdef CONFIG_FUNCTION_RET_TRACER
399 if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
400 ftrace_addr = (unsigned long)ftrace_caller;
401 else
402 ftrace_addr = (unsigned long)ftrace_return_caller;
403#else
404 ftrace_addr = (unsigned long)ftrace_caller;
405#endif
333 406
334 ip = rec->ip; 407 ip = rec->ip;
335 408
@@ -388,34 +461,28 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
388 } 461 }
389 } 462 }
390 463
391 call = ftrace_call_replace(ip, FTRACE_ADDR); 464 if (rec->flags & FTRACE_FL_ENABLED)
392 465 return ftrace_make_call(rec, ftrace_addr);
393 if (rec->flags & FTRACE_FL_ENABLED) { 466 else
394 old = nop; 467 return ftrace_make_nop(NULL, rec, ftrace_addr);
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
401 return ftrace_modify_code(ip, old, new);
402} 468}
403 469
404static void ftrace_replace_code(int enable) 470static void ftrace_replace_code(int enable)
405{ 471{
406 int i, failed; 472 int i, failed;
407 unsigned char *nop = NULL;
408 struct dyn_ftrace *rec; 473 struct dyn_ftrace *rec;
409 struct ftrace_page *pg; 474 struct ftrace_page *pg;
410 475
411 nop = ftrace_nop_replace();
412
413 for (pg = ftrace_pages_start; pg; pg = pg->next) { 476 for (pg = ftrace_pages_start; pg; pg = pg->next) {
414 for (i = 0; i < pg->index; i++) { 477 for (i = 0; i < pg->index; i++) {
415 rec = &pg->records[i]; 478 rec = &pg->records[i];
416 479
417 /* don't modify code that has already faulted */ 480 /*
418 if (rec->flags & FTRACE_FL_FAILED) 481 * Skip over free records and records that have
482 * failed.
483 */
484 if (rec->flags & FTRACE_FL_FREE ||
485 rec->flags & FTRACE_FL_FAILED)
419 continue; 486 continue;
420 487
421 /* ignore updates to this record's mcount site */ 488 /* ignore updates to this record's mcount site */
@@ -426,68 +493,30 @@ static void ftrace_replace_code(int enable)
426 unfreeze_record(rec); 493 unfreeze_record(rec);
427 } 494 }
428 495
429 failed = __ftrace_replace_code(rec, nop, enable); 496 failed = __ftrace_replace_code(rec, enable);
430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 497 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
431 rec->flags |= FTRACE_FL_FAILED; 498 rec->flags |= FTRACE_FL_FAILED;
432 if ((system_state == SYSTEM_BOOTING) || 499 if ((system_state == SYSTEM_BOOTING) ||
433 !core_kernel_text(rec->ip)) { 500 !core_kernel_text(rec->ip)) {
434 ftrace_free_rec(rec); 501 ftrace_free_rec(rec);
435 } 502 } else
503 ftrace_bug(failed, rec->ip);
436 } 504 }
437 } 505 }
438 } 506 }
439} 507}
440 508
441static void print_ip_ins(const char *fmt, unsigned char *p)
442{
443 int i;
444
445 printk(KERN_CONT "%s", fmt);
446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
449}
450
451static int 509static int
452ftrace_code_disable(struct dyn_ftrace *rec) 510ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
453{ 511{
454 unsigned long ip; 512 unsigned long ip;
455 unsigned char *nop, *call;
456 int ret; 513 int ret;
457 514
458 ip = rec->ip; 515 ip = rec->ip;
459 516
460 nop = ftrace_nop_replace(); 517 ret = ftrace_make_nop(mod, rec, mcount_addr);
461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) { 518 if (ret) {
465 switch (ret) { 519 ftrace_bug(ret, ip);
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
490
491 rec->flags |= FTRACE_FL_FAILED; 520 rec->flags |= FTRACE_FL_FAILED;
492 return 0; 521 return 0;
493 } 522 }
@@ -515,7 +544,7 @@ static void ftrace_run_update_code(int command)
515} 544}
516 545
517static ftrace_func_t saved_ftrace_func; 546static ftrace_func_t saved_ftrace_func;
518static int ftrace_start; 547static int ftrace_start_up;
519static DEFINE_MUTEX(ftrace_start_lock); 548static DEFINE_MUTEX(ftrace_start_lock);
520 549
521static void ftrace_startup(void) 550static void ftrace_startup(void)
@@ -526,7 +555,7 @@ static void ftrace_startup(void)
526 return; 555 return;
527 556
528 mutex_lock(&ftrace_start_lock); 557 mutex_lock(&ftrace_start_lock);
529 ftrace_start++; 558 ftrace_start_up++;
530 command |= FTRACE_ENABLE_CALLS; 559 command |= FTRACE_ENABLE_CALLS;
531 560
532 if (saved_ftrace_func != ftrace_trace_function) { 561 if (saved_ftrace_func != ftrace_trace_function) {
@@ -550,8 +579,8 @@ static void ftrace_shutdown(void)
550 return; 579 return;
551 580
552 mutex_lock(&ftrace_start_lock); 581 mutex_lock(&ftrace_start_lock);
553 ftrace_start--; 582 ftrace_start_up--;
554 if (!ftrace_start) 583 if (!ftrace_start_up)
555 command |= FTRACE_DISABLE_CALLS; 584 command |= FTRACE_DISABLE_CALLS;
556 585
557 if (saved_ftrace_func != ftrace_trace_function) { 586 if (saved_ftrace_func != ftrace_trace_function) {
@@ -577,8 +606,8 @@ static void ftrace_startup_sysctl(void)
577 mutex_lock(&ftrace_start_lock); 606 mutex_lock(&ftrace_start_lock);
578 /* Force update next time */ 607 /* Force update next time */
579 saved_ftrace_func = NULL; 608 saved_ftrace_func = NULL;
580 /* ftrace_start is true if we want ftrace running */ 609 /* ftrace_start_up is true if we want ftrace running */
581 if (ftrace_start) 610 if (ftrace_start_up)
582 command |= FTRACE_ENABLE_CALLS; 611 command |= FTRACE_ENABLE_CALLS;
583 612
584 ftrace_run_update_code(command); 613 ftrace_run_update_code(command);
@@ -593,8 +622,8 @@ static void ftrace_shutdown_sysctl(void)
593 return; 622 return;
594 623
595 mutex_lock(&ftrace_start_lock); 624 mutex_lock(&ftrace_start_lock);
596 /* ftrace_start is true if ftrace is running */ 625 /* ftrace_start_up is true if ftrace is running */
597 if (ftrace_start) 626 if (ftrace_start_up)
598 command |= FTRACE_DISABLE_CALLS; 627 command |= FTRACE_DISABLE_CALLS;
599 628
600 ftrace_run_update_code(command); 629 ftrace_run_update_code(command);
@@ -605,7 +634,7 @@ static cycle_t ftrace_update_time;
605static unsigned long ftrace_update_cnt; 634static unsigned long ftrace_update_cnt;
606unsigned long ftrace_update_tot_cnt; 635unsigned long ftrace_update_tot_cnt;
607 636
608static int ftrace_update_code(void) 637static int ftrace_update_code(struct module *mod)
609{ 638{
610 struct dyn_ftrace *p, *t; 639 struct dyn_ftrace *p, *t;
611 cycle_t start, stop; 640 cycle_t start, stop;
@@ -622,7 +651,7 @@ static int ftrace_update_code(void)
622 list_del_init(&p->list); 651 list_del_init(&p->list);
623 652
624 /* convert record (i.e, patch mcount-call with NOP) */ 653 /* convert record (i.e, patch mcount-call with NOP) */
625 if (ftrace_code_disable(p)) { 654 if (ftrace_code_disable(mod, p)) {
626 p->flags |= FTRACE_FL_CONVERTED; 655 p->flags |= FTRACE_FL_CONVERTED;
627 ftrace_update_cnt++; 656 ftrace_update_cnt++;
628 } else 657 } else
@@ -1181,7 +1210,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1181 1210
1182 mutex_lock(&ftrace_sysctl_lock); 1211 mutex_lock(&ftrace_sysctl_lock);
1183 mutex_lock(&ftrace_start_lock); 1212 mutex_lock(&ftrace_start_lock);
1184 if (ftrace_start && ftrace_enabled) 1213 if (ftrace_start_up && ftrace_enabled)
1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1214 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1186 mutex_unlock(&ftrace_start_lock); 1215 mutex_unlock(&ftrace_start_lock);
1187 mutex_unlock(&ftrace_sysctl_lock); 1216 mutex_unlock(&ftrace_sysctl_lock);
@@ -1268,7 +1297,8 @@ static __init int ftrace_init_debugfs(void)
1268 1297
1269fs_initcall(ftrace_init_debugfs); 1298fs_initcall(ftrace_init_debugfs);
1270 1299
1271static int ftrace_convert_nops(unsigned long *start, 1300static int ftrace_convert_nops(struct module *mod,
1301 unsigned long *start,
1272 unsigned long *end) 1302 unsigned long *end)
1273{ 1303{
1274 unsigned long *p; 1304 unsigned long *p;
@@ -1279,23 +1309,32 @@ static int ftrace_convert_nops(unsigned long *start,
1279 p = start; 1309 p = start;
1280 while (p < end) { 1310 while (p < end) {
1281 addr = ftrace_call_adjust(*p++); 1311 addr = ftrace_call_adjust(*p++);
1312 /*
1313 * Some architecture linkers will pad between
1314 * the different mcount_loc sections of different
1315 * object files to satisfy alignments.
1316 * Skip any NULL pointers.
1317 */
1318 if (!addr)
1319 continue;
1282 ftrace_record_ip(addr); 1320 ftrace_record_ip(addr);
1283 } 1321 }
1284 1322
1285 /* disable interrupts to prevent kstop machine */ 1323 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags); 1324 local_irq_save(flags);
1287 ftrace_update_code(); 1325 ftrace_update_code(mod);
1288 local_irq_restore(flags); 1326 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock); 1327 mutex_unlock(&ftrace_start_lock);
1290 1328
1291 return 0; 1329 return 0;
1292} 1330}
1293 1331
1294void ftrace_init_module(unsigned long *start, unsigned long *end) 1332void ftrace_init_module(struct module *mod,
1333 unsigned long *start, unsigned long *end)
1295{ 1334{
1296 if (ftrace_disabled || start == end) 1335 if (ftrace_disabled || start == end)
1297 return; 1336 return;
1298 ftrace_convert_nops(start, end); 1337 ftrace_convert_nops(mod, start, end);
1299} 1338}
1300 1339
1301extern unsigned long __start_mcount_loc[]; 1340extern unsigned long __start_mcount_loc[];
@@ -1325,7 +1364,8 @@ void __init ftrace_init(void)
1325 1364
1326 last_ftrace_enabled = ftrace_enabled = 1; 1365 last_ftrace_enabled = ftrace_enabled = 1;
1327 1366
1328 ret = ftrace_convert_nops(__start_mcount_loc, 1367 ret = ftrace_convert_nops(NULL,
1368 __start_mcount_loc,
1329 __stop_mcount_loc); 1369 __stop_mcount_loc);
1330 1370
1331 return; 1371 return;
@@ -1381,10 +1421,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1381 return -1; 1421 return -1;
1382 1422
1383 mutex_lock(&ftrace_sysctl_lock); 1423 mutex_lock(&ftrace_sysctl_lock);
1424
1425 if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
1426 ret = -EBUSY;
1427 goto out;
1428 }
1429
1384 ret = __register_ftrace_function(ops); 1430 ret = __register_ftrace_function(ops);
1385 ftrace_startup(); 1431 ftrace_startup();
1386 mutex_unlock(&ftrace_sysctl_lock);
1387 1432
1433out:
1434 mutex_unlock(&ftrace_sysctl_lock);
1388 return ret; 1435 return ret;
1389} 1436}
1390 1437
@@ -1449,3 +1496,147 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1449 return ret; 1496 return ret;
1450} 1497}
1451 1498
1499#ifdef CONFIG_FUNCTION_RET_TRACER
1500
1501static atomic_t ftrace_retfunc_active;
1502
1503/* The callback that hooks the return of a function */
1504trace_function_return_t ftrace_function_return =
1505 (trace_function_return_t)ftrace_stub;
1506
1507
1508/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
1509static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1510{
1511 int i;
1512 int ret = 0;
1513 unsigned long flags;
1514 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1515 struct task_struct *g, *t;
1516
1517 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1518 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1519 * sizeof(struct ftrace_ret_stack),
1520 GFP_KERNEL);
1521 if (!ret_stack_list[i]) {
1522 start = 0;
1523 end = i;
1524 ret = -ENOMEM;
1525 goto free;
1526 }
1527 }
1528
1529 read_lock_irqsave(&tasklist_lock, flags);
1530 do_each_thread(g, t) {
1531 if (start == end) {
1532 ret = -EAGAIN;
1533 goto unlock;
1534 }
1535
1536 if (t->ret_stack == NULL) {
1537 t->ret_stack = ret_stack_list[start++];
1538 t->curr_ret_stack = -1;
1539 atomic_set(&t->trace_overrun, 0);
1540 }
1541 } while_each_thread(g, t);
1542
1543unlock:
1544 read_unlock_irqrestore(&tasklist_lock, flags);
1545free:
1546 for (i = start; i < end; i++)
1547 kfree(ret_stack_list[i]);
1548 return ret;
1549}
1550
1551/* Allocate a return stack for each task */
1552static int start_return_tracing(void)
1553{
1554 struct ftrace_ret_stack **ret_stack_list;
1555 int ret;
1556
1557 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
1558 sizeof(struct ftrace_ret_stack *),
1559 GFP_KERNEL);
1560
1561 if (!ret_stack_list)
1562 return -ENOMEM;
1563
1564 do {
1565 ret = alloc_retstack_tasklist(ret_stack_list);
1566 } while (ret == -EAGAIN);
1567
1568 kfree(ret_stack_list);
1569 return ret;
1570}
1571
1572int register_ftrace_return(trace_function_return_t func)
1573{
1574 int ret = 0;
1575
1576 mutex_lock(&ftrace_sysctl_lock);
1577
1578 /*
1579 * Don't launch return tracing if normal function
1580 * tracing is already running.
1581 */
1582 if (ftrace_trace_function != ftrace_stub) {
1583 ret = -EBUSY;
1584 goto out;
1585 }
1586 atomic_inc(&ftrace_retfunc_active);
1587 ret = start_return_tracing();
1588 if (ret) {
1589 atomic_dec(&ftrace_retfunc_active);
1590 goto out;
1591 }
1592 ftrace_tracing_type = FTRACE_TYPE_RETURN;
1593 ftrace_function_return = func;
1594 ftrace_startup();
1595
1596out:
1597 mutex_unlock(&ftrace_sysctl_lock);
1598 return ret;
1599}
1600
1601void unregister_ftrace_return(void)
1602{
1603 mutex_lock(&ftrace_sysctl_lock);
1604
1605 atomic_dec(&ftrace_retfunc_active);
1606 ftrace_function_return = (trace_function_return_t)ftrace_stub;
1607 ftrace_shutdown();
1608 /* Restore normal tracing type */
1609 ftrace_tracing_type = FTRACE_TYPE_ENTER;
1610
1611 mutex_unlock(&ftrace_sysctl_lock);
1612}
1613
1614/* Allocate a return stack for newly created task */
1615void ftrace_retfunc_init_task(struct task_struct *t)
1616{
1617 if (atomic_read(&ftrace_retfunc_active)) {
1618 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
1619 * sizeof(struct ftrace_ret_stack),
1620 GFP_KERNEL);
1621 if (!t->ret_stack)
1622 return;
1623 t->curr_ret_stack = -1;
1624 atomic_set(&t->trace_overrun, 0);
1625 } else
1626 t->ret_stack = NULL;
1627}
1628
1629void ftrace_retfunc_exit_task(struct task_struct *t)
1630{
1631 struct ftrace_ret_stack *ret_stack = t->ret_stack;
1632
1633 t->ret_stack = NULL;
1634 /* NULL must become visible to IRQs before we free it: */
1635 barrier();
1636
1637 kfree(ret_stack);
1638}
1639#endif
1640
1641
1642
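
The -EAGAIN loop in start_return_tracing() above deserves spelling out: return stacks are allocated in batches of FTRACE_RETSTACK_ALLOC_SIZE outside the tasklist lock, handed out under it, and if tasks outnumber the batch the whole pass is retried with a fresh batch while leftovers are freed. A userspace sketch of the pattern; BATCH, the task array, the 64-byte stacks, and the omitted error handling are illustrative stand-ins.

#include <stdlib.h>

#define BATCH 4		/* stand-in for FTRACE_RETSTACK_ALLOC_SIZE */

struct task {
	void *ret_stack;
};

/* hand out one batch; -1 mimics -EAGAIN when tasks outnumber the batch */
static int assign_batch(struct task *tasks, int ntasks, void **batch)
{
	int used = 0, ret = 0;

	for (int i = 0; i < ntasks; i++) {
		if (tasks[i].ret_stack)
			continue;		/* got one on an earlier pass */
		if (used == BATCH) {
			ret = -1;		/* batch exhausted: retry */
			break;
		}
		tasks[i].ret_stack = batch[used++];
	}
	while (used < BATCH)
		free(batch[used++]);		/* return unused leftovers */
	return ret;
}

static int start_tracing(struct task *tasks, int ntasks)
{
	int ret;

	do {
		void *batch[BATCH];

		for (int i = 0; i < BATCH; i++)
			batch[i] = calloc(1, 64);	/* illustrative stack */
		ret = assign_batch(tasks, ntasks, batch);
	} while (ret == -1);
	return 0;
}

int main(void)
{
	static struct task tasks[10];	/* more tasks than one batch */

	start_tracing(tasks, 10);
	for (int i = 0; i < 10; i++)
		free(tasks[i].ret_stack);
	return 0;
}
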
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f780e9552f91..e206951603c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
 25 * Turning this switch on makes it OK to write to the
 26 * ring buffer, provided the ring buffer itself is enabled.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
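
To make the table above concrete: the writers test for exact equality (anything other than "ON alone" refuses writes), which is what lets the DISABLED bit latch everything off regardless of the ON bit. A userspace model follows; plain bit operations stand in for the kernel's atomic set_bit()/clear_bit().

#include <stdio.h>

enum { BUF_ON_BIT, BUF_DISABLED_BIT };
enum {
	BUF_ON		= 1 << BUF_ON_BIT,
	BUF_DISABLED	= 1 << BUF_DISABLED_BIT,
};

static long buffer_flags = BUF_ON;

/* mirrors the writers' check: anything but exactly "on" refuses writes */
static int may_record(void)
{
	return buffer_flags == BUF_ON;
}

int main(void)
{
	printf("%d\n", may_record());		/* 1: on, not disabled */

	buffer_flags &= ~BUF_ON;		/* tracing_off() */
	printf("%d\n", may_record());		/* 0 */

	buffer_flags |= BUF_ON;			/* tracing_on() */
	buffer_flags |= BUF_DISABLED;		/* tracing_off_permanent() */
	printf("%d\n", may_record());		/* 0: latched, ON is moot */
	return 0;
}
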
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
46} 84}
47 85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
 90 * permanently.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
 96
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
@@ -187,7 +238,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 238struct ring_buffer_per_cpu {
188 int cpu; 239 int cpu;
189 struct ring_buffer *buffer; 240 struct ring_buffer *buffer;
190 spinlock_t lock; 241 spinlock_t reader_lock; /* serialize readers */
242 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 243 struct lock_class_key lock_key;
192 struct list_head pages; 244 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 245 struct buffer_page *head_page; /* read from head */
@@ -221,32 +273,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 273 u64 read_stamp;
222}; 274};
223 275
276/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 277#define RB_WARN_ON(buffer, cond) \
225 do { \ 278 ({ \
226 if (unlikely(cond)) { \ 279 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 280 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 281 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 282 WARN_ON(1); \
248 } \ 283 } \
249 } while (0) 284 _____ret; \
285 })
250 286
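
The rewrite above relies on a GCC statement expression: a ({ ... }) block evaluates to its last statement, which is how a single RB_WARN_ON() can replace the _RET and _ONCE variants and still be usable inside an if (). A standalone illustration of the construct; WARN_RET() is an invented name for the demo.

#include <stdio.h>

/* warn and report the condition's value in a single expression,
 * the same shape RB_WARN_ON takes above */
#define WARN_RET(cond)						\
	({							\
		int _____ret = !!(cond);			\
		if (_____ret)					\
			fprintf(stderr, "warning: %s\n", #cond);\
		_____ret;					\
	})

static int check(int x)
{
	if (WARN_RET(x < 0))	/* warn and bail in one test */
		return -1;
	return x;
}

int main(void)
{
	printf("%d\n", check(5));	/* 5, no warning */
	printf("%d\n", check(-1));	/* -1, after a warning */
	return 0;
}
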
251/** 287/**
252 * check_pages - integrity check of buffer pages 288 * check_pages - integrity check of buffer pages
@@ -260,14 +296,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
260 struct list_head *head = &cpu_buffer->pages; 296 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 297 struct buffer_page *page, *tmp;
262 298
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 299 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 300 return -1;
301 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
302 return -1;
265 303
266 list_for_each_entry_safe(page, tmp, head, list) { 304 list_for_each_entry_safe(page, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 305 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 306 page->list.next->prev != &page->list))
269 RB_WARN_ON_RET(cpu_buffer, 307 return -1;
270 page->list.prev->next != &page->list); 308 if (RB_WARN_ON(cpu_buffer,
309 page->list.prev->next != &page->list))
310 return -1;
271 } 311 }
272 312
273 return 0; 313 return 0;
@@ -324,7 +364,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 364
325 cpu_buffer->cpu = cpu; 365 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 366 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 367 spin_lock_init(&cpu_buffer->reader_lock);
368 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 369 INIT_LIST_HEAD(&cpu_buffer->pages);
329 370
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 371 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -473,13 +514,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 514 synchronize_sched();
474 515
475 for (i = 0; i < nr_pages; i++) { 516 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 517 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
518 return;
477 p = cpu_buffer->pages.next; 519 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 520 page = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 521 list_del_init(&page->list);
480 free_buffer_page(page); 522 free_buffer_page(page);
481 } 523 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 524 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
525 return;
483 526
484 rb_reset_cpu(cpu_buffer); 527 rb_reset_cpu(cpu_buffer);
485 528
@@ -501,7 +544,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 544 synchronize_sched();
502 545
503 for (i = 0; i < nr_pages; i++) { 546 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 547 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
548 return;
505 p = pages->next; 549 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 550 page = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 551 list_del_init(&page->list);
@@ -562,7 +606,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 606 if (size < buffer_size) {
563 607
564 /* easy case, just free pages */ 608 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 609 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
610 mutex_unlock(&buffer->mutex);
611 return -1;
612 }
566 613
567 rm_pages = buffer->pages - nr_pages; 614 rm_pages = buffer->pages - nr_pages;
568 615
@@ -581,7 +628,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 628 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 629 * them all and return -ENOMEM;
583 */ 630 */
584 BUG_ON(nr_pages <= buffer->pages); 631 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
632 mutex_unlock(&buffer->mutex);
633 return -1;
634 }
635
585 new_pages = nr_pages - buffer->pages; 636 new_pages = nr_pages - buffer->pages;
586 637
587 for_each_buffer_cpu(buffer, cpu) { 638 for_each_buffer_cpu(buffer, cpu) {
@@ -604,7 +655,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 655 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 656 }
606 657
607 BUG_ON(!list_empty(&pages)); 658 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
659 mutex_unlock(&buffer->mutex);
660 return -1;
661 }
608 662
609 out: 663 out:
610 buffer->pages = nr_pages; 664 buffer->pages = nr_pages;
@@ -693,7 +747,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 747 head += rb_event_length(event)) {
694 748
695 event = __rb_page_index(cpu_buffer->head_page, head); 749 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 750 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
751 return;
697 /* Only count data entries */ 752 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 753 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 754 continue;
@@ -746,8 +801,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 801 addr &= PAGE_MASK;
747 802
748 while (cpu_buffer->commit_page->page != (void *)addr) { 803 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 804 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 805 cpu_buffer->commit_page == cpu_buffer->tail_page))
806 return;
751 cpu_buffer->commit_page->commit = 807 cpu_buffer->commit_page->commit =
752 cpu_buffer->commit_page->write; 808 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 809 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -894,7 +950,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 950 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 951 struct buffer_page *next_page = tail_page;
896 952
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 953 local_irq_save(flags);
954 __raw_spin_lock(&cpu_buffer->lock);
898 955
899 rb_inc_page(cpu_buffer, &next_page); 956 rb_inc_page(cpu_buffer, &next_page);
900 957
@@ -902,7 +959,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 959 reader_page = cpu_buffer->reader_page;
903 960
904 /* we grabbed the lock before incrementing */ 961 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 962 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
963 goto out_unlock;
906 964
907 /* 965 /*
908 * If for some reason, we had an interrupt storm that made 966 * If for some reason, we had an interrupt storm that made
@@ -970,7 +1028,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1028 rb_set_commit_to_write(cpu_buffer);
971 } 1029 }
972 1030
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1031 __raw_spin_unlock(&cpu_buffer->lock);
1032 local_irq_restore(flags);
974 1033
975 /* fail and let the caller try again */ 1034 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1035 return ERR_PTR(-EAGAIN);
@@ -978,7 +1037,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1037
979 /* We reserved something on the buffer */ 1038 /* We reserved something on the buffer */
980 1039
981 BUG_ON(write > BUF_PAGE_SIZE); 1040 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1041 return NULL;
982 1042
983 event = __rb_page_index(tail_page, tail); 1043 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1044 rb_update_event(event, type, length);
@@ -993,7 +1053,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
993 return event; 1053 return event;
994 1054
995 out_unlock: 1055 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1056 __raw_spin_unlock(&cpu_buffer->lock);
1057 local_irq_restore(flags);
997 return NULL; 1058 return NULL;
998} 1059}
999 1060
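The hunks above all make the same conversion: spin_lock_irqsave() on cpu_buffer->lock becomes an explicit local_irq_save() plus __raw_spin_lock() pair. Raw spinlocks bypass lockdep and the other debugging hooks, which matters here because the ring buffer can be entered from inside those very hooks; the price is managing interrupt state by hand. Distilled from the hunks, the required ordering is:

	unsigned long flags;

	local_irq_save(flags);			/* irqs off first */
	__raw_spin_lock(&cpu_buffer->lock);	/* then take the raw lock */

	/* ... critical section ... */

	__raw_spin_unlock(&cpu_buffer->lock);	/* release in reverse order */
	local_irq_restore(flags);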
@@ -1076,10 +1137,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1137 * storm or we have something buggy.
1077 * Bail! 1138 * Bail!
1078 */ 1139 */
1079 if (unlikely(++nr_loops > 1000)) { 1140 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1141 return NULL;
1082 }
1083 1142
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1143 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1144
@@ -1175,15 +1234,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1234 struct ring_buffer_event *event;
1176 int cpu, resched; 1235 int cpu, resched;
1177 1236
1178 if (ring_buffers_off) 1237 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1238 return NULL;
1180 1239
1181 if (atomic_read(&buffer->record_disabled)) 1240 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1241 return NULL;
1183 1242
1184 /* If we are tracing schedule, we don't want to recurse */ 1243 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1244 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1245
1188 cpu = raw_smp_processor_id(); 1246 cpu = raw_smp_processor_id();
1189 1247
@@ -1214,10 +1272,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1272 return event;
1215 1273
1216 out: 1274 out:
1217 if (resched) 1275 ftrace_preempt_enable(resched);
1218 preempt_enable_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1276 return NULL;
1222} 1277}
1223 1278
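ftrace_preempt_disable()/ftrace_preempt_enable() fold the old need_resched()/preempt_disable_notrace() dance into helpers. Remembering need_resched() matters because a tracer invoked from inside the scheduler, with NEED_RESCHED already set, must not let the enable path recurse into schedule(). Note that the removed open-coded copy above even had both branches calling preempt_enable_notrace(), exactly the kind of copy/paste slip the helper eliminates. The helpers are roughly of this shape (a sketch; the real definitions live in trace.h of this series):

	static inline int ftrace_preempt_disable(void)
	{
		int resched;

		resched = need_resched();
		preempt_disable_notrace();

		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}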
@@ -1259,12 +1314,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1314 /*
1260 * Only the last preempt count needs to restore preemption. 1315 * Only the last preempt count needs to restore preemption.
1261 */ 1316 */
1262 if (preempt_count() == 1) { 1317 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1318 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1319 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1320 preempt_enable_no_resched_notrace();
1269 1321
1270 return 0; 1322 return 0;
@@ -1294,14 +1346,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1346 int ret = -EBUSY;
1295 int cpu, resched; 1347 int cpu, resched;
1296 1348
1297 if (ring_buffers_off) 1349 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1350 return -EBUSY;
1299 1351
1300 if (atomic_read(&buffer->record_disabled)) 1352 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1353 return -EBUSY;
1302 1354
1303 resched = need_resched(); 1355 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1356
1306 cpu = raw_smp_processor_id(); 1357 cpu = raw_smp_processor_id();
1307 1358
@@ -1327,10 +1378,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1378
1328 ret = 0; 1379 ret = 0;
1329 out: 1380 out:
1330 if (resched) 1381 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1382
1335 return ret; 1383 return ret;
1336} 1384}
@@ -1489,14 +1537,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1537 return overruns;
1490} 1538}
1491 1539
1492/** 1540static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1541{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1542 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1543
@@ -1515,6 +1556,23 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1515} 1556}
1516 1557
1517/** 1558/**
1559 * ring_buffer_iter_reset - reset an iterator
1560 * @iter: The iterator to reset
1561 *
1562 * Resets the iterator, so that it will start from the beginning
1563 * again.
1564 */
1565void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1566{
1567 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1568 unsigned long flags;
1569
1570 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1571 rb_iter_reset(iter);
1572 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1573}
1574
1575/**
1518 * ring_buffer_iter_empty - check if an iterator has no more to read 1576 * ring_buffer_iter_empty - check if an iterator has no more to read
1519 * @iter: The iterator to check 1577 * @iter: The iterator to check
1520 */ 1578 */
@@ -1597,7 +1655,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1655 unsigned long flags;
1598 int nr_loops = 0; 1656 int nr_loops = 0;
1599 1657
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1658 local_irq_save(flags);
1659 __raw_spin_lock(&cpu_buffer->lock);
1601 1660
1602 again: 1661 again:
1603 /* 1662 /*
@@ -1606,8 +1665,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1665 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1666 * reason to loop four times (that I know of).
1608 */ 1667 */
1609 if (unlikely(++nr_loops > 3)) { 1668 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1669 reader = NULL;
1612 goto out; 1670 goto out;
1613 } 1671 }
@@ -1619,8 +1677,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1677 goto out;
1620 1678
1621 /* Never should we have an index greater than the size */ 1679 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1680 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1681 cpu_buffer->reader_page->read > rb_page_size(reader)))
1682 goto out;
1624 1683
1625 /* check if we caught up to the tail */ 1684 /* check if we caught up to the tail */
1626 reader = NULL; 1685 reader = NULL;
@@ -1659,7 +1718,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1718 goto again;
1660 1719
1661 out: 1720 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1721 __raw_spin_unlock(&cpu_buffer->lock);
1722 local_irq_restore(flags);
1663 1723
1664 return reader; 1724 return reader;
1665} 1725}
@@ -1673,7 +1733,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1733 reader = rb_get_reader_page(cpu_buffer);
1674 1734
1675 /* This function should not be called when buffer is empty */ 1735 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1736 if (RB_WARN_ON(cpu_buffer, !reader))
1737 return;
1677 1738
1678 event = rb_reader_event(cpu_buffer); 1739 event = rb_reader_event(cpu_buffer);
1679 1740
@@ -1700,7 +1761,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1761 * Check if we are at the end of the buffer.
1701 */ 1762 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1763 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1764 if (RB_WARN_ON(buffer,
1765 iter->head_page == cpu_buffer->commit_page))
1766 return;
1704 rb_inc_iter(iter); 1767 rb_inc_iter(iter);
1705 return; 1768 return;
1706 } 1769 }
@@ -1713,8 +1776,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1776 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1777 * at the tail of the buffer.
1715 */ 1778 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1779 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1780 (iter->head_page == cpu_buffer->commit_page) &&
1781 (iter->head + length > rb_commit_index(cpu_buffer))))
1782 return;
1718 1783
1719 rb_update_iter_read_stamp(iter, event); 1784 rb_update_iter_read_stamp(iter, event);
1720 1785
@@ -1726,17 +1791,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1791 rb_advance_iter(iter);
1727} 1792}
1728 1793
1729/** 1794static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1795rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1796{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1797 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1798 struct ring_buffer_event *event;
@@ -1757,10 +1813,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1813 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1814 * an anomaly.
1759 */ 1815 */
1760 if (unlikely(++nr_loops > 10)) { 1816 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1817 return NULL;
1763 }
1764 1818
1765 reader = rb_get_reader_page(cpu_buffer); 1819 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1820 if (!reader)
@@ -1798,16 +1852,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1852 return NULL;
1799} 1853}
1800 1854
1801/** 1855static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1856rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1857{
1812 struct ring_buffer *buffer; 1858 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1859 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1875,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1875 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1876 * an anomaly.
1831 */ 1877 */
1832 if (unlikely(++nr_loops > 10)) { 1878 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1879 return NULL;
1835 }
1836 1880
1837 if (rb_per_cpu_empty(cpu_buffer)) 1881 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1882 return NULL;
@@ -1869,6 +1913,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1913}
1870 1914
1871/** 1915/**
1916 * ring_buffer_peek - peek at the next event to be read
1917 * @buffer: The ring buffer to read
 1918 * @cpu: The cpu to peek at
1919 * @ts: The timestamp counter of this event.
1920 *
1921 * This will return the event that will be read next, but does
1922 * not consume the data.
1923 */
1924struct ring_buffer_event *
1925ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1926{
1927 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1928 struct ring_buffer_event *event;
1929 unsigned long flags;
1930
1931 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 event = rb_buffer_peek(buffer, cpu, ts);
1933 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934
1935 return event;
1936}
1937
1938/**
1939 * ring_buffer_iter_peek - peek at the next event to be read
1940 * @iter: The ring buffer iterator
1941 * @ts: The timestamp counter of this event.
1942 *
1943 * This will return the event that will be read next, but does
1944 * not increment the iterator.
1945 */
1946struct ring_buffer_event *
1947ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1948{
1949 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1950 struct ring_buffer_event *event;
1951 unsigned long flags;
1952
1953 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1954 event = rb_iter_peek(iter, ts);
1955 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1956
1957 return event;
1958}
1959
1960/**
1872 * ring_buffer_consume - return an event and consume it 1961 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1962 * @buffer: The ring buffer to get the next event from
1874 * 1963 *
@@ -1879,19 +1968,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1968struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1969ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1970{
1882 struct ring_buffer_per_cpu *cpu_buffer; 1971 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 1972 struct ring_buffer_event *event;
1973 unsigned long flags;
1884 1974
1885 if (!cpu_isset(cpu, buffer->cpumask)) 1975 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 1976 return NULL;
1887 1977
1888 event = ring_buffer_peek(buffer, cpu, ts); 1978 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1979
1980 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 1981 if (!event)
1890 return NULL; 1982 goto out;
1891 1983
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 1984 rb_advance_reader(cpu_buffer);
1894 1985
1986 out:
1987 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1988
1895 return event; 1989 return event;
1896} 1990}
1897 1991
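With the reader_lock taken inside ring_buffer_consume(), the peek and the advance now happen atomically with respect to other readers of the same cpu buffer. A hypothetical drain loop built on the API (process_event() is a placeholder, not part of this patch):

	static void drain_cpu(struct ring_buffer *buffer, int cpu)
	{
		struct ring_buffer_event *event;
		u64 ts;

		/* each call peeks and advances under reader_lock */
		while ((event = ring_buffer_consume(buffer, cpu, &ts)))
			process_event(ring_buffer_event_data(event), ts);
	}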
@@ -1928,9 +2022,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2022 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2023 synchronize_sched();
1930 2024
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2025 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2026 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2027 rb_iter_reset(iter);
2028 __raw_spin_unlock(&cpu_buffer->lock);
2029 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2030
1935 return iter; 2031 return iter;
1936} 2032}
@@ -1962,12 +2058,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2058ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2059{
1964 struct ring_buffer_event *event; 2060 struct ring_buffer_event *event;
2061 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2062 unsigned long flags;
1965 2063
1966 event = ring_buffer_iter_peek(iter, ts); 2064 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2065 event = rb_iter_peek(iter, ts);
1967 if (!event) 2066 if (!event)
1968 return NULL; 2067 goto out;
1969 2068
1970 rb_advance_iter(iter); 2069 rb_advance_iter(iter);
2070 out:
2071 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2072
1972 return event; 2073 return event;
1973} 2074}
@@ -2016,11 +2117,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2117 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2118 return;
2018 2119
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2120 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2121
2122 __raw_spin_lock(&cpu_buffer->lock);
2020 2123
2021 rb_reset_cpu(cpu_buffer); 2124 rb_reset_cpu(cpu_buffer);
2022 2125
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2126 __raw_spin_unlock(&cpu_buffer->lock);
2127
2128 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2129}
2025 2130
2026/** 2131/**
@@ -2122,12 +2227,14 @@ static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2227rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2228 size_t cnt, loff_t *ppos)
2124{ 2229{
2125 int *p = filp->private_data; 2230 long *p = filp->private_data;
2126 char buf[64]; 2231 char buf[64];
2127 int r; 2232 int r;
2128 2233
2129 /* !ring_buffers_off == tracing_on */ 2234 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2235 r = sprintf(buf, "permanently disabled\n");
2236 else
2237 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2238
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2239 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2240}
@@ -2136,7 +2243,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2243rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2244 size_t cnt, loff_t *ppos)
2138{ 2245{
2139 int *p = filp->private_data; 2246 long *p = filp->private_data;
2140 char buf[64]; 2247 char buf[64];
2141 long val; 2248 long val;
2142 int ret; 2249 int ret;
@@ -2153,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2260 if (ret < 0)
2154 return ret; 2261 return ret;
2155 2262
2156 /* !ring_buffers_off == tracing_on */ 2263 if (val)
2157 *p = !val; 2264 set_bit(RB_BUFFERS_ON_BIT, p);
2265 else
2266 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2267
2159 (*ppos)++; 2268 (*ppos)++;
2160 2269
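The old int ring_buffers_off toggle has become a long ring_buffer_flags bitmask so that "off for now" and "off for good" can coexist. A sketch of the layout these test_bit()/set_bit() calls imply; the identifiers match the ones used above, while the exact bit values are the natural guess rather than quoted from the header:

	enum {
		RB_BUFFERS_ON_BIT	= 0,	/* runtime on/off toggle */
		RB_BUFFERS_DISABLED_BIT	= 1,	/* permanent kill switch */
	};

	enum {
		RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
		RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
	};

	unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;

	/* the permanent-off path only ever sets its bit; nothing clears
	 * it, which is why rb_simple_read() above can report the state */
	void tracing_off_permanent(void)
	{
		set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
	}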
@@ -2176,7 +2285,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2285 d_tracer = tracing_init_dentry();
2177 2286
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2287 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2288 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2289 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2290 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2291
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f300..a45b59e53fbc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -43,6 +44,29 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 44unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 45unsigned long __read_mostly tracing_thresh;
45 46
47/* For tracers that don't implement custom flags */
48static struct tracer_opt dummy_tracer_opt[] = {
49 { }
50};
51
52static struct tracer_flags dummy_tracer_flags = {
53 .val = 0,
54 .opts = dummy_tracer_opt
55};
56
57static int dummy_set_flag(u32 old_flags, u32 bit, int set)
58{
59 return 0;
60}
61
62/*
63 * Kill all tracing for good (never come back).
 64 * It is initialized to 1 and turns to zero only if the initialization
 65 * of the tracer is successful. That is the only place that sets
 66 * it back to zero.
67 */
68int tracing_disabled = 1;
69
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 70static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 71
48static inline void ftrace_disable_cpu(void) 72static inline void ftrace_disable_cpu(void)
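register_tracer() (further down in this patch) backfills dummy_tracer_flags and dummy_set_flag so the options code never needs NULL checks. A tracer that does want private options would supply something of this shape; the field names follow the dummies above and set_tracer_option() later in the patch, while "verbose" is purely illustrative:

	static struct tracer_opt my_tracer_opts[] = {
		{ .name = "verbose", .bit = 0x1 },
		{ }	/* NULL-name terminator, like dummy_tracer_opt */
	};

	static struct tracer_flags my_tracer_flags = {
		.val	= 0,			/* every option starts off */
		.opts	= my_tracer_opts,
	};

	/* returning nonzero refuses the toggle; 0 accepts it */
	static int my_set_flag(u32 old_flags, u32 bit, int set)
	{
		return 0;
	}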
@@ -62,7 +86,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 86#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 87 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 88
65static int tracing_disabled = 1; 89/*
90 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
91 *
 92 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 93 * is set, then ftrace_dump is called. This will output the contents
 94 * of the ftrace buffers to the console. This is very useful for
 95 * capturing traces that lead to crashes and outputting them to a
 96 * serial console.
 97 *
 98 * It is off by default, but you can enable it either by specifying
 99 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 100 * /proc/sys/kernel/ftrace_dump_on_oops to true.
101 */
102int ftrace_dump_on_oops;
103
104static int tracing_set_tracer(char *buf);
105
106static int __init set_ftrace(char *str)
107{
108 tracing_set_tracer(str);
109 return 1;
110}
111__setup("ftrace", set_ftrace);
112
113static int __init set_ftrace_dump_on_oops(char *str)
114{
115 ftrace_dump_on_oops = 1;
116 return 1;
117}
118__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 119
67long 120long
68ns2usecs(cycle_t nsec) 121ns2usecs(cycle_t nsec)
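Both __setup() hooks exist so tracing can be armed before userspace is up: "ftrace=<tracer>" routes through tracing_set_tracer() at boot, and "ftrace_dump_on_oops" arms the crash dump. Following the style of the readme further below, a kernel command line using both might read (tracer name illustrative):

	ftrace=sched_switch ftrace_dump_on_oops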
@@ -112,6 +165,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 165/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 166static int tracer_enabled = 1;
114 167
168/**
169 * tracing_is_enabled - return tracer_enabled status
170 *
171 * This function is used by other tracers to know the status
172 * of the tracer_enabled flag. Tracers may use this function
 173 * to know whether they should enable their features when starting
 174 * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
175 */
176int tracing_is_enabled(void)
177{
178 return tracer_enabled;
179}
180
115/* function tracing enabled */ 181/* function tracing enabled */
116int ftrace_function_enabled; 182int ftrace_function_enabled;
117 183
@@ -153,8 +219,9 @@ static DEFINE_MUTEX(trace_types_lock);
153/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 219/* trace_wait is a waitqueue for tasks blocked on trace_poll */
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 220static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 221
156/* trace_flags holds iter_ctrl options */ 222/* trace_flags holds trace_options default values */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 223unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
224 TRACE_ITER_ANNOTATE;
158 225
159/** 226/**
160 * trace_wake_up - wake up tasks waiting for trace input 227 * trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +260,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
193 return nsecs / 1000; 260 return nsecs / 1000;
194} 261}
195 262
196/*
197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
198 * control the output of kernel symbols.
199 */
200#define TRACE_ITER_SYM_MASK \
201 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
202
 203/* These must match the bit positions in trace_iterator_flags */ 263
204static const char *trace_options[] = { 264static const char *trace_options[] = {
205 "print-parent", 265 "print-parent",
@@ -213,6 +273,11 @@ static const char *trace_options[] = {
213 "stacktrace", 273 "stacktrace",
214 "sched-tree", 274 "sched-tree",
215 "ftrace_printk", 275 "ftrace_printk",
276 "ftrace_preempt",
277 "branch",
278 "annotate",
279 "userstacktrace",
280 "sym-userobj",
216 NULL 281 NULL
217}; 282};
218 283
@@ -359,6 +424,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
359 return trace_seq_putmem(s, hex, j); 424 return trace_seq_putmem(s, hex, j);
360} 425}
361 426
427static int
428trace_seq_path(struct trace_seq *s, struct path *path)
429{
430 unsigned char *p;
431
432 if (s->len >= (PAGE_SIZE - 1))
433 return 0;
434 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
435 if (!IS_ERR(p)) {
436 p = mangle_path(s->buffer + s->len, p, "\n");
437 if (p) {
438 s->len = p - s->buffer;
439 return 1;
440 }
441 } else {
442 s->buffer[s->len++] = '?';
443 return 1;
444 }
445
446 return 0;
447}
448
362static void 449static void
363trace_seq_reset(struct trace_seq *s) 450trace_seq_reset(struct trace_seq *s)
364{ 451{
@@ -470,7 +557,15 @@ int register_tracer(struct tracer *type)
470 return -1; 557 return -1;
471 } 558 }
472 559
560 /*
 561 * When this gets called we hold the BKL, which means that
 562 * preemption is disabled. Various trace selftests, however,
 563 * need to disable and enable preemption for successful tests.
 564 * So we drop the BKL here and grab it again after the tests.
565 */
566 unlock_kernel();
473 mutex_lock(&trace_types_lock); 567 mutex_lock(&trace_types_lock);
568
474 for (t = trace_types; t; t = t->next) { 569 for (t = trace_types; t; t = t->next) {
475 if (strcmp(type->name, t->name) == 0) { 570 if (strcmp(type->name, t->name) == 0) {
476 /* already found */ 571 /* already found */
@@ -481,11 +576,18 @@ int register_tracer(struct tracer *type)
481 } 576 }
482 } 577 }
483 578
579 if (!type->set_flag)
580 type->set_flag = &dummy_set_flag;
581 if (!type->flags)
582 type->flags = &dummy_tracer_flags;
583 else
584 if (!type->flags->opts)
585 type->flags->opts = dummy_tracer_opt;
586
484#ifdef CONFIG_FTRACE_STARTUP_TEST 587#ifdef CONFIG_FTRACE_STARTUP_TEST
485 if (type->selftest) { 588 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 589 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 590 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 591 int i;
490 /* 592 /*
491 * Run a selftest on this tracer. 593 * Run a selftest on this tracer.
@@ -494,25 +596,23 @@ int register_tracer(struct tracer *type)
494 * internal tracing to verify that everything is in order. 596 * internal tracing to verify that everything is in order.
495 * If we fail, we do not register this tracer. 597 * If we fail, we do not register this tracer.
496 */ 598 */
497 for_each_tracing_cpu(i) { 599 for_each_tracing_cpu(i)
498 tracing_reset(tr, i); 600 tracing_reset(tr, i);
499 } 601
500 current_trace = type; 602 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 603 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 604 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 605 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 606 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 607 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 608 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 609 printk(KERN_CONT "FAILED!\n");
510 goto out; 610 goto out;
511 } 611 }
512 /* Only reset on passing, to avoid touching corrupted buffers */ 612 /* Only reset on passing, to avoid touching corrupted buffers */
513 for_each_tracing_cpu(i) { 613 for_each_tracing_cpu(i)
514 tracing_reset(tr, i); 614 tracing_reset(tr, i);
515 } 615
516 printk(KERN_CONT "PASSED\n"); 616 printk(KERN_CONT "PASSED\n");
517 } 617 }
518#endif 618#endif
@@ -525,6 +625,7 @@ int register_tracer(struct tracer *type)
525 625
526 out: 626 out:
527 mutex_unlock(&trace_types_lock); 627 mutex_unlock(&trace_types_lock);
628 lock_kernel();
528 629
529 return ret; 630 return ret;
530} 631}
@@ -581,6 +682,91 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 682 cmdline_idx = 0;
582} 683}
583 684
685static int trace_stop_count;
686static DEFINE_SPINLOCK(tracing_start_lock);
687
688/**
689 * ftrace_off_permanent - disable all ftrace code permanently
690 *
 691 * This should only be called when a serious anomaly has
 692 * been detected. This will turn off function tracing,
 693 * ring buffers, and other tracing utilities. It takes no
694 * locks and can be called from any context.
695 */
696void ftrace_off_permanent(void)
697{
698 tracing_disabled = 1;
699 ftrace_stop();
700 tracing_off_permanent();
701}
702
703/**
704 * tracing_start - quick start of the tracer
705 *
706 * If tracing is enabled but was stopped by tracing_stop,
707 * this will start the tracer back up.
708 */
709void tracing_start(void)
710{
711 struct ring_buffer *buffer;
712 unsigned long flags;
713
714 if (tracing_disabled)
715 return;
716
717 spin_lock_irqsave(&tracing_start_lock, flags);
718 if (--trace_stop_count)
719 goto out;
720
721 if (trace_stop_count < 0) {
722 /* Someone screwed up their debugging */
723 WARN_ON_ONCE(1);
724 trace_stop_count = 0;
725 goto out;
726 }
727
728
729 buffer = global_trace.buffer;
730 if (buffer)
731 ring_buffer_record_enable(buffer);
732
733 buffer = max_tr.buffer;
734 if (buffer)
735 ring_buffer_record_enable(buffer);
736
737 ftrace_start();
738 out:
739 spin_unlock_irqrestore(&tracing_start_lock, flags);
740}
741
742/**
743 * tracing_stop - quick stop of the tracer
744 *
 745 * Lightweight way to stop tracing. Use in conjunction with
746 * tracing_start.
747 */
748void tracing_stop(void)
749{
750 struct ring_buffer *buffer;
751 unsigned long flags;
752
753 ftrace_stop();
754 spin_lock_irqsave(&tracing_start_lock, flags);
755 if (trace_stop_count++)
756 goto out;
757
758 buffer = global_trace.buffer;
759 if (buffer)
760 ring_buffer_record_disable(buffer);
761
762 buffer = max_tr.buffer;
763 if (buffer)
764 ring_buffer_record_disable(buffer);
765
766 out:
767 spin_unlock_irqrestore(&tracing_start_lock, flags);
768}
769
584void trace_stop_cmdline_recording(void); 770void trace_stop_cmdline_recording(void);
585 771
586static void trace_save_cmdline(struct task_struct *tsk) 772static void trace_save_cmdline(struct task_struct *tsk)
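trace_stop_count makes tracing_stop()/tracing_start() nest like a refcount, so one subsystem cannot re-enable tracing underneath another that still wants it quiet. A hypothetical call sequence showing the counter semantics:

	tracing_stop();		/* count 0 -> 1: buffers disabled  */
	tracing_stop();		/* count 1 -> 2: stays disabled    */
	tracing_start();	/* count 2 -> 1: still disabled    */
	tracing_start();	/* count 1 -> 0: recording resumes */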
@@ -655,6 +841,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 841
656 entry->preempt_count = pc & 0xff; 842 entry->preempt_count = pc & 0xff;
657 entry->pid = (tsk) ? tsk->pid : 0; 843 entry->pid = (tsk) ? tsk->pid : 0;
844 entry->tgid = (tsk) ? tsk->tgid : 0;
658 entry->flags = 845 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 846#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 847 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -691,6 +878,36 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 878 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
692} 879}
693 880
881#ifdef CONFIG_FUNCTION_RET_TRACER
882static void __trace_function_return(struct trace_array *tr,
883 struct trace_array_cpu *data,
884 struct ftrace_retfunc *trace,
885 unsigned long flags,
886 int pc)
887{
888 struct ring_buffer_event *event;
889 struct ftrace_ret_entry *entry;
890 unsigned long irq_flags;
891
892 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
893 return;
894
895 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
896 &irq_flags);
897 if (!event)
898 return;
899 entry = ring_buffer_event_data(event);
900 tracing_generic_entry_update(&entry->ent, flags, pc);
901 entry->ent.type = TRACE_FN_RET;
902 entry->ip = trace->func;
903 entry->parent_ip = trace->ret;
904 entry->rettime = trace->rettime;
905 entry->calltime = trace->calltime;
906 entry->overrun = trace->overrun;
907 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
908}
909#endif
910
694void 911void
695ftrace(struct trace_array *tr, struct trace_array_cpu *data, 912ftrace(struct trace_array *tr, struct trace_array_cpu *data,
696 unsigned long ip, unsigned long parent_ip, unsigned long flags, 913 unsigned long ip, unsigned long parent_ip, unsigned long flags,
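__trace_function_return() above copies five fields out of the ftrace_retfunc it is handed. The struct itself is declared elsewhere (ftrace.h); inferred from the assignments, its shape is approximately:

	struct ftrace_retfunc {
		unsigned long		ret;	/* return address; becomes parent_ip */
		unsigned long		func;	/* traced function entry; becomes ip */
		unsigned long long	calltime;
		unsigned long long	rettime;
		unsigned long		overrun;
	};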
@@ -742,6 +959,44 @@ void __trace_stack(struct trace_array *tr,
742 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 959 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
743} 960}
744 961
962static void ftrace_trace_userstack(struct trace_array *tr,
963 struct trace_array_cpu *data,
964 unsigned long flags, int pc)
965{
966 struct ring_buffer_event *event;
967 struct userstack_entry *entry;
968 struct stack_trace trace;
969 unsigned long irq_flags;
970
971 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
972 return;
973
974 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
975 &irq_flags);
976 if (!event)
977 return;
978 entry = ring_buffer_event_data(event);
979 tracing_generic_entry_update(&entry->ent, flags, pc);
980 entry->ent.type = TRACE_USER_STACK;
981
982 memset(&entry->caller, 0, sizeof(entry->caller));
983
984 trace.nr_entries = 0;
985 trace.max_entries = FTRACE_STACK_ENTRIES;
986 trace.skip = 0;
987 trace.entries = entry->caller;
988
989 save_stack_trace_user(&trace);
990 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
991}
992
993void __trace_userstack(struct trace_array *tr,
994 struct trace_array_cpu *data,
995 unsigned long flags)
996{
997 ftrace_trace_userstack(tr, data, flags, preempt_count());
998}
999
745static void 1000static void
746ftrace_trace_special(void *__tr, void *__data, 1001ftrace_trace_special(void *__tr, void *__data,
747 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1002 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -765,6 +1020,7 @@ ftrace_trace_special(void *__tr, void *__data,
765 entry->arg3 = arg3; 1020 entry->arg3 = arg3;
766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1021 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1022 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1023 ftrace_trace_userstack(tr, data, irq_flags, pc);
768 1024
769 trace_wake_up(); 1025 trace_wake_up();
770} 1026}
@@ -803,6 +1059,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
803 entry->next_cpu = task_cpu(next); 1059 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1060 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc); 1061 ftrace_trace_stack(tr, data, flags, 5, pc);
1062 ftrace_trace_userstack(tr, data, flags, pc);
806} 1063}
807 1064
808void 1065void
@@ -832,6 +1089,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
832 entry->next_cpu = task_cpu(wakee); 1089 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1090 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc); 1091 ftrace_trace_stack(tr, data, flags, 6, pc);
1092 ftrace_trace_userstack(tr, data, flags, pc);
835 1093
836 trace_wake_up(); 1094 trace_wake_up();
837} 1095}
@@ -841,26 +1099,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 1099{
842 struct trace_array *tr = &global_trace; 1100 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 1101 struct trace_array_cpu *data;
1102 unsigned long flags;
844 int cpu; 1103 int cpu;
845 int pc; 1104 int pc;
846 1105
847 if (tracing_disabled || !tr->ctrl) 1106 if (tracing_disabled)
848 return; 1107 return;
849 1108
850 pc = preempt_count(); 1109 pc = preempt_count();
851 preempt_disable_notrace(); 1110 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 1111 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 1112 data = tr->data[cpu];
854 1113
855 if (likely(!atomic_read(&data->disabled))) 1114 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1115 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 1116
858 preempt_enable_notrace(); 1117 atomic_dec(&data->disabled);
1118 local_irq_restore(flags);
859} 1119}
860 1120
861#ifdef CONFIG_FUNCTION_TRACER 1121#ifdef CONFIG_FUNCTION_TRACER
862static void 1122static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 1123function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 1124{
865 struct trace_array *tr = &global_trace; 1125 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 1126 struct trace_array_cpu *data;
@@ -873,8 +1133,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 1133 return;
874 1134
875 pc = preempt_count(); 1135 pc = preempt_count();
876 resched = need_resched(); 1136 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 1137 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 1138 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1139 data = tr->data[cpu];
@@ -884,11 +1143,62 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1143 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1144
886 atomic_dec(&data->disabled); 1145 atomic_dec(&data->disabled);
887 if (resched) 1146 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace(); 1147}
889 else 1148
890 preempt_enable_notrace(); 1149static void
1150function_trace_call(unsigned long ip, unsigned long parent_ip)
1151{
1152 struct trace_array *tr = &global_trace;
1153 struct trace_array_cpu *data;
1154 unsigned long flags;
1155 long disabled;
1156 int cpu;
1157 int pc;
1158
1159 if (unlikely(!ftrace_function_enabled))
1160 return;
1161
1162 /*
1163 * Need to use raw, since this must be called before the
1164 * recursive protection is performed.
1165 */
1166 local_irq_save(flags);
1167 cpu = raw_smp_processor_id();
1168 data = tr->data[cpu];
1169 disabled = atomic_inc_return(&data->disabled);
1170
1171 if (likely(disabled == 1)) {
1172 pc = preempt_count();
1173 trace_function(tr, data, ip, parent_ip, flags, pc);
1174 }
1175
1176 atomic_dec(&data->disabled);
1177 local_irq_restore(flags);
1178}
1179
1180#ifdef CONFIG_FUNCTION_RET_TRACER
1181void trace_function_return(struct ftrace_retfunc *trace)
1182{
1183 struct trace_array *tr = &global_trace;
1184 struct trace_array_cpu *data;
1185 unsigned long flags;
1186 long disabled;
1187 int cpu;
1188 int pc;
1189
1190 raw_local_irq_save(flags);
1191 cpu = raw_smp_processor_id();
1192 data = tr->data[cpu];
1193 disabled = atomic_inc_return(&data->disabled);
1194 if (likely(disabled == 1)) {
1195 pc = preempt_count();
1196 __trace_function_return(tr, data, trace, flags, pc);
1197 }
1198 atomic_dec(&data->disabled);
1199 raw_local_irq_restore(flags);
891} 1200}
1201#endif /* CONFIG_FUNCTION_RET_TRACER */
892 1202
893static struct ftrace_ops trace_ops __read_mostly = 1203static struct ftrace_ops trace_ops __read_mostly =
894{ 1204{
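The new function_trace_call() trades the preempt-only guard for hard irq-off plus the per-cpu disabled counter, which doubles as recursion protection: only the 0 -> 1 transition records, so re-entry from an irq or NMI on the same cpu falls through harmlessly. The idiom, distilled from the hunk above:

	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1))	/* outermost entry on this cpu */
		trace_function(tr, data, ip, parent_ip, flags, pc);
	atomic_dec(&data->disabled);	/* always rebalance */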
@@ -898,9 +1208,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1208void tracing_start_function_trace(void)
899{ 1209{
900 ftrace_function_enabled = 0; 1210 ftrace_function_enabled = 0;
1211
1212 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1213 trace_ops.func = function_trace_call_preempt_only;
1214 else
1215 trace_ops.func = function_trace_call;
1216
901 register_ftrace_function(&trace_ops); 1217 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1218 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1219}
905 1220
906void tracing_stop_function_trace(void) 1221void tracing_stop_function_trace(void)
@@ -912,6 +1227,7 @@ void tracing_stop_function_trace(void)
912 1227
913enum trace_file_type { 1228enum trace_file_type {
914 TRACE_FILE_LAT_FMT = 1, 1229 TRACE_FILE_LAT_FMT = 1,
1230 TRACE_FILE_ANNOTATE = 2,
915}; 1231};
916 1232
917static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 1233static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@ -1047,10 +1363,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1363
1048 atomic_inc(&trace_record_cmdline_disabled); 1364 atomic_inc(&trace_record_cmdline_disabled);
1049 1365
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1366 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1367 iter->ent = NULL;
1056 iter->cpu = 0; 1368 iter->cpu = 0;
@@ -1077,14 +1389,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1389
1078static void s_stop(struct seq_file *m, void *p) 1390static void s_stop(struct seq_file *m, void *p)
1079{ 1391{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1392 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1393 mutex_unlock(&trace_types_lock);
1089} 1394}
1090 1395
@@ -1143,7 +1448,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1143# define IP_FMT "%016lx" 1448# define IP_FMT "%016lx"
1144#endif 1449#endif
1145 1450
1146static int 1451int
1147seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1452seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1148{ 1453{
1149 int ret; 1454 int ret;
@@ -1164,6 +1469,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1164 return ret; 1469 return ret;
1165} 1470}
1166 1471
1472static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1473 unsigned long ip, unsigned long sym_flags)
1474{
1475 struct file *file = NULL;
1476 unsigned long vmstart = 0;
1477 int ret = 1;
1478
1479 if (mm) {
1480 const struct vm_area_struct *vma;
1481
1482 down_read(&mm->mmap_sem);
1483 vma = find_vma(mm, ip);
1484 if (vma) {
1485 file = vma->vm_file;
1486 vmstart = vma->vm_start;
1487 }
1488 if (file) {
1489 ret = trace_seq_path(s, &file->f_path);
1490 if (ret)
1491 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1492 }
1493 up_read(&mm->mmap_sem);
1494 }
1495 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1496 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1497 return ret;
1498}
1499
1500static int
1501seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1502 unsigned long sym_flags)
1503{
1504 struct mm_struct *mm = NULL;
1505 int ret = 1;
1506 unsigned int i;
1507
1508 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1509 struct task_struct *task;
1510 /*
1511 * we do the lookup on the thread group leader,
1512 * since individual threads might have already quit!
1513 */
1514 rcu_read_lock();
1515 task = find_task_by_vpid(entry->ent.tgid);
1516 if (task)
1517 mm = get_task_mm(task);
1518 rcu_read_unlock();
1519 }
1520
1521 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1522 unsigned long ip = entry->caller[i];
1523
1524 if (ip == ULONG_MAX || !ret)
1525 break;
1526 if (i && ret)
1527 ret = trace_seq_puts(s, " <- ");
1528 if (!ip) {
1529 if (ret)
1530 ret = trace_seq_puts(s, "??");
1531 continue;
1532 }
1533 if (!ret)
1534 break;
1535 if (ret)
1536 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1537 }
1538
1539 if (mm)
1540 mmput(mm);
1541 return ret;
1542}
1543
1167static void print_lat_help_header(struct seq_file *m) 1544static void print_lat_help_header(struct seq_file *m)
1168{ 1545{
1169 seq_puts(m, "# _------=> CPU# \n"); 1546 seq_puts(m, "# _------=> CPU# \n");
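Taken together with ftrace_trace_userstack() earlier in the patch: the userstacktrace option makes events record the user callchain, and sym-userobj makes this printer resolve each address against the task's vmas, rendering path[+0xoffset] instead of a bare hex ip. In the style of this file's readme:

	# echo userstacktrace > /debug/tracing/trace_options
	# echo sym-userobj > /debug/tracing/trace_options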
@@ -1338,6 +1715,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1338 trace_seq_putc(s, '\n'); 1715 trace_seq_putc(s, '\n');
1339} 1716}
1340 1717
1718static void test_cpu_buff_start(struct trace_iterator *iter)
1719{
1720 struct trace_seq *s = &iter->seq;
1721
1722 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1723 return;
1724
1725 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1726 return;
1727
1728 if (cpu_isset(iter->cpu, iter->started))
1729 return;
1730
1731 cpu_set(iter->cpu, iter->started);
1732 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1733}
1734
1341static enum print_line_t 1735static enum print_line_t
1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1736print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1343{ 1737{
@@ -1357,6 +1751,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1357 if (entry->type == TRACE_CONT) 1751 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED; 1752 return TRACE_TYPE_HANDLED;
1359 1753
1754 test_cpu_buff_start(iter);
1755
1360 next_entry = find_next_entry(iter, NULL, &next_ts); 1756 next_entry = find_next_entry(iter, NULL, &next_ts);
1361 if (!next_entry) 1757 if (!next_entry)
1362 next_ts = iter->ts; 1758 next_ts = iter->ts;
@@ -1448,6 +1844,27 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1448 trace_seq_print_cont(s, iter); 1844 trace_seq_print_cont(s, iter);
1449 break; 1845 break;
1450 } 1846 }
1847 case TRACE_BRANCH: {
1848 struct trace_branch *field;
1849
1850 trace_assign_type(field, entry);
1851
1852 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1853 field->correct ? " ok " : " MISS ",
1854 field->func,
1855 field->file,
1856 field->line);
1857 break;
1858 }
1859 case TRACE_USER_STACK: {
1860 struct userstack_entry *field;
1861
1862 trace_assign_type(field, entry);
1863
1864 seq_print_userip_objs(field, s, sym_flags);
1865 trace_seq_putc(s, '\n');
1866 break;
1867 }
1451 default: 1868 default:
1452 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1869 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1453 } 1870 }
@@ -1472,6 +1889,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1472 if (entry->type == TRACE_CONT) 1889 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED; 1890 return TRACE_TYPE_HANDLED;
1474 1891
1892 test_cpu_buff_start(iter);
1893
1475 comm = trace_find_cmdline(iter->ent->pid); 1894 comm = trace_find_cmdline(iter->ent->pid);
1476 1895
1477 t = ns2usecs(iter->ts); 1896 t = ns2usecs(iter->ts);
@@ -1581,6 +2000,35 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1581 trace_seq_print_cont(s, iter); 2000 trace_seq_print_cont(s, iter);
1582 break; 2001 break;
1583 } 2002 }
2003 case TRACE_FN_RET: {
2004 return print_return_function(iter);
2005 break;
2006 }
2007 case TRACE_BRANCH: {
2008 struct trace_branch *field;
2009
2010 trace_assign_type(field, entry);
2011
2012 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2013 field->correct ? " ok " : " MISS ",
2014 field->func,
2015 field->file,
2016 field->line);
2017 break;
2018 }
2019 case TRACE_USER_STACK: {
2020 struct userstack_entry *field;
2021
2022 trace_assign_type(field, entry);
2023
2024 ret = seq_print_userip_objs(field, s, sym_flags);
2025 if (!ret)
2026 return TRACE_TYPE_PARTIAL_LINE;
2027 ret = trace_seq_putc(s, '\n');
2028 if (!ret)
2029 return TRACE_TYPE_PARTIAL_LINE;
2030 break;
2031 }
1584 } 2032 }
1585 return TRACE_TYPE_HANDLED; 2033 return TRACE_TYPE_HANDLED;
1586} 2034}
@@ -1640,6 +2088,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1640 break; 2088 break;
1641 } 2089 }
1642 case TRACE_SPECIAL: 2090 case TRACE_SPECIAL:
2091 case TRACE_USER_STACK:
1643 case TRACE_STACK: { 2092 case TRACE_STACK: {
1644 struct special_entry *field; 2093 struct special_entry *field;
1645 2094
@@ -1728,6 +2177,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1728 break; 2177 break;
1729 } 2178 }
1730 case TRACE_SPECIAL: 2179 case TRACE_SPECIAL:
2180 case TRACE_USER_STACK:
1731 case TRACE_STACK: { 2181 case TRACE_STACK: {
1732 struct special_entry *field; 2182 struct special_entry *field;
1733 2183
@@ -1782,6 +2232,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1782 break; 2232 break;
1783 } 2233 }
1784 case TRACE_SPECIAL: 2234 case TRACE_SPECIAL:
2235 case TRACE_USER_STACK:
1785 case TRACE_STACK: { 2236 case TRACE_STACK: {
1786 struct special_entry *field; 2237 struct special_entry *field;
1787 2238
@@ -1899,6 +2350,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1899 iter->trace = current_trace; 2350 iter->trace = current_trace;
1900 iter->pos = -1; 2351 iter->pos = -1;
1901 2352
2353 /* Annotate start of buffers if we had overruns */
2354 if (ring_buffer_overruns(iter->tr->buffer))
2355 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2356
2357
1902 for_each_tracing_cpu(cpu) { 2358 for_each_tracing_cpu(cpu) {
1903 2359
1904 iter->buffer_iter[cpu] = 2360 iter->buffer_iter[cpu] =
@@ -1917,10 +2373,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1917 m->private = iter; 2373 m->private = iter;
1918 2374
1919 /* stop the trace while dumping */ 2375 /* stop the trace while dumping */
1920 if (iter->tr->ctrl) { 2376 tracing_stop();
1921 tracer_enabled = 0;
1922 ftrace_function_enabled = 0;
1923 }
1924 2377
1925 if (iter->trace && iter->trace->open) 2378 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter); 2379 iter->trace->open(iter);
@@ -1966,14 +2419,7 @@ int tracing_release(struct inode *inode, struct file *file)
1966 iter->trace->close(iter); 2419 iter->trace->close(iter);
1967 2420
1968 /* reenable tracing if it was previously enabled */ 2421 /* reenable tracing if it was previously enabled */
1969 if (iter->tr->ctrl) { 2422 tracing_start();
1970 tracer_enabled = 1;
1971 /*
1972 * It is safe to enable function tracing even if it
1973 * isn't used
1974 */
1975 ftrace_function_enabled = 1;
1976 }
1977 mutex_unlock(&trace_types_lock); 2423 mutex_unlock(&trace_types_lock);
1978 2424
1979 seq_release(inode, file); 2425 seq_release(inode, file);
@@ -2189,13 +2635,16 @@ static struct file_operations tracing_cpumask_fops = {
2189}; 2635};
2190 2636
2191static ssize_t 2637static ssize_t
2192tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2638tracing_trace_options_read(struct file *filp, char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2639 size_t cnt, loff_t *ppos)
2194{ 2640{
2641 int i;
2195 char *buf; 2642 char *buf;
2196 int r = 0; 2643 int r = 0;
2197 int len = 0; 2644 int len = 0;
2198 int i; 2645 u32 tracer_flags = current_trace->flags->val;
2646 struct tracer_opt *trace_opts = current_trace->flags->opts;
2647
2199 2648
 2200 /* calculate max size */ 2649
2201 for (i = 0; trace_options[i]; i++) { 2650 for (i = 0; trace_options[i]; i++) {
@@ -2203,6 +2652,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2203 len += 3; /* "no" and space */ 2652 len += 3; /* "no" and space */
2204 } 2653 }
2205 2654
2655 /*
 2656 * Increase the size with the names of options specific
 2657 * to the current tracer.
2658 */
2659 for (i = 0; trace_opts[i].name; i++) {
2660 len += strlen(trace_opts[i].name);
2661 len += 3; /* "no" and space */
2662 }
2663
2206 /* +2 for \n and \0 */ 2664 /* +2 for \n and \0 */
2207 buf = kmalloc(len + 2, GFP_KERNEL); 2665 buf = kmalloc(len + 2, GFP_KERNEL);
2208 if (!buf) 2666 if (!buf)
@@ -2215,6 +2673,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2215 r += sprintf(buf + r, "no%s ", trace_options[i]); 2673 r += sprintf(buf + r, "no%s ", trace_options[i]);
2216 } 2674 }
2217 2675
2676 for (i = 0; trace_opts[i].name; i++) {
2677 if (tracer_flags & trace_opts[i].bit)
2678 r += sprintf(buf + r, "%s ",
2679 trace_opts[i].name);
2680 else
2681 r += sprintf(buf + r, "no%s ",
2682 trace_opts[i].name);
2683 }
2684
2218 r += sprintf(buf + r, "\n"); 2685 r += sprintf(buf + r, "\n");
2219 WARN_ON(r >= len + 2); 2686 WARN_ON(r >= len + 2);
2220 2687
@@ -2225,13 +2692,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2225 return r; 2692 return r;
2226} 2693}
2227 2694
2695/* Try to assign a tracer specific option */
2696static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2697{
2698 struct tracer_flags *trace_flags = trace->flags;
2699 struct tracer_opt *opts = NULL;
2700 int ret = 0, i = 0;
2701 int len;
2702
2703 for (i = 0; trace_flags->opts[i].name; i++) {
2704 opts = &trace_flags->opts[i];
2705 len = strlen(opts->name);
2706
2707 if (strncmp(cmp, opts->name, len) == 0) {
2708 ret = trace->set_flag(trace_flags->val,
2709 opts->bit, !neg);
2710 break;
2711 }
2712 }
2713 /* Not found */
2714 if (!trace_flags->opts[i].name)
2715 return -EINVAL;
2716
2717 /* Refused to handle */
2718 if (ret)
2719 return ret;
2720
2721 if (neg)
2722 trace_flags->val &= ~opts->bit;
2723 else
2724 trace_flags->val |= opts->bit;
2725
2726 return 0;
2727}
2728
2228static ssize_t 2729static ssize_t
2229tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2730tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2230 size_t cnt, loff_t *ppos) 2731 size_t cnt, loff_t *ppos)
2231{ 2732{
2232 char buf[64]; 2733 char buf[64];
2233 char *cmp = buf; 2734 char *cmp = buf;
2234 int neg = 0; 2735 int neg = 0;
2736 int ret;
2235 int i; 2737 int i;
2236 2738
2237 if (cnt >= sizeof(buf)) 2739 if (cnt >= sizeof(buf))
@@ -2258,11 +2760,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2258 break; 2760 break;
2259 } 2761 }
2260 } 2762 }
2261 /* 2763
2262 * If no option could be set, return an error: 2764 /* If no option could be set, test the specific tracer options */
2263 */ 2765 if (!trace_options[i]) {
2264 if (!trace_options[i]) 2766 ret = set_tracer_option(current_trace, cmp, neg);
2265 return -EINVAL; 2767 if (ret)
2768 return ret;
2769 }
2266 2770
2267 filp->f_pos += cnt; 2771 filp->f_pos += cnt;
2268 2772
@@ -2271,8 +2775,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2271 2775
2272static struct file_operations tracing_iter_fops = { 2776static struct file_operations tracing_iter_fops = {
2273 .open = tracing_open_generic, 2777 .open = tracing_open_generic,
2274 .read = tracing_iter_ctrl_read, 2778 .read = tracing_trace_options_read,
2275 .write = tracing_iter_ctrl_write, 2779 .write = tracing_trace_options_write,
2276}; 2780};
2277 2781
2278static const char readme_msg[] = 2782static const char readme_msg[] =
@@ -2286,9 +2790,9 @@ static const char readme_msg[] =
2286 "# echo sched_switch > /debug/tracing/current_tracer\n" 2790 "# echo sched_switch > /debug/tracing/current_tracer\n"
2287 "# cat /debug/tracing/current_tracer\n" 2791 "# cat /debug/tracing/current_tracer\n"
2288 "sched_switch\n" 2792 "sched_switch\n"
2289 "# cat /debug/tracing/iter_ctrl\n" 2793 "# cat /debug/tracing/trace_options\n"
2290 "noprint-parent nosym-offset nosym-addr noverbose\n" 2794 "noprint-parent nosym-offset nosym-addr noverbose\n"
2291 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2795 "# echo print-parent > /debug/tracing/trace_options\n"
2292 "# echo 1 > /debug/tracing/tracing_enabled\n" 2796 "# echo 1 > /debug/tracing/tracing_enabled\n"
2293 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2797 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2294 "echo 0 > /debug/tracing/tracing_enabled\n" 2798 "echo 0 > /debug/tracing/tracing_enabled\n"
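The renamed write path above now falls through to the current tracer's own flags via set_tracer_option() when no global option name matches, so the same file serves both sets. Continuing the hypothetical "verbose" option sketched near dummy_tracer_opt:

	# echo noftrace_printk > /debug/tracing/trace_options	(global option)
	# echo verbose > /debug/tracing/trace_options		(tracer-specific)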
@@ -2311,11 +2815,10 @@ static ssize_t
2311tracing_ctrl_read(struct file *filp, char __user *ubuf, 2815tracing_ctrl_read(struct file *filp, char __user *ubuf,
2312 size_t cnt, loff_t *ppos) 2816 size_t cnt, loff_t *ppos)
2313{ 2817{
2314 struct trace_array *tr = filp->private_data;
2315 char buf[64]; 2818 char buf[64];
2316 int r; 2819 int r;
2317 2820
2318 r = sprintf(buf, "%ld\n", tr->ctrl); 2821 r = sprintf(buf, "%u\n", tracer_enabled);
2319 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2822 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2320} 2823}
2321 2824
@@ -2343,16 +2846,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2343 val = !!val; 2846 val = !!val;
2344 2847
2345 mutex_lock(&trace_types_lock); 2848 mutex_lock(&trace_types_lock);
2346 if (tr->ctrl ^ val) { 2849 if (tracer_enabled ^ val) {
2347 if (val) 2850 if (val) {
2348 tracer_enabled = 1; 2851 tracer_enabled = 1;
2349 else 2852 if (current_trace->start)
2853 current_trace->start(tr);
2854 tracing_start();
2855 } else {
2350 tracer_enabled = 0; 2856 tracer_enabled = 0;
2351 2857 tracing_stop();
2352 tr->ctrl = val; 2858 if (current_trace->stop)
2353 2859 current_trace->stop(tr);
2354 if (current_trace && current_trace->ctrl_update) 2860 }
2355 current_trace->ctrl_update(tr);
2356 } 2861 }
2357 mutex_unlock(&trace_types_lock); 2862 mutex_unlock(&trace_types_lock);
2358 2863
@@ -2378,29 +2883,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2378 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2883 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2379} 2884}
2380 2885
2381static ssize_t 2886static int tracing_set_tracer(char *buf)
2382tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 size_t cnt, loff_t *ppos)
2384{ 2887{
2385 struct trace_array *tr = &global_trace; 2888 struct trace_array *tr = &global_trace;
2386 struct tracer *t; 2889 struct tracer *t;
2387 char buf[max_tracer_type_len+1]; 2890 int ret = 0;
2388 int i;
2389 size_t ret;
2390
2391 ret = cnt;
2392
2393 if (cnt > max_tracer_type_len)
2394 cnt = max_tracer_type_len;
2395
2396 if (copy_from_user(&buf, ubuf, cnt))
2397 return -EFAULT;
2398
2399 buf[cnt] = 0;
2400
2401 /* strip ending whitespace. */
2402 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2403 buf[i] = 0;
2404 2891
2405 mutex_lock(&trace_types_lock); 2892 mutex_lock(&trace_types_lock);
2406 for (t = trace_types; t; t = t->next) { 2893 for (t = trace_types; t; t = t->next) {
@@ -2414,18 +2901,52 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 if (t == current_trace) 2901 if (t == current_trace)
2415 goto out; 2902 goto out;
2416 2903
2904 trace_branch_disable();
2417 if (current_trace && current_trace->reset) 2905 if (current_trace && current_trace->reset)
2418 current_trace->reset(tr); 2906 current_trace->reset(tr);
2419 2907
2420 current_trace = t; 2908 current_trace = t;
2421 if (t->init) 2909 if (t->init) {
2422 t->init(tr); 2910 ret = t->init(tr);
2911 if (ret)
2912 goto out;
2913 }
2423 2914
2915 trace_branch_enable(tr);
2424 out: 2916 out:
2425 mutex_unlock(&trace_types_lock); 2917 mutex_unlock(&trace_types_lock);
2426 2918
2427 if (ret > 0) 2919 return ret;
2428 filp->f_pos += ret; 2920}
2921
2922static ssize_t
2923tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2924 size_t cnt, loff_t *ppos)
2925{
2926 char buf[max_tracer_type_len+1];
2927 int i;
2928 size_t ret;
2929 int err;
2930
2931 ret = cnt;
2932
2933 if (cnt > max_tracer_type_len)
2934 cnt = max_tracer_type_len;
2935
2936 if (copy_from_user(&buf, ubuf, cnt))
2937 return -EFAULT;
2938
2939 buf[cnt] = 0;
2940
2941 /* strip ending whitespace. */
2942 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2943 buf[i] = 0;
2944
2945 err = tracing_set_tracer(buf);
2946 if (err)
2947 return err;
2948
2949 filp->f_pos += ret;
2429 2950
2430 return ret; 2951 return ret;
2431} 2952}
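The refactor above is more than tidying: tracing_set_tracer() can now be called from inside the kernel and can report an init failure, while tracing_set_trace_write() remains a thin copy-and-strip wrapper around it. From userspace nothing changes; a minimal session, assuming the /debug mount point that the readme text in this file already uses:

	# echo nop > /debug/tracing/current_tracer
	# cat /debug/tracing/current_tracer
	nop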
@@ -2492,6 +3013,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2492 return -ENOMEM; 3013 return -ENOMEM;
2493 3014
2494 mutex_lock(&trace_types_lock); 3015 mutex_lock(&trace_types_lock);
3016
3017 /* trace pipe does not show start of buffer */
3018 cpus_setall(iter->started);
3019
2495 iter->tr = &global_trace; 3020 iter->tr = &global_trace;
2496 iter->trace = current_trace; 3021 iter->trace = current_trace;
2497 filp->private_data = iter; 3022 filp->private_data = iter;
@@ -2667,7 +3192,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2667 char buf[64]; 3192 char buf[64];
2668 int r; 3193 int r;
2669 3194
2670 r = sprintf(buf, "%lu\n", tr->entries); 3195 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2671 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3196 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2672} 3197}
2673 3198
@@ -2678,7 +3203,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2678 unsigned long val; 3203 unsigned long val;
2679 char buf[64]; 3204 char buf[64];
2680 int ret, cpu; 3205 int ret, cpu;
2681 struct trace_array *tr = filp->private_data;
2682 3206
2683 if (cnt >= sizeof(buf)) 3207 if (cnt >= sizeof(buf))
2684 return -EINVAL; 3208 return -EINVAL;
@@ -2698,12 +3222,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2698 3222
2699 mutex_lock(&trace_types_lock); 3223 mutex_lock(&trace_types_lock);
2700 3224
2701 if (tr->ctrl) { 3225 tracing_stop();
2702 cnt = -EBUSY;
2703 pr_info("ftrace: please disable tracing"
2704 " before modifying buffer size\n");
2705 goto out;
2706 }
2707 3226
2708 /* disable all cpu buffers */ 3227 /* disable all cpu buffers */
2709 for_each_tracing_cpu(cpu) { 3228 for_each_tracing_cpu(cpu) {
@@ -2713,6 +3232,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2713 atomic_inc(&max_tr.data[cpu]->disabled); 3232 atomic_inc(&max_tr.data[cpu]->disabled);
2714 } 3233 }
2715 3234
3235 /* value is in KB */
3236 val <<= 10;
3237
2716 if (val != global_trace.entries) { 3238 if (val != global_trace.entries) {
2717 ret = ring_buffer_resize(global_trace.buffer, val); 3239 ret = ring_buffer_resize(global_trace.buffer, val);
2718 if (ret < 0) { 3240 if (ret < 0) {
@@ -2751,6 +3273,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2751 atomic_dec(&max_tr.data[cpu]->disabled); 3273 atomic_dec(&max_tr.data[cpu]->disabled);
2752 } 3274 }
2753 3275
3276 tracing_start();
2754 max_tr.entries = global_trace.entries; 3277 max_tr.entries = global_trace.entries;
2755 mutex_unlock(&trace_types_lock); 3278 mutex_unlock(&trace_types_lock);
2756 3279
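Two behavior changes land around this hunk: the file now speaks kilobytes (val <<= 10 on the write side, tr->entries >> 10 on the read side), and a resize no longer requires tracing to be disabled first, since tracing_stop()/tracing_start() bracket the operation. A sketch of the new interface, with illustrative sizes:

	# cat /debug/tracing/buffer_size_kb
	1408
	# echo 4096 > /debug/tracing/buffer_size_kb
	# cat /debug/tracing/buffer_size_kb
	4096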
@@ -2773,9 +3296,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2773{ 3296{
2774 char *buf; 3297 char *buf;
2775 char *end; 3298 char *end;
2776 struct trace_array *tr = &global_trace;
2777 3299
2778 if (!tr->ctrl || tracing_disabled) 3300 if (tracing_disabled)
2779 return -EINVAL; 3301 return -EINVAL;
2780 3302
2781 if (cnt > TRACE_BUF_SIZE) 3303 if (cnt > TRACE_BUF_SIZE)
@@ -2841,22 +3363,38 @@ static struct file_operations tracing_mark_fops = {
2841 3363
2842#ifdef CONFIG_DYNAMIC_FTRACE 3364#ifdef CONFIG_DYNAMIC_FTRACE
2843 3365
3366int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3367{
3368 return 0;
3369}
3370
2844static ssize_t 3371static ssize_t
2845tracing_read_long(struct file *filp, char __user *ubuf, 3372tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2846 size_t cnt, loff_t *ppos) 3373 size_t cnt, loff_t *ppos)
2847{ 3374{
3375 static char ftrace_dyn_info_buffer[1024];
3376 static DEFINE_MUTEX(dyn_info_mutex);
2848 unsigned long *p = filp->private_data; 3377 unsigned long *p = filp->private_data;
2849 char buf[64]; 3378 char *buf = ftrace_dyn_info_buffer;
3379 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2850 int r; 3380 int r;
2851 3381
2852 r = sprintf(buf, "%ld\n", *p); 3382 mutex_lock(&dyn_info_mutex);
3383 r = sprintf(buf, "%ld ", *p);
2853 3384
2854 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3385 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3386 buf[r++] = '\n';
3387
3388 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3389
3390 mutex_unlock(&dyn_info_mutex);
3391
3392 return r;
2855} 3393}
2856 3394
2857static struct file_operations tracing_read_long_fops = { 3395static struct file_operations tracing_dyn_info_fops = {
2858 .open = tracing_open_generic, 3396 .open = tracing_open_generic,
2859 .read = tracing_read_long, 3397 .read = tracing_read_dyn_info,
2860}; 3398};
2861#endif 3399#endif
2862 3400
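The __weak stub lets an architecture append its own text to dyn_ftrace_total_info without adding hooks to the common code. A hypothetical override, purely illustrative (the counter name is made up and not part of this patch):

	/* arch-side override of the __weak default (hypothetical) */
	int ftrace_arch_read_dyn_info(char *buf, int size)
	{
		extern int arch_patch_failures;	/* illustrative counter */

		return snprintf(buf, size, "failed: %d", arch_patch_failures);
	}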
@@ -2897,10 +3435,10 @@ static __init int tracer_init_debugfs(void)
2897 if (!entry) 3435 if (!entry)
2898 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3436 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2899 3437
2900 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3438 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2901 NULL, &tracing_iter_fops); 3439 NULL, &tracing_iter_fops);
2902 if (!entry) 3440 if (!entry)
2903 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3441 pr_warning("Could not create debugfs 'trace_options' entry\n");
2904 3442
2905 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3443 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2906 NULL, &tracing_cpumask_fops); 3444 NULL, &tracing_cpumask_fops);
@@ -2950,11 +3488,11 @@ static __init int tracer_init_debugfs(void)
2950 pr_warning("Could not create debugfs " 3488 pr_warning("Could not create debugfs "
2951 "'trace_pipe' entry\n"); 3489 "'trace_pipe' entry\n");
2952 3490
2953 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3491 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2954 &global_trace, &tracing_entries_fops); 3492 &global_trace, &tracing_entries_fops);
2955 if (!entry) 3493 if (!entry)
2956 pr_warning("Could not create debugfs " 3494 pr_warning("Could not create debugfs "
2957 "'trace_entries' entry\n"); 3495 "'buffer_size_kb' entry\n");
2958 3496
2959 entry = debugfs_create_file("trace_marker", 0220, d_tracer, 3497 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2960 NULL, &tracing_mark_fops); 3498 NULL, &tracing_mark_fops);
@@ -2965,7 +3503,7 @@ static __init int tracer_init_debugfs(void)
2965#ifdef CONFIG_DYNAMIC_FTRACE 3503#ifdef CONFIG_DYNAMIC_FTRACE
2966 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3504 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2967 &ftrace_update_tot_cnt, 3505 &ftrace_update_tot_cnt,
2968 &tracing_read_long_fops); 3506 &tracing_dyn_info_fops);
2969 if (!entry) 3507 if (!entry)
2970 pr_warning("Could not create debugfs " 3508 pr_warning("Could not create debugfs "
2971 "'dyn_ftrace_total_info' entry\n"); 3509 "'dyn_ftrace_total_info' entry\n");
@@ -2988,7 +3526,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2988 unsigned long flags, irq_flags; 3526 unsigned long flags, irq_flags;
2989 int cpu, len = 0, size, pc; 3527 int cpu, len = 0, size, pc;
2990 3528
2991 if (!tr->ctrl || tracing_disabled) 3529 if (tracing_disabled)
2992 return 0; 3530 return 0;
2993 3531
2994 pc = preempt_count(); 3532 pc = preempt_count();
@@ -3046,7 +3584,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3046static int trace_panic_handler(struct notifier_block *this, 3584static int trace_panic_handler(struct notifier_block *this,
3047 unsigned long event, void *unused) 3585 unsigned long event, void *unused)
3048{ 3586{
3049 ftrace_dump(); 3587 if (ftrace_dump_on_oops)
3588 ftrace_dump();
3050 return NOTIFY_OK; 3589 return NOTIFY_OK;
3051} 3590}
3052 3591
@@ -3062,7 +3601,8 @@ static int trace_die_handler(struct notifier_block *self,
3062{ 3601{
3063 switch (val) { 3602 switch (val) {
3064 case DIE_OOPS: 3603 case DIE_OOPS:
3065 ftrace_dump(); 3604 if (ftrace_dump_on_oops)
3605 ftrace_dump();
3066 break; 3606 break;
3067 default: 3607 default:
3068 break; 3608 break;
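Both the panic and die notifiers now consult ftrace_dump_on_oops rather than dumping the buffer unconditionally. The hunk that wires the knob up is not shown here, but the diffstat touches kernel/sysctl.c, so presumably it can be flipped at runtime along these lines:

	# echo 1 > /proc/sys/kernel/ftrace_dump_on_oops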
@@ -3103,7 +3643,6 @@ trace_printk_seq(struct trace_seq *s)
3103 trace_seq_reset(s); 3643 trace_seq_reset(s);
3104} 3644}
3105 3645
3106
3107void ftrace_dump(void) 3646void ftrace_dump(void)
3108{ 3647{
3109 static DEFINE_SPINLOCK(ftrace_dump_lock); 3648 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3128,6 +3667,9 @@ void ftrace_dump(void)
3128 atomic_inc(&global_trace.data[cpu]->disabled); 3667 atomic_inc(&global_trace.data[cpu]->disabled);
3129 } 3668 }
3130 3669
3670 /* don't look at user memory in panic mode */
3671 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3672
3131 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3673 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3132 3674
3133 iter.tr = &global_trace; 3675 iter.tr = &global_trace;
@@ -3221,7 +3763,6 @@ __init static int tracer_alloc_buffers(void)
3221#endif 3763#endif
3222 3764
3223 /* All seems OK, enable tracing */ 3765 /* All seems OK, enable tracing */
3224 global_trace.ctrl = tracer_enabled;
3225 tracing_disabled = 0; 3766 tracing_disabled = 0;
3226 3767
3227 atomic_notifier_chain_register(&panic_notifier_list, 3768 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..28c15c2ebc22 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h>
11 12
12enum trace_type { 13enum trace_type {
13 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,11 @@ enum trace_type {
21 TRACE_SPECIAL, 22 TRACE_SPECIAL,
22 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
23 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
24 TRACE_BOOT, 25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_FN_RET,
29 TRACE_USER_STACK,
25 30
26 __TRACE_LAST_TYPE 31 __TRACE_LAST_TYPE
27}; 32};
@@ -38,6 +43,7 @@ struct trace_entry {
38 unsigned char flags; 43 unsigned char flags;
39 unsigned char preempt_count; 44 unsigned char preempt_count;
40 int pid; 45 int pid;
46 int tgid;
41}; 47};
42 48
43/* 49/*
@@ -48,6 +54,16 @@ struct ftrace_entry {
48 unsigned long ip; 54 unsigned long ip;
49 unsigned long parent_ip; 55 unsigned long parent_ip;
50}; 56};
57
58/* Function return entry */
59struct ftrace_ret_entry {
60 struct trace_entry ent;
61 unsigned long ip;
62 unsigned long parent_ip;
63 unsigned long long calltime;
64 unsigned long long rettime;
65 unsigned long overrun;
66};
51extern struct tracer boot_tracer; 67extern struct tracer boot_tracer;
52 68
53/* 69/*
@@ -85,6 +101,11 @@ struct stack_entry {
85 unsigned long caller[FTRACE_STACK_ENTRIES]; 101 unsigned long caller[FTRACE_STACK_ENTRIES];
86}; 102};
87 103
104struct userstack_entry {
105 struct trace_entry ent;
106 unsigned long caller[FTRACE_STACK_ENTRIES];
107};
108
88/* 109/*
89 * ftrace_printk entry: 110 * ftrace_printk entry:
90 */ 111 */
@@ -112,9 +133,24 @@ struct trace_mmiotrace_map {
112 struct mmiotrace_map map; 133 struct mmiotrace_map map;
113}; 134};
114 135
115struct trace_boot { 136struct trace_boot_call {
116 struct trace_entry ent; 137 struct trace_entry ent;
117 struct boot_trace initcall; 138 struct boot_trace_call boot_call;
139};
140
141struct trace_boot_ret {
142 struct trace_entry ent;
143 struct boot_trace_ret boot_ret;
144};
145
146#define TRACE_FUNC_SIZE 30
147#define TRACE_FILE_SIZE 20
148struct trace_branch {
149 struct trace_entry ent;
150 unsigned line;
151 char func[TRACE_FUNC_SIZE+1];
152 char file[TRACE_FILE_SIZE+1];
153 char correct;
118}; 154};
119 155
120/* 156/*
@@ -172,7 +208,6 @@ struct trace_iterator;
172struct trace_array { 208struct trace_array {
173 struct ring_buffer *buffer; 209 struct ring_buffer *buffer;
174 unsigned long entries; 210 unsigned long entries;
175 long ctrl;
176 int cpu; 211 int cpu;
177 cycle_t time_start; 212 cycle_t time_start;
178 struct task_struct *waiter; 213 struct task_struct *waiter;
@@ -212,13 +247,17 @@ extern void __ftrace_bad_type(void);
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 247 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 248 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 249 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
250 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 251 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \ 252 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 253 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \ 254 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 255 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \ 256 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ 257 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
258 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
259 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
260 IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
222 __ftrace_bad_type(); \ 261 __ftrace_bad_type(); \
223 } while (0) 262 } while (0)
224 263
@@ -229,29 +268,55 @@ enum print_line_t {
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 268 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230}; 269};
231 270
271
272/*
273 * An option specific to a tracer. This is a boolean value.
274 * The bit is the bit index that sets its value on the
275 * flags value in struct tracer_flags.
276 */
277struct tracer_opt {
278 const char *name; /* Will appear on the trace_options file */
279 u32 bit; /* Mask assigned in val field in tracer_flags */
280};
281
282/*
283 * The set of specific options for a tracer. Your tracer
284 * have to set the initial value of the flags val.
285 */
286struct tracer_flags {
287 u32 val;
288 struct tracer_opt *opts;
289};
290
291/* Makes more easy to define a tracer opt */
292#define TRACER_OPT(s, b) .name = #s, .bit = b
293
232/* 294/*
233 * A specific tracer, represented by methods that operate on a trace array: 295 * A specific tracer, represented by methods that operate on a trace array:
234 */ 296 */
235struct tracer { 297struct tracer {
236 const char *name; 298 const char *name;
237 void (*init)(struct trace_array *tr); 299 /* Your tracer should raise a warning if init fails */
300 int (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 301 void (*reset)(struct trace_array *tr);
302 void (*start)(struct trace_array *tr);
303 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 304 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 305 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 306 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 307 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 308 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 309 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 310#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 311 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 312 struct trace_array *tr);
251#endif 313#endif
252 enum print_line_t (*print_line)(struct trace_iterator *iter); 314 enum print_line_t (*print_line)(struct trace_iterator *iter);
315 /* If you handled the flag setting, return 0 */
316 int (*set_flag)(u32 old_flags, u32 bit, int set);
253 struct tracer *next; 317 struct tracer *next;
254 int print_max; 318 int print_max;
319 struct tracer_flags *flags;
255}; 320};
256 321
257struct trace_seq { 322struct trace_seq {
@@ -279,8 +344,11 @@ struct trace_iterator {
279 unsigned long iter_flags; 344 unsigned long iter_flags;
280 loff_t pos; 345 loff_t pos;
281 long idx; 346 long idx;
347
348 cpumask_t started;
282}; 349};
283 350
351int tracing_is_enabled(void);
284void trace_wake_up(void); 352void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 353void tracing_reset(struct trace_array *tr, int cpu);
286int tracing_open_generic(struct inode *inode, struct file *filp); 354int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -320,9 +388,14 @@ void trace_function(struct trace_array *tr,
320 unsigned long ip, 388 unsigned long ip,
321 unsigned long parent_ip, 389 unsigned long parent_ip,
322 unsigned long flags, int pc); 390 unsigned long flags, int pc);
391void
392trace_function_return(struct ftrace_retfunc *trace);
323 393
324void tracing_start_cmdline_record(void); 394void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 395void tracing_stop_cmdline_record(void);
396void tracing_sched_switch_assign_trace(struct trace_array *tr);
397void tracing_stop_sched_switch_record(void);
398void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 399int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 400void unregister_tracer(struct tracer *type);
328 401
@@ -383,12 +456,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
383 struct trace_array *tr); 456 struct trace_array *tr);
384extern int trace_selftest_startup_sysprof(struct tracer *trace, 457extern int trace_selftest_startup_sysprof(struct tracer *trace,
385 struct trace_array *tr); 458 struct trace_array *tr);
459extern int trace_selftest_startup_branch(struct tracer *trace,
460 struct trace_array *tr);
386#endif /* CONFIG_FTRACE_STARTUP_TEST */ 461#endif /* CONFIG_FTRACE_STARTUP_TEST */
387 462
388extern void *head_page(struct trace_array_cpu *data); 463extern void *head_page(struct trace_array_cpu *data);
389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 464extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s, 465extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter); 466 struct trace_iterator *iter);
467
468extern int
469seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
470 unsigned long sym_flags);
392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 471extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
393 size_t cnt); 472 size_t cnt);
394extern long ns2usecs(cycle_t nsec); 473extern long ns2usecs(cycle_t nsec);
@@ -396,6 +475,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
396 475
397extern unsigned long trace_flags; 476extern unsigned long trace_flags;
398 477
478/* Standard output formatting function used for function return traces */
479#ifdef CONFIG_FUNCTION_RET_TRACER
480extern enum print_line_t print_return_function(struct trace_iterator *iter);
481#else
482static inline enum print_line_t
483print_return_function(struct trace_iterator *iter)
484{
485 return TRACE_TYPE_UNHANDLED;
486}
487#endif
488
399/* 489/*
400 * trace_iterator_flags is an enumeration that defines bit 490 * trace_iterator_flags is an enumeration that defines bit
401 * positions into trace_flags that controls the output. 491 * positions into trace_flags that controls the output.
@@ -415,8 +505,92 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 505 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 506 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 507 TRACE_ITER_PRINTK = 0x400,
508 TRACE_ITER_PREEMPTONLY = 0x800,
509 TRACE_ITER_BRANCH = 0x1000,
510 TRACE_ITER_ANNOTATE = 0x2000,
511 TRACE_ITER_USERSTACKTRACE = 0x4000,
512 TRACE_ITER_SYM_USEROBJ = 0x8000
418}; 513};
419 514
515/*
516 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
517 * control the output of kernel symbols.
518 */
519#define TRACE_ITER_SYM_MASK \
520 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
521
420extern struct tracer nop_trace; 522extern struct tracer nop_trace;
421 523
524/**
525 * ftrace_preempt_disable - disable preemption scheduler safe
526 *
 527 * When tracing can happen inside the scheduler, there exist
 528 * cases where the tracing might happen before the need_resched
529 * flag is checked. If this happens and the tracer calls
530 * preempt_enable (after a disable), a schedule might take place
531 * causing an infinite recursion.
532 *
 533 * To prevent this, we read the need_resched flag before
 534 * disabling preemption. When we want to enable preemption we
 535 * check the flag; if it is set, we call preempt_enable_no_resched.
536 * Otherwise, we call preempt_enable.
537 *
 538 * The rationale for doing the above is that if need_resched is set
539 * and we have yet to reschedule, we are either in an atomic location
540 * (where we do not need to check for scheduling) or we are inside
541 * the scheduler and do not want to resched.
542 */
543static inline int ftrace_preempt_disable(void)
544{
545 int resched;
546
547 resched = need_resched();
548 preempt_disable_notrace();
549
550 return resched;
551}
552
553/**
554 * ftrace_preempt_enable - enable preemption scheduler safe
555 * @resched: the return value from ftrace_preempt_disable
556 *
557 * This is a scheduler safe way to enable preemption and not miss
 558 * any preemption checks. The disable call saved the preemption state.
 559 * If resched is set, then we are either inside an atomic section or
 560 * inside the scheduler (we would have already scheduled
561 * otherwise). In this case, we do not want to call normal
562 * preempt_enable, but preempt_enable_no_resched instead.
563 */
564static inline void ftrace_preempt_enable(int resched)
565{
566 if (resched)
567 preempt_enable_no_resched_notrace();
568 else
569 preempt_enable_notrace();
570}
571
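A caller is expected to bracket its work with the pair, threading the saved need_resched state from the disable to the enable. A minimal sketch (the probe name is illustrative):

	static void my_trace_probe(void)
	{
		int resched;

		resched = ftrace_preempt_disable();
		/* record the event; safe even if we are inside the scheduler */
		ftrace_preempt_enable(resched);
	}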
572#ifdef CONFIG_BRANCH_TRACER
573extern int enable_branch_tracing(struct trace_array *tr);
574extern void disable_branch_tracing(void);
575static inline int trace_branch_enable(struct trace_array *tr)
576{
577 if (trace_flags & TRACE_ITER_BRANCH)
578 return enable_branch_tracing(tr);
579 return 0;
580}
581static inline void trace_branch_disable(void)
582{
583 /* due to races, always disable */
584 disable_branch_tracing();
585}
586#else
587static inline int trace_branch_enable(struct trace_array *tr)
588{
589 return 0;
590}
591static inline void trace_branch_disable(void)
592{
593}
594#endif /* CONFIG_BRANCH_TRACER */
595
422#endif /* _LINUX_KERNEL_TRACE_H */ 596#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..a4fa2c57e34e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,73 +13,117 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
 19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready.
 20 * It doesn't enable sched event tracing, however;
 21 * you have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static void reset_boot_trace(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr;
39 43
40 trace_boot_enabled = 0; 44 tr->time_start = ftrace_now(tr->cpu);
45
46 for_each_online_cpu(cpu)
47 tracing_reset(tr, cpu);
48}
49
50static int boot_trace_init(struct trace_array *tr)
51{
52 int cpu;
53 boot_trace = tr;
41 54
42 for_each_cpu_mask(cpu, cpu_possible_map) 55 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu); 56 tracing_reset(tr, cpu);
57
58 tracing_sched_switch_assign_trace(tr);
59 return 0;
44} 60}
45 61
46static void boot_trace_ctrl_update(struct trace_array *tr) 62static enum print_line_t
63initcall_call_print_line(struct trace_iterator *iter)
47{ 64{
48 if (tr->ctrl) 65 struct trace_entry *entry = iter->ent;
49 start_boot_trace(); 66 struct trace_seq *s = &iter->seq;
67 struct trace_boot_call *field;
68 struct boot_trace_call *call;
69 u64 ts;
70 unsigned long nsec_rem;
71 int ret;
72
73 trace_assign_type(field, entry);
74 call = &field->boot_call;
75 ts = iter->ts;
76 nsec_rem = do_div(ts, 1000000000);
77
78 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
79 (unsigned long)ts, nsec_rem, call->func, call->caller);
80
81 if (!ret)
82 return TRACE_TYPE_PARTIAL_LINE;
50 else 83 else
51 stop_boot_trace(); 84 return TRACE_TYPE_HANDLED;
52} 85}
53 86
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 87static enum print_line_t
88initcall_ret_print_line(struct trace_iterator *iter)
55{ 89{
56 int ret;
57 struct trace_entry *entry = iter->ent; 90 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq; 91 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime); 92 struct trace_boot_ret *field;
62 struct timespec rettime = ktime_to_timespec(it->rettime); 93 struct boot_trace_ret *init_ret;
63 94 u64 ts;
64 if (entry->type == TRACE_BOOT) { 95 unsigned long nsec_rem;
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 96 int ret;
66 calltime.tv_sec, 97
67 calltime.tv_nsec, 98 trace_assign_type(field, entry);
68 it->func, it->caller); 99 init_ret = &field->boot_ret;
69 if (!ret) 100 ts = iter->ts;
70 return TRACE_TYPE_PARTIAL_LINE; 101 nsec_rem = do_div(ts, 1000000000);
71 102
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 103 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n", 104 "returned %d after %llu msecs\n",
74 rettime.tv_sec, 105 (unsigned long) ts,
75 rettime.tv_nsec, 106 nsec_rem,
76 it->func, it->result, it->duration); 107 init_ret->func, init_ret->result, init_ret->duration);
77 108
78 if (!ret) 109 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE; 110 return TRACE_TYPE_PARTIAL_LINE;
111 else
80 return TRACE_TYPE_HANDLED; 112 return TRACE_TYPE_HANDLED;
113}
114
115static enum print_line_t initcall_print_line(struct trace_iterator *iter)
116{
117 struct trace_entry *entry = iter->ent;
118
119 switch (entry->type) {
120 case TRACE_BOOT_CALL:
121 return initcall_call_print_line(iter);
122 case TRACE_BOOT_RET:
123 return initcall_ret_print_line(iter);
124 default:
125 return TRACE_TYPE_UNHANDLED;
81 } 126 }
82 return TRACE_TYPE_UNHANDLED;
83} 127}
84 128
85struct tracer boot_tracer __read_mostly = 129struct tracer boot_tracer __read_mostly =
@@ -87,27 +131,53 @@ struct tracer boot_tracer __read_mostly =
87 .name = "initcall", 131 .name = "initcall",
88 .init = boot_trace_init, 132 .init = boot_trace_init,
89 .reset = reset_boot_trace, 133 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 134 .print_line = initcall_print_line,
92}; 135};
93 136
94void trace_boot(struct boot_trace *it, initcall_t fn) 137void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
95{ 138{
96 struct ring_buffer_event *event; 139 struct ring_buffer_event *event;
97 struct trace_boot *entry; 140 struct trace_boot_call *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags; 141 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 142 struct trace_array *tr = boot_trace;
101 143
102 if (!trace_boot_enabled) 144 if (!pre_initcalls_finished)
103 return; 145 return;
104 146
105 /* Get its name now since this function could 147 /* Get its name now since this function could
106 * disappear because it is in the .init section. 148 * disappear because it is in the .init section.
107 */ 149 */
108 sprint_symbol(it->func, (unsigned long)fn); 150 sprint_symbol(bt->func, (unsigned long)fn);
151 preempt_disable();
152
153 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
154 &irq_flags);
155 if (!event)
156 goto out;
157 entry = ring_buffer_event_data(event);
158 tracing_generic_entry_update(&entry->ent, 0, 0);
159 entry->ent.type = TRACE_BOOT_CALL;
160 entry->boot_call = *bt;
161 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
162
163 trace_wake_up();
164
165 out:
166 preempt_enable();
167}
168
169void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
170{
171 struct ring_buffer_event *event;
172 struct trace_boot_ret *entry;
173 unsigned long irq_flags;
174 struct trace_array *tr = boot_trace;
175
176 if (!pre_initcalls_finished)
177 return;
178
179 sprint_symbol(bt->func, (unsigned long)fn);
109 preempt_disable(); 180 preempt_disable();
110 data = tr->data[smp_processor_id()];
111 181
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 182 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags); 183 &irq_flags);
@@ -115,8 +185,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
115 goto out; 185 goto out;
116 entry = ring_buffer_event_data(event); 186 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0); 187 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT; 188 entry->ent.type = TRACE_BOOT_RET;
119 entry->initcall = *it; 189 entry->boot_ret = *bt;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 190 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121 191
122 trace_wake_up(); 192 trace_wake_up();
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..877ee88e6a74
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,341 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/module.h>
12#include <linux/ftrace.h>
13#include <linux/hash.h>
14#include <linux/fs.h>
15#include <asm/local.h>
16#include "trace.h"
17
18#ifdef CONFIG_BRANCH_TRACER
19
20static int branch_tracing_enabled __read_mostly;
21static DEFINE_MUTEX(branch_tracing_mutex);
22static struct trace_array *branch_tracer;
23
24static void
25probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
26{
27 struct trace_array *tr = branch_tracer;
28 struct ring_buffer_event *event;
29 struct trace_branch *entry;
30 unsigned long flags, irq_flags;
31 int cpu, pc;
32 const char *p;
33
34 /*
35 * I would love to save just the ftrace_likely_data pointer, but
36 * this code can also be used by modules. Ugly things can happen
37 * if the module is unloaded, and then we go and read the
38 * pointer. This is slower, but much safer.
39 */
40
41 if (unlikely(!tr))
42 return;
43
44 raw_local_irq_save(flags);
45 cpu = raw_smp_processor_id();
46 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
47 goto out;
48
49 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
50 &irq_flags);
51 if (!event)
52 goto out;
53
54 pc = preempt_count();
55 entry = ring_buffer_event_data(event);
56 tracing_generic_entry_update(&entry->ent, flags, pc);
57 entry->ent.type = TRACE_BRANCH;
58
59 /* Strip off the path, only save the file */
60 p = f->file + strlen(f->file);
61 while (p >= f->file && *p != '/')
62 p--;
63 p++;
64
65 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
66 strncpy(entry->file, p, TRACE_FILE_SIZE);
67 entry->func[TRACE_FUNC_SIZE] = 0;
68 entry->file[TRACE_FILE_SIZE] = 0;
69 entry->line = f->line;
70 entry->correct = val == expect;
71
72 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
73
74 out:
75 atomic_dec(&tr->data[cpu]->disabled);
76 raw_local_irq_restore(flags);
77}
78
79static inline
80void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
81{
82 if (!branch_tracing_enabled)
83 return;
84
85 probe_likely_condition(f, val, expect);
86}
87
88int enable_branch_tracing(struct trace_array *tr)
89{
90 int ret = 0;
91
92 mutex_lock(&branch_tracing_mutex);
93 branch_tracer = tr;
94 /*
 95 * Must be seen before enabling. The reader checks the enabled
 96 * flag first, so we do not need a matching rmb()
97 */
98 smp_wmb();
99 branch_tracing_enabled++;
100 mutex_unlock(&branch_tracing_mutex);
101
102 return ret;
103}
104
105void disable_branch_tracing(void)
106{
107 mutex_lock(&branch_tracing_mutex);
108
109 if (!branch_tracing_enabled)
110 goto out_unlock;
111
112 branch_tracing_enabled--;
113
114 out_unlock:
115 mutex_unlock(&branch_tracing_mutex);
116}
117
118static void start_branch_trace(struct trace_array *tr)
119{
120 enable_branch_tracing(tr);
121}
122
123static void stop_branch_trace(struct trace_array *tr)
124{
125 disable_branch_tracing();
126}
127
128static int branch_trace_init(struct trace_array *tr)
129{
130 int cpu;
131
132 for_each_online_cpu(cpu)
133 tracing_reset(tr, cpu);
134
135 start_branch_trace(tr);
136 return 0;
137}
138
139static void branch_trace_reset(struct trace_array *tr)
140{
141 stop_branch_trace(tr);
142}
143
144struct tracer branch_trace __read_mostly =
145{
146 .name = "branch",
147 .init = branch_trace_init,
148 .reset = branch_trace_reset,
149#ifdef CONFIG_FTRACE_SELFTEST
150 .selftest = trace_selftest_startup_branch,
151#endif
152};
153
154__init static int init_branch_trace(void)
155{
156 return register_tracer(&branch_trace);
157}
158
159device_initcall(init_branch_trace);
160#else
161static inline
162void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
163{
164}
165#endif /* CONFIG_BRANCH_TRACER */
166
167void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
168{
169 /*
170 * I would love to have a trace point here instead, but the
171 * trace point code is so inundated with unlikely and likely
 172 * conditions that the resulting recursion is too much of a
 173 * nightmare to get working. At least for now.
174 */
175 trace_likely_condition(f, val, expect);
176
177 /* FIXME: Make this atomic! */
178 if (val == expect)
179 f->correct++;
180 else
181 f->incorrect++;
182}
183EXPORT_SYMBOL(ftrace_likely_update);
184
185struct ftrace_pointer {
186 void *start;
187 void *stop;
188 int hit;
189};
190
191static void *
192t_next(struct seq_file *m, void *v, loff_t *pos)
193{
194 const struct ftrace_pointer *f = m->private;
195 struct ftrace_branch_data *p = v;
196
197 (*pos)++;
198
199 if (v == (void *)1)
200 return f->start;
201
202 ++p;
203
204 if ((void *)p >= (void *)f->stop)
205 return NULL;
206
207 return p;
208}
209
210static void *t_start(struct seq_file *m, loff_t *pos)
211{
212 void *t = (void *)1;
213 loff_t l = 0;
214
215 for (; t && l < *pos; t = t_next(m, t, &l))
216 ;
217
218 return t;
219}
220
221static void t_stop(struct seq_file *m, void *p)
222{
223}
224
225static int t_show(struct seq_file *m, void *v)
226{
227 const struct ftrace_pointer *fp = m->private;
228 struct ftrace_branch_data *p = v;
229 const char *f;
230 long percent;
231
232 if (v == (void *)1) {
233 if (fp->hit)
234 seq_printf(m, " miss hit %% ");
235 else
236 seq_printf(m, " correct incorrect %% ");
237 seq_printf(m, " Function "
238 " File Line\n"
239 " ------- --------- - "
240 " -------- "
241 " ---- ----\n");
242 return 0;
243 }
244
245 /* Only print the file, not the path */
246 f = p->file + strlen(p->file);
247 while (f >= p->file && *f != '/')
248 f--;
249 f++;
250
251 /*
 252 * The miss is overlaid on correct, and hit on incorrect.
253 */
254 if (p->correct) {
255 percent = p->incorrect * 100;
256 percent /= p->correct + p->incorrect;
257 } else
258 percent = p->incorrect ? 100 : -1;
259
260 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
261 if (percent < 0)
262 seq_printf(m, " X ");
263 else
264 seq_printf(m, "%3ld ", percent);
265 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
266 return 0;
267}
268
269static struct seq_operations tracing_likely_seq_ops = {
270 .start = t_start,
271 .next = t_next,
272 .stop = t_stop,
273 .show = t_show,
274};
275
276static int tracing_branch_open(struct inode *inode, struct file *file)
277{
278 int ret;
279
280 ret = seq_open(file, &tracing_likely_seq_ops);
281 if (!ret) {
282 struct seq_file *m = file->private_data;
283 m->private = (void *)inode->i_private;
284 }
285
286 return ret;
287}
288
289static const struct file_operations tracing_branch_fops = {
290 .open = tracing_branch_open,
291 .read = seq_read,
292 .llseek = seq_lseek,
293};
294
295#ifdef CONFIG_PROFILE_ALL_BRANCHES
296extern unsigned long __start_branch_profile[];
297extern unsigned long __stop_branch_profile[];
298
299static const struct ftrace_pointer ftrace_branch_pos = {
300 .start = __start_branch_profile,
301 .stop = __stop_branch_profile,
302 .hit = 1,
303};
304
305#endif /* CONFIG_PROFILE_ALL_BRANCHES */
306
307extern unsigned long __start_annotated_branch_profile[];
308extern unsigned long __stop_annotated_branch_profile[];
309
310static const struct ftrace_pointer ftrace_annotated_branch_pos = {
311 .start = __start_annotated_branch_profile,
312 .stop = __stop_annotated_branch_profile,
313};
314
315static __init int ftrace_branch_init(void)
316{
317 struct dentry *d_tracer;
318 struct dentry *entry;
319
320 d_tracer = tracing_init_dentry();
321
322 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
323 (void *)&ftrace_annotated_branch_pos,
324 &tracing_branch_fops);
325 if (!entry)
326 pr_warning("Could not create debugfs "
327 "'profile_annotatet_branch' entry\n");
328
329#ifdef CONFIG_PROFILE_ALL_BRANCHES
330 entry = debugfs_create_file("profile_branch", 0444, d_tracer,
331 (void *)&ftrace_branch_pos,
332 &tracing_branch_fops);
333 if (!entry)
334 pr_warning("Could not create debugfs"
335 " 'profile_branch' entry\n");
336#endif
337
338 return 0;
339}
340
341device_initcall(ftrace_branch_init);
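The annotated counters accumulate for as long as the kernel runs, independent of whether the branch tracer itself is enabled, because ftrace_likely_update() bumps them unconditionally. Reading them back is a plain cat; the columns match the header printed by t_show():

	# cat /debug/tracing/profile_annotated_branch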
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..e74f6d0a3216 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,24 +42,20 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
43} 43}
44 44
45static void function_trace_init(struct trace_array *tr) 45static int function_trace_init(struct trace_array *tr)
46{ 46{
47 if (tr->ctrl) 47 start_function_trace(tr);
48 start_function_trace(tr); 48 return 0;
49} 49}
50 50
51static void function_trace_reset(struct trace_array *tr) 51static void function_trace_reset(struct trace_array *tr)
52{ 52{
53 if (tr->ctrl) 53 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 54}
56 55
57static void function_trace_ctrl_update(struct trace_array *tr) 56static void function_trace_start(struct trace_array *tr)
58{ 57{
59 if (tr->ctrl) 58 function_reset(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 59}
64 60
65static struct tracer function_trace __read_mostly = 61static struct tracer function_trace __read_mostly =
@@ -67,7 +63,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 63 .name = "function",
68 .init = function_trace_init, 64 .init = function_trace_init,
69 .reset = function_trace_reset, 65 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 66 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 67#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 68 .selftest = trace_selftest_startup_function,
73#endif 69#endif
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
new file mode 100644
index 000000000000..e00d64509c9c
--- /dev/null
+++ b/kernel/trace/trace_functions_return.c
@@ -0,0 +1,98 @@
1/*
2 *
3 * Function return tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16
17#define TRACE_RETURN_PRINT_OVERRUN 0x1
18static struct tracer_opt trace_opts[] = {
19 /* Display overruns or not */
20 { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) },
21 { } /* Empty entry */
22};
23
24static struct tracer_flags tracer_flags = {
25 .val = 0, /* Don't display overruns by default */
26 .opts = trace_opts
27};
28
29
30static int return_trace_init(struct trace_array *tr)
31{
32 int cpu;
33 for_each_online_cpu(cpu)
34 tracing_reset(tr, cpu);
35
36 return register_ftrace_return(&trace_function_return);
37}
38
39static void return_trace_reset(struct trace_array *tr)
40{
41 unregister_ftrace_return();
42}
43
44
45enum print_line_t
46print_return_function(struct trace_iterator *iter)
47{
48 struct trace_seq *s = &iter->seq;
49 struct trace_entry *entry = iter->ent;
50 struct ftrace_ret_entry *field;
51 int ret;
52
53 if (entry->type == TRACE_FN_RET) {
54 trace_assign_type(field, entry);
55 ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
56 if (!ret)
57 return TRACE_TYPE_PARTIAL_LINE;
58
59 ret = seq_print_ip_sym(s, field->ip,
60 trace_flags & TRACE_ITER_SYM_MASK);
61 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE;
63
64 ret = trace_seq_printf(s, " (%llu ns)",
65 field->rettime - field->calltime);
66 if (!ret)
67 return TRACE_TYPE_PARTIAL_LINE;
68
69 if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) {
70 ret = trace_seq_printf(s, " (Overruns: %lu)",
71 field->overrun);
72 if (!ret)
73 return TRACE_TYPE_PARTIAL_LINE;
74 }
75
76 ret = trace_seq_printf(s, "\n");
77 if (!ret)
78 return TRACE_TYPE_PARTIAL_LINE;
79
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
85static struct tracer return_trace __read_mostly = {
86 .name = "return",
87 .init = return_trace_init,
88 .reset = return_trace_reset,
89 .print_line = print_return_function,
90 .flags = &tracer_flags,
91};
92
93static __init int init_return_trace(void)
94{
95 return register_tracer(&return_trace);
96}
97
98device_initcall(init_return_trace);
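The return tracer also exercises the new per-tracer option plumbing end to end: once it is the current tracer, its overrun flag becomes settable through trace_options alongside the global flags. A hedged session:

	# echo return > /debug/tracing/current_tracer
	# echo overrun > /debug/tracing/trace_options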
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
 358 * variable while we disable it upon opening a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
406static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
407{ 420{
408 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
409 422
410 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
411} 425}
412static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
413{ 427{
414 .name = "irqsoff", 428 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 435 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -428,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
428#endif 443#endif
429 444
430#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
431static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
432{ 447{
433 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
434 449
435 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
436} 452}
437 453
438static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -440,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 456 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 463 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -456,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
456#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
457 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
458 475
459static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
460{ 477{
461 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
462 479
463 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
464} 482}
465 483
466static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -468,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 493 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f28484618ff0..2a98a206acc2 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,46 +18,43 @@ struct header_iter {
18 18
19static struct trace_array *mmio_trace_array; 19static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 20static bool overrun_detected;
21static unsigned long prev_overruns;
21 22
22static void mmio_reset_data(struct trace_array *tr) 23static void mmio_reset_data(struct trace_array *tr)
23{ 24{
24 int cpu; 25 int cpu;
25 26
26 overrun_detected = false; 27 overrun_detected = false;
28 prev_overruns = 0;
27 tr->time_start = ftrace_now(tr->cpu); 29 tr->time_start = ftrace_now(tr->cpu);
28 30
29 for_each_online_cpu(cpu) 31 for_each_online_cpu(cpu)
30 tracing_reset(tr, cpu); 32 tracing_reset(tr, cpu);
31} 33}
32 34
33static void mmio_trace_init(struct trace_array *tr) 35static int mmio_trace_init(struct trace_array *tr)
34{ 36{
35 pr_debug("in %s\n", __func__); 37 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr; 38 mmio_trace_array = tr;
37 if (tr->ctrl) { 39
38 mmio_reset_data(tr); 40 mmio_reset_data(tr);
39 enable_mmiotrace(); 41 enable_mmiotrace();
40 } 42 return 0;
41} 43}
42 44
43static void mmio_trace_reset(struct trace_array *tr) 45static void mmio_trace_reset(struct trace_array *tr)
44{ 46{
45 pr_debug("in %s\n", __func__); 47 pr_debug("in %s\n", __func__);
46 if (tr->ctrl) 48
47 disable_mmiotrace(); 49 disable_mmiotrace();
48 mmio_reset_data(tr); 50 mmio_reset_data(tr);
49 mmio_trace_array = NULL; 51 mmio_trace_array = NULL;
50} 52}
51 53
52static void mmio_trace_ctrl_update(struct trace_array *tr) 54static void mmio_trace_start(struct trace_array *tr)
53{ 55{
54 pr_debug("in %s\n", __func__); 56 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) { 57 mmio_reset_data(tr);
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61} 58}
62 59
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 60static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -128,16 +125,12 @@ static void mmio_close(struct trace_iterator *iter)
128 125
129static unsigned long count_overruns(struct trace_iterator *iter) 126static unsigned long count_overruns(struct trace_iterator *iter)
130{ 127{
131 int cpu;
132 unsigned long cnt = 0; 128 unsigned long cnt = 0;
133/* FIXME: */ 129 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
134#if 0 130
135 for_each_online_cpu(cpu) { 131 if (over > prev_overruns)
136 cnt += iter->overrun[cpu]; 132 cnt = over - prev_overruns;
137 iter->overrun[cpu] = 0; 133 prev_overruns = over;
138 }
139#endif
140 (void)cpu;
141 return cnt; 134 return cnt;
142} 135}
143 136
@@ -298,10 +291,10 @@ static struct tracer mmio_tracer __read_mostly =
298 .name = "mmiotrace", 291 .name = "mmiotrace",
299 .init = mmio_trace_init, 292 .init = mmio_trace_init,
300 .reset = mmio_trace_reset, 293 .reset = mmio_trace_reset,
294 .start = mmio_trace_start,
301 .pipe_open = mmio_pipe_open, 295 .pipe_open = mmio_pipe_open,
302 .close = mmio_close, 296 .close = mmio_close,
303 .read = mmio_read, 297 .read = mmio_read,
304 .ctrl_update = mmio_trace_ctrl_update,
305 .print_line = mmio_print_line, 298 .print_line = mmio_print_line,
306}; 299};
307 300
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..b9767acd30ac 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -12,6 +12,27 @@
12 12
13#include "trace.h" 13#include "trace.h"
14 14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
 27 { } /* Always end with an empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
15static struct trace_array *ctx_trace; 36static struct trace_array *ctx_trace;
16 37
17static void start_nop_trace(struct trace_array *tr) 38static void start_nop_trace(struct trace_array *tr)
@@ -24,7 +45,7 @@ static void stop_nop_trace(struct trace_array *tr)
24 /* Nothing to do! */ 45 /* Nothing to do! */
25} 46}
26 47
27static void nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
28{ 49{
29 int cpu; 50 int cpu;
30 ctx_trace = tr; 51 ctx_trace = tr;
@@ -32,33 +53,53 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 53 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 54 tracing_reset(tr, cpu);
34 55
35 if (tr->ctrl) 56 start_nop_trace(tr);
36 start_nop_trace(tr); 57 return 0;
37} 58}
38 59
39static void nop_trace_reset(struct trace_array *tr) 60static void nop_trace_reset(struct trace_array *tr)
40{ 61{
41 if (tr->ctrl) 62 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43} 63}
44 64
 45static void nop_trace_ctrl_update(struct trace_array *tr) 65/* This only serves as an example of a callback to
 66 * accept or refuse the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
46{ 71{
47 /* When starting a new trace, reset the buffers */ 72 /*
48 if (tr->ctrl) 73 * Note that you don't need to update nop_flags.val yourself.
 49 start_nop_trace(tr); 74 * The tracing API will do it automatically if you return 0.
50 else 75 */
51 stop_nop_trace(tr); 76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
85 "Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
52} 91}
53 92
93
54struct tracer nop_trace __read_mostly = 94struct tracer nop_trace __read_mostly =
55{ 95{
56 .name = "nop", 96 .name = "nop",
57 .init = nop_trace_init, 97 .init = nop_trace_init,
58 .reset = nop_trace_reset, 98 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 99#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 100 .selftest = trace_selftest_startup_nop,
62#endif 101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
63}; 104};
64 105
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..863390557b44 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
123 124
124static void tracing_start_sched_switch(void) 125static void tracing_start_sched_switch(void)
125{ 126{
126 long ref; 127 mutex_lock(&sched_register_mutex);
127 128 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 129 tracing_sched_register();
130 mutex_unlock(&sched_register_mutex);
131} 131}
132 132
133static void tracing_stop_sched_switch(void) 133static void tracing_stop_sched_switch(void)
134{ 134{
135 long ref; 135 mutex_lock(&sched_register_mutex);
136 136 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 137 tracing_sched_unregister();
138 mutex_unlock(&sched_register_mutex);
140} 139}
141 140
142void tracing_start_cmdline_record(void) 141void tracing_start_cmdline_record(void)
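
The hunk above trades the atomic sched_ref for a plain counter guarded by sched_register_mutex, so the register/unregister work is serialized by the same lock that orders the count. A small userspace model of the idiom, names illustrative:

#include <pthread.h>
#include <stdio.h>

static int ref;
static pthread_mutex_t register_mutex = PTHREAD_MUTEX_INITIALIZER;

static void do_register(void)   { printf("register probes\n"); }
static void do_unregister(void) { printf("unregister probes\n"); }

static void start_record(void)
{
	pthread_mutex_lock(&register_mutex);
	if (!(ref++))			/* first user registers the probes */
		do_register();
	pthread_mutex_unlock(&register_mutex);
}

static void stop_record(void)
{
	pthread_mutex_lock(&register_mutex);
	if (!(--ref))			/* last user unregisters them */
		do_unregister();
	pthread_mutex_unlock(&register_mutex);
}

int main(void)
{
	start_record();
	start_record();			/* nested start: no re-register */
	stop_record();
	stop_record();			/* last stop unregisters */
	return 0;
}
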
@@ -149,40 +148,86 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 148 tracing_stop_sched_switch();
150} 149}
151 150
151/**
152 * tracing_start_sched_switch_record - start tracing context switches
153 *
154 * Turns on context switch tracing for a tracer.
155 */
156void tracing_start_sched_switch_record(void)
157{
158 if (unlikely(!ctx_trace)) {
159 WARN_ON(1);
160 return;
161 }
162
163 tracing_start_sched_switch();
164
165 mutex_lock(&sched_register_mutex);
166 tracer_enabled++;
167 mutex_unlock(&sched_register_mutex);
168}
169
170/**
 171 * tracing_stop_sched_switch_record - stop tracing context switches
172 *
173 * Turns off context switch tracing for a tracer.
174 */
175void tracing_stop_sched_switch_record(void)
176{
177 mutex_lock(&sched_register_mutex);
178 tracer_enabled--;
179 WARN_ON(tracer_enabled < 0);
180 mutex_unlock(&sched_register_mutex);
181
182 tracing_stop_sched_switch();
183}
184
185/**
186 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
187 * @tr: trace array pointer to assign
188 *
189 * Some tracers might want to record the context switches in their
190 * trace. This function lets those tracers assign the trace array
191 * to use.
192 */
193void tracing_sched_switch_assign_trace(struct trace_array *tr)
194{
195 ctx_trace = tr;
196}
197
152static void start_sched_trace(struct trace_array *tr) 198static void start_sched_trace(struct trace_array *tr)
153{ 199{
154 sched_switch_reset(tr); 200 sched_switch_reset(tr);
155 tracing_start_cmdline_record(); 201 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 202}
158 203
159static void stop_sched_trace(struct trace_array *tr) 204static void stop_sched_trace(struct trace_array *tr)
160{ 205{
161 tracer_enabled = 0; 206 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 207}
164 208
165static void sched_switch_trace_init(struct trace_array *tr) 209static int sched_switch_trace_init(struct trace_array *tr)
166{ 210{
167 ctx_trace = tr; 211 ctx_trace = tr;
168 212 start_sched_trace(tr);
169 if (tr->ctrl) 213 return 0;
170 start_sched_trace(tr);
171} 214}
172 215
173static void sched_switch_trace_reset(struct trace_array *tr) 216static void sched_switch_trace_reset(struct trace_array *tr)
174{ 217{
175 if (tr->ctrl) 218 if (sched_ref)
176 stop_sched_trace(tr); 219 stop_sched_trace(tr);
177} 220}
178 221
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 222static void sched_switch_trace_start(struct trace_array *tr)
180{ 223{
181 /* When starting a new trace, reset the buffers */ 224 sched_switch_reset(tr);
182 if (tr->ctrl) 225 tracing_start_sched_switch();
183 start_sched_trace(tr); 226}
184 else 227
185 stop_sched_trace(tr); 228static void sched_switch_trace_stop(struct trace_array *tr)
229{
230 tracing_stop_sched_switch();
186} 231}
187 232
188static struct tracer sched_switch_trace __read_mostly = 233static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +235,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 235 .name = "sched_switch",
191 .init = sched_switch_trace_init, 236 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 237 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 238 .start = sched_switch_trace_start,
239 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 240#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 241 .selftest = trace_selftest_startup_sched_switch,
196#endif 242#endif
@@ -198,14 +244,6 @@ static struct tracer sched_switch_trace __read_mostly =
198 244
199__init static int init_sched_switch_trace(void) 245__init static int init_sched_switch_trace(void)
200{ 246{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 247 return register_tracer(&sched_switch_trace);
210} 248}
211device_initcall(init_sched_switch_trace); 249device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..0067b49746c1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
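
Several hunks in this patch collapse the open-coded need_resched()/preempt_disable_notrace() sequence into an ftrace_preempt_disable()/ftrace_preempt_enable() pair defined elsewhere in the tree. Judging from the code removed here, the helpers presumably amount to this sketch:

static inline int ftrace_preempt_disable(void)
{
	int resched = need_resched();

	preempt_disable_notrace();
	return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
	/*
	 * To prevent recursion from the scheduler: if the resched flag
	 * was already set before we entered, don't reschedule here.
	 */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}
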
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,49 +324,53 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 331 unregister_trace_sched_wakeup(probe_wakeup);
328} 332}
329 333
330static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl) 338 return 0;
335 start_wakeup_tracer(tr);
336} 339}
337 340
338static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
339{ 342{
340 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
343 wakeup_reset(tr); 346}
344 } 347
348static void wakeup_tracer_start(struct trace_array *tr)
349{
350 wakeup_reset(tr);
351 tracer_enabled = 1;
352 save_tracer_enabled = 1;
345} 353}
346 354
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 355static void wakeup_tracer_stop(struct trace_array *tr)
348{ 356{
349 if (tr->ctrl) 357 tracer_enabled = 0;
350 start_wakeup_tracer(tr); 358 save_tracer_enabled = 0;
351 else
352 stop_wakeup_tracer(tr);
353} 359}
354 360
355static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 362{
357 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 364 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 365}
361 366
362static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 368{
364 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
367} 374}
368 375
369static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
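
The open/close rework above pauses recording while a trace file is read and, on close, resumes only if save_tracer_enabled says the tracer was running beforehand. A compact userspace model of that save/restore idea, names illustrative:

#include <stdio.h>

static int tracer_enabled;
static int save_tracer_enabled;		/* state to restore after a dump */

static void trace_file_open(void)
{
	tracer_enabled = 0;		/* stop the trace while dumping */
}

static void trace_file_close(void)
{
	if (save_tracer_enabled)	/* resume only if it was running */
		tracer_enabled = 1;
}

int main(void)
{
	tracer_enabled = save_tracer_enabled = 1;
	trace_file_open();
	printf("while dumping: %d\n", tracer_enabled);	/* 0 */
	trace_file_close();
	printf("after close:   %d\n", tracer_enabled);	/* 1 */
	return 0;
}
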
@@ -371,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 378 .name = "wakeup",
372 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 385 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
16 return 1; 17 return 1;
17 } 18 }
18 return 0; 19 return 0;
@@ -51,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
51 int cpu, ret = 0; 52 int cpu, ret = 0;
52 53
53 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
54 raw_local_irq_save(flags); 55 local_irq_save(flags);
55 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
56 57
57 cnt = ring_buffer_entries(tr->buffer); 58 cnt = ring_buffer_entries(tr->buffer);
@@ -62,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
62 break; 63 break;
63 } 64 }
64 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
65 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
66 67
67 if (count) 68 if (count)
68 *count = cnt; 69 *count = cnt;
@@ -70,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 71 return ret;
71} 72}
72 73
74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
73#ifdef CONFIG_FUNCTION_TRACER 79#ifdef CONFIG_FUNCTION_TRACER
74 80
75#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
@@ -110,8 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
111 117
112 /* enable tracing */ 118 /* enable tracing */
113 tr->ctrl = 1; 119 ret = trace->init(tr);
114 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
115 124
116 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
117 msleep(100); 126 msleep(100);
@@ -134,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 143 msleep(100);
135 144
136 /* stop the tracing. */ 145 /* stop the tracing. */
137 tr->ctrl = 0; 146 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 147 ftrace_enabled = 0;
140 148
141 /* check the trace buffer */ 149 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
144 153
145 /* we should only have one item */ 154 /* we should only have one item */
146 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -148,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 157 ret = -1;
149 goto out; 158 goto out;
150 } 159 }
160
151 out: 161 out:
152 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -180,18 +190,22 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 190 ftrace_enabled = 1;
181 tracer_enabled = 1; 191 tracer_enabled = 1;
182 192
183 tr->ctrl = 1; 193 ret = trace->init(tr);
184 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
185 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
186 msleep(100); 200 msleep(100);
187 /* stop the tracing. */ 201 /* stop the tracing. */
188 tr->ctrl = 0; 202 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 203 ftrace_enabled = 0;
191 204
192 /* check the trace buffer */ 205 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
195 209
196 if (!ret && !count) { 210 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
@@ -223,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 237 int ret;
224 238
225 /* start the tracing */ 239 /* start the tracing */
226 tr->ctrl = 1; 240 ret = trace->init(tr);
227 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
228 /* reset the max latency */ 246 /* reset the max latency */
229 tracing_max_latency = 0; 247 tracing_max_latency = 0;
230 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -232,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 250 udelay(100);
233 local_irq_enable(); 251 local_irq_enable();
234 /* stop the tracing. */ 252 /* stop the tracing. */
235 tr->ctrl = 0; 253 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 254 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 256 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
242 260
243 if (!ret && !count) { 261 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -259,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 277 unsigned long count;
260 int ret; 278 int ret;
261 279
280 /*
 281 * Now that the big kernel lock is no longer preemptible,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
262 /* start the tracing */ 293 /* start the tracing */
263 tr->ctrl = 1; 294 ret = trace->init(tr);
264 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
265 /* reset the max latency */ 300 /* reset the max latency */
266 tracing_max_latency = 0; 301 tracing_max_latency = 0;
267 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -269,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 304 udelay(100);
270 preempt_enable(); 305 preempt_enable();
271 /* stop the tracing. */ 306 /* stop the tracing. */
272 tr->ctrl = 0; 307 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 308 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 310 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
279 314
280 if (!ret && !count) { 315 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -296,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 331 unsigned long count;
297 int ret; 332 int ret;
298 333
334 /*
 335 * Now that the big kernel lock is no longer preemptible,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
299 /* start the tracing */ 347 /* start the tracing */
300 tr->ctrl = 1; 348 ret = trace->init(tr);
301 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
302 353
303 /* reset the max latency */ 354 /* reset the max latency */
304 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -312,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 363 local_irq_enable();
313 364
314 /* stop the tracing. */ 365 /* stop the tracing. */
315 tr->ctrl = 0; 366 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 367 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
319 if (ret) 369 if (ret) {
370 tracing_start();
320 goto out; 371 goto out;
372 }
321 373
322 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 375 if (ret) {
376 tracing_start();
324 goto out; 377 goto out;
378 }
325 379
326 if (!ret && !count) { 380 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 382 ret = -1;
383 tracing_start();
329 goto out; 384 goto out;
330 } 385 }
331 386
332 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 388 tracing_max_latency = 0;
334 tr->ctrl = 1; 389 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 390 preempt_disable();
337 local_irq_disable(); 391 local_irq_disable();
338 udelay(100); 392 udelay(100);
@@ -341,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 395 local_irq_enable();
342 396
343 /* stop the tracing. */ 397 /* stop the tracing. */
344 tr->ctrl = 0; 398 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 399 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
348 if (ret) 401 if (ret)
@@ -358,6 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 411
359 out: 412 out:
360 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
361 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
362 416
363 return ret; 417 return ret;
@@ -423,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
424 478
425 /* start the tracing */ 479 /* start the tracing */
426 tr->ctrl = 1; 480 ret = trace->init(tr);
427 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
428 /* reset the max latency */ 486 /* reset the max latency */
429 tracing_max_latency = 0; 487 tracing_max_latency = 0;
430 488
@@ -448,8 +506,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 506 msleep(100);
449 507
450 /* stop the tracing. */ 508 /* stop the tracing. */
451 tr->ctrl = 0; 509 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 510 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 512 if (!ret)
@@ -457,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 514
458 515
459 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
460 518
461 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
462 520
@@ -480,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 538 int ret;
481 539
482 /* start the tracing */ 540 /* start the tracing */
483 tr->ctrl = 1; 541 ret = trace->init(tr);
484 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
485 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
486 msleep(100); 548 msleep(100);
487 /* stop the tracing. */ 549 /* stop the tracing. */
488 tr->ctrl = 0; 550 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 551 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
493 555
494 if (!ret && !count) { 556 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 570 int ret;
509 571
510 /* start the tracing */ 572 /* start the tracing */
511 tr->ctrl = 1; 573 ret = trace->init(tr);
512 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
513 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
514 msleep(100); 580 msleep(100);
515 /* stop the tracing. */ 581 /* stop the tracing. */
516 tr->ctrl = 0; 582 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 583 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
521 587
522 return ret; 588 return ret;
523} 589}
524#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b62fe58..fde3be15c642 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 108 return;
109 109
110 resched = need_resched(); 110 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 111
113 cpu = raw_smp_processor_id(); 112 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 113 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 119 out:
121 per_cpu(trace_active, cpu)--; 120 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 121 /* prevent recursion in schedule */
123 if (resched) 122 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 123}
128 124
129static struct ftrace_ops trace_ops __read_mostly = 125static struct ftrace_ops trace_ops __read_mostly =
@@ -184,11 +180,16 @@ static struct file_operations stack_max_size_fops = {
184static void * 180static void *
185t_next(struct seq_file *m, void *v, loff_t *pos) 181t_next(struct seq_file *m, void *v, loff_t *pos)
186{ 182{
187 long i = (long)m->private; 183 long i;
188 184
189 (*pos)++; 185 (*pos)++;
190 186
191 i++; 187 if (v == SEQ_START_TOKEN)
188 i = 0;
189 else {
190 i = *(long *)v;
191 i++;
192 }
192 193
193 if (i >= max_stack_trace.nr_entries || 194 if (i >= max_stack_trace.nr_entries ||
194 stack_dump_trace[i] == ULONG_MAX) 195 stack_dump_trace[i] == ULONG_MAX)
@@ -201,12 +202,15 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
201 202
202static void *t_start(struct seq_file *m, loff_t *pos) 203static void *t_start(struct seq_file *m, loff_t *pos)
203{ 204{
204 void *t = &m->private; 205 void *t = SEQ_START_TOKEN;
205 loff_t l = 0; 206 loff_t l = 0;
206 207
207 local_irq_disable(); 208 local_irq_disable();
208 __raw_spin_lock(&max_stack_lock); 209 __raw_spin_lock(&max_stack_lock);
209 210
211 if (*pos == 0)
212 return SEQ_START_TOKEN;
213
210 for (; t && l < *pos; t = t_next(m, t, &l)) 214 for (; t && l < *pos; t = t_next(m, t, &l))
211 ; 215 ;
212 216
@@ -235,10 +239,10 @@ static int trace_lookup_stack(struct seq_file *m, long i)
235 239
236static int t_show(struct seq_file *m, void *v) 240static int t_show(struct seq_file *m, void *v)
237{ 241{
238 long i = *(long *)v; 242 long i;
239 int size; 243 int size;
240 244
241 if (i < 0) { 245 if (v == SEQ_START_TOKEN) {
242 seq_printf(m, " Depth Size Location" 246 seq_printf(m, " Depth Size Location"
243 " (%d entries)\n" 247 " (%d entries)\n"
244 " ----- ---- --------\n", 248 " ----- ---- --------\n",
@@ -246,6 +250,8 @@ static int t_show(struct seq_file *m, void *v)
246 return 0; 250 return 0;
247 } 251 }
248 252
253 i = *(long *)v;
254
249 if (i >= max_stack_trace.nr_entries || 255 if (i >= max_stack_trace.nr_entries ||
250 stack_dump_trace[i] == ULONG_MAX) 256 stack_dump_trace[i] == ULONG_MAX)
251 return 0; 257 return 0;
@@ -275,10 +281,6 @@ static int stack_trace_open(struct inode *inode, struct file *file)
275 int ret; 281 int ret;
276 282
277 ret = seq_open(file, &stack_trace_seq_ops); 283 ret = seq_open(file, &stack_trace_seq_ops);
278 if (!ret) {
279 struct seq_file *m = file->private_data;
280 m->private = (void *)-1;
281 }
282 284
283 return ret; 285 return ret;
284} 286}
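
These trace_stack.c hunks convert the iterator from a magic -1 stored in m->private to the standard SEQ_START_TOKEN idiom: t_start() returns the token at position 0 and t_show() prints the header when it sees it. A minimal kernel-style sketch of the idiom; items and nr_items are assumed stand-in data:

#include <linux/seq_file.h>

static long items[] = { 10, 20, 30 };
static const int nr_items = 3;

static void *ex_start(struct seq_file *m, loff_t *pos)
{
	if (*pos == 0)
		return SEQ_START_TOKEN;		/* first call: emit header */
	return *pos <= nr_items ? (void *)&items[*pos - 1] : NULL;
}

static void *ex_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return *pos <= nr_items ? (void *)&items[*pos - 1] : NULL;
}

static void ex_stop(struct seq_file *m, void *v)
{
}

static int ex_show(struct seq_file *m, void *v)
{
	if (v == SEQ_START_TOKEN) {		/* header row */
		seq_puts(m, " Value\n -----\n");
		return 0;
	}
	seq_printf(m, " %ld\n", *(long *)v);
	return 0;
}

static const struct seq_operations ex_seq_ops = {
	.start	= ex_start,
	.next	= ex_next,
	.stop	= ex_stop,
	.show	= ex_show,
};
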
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..54960edb96d0 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -261,27 +261,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 261 mutex_unlock(&sample_timer_lock);
262} 262}
263 263
264static void stack_trace_init(struct trace_array *tr) 264static int stack_trace_init(struct trace_array *tr)
265{ 265{
266 sysprof_trace = tr; 266 sysprof_trace = tr;
267 267
268 if (tr->ctrl) 268 start_stack_trace(tr);
269 start_stack_trace(tr); 269 return 0;
270} 270}
271 271
272static void stack_trace_reset(struct trace_array *tr) 272static void stack_trace_reset(struct trace_array *tr)
273{ 273{
274 if (tr->ctrl) 274 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 275}
286 276
287static struct tracer stack_trace __read_mostly = 277static struct tracer stack_trace __read_mostly =
@@ -289,7 +279,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 279 .name = "sysprof",
290 .init = stack_trace_init, 280 .init = stack_trace_init,
291 .reset = stack_trace_reset, 281 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 282#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 283 .selftest = trace_selftest_startup_sysprof,
295#endif 284#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..79602740bbb5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
43 */ 43 */
44#define TRACEPOINT_HASH_BITS 6 44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 void **funcs; 56 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */ 57 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0]; 58 char name[0];
61}; 59};
62 60
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 61struct tp_probes {
62 union {
63 struct rcu_head rcu;
64 struct list_head list;
65 } u;
66 void *probes[0];
67};
64 68
65static void free_old_closure(struct rcu_head *head) 69static inline void *allocate_probes(int count)
66{ 70{
67 struct tracepoint_entry *entry = container_of(head, 71 struct tp_probes *p = kmalloc(count * sizeof(void *)
68 struct tracepoint_entry, rcu); 72 + sizeof(struct tp_probes), GFP_KERNEL);
69 kfree(entry->oldptr); 73 return p == NULL ? NULL : p->probes;
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73} 74}
74 75
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) 76static void rcu_free_old_probes(struct rcu_head *head)
76{ 77{
77 if (!old) 78 kfree(container_of(head, struct tp_probes, u.rcu));
78 return; 79}
79 entry->oldptr = old; 80
80 entry->rcu_pending = 1; 81static inline void release_probes(void *old)
81 /* write rcu_pending before calling the RCU callback */ 82{
82 smp_wmb(); 83 if (old) {
83 call_rcu_sched(&entry->rcu, free_old_closure); 84 struct tp_probes *tp_probes = container_of(old,
85 struct tp_probes, probes[0]);
86 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
87 }
84} 88}
85 89
86static void debug_print_probes(struct tracepoint_entry *entry) 90static void debug_print_probes(struct tracepoint_entry *entry)
87{ 91{
88 int i; 92 int i;
89 93
90 if (!tracepoint_debug) 94 if (!tracepoint_debug || !entry->funcs)
91 return; 95 return;
92 96
93 for (i = 0; entry->funcs[i]; i++) 97 for (i = 0; entry->funcs[i]; i++)
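
The tp_probes structure above hands callers the flexible probes[] array and recovers the enclosing allocation with container_of() when it is time to free it, letting one RCU head cover the whole array. A self-contained userspace model of that trick, names illustrative:

#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct tp_probes_model {
	void *bookkeeping;		/* stands in for the rcu/list union */
	void *probes[];			/* callers only ever see this array */
};

static void *allocate_probes_model(int count)
{
	struct tp_probes_model *p =
		malloc(sizeof(*p) + count * sizeof(void *));

	return p == NULL ? NULL : (void *)p->probes;
}

static void release_probes_model(void *old)
{
	if (old)			/* recover the enclosing allocation */
		free(container_of(old, struct tp_probes_model, probes));
}

int main(void)
{
	void **funcs = allocate_probes_model(3);

	if (funcs) {
		funcs[0] = funcs[1] = funcs[2] = NULL;
		release_probes_model(funcs);	/* frees the whole struct */
	}
	return 0;
}
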
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 return ERR_PTR(-EEXIST); 115 return ERR_PTR(-EEXIST);
112 } 116 }
113 /* + 2 : one for new probe, one for NULL func */ 117 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); 118 new = allocate_probes(nr_probes + 2);
115 if (new == NULL) 119 if (new == NULL)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
117 if (old) 121 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *)); 122 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe; 123 new[nr_probes] = probe;
124 new[nr_probes + 1] = NULL;
120 entry->refcount = nr_probes + 1; 125 entry->refcount = nr_probes + 1;
121 entry->funcs = new; 126 entry->funcs = new;
122 debug_print_probes(entry); 127 debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
132 old = entry->funcs; 137 old = entry->funcs;
133 138
134 if (!old) 139 if (!old)
135 return NULL; 140 return ERR_PTR(-ENOENT);
136 141
137 debug_print_probes(entry); 142 debug_print_probes(entry);
138 /* (N -> M), (N > 1, M >= 0) probes */ 143 /* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
151 int j = 0; 156 int j = 0;
152 /* N -> M, (N > 1, M > 0) */ 157 /* N -> M, (N > 1, M > 0) */
153 /* + 1 for NULL */ 158 /* + 1 for NULL */
154 new = kzalloc((nr_probes - nr_del + 1) 159 new = allocate_probes(nr_probes - nr_del + 1);
155 * sizeof(void *), GFP_KERNEL);
156 if (new == NULL) 160 if (new == NULL)
157 return ERR_PTR(-ENOMEM); 161 return ERR_PTR(-ENOMEM);
158 for (i = 0; old[i]; i++) 162 for (i = 0; old[i]; i++)
159 if ((probe && old[i] != probe)) 163 if ((probe && old[i] != probe))
160 new[j++] = old[i]; 164 new[j++] = old[i];
165 new[nr_probes - nr_del] = NULL;
161 entry->refcount = nr_probes - nr_del; 166 entry->refcount = nr_probes - nr_del;
162 entry->funcs = new; 167 entry->funcs = new;
163 } 168 }
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
215 memcpy(&e->name[0], name, name_len); 220 memcpy(&e->name[0], name, name_len);
216 e->funcs = NULL; 221 e->funcs = NULL;
217 e->refcount = 0; 222 e->refcount = 0;
218 e->rcu_pending = 0;
219 hlist_add_head(&e->hlist, head); 223 hlist_add_head(&e->hlist, head);
220 return e; 224 return e;
221} 225}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
224 * Remove the tracepoint from the tracepoint hash table. Must be called with 228 * Remove the tracepoint from the tracepoint hash table. Must be called with
225 * mutex_lock held. 229 * mutex_lock held.
226 */ 230 */
227static int remove_tracepoint(const char *name) 231static inline void remove_tracepoint(struct tracepoint_entry *e)
228{ 232{
229 struct hlist_head *head;
230 struct hlist_node *node;
231 struct tracepoint_entry *e;
232 int found = 0;
233 size_t len = strlen(name) + 1;
234 u32 hash = jhash(name, len-1, 0);
235
236 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
237 hlist_for_each_entry(e, node, head, hlist) {
238 if (!strcmp(name, e->name)) {
239 found = 1;
240 break;
241 }
242 }
243 if (!found)
244 return -ENOENT;
245 if (e->refcount)
246 return -EBUSY;
247 hlist_del(&e->hlist); 233 hlist_del(&e->hlist);
248 /* Make sure the call_rcu_sched has been executed */
249 if (e->rcu_pending)
250 rcu_barrier_sched();
251 kfree(e); 234 kfree(e);
252 return 0;
253} 235}
254 236
255/* 237/*
@@ -280,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry,
280static void disable_tracepoint(struct tracepoint *elem) 262static void disable_tracepoint(struct tracepoint *elem)
281{ 263{
282 elem->state = 0; 264 elem->state = 0;
265 rcu_assign_pointer(elem->funcs, NULL);
283} 266}
284 267
285/** 268/**
@@ -320,6 +303,23 @@ static void tracepoint_update_probes(void)
320 module_update_tracepoints(); 303 module_update_tracepoints();
321} 304}
322 305
306static void *tracepoint_add_probe(const char *name, void *probe)
307{
308 struct tracepoint_entry *entry;
309 void *old;
310
311 entry = get_tracepoint(name);
312 if (!entry) {
313 entry = add_tracepoint(name);
314 if (IS_ERR(entry))
315 return entry;
316 }
317 old = tracepoint_entry_add_probe(entry, probe);
318 if (IS_ERR(old) && !entry->refcount)
319 remove_tracepoint(entry);
320 return old;
321}
322
323/** 323/**
324 * tracepoint_probe_register - Connect a probe to a tracepoint 324 * tracepoint_probe_register - Connect a probe to a tracepoint
325 * @name: tracepoint name 325 * @name: tracepoint name
@@ -330,44 +330,36 @@ static void tracepoint_update_probes(void)
330 */ 330 */
331int tracepoint_probe_register(const char *name, void *probe) 331int tracepoint_probe_register(const char *name, void *probe)
332{ 332{
333 struct tracepoint_entry *entry;
334 int ret = 0;
335 void *old; 333 void *old;
336 334
337 mutex_lock(&tracepoints_mutex); 335 mutex_lock(&tracepoints_mutex);
338 entry = get_tracepoint(name); 336 old = tracepoint_add_probe(name, probe);
339 if (!entry) {
340 entry = add_tracepoint(name);
341 if (IS_ERR(entry)) {
342 ret = PTR_ERR(entry);
343 goto end;
344 }
345 }
346 /*
347 * If we detect that a call_rcu_sched is pending for this tracepoint,
348 * make sure it's executed now.
349 */
350 if (entry->rcu_pending)
351 rcu_barrier_sched();
352 old = tracepoint_entry_add_probe(entry, probe);
353 if (IS_ERR(old)) {
354 ret = PTR_ERR(old);
355 goto end;
356 }
357 mutex_unlock(&tracepoints_mutex); 337 mutex_unlock(&tracepoints_mutex);
338 if (IS_ERR(old))
339 return PTR_ERR(old);
340
358 tracepoint_update_probes(); /* may update entry */ 341 tracepoint_update_probes(); /* may update entry */
359 mutex_lock(&tracepoints_mutex); 342 release_probes(old);
360 entry = get_tracepoint(name); 343 return 0;
361 WARN_ON(!entry);
362 if (entry->rcu_pending)
363 rcu_barrier_sched();
364 tracepoint_entry_free_old(entry, old);
365end:
366 mutex_unlock(&tracepoints_mutex);
367 return ret;
368} 344}
369EXPORT_SYMBOL_GPL(tracepoint_probe_register); 345EXPORT_SYMBOL_GPL(tracepoint_probe_register);
370 346
347static void *tracepoint_remove_probe(const char *name, void *probe)
348{
349 struct tracepoint_entry *entry;
350 void *old;
351
352 entry = get_tracepoint(name);
353 if (!entry)
354 return ERR_PTR(-ENOENT);
355 old = tracepoint_entry_remove_probe(entry, probe);
356 if (IS_ERR(old))
357 return old;
358 if (!entry->refcount)
359 remove_tracepoint(entry);
360 return old;
361}
362
371/** 363/**
372 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 364 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
373 * @name: tracepoint name 365 * @name: tracepoint name
@@ -380,38 +372,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
380 */ 372 */
381int tracepoint_probe_unregister(const char *name, void *probe) 373int tracepoint_probe_unregister(const char *name, void *probe)
382{ 374{
383 struct tracepoint_entry *entry;
384 void *old; 375 void *old;
385 int ret = -ENOENT;
386 376
387 mutex_lock(&tracepoints_mutex); 377 mutex_lock(&tracepoints_mutex);
388 entry = get_tracepoint(name); 378 old = tracepoint_remove_probe(name, probe);
389 if (!entry)
390 goto end;
391 if (entry->rcu_pending)
392 rcu_barrier_sched();
393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
399 mutex_unlock(&tracepoints_mutex); 379 mutex_unlock(&tracepoints_mutex);
380 if (IS_ERR(old))
381 return PTR_ERR(old);
382
400 tracepoint_update_probes(); /* may update entry */ 383 tracepoint_update_probes(); /* may update entry */
384 release_probes(old);
385 return 0;
386}
387EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
388
389static LIST_HEAD(old_probes);
390static int need_update;
391
392static void tracepoint_add_old_probes(void *old)
393{
394 need_update = 1;
395 if (old) {
396 struct tp_probes *tp_probes = container_of(old,
397 struct tp_probes, probes[0]);
398 list_add(&tp_probes->u.list, &old_probes);
399 }
400}
401
402/**
403 * tracepoint_probe_register_noupdate - register a probe but not connect
404 * @name: tracepoint name
405 * @probe: probe handler
406 *
407 * caller must call tracepoint_probe_update_all()
408 */
409int tracepoint_probe_register_noupdate(const char *name, void *probe)
410{
411 void *old;
412
401 mutex_lock(&tracepoints_mutex); 413 mutex_lock(&tracepoints_mutex);
402 entry = get_tracepoint(name); 414 old = tracepoint_add_probe(name, probe);
403 if (!entry) 415 if (IS_ERR(old)) {
404 goto end; 416 mutex_unlock(&tracepoints_mutex);
405 if (entry->rcu_pending) 417 return PTR_ERR(old);
406 rcu_barrier_sched(); 418 }
407 tracepoint_entry_free_old(entry, old); 419 tracepoint_add_old_probes(old);
408 remove_tracepoint(name); /* Ignore busy error message */
409 ret = 0;
410end:
411 mutex_unlock(&tracepoints_mutex); 420 mutex_unlock(&tracepoints_mutex);
412 return ret; 421 return 0;
413} 422}
414EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 423EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
424
425/**
426 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
427 * @name: tracepoint name
428 * @probe: probe function pointer
429 *
430 * caller must call tracepoint_probe_update_all()
431 */
432int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
433{
434 void *old;
435
436 mutex_lock(&tracepoints_mutex);
437 old = tracepoint_remove_probe(name, probe);
438 if (IS_ERR(old)) {
439 mutex_unlock(&tracepoints_mutex);
440 return PTR_ERR(old);
441 }
442 tracepoint_add_old_probes(old);
443 mutex_unlock(&tracepoints_mutex);
444 return 0;
445}
446EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
447
448/**
449 * tracepoint_probe_update_all - update tracepoints
450 */
451void tracepoint_probe_update_all(void)
452{
453 LIST_HEAD(release_probes);
454 struct tp_probes *pos, *next;
455
456 mutex_lock(&tracepoints_mutex);
457 if (!need_update) {
458 mutex_unlock(&tracepoints_mutex);
459 return;
460 }
461 if (!list_empty(&old_probes))
462 list_replace_init(&old_probes, &release_probes);
463 need_update = 0;
464 mutex_unlock(&tracepoints_mutex);
465
466 tracepoint_update_probes();
467 list_for_each_entry_safe(pos, next, &release_probes, u.list) {
468 list_del(&pos->u.list);
469 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
470 }
471}
472EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
415 473
416/** 474/**
417 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 475 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
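
The three new entry points above separate probe bookkeeping from the tracepoint update itself: the _noupdate variants queue displaced probe arrays on old_probes, and tracepoint_probe_update_all() then does a single update pass and a single batch of RCU-deferred frees. A hedged usage sketch; the probe functions and tracepoint names here are illustrative, not taken from this patch:

#include <linux/tracepoint.h>

/* Hypothetical probe callbacks; real signatures depend on the tracepoint. */
static void my_probe_a(void *ignore) { }
static void my_probe_b(void *ignore) { }

static void my_connect_probes(void)
{
	/* Queue both registrations without touching live tracepoints. */
	tracepoint_probe_register_noupdate("sched_switch", (void *)my_probe_a);
	tracepoint_probe_register_noupdate("sched_wakeup", (void *)my_probe_b);

	/* One pass connects everything queued above and frees old arrays. */
	tracepoint_probe_update_all();
}
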
@@ -483,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
483 iter->tracepoint = NULL; 541 iter->tracepoint = NULL;
484} 542}
485EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 543EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
544
545#ifdef CONFIG_MODULES
546
547int tracepoint_module_notify(struct notifier_block *self,
548 unsigned long val, void *data)
549{
550 struct module *mod = data;
551
552 switch (val) {
553 case MODULE_STATE_COMING:
554 tracepoint_update_probe_range(mod->tracepoints,
555 mod->tracepoints + mod->num_tracepoints);
556 break;
557 case MODULE_STATE_GOING:
558 tracepoint_update_probe_range(mod->tracepoints,
559 mod->tracepoints + mod->num_tracepoints);
560 break;
561 }
562 return 0;
563}
564
565struct notifier_block tracepoint_module_nb = {
566 .notifier_call = tracepoint_module_notify,
567 .priority = 0,
568};
569
570static int init_tracepoints(void)
571{
572 return register_module_notifier(&tracepoint_module_nb);
573}
574__initcall(init_tracepoints);
575
576#endif /* CONFIG_MODULES */