-rw-r--r--  Documentation/kernel-parameters.txt      |   8
-rw-r--r--  arch/x86/include/asm/debugreg.h          |  22
-rw-r--r--  arch/x86/include/asm/desc.h              |  12
-rw-r--r--  arch/x86/kernel/cpu/common.c             |  24
-rw-r--r--  arch/x86/kernel/entry_64.S               | 218
-rw-r--r--  arch/x86/kernel/head_64.S                |   4
-rw-r--r--  arch/x86/kernel/nmi.c                    | 102
-rw-r--r--  arch/x86/kernel/traps.c                  |  20
-rw-r--r--  include/linux/compiler-gcc.h             |   5
-rw-r--r--  include/linux/ftrace.h                   |  77
-rw-r--r--  kernel/trace/ftrace.c                    | 715
-rw-r--r--  kernel/trace/trace_events_filter.c       | 283
-rw-r--r--  kernel/trace/trace_stack.c               |  30
-rw-r--r--  scripts/recordmcount.h                   |   2
-rw-r--r--  tools/perf/Documentation/perf-list.txt   |   2
-rw-r--r--  tools/perf/MANIFEST                      |   1
-rw-r--r--  tools/perf/builtin-annotate.c            |   7
-rw-r--r--  tools/perf/builtin-kmem.c                |   3
-rw-r--r--  tools/perf/builtin-kvm.c                 |   6
-rw-r--r--  tools/perf/builtin-script.c              |   4
-rw-r--r--  tools/perf/builtin-test.c                |   2
-rw-r--r--  tools/perf/builtin-top.c                 |   5
-rw-r--r--  tools/perf/util/evlist.c                 |   5
-rw-r--r--  tools/perf/util/hist.c                   | 131
-rw-r--r--  tools/perf/util/hist.h                   |   7
-rw-r--r--  tools/perf/util/parse-events.c           |  15
-rw-r--r--  tools/perf/util/trace-event-info.c       |   1
-rw-r--r--  tools/perf/util/util.c                   |  15
-rw-r--r--  tools/perf/util/util.h                   |   4
29 files changed, 1262 insertions(+), 468 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index eb93fd0ec734..b29f3c416296 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2475 stacktrace [FTRACE] 2475 stacktrace [FTRACE]
2476 Enabled the stack tracer on boot up. 2476 Enabled the stack tracer on boot up.
2477 2477
2478 stacktrace_filter=[function-list]
2479 [FTRACE] Limit the functions that the stack tracer
2480 will trace at boot up. function-list is a comma separated
2481 list of functions. This list can be changed at run
2482 time by the stack_trace_filter file in the debugfs
2483 tracing directory. Note, this enables stack tracing
2484 and the stacktrace above is not needed.
2485
2478 sti= [PARISC,HW] 2486 sti= [PARISC,HW]
2479 Format: <num> 2487 Format: <num>
2480 Set the STI (builtin display/keyboard on the HP-PARISC 2488 Set the STI (builtin display/keyboard on the HP-PARISC
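As a usage sketch (the function names below are arbitrary examples, not taken from this patch), the new option goes on the kernel command line like the other ftrace boot parameters:

    stacktrace_filter=kmalloc,kfree,schedule

The stack tracer is then enabled at boot but only records stacks for those call sites, and the list can later be changed through the stack_trace_filter file in the debugfs tracing directory, as the help text above notes.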
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0caefc6..b903d5ea3941 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
101 101
102extern void hw_breakpoint_restore(void); 102extern void hw_breakpoint_restore(void);
103 103
104#ifdef CONFIG_X86_64
105DECLARE_PER_CPU(int, debug_stack_usage);
106static inline void debug_stack_usage_inc(void)
107{
108 __get_cpu_var(debug_stack_usage)++;
109}
110static inline void debug_stack_usage_dec(void)
111{
112 __get_cpu_var(debug_stack_usage)--;
113}
114int is_debug_stack(unsigned long addr);
115void debug_stack_set_zero(void);
116void debug_stack_reset(void);
117#else /* !X86_64 */
118static inline int is_debug_stack(unsigned long addr) { return 0; }
119static inline void debug_stack_set_zero(void) { }
120static inline void debug_stack_reset(void) { }
121static inline void debug_stack_usage_inc(void) { }
122static inline void debug_stack_usage_dec(void) { }
123#endif /* X86_64 */
124
125
104#endif /* __KERNEL__ */ 126#endif /* __KERNEL__ */
105 127
106#endif /* _ASM_X86_DEBUGREG_H */ 128#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fadfdfc..e95822d683f4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
35 35
36extern struct desc_ptr idt_descr; 36extern struct desc_ptr idt_descr;
37extern gate_desc idt_table[]; 37extern gate_desc idt_table[];
38extern struct desc_ptr nmi_idt_descr;
39extern gate_desc nmi_idt_table[];
38 40
39struct gdt_page { 41struct gdt_page {
40 struct desc_struct gdt[GDT_ENTRIES]; 42 struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
307 desc->limit = (limit >> 16) & 0xf; 309 desc->limit = (limit >> 16) & 0xf;
308} 310}
309 311
312#ifdef CONFIG_X86_64
313static inline void set_nmi_gate(int gate, void *addr)
314{
315 gate_desc s;
316
317 pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
318 write_idt_entry(nmi_idt_table, gate, &s);
319}
320#endif
321
310static inline void _set_gate(int gate, unsigned type, void *addr, 322static inline void _set_gate(int gate, unsigned type, void *addr,
311 unsigned dpl, unsigned ist, unsigned seg) 323 unsigned dpl, unsigned ist, unsigned seg)
312{ 324{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 850f2963a420..d43cad74f166 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
1021 1021
1022#ifdef CONFIG_X86_64 1022#ifdef CONFIG_X86_64
1023struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1023struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
1024struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
1025 (unsigned long) nmi_idt_table };
1024 1026
1025DEFINE_PER_CPU_FIRST(union irq_stack_union, 1027DEFINE_PER_CPU_FIRST(union irq_stack_union,
1026 irq_stack_union) __aligned(PAGE_SIZE); 1028 irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
1085 */ 1087 */
1086DEFINE_PER_CPU(struct orig_ist, orig_ist); 1088DEFINE_PER_CPU(struct orig_ist, orig_ist);
1087 1089
1090static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1091DEFINE_PER_CPU(int, debug_stack_usage);
1092
1093int is_debug_stack(unsigned long addr)
1094{
1095 return __get_cpu_var(debug_stack_usage) ||
1096 (addr <= __get_cpu_var(debug_stack_addr) &&
1097 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
1098}
1099
1100void debug_stack_set_zero(void)
1101{
1102 load_idt((const struct desc_ptr *)&nmi_idt_descr);
1103}
1104
1105void debug_stack_reset(void)
1106{
1107 load_idt((const struct desc_ptr *)&idt_descr);
1108}
1109
1088#else /* CONFIG_X86_64 */ 1110#else /* CONFIG_X86_64 */
1089 1111
1090DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 1112DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
1212 estacks += exception_stack_sizes[v]; 1234 estacks += exception_stack_sizes[v];
1213 oist->ist[v] = t->x86_tss.ist[v] = 1235 oist->ist[v] = t->x86_tss.ist[v] =
1214 (unsigned long)estacks; 1236 (unsigned long)estacks;
1237 if (v == DEBUG_STACK-1)
1238 per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
1215 } 1239 }
1216 } 1240 }
1217 1241
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..940ba711fc28 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
1480 CFI_ENDPROC 1480 CFI_ENDPROC
1481END(error_exit) 1481END(error_exit)
1482 1482
1483/*
1484 * Test if a given stack is an NMI stack or not.
1485 */
1486 .macro test_in_nmi reg stack nmi_ret normal_ret
1487 cmpq %\reg, \stack
1488 ja \normal_ret
1489 subq $EXCEPTION_STKSZ, %\reg
1490 cmpq %\reg, \stack
1491 jb \normal_ret
1492 jmp \nmi_ret
1493 .endm
1483 1494
1484 /* runs on exception stack */ 1495 /* runs on exception stack */
1485ENTRY(nmi) 1496ENTRY(nmi)
1486 INTR_FRAME 1497 INTR_FRAME
1487 PARAVIRT_ADJUST_EXCEPTION_FRAME 1498 PARAVIRT_ADJUST_EXCEPTION_FRAME
1488 pushq_cfi $-1 1499 /*
1500 * We allow breakpoints in NMIs. If a breakpoint occurs, then
1501 * the iretq it performs will take us out of NMI context.
1502 * This means that we can have nested NMIs where the next
1503 * NMI is using the top of the stack of the previous NMI. We
1504 * can't let it execute because the nested NMI will corrupt the
1505 * stack of the previous NMI. NMI handlers are not re-entrant
1506 * anyway.
1507 *
1508 * To handle this case we do the following:
 1509 * Check a special location on the stack that contains
1510 * a variable that is set when NMIs are executing.
1511 * The interrupted task's stack is also checked to see if it
1512 * is an NMI stack.
1513 * If the variable is not set and the stack is not the NMI
1514 * stack then:
1515 * o Set the special variable on the stack
1516 * o Copy the interrupt frame into a "saved" location on the stack
1517 * o Copy the interrupt frame into a "copy" location on the stack
1518 * o Continue processing the NMI
1519 * If the variable is set or the previous stack is the NMI stack:
 1520 * o Modify the "copy" location to jump to the repeat_nmi
1521 * o return back to the first NMI
1522 *
1523 * Now on exit of the first NMI, we first clear the stack variable
1524 * The NMI stack will tell any nested NMIs at that point that it is
1525 * nested. Then we pop the stack normally with iret, and if there was
1526 * a nested NMI that updated the copy interrupt stack frame, a
1527 * jump will be made to the repeat_nmi code that will handle the second
1528 * NMI.
1529 */
1530
 1531 /* Use %rdx as our temp variable throughout */
1532 pushq_cfi %rdx
1533
1534 /*
1535 * Check the special variable on the stack to see if NMIs are
1536 * executing.
1537 */
1538 cmp $1, -8(%rsp)
1539 je nested_nmi
1540
1541 /*
1542 * Now test if the previous stack was an NMI stack.
1543 * We need the double check. We check the NMI stack to satisfy the
1544 * race when the first NMI clears the variable before returning.
1545 * We check the variable because the first NMI could be in a
1546 * breakpoint routine using a breakpoint stack.
1547 */
1548 lea 6*8(%rsp), %rdx
1549 test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
1550
1551nested_nmi:
1552 /*
1553 * Do nothing if we interrupted the fixup in repeat_nmi.
1554 * It's about to repeat the NMI handler, so we are fine
1555 * with ignoring this one.
1556 */
1557 movq $repeat_nmi, %rdx
1558 cmpq 8(%rsp), %rdx
1559 ja 1f
1560 movq $end_repeat_nmi, %rdx
1561 cmpq 8(%rsp), %rdx
1562 ja nested_nmi_out
1563
15641:
1565 /* Set up the interrupted NMIs stack to jump to repeat_nmi */
1566 leaq -6*8(%rsp), %rdx
1567 movq %rdx, %rsp
1568 CFI_ADJUST_CFA_OFFSET 6*8
1569 pushq_cfi $__KERNEL_DS
1570 pushq_cfi %rdx
1571 pushfq_cfi
1572 pushq_cfi $__KERNEL_CS
1573 pushq_cfi $repeat_nmi
1574
1575 /* Put stack back */
1576 addq $(11*8), %rsp
1577 CFI_ADJUST_CFA_OFFSET -11*8
1578
1579nested_nmi_out:
1580 popq_cfi %rdx
1581
1582 /* No need to check faults here */
1583 INTERRUPT_RETURN
1584
1585first_nmi:
1586 /*
1587 * Because nested NMIs will use the pushed location that we
1588 * stored in rdx, we must keep that space available.
1589 * Here's what our stack frame will look like:
1590 * +-------------------------+
1591 * | original SS |
1592 * | original Return RSP |
1593 * | original RFLAGS |
1594 * | original CS |
1595 * | original RIP |
1596 * +-------------------------+
1597 * | temp storage for rdx |
1598 * +-------------------------+
1599 * | NMI executing variable |
1600 * +-------------------------+
1601 * | Saved SS |
1602 * | Saved Return RSP |
1603 * | Saved RFLAGS |
1604 * | Saved CS |
1605 * | Saved RIP |
1606 * +-------------------------+
1607 * | copied SS |
1608 * | copied Return RSP |
1609 * | copied RFLAGS |
1610 * | copied CS |
1611 * | copied RIP |
1612 * +-------------------------+
1613 * | pt_regs |
1614 * +-------------------------+
1615 *
1616 * The saved RIP is used to fix up the copied RIP that a nested
1617 * NMI may zero out. The original stack frame and the temp storage
 1618 * are also used by nested NMIs and cannot be trusted on exit.
1619 */
1620 /* Set the NMI executing variable on the stack. */
1621 pushq_cfi $1
1622
1623 /* Copy the stack frame to the Saved frame */
1624 .rept 5
1625 pushq_cfi 6*8(%rsp)
1626 .endr
1627
1628 /* Make another copy, this one may be modified by nested NMIs */
1629 .rept 5
1630 pushq_cfi 4*8(%rsp)
1631 .endr
1632
1633 /* Do not pop rdx, nested NMIs will corrupt it */
1634 movq 11*8(%rsp), %rdx
1635
1636 /*
1637 * Everything below this point can be preempted by a nested
1638 * NMI if the first NMI took an exception. Repeated NMIs
1639 * caused by an exception and nested NMI will start here, and
1640 * can still be preempted by another NMI.
1641 */
1642restart_nmi:
1643 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1489 subq $ORIG_RAX-R15, %rsp 1644 subq $ORIG_RAX-R15, %rsp
1490 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1645 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1646 /*
1647 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
 1648 * as we should not be calling schedule in NMI context,
 1649 * even with normal interrupts enabled. An NMI should not be
1650 * setting NEED_RESCHED or anything that normal interrupts and
1651 * exceptions might do.
1652 */
1491 call save_paranoid 1653 call save_paranoid
1492 DEFAULT_FRAME 0 1654 DEFAULT_FRAME 0
1493 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1655 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1494 movq %rsp,%rdi 1656 movq %rsp,%rdi
1495 movq $-1,%rsi 1657 movq $-1,%rsi
1496 call do_nmi 1658 call do_nmi
1497#ifdef CONFIG_TRACE_IRQFLAGS
1498 /* paranoidexit; without TRACE_IRQS_OFF */
1499 /* ebx: no swapgs flag */
1500 DISABLE_INTERRUPTS(CLBR_NONE)
1501 testl %ebx,%ebx /* swapgs needed? */ 1659 testl %ebx,%ebx /* swapgs needed? */
1502 jnz nmi_restore 1660 jnz nmi_restore
1503 testl $3,CS(%rsp)
1504 jnz nmi_userspace
1505nmi_swapgs: 1661nmi_swapgs:
1506 SWAPGS_UNSAFE_STACK 1662 SWAPGS_UNSAFE_STACK
1507nmi_restore: 1663nmi_restore:
1508 RESTORE_ALL 8 1664 RESTORE_ALL 8
1665 /* Clear the NMI executing stack variable */
1666 movq $0, 10*8(%rsp)
1509 jmp irq_return 1667 jmp irq_return
1510nmi_userspace:
1511 GET_THREAD_INFO(%rcx)
1512 movl TI_flags(%rcx),%ebx
1513 andl $_TIF_WORK_MASK,%ebx
1514 jz nmi_swapgs
1515 movq %rsp,%rdi /* &pt_regs */
1516 call sync_regs
1517 movq %rax,%rsp /* switch stack for scheduling */
1518 testl $_TIF_NEED_RESCHED,%ebx
1519 jnz nmi_schedule
1520 movl %ebx,%edx /* arg3: thread flags */
1521 ENABLE_INTERRUPTS(CLBR_NONE)
1522 xorl %esi,%esi /* arg2: oldset */
1523 movq %rsp,%rdi /* arg1: &pt_regs */
1524 call do_notify_resume
1525 DISABLE_INTERRUPTS(CLBR_NONE)
1526 jmp nmi_userspace
1527nmi_schedule:
1528 ENABLE_INTERRUPTS(CLBR_ANY)
1529 call schedule
1530 DISABLE_INTERRUPTS(CLBR_ANY)
1531 jmp nmi_userspace
1532 CFI_ENDPROC
1533#else
1534 jmp paranoid_exit
1535 CFI_ENDPROC 1668 CFI_ENDPROC
1536#endif
1537END(nmi) 1669END(nmi)
1538 1670
1671 /*
1672 * If an NMI hit an iret because of an exception or breakpoint,
1673 * it can lose its NMI context, and a nested NMI may come in.
1674 * In that case, the nested NMI will change the preempted NMI's
1675 * stack to jump to here when it does the final iret.
1676 */
1677repeat_nmi:
1678 INTR_FRAME
1679 /* Update the stack variable to say we are still in NMI */
1680 movq $1, 5*8(%rsp)
1681
1682 /* copy the saved stack back to copy stack */
1683 .rept 5
1684 pushq_cfi 4*8(%rsp)
1685 .endr
1686
1687 jmp restart_nmi
1688 CFI_ENDPROC
1689end_repeat_nmi:
1690
1539ENTRY(ignore_sysret) 1691ENTRY(ignore_sysret)
1540 CFI_STARTPROC 1692 CFI_STARTPROC
1541 mov $-ENOSYS,%eax 1693 mov $-ENOSYS,%eax
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e11e39478a49..40f4eb3766d1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -417,6 +417,10 @@ ENTRY(phys_base)
417ENTRY(idt_table) 417ENTRY(idt_table)
418 .skip IDT_ENTRIES * 16 418 .skip IDT_ENTRIES * 16
419 419
420 .align L1_CACHE_BYTES
421ENTRY(nmi_idt_table)
422 .skip IDT_ENTRIES * 16
423
420 __PAGE_ALIGNED_BSS 424 __PAGE_ALIGNED_BSS
421 .align PAGE_SIZE 425 .align PAGE_SIZE
422ENTRY(empty_zero_page) 426ENTRY(empty_zero_page)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e88f37b58ddd..47acaf319165 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
405 unknown_nmi_error(reason, regs); 405 unknown_nmi_error(reason, regs);
406} 406}
407 407
408/*
409 * NMIs can hit breakpoints which will cause it to lose its
410 * NMI context with the CPU when the breakpoint does an iret.
411 */
412#ifdef CONFIG_X86_32
413/*
414 * For i386, NMIs use the same stack as the kernel, and we can
415 * add a workaround to the iret problem in C. Simply have 3 states
416 * the NMI can be in.
417 *
418 * 1) not running
419 * 2) executing
420 * 3) latched
421 *
422 * When no NMI is in progress, it is in the "not running" state.
423 * When an NMI comes in, it goes into the "executing" state.
424 * Normally, if another NMI is triggered, it does not interrupt
425 * the running NMI and the HW will simply latch it so that when
426 * the first NMI finishes, it will restart the second NMI.
427 * (Note, the latch is binary, thus multiple NMIs triggering,
428 * when one is running, are ignored. Only one NMI is restarted.)
429 *
430 * If an NMI hits a breakpoint that executes an iret, another
431 * NMI can preempt it. We do not want to allow this new NMI
432 * to run, but we want to execute it when the first one finishes.
433 * We set the state to "latched", and the first NMI will perform
 434 * a cmpxchg on the state, and if it doesn't successfully
435 * reset the state to "not running" it will restart the next
436 * NMI.
437 */
438enum nmi_states {
439 NMI_NOT_RUNNING,
440 NMI_EXECUTING,
441 NMI_LATCHED,
442};
443static DEFINE_PER_CPU(enum nmi_states, nmi_state);
444
445#define nmi_nesting_preprocess(regs) \
446 do { \
447 if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
448 __get_cpu_var(nmi_state) = NMI_LATCHED; \
449 return; \
450 } \
451 nmi_restart: \
452 __get_cpu_var(nmi_state) = NMI_EXECUTING; \
453 } while (0)
454
455#define nmi_nesting_postprocess() \
456 do { \
457 if (cmpxchg(&__get_cpu_var(nmi_state), \
458 NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
459 goto nmi_restart; \
460 } while (0)
461#else /* x86_64 */
462/*
463 * In x86_64 things are a bit more difficult. This has the same problem
464 * where an NMI hitting a breakpoint that calls iret will remove the
465 * NMI context, allowing a nested NMI to enter. What makes this more
466 * difficult is that both NMIs and breakpoints have their own stack.
467 * When a new NMI or breakpoint is executed, the stack is set to a fixed
468 * point. If an NMI is nested, it will have its stack set at that same
469 * fixed address that the first NMI had, and will start corrupting the
470 * stack. This is handled in entry_64.S, but the same problem exists with
471 * the breakpoint stack.
472 *
473 * If a breakpoint is being processed, and the debug stack is being used,
474 * if an NMI comes in and also hits a breakpoint, the stack pointer
475 * will be set to the same fixed address as the breakpoint that was
476 * interrupted, causing that stack to be corrupted. To handle this case,
477 * check if the stack that was interrupted is the debug stack, and if
478 * so, change the IDT so that new breakpoints will use the current stack
479 * and not switch to the fixed address. On return of the NMI, switch back
480 * to the original IDT.
481 */
482static DEFINE_PER_CPU(int, update_debug_stack);
483
484static inline void nmi_nesting_preprocess(struct pt_regs *regs)
485{
486 /*
487 * If we interrupted a breakpoint, it is possible that
488 * the nmi handler will have breakpoints too. We need to
489 * change the IDT such that breakpoints that happen here
490 * continue to use the NMI stack.
491 */
492 if (unlikely(is_debug_stack(regs->sp))) {
493 debug_stack_set_zero();
494 __get_cpu_var(update_debug_stack) = 1;
495 }
496}
497
498static inline void nmi_nesting_postprocess(void)
499{
500 if (unlikely(__get_cpu_var(update_debug_stack)))
501 debug_stack_reset();
502}
503#endif
504
408dotraplinkage notrace __kprobes void 505dotraplinkage notrace __kprobes void
409do_nmi(struct pt_regs *regs, long error_code) 506do_nmi(struct pt_regs *regs, long error_code)
410{ 507{
508 nmi_nesting_preprocess(regs);
509
411 nmi_enter(); 510 nmi_enter();
412 511
413 inc_irq_stat(__nmi_count); 512 inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
416 default_do_nmi(regs); 515 default_do_nmi(regs);
417 516
418 nmi_exit(); 517 nmi_exit();
518
519 /* On i386, may loop back to preprocess */
520 nmi_nesting_postprocess();
419} 521}
420 522
421void stop_nmi(void) 523void stop_nmi(void)
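For readability, here is a rough C sketch (illustrative only, not part of the patch) of what the 32-bit path above boils down to once nmi_nesting_preprocess()/nmi_nesting_postprocess() are expanded into do_nmi(); this_cpu_nmi_state stands in for __get_cpu_var(nmi_state) and the real handler body is elided:

	static void do_nmi_sketch(struct pt_regs *regs)
	{
		if (this_cpu_nmi_state != NMI_NOT_RUNNING) {
			/* An NMI is already running on this CPU: just latch this one */
			this_cpu_nmi_state = NMI_LATCHED;
			return;
		}
	nmi_restart:
		this_cpu_nmi_state = NMI_EXECUTING;

		/* ... nmi_enter(), default_do_nmi(regs), nmi_exit() run here ... */

		/*
		 * If a nested NMI flipped the state to NMI_LATCHED while the
		 * handler ran, the cmpxchg fails and we loop to service it.
		 */
		if (cmpxchg(&this_cpu_nmi_state, NMI_EXECUTING, NMI_NOT_RUNNING)
							!= NMI_EXECUTING)
			goto nmi_restart;
	}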
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb679d..482ec3af2067 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
311 == NOTIFY_STOP) 311 == NOTIFY_STOP)
312 return; 312 return;
313 313
314 /*
315 * Let others (NMI) know that the debug stack is in use
316 * as we may switch to the interrupt stack.
317 */
318 debug_stack_usage_inc();
314 preempt_conditional_sti(regs); 319 preempt_conditional_sti(regs);
315 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 320 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
316 preempt_conditional_cli(regs); 321 preempt_conditional_cli(regs);
322 debug_stack_usage_dec();
317} 323}
318 324
319#ifdef CONFIG_X86_64 325#ifdef CONFIG_X86_64
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
406 SIGTRAP) == NOTIFY_STOP) 412 SIGTRAP) == NOTIFY_STOP)
407 return; 413 return;
408 414
415 /*
416 * Let others (NMI) know that the debug stack is in use
417 * as we may switch to the interrupt stack.
418 */
419 debug_stack_usage_inc();
420
409 /* It's safe to allow irq's after DR6 has been saved */ 421 /* It's safe to allow irq's after DR6 has been saved */
410 preempt_conditional_sti(regs); 422 preempt_conditional_sti(regs);
411 423
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
413 handle_vm86_trap((struct kernel_vm86_regs *) regs, 425 handle_vm86_trap((struct kernel_vm86_regs *) regs,
414 error_code, 1); 426 error_code, 1);
415 preempt_conditional_cli(regs); 427 preempt_conditional_cli(regs);
428 debug_stack_usage_dec();
416 return; 429 return;
417 } 430 }
418 431
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
432 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) 445 if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
433 send_sigtrap(tsk, regs, error_code, si_code); 446 send_sigtrap(tsk, regs, error_code, si_code);
434 preempt_conditional_cli(regs); 447 preempt_conditional_cli(regs);
448 debug_stack_usage_dec();
435 449
436 return; 450 return;
437} 451}
@@ -718,4 +732,10 @@ void __init trap_init(void)
718 cpu_init(); 732 cpu_init();
719 733
720 x86_init.irqs.trap_init(); 734 x86_init.irqs.trap_init();
735
736#ifdef CONFIG_X86_64
737 memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
738 set_nmi_gate(1, &debug);
739 set_nmi_gate(3, &int3);
740#endif
721} 741}
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028e833d..3fd17c249221 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -50,6 +50,11 @@
50# define inline inline __attribute__((always_inline)) 50# define inline inline __attribute__((always_inline))
51# define __inline__ __inline__ __attribute__((always_inline)) 51# define __inline__ __inline__ __attribute__((always_inline))
52# define __inline __inline __attribute__((always_inline)) 52# define __inline __inline __attribute__((always_inline))
53#else
54/* A lot of inline functions can cause havoc with function tracing */
55# define inline inline notrace
56# define __inline__ __inline__ notrace
57# define __inline __inline notrace
53#endif 58#endif
54 59
55#define __deprecated __attribute__((deprecated)) 60#define __deprecated __attribute__((deprecated))
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 26eafcef75be..028e26f0bf08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -133,6 +133,8 @@ struct ftrace_func_command {
133int ftrace_arch_code_modify_prepare(void); 133int ftrace_arch_code_modify_prepare(void);
134int ftrace_arch_code_modify_post_process(void); 134int ftrace_arch_code_modify_post_process(void);
135 135
136void ftrace_bug(int err, unsigned long ip);
137
136struct seq_file; 138struct seq_file;
137 139
138struct ftrace_probe_ops { 140struct ftrace_probe_ops {
@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
161 163
162enum { 164enum {
163 FTRACE_FL_ENABLED = (1 << 30), 165 FTRACE_FL_ENABLED = (1 << 30),
164 FTRACE_FL_FREE = (1 << 31),
165}; 166};
166 167
167#define FTRACE_FL_MASK (0x3UL << 30) 168#define FTRACE_FL_MASK (0x3UL << 30)
@@ -172,10 +173,7 @@ struct dyn_ftrace {
172 unsigned long ip; /* address of mcount call-site */ 173 unsigned long ip; /* address of mcount call-site */
173 struct dyn_ftrace *freelist; 174 struct dyn_ftrace *freelist;
174 }; 175 };
175 union { 176 unsigned long flags;
176 unsigned long flags;
177 struct dyn_ftrace *newlist;
178 };
179 struct dyn_arch_ftrace arch; 177 struct dyn_arch_ftrace arch;
180}; 178};
181 179
@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
190int register_ftrace_command(struct ftrace_func_command *cmd); 188int register_ftrace_command(struct ftrace_func_command *cmd);
191int unregister_ftrace_command(struct ftrace_func_command *cmd); 189int unregister_ftrace_command(struct ftrace_func_command *cmd);
192 190
191enum {
192 FTRACE_UPDATE_CALLS = (1 << 0),
193 FTRACE_DISABLE_CALLS = (1 << 1),
194 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
195 FTRACE_START_FUNC_RET = (1 << 3),
196 FTRACE_STOP_FUNC_RET = (1 << 4),
197};
198
199enum {
200 FTRACE_UPDATE_IGNORE,
201 FTRACE_UPDATE_MAKE_CALL,
202 FTRACE_UPDATE_MAKE_NOP,
203};
204
205enum {
206 FTRACE_ITER_FILTER = (1 << 0),
207 FTRACE_ITER_NOTRACE = (1 << 1),
208 FTRACE_ITER_PRINTALL = (1 << 2),
209 FTRACE_ITER_DO_HASH = (1 << 3),
210 FTRACE_ITER_HASH = (1 << 4),
211 FTRACE_ITER_ENABLED = (1 << 5),
212};
213
214void arch_ftrace_update_code(int command);
215
216struct ftrace_rec_iter;
217
218struct ftrace_rec_iter *ftrace_rec_iter_start(void);
219struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
220struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
221
222int ftrace_update_record(struct dyn_ftrace *rec, int enable);
223int ftrace_test_record(struct dyn_ftrace *rec, int enable);
224void ftrace_run_stop_machine(int command);
225int ftrace_location(unsigned long ip);
226
227extern ftrace_func_t ftrace_trace_function;
228
229int ftrace_regex_open(struct ftrace_ops *ops, int flag,
230 struct inode *inode, struct file *file);
231ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
232 size_t cnt, loff_t *ppos);
233ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
234 size_t cnt, loff_t *ppos);
235loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
236int ftrace_regex_release(struct inode *inode, struct file *file);
237
238void __init
239ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
240
193/* defined in arch */ 241/* defined in arch */
194extern int ftrace_ip_converted(unsigned long ip); 242extern int ftrace_ip_converted(unsigned long ip);
195extern int ftrace_dyn_arch_init(void *data); 243extern int ftrace_dyn_arch_init(void *data);
@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
284{ 332{
285 return 0; 333 return 0;
286} 334}
335
336/*
337 * Again users of functions that have ftrace_ops may not
338 * have them defined when ftrace is not enabled, but these
339 * functions may still be called. Use a macro instead of inline.
340 */
341#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
342#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
343
344static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
345 size_t cnt, loff_t *ppos) { return -ENODEV; }
346static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
347 size_t cnt, loff_t *ppos) { return -ENODEV; }
348static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
349{
350 return -ENODEV;
351}
352static inline int
353ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
287#endif /* CONFIG_DYNAMIC_FTRACE */ 354#endif /* CONFIG_DYNAMIC_FTRACE */
288 355
289/* totally disable ftrace - can not re-enable after this */ 356/* totally disable ftrace - can not re-enable after this */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b1e8943fed1d..683d559a0eef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,11 +22,13 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/bsearch.h>
25#include <linux/module.h> 26#include <linux/module.h>
26#include <linux/ftrace.h> 27#include <linux/ftrace.h>
27#include <linux/sysctl.h> 28#include <linux/sysctl.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
29#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/sort.h>
30#include <linux/list.h> 32#include <linux/list.h>
31#include <linux/hash.h> 33#include <linux/hash.h>
32#include <linux/rcupdate.h> 34#include <linux/rcupdate.h>
@@ -947,13 +949,6 @@ struct ftrace_func_probe {
947 struct rcu_head rcu; 949 struct rcu_head rcu;
948}; 950};
949 951
950enum {
951 FTRACE_ENABLE_CALLS = (1 << 0),
952 FTRACE_DISABLE_CALLS = (1 << 1),
953 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
954 FTRACE_START_FUNC_RET = (1 << 3),
955 FTRACE_STOP_FUNC_RET = (1 << 4),
956};
957struct ftrace_func_entry { 952struct ftrace_func_entry {
958 struct hlist_node hlist; 953 struct hlist_node hlist;
959 unsigned long ip; 954 unsigned long ip;
@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
984 .filter_hash = EMPTY_HASH, 979 .filter_hash = EMPTY_HASH,
985}; 980};
986 981
987static struct dyn_ftrace *ftrace_new_addrs;
988
989static DEFINE_MUTEX(ftrace_regex_lock); 982static DEFINE_MUTEX(ftrace_regex_lock);
990 983
991struct ftrace_page { 984struct ftrace_page {
992 struct ftrace_page *next; 985 struct ftrace_page *next;
986 struct dyn_ftrace *records;
993 int index; 987 int index;
994 struct dyn_ftrace records[]; 988 int size;
995}; 989};
996 990
997#define ENTRIES_PER_PAGE \ 991static struct ftrace_page *ftrace_new_pgs;
998 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) 992
993#define ENTRY_SIZE sizeof(struct dyn_ftrace)
994#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
999 995
1000/* estimate from running different kernels */ 996/* estimate from running different kernels */
1001#define NR_TO_INIT 10000 997#define NR_TO_INIT 10000
@@ -1003,7 +999,10 @@ struct ftrace_page {
1003static struct ftrace_page *ftrace_pages_start; 999static struct ftrace_page *ftrace_pages_start;
1004static struct ftrace_page *ftrace_pages; 1000static struct ftrace_page *ftrace_pages;
1005 1001
1006static struct dyn_ftrace *ftrace_free_records; 1002static bool ftrace_hash_empty(struct ftrace_hash *hash)
1003{
1004 return !hash || !hash->count;
1005}
1007 1006
1008static struct ftrace_func_entry * 1007static struct ftrace_func_entry *
1009ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) 1008ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
1013 struct hlist_head *hhd; 1012 struct hlist_head *hhd;
1014 struct hlist_node *n; 1013 struct hlist_node *n;
1015 1014
1016 if (!hash->count) 1015 if (ftrace_hash_empty(hash))
1017 return NULL; 1016 return NULL;
1018 1017
1019 if (hash->size_bits > 0) 1018 if (hash->size_bits > 0)
@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1157 return NULL; 1156 return NULL;
1158 1157
1159 /* Empty hash? */ 1158 /* Empty hash? */
1160 if (!hash || !hash->count) 1159 if (ftrace_hash_empty(hash))
1161 return new_hash; 1160 return new_hash;
1162 1161
1163 size = 1 << hash->size_bits; 1162 size = 1 << hash->size_bits;
@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1282 filter_hash = rcu_dereference_raw(ops->filter_hash); 1281 filter_hash = rcu_dereference_raw(ops->filter_hash);
1283 notrace_hash = rcu_dereference_raw(ops->notrace_hash); 1282 notrace_hash = rcu_dereference_raw(ops->notrace_hash);
1284 1283
1285 if ((!filter_hash || !filter_hash->count || 1284 if ((ftrace_hash_empty(filter_hash) ||
1286 ftrace_lookup_ip(filter_hash, ip)) && 1285 ftrace_lookup_ip(filter_hash, ip)) &&
1287 (!notrace_hash || !notrace_hash->count || 1286 (ftrace_hash_empty(notrace_hash) ||
1288 !ftrace_lookup_ip(notrace_hash, ip))) 1287 !ftrace_lookup_ip(notrace_hash, ip)))
1289 ret = 1; 1288 ret = 1;
1290 else 1289 else
@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
1307 } \ 1306 } \
1308 } 1307 }
1309 1308
1309
1310static int ftrace_cmp_recs(const void *a, const void *b)
1311{
1312 const struct dyn_ftrace *reca = a;
1313 const struct dyn_ftrace *recb = b;
1314
1315 if (reca->ip > recb->ip)
1316 return 1;
1317 if (reca->ip < recb->ip)
1318 return -1;
1319 return 0;
1320}
1321
1322/**
 1323 * ftrace_location - return true if the ip given is a traced location
1324 * @ip: the instruction pointer to check
1325 *
1326 * Returns 1 if @ip given is a pointer to a ftrace location.
1327 * That is, the instruction that is either a NOP or call to
1328 * the function tracer. It checks the ftrace internal tables to
1329 * determine if the address belongs or not.
1330 */
1331int ftrace_location(unsigned long ip)
1332{
1333 struct ftrace_page *pg;
1334 struct dyn_ftrace *rec;
1335 struct dyn_ftrace key;
1336
1337 key.ip = ip;
1338
1339 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1340 rec = bsearch(&key, pg->records, pg->index,
1341 sizeof(struct dyn_ftrace),
1342 ftrace_cmp_recs);
1343 if (rec)
1344 return 1;
1345 }
1346
1347 return 0;
1348}
1349
1310static void __ftrace_hash_rec_update(struct ftrace_ops *ops, 1350static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1311 int filter_hash, 1351 int filter_hash,
1312 bool inc) 1352 bool inc)
@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1336 if (filter_hash) { 1376 if (filter_hash) {
1337 hash = ops->filter_hash; 1377 hash = ops->filter_hash;
1338 other_hash = ops->notrace_hash; 1378 other_hash = ops->notrace_hash;
1339 if (!hash || !hash->count) 1379 if (ftrace_hash_empty(hash))
1340 all = 1; 1380 all = 1;
1341 } else { 1381 } else {
1342 inc = !inc; 1382 inc = !inc;
@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1346 * If the notrace hash has no items, 1386 * If the notrace hash has no items,
1347 * then there's nothing to do. 1387 * then there's nothing to do.
1348 */ 1388 */
1349 if (hash && !hash->count) 1389 if (ftrace_hash_empty(hash))
1350 return; 1390 return;
1351 } 1391 }
1352 1392
@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1363 if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) 1403 if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
1364 match = 1; 1404 match = 1;
1365 } else { 1405 } else {
1366 in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); 1406 in_hash = !!ftrace_lookup_ip(hash, rec->ip);
1367 in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); 1407 in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
1368 1408
1369 /* 1409 /*
1370 * 1410 *
@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1372 if (filter_hash && in_hash && !in_other_hash) 1412 if (filter_hash && in_hash && !in_other_hash)
1373 match = 1; 1413 match = 1;
1374 else if (!filter_hash && in_hash && 1414 else if (!filter_hash && in_hash &&
1375 (in_other_hash || !other_hash->count)) 1415 (in_other_hash || ftrace_hash_empty(other_hash)))
1376 match = 1; 1416 match = 1;
1377 } 1417 }
1378 if (!match) 1418 if (!match)
@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
1406 __ftrace_hash_rec_update(ops, filter_hash, 1); 1446 __ftrace_hash_rec_update(ops, filter_hash, 1);
1407} 1447}
1408 1448
1409static void ftrace_free_rec(struct dyn_ftrace *rec)
1410{
1411 rec->freelist = ftrace_free_records;
1412 ftrace_free_records = rec;
1413 rec->flags |= FTRACE_FL_FREE;
1414}
1415
1416static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 1449static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
1417{ 1450{
1418 struct dyn_ftrace *rec; 1451 if (ftrace_pages->index == ftrace_pages->size) {
1419 1452 /* We should have allocated enough */
1420 /* First check for freed records */ 1453 if (WARN_ON(!ftrace_pages->next))
1421 if (ftrace_free_records) {
1422 rec = ftrace_free_records;
1423
1424 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
1425 FTRACE_WARN_ON_ONCE(1);
1426 ftrace_free_records = NULL;
1427 return NULL; 1454 return NULL;
1428 }
1429
1430 ftrace_free_records = rec->freelist;
1431 memset(rec, 0, sizeof(*rec));
1432 return rec;
1433 }
1434
1435 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
1436 if (!ftrace_pages->next) {
1437 /* allocate another page */
1438 ftrace_pages->next =
1439 (void *)get_zeroed_page(GFP_KERNEL);
1440 if (!ftrace_pages->next)
1441 return NULL;
1442 }
1443 ftrace_pages = ftrace_pages->next; 1455 ftrace_pages = ftrace_pages->next;
1444 } 1456 }
1445 1457
@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
1459 return NULL; 1471 return NULL;
1460 1472
1461 rec->ip = ip; 1473 rec->ip = ip;
1462 rec->newlist = ftrace_new_addrs;
1463 ftrace_new_addrs = rec;
1464 1474
1465 return rec; 1475 return rec;
1466} 1476}
@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
1475 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); 1485 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
1476} 1486}
1477 1487
1478static void ftrace_bug(int failed, unsigned long ip) 1488/**
1489 * ftrace_bug - report and shutdown function tracer
1490 * @failed: The failed type (EFAULT, EINVAL, EPERM)
1491 * @ip: The address that failed
1492 *
1493 * The arch code that enables or disables the function tracing
1494 * can call ftrace_bug() when it has detected a problem in
1495 * modifying the code. @failed should be one of either:
1496 * EFAULT - if the problem happens on reading the @ip address
1497 * EINVAL - if what is read at @ip is not what was expected
 1498 * EPERM - if the problem happens on writing to the @ip address
1499 */
1500void ftrace_bug(int failed, unsigned long ip)
1479{ 1501{
1480 switch (failed) { 1502 switch (failed) {
1481 case -EFAULT: 1503 case -EFAULT:
@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
1517 return 0; 1539 return 0;
1518} 1540}
1519 1541
1520 1542static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
1521static int
1522__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1523{ 1543{
1524 unsigned long ftrace_addr;
1525 unsigned long flag = 0UL; 1544 unsigned long flag = 0UL;
1526 1545
1527 ftrace_addr = (unsigned long)FTRACE_ADDR;
1528
1529 /* 1546 /*
1530 * If we are enabling tracing: 1547 * If we are updating calls:
1531 * 1548 *
1532 * If the record has a ref count, then we need to enable it 1549 * If the record has a ref count, then we need to enable it
1533 * because someone is using it. 1550 * because someone is using it.
1534 * 1551 *
1535 * Otherwise we make sure its disabled. 1552 * Otherwise we make sure its disabled.
1536 * 1553 *
1537 * If we are disabling tracing, then disable all records that 1554 * If we are disabling calls, then disable all records that
1538 * are enabled. 1555 * are enabled.
1539 */ 1556 */
1540 if (enable && (rec->flags & ~FTRACE_FL_MASK)) 1557 if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1542 1559
1543 /* If the state of this record hasn't changed, then do nothing */ 1560 /* If the state of this record hasn't changed, then do nothing */
1544 if ((rec->flags & FTRACE_FL_ENABLED) == flag) 1561 if ((rec->flags & FTRACE_FL_ENABLED) == flag)
1545 return 0; 1562 return FTRACE_UPDATE_IGNORE;
1546 1563
1547 if (flag) { 1564 if (flag) {
1548 rec->flags |= FTRACE_FL_ENABLED; 1565 if (update)
1566 rec->flags |= FTRACE_FL_ENABLED;
1567 return FTRACE_UPDATE_MAKE_CALL;
1568 }
1569
1570 if (update)
1571 rec->flags &= ~FTRACE_FL_ENABLED;
1572
1573 return FTRACE_UPDATE_MAKE_NOP;
1574}
1575
1576/**
1577 * ftrace_update_record, set a record that now is tracing or not
1578 * @rec: the record to update
1579 * @enable: set to 1 if the record is tracing, zero to force disable
1580 *
1581 * The records that represent all functions that can be traced need
1582 * to be updated when tracing has been enabled.
1583 */
1584int ftrace_update_record(struct dyn_ftrace *rec, int enable)
1585{
1586 return ftrace_check_record(rec, enable, 1);
1587}
1588
1589/**
1590 * ftrace_test_record, check if the record has been enabled or not
1591 * @rec: the record to test
1592 * @enable: set to 1 to check if enabled, 0 if it is disabled
1593 *
1594 * The arch code may need to test if a record is already set to
1595 * tracing to determine how to modify the function code that it
1596 * represents.
1597 */
1598int ftrace_test_record(struct dyn_ftrace *rec, int enable)
1599{
1600 return ftrace_check_record(rec, enable, 0);
1601}
1602
1603static int
1604__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1605{
1606 unsigned long ftrace_addr;
1607 int ret;
1608
1609 ftrace_addr = (unsigned long)FTRACE_ADDR;
1610
1611 ret = ftrace_update_record(rec, enable);
1612
1613 switch (ret) {
1614 case FTRACE_UPDATE_IGNORE:
1615 return 0;
1616
1617 case FTRACE_UPDATE_MAKE_CALL:
1549 return ftrace_make_call(rec, ftrace_addr); 1618 return ftrace_make_call(rec, ftrace_addr);
1619
1620 case FTRACE_UPDATE_MAKE_NOP:
1621 return ftrace_make_nop(NULL, rec, ftrace_addr);
1550 } 1622 }
1551 1623
 1552 rec->flags &= ~FTRACE_FL_ENABLED; 1624 return -1; /* unknown ftrace bug */
1553 return ftrace_make_nop(NULL, rec, ftrace_addr);
1554} 1625}
1555 1626
1556static void ftrace_replace_code(int enable) 1627static void ftrace_replace_code(int update)
1557{ 1628{
1558 struct dyn_ftrace *rec; 1629 struct dyn_ftrace *rec;
1559 struct ftrace_page *pg; 1630 struct ftrace_page *pg;
@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
1563 return; 1634 return;
1564 1635
1565 do_for_each_ftrace_rec(pg, rec) { 1636 do_for_each_ftrace_rec(pg, rec) {
1566 /* Skip over free records */ 1637 failed = __ftrace_replace_code(rec, update);
1567 if (rec->flags & FTRACE_FL_FREE)
1568 continue;
1569
1570 failed = __ftrace_replace_code(rec, enable);
1571 if (failed) { 1638 if (failed) {
1572 ftrace_bug(failed, rec->ip); 1639 ftrace_bug(failed, rec->ip);
1573 /* Stop processing */ 1640 /* Stop processing */
@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
1576 } while_for_each_ftrace_rec(); 1643 } while_for_each_ftrace_rec();
1577} 1644}
1578 1645
1646struct ftrace_rec_iter {
1647 struct ftrace_page *pg;
1648 int index;
1649};
1650
1651/**
1652 * ftrace_rec_iter_start, start up iterating over traced functions
1653 *
1654 * Returns an iterator handle that is used to iterate over all
1655 * the records that represent address locations where functions
1656 * are traced.
1657 *
1658 * May return NULL if no records are available.
1659 */
1660struct ftrace_rec_iter *ftrace_rec_iter_start(void)
1661{
1662 /*
1663 * We only use a single iterator.
1664 * Protected by the ftrace_lock mutex.
1665 */
1666 static struct ftrace_rec_iter ftrace_rec_iter;
1667 struct ftrace_rec_iter *iter = &ftrace_rec_iter;
1668
1669 iter->pg = ftrace_pages_start;
1670 iter->index = 0;
1671
1672 /* Could have empty pages */
1673 while (iter->pg && !iter->pg->index)
1674 iter->pg = iter->pg->next;
1675
1676 if (!iter->pg)
1677 return NULL;
1678
1679 return iter;
1680}
1681
1682/**
1683 * ftrace_rec_iter_next, get the next record to process.
1684 * @iter: The handle to the iterator.
1685 *
1686 * Returns the next iterator after the given iterator @iter.
1687 */
1688struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
1689{
1690 iter->index++;
1691
1692 if (iter->index >= iter->pg->index) {
1693 iter->pg = iter->pg->next;
1694 iter->index = 0;
1695
1696 /* Could have empty pages */
1697 while (iter->pg && !iter->pg->index)
1698 iter->pg = iter->pg->next;
1699 }
1700
1701 if (!iter->pg)
1702 return NULL;
1703
1704 return iter;
1705}
1706
1707/**
1708 * ftrace_rec_iter_record, get the record at the iterator location
1709 * @iter: The current iterator location
1710 *
1711 * Returns the record that the current @iter is at.
1712 */
1713struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
1714{
1715 return &iter->pg->records[iter->index];
1716}
1717
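To show how this iterator is meant to be consumed (illustrative only, not part of the patch: sample_update_call_sites() is a made-up helper, and a real arch_ftrace_update_code() override would also have to handle the remaining FTRACE_* command bits), an architecture could walk every mcount call site roughly like this:

	static void sample_update_call_sites(int enable)
	{
		struct ftrace_rec_iter *iter;
		struct dyn_ftrace *rec;
		int ret;

		for (iter = ftrace_rec_iter_start(); iter;
		     iter = ftrace_rec_iter_next(iter)) {
			rec = ftrace_rec_iter_record(iter);

			/* Ask ftrace what this site should become and update its flags */
			ret = ftrace_update_record(rec, enable);

			if (ret == FTRACE_UPDATE_MAKE_CALL)
				ret = ftrace_make_call(rec, (unsigned long)FTRACE_ADDR);
			else if (ret == FTRACE_UPDATE_MAKE_NOP)
				ret = ftrace_make_nop(NULL, rec, (unsigned long)FTRACE_ADDR);
			else
				continue;	/* FTRACE_UPDATE_IGNORE */

			if (ret)
				ftrace_bug(ret, rec->ip);
		}
	}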
1579static int 1718static int
1580ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) 1719ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
1581{ 1720{
@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
1617{ 1756{
1618 int *command = data; 1757 int *command = data;
1619 1758
1620 /* 1759 if (*command & FTRACE_UPDATE_CALLS)
1621 * Do not call function tracer while we update the code.
1622 * We are in stop machine, no worrying about races.
1623 */
1624 function_trace_stop++;
1625
1626 if (*command & FTRACE_ENABLE_CALLS)
1627 ftrace_replace_code(1); 1760 ftrace_replace_code(1);
1628 else if (*command & FTRACE_DISABLE_CALLS) 1761 else if (*command & FTRACE_DISABLE_CALLS)
1629 ftrace_replace_code(0); 1762 ftrace_replace_code(0);
@@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
1636 else if (*command & FTRACE_STOP_FUNC_RET) 1769 else if (*command & FTRACE_STOP_FUNC_RET)
1637 ftrace_disable_ftrace_graph_caller(); 1770 ftrace_disable_ftrace_graph_caller();
1638 1771
1639#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1640 /*
1641 * For archs that call ftrace_test_stop_func(), we must
1642 * wait till after we update all the function callers
1643 * before we update the callback. This keeps different
1644 * ops that record different functions from corrupting
1645 * each other.
1646 */
1647 __ftrace_trace_function = __ftrace_trace_function_delay;
1648#endif
1649 function_trace_stop--;
1650
1651 return 0; 1772 return 0;
1652} 1773}
1653 1774
1775/**
1776 * ftrace_run_stop_machine, go back to the stop machine method
1777 * @command: The command to tell ftrace what to do
1778 *
 1779 * If an arch needs to fall back to the stop machine method, then
1780 * it can call this function.
1781 */
1782void ftrace_run_stop_machine(int command)
1783{
1784 stop_machine(__ftrace_modify_code, &command, NULL);
1785}
1786
1787/**
1788 * arch_ftrace_update_code, modify the code to trace or not trace
1789 * @command: The command that needs to be done
1790 *
1791 * Archs can override this function if it does not need to
1792 * run stop_machine() to modify code.
1793 */
1794void __weak arch_ftrace_update_code(int command)
1795{
1796 ftrace_run_stop_machine(command);
1797}
1798
1654static void ftrace_run_update_code(int command) 1799static void ftrace_run_update_code(int command)
1655{ 1800{
1656 int ret; 1801 int ret;
@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
1659 FTRACE_WARN_ON(ret); 1804 FTRACE_WARN_ON(ret);
1660 if (ret) 1805 if (ret)
1661 return; 1806 return;
1807 /*
1808 * Do not call function tracer while we update the code.
1809 * We are in stop machine.
1810 */
1811 function_trace_stop++;
1662 1812
1663 stop_machine(__ftrace_modify_code, &command, NULL); 1813 /*
1814 * By default we use stop_machine() to modify the code.
1815 * But archs can do what ever they want as long as it
1816 * is safe. The stop_machine() is the safest, but also
1817 * produces the most overhead.
1818 */
1819 arch_ftrace_update_code(command);
1820
1821#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1822 /*
1823 * For archs that call ftrace_test_stop_func(), we must
1824 * wait till after we update all the function callers
1825 * before we update the callback. This keeps different
1826 * ops that record different functions from corrupting
1827 * each other.
1828 */
1829 __ftrace_trace_function = __ftrace_trace_function_delay;
1830#endif
1831 function_trace_stop--;
1664 1832
1665 ret = ftrace_arch_code_modify_post_process(); 1833 ret = ftrace_arch_code_modify_post_process();
1666 FTRACE_WARN_ON(ret); 1834 FTRACE_WARN_ON(ret);
@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
1691 return -ENODEV; 1859 return -ENODEV;
1692 1860
1693 ftrace_start_up++; 1861 ftrace_start_up++;
1694 command |= FTRACE_ENABLE_CALLS; 1862 command |= FTRACE_UPDATE_CALLS;
1695 1863
1696 /* ops marked global share the filter hashes */ 1864 /* ops marked global share the filter hashes */
1697 if (ops->flags & FTRACE_OPS_FL_GLOBAL) { 1865 if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command)
1743 if (ops != &global_ops || !global_start_up) 1911 if (ops != &global_ops || !global_start_up)
1744 ops->flags &= ~FTRACE_OPS_FL_ENABLED; 1912 ops->flags &= ~FTRACE_OPS_FL_ENABLED;
1745 1913
1746 if (!ftrace_start_up) 1914 command |= FTRACE_UPDATE_CALLS;
1747 command |= FTRACE_DISABLE_CALLS;
1748 1915
1749 if (saved_ftrace_func != ftrace_trace_function) { 1916 if (saved_ftrace_func != ftrace_trace_function) {
1750 saved_ftrace_func = ftrace_trace_function; 1917 saved_ftrace_func = ftrace_trace_function;
@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void)
1766 saved_ftrace_func = NULL; 1933 saved_ftrace_func = NULL;
1767 /* ftrace_start_up is true if we want ftrace running */ 1934 /* ftrace_start_up is true if we want ftrace running */
1768 if (ftrace_start_up) 1935 if (ftrace_start_up)
1769 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1936 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
1770} 1937}
1771 1938
1772static void ftrace_shutdown_sysctl(void) 1939static void ftrace_shutdown_sysctl(void)
@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops)
1788 struct ftrace_hash *hash; 1955 struct ftrace_hash *hash;
1789 1956
1790 hash = ops->filter_hash; 1957 hash = ops->filter_hash;
1791 return !!(!hash || !hash->count); 1958 return ftrace_hash_empty(hash);
1792} 1959}
1793 1960
1794static int ftrace_update_code(struct module *mod) 1961static int ftrace_update_code(struct module *mod)
1795{ 1962{
1963 struct ftrace_page *pg;
1796 struct dyn_ftrace *p; 1964 struct dyn_ftrace *p;
1797 cycle_t start, stop; 1965 cycle_t start, stop;
1798 unsigned long ref = 0; 1966 unsigned long ref = 0;
1967 int i;
1799 1968
1800 /* 1969 /*
1801 * When adding a module, we need to check if tracers are 1970 * When adding a module, we need to check if tracers are
@@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod)
1817 start = ftrace_now(raw_smp_processor_id()); 1986 start = ftrace_now(raw_smp_processor_id());
1818 ftrace_update_cnt = 0; 1987 ftrace_update_cnt = 0;
1819 1988
1820 while (ftrace_new_addrs) { 1989 for (pg = ftrace_new_pgs; pg; pg = pg->next) {
1821 1990
1822 /* If something went wrong, bail without enabling anything */ 1991 for (i = 0; i < pg->index; i++) {
1823 if (unlikely(ftrace_disabled)) 1992 /* If something went wrong, bail without enabling anything */
1824 return -1; 1993 if (unlikely(ftrace_disabled))
1994 return -1;
1825 1995
1826 p = ftrace_new_addrs; 1996 p = &pg->records[i];
1827 ftrace_new_addrs = p->newlist; 1997 p->flags = ref;
1828 p->flags = ref;
1829 1998
1830 /* 1999 /*
1831 * Do the initial record conversion from mcount jump 2000 * Do the initial record conversion from mcount jump
1832 * to the NOP instructions. 2001 * to the NOP instructions.
1833 */ 2002 */
1834 if (!ftrace_code_disable(mod, p)) { 2003 if (!ftrace_code_disable(mod, p))
1835 ftrace_free_rec(p); 2004 break;
1836 /* Game over */
1837 break;
1838 }
1839 2005
1840 ftrace_update_cnt++; 2006 ftrace_update_cnt++;
1841 2007
1842 /* 2008 /*
1843 * If the tracing is enabled, go ahead and enable the record. 2009 * If the tracing is enabled, go ahead and enable the record.
1844 * 2010 *
1845 * The reason not to enable the record immediatelly is the 2011 * The reason not to enable the record immediatelly is the
1846 * inherent check of ftrace_make_nop/ftrace_make_call for 2012 * inherent check of ftrace_make_nop/ftrace_make_call for
1847 * correct previous instructions. Making first the NOP 2013 * correct previous instructions. Making first the NOP
1848 * conversion puts the module to the correct state, thus 2014 * conversion puts the module to the correct state, thus
1849 * passing the ftrace_make_call check. 2015 * passing the ftrace_make_call check.
1850 */ 2016 */
1851 if (ftrace_start_up && ref) { 2017 if (ftrace_start_up && ref) {
1852 int failed = __ftrace_replace_code(p, 1); 2018 int failed = __ftrace_replace_code(p, 1);
1853 if (failed) { 2019 if (failed)
1854 ftrace_bug(failed, p->ip); 2020 ftrace_bug(failed, p->ip);
1855 ftrace_free_rec(p);
1856 } 2021 }
1857 } 2022 }
1858 } 2023 }
1859 2024
2025 ftrace_new_pgs = NULL;
2026
1860 stop = ftrace_now(raw_smp_processor_id()); 2027 stop = ftrace_now(raw_smp_processor_id());
1861 ftrace_update_time = stop - start; 2028 ftrace_update_time = stop - start;
1862 ftrace_update_tot_cnt += ftrace_update_cnt; 2029 ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod)
1864 return 0; 2031 return 0;
1865} 2032}
1866 2033
1867static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) 2034static int ftrace_allocate_records(struct ftrace_page *pg, int count)
1868{ 2035{
1869 struct ftrace_page *pg; 2036 int order;
1870 int cnt; 2037 int cnt;
1871 int i;
1872 2038
1873 /* allocate a few pages */ 2039 if (WARN_ON(!count))
1874 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); 2040 return -EINVAL;
1875 if (!ftrace_pages_start) 2041
1876 return -1; 2042 order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
1877 2043
1878 /* 2044 /*
1879 * Allocate a few more pages. 2045 * We want to fill as much as possible. No more than a page
1880 * 2046 * may be empty.
1881 * TODO: have some parser search vmlinux before
1882 * final linking to find all calls to ftrace.
1883 * Then we can:
1884 * a) know how many pages to allocate.
1885 * and/or
1886 * b) set up the table then.
1887 *
1888 * The dynamic code is still necessary for
1889 * modules.
1890 */ 2047 */
2048 while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
2049 order--;
1891 2050
1892 pg = ftrace_pages = ftrace_pages_start; 2051 again:
2052 pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
1893 2053
1894 cnt = num_to_init / ENTRIES_PER_PAGE; 2054 if (!pg->records) {
1895 pr_info("ftrace: allocating %ld entries in %d pages\n", 2055 /* if we can't allocate this size, try something smaller */
1896 num_to_init, cnt + 1); 2056 if (!order)
2057 return -ENOMEM;
2058 order >>= 1;
2059 goto again;
2060 }
1897 2061
1898 for (i = 0; i < cnt; i++) { 2062 cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
1899 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 2063 pg->size = cnt;
1900 2064
1901 /* If we fail, we'll try later anyway */ 2065 if (cnt > count)
1902 if (!pg->next) 2066 cnt = count;
2067
2068 return cnt;
2069}
2070
2071static struct ftrace_page *
2072ftrace_allocate_pages(unsigned long num_to_init)
2073{
2074 struct ftrace_page *start_pg;
2075 struct ftrace_page *pg;
2076 int order;
2077 int cnt;
2078
2079 if (!num_to_init)
2080 return 0;
2081
2082 start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
2083 if (!pg)
2084 return NULL;
2085
2086 /*
 2087 * Try to allocate as much as possible in one contiguous
2088 * location that fills in all of the space. We want to
2089 * waste as little space as possible.
2090 */
2091 for (;;) {
2092 cnt = ftrace_allocate_records(pg, num_to_init);
2093 if (cnt < 0)
2094 goto free_pages;
2095
2096 num_to_init -= cnt;
2097 if (!num_to_init)
1903 break; 2098 break;
1904 2099
2100 pg->next = kzalloc(sizeof(*pg), GFP_KERNEL);
2101 if (!pg->next)
2102 goto free_pages;
2103
1905 pg = pg->next; 2104 pg = pg->next;
1906 } 2105 }
1907 2106
1908 return 0; 2107 return start_pg;
2108
2109 free_pages:
2110 while (start_pg) {
2111 order = get_count_order(pg->size / ENTRIES_PER_PAGE);
2112 free_pages((unsigned long)pg->records, order);
2113 start_pg = pg->next;
2114 kfree(pg);
2115 pg = start_pg;
2116 }
2117 pr_info("ftrace: FAILED to allocate memory for functions\n");
2118 return NULL;
1909} 2119}
1910 2120
1911enum { 2121static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
1912 FTRACE_ITER_FILTER = (1 << 0), 2122{
1913 FTRACE_ITER_NOTRACE = (1 << 1), 2123 int cnt;
1914 FTRACE_ITER_PRINTALL = (1 << 2), 2124
1915 FTRACE_ITER_HASH = (1 << 3), 2125 if (!num_to_init) {
1916 FTRACE_ITER_ENABLED = (1 << 4), 2126 pr_info("ftrace: No functions to be traced?\n");
1917}; 2127 return -1;
2128 }
2129
2130 cnt = num_to_init / ENTRIES_PER_PAGE;
2131 pr_info("ftrace: allocating %ld entries in %d pages\n",
2132 num_to_init, cnt + 1);
2133
2134 return 0;
2135}
1918 2136
1919#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 2137#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1920 2138
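
The sizing done by ftrace_allocate_records() can be checked in isolation. The sketch below assumes a 4096-byte page and an illustrative 24-byte struct dyn_ftrace, and shows how the order is shrunk until at most one page of the group would go unused, with leftover records spilling into the next group that ftrace_allocate_pages() allocates.

	#include <stdio.h>

	#define PAGE_SIZE        4096UL
	#define ENTRY_SIZE       24UL	/* illustrative stand-in for sizeof(struct dyn_ftrace) */
	#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)

	/* smallest order whose page count covers "pages"; mimics get_count_order() */
	static int count_order(unsigned long pages)
	{
		int order = 0;

		while ((1UL << order) < pages)
			order++;
		return order;
	}

	int main(void)
	{
		unsigned long count = 1000;	/* records to place in this group */
		int order = count_order((count + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE);

		/* shrink while the group would waste a whole page or more */
		while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE)
			order--;

		printf("order %d holds %lu of %lu records\n",
		       order, (PAGE_SIZE << order) / ENTRY_SIZE, count);
		return 0;
	}

With these example numbers the order drops from 3 to 2 and the group holds 682 of the 1000 records; the remaining 318 land in a second, smaller group.
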
@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1980 void *p = NULL; 2198 void *p = NULL;
1981 loff_t l; 2199 loff_t l;
1982 2200
2201 if (!(iter->flags & FTRACE_ITER_DO_HASH))
2202 return NULL;
2203
1983 if (iter->func_pos > *pos) 2204 if (iter->func_pos > *pos)
1984 return NULL; 2205 return NULL;
1985 2206
@@ -2023,7 +2244,7 @@ static void *
2023t_next(struct seq_file *m, void *v, loff_t *pos) 2244t_next(struct seq_file *m, void *v, loff_t *pos)
2024{ 2245{
2025 struct ftrace_iterator *iter = m->private; 2246 struct ftrace_iterator *iter = m->private;
2026 struct ftrace_ops *ops = &global_ops; 2247 struct ftrace_ops *ops = iter->ops;
2027 struct dyn_ftrace *rec = NULL; 2248 struct dyn_ftrace *rec = NULL;
2028 2249
2029 if (unlikely(ftrace_disabled)) 2250 if (unlikely(ftrace_disabled))
@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
2047 } 2268 }
2048 } else { 2269 } else {
2049 rec = &iter->pg->records[iter->idx++]; 2270 rec = &iter->pg->records[iter->idx++];
2050 if ((rec->flags & FTRACE_FL_FREE) || 2271 if (((iter->flags & FTRACE_ITER_FILTER) &&
2051
2052 ((iter->flags & FTRACE_ITER_FILTER) &&
2053 !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || 2272 !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
2054 2273
2055 ((iter->flags & FTRACE_ITER_NOTRACE) && 2274 ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
2081static void *t_start(struct seq_file *m, loff_t *pos) 2300static void *t_start(struct seq_file *m, loff_t *pos)
2082{ 2301{
2083 struct ftrace_iterator *iter = m->private; 2302 struct ftrace_iterator *iter = m->private;
2084 struct ftrace_ops *ops = &global_ops; 2303 struct ftrace_ops *ops = iter->ops;
2085 void *p = NULL; 2304 void *p = NULL;
2086 loff_t l; 2305 loff_t l;
2087 2306
@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
2101 * off, we can short cut and just print out that all 2320 * off, we can short cut and just print out that all
2102 * functions are enabled. 2321 * functions are enabled.
2103 */ 2322 */
2104 if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { 2323 if (iter->flags & FTRACE_ITER_FILTER &&
2324 ftrace_hash_empty(ops->filter_hash)) {
2105 if (*pos > 0) 2325 if (*pos > 0)
2106 return t_hash_start(m, pos); 2326 return t_hash_start(m, pos);
2107 iter->flags |= FTRACE_ITER_PRINTALL; 2327 iter->flags |= FTRACE_ITER_PRINTALL;
@@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
2126 break; 2346 break;
2127 } 2347 }
2128 2348
2129 if (!p) { 2349 if (!p)
2130 if (iter->flags & FTRACE_ITER_FILTER) 2350 return t_hash_start(m, pos);
2131 return t_hash_start(m, pos);
2132
2133 return NULL;
2134 }
2135 2351
2136 return iter; 2352 return iter;
2137} 2353}
@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
2189 return -ENOMEM; 2405 return -ENOMEM;
2190 2406
2191 iter->pg = ftrace_pages_start; 2407 iter->pg = ftrace_pages_start;
2408 iter->ops = &global_ops;
2192 2409
2193 ret = seq_open(file, &show_ftrace_seq_ops); 2410 ret = seq_open(file, &show_ftrace_seq_ops);
2194 if (!ret) { 2411 if (!ret) {
@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
2217 2434
2218 iter->pg = ftrace_pages_start; 2435 iter->pg = ftrace_pages_start;
2219 iter->flags = FTRACE_ITER_ENABLED; 2436 iter->flags = FTRACE_ITER_ENABLED;
2437 iter->ops = &global_ops;
2220 2438
2221 ret = seq_open(file, &show_ftrace_seq_ops); 2439 ret = seq_open(file, &show_ftrace_seq_ops);
2222 if (!ret) { 2440 if (!ret) {
@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
2237 mutex_unlock(&ftrace_lock); 2455 mutex_unlock(&ftrace_lock);
2238} 2456}
2239 2457
2240static int 2458/**
2459 * ftrace_regex_open - initialize function tracer filter files
2460 * @ops: The ftrace_ops that hold the hash filters
2461 * @flag: The type of filter to process
2462 * @inode: The inode, usually passed in to your open routine
2463 * @file: The file, usually passed in to your open routine
2464 *
2465 * ftrace_regex_open() initializes the filter files for the
2466 * @ops. Depending on @flag it may process the filter hash or
2467 * the notrace hash of @ops. With this called from the open
2468 * routine, you can use ftrace_filter_write() for the write
2469 * routine if @flag has FTRACE_ITER_FILTER set, or
2470 * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
2471 * ftrace_regex_lseek() should be used as the lseek routine, and
2472 * release must call ftrace_regex_release().
2473 */
2474int
2241ftrace_regex_open(struct ftrace_ops *ops, int flag, 2475ftrace_regex_open(struct ftrace_ops *ops, int flag,
2242 struct inode *inode, struct file *file) 2476 struct inode *inode, struct file *file)
2243{ 2477{
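
The file hookup this comment describes is exactly what the stack tracer does later in this patch. As a rough illustration, a tracer with its own ftrace_ops (my_ops and my_filter_open below are hypothetical names) would wire the now-exported helpers together like this:

	/* hypothetical tracer exposing its own filter file via the helpers above */
	static int my_filter_open(struct inode *inode, struct file *file)
	{
		return ftrace_regex_open(&my_ops, FTRACE_ITER_FILTER, inode, file);
	}

	static const struct file_operations my_filter_fops = {
		.open    = my_filter_open,
		.read    = seq_read,
		.write   = ftrace_filter_write,
		.llseek  = ftrace_regex_lseek,
		.release = ftrace_regex_release,
	};
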
@@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
2306static int 2540static int
2307ftrace_filter_open(struct inode *inode, struct file *file) 2541ftrace_filter_open(struct inode *inode, struct file *file)
2308{ 2542{
2309 return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, 2543 return ftrace_regex_open(&global_ops,
2310 inode, file); 2544 FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
2545 inode, file);
2311} 2546}
2312 2547
2313static int 2548static int
@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
2317 inode, file); 2552 inode, file);
2318} 2553}
2319 2554
2320static loff_t 2555loff_t
2321ftrace_regex_lseek(struct file *file, loff_t offset, int origin) 2556ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
2322{ 2557{
2323 loff_t ret; 2558 loff_t ret;
@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff,
2426 goto out_unlock; 2661 goto out_unlock;
2427 2662
2428 do_for_each_ftrace_rec(pg, rec) { 2663 do_for_each_ftrace_rec(pg, rec) {
2429
2430 if (ftrace_match_record(rec, mod, search, search_len, type)) { 2664 if (ftrace_match_record(rec, mod, search, search_len, type)) {
2431 ret = enter_record(hash, rec, not); 2665 ret = enter_record(hash, rec, not);
2432 if (ret < 0) { 2666 if (ret < 0) {
@@ -2871,14 +3105,14 @@ out_unlock:
2871 return ret; 3105 return ret;
2872} 3106}
2873 3107
2874static ssize_t 3108ssize_t
2875ftrace_filter_write(struct file *file, const char __user *ubuf, 3109ftrace_filter_write(struct file *file, const char __user *ubuf,
2876 size_t cnt, loff_t *ppos) 3110 size_t cnt, loff_t *ppos)
2877{ 3111{
2878 return ftrace_regex_write(file, ubuf, cnt, ppos, 1); 3112 return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
2879} 3113}
2880 3114
2881static ssize_t 3115ssize_t
2882ftrace_notrace_write(struct file *file, const char __user *ubuf, 3116ftrace_notrace_write(struct file *file, const char __user *ubuf,
2883 size_t cnt, loff_t *ppos) 3117 size_t cnt, loff_t *ppos)
2884{ 3118{
@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
2919 ret = ftrace_hash_move(ops, enable, orig_hash, hash); 3153 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
2920 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED 3154 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
2921 && ftrace_enabled) 3155 && ftrace_enabled)
2922 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 3156 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
2923 3157
2924 mutex_unlock(&ftrace_lock); 3158 mutex_unlock(&ftrace_lock);
2925 3159
@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
3045} 3279}
3046#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3047 3281
3048static void __init 3282void __init
3049set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) 3283ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
3050{ 3284{
3051 char *func; 3285 char *func;
3052 3286
@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
3059static void __init set_ftrace_early_filters(void) 3293static void __init set_ftrace_early_filters(void)
3060{ 3294{
3061 if (ftrace_filter_buf[0]) 3295 if (ftrace_filter_buf[0])
3062 set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); 3296 ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
3063 if (ftrace_notrace_buf[0]) 3297 if (ftrace_notrace_buf[0])
3064 set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); 3298 ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
3065#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3299#ifdef CONFIG_FUNCTION_GRAPH_TRACER
3066 if (ftrace_graph_buf[0]) 3300 if (ftrace_graph_buf[0])
3067 set_ftrace_early_graph(ftrace_graph_buf); 3301 set_ftrace_early_graph(ftrace_graph_buf);
3068#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3302#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3069} 3303}
3070 3304
3071static int 3305int ftrace_regex_release(struct inode *inode, struct file *file)
3072ftrace_regex_release(struct inode *inode, struct file *file)
3073{ 3306{
3074 struct seq_file *m = (struct seq_file *)file->private_data; 3307 struct seq_file *m = (struct seq_file *)file->private_data;
3075 struct ftrace_iterator *iter; 3308 struct ftrace_iterator *iter;
@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file)
3107 orig_hash, iter->hash); 3340 orig_hash, iter->hash);
3108 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) 3341 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
3109 && ftrace_enabled) 3342 && ftrace_enabled)
3110 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 3343 ftrace_run_update_code(FTRACE_UPDATE_CALLS);
3111 3344
3112 mutex_unlock(&ftrace_lock); 3345 mutex_unlock(&ftrace_lock);
3113 } 3346 }
@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
3270 3503
3271 do_for_each_ftrace_rec(pg, rec) { 3504 do_for_each_ftrace_rec(pg, rec) {
3272 3505
3273 if (rec->flags & FTRACE_FL_FREE)
3274 continue;
3275
3276 if (ftrace_match_record(rec, NULL, search, search_len, type)) { 3506 if (ftrace_match_record(rec, NULL, search, search_len, type)) {
3277 /* if it is in the array */ 3507 /* if it is in the array */
3278 exists = false; 3508 exists = false;
@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
3381 return 0; 3611 return 0;
3382} 3612}
3383 3613
3614static void ftrace_swap_recs(void *a, void *b, int size)
3615{
3616 struct dyn_ftrace *reca = a;
3617 struct dyn_ftrace *recb = b;
3618 struct dyn_ftrace t;
3619
3620 t = *reca;
3621 *reca = *recb;
3622 *recb = t;
3623}
3624
3384static int ftrace_process_locs(struct module *mod, 3625static int ftrace_process_locs(struct module *mod,
3385 unsigned long *start, 3626 unsigned long *start,
3386 unsigned long *end) 3627 unsigned long *end)
3387{ 3628{
3629 struct ftrace_page *pg;
3630 unsigned long count;
3388 unsigned long *p; 3631 unsigned long *p;
3389 unsigned long addr; 3632 unsigned long addr;
3390 unsigned long flags = 0; /* Shut up gcc */ 3633 unsigned long flags = 0; /* Shut up gcc */
3634 int ret = -ENOMEM;
3635
3636 count = end - start;
3637
3638 if (!count)
3639 return 0;
3640
3641 pg = ftrace_allocate_pages(count);
3642 if (!pg)
3643 return -ENOMEM;
3391 3644
3392 mutex_lock(&ftrace_lock); 3645 mutex_lock(&ftrace_lock);
3646
3647 /*
3648 * Core and each module needs their own pages, as
3649 * modules will free them when they are removed.
3650 * Force a new page to be allocated for modules.
3651 */
3652 if (!mod) {
3653 WARN_ON(ftrace_pages || ftrace_pages_start);
3654 /* First initialization */
3655 ftrace_pages = ftrace_pages_start = pg;
3656 } else {
3657 if (!ftrace_pages)
3658 goto out;
3659
3660 if (WARN_ON(ftrace_pages->next)) {
3661 /* Hmm, we have free pages? */
3662 while (ftrace_pages->next)
3663 ftrace_pages = ftrace_pages->next;
3664 }
3665
3666 ftrace_pages->next = pg;
3667 ftrace_pages = pg;
3668 }
3669
3393 p = start; 3670 p = start;
3394 while (p < end) { 3671 while (p < end) {
3395 addr = ftrace_call_adjust(*p++); 3672 addr = ftrace_call_adjust(*p++);
@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod,
3401 */ 3678 */
3402 if (!addr) 3679 if (!addr)
3403 continue; 3680 continue;
3404 ftrace_record_ip(addr); 3681 if (!ftrace_record_ip(addr))
3682 break;
3405 } 3683 }
3406 3684
3685 /* These new locations need to be initialized */
3686 ftrace_new_pgs = pg;
3687
3688 /* Make each individual set of pages sorted by ips */
3689 for (; pg; pg = pg->next)
3690 sort(pg->records, pg->index, sizeof(struct dyn_ftrace),
3691 ftrace_cmp_recs, ftrace_swap_recs);
3692
3407 /* 3693 /*
3408 * We only need to disable interrupts on start up 3694 * We only need to disable interrupts on start up
3409 * because we are modifying code that an interrupt 3695 * because we are modifying code that an interrupt
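
The sort() call relies on ftrace_cmp_recs(), which is added earlier in this patch and not shown in this hunk; a minimal ip-ordered comparator of the form sort() expects would look roughly like:

	/* sketch only: order dyn_ftrace records by instruction address */
	static int ftrace_cmp_recs_sketch(const void *a, const void *b)
	{
		const struct dyn_ftrace *reca = a;
		const struct dyn_ftrace *recb = b;

		if (reca->ip < recb->ip)
			return -1;
		if (reca->ip > recb->ip)
			return 1;
		return 0;
	}
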
@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod,
3417 ftrace_update_code(mod); 3703 ftrace_update_code(mod);
3418 if (!mod) 3704 if (!mod)
3419 local_irq_restore(flags); 3705 local_irq_restore(flags);
3706 ret = 0;
3707 out:
3420 mutex_unlock(&ftrace_lock); 3708 mutex_unlock(&ftrace_lock);
3421 3709
3422 return 0; 3710 return ret;
3423} 3711}
3424 3712
3425#ifdef CONFIG_MODULES 3713#ifdef CONFIG_MODULES
3714
3715#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
3716
3426void ftrace_release_mod(struct module *mod) 3717void ftrace_release_mod(struct module *mod)
3427{ 3718{
3428 struct dyn_ftrace *rec; 3719 struct dyn_ftrace *rec;
3720 struct ftrace_page **last_pg;
3429 struct ftrace_page *pg; 3721 struct ftrace_page *pg;
3722 int order;
3430 3723
3431 mutex_lock(&ftrace_lock); 3724 mutex_lock(&ftrace_lock);
3432 3725
3433 if (ftrace_disabled) 3726 if (ftrace_disabled)
3434 goto out_unlock; 3727 goto out_unlock;
3435 3728
3436 do_for_each_ftrace_rec(pg, rec) { 3729 /*
3730 * Each module has its own ftrace_pages, remove
3731 * them from the list.
3732 */
3733 last_pg = &ftrace_pages_start;
3734 for (pg = ftrace_pages_start; pg; pg = *last_pg) {
3735 rec = &pg->records[0];
3437 if (within_module_core(rec->ip, mod)) { 3736 if (within_module_core(rec->ip, mod)) {
3438 /* 3737 /*
3439 * rec->ip is changed in ftrace_free_rec() 3738 * As core pages are first, the first
3440 * It should not between s and e if record was freed. 3739 * page should never be a module page.
3441 */ 3740 */
3442 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); 3741 if (WARN_ON(pg == ftrace_pages_start))
3443 ftrace_free_rec(rec); 3742 goto out_unlock;
3444 } 3743
3445 } while_for_each_ftrace_rec(); 3744 /* Check if we are deleting the last page */
3745 if (pg == ftrace_pages)
3746 ftrace_pages = next_to_ftrace_page(last_pg);
3747
3748 *last_pg = pg->next;
3749 order = get_count_order(pg->size / ENTRIES_PER_PAGE);
3750 free_pages((unsigned long)pg->records, order);
3751 kfree(pg);
3752 } else
3753 last_pg = &pg->next;
3754 }
3446 out_unlock: 3755 out_unlock:
3447 mutex_unlock(&ftrace_lock); 3756 mutex_unlock(&ftrace_lock);
3448} 3757}
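
The rewritten ftrace_release_mod() above walks the page list through an indirect pointer, so dropping the head of the list needs no special case. A minimal sketch of that unlink pattern, with page_belongs_to_module() and free_page_group() as hypothetical stand-ins for the within_module_core() test and the free_pages()/kfree() pair:

	/* sketch: unlink every module-owned page group without special-casing the head */
	static void release_module_pages(void)
	{
		struct ftrace_page **last_pg = &ftrace_pages_start;
		struct ftrace_page *pg;

		while ((pg = *last_pg)) {
			if (page_belongs_to_module(pg)) {	/* hypothetical predicate */
				*last_pg = pg->next;		/* unlink via the link pointer */
				free_page_group(pg);		/* hypothetical: free records + pg */
			} else {
				last_pg = &pg->next;		/* advance the link, not just the page */
			}
		}
	}
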
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f04cc3136bd3..24aee7127451 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system,
1738 return -ENOMEM; 1738 return -ENOMEM;
1739} 1739}
1740 1740
1741static int create_filter_start(char *filter_str, bool set_str,
1742 struct filter_parse_state **psp,
1743 struct event_filter **filterp)
1744{
1745 struct event_filter *filter;
1746 struct filter_parse_state *ps = NULL;
1747 int err = 0;
1748
1749 WARN_ON_ONCE(*psp || *filterp);
1750
1751 /* allocate everything, and if any fails, free all and fail */
1752 filter = __alloc_filter();
1753 if (filter && set_str)
1754 err = replace_filter_string(filter, filter_str);
1755
1756 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1757
1758 if (!filter || !ps || err) {
1759 kfree(ps);
1760 __free_filter(filter);
1761 return -ENOMEM;
1762 }
1763
1764 /* we're committed to creating a new filter */
1765 *filterp = filter;
1766 *psp = ps;
1767
1768 parse_init(ps, filter_ops, filter_str);
1769 err = filter_parse(ps);
1770 if (err && set_str)
1771 append_filter_err(ps, filter);
1772 return err;
1773}
1774
1775static void create_filter_finish(struct filter_parse_state *ps)
1776{
1777 if (ps) {
1778 filter_opstack_clear(ps);
1779 postfix_clear(ps);
1780 kfree(ps);
1781 }
1782}
1783
1784/**
1785 * create_filter - create a filter for a ftrace_event_call
1786 * @call: ftrace_event_call to create a filter for
1787 * @filter_str: filter string
1788 * @set_str: remember @filter_str and enable detailed error in filter
1789 * @filterp: out param for created filter (always updated on return)
1790 *
1791 * Creates a filter for @call with @filter_str. If @set_str is %true,
1792 * @filter_str is copied and recorded in the new filter.
1793 *
1794 * On success, returns 0 and *@filterp points to the new filter. On
1795 * failure, returns -errno and *@filterp may point to %NULL or to a new
1796 * filter. In the latter case, the returned filter contains error
1797 * information if @set_str is %true and the caller is responsible for
1798 * freeing it.
1799 */
1800static int create_filter(struct ftrace_event_call *call,
1801 char *filter_str, bool set_str,
1802 struct event_filter **filterp)
1803{
1804 struct event_filter *filter = NULL;
1805 struct filter_parse_state *ps = NULL;
1806 int err;
1807
1808 err = create_filter_start(filter_str, set_str, &ps, &filter);
1809 if (!err) {
1810 err = replace_preds(call, filter, ps, filter_str, false);
1811 if (err && set_str)
1812 append_filter_err(ps, filter);
1813 }
1814 create_filter_finish(ps);
1815
1816 *filterp = filter;
1817 return err;
1818}
1819
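
A sketch of the caller contract spelled out above: *filterp is written back even on failure, so the caller frees whatever came back. The event and the "common_pid != 0" expression are only examples; this mirrors what apply_event_filter() and the self-test below do.

	/* sketch: honouring the out-parameter rule of create_filter() */
	static int attach_example_filter(struct ftrace_event_call *call)
	{
		struct event_filter *filter = NULL;
		int err;

		err = create_filter(call, "common_pid != 0", false, &filter);
		if (err) {
			__free_filter(filter);	/* handles a NULL or partially built filter */
			return err;
		}
		/* success: filter is ready to be swapped in under event_mutex */
		return 0;
	}
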
1820/**
1821 * create_system_filter - create a filter for an event_subsystem
1822 * @system: event_subsystem to create a filter for
1823 * @filter_str: filter string
1824 * @filterp: out param for created filter (always updated on return)
1825 *
1826 * Identical to create_filter() except that it creates a subsystem filter
1827 * and always remembers @filter_str.
1828 */
1829static int create_system_filter(struct event_subsystem *system,
1830 char *filter_str, struct event_filter **filterp)
1831{
1832 struct event_filter *filter = NULL;
1833 struct filter_parse_state *ps = NULL;
1834 int err;
1835
1836 err = create_filter_start(filter_str, true, &ps, &filter);
1837 if (!err) {
1838 err = replace_system_preds(system, ps, filter_str);
1839 if (!err) {
1840 /* System filters just show a default message */
1841 kfree(filter->filter_string);
1842 filter->filter_string = NULL;
1843 } else {
1844 append_filter_err(ps, filter);
1845 }
1846 }
1847 create_filter_finish(ps);
1848
1849 *filterp = filter;
1850 return err;
1851}
1852
1741int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1853int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1742{ 1854{
1743 struct filter_parse_state *ps;
1744 struct event_filter *filter; 1855 struct event_filter *filter;
1745 struct event_filter *tmp;
1746 int err = 0; 1856 int err = 0;
1747 1857
1748 mutex_lock(&event_mutex); 1858 mutex_lock(&event_mutex);
@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1759 goto out_unlock; 1869 goto out_unlock;
1760 } 1870 }
1761 1871
1762 err = -ENOMEM; 1872 err = create_filter(call, filter_string, true, &filter);
1763 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1764 if (!ps)
1765 goto out_unlock;
1766
1767 filter = __alloc_filter();
1768 if (!filter) {
1769 kfree(ps);
1770 goto out_unlock;
1771 }
1772
1773 replace_filter_string(filter, filter_string);
1774
1775 parse_init(ps, filter_ops, filter_string);
1776 err = filter_parse(ps);
1777 if (err) {
1778 append_filter_err(ps, filter);
1779 goto out;
1780 }
1781 1873
1782 err = replace_preds(call, filter, ps, filter_string, false);
1783 if (err) {
1784 filter_disable(call);
1785 append_filter_err(ps, filter);
1786 } else
1787 call->flags |= TRACE_EVENT_FL_FILTERED;
1788out:
1789 /* 1874 /*
1790 * Always swap the call filter with the new filter 1875 * Always swap the call filter with the new filter
1791 * even if there was an error. If there was an error 1876 * even if there was an error. If there was an error
1792 * in the filter, we disable the filter and show the error 1877 * in the filter, we disable the filter and show the error
1793 * string 1878 * string
1794 */ 1879 */
1795 tmp = call->filter; 1880 if (filter) {
1796 rcu_assign_pointer(call->filter, filter); 1881 struct event_filter *tmp = call->filter;
1797 if (tmp) { 1882
1798 /* Make sure the call is done with the filter */ 1883 if (!err)
1799 synchronize_sched(); 1884 call->flags |= TRACE_EVENT_FL_FILTERED;
1800 __free_filter(tmp); 1885 else
1886 filter_disable(call);
1887
1888 rcu_assign_pointer(call->filter, filter);
1889
1890 if (tmp) {
1891 /* Make sure the call is done with the filter */
1892 synchronize_sched();
1893 __free_filter(tmp);
1894 }
1801 } 1895 }
1802 filter_opstack_clear(ps);
1803 postfix_clear(ps);
1804 kfree(ps);
1805out_unlock: 1896out_unlock:
1806 mutex_unlock(&event_mutex); 1897 mutex_unlock(&event_mutex);
1807 1898
@@ -1811,7 +1902,6 @@ out_unlock:
1811int apply_subsystem_event_filter(struct event_subsystem *system, 1902int apply_subsystem_event_filter(struct event_subsystem *system,
1812 char *filter_string) 1903 char *filter_string)
1813{ 1904{
1814 struct filter_parse_state *ps;
1815 struct event_filter *filter; 1905 struct event_filter *filter;
1816 int err = 0; 1906 int err = 0;
1817 1907
@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1835 goto out_unlock; 1925 goto out_unlock;
1836 } 1926 }
1837 1927
1838 err = -ENOMEM; 1928 err = create_system_filter(system, filter_string, &filter);
1839 ps = kzalloc(sizeof(*ps), GFP_KERNEL); 1929 if (filter) {
1840 if (!ps) 1930 /*
1841 goto out_unlock; 1931 * No event actually uses the system filter
1842 1932 * we can free it without synchronize_sched().
1843 filter = __alloc_filter(); 1933 */
1844 if (!filter) 1934 __free_filter(system->filter);
1845 goto out; 1935 system->filter = filter;
1846 1936 }
1847 /* System filters just show a default message */
1848 kfree(filter->filter_string);
1849 filter->filter_string = NULL;
1850
1851 /*
1852 * No event actually uses the system filter
1853 * we can free it without synchronize_sched().
1854 */
1855 __free_filter(system->filter);
1856 system->filter = filter;
1857
1858 parse_init(ps, filter_ops, filter_string);
1859 err = filter_parse(ps);
1860 if (err)
1861 goto err_filter;
1862
1863 err = replace_system_preds(system, ps, filter_string);
1864 if (err)
1865 goto err_filter;
1866
1867out:
1868 filter_opstack_clear(ps);
1869 postfix_clear(ps);
1870 kfree(ps);
1871out_unlock: 1937out_unlock:
1872 mutex_unlock(&event_mutex); 1938 mutex_unlock(&event_mutex);
1873 1939
1874 return err; 1940 return err;
1875
1876err_filter:
1877 replace_filter_string(filter, filter_string);
1878 append_filter_err(ps, system->filter);
1879 goto out;
1880} 1941}
1881 1942
1882#ifdef CONFIG_PERF_EVENTS 1943#ifdef CONFIG_PERF_EVENTS
@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1894{ 1955{
1895 int err; 1956 int err;
1896 struct event_filter *filter; 1957 struct event_filter *filter;
1897 struct filter_parse_state *ps;
1898 struct ftrace_event_call *call; 1958 struct ftrace_event_call *call;
1899 1959
1900 mutex_lock(&event_mutex); 1960 mutex_lock(&event_mutex);
@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1909 if (event->filter) 1969 if (event->filter)
1910 goto out_unlock; 1970 goto out_unlock;
1911 1971
1912 filter = __alloc_filter(); 1972 err = create_filter(call, filter_str, false, &filter);
1913 if (!filter) {
1914 err = PTR_ERR(filter);
1915 goto out_unlock;
1916 }
1917
1918 err = -ENOMEM;
1919 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1920 if (!ps)
1921 goto free_filter;
1922
1923 parse_init(ps, filter_ops, filter_str);
1924 err = filter_parse(ps);
1925 if (err)
1926 goto free_ps;
1927
1928 err = replace_preds(call, filter, ps, filter_str, false);
1929 if (!err) 1973 if (!err)
1930 event->filter = filter; 1974 event->filter = filter;
1931 1975 else
1932free_ps:
1933 filter_opstack_clear(ps);
1934 postfix_clear(ps);
1935 kfree(ps);
1936
1937free_filter:
1938 if (err)
1939 __free_filter(filter); 1976 __free_filter(filter);
1940 1977
1941out_unlock: 1978out_unlock:
@@ -1954,43 +1991,6 @@ out_unlock:
1954#define CREATE_TRACE_POINTS 1991#define CREATE_TRACE_POINTS
1955#include "trace_events_filter_test.h" 1992#include "trace_events_filter_test.h"
1956 1993
1957static int test_get_filter(char *filter_str, struct ftrace_event_call *call,
1958 struct event_filter **pfilter)
1959{
1960 struct event_filter *filter;
1961 struct filter_parse_state *ps;
1962 int err = -ENOMEM;
1963
1964 filter = __alloc_filter();
1965 if (!filter)
1966 goto out;
1967
1968 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1969 if (!ps)
1970 goto free_filter;
1971
1972 parse_init(ps, filter_ops, filter_str);
1973 err = filter_parse(ps);
1974 if (err)
1975 goto free_ps;
1976
1977 err = replace_preds(call, filter, ps, filter_str, false);
1978 if (!err)
1979 *pfilter = filter;
1980
1981 free_ps:
1982 filter_opstack_clear(ps);
1983 postfix_clear(ps);
1984 kfree(ps);
1985
1986 free_filter:
1987 if (err)
1988 __free_filter(filter);
1989
1990 out:
1991 return err;
1992}
1993
1994#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ 1994#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \
1995{ \ 1995{ \
1996 .filter = FILTER, \ 1996 .filter = FILTER, \
@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void)
2109 struct test_filter_data_t *d = &test_filter_data[i]; 2109 struct test_filter_data_t *d = &test_filter_data[i];
2110 int err; 2110 int err;
2111 2111
2112 err = test_get_filter(d->filter, &event_ftrace_test_filter, 2112 err = create_filter(&event_ftrace_test_filter, d->filter,
2113 &filter); 2113 false, &filter);
2114 if (err) { 2114 if (err) {
2115 printk(KERN_INFO 2115 printk(KERN_INFO
2116 "Failed to get filter for '%s', err %d\n", 2116 "Failed to get filter for '%s', err %d\n",
2117 d->filter, err); 2117 d->filter, err);
2118 __free_filter(filter);
2118 break; 2119 break;
2119 } 2120 }
2120 2121
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 77575b386d97..d4545f49242e 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,6 +13,9 @@
13#include <linux/sysctl.h> 13#include <linux/sysctl.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/fs.h> 15#include <linux/fs.h>
16
17#include <asm/setup.h>
18
16#include "trace.h" 19#include "trace.h"
17 20
18#define STACK_TRACE_ENTRIES 500 21#define STACK_TRACE_ENTRIES 500
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
133static struct ftrace_ops trace_ops __read_mostly = 136static struct ftrace_ops trace_ops __read_mostly =
134{ 137{
135 .func = stack_trace_call, 138 .func = stack_trace_call,
136 .flags = FTRACE_OPS_FL_GLOBAL,
137}; 139};
138 140
139static ssize_t 141static ssize_t
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = {
311 .release = seq_release, 313 .release = seq_release,
312}; 314};
313 315
316static int
317stack_trace_filter_open(struct inode *inode, struct file *file)
318{
319 return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
320 inode, file);
321}
322
323static const struct file_operations stack_trace_filter_fops = {
324 .open = stack_trace_filter_open,
325 .read = seq_read,
326 .write = ftrace_filter_write,
327 .llseek = ftrace_regex_lseek,
328 .release = ftrace_regex_release,
329};
330
314int 331int
315stack_trace_sysctl(struct ctl_table *table, int write, 332stack_trace_sysctl(struct ctl_table *table, int write,
316 void __user *buffer, size_t *lenp, 333 void __user *buffer, size_t *lenp,
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write,
338 return ret; 355 return ret;
339} 356}
340 357
358static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
359
341static __init int enable_stacktrace(char *str) 360static __init int enable_stacktrace(char *str)
342{ 361{
362 if (strncmp(str, "_filter=", 8) == 0)
363 strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
364
343 stack_tracer_enabled = 1; 365 stack_tracer_enabled = 1;
344 last_stack_tracer_enabled = 1; 366 last_stack_tracer_enabled = 1;
345 return 1; 367 return 1;
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void)
358 trace_create_file("stack_trace", 0444, d_tracer, 380 trace_create_file("stack_trace", 0444, d_tracer,
359 NULL, &stack_trace_fops); 381 NULL, &stack_trace_fops);
360 382
383 trace_create_file("stack_trace_filter", 0444, d_tracer,
384 NULL, &stack_trace_filter_fops);
385
386 if (stack_trace_filter_buf[0])
387 ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
388
361 if (stack_tracer_enabled) 389 if (stack_tracer_enabled)
362 register_ftrace_function(&trace_ops); 390 register_ftrace_function(&trace_ops);
363 391
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index f40a6af6bf40..54e35c1e5948 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */
462 succeed_file(); 462 succeed_file();
463 } 463 }
464 if (w(txthdr->sh_type) != SHT_PROGBITS || 464 if (w(txthdr->sh_type) != SHT_PROGBITS ||
465 !(w(txthdr->sh_flags) & SHF_EXECINSTR)) 465 !(_w(txthdr->sh_flags) & SHF_EXECINSTR))
466 return NULL; 466 return NULL;
467 return txtname; 467 return txtname;
468} 468}
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 7a527f7e9da9..ddc22525228d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -21,6 +21,8 @@ EVENT MODIFIERS
 21Events can optionally have a modifier by appending a colon and one or 21Events can optionally have a modifier by appending a colon and one or
22more modifiers. Modifiers allow the user to restrict when events are 22more modifiers. Modifiers allow the user to restrict when events are
23counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. 23counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
24Additional modifiers are 'G' for guest counting (in KVM guests) and 'H'
25for host counting (not in KVM guests).
24 26
25The 'p' modifier can be used for specifying how precise the instruction 27The 'p' modifier can be used for specifying how precise the instruction
26address should be. The 'p' modifier is currently only implemented for 28address should be. The 'p' modifier is currently only implemented for
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index c12659d8cb26..1078c5fadd5b 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,5 @@
1tools/perf 1tools/perf
2include/linux/const.h
2include/linux/perf_event.h 3include/linux/perf_event.h
3include/linux/rbtree.h 4include/linux/rbtree.h
4include/linux/list.h 5include/linux/list.h
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 214ba7f9f577..806e0a286634 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -235,7 +235,7 @@ out_delete:
235} 235}
236 236
237static const char * const annotate_usage[] = { 237static const char * const annotate_usage[] = {
238 "perf annotate [<options>] <command>", 238 "perf annotate [<options>]",
239 NULL 239 NULL
240}; 240};
241 241
@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
313 annotate.sym_hist_filter = argv[0]; 313 annotate.sym_hist_filter = argv[0];
314 } 314 }
315 315
316 if (field_sep && *field_sep == '.') {
317 pr_err("'.' is the only non valid --field-separator argument\n");
318 return -1;
319 }
320
321 return __cmd_annotate(&annotate); 316 return __cmd_annotate(&annotate);
322} 317}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fe1ad8f21961..39104c0beea3 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -108,7 +108,9 @@ static void setup_cpunode_map(void)
108 continue; 108 continue;
109 cpunode_map[cpu] = mem; 109 cpunode_map[cpu] = mem;
110 } 110 }
111 closedir(dir2);
111 } 112 }
113 closedir(dir1);
112} 114}
113 115
114static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, 116static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
645 break; 647 break;
646 if (sort_dimension__add(tok, sort_list) < 0) { 648 if (sort_dimension__add(tok, sort_list) < 0) {
647 error("Unknown --sort key: '%s'", tok); 649 error("Unknown --sort key: '%s'", tok);
650 free(str);
648 return -1; 651 return -1;
649 } 652 }
650 } 653 }
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 032324a76b87..9fc6e0fa3dce 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -22,9 +22,6 @@
22static const char *file_name; 22static const char *file_name;
23static char name_buffer[256]; 23static char name_buffer[256];
24 24
25bool perf_host = 1;
26bool perf_guest;
27
28static const char * const kvm_usage[] = { 25static const char * const kvm_usage[] = {
29 "perf kvm [<options>] {top|record|report|diff|buildid-list}", 26 "perf kvm [<options>] {top|record|report|diff|buildid-list}",
30 NULL 27 NULL
@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv)
107 104
108int cmd_kvm(int argc, const char **argv, const char *prefix __used) 105int cmd_kvm(int argc, const char **argv, const char *prefix __used)
109{ 106{
110 perf_host = perf_guest = 0; 107 perf_host = 0;
108 perf_guest = 1;
111 109
112 argc = parse_options(argc, argv, kvm_options, kvm_usage, 110 argc = parse_options(argc, argv, kvm_options, kvm_usage,
113 PARSE_OPT_STOP_AT_NON_OPTION); 111 PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index fd1909afcfd6..bb68ddf257b7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix)
1018 __script_root = get_script_root(&script_dirent, suffix); 1018 __script_root = get_script_root(&script_dirent, suffix);
1019 if (__script_root && !strcmp(script_root, __script_root)) { 1019 if (__script_root && !strcmp(script_root, __script_root)) {
1020 free(__script_root); 1020 free(__script_root);
1021 closedir(lang_dir);
1022 closedir(scripts_dir);
1021 snprintf(script_path, MAXPATHLEN, "%s/%s", 1023 snprintf(script_path, MAXPATHLEN, "%s/%s",
1022 lang_path, script_dirent.d_name); 1024 lang_path, script_dirent.d_name);
1023 return strdup(script_path); 1025 return strdup(script_path);
1024 } 1026 }
1025 free(__script_root); 1027 free(__script_root);
1026 } 1028 }
1029 closedir(lang_dir);
1027 } 1030 }
1031 closedir(scripts_dir);
1028 1032
1029 return NULL; 1033 return NULL;
1030} 1034}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 2b9a7f497a20..3854e869dce1 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
1396 NULL, 1396 NULL,
1397 }; 1397 };
1398 const struct option test_options[] = { 1398 const struct option test_options[] = {
1399 OPT_INTEGER('v', "verbose", &verbose, 1399 OPT_INCR('v', "verbose", &verbose,
1400 "be more verbose (show symbol address, etc)"), 1400 "be more verbose (show symbol address, etc)"),
1401 OPT_END() 1401 OPT_END()
1402 }; 1402 };
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4f81eeb99875..8f80df896038 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
235 if (he == NULL) 235 if (he == NULL)
236 return NULL; 236 return NULL;
237 237
238 evsel->hists.stats.total_period += sample->period;
239 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 238 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
240 return he; 239 return he;
241} 240}
@@ -889,6 +888,10 @@ try_again:
889 ui__warning("The %s event is not supported.\n", 888 ui__warning("The %s event is not supported.\n",
890 event_name(counter)); 889 event_name(counter));
891 goto out_err; 890 goto out_err;
891 } else if (err == EMFILE) {
892 ui__warning("Too many events are opened.\n"
893 "Try again after reducing the number of events\n");
894 goto out_err;
892 } 895 }
893 896
894 ui__warning("The sys_perf_event_open() syscall " 897 ui__warning("The sys_perf_event_open() syscall "
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fa1837088ca8..3f16e08a5c8d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
111 .type = PERF_TYPE_HARDWARE, 111 .type = PERF_TYPE_HARDWARE,
112 .config = PERF_COUNT_HW_CPU_CYCLES, 112 .config = PERF_COUNT_HW_CPU_CYCLES,
113 }; 113 };
114 struct perf_evsel *evsel = perf_evsel__new(&attr, 0); 114 struct perf_evsel *evsel;
115
116 event_attr_init(&attr);
115 117
118 evsel = perf_evsel__new(&attr, 0);
116 if (evsel == NULL) 119 if (evsel == NULL)
117 goto error; 120 goto error;
118 121
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index abef2703cd24..6f505d1abac7 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
76 } 76 }
77} 77}
78 78
79static void hist_entry__add_cpumode_period(struct hist_entry *self, 79static void hist_entry__add_cpumode_period(struct hist_entry *he,
80 unsigned int cpumode, u64 period) 80 unsigned int cpumode, u64 period)
81{ 81{
82 switch (cpumode) { 82 switch (cpumode) {
83 case PERF_RECORD_MISC_KERNEL: 83 case PERF_RECORD_MISC_KERNEL:
84 self->period_sys += period; 84 he->period_sys += period;
85 break; 85 break;
86 case PERF_RECORD_MISC_USER: 86 case PERF_RECORD_MISC_USER:
87 self->period_us += period; 87 he->period_us += period;
88 break; 88 break;
89 case PERF_RECORD_MISC_GUEST_KERNEL: 89 case PERF_RECORD_MISC_GUEST_KERNEL:
90 self->period_guest_sys += period; 90 he->period_guest_sys += period;
91 break; 91 break;
92 case PERF_RECORD_MISC_GUEST_USER: 92 case PERF_RECORD_MISC_GUEST_USER:
93 self->period_guest_us += period; 93 he->period_guest_us += period;
94 break; 94 break;
95 default: 95 default:
96 break; 96 break;
@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists,
165static struct hist_entry *hist_entry__new(struct hist_entry *template) 165static struct hist_entry *hist_entry__new(struct hist_entry *template)
166{ 166{
167 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; 167 size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0;
168 struct hist_entry *self = malloc(sizeof(*self) + callchain_size); 168 struct hist_entry *he = malloc(sizeof(*he) + callchain_size);
169 169
170 if (self != NULL) { 170 if (he != NULL) {
171 *self = *template; 171 *he = *template;
172 self->nr_events = 1; 172 he->nr_events = 1;
173 if (self->ms.map) 173 if (he->ms.map)
174 self->ms.map->referenced = true; 174 he->ms.map->referenced = true;
175 if (symbol_conf.use_callchain) 175 if (symbol_conf.use_callchain)
176 callchain_init(self->callchain); 176 callchain_init(he->callchain);
177 } 177 }
178 178
179 return self; 179 return he;
180} 180}
181 181
182static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) 182static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h)
@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
677 return ret; 677 return ret;
678} 678}
679 679
680static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, 680static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
681 u64 total_samples, int left_margin) 681 u64 total_samples, int left_margin,
682 FILE *fp)
682{ 683{
683 struct rb_node *rb_node; 684 struct rb_node *rb_node;
684 struct callchain_node *chain; 685 struct callchain_node *chain;
685 size_t ret = 0; 686 size_t ret = 0;
686 u32 entries_printed = 0; 687 u32 entries_printed = 0;
687 688
688 rb_node = rb_first(&self->sorted_chain); 689 rb_node = rb_first(&he->sorted_chain);
689 while (rb_node) { 690 while (rb_node) {
690 double percent; 691 double percent;
691 692
@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows)
730 } 731 }
731} 732}
732 733
733static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, 734static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s,
734 size_t size, struct hists *pair_hists, 735 size_t size, struct hists *pair_hists,
735 bool show_displacement, long displacement, 736 bool show_displacement, long displacement,
736 bool color, u64 session_total) 737 bool color, u64 total_period)
737{ 738{
738 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; 739 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
739 u64 nr_events; 740 u64 nr_events;
740 const char *sep = symbol_conf.field_sep; 741 const char *sep = symbol_conf.field_sep;
741 int ret; 742 int ret;
742 743
743 if (symbol_conf.exclude_other && !self->parent) 744 if (symbol_conf.exclude_other && !he->parent)
744 return 0; 745 return 0;
745 746
746 if (pair_hists) { 747 if (pair_hists) {
747 period = self->pair ? self->pair->period : 0; 748 period = he->pair ? he->pair->period : 0;
748 nr_events = self->pair ? self->pair->nr_events : 0; 749 nr_events = he->pair ? he->pair->nr_events : 0;
749 total = pair_hists->stats.total_period; 750 total = pair_hists->stats.total_period;
750 period_sys = self->pair ? self->pair->period_sys : 0; 751 period_sys = he->pair ? he->pair->period_sys : 0;
751 period_us = self->pair ? self->pair->period_us : 0; 752 period_us = he->pair ? he->pair->period_us : 0;
752 period_guest_sys = self->pair ? self->pair->period_guest_sys : 0; 753 period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
753 period_guest_us = self->pair ? self->pair->period_guest_us : 0; 754 period_guest_us = he->pair ? he->pair->period_guest_us : 0;
754 } else { 755 } else {
755 period = self->period; 756 period = he->period;
756 nr_events = self->nr_events; 757 nr_events = he->nr_events;
757 total = session_total; 758 total = total_period;
758 period_sys = self->period_sys; 759 period_sys = he->period_sys;
759 period_us = self->period_us; 760 period_us = he->period_us;
760 period_guest_sys = self->period_guest_sys; 761 period_guest_sys = he->period_guest_sys;
761 period_guest_us = self->period_guest_us; 762 period_guest_us = he->period_guest_us;
762 } 763 }
763 764
764 if (total) { 765 if (total) {
@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s,
812 813
813 if (total > 0) 814 if (total > 0)
814 old_percent = (period * 100.0) / total; 815 old_percent = (period * 100.0) / total;
815 if (session_total > 0) 816 if (total_period > 0)
816 new_percent = (self->period * 100.0) / session_total; 817 new_percent = (he->period * 100.0) / total_period;
817 818
818 diff = new_percent - old_percent; 819 diff = new_percent - old_percent;
819 820
@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size,
862 return ret; 863 return ret;
863} 864}
864 865
865int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, 866static int hist_entry__fprintf(struct hist_entry *he, size_t size,
866 struct hists *pair_hists, bool show_displacement, 867 struct hists *hists, struct hists *pair_hists,
867 long displacement, FILE *fp, u64 session_total) 868 bool show_displacement, long displacement,
869 u64 total_period, FILE *fp)
868{ 870{
869 char bf[512]; 871 char bf[512];
870 int ret; 872 int ret;
@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
874 876
875 ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, 877 ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists,
876 show_displacement, displacement, 878 show_displacement, displacement,
877 true, session_total); 879 true, total_period);
878 hist_entry__snprintf(he, bf + ret, size - ret, hists); 880 hist_entry__snprintf(he, bf + ret, size - ret, hists);
879 return fprintf(fp, "%s\n", bf); 881 return fprintf(fp, "%s\n", bf);
880} 882}
881 883
882static size_t hist_entry__fprintf_callchain(struct hist_entry *self, 884static size_t hist_entry__fprintf_callchain(struct hist_entry *he,
883 struct hists *hists, FILE *fp, 885 struct hists *hists,
884 u64 session_total) 886 u64 total_period, FILE *fp)
885{ 887{
886 int left_margin = 0; 888 int left_margin = 0;
887 889
@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self,
889 struct sort_entry *se = list_first_entry(&hist_entry__sort_list, 891 struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
890 typeof(*se), list); 892 typeof(*se), list);
891 left_margin = hists__col_len(hists, se->se_width_idx); 893 left_margin = hists__col_len(hists, se->se_width_idx);
892 left_margin -= thread__comm_len(self->thread); 894 left_margin -= thread__comm_len(he->thread);
893 } 895 }
894 896
895 return hist_entry_callchain__fprintf(fp, self, session_total, 897 return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
896 left_margin);
897} 898}
898 899
899size_t hists__fprintf(struct hists *hists, struct hists *pair, 900size_t hists__fprintf(struct hists *hists, struct hists *pair,
@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
903 struct sort_entry *se; 904 struct sort_entry *se;
904 struct rb_node *nd; 905 struct rb_node *nd;
905 size_t ret = 0; 906 size_t ret = 0;
907 u64 total_period;
906 unsigned long position = 1; 908 unsigned long position = 1;
907 long displacement = 0; 909 long displacement = 0;
908 unsigned int width; 910 unsigned int width;
@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
917 919
918 fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); 920 fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
919 921
920 if (symbol_conf.show_nr_samples) {
921 if (sep)
922 fprintf(fp, "%cSamples", *sep);
923 else
924 fputs(" Samples ", fp);
925 }
926
927 if (symbol_conf.show_total_period) {
928 if (sep)
929 ret += fprintf(fp, "%cPeriod", *sep);
930 else
931 ret += fprintf(fp, " Period ");
932 }
933
934 if (symbol_conf.show_cpu_utilization) { 922 if (symbol_conf.show_cpu_utilization) {
935 if (sep) { 923 if (sep) {
936 ret += fprintf(fp, "%csys", *sep); 924 ret += fprintf(fp, "%csys", *sep);
@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
940 ret += fprintf(fp, "%cguest us", *sep); 928 ret += fprintf(fp, "%cguest us", *sep);
941 } 929 }
942 } else { 930 } else {
943 ret += fprintf(fp, " sys "); 931 ret += fprintf(fp, " sys ");
944 ret += fprintf(fp, " us "); 932 ret += fprintf(fp, " us ");
945 if (perf_guest) { 933 if (perf_guest) {
946 ret += fprintf(fp, " guest sys "); 934 ret += fprintf(fp, " guest sys ");
947 ret += fprintf(fp, " guest us "); 935 ret += fprintf(fp, " guest us ");
@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
949 } 937 }
950 } 938 }
951 939
940 if (symbol_conf.show_nr_samples) {
941 if (sep)
942 fprintf(fp, "%cSamples", *sep);
943 else
944 fputs(" Samples ", fp);
945 }
946
947 if (symbol_conf.show_total_period) {
948 if (sep)
949 ret += fprintf(fp, "%cPeriod", *sep);
950 else
951 ret += fprintf(fp, " Period ");
952 }
953
952 if (pair) { 954 if (pair) {
953 if (sep) 955 if (sep)
954 ret += fprintf(fp, "%cDelta", *sep); 956 ret += fprintf(fp, "%cDelta", *sep);
@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
993 goto print_entries; 995 goto print_entries;
994 996
995 fprintf(fp, "# ........"); 997 fprintf(fp, "# ........");
998 if (symbol_conf.show_cpu_utilization)
999 fprintf(fp, " ....... .......");
996 if (symbol_conf.show_nr_samples) 1000 if (symbol_conf.show_nr_samples)
997 fprintf(fp, " .........."); 1001 fprintf(fp, " ..........");
998 if (symbol_conf.show_total_period) 1002 if (symbol_conf.show_total_period)
@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair,
1025 goto out; 1029 goto out;
1026 1030
1027print_entries: 1031print_entries:
1032 total_period = hists->stats.total_period;
1033
1028 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 1034 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
1029 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 1035 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1030 1036
@@ -1040,11 +1046,10 @@ print_entries:
1040 ++position; 1046 ++position;
1041 } 1047 }
1042 ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, 1048 ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
1043 displacement, fp, hists->stats.total_period); 1049 displacement, total_period, fp);
1044 1050
1045 if (symbol_conf.use_callchain) 1051 if (symbol_conf.use_callchain)
1046 ret += hist_entry__fprintf_callchain(h, hists, fp, 1052 ret += hist_entry__fprintf_callchain(h, hists, total_period, fp);
1047 hists->stats.total_period);
1048 if (max_rows && ++nr_rows >= max_rows) 1053 if (max_rows && ++nr_rows >= max_rows)
1049 goto out; 1054 goto out;
1050 1055
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ff6f9d56ea41..f55f0a8d1f81 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -66,11 +66,8 @@ struct hists {
66struct hist_entry *__hists__add_entry(struct hists *self, 66struct hist_entry *__hists__add_entry(struct hists *self,
67 struct addr_location *al, 67 struct addr_location *al,
68 struct symbol *parent, u64 period); 68 struct symbol *parent, u64 period);
69extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); 69int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
70extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); 70int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
71int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists,
72 struct hists *pair_hists, bool show_displacement,
73 long displacement, FILE *fp, u64 session_total);
74int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, 71int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size,
75 struct hists *hists); 72 struct hists *hists);
76void hist_entry__free(struct hist_entry *); 73void hist_entry__free(struct hist_entry *);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 531c283fc0c5..b029296d20d9 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -735,8 +735,8 @@ static int
735parse_event_modifier(const char **strp, struct perf_event_attr *attr) 735parse_event_modifier(const char **strp, struct perf_event_attr *attr)
736{ 736{
737 const char *str = *strp; 737 const char *str = *strp;
738 int exclude = 0; 738 int exclude = 0, exclude_GH = 0;
739 int eu = 0, ek = 0, eh = 0, precise = 0; 739 int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0;
740 740
741 if (!*str) 741 if (!*str)
742 return 0; 742 return 0;
@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
760 if (!exclude) 760 if (!exclude)
761 exclude = eu = ek = eh = 1; 761 exclude = eu = ek = eh = 1;
762 eh = 0; 762 eh = 0;
763 } else if (*str == 'G') {
764 if (!exclude_GH)
765 exclude_GH = eG = eH = 1;
766 eG = 0;
767 } else if (*str == 'H') {
768 if (!exclude_GH)
769 exclude_GH = eG = eH = 1;
770 eH = 0;
763 } else if (*str == 'p') { 771 } else if (*str == 'p') {
764 precise++; 772 precise++;
765 } else 773 } else
@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
776 attr->exclude_kernel = ek; 784 attr->exclude_kernel = ek;
777 attr->exclude_hv = eh; 785 attr->exclude_hv = eh;
778 attr->precise_ip = precise; 786 attr->precise_ip = precise;
787 attr->exclude_host = eH;
788 attr->exclude_guest = eG;
779 789
780 return 0; 790 return 0;
781} 791}
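
For reference, the guest/host bits these branches produce (a summary derived from the code above, with the other exclude bits left out):

	/*
	 * "cycles:G"  ->  attr.exclude_host = 1, attr.exclude_guest = 0   (guest only)
	 * "cycles:H"  ->  attr.exclude_host = 0, attr.exclude_guest = 1   (host only)
	 * "cycles:GH" ->  attr.exclude_host = 0, attr.exclude_guest = 0   (both)
	 */
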
@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
838 for (;;) { 848 for (;;) {
839 ostr = str; 849 ostr = str;
840 memset(&attr, 0, sizeof(attr)); 850 memset(&attr, 0, sizeof(attr));
851 event_attr_init(&attr);
841 ret = parse_event_symbols(evlist, &str, &attr); 852 ret = parse_event_symbols(evlist, &str, &attr);
842 if (ret == EVT_FAILED) 853 if (ret == EVT_FAILED)
843 return -1; 854 return -1;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index ac6830d8292b..fc22cf5c605f 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -18,7 +18,6 @@
18 * 18 *
19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
20 */ 20 */
21#include <ctype.h>
22#include "util.h" 21#include "util.h"
23#include <dirent.h> 22#include <dirent.h>
24#include <mntent.h> 23#include <mntent.h>
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 5b3ea49aa63e..813141047fc2 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -1,6 +1,21 @@
1#include "../perf.h"
1#include "util.h" 2#include "util.h"
2#include <sys/mman.h> 3#include <sys/mman.h>
3 4
5/*
6 * XXX We need to find a better place for these things...
7 */
8bool perf_host = true;
9bool perf_guest = true;
10
11void event_attr_init(struct perf_event_attr *attr)
12{
13 if (!perf_host)
14 attr->exclude_host = 1;
15 if (!perf_guest)
16 attr->exclude_guest = 1;
17}
18
4int mkdir_p(char *path, mode_t mode) 19int mkdir_p(char *path, mode_t mode)
5{ 20{
6 struct stat st; 21 struct stat st;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 37be34dff798..b9c530cce79a 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2);
242unsigned long convert_unit(unsigned long value, char *unit); 242unsigned long convert_unit(unsigned long value, char *unit);
243int readn(int fd, void *buf, size_t size); 243int readn(int fd, void *buf, size_t size);
244 244
245struct perf_event_attr;
246
247void event_attr_init(struct perf_event_attr *attr);
248
245#define _STR(x) #x 249#define _STR(x) #x
246#define STR(x) _STR(x) 250#define STR(x) _STR(x)
247 251