29 files changed, 1262 insertions, 468 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index eb93fd0ec734..b29f3c416296 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	stacktrace	[FTRACE]
 			Enabled the stack tracer on boot up.
 
+	stacktrace_filter=[function-list]
+			[FTRACE] Limit the functions that the stack tracer
+			will trace at boot up. function-list is a comma separated
+			list of functions. This list can be changed at run
+			time by the stack_trace_filter file in the debugfs
+			tracing directory. Note, this enables stack tracing
+			and the stacktrace above is not needed.
+
 	sti=		[PARISC,HW]
 			Format: <num>
 			Set the STI (builtin display/keyboard on the HP-PARISC
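As a usage illustration (not part of the patch): booting with a line like the one below starts the stack tracer limited to the listed functions; the function names are only examples, and the same list can later be changed through the stack_trace_filter file mentioned in the new text.

    # boot command line (example function names)
    stacktrace_filter=kmem_cache_alloc,schedule

    # later, at run time, via the debugfs tracing directory
    echo kmem_cache_alloc > /sys/kernel/debug/tracing/stack_trace_filter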
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0caefc6..b903d5ea3941 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
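The intended usage pattern for debug_stack_usage_inc()/debug_stack_usage_dec() is visible in the traps.c hunk later in this patch; a minimal sketch of that bracketing pattern, reduced to the calls the patch actually adds around do_int3():

    /* sketch of the bracketing pattern added to do_int3()/do_debug() */
    debug_stack_usage_inc();        /* tell is_debug_stack() the debug stack is live */
    preempt_conditional_sti(regs);
    do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
    preempt_conditional_cli(regs);
    debug_stack_usage_dec();

On 32-bit builds every one of these helpers compiles to an empty stub, so callers need no #ifdef.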
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fadfdfc..e95822d683f4 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 	desc->limit = (limit >> 16) & 0xf;
 }
 
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 850f2963a420..d43cad74f166 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else /* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 				(unsigned long)estacks;
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 	}
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..940ba711fc28 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+	.macro test_in_nmi reg stack nmi_ret normal_ret
+	cmpq %\reg, \stack
+	ja \normal_ret
+	subq $EXCEPTION_STKSZ, %\reg
+	cmpq %\reg, \stack
+	jb \normal_ret
+	jmp \nmi_ret
+	.endm
 
 /* runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
+	/*
+	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
+	 * the iretq it performs will take us out of NMI context.
+	 * This means that we can have nested NMIs where the next
+	 * NMI is using the top of the stack of the previous NMI. We
+	 * can't let it execute because the nested NMI will corrupt the
+	 * stack of the previous NMI. NMI handlers are not re-entrant
+	 * anyway.
+	 *
+	 * To handle this case we do the following:
+	 *  Check a special location on the stack that contains
+	 *  a variable that is set when NMIs are executing.
+	 *  The interrupted task's stack is also checked to see if it
+	 *  is an NMI stack.
+	 *  If the variable is not set and the stack is not the NMI
+	 *  stack then:
+	 *    o Set the special variable on the stack
+	 *    o Copy the interrupt frame into a "saved" location on the stack
+	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Continue processing the NMI
+	 *  If the variable is set or the previous stack is the NMI stack:
+	 *    o Modify the "copy" location to jump to repeat_nmi
+	 *    o return back to the first NMI
+	 *
+	 * Now on exit of the first NMI, we first clear the stack variable.
+	 * The NMI stack will tell any nested NMIs at that point that it is
+	 * nested. Then we pop the stack normally with iret, and if there was
+	 * a nested NMI that updated the copy interrupt stack frame, a
+	 * jump will be made to the repeat_nmi code that will handle the second
+	 * NMI.
+	 */
+
+	/* Use %rdx as our temp variable throughout */
+	pushq_cfi %rdx
+
+	/*
+	 * Check the special variable on the stack to see if NMIs are
+	 * executing.
+	 */
+	cmp $1, -8(%rsp)
+	je nested_nmi
+
+	/*
+	 * Now test if the previous stack was an NMI stack.
+	 * We need the double check. We check the NMI stack to satisfy the
+	 * race when the first NMI clears the variable before returning.
+	 * We check the variable because the first NMI could be in a
+	 * breakpoint routine using a breakpoint stack.
+	 */
+	lea 6*8(%rsp), %rdx
+	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+	/*
+	 * Do nothing if we interrupted the fixup in repeat_nmi.
+	 * It's about to repeat the NMI handler, so we are fine
+	 * with ignoring this one.
+	 */
+	movq $repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja 1f
+	movq $end_repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja nested_nmi_out
+
+1:
+	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
+	leaq -6*8(%rsp), %rdx
+	movq %rdx, %rsp
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
+	pushq_cfi $repeat_nmi
+
+	/* Put stack back */
+	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+	popq_cfi %rdx
+
+	/* No need to check faults here */
+	INTERRUPT_RETURN
+
+first_nmi:
+	/*
+	 * Because nested NMIs will use the pushed location that we
+	 * stored in rdx, we must keep that space available.
+	 * Here's what our stack frame will look like:
+	 * +-------------------------+
+	 * | original SS             |
+	 * | original Return RSP     |
+	 * | original RFLAGS         |
+	 * | original CS             |
+	 * | original RIP            |
+	 * +-------------------------+
+	 * | temp storage for rdx    |
+	 * +-------------------------+
+	 * | NMI executing variable  |
+	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
+	 * | copied SS               |
+	 * | copied Return RSP       |
+	 * | copied RFLAGS           |
+	 * | copied CS               |
+	 * | copied RIP              |
+	 * +-------------------------+
+	 * | pt_regs                 |
+	 * +-------------------------+
+	 *
+	 * The saved RIP is used to fix up the copied RIP that a nested
+	 * NMI may zero out. The original stack frame and the temp storage
+	 * are also used by nested NMIs and can not be trusted on exit.
+	 */
+	/* Set the NMI executing variable on the stack. */
+	pushq_cfi $1
+
+	/* Copy the stack frame to the Saved frame */
+	.rept 5
+	pushq_cfi 6*8(%rsp)
+	.endr
+
+	/* Make another copy, this one may be modified by nested NMIs */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	/* Do not pop rdx, nested NMIs will corrupt it */
+	movq 11*8(%rsp), %rdx
+
+	/*
+	 * Everything below this point can be preempted by a nested
+	 * NMI if the first NMI took an exception. Repeated NMIs
+	 * caused by an exception and nested NMI will start here, and
+	 * can still be preempted by another NMI.
+	 */
+restart_nmi:
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	/*
+	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * as we should not be calling schedule in NMI context.
+	 * Even with normal interrupts enabled. An NMI should not be
+	 * setting NEED_RESCHED or anything that normal interrupts and
+	 * exceptions might do.
+	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx: no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
+	/* Clear the NMI executing stack variable */
+	movq $0, 10*8(%rsp)
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi			/* arg2: oldset */
-	movq %rsp,%rdi			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
 	CFI_ENDPROC
-#endif
 END(nmi)
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
+repeat_nmi:
+	INTR_FRAME
+	/* Update the stack variable to say we are still in NMI */
+	movq $1, 5*8(%rsp)
+
+	/* copy the saved stack back to copy stack */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	jmp restart_nmi
+	CFI_ENDPROC
+end_repeat_nmi:
+
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
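For readers less fluent in AT&T assembly, the test_in_nmi macro added above reduces to a bounds check on the interrupted stack pointer. The following stand-alone C model is only an illustration; the EXCEPTION_STKSZ value and the sample addresses are placeholders, not the kernel's configuration-dependent values.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXCEPTION_STKSZ 4096UL  /* placeholder; the kernel value depends on config */

    /*
     * Mirrors the macro: "ja normal_ret" rejects addresses above the top of the
     * NMI stack region, "jb normal_ret" rejects addresses below top - EXCEPTION_STKSZ,
     * anything left is treated as being on the NMI stack.
     */
    static bool test_in_nmi(uint64_t nmi_stack_top, uint64_t stack)
    {
            return stack <= nmi_stack_top &&
                   stack >= nmi_stack_top - EXCEPTION_STKSZ;
    }

    int main(void)
    {
            uint64_t top = 0x10000UL;  /* made-up stack top for the demo */

            printf("inside: %d, outside: %d\n",
                   test_in_nmi(top, top - 64), test_in_nmi(top, top + 64));
            return 0;
    }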
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e11e39478a49..40f4eb3766d1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -417,6 +417,10 @@ ENTRY(phys_base)
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
+	.align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+	.skip IDT_ENTRIES * 16
+
 	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e88f37b58ddd..47acaf319165 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		unknown_nmi_error(reason, regs);
 }
 
+/*
+ * NMIs can hit breakpoints which will cause them to lose their
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ *  1) not running
+ *  2) executing
+ *  3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ *  when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * a cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+	NMI_NOT_RUNNING,
+	NMI_EXECUTING,
+	NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs)					\
+	do {								\
+		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
+			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+			return;						\
+		}							\
+	nmi_restart:							\
+		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
+	} while (0)
+
+#define nmi_nesting_postprocess()					\
+	do {								\
+		if (cmpxchg(&__get_cpu_var(nmi_state),			\
+		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+			goto nmi_restart;				\
+	} while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed and the debug stack is being used,
+ * and an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
+
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
+	/*
+	 * If we interrupted a breakpoint, it is possible that
+	 * the nmi handler will have breakpoints too. We need to
+	 * change the IDT such that breakpoints that happen here
+	 * continue to use the NMI stack.
+	 */
+	if (unlikely(is_debug_stack(regs->sp))) {
+		debug_stack_set_zero();
+		__get_cpu_var(update_debug_stack) = 1;
+	}
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+	if (unlikely(__get_cpu_var(update_debug_stack)))
+		debug_stack_reset();
+}
+#endif
+
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+	nmi_nesting_preprocess(regs);
+
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
 	default_do_nmi(regs);
 
 	nmi_exit();
+
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 
 void stop_nmi(void)
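The three-state logic added above can be modelled in ordinary user-space C. The sketch below is only an illustration of why the final cmpxchg re-runs the handler when a latched NMI arrived; it is not kernel code, and in the kernel the "nested" entry comes from the NMI preempting a breakpoint handler on the same CPU rather than from a second thread.

    #include <stdatomic.h>
    #include <stdio.h>

    enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };
    static _Atomic int nmi_state = NMI_NOT_RUNNING;

    static void handle_one_nmi(void)
    {
            puts("handling one NMI");
    }

    /* user-space model of the i386 do_nmi() flow built from the two macros above */
    static void do_nmi_model(void)
    {
            int expected;

            if (atomic_load(&nmi_state) != NMI_NOT_RUNNING) {
                    /* a "nested" caller only latches and returns */
                    atomic_store(&nmi_state, NMI_LATCHED);
                    return;
            }
    nmi_restart:
            atomic_store(&nmi_state, NMI_EXECUTING);
            handle_one_nmi();

            /* if someone latched while we were executing, run the handler again */
            expected = NMI_EXECUTING;
            if (!atomic_compare_exchange_strong(&nmi_state, &expected, NMI_NOT_RUNNING))
                    goto nmi_restart;
    }

    int main(void)
    {
            do_nmi_model();
            return 0;
    }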
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb679d..482ec3af2067 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 }
 
 #ifdef CONFIG_X86_64
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 							SIGTRAP) == NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 		preempt_conditional_cli(regs);
+		debug_stack_usage_dec();
 		return;
 	}
 
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 
 	return;
 }
@@ -718,4 +732,10 @@ void __init trap_init(void)
 	cpu_init();
 
 	x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	set_nmi_gate(1, &debug);
+	set_nmi_gate(3, &int3);
+#endif
 }
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028e833d..3fd17c249221 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -50,6 +50,11 @@
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
+#else
+/* A lot of inline functions can cause havoc with function tracing */
+# define inline		inline		notrace
+# define __inline__	__inline__	notrace
+# define __inline	__inline	notrace
 #endif
 
 #define __deprecated			__attribute__((deprecated))
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 26eafcef75be..028e26f0bf08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -133,6 +133,8 @@ struct ftrace_func_command {
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_post_process(void);
 
+void ftrace_bug(int err, unsigned long ip);
+
 struct seq_file;
 
 struct ftrace_probe_ops {
@@ -161,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
 
 enum {
 	FTRACE_FL_ENABLED	= (1 << 30),
-	FTRACE_FL_FREE		= (1 << 31),
 };
 
 #define FTRACE_FL_MASK		(0x3UL << 30)
@@ -172,10 +173,7 @@ struct dyn_ftrace {
 		unsigned long ip; /* address of mcount call-site */
 		struct dyn_ftrace *freelist;
 	};
-	union {
-		unsigned long flags;
-		struct dyn_ftrace *newlist;
-	};
+	unsigned long flags;
 	struct dyn_arch_ftrace arch;
 };
 
@@ -190,6 +188,56 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+};
+
+enum {
+	FTRACE_UPDATE_IGNORE,
+	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MAKE_NOP,
+};
+
+enum {
+	FTRACE_ITER_FILTER	= (1 << 0),
+	FTRACE_ITER_NOTRACE	= (1 << 1),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_DO_HASH	= (1 << 3),
+	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_ENABLED	= (1 << 5),
+};
+
+void arch_ftrace_update_code(int command);
+
+struct ftrace_rec_iter;
+
+struct ftrace_rec_iter *ftrace_rec_iter_start(void);
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
+
+int ftrace_update_record(struct dyn_ftrace *rec, int enable);
+int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+void ftrace_run_stop_machine(int command);
+int ftrace_location(unsigned long ip);
+
+extern ftrace_func_t ftrace_trace_function;
+
+int ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file);
+ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+int ftrace_regex_release(struct inode *inode, struct file *file);
+
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
@@ -284,6 +332,25 @@ static inline int ftrace_text_reserved(void *start, void *end)
 {
 	return 0;
 }
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+
+static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+	return -ENODEV;
+}
+static inline int
+ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
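The record iterator and the FTRACE_UPDATE_* helpers declared above exist so that an architecture can replace the stop_machine() path by overriding arch_ftrace_update_code() and walking every record itself. The following is only a hedged sketch of such an override; the actual patching step is left as comments because it is entirely arch specific, and nothing in this sketch is part of the patch.

    /* sketch: walking all ftrace records from an arch_ftrace_update_code() override */
    void arch_ftrace_update_code(int command)
    {
            int enable = !!(command & FTRACE_UPDATE_CALLS);
            struct ftrace_rec_iter *iter;
            struct dyn_ftrace *rec;

            for (iter = ftrace_rec_iter_start(); iter;
                 iter = ftrace_rec_iter_next(iter)) {
                    rec = ftrace_rec_iter_record(iter);

                    switch (ftrace_update_record(rec, enable)) {
                    case FTRACE_UPDATE_IGNORE:
                            break;
                    case FTRACE_UPDATE_MAKE_CALL:
                            /* arch code would patch rec->ip to call the tracer here */
                            break;
                    case FTRACE_UPDATE_MAKE_NOP:
                            /* arch code would patch rec->ip back to a NOP here */
                            break;
                    }
            }
    }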
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b1e8943fed1d..683d559a0eef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,11 +22,13 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/sort.h>
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
@@ -947,13 +949,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-enum {
-	FTRACE_ENABLE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-};
 struct ftrace_func_entry {
 	struct hlist_node hlist;
 	unsigned long ip;
@@ -984,18 +979,19 @@ static struct ftrace_ops global_ops = {
 	.filter_hash		= EMPTY_HASH,
 };
 
-static struct dyn_ftrace *ftrace_new_addrs;
-
 static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
 	struct ftrace_page	*next;
+	struct dyn_ftrace	*records;
 	int			index;
-	struct dyn_ftrace	records[];
+	int			size;
 };
 
-#define ENTRIES_PER_PAGE \
-	((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
+static struct ftrace_page	*ftrace_new_pgs;
+
+#define ENTRY_SIZE sizeof(struct dyn_ftrace)
+#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
 
 /* estimate from running different kernels */
 #define NR_TO_INIT		10000
@@ -1003,7 +999,10 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static struct dyn_ftrace *ftrace_free_records;
+static bool ftrace_hash_empty(struct ftrace_hash *hash)
+{
+	return !hash || !hash->count;
+}
 
 static struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
@@ -1013,7 +1012,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 	struct hlist_head *hhd;
 	struct hlist_node *n;
 
-	if (!hash->count)
+	if (ftrace_hash_empty(hash))
 		return NULL;
 
 	if (hash->size_bits > 0)
@@ -1157,7 +1156,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 		return NULL;
 
 	/* Empty hash? */
-	if (!hash || !hash->count)
+	if (ftrace_hash_empty(hash))
 		return new_hash;
 
 	size = 1 << hash->size_bits;
@@ -1282,9 +1281,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 	filter_hash = rcu_dereference_raw(ops->filter_hash);
 	notrace_hash = rcu_dereference_raw(ops->notrace_hash);
 
-	if ((!filter_hash || !filter_hash->count ||
+	if ((ftrace_hash_empty(filter_hash) ||
 	     ftrace_lookup_ip(filter_hash, ip)) &&
-	    (!notrace_hash || !notrace_hash->count ||
+	    (ftrace_hash_empty(notrace_hash) ||
 	     !ftrace_lookup_ip(notrace_hash, ip)))
 		ret = 1;
 	else
@@ -1307,6 +1306,47 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 	}				\
 	}
 
+
+static int ftrace_cmp_recs(const void *a, const void *b)
+{
+	const struct dyn_ftrace *reca = a;
+	const struct dyn_ftrace *recb = b;
+
+	if (reca->ip > recb->ip)
+		return 1;
+	if (reca->ip < recb->ip)
+		return -1;
+	return 0;
+}
+
+/**
+ * ftrace_location - return true if the ip given is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+	struct ftrace_page *pg;
+	struct dyn_ftrace *rec;
+	struct dyn_ftrace key;
+
+	key.ip = ip;
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		rec = bsearch(&key, pg->records, pg->index,
+			      sizeof(struct dyn_ftrace),
+			      ftrace_cmp_recs);
+		if (rec)
+			return 1;
+	}
+
+	return 0;
+}
+
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
@@ -1336,7 +1376,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 	if (filter_hash) {
 		hash = ops->filter_hash;
 		other_hash = ops->notrace_hash;
-		if (!hash || !hash->count)
+		if (ftrace_hash_empty(hash))
 			all = 1;
 	} else {
 		inc = !inc;
@@ -1346,7 +1386,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 		 * If the notrace hash has no items,
 		 * then there's nothing to do.
 		 */
-		if (hash && !hash->count)
+		if (ftrace_hash_empty(hash))
 			return;
 	}
 
@@ -1363,8 +1403,8 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
 				match = 1;
 		} else {
-			in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip);
-			in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip);
+			in_hash = !!ftrace_lookup_ip(hash, rec->ip);
+			in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
 
 			/*
 			 *
@@ -1372,7 +1412,7 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (filter_hash && in_hash && !in_other_hash)
 				match = 1;
 			else if (!filter_hash && in_hash &&
-				 (in_other_hash || !other_hash->count))
+				 (in_other_hash || ftrace_hash_empty(other_hash)))
 				match = 1;
 		}
 		if (!match)
@@ -1406,40 +1446,12 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
 	__ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
-	rec->freelist = ftrace_free_records;
-	ftrace_free_records = rec;
-	rec->flags |= FTRACE_FL_FREE;
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
-
-	/* First check for freed records */
-	if (ftrace_free_records) {
-		rec = ftrace_free_records;
-
-		if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
-			FTRACE_WARN_ON_ONCE(1);
-			ftrace_free_records = NULL;
+	if (ftrace_pages->index == ftrace_pages->size) {
+		/* We should have allocated enough */
+		if (WARN_ON(!ftrace_pages->next))
 			return NULL;
-		}
-
-		ftrace_free_records = rec->freelist;
-		memset(rec, 0, sizeof(*rec));
-		return rec;
-	}
-
-	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
-		if (!ftrace_pages->next) {
-			/* allocate another page */
-			ftrace_pages->next =
-			    (void *)get_zeroed_page(GFP_KERNEL);
-			if (!ftrace_pages->next)
-				return NULL;
-		}
 		ftrace_pages = ftrace_pages->next;
 	}
 
@@ -1459,8 +1471,6 @@ ftrace_record_ip(unsigned long ip)
 		return NULL;
 
 	rec->ip = ip;
-	rec->newlist = ftrace_new_addrs;
-	ftrace_new_addrs = rec;
 
 	return rec;
 }
@@ -1475,7 +1485,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
 		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writing to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
 {
 	switch (failed) {
 	case -EFAULT:
@@ -1517,24 +1539,19 @@ int ftrace_text_reserved(void *start, void *end)
 	return 0;
 }
 
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 {
-	unsigned long ftrace_addr;
 	unsigned long flag = 0UL;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	/*
-	 * If we are enabling tracing:
+	 * If we are updating calls:
 	 *
 	 * If the record has a ref count, then we need to enable it
 	 * because someone is using it.
 	 *
 	 * Otherwise we make sure its disabled.
 	 *
-	 * If we are disabling tracing, then disable all records that
+	 * If we are disabling calls, then disable all records that
 	 * are enabled.
 	 */
 	if (enable && (rec->flags & ~FTRACE_FL_MASK))
@@ -1542,18 +1559,72 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
-		return 0;
+		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		rec->flags |= FTRACE_FL_ENABLED;
+		if (update)
+			rec->flags |= FTRACE_FL_ENABLED;
+		return FTRACE_UPDATE_MAKE_CALL;
+	}
+
+	if (update)
+		rec->flags &= ~FTRACE_FL_ENABLED;
+
+	return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	ret = ftrace_update_record(rec, enable);
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
 		return ftrace_make_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
 	}
 
-	rec->flags &= ~FTRACE_FL_ENABLED;
-	return ftrace_make_nop(NULL, rec, ftrace_addr);
+	return -1; /* unknown ftrace bug */
 }
 
-static void ftrace_replace_code(int enable)
+static void ftrace_replace_code(int update)
 {
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
@@ -1563,11 +1634,7 @@ static void ftrace_replace_code(int enable)
 		return;
 
 	do_for_each_ftrace_rec(pg, rec) {
-		/* Skip over free records */
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
-		failed = __ftrace_replace_code(rec, enable);
+		failed = __ftrace_replace_code(rec, update);
 		if (failed) {
 			ftrace_bug(failed, rec->ip);
 			/* Stop processing */
@@ -1576,6 +1643,78 @@ static void ftrace_replace_code(int enable)
 	} while_for_each_ftrace_rec();
 }
 
+struct ftrace_rec_iter {
+	struct ftrace_page	*pg;
+	int			index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+	/*
+	 * We only use a single iterator.
+	 * Protected by the ftrace_lock mutex.
+	 */
+	static struct ftrace_rec_iter ftrace_rec_iter;
+	struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+	iter->pg = ftrace_pages_start;
+	iter->index = 0;
+
+	/* Could have empty pages */
+	while (iter->pg && !iter->pg->index)
+		iter->pg = iter->pg->next;
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+	iter->index++;
+
+	if (iter->index >= iter->pg->index) {
+		iter->pg = iter->pg->next;
+		iter->index = 0;
+
+		/* Could have empty pages */
+		while (iter->pg && !iter->pg->index)
+			iter->pg = iter->pg->next;
+	}
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+	return &iter->pg->records[iter->index];
+}
+
 static int
 ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
@@ -1617,13 +1756,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	/*
-	 * Do not call function tracer while we update the code.
-	 * We are in stop machine, no worrying about races.
-	 */
-	function_trace_stop++;
-
-	if (*command & FTRACE_ENABLE_CALLS)
+	if (*command & FTRACE_UPDATE_CALLS)
 		ftrace_replace_code(1);
 	else if (*command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
@@ -1636,21 +1769,33 @@ static int __ftrace_modify_code(void *data)
 	else if (*command & FTRACE_STOP_FUNC_RET)
 		ftrace_disable_ftrace_graph_caller();
 
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	/*
-	 * For archs that call ftrace_test_stop_func(), we must
-	 * wait till after we update all the function callers
-	 * before we update the callback. This keeps different
-	 * ops that record different functions from corrupting
-	 * each other.
-	 */
-	__ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
-	function_trace_stop--;
-
 	return 0;
 }
 
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method,
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if it does not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+	ftrace_run_stop_machine(command);
+}
+
 static void ftrace_run_update_code(int command)
 {
 	int ret;
@@ -1659,8 +1804,31 @@ static void ftrace_run_update_code(int command)
 	FTRACE_WARN_ON(ret);
 	if (ret)
 		return;
+	/*
+	 * Do not call function tracer while we update the code.
+	 * We are in stop machine.
+	 */
+	function_trace_stop++;
 
-	stop_machine(__ftrace_modify_code, &command, NULL);
+	/*
+	 * By default we use stop_machine() to modify the code.
+	 * But archs can do whatever they want as long as it
+	 * is safe. The stop_machine() is the safest, but also
+	 * produces the most overhead.
+	 */
+	arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	/*
+	 * For archs that call ftrace_test_stop_func(), we must
+	 * wait till after we update all the function callers
+	 * before we update the callback. This keeps different
+	 * ops that record different functions from corrupting
+	 * each other.
+	 */
+	__ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+	function_trace_stop--;
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
@@ -1691,7 +1859,7 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) | |||
1691 | return -ENODEV; | 1859 | return -ENODEV; |
1692 | 1860 | ||
1693 | ftrace_start_up++; | 1861 | ftrace_start_up++; |
1694 | command |= FTRACE_ENABLE_CALLS; | 1862 | command |= FTRACE_UPDATE_CALLS; |
1695 | 1863 | ||
1696 | /* ops marked global share the filter hashes */ | 1864 | /* ops marked global share the filter hashes */ |
1697 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 1865 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
@@ -1743,8 +1911,7 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
1743 | if (ops != &global_ops || !global_start_up) | 1911 | if (ops != &global_ops || !global_start_up) |
1744 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | 1912 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; |
1745 | 1913 | ||
1746 | if (!ftrace_start_up) | 1914 | command |= FTRACE_UPDATE_CALLS; |
1747 | command |= FTRACE_DISABLE_CALLS; | ||
1748 | 1915 | ||
1749 | if (saved_ftrace_func != ftrace_trace_function) { | 1916 | if (saved_ftrace_func != ftrace_trace_function) { |
1750 | saved_ftrace_func = ftrace_trace_function; | 1917 | saved_ftrace_func = ftrace_trace_function; |
@@ -1766,7 +1933,7 @@ static void ftrace_startup_sysctl(void) | |||
1766 | saved_ftrace_func = NULL; | 1933 | saved_ftrace_func = NULL; |
1767 | /* ftrace_start_up is true if we want ftrace running */ | 1934 | /* ftrace_start_up is true if we want ftrace running */ |
1768 | if (ftrace_start_up) | 1935 | if (ftrace_start_up) |
1769 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 1936 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
1770 | } | 1937 | } |
1771 | 1938 | ||
1772 | static void ftrace_shutdown_sysctl(void) | 1939 | static void ftrace_shutdown_sysctl(void) |
@@ -1788,14 +1955,16 @@ static int ops_traces_mod(struct ftrace_ops *ops) | |||
1788 | struct ftrace_hash *hash; | 1955 | struct ftrace_hash *hash; |
1789 | 1956 | ||
1790 | hash = ops->filter_hash; | 1957 | hash = ops->filter_hash; |
1791 | return !!(!hash || !hash->count); | 1958 | return ftrace_hash_empty(hash); |
1792 | } | 1959 | } |
1793 | 1960 | ||
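The ftrace_hash_empty() helper used here is defined elsewhere in ftrace.c (not shown in this hunk); a sketch of its likely shape, inferred from the open-coded check it replaces:

static bool ftrace_hash_empty(struct ftrace_hash *hash)
{
	return !hash || !hash->count;
}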
1794 | static int ftrace_update_code(struct module *mod) | 1961 | static int ftrace_update_code(struct module *mod) |
1795 | { | 1962 | { |
1963 | struct ftrace_page *pg; | ||
1796 | struct dyn_ftrace *p; | 1964 | struct dyn_ftrace *p; |
1797 | cycle_t start, stop; | 1965 | cycle_t start, stop; |
1798 | unsigned long ref = 0; | 1966 | unsigned long ref = 0; |
1967 | int i; | ||
1799 | 1968 | ||
1800 | /* | 1969 | /* |
1801 | * When adding a module, we need to check if tracers are | 1970 | * When adding a module, we need to check if tracers are |
@@ -1817,46 +1986,44 @@ static int ftrace_update_code(struct module *mod) | |||
1817 | start = ftrace_now(raw_smp_processor_id()); | 1986 | start = ftrace_now(raw_smp_processor_id()); |
1818 | ftrace_update_cnt = 0; | 1987 | ftrace_update_cnt = 0; |
1819 | 1988 | ||
1820 | while (ftrace_new_addrs) { | 1989 | for (pg = ftrace_new_pgs; pg; pg = pg->next) { |
1821 | 1990 | ||
1822 | /* If something went wrong, bail without enabling anything */ | 1991 | for (i = 0; i < pg->index; i++) { |
1823 | if (unlikely(ftrace_disabled)) | 1992 | /* If something went wrong, bail without enabling anything */ |
1824 | return -1; | 1993 | if (unlikely(ftrace_disabled)) |
1994 | return -1; | ||
1825 | 1995 | ||
1826 | p = ftrace_new_addrs; | 1996 | p = &pg->records[i]; |
1827 | ftrace_new_addrs = p->newlist; | 1997 | p->flags = ref; |
1828 | p->flags = ref; | ||
1829 | 1998 | ||
1830 | /* | 1999 | /* |
1831 | * Do the initial record conversion from mcount jump | 2000 | * Do the initial record conversion from mcount jump |
1832 | * to the NOP instructions. | 2001 | * to the NOP instructions. |
1833 | */ | 2002 | */ |
1834 | if (!ftrace_code_disable(mod, p)) { | 2003 | if (!ftrace_code_disable(mod, p)) |
1835 | ftrace_free_rec(p); | 2004 | break; |
1836 | /* Game over */ | ||
1837 | break; | ||
1838 | } | ||
1839 | 2005 | ||
1840 | ftrace_update_cnt++; | 2006 | ftrace_update_cnt++; |
1841 | 2007 | ||
1842 | /* | 2008 | /* |
1843 | * If the tracing is enabled, go ahead and enable the record. | 2009 | * If the tracing is enabled, go ahead and enable the record. |
1844 | * | 2010 | * |
1845 | * The reason not to enable the record immediately is the | 2011 | * The reason not to enable the record immediately is the |
1846 | * inherent check of ftrace_make_nop/ftrace_make_call for | 2012 | * inherent check of ftrace_make_nop/ftrace_make_call for |
1847 | * correct previous instructions. Making first the NOP | 2013 | * correct previous instructions. Making first the NOP |
1848 | * conversion puts the module to the correct state, thus | 2014 | * conversion puts the module to the correct state, thus |
1849 | * passing the ftrace_make_call check. | 2015 | * passing the ftrace_make_call check. |
1850 | */ | 2016 | */ |
1851 | if (ftrace_start_up && ref) { | 2017 | if (ftrace_start_up && ref) { |
1852 | int failed = __ftrace_replace_code(p, 1); | 2018 | int failed = __ftrace_replace_code(p, 1); |
1853 | if (failed) { | 2019 | if (failed) |
1854 | ftrace_bug(failed, p->ip); | 2020 | ftrace_bug(failed, p->ip); |
1855 | ftrace_free_rec(p); | ||
1856 | } | 2021 | } |
1857 | } | 2022 | } |
1858 | } | 2023 | } |
1859 | 2024 | ||
2025 | ftrace_new_pgs = NULL; | ||
2026 | |||
1860 | stop = ftrace_now(raw_smp_processor_id()); | 2027 | stop = ftrace_now(raw_smp_processor_id()); |
1861 | ftrace_update_time = stop - start; | 2028 | ftrace_update_time = stop - start; |
1862 | ftrace_update_tot_cnt += ftrace_update_cnt; | 2029 | ftrace_update_tot_cnt += ftrace_update_cnt; |
@@ -1864,57 +2031,108 @@ static int ftrace_update_code(struct module *mod) | |||
1864 | return 0; | 2031 | return 0; |
1865 | } | 2032 | } |
1866 | 2033 | ||
1867 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) | 2034 | static int ftrace_allocate_records(struct ftrace_page *pg, int count) |
1868 | { | 2035 | { |
1869 | struct ftrace_page *pg; | 2036 | int order; |
1870 | int cnt; | 2037 | int cnt; |
1871 | int i; | ||
1872 | 2038 | ||
1873 | /* allocate a few pages */ | 2039 | if (WARN_ON(!count)) |
1874 | ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); | 2040 | return -EINVAL; |
1875 | if (!ftrace_pages_start) | 2041 | |
1876 | return -1; | 2042 | order = get_count_order(DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); |
1877 | 2043 | ||
1878 | /* | 2044 | /* |
1879 | * Allocate a few more pages. | 2045 | * We want to fill as much as possible. No more than a page |
1880 | * | 2046 | * may be empty. |
1881 | * TODO: have some parser search vmlinux before | ||
1882 | * final linking to find all calls to ftrace. | ||
1883 | * Then we can: | ||
1884 | * a) know how many pages to allocate. | ||
1885 | * and/or | ||
1886 | * b) set up the table then. | ||
1887 | * | ||
1888 | * The dynamic code is still necessary for | ||
1889 | * modules. | ||
1890 | */ | 2047 | */ |
2048 | while ((PAGE_SIZE << order) / ENTRY_SIZE >= count + ENTRIES_PER_PAGE) | ||
2049 | order--; | ||
1891 | 2050 | ||
1892 | pg = ftrace_pages = ftrace_pages_start; | 2051 | again: |
2052 | pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); | ||
1893 | 2053 | ||
1894 | cnt = num_to_init / ENTRIES_PER_PAGE; | 2054 | if (!pg->records) { |
1895 | pr_info("ftrace: allocating %ld entries in %d pages\n", | 2055 | /* if we can't allocate this size, try something smaller */ |
1896 | num_to_init, cnt + 1); | 2056 | if (!order) |
2057 | return -ENOMEM; | ||
2058 | order >>= 1; | ||
2059 | goto again; | ||
2060 | } | ||
1897 | 2061 | ||
1898 | for (i = 0; i < cnt; i++) { | 2062 | cnt = (PAGE_SIZE << order) / ENTRY_SIZE; |
1899 | pg->next = (void *)get_zeroed_page(GFP_KERNEL); | 2063 | pg->size = cnt; |
1900 | 2064 | ||
1901 | /* If we fail, we'll try later anyway */ | 2065 | if (cnt > count) |
1902 | if (!pg->next) | 2066 | cnt = count; |
2067 | |||
2068 | return cnt; | ||
2069 | } | ||
2070 | |||
2071 | static struct ftrace_page * | ||
2072 | ftrace_allocate_pages(unsigned long num_to_init) | ||
2073 | { | ||
2074 | struct ftrace_page *start_pg; | ||
2075 | struct ftrace_page *pg; | ||
2076 | int order; | ||
2077 | int cnt; | ||
2078 | |||
2079 | if (!num_to_init) | ||
2080 | return 0; | ||
2081 | |||
2082 | start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); | ||
2083 | if (!pg) | ||
2084 | return NULL; | ||
2085 | |||
2086 | /* | ||
2087 | * Try to allocate as much as possible in one contiguous | ||
2088 | * location that fills in all of the space. We want to | ||
2089 | * waste as little space as possible. | ||
2090 | */ | ||
2091 | for (;;) { | ||
2092 | cnt = ftrace_allocate_records(pg, num_to_init); | ||
2093 | if (cnt < 0) | ||
2094 | goto free_pages; | ||
2095 | |||
2096 | num_to_init -= cnt; | ||
2097 | if (!num_to_init) | ||
1903 | break; | 2098 | break; |
1904 | 2099 | ||
2100 | pg->next = kzalloc(sizeof(*pg), GFP_KERNEL); | ||
2101 | if (!pg->next) | ||
2102 | goto free_pages; | ||
2103 | |||
1905 | pg = pg->next; | 2104 | pg = pg->next; |
1906 | } | 2105 | } |
1907 | 2106 | ||
1908 | return 0; | 2107 | return start_pg; |
2108 | |||
2109 | free_pages: | ||
2110 | while (start_pg) { | ||
2111 | order = get_count_order(pg->size / ENTRIES_PER_PAGE); | ||
2112 | free_pages((unsigned long)pg->records, order); | ||
2113 | start_pg = pg->next; | ||
2114 | kfree(pg); | ||
2115 | pg = start_pg; | ||
2116 | } | ||
2117 | pr_info("ftrace: FAILED to allocate memory for functions\n"); | ||
2118 | return NULL; | ||
1909 | } | 2119 | } |
1910 | 2120 | ||
1911 | enum { | 2121 | static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) |
1912 | FTRACE_ITER_FILTER = (1 << 0), | 2122 | { |
1913 | FTRACE_ITER_NOTRACE = (1 << 1), | 2123 | int cnt; |
1914 | FTRACE_ITER_PRINTALL = (1 << 2), | 2124 | |
1915 | FTRACE_ITER_HASH = (1 << 3), | 2125 | if (!num_to_init) { |
1916 | FTRACE_ITER_ENABLED = (1 << 4), | 2126 | pr_info("ftrace: No functions to be traced?\n"); |
1917 | }; | 2127 | return -1; |
2128 | } | ||
2129 | |||
2130 | cnt = num_to_init / ENTRIES_PER_PAGE; | ||
2131 | pr_info("ftrace: allocating %ld entries in %d pages\n", | ||
2132 | num_to_init, cnt + 1); | ||
2133 | |||
2134 | return 0; | ||
2135 | } | ||
1918 | 2136 | ||
1919 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ | 2137 | #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ |
1920 | 2138 | ||
@@ -1980,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) | |||
1980 | void *p = NULL; | 2198 | void *p = NULL; |
1981 | loff_t l; | 2199 | loff_t l; |
1982 | 2200 | ||
2201 | if (!(iter->flags & FTRACE_ITER_DO_HASH)) | ||
2202 | return NULL; | ||
2203 | |||
1983 | if (iter->func_pos > *pos) | 2204 | if (iter->func_pos > *pos) |
1984 | return NULL; | 2205 | return NULL; |
1985 | 2206 | ||
@@ -2023,7 +2244,7 @@ static void * | |||
2023 | t_next(struct seq_file *m, void *v, loff_t *pos) | 2244 | t_next(struct seq_file *m, void *v, loff_t *pos) |
2024 | { | 2245 | { |
2025 | struct ftrace_iterator *iter = m->private; | 2246 | struct ftrace_iterator *iter = m->private; |
2026 | struct ftrace_ops *ops = &global_ops; | 2247 | struct ftrace_ops *ops = iter->ops; |
2027 | struct dyn_ftrace *rec = NULL; | 2248 | struct dyn_ftrace *rec = NULL; |
2028 | 2249 | ||
2029 | if (unlikely(ftrace_disabled)) | 2250 | if (unlikely(ftrace_disabled)) |
@@ -2047,9 +2268,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
2047 | } | 2268 | } |
2048 | } else { | 2269 | } else { |
2049 | rec = &iter->pg->records[iter->idx++]; | 2270 | rec = &iter->pg->records[iter->idx++]; |
2050 | if ((rec->flags & FTRACE_FL_FREE) || | 2271 | if (((iter->flags & FTRACE_ITER_FILTER) && |
2051 | |||
2052 | ((iter->flags & FTRACE_ITER_FILTER) && | ||
2053 | !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || | 2272 | !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) || |
2054 | 2273 | ||
2055 | ((iter->flags & FTRACE_ITER_NOTRACE) && | 2274 | ((iter->flags & FTRACE_ITER_NOTRACE) && |
@@ -2081,7 +2300,7 @@ static void reset_iter_read(struct ftrace_iterator *iter) | |||
2081 | static void *t_start(struct seq_file *m, loff_t *pos) | 2300 | static void *t_start(struct seq_file *m, loff_t *pos) |
2082 | { | 2301 | { |
2083 | struct ftrace_iterator *iter = m->private; | 2302 | struct ftrace_iterator *iter = m->private; |
2084 | struct ftrace_ops *ops = &global_ops; | 2303 | struct ftrace_ops *ops = iter->ops; |
2085 | void *p = NULL; | 2304 | void *p = NULL; |
2086 | loff_t l; | 2305 | loff_t l; |
2087 | 2306 | ||
@@ -2101,7 +2320,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
2101 | * off, we can short cut and just print out that all | 2320 | * off, we can short cut and just print out that all |
2102 | * functions are enabled. | 2321 | * functions are enabled. |
2103 | */ | 2322 | */ |
2104 | if (iter->flags & FTRACE_ITER_FILTER && !ops->filter_hash->count) { | 2323 | if (iter->flags & FTRACE_ITER_FILTER && |
2324 | ftrace_hash_empty(ops->filter_hash)) { | ||
2105 | if (*pos > 0) | 2325 | if (*pos > 0) |
2106 | return t_hash_start(m, pos); | 2326 | return t_hash_start(m, pos); |
2107 | iter->flags |= FTRACE_ITER_PRINTALL; | 2327 | iter->flags |= FTRACE_ITER_PRINTALL; |
@@ -2126,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) | |||
2126 | break; | 2346 | break; |
2127 | } | 2347 | } |
2128 | 2348 | ||
2129 | if (!p) { | 2349 | if (!p) |
2130 | if (iter->flags & FTRACE_ITER_FILTER) | 2350 | return t_hash_start(m, pos); |
2131 | return t_hash_start(m, pos); | ||
2132 | |||
2133 | return NULL; | ||
2134 | } | ||
2135 | 2351 | ||
2136 | return iter; | 2352 | return iter; |
2137 | } | 2353 | } |
@@ -2189,6 +2405,7 @@ ftrace_avail_open(struct inode *inode, struct file *file) | |||
2189 | return -ENOMEM; | 2405 | return -ENOMEM; |
2190 | 2406 | ||
2191 | iter->pg = ftrace_pages_start; | 2407 | iter->pg = ftrace_pages_start; |
2408 | iter->ops = &global_ops; | ||
2192 | 2409 | ||
2193 | ret = seq_open(file, &show_ftrace_seq_ops); | 2410 | ret = seq_open(file, &show_ftrace_seq_ops); |
2194 | if (!ret) { | 2411 | if (!ret) { |
@@ -2217,6 +2434,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file) | |||
2217 | 2434 | ||
2218 | iter->pg = ftrace_pages_start; | 2435 | iter->pg = ftrace_pages_start; |
2219 | iter->flags = FTRACE_ITER_ENABLED; | 2436 | iter->flags = FTRACE_ITER_ENABLED; |
2437 | iter->ops = &global_ops; | ||
2220 | 2438 | ||
2221 | ret = seq_open(file, &show_ftrace_seq_ops); | 2439 | ret = seq_open(file, &show_ftrace_seq_ops); |
2222 | if (!ret) { | 2440 | if (!ret) { |
@@ -2237,7 +2455,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash) | |||
2237 | mutex_unlock(&ftrace_lock); | 2455 | mutex_unlock(&ftrace_lock); |
2238 | } | 2456 | } |
2239 | 2457 | ||
2240 | static int | 2458 | /** |
2459 | * ftrace_regex_open - initialize function tracer filter files | ||
2460 | * @ops: The ftrace_ops that hold the hash filters | ||
2461 | * @flag: The type of filter to process | ||
2462 | * @inode: The inode, usually passed in to your open routine | ||
2463 | * @file: The file, usually passed in to your open routine | ||
2464 | * | ||
2465 | * ftrace_regex_open() initializes the filter files for the | ||
2466 | * @ops. Depending on @flag it may process the filter hash or | ||
2467 | * the notrace hash of @ops. With this called from the open | ||
2468 | * routine, you can use ftrace_filter_write() for the write | ||
2469 | * routine if @flag has FTRACE_ITER_FILTER set, or | ||
2470 | * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set. | ||
2471 | * ftrace_regex_lseek() should be used as the lseek routine, and | ||
2472 | * release must call ftrace_regex_release(). | ||
2473 | */ | ||
2474 | int | ||
2241 | ftrace_regex_open(struct ftrace_ops *ops, int flag, | 2475 | ftrace_regex_open(struct ftrace_ops *ops, int flag, |
2242 | struct inode *inode, struct file *file) | 2476 | struct inode *inode, struct file *file) |
2243 | { | 2477 | { |
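Taken together with the write/lseek/release helpers made non-static later in this diff, the intended wiring looks like the following sketch (my_ops, my_filter_open and my_filter_fops are illustrative names; the stack tracer hunk later in this patch does exactly this for its trace_ops):

static int my_filter_open(struct inode *inode, struct file *file)
{
	return ftrace_regex_open(&my_ops, FTRACE_ITER_FILTER, inode, file);
}

static const struct file_operations my_filter_fops = {
	.open    = my_filter_open,
	.read    = seq_read,
	.write   = ftrace_filter_write,
	.llseek  = ftrace_regex_lseek,
	.release = ftrace_regex_release,
};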
@@ -2306,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, | |||
2306 | static int | 2540 | static int |
2307 | ftrace_filter_open(struct inode *inode, struct file *file) | 2541 | ftrace_filter_open(struct inode *inode, struct file *file) |
2308 | { | 2542 | { |
2309 | return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER, | 2543 | return ftrace_regex_open(&global_ops, |
2310 | inode, file); | 2544 | FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH, |
2545 | inode, file); | ||
2311 | } | 2546 | } |
2312 | 2547 | ||
2313 | static int | 2548 | static int |
@@ -2317,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file) | |||
2317 | inode, file); | 2552 | inode, file); |
2318 | } | 2553 | } |
2319 | 2554 | ||
2320 | static loff_t | 2555 | loff_t |
2321 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | 2556 | ftrace_regex_lseek(struct file *file, loff_t offset, int origin) |
2322 | { | 2557 | { |
2323 | loff_t ret; | 2558 | loff_t ret; |
@@ -2426,7 +2661,6 @@ match_records(struct ftrace_hash *hash, char *buff, | |||
2426 | goto out_unlock; | 2661 | goto out_unlock; |
2427 | 2662 | ||
2428 | do_for_each_ftrace_rec(pg, rec) { | 2663 | do_for_each_ftrace_rec(pg, rec) { |
2429 | |||
2430 | if (ftrace_match_record(rec, mod, search, search_len, type)) { | 2664 | if (ftrace_match_record(rec, mod, search, search_len, type)) { |
2431 | ret = enter_record(hash, rec, not); | 2665 | ret = enter_record(hash, rec, not); |
2432 | if (ret < 0) { | 2666 | if (ret < 0) { |
@@ -2871,14 +3105,14 @@ out_unlock: | |||
2871 | return ret; | 3105 | return ret; |
2872 | } | 3106 | } |
2873 | 3107 | ||
2874 | static ssize_t | 3108 | ssize_t |
2875 | ftrace_filter_write(struct file *file, const char __user *ubuf, | 3109 | ftrace_filter_write(struct file *file, const char __user *ubuf, |
2876 | size_t cnt, loff_t *ppos) | 3110 | size_t cnt, loff_t *ppos) |
2877 | { | 3111 | { |
2878 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); | 3112 | return ftrace_regex_write(file, ubuf, cnt, ppos, 1); |
2879 | } | 3113 | } |
2880 | 3114 | ||
2881 | static ssize_t | 3115 | ssize_t |
2882 | ftrace_notrace_write(struct file *file, const char __user *ubuf, | 3116 | ftrace_notrace_write(struct file *file, const char __user *ubuf, |
2883 | size_t cnt, loff_t *ppos) | 3117 | size_t cnt, loff_t *ppos) |
2884 | { | 3118 | { |
@@ -2919,7 +3153,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
2919 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 3153 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
2920 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED | 3154 | if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED |
2921 | && ftrace_enabled) | 3155 | && ftrace_enabled) |
2922 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 3156 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
2923 | 3157 | ||
2924 | mutex_unlock(&ftrace_lock); | 3158 | mutex_unlock(&ftrace_lock); |
2925 | 3159 | ||
@@ -3045,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf) | |||
3045 | } | 3279 | } |
3046 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 3280 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
3047 | 3281 | ||
3048 | static void __init | 3282 | void __init |
3049 | set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) | 3283 | ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) |
3050 | { | 3284 | { |
3051 | char *func; | 3285 | char *func; |
3052 | 3286 | ||
@@ -3059,17 +3293,16 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable) | |||
3059 | static void __init set_ftrace_early_filters(void) | 3293 | static void __init set_ftrace_early_filters(void) |
3060 | { | 3294 | { |
3061 | if (ftrace_filter_buf[0]) | 3295 | if (ftrace_filter_buf[0]) |
3062 | set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1); | 3296 | ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1); |
3063 | if (ftrace_notrace_buf[0]) | 3297 | if (ftrace_notrace_buf[0]) |
3064 | set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0); | 3298 | ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0); |
3065 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 3299 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
3066 | if (ftrace_graph_buf[0]) | 3300 | if (ftrace_graph_buf[0]) |
3067 | set_ftrace_early_graph(ftrace_graph_buf); | 3301 | set_ftrace_early_graph(ftrace_graph_buf); |
3068 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 3302 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
3069 | } | 3303 | } |
3070 | 3304 | ||
3071 | static int | 3305 | int ftrace_regex_release(struct inode *inode, struct file *file) |
3072 | ftrace_regex_release(struct inode *inode, struct file *file) | ||
3073 | { | 3306 | { |
3074 | struct seq_file *m = (struct seq_file *)file->private_data; | 3307 | struct seq_file *m = (struct seq_file *)file->private_data; |
3075 | struct ftrace_iterator *iter; | 3308 | struct ftrace_iterator *iter; |
@@ -3107,7 +3340,7 @@ ftrace_regex_release(struct inode *inode, struct file *file) | |||
3107 | orig_hash, iter->hash); | 3340 | orig_hash, iter->hash); |
3108 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) | 3341 | if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) |
3109 | && ftrace_enabled) | 3342 | && ftrace_enabled) |
3110 | ftrace_run_update_code(FTRACE_ENABLE_CALLS); | 3343 | ftrace_run_update_code(FTRACE_UPDATE_CALLS); |
3111 | 3344 | ||
3112 | mutex_unlock(&ftrace_lock); | 3345 | mutex_unlock(&ftrace_lock); |
3113 | } | 3346 | } |
@@ -3270,9 +3503,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) | |||
3270 | 3503 | ||
3271 | do_for_each_ftrace_rec(pg, rec) { | 3504 | do_for_each_ftrace_rec(pg, rec) { |
3272 | 3505 | ||
3273 | if (rec->flags & FTRACE_FL_FREE) | ||
3274 | continue; | ||
3275 | |||
3276 | if (ftrace_match_record(rec, NULL, search, search_len, type)) { | 3506 | if (ftrace_match_record(rec, NULL, search, search_len, type)) { |
3277 | /* if it is in the array */ | 3507 | /* if it is in the array */ |
3278 | exists = false; | 3508 | exists = false; |
@@ -3381,15 +3611,62 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) | |||
3381 | return 0; | 3611 | return 0; |
3382 | } | 3612 | } |
3383 | 3613 | ||
3614 | static void ftrace_swap_recs(void *a, void *b, int size) | ||
3615 | { | ||
3616 | struct dyn_ftrace *reca = a; | ||
3617 | struct dyn_ftrace *recb = b; | ||
3618 | struct dyn_ftrace t; | ||
3619 | |||
3620 | t = *reca; | ||
3621 | *reca = *recb; | ||
3622 | *recb = t; | ||
3623 | } | ||
3624 | |||
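ftrace_process_locs() below pairs this swap helper with an ip comparator in its call to sort(). The comparator is defined elsewhere in ftrace.c (not shown here); a sketch of the shape it takes, assuming records are ordered by ascending ip:

static int ftrace_cmp_recs(const void *a, const void *b)
{
	const struct dyn_ftrace *reca = a;
	const struct dyn_ftrace *recb = b;

	if (reca->ip > recb->ip)
		return 1;
	if (reca->ip < recb->ip)
		return -1;
	return 0;
}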
3384 | static int ftrace_process_locs(struct module *mod, | 3625 | static int ftrace_process_locs(struct module *mod, |
3385 | unsigned long *start, | 3626 | unsigned long *start, |
3386 | unsigned long *end) | 3627 | unsigned long *end) |
3387 | { | 3628 | { |
3629 | struct ftrace_page *pg; | ||
3630 | unsigned long count; | ||
3388 | unsigned long *p; | 3631 | unsigned long *p; |
3389 | unsigned long addr; | 3632 | unsigned long addr; |
3390 | unsigned long flags = 0; /* Shut up gcc */ | 3633 | unsigned long flags = 0; /* Shut up gcc */ |
3634 | int ret = -ENOMEM; | ||
3635 | |||
3636 | count = end - start; | ||
3637 | |||
3638 | if (!count) | ||
3639 | return 0; | ||
3640 | |||
3641 | pg = ftrace_allocate_pages(count); | ||
3642 | if (!pg) | ||
3643 | return -ENOMEM; | ||
3391 | 3644 | ||
3392 | mutex_lock(&ftrace_lock); | 3645 | mutex_lock(&ftrace_lock); |
3646 | |||
3647 | /* | ||
3648 | * Core and each module need their own pages, as | ||
3649 | * modules will free them when they are removed. | ||
3650 | * Force a new page to be allocated for modules. | ||
3651 | */ | ||
3652 | if (!mod) { | ||
3653 | WARN_ON(ftrace_pages || ftrace_pages_start); | ||
3654 | /* First initialization */ | ||
3655 | ftrace_pages = ftrace_pages_start = pg; | ||
3656 | } else { | ||
3657 | if (!ftrace_pages) | ||
3658 | goto out; | ||
3659 | |||
3660 | if (WARN_ON(ftrace_pages->next)) { | ||
3661 | /* Hmm, we have free pages? */ | ||
3662 | while (ftrace_pages->next) | ||
3663 | ftrace_pages = ftrace_pages->next; | ||
3664 | } | ||
3665 | |||
3666 | ftrace_pages->next = pg; | ||
3667 | ftrace_pages = pg; | ||
3668 | } | ||
3669 | |||
3393 | p = start; | 3670 | p = start; |
3394 | while (p < end) { | 3671 | while (p < end) { |
3395 | addr = ftrace_call_adjust(*p++); | 3672 | addr = ftrace_call_adjust(*p++); |
@@ -3401,9 +3678,18 @@ static int ftrace_process_locs(struct module *mod, | |||
3401 | */ | 3678 | */ |
3402 | if (!addr) | 3679 | if (!addr) |
3403 | continue; | 3680 | continue; |
3404 | ftrace_record_ip(addr); | 3681 | if (!ftrace_record_ip(addr)) |
3682 | break; | ||
3405 | } | 3683 | } |
3406 | 3684 | ||
3685 | /* These new locations need to be initialized */ | ||
3686 | ftrace_new_pgs = pg; | ||
3687 | |||
3688 | /* Make each individual set of pages sorted by ips */ | ||
3689 | for (; pg; pg = pg->next) | ||
3690 | sort(pg->records, pg->index, sizeof(struct dyn_ftrace), | ||
3691 | ftrace_cmp_recs, ftrace_swap_recs); | ||
3692 | |||
3407 | /* | 3693 | /* |
3408 | * We only need to disable interrupts on start up | 3694 | * We only need to disable interrupts on start up |
3409 | * because we are modifying code that an interrupt | 3695 | * because we are modifying code that an interrupt |
@@ -3417,32 +3703,55 @@ static int ftrace_process_locs(struct module *mod, | |||
3417 | ftrace_update_code(mod); | 3703 | ftrace_update_code(mod); |
3418 | if (!mod) | 3704 | if (!mod) |
3419 | local_irq_restore(flags); | 3705 | local_irq_restore(flags); |
3706 | ret = 0; | ||
3707 | out: | ||
3420 | mutex_unlock(&ftrace_lock); | 3708 | mutex_unlock(&ftrace_lock); |
3421 | 3709 | ||
3422 | return 0; | 3710 | return ret; |
3423 | } | 3711 | } |
3424 | 3712 | ||
3425 | #ifdef CONFIG_MODULES | 3713 | #ifdef CONFIG_MODULES |
3714 | |||
3715 | #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) | ||
3716 | |||
3426 | void ftrace_release_mod(struct module *mod) | 3717 | void ftrace_release_mod(struct module *mod) |
3427 | { | 3718 | { |
3428 | struct dyn_ftrace *rec; | 3719 | struct dyn_ftrace *rec; |
3720 | struct ftrace_page **last_pg; | ||
3429 | struct ftrace_page *pg; | 3721 | struct ftrace_page *pg; |
3722 | int order; | ||
3430 | 3723 | ||
3431 | mutex_lock(&ftrace_lock); | 3724 | mutex_lock(&ftrace_lock); |
3432 | 3725 | ||
3433 | if (ftrace_disabled) | 3726 | if (ftrace_disabled) |
3434 | goto out_unlock; | 3727 | goto out_unlock; |
3435 | 3728 | ||
3436 | do_for_each_ftrace_rec(pg, rec) { | 3729 | /* |
3730 | * Each module has its own ftrace_pages; remove | ||
3731 | * them from the list. | ||
3732 | */ | ||
3733 | last_pg = &ftrace_pages_start; | ||
3734 | for (pg = ftrace_pages_start; pg; pg = *last_pg) { | ||
3735 | rec = &pg->records[0]; | ||
3437 | if (within_module_core(rec->ip, mod)) { | 3736 | if (within_module_core(rec->ip, mod)) { |
3438 | /* | 3737 | /* |
3439 | * rec->ip is changed in ftrace_free_rec() | 3738 | * As core pages are first, the first |
3440 | * It should not between s and e if record was freed. | 3739 | * page should never be a module page. |
3441 | */ | 3740 | */ |
3442 | FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); | 3741 | if (WARN_ON(pg == ftrace_pages_start)) |
3443 | ftrace_free_rec(rec); | 3742 | goto out_unlock; |
3444 | } | 3743 | |
3445 | } while_for_each_ftrace_rec(); | 3744 | /* Check if we are deleting the last page */ |
3745 | if (pg == ftrace_pages) | ||
3746 | ftrace_pages = next_to_ftrace_page(last_pg); | ||
3747 | |||
3748 | *last_pg = pg->next; | ||
3749 | order = get_count_order(pg->size / ENTRIES_PER_PAGE); | ||
3750 | free_pages((unsigned long)pg->records, order); | ||
3751 | kfree(pg); | ||
3752 | } else | ||
3753 | last_pg = &pg->next; | ||
3754 | } | ||
3446 | out_unlock: | 3755 | out_unlock: |
3447 | mutex_unlock(&ftrace_lock); | 3756 | mutex_unlock(&ftrace_lock); |
3448 | } | 3757 | } |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f04cc3136bd3..24aee7127451 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -1738,11 +1738,121 @@ static int replace_system_preds(struct event_subsystem *system, | |||
1738 | return -ENOMEM; | 1738 | return -ENOMEM; |
1739 | } | 1739 | } |
1740 | 1740 | ||
1741 | static int create_filter_start(char *filter_str, bool set_str, | ||
1742 | struct filter_parse_state **psp, | ||
1743 | struct event_filter **filterp) | ||
1744 | { | ||
1745 | struct event_filter *filter; | ||
1746 | struct filter_parse_state *ps = NULL; | ||
1747 | int err = 0; | ||
1748 | |||
1749 | WARN_ON_ONCE(*psp || *filterp); | ||
1750 | |||
1751 | /* allocate everything, and if any fails, free all and fail */ | ||
1752 | filter = __alloc_filter(); | ||
1753 | if (filter && set_str) | ||
1754 | err = replace_filter_string(filter, filter_str); | ||
1755 | |||
1756 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1757 | |||
1758 | if (!filter || !ps || err) { | ||
1759 | kfree(ps); | ||
1760 | __free_filter(filter); | ||
1761 | return -ENOMEM; | ||
1762 | } | ||
1763 | |||
1764 | /* we're committed to creating a new filter */ | ||
1765 | *filterp = filter; | ||
1766 | *psp = ps; | ||
1767 | |||
1768 | parse_init(ps, filter_ops, filter_str); | ||
1769 | err = filter_parse(ps); | ||
1770 | if (err && set_str) | ||
1771 | append_filter_err(ps, filter); | ||
1772 | return err; | ||
1773 | } | ||
1774 | |||
1775 | static void create_filter_finish(struct filter_parse_state *ps) | ||
1776 | { | ||
1777 | if (ps) { | ||
1778 | filter_opstack_clear(ps); | ||
1779 | postfix_clear(ps); | ||
1780 | kfree(ps); | ||
1781 | } | ||
1782 | } | ||
1783 | |||
1784 | /** | ||
1785 | * create_filter - create a filter for a ftrace_event_call | ||
1786 | * @call: ftrace_event_call to create a filter for | ||
1787 | * @filter_str: filter string | ||
1788 | * @set_str: remember @filter_str and enable detailed error in filter | ||
1789 | * @filterp: out param for created filter (always updated on return) | ||
1790 | * | ||
1791 | * Creates a filter for @call with @filter_str. If @set_str is %true, | ||
1792 | * @filter_str is copied and recorded in the new filter. | ||
1793 | * | ||
1794 | * On success, returns 0 and *@filterp points to the new filter. On | ||
1795 | * failure, returns -errno and *@filterp may point to %NULL or to a new | ||
1796 | * filter. In the latter case, the returned filter contains error | ||
1797 | * information if @set_str is %true and the caller is responsible for | ||
1798 | * freeing it. | ||
1799 | */ | ||
1800 | static int create_filter(struct ftrace_event_call *call, | ||
1801 | char *filter_str, bool set_str, | ||
1802 | struct event_filter **filterp) | ||
1803 | { | ||
1804 | struct event_filter *filter = NULL; | ||
1805 | struct filter_parse_state *ps = NULL; | ||
1806 | int err; | ||
1807 | |||
1808 | err = create_filter_start(filter_str, set_str, &ps, &filter); | ||
1809 | if (!err) { | ||
1810 | err = replace_preds(call, filter, ps, filter_str, false); | ||
1811 | if (err && set_str) | ||
1812 | append_filter_err(ps, filter); | ||
1813 | } | ||
1814 | create_filter_finish(ps); | ||
1815 | |||
1816 | *filterp = filter; | ||
1817 | return err; | ||
1818 | } | ||
1819 | |||
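A minimal sketch of the caller-side contract documented above (function name and filter string are illustrative; the free-on-error handling mirrors the self-test change later in this diff, which relies on __free_filter() being NULL-safe):

static int my_apply_filter(struct ftrace_event_call *call)
{
	struct event_filter *filter = NULL;
	int err;

	err = create_filter(call, "common_pid != 0", true, &filter);
	if (err) {
		/* on failure *filterp may still be set, carrying the error string */
		__free_filter(filter);
		return err;
	}

	/* success: 'filter' is live and owned by this caller from here on */
	rcu_assign_pointer(call->filter, filter);
	return 0;
}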
1820 | /** | ||
1821 | * create_system_filter - create a filter for an event_subsystem | ||
1822 | * @system: event_subsystem to create a filter for | ||
1823 | * @filter_str: filter string | ||
1824 | * @filterp: out param for created filter (always updated on return) | ||
1825 | * | ||
1826 | * Identical to create_filter() except that it creates a subsystem filter | ||
1827 | * and always remembers @filter_str. | ||
1828 | */ | ||
1829 | static int create_system_filter(struct event_subsystem *system, | ||
1830 | char *filter_str, struct event_filter **filterp) | ||
1831 | { | ||
1832 | struct event_filter *filter = NULL; | ||
1833 | struct filter_parse_state *ps = NULL; | ||
1834 | int err; | ||
1835 | |||
1836 | err = create_filter_start(filter_str, true, &ps, &filter); | ||
1837 | if (!err) { | ||
1838 | err = replace_system_preds(system, ps, filter_str); | ||
1839 | if (!err) { | ||
1840 | /* System filters just show a default message */ | ||
1841 | kfree(filter->filter_string); | ||
1842 | filter->filter_string = NULL; | ||
1843 | } else { | ||
1844 | append_filter_err(ps, filter); | ||
1845 | } | ||
1846 | } | ||
1847 | create_filter_finish(ps); | ||
1848 | |||
1849 | *filterp = filter; | ||
1850 | return err; | ||
1851 | } | ||
1852 | |||
1741 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | 1853 | int apply_event_filter(struct ftrace_event_call *call, char *filter_string) |
1742 | { | 1854 | { |
1743 | struct filter_parse_state *ps; | ||
1744 | struct event_filter *filter; | 1855 | struct event_filter *filter; |
1745 | struct event_filter *tmp; | ||
1746 | int err = 0; | 1856 | int err = 0; |
1747 | 1857 | ||
1748 | mutex_lock(&event_mutex); | 1858 | mutex_lock(&event_mutex); |
@@ -1759,49 +1869,30 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) | |||
1759 | goto out_unlock; | 1869 | goto out_unlock; |
1760 | } | 1870 | } |
1761 | 1871 | ||
1762 | err = -ENOMEM; | 1872 | err = create_filter(call, filter_string, true, &filter); |
1763 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1764 | if (!ps) | ||
1765 | goto out_unlock; | ||
1766 | |||
1767 | filter = __alloc_filter(); | ||
1768 | if (!filter) { | ||
1769 | kfree(ps); | ||
1770 | goto out_unlock; | ||
1771 | } | ||
1772 | |||
1773 | replace_filter_string(filter, filter_string); | ||
1774 | |||
1775 | parse_init(ps, filter_ops, filter_string); | ||
1776 | err = filter_parse(ps); | ||
1777 | if (err) { | ||
1778 | append_filter_err(ps, filter); | ||
1779 | goto out; | ||
1780 | } | ||
1781 | 1873 | ||
1782 | err = replace_preds(call, filter, ps, filter_string, false); | ||
1783 | if (err) { | ||
1784 | filter_disable(call); | ||
1785 | append_filter_err(ps, filter); | ||
1786 | } else | ||
1787 | call->flags |= TRACE_EVENT_FL_FILTERED; | ||
1788 | out: | ||
1789 | /* | 1874 | /* |
1790 | * Always swap the call filter with the new filter | 1875 | * Always swap the call filter with the new filter |
1791 | * even if there was an error. If there was an error | 1876 | * even if there was an error. If there was an error |
1792 | * in the filter, we disable the filter and show the error | 1877 | * in the filter, we disable the filter and show the error |
1793 | * string | 1878 | * string |
1794 | */ | 1879 | */ |
1795 | tmp = call->filter; | 1880 | if (filter) { |
1796 | rcu_assign_pointer(call->filter, filter); | 1881 | struct event_filter *tmp = call->filter; |
1797 | if (tmp) { | 1882 | |
1798 | /* Make sure the call is done with the filter */ | 1883 | if (!err) |
1799 | synchronize_sched(); | 1884 | call->flags |= TRACE_EVENT_FL_FILTERED; |
1800 | __free_filter(tmp); | 1885 | else |
1886 | filter_disable(call); | ||
1887 | |||
1888 | rcu_assign_pointer(call->filter, filter); | ||
1889 | |||
1890 | if (tmp) { | ||
1891 | /* Make sure the call is done with the filter */ | ||
1892 | synchronize_sched(); | ||
1893 | __free_filter(tmp); | ||
1894 | } | ||
1801 | } | 1895 | } |
1802 | filter_opstack_clear(ps); | ||
1803 | postfix_clear(ps); | ||
1804 | kfree(ps); | ||
1805 | out_unlock: | 1896 | out_unlock: |
1806 | mutex_unlock(&event_mutex); | 1897 | mutex_unlock(&event_mutex); |
1807 | 1898 | ||
@@ -1811,7 +1902,6 @@ out_unlock: | |||
1811 | int apply_subsystem_event_filter(struct event_subsystem *system, | 1902 | int apply_subsystem_event_filter(struct event_subsystem *system, |
1812 | char *filter_string) | 1903 | char *filter_string) |
1813 | { | 1904 | { |
1814 | struct filter_parse_state *ps; | ||
1815 | struct event_filter *filter; | 1905 | struct event_filter *filter; |
1816 | int err = 0; | 1906 | int err = 0; |
1817 | 1907 | ||
@@ -1835,48 +1925,19 @@ int apply_subsystem_event_filter(struct event_subsystem *system, | |||
1835 | goto out_unlock; | 1925 | goto out_unlock; |
1836 | } | 1926 | } |
1837 | 1927 | ||
1838 | err = -ENOMEM; | 1928 | err = create_system_filter(system, filter_string, &filter); |
1839 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | 1929 | if (filter) { |
1840 | if (!ps) | 1930 | /* |
1841 | goto out_unlock; | 1931 | * No event actually uses the system filter |
1842 | 1932 | * we can free it without synchronize_sched(). | |
1843 | filter = __alloc_filter(); | 1933 | */ |
1844 | if (!filter) | 1934 | __free_filter(system->filter); |
1845 | goto out; | 1935 | system->filter = filter; |
1846 | 1936 | } | |
1847 | /* System filters just show a default message */ | ||
1848 | kfree(filter->filter_string); | ||
1849 | filter->filter_string = NULL; | ||
1850 | |||
1851 | /* | ||
1852 | * No event actually uses the system filter | ||
1853 | * we can free it without synchronize_sched(). | ||
1854 | */ | ||
1855 | __free_filter(system->filter); | ||
1856 | system->filter = filter; | ||
1857 | |||
1858 | parse_init(ps, filter_ops, filter_string); | ||
1859 | err = filter_parse(ps); | ||
1860 | if (err) | ||
1861 | goto err_filter; | ||
1862 | |||
1863 | err = replace_system_preds(system, ps, filter_string); | ||
1864 | if (err) | ||
1865 | goto err_filter; | ||
1866 | |||
1867 | out: | ||
1868 | filter_opstack_clear(ps); | ||
1869 | postfix_clear(ps); | ||
1870 | kfree(ps); | ||
1871 | out_unlock: | 1937 | out_unlock: |
1872 | mutex_unlock(&event_mutex); | 1938 | mutex_unlock(&event_mutex); |
1873 | 1939 | ||
1874 | return err; | 1940 | return err; |
1875 | |||
1876 | err_filter: | ||
1877 | replace_filter_string(filter, filter_string); | ||
1878 | append_filter_err(ps, system->filter); | ||
1879 | goto out; | ||
1880 | } | 1941 | } |
1881 | 1942 | ||
1882 | #ifdef CONFIG_PERF_EVENTS | 1943 | #ifdef CONFIG_PERF_EVENTS |
@@ -1894,7 +1955,6 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
1894 | { | 1955 | { |
1895 | int err; | 1956 | int err; |
1896 | struct event_filter *filter; | 1957 | struct event_filter *filter; |
1897 | struct filter_parse_state *ps; | ||
1898 | struct ftrace_event_call *call; | 1958 | struct ftrace_event_call *call; |
1899 | 1959 | ||
1900 | mutex_lock(&event_mutex); | 1960 | mutex_lock(&event_mutex); |
@@ -1909,33 +1969,10 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
1909 | if (event->filter) | 1969 | if (event->filter) |
1910 | goto out_unlock; | 1970 | goto out_unlock; |
1911 | 1971 | ||
1912 | filter = __alloc_filter(); | 1972 | err = create_filter(call, filter_str, false, &filter); |
1913 | if (!filter) { | ||
1914 | err = PTR_ERR(filter); | ||
1915 | goto out_unlock; | ||
1916 | } | ||
1917 | |||
1918 | err = -ENOMEM; | ||
1919 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1920 | if (!ps) | ||
1921 | goto free_filter; | ||
1922 | |||
1923 | parse_init(ps, filter_ops, filter_str); | ||
1924 | err = filter_parse(ps); | ||
1925 | if (err) | ||
1926 | goto free_ps; | ||
1927 | |||
1928 | err = replace_preds(call, filter, ps, filter_str, false); | ||
1929 | if (!err) | 1973 | if (!err) |
1930 | event->filter = filter; | 1974 | event->filter = filter; |
1931 | 1975 | else | |
1932 | free_ps: | ||
1933 | filter_opstack_clear(ps); | ||
1934 | postfix_clear(ps); | ||
1935 | kfree(ps); | ||
1936 | |||
1937 | free_filter: | ||
1938 | if (err) | ||
1939 | __free_filter(filter); | 1976 | __free_filter(filter); |
1940 | 1977 | ||
1941 | out_unlock: | 1978 | out_unlock: |
@@ -1954,43 +1991,6 @@ out_unlock: | |||
1954 | #define CREATE_TRACE_POINTS | 1991 | #define CREATE_TRACE_POINTS |
1955 | #include "trace_events_filter_test.h" | 1992 | #include "trace_events_filter_test.h" |
1956 | 1993 | ||
1957 | static int test_get_filter(char *filter_str, struct ftrace_event_call *call, | ||
1958 | struct event_filter **pfilter) | ||
1959 | { | ||
1960 | struct event_filter *filter; | ||
1961 | struct filter_parse_state *ps; | ||
1962 | int err = -ENOMEM; | ||
1963 | |||
1964 | filter = __alloc_filter(); | ||
1965 | if (!filter) | ||
1966 | goto out; | ||
1967 | |||
1968 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | ||
1969 | if (!ps) | ||
1970 | goto free_filter; | ||
1971 | |||
1972 | parse_init(ps, filter_ops, filter_str); | ||
1973 | err = filter_parse(ps); | ||
1974 | if (err) | ||
1975 | goto free_ps; | ||
1976 | |||
1977 | err = replace_preds(call, filter, ps, filter_str, false); | ||
1978 | if (!err) | ||
1979 | *pfilter = filter; | ||
1980 | |||
1981 | free_ps: | ||
1982 | filter_opstack_clear(ps); | ||
1983 | postfix_clear(ps); | ||
1984 | kfree(ps); | ||
1985 | |||
1986 | free_filter: | ||
1987 | if (err) | ||
1988 | __free_filter(filter); | ||
1989 | |||
1990 | out: | ||
1991 | return err; | ||
1992 | } | ||
1993 | |||
1994 | #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ | 1994 | #define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ |
1995 | { \ | 1995 | { \ |
1996 | .filter = FILTER, \ | 1996 | .filter = FILTER, \ |
@@ -2109,12 +2109,13 @@ static __init int ftrace_test_event_filter(void) | |||
2109 | struct test_filter_data_t *d = &test_filter_data[i]; | 2109 | struct test_filter_data_t *d = &test_filter_data[i]; |
2110 | int err; | 2110 | int err; |
2111 | 2111 | ||
2112 | err = test_get_filter(d->filter, &event_ftrace_test_filter, | 2112 | err = create_filter(&event_ftrace_test_filter, d->filter, |
2113 | &filter); | 2113 | false, &filter); |
2114 | if (err) { | 2114 | if (err) { |
2115 | printk(KERN_INFO | 2115 | printk(KERN_INFO |
2116 | "Failed to get filter for '%s', err %d\n", | 2116 | "Failed to get filter for '%s', err %d\n", |
2117 | d->filter, err); | 2117 | d->filter, err); |
2118 | __free_filter(filter); | ||
2118 | break; | 2119 | break; |
2119 | } | 2120 | } |
2120 | 2121 | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 77575b386d97..d4545f49242e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -13,6 +13,9 @@ | |||
13 | #include <linux/sysctl.h> | 13 | #include <linux/sysctl.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/fs.h> | 15 | #include <linux/fs.h> |
16 | |||
17 | #include <asm/setup.h> | ||
18 | |||
16 | #include "trace.h" | 19 | #include "trace.h" |
17 | 20 | ||
18 | #define STACK_TRACE_ENTRIES 500 | 21 | #define STACK_TRACE_ENTRIES 500 |
@@ -133,7 +136,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) | |||
133 | static struct ftrace_ops trace_ops __read_mostly = | 136 | static struct ftrace_ops trace_ops __read_mostly = |
134 | { | 137 | { |
135 | .func = stack_trace_call, | 138 | .func = stack_trace_call, |
136 | .flags = FTRACE_OPS_FL_GLOBAL, | ||
137 | }; | 139 | }; |
138 | 140 | ||
139 | static ssize_t | 141 | static ssize_t |
@@ -311,6 +313,21 @@ static const struct file_operations stack_trace_fops = { | |||
311 | .release = seq_release, | 313 | .release = seq_release, |
312 | }; | 314 | }; |
313 | 315 | ||
316 | static int | ||
317 | stack_trace_filter_open(struct inode *inode, struct file *file) | ||
318 | { | ||
319 | return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER, | ||
320 | inode, file); | ||
321 | } | ||
322 | |||
323 | static const struct file_operations stack_trace_filter_fops = { | ||
324 | .open = stack_trace_filter_open, | ||
325 | .read = seq_read, | ||
326 | .write = ftrace_filter_write, | ||
327 | .llseek = ftrace_regex_lseek, | ||
328 | .release = ftrace_regex_release, | ||
329 | }; | ||
330 | |||
314 | int | 331 | int |
315 | stack_trace_sysctl(struct ctl_table *table, int write, | 332 | stack_trace_sysctl(struct ctl_table *table, int write, |
316 | void __user *buffer, size_t *lenp, | 333 | void __user *buffer, size_t *lenp, |
@@ -338,8 +355,13 @@ stack_trace_sysctl(struct ctl_table *table, int write, | |||
338 | return ret; | 355 | return ret; |
339 | } | 356 | } |
340 | 357 | ||
358 | static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata; | ||
359 | |||
341 | static __init int enable_stacktrace(char *str) | 360 | static __init int enable_stacktrace(char *str) |
342 | { | 361 | { |
362 | if (strncmp(str, "_filter=", 8) == 0) | ||
363 | strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE); | ||
364 | |||
343 | stack_tracer_enabled = 1; | 365 | stack_tracer_enabled = 1; |
344 | last_stack_tracer_enabled = 1; | 366 | last_stack_tracer_enabled = 1; |
345 | return 1; | 367 | return 1; |
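A hedged example of the boot path added above (the traced function names are illustrative): booting with

	stacktrace_filter=kmem_cache_alloc,kmem_cache_free

reaches enable_stacktrace() with str pointing at "_filter=...", so the list is copied into stack_trace_filter_buf, the stack tracer is enabled, and stack_trace_init() later hands the buffer to ftrace_set_early_filter() for the tracer's trace_ops.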
@@ -358,6 +380,12 @@ static __init int stack_trace_init(void) | |||
358 | trace_create_file("stack_trace", 0444, d_tracer, | 380 | trace_create_file("stack_trace", 0444, d_tracer, |
359 | NULL, &stack_trace_fops); | 381 | NULL, &stack_trace_fops); |
360 | 382 | ||
383 | trace_create_file("stack_trace_filter", 0444, d_tracer, | ||
384 | NULL, &stack_trace_filter_fops); | ||
385 | |||
386 | if (stack_trace_filter_buf[0]) | ||
387 | ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1); | ||
388 | |||
361 | if (stack_tracer_enabled) | 389 | if (stack_tracer_enabled) |
362 | register_ftrace_function(&trace_ops); | 390 | register_ftrace_function(&trace_ops); |
363 | 391 | ||
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index f40a6af6bf40..54e35c1e5948 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h | |||
@@ -462,7 +462,7 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ | |||
462 | succeed_file(); | 462 | succeed_file(); |
463 | } | 463 | } |
464 | if (w(txthdr->sh_type) != SHT_PROGBITS || | 464 | if (w(txthdr->sh_type) != SHT_PROGBITS || |
465 | !(w(txthdr->sh_flags) & SHF_EXECINSTR)) | 465 | !(_w(txthdr->sh_flags) & SHF_EXECINSTR)) |
466 | return NULL; | 466 | return NULL; |
467 | return txtname; | 467 | return txtname; |
468 | } | 468 | } |
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 7a527f7e9da9..ddc22525228d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt | |||
@@ -21,6 +21,8 @@ EVENT MODIFIERS | |||
21 | Events can optionally have a modifier by appending a colon and one or | 21 | Events can optionally have a modifier by appending a colon and one or |
22 | more modifiers. Modifiers allow the user to restrict when events are | 22 | more modifiers. Modifiers allow the user to restrict when events are |
23 | counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. | 23 | counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. |
24 | Additional modifiers are 'G' for guest counting (in KVM guests) and 'H' | ||
25 | for host counting (not in KVM guests). | ||
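26 | For example (assuming a KVM guest is running), 'perf stat -e cycles:G -a sleep 1' | ||
27 | counts cycles spent in guest mode only. | ||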
24 | 26 | ||
25 | The 'p' modifier can be used for specifying how precise the instruction | 27 | The 'p' modifier can be used for specifying how precise the instruction |
26 | address should be. The 'p' modifier is currently only implemented for | 28 | address should be. The 'p' modifier is currently only implemented for |
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index c12659d8cb26..1078c5fadd5b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST | |||
@@ -1,4 +1,5 @@ | |||
1 | tools/perf | 1 | tools/perf |
2 | include/linux/const.h | ||
2 | include/linux/perf_event.h | 3 | include/linux/perf_event.h |
3 | include/linux/rbtree.h | 4 | include/linux/rbtree.h |
4 | include/linux/list.h | 5 | include/linux/list.h |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 214ba7f9f577..806e0a286634 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -235,7 +235,7 @@ out_delete: | |||
235 | } | 235 | } |
236 | 236 | ||
237 | static const char * const annotate_usage[] = { | 237 | static const char * const annotate_usage[] = { |
238 | "perf annotate [<options>] <command>", | 238 | "perf annotate [<options>]", |
239 | NULL | 239 | NULL |
240 | }; | 240 | }; |
241 | 241 | ||
@@ -313,10 +313,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) | |||
313 | annotate.sym_hist_filter = argv[0]; | 313 | annotate.sym_hist_filter = argv[0]; |
314 | } | 314 | } |
315 | 315 | ||
316 | if (field_sep && *field_sep == '.') { | ||
317 | pr_err("'.' is the only non valid --field-separator argument\n"); | ||
318 | return -1; | ||
319 | } | ||
320 | |||
321 | return __cmd_annotate(&annotate); | 316 | return __cmd_annotate(&annotate); |
322 | } | 317 | } |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index fe1ad8f21961..39104c0beea3 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
@@ -108,7 +108,9 @@ static void setup_cpunode_map(void) | |||
108 | continue; | 108 | continue; |
109 | cpunode_map[cpu] = mem; | 109 | cpunode_map[cpu] = mem; |
110 | } | 110 | } |
111 | closedir(dir2); | ||
111 | } | 112 | } |
113 | closedir(dir1); | ||
112 | } | 114 | } |
113 | 115 | ||
114 | static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, | 116 | static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, |
@@ -645,6 +647,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) | |||
645 | break; | 647 | break; |
646 | if (sort_dimension__add(tok, sort_list) < 0) { | 648 | if (sort_dimension__add(tok, sort_list) < 0) { |
647 | error("Unknown --sort key: '%s'", tok); | 649 | error("Unknown --sort key: '%s'", tok); |
650 | free(str); | ||
648 | return -1; | 651 | return -1; |
649 | } | 652 | } |
650 | } | 653 | } |
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 032324a76b87..9fc6e0fa3dce 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c | |||
@@ -22,9 +22,6 @@ | |||
22 | static const char *file_name; | 22 | static const char *file_name; |
23 | static char name_buffer[256]; | 23 | static char name_buffer[256]; |
24 | 24 | ||
25 | bool perf_host = 1; | ||
26 | bool perf_guest; | ||
27 | |||
28 | static const char * const kvm_usage[] = { | 25 | static const char * const kvm_usage[] = { |
29 | "perf kvm [<options>] {top|record|report|diff|buildid-list}", | 26 | "perf kvm [<options>] {top|record|report|diff|buildid-list}", |
30 | NULL | 27 | NULL |
@@ -107,7 +104,8 @@ static int __cmd_buildid_list(int argc, const char **argv) | |||
107 | 104 | ||
108 | int cmd_kvm(int argc, const char **argv, const char *prefix __used) | 105 | int cmd_kvm(int argc, const char **argv, const char *prefix __used) |
109 | { | 106 | { |
110 | perf_host = perf_guest = 0; | 107 | perf_host = 0; |
108 | perf_guest = 1; | ||
111 | 109 | ||
112 | argc = parse_options(argc, argv, kvm_options, kvm_usage, | 110 | argc = parse_options(argc, argv, kvm_options, kvm_usage, |
113 | PARSE_OPT_STOP_AT_NON_OPTION); | 111 | PARSE_OPT_STOP_AT_NON_OPTION); |
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fd1909afcfd6..bb68ddf257b7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -1018,13 +1018,17 @@ static char *get_script_path(const char *script_root, const char *suffix) | |||
1018 | __script_root = get_script_root(&script_dirent, suffix); | 1018 | __script_root = get_script_root(&script_dirent, suffix); |
1019 | if (__script_root && !strcmp(script_root, __script_root)) { | 1019 | if (__script_root && !strcmp(script_root, __script_root)) { |
1020 | free(__script_root); | 1020 | free(__script_root); |
1021 | closedir(lang_dir); | ||
1022 | closedir(scripts_dir); | ||
1021 | snprintf(script_path, MAXPATHLEN, "%s/%s", | 1023 | snprintf(script_path, MAXPATHLEN, "%s/%s", |
1022 | lang_path, script_dirent.d_name); | 1024 | lang_path, script_dirent.d_name); |
1023 | return strdup(script_path); | 1025 | return strdup(script_path); |
1024 | } | 1026 | } |
1025 | free(__script_root); | 1027 | free(__script_root); |
1026 | } | 1028 | } |
1029 | closedir(lang_dir); | ||
1027 | } | 1030 | } |
1031 | closedir(scripts_dir); | ||
1028 | 1032 | ||
1029 | return NULL; | 1033 | return NULL; |
1030 | } | 1034 | } |
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 2b9a7f497a20..3854e869dce1 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c | |||
@@ -1396,7 +1396,7 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) | |||
1396 | NULL, | 1396 | NULL, |
1397 | }; | 1397 | }; |
1398 | const struct option test_options[] = { | 1398 | const struct option test_options[] = { |
1399 | OPT_INTEGER('v', "verbose", &verbose, | 1399 | OPT_INCR('v', "verbose", &verbose, |
1400 | "be more verbose (show symbol address, etc)"), | 1400 | "be more verbose (show symbol address, etc)"), |
1401 | OPT_END() | 1401 | OPT_END() |
1402 | }; | 1402 | }; |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4f81eeb99875..8f80df896038 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -235,7 +235,6 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, | |||
235 | if (he == NULL) | 235 | if (he == NULL) |
236 | return NULL; | 236 | return NULL; |
237 | 237 | ||
238 | evsel->hists.stats.total_period += sample->period; | ||
239 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | 238 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); |
240 | return he; | 239 | return he; |
241 | } | 240 | } |
@@ -889,6 +888,10 @@ try_again: | |||
889 | ui__warning("The %s event is not supported.\n", | 888 | ui__warning("The %s event is not supported.\n", |
890 | event_name(counter)); | 889 | event_name(counter)); |
891 | goto out_err; | 890 | goto out_err; |
891 | } else if (err == EMFILE) { | ||
892 | ui__warning("Too many events are opened.\n" | ||
893 | "Try again after reducing the number of events\n"); | ||
894 | goto out_err; | ||
892 | } | 895 | } |
893 | 896 | ||
894 | ui__warning("The sys_perf_event_open() syscall " | 897 | ui__warning("The sys_perf_event_open() syscall " |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index fa1837088ca8..3f16e08a5c8d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -111,8 +111,11 @@ int perf_evlist__add_default(struct perf_evlist *evlist) | |||
111 | .type = PERF_TYPE_HARDWARE, | 111 | .type = PERF_TYPE_HARDWARE, |
112 | .config = PERF_COUNT_HW_CPU_CYCLES, | 112 | .config = PERF_COUNT_HW_CPU_CYCLES, |
113 | }; | 113 | }; |
114 | struct perf_evsel *evsel = perf_evsel__new(&attr, 0); | 114 | struct perf_evsel *evsel; |
115 | |||
116 | event_attr_init(&attr); | ||
115 | 117 | ||
118 | evsel = perf_evsel__new(&attr, 0); | ||
116 | if (evsel == NULL) | 119 | if (evsel == NULL) |
117 | goto error; | 120 | goto error; |
118 | 121 | ||
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index abef2703cd24..6f505d1abac7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -76,21 +76,21 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
76 | } | 76 | } |
77 | } | 77 | } |
78 | 78 | ||
79 | static void hist_entry__add_cpumode_period(struct hist_entry *self, | 79 | static void hist_entry__add_cpumode_period(struct hist_entry *he, |
80 | unsigned int cpumode, u64 period) | 80 | unsigned int cpumode, u64 period) |
81 | { | 81 | { |
82 | switch (cpumode) { | 82 | switch (cpumode) { |
83 | case PERF_RECORD_MISC_KERNEL: | 83 | case PERF_RECORD_MISC_KERNEL: |
84 | self->period_sys += period; | 84 | he->period_sys += period; |
85 | break; | 85 | break; |
86 | case PERF_RECORD_MISC_USER: | 86 | case PERF_RECORD_MISC_USER: |
87 | self->period_us += period; | 87 | he->period_us += period; |
88 | break; | 88 | break; |
89 | case PERF_RECORD_MISC_GUEST_KERNEL: | 89 | case PERF_RECORD_MISC_GUEST_KERNEL: |
90 | self->period_guest_sys += period; | 90 | he->period_guest_sys += period; |
91 | break; | 91 | break; |
92 | case PERF_RECORD_MISC_GUEST_USER: | 92 | case PERF_RECORD_MISC_GUEST_USER: |
93 | self->period_guest_us += period; | 93 | he->period_guest_us += period; |
94 | break; | 94 | break; |
95 | default: | 95 | default: |
96 | break; | 96 | break; |
@@ -165,18 +165,18 @@ void hists__decay_entries_threaded(struct hists *hists, | |||
165 | static struct hist_entry *hist_entry__new(struct hist_entry *template) | 165 | static struct hist_entry *hist_entry__new(struct hist_entry *template) |
166 | { | 166 | { |
167 | size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; | 167 | size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; |
168 | struct hist_entry *self = malloc(sizeof(*self) + callchain_size); | 168 | struct hist_entry *he = malloc(sizeof(*he) + callchain_size); |
169 | 169 | ||
170 | if (self != NULL) { | 170 | if (he != NULL) { |
171 | *self = *template; | 171 | *he = *template; |
172 | self->nr_events = 1; | 172 | he->nr_events = 1; |
173 | if (self->ms.map) | 173 | if (he->ms.map) |
174 | self->ms.map->referenced = true; | 174 | he->ms.map->referenced = true; |
175 | if (symbol_conf.use_callchain) | 175 | if (symbol_conf.use_callchain) |
176 | callchain_init(self->callchain); | 176 | callchain_init(he->callchain); |
177 | } | 177 | } |
178 | 178 | ||
179 | return self; | 179 | return he; |
180 | } | 180 | } |
181 | 181 | ||
182 | static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) | 182 | static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) |
@@ -677,15 +677,16 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, | |||
677 | return ret; | 677 | return ret; |
678 | } | 678 | } |
679 | 679 | ||
680 | static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, | 680 | static size_t hist_entry_callchain__fprintf(struct hist_entry *he, |
681 | u64 total_samples, int left_margin) | 681 | u64 total_samples, int left_margin, |
682 | FILE *fp) | ||
682 | { | 683 | { |
683 | struct rb_node *rb_node; | 684 | struct rb_node *rb_node; |
684 | struct callchain_node *chain; | 685 | struct callchain_node *chain; |
685 | size_t ret = 0; | 686 | size_t ret = 0; |
686 | u32 entries_printed = 0; | 687 | u32 entries_printed = 0; |
687 | 688 | ||
688 | rb_node = rb_first(&self->sorted_chain); | 689 | rb_node = rb_first(&he->sorted_chain); |
689 | while (rb_node) { | 690 | while (rb_node) { |
690 | double percent; | 691 | double percent; |
691 | 692 | ||
@@ -730,35 +731,35 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows) | |||
730 | } | 731 | } |
731 | } | 732 | } |
732 | 733 | ||
733 | static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, | 734 | static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, |
734 | size_t size, struct hists *pair_hists, | 735 | size_t size, struct hists *pair_hists, |
735 | bool show_displacement, long displacement, | 736 | bool show_displacement, long displacement, |
736 | bool color, u64 session_total) | 737 | bool color, u64 total_period) |
737 | { | 738 | { |
738 | u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; | 739 | u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; |
739 | u64 nr_events; | 740 | u64 nr_events; |
740 | const char *sep = symbol_conf.field_sep; | 741 | const char *sep = symbol_conf.field_sep; |
741 | int ret; | 742 | int ret; |
742 | 743 | ||
743 | if (symbol_conf.exclude_other && !self->parent) | 744 | if (symbol_conf.exclude_other && !he->parent) |
744 | return 0; | 745 | return 0; |
745 | 746 | ||
746 | if (pair_hists) { | 747 | if (pair_hists) { |
747 | period = self->pair ? self->pair->period : 0; | 748 | period = he->pair ? he->pair->period : 0; |
748 | nr_events = self->pair ? self->pair->nr_events : 0; | 749 | nr_events = he->pair ? he->pair->nr_events : 0; |
749 | total = pair_hists->stats.total_period; | 750 | total = pair_hists->stats.total_period; |
750 | period_sys = self->pair ? self->pair->period_sys : 0; | 751 | period_sys = he->pair ? he->pair->period_sys : 0; |
751 | period_us = self->pair ? self->pair->period_us : 0; | 752 | period_us = he->pair ? he->pair->period_us : 0; |
752 | period_guest_sys = self->pair ? self->pair->period_guest_sys : 0; | 753 | period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; |
753 | period_guest_us = self->pair ? self->pair->period_guest_us : 0; | 754 | period_guest_us = he->pair ? he->pair->period_guest_us : 0; |
754 | } else { | 755 | } else { |
755 | period = self->period; | 756 | period = he->period; |
756 | nr_events = self->nr_events; | 757 | nr_events = he->nr_events; |
757 | total = session_total; | 758 | total = total_period; |
758 | period_sys = self->period_sys; | 759 | period_sys = he->period_sys; |
759 | period_us = self->period_us; | 760 | period_us = he->period_us; |
760 | period_guest_sys = self->period_guest_sys; | 761 | period_guest_sys = he->period_guest_sys; |
761 | period_guest_us = self->period_guest_us; | 762 | period_guest_us = he->period_guest_us; |
762 | } | 763 | } |
763 | 764 | ||
764 | if (total) { | 765 | if (total) { |
@@ -812,8 +813,8 @@ static int hist_entry__pcnt_snprintf(struct hist_entry *self, char *s, | |||
812 | 813 | ||
813 | if (total > 0) | 814 | if (total > 0) |
814 | old_percent = (period * 100.0) / total; | 815 | old_percent = (period * 100.0) / total; |
815 | if (session_total > 0) | 816 | if (total_period > 0) |
816 | new_percent = (self->period * 100.0) / session_total; | 817 | new_percent = (he->period * 100.0) / total_period; |
817 | 818 | ||
818 | diff = new_percent - old_percent; | 819 | diff = new_percent - old_percent; |
819 | 820 | ||
@@ -862,9 +863,10 @@ int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, | |||
862 | return ret; | 863 | return ret; |
863 | } | 864 | } |
864 | 865 | ||
865 | int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, | 866 | static int hist_entry__fprintf(struct hist_entry *he, size_t size, |
866 | struct hists *pair_hists, bool show_displacement, | 867 | struct hists *hists, struct hists *pair_hists, |
867 | long displacement, FILE *fp, u64 session_total) | 868 | bool show_displacement, long displacement, |
869 | u64 total_period, FILE *fp) | ||
868 | { | 870 | { |
869 | char bf[512]; | 871 | char bf[512]; |
870 | int ret; | 872 | int ret; |
@@ -874,14 +876,14 @@ int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, | |||
874 | 876 | ||
875 | ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, | 877 | ret = hist_entry__pcnt_snprintf(he, bf, size, pair_hists, |
876 | show_displacement, displacement, | 878 | show_displacement, displacement, |
877 | true, session_total); | 879 | true, total_period); |
878 | hist_entry__snprintf(he, bf + ret, size - ret, hists); | 880 | hist_entry__snprintf(he, bf + ret, size - ret, hists); |
879 | return fprintf(fp, "%s\n", bf); | 881 | return fprintf(fp, "%s\n", bf); |
880 | } | 882 | } |
881 | 883 | ||
882 | static size_t hist_entry__fprintf_callchain(struct hist_entry *self, | 884 | static size_t hist_entry__fprintf_callchain(struct hist_entry *he, |
883 | struct hists *hists, FILE *fp, | 885 | struct hists *hists, |
884 | u64 session_total) | 886 | u64 total_period, FILE *fp) |
885 | { | 887 | { |
886 | int left_margin = 0; | 888 | int left_margin = 0; |
887 | 889 | ||
@@ -889,11 +891,10 @@ static size_t hist_entry__fprintf_callchain(struct hist_entry *self, | |||
889 | struct sort_entry *se = list_first_entry(&hist_entry__sort_list, | 891 | struct sort_entry *se = list_first_entry(&hist_entry__sort_list, |
890 | typeof(*se), list); | 892 | typeof(*se), list); |
891 | left_margin = hists__col_len(hists, se->se_width_idx); | 893 | left_margin = hists__col_len(hists, se->se_width_idx); |
892 | left_margin -= thread__comm_len(self->thread); | 894 | left_margin -= thread__comm_len(he->thread); |
893 | } | 895 | } |
894 | 896 | ||
895 | return hist_entry_callchain__fprintf(fp, self, session_total, | 897 | return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); |
896 | left_margin); | ||
897 | } | 898 | } |
898 | 899 | ||
899 | size_t hists__fprintf(struct hists *hists, struct hists *pair, | 900 | size_t hists__fprintf(struct hists *hists, struct hists *pair, |
@@ -903,6 +904,7 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
903 | struct sort_entry *se; | 904 | struct sort_entry *se; |
904 | struct rb_node *nd; | 905 | struct rb_node *nd; |
905 | size_t ret = 0; | 906 | size_t ret = 0; |
907 | u64 total_period; | ||
906 | unsigned long position = 1; | 908 | unsigned long position = 1; |
907 | long displacement = 0; | 909 | long displacement = 0; |
908 | unsigned int width; | 910 | unsigned int width; |
@@ -917,20 +919,6 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
917 | 919 | ||
918 | fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); | 920 | fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); |
919 | 921 | ||
920 | if (symbol_conf.show_nr_samples) { | ||
921 | if (sep) | ||
922 | fprintf(fp, "%cSamples", *sep); | ||
923 | else | ||
924 | fputs(" Samples ", fp); | ||
925 | } | ||
926 | |||
927 | if (symbol_conf.show_total_period) { | ||
928 | if (sep) | ||
929 | ret += fprintf(fp, "%cPeriod", *sep); | ||
930 | else | ||
931 | ret += fprintf(fp, " Period "); | ||
932 | } | ||
933 | |||
934 | if (symbol_conf.show_cpu_utilization) { | 922 | if (symbol_conf.show_cpu_utilization) { |
935 | if (sep) { | 923 | if (sep) { |
936 | ret += fprintf(fp, "%csys", *sep); | 924 | ret += fprintf(fp, "%csys", *sep); |
@@ -940,8 +928,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
940 | ret += fprintf(fp, "%cguest us", *sep); | 928 | ret += fprintf(fp, "%cguest us", *sep); |
941 | } | 929 | } |
942 | } else { | 930 | } else { |
943 | ret += fprintf(fp, " sys "); | 931 | ret += fprintf(fp, " sys "); |
944 | ret += fprintf(fp, " us "); | 932 | ret += fprintf(fp, " us "); |
945 | if (perf_guest) { | 933 | if (perf_guest) { |
946 | ret += fprintf(fp, " guest sys "); | 934 | ret += fprintf(fp, " guest sys "); |
947 | ret += fprintf(fp, " guest us "); | 935 | ret += fprintf(fp, " guest us "); |
@@ -949,6 +937,20 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
949 | } | 937 | } |
950 | } | 938 | } |
951 | 939 | ||
940 | if (symbol_conf.show_nr_samples) { | ||
941 | if (sep) | ||
942 | fprintf(fp, "%cSamples", *sep); | ||
943 | else | ||
944 | fputs(" Samples ", fp); | ||
945 | } | ||
946 | |||
947 | if (symbol_conf.show_total_period) { | ||
948 | if (sep) | ||
949 | ret += fprintf(fp, "%cPeriod", *sep); | ||
950 | else | ||
951 | ret += fprintf(fp, " Period "); | ||
952 | } | ||
953 | |||
952 | if (pair) { | 954 | if (pair) { |
953 | if (sep) | 955 | if (sep) |
954 | ret += fprintf(fp, "%cDelta", *sep); | 956 | ret += fprintf(fp, "%cDelta", *sep); |
@@ -993,6 +995,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
993 | goto print_entries; | 995 | goto print_entries; |
994 | 996 | ||
995 | fprintf(fp, "# ........"); | 997 | fprintf(fp, "# ........"); |
998 | if (symbol_conf.show_cpu_utilization) | ||
999 | fprintf(fp, " ....... ......."); | ||
996 | if (symbol_conf.show_nr_samples) | 1000 | if (symbol_conf.show_nr_samples) |
997 | fprintf(fp, " .........."); | 1001 | fprintf(fp, " .........."); |
998 | if (symbol_conf.show_total_period) | 1002 | if (symbol_conf.show_total_period) |
@@ -1025,6 +1029,8 @@ size_t hists__fprintf(struct hists *hists, struct hists *pair, | |||
1025 | goto out; | 1029 | goto out; |
1026 | 1030 | ||
1027 | print_entries: | 1031 | print_entries: |
1032 | total_period = hists->stats.total_period; | ||
1033 | |||
1028 | for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { | 1034 | for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { |
1029 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 1035 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
1030 | 1036 | ||
@@ -1040,11 +1046,10 @@ print_entries: | |||
1040 | ++position; | 1046 | ++position; |
1041 | } | 1047 | } |
1042 | ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, | 1048 | ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, |
1043 | displacement, fp, hists->stats.total_period); | 1049 | displacement, total_period, fp); |
1044 | 1050 | ||
1045 | if (symbol_conf.use_callchain) | 1051 | if (symbol_conf.use_callchain) |
1046 | ret += hist_entry__fprintf_callchain(h, hists, fp, | 1052 | ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); |
1047 | hists->stats.total_period); | ||
1048 | if (max_rows && ++nr_rows >= max_rows) | 1053 | if (max_rows && ++nr_rows >= max_rows) |
1049 | goto out; | 1054 | goto out; |
1050 | 1055 | ||
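[Note, not part of the patch] The hist.c changes rename the 'self' parameters to 'he', move the FILE * argument to the end of the printing helpers, make hist_entry__fprintf() static, swap the cpu-utilization columns ahead of Samples/Period (with matching header dots), and read hists->stats.total_period once into a local that is passed down instead of being dereferenced per entry. A condensed sketch of the reworked print loop, as it stands after the hunks above:

    total_period = hists->stats.total_period;
    for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
            struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);

            ret += hist_entry__fprintf(h, max_cols, hists, pair,
                                       show_displacement, displacement,
                                       total_period, fp);
            if (symbol_conf.use_callchain)
                    ret += hist_entry__fprintf_callchain(h, hists,
                                                         total_period, fp);
    }
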
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ff6f9d56ea41..f55f0a8d1f81 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -66,11 +66,8 @@ struct hists { | |||
66 | struct hist_entry *__hists__add_entry(struct hists *self, | 66 | struct hist_entry *__hists__add_entry(struct hists *self, |
67 | struct addr_location *al, | 67 | struct addr_location *al, |
68 | struct symbol *parent, u64 period); | 68 | struct symbol *parent, u64 period); |
69 | extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); | 69 | int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); |
70 | extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); | 70 | int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); |
71 | int hist_entry__fprintf(struct hist_entry *he, size_t size, struct hists *hists, | ||
72 | struct hists *pair_hists, bool show_displacement, | ||
73 | long displacement, FILE *fp, u64 session_total); | ||
74 | int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, | 71 | int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, |
75 | struct hists *hists); | 72 | struct hists *hists); |
76 | void hist_entry__free(struct hist_entry *); | 73 | void hist_entry__free(struct hist_entry *); |
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 531c283fc0c5..b029296d20d9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -735,8 +735,8 @@ static int | |||
735 | parse_event_modifier(const char **strp, struct perf_event_attr *attr) | 735 | parse_event_modifier(const char **strp, struct perf_event_attr *attr) |
736 | { | 736 | { |
737 | const char *str = *strp; | 737 | const char *str = *strp; |
738 | int exclude = 0; | 738 | int exclude = 0, exclude_GH = 0; |
739 | int eu = 0, ek = 0, eh = 0, precise = 0; | 739 | int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0; |
740 | 740 | ||
741 | if (!*str) | 741 | if (!*str) |
742 | return 0; | 742 | return 0; |
@@ -760,6 +760,14 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) | |||
760 | if (!exclude) | 760 | if (!exclude) |
761 | exclude = eu = ek = eh = 1; | 761 | exclude = eu = ek = eh = 1; |
762 | eh = 0; | 762 | eh = 0; |
763 | } else if (*str == 'G') { | ||
764 | if (!exclude_GH) | ||
765 | exclude_GH = eG = eH = 1; | ||
766 | eG = 0; | ||
767 | } else if (*str == 'H') { | ||
768 | if (!exclude_GH) | ||
769 | exclude_GH = eG = eH = 1; | ||
770 | eH = 0; | ||
763 | } else if (*str == 'p') { | 771 | } else if (*str == 'p') { |
764 | precise++; | 772 | precise++; |
765 | } else | 773 | } else |
@@ -776,6 +784,8 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) | |||
776 | attr->exclude_kernel = ek; | 784 | attr->exclude_kernel = ek; |
777 | attr->exclude_hv = eh; | 785 | attr->exclude_hv = eh; |
778 | attr->precise_ip = precise; | 786 | attr->precise_ip = precise; |
787 | attr->exclude_host = eH; | ||
788 | attr->exclude_guest = eG; | ||
779 | 789 | ||
780 | return 0; | 790 | return 0; |
781 | } | 791 | } |
@@ -838,6 +848,7 @@ int parse_events(struct perf_evlist *evlist , const char *str, int unset __used) | |||
838 | for (;;) { | 848 | for (;;) { |
839 | ostr = str; | 849 | ostr = str; |
840 | memset(&attr, 0, sizeof(attr)); | 850 | memset(&attr, 0, sizeof(attr)); |
851 | event_attr_init(&attr); | ||
841 | ret = parse_event_symbols(evlist, &str, &attr); | 852 | ret = parse_event_symbols(evlist, &str, &attr); |
842 | if (ret == EVT_FAILED) | 853 | if (ret == EVT_FAILED) |
843 | return -1; | 854 | return -1; |
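[Note, not part of the patch] parse_event_modifier() learns the 'G' (guest-only) and 'H' (host-only) event modifiers. As with the existing u/k/h handling, the first G or H seen sets both exclusion bits and then clears the one being requested, so the effect on the attr is roughly:

    /* Illustrative mapping derived from the hunk above, assuming a
     * single G or H modifier on the event (e.g. "cycles:G"):
     *
     *   ...:G        ->  attr->exclude_host  = 1, attr->exclude_guest = 0
     *   ...:H        ->  attr->exclude_guest = 1, attr->exclude_host  = 0
     *   no modifier  ->  both bits left as set up by event_attr_init()
     */
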
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index ac6830d8292b..fc22cf5c605f 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c | |||
@@ -18,7 +18,6 @@ | |||
18 | * | 18 | * |
19 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 19 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
20 | */ | 20 | */ |
21 | #include <ctype.h> | ||
22 | #include "util.h" | 21 | #include "util.h" |
23 | #include <dirent.h> | 22 | #include <dirent.h> |
24 | #include <mntent.h> | 23 | #include <mntent.h> |
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5b3ea49aa63e..813141047fc2 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
@@ -1,6 +1,21 @@ | |||
1 | #include "../perf.h" | ||
1 | #include "util.h" | 2 | #include "util.h" |
2 | #include <sys/mman.h> | 3 | #include <sys/mman.h> |
3 | 4 | ||
5 | /* | ||
6 | * XXX We need to find a better place for these things... | ||
7 | */ | ||
8 | bool perf_host = true; | ||
9 | bool perf_guest = true; | ||
10 | |||
11 | void event_attr_init(struct perf_event_attr *attr) | ||
12 | { | ||
13 | if (!perf_host) | ||
14 | attr->exclude_host = 1; | ||
15 | if (!perf_guest) | ||
16 | attr->exclude_guest = 1; | ||
17 | } | ||
18 | |||
4 | int mkdir_p(char *path, mode_t mode) | 19 | int mkdir_p(char *path, mode_t mode) |
5 | { | 20 | { |
6 | struct stat st; | 21 | struct stat st; |
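[Note, not part of the patch] util.c gains the perf_host/perf_guest globals (both defaulting to true) and event_attr_init(), which translates those globals into exclude_host/exclude_guest bits on a freshly zeroed attr. A hedged usage sketch of how a tool-side caller could use this:

    /* sketch: a tool that only wants host-side samples */
    perf_guest = false;                 /* global defined in util.c */

    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    event_attr_init(&attr);             /* sets attr.exclude_guest = 1 */
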
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 37be34dff798..b9c530cce79a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -242,6 +242,10 @@ int strtailcmp(const char *s1, const char *s2); | |||
242 | unsigned long convert_unit(unsigned long value, char *unit); | 242 | unsigned long convert_unit(unsigned long value, char *unit); |
243 | int readn(int fd, void *buf, size_t size); | 243 | int readn(int fd, void *buf, size_t size); |
244 | 244 | ||
245 | struct perf_event_attr; | ||
246 | |||
247 | void event_attr_init(struct perf_event_attr *attr); | ||
248 | |||
245 | #define _STR(x) #x | 249 | #define _STR(x) #x |
246 | #define STR(x) _STR(x) | 250 | #define STR(x) _STR(x) |
247 | 251 | ||