165 files changed, 6107 insertions(+), 1984 deletions(-)
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644
index 000000000000..d2a36602ca8d
--- /dev/null
+++ b/Documentation/lockup-watchdogs.txt
@@ -0,0 +1,63 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a compile time knob,
+"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
+(see "Documentation/kernel-parameters.txt" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+=== Implementation ===
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work on any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through the sysctl of
+the same name) seconds to check for hardlockups. If any CPU in the
+system does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
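
To make the implementation description above concrete, here is a minimal
C sketch of the softlockup check performed from the hrtimer callback. It
is illustrative only -- the helper name, time units and return convention
are assumptions for this sketch, not the actual kernel/watchdog.c code:

	/*
	 * Illustrative sketch (not the real kernel/watchdog.c code):
	 * the hrtimer callback compares "now" against the timestamp
	 * last written by the high-priority watchdog thread. Times
	 * are in seconds.
	 */
	static int is_softlockup(unsigned long touch_ts, unsigned long now,
				 unsigned int watchdog_thresh)
	{
		/* the softlockup threshold is 2*watchdog_thresh seconds */
		if (now - touch_ts >= 2 * watchdog_thresh)
			return now - touch_ts;	/* how long we have been stuck */

		return 0;	/* the watchdog thread is still being scheduled */
	}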
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
deleted file mode 100644
index bf9f80a98282..000000000000
--- a/Documentation/nmi_watchdog.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-
-[NMI watchdog is available for x86 and x86-64 architectures]
-
-Is your system locking up unpredictably? No keyboard activity, just
-a frustrating complete hard lockup? Do you want to help us debugging
-such lockups? If all yes then this document is definitely for you.
-
-On many x86/x86-64 type hardware there is a feature that enables
-us to generate 'watchdog NMI interrupts'.  (NMI: Non Maskable Interrupt
-which get executed even if the system is otherwise locked up hard).
-This can be used to debug hard kernel lockups.  By executing periodic
-NMI interrupts, the kernel can monitor whether any CPU has locked up,
-and print out debugging messages if so.
-
-In order to use the NMI watchdog, you need to have APIC support in your
-kernel. For SMP kernels, APIC support gets compiled in automatically. For
-UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local
-APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and
-features -> IO-APIC support on uniprocessors) in your kernel config.
-CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC.
-CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
-kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
-may implicitly disable the NMI watchdog.]
-
-For x86-64, the needed APIC is always compiled in.
-
-Using local APIC (nmi_watchdog=2) needs the first performance register, so
-you can't use it for other purposes (such as high precision performance
-profiling.) However, at least oprofile and the perfctr driver disable the
-local APIC NMI watchdog automatically.
-
-To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot
-parameter. Eg. the relevant lilo.conf entry:
-
-        append="nmi_watchdog=1"
-
-For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1.
-For UP machines without an IO-APIC use nmi_watchdog=2, this only works
-for some processor types. If in doubt, boot with nmi_watchdog=1 and
-check the NMI count in /proc/interrupts; if the count is zero then
-reboot with nmi_watchdog=2 and check the NMI count. If it is still
-zero then log a problem, you probably have a processor that needs to be
-added to the nmi code.
-
-A 'lockup' is the following scenario: if any CPU in the system does not
-execute the period local timer interrupt for more than 5 seconds, then
-the NMI handler generates an oops and kills the process. This
-'controlled crash' (and the resulting kernel messages) can be used to
-debug the lockup. Thus whenever the lockup happens, wait 5 seconds and
-the oops will show up automatically. If the kernel produces no messages
-then the system has crashed so hard (eg. hardware-wise) that either it
-cannot even accept NMI interrupts, or the crash has made the kernel
-unable to print messages.
-
-Be aware that when using local APIC, the frequency of NMI interrupts
-it generates, depends on the system load. The local APIC NMI watchdog,
-lacking a better source, uses the "cycles unhalted" event. As you may
-guess it doesn't tick when the CPU is in the halted state (which happens
-when the system is idle), but if your system locks up on anything but the
-"hlt" processor instruction, the watchdog will trigger very soon as the
-"cycles unhalted" event will happen every clock tick. If it locks up on
-"hlt", then you are out of luck -- the event will not happen at all and the
-watchdog won't trigger. This is a shortcoming of the local APIC watchdog
--- unfortunately there is no "clock ticks" event that would work all the
-time. The I/O APIC watchdog is driven externally and has no such shortcoming.
-But its NMI frequency is much higher, resulting in a more significant hit
-to the overall system performance.
-
-On x86 nmi_watchdog is disabled by default so you have to enable it with
-a boot time parameter.
-
-It's possible to disable the NMI watchdog in run-time by writing "0" to
-/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
-the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
-at boot time.
-
-NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
-on x86 SMP boxes.
-
-[ feel free to send bug reports, suggestions and patches to
-  Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
-  list at <linux-smp@vger.kernel.org> ]
-
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
new file mode 100644
index 000000000000..d93f3c00f245
--- /dev/null
+++ b/Documentation/static-keys.txt
@@ -0,0 +1,286 @@
+Static Keys
+-----------
+
+By: Jason Baron <jbaron@redhat.com>
+
+0) Abstract
+
+Static keys allow the inclusion of seldom used features in
+performance-sensitive fast-path kernel code, via a GCC feature and a code
+patching technique. A quick example:
+
+	struct static_key key = STATIC_KEY_INIT_FALSE;
+
+	...
+
+	if (static_key_false(&key))
+		do unlikely code
+	else
+		do likely code
+
+	...
+	static_key_slow_inc(&key);
+	...
+	static_key_slow_dec(&key);
+	...
+
+The static_key_false() branch will be generated into the code with as little
+impact on the likely code path as possible.
+
+
+1) Motivation
+
+
+Currently, tracepoints are implemented using a conditional branch. The
+conditional check requires checking a global variable for each tracepoint.
+Although the overhead of this check is small, it increases when the memory
+cache comes under pressure (memory cache lines for these global variables may
+be shared with other memory accesses). As we increase the number of tracepoints
+in the kernel this overhead may become more of an issue. In addition,
+tracepoints are often dormant (disabled) and provide no direct kernel
+functionality. Thus, it is highly desirable to reduce their impact as much as
+possible. Although tracepoints are the original motivation for this work, other
+kernel code paths should be able to make use of the static keys facility.
+
+
+2) Solution
+
+
+gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
+
+http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html
+
+Using the 'asm goto', we can create branches that are either taken or not taken
+by default, without the need to check memory. Then, at run-time, we can patch
+the branch site to change the branch direction.
+
+For example, if we have a simple branch that is disabled by default:
+
+	if (static_key_false(&key))
+		printk("I am the true branch\n");
+
+Thus, by default the 'printk' will not be emitted. And the code generated will
+consist of a single atomic 'no-op' instruction (5 bytes on x86), in the
+straight-line code path. When the branch is 'flipped', we will patch the
+'no-op' in the straight-line codepath with a 'jump' instruction to the
+out-of-line true branch. Thus, changing branch direction is expensive but
+branch selection is basically 'free'. That is the basic tradeoff of this
+optimization.
+
+This low-level patching mechanism is called 'jump label patching', and it gives
+the basis for the static keys facility.
+
+3) Static key label API, usage and examples:
+
+
+In order to make use of this optimization you must first define a key:
+
+	struct static_key key;
+
+Which is initialized as:
+
+	struct static_key key = STATIC_KEY_INIT_TRUE;
+
+or:
+
+	struct static_key key = STATIC_KEY_INIT_FALSE;
+
+If the key is not initialized, it defaults to false. The 'struct static_key'
+must be a 'global'. That is, it can't be allocated on the stack or dynamically
+allocated at run-time.
+
+The key is then used in code as:
+
+	if (static_key_false(&key))
+		do unlikely code
+	else
+		do likely code
+
+Or:
+
+	if (static_key_true(&key))
+		do likely code
+	else
+		do unlikely code
+
+A key that is initialized via 'STATIC_KEY_INIT_FALSE' must be used in a
+'static_key_false()' construct. Likewise, a key initialized via
+'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
+single key can be used in many branches, but all the branches must match the
+way that the key has been initialized.
+
+The branch(es) can then be switched via:
+
+	static_key_slow_inc(&key);
+	...
+	static_key_slow_dec(&key);
+
+Thus, 'static_key_slow_inc()' means 'make the branch true', and
+'static_key_slow_dec()' means 'make the branch false', with appropriate
+reference counting. For example, if the key is initialized true, a
+static_key_slow_dec() will switch the branch to false. And a subsequent
+static_key_slow_inc() will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_key_slow_inc()' will change the branch to
+true. And then a 'static_key_slow_dec()' will again make the branch false.
+
+An example usage in the kernel is the implementation of tracepoints:
+
+	static inline void trace_##name(proto)				\
+	{								\
+		if (static_key_false(&__tracepoint_##name.key))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TP_PROTO(data_proto),			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond));			\
+	}
+
+Tracepoints are disabled by default, and can be placed in performance critical
+pieces of the kernel. Thus, by using a static key, the tracepoints can have
+absolutely minimal impact when not in use.
+
+
+4) Architecture level code patching interface, 'jump labels'
+
+
+There are a few functions and macros that architectures must implement in order
+to take advantage of this optimization. If there is no architecture support, we
+simply fall back to a traditional load, test, and jump sequence.
+
+* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
+
+* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
+
+* __always_inline bool arch_static_branch(struct static_key *key), see:
+  arch/x86/include/asm/jump_label.h
+
+* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
+  see: arch/x86/kernel/jump_label.c
+
+* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
+  see: arch/x86/kernel/jump_label.c
+
+
+* struct jump_entry, see: arch/x86/include/asm/jump_label.h
+
+
+5) Static keys / jump label analysis, results (x86_64):
+
+
+As an example, let's add the following branch to 'getppid()', such that the
+system call now looks like:
+
+SYSCALL_DEFINE0(getppid)
+{
+	int pid;
+
++	if (static_key_false(&key))
++		printk("I am the true branch\n");
+
+	rcu_read_lock();
+	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
+	rcu_read_unlock();
+
+	return pid;
+}
+
+The resulting instructions with jump labels generated by GCC are:
+
+ffffffff81044290 <sys_getppid>:
+ffffffff81044290:	55			push   %rbp
+ffffffff81044291:	48 89 e5		mov    %rsp,%rbp
+ffffffff81044294:	e9 00 00 00 00		jmpq   ffffffff81044299 <sys_getppid+0x9>
+ffffffff81044299:	65 48 8b 04 25 c0 b6	mov    %gs:0xb6c0,%rax
+ffffffff810442a0:	00 00
+ffffffff810442a2:	48 8b 80 80 02 00 00	mov    0x280(%rax),%rax
+ffffffff810442a9:	48 8b 80 b0 02 00 00	mov    0x2b0(%rax),%rax
+ffffffff810442b0:	48 8b b8 e8 02 00 00	mov    0x2e8(%rax),%rdi
+ffffffff810442b7:	e8 f4 d9 00 00		callq  ffffffff81051cb0 <pid_vnr>
+ffffffff810442bc:	5d			pop    %rbp
+ffffffff810442bd:	48 98			cltq
+ffffffff810442bf:	c3			retq
+ffffffff810442c0:	48 c7 c7 e3 54 98 81	mov    $0xffffffff819854e3,%rdi
+ffffffff810442c7:	31 c0			xor    %eax,%eax
+ffffffff810442c9:	e8 71 13 6d 00		callq  ffffffff8171563f <printk>
+ffffffff810442ce:	eb c9			jmp    ffffffff81044299 <sys_getppid+0x9>
+
+Without the jump label optimization it looks like:
+
+ffffffff810441f0 <sys_getppid>:
+ffffffff810441f0:	8b 05 8a 52 d8 00	mov    0xd8528a(%rip),%eax	# ffffffff81dc9480 <key>
+ffffffff810441f6:	55			push   %rbp
+ffffffff810441f7:	48 89 e5		mov    %rsp,%rbp
+ffffffff810441fa:	85 c0			test   %eax,%eax
+ffffffff810441fc:	75 27			jne    ffffffff81044225 <sys_getppid+0x35>
+ffffffff810441fe:	65 48 8b 04 25 c0 b6	mov    %gs:0xb6c0,%rax
+ffffffff81044205:	00 00
+ffffffff81044207:	48 8b 80 80 02 00 00	mov    0x280(%rax),%rax
+ffffffff8104420e:	48 8b 80 b0 02 00 00	mov    0x2b0(%rax),%rax
+ffffffff81044215:	48 8b b8 e8 02 00 00	mov    0x2e8(%rax),%rdi
+ffffffff8104421c:	e8 2f da 00 00		callq  ffffffff81051c50 <pid_vnr>
+ffffffff81044221:	5d			pop    %rbp
+ffffffff81044222:	48 98			cltq
+ffffffff81044224:	c3			retq
+ffffffff81044225:	48 c7 c7 13 53 98 81	mov    $0xffffffff81985313,%rdi
+ffffffff8104422c:	31 c0			xor    %eax,%eax
+ffffffff8104422e:	e8 60 0f 6d 00		callq  ffffffff81715193 <printk>
+ffffffff81044233:	eb c9			jmp    ffffffff810441fe <sys_getppid+0xe>
+ffffffff81044235:	66 66 2e 0f 1f 84 00	data32 nopw %cs:0x0(%rax,%rax,1)
+ffffffff8104423c:	00 00 00 00
+
+Thus, the disabled jump label case adds a 'mov', 'test' and 'jne' instruction
+vs. the jump label case, which just has a 'no-op' or 'jmp 0'. (The 'jmp 0' is
+patched to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled
+jump label case adds:
+
+6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 additional bytes.
+
+If we then include the padding bytes, the jump label code saves 16 total bytes
+of instruction memory for this small function. In this case the non-jump label
+function is 80 bytes long. Thus, we have saved 20% of the instruction
+footprint. We could in fact improve this even further, since the 5-byte no-op
+could really be a 2-byte no-op, as we can reach the branch with a 2-byte jmp.
+However, we have not yet implemented optimal no-op sizes (they are currently
+hard-coded).
+
+Since there are a number of static key API uses in the scheduler paths,
+'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
+performance improvement. Testing done on 3.3.0-rc2:
+
+jump label disabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        855.700314 task-clock                #    0.534 CPUs utilized            ( +-  0.11% )
+           200,003 context-switches          #    0.234 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 39.58% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.02% )
+     1,474,374,262 cycles                    #    1.723 GHz                      ( +-  0.17% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,178,049,567 instructions              #    0.80  insns per cycle          ( +-  0.06% )
+       208,368,926 branches                  #  243.507 M/sec                    ( +-  0.06% )
+         5,569,188 branch-misses             #    2.67% of all branches          ( +-  0.54% )
+
+       1.601607384 seconds time elapsed                                          ( +-  0.07% )
+
+jump label enabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+        841.043185 task-clock                #    0.533 CPUs utilized            ( +-  0.12% )
+           200,004 context-switches          #    0.238 M/sec                    ( +-  0.00% )
+                 0 CPU-migrations            #    0.000 M/sec                    ( +- 40.87% )
+               487 page-faults               #    0.001 M/sec                    ( +-  0.05% )
+     1,432,559,428 cycles                    #    1.703 GHz                      ( +-  0.18% )
+   <not supported> stalled-cycles-frontend
+   <not supported> stalled-cycles-backend
+     1,175,363,994 instructions              #    0.82  insns per cycle          ( +-  0.04% )
+       206,859,359 branches                  #  245.956 M/sec                    ( +-  0.04% )
+         4,884,119 branch-misses             #    2.36% of all branches          ( +-  0.85% )
+
+       1.579384366 seconds time elapsed
+
+The percentage of saved branches is .7%, and we've saved 12% on
+'branch-misses'. This is where we would expect to get the most savings, since
+this optimization is about reducing the number of branches. In addition, we've
+saved .2% on instructions, 2.8% on cycles, and 1.4% on elapsed time.
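
As a recap of the API documented in the new file above, the following is a
hedged, self-contained sketch of a kernel module that toggles a default-false
static key. The module itself is made up for illustration, but it only uses
the calls the document describes (struct static_key, STATIC_KEY_INIT_FALSE,
static_key_false(), static_key_slow_inc()/_dec()):

	#include <linux/module.h>
	#include <linux/jump_label.h>

	/* Default-false key: the fast path compiles to a single no-op. */
	static struct static_key debug_key = STATIC_KEY_INIT_FALSE;

	static void hot_path(void)
	{
		if (static_key_false(&debug_key))	/* out-of-line when false */
			pr_info("debug instrumentation enabled\n");
		/* likely code continues here */
	}

	static int __init demo_init(void)
	{
		static_key_slow_inc(&debug_key);	/* patch no-op -> jmp */
		hot_path();				/* now takes the branch */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		static_key_slow_dec(&debug_key);	/* patch jmp -> no-op */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");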
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 1ebc24cf9a55..6f51fed45f2d 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -226,6 +226,13 @@ Here is the list of current tracers that may be configured.
 	Traces and records the max latency that it takes for
 	the highest priority task to get scheduled after
 	it has been woken up.
+        Traces all tasks as an average developer would expect.
+
+  "wakeup_rt"
+
+        Traces and records the max latency that it takes for just
+        RT tasks (as the current "wakeup" does). This is useful
+        for those interested in wake up timings of RT tasks.
 
   "hw-branch-tracer"
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c736be11..5b448a74d0f7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -47,18 +47,29 @@ config KPROBES
 	  If in doubt, say "N".
 
 config JUMP_LABEL
-       bool "Optimize trace point call sites"
+       bool "Optimize very unlikely/likely branches"
        depends on HAVE_ARCH_JUMP_LABEL
        help
+         This option enables a transparent branch optimization that
+         makes certain almost-always-true or almost-always-false branch
+         conditions even cheaper to execute within the kernel.
+
+         Certain performance-sensitive kernel code, such as trace points,
+         scheduler functionality, networking code and KVM have such
+         branches and include support for this optimization technique.
+
          If it is detected that the compiler has support for "asm goto",
-         the kernel will compile trace point locations with just a
-         nop instruction. When trace points are enabled, the nop will
-         be converted to a jump to the trace function. This technique
-         lowers overhead and stress on the branch prediction of the
-         processor.
-
-         On i386, options added to the compiler flags may increase
-         the size of the kernel slightly.
+         the kernel will compile such branches with just a nop
+         instruction. When the condition flag is toggled to true, the
+         nop will be converted to a jump instruction to execute the
+         conditional block of instructions.
+
+         This technique lowers overhead and stress on the branch prediction
+         of the processor and generally makes the kernel faster. The update
+         of the condition is slower, but those are always very rare.
+
+         ( On 32-bit x86, the necessary options added to the compiler
+           flags may increase the size of the kernel slightly. )
 
 config OPTPROBES
 	def_bool y
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7cdbfb..0dae252f7a33 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
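
The same has_branch_stack() guard recurs in the ARM, MIPS, PowerPC, SH and
SPARC event_init paths below. For context, the helper is a one-liner in
include/linux/perf_event.h; the sketch here is reconstructed from the
branch-stack sampling interface this series introduces, not copied verbatim
from the tree:

	/*
	 * Sketch: an event requests taken-branch sampling by setting
	 * PERF_SAMPLE_BRANCH_STACK in its sample_type; PMUs without
	 * LBR-style hardware reject such events up front.
	 */
	static inline bool has_branch_stack(struct perf_event *event)
	{
		return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
	}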
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe3607989..7523340afb8a 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
 /* ARM perf PMU IDs for use by internal perf clients. */
 enum arm_perf_pmu_ids {
 	ARM_PERF_PMU_ID_XSCALE1	= 0,
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa18f137..8a89d3b7626b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
 
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d146..c52ea5546b5b 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET	0
-
 #endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f91180..8b8526b491c7 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET	0
-
 #endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 32551d304cd7..b149b88ea795 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu)
 		pv_time_ops.init_missing_ticks_accounting(cpu);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline int
 paravirt_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 100868216c55..1b22f6de2932 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = {
  * pv_time_ops
  * time operations
  */
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
 
 static int
 ia64_native_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 1881b316ca45..4d6d77ed9b9d 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,7 +20,7 @@
 #define WORD_INSN ".word"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\tnop\n\t"
 		"nop\n\t"
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897acfbc0..811084f4e422 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 938986e412f1..ae098c438f00 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\n\t"
 		 "nop\n\t"
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d23..1a8093fa8f71 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
-#define PERF_EVENT_INDEX_OFFSET	1
-
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fde95c6..c2e27ede07ec 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	if (!ppmu)
 		return -ENOENT;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event)
 	return err;
 }
 
+static int power_pmu_event_idx(struct perf_event *event)
+{
+	return event->hw.idx;
+}
+
 struct pmu power_pmu = {
 	.pmu_enable	= power_pmu_enable,
 	.pmu_disable	= power_pmu_disable,
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = {
 	.start_txn	= power_pmu_start_txn,
 	.cancel_txn	= power_pmu_cancel_txn,
 	.commit_txn	= power_pmu_commit_txn,
+	.event_idx	= power_pmu_event_idx,
 };
 
 /*
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 95a6cf2b5b67..6c32190dc73e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -13,7 +13,7 @@
 #define ASM_ALIGN ".balign 4"
 #endif
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("0:	brcl 0,0\n"
 		".pushsection __jump_table, \"aw\"\n"
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d2718..4eb444edbe49 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
 
 /* Empty, just to avoid compiling error */
 
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3a7eb8..068b8a2759b5 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HW_CACHE:
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index fc73a82366f8..5080d16a832f 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,7 +7,7 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:\n\t"
 		 "nop\n\t"
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da624330c..8e16a4a21582 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&nmi_active) < 0)
 		return -ENODEV;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		if (attr->config >= sparc_pmu->max_events)
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e32..74a2e312e8a2 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
 
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
 extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
-					     insn_byte_t last_pfx,
+					     int lpfx_id,
 					     insn_attr_t esc_attr);
 extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
-					    insn_byte_t last_pfx,
+					    int lpfx_id,
 					    insn_attr_t esc_attr);
 extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
 					  insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddfd..48eb30a86062 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
 #define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
 #define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
 
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
-	return insn->prefixes.bytes[3];
-}
-
 extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
 extern void insn_get_prefixes(struct insn *insn);
 extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
 	return X86_VEX_P(insn->vex_prefix.bytes[2]);
 }
 
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+	if (insn_is_avx(insn))
+		return insn_vex_p_bits(insn);	/* VEX_p is a SIMD prefix id */
+
+	if (insn->prefixes.bytes[3])
+		return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+	return 0;
+}
+
 /* Offset of each field from kaddr */
 static inline int insn_offset_rex_prefix(struct insn *insn)
 {
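
A short, hedged usage sketch of the x86 instruction decoder with the new
helper above; the wrapper function is invented for illustration and relies on
the decode stages running on demand (insn_get_opcode() parses the prefix
bytes first if that has not happened yet):

	#include <asm/insn.h>

	/* Return the last-prefix id of the instruction at kaddr. */
	static int last_pfx_id_of(const void *kaddr, int x86_64)
	{
		struct insn insn;

		insn_init(&insn, kaddr, x86_64);
		insn_get_opcode(&insn);		/* parses prefixes + opcode */

		return insn_last_prefix_id(&insn);
	}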
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18ce6ead..3a16c1483b45 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
 
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:"
-		JUMP_LABEL_INITIAL_NOP
+		STATIC_KEY_INITIAL_NOP
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
 
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
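
These MSRs back the LBR (Last Branch Record) ring buffer used by the
branch-stack sampling support added in this series. A hedged sketch of a
Nehalem-style LBR walk follows -- the depth constant and the function are
illustrative; the real per-model logic lives in perf_event_intel_lbr.c:

	#define LBR_NR	16	/* illustrative; real depth is per CPU model */

	/* Read the LBR ring, newest entry first, into from[]/to[]. */
	static void read_lbr_nhm(u64 *from, u64 *to)
	{
		u64 tos;
		int i;

		rdmsrl(MSR_LBR_TOS, tos);	/* top-of-stack index */

		for (i = 0; i < LBR_NR; i++) {
			unsigned long idx = (tos - i) & (LBR_NR - 1);

			rdmsrl(MSR_LBR_NHM_FROM + idx, from[i]);
			rdmsrl(MSR_LBR_NHM_TO + idx, to[i]);
		}
	}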
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9a74fb..c0180fd372d2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
 	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
 
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
 
 static inline u64 paravirt_steal_clock(int cpu)
 {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce432b1c2..e8fb2c7a5f4f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
-#define PERF_EVENT_INDEX_OFFSET			0
-
 /*
  * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
  * This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a9..532d2e090e6f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_DOUBLEFAULT)	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0a44b90602b0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+		if (!check_tsc_unstable())
+			sched_clock_stable = 1;
 	}
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce1040b11..0a18d16cb58d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
| 26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
| 27 | #include <linux/device.h> | ||
| 27 | 28 | ||
| 28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
| 29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
| @@ -31,6 +32,7 @@ | |||
| 31 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
| 32 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
| 33 | #include <asm/alternative.h> | 34 | #include <asm/alternative.h> |
| 35 | #include <asm/timer.h> | ||
| 34 | 36 | ||
| 35 | #include "perf_event.h" | 37 | #include "perf_event.h" |
| 36 | 38 | ||
| @@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) | |||
| 351 | return 0; | 353 | return 0; |
| 352 | } | 354 | } |
| 353 | 355 | ||
| 356 | /* | ||
| 357 | * check that branch_sample_type is compatible with | ||
| 358 | * settings needed for precise_ip > 1 which implies | ||
| 359 | * using the LBR to capture ALL taken branches at the | ||
| 360 | * priv levels of the measurement | ||
| 361 | */ | ||
| 362 | static inline int precise_br_compat(struct perf_event *event) | ||
| 363 | { | ||
| 364 | u64 m = event->attr.branch_sample_type; | ||
| 365 | u64 b = 0; | ||
| 366 | |||
| 367 | /* must capture all branches */ | ||
| 368 | if (!(m & PERF_SAMPLE_BRANCH_ANY)) | ||
| 369 | return 0; | ||
| 370 | |||
| 371 | m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; | ||
| 372 | |||
| 373 | if (!event->attr.exclude_user) | ||
| 374 | b |= PERF_SAMPLE_BRANCH_USER; | ||
| 375 | |||
| 376 | if (!event->attr.exclude_kernel) | ||
| 377 | b |= PERF_SAMPLE_BRANCH_KERNEL; | ||
| 378 | |||
| 379 | /* | ||
| 380 | * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 | ||
| 381 | */ | ||
| 382 | |||
| 383 | return m == b; | ||
| 384 | } | ||
| 385 | |||
| 354 | int x86_pmu_hw_config(struct perf_event *event) | 386 | int x86_pmu_hw_config(struct perf_event *event) |
| 355 | { | 387 | { |
| 356 | if (event->attr.precise_ip) { | 388 | if (event->attr.precise_ip) { |
| @@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
| 367 | 399 | ||
| 368 | if (event->attr.precise_ip > precise) | 400 | if (event->attr.precise_ip > precise) |
| 369 | return -EOPNOTSUPP; | 401 | return -EOPNOTSUPP; |
| 402 | /* | ||
| 403 | * check that PEBS LBR correction does not conflict with | ||
| 404 | * whatever the user is asking with attr->branch_sample_type | ||
| 405 | */ | ||
| 406 | if (event->attr.precise_ip > 1) { | ||
| 407 | u64 *br_type = &event->attr.branch_sample_type; | ||
| 408 | |||
| 409 | if (has_branch_stack(event)) { | ||
| 410 | if (!precise_br_compat(event)) | ||
| 411 | return -EOPNOTSUPP; | ||
| 412 | |||
| 413 | /* branch_sample_type is compatible */ | ||
| 414 | |||
| 415 | } else { | ||
| 416 | /* | ||
| 417 | * user did not specify branch_sample_type | ||
| 418 | * | ||
| 419 | * For PEBS fixups, we capture all | ||
| 420 | * the branches at the priv level of the | ||
| 421 | * event. | ||
| 422 | */ | ||
| 423 | *br_type = PERF_SAMPLE_BRANCH_ANY; | ||
| 424 | |||
| 425 | if (!event->attr.exclude_user) | ||
| 426 | *br_type |= PERF_SAMPLE_BRANCH_USER; | ||
| 427 | |||
| 428 | if (!event->attr.exclude_kernel) | ||
| 429 | *br_type |= PERF_SAMPLE_BRANCH_KERNEL; | ||
| 430 | } | ||
| 431 | } | ||
| 370 | } | 432 | } |
| 371 | 433 | ||
| 372 | /* | 434 | /* |
| @@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
| 424 | /* mark unused */ | 486 | /* mark unused */ |
| 425 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 487 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
| 426 | 488 | ||
| 489 | /* mark not used */ | ||
| 490 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
| 491 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
| 492 | |||
| 427 | return x86_pmu.hw_config(event); | 493 | return x86_pmu.hw_config(event); |
| 428 | } | 494 | } |
| 429 | 495 | ||
| @@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
| 1210 | break; | 1276 | break; |
| 1211 | 1277 | ||
| 1212 | case CPU_STARTING: | 1278 | case CPU_STARTING: |
| 1279 | if (x86_pmu.attr_rdpmc) | ||
| 1280 | set_in_cr4(X86_CR4_PCE); | ||
| 1213 | if (x86_pmu.cpu_starting) | 1281 | if (x86_pmu.cpu_starting) |
| 1214 | x86_pmu.cpu_starting(cpu); | 1282 | x86_pmu.cpu_starting(cpu); |
| 1215 | break; | 1283 | break; |
| @@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void) | |||
| 1319 | } | 1387 | } |
| 1320 | } | 1388 | } |
| 1321 | 1389 | ||
| 1390 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
| 1391 | |||
| 1322 | pr_info("... version: %d\n", x86_pmu.version); | 1392 | pr_info("... version: %d\n", x86_pmu.version); |
| 1323 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1393 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
| 1324 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1394 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
| @@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
| 1542 | return err; | 1612 | return err; |
| 1543 | } | 1613 | } |
| 1544 | 1614 | ||
| 1615 | static int x86_pmu_event_idx(struct perf_event *event) | ||
| 1616 | { | ||
| 1617 | int idx = event->hw.idx; | ||
| 1618 | |||
| 1619 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | ||
| 1620 | idx -= X86_PMC_IDX_FIXED; | ||
| 1621 | idx |= 1 << 30; | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | return idx + 1; | ||
| 1625 | } | ||
| 1626 | |||
| 1627 | static ssize_t get_attr_rdpmc(struct device *cdev, | ||
| 1628 | struct device_attribute *attr, | ||
| 1629 | char *buf) | ||
| 1630 | { | ||
| 1631 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | static void change_rdpmc(void *info) | ||
| 1635 | { | ||
| 1636 | bool enable = !!(unsigned long)info; | ||
| 1637 | |||
| 1638 | if (enable) | ||
| 1639 | set_in_cr4(X86_CR4_PCE); | ||
| 1640 | else | ||
| 1641 | clear_in_cr4(X86_CR4_PCE); | ||
| 1642 | } | ||
| 1643 | |||
| 1644 | static ssize_t set_attr_rdpmc(struct device *cdev, | ||
| 1645 | struct device_attribute *attr, | ||
| 1646 | const char *buf, size_t count) | ||
| 1647 | { | ||
| 1648 | unsigned long val = simple_strtoul(buf, NULL, 0); | ||
| 1649 | |||
| 1650 | if (!!val != !!x86_pmu.attr_rdpmc) { | ||
| 1651 | x86_pmu.attr_rdpmc = !!val; | ||
| 1652 | smp_call_function(change_rdpmc, (void *)val, 1); | ||
| 1653 | } | ||
| 1654 | |||
| 1655 | return count; | ||
| 1656 | } | ||
| 1657 | |||
| 1658 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); | ||
| 1659 | |||
| 1660 | static struct attribute *x86_pmu_attrs[] = { | ||
| 1661 | &dev_attr_rdpmc.attr, | ||
| 1662 | NULL, | ||
| 1663 | }; | ||
| 1664 | |||
| 1665 | static struct attribute_group x86_pmu_attr_group = { | ||
| 1666 | .attrs = x86_pmu_attrs, | ||
| 1667 | }; | ||
| 1668 | |||
| 1669 | static const struct attribute_group *x86_pmu_attr_groups[] = { | ||
| 1670 | &x86_pmu_attr_group, | ||
| 1671 | NULL, | ||
| 1672 | }; | ||
| 1673 | |||
| 1674 | static void x86_pmu_flush_branch_stack(void) | ||
| 1675 | { | ||
| 1676 | if (x86_pmu.flush_branch_stack) | ||
| 1677 | x86_pmu.flush_branch_stack(); | ||
| 1678 | } | ||
| 1679 | |||
| 1545 | static struct pmu pmu = { | 1680 | static struct pmu pmu = { |
| 1546 | .pmu_enable = x86_pmu_enable, | 1681 | .pmu_enable = x86_pmu_enable, |
| 1547 | .pmu_disable = x86_pmu_disable, | 1682 | .pmu_disable = x86_pmu_disable, |
| 1683 | |||
| 1684 | .attr_groups = x86_pmu_attr_groups, | ||
| 1548 | 1685 | ||
| 1549 | .event_init = x86_pmu_event_init, | 1686 | .event_init = x86_pmu_event_init, |
| 1550 | 1687 | ||
| 1551 | .add = x86_pmu_add, | 1688 | .add = x86_pmu_add, |
| 1552 | .del = x86_pmu_del, | 1689 | .del = x86_pmu_del, |
| 1553 | .start = x86_pmu_start, | 1690 | .start = x86_pmu_start, |
| 1554 | .stop = x86_pmu_stop, | 1691 | .stop = x86_pmu_stop, |
| 1555 | .read = x86_pmu_read, | 1692 | .read = x86_pmu_read, |
| 1556 | 1693 | ||
| 1557 | .start_txn = x86_pmu_start_txn, | 1694 | .start_txn = x86_pmu_start_txn, |
| 1558 | .cancel_txn = x86_pmu_cancel_txn, | 1695 | .cancel_txn = x86_pmu_cancel_txn, |
| 1559 | .commit_txn = x86_pmu_commit_txn, | 1696 | .commit_txn = x86_pmu_commit_txn, |
| 1697 | |||
| 1698 | .event_idx = x86_pmu_event_idx, | ||
| 1699 | .flush_branch_stack = x86_pmu_flush_branch_stack, | ||
| 1560 | }; | 1700 | }; |
| 1561 | 1701 | ||
| 1702 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
| 1703 | { | ||
| 1704 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
| 1705 | return; | ||
| 1706 | |||
| 1707 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
| 1708 | return; | ||
| 1709 | |||
| 1710 | userpg->time_mult = this_cpu_read(cyc2ns); | ||
| 1711 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | ||
| 1712 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | ||
| 1713 | } | ||
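The consumer side of these fields lives in userspace: a TSC value read inside the control page's seqlock window is converted to perf time using the published mult/shift/offset. A sketch under those assumptions (the split multiply keeps the intermediate product from overflowing 64 bits for large cycle counts; seqlock handling omitted):

	#include <stdint.h>
	#include <linux/perf_event.h>	/* struct perf_event_mmap_page */

	static uint64_t tsc_to_perf_time(volatile struct perf_event_mmap_page *pc,
					 uint64_t cyc)
	{
		uint64_t quot = cyc >> pc->time_shift;
		uint64_t rem  = cyc & (((uint64_t)1 << pc->time_shift) - 1);

		return pc->time_offset + quot * pc->time_mult +
		       ((rem * pc->time_mult) >> pc->time_shift);
	}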
| 1714 | |||
| 1562 | /* | 1715 | /* |
| 1563 | * callchain support | 1716 | * callchain support |
| 1564 | */ | 1717 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc72..8484e77c211e 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
| @@ -33,6 +33,7 @@ enum extra_reg_type { | |||
| 33 | 33 | ||
| 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
| 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
| 36 | EXTRA_REG_LBR = 2, /* lbr_select */ | ||
| 36 | 37 | ||
| 37 | EXTRA_REG_MAX /* number of entries needed */ | 38 | EXTRA_REG_MAX /* number of entries needed */ |
| 38 | }; | 39 | }; |
| @@ -130,6 +131,8 @@ struct cpu_hw_events { | |||
| 130 | void *lbr_context; | 131 | void *lbr_context; |
| 131 | struct perf_branch_stack lbr_stack; | 132 | struct perf_branch_stack lbr_stack; |
| 132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 133 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
| 134 | struct er_account *lbr_sel; | ||
| 135 | u64 br_sel; | ||
| 133 | 136 | ||
| 134 | /* | 137 | /* |
| 135 | * Intel host/guest exclude bits | 138 | * Intel host/guest exclude bits |
| @@ -268,6 +271,29 @@ struct x86_pmu_quirk { | |||
| 268 | void (*func)(void); | 271 | void (*func)(void); |
| 269 | }; | 272 | }; |
| 270 | 273 | ||
| 274 | union x86_pmu_config { | ||
| 275 | struct { | ||
| 276 | u64 event:8, | ||
| 277 | umask:8, | ||
| 278 | usr:1, | ||
| 279 | os:1, | ||
| 280 | edge:1, | ||
| 281 | pc:1, | ||
| 282 | interrupt:1, | ||
| 283 | __reserved1:1, | ||
| 284 | en:1, | ||
| 285 | inv:1, | ||
| 286 | cmask:8, | ||
| 287 | event2:4, | ||
| 288 | __reserved2:4, | ||
| 289 | go:1, | ||
| 290 | ho:1; | ||
| 291 | } bits; | ||
| 292 | u64 value; | ||
| 293 | }; | ||
| 294 | |||
| 295 | #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value | ||
| 296 | |||
| 271 | /* | 297 | /* |
| 272 | * struct x86_pmu - generic x86 pmu | 298 | * struct x86_pmu - generic x86 pmu |
| 273 | */ | 299 | */ |
| @@ -309,10 +335,19 @@ struct x86_pmu { | |||
| 309 | struct x86_pmu_quirk *quirks; | 335 | struct x86_pmu_quirk *quirks; |
| 310 | int perfctr_second_write; | 336 | int perfctr_second_write; |
| 311 | 337 | ||
| 338 | /* | ||
| 339 | * sysfs attrs | ||
| 340 | */ | ||
| 341 | int attr_rdpmc; | ||
| 342 | |||
| 343 | /* | ||
| 344 | * CPU Hotplug hooks | ||
| 345 | */ | ||
| 312 | int (*cpu_prepare)(int cpu); | 346 | int (*cpu_prepare)(int cpu); |
| 313 | void (*cpu_starting)(int cpu); | 347 | void (*cpu_starting)(int cpu); |
| 314 | void (*cpu_dying)(int cpu); | 348 | void (*cpu_dying)(int cpu); |
| 315 | void (*cpu_dead)(int cpu); | 349 | void (*cpu_dead)(int cpu); |
| 350 | void (*flush_branch_stack)(void); | ||
| 316 | 351 | ||
| 317 | /* | 352 | /* |
| 318 | * Intel Arch Perfmon v2+ | 353 | * Intel Arch Perfmon v2+ |
| @@ -334,6 +369,8 @@ struct x86_pmu { | |||
| 334 | */ | 369 | */ |
| 335 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | 370 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
| 336 | int lbr_nr; /* hardware stack size */ | 371 | int lbr_nr; /* hardware stack size */ |
| 372 | u64 lbr_sel_mask; /* LBR_SELECT valid bits */ | ||
| 373 | const int *lbr_sel_map; /* lbr_select mappings */ | ||
| 337 | 374 | ||
| 338 | /* | 375 | /* |
| 339 | * Extra registers for events | 376 | * Extra registers for events |
| @@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint; | |||
| 447 | 484 | ||
| 448 | extern struct event_constraint unconstrained; | 485 | extern struct event_constraint unconstrained; |
| 449 | 486 | ||
| 487 | static inline bool kernel_ip(unsigned long ip) | ||
| 488 | { | ||
| 489 | #ifdef CONFIG_X86_32 | ||
| 490 | return ip > PAGE_OFFSET; | ||
| 491 | #else | ||
| 492 | return (long)ip < 0; | ||
| 493 | #endif | ||
| 494 | } | ||
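(This helper moves here from perf_event_intel_ds.c so the LBR code below can share it. The 64-bit test works because kernel addresses live in the upper canonical half, where bit 63 is set, making a signed compare against zero equivalent to a range check against the kernel/user split.)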
| 495 | |||
| 450 | #ifdef CONFIG_CPU_SUP_AMD | 496 | #ifdef CONFIG_CPU_SUP_AMD |
| 451 | 497 | ||
| 452 | int amd_pmu_init(void); | 498 | int amd_pmu_init(void); |
| @@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); | |||
| 527 | 573 | ||
| 528 | void intel_pmu_lbr_init_atom(void); | 574 | void intel_pmu_lbr_init_atom(void); |
| 529 | 575 | ||
| 576 | void intel_pmu_lbr_init_snb(void); | ||
| 577 | |||
| 578 | int intel_pmu_setup_lbr_filter(struct perf_event *event); | ||
| 579 | |||
| 530 | int p4_pmu_init(void); | 580 | int p4_pmu_init(void); |
| 531 | 581 | ||
| 532 | int p6_pmu_init(void); | 582 | int p6_pmu_init(void); |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67250a52430b..dd002faff7a6 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
| 139 | if (ret) | 139 | if (ret) |
| 140 | return ret; | 140 | return ret; |
| 141 | 141 | ||
| 142 | if (has_branch_stack(event)) | ||
| 143 | return -EOPNOTSUPP; | ||
| 144 | |||
| 142 | if (event->attr.exclude_host && event->attr.exclude_guest) | 145 | if (event->attr.exclude_host && event->attr.exclude_guest) |
| 143 | /* | 146 | /* |
| 144 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | 147 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 61d4f79a550e..6a84e7f28f05 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids | |||
| 728 | }, | 728 | }, |
| 729 | }; | 729 | }; |
| 730 | 730 | ||
| 731 | static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) | ||
| 732 | { | ||
| 733 | /* user explicitly requested branch sampling */ | ||
| 734 | if (has_branch_stack(event)) | ||
| 735 | return true; | ||
| 736 | |||
| 737 | /* implicit branch sampling to correct PEBS skid */ | ||
| 738 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 739 | return true; | ||
| 740 | |||
| 741 | return false; | ||
| 742 | } | ||
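(The second case is what makes PEBS skid correction work: with precise_ip > 1, intel_pmu_pebs_fixup_ip() walks forward from the last LBR "to" address to locate the precise IP, so the event implicitly needs the LBR even though the user never asked for branch sampling.)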
| 743 | |||
| 731 | static void intel_pmu_disable_all(void) | 744 | static void intel_pmu_disable_all(void) |
| 732 | { | 745 | { |
| 733 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 746 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| @@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
| 882 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); | 895 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
| 883 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); | 896 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
| 884 | 897 | ||
| 898 | /* | ||
| 899 | * must disable before any actual event | ||
| 900 | * because any event may be combined with LBR | ||
| 901 | */ | ||
| 902 | if (intel_pmu_needs_lbr_smpl(event)) | ||
| 903 | intel_pmu_lbr_disable(event); | ||
| 904 | |||
| 885 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 905 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 886 | intel_pmu_disable_fixed(hwc); | 906 | intel_pmu_disable_fixed(hwc); |
| 887 | return; | 907 | return; |
| @@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
| 936 | intel_pmu_enable_bts(hwc->config); | 956 | intel_pmu_enable_bts(hwc->config); |
| 937 | return; | 957 | return; |
| 938 | } | 958 | } |
| 959 | /* | ||
| 960 | * must be enabled before any actual event | ||
| 961 | * because any event may be combined with LBR | ||
| 962 | */ | ||
| 963 | if (intel_pmu_needs_lbr_smpl(event)) | ||
| 964 | intel_pmu_lbr_enable(event); | ||
| 939 | 965 | ||
| 940 | if (event->attr.exclude_host) | 966 | if (event->attr.exclude_host) |
| 941 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); | 967 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); |
| @@ -1058,6 +1084,9 @@ again: | |||
| 1058 | 1084 | ||
| 1059 | data.period = event->hw.last_period; | 1085 | data.period = event->hw.last_period; |
| 1060 | 1086 | ||
| 1087 | if (has_branch_stack(event)) | ||
| 1088 | data.br_stack = &cpuc->lbr_stack; | ||
| 1089 | |||
| 1061 | if (perf_event_overflow(event, &data, regs)) | 1090 | if (perf_event_overflow(event, &data, regs)) |
| 1062 | x86_pmu_stop(event, 0); | 1091 | x86_pmu_stop(event, 0); |
| 1063 | } | 1092 | } |
| @@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | |||
| 1124 | */ | 1153 | */ |
| 1125 | static struct event_constraint * | 1154 | static struct event_constraint * |
| 1126 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | 1155 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
| 1127 | struct perf_event *event) | 1156 | struct perf_event *event, |
| 1157 | struct hw_perf_event_extra *reg) | ||
| 1128 | { | 1158 | { |
| 1129 | struct event_constraint *c = &emptyconstraint; | 1159 | struct event_constraint *c = &emptyconstraint; |
| 1130 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; | ||
| 1131 | struct er_account *era; | 1160 | struct er_account *era; |
| 1132 | unsigned long flags; | 1161 | unsigned long flags; |
| 1133 | int orig_idx = reg->idx; | 1162 | int orig_idx = reg->idx; |
| 1134 | 1163 | ||
| 1135 | /* already allocated shared msr */ | 1164 | /* already allocated shared msr */ |
| 1136 | if (reg->alloc) | 1165 | if (reg->alloc) |
| 1137 | return &unconstrained; | 1166 | return NULL; /* call x86_get_event_constraint() */ |
| 1138 | 1167 | ||
| 1139 | again: | 1168 | again: |
| 1140 | era = &cpuc->shared_regs->regs[reg->idx]; | 1169 | era = &cpuc->shared_regs->regs[reg->idx]; |
| @@ -1157,14 +1186,10 @@ again: | |||
| 1157 | reg->alloc = 1; | 1186 | reg->alloc = 1; |
| 1158 | 1187 | ||
| 1159 | /* | 1188 | /* |
| 1160 | * All events using extra_reg are unconstrained. | 1189 | * need to call x86_get_event_constraint() |
| 1161 | * Avoids calling x86_get_event_constraints() | 1190 | * to check if the associated event has constraints |
| 1162 | * | ||
| 1163 | * Must revisit if extra_reg controlling events | ||
| 1164 | * ever have constraints. Worst case we go through | ||
| 1165 | * the regular event constraint table. | ||
| 1166 | */ | 1191 | */ |
| 1167 | c = &unconstrained; | 1192 | c = NULL; |
| 1168 | } else if (intel_try_alt_er(event, orig_idx)) { | 1193 | } else if (intel_try_alt_er(event, orig_idx)) { |
| 1169 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1194 | raw_spin_unlock_irqrestore(&era->lock, flags); |
| 1170 | goto again; | 1195 | goto again; |
| @@ -1201,11 +1226,23 @@ static struct event_constraint * | |||
| 1201 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | 1226 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
| 1202 | struct perf_event *event) | 1227 | struct perf_event *event) |
| 1203 | { | 1228 | { |
| 1204 | struct event_constraint *c = NULL; | 1229 | struct event_constraint *c = NULL, *d; |
| 1205 | 1230 | struct hw_perf_event_extra *xreg, *breg; | |
| 1206 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | 1231 | |
| 1207 | c = __intel_shared_reg_get_constraints(cpuc, event); | 1232 | xreg = &event->hw.extra_reg; |
| 1208 | 1233 | if (xreg->idx != EXTRA_REG_NONE) { | |
| 1234 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); | ||
| 1235 | if (c == &emptyconstraint) | ||
| 1236 | return c; | ||
| 1237 | } | ||
| 1238 | breg = &event->hw.branch_reg; | ||
| 1239 | if (breg->idx != EXTRA_REG_NONE) { | ||
| 1240 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); | ||
| 1241 | if (d == &emptyconstraint) { | ||
| 1242 | __intel_shared_reg_put_constraints(cpuc, xreg); | ||
| 1243 | c = d; | ||
| 1244 | } | ||
| 1245 | } | ||
| 1209 | return c; | 1246 | return c; |
| 1210 | } | 1247 | } |
| 1211 | 1248 | ||
| @@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | |||
| 1253 | reg = &event->hw.extra_reg; | 1290 | reg = &event->hw.extra_reg; |
| 1254 | if (reg->idx != EXTRA_REG_NONE) | 1291 | if (reg->idx != EXTRA_REG_NONE) |
| 1255 | __intel_shared_reg_put_constraints(cpuc, reg); | 1292 | __intel_shared_reg_put_constraints(cpuc, reg); |
| 1293 | |||
| 1294 | reg = &event->hw.branch_reg; | ||
| 1295 | if (reg->idx != EXTRA_REG_NONE) | ||
| 1296 | __intel_shared_reg_put_constraints(cpuc, reg); | ||
| 1256 | } | 1297 | } |
| 1257 | 1298 | ||
| 1258 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1299 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
| @@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
| 1288 | * | 1329 | * |
| 1289 | * Thereby we gain a PEBS capable cycle counter. | 1330 | * Thereby we gain a PEBS capable cycle counter. |
| 1290 | */ | 1331 | */ |
| 1291 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | 1332 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
| 1333 | |||
| 1292 | 1334 | ||
| 1293 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | 1335 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 1294 | event->hw.config = alt_config; | 1336 | event->hw.config = alt_config; |
| 1295 | } | 1337 | } |
| 1296 | 1338 | ||
| 1339 | if (intel_pmu_needs_lbr_smpl(event)) { | ||
| 1340 | ret = intel_pmu_setup_lbr_filter(event); | ||
| 1341 | if (ret) | ||
| 1342 | return ret; | ||
| 1343 | } | ||
| 1344 | |||
| 1297 | if (event->attr.type != PERF_TYPE_RAW) | 1345 | if (event->attr.type != PERF_TYPE_RAW) |
| 1298 | return 0; | 1346 | return 0; |
| 1299 | 1347 | ||
| @@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) | |||
| 1432 | { | 1480 | { |
| 1433 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1481 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 1434 | 1482 | ||
| 1435 | if (!x86_pmu.extra_regs) | 1483 | if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) |
| 1436 | return NOTIFY_OK; | 1484 | return NOTIFY_OK; |
| 1437 | 1485 | ||
| 1438 | cpuc->shared_regs = allocate_shared_regs(cpu); | 1486 | cpuc->shared_regs = allocate_shared_regs(cpu); |
| @@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu) | |||
| 1454 | */ | 1502 | */ |
| 1455 | intel_pmu_lbr_reset(); | 1503 | intel_pmu_lbr_reset(); |
| 1456 | 1504 | ||
| 1457 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) | 1505 | cpuc->lbr_sel = NULL; |
| 1506 | |||
| 1507 | if (!cpuc->shared_regs) | ||
| 1458 | return; | 1508 | return; |
| 1459 | 1509 | ||
| 1460 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1510 | if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { |
| 1461 | struct intel_shared_regs *pc; | 1511 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
| 1512 | struct intel_shared_regs *pc; | ||
| 1462 | 1513 | ||
| 1463 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 1514 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
| 1464 | if (pc && pc->core_id == core_id) { | 1515 | if (pc && pc->core_id == core_id) { |
| 1465 | cpuc->kfree_on_online = cpuc->shared_regs; | 1516 | cpuc->kfree_on_online = cpuc->shared_regs; |
| 1466 | cpuc->shared_regs = pc; | 1517 | cpuc->shared_regs = pc; |
| 1467 | break; | 1518 | break; |
| 1519 | } | ||
| 1468 | } | 1520 | } |
| 1521 | cpuc->shared_regs->core_id = core_id; | ||
| 1522 | cpuc->shared_regs->refcnt++; | ||
| 1469 | } | 1523 | } |
| 1470 | 1524 | ||
| 1471 | cpuc->shared_regs->core_id = core_id; | 1525 | if (x86_pmu.lbr_sel_map) |
| 1472 | cpuc->shared_regs->refcnt++; | 1526 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
| 1473 | } | 1527 | } |
| 1474 | 1528 | ||
| 1475 | static void intel_pmu_cpu_dying(int cpu) | 1529 | static void intel_pmu_cpu_dying(int cpu) |
| @@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) | |||
| 1487 | fini_debug_store_on_cpu(cpu); | 1541 | fini_debug_store_on_cpu(cpu); |
| 1488 | } | 1542 | } |
| 1489 | 1543 | ||
| 1544 | static void intel_pmu_flush_branch_stack(void) | ||
| 1545 | { | ||
| 1546 | /* | ||
| 1547 | * Intel LBR does not tag entries with the | ||
| 1548 | * PID of the current task, so we need to | ||
| 1549 | * flush it on context switch. | ||
| 1550 | * For now, we simply reset it. | ||
| 1551 | */ | ||
| 1552 | if (x86_pmu.lbr_nr) | ||
| 1553 | intel_pmu_lbr_reset(); | ||
| 1554 | } | ||
| 1555 | |||
| 1490 | static __initconst const struct x86_pmu intel_pmu = { | 1556 | static __initconst const struct x86_pmu intel_pmu = { |
| 1491 | .name = "Intel", | 1557 | .name = "Intel", |
| 1492 | .handle_irq = intel_pmu_handle_irq, | 1558 | .handle_irq = intel_pmu_handle_irq, |
| @@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
| 1514 | .cpu_starting = intel_pmu_cpu_starting, | 1580 | .cpu_starting = intel_pmu_cpu_starting, |
| 1515 | .cpu_dying = intel_pmu_cpu_dying, | 1581 | .cpu_dying = intel_pmu_cpu_dying, |
| 1516 | .guest_get_msrs = intel_guest_get_msrs, | 1582 | .guest_get_msrs = intel_guest_get_msrs, |
| 1583 | .flush_branch_stack = intel_pmu_flush_branch_stack, | ||
| 1517 | }; | 1584 | }; |
| 1518 | 1585 | ||
| 1519 | static __init void intel_clovertown_quirk(void) | 1586 | static __init void intel_clovertown_quirk(void) |
| @@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void) | |||
| 1690 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1757 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
| 1691 | 1758 | ||
| 1692 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1759 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 1693 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1760 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 1761 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
| 1694 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1762 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 1695 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1763 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 1764 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
| 1696 | 1765 | ||
| 1697 | x86_add_quirk(intel_nehalem_quirk); | 1766 | x86_add_quirk(intel_nehalem_quirk); |
| 1698 | 1767 | ||
| @@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void) | |||
| 1727 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1796 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |
| 1728 | 1797 | ||
| 1729 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1798 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 1730 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1799 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 1800 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
| 1731 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1801 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 1732 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1802 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 1803 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
| 1733 | 1804 | ||
| 1734 | pr_cont("Westmere events, "); | 1805 | pr_cont("Westmere events, "); |
| 1735 | break; | 1806 | break; |
| @@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void) | |||
| 1740 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1811 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 1741 | sizeof(hw_cache_event_ids)); | 1812 | sizeof(hw_cache_event_ids)); |
| 1742 | 1813 | ||
| 1743 | intel_pmu_lbr_init_nhm(); | 1814 | intel_pmu_lbr_init_snb(); |
| 1744 | 1815 | ||
| 1745 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1816 | x86_pmu.event_constraints = intel_snb_event_constraints; |
| 1746 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; | 1817 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
| @@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void) | |||
| 1750 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | 1821 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; |
| 1751 | 1822 | ||
| 1752 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | 1823 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
| 1753 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1824 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 1825 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
| 1754 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ | 1826 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ |
| 1755 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; | 1827 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 1828 | X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); | ||
| 1756 | 1829 | ||
| 1757 | pr_cont("SandyBridge events, "); | 1830 | pr_cont("SandyBridge events, "); |
| 1758 | break; | 1831 | break; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index d6bd49faa40c..7f64df19e7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
| 4 | 4 | ||
| 5 | #include <asm/perf_event.h> | 5 | #include <asm/perf_event.h> |
| 6 | #include <asm/insn.h> | ||
| 6 | 7 | ||
| 7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
| 8 | 9 | ||
| @@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) | |||
| 439 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | 440 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; |
| 440 | 441 | ||
| 441 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | 442 | cpuc->pebs_enabled |= 1ULL << hwc->idx; |
| 442 | |||
| 443 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 444 | intel_pmu_lbr_enable(event); | ||
| 445 | } | 443 | } |
| 446 | 444 | ||
| 447 | void intel_pmu_pebs_disable(struct perf_event *event) | 445 | void intel_pmu_pebs_disable(struct perf_event *event) |
| @@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) | |||
| 454 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | 452 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
| 455 | 453 | ||
| 456 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | 454 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; |
| 457 | |||
| 458 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 459 | intel_pmu_lbr_disable(event); | ||
| 460 | } | 455 | } |
| 461 | 456 | ||
| 462 | void intel_pmu_pebs_enable_all(void) | 457 | void intel_pmu_pebs_enable_all(void) |
| @@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) | |||
| 475 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | 470 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); |
| 476 | } | 471 | } |
| 477 | 472 | ||
| 478 | #include <asm/insn.h> | ||
| 479 | |||
| 480 | static inline bool kernel_ip(unsigned long ip) | ||
| 481 | { | ||
| 482 | #ifdef CONFIG_X86_32 | ||
| 483 | return ip > PAGE_OFFSET; | ||
| 484 | #else | ||
| 485 | return (long)ip < 0; | ||
| 486 | #endif | ||
| 487 | } | ||
| 488 | |||
| 489 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | 473 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) |
| 490 | { | 474 | { |
| 491 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 475 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| @@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
| 572 | * both formats and we don't use the other fields in this | 556 | * both formats and we don't use the other fields in this |
| 573 | * routine. | 557 | * routine. |
| 574 | */ | 558 | */ |
| 559 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 575 | struct pebs_record_core *pebs = __pebs; | 560 | struct pebs_record_core *pebs = __pebs; |
| 576 | struct perf_sample_data data; | 561 | struct perf_sample_data data; |
| 577 | struct pt_regs regs; | 562 | struct pt_regs regs; |
| @@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
| 602 | else | 587 | else |
| 603 | regs.flags &= ~PERF_EFLAGS_EXACT; | 588 | regs.flags &= ~PERF_EFLAGS_EXACT; |
| 604 | 589 | ||
| 590 | if (has_branch_stack(event)) | ||
| 591 | data.br_stack = &cpuc->lbr_stack; | ||
| 592 | |||
| 605 | if (perf_event_overflow(event, &data, ®s)) | 593 | if (perf_event_overflow(event, &data, ®s)) |
| 606 | x86_pmu_stop(event, 0); | 594 | x86_pmu_stop(event, 0); |
| 607 | } | 595 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 47a7e63bfe54..520b4265fcd2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <asm/perf_event.h> | 4 | #include <asm/perf_event.h> |
| 5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
| 6 | #include <asm/insn.h> | ||
| 6 | 7 | ||
| 7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
| 8 | 9 | ||
| @@ -14,6 +15,100 @@ enum { | |||
| 14 | }; | 15 | }; |
| 15 | 16 | ||
| 16 | /* | 17 | /* |
| 18 | * Intel LBR_SELECT bits | ||
| 19 | * Intel Vol3a, April 2011, Section 16.7 Table 16-10 | ||
| 20 | * | ||
| 21 | * Hardware branch filter (not available on all CPUs) | ||
| 22 | */ | ||
| 23 | #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ | ||
| 24 | #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ | ||
| 25 | #define LBR_JCC_BIT 2 /* do not capture conditional branches */ | ||
| 26 | #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ | ||
| 27 | #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ | ||
| 28 | #define LBR_RETURN_BIT 5 /* do not capture near returns */ | ||
| 29 | #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ | ||
| 30 | #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ | ||
| 31 | #define LBR_FAR_BIT 8 /* do not capture far branches */ | ||
| 32 | |||
| 33 | #define LBR_KERNEL (1 << LBR_KERNEL_BIT) | ||
| 34 | #define LBR_USER (1 << LBR_USER_BIT) | ||
| 35 | #define LBR_JCC (1 << LBR_JCC_BIT) | ||
| 36 | #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) | ||
| 37 | #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) | ||
| 38 | #define LBR_RETURN (1 << LBR_RETURN_BIT) | ||
| 39 | #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) | ||
| 40 | #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) | ||
| 41 | #define LBR_FAR (1 << LBR_FAR_BIT) | ||
| 42 | |||
| 43 | #define LBR_PLM (LBR_KERNEL | LBR_USER) | ||
| 44 | |||
| 45 | #define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ | ||
| 46 | #define LBR_NOT_SUPP -1 /* LBR filter not supported */ | ||
| 47 | #define LBR_IGN 0 /* ignored */ | ||
| 48 | |||
| 49 | #define LBR_ANY \ | ||
| 50 | (LBR_JCC |\ | ||
| 51 | LBR_REL_CALL |\ | ||
| 52 | LBR_IND_CALL |\ | ||
| 53 | LBR_RETURN |\ | ||
| 54 | LBR_REL_JMP |\ | ||
| 55 | LBR_IND_JMP |\ | ||
| 56 | LBR_FAR) | ||
| 57 | |||
| 58 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
| 59 | |||
| 60 | #define for_each_branch_sample_type(x) \ | ||
| 61 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ | ||
| 62 | (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) | ||
| 63 | |||
| 64 | /* | ||
| 65 | * x86 control flow change classification | ||
| 66 | * x86 control flow changes include branches, interrupts, traps, faults | ||
| 67 | */ | ||
| 68 | enum { | ||
| 69 | X86_BR_NONE = 0, /* unknown */ | ||
| 70 | |||
| 71 | X86_BR_USER = 1 << 0, /* branch target is user */ | ||
| 72 | X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ | ||
| 73 | |||
| 74 | X86_BR_CALL = 1 << 2, /* call */ | ||
| 75 | X86_BR_RET = 1 << 3, /* return */ | ||
| 76 | X86_BR_SYSCALL = 1 << 4, /* syscall */ | ||
| 77 | X86_BR_SYSRET = 1 << 5, /* syscall return */ | ||
| 78 | X86_BR_INT = 1 << 6, /* sw interrupt */ | ||
| 79 | X86_BR_IRET = 1 << 7, /* return from interrupt */ | ||
| 80 | X86_BR_JCC = 1 << 8, /* conditional */ | ||
| 81 | X86_BR_JMP = 1 << 9, /* jump */ | ||
| 82 | X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ | ||
| 83 | X86_BR_IND_CALL = 1 << 11,/* indirect calls */ | ||
| 84 | }; | ||
| 85 | |||
| 86 | #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) | ||
| 87 | |||
| 88 | #define X86_BR_ANY \ | ||
| 89 | (X86_BR_CALL |\ | ||
| 90 | X86_BR_RET |\ | ||
| 91 | X86_BR_SYSCALL |\ | ||
| 92 | X86_BR_SYSRET |\ | ||
| 93 | X86_BR_INT |\ | ||
| 94 | X86_BR_IRET |\ | ||
| 95 | X86_BR_JCC |\ | ||
| 96 | X86_BR_JMP |\ | ||
| 97 | X86_BR_IRQ |\ | ||
| 98 | X86_BR_IND_CALL) | ||
| 99 | |||
| 100 | #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) | ||
| 101 | |||
| 102 | #define X86_BR_ANY_CALL \ | ||
| 103 | (X86_BR_CALL |\ | ||
| 104 | X86_BR_IND_CALL |\ | ||
| 105 | X86_BR_SYSCALL |\ | ||
| 106 | X86_BR_IRQ |\ | ||
| 107 | X86_BR_INT) | ||
| 108 | |||
| 109 | static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); | ||
| 110 | |||
| 111 | /* | ||
| 17 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | 112 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI |
| 18 | * otherwise it becomes near impossible to get a reliable stack. | 113 | * otherwise it becomes near impossible to get a reliable stack. |
| 19 | */ | 114 | */ |
| @@ -21,6 +116,10 @@ enum { | |||
| 21 | static void __intel_pmu_lbr_enable(void) | 116 | static void __intel_pmu_lbr_enable(void) |
| 22 | { | 117 | { |
| 23 | u64 debugctl; | 118 | u64 debugctl; |
| 119 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 120 | |||
| 121 | if (cpuc->lbr_sel) | ||
| 122 | wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); | ||
| 24 | 123 | ||
| 25 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | 124 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); |
| 26 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | 125 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); |
| @@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) | |||
| 76 | * Reset the LBR stack if we changed task context to | 175 | * Reset the LBR stack if we changed task context to |
| 77 | * avoid data leaks. | 176 | * avoid data leaks. |
| 78 | */ | 177 | */ |
| 79 | |||
| 80 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | 178 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { |
| 81 | intel_pmu_lbr_reset(); | 179 | intel_pmu_lbr_reset(); |
| 82 | cpuc->lbr_context = event->ctx; | 180 | cpuc->lbr_context = event->ctx; |
| 83 | } | 181 | } |
| 182 | cpuc->br_sel = event->hw.branch_reg.reg; | ||
| 84 | 183 | ||
| 85 | cpuc->lbr_users++; | 184 | cpuc->lbr_users++; |
| 86 | } | 185 | } |
| @@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) | |||
| 95 | cpuc->lbr_users--; | 194 | cpuc->lbr_users--; |
| 96 | WARN_ON_ONCE(cpuc->lbr_users < 0); | 195 | WARN_ON_ONCE(cpuc->lbr_users < 0); |
| 97 | 196 | ||
| 98 | if (cpuc->enabled && !cpuc->lbr_users) | 197 | if (cpuc->enabled && !cpuc->lbr_users) { |
| 99 | __intel_pmu_lbr_disable(); | 198 | __intel_pmu_lbr_disable(); |
| 199 | /* avoid stale pointer */ | ||
| 200 | cpuc->lbr_context = NULL; | ||
| 201 | } | ||
| 100 | } | 202 | } |
| 101 | 203 | ||
| 102 | void intel_pmu_lbr_enable_all(void) | 204 | void intel_pmu_lbr_enable_all(void) |
| @@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) | |||
| 115 | __intel_pmu_lbr_disable(); | 217 | __intel_pmu_lbr_disable(); |
| 116 | } | 218 | } |
| 117 | 219 | ||
| 220 | /* | ||
| 221 | * TOS = most recently recorded branch | ||
| 222 | */ | ||
| 118 | static inline u64 intel_pmu_lbr_tos(void) | 223 | static inline u64 intel_pmu_lbr_tos(void) |
| 119 | { | 224 | { |
| 120 | u64 tos; | 225 | u64 tos; |
| @@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | |||
| 142 | 247 | ||
| 143 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | 248 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); |
| 144 | 249 | ||
| 145 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | 250 | cpuc->lbr_entries[i].from = msr_lastbranch.from; |
| 146 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | 251 | cpuc->lbr_entries[i].to = msr_lastbranch.to; |
| 147 | cpuc->lbr_entries[i].flags = 0; | 252 | cpuc->lbr_entries[i].mispred = 0; |
| 253 | cpuc->lbr_entries[i].predicted = 0; | ||
| 254 | cpuc->lbr_entries[i].reserved = 0; | ||
| 148 | } | 255 | } |
| 149 | cpuc->lbr_stack.nr = i; | 256 | cpuc->lbr_stack.nr = i; |
| 150 | } | 257 | } |
| 151 | 258 | ||
| 152 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
| 153 | |||
| 154 | /* | 259 | /* |
| 155 | * Due to lack of segmentation in Linux the effective address (offset) | 260 | * Due to lack of segmentation in Linux the effective address (offset) |
| 156 | * is the same as the linear address, allowing us to merge the LIP and EIP | 261 | * is the same as the linear address, allowing us to merge the LIP and EIP |
| @@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
| 165 | 270 | ||
| 166 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | 271 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
| 167 | unsigned long lbr_idx = (tos - i) & mask; | 272 | unsigned long lbr_idx = (tos - i) & mask; |
| 168 | u64 from, to, flags = 0; | 273 | u64 from, to, mis = 0, pred = 0; |
| 169 | 274 | ||
| 170 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | 275 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); |
| 171 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | 276 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); |
| 172 | 277 | ||
| 173 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | 278 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { |
| 174 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | 279 | mis = !!(from & LBR_FROM_FLAG_MISPRED); |
| 280 | pred = !mis; | ||
| 175 | from = (u64)((((s64)from) << 1) >> 1); | 281 | from = (u64)((((s64)from) << 1) >> 1); |
| 176 | } | 282 | } |
| 177 | 283 | ||
| 178 | cpuc->lbr_entries[i].from = from; | 284 | cpuc->lbr_entries[i].from = from; |
| 179 | cpuc->lbr_entries[i].to = to; | 285 | cpuc->lbr_entries[i].to = to; |
| 180 | cpuc->lbr_entries[i].flags = flags; | 286 | cpuc->lbr_entries[i].mispred = mis; |
| 287 | cpuc->lbr_entries[i].predicted = pred; | ||
| 288 | cpuc->lbr_entries[i].reserved = 0; | ||
| 181 | } | 289 | } |
| 182 | cpuc->lbr_stack.nr = i; | 290 | cpuc->lbr_stack.nr = i; |
| 183 | } | 291 | } |
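(The (u64)((((s64)from) << 1) >> 1) idiom strips the flag: the left shift discards the MISPRED bit 63, and the arithmetic right shift sign-extends from bit 62, restoring a canonical address.)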
| @@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) | |||
| 193 | intel_pmu_lbr_read_32(cpuc); | 301 | intel_pmu_lbr_read_32(cpuc); |
| 194 | else | 302 | else |
| 195 | intel_pmu_lbr_read_64(cpuc); | 303 | intel_pmu_lbr_read_64(cpuc); |
| 304 | |||
| 305 | intel_pmu_lbr_filter(cpuc); | ||
| 306 | } | ||
| 307 | |||
| 308 | /* | ||
| 309 | * SW filter is used: | ||
| 310 | * - in case there is no HW filter | ||
| 311 | * - in case the HW filter has errata or limitations | ||
| 312 | */ | ||
| 313 | static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | ||
| 314 | { | ||
| 315 | u64 br_type = event->attr.branch_sample_type; | ||
| 316 | int mask = 0; | ||
| 317 | |||
| 318 | if (br_type & PERF_SAMPLE_BRANCH_USER) | ||
| 319 | mask |= X86_BR_USER; | ||
| 320 | |||
| 321 | if (br_type & PERF_SAMPLE_BRANCH_KERNEL) | ||
| 322 | mask |= X86_BR_KERNEL; | ||
| 323 | |||
| 324 | /* we ignore BRANCH_HV here */ | ||
| 325 | |||
| 326 | if (br_type & PERF_SAMPLE_BRANCH_ANY) | ||
| 327 | mask |= X86_BR_ANY; | ||
| 328 | |||
| 329 | if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) | ||
| 330 | mask |= X86_BR_ANY_CALL; | ||
| 331 | |||
| 332 | if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) | ||
| 333 | mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; | ||
| 334 | |||
| 335 | if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) | ||
| 336 | mask |= X86_BR_IND_CALL; | ||
| 337 | /* | ||
| 338 | * stash the actual user request into reg; it may | ||
| 339 | * be used by the fixup code on some CPUs | ||
| 340 | */ | ||
| 341 | event->hw.branch_reg.reg = mask; | ||
| 342 | } | ||
| 343 | |||
| 344 | /* | ||
| 345 | * set up the HW LBR filter | ||
| 346 | * Used only when available; it may not be enough to disambiguate | ||
| 347 | * all branches and may need the help of the SW filter | ||
| 348 | */ | ||
| 349 | static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | ||
| 350 | { | ||
| 351 | struct hw_perf_event_extra *reg; | ||
| 352 | u64 br_type = event->attr.branch_sample_type; | ||
| 353 | u64 mask = 0, m; | ||
| 354 | u64 v; | ||
| 355 | |||
| 356 | for_each_branch_sample_type(m) { | ||
| 357 | if (!(br_type & m)) | ||
| 358 | continue; | ||
| 359 | |||
| 360 | v = x86_pmu.lbr_sel_map[m]; | ||
| 361 | if (v == LBR_NOT_SUPP) | ||
| 362 | return -EOPNOTSUPP; | ||
| 363 | |||
| 364 | if (v != LBR_IGN) | ||
| 365 | mask |= v; | ||
| 366 | } | ||
| 367 | reg = &event->hw.branch_reg; | ||
| 368 | reg->idx = EXTRA_REG_LBR; | ||
| 369 | |||
| 370 | /* LBR_SELECT operates in suppress mode so invert mask */ | ||
| 371 | reg->config = ~mask & x86_pmu.lbr_sel_mask; | ||
| 372 | |||
| 373 | return 0; | ||
| 374 | } | ||
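To make the suppress-mode inversion concrete: requesting PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_IND_CALL against the SNB map below accumulates mask = LBR_KERNEL | LBR_IND_CALL = 0x011, so reg->config becomes ~0x011 & 0x1ff = 0x1ee, i.e. every suppress bit except those two is set and the hardware records only kernel-level indirect calls.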
| 375 | |||
| 376 | int intel_pmu_setup_lbr_filter(struct perf_event *event) | ||
| 377 | { | ||
| 378 | int ret = 0; | ||
| 379 | |||
| 380 | /* | ||
| 381 | * no LBR on this PMU | ||
| 382 | */ | ||
| 383 | if (!x86_pmu.lbr_nr) | ||
| 384 | return -EOPNOTSUPP; | ||
| 385 | |||
| 386 | /* | ||
| 387 | * setup SW LBR filter | ||
| 388 | */ | ||
| 389 | intel_pmu_setup_sw_lbr_filter(event); | ||
| 390 | |||
| 391 | /* | ||
| 392 | * setup HW LBR filter, if any | ||
| 393 | */ | ||
| 394 | if (x86_pmu.lbr_sel_map) | ||
| 395 | ret = intel_pmu_setup_hw_lbr_filter(event); | ||
| 396 | |||
| 397 | return ret; | ||
| 196 | } | 398 | } |
| 197 | 399 | ||
| 400 | /* | ||
| 401 | * return the type of control flow change at address "from" | ||
| 402 | * the instruction is not necessarily a branch (e.g., in case of an interrupt). | ||
| 403 | * | ||
| 404 | * The branch type returned also includes the priv level of the | ||
| 405 | * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). | ||
| 406 | * | ||
| 407 | * If a branch type is unknown OR the instruction cannot be | ||
| 408 | * decoded (e.g., text page not present), then X86_BR_NONE is | ||
| 409 | * returned. | ||
| 410 | */ | ||
| 411 | static int branch_type(unsigned long from, unsigned long to) | ||
| 412 | { | ||
| 413 | struct insn insn; | ||
| 414 | void *addr; | ||
| 415 | int bytes, size = MAX_INSN_SIZE; | ||
| 416 | int ret = X86_BR_NONE; | ||
| 417 | int ext, to_plm, from_plm; | ||
| 418 | u8 buf[MAX_INSN_SIZE]; | ||
| 419 | int is64 = 0; | ||
| 420 | |||
| 421 | to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; | ||
| 422 | from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; | ||
| 423 | |||
| 424 | /* | ||
| 425 | * may be zero if the LBR did not fill up after a reset by the time | ||
| 426 | * we get a PMU interrupt | ||
| 427 | */ | ||
| 428 | if (from == 0 || to == 0) | ||
| 429 | return X86_BR_NONE; | ||
| 430 | |||
| 431 | if (from_plm == X86_BR_USER) { | ||
| 432 | /* | ||
| 433 | * can happen if measuring at the user level only | ||
| 434 | * and we interrupt in a kernel thread, e.g., idle. | ||
| 435 | */ | ||
| 436 | if (!current->mm) | ||
| 437 | return X86_BR_NONE; | ||
| 438 | |||
| 439 | /* may fail if text not present */ | ||
| 440 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); | ||
| 441 | if (bytes != size) | ||
| 442 | return X86_BR_NONE; | ||
| 443 | |||
| 444 | addr = buf; | ||
| 445 | } else | ||
| 446 | addr = (void *)from; | ||
| 447 | |||
| 448 | /* | ||
| 449 | * decoder needs to know the ABI especially | ||
| 450 | * on 64-bit systems running 32-bit apps | ||
| 451 | */ | ||
| 452 | #ifdef CONFIG_X86_64 | ||
| 453 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); | ||
| 454 | #endif | ||
| 455 | insn_init(&insn, addr, is64); | ||
| 456 | insn_get_opcode(&insn); | ||
| 457 | |||
| 458 | switch (insn.opcode.bytes[0]) { | ||
| 459 | case 0xf: | ||
| 460 | switch (insn.opcode.bytes[1]) { | ||
| 461 | case 0x05: /* syscall */ | ||
| 462 | case 0x34: /* sysenter */ | ||
| 463 | ret = X86_BR_SYSCALL; | ||
| 464 | break; | ||
| 465 | case 0x07: /* sysret */ | ||
| 466 | case 0x35: /* sysexit */ | ||
| 467 | ret = X86_BR_SYSRET; | ||
| 468 | break; | ||
| 469 | case 0x80 ... 0x8f: /* conditional */ | ||
| 470 | ret = X86_BR_JCC; | ||
| 471 | break; | ||
| 472 | default: | ||
| 473 | ret = X86_BR_NONE; | ||
| 474 | } | ||
| 475 | break; | ||
| 476 | case 0x70 ... 0x7f: /* conditional */ | ||
| 477 | ret = X86_BR_JCC; | ||
| 478 | break; | ||
| 479 | case 0xc2: /* near ret */ | ||
| 480 | case 0xc3: /* near ret */ | ||
| 481 | case 0xca: /* far ret */ | ||
| 482 | case 0xcb: /* far ret */ | ||
| 483 | ret = X86_BR_RET; | ||
| 484 | break; | ||
| 485 | case 0xcf: /* iret */ | ||
| 486 | ret = X86_BR_IRET; | ||
| 487 | break; | ||
| 488 | case 0xcc ... 0xce: /* int */ | ||
| 489 | ret = X86_BR_INT; | ||
| 490 | break; | ||
| 491 | case 0xe8: /* call near rel */ | ||
| 492 | case 0x9a: /* call far absolute */ | ||
| 493 | ret = X86_BR_CALL; | ||
| 494 | break; | ||
| 495 | case 0xe0 ... 0xe3: /* loop jmp */ | ||
| 496 | ret = X86_BR_JCC; | ||
| 497 | break; | ||
| 498 | case 0xe9 ... 0xeb: /* jmp */ | ||
| 499 | ret = X86_BR_JMP; | ||
| 500 | break; | ||
| 501 | case 0xff: /* call near absolute, call far absolute ind */ | ||
| 502 | insn_get_modrm(&insn); | ||
| 503 | ext = (insn.modrm.bytes[0] >> 3) & 0x7; | ||
| 504 | switch (ext) { | ||
| 505 | case 2: /* near ind call */ | ||
| 506 | case 3: /* far ind call */ | ||
| 507 | ret = X86_BR_IND_CALL; | ||
| 508 | break; | ||
| 509 | case 4: | ||
| 510 | case 5: | ||
| 511 | ret = X86_BR_JMP; | ||
| 512 | break; | ||
| 513 | } | ||
| 514 | break; | ||
| 515 | default: | ||
| 516 | ret = X86_BR_NONE; | ||
| 517 | } | ||
| 518 | /* | ||
| 519 | * interrupts, traps, faults (and thus ring transitions) may | ||
| 520 | * occur on any instruction. Thus, to classify them correctly, | ||
| 521 | * we need to first look at the from and to priv levels. If they | ||
| 522 | * are different and to is in the kernel, then it indicates | ||
| 523 | * a ring transition. If the from instruction is not a ring | ||
| 524 | * transition instr (syscall, sysenter, int), then it means | ||
| 525 | * it was an irq, trap or fault. | ||
| 526 | * | ||
| 527 | * we have no way of detecting kernel to kernel faults. | ||
| 528 | */ | ||
| 529 | if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL | ||
| 530 | && ret != X86_BR_SYSCALL && ret != X86_BR_INT) | ||
| 531 | ret = X86_BR_IRQ; | ||
| 532 | |||
| 533 | /* | ||
| 534 | * branch priv level determined by target as | ||
| 535 | * is done by HW when LBR_SELECT is implemented | ||
| 536 | */ | ||
| 537 | if (ret != X86_BR_NONE) | ||
| 538 | ret |= to_plm; | ||
| 539 | |||
| 540 | return ret; | ||
| 541 | } | ||
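A worked example: a near ret (opcode 0xc3) executed and landing in user space decodes to X86_BR_RET and picks up X86_BR_USER from the target, so it survives a PERF_SAMPLE_BRANCH_ANY_RETURN | PERF_SAMPLE_BRANCH_USER filter; a timer interrupt taken on that same instruction instead shows a user "from", a kernel "to" and a non-transition opcode, and is classified as X86_BR_IRQ | X86_BR_KERNEL.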
| 542 | |||
| 543 | /* | ||
| 544 | * implement the actual branch filter based on user demand. | ||
| 545 | * Hardware may not exactly satisfy that request, thus | ||
| 546 | * we need to inspect opcodes. Mismatched branches are | ||
| 547 | * discarded. Therefore, the number of branches returned | ||
| 548 | * in a PERF_SAMPLE_BRANCH_STACK sample may vary. | ||
| 549 | */ | ||
| 550 | static void | ||
| 551 | intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) | ||
| 552 | { | ||
| 553 | u64 from, to; | ||
| 554 | int br_sel = cpuc->br_sel; | ||
| 555 | int i, j, type; | ||
| 556 | bool compress = false; | ||
| 557 | |||
| 558 | /* if sampling all branches, then nothing to filter */ | ||
| 559 | if ((br_sel & X86_BR_ALL) == X86_BR_ALL) | ||
| 560 | return; | ||
| 561 | |||
| 562 | for (i = 0; i < cpuc->lbr_stack.nr; i++) { | ||
| 563 | |||
| 564 | from = cpuc->lbr_entries[i].from; | ||
| 565 | to = cpuc->lbr_entries[i].to; | ||
| 566 | |||
| 567 | type = branch_type(from, to); | ||
| 568 | |||
| 569 | /* if type does not correspond, then discard */ | ||
| 570 | if (type == X86_BR_NONE || (br_sel & type) != type) { | ||
| 571 | cpuc->lbr_entries[i].from = 0; | ||
| 572 | compress = true; | ||
| 573 | } | ||
| 574 | } | ||
| 575 | |||
| 576 | if (!compress) | ||
| 577 | return; | ||
| 578 | |||
| 579 | /* remove all entries with from=0 */ | ||
| 580 | for (i = 0; i < cpuc->lbr_stack.nr; ) { | ||
| 581 | if (!cpuc->lbr_entries[i].from) { | ||
| 582 | j = i; | ||
| 583 | while (++j < cpuc->lbr_stack.nr) | ||
| 584 | cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; | ||
| 585 | cpuc->lbr_stack.nr--; | ||
| 586 | if (!cpuc->lbr_entries[i].from) | ||
| 587 | continue; | ||
| 588 | } | ||
| 589 | i++; | ||
| 590 | } | ||
| 591 | } | ||
| 592 | |||
| 593 | /* | ||
| 594 | * Map interface branch filters onto LBR filters | ||
| 595 | */ | ||
| 596 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
| 597 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
| 598 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
| 599 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
| 600 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
| 601 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP | ||
| 602 | | LBR_IND_JMP | LBR_FAR, | ||
| 603 | /* | ||
| 604 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches | ||
| 605 | */ | ||
| 606 | [PERF_SAMPLE_BRANCH_ANY_CALL] = | ||
| 607 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, | ||
| 608 | /* | ||
| 609 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL | ||
| 610 | */ | ||
| 611 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, | ||
| 612 | }; | ||
| 613 | |||
| 614 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
| 615 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
| 616 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
| 617 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
| 618 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
| 619 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, | ||
| 620 | [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | ||
| 621 | | LBR_FAR, | ||
| 622 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, | ||
| 623 | }; | ||
| 624 | |||
| 625 | /* core */ | ||
| 198 | void intel_pmu_lbr_init_core(void) | 626 | void intel_pmu_lbr_init_core(void) |
| 199 | { | 627 | { |
| 200 | x86_pmu.lbr_nr = 4; | 628 | x86_pmu.lbr_nr = 4; |
| 201 | x86_pmu.lbr_tos = 0x01c9; | 629 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
| 202 | x86_pmu.lbr_from = 0x40; | 630 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
| 203 | x86_pmu.lbr_to = 0x60; | 631 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
| 632 | |||
| 633 | /* | ||
| 634 | * SW branch filter usage: | ||
| 635 | * - compensate for lack of HW filter | ||
| 636 | */ | ||
| 637 | pr_cont("4-deep LBR, "); | ||
| 204 | } | 638 | } |
| 205 | 639 | ||
| 640 | /* nehalem/westmere */ | ||
| 206 | void intel_pmu_lbr_init_nhm(void) | 641 | void intel_pmu_lbr_init_nhm(void) |
| 207 | { | 642 | { |
| 208 | x86_pmu.lbr_nr = 16; | 643 | x86_pmu.lbr_nr = 16; |
| 209 | x86_pmu.lbr_tos = 0x01c9; | 644 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
| 210 | x86_pmu.lbr_from = 0x680; | 645 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; |
| 211 | x86_pmu.lbr_to = 0x6c0; | 646 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; |
| 647 | |||
| 648 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
| 649 | x86_pmu.lbr_sel_map = nhm_lbr_sel_map; | ||
| 650 | |||
| 651 | /* | ||
| 652 | * SW branch filter usage: | ||
| 653 | * - work around the LBR_SEL errata (see above) | ||
| 654 | * - support syscall, sysret capture. | ||
| 655 | * That requires LBR_FAR, but that means far | ||
| 656 | * jmps need to be filtered out | ||
| 657 | */ | ||
| 658 | pr_cont("16-deep LBR, "); | ||
| 659 | } | ||
| 660 | |||
| 661 | /* sandy bridge */ | ||
| 662 | void intel_pmu_lbr_init_snb(void) | ||
| 663 | { | ||
| 664 | x86_pmu.lbr_nr = 16; | ||
| 665 | x86_pmu.lbr_tos = MSR_LBR_TOS; | ||
| 666 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; | ||
| 667 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; | ||
| 668 | |||
| 669 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
| 670 | x86_pmu.lbr_sel_map = snb_lbr_sel_map; | ||
| 671 | |||
| 672 | /* | ||
| 673 | * SW branch filter usage: | ||
| 674 | * - support syscall, sysret capture. | ||
| 675 | * That requires LBR_FAR, but that means far | ||
| 676 | * jmps need to be filtered out | ||
| 677 | */ | ||
| 678 | pr_cont("16-deep LBR, "); | ||
| 212 | } | 679 | } |
| 213 | 680 | ||
| 681 | /* atom */ | ||
| 214 | void intel_pmu_lbr_init_atom(void) | 682 | void intel_pmu_lbr_init_atom(void) |
| 215 | { | 683 | { |
| 684 | /* | ||
| 685 | * only models starting at stepping 10 seem | ||
| 686 | * to have an operational LBR which can freeze | ||
| 687 | * on PMU interrupt | ||
| 688 | */ | ||
| 689 | if (boot_cpu_data.x86_mask < 10) { | ||
| 690 | pr_cont("LBR disabled due to erratum"); | ||
| 691 | return; | ||
| 692 | } | ||
| 693 | |||
| 216 | x86_pmu.lbr_nr = 8; | 694 | x86_pmu.lbr_nr = 8; |
| 217 | x86_pmu.lbr_tos = 0x01c9; | 695 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
| 218 | x86_pmu.lbr_from = 0x40; | 696 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
| 219 | x86_pmu.lbr_to = 0x60; | 697 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
| 698 | |||
| 699 | /* | ||
| 700 | * SW branch filter usage: | ||
| 701 | * - compensate for lack of HW filter | ||
| 702 | */ | ||
| 703 | pr_cont("8-deep LBR, "); | ||
| 220 | } | 704 | } |
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h new file mode 100644 index 000000000000..3230b68ef29a --- /dev/null +++ b/arch/x86/kernel/kprobes-common.h | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | #ifndef __X86_KERNEL_KPROBES_COMMON_H | ||
| 2 | #define __X86_KERNEL_KPROBES_COMMON_H | ||
| 3 | |||
| 4 | /* Kprobes and Optprobes common header */ | ||
| 5 | |||
| 6 | #ifdef CONFIG_X86_64 | ||
| 7 | #define SAVE_REGS_STRING \ | ||
| 8 | /* Skip cs, ip, orig_ax. */ \ | ||
| 9 | " subq $24, %rsp\n" \ | ||
| 10 | " pushq %rdi\n" \ | ||
| 11 | " pushq %rsi\n" \ | ||
| 12 | " pushq %rdx\n" \ | ||
| 13 | " pushq %rcx\n" \ | ||
| 14 | " pushq %rax\n" \ | ||
| 15 | " pushq %r8\n" \ | ||
| 16 | " pushq %r9\n" \ | ||
| 17 | " pushq %r10\n" \ | ||
| 18 | " pushq %r11\n" \ | ||
| 19 | " pushq %rbx\n" \ | ||
| 20 | " pushq %rbp\n" \ | ||
| 21 | " pushq %r12\n" \ | ||
| 22 | " pushq %r13\n" \ | ||
| 23 | " pushq %r14\n" \ | ||
| 24 | " pushq %r15\n" | ||
| 25 | #define RESTORE_REGS_STRING \ | ||
| 26 | " popq %r15\n" \ | ||
| 27 | " popq %r14\n" \ | ||
| 28 | " popq %r13\n" \ | ||
| 29 | " popq %r12\n" \ | ||
| 30 | " popq %rbp\n" \ | ||
| 31 | " popq %rbx\n" \ | ||
| 32 | " popq %r11\n" \ | ||
| 33 | " popq %r10\n" \ | ||
| 34 | " popq %r9\n" \ | ||
| 35 | " popq %r8\n" \ | ||
| 36 | " popq %rax\n" \ | ||
| 37 | " popq %rcx\n" \ | ||
| 38 | " popq %rdx\n" \ | ||
| 39 | " popq %rsi\n" \ | ||
| 40 | " popq %rdi\n" \ | ||
| 41 | /* Skip orig_ax, ip, cs */ \ | ||
| 42 | " addq $24, %rsp\n" | ||
| 43 | #else | ||
| 44 | #define SAVE_REGS_STRING \ | ||
| 45 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
| 46 | " subl $16, %esp\n" \ | ||
| 47 | " pushl %fs\n" \ | ||
| 48 | " pushl %es\n" \ | ||
| 49 | " pushl %ds\n" \ | ||
| 50 | " pushl %eax\n" \ | ||
| 51 | " pushl %ebp\n" \ | ||
| 52 | " pushl %edi\n" \ | ||
| 53 | " pushl %esi\n" \ | ||
| 54 | " pushl %edx\n" \ | ||
| 55 | " pushl %ecx\n" \ | ||
| 56 | " pushl %ebx\n" | ||
| 57 | #define RESTORE_REGS_STRING \ | ||
| 58 | " popl %ebx\n" \ | ||
| 59 | " popl %ecx\n" \ | ||
| 60 | " popl %edx\n" \ | ||
| 61 | " popl %esi\n" \ | ||
| 62 | " popl %edi\n" \ | ||
| 63 | " popl %ebp\n" \ | ||
| 64 | " popl %eax\n" \ | ||
| 65 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here */\ | ||
| 66 | " addl $24, %esp\n" | ||
| 67 | #endif | ||
| 68 | |||
| 69 | /* Check whether the instruction can be boosted */ | ||
| 70 | extern int can_boost(kprobe_opcode_t *instruction); | ||
| 71 | /* Recover instruction if given address is probed */ | ||
| 72 | extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, | ||
| 73 | unsigned long addr); | ||
| 74 | /* | ||
| 75 | * Copy an instruction and adjust the displacement if the instruction | ||
| 76 | * uses the %rip-relative addressing mode. | ||
| 77 | */ | ||
| 78 | extern int __copy_instruction(u8 *dest, u8 *src); | ||
| 79 | |||
| 80 | /* Generate a relative-jump/call instruction */ | ||
| 81 | extern void synthesize_reljump(void *from, void *to); | ||
| 82 | extern void synthesize_relcall(void *from, void *to); | ||
| 83 | |||
| 84 | #ifdef CONFIG_OPTPROBES | ||
| 85 | extern int arch_init_optprobes(void); | ||
| 86 | extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); | ||
| 87 | extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); | ||
| 88 | #else /* !CONFIG_OPTPROBES */ | ||
| 89 | static inline int arch_init_optprobes(void) | ||
| 90 | { | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
| 94 | { | ||
| 95 | return 0; | ||
| 96 | } | ||
| 97 | static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
| 98 | { | ||
| 99 | return addr; | ||
| 100 | } | ||
| 101 | #endif | ||
| 102 | #endif | ||
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c new file mode 100644 index 000000000000..c5e410eed403 --- /dev/null +++ b/arch/x86/kernel/kprobes-opt.c | |||
| @@ -0,0 +1,512 @@ | |||
| 1 | /* | ||
| 2 | * Kernel Probes Jump Optimization (Optprobes) | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | * | ||
| 18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
| 19 | * Copyright (C) Hitachi Ltd., 2012 | ||
| 20 | */ | ||
| 21 | #include <linux/kprobes.h> | ||
| 22 | #include <linux/ptrace.h> | ||
| 23 | #include <linux/string.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/hardirq.h> | ||
| 26 | #include <linux/preempt.h> | ||
| 27 | #include <linux/module.h> | ||
| 28 | #include <linux/kdebug.h> | ||
| 29 | #include <linux/kallsyms.h> | ||
| 30 | #include <linux/ftrace.h> | ||
| 31 | |||
| 32 | #include <asm/cacheflush.h> | ||
| 33 | #include <asm/desc.h> | ||
| 34 | #include <asm/pgtable.h> | ||
| 35 | #include <asm/uaccess.h> | ||
| 36 | #include <asm/alternative.h> | ||
| 37 | #include <asm/insn.h> | ||
| 38 | #include <asm/debugreg.h> | ||
| 39 | |||
| 40 | #include "kprobes-common.h" | ||
| 41 | |||
| 42 | unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
| 43 | { | ||
| 44 | struct optimized_kprobe *op; | ||
| 45 | struct kprobe *kp; | ||
| 46 | long offs; | ||
| 47 | int i; | ||
| 48 | |||
| 49 | for (i = 0; i < RELATIVEJUMP_SIZE; i++) { | ||
| 50 | kp = get_kprobe((void *)addr - i); | ||
| 51 | /* This function only handles jump-optimized kprobe */ | ||
| 52 | if (kp && kprobe_optimized(kp)) { | ||
| 53 | op = container_of(kp, struct optimized_kprobe, kp); | ||
| 54 | /* If op->list is not empty, op is being optimized */ | ||
| 55 | if (list_empty(&op->list)) | ||
| 56 | goto found; | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | return addr; | ||
| 61 | found: | ||
| 62 | /* | ||
| 63 | * If the kprobe is optimized, the original bytes after the first may | ||
| 64 | * have been overwritten by the jump destination address. In this | ||
| 65 | * case, they must be recovered from the op->optinsn.copied_insn buffer. | ||
| 66 | */ | ||
| 67 | memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
| 68 | if (addr == (unsigned long)kp->addr) { | ||
| 69 | buf[0] = kp->opcode; | ||
| 70 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
| 71 | } else { | ||
| 72 | offs = addr - (unsigned long)kp->addr - 1; | ||
| 73 | memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); | ||
| 74 | } | ||
| 75 | |||
| 76 | return (unsigned long)buf; | ||
| 77 | } | ||
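The recovery above reassembles the pre-optimization bytes from two saved pieces: kp->opcode holds the original first byte, and op->optinsn.copied_insn holds the four bytes that the rel32 part of the jump overwrote. A minimal userspace sketch of the addr == kp->addr branch, with made-up example bytes standing in for the kernel structures:

#include <stdio.h>
#include <string.h>

#define RELATIVEJUMP_SIZE	5	/* e9 + rel32 */
#define RELATIVE_ADDR_SIZE	4	/* rel32 */

/* Hypothetical original bytes at the probe address. */
static const unsigned char orig[8] = { 0x55, 0x48, 0x89, 0xe5, 0x90, 0x90, 0x90, 0x90 };

int main(void)
{
        unsigned char live[8];                          /* what the text holds now */
        unsigned char copied_insn[RELATIVE_ADDR_SIZE];  /* like op->optinsn.copied_insn */
        unsigned char opcode = orig[0];                 /* like kp->opcode */
        unsigned char buf[8];

        /* "Optimization": save bytes 1..4, then overwrite 0..4 with a jump. */
        memcpy(live, orig, sizeof(live));
        memcpy(copied_insn, orig + 1, RELATIVE_ADDR_SIZE);
        live[0] = 0xe9;
        memset(live + 1, 0xaa, RELATIVE_ADDR_SIZE);

        /* Recovery at the probe address: saved first byte + saved tail. */
        memcpy(buf, live, sizeof(buf));
        buf[0] = opcode;
        memcpy(buf + 1, copied_insn, RELATIVE_ADDR_SIZE);

        printf("recovered matches original: %s\n",
               memcmp(buf, orig, sizeof(buf)) == 0 ? "yes" : "no");
        return 0;
}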
| 78 | |||
| 79 | /* Insert a move instruction which loads a pointer into eax/rdi (1st arg). */ | ||
| 80 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) | ||
| 81 | { | ||
| 82 | #ifdef CONFIG_X86_64 | ||
| 83 | *addr++ = 0x48; | ||
| 84 | *addr++ = 0xbf; | ||
| 85 | #else | ||
| 86 | *addr++ = 0xb8; | ||
| 87 | #endif | ||
| 88 | *(unsigned long *)addr = val; | ||
| 89 | } | ||
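The bytes emitted above decode as follows: on x86-64, 48 bf imm64 is "mov $imm64, %rdi" (REX.W prefix plus the move-immediate opcode for %rdi), and on 32-bit, b8 imm32 is "mov $imm32, %eax"; each loads the first argument register of its calling convention. A small sketch of the 64-bit encoding, assuming a little-endian LP64 host (the function name is illustrative):

#include <stdio.h>
#include <string.h>

/* Stand-in for the x86-64 branch of synthesize_set_arg1():
 * emits 48 bf <imm64>, i.e. mov $imm64, %rdi. */
static void synth_set_arg1(unsigned char *p, unsigned long val)
{
        *p++ = 0x48;                    /* REX.W prefix */
        *p++ = 0xbf;                    /* mov imm64 -> %rdi */
        memcpy(p, &val, sizeof(val));   /* little-endian immediate */
}

int main(void)
{
        unsigned char buf[10];
        int i;

        synth_set_arg1(buf, 0x1122334455667788UL);
        for (i = 0; i < 10; i++)
                printf("%02x ", buf[i]);
        printf("\n");                   /* 48 bf 88 77 66 55 44 33 22 11 */
        return 0;
}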
| 90 | |||
| 91 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
| 92 | { | ||
| 93 | asm volatile ( | ||
| 94 | ".global optprobe_template_entry\n" | ||
| 95 | "optprobe_template_entry:\n" | ||
| 96 | #ifdef CONFIG_X86_64 | ||
| 97 | /* We don't bother saving the ss register */ | ||
| 98 | " pushq %rsp\n" | ||
| 99 | " pushfq\n" | ||
| 100 | SAVE_REGS_STRING | ||
| 101 | " movq %rsp, %rsi\n" | ||
| 102 | ".global optprobe_template_val\n" | ||
| 103 | "optprobe_template_val:\n" | ||
| 104 | ASM_NOP5 | ||
| 105 | ASM_NOP5 | ||
| 106 | ".global optprobe_template_call\n" | ||
| 107 | "optprobe_template_call:\n" | ||
| 108 | ASM_NOP5 | ||
| 109 | /* Move flags to rsp */ | ||
| 110 | " movq 144(%rsp), %rdx\n" | ||
| 111 | " movq %rdx, 152(%rsp)\n" | ||
| 112 | RESTORE_REGS_STRING | ||
| 113 | /* Skip flags entry */ | ||
| 114 | " addq $8, %rsp\n" | ||
| 115 | " popfq\n" | ||
| 116 | #else /* CONFIG_X86_32 */ | ||
| 117 | " pushf\n" | ||
| 118 | SAVE_REGS_STRING | ||
| 119 | " movl %esp, %edx\n" | ||
| 120 | ".global optprobe_template_val\n" | ||
| 121 | "optprobe_template_val:\n" | ||
| 122 | ASM_NOP5 | ||
| 123 | ".global optprobe_template_call\n" | ||
| 124 | "optprobe_template_call:\n" | ||
| 125 | ASM_NOP5 | ||
| 126 | RESTORE_REGS_STRING | ||
| 127 | " addl $4, %esp\n" /* skip cs */ | ||
| 128 | " popf\n" | ||
| 129 | #endif | ||
| 130 | ".global optprobe_template_end\n" | ||
| 131 | "optprobe_template_end:\n"); | ||
| 132 | } | ||
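The 144/152 offsets in the x86-64 template follow from frame arithmetic. "pushq %rsp" and "pushfq" store 16 bytes, then SAVE_REGS_STRING reserves 24 bytes for the skipped cs/ip/orig_ax slots and pushes 15 general-purpose registers (120 bytes), giving a 160-byte frame laid out like struct pt_regs:

        %rsp + 0   .. 119 : r15 ... rdi            (the 15 pushes)
        %rsp + 120 .. 143 : orig_ax, ip, cs slots  ("subq $24", left unwritten)
        %rsp + 144        : flags                  (stored by "pushfq")
        %rsp + 152        : saved %rsp             (stored by "pushq %rsp")

Copying the flags from 144(%rsp) into the saved-%rsp slot at 152(%rsp) lets the exit path skip the stale flags slot ("addq $8, %rsp") and pop the flags from the top slot, which leaves %rsp at exactly its pre-template value after "popfq".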
| 133 | |||
| 134 | #define TMPL_MOVE_IDX \ | ||
| 135 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
| 136 | #define TMPL_CALL_IDX \ | ||
| 137 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
| 138 | #define TMPL_END_IDX \ | ||
| 139 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
| 140 | |||
| 141 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
| 142 | |||
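The TMPL_*_IDX values above are byte offsets of the global labels inside the template blob, so the same offsets stay valid after the template is memcpy'd into each detour buffer. The pattern in miniature, with a string standing in for the machine code (the marker names are illustrative only):

#include <stdio.h>
#include <string.h>

/* The string plays the template; the markers play the
 * optprobe_template_val/_call/_end labels. */
static const char tmpl[] = "ENTRY.....VAL.....CALL....END";

static long idx(const char *label)
{
        return strstr(tmpl, label) - tmpl;      /* offset from the entry */
}

int main(void)
{
        char buf[sizeof(tmpl)];

        memcpy(buf, tmpl, sizeof(tmpl));        /* copy the template */
        /* Offsets computed against the template hold in every copy. */
        printf("VAL at %ld, CALL at %ld, END at %ld\n",
               idx("VAL"), idx("CALL"), idx("END"));
        return 0;
}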
| 143 | /* Optimized kprobe callback function: called from the optinsn slot */ | ||
| 144 | static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) | ||
| 145 | { | ||
| 146 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
| 147 | unsigned long flags; | ||
| 148 | |||
| 149 | /* This is possible if op is queued for delayed unoptimization */ | ||
| 150 | if (kprobe_disabled(&op->kp)) | ||
| 151 | return; | ||
| 152 | |||
| 153 | local_irq_save(flags); | ||
| 154 | if (kprobe_running()) { | ||
| 155 | kprobes_inc_nmissed_count(&op->kp); | ||
| 156 | } else { | ||
| 157 | /* Save skipped registers */ | ||
| 158 | #ifdef CONFIG_X86_64 | ||
| 159 | regs->cs = __KERNEL_CS; | ||
| 160 | #else | ||
| 161 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
| 162 | regs->gs = 0; | ||
| 163 | #endif | ||
| 164 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
| 165 | regs->orig_ax = ~0UL; | ||
| 166 | |||
| 167 | __this_cpu_write(current_kprobe, &op->kp); | ||
| 168 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
| 169 | opt_pre_handler(&op->kp, regs); | ||
| 170 | __this_cpu_write(current_kprobe, NULL); | ||
| 171 | } | ||
| 172 | local_irq_restore(flags); | ||
| 173 | } | ||
| 174 | |||
| 175 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
| 176 | { | ||
| 177 | int len = 0, ret; | ||
| 178 | |||
| 179 | while (len < RELATIVEJUMP_SIZE) { | ||
| 180 | ret = __copy_instruction(dest + len, src + len); | ||
| 181 | if (!ret || !can_boost(dest + len)) | ||
| 182 | return -EINVAL; | ||
| 183 | len += ret; | ||
| 184 | } | ||
| 185 | /* Check whether the address range is reserved */ | ||
| 186 | if (ftrace_text_reserved(src, src + len - 1) || | ||
| 187 | alternatives_text_reserved(src, src + len - 1) || | ||
| 188 | jump_label_text_reserved(src, src + len - 1)) | ||
| 189 | return -EBUSY; | ||
| 190 | |||
| 191 | return len; | ||
| 192 | } | ||
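The loop above keeps copying whole instructions until at least RELATIVEJUMP_SIZE bytes are covered, so the 5-byte jump written later can never split an instruction. Its termination logic in miniature, with hypothetical decoded lengths:

#include <stdio.h>

int main(void)
{
        int lens[] = { 3, 1, 4 };       /* hypothetical instruction lengths */
        int len = 0, n = 0;

        while (len < 5)                 /* RELATIVEJUMP_SIZE */
                len += lens[n++];
        /* 3 + 1 + 4 = 8 bytes copied: >= 5, no instruction split. */
        printf("copied %d bytes in %d instructions\n", len, n);
        return 0;
}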
| 193 | |||
| 194 | /* Check whether insn is an indirect jump */ | ||
| 195 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
| 196 | { | ||
| 197 | return ((insn->opcode.bytes[0] == 0xff && | ||
| 198 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
| 199 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
| 200 | } | ||
| 201 | |||
| 202 | /* Check whether insn jumps into the specified address range */ | ||
| 203 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
| 204 | { | ||
| 205 | unsigned long target = 0; | ||
| 206 | |||
| 207 | switch (insn->opcode.bytes[0]) { | ||
| 208 | case 0xe0: /* loopne */ | ||
| 209 | case 0xe1: /* loope */ | ||
| 210 | case 0xe2: /* loop */ | ||
| 211 | case 0xe3: /* jcxz */ | ||
| 212 | case 0xe9: /* near relative jump */ | ||
| 213 | case 0xeb: /* short relative jump */ | ||
| 214 | break; | ||
| 215 | case 0x0f: | ||
| 216 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
| 217 | break; | ||
| 218 | return 0; | ||
| 219 | default: | ||
| 220 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
| 221 | break; | ||
| 222 | return 0; | ||
| 223 | } | ||
| 224 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
| 225 | |||
| 226 | return (start <= target && target <= start + len); | ||
| 227 | } | ||
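As the last lines show, a relative jump's target is the address of the next instruction plus the signed immediate. A worked example with made-up addresses, assuming a 2-byte short jump:

#include <stdio.h>

int main(void)
{
        unsigned long insn_addr = 0x1000;        /* hypothetical jmp rel8 */
        unsigned long next_byte = insn_addr + 2; /* a short jump is 2 bytes */
        long imm = -6;                           /* signed 8-bit displacement */
        unsigned long target = next_byte + imm;  /* 0x0ffc */
        unsigned long start = 0x0ffa;            /* range under test */
        int len = 4;

        printf("target 0x%lx in [0x%lx, 0x%lx]: %s\n",
               target, start, start + len,
               (start <= target && target <= start + len) ? "yes" : "no");
        return 0;
}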
| 228 | |||
| 229 | /* Decode the whole function to ensure no instruction jumps into the target */ | ||
| 230 | static int __kprobes can_optimize(unsigned long paddr) | ||
| 231 | { | ||
| 232 | unsigned long addr, size = 0, offset = 0; | ||
| 233 | struct insn insn; | ||
| 234 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
| 235 | |||
| 236 | /* Lookup symbol including addr */ | ||
| 237 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
| 238 | return 0; | ||
| 239 | |||
| 240 | /* | ||
| 241 | * Do not optimize in the entry code due to the unstable | ||
| 242 | * stack handling. | ||
| 243 | */ | ||
| 244 | if ((paddr >= (unsigned long)__entry_text_start) && | ||
| 245 | (paddr < (unsigned long)__entry_text_end)) | ||
| 246 | return 0; | ||
| 247 | |||
| 248 | /* Check that there is enough space for a relative jump. */ | ||
| 249 | if (size - offset < RELATIVEJUMP_SIZE) | ||
| 250 | return 0; | ||
| 251 | |||
| 252 | /* Decode instructions */ | ||
| 253 | addr = paddr - offset; | ||
| 254 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
| 255 | if (search_exception_tables(addr)) | ||
| 256 | /* | ||
| 257 | * Since some fixup code may jump into this function, | ||
| 258 | * we can't optimize kprobes in it. | ||
| 259 | */ | ||
| 260 | return 0; | ||
| 261 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); | ||
| 262 | insn_get_length(&insn); | ||
| 263 | /* Another subsystem has put a breakpoint */ | ||
| 264 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
| 265 | return 0; | ||
| 266 | /* Recover address */ | ||
| 267 | insn.kaddr = (void *)addr; | ||
| 268 | insn.next_byte = (void *)(addr + insn.length); | ||
| 269 | /* Check that this instruction doesn't jump into the target */ | ||
| 270 | if (insn_is_indirect_jump(&insn) || | ||
| 271 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
| 272 | RELATIVE_ADDR_SIZE)) | ||
| 273 | return 0; | ||
| 274 | addr += insn.length; | ||
| 275 | } | ||
| 276 | |||
| 277 | return 1; | ||
| 278 | } | ||
| 279 | |||
| 280 | /* Check whether the optimized_kprobe can actually be optimized. */ | ||
| 281 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
| 282 | { | ||
| 283 | int i; | ||
| 284 | struct kprobe *p; | ||
| 285 | |||
| 286 | for (i = 1; i < op->optinsn.size; i++) { | ||
| 287 | p = get_kprobe(op->kp.addr + i); | ||
| 288 | if (p && !kprobe_disabled(p)) | ||
| 289 | return -EEXIST; | ||
| 290 | } | ||
| 291 | |||
| 292 | return 0; | ||
| 293 | } | ||
| 294 | |||
| 295 | /* Check whether addr is within the optimized instructions. */ | ||
| 296 | int __kprobes | ||
| 297 | arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) | ||
| 298 | { | ||
| 299 | return ((unsigned long)op->kp.addr <= addr && | ||
| 300 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
| 301 | } | ||
| 302 | |||
| 303 | /* Free optimized instruction slot */ | ||
| 304 | static __kprobes | ||
| 305 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
| 306 | { | ||
| 307 | if (op->optinsn.insn) { | ||
| 308 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
| 309 | op->optinsn.insn = NULL; | ||
| 310 | op->optinsn.size = 0; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
| 315 | { | ||
| 316 | __arch_remove_optimized_kprobe(op, 1); | ||
| 317 | } | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Copy the instructions being replaced. | ||
| 321 | * Target instructions MUST be relocatable (checked inside). | ||
| 322 | * This is called when a new aggregated (opt)probe is allocated or reused. | ||
| 323 | */ | ||
| 324 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
| 325 | { | ||
| 326 | u8 *buf; | ||
| 327 | int ret; | ||
| 328 | long rel; | ||
| 329 | |||
| 330 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
| 331 | return -EILSEQ; | ||
| 332 | |||
| 333 | op->optinsn.insn = get_optinsn_slot(); | ||
| 334 | if (!op->optinsn.insn) | ||
| 335 | return -ENOMEM; | ||
| 336 | |||
| 337 | /* | ||
| 338 | * Verify that the address gap is within the +/-2GB range of | ||
| 339 | * a relative jump. | ||
| 340 | */ | ||
| 341 | rel = (long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE); | ||
| 342 | if (abs(rel) > 0x7fffffff) | ||
| 343 | return -ERANGE; | ||
| 344 | |||
| 345 | buf = (u8 *)op->optinsn.insn; | ||
| 346 | |||
| 347 | /* Copy instructions into the out-of-line buffer */ | ||
| 348 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
| 349 | if (ret < 0) { | ||
| 350 | __arch_remove_optimized_kprobe(op, 0); | ||
| 351 | return ret; | ||
| 352 | } | ||
| 353 | op->optinsn.size = ret; | ||
| 354 | |||
| 355 | /* Copy arch-dep-instance from template */ | ||
| 356 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
| 357 | |||
| 358 | /* Set probe information */ | ||
| 359 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
| 360 | |||
| 361 | /* Set probe function call */ | ||
| 362 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
| 363 | |||
| 364 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
| 365 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
| 366 | (u8 *)op->kp.addr + op->optinsn.size); | ||
| 367 | |||
| 368 | flush_icache_range((unsigned long) buf, | ||
| 369 | (unsigned long) buf + TMPL_END_IDX + | ||
| 370 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
| 371 | return 0; | ||
| 372 | } | ||
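After this function succeeds, the detour buffer holds three parts back to back: the register-saving template in [0, TMPL_END_IDX), the relocated original instructions (optinsn.size bytes), and a jump back to kp->addr + optinsn.size. A sketch of that assembly, with strings standing in for the machine code:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[64];
        const char *template = "[TMPL]";        /* saved-regs template */
        const char *copied   = "[INSNS]";       /* relocated instructions */
        const char *reljump  = "[JMP-BACK]";    /* to kp->addr + size */
        size_t end_idx = strlen(template);      /* plays TMPL_END_IDX */
        size_t size = strlen(copied);           /* plays optinsn.size */

        memcpy(buf, template, end_idx);
        memcpy(buf + end_idx, copied, size);
        strcpy(buf + end_idx + size, reljump);
        printf("%s\n", buf);                    /* [TMPL][INSNS][JMP-BACK] */
        return 0;
}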
| 373 | |||
| 374 | #define MAX_OPTIMIZE_PROBES 256 | ||
| 375 | static struct text_poke_param *jump_poke_params; | ||
| 376 | static struct jump_poke_buffer { | ||
| 377 | u8 buf[RELATIVEJUMP_SIZE]; | ||
| 378 | } *jump_poke_bufs; | ||
| 379 | |||
| 380 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
| 381 | u8 *insn_buf, | ||
| 382 | struct optimized_kprobe *op) | ||
| 383 | { | ||
| 384 | s32 rel = (s32)((long)op->optinsn.insn - | ||
| 385 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
| 386 | |||
| 387 | /* Back up the instructions which will be replaced by the jump address */ | ||
| 388 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
| 389 | RELATIVE_ADDR_SIZE); | ||
| 390 | |||
| 391 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
| 392 | *(s32 *)(&insn_buf[1]) = rel; | ||
| 393 | |||
| 394 | tprm->addr = op->kp.addr; | ||
| 395 | tprm->opcode = insn_buf; | ||
| 396 | tprm->len = RELATIVEJUMP_SIZE; | ||
| 397 | } | ||
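The displacement computed above is measured from the end of the 5-byte jump, hence the "+ RELATIVEJUMP_SIZE" inside the subtraction. Worked with hypothetical addresses:

#include <stdio.h>

int main(void)
{
        long src = 0x400000;    /* kp->addr: where e9 is written */
        long dst = 0x400080;    /* op->optinsn.insn: detour buffer */
        int rel = (int)(dst - (src + 5));

        /* Executing "e9 rel32" at src continues at src + 5 + rel. */
        printf("rel = %d, lands at 0x%lx\n",
               rel, (unsigned long)(src + 5 + rel));
        return 0;
}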
| 398 | |||
| 399 | /* | ||
| 400 | * Replace breakpoints (int3) with relative jumps. | ||
| 401 | * Caller must hold kprobe_mutex and text_mutex. | ||
| 402 | */ | ||
| 403 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
| 404 | { | ||
| 405 | struct optimized_kprobe *op, *tmp; | ||
| 406 | int c = 0; | ||
| 407 | |||
| 408 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
| 409 | WARN_ON(kprobe_disabled(&op->kp)); | ||
| 410 | /* Setup param */ | ||
| 411 | setup_optimize_kprobe(&jump_poke_params[c], | ||
| 412 | jump_poke_bufs[c].buf, op); | ||
| 413 | list_del_init(&op->list); | ||
| 414 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
| 415 | break; | ||
| 416 | } | ||
| 417 | |||
| 418 | /* | ||
| 419 | * text_poke_smp doesn't support modifying code in NMI/MCE | ||
| 420 | * handlers. However, since kprobes itself also doesn't support | ||
| 421 | * probing NMI/MCE code, this is not a problem. | ||
| 422 | */ | ||
| 423 | text_poke_smp_batch(jump_poke_params, c); | ||
| 424 | } | ||
| 425 | |||
| 426 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
| 427 | u8 *insn_buf, | ||
| 428 | struct optimized_kprobe *op) | ||
| 429 | { | ||
| 430 | /* Set an int3 as the first byte for kprobes */ | ||
| 431 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
| 432 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
| 433 | |||
| 434 | tprm->addr = op->kp.addr; | ||
| 435 | tprm->opcode = insn_buf; | ||
| 436 | tprm->len = RELATIVEJUMP_SIZE; | ||
| 437 | } | ||
| 438 | |||
| 439 | /* | ||
| 440 | * Recover original instructions and breakpoints from relative jumps. | ||
| 441 | * Caller must hold kprobe_mutex. | ||
| 442 | */ | ||
| 443 | void arch_unoptimize_kprobes(struct list_head *oplist, | ||
| 444 | struct list_head *done_list) | ||
| 445 | { | ||
| 446 | struct optimized_kprobe *op, *tmp; | ||
| 447 | int c = 0; | ||
| 448 | |||
| 449 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
| 450 | /* Setup param */ | ||
| 451 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
| 452 | jump_poke_bufs[c].buf, op); | ||
| 453 | list_move(&op->list, done_list); | ||
| 454 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
| 455 | break; | ||
| 456 | } | ||
| 457 | |||
| 458 | /* | ||
| 459 | * text_poke_smp doesn't support modifying code in NMI/MCE | ||
| 460 | * handlers. However, since kprobes itself also doesn't support | ||
| 461 | * probing NMI/MCE code, this is not a problem. | ||
| 462 | */ | ||
| 463 | text_poke_smp_batch(jump_poke_params, c); | ||
| 464 | } | ||
| 465 | |||
| 466 | /* Replace a relative jump with a breakpoint (int3). */ | ||
| 467 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
| 468 | { | ||
| 469 | u8 buf[RELATIVEJUMP_SIZE]; | ||
| 470 | |||
| 471 | /* Set an int3 as the first byte for kprobes */ | ||
| 472 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
| 473 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
| 474 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
| 475 | } | ||
| 476 | |||
| 477 | int __kprobes | ||
| 478 | setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
| 479 | { | ||
| 480 | struct optimized_kprobe *op; | ||
| 481 | |||
| 482 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
| 483 | /* This kprobe can really run the optimized path. */ | ||
| 484 | op = container_of(p, struct optimized_kprobe, kp); | ||
| 485 | /* Detour through copied instructions */ | ||
| 486 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
| 487 | if (!reenter) | ||
| 488 | reset_current_kprobe(); | ||
| 489 | preempt_enable_no_resched(); | ||
| 490 | return 1; | ||
| 491 | } | ||
| 492 | return 0; | ||
| 493 | } | ||
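When an optimized probe is hit through the int3 path anyway (for instance while the jump is still being installed or removed), the fault is resolved by pointing regs->ip past the register-saving template, straight at the relocated instructions, so the displaced code runs out of line without re-entering the pre-handler. A toy computation of that resume address, with made-up values:

#include <stdio.h>

int main(void)
{
        unsigned long insn = 0x500000;  /* op->optinsn.insn (hypothetical) */
        unsigned long tmpl_end_idx = 40;/* template size (made up) */

        /* insn + TMPL_END_IDX skips the template; the relocated
         * instructions there end in a jump back to the function. */
        printf("resume ip = 0x%lx\n", insn + tmpl_end_idx);
        return 0;
}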
| 494 | |||
| 495 | int __kprobes arch_init_optprobes(void) | ||
| 496 | { | ||
| 497 | /* Allocate code buffer and parameter array */ | ||
| 498 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
| 499 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
| 500 | if (!jump_poke_bufs) | ||
| 501 | return -ENOMEM; | ||
| 502 | |||
| 503 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
| 504 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
| 505 | if (!jump_poke_params) { | ||
| 506 | kfree(jump_poke_bufs); | ||
| 507 | jump_poke_bufs = NULL; | ||
| 508 | return -ENOMEM; | ||
| 509 | } | ||
| 510 | |||
| 511 | return 0; | ||
| 512 | } | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d8b64c..e213fc8408d2 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -30,16 +30,15 @@ | |||
| 30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi |
| 31 | * <prasanna@in.ibm.com> added function-return probes. | 31 | * <prasanna@in.ibm.com> added function-return probes. |
| 32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | 32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> |
| 33 | * Added function return probes functionality | 33 | * Added function return probes functionality |
| 34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | 34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added |
| 35 | * kprobe-booster and kretprobe-booster for i386. | 35 | * kprobe-booster and kretprobe-booster for i386. |
| 36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | 36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster |
| 37 | * and kretprobe-booster for x86-64 | 37 | * and kretprobe-booster for x86-64 |
| 38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | 38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven |
| 39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | 39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> |
| 40 | * unified x86 kprobes code. | 40 | * unified x86 kprobes code. |
| 41 | */ | 41 | */ |
| 42 | |||
| 43 | #include <linux/kprobes.h> | 42 | #include <linux/kprobes.h> |
| 44 | #include <linux/ptrace.h> | 43 | #include <linux/ptrace.h> |
| 45 | #include <linux/string.h> | 44 | #include <linux/string.h> |
| @@ -59,6 +58,8 @@ | |||
| 59 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
| 60 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
| 61 | 60 | ||
| 61 | #include "kprobes-common.h" | ||
| 62 | |||
| 62 | void jprobe_return_end(void); | 63 | void jprobe_return_end(void); |
| 63 | 64 | ||
| 64 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 65 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
| @@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
| 108 | doesn't switch kernel stack.*/ | 109 | doesn't switch kernel stack.*/ |
| 109 | {NULL, NULL} /* Terminator */ | 110 | {NULL, NULL} /* Terminator */ |
| 110 | }; | 111 | }; |
| 112 | |||
| 111 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 113 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
| 112 | 114 | ||
| 113 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | 115 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
| @@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | |||
| 123 | } | 125 | } |
| 124 | 126 | ||
| 125 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 127 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ |
| 126 | static void __kprobes synthesize_reljump(void *from, void *to) | 128 | void __kprobes synthesize_reljump(void *from, void *to) |
| 127 | { | 129 | { |
| 128 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | 130 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); |
| 129 | } | 131 | } |
| 130 | 132 | ||
| 133 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
| 134 | void __kprobes synthesize_relcall(void *from, void *to) | ||
| 135 | { | ||
| 136 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
| 137 | } | ||
| 138 | |||
| 131 | /* | 139 | /* |
| 132 | * Skip the prefixes of the instruction. | 140 | * Skip the prefixes of the instruction. |
| 133 | */ | 141 | */ |
| @@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) | |||
| 151 | * Returns non-zero if opcode is boostable. | 159 | * Returns non-zero if opcode is boostable. |
| 152 | * RIP relative instructions are adjusted at copying time in 64 bits mode | 160 | * RIP relative instructions are adjusted at copying time in 64 bits mode |
| 153 | */ | 161 | */ |
| 154 | static int __kprobes can_boost(kprobe_opcode_t *opcodes) | 162 | int __kprobes can_boost(kprobe_opcode_t *opcodes) |
| 155 | { | 163 | { |
| 156 | kprobe_opcode_t opcode; | 164 | kprobe_opcode_t opcode; |
| 157 | kprobe_opcode_t *orig_opcodes = opcodes; | 165 | kprobe_opcode_t *orig_opcodes = opcodes; |
| @@ -207,13 +215,15 @@ retry: | |||
| 207 | } | 215 | } |
| 208 | } | 216 | } |
| 209 | 217 | ||
| 210 | /* Recover the probed instruction at addr for further analysis. */ | 218 | static unsigned long |
| 211 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 219 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) |
| 212 | { | 220 | { |
| 213 | struct kprobe *kp; | 221 | struct kprobe *kp; |
| 222 | |||
| 214 | kp = get_kprobe((void *)addr); | 223 | kp = get_kprobe((void *)addr); |
| 224 | /* There is no probe; return the original address */ | ||
| 215 | if (!kp) | 225 | if (!kp) |
| 216 | return -EINVAL; | 226 | return addr; |
| 217 | 227 | ||
| 218 | /* | 228 | /* |
| 219 | * Basically, kp->ainsn.insn has an original instruction. | 229 | * Basically, kp->ainsn.insn has an original instruction. |
| @@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
| 230 | */ | 240 | */ |
| 231 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 241 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
| 232 | buf[0] = kp->opcode; | 242 | buf[0] = kp->opcode; |
| 233 | return 0; | 243 | return (unsigned long)buf; |
| 244 | } | ||
| 245 | |||
| 246 | /* | ||
| 247 | * Recover the probed instruction at addr for further analysis. | ||
| 248 | * Caller must hold kprobe_mutex, or disable preemption to | ||
| 249 | * prevent the referenced kprobes from being released. | ||
| 250 | */ | ||
| 251 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
| 252 | { | ||
| 253 | unsigned long __addr; | ||
| 254 | |||
| 255 | __addr = __recover_optprobed_insn(buf, addr); | ||
| 256 | if (__addr != addr) | ||
| 257 | return __addr; | ||
| 258 | |||
| 259 | return __recover_probed_insn(buf, addr); | ||
| 234 | } | 260 | } |
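recover_probed_instruction() now tries the optprobe-aware recovery first; __recover_optprobed_insn() returns the address unchanged when no optimized probe covers it, which signals the fallback to the plain int3 recovery. The dispatch shape in miniature, with stubs standing in for the two kernel helpers:

#include <stdio.h>

/* Stub: "no optimized probe covers addr", so addr comes back unchanged. */
static unsigned long recover_opt(unsigned char *buf, unsigned long addr)
{
        (void)buf;
        return addr;
}

/* Stub: restore the saved first byte into buf and return the buffer. */
static unsigned long recover_int3(unsigned char *buf, unsigned long addr)
{
        (void)addr;
        buf[0] = 0x55;
        return (unsigned long)buf;
}

static unsigned long recover(unsigned char *buf, unsigned long addr)
{
        unsigned long a = recover_opt(buf, addr);

        if (a != addr)          /* the optprobe path recovered it */
                return a;
        return recover_int3(buf, addr);
}

int main(void)
{
        unsigned char buf[16];
        unsigned long addr = 0x1234;

        printf("recovered %s the buffer\n",
               recover(buf, addr) == addr ? "without" : "via");
        return 0;
}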
| 235 | 261 | ||
| 236 | /* Check if paddr is at an instruction boundary */ | 262 | /* Check if paddr is at an instruction boundary */ |
| 237 | static int __kprobes can_probe(unsigned long paddr) | 263 | static int __kprobes can_probe(unsigned long paddr) |
| 238 | { | 264 | { |
| 239 | int ret; | 265 | unsigned long addr, __addr, offset = 0; |
| 240 | unsigned long addr, offset = 0; | ||
| 241 | struct insn insn; | 266 | struct insn insn; |
| 242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 267 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
| 243 | 268 | ||
| @@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) | |||
| 247 | /* Decode instructions */ | 272 | /* Decode instructions */ |
| 248 | addr = paddr - offset; | 273 | addr = paddr - offset; |
| 249 | while (addr < paddr) { | 274 | while (addr < paddr) { |
| 250 | kernel_insn_init(&insn, (void *)addr); | ||
| 251 | insn_get_opcode(&insn); | ||
| 252 | |||
| 253 | /* | 275 | /* |
| 254 | * Check if the instruction has been modified by another | 276 | * Check if the instruction has been modified by another |
| 255 | * kprobe, in which case we replace the breakpoint by the | 277 | * kprobe, in which case we replace the breakpoint by the |
| 256 | * original instruction in our buffer. | 278 | * original instruction in our buffer. |
| 279 | * Also, jump optimization will change the breakpoint to a | ||
| 280 | * relative jump. Since a relative jump is also an ordinary | ||
| 281 | * instruction, we simply pass through if there is no kprobe. | ||
| 257 | */ | 282 | */ |
| 258 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | 283 | __addr = recover_probed_instruction(buf, addr); |
| 259 | ret = recover_probed_instruction(buf, addr); | 284 | kernel_insn_init(&insn, (void *)__addr); |
| 260 | if (ret) | ||
| 261 | /* | ||
| 262 | * Another debugging subsystem might insert | ||
| 263 | * this breakpoint. In that case, we can't | ||
| 264 | * recover it. | ||
| 265 | */ | ||
| 266 | return 0; | ||
| 267 | kernel_insn_init(&insn, buf); | ||
| 268 | } | ||
| 269 | insn_get_length(&insn); | 285 | insn_get_length(&insn); |
| 286 | |||
| 287 | /* | ||
| 288 | * Another debugging subsystem might insert this breakpoint. | ||
| 289 | * In that case, we can't recover it. | ||
| 290 | */ | ||
| 291 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
| 292 | return 0; | ||
| 270 | addr += insn.length; | 293 | addr += insn.length; |
| 271 | } | 294 | } |
| 272 | 295 | ||
| @@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
| 299 | * If not, return null. | 322 | * If not, return null. |
| 300 | * Only applicable to 64-bit x86. | 323 | * Only applicable to 64-bit x86. |
| 301 | */ | 324 | */ |
| 302 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | 325 | int __kprobes __copy_instruction(u8 *dest, u8 *src) |
| 303 | { | 326 | { |
| 304 | struct insn insn; | 327 | struct insn insn; |
| 305 | int ret; | ||
| 306 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 328 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
| 307 | 329 | ||
| 308 | kernel_insn_init(&insn, src); | 330 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); |
| 309 | if (recover) { | ||
| 310 | insn_get_opcode(&insn); | ||
| 311 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
| 312 | ret = recover_probed_instruction(buf, | ||
| 313 | (unsigned long)src); | ||
| 314 | if (ret) | ||
| 315 | return 0; | ||
| 316 | kernel_insn_init(&insn, buf); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | insn_get_length(&insn); | 331 | insn_get_length(&insn); |
| 332 | /* Another subsystem has put a breakpoint; we can't recover it */ | ||
| 333 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
| 334 | return 0; | ||
| 320 | memcpy(dest, insn.kaddr, insn.length); | 335 | memcpy(dest, insn.kaddr, insn.length); |
| 321 | 336 | ||
| 322 | #ifdef CONFIG_X86_64 | 337 | #ifdef CONFIG_X86_64 |
| @@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
| 337 | * extension of the original signed 32-bit displacement would | 352 | * extension of the original signed 32-bit displacement would |
| 338 | * have given. | 353 | * have given. |
| 339 | */ | 354 | */ |
| 340 | newdisp = (u8 *) src + (s64) insn.displacement.value - | 355 | newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; |
| 341 | (u8 *) dest; | ||
| 342 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 356 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
| 343 | disp = (u8 *) dest + insn_offset_displacement(&insn); | 357 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
| 344 | *(s32 *) disp = (s32) newdisp; | 358 | *(s32 *) disp = (s32) newdisp; |
| @@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
| 349 | 363 | ||
| 350 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 364 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
| 351 | { | 365 | { |
| 366 | /* Copy the instruction, recovering it if another optprobe has modified it. */ | ||
| 367 | __copy_instruction(p->ainsn.insn, p->addr); | ||
| 368 | |||
| 352 | /* | 369 | /* |
| 353 | * Copy an instruction without recovering int3, because it will be | 370 | * __copy_instruction can modify the displacement of the instruction, |
| 354 | * put by another subsystem. | 371 | * but that doesn't affect the boostable check.
| 355 | */ | 372 | */ |
| 356 | __copy_instruction(p->ainsn.insn, p->addr, 0); | 373 | if (can_boost(p->ainsn.insn)) |
| 357 | |||
| 358 | if (can_boost(p->addr)) | ||
| 359 | p->ainsn.boostable = 0; | 374 | p->ainsn.boostable = 0; |
| 360 | else | 375 | else |
| 361 | p->ainsn.boostable = -1; | 376 | p->ainsn.boostable = -1; |
| 362 | 377 | ||
| 363 | p->opcode = *p->addr; | 378 | /* Also, a displacement change doesn't affect the first byte */
| 379 | p->opcode = p->ainsn.insn[0]; | ||
| 364 | } | 380 | } |
| 365 | 381 | ||
| 366 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 382 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
| @@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) | |||
| 442 | } | 458 | } |
| 443 | } | 459 | } |
| 444 | 460 | ||
| 445 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 461 | void __kprobes |
| 446 | struct pt_regs *regs) | 462 | arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) |
| 447 | { | 463 | { |
| 448 | unsigned long *sara = stack_addr(regs); | 464 | unsigned long *sara = stack_addr(regs); |
| 449 | 465 | ||
| @@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
| 453 | *sara = (unsigned long) &kretprobe_trampoline; | 469 | *sara = (unsigned long) &kretprobe_trampoline; |
| 454 | } | 470 | } |
| 455 | 471 | ||
| 456 | #ifdef CONFIG_OPTPROBES | 472 | static void __kprobes |
| 457 | static int __kprobes setup_detour_execution(struct kprobe *p, | 473 | setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) |
| 458 | struct pt_regs *regs, | ||
| 459 | int reenter); | ||
| 460 | #else | ||
| 461 | #define setup_detour_execution(p, regs, reenter) (0) | ||
| 462 | #endif | ||
| 463 | |||
| 464 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
| 465 | struct kprobe_ctlblk *kcb, int reenter) | ||
| 466 | { | 474 | { |
| 467 | if (setup_detour_execution(p, regs, reenter)) | 475 | if (setup_detour_execution(p, regs, reenter)) |
| 468 | return; | 476 | return; |
| @@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | |||
| 504 | * within the handler. We save the original kprobes variables and just single | 512 | * within the handler. We save the original kprobes variables and just single |
| 505 | * step on the instruction of the new probe without calling any user handlers. | 513 | * step on the instruction of the new probe without calling any user handlers. |
| 506 | */ | 514 | */ |
| 507 | static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | 515 | static int __kprobes |
| 508 | struct kprobe_ctlblk *kcb) | 516 | reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
| 509 | { | 517 | { |
| 510 | switch (kcb->kprobe_status) { | 518 | switch (kcb->kprobe_status) { |
| 511 | case KPROBE_HIT_SSDONE: | 519 | case KPROBE_HIT_SSDONE: |
| @@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
| 600 | return 0; | 608 | return 0; |
| 601 | } | 609 | } |
| 602 | 610 | ||
| 603 | #ifdef CONFIG_X86_64 | ||
| 604 | #define SAVE_REGS_STRING \ | ||
| 605 | /* Skip cs, ip, orig_ax. */ \ | ||
| 606 | " subq $24, %rsp\n" \ | ||
| 607 | " pushq %rdi\n" \ | ||
| 608 | " pushq %rsi\n" \ | ||
| 609 | " pushq %rdx\n" \ | ||
| 610 | " pushq %rcx\n" \ | ||
| 611 | " pushq %rax\n" \ | ||
| 612 | " pushq %r8\n" \ | ||
| 613 | " pushq %r9\n" \ | ||
| 614 | " pushq %r10\n" \ | ||
| 615 | " pushq %r11\n" \ | ||
| 616 | " pushq %rbx\n" \ | ||
| 617 | " pushq %rbp\n" \ | ||
| 618 | " pushq %r12\n" \ | ||
| 619 | " pushq %r13\n" \ | ||
| 620 | " pushq %r14\n" \ | ||
| 621 | " pushq %r15\n" | ||
| 622 | #define RESTORE_REGS_STRING \ | ||
| 623 | " popq %r15\n" \ | ||
| 624 | " popq %r14\n" \ | ||
| 625 | " popq %r13\n" \ | ||
| 626 | " popq %r12\n" \ | ||
| 627 | " popq %rbp\n" \ | ||
| 628 | " popq %rbx\n" \ | ||
| 629 | " popq %r11\n" \ | ||
| 630 | " popq %r10\n" \ | ||
| 631 | " popq %r9\n" \ | ||
| 632 | " popq %r8\n" \ | ||
| 633 | " popq %rax\n" \ | ||
| 634 | " popq %rcx\n" \ | ||
| 635 | " popq %rdx\n" \ | ||
| 636 | " popq %rsi\n" \ | ||
| 637 | " popq %rdi\n" \ | ||
| 638 | /* Skip orig_ax, ip, cs */ \ | ||
| 639 | " addq $24, %rsp\n" | ||
| 640 | #else | ||
| 641 | #define SAVE_REGS_STRING \ | ||
| 642 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
| 643 | " subl $16, %esp\n" \ | ||
| 644 | " pushl %fs\n" \ | ||
| 645 | " pushl %es\n" \ | ||
| 646 | " pushl %ds\n" \ | ||
| 647 | " pushl %eax\n" \ | ||
| 648 | " pushl %ebp\n" \ | ||
| 649 | " pushl %edi\n" \ | ||
| 650 | " pushl %esi\n" \ | ||
| 651 | " pushl %edx\n" \ | ||
| 652 | " pushl %ecx\n" \ | ||
| 653 | " pushl %ebx\n" | ||
| 654 | #define RESTORE_REGS_STRING \ | ||
| 655 | " popl %ebx\n" \ | ||
| 656 | " popl %ecx\n" \ | ||
| 657 | " popl %edx\n" \ | ||
| 658 | " popl %esi\n" \ | ||
| 659 | " popl %edi\n" \ | ||
| 660 | " popl %ebp\n" \ | ||
| 661 | " popl %eax\n" \ | ||
| 662 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
| 663 | " addl $24, %esp\n" | ||
| 664 | #endif | ||
| 665 | |||
| 666 | /* | 611 | /* |
| 667 | * When a retprobed function returns, this code saves registers and | 612 | * When a retprobed function returns, this code saves registers and |
| 668 | * calls trampoline_handler() runs, which calls the kretprobe's handler. | 613 | * calls trampoline_handler() runs, which calls the kretprobe's handler. |
| @@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 816 | * jump instruction after the copied instruction, that jumps to the next | 761 | * jump instruction after the copied instruction, that jumps to the next |
| 817 | * instruction after the probepoint. | 762 | * instruction after the probepoint. |
| 818 | */ | 763 | */ |
| 819 | static void __kprobes resume_execution(struct kprobe *p, | 764 | static void __kprobes |
| 820 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | 765 | resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
| 821 | { | 766 | { |
| 822 | unsigned long *tos = stack_addr(regs); | 767 | unsigned long *tos = stack_addr(regs); |
| 823 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | 768 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; |
| @@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
| 996 | /* | 941 | /* |
| 997 | * Wrapper routine for handling exceptions. | 942 | * Wrapper routine for handling exceptions. |
| 998 | */ | 943 | */ |
| 999 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | 944 | int __kprobes |
| 1000 | unsigned long val, void *data) | 945 | kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) |
| 1001 | { | 946 | { |
| 1002 | struct die_args *args = data; | 947 | struct die_args *args = data; |
| 1003 | int ret = NOTIFY_DONE; | 948 | int ret = NOTIFY_DONE; |
| @@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
| 1107 | return 0; | 1052 | return 0; |
| 1108 | } | 1053 | } |
| 1109 | 1054 | ||
| 1110 | |||
| 1111 | #ifdef CONFIG_OPTPROBES | ||
| 1112 | |||
| 1113 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
| 1114 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
| 1115 | { | ||
| 1116 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
| 1120 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
| 1121 | unsigned long val) | ||
| 1122 | { | ||
| 1123 | #ifdef CONFIG_X86_64 | ||
| 1124 | *addr++ = 0x48; | ||
| 1125 | *addr++ = 0xbf; | ||
| 1126 | #else | ||
| 1127 | *addr++ = 0xb8; | ||
| 1128 | #endif | ||
| 1129 | *(unsigned long *)addr = val; | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
| 1133 | { | ||
| 1134 | asm volatile ( | ||
| 1135 | ".global optprobe_template_entry\n" | ||
| 1136 | "optprobe_template_entry: \n" | ||
| 1137 | #ifdef CONFIG_X86_64 | ||
| 1138 | /* We don't bother saving the ss register */ | ||
| 1139 | " pushq %rsp\n" | ||
| 1140 | " pushfq\n" | ||
| 1141 | SAVE_REGS_STRING | ||
| 1142 | " movq %rsp, %rsi\n" | ||
| 1143 | ".global optprobe_template_val\n" | ||
| 1144 | "optprobe_template_val: \n" | ||
| 1145 | ASM_NOP5 | ||
| 1146 | ASM_NOP5 | ||
| 1147 | ".global optprobe_template_call\n" | ||
| 1148 | "optprobe_template_call: \n" | ||
| 1149 | ASM_NOP5 | ||
| 1150 | /* Move flags to rsp */ | ||
| 1151 | " movq 144(%rsp), %rdx\n" | ||
| 1152 | " movq %rdx, 152(%rsp)\n" | ||
| 1153 | RESTORE_REGS_STRING | ||
| 1154 | /* Skip flags entry */ | ||
| 1155 | " addq $8, %rsp\n" | ||
| 1156 | " popfq\n" | ||
| 1157 | #else /* CONFIG_X86_32 */ | ||
| 1158 | " pushf\n" | ||
| 1159 | SAVE_REGS_STRING | ||
| 1160 | " movl %esp, %edx\n" | ||
| 1161 | ".global optprobe_template_val\n" | ||
| 1162 | "optprobe_template_val: \n" | ||
| 1163 | ASM_NOP5 | ||
| 1164 | ".global optprobe_template_call\n" | ||
| 1165 | "optprobe_template_call: \n" | ||
| 1166 | ASM_NOP5 | ||
| 1167 | RESTORE_REGS_STRING | ||
| 1168 | " addl $4, %esp\n" /* skip cs */ | ||
| 1169 | " popf\n" | ||
| 1170 | #endif | ||
| 1171 | ".global optprobe_template_end\n" | ||
| 1172 | "optprobe_template_end: \n"); | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | #define TMPL_MOVE_IDX \ | ||
| 1176 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
| 1177 | #define TMPL_CALL_IDX \ | ||
| 1178 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
| 1179 | #define TMPL_END_IDX \ | ||
| 1180 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
| 1181 | |||
| 1182 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
| 1183 | |||
| 1184 | /* Optimized kprobe call back function: called from optinsn */ | ||
| 1185 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
| 1186 | struct pt_regs *regs) | ||
| 1187 | { | ||
| 1188 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
| 1189 | unsigned long flags; | ||
| 1190 | |||
| 1191 | /* This is possible if op is under delayed unoptimizing */ | ||
| 1192 | if (kprobe_disabled(&op->kp)) | ||
| 1193 | return; | ||
| 1194 | |||
| 1195 | local_irq_save(flags); | ||
| 1196 | if (kprobe_running()) { | ||
| 1197 | kprobes_inc_nmissed_count(&op->kp); | ||
| 1198 | } else { | ||
| 1199 | /* Save skipped registers */ | ||
| 1200 | #ifdef CONFIG_X86_64 | ||
| 1201 | regs->cs = __KERNEL_CS; | ||
| 1202 | #else | ||
| 1203 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
| 1204 | regs->gs = 0; | ||
| 1205 | #endif | ||
| 1206 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
| 1207 | regs->orig_ax = ~0UL; | ||
| 1208 | |||
| 1209 | __this_cpu_write(current_kprobe, &op->kp); | ||
| 1210 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
| 1211 | opt_pre_handler(&op->kp, regs); | ||
| 1212 | __this_cpu_write(current_kprobe, NULL); | ||
| 1213 | } | ||
| 1214 | local_irq_restore(flags); | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
| 1218 | { | ||
| 1219 | int len = 0, ret; | ||
| 1220 | |||
| 1221 | while (len < RELATIVEJUMP_SIZE) { | ||
| 1222 | ret = __copy_instruction(dest + len, src + len, 1); | ||
| 1223 | if (!ret || !can_boost(dest + len)) | ||
| 1224 | return -EINVAL; | ||
| 1225 | len += ret; | ||
| 1226 | } | ||
| 1227 | /* Check whether the address range is reserved */ | ||
| 1228 | if (ftrace_text_reserved(src, src + len - 1) || | ||
| 1229 | alternatives_text_reserved(src, src + len - 1) || | ||
| 1230 | jump_label_text_reserved(src, src + len - 1)) | ||
| 1231 | return -EBUSY; | ||
| 1232 | |||
| 1233 | return len; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | /* Check whether insn is indirect jump */ | ||
| 1237 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
| 1238 | { | ||
| 1239 | return ((insn->opcode.bytes[0] == 0xff && | ||
| 1240 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
| 1241 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | /* Check whether insn jumps into specified address range */ | ||
| 1245 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
| 1246 | { | ||
| 1247 | unsigned long target = 0; | ||
| 1248 | |||
| 1249 | switch (insn->opcode.bytes[0]) { | ||
| 1250 | case 0xe0: /* loopne */ | ||
| 1251 | case 0xe1: /* loope */ | ||
| 1252 | case 0xe2: /* loop */ | ||
| 1253 | case 0xe3: /* jcxz */ | ||
| 1254 | case 0xe9: /* near relative jump */ | ||
| 1255 | case 0xeb: /* short relative jump */ | ||
| 1256 | break; | ||
| 1257 | case 0x0f: | ||
| 1258 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
| 1259 | break; | ||
| 1260 | return 0; | ||
| 1261 | default: | ||
| 1262 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
| 1263 | break; | ||
| 1264 | return 0; | ||
| 1265 | } | ||
| 1266 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
| 1267 | |||
| 1268 | return (start <= target && target <= start + len); | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | /* Decode whole function to ensure any instructions don't jump into target */ | ||
| 1272 | static int __kprobes can_optimize(unsigned long paddr) | ||
| 1273 | { | ||
| 1274 | int ret; | ||
| 1275 | unsigned long addr, size = 0, offset = 0; | ||
| 1276 | struct insn insn; | ||
| 1277 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
| 1278 | |||
| 1279 | /* Lookup symbol including addr */ | ||
| 1280 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
| 1281 | return 0; | ||
| 1282 | |||
| 1283 | /* | ||
| 1284 | * Do not optimize in the entry code due to the unstable | ||
| 1285 | * stack handling. | ||
| 1286 | */ | ||
| 1287 | if ((paddr >= (unsigned long )__entry_text_start) && | ||
| 1288 | (paddr < (unsigned long )__entry_text_end)) | ||
| 1289 | return 0; | ||
| 1290 | |||
| 1291 | /* Check there is enough space for a relative jump. */ | ||
| 1292 | if (size - offset < RELATIVEJUMP_SIZE) | ||
| 1293 | return 0; | ||
| 1294 | |||
| 1295 | /* Decode instructions */ | ||
| 1296 | addr = paddr - offset; | ||
| 1297 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
| 1298 | if (search_exception_tables(addr)) | ||
| 1299 | /* | ||
| 1300 | * Since some fixup code will jumps into this function, | ||
| 1301 | * we can't optimize kprobe in this function. | ||
| 1302 | */ | ||
| 1303 | return 0; | ||
| 1304 | kernel_insn_init(&insn, (void *)addr); | ||
| 1305 | insn_get_opcode(&insn); | ||
| 1306 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
| 1307 | ret = recover_probed_instruction(buf, addr); | ||
| 1308 | if (ret) | ||
| 1309 | return 0; | ||
| 1310 | kernel_insn_init(&insn, buf); | ||
| 1311 | } | ||
| 1312 | insn_get_length(&insn); | ||
| 1313 | /* Recover address */ | ||
| 1314 | insn.kaddr = (void *)addr; | ||
| 1315 | insn.next_byte = (void *)(addr + insn.length); | ||
| 1316 | /* Check any instructions don't jump into target */ | ||
| 1317 | if (insn_is_indirect_jump(&insn) || | ||
| 1318 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
| 1319 | RELATIVE_ADDR_SIZE)) | ||
| 1320 | return 0; | ||
| 1321 | addr += insn.length; | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | return 1; | ||
| 1325 | } | ||
| 1326 | |||
| 1327 | /* Check optimized_kprobe can actually be optimized. */ | ||
| 1328 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
| 1329 | { | ||
| 1330 | int i; | ||
| 1331 | struct kprobe *p; | ||
| 1332 | |||
| 1333 | for (i = 1; i < op->optinsn.size; i++) { | ||
| 1334 | p = get_kprobe(op->kp.addr + i); | ||
| 1335 | if (p && !kprobe_disabled(p)) | ||
| 1336 | return -EEXIST; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | return 0; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | /* Check the addr is within the optimized instructions. */ | ||
| 1343 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
| 1344 | unsigned long addr) | ||
| 1345 | { | ||
| 1346 | return ((unsigned long)op->kp.addr <= addr && | ||
| 1347 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | /* Free optimized instruction slot */ | ||
| 1351 | static __kprobes | ||
| 1352 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
| 1353 | { | ||
| 1354 | if (op->optinsn.insn) { | ||
| 1355 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
| 1356 | op->optinsn.insn = NULL; | ||
| 1357 | op->optinsn.size = 0; | ||
| 1358 | } | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
| 1362 | { | ||
| 1363 | __arch_remove_optimized_kprobe(op, 1); | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | /* | ||
| 1367 | * Copy replacing target instructions | ||
| 1368 | * Target instructions MUST be relocatable (checked inside) | ||
| 1369 | */ | ||
| 1370 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
| 1371 | { | ||
| 1372 | u8 *buf; | ||
| 1373 | int ret; | ||
| 1374 | long rel; | ||
| 1375 | |||
| 1376 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
| 1377 | return -EILSEQ; | ||
| 1378 | |||
| 1379 | op->optinsn.insn = get_optinsn_slot(); | ||
| 1380 | if (!op->optinsn.insn) | ||
| 1381 | return -ENOMEM; | ||
| 1382 | |||
| 1383 | /* | ||
| 1384 | * Verify if the address gap is in 2GB range, because this uses | ||
| 1385 | * a relative jump. | ||
| 1386 | */ | ||
| 1387 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
| 1388 | if (abs(rel) > 0x7fffffff) | ||
| 1389 | return -ERANGE; | ||
| 1390 | |||
| 1391 | buf = (u8 *)op->optinsn.insn; | ||
| 1392 | |||
| 1393 | /* Copy instructions into the out-of-line buffer */ | ||
| 1394 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
| 1395 | if (ret < 0) { | ||
| 1396 | __arch_remove_optimized_kprobe(op, 0); | ||
| 1397 | return ret; | ||
| 1398 | } | ||
| 1399 | op->optinsn.size = ret; | ||
| 1400 | |||
| 1401 | /* Copy arch-dep-instance from template */ | ||
| 1402 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
| 1403 | |||
| 1404 | /* Set probe information */ | ||
| 1405 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
| 1406 | |||
| 1407 | /* Set probe function call */ | ||
| 1408 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
| 1409 | |||
| 1410 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
| 1411 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
| 1412 | (u8 *)op->kp.addr + op->optinsn.size); | ||
| 1413 | |||
| 1414 | flush_icache_range((unsigned long) buf, | ||
| 1415 | (unsigned long) buf + TMPL_END_IDX + | ||
| 1416 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
| 1417 | return 0; | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | #define MAX_OPTIMIZE_PROBES 256 | ||
| 1421 | static struct text_poke_param *jump_poke_params; | ||
| 1422 | static struct jump_poke_buffer { | ||
| 1423 | u8 buf[RELATIVEJUMP_SIZE]; | ||
| 1424 | } *jump_poke_bufs; | ||
| 1425 | |||
| 1426 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
| 1427 | u8 *insn_buf, | ||
| 1428 | struct optimized_kprobe *op) | ||
| 1429 | { | ||
| 1430 | s32 rel = (s32)((long)op->optinsn.insn - | ||
| 1431 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
| 1432 | |||
| 1433 | /* Backup instructions which will be replaced by jump address */ | ||
| 1434 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
| 1435 | RELATIVE_ADDR_SIZE); | ||
| 1436 | |||
| 1437 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
| 1438 | *(s32 *)(&insn_buf[1]) = rel; | ||
| 1439 | |||
| 1440 | tprm->addr = op->kp.addr; | ||
| 1441 | tprm->opcode = insn_buf; | ||
| 1442 | tprm->len = RELATIVEJUMP_SIZE; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | /* | ||
| 1446 | * Replace breakpoints (int3) with relative jumps. | ||
| 1447 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
| 1448 | */ | ||
| 1449 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
| 1450 | { | ||
| 1451 | struct optimized_kprobe *op, *tmp; | ||
| 1452 | int c = 0; | ||
| 1453 | |||
| 1454 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
| 1455 | WARN_ON(kprobe_disabled(&op->kp)); | ||
| 1456 | /* Setup param */ | ||
| 1457 | setup_optimize_kprobe(&jump_poke_params[c], | ||
| 1458 | jump_poke_bufs[c].buf, op); | ||
| 1459 | list_del_init(&op->list); | ||
| 1460 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
| 1461 | break; | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | /* | ||
| 1465 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
| 1466 | * However, since kprobes itself also doesn't support NMI/MCE | ||
| 1467 | * code probing, it's not a problem. | ||
| 1468 | */ | ||
| 1469 | text_poke_smp_batch(jump_poke_params, c); | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
| 1473 | u8 *insn_buf, | ||
| 1474 | struct optimized_kprobe *op) | ||
| 1475 | { | ||
| 1476 | /* Set int3 to first byte for kprobes */ | ||
| 1477 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
| 1478 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
| 1479 | |||
| 1480 | tprm->addr = op->kp.addr; | ||
| 1481 | tprm->opcode = insn_buf; | ||
| 1482 | tprm->len = RELATIVEJUMP_SIZE; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | /* | ||
| 1486 | * Recover original instructions and breakpoints from relative jumps. | ||
| 1487 | * Caller must call with locking kprobe_mutex. | ||
| 1488 | */ | ||
| 1489 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
| 1490 | struct list_head *done_list) | ||
| 1491 | { | ||
| 1492 | struct optimized_kprobe *op, *tmp; | ||
| 1493 | int c = 0; | ||
| 1494 | |||
| 1495 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
| 1496 | /* Setup param */ | ||
| 1497 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
| 1498 | jump_poke_bufs[c].buf, op); | ||
| 1499 | list_move(&op->list, done_list); | ||
| 1500 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
| 1501 | break; | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | /* | ||
| 1505 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
| 1506 | * However, since kprobes itself also doesn't support NMI/MCE | ||
| 1507 | * code probing, it's not a problem. | ||
| 1508 | */ | ||
| 1509 | text_poke_smp_batch(jump_poke_params, c); | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | /* Replace a relative jump with a breakpoint (int3). */ | ||
| 1513 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
| 1514 | { | ||
| 1515 | u8 buf[RELATIVEJUMP_SIZE]; | ||
| 1516 | |||
| 1517 | /* Set int3 to first byte for kprobes */ | ||
| 1518 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
| 1519 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
| 1520 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
| 1521 | } | ||
| 1522 | |||
| 1523 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
| 1524 | struct pt_regs *regs, | ||
| 1525 | int reenter) | ||
| 1526 | { | ||
| 1527 | struct optimized_kprobe *op; | ||
| 1528 | |||
| 1529 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
| 1530 | /* This kprobe is really able to run optimized path. */ | ||
| 1531 | op = container_of(p, struct optimized_kprobe, kp); | ||
| 1532 | /* Detour through copied instructions */ | ||
| 1533 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
| 1534 | if (!reenter) | ||
| 1535 | reset_current_kprobe(); | ||
| 1536 | preempt_enable_no_resched(); | ||
| 1537 | return 1; | ||
| 1538 | } | ||
| 1539 | return 0; | ||
| 1540 | } | ||
| 1541 | |||
| 1542 | static int __kprobes init_poke_params(void) | ||
| 1543 | { | ||
| 1544 | /* Allocate code buffer and parameter array */ | ||
| 1545 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
| 1546 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
| 1547 | if (!jump_poke_bufs) | ||
| 1548 | return -ENOMEM; | ||
| 1549 | |||
| 1550 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
| 1551 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
| 1552 | if (!jump_poke_params) { | ||
| 1553 | kfree(jump_poke_bufs); | ||
| 1554 | jump_poke_bufs = NULL; | ||
| 1555 | return -ENOMEM; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | return 0; | ||
| 1559 | } | ||
| 1560 | #else /* !CONFIG_OPTPROBES */ | ||
| 1561 | static int __kprobes init_poke_params(void) | ||
| 1562 | { | ||
| 1563 | return 0; | ||
| 1564 | } | ||
| 1565 | #endif | ||
| 1566 | |||
| 1567 | int __init arch_init_kprobes(void) | 1055 | int __init arch_init_kprobes(void) |
| 1568 | { | 1056 | { |
| 1569 | return init_poke_params(); | 1057 | return arch_init_optprobes(); |
| 1570 | } | 1058 | } |
| 1571 | 1059 | ||
| 1572 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1060 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -438,9 +438,9 @@ void __init kvm_guest_init(void) | |||
| 438 | static __init int activate_jump_labels(void) | 438 | static __init int activate_jump_labels(void) |
| 439 | { | 439 | { |
| 440 | if (has_steal_clock) { | 440 | if (has_steal_clock) { |
| 441 | jump_label_inc(¶virt_steal_enabled); | 441 | static_key_slow_inc(¶virt_steal_enabled); |
| 442 | if (steal_acc) | 442 | if (steal_acc) |
| 443 | jump_label_inc(¶virt_steal_rq_enabled); | 443 | static_key_slow_inc(¶virt_steal_rq_enabled); |
| 444 | } | 444 | } |
| 445 | 445 | ||
| 446 | return 0; | 446 | return 0; |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) | |||
| 202 | __native_flush_tlb_single(addr); | 202 | __native_flush_tlb_single(addr); |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | struct jump_label_key paravirt_steal_enabled; | 205 | struct static_key paravirt_steal_enabled; |
| 206 | struct jump_label_key paravirt_steal_rq_enabled; | 206 | struct static_key paravirt_steal_rq_enabled; |
| 207 | 207 | ||
| 208 | static u64 native_steal_clock(int cpu) | 208 | static u64 native_steal_clock(int cpu) |
| 209 | { | 209 | { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 15763af7bfe3..44eefde92109 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -377,8 +377,8 @@ static inline int hlt_use_halt(void) | |||
| 377 | void default_idle(void) | 377 | void default_idle(void) |
| 378 | { | 378 | { |
| 379 | if (hlt_use_halt()) { | 379 | if (hlt_use_halt()) { |
| 380 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 380 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
| 381 | trace_cpu_idle(1, smp_processor_id()); | 381 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
| 382 | current_thread_info()->status &= ~TS_POLLING; | 382 | current_thread_info()->status &= ~TS_POLLING; |
| 383 | /* | 383 | /* |
| 384 | * TS_POLLING-cleared state must be visible before we | 384 | * TS_POLLING-cleared state must be visible before we |
| @@ -391,8 +391,8 @@ void default_idle(void) | |||
| 391 | else | 391 | else |
| 392 | local_irq_enable(); | 392 | local_irq_enable(); |
| 393 | current_thread_info()->status |= TS_POLLING; | 393 | current_thread_info()->status |= TS_POLLING; |
| 394 | trace_power_end(smp_processor_id()); | 394 | trace_power_end_rcuidle(smp_processor_id()); |
| 395 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 395 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
| 396 | } else { | 396 | } else { |
| 397 | local_irq_enable(); | 397 | local_irq_enable(); |
| 398 | /* loop is done by the caller */ | 398 | /* loop is done by the caller */ |
| @@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
| 450 | static void mwait_idle(void) | 450 | static void mwait_idle(void) |
| 451 | { | 451 | { |
| 452 | if (!need_resched()) { | 452 | if (!need_resched()) { |
| 453 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 453 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
| 454 | trace_cpu_idle(1, smp_processor_id()); | 454 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
| 455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | 455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
| 456 | clflush((void *)¤t_thread_info()->flags); | 456 | clflush((void *)¤t_thread_info()->flags); |
| 457 | 457 | ||
| @@ -461,8 +461,8 @@ static void mwait_idle(void) | |||
| 461 | __sti_mwait(0, 0); | 461 | __sti_mwait(0, 0); |
| 462 | else | 462 | else |
| 463 | local_irq_enable(); | 463 | local_irq_enable(); |
| 464 | trace_power_end(smp_processor_id()); | 464 | trace_power_end_rcuidle(smp_processor_id()); |
| 465 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 465 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
| 466 | } else | 466 | } else |
| 467 | local_irq_enable(); | 467 | local_irq_enable(); |
| 468 | } | 468 | } |
| @@ -474,13 +474,13 @@ static void mwait_idle(void) | |||
| 474 | */ | 474 | */ |
| 475 | static void poll_idle(void) | 475 | static void poll_idle(void) |
| 476 | { | 476 | { |
| 477 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); | 477 | trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); |
| 478 | trace_cpu_idle(0, smp_processor_id()); | 478 | trace_cpu_idle_rcuidle(0, smp_processor_id()); |
| 479 | local_irq_enable(); | 479 | local_irq_enable(); |
| 480 | while (!need_resched()) | 480 | while (!need_resched()) |
| 481 | cpu_relax(); | 481 | cpu_relax(); |
| 482 | trace_power_end(smp_processor_id()); | 482 | trace_power_end_rcuidle(smp_processor_id()); |
| 483 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 483 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
| 484 | } | 484 | } |
| 485 | 485 | ||
| 486 | /* | 486 | /* |
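The idle-loop tracing above moves to the _rcuidle tracepoint variants because these routines run after the CPU has entered RCU's extended quiescent state; a plain tracepoint would take rcu_read_lock_sched_notrace() while RCU is not watching the CPU. A hedged sketch of the calling pattern (my_idle() is a hypothetical idle routine):

    static void my_idle(void)
    {
            /* _rcuidle wraps the probe call in rcu_idle_exit()/rcu_idle_enter() */
            trace_cpu_idle_rcuidle(1, smp_processor_id());
            safe_halt();            /* sleep until the next interrupt */
            trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
    }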
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
| @@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | static bool mmu_audit; | 236 | static bool mmu_audit; |
| 237 | static struct jump_label_key mmu_audit_key; | 237 | static struct static_key mmu_audit_key; |
| 238 | 238 | ||
| 239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
| 240 | { | 240 | { |
| @@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | |||
| 250 | 250 | ||
| 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
| 252 | { | 252 | { |
| 253 | if (static_branch((&mmu_audit_key))) | 253 | if (static_key_false((&mmu_audit_key))) |
| 254 | __kvm_mmu_audit(vcpu, point); | 254 | __kvm_mmu_audit(vcpu, point); |
| 255 | } | 255 | } |
| 256 | 256 | ||
| @@ -259,7 +259,7 @@ static void mmu_audit_enable(void) | |||
| 259 | if (mmu_audit) | 259 | if (mmu_audit) |
| 260 | return; | 260 | return; |
| 261 | 261 | ||
| 262 | jump_label_inc(&mmu_audit_key); | 262 | static_key_slow_inc(&mmu_audit_key); |
| 263 | mmu_audit = true; | 263 | mmu_audit = true; |
| 264 | } | 264 | } |
| 265 | 265 | ||
| @@ -268,7 +268,7 @@ static void mmu_audit_disable(void) | |||
| 268 | if (!mmu_audit) | 268 | if (!mmu_audit) |
| 269 | return; | 269 | return; |
| 270 | 270 | ||
| 271 | jump_label_dec(&mmu_audit_key); | 271 | static_key_slow_dec(&mmu_audit_key); |
| 272 | mmu_audit = false; | 272 | mmu_audit = false; |
| 273 | } | 273 | } |
| 274 | 274 | ||
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fbda6e1..c1f01a8e9f65 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
| @@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | |||
| 29 | return inat_primary_table[opcode]; | 29 | return inat_primary_table[opcode]; |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | 32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) |
| 33 | { | ||
| 34 | insn_attr_t lpfx_attr; | ||
| 35 | |||
| 36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
| 37 | return inat_last_prefix_id(lpfx_attr); | ||
| 38 | } | ||
| 39 | |||
| 40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
| 33 | insn_attr_t esc_attr) | 41 | insn_attr_t esc_attr) |
| 34 | { | 42 | { |
| 35 | const insn_attr_t *table; | 43 | const insn_attr_t *table; |
| 36 | insn_attr_t lpfx_attr; | 44 | int n; |
| 37 | int n, m = 0; | ||
| 38 | 45 | ||
| 39 | n = inat_escape_id(esc_attr); | 46 | n = inat_escape_id(esc_attr); |
| 40 | if (last_pfx) { | 47 | |
| 41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
| 42 | m = inat_last_prefix_id(lpfx_attr); | ||
| 43 | } | ||
| 44 | table = inat_escape_tables[n][0]; | 48 | table = inat_escape_tables[n][0]; |
| 45 | if (!table) | 49 | if (!table) |
| 46 | return 0; | 50 | return 0; |
| 47 | if (inat_has_variant(table[opcode]) && m) { | 51 | if (inat_has_variant(table[opcode]) && lpfx_id) { |
| 48 | table = inat_escape_tables[n][m]; | 52 | table = inat_escape_tables[n][lpfx_id]; |
| 49 | if (!table) | 53 | if (!table) |
| 50 | return 0; | 54 | return 0; |
| 51 | } | 55 | } |
| 52 | return table[opcode]; | 56 | return table[opcode]; |
| 53 | } | 57 | } |
| 54 | 58 | ||
| 55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | 59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, |
| 56 | insn_attr_t grp_attr) | 60 | insn_attr_t grp_attr) |
| 57 | { | 61 | { |
| 58 | const insn_attr_t *table; | 62 | const insn_attr_t *table; |
| 59 | insn_attr_t lpfx_attr; | 63 | int n; |
| 60 | int n, m = 0; | ||
| 61 | 64 | ||
| 62 | n = inat_group_id(grp_attr); | 65 | n = inat_group_id(grp_attr); |
| 63 | if (last_pfx) { | 66 | |
| 64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
| 65 | m = inat_last_prefix_id(lpfx_attr); | ||
| 66 | } | ||
| 67 | table = inat_group_tables[n][0]; | 67 | table = inat_group_tables[n][0]; |
| 68 | if (!table) | 68 | if (!table) |
| 69 | return inat_group_common_attribute(grp_attr); | 69 | return inat_group_common_attribute(grp_attr); |
| 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { |
| 71 | table = inat_group_tables[n][m]; | 71 | table = inat_group_tables[n][lpfx_id]; |
| 72 | if (!table) | 72 | if (!table) |
| 73 | return inat_group_common_attribute(grp_attr); | 73 | return inat_group_common_attribute(grp_attr); |
| 74 | } | 74 | } |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3e3fbb..25feb1ae71c5 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
| @@ -185,7 +185,8 @@ err_out: | |||
| 185 | void insn_get_opcode(struct insn *insn) | 185 | void insn_get_opcode(struct insn *insn) |
| 186 | { | 186 | { |
| 187 | struct insn_field *opcode = &insn->opcode; | 187 | struct insn_field *opcode = &insn->opcode; |
| 188 | insn_byte_t op, pfx; | 188 | insn_byte_t op; |
| 189 | int pfx_id; | ||
| 189 | if (opcode->got) | 190 | if (opcode->got) |
| 190 | return; | 191 | return; |
| 191 | if (!insn->prefixes.got) | 192 | if (!insn->prefixes.got) |
| @@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) | |||
| 212 | /* Get escaped opcode */ | 213 | /* Get escaped opcode */ |
| 213 | op = get_next(insn_byte_t, insn); | 214 | op = get_next(insn_byte_t, insn); |
| 214 | opcode->bytes[opcode->nbytes++] = op; | 215 | opcode->bytes[opcode->nbytes++] = op; |
| 215 | pfx = insn_last_prefix(insn); | 216 | pfx_id = insn_last_prefix_id(insn); |
| 216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | 217 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); |
| 217 | } | 218 | } |
| 218 | if (inat_must_vex(insn->attr)) | 219 | if (inat_must_vex(insn->attr)) |
| 219 | insn->attr = 0; /* This instruction is bad */ | 220 | insn->attr = 0; /* This instruction is bad */ |
| @@ -235,7 +236,7 @@ err_out: | |||
| 235 | void insn_get_modrm(struct insn *insn) | 236 | void insn_get_modrm(struct insn *insn) |
| 236 | { | 237 | { |
| 237 | struct insn_field *modrm = &insn->modrm; | 238 | struct insn_field *modrm = &insn->modrm; |
| 238 | insn_byte_t pfx, mod; | 239 | insn_byte_t pfx_id, mod; |
| 239 | if (modrm->got) | 240 | if (modrm->got) |
| 240 | return; | 241 | return; |
| 241 | if (!insn->opcode.got) | 242 | if (!insn->opcode.got) |
| @@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) | |||
| 246 | modrm->value = mod; | 247 | modrm->value = mod; |
| 247 | modrm->nbytes = 1; | 248 | modrm->nbytes = 1; |
| 248 | if (inat_is_group(insn->attr)) { | 249 | if (inat_is_group(insn->attr)) { |
| 249 | pfx = insn_last_prefix(insn); | 250 | pfx_id = insn_last_prefix_id(insn); |
| 250 | insn->attr = inat_get_group_attribute(mod, pfx, | 251 | insn->attr = inat_get_group_attribute(mod, pfx_id, |
| 251 | insn->attr); | 252 | insn->attr); |
| 252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | 253 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
| 253 | insn->attr = 0; /* This is bad */ | 254 | insn->attr = 0; /* This is bad */ |
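The inat/insn changes replace passing the raw last-prefix byte around with a small integer id, computed once by the new insn_last_prefix_id() and then used directly as a column index by both attribute lookups. Roughly, the resulting two-level lookup has this shape (a sketch condensed from the code above, not a verbatim excerpt):

    int lpfx_id = insn_last_prefix_id(insn);              /* resolved once per insn */
    const insn_attr_t *table = inat_escape_tables[n][0];  /* no-prefix column */
    if (table && inat_has_variant(table[op]) && lpfx_id)
            table = inat_escape_tables[n][lpfx_id];       /* prefix-specific variant */
    attr = table ? table[op] : 0;

This drops the redundant inat_get_opcode_attribute() call from the escape and group paths, since the prefix attribute no longer has to be re-derived at each lookup.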
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 59f4261c753a..6588f43017bd 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c | |||
| @@ -94,13 +94,13 @@ int cpuidle_idle_call(void) | |||
| 94 | 94 | ||
| 95 | target_state = &drv->states[next_state]; | 95 | target_state = &drv->states[next_state]; |
| 96 | 96 | ||
| 97 | trace_power_start(POWER_CSTATE, next_state, dev->cpu); | 97 | trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu); |
| 98 | trace_cpu_idle(next_state, dev->cpu); | 98 | trace_cpu_idle_rcuidle(next_state, dev->cpu); |
| 99 | 99 | ||
| 100 | entered_state = target_state->enter(dev, drv, next_state); | 100 | entered_state = target_state->enter(dev, drv, next_state); |
| 101 | 101 | ||
| 102 | trace_power_end(dev->cpu); | 102 | trace_power_end_rcuidle(dev->cpu); |
| 103 | trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); | 103 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); |
| 104 | 104 | ||
| 105 | if (entered_state >= 0) { | 105 | if (entered_state >= 0) { |
| 106 | /* Update cpuidle counters */ | 106 | /* Update cpuidle counters */ |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -63,6 +63,8 @@ | |||
| 63 | #include <trace/events/task.h> | 63 | #include <trace/events/task.h> |
| 64 | #include "internal.h" | 64 | #include "internal.h" |
| 65 | 65 | ||
| 66 | #include <trace/events/sched.h> | ||
| 67 | |||
| 66 | int core_uses_pid; | 68 | int core_uses_pid; |
| 67 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 69 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
| 68 | unsigned int core_pipe_limit; | 70 | unsigned int core_pipe_limit; |
| @@ -1402,9 +1404,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 1402 | */ | 1404 | */ |
| 1403 | bprm->recursion_depth = depth; | 1405 | bprm->recursion_depth = depth; |
| 1404 | if (retval >= 0) { | 1406 | if (retval >= 0) { |
| 1405 | if (depth == 0) | 1407 | if (depth == 0) { |
| 1406 | ptrace_event(PTRACE_EVENT_EXEC, | 1408 | trace_sched_process_exec(current, old_pid, bprm); |
| 1407 | old_pid); | 1409 | ptrace_event(PTRACE_EVENT_EXEC, old_pid); |
| 1410 | } | ||
| 1408 | put_binfmt(fmt); | 1411 | put_binfmt(fmt); |
| 1409 | allow_write_access(bprm->file); | 1412 | allow_write_access(bprm->file); |
| 1410 | if (bprm->file) | 1413 | if (bprm->file) |
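With trace_sched_process_exec() now fired once per successful top-level exec, a probe can be attached through the usual tracepoint registration from a module that includes <trace/events/sched.h>. A hedged sketch, assuming the probe signature follows TP_PROTO(struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm) as suggested by the call site above:

    static void probe_exec(void *data, struct task_struct *p,
                           pid_t old_pid, struct linux_binprm *bprm)
    {
            pr_info("exec by pid %d (old pid %d)\n", p->pid, old_pid);
    }

    /* in module init, error handling omitted: */
    register_trace_sched_process_exec(probe_exec, NULL);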
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 028e26f0bf08..72a6cabb4d5b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
| @@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, | |||
| 31 | 31 | ||
| 32 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); | 32 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); |
| 33 | 33 | ||
| 34 | /* | ||
| 35 | * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are | ||
| 36 | * set in the flags member. | ||
| 37 | * | ||
| 38 | * ENABLED - set/unset when ftrace_ops is registered/unregistered | ||
| 39 | * GLOBAL - set manually by ftrace_ops user to denote that the ftrace_ops | ||
| 40 | * is part of the global tracers sharing the same filter | ||
| 41 | * via set_ftrace_* debugfs files. | ||
| 42 | * DYNAMIC - set when ftrace_ops is registered to denote dynamically | ||
| 43 | * allocated ftrace_ops which need special care | ||
| 44 | * CONTROL - set manually by ftrace_ops user to denote that the ftrace_ops | ||
| 45 | * can be controlled by the following calls: | ||
| 46 | * ftrace_function_local_enable | ||
| 47 | * ftrace_function_local_disable | ||
| 48 | */ | ||
| 34 | enum { | 49 | enum { |
| 35 | FTRACE_OPS_FL_ENABLED = 1 << 0, | 50 | FTRACE_OPS_FL_ENABLED = 1 << 0, |
| 36 | FTRACE_OPS_FL_GLOBAL = 1 << 1, | 51 | FTRACE_OPS_FL_GLOBAL = 1 << 1, |
| 37 | FTRACE_OPS_FL_DYNAMIC = 1 << 2, | 52 | FTRACE_OPS_FL_DYNAMIC = 1 << 2, |
| 53 | FTRACE_OPS_FL_CONTROL = 1 << 3, | ||
| 38 | }; | 54 | }; |
| 39 | 55 | ||
| 40 | struct ftrace_ops { | 56 | struct ftrace_ops { |
| 41 | ftrace_func_t func; | 57 | ftrace_func_t func; |
| 42 | struct ftrace_ops *next; | 58 | struct ftrace_ops *next; |
| 43 | unsigned long flags; | 59 | unsigned long flags; |
| 60 | int __percpu *disabled; | ||
| 44 | #ifdef CONFIG_DYNAMIC_FTRACE | 61 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 45 | struct ftrace_hash *notrace_hash; | 62 | struct ftrace_hash *notrace_hash; |
| 46 | struct ftrace_hash *filter_hash; | 63 | struct ftrace_hash *filter_hash; |
| @@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops); | |||
| 97 | int unregister_ftrace_function(struct ftrace_ops *ops); | 114 | int unregister_ftrace_function(struct ftrace_ops *ops); |
| 98 | void clear_ftrace_function(void); | 115 | void clear_ftrace_function(void); |
| 99 | 116 | ||
| 117 | /** | ||
| 118 | * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu | ||
| 119 | * | ||
| 120 | * This function enables tracing on current cpu by decreasing | ||
| 121 | * the per cpu control variable. | ||
| 122 | * It must be called with preemption disabled and only on ftrace_ops | ||
| 123 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
| 124 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
| 125 | */ | ||
| 126 | static inline void ftrace_function_local_enable(struct ftrace_ops *ops) | ||
| 127 | { | ||
| 128 | if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) | ||
| 129 | return; | ||
| 130 | |||
| 131 | (*this_cpu_ptr(ops->disabled))--; | ||
| 132 | } | ||
| 133 | |||
| 134 | /** | ||
| 135 | * ftrace_function_local_disable - disable controlled ftrace_ops on current cpu | ||
| 136 | * | ||
| 137 | * This function disables tracing on current cpu by increasing | ||
| 138 | * the per cpu control variable. | ||
| 139 | * It must be called with preemption disabled and only on ftrace_ops | ||
| 140 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
| 141 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
| 142 | */ | ||
| 143 | static inline void ftrace_function_local_disable(struct ftrace_ops *ops) | ||
| 144 | { | ||
| 145 | if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) | ||
| 146 | return; | ||
| 147 | |||
| 148 | (*this_cpu_ptr(ops->disabled))++; | ||
| 149 | } | ||
| 150 | |||
| 151 | /** | ||
| 152 | * ftrace_function_local_disabled - returns ftrace_ops disabled value | ||
| 153 | * on current cpu | ||
| 154 | * | ||
| 155 | * This function returns value of ftrace_ops::disabled on current cpu. | ||
| 156 | * It must be called with preemption disabled and only on ftrace_ops | ||
| 157 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
| 158 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
| 159 | */ | ||
| 160 | static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) | ||
| 161 | { | ||
| 162 | WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)); | ||
| 163 | return *this_cpu_ptr(ops->disabled); | ||
| 164 | } | ||
| 165 | |||
| 100 | extern void ftrace_stub(unsigned long a0, unsigned long a1); | 166 | extern void ftrace_stub(unsigned long a0, unsigned long a1); |
| 101 | 167 | ||
| 102 | #else /* !CONFIG_FUNCTION_TRACER */ | 168 | #else /* !CONFIG_FUNCTION_TRACER */ |
| @@ -178,12 +244,13 @@ struct dyn_ftrace { | |||
| 178 | }; | 244 | }; |
| 179 | 245 | ||
| 180 | int ftrace_force_update(void); | 246 | int ftrace_force_update(void); |
| 181 | void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, | 247 | int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, |
| 182 | int len, int reset); | 248 | int len, int reset); |
| 183 | void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, | 249 | int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, |
| 184 | int len, int reset); | 250 | int len, int reset); |
| 185 | void ftrace_set_global_filter(unsigned char *buf, int len, int reset); | 251 | void ftrace_set_global_filter(unsigned char *buf, int len, int reset); |
| 186 | void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); | 252 | void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); |
| 253 | void ftrace_free_filter(struct ftrace_ops *ops); | ||
| 187 | 254 | ||
| 188 | int register_ftrace_command(struct ftrace_func_command *cmd); | 255 | int register_ftrace_command(struct ftrace_func_command *cmd); |
| 189 | int unregister_ftrace_command(struct ftrace_func_command *cmd); | 256 | int unregister_ftrace_command(struct ftrace_func_command *cmd); |
| @@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void); | |||
| 314 | #else | 381 | #else |
| 315 | static inline int skip_trace(unsigned long ip) { return 0; } | 382 | static inline int skip_trace(unsigned long ip) { return 0; } |
| 316 | static inline int ftrace_force_update(void) { return 0; } | 383 | static inline int ftrace_force_update(void) { return 0; } |
| 317 | static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) | ||
| 318 | { | ||
| 319 | } | ||
| 320 | static inline void ftrace_disable_daemon(void) { } | 384 | static inline void ftrace_disable_daemon(void) { } |
| 321 | static inline void ftrace_enable_daemon(void) { } | 385 | static inline void ftrace_enable_daemon(void) { } |
| 322 | static inline void ftrace_release_mod(struct module *mod) {} | 386 | static inline void ftrace_release_mod(struct module *mod) {} |
| @@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end) | |||
| 340 | */ | 404 | */ |
| 341 | #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) | 405 | #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) |
| 342 | #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) | 406 | #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) |
| 407 | #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) | ||
| 408 | #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) | ||
| 409 | #define ftrace_free_filter(ops) do { } while (0) | ||
| 343 | 410 | ||
| 344 | static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, | 411 | static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, |
| 345 | size_t cnt, loff_t *ppos) { return -ENODEV; } | 412 | size_t cnt, loff_t *ppos) { return -ENODEV; } |
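Putting the new FTRACE_OPS_FL_CONTROL pieces together: the flag is set before registration, and the per-cpu helpers then soft-mute the callback without touching any global state. A minimal sketch, assuming the ops has been registered successfully (my_tracer_func() is an illustrative name):

    static void my_tracer_func(unsigned long ip, unsigned long parent_ip)
    {
            /* per-function callback body */
    }

    static struct ftrace_ops my_ops = {
            .func  = my_tracer_func,
            .flags = FTRACE_OPS_FL_CONTROL,
    };

    /* ... after register_ftrace_function(&my_ops) succeeds ... */
    preempt_disable();                        /* required by the helpers */
    ftrace_function_local_disable(&my_ops);   /* mute callbacks on this cpu */
    /* ... region that must not recurse into the tracer ... */
    ftrace_function_local_enable(&my_ops);
    preempt_enable();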
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c3da42dd22ba..dd478fc8f9f5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
| @@ -146,6 +146,10 @@ enum trace_reg { | |||
| 146 | TRACE_REG_UNREGISTER, | 146 | TRACE_REG_UNREGISTER, |
| 147 | TRACE_REG_PERF_REGISTER, | 147 | TRACE_REG_PERF_REGISTER, |
| 148 | TRACE_REG_PERF_UNREGISTER, | 148 | TRACE_REG_PERF_UNREGISTER, |
| 149 | TRACE_REG_PERF_OPEN, | ||
| 150 | TRACE_REG_PERF_CLOSE, | ||
| 151 | TRACE_REG_PERF_ADD, | ||
| 152 | TRACE_REG_PERF_DEL, | ||
| 149 | }; | 153 | }; |
| 150 | 154 | ||
| 151 | struct ftrace_event_call; | 155 | struct ftrace_event_call; |
| @@ -157,7 +161,7 @@ struct ftrace_event_class { | |||
| 157 | void *perf_probe; | 161 | void *perf_probe; |
| 158 | #endif | 162 | #endif |
| 159 | int (*reg)(struct ftrace_event_call *event, | 163 | int (*reg)(struct ftrace_event_call *event, |
| 160 | enum trace_reg type); | 164 | enum trace_reg type, void *data); |
| 161 | int (*define_fields)(struct ftrace_event_call *); | 165 | int (*define_fields)(struct ftrace_event_call *); |
| 162 | struct list_head *(*get_fields)(struct ftrace_event_call *); | 166 | struct list_head *(*get_fields)(struct ftrace_event_call *); |
| 163 | struct list_head fields; | 167 | struct list_head fields; |
| @@ -165,7 +169,7 @@ struct ftrace_event_class { | |||
| 165 | }; | 169 | }; |
| 166 | 170 | ||
| 167 | extern int ftrace_event_reg(struct ftrace_event_call *event, | 171 | extern int ftrace_event_reg(struct ftrace_event_call *event, |
| 168 | enum trace_reg type); | 172 | enum trace_reg type, void *data); |
| 169 | 173 | ||
| 170 | enum { | 174 | enum { |
| 171 | TRACE_EVENT_FL_ENABLED_BIT, | 175 | TRACE_EVENT_FL_ENABLED_BIT, |
| @@ -241,6 +245,7 @@ enum { | |||
| 241 | FILTER_STATIC_STRING, | 245 | FILTER_STATIC_STRING, |
| 242 | FILTER_DYN_STRING, | 246 | FILTER_DYN_STRING, |
| 243 | FILTER_PTR_STRING, | 247 | FILTER_PTR_STRING, |
| 248 | FILTER_TRACE_FN, | ||
| 244 | }; | 249 | }; |
| 245 | 250 | ||
| 246 | #define EVENT_STORAGE_SIZE 128 | 251 | #define EVENT_STORAGE_SIZE 128 |
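Every ->reg() implementation has to grow the extra void *data argument, and may now also see the four new perf lifecycle requests. A hedged sketch of a class callback handling them, modeled on the shape of ftrace_event_reg() (my_event_enable()/my_event_disable() are illustrative helpers):

    static int my_event_reg(struct ftrace_event_call *call,
                            enum trace_reg type, void *data)
    {
            switch (type) {
            case TRACE_REG_REGISTER:
                    return my_event_enable(call);
            case TRACE_REG_UNREGISTER:
                    my_event_disable(call);
                    return 0;
    #ifdef CONFIG_PERF_EVENTS
            case TRACE_REG_PERF_OPEN:       /* data carries the perf_event */
            case TRACE_REG_PERF_CLOSE:
            case TRACE_REG_PERF_ADD:
            case TRACE_REG_PERF_DEL:
                    return 0;               /* optional: nothing extra needed */
    #endif
            }
            return 0;
    }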
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a64b00e286f5..3f830e005118 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
| 21 | #include <asm/ptrace.h> | 21 | #include <asm/ptrace.h> |
| 22 | #include <asm/system.h> | 22 | #include <asm/system.h> |
| 23 | #include <trace/events/irq.h> | ||
| 24 | 23 | ||
| 25 | /* | 24 | /* |
| 26 | * These correspond to the IORESOURCE_IRQ_* defines in | 25 | * These correspond to the IORESOURCE_IRQ_* defines in |
| @@ -456,11 +455,7 @@ asmlinkage void do_softirq(void); | |||
| 456 | asmlinkage void __do_softirq(void); | 455 | asmlinkage void __do_softirq(void); |
| 457 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | 456 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); |
| 458 | extern void softirq_init(void); | 457 | extern void softirq_init(void); |
| 459 | static inline void __raise_softirq_irqoff(unsigned int nr) | 458 | extern void __raise_softirq_irqoff(unsigned int nr); |
| 460 | { | ||
| 461 | trace_softirq_raise(nr); | ||
| 462 | or_softirq_pending(1UL << nr); | ||
| 463 | } | ||
| 464 | 459 | ||
| 465 | extern void raise_softirq_irqoff(unsigned int nr); | 460 | extern void raise_softirq_irqoff(unsigned int nr); |
| 466 | extern void raise_softirq(unsigned int nr); | 461 | extern void raise_softirq(unsigned int nr); |
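__raise_softirq_irqoff() is un-inlined so that interrupt.h no longer has to drag in trace/events/irq.h (the include removed at the top of this hunk). Presumably the body moves to kernel/softirq.c essentially unchanged, something like:

    /* presumed out-of-line definition in kernel/softirq.c */
    void __raise_softirq_irqoff(unsigned int nr)
    {
            trace_softirq_raise(nr);
            or_softirq_pending(1UL << nr);
    }

Keeping the tracepoint behind an out-of-line call also means every user of interrupt.h stops paying the header-dependency cost for a trace event most of them never raise.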
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5ce8b140428f..c513a40510f5 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
| @@ -1,22 +1,69 @@ | |||
| 1 | #ifndef _LINUX_JUMP_LABEL_H | 1 | #ifndef _LINUX_JUMP_LABEL_H |
| 2 | #define _LINUX_JUMP_LABEL_H | 2 | #define _LINUX_JUMP_LABEL_H |
| 3 | 3 | ||
| 4 | /* | ||
| 5 | * Jump label support | ||
| 6 | * | ||
| 7 | * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> | ||
| 8 | * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> | ||
| 9 | * | ||
| 10 | * Jump labels provide an interface to generate dynamic branches using | ||
| 11 | * self-modifying code. Assuming toolchain and architecture support, the result | ||
| 12 | * of an "if (static_key_false(&key))" statement is an unconditional branch (which | ||
| 13 | * defaults to false - and the true block is placed out of line). | ||
| 14 | * | ||
| 15 | * However at runtime we can change the branch target using | ||
| 16 | * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key | ||
| 17 | * object and for as long as there are references all branches referring to | ||
| 18 | * that particular key will point to the (out of line) true block. | ||
| 19 | * | ||
| 20 | * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions | ||
| 21 | * must be considered absolute slow paths (machine wide synchronization etc.). | ||
| 22 | * OTOH, since the affected branches are unconditional their runtime overhead | ||
| 23 | * will be absolutely minimal, esp. in the default (off) case where the total | ||
| 24 | * effect is a single NOP of appropriate size. The on case will patch in a jump | ||
| 25 | * to the out-of-line block. | ||
| 26 | * | ||
| 27 | * When the control is directly exposed to userspace it is prudent to delay the | ||
| 28 | * decrement to avoid high frequency code modifications which can (and do) | ||
| 29 | * cause significant performance degradation. Struct static_key_deferred and | ||
| 30 | * static_key_slow_dec_deferred() provide for this. | ||
| 31 | * | ||
| 32 | * Lacking toolchain and/or architecture support, it falls back to a simple | ||
| 33 | * conditional branch. | ||
| 34 | * | ||
| 35 | * struct static_key my_key = STATIC_KEY_INIT_TRUE; | ||
| 36 | * | ||
| 37 | * if (static_key_true(&my_key)) { | ||
| 38 | * } | ||
| 39 | * | ||
| 40 | * will result in the true case being in-line and the key starting with a single | ||
| 41 | * reference. Mixing static_key_true() and static_key_false() on the same key is not | ||
| 42 | * allowed. | ||
| 43 | * | ||
| 44 | * Not initializing the key (static data is initialized to 0s anyway) is the | ||
| 45 | * same as using STATIC_KEY_INIT_FALSE and static_key_false() is | ||
| 46 | * equivalent to static_branch(). | ||
| 47 | * | ||
| 48 | */ | ||
| 49 | |||
| 4 | #include <linux/types.h> | 50 | #include <linux/types.h> |
| 5 | #include <linux/compiler.h> | 51 | #include <linux/compiler.h> |
| 6 | #include <linux/workqueue.h> | 52 | #include <linux/workqueue.h> |
| 7 | 53 | ||
| 8 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) | 54 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) |
| 9 | 55 | ||
| 10 | struct jump_label_key { | 56 | struct static_key { |
| 11 | atomic_t enabled; | 57 | atomic_t enabled; |
| 58 | /* Set lsb to 1 if the branch default is true, 0 otherwise */ | ||
| 12 | struct jump_entry *entries; | 59 | struct jump_entry *entries; |
| 13 | #ifdef CONFIG_MODULES | 60 | #ifdef CONFIG_MODULES |
| 14 | struct jump_label_mod *next; | 61 | struct static_key_mod *next; |
| 15 | #endif | 62 | #endif |
| 16 | }; | 63 | }; |
| 17 | 64 | ||
| 18 | struct jump_label_key_deferred { | 65 | struct static_key_deferred { |
| 19 | struct jump_label_key key; | 66 | struct static_key key; |
| 20 | unsigned long timeout; | 67 | unsigned long timeout; |
| 21 | struct delayed_work work; | 68 | struct delayed_work work; |
| 22 | }; | 69 | }; |
| @@ -34,13 +81,34 @@ struct module; | |||
| 34 | 81 | ||
| 35 | #ifdef HAVE_JUMP_LABEL | 82 | #ifdef HAVE_JUMP_LABEL |
| 36 | 83 | ||
| 37 | #ifdef CONFIG_MODULES | 84 | #define JUMP_LABEL_TRUE_BRANCH 1UL |
| 38 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} | 85 | |
| 39 | #else | 86 | static |
| 40 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} | 87 | inline struct jump_entry *jump_label_get_entries(struct static_key *key) |
| 41 | #endif | 88 | { |
| 89 | return (struct jump_entry *)((unsigned long)key->entries | ||
| 90 | & ~JUMP_LABEL_TRUE_BRANCH); | ||
| 91 | } | ||
| 42 | 92 | ||
| 43 | static __always_inline bool static_branch(struct jump_label_key *key) | 93 | static inline bool jump_label_get_branch_default(struct static_key *key) |
| 94 | { | ||
| 95 | if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) | ||
| 96 | return true; | ||
| 97 | return false; | ||
| 98 | } | ||
| 99 | |||
| 100 | static __always_inline bool static_key_false(struct static_key *key) | ||
| 101 | { | ||
| 102 | return arch_static_branch(key); | ||
| 103 | } | ||
| 104 | |||
| 105 | static __always_inline bool static_key_true(struct static_key *key) | ||
| 106 | { | ||
| 107 | return !static_key_false(key); | ||
| 108 | } | ||
| 109 | |||
| 110 | /* Deprecated. Please use static_key_false() instead. */ | ||
| 111 | static __always_inline bool static_branch(struct static_key *key) | ||
| 44 | { | 112 | { |
| 45 | return arch_static_branch(key); | 113 | return arch_static_branch(key); |
| 46 | } | 114 | } |
| @@ -56,21 +124,23 @@ extern void arch_jump_label_transform(struct jump_entry *entry, | |||
| 56 | extern void arch_jump_label_transform_static(struct jump_entry *entry, | 124 | extern void arch_jump_label_transform_static(struct jump_entry *entry, |
| 57 | enum jump_label_type type); | 125 | enum jump_label_type type); |
| 58 | extern int jump_label_text_reserved(void *start, void *end); | 126 | extern int jump_label_text_reserved(void *start, void *end); |
| 59 | extern void jump_label_inc(struct jump_label_key *key); | 127 | extern void static_key_slow_inc(struct static_key *key); |
| 60 | extern void jump_label_dec(struct jump_label_key *key); | 128 | extern void static_key_slow_dec(struct static_key *key); |
| 61 | extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); | 129 | extern void static_key_slow_dec_deferred(struct static_key_deferred *key); |
| 62 | extern bool jump_label_enabled(struct jump_label_key *key); | ||
| 63 | extern void jump_label_apply_nops(struct module *mod); | 130 | extern void jump_label_apply_nops(struct module *mod); |
| 64 | extern void jump_label_rate_limit(struct jump_label_key_deferred *key, | 131 | extern void |
| 65 | unsigned long rl); | 132 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); |
| 133 | |||
| 134 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | ||
| 135 | { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) | ||
| 136 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ | ||
| 137 | { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) | ||
| 66 | 138 | ||
| 67 | #else /* !HAVE_JUMP_LABEL */ | 139 | #else /* !HAVE_JUMP_LABEL */ |
| 68 | 140 | ||
| 69 | #include <linux/atomic.h> | 141 | #include <linux/atomic.h> |
| 70 | 142 | ||
| 71 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0)} | 143 | struct static_key { |
| 72 | |||
| 73 | struct jump_label_key { | ||
| 74 | atomic_t enabled; | 144 | atomic_t enabled; |
| 75 | }; | 145 | }; |
| 76 | 146 | ||
| @@ -78,30 +148,45 @@ static __always_inline void jump_label_init(void) | |||
| 78 | { | 148 | { |
| 79 | } | 149 | } |
| 80 | 150 | ||
| 81 | struct jump_label_key_deferred { | 151 | struct static_key_deferred { |
| 82 | struct jump_label_key key; | 152 | struct static_key key; |
| 83 | }; | 153 | }; |
| 84 | 154 | ||
| 85 | static __always_inline bool static_branch(struct jump_label_key *key) | 155 | static __always_inline bool static_key_false(struct static_key *key) |
| 156 | { | ||
| 157 | if (unlikely(atomic_read(&key->enabled) > 0)) | ||
| 158 | return true; | ||
| 159 | return false; | ||
| 160 | } | ||
| 161 | |||
| 162 | static __always_inline bool static_key_true(struct static_key *key) | ||
| 86 | { | 163 | { |
| 87 | if (unlikely(atomic_read(&key->enabled))) | 164 | if (likely(atomic_read(&key->enabled) > 0)) |
| 88 | return true; | 165 | return true; |
| 89 | return false; | 166 | return false; |
| 90 | } | 167 | } |
| 91 | 168 | ||
| 92 | static inline void jump_label_inc(struct jump_label_key *key) | 169 | /* Deprecated. Please use static_key_false() instead. */ |
| 170 | static __always_inline bool static_branch(struct static_key *key) | ||
| 171 | { | ||
| 172 | if (unlikely(atomic_read(&key->enabled) > 0)) | ||
| 173 | return true; | ||
| 174 | return false; | ||
| 175 | } | ||
| 176 | |||
| 177 | static inline void static_key_slow_inc(struct static_key *key) | ||
| 93 | { | 178 | { |
| 94 | atomic_inc(&key->enabled); | 179 | atomic_inc(&key->enabled); |
| 95 | } | 180 | } |
| 96 | 181 | ||
| 97 | static inline void jump_label_dec(struct jump_label_key *key) | 182 | static inline void static_key_slow_dec(struct static_key *key) |
| 98 | { | 183 | { |
| 99 | atomic_dec(&key->enabled); | 184 | atomic_dec(&key->enabled); |
| 100 | } | 185 | } |
| 101 | 186 | ||
| 102 | static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) | 187 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) |
| 103 | { | 188 | { |
| 104 | jump_label_dec(&key->key); | 189 | static_key_slow_dec(&key->key); |
| 105 | } | 190 | } |
| 106 | 191 | ||
| 107 | static inline int jump_label_text_reserved(void *start, void *end) | 192 | static inline int jump_label_text_reserved(void *start, void *end) |
| @@ -112,23 +197,30 @@ static inline int jump_label_text_reserved(void *start, void *end) | |||
| 112 | static inline void jump_label_lock(void) {} | 197 | static inline void jump_label_lock(void) {} |
| 113 | static inline void jump_label_unlock(void) {} | 198 | static inline void jump_label_unlock(void) {} |
| 114 | 199 | ||
| 115 | static inline bool jump_label_enabled(struct jump_label_key *key) | ||
| 116 | { | ||
| 117 | return !!atomic_read(&key->enabled); | ||
| 118 | } | ||
| 119 | |||
| 120 | static inline int jump_label_apply_nops(struct module *mod) | 200 | static inline int jump_label_apply_nops(struct module *mod) |
| 121 | { | 201 | { |
| 122 | return 0; | 202 | return 0; |
| 123 | } | 203 | } |
| 124 | 204 | ||
| 125 | static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, | 205 | static inline void |
| 206 | jump_label_rate_limit(struct static_key_deferred *key, | ||
| 126 | unsigned long rl) | 207 | unsigned long rl) |
| 127 | { | 208 | { |
| 128 | } | 209 | } |
| 210 | |||
| 211 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | ||
| 212 | { .enabled = ATOMIC_INIT(1) }) | ||
| 213 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ | ||
| 214 | { .enabled = ATOMIC_INIT(0) }) | ||
| 215 | |||
| 129 | #endif /* HAVE_JUMP_LABEL */ | 216 | #endif /* HAVE_JUMP_LABEL */ |
| 130 | 217 | ||
| 131 | #define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) | 218 | #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE |
| 132 | #define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) | 219 | #define jump_label_enabled static_key_enabled |
| 220 | |||
| 221 | static inline bool static_key_enabled(struct static_key *key) | ||
| 222 | { | ||
| 223 | return (atomic_read(&key->enabled) > 0); | ||
| 224 | } | ||
| 133 | 225 | ||
| 134 | #endif /* _LINUX_JUMP_LABEL_H */ | 226 | #endif /* _LINUX_JUMP_LABEL_H */ |
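Taken together, typical use of the renamed API looks like the following sketch (my_feature_key and my_slow_path() are illustrative names):

    /* zero-initialized static data is equivalent to STATIC_KEY_INIT_FALSE */
    static struct static_key my_feature_key;

    void hot_path(void)
    {
            /* compiles to a single NOP while the key is off */
            if (static_key_false(&my_feature_key))
                    my_slow_path();
    }

    /* from slow, sleepable context only: these patch code machine-wide */
    static_key_slow_inc(&my_feature_key);   /* branch now jumps to the slow path */
    static_key_slow_dec(&my_feature_key);   /* restored once the refcount drops to 0 */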
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..7dfaae7846ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
| @@ -214,8 +214,8 @@ enum { | |||
| 214 | #include <linux/skbuff.h> | 214 | #include <linux/skbuff.h> |
| 215 | 215 | ||
| 216 | #ifdef CONFIG_RPS | 216 | #ifdef CONFIG_RPS |
| 217 | #include <linux/jump_label.h> | 217 | #include <linux/static_key.h> |
| 218 | extern struct jump_label_key rps_needed; | 218 | extern struct static_key rps_needed; |
| 219 | #endif | 219 | #endif |
| 220 | 220 | ||
| 221 | struct neighbour; | 221 | struct neighbour; |
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d0..29734be334c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h | |||
| @@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; | |||
| 163 | extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 163 | extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
| 164 | 164 | ||
| 165 | #if defined(CONFIG_JUMP_LABEL) | 165 | #if defined(CONFIG_JUMP_LABEL) |
| 166 | #include <linux/jump_label.h> | 166 | #include <linux/static_key.h> |
| 167 | extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 167 | extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
| 168 | static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) | 168 | static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) |
| 169 | { | 169 | { |
| 170 | if (__builtin_constant_p(pf) && | 170 | if (__builtin_constant_p(pf) && |
| 171 | __builtin_constant_p(hook)) | 171 | __builtin_constant_p(hook)) |
| 172 | return static_branch(&nf_hooks_needed[pf][hook]); | 172 | return static_key_false(&nf_hooks_needed[pf][hook]); |
| 173 | 173 | ||
| 174 | return !list_empty(&nf_hooks[pf][hook]); | 174 | return !list_empty(&nf_hooks[pf][hook]); |
| 175 | } | 175 | } |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index abb2776be1ba..bd9f55a5958d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
| @@ -129,11 +129,40 @@ enum perf_event_sample_format { | |||
| 129 | PERF_SAMPLE_PERIOD = 1U << 8, | 129 | PERF_SAMPLE_PERIOD = 1U << 8, |
| 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, | 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
| 131 | PERF_SAMPLE_RAW = 1U << 10, | 131 | PERF_SAMPLE_RAW = 1U << 10, |
| 132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, | ||
| 132 | 133 | ||
| 133 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | 134 | PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ |
| 134 | }; | 135 | }; |
| 135 | 136 | ||
| 136 | /* | 137 | /* |
| 138 | * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set | ||
| 139 | * | ||
| 140 | * If the user does not pass priv level information via branch_sample_type, | ||
| 141 | * the kernel uses the event's priv level. Branch and event priv levels do | ||
| 142 | * not have to match. Branch priv level is checked for permissions. | ||
| 143 | * | ||
| 144 | * The branch types can be combined, however BRANCH_ANY covers all types | ||
| 145 | * of branches and therefore it supersedes all the other types. | ||
| 146 | */ | ||
| 147 | enum perf_branch_sample_type { | ||
| 148 | PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ | ||
| 149 | PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ | ||
| 150 | PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ | ||
| 151 | |||
| 152 | PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ | ||
| 153 | PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ | ||
| 154 | PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ | ||
| 155 | PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ | ||
| 156 | |||
| 157 | PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ | ||
| 158 | }; | ||
| 159 | |||
| 160 | #define PERF_SAMPLE_BRANCH_PLM_ALL \ | ||
| 161 | (PERF_SAMPLE_BRANCH_USER|\ | ||
| 162 | PERF_SAMPLE_BRANCH_KERNEL|\ | ||
| 163 | PERF_SAMPLE_BRANCH_HV) | ||
| 164 | |||
| 165 | /* | ||
| 137 | * The format of the data returned by read() on a perf event fd, | 166 | * The format of the data returned by read() on a perf event fd, |
| 138 | * as specified by attr.read_format: | 167 | * as specified by attr.read_format: |
| 139 | * | 168 | * |
| @@ -163,6 +192,8 @@ enum perf_event_read_format { | |||
| 163 | }; | 192 | }; |
| 164 | 193 | ||
| 165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
| 195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | ||
| 196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | ||
| 166 | 197 | ||
| 167 | /* | 198 | /* |
| 168 | * Hardware event_id to monitor via a performance monitoring event: | 199 | * Hardware event_id to monitor via a performance monitoring event: |
| @@ -240,6 +271,7 @@ struct perf_event_attr { | |||
| 240 | __u64 bp_len; | 271 | __u64 bp_len; |
| 241 | __u64 config2; /* extension of config1 */ | 272 | __u64 config2; /* extension of config1 */ |
| 242 | }; | 273 | }; |
| 274 | __u64 branch_sample_type; /* enum branch_sample_type */ | ||
| 243 | }; | 275 | }; |
| 244 | 276 | ||
| 245 | /* | 277 | /* |
| @@ -291,12 +323,14 @@ struct perf_event_mmap_page { | |||
| 291 | __s64 offset; /* add to hardware event value */ | 323 | __s64 offset; /* add to hardware event value */ |
| 292 | __u64 time_enabled; /* time event active */ | 324 | __u64 time_enabled; /* time event active */ |
| 293 | __u64 time_running; /* time event on cpu */ | 325 | __u64 time_running; /* time event on cpu */ |
| 326 | __u32 time_mult, time_shift; | ||
| 327 | __u64 time_offset; | ||
| 294 | 328 | ||
| 295 | /* | 329 | /* |
| 296 | * Hole for extension of the self monitor capabilities | 330 | * Hole for extension of the self monitor capabilities |
| 297 | */ | 331 | */ |
| 298 | 332 | ||
| 299 | __u64 __reserved[123]; /* align to 1k */ | 333 | __u64 __reserved[121]; /* align to 1k */ |
| 300 | 334 | ||
| 301 | /* | 335 | /* |
| 302 | * Control data for the mmap() data buffer. | 336 | * Control data for the mmap() data buffer. |
| @@ -456,6 +490,8 @@ enum perf_event_type { | |||
| 456 | * | 490 | * |
| 457 | * { u32 size; | 491 | * { u32 size; |
| 458 | * char data[size];}&& PERF_SAMPLE_RAW | 492 | * char data[size];}&& PERF_SAMPLE_RAW |
| 493 | * | ||
| 494 | * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK | ||
| 459 | * }; | 495 | * }; |
| 460 | */ | 496 | */ |
| 461 | PERF_RECORD_SAMPLE = 9, | 497 | PERF_RECORD_SAMPLE = 9, |
| @@ -512,7 +548,7 @@ struct perf_guest_info_callbacks { | |||
| 512 | #include <linux/ftrace.h> | 548 | #include <linux/ftrace.h> |
| 513 | #include <linux/cpu.h> | 549 | #include <linux/cpu.h> |
| 514 | #include <linux/irq_work.h> | 550 | #include <linux/irq_work.h> |
| 515 | #include <linux/jump_label.h> | 551 | #include <linux/static_key.h> |
| 516 | #include <linux/atomic.h> | 552 | #include <linux/atomic.h> |
| 517 | #include <asm/local.h> | 553 | #include <asm/local.h> |
| 518 | 554 | ||
| @@ -528,12 +564,34 @@ struct perf_raw_record { | |||
| 528 | void *data; | 564 | void *data; |
| 529 | }; | 565 | }; |
| 530 | 566 | ||
| 567 | /* | ||
| 568 | * single taken branch record layout: | ||
| 569 | * | ||
| 570 | * from: source instruction (may not always be a branch insn) | ||
| 571 | * to: branch target | ||
| 572 | * mispred: branch target was mispredicted | ||
| 573 | * predicted: branch target was predicted | ||
| 574 | * | ||
| 575 | * support for mispred, predicted is optional. In case it | ||
| 576 | * is not supported mispred = predicted = 0. | ||
| 577 | */ | ||
| 531 | struct perf_branch_entry { | 578 | struct perf_branch_entry { |
| 532 | __u64 from; | 579 | __u64 from; |
| 533 | __u64 to; | 580 | __u64 to; |
| 534 | __u64 flags; | 581 | __u64 mispred:1, /* target mispredicted */ |
| 582 | predicted:1,/* target predicted */ | ||
| 583 | reserved:62; | ||
| 535 | }; | 584 | }; |
| 536 | 585 | ||
| 586 | /* | ||
| 587 | * branch stack layout: | ||
| 588 | * nr: number of taken branches stored in entries[] | ||
| 589 | * | ||
| 590 | * Note that nr can vary from sample to sample | ||
| 591 | * branches (to, from) are stored from most recent | ||
| 592 | * to least recent, i.e., entries[0] contains the most | ||
| 593 | * recent branch. | ||
| 594 | */ | ||
| 537 | struct perf_branch_stack { | 595 | struct perf_branch_stack { |
| 538 | __u64 nr; | 596 | __u64 nr; |
| 539 | struct perf_branch_entry entries[0]; | 597 | struct perf_branch_entry entries[0]; |
| @@ -564,7 +622,9 @@ struct hw_perf_event { | |||
| 564 | unsigned long event_base; | 622 | unsigned long event_base; |
| 565 | int idx; | 623 | int idx; |
| 566 | int last_cpu; | 624 | int last_cpu; |
| 625 | |||
| 567 | struct hw_perf_event_extra extra_reg; | 626 | struct hw_perf_event_extra extra_reg; |
| 627 | struct hw_perf_event_extra branch_reg; | ||
| 568 | }; | 628 | }; |
| 569 | struct { /* software */ | 629 | struct { /* software */ |
| 570 | struct hrtimer hrtimer; | 630 | struct hrtimer hrtimer; |
| @@ -616,6 +676,7 @@ struct pmu { | |||
| 616 | struct list_head entry; | 676 | struct list_head entry; |
| 617 | 677 | ||
| 618 | struct device *dev; | 678 | struct device *dev; |
| 679 | const struct attribute_group **attr_groups; | ||
| 619 | char *name; | 680 | char *name; |
| 620 | int type; | 681 | int type; |
| 621 | 682 | ||
| @@ -681,6 +742,17 @@ struct pmu { | |||
| 681 | * for each successful ->add() during the transaction. | 742 | * for each successful ->add() during the transaction. |
| 682 | */ | 743 | */ |
| 683 | void (*cancel_txn) (struct pmu *pmu); /* optional */ | 744 | void (*cancel_txn) (struct pmu *pmu); /* optional */ |
| 745 | |||
| 746 | /* | ||
| 747 | * Will return the value for perf_event_mmap_page::index for this event, | ||
| 748 | * if no implementation is provided it will default to: event->hw.idx + 1. | ||
| 749 | */ | ||
| 750 | int (*event_idx) (struct perf_event *event); /*optional */ | ||
| 751 | |||
| 752 | /* | ||
| 753 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
| 754 | */ | ||
| 755 | void (*flush_branch_stack) (void); | ||
| 684 | }; | 756 | }; |
| 685 | 757 | ||
| 686 | /** | 758 | /** |
| @@ -850,6 +922,9 @@ struct perf_event { | |||
| 850 | #ifdef CONFIG_EVENT_TRACING | 922 | #ifdef CONFIG_EVENT_TRACING |
| 851 | struct ftrace_event_call *tp_event; | 923 | struct ftrace_event_call *tp_event; |
| 852 | struct event_filter *filter; | 924 | struct event_filter *filter; |
| 925 | #ifdef CONFIG_FUNCTION_TRACER | ||
| 926 | struct ftrace_ops ftrace_ops; | ||
| 927 | #endif | ||
| 853 | #endif | 928 | #endif |
| 854 | 929 | ||
| 855 | #ifdef CONFIG_CGROUP_PERF | 930 | #ifdef CONFIG_CGROUP_PERF |
| @@ -911,7 +986,8 @@ struct perf_event_context { | |||
| 911 | u64 parent_gen; | 986 | u64 parent_gen; |
| 912 | u64 generation; | 987 | u64 generation; |
| 913 | int pin_count; | 988 | int pin_count; |
| 914 | int nr_cgroups; /* cgroup events present */ | 989 | int nr_cgroups; /* cgroup evts */ |
| 990 | int nr_branch_stack; /* branch_stack evt */ | ||
| 915 | struct rcu_head rcu_head; | 991 | struct rcu_head rcu_head; |
| 916 | }; | 992 | }; |
| 917 | 993 | ||
| @@ -976,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
| 976 | extern u64 perf_event_read_value(struct perf_event *event, | 1052 | extern u64 perf_event_read_value(struct perf_event *event, |
| 977 | u64 *enabled, u64 *running); | 1053 | u64 *enabled, u64 *running); |
| 978 | 1054 | ||
| 1055 | |||
| 979 | struct perf_sample_data { | 1056 | struct perf_sample_data { |
| 980 | u64 type; | 1057 | u64 type; |
| 981 | 1058 | ||
| @@ -995,12 +1072,14 @@ struct perf_sample_data { | |||
| 995 | u64 period; | 1072 | u64 period; |
| 996 | struct perf_callchain_entry *callchain; | 1073 | struct perf_callchain_entry *callchain; |
| 997 | struct perf_raw_record *raw; | 1074 | struct perf_raw_record *raw; |
| 1075 | struct perf_branch_stack *br_stack; | ||
| 998 | }; | 1076 | }; |
| 999 | 1077 | ||
| 1000 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) | 1078 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
| 1001 | { | 1079 | { |
| 1002 | data->addr = addr; | 1080 | data->addr = addr; |
| 1003 | data->raw = NULL; | 1081 | data->raw = NULL; |
| 1082 | data->br_stack = NULL; | ||
| 1004 | } | 1083 | } |
| 1005 | 1084 | ||
| 1006 | extern void perf_output_sample(struct perf_output_handle *handle, | 1085 | extern void perf_output_sample(struct perf_output_handle *handle, |
| @@ -1029,7 +1108,7 @@ static inline int is_software_event(struct perf_event *event) | |||
| 1029 | return event->pmu->task_ctx_nr == perf_sw_context; | 1108 | return event->pmu->task_ctx_nr == perf_sw_context; |
| 1030 | } | 1109 | } |
| 1031 | 1110 | ||
| 1032 | extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 1111 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
| 1033 | 1112 | ||
| 1034 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); | 1113 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); |
| 1035 | 1114 | ||
| @@ -1057,7 +1136,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | |||
| 1057 | { | 1136 | { |
| 1058 | struct pt_regs hot_regs; | 1137 | struct pt_regs hot_regs; |
| 1059 | 1138 | ||
| 1060 | if (static_branch(&perf_swevent_enabled[event_id])) { | 1139 | if (static_key_false(&perf_swevent_enabled[event_id])) { |
| 1061 | if (!regs) { | 1140 | if (!regs) { |
| 1062 | perf_fetch_caller_regs(&hot_regs); | 1141 | perf_fetch_caller_regs(&hot_regs); |
| 1063 | regs = &hot_regs; | 1142 | regs = &hot_regs; |
| @@ -1066,12 +1145,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | |||
| 1066 | } | 1145 | } |
| 1067 | } | 1146 | } |
| 1068 | 1147 | ||
| 1069 | extern struct jump_label_key_deferred perf_sched_events; | 1148 | extern struct static_key_deferred perf_sched_events; |
| 1070 | 1149 | ||
| 1071 | static inline void perf_event_task_sched_in(struct task_struct *prev, | 1150 | static inline void perf_event_task_sched_in(struct task_struct *prev, |
| 1072 | struct task_struct *task) | 1151 | struct task_struct *task) |
| 1073 | { | 1152 | { |
| 1074 | if (static_branch(&perf_sched_events.key)) | 1153 | if (static_key_false(&perf_sched_events.key)) |
| 1075 | __perf_event_task_sched_in(prev, task); | 1154 | __perf_event_task_sched_in(prev, task); |
| 1076 | } | 1155 | } |
| 1077 | 1156 | ||
| @@ -1080,7 +1159,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, | |||
| 1080 | { | 1159 | { |
| 1081 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); | 1160 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); |
| 1082 | 1161 | ||
| 1083 | if (static_branch(&perf_sched_events.key)) | 1162 | if (static_key_false(&perf_sched_events.key)) |
| 1084 | __perf_event_task_sched_out(prev, next); | 1163 | __perf_event_task_sched_out(prev, next); |
| 1085 | } | 1164 | } |
| 1086 | 1165 | ||
| @@ -1139,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); | |||
| 1139 | # define perf_instruction_pointer(regs) instruction_pointer(regs) | 1218 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
| 1140 | #endif | 1219 | #endif |
| 1141 | 1220 | ||
| 1221 | static inline bool has_branch_stack(struct perf_event *event) | ||
| 1222 | { | ||
| 1223 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | ||
| 1224 | } | ||
| 1225 | |||
| 1142 | extern int perf_output_begin(struct perf_output_handle *handle, | 1226 | extern int perf_output_begin(struct perf_output_handle *handle, |
| 1143 | struct perf_event *event, unsigned int size); | 1227 | struct perf_event *event, unsigned int size); |
| 1144 | extern void perf_output_end(struct perf_output_handle *handle); | 1228 | extern void perf_output_end(struct perf_output_handle *handle); |
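On the user-space side, the new branch-stack sampling is requested through two perf_event_attr fields: PERF_SAMPLE_BRANCH_STACK in sample_type and a mask of enum perf_branch_sample_type values in branch_sample_type. A hedged sketch using the raw syscall (error handling trimmed; this needs a PMU with LBR-style branch recording):

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>

    int open_branch_sampler(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size          = sizeof(attr);   /* >= PERF_ATTR_SIZE_VER2 */
            attr.type          = PERF_TYPE_HARDWARE;
            attr.config        = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            /* priv levels are permission-checked; BRANCH_ANY supersedes the rest */
            attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                      PERF_SAMPLE_BRANCH_USER;

            /* measure the calling thread on any cpu */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }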
diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 000000000000..27bd3f8a0857 --- /dev/null +++ b/include/linux/static_key.h | |||
| @@ -0,0 +1 @@ | |||
| #include <linux/jump_label.h> | |||
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index df0a779c1bbd..bd96ecd0e05c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
| 18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
| 19 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
| 20 | #include <linux/jump_label.h> | 20 | #include <linux/static_key.h> |
| 21 | 21 | ||
| 22 | struct module; | 22 | struct module; |
| 23 | struct tracepoint; | 23 | struct tracepoint; |
| @@ -29,7 +29,7 @@ struct tracepoint_func { | |||
| 29 | 29 | ||
| 30 | struct tracepoint { | 30 | struct tracepoint { |
| 31 | const char *name; /* Tracepoint name */ | 31 | const char *name; /* Tracepoint name */ |
| 32 | struct jump_label_key key; | 32 | struct static_key key; |
| 33 | void (*regfunc)(void); | 33 | void (*regfunc)(void); |
| 34 | void (*unregfunc)(void); | 34 | void (*unregfunc)(void); |
| 35 | struct tracepoint_func __rcu *funcs; | 35 | struct tracepoint_func __rcu *funcs; |
| @@ -114,7 +114,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 114 | * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just | 114 | * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just |
| 115 | * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". | 115 | * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". |
| 116 | */ | 116 | */ |
| 117 | #define __DO_TRACE(tp, proto, args, cond) \ | 117 | #define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ |
| 118 | do { \ | 118 | do { \ |
| 119 | struct tracepoint_func *it_func_ptr; \ | 119 | struct tracepoint_func *it_func_ptr; \ |
| 120 | void *it_func; \ | 120 | void *it_func; \ |
| @@ -122,6 +122,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 122 | \ | 122 | \ |
| 123 | if (!(cond)) \ | 123 | if (!(cond)) \ |
| 124 | return; \ | 124 | return; \ |
| 125 | prercu; \ | ||
| 125 | rcu_read_lock_sched_notrace(); \ | 126 | rcu_read_lock_sched_notrace(); \ |
| 126 | it_func_ptr = rcu_dereference_sched((tp)->funcs); \ | 127 | it_func_ptr = rcu_dereference_sched((tp)->funcs); \ |
| 127 | if (it_func_ptr) { \ | 128 | if (it_func_ptr) { \ |
| @@ -132,6 +133,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 132 | } while ((++it_func_ptr)->func); \ | 133 | } while ((++it_func_ptr)->func); \ |
| 133 | } \ | 134 | } \ |
| 134 | rcu_read_unlock_sched_notrace(); \ | 135 | rcu_read_unlock_sched_notrace(); \ |
| 136 | postrcu; \ | ||
| 135 | } while (0) | 137 | } while (0) |
| 136 | 138 | ||
| 137 | /* | 139 | /* |
| @@ -139,15 +141,25 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 139 | * not add unwanted padding between the beginning of the section and the | 141 | * not add unwanted padding between the beginning of the section and the |
| 140 | * structure. Force alignment to the same alignment as the section start. | 142 | * structure. Force alignment to the same alignment as the section start. |
| 141 | */ | 143 | */ |
| 142 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ | 144 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ |
| 143 | extern struct tracepoint __tracepoint_##name; \ | 145 | extern struct tracepoint __tracepoint_##name; \ |
| 144 | static inline void trace_##name(proto) \ | 146 | static inline void trace_##name(proto) \ |
| 145 | { \ | 147 | { \ |
| 148 | if (static_key_false(&__tracepoint_##name.key)) \ | ||
| 149 | __DO_TRACE(&__tracepoint_##name, \ | ||
| 150 | TP_PROTO(data_proto), \ | ||
| 151 | TP_ARGS(data_args), \ | ||
| 152 | TP_CONDITION(cond),,); \ | ||
| 153 | } \ | ||
| 154 | static inline void trace_##name##_rcuidle(proto) \ | ||
| 155 | { \ | ||
| 146 | if (static_branch(&__tracepoint_##name.key)) \ | 156 | if (static_branch(&__tracepoint_##name.key)) \ |
| 147 | __DO_TRACE(&__tracepoint_##name, \ | 157 | __DO_TRACE(&__tracepoint_##name, \ |
| 148 | TP_PROTO(data_proto), \ | 158 | TP_PROTO(data_proto), \ |
| 149 | TP_ARGS(data_args), \ | 159 | TP_ARGS(data_args), \ |
| 150 | TP_CONDITION(cond)); \ | 160 | TP_CONDITION(cond), \ |
| 161 | rcu_idle_exit(), \ | ||
| 162 | rcu_idle_enter()); \ | ||
| 151 | } \ | 163 | } \ |
| 152 | static inline int \ | 164 | static inline int \ |
| 153 | register_trace_##name(void (*probe)(data_proto), void *data) \ | 165 | register_trace_##name(void (*probe)(data_proto), void *data) \ |
| @@ -176,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 176 | __attribute__((section("__tracepoints_strings"))) = #name; \ | 188 | __attribute__((section("__tracepoints_strings"))) = #name; \ |
| 177 | struct tracepoint __tracepoint_##name \ | 189 | struct tracepoint __tracepoint_##name \ |
| 178 | __attribute__((section("__tracepoints"))) = \ | 190 | __attribute__((section("__tracepoints"))) = \ |
| 179 | { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ | 191 | { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ |
| 180 | static struct tracepoint * const __tracepoint_ptr_##name __used \ | 192 | static struct tracepoint * const __tracepoint_ptr_##name __used \ |
| 181 | __attribute__((section("__tracepoints_ptrs"))) = \ | 193 | __attribute__((section("__tracepoints_ptrs"))) = \ |
| 182 | &__tracepoint_##name; | 194 | &__tracepoint_##name; |
| @@ -190,9 +202,11 @@ static inline void tracepoint_synchronize_unregister(void) | |||
| 190 | EXPORT_SYMBOL(__tracepoint_##name) | 202 | EXPORT_SYMBOL(__tracepoint_##name) |
| 191 | 203 | ||
| 192 | #else /* !CONFIG_TRACEPOINTS */ | 204 | #else /* !CONFIG_TRACEPOINTS */ |
| 193 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ | 205 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ |
| 194 | static inline void trace_##name(proto) \ | 206 | static inline void trace_##name(proto) \ |
| 195 | { } \ | 207 | { } \ |
| 208 | static inline void trace_##name##_rcuidle(proto) \ | ||
| 209 | { } \ | ||
| 196 | static inline int \ | 210 | static inline int \ |
| 197 | register_trace_##name(void (*probe)(data_proto), \ | 211 | register_trace_##name(void (*probe)(data_proto), \ |
| 198 | void *data) \ | 212 | void *data) \ |
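
The prercu/postrcu hook slots added to __DO_TRACE() exist for one consumer: the new trace_<name>_rcuidle() variants, which let a tracepoint fire from the idle loop. There RCU is not watching, so iterating the probe list under rcu_read_lock_sched_notrace() alone would be unsound; the rcuidle variant brackets the probe calls with rcu_idle_exit()/rcu_idle_enter(), while the plain variant passes empty hooks (the bare ",," above). A minimal userspace model of the hook plumbing, with stubs standing in for the RCU calls:

    #include <stdio.h>

    /* Userspace model of the prercu/postrcu plumbing; every name here
     * is an illustrative stub, not a real kernel symbol. */
    static void rcu_idle_exit_stub(void)  { puts("rcu_idle_exit()"); }
    static void rcu_idle_enter_stub(void) { puts("rcu_idle_enter()"); }

    #define DO_TRACE(probe, arg, prercu, postrcu) \
        do {                                      \
            prercu;                               \
            probe(arg);                           \
            postrcu;                              \
        } while (0)

    static void probe_foo(int cpu) { printf("probe_foo(cpu=%d)\n", cpu); }

    /* normal variant: hook slots left empty, like the bare ",," above */
    static void trace_foo(int cpu)
    {
        DO_TRACE(probe_foo, cpu, , );
    }

    /* idle variant: momentarily tell RCU this CPU is not idle */
    static void trace_foo_rcuidle(int cpu)
    {
        DO_TRACE(probe_foo, cpu, rcu_idle_exit_stub(), rcu_idle_enter_stub());
    }

    int main(void)
    {
        trace_foo(0);
        trace_foo_rcuidle(0);
        return 0;
    }
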
diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..dcde2d9268cd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
| @@ -55,7 +55,7 @@ | |||
| 55 | #include <linux/uaccess.h> | 55 | #include <linux/uaccess.h> |
| 56 | #include <linux/memcontrol.h> | 56 | #include <linux/memcontrol.h> |
| 57 | #include <linux/res_counter.h> | 57 | #include <linux/res_counter.h> |
| 58 | #include <linux/jump_label.h> | 58 | #include <linux/static_key.h> |
| 59 | 59 | ||
| 60 | #include <linux/filter.h> | 60 | #include <linux/filter.h> |
| 61 | #include <linux/rculist_nulls.h> | 61 | #include <linux/rculist_nulls.h> |
| @@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) | |||
| 924 | #endif /* SOCK_REFCNT_DEBUG */ | 924 | #endif /* SOCK_REFCNT_DEBUG */ |
| 925 | 925 | ||
| 926 | #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) | 926 | #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) |
| 927 | extern struct jump_label_key memcg_socket_limit_enabled; | 927 | extern struct static_key memcg_socket_limit_enabled; |
| 928 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | 928 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, |
| 929 | struct cg_proto *cg_proto) | 929 | struct cg_proto *cg_proto) |
| 930 | { | 930 | { |
| 931 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); | 931 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); |
| 932 | } | 932 | } |
| 933 | #define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) | 933 | #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) |
| 934 | #else | 934 | #else |
| 935 | #define mem_cgroup_sockets_enabled 0 | 935 | #define mem_cgroup_sockets_enabled 0 |
| 936 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | 936 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, |
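
The mem_cgroup_sockets_enabled test compiles to a straight-line no-op until the key is enabled, so sockets outside any memcg limit pay essentially nothing for the accounting path. A rough kernel-style sketch of the pattern, assuming only the static_key API introduced by this series; do_expensive_memcg_accounting() is a hypothetical stand-in, not a real function:

    /* Sketch of the default-false static key pattern; the accounting
     * helper below is hypothetical. */
    static void do_expensive_memcg_accounting(void);

    struct static_key memcg_socket_limit_enabled = STATIC_KEY_INIT_FALSE;

    static void charge_skb(void)
    {
        /* a no-op in the instruction stream until the key is enabled */
        if (static_key_false(&memcg_socket_limit_enabled))
            do_expensive_memcg_accounting();
    }

    /* slow path: flip the key when the first cgroup sets a socket limit */
    static void limit_set(void)
    {
        static_key_slow_inc(&memcg_socket_limit_enabled);
    }

    static void limit_cleared(void)
    {
        static_key_slow_dec(&memcg_socket_limit_enabled);
    }
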
diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 1bcc2a8c00e2..14b38940062b 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h | |||
| @@ -151,6 +151,8 @@ enum { | |||
| 151 | events get removed */ | 151 | events get removed */ |
| 152 | static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; | 152 | static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; |
| 153 | static inline void trace_power_end(u64 cpuid) {}; | 153 | static inline void trace_power_end(u64 cpuid) {}; |
| 154 | static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {}; | ||
| 155 | static inline void trace_power_end_rcuidle(u64 cpuid) {}; | ||
| 154 | static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; | 156 | static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; |
| 155 | #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ | 157 | #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ |
| 156 | 158 | ||
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h new file mode 100644 index 000000000000..94ec79cc011a --- /dev/null +++ b/include/trace/events/printk.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | #undef TRACE_SYSTEM | ||
| 2 | #define TRACE_SYSTEM printk | ||
| 3 | |||
| 4 | #if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 5 | #define _TRACE_PRINTK_H | ||
| 6 | |||
| 7 | #include <linux/tracepoint.h> | ||
| 8 | |||
| 9 | TRACE_EVENT_CONDITION(console, | ||
| 10 | TP_PROTO(const char *log_buf, unsigned start, unsigned end, | ||
| 11 | unsigned log_buf_len), | ||
| 12 | |||
| 13 | TP_ARGS(log_buf, start, end, log_buf_len), | ||
| 14 | |||
| 15 | TP_CONDITION(start != end), | ||
| 16 | |||
| 17 | TP_STRUCT__entry( | ||
| 18 | __dynamic_array(char, msg, end - start + 1) | ||
| 19 | ), | ||
| 20 | |||
| 21 | TP_fast_assign( | ||
| 22 | if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) { | ||
| 23 | memcpy(__get_dynamic_array(msg), | ||
| 24 | log_buf + (start & (log_buf_len - 1)), | ||
| 25 | log_buf_len - (start & (log_buf_len - 1))); | ||
| 26 | memcpy((char *)__get_dynamic_array(msg) + | ||
| 27 | log_buf_len - (start & (log_buf_len - 1)), | ||
| 28 | log_buf, end & (log_buf_len - 1)); | ||
| 29 | } else | ||
| 30 | memcpy(__get_dynamic_array(msg), | ||
| 31 | log_buf + (start & (log_buf_len - 1)), | ||
| 32 | end - start); | ||
| 33 | ((char *)__get_dynamic_array(msg))[end - start] = 0; | ||
| 34 | ), | ||
| 35 | |||
| 36 | TP_printk("%s", __get_str(msg)) | ||
| 37 | ); | ||
| 38 | #endif /* _TRACE_PRINTK_H */ | ||
| 39 | |||
| 40 | /* This part must be outside protection */ | ||
| 41 | #include <trace/define_trace.h> | ||
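
The two-memcpy branch in TP_fast_assign above handles a console record that wraps around the end of the power-of-two log buffer: masking with (log_buf_len - 1) yields the wrapped offsets, and a wrapped start landing after the wrapped end means the record straddles the boundary. A self-contained userspace model of the same copy logic (buffer size and contents are illustrative):

    #include <stdio.h>
    #include <string.h>

    /* Standalone model of the TP_fast_assign copy: log_buf is a
     * power-of-two ring, so "idx & (len - 1)" is the wrapped offset.
     * A wrapped start past the wrapped end means two memcpys. */
    static void copy_console_msg(char *dst, const char *log_buf,
                                 unsigned start, unsigned end,
                                 unsigned log_buf_len)
    {
        unsigned s = start & (log_buf_len - 1);
        unsigned e = end & (log_buf_len - 1);

        if (s > e) {                    /* record straddles the boundary */
            memcpy(dst, log_buf + s, log_buf_len - s);
            memcpy(dst + log_buf_len - s, log_buf, e);
        } else {
            memcpy(dst, log_buf + s, end - start);
        }
        dst[end - start] = '\0';
    }

    int main(void)
    {
        char ring[8] = { 'G', 'H', 'c', 'd', 'e', 'A', 'B', 'F' };
        char out[16];

        /* the record occupies indices 5..9, wrapping past the end
         * of the 8-byte buffer back to index 1 */
        copy_console_msg(out, ring, 5, 10, sizeof(ring));
        printf("%s\n", out);            /* prints "ABFGH" */
        return 0;
    }
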
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e33ed1bfa113..fbc7b1ad929b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
| 9 | #include <linux/binfmts.h> | ||
| 9 | 10 | ||
| 10 | /* | 11 | /* |
| 11 | * Tracepoint for calling kthread_stop, performed to end a kthread: | 12 | * Tracepoint for calling kthread_stop, performed to end a kthread: |
| @@ -276,6 +277,32 @@ TRACE_EVENT(sched_process_fork, | |||
| 276 | ); | 277 | ); |
| 277 | 278 | ||
| 278 | /* | 279 | /* |
| 280 | * Tracepoint for exec: | ||
| 281 | */ | ||
| 282 | TRACE_EVENT(sched_process_exec, | ||
| 283 | |||
| 284 | TP_PROTO(struct task_struct *p, pid_t old_pid, | ||
| 285 | struct linux_binprm *bprm), | ||
| 286 | |||
| 287 | TP_ARGS(p, old_pid, bprm), | ||
| 288 | |||
| 289 | TP_STRUCT__entry( | ||
| 290 | __string( filename, bprm->filename ) | ||
| 291 | __field( pid_t, pid ) | ||
| 292 | __field( pid_t, old_pid ) | ||
| 293 | ), | ||
| 294 | |||
| 295 | TP_fast_assign( | ||
| 296 | __assign_str(filename, bprm->filename); | ||
| 297 | __entry->pid = p->pid; | ||
| 298 | __entry->old_pid = old_pid; | ||
| 299 | ), | ||
| 300 | |||
| 301 | TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), | ||
| 302 | __entry->pid, __entry->old_pid) | ||
| 303 | ); | ||
| 304 | |||
| 305 | /* | ||
| 279 | * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE | 306 | * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE |
| 280 | * adding sched_stat support to SCHED_FIFO/RR would be welcome. | 307 | * adding sched_stat support to SCHED_FIFO/RR would be welcome. |
| 281 | */ | 308 | */ |
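
old_pid is passed separately because exec can change the task's pid: when a non-leader thread execs, it takes over the group leader's pid, so the caller must snapshot the old value before the exec completes. A hedged sketch of a call site; the real hook lives in the exec path outside this hunk, and both the helper and the flow here are illustrative:

    /* Hedged sketch only: helper and flow are illustrative, not the
     * actual exec-path code. */
    static int load_binary_example(struct linux_binprm *bprm);

    static int do_exec_example(struct linux_binprm *bprm)
    {
        pid_t old_pid = current->pid;   /* snapshot before it can change */
        int ret;

        ret = load_binary_example(bprm);
        if (ret)
            return ret;

        trace_sched_process_exec(current, old_pid, bprm);
        return 0;
    }
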
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index 17df43464df0..39a8a430d90f 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h | |||
| @@ -23,11 +23,23 @@ | |||
| 23 | } \ | 23 | } \ |
| 24 | } while (0) | 24 | } while (0) |
| 25 | 25 | ||
| 26 | #ifndef TRACE_HEADER_MULTI_READ | ||
| 27 | enum { | ||
| 28 | TRACE_SIGNAL_DELIVERED, | ||
| 29 | TRACE_SIGNAL_IGNORED, | ||
| 30 | TRACE_SIGNAL_ALREADY_PENDING, | ||
| 31 | TRACE_SIGNAL_OVERFLOW_FAIL, | ||
| 32 | TRACE_SIGNAL_LOSE_INFO, | ||
| 33 | }; | ||
| 34 | #endif | ||
| 35 | |||
| 26 | /** | 36 | /** |
| 27 | * signal_generate - called when a signal is generated | 37 | * signal_generate - called when a signal is generated |
| 28 | * @sig: signal number | 38 | * @sig: signal number |
| 29 | * @info: pointer to struct siginfo | 39 | * @info: pointer to struct siginfo |
| 30 | * @task: pointer to struct task_struct | 40 | * @task: pointer to struct task_struct |
| 41 | * @group: shared or private | ||
| 42 | * @result: TRACE_SIGNAL_* | ||
| 31 | * | 43 | * |
| 32 | * Current process sends a 'sig' signal to 'task' process with | 44 | * Current process sends a 'sig' signal to 'task' process with |
| 33 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, | 45 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, |
| @@ -37,9 +49,10 @@ | |||
| 37 | */ | 49 | */ |
| 38 | TRACE_EVENT(signal_generate, | 50 | TRACE_EVENT(signal_generate, |
| 39 | 51 | ||
| 40 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), | 52 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task, |
| 53 | int group, int result), | ||
| 41 | 54 | ||
| 42 | TP_ARGS(sig, info, task), | 55 | TP_ARGS(sig, info, task, group, result), |
| 43 | 56 | ||
| 44 | TP_STRUCT__entry( | 57 | TP_STRUCT__entry( |
| 45 | __field( int, sig ) | 58 | __field( int, sig ) |
| @@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate, | |||
| 47 | __field( int, code ) | 60 | __field( int, code ) |
| 48 | __array( char, comm, TASK_COMM_LEN ) | 61 | __array( char, comm, TASK_COMM_LEN ) |
| 49 | __field( pid_t, pid ) | 62 | __field( pid_t, pid ) |
| 63 | __field( int, group ) | ||
| 64 | __field( int, result ) | ||
| 50 | ), | 65 | ), |
| 51 | 66 | ||
| 52 | TP_fast_assign( | 67 | TP_fast_assign( |
| @@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate, | |||
| 54 | TP_STORE_SIGINFO(__entry, info); | 69 | TP_STORE_SIGINFO(__entry, info); |
| 55 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); | 70 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); |
| 56 | __entry->pid = task->pid; | 71 | __entry->pid = task->pid; |
| 72 | __entry->group = group; | ||
| 73 | __entry->result = result; | ||
| 57 | ), | 74 | ), |
| 58 | 75 | ||
| 59 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", | 76 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", |
| 60 | __entry->sig, __entry->errno, __entry->code, | 77 | __entry->sig, __entry->errno, __entry->code, |
| 61 | __entry->comm, __entry->pid) | 78 | __entry->comm, __entry->pid, __entry->group, |
| 79 | __entry->result) | ||
| 62 | ); | 80 | ); |
| 63 | 81 | ||
| 64 | /** | 82 | /** |
| @@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver, | |||
| 101 | __entry->sa_handler, __entry->sa_flags) | 119 | __entry->sa_handler, __entry->sa_flags) |
| 102 | ); | 120 | ); |
| 103 | 121 | ||
| 104 | DECLARE_EVENT_CLASS(signal_queue_overflow, | ||
| 105 | |||
| 106 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
| 107 | |||
| 108 | TP_ARGS(sig, group, info), | ||
| 109 | |||
| 110 | TP_STRUCT__entry( | ||
| 111 | __field( int, sig ) | ||
| 112 | __field( int, group ) | ||
| 113 | __field( int, errno ) | ||
| 114 | __field( int, code ) | ||
| 115 | ), | ||
| 116 | |||
| 117 | TP_fast_assign( | ||
| 118 | __entry->sig = sig; | ||
| 119 | __entry->group = group; | ||
| 120 | TP_STORE_SIGINFO(__entry, info); | ||
| 121 | ), | ||
| 122 | |||
| 123 | TP_printk("sig=%d group=%d errno=%d code=%d", | ||
| 124 | __entry->sig, __entry->group, __entry->errno, __entry->code) | ||
| 125 | ); | ||
| 126 | |||
| 127 | /** | ||
| 128 | * signal_overflow_fail - called when signal queue is overflow | ||
| 129 | * @sig: signal number | ||
| 130 | * @group: signal to process group or not (bool) | ||
| 131 | * @info: pointer to struct siginfo | ||
| 132 | * | ||
| 133 | * Kernel fails to generate 'sig' signal with 'info' siginfo, because | ||
| 134 | * siginfo queue is overflow, and the signal is dropped. | ||
| 135 | * 'group' is not 0 if the signal will be sent to a process group. | ||
| 136 | * 'sig' is always one of RT signals. | ||
| 137 | */ | ||
| 138 | DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, | ||
| 139 | |||
| 140 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
| 141 | |||
| 142 | TP_ARGS(sig, group, info) | ||
| 143 | ); | ||
| 144 | |||
| 145 | /** | ||
| 146 | * signal_lose_info - called when siginfo is lost | ||
| 147 | * @sig: signal number | ||
| 148 | * @group: signal to process group or not (bool) | ||
| 149 | * @info: pointer to struct siginfo | ||
| 150 | * | ||
| 151 | * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo | ||
| 152 | * queue is overflow. | ||
| 153 | * 'group' is not 0 if the signal will be sent to a process group. | ||
| 154 | * 'sig' is always one of non-RT signals. | ||
| 155 | */ | ||
| 156 | DEFINE_EVENT(signal_queue_overflow, signal_lose_info, | ||
| 157 | |||
| 158 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
| 159 | |||
| 160 | TP_ARGS(sig, group, info) | ||
| 161 | ); | ||
| 162 | |||
| 163 | #endif /* _TRACE_SIGNAL_H */ | 122 | #endif /* _TRACE_SIGNAL_H */ |
| 164 | 123 | ||
| 165 | /* This part must be outside protection */ | 124 | /* This part must be outside protection */ |
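
With the queue-overflow class removed, a sender reports one TRACE_SIGNAL_* result per generation attempt through the extended signal_generate event instead of firing separate tracepoints. A hedged sketch of that reporting convention, keeping the semantics documented by the deleted comments (RT signals fail hard on queue overflow, non-RT ones merely lose their siginfo); the helpers are hypothetical and the real sender logic lives in kernel/signal.c, outside this hunk:

    /* Hedged sketch; both helpers are hypothetical. */
    static bool sig_ignored_example(struct task_struct *t, int sig);
    static bool enqueue_siginfo_example(struct task_struct *t, int sig,
                                        struct siginfo *info);

    static void send_signal_example(int sig, struct siginfo *info,
                                    struct task_struct *t, int group)
    {
        int result = TRACE_SIGNAL_IGNORED;

        if (sig_ignored_example(t, sig))
            goto out;

        /* a duplicate pending non-RT signal would instead report
         * TRACE_SIGNAL_ALREADY_PENDING here */
        result = TRACE_SIGNAL_DELIVERED;
        if (!enqueue_siginfo_example(t, sig, info))
            /* RT signals fail hard; non-RT ones just lose siginfo */
            result = (sig >= SIGRTMIN) ? TRACE_SIGNAL_OVERFLOW_FAIL
                                       : TRACE_SIGNAL_LOSE_INFO;
    out:
        trace_signal_generate(sig, info, t, group, result);
    }
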
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1b5c081d8b9f..c61234b1a988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) | |||
| 118 | PERF_FLAG_FD_OUTPUT |\ | 118 | PERF_FLAG_FD_OUTPUT |\ |
| 119 | PERF_FLAG_PID_CGROUP) | 119 | PERF_FLAG_PID_CGROUP) |
| 120 | 120 | ||
| 121 | /* | ||
| 122 | * branch priv levels that need permission checks | ||
| 123 | */ | ||
| 124 | #define PERF_SAMPLE_BRANCH_PERM_PLM \ | ||
| 125 | (PERF_SAMPLE_BRANCH_KERNEL |\ | ||
| 126 | PERF_SAMPLE_BRANCH_HV) | ||
| 127 | |||
| 121 | enum event_type_t { | 128 | enum event_type_t { |
| 122 | EVENT_FLEXIBLE = 0x1, | 129 | EVENT_FLEXIBLE = 0x1, |
| 123 | EVENT_PINNED = 0x2, | 130 | EVENT_PINNED = 0x2, |
| @@ -128,8 +135,9 @@ enum event_type_t { | |||
| 128 | * perf_sched_events : >0 events exist | 135 | * perf_sched_events : >0 events exist |
| 129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu | 136 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu |
| 130 | */ | 137 | */ |
| 131 | struct jump_label_key_deferred perf_sched_events __read_mostly; | 138 | struct static_key_deferred perf_sched_events __read_mostly; |
| 132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 139 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
| 140 | static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); | ||
| 133 | 141 | ||
| 134 | static atomic_t nr_mmap_events __read_mostly; | 142 | static atomic_t nr_mmap_events __read_mostly; |
| 135 | static atomic_t nr_comm_events __read_mostly; | 143 | static atomic_t nr_comm_events __read_mostly; |
| @@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 881 | if (is_cgroup_event(event)) | 889 | if (is_cgroup_event(event)) |
| 882 | ctx->nr_cgroups++; | 890 | ctx->nr_cgroups++; |
| 883 | 891 | ||
| 892 | if (has_branch_stack(event)) | ||
| 893 | ctx->nr_branch_stack++; | ||
| 894 | |||
| 884 | list_add_rcu(&event->event_entry, &ctx->event_list); | 895 | list_add_rcu(&event->event_entry, &ctx->event_list); |
| 885 | if (!ctx->nr_events) | 896 | if (!ctx->nr_events) |
| 886 | perf_pmu_rotate_start(ctx->pmu); | 897 | perf_pmu_rotate_start(ctx->pmu); |
| @@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 1020 | cpuctx->cgrp = NULL; | 1031 | cpuctx->cgrp = NULL; |
| 1021 | } | 1032 | } |
| 1022 | 1033 | ||
| 1034 | if (has_branch_stack(event)) | ||
| 1035 | ctx->nr_branch_stack--; | ||
| 1036 | |||
| 1023 | ctx->nr_events--; | 1037 | ctx->nr_events--; |
| 1024 | if (event->attr.inherit_stat) | 1038 | if (event->attr.inherit_stat) |
| 1025 | ctx->nr_stat--; | 1039 | ctx->nr_stat--; |
| @@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2195 | } | 2209 | } |
| 2196 | 2210 | ||
| 2197 | /* | 2211 | /* |
| 2212 | * When sampling the branch stack in system-wide mode, it may be necessary | ||
| 2213 | * to flush the stack on context switch. This happens when the branch | ||
| 2214 | * stack does not tag its entries with the pid of the current task. | ||
| 2215 | * Otherwise it becomes impossible to associate a branch entry with a | ||
| 2216 | * task. This ambiguity is more likely to appear when the branch stack | ||
| 2217 | * supports priv level filtering and the user sets it to monitor only | ||
| 2218 | * at the user level (which could be a useful measurement in system-wide | ||
| 2219 | * mode). In that case, the risk is high of having a branch stack with | ||
| 2220 | * branches from multiple tasks. Flushing may mean dropping the existing | ||
| 2221 | * entries or stashing them somewhere in the PMU specific code layer. | ||
| 2222 | * | ||
| 2223 | * This function provides the context switch callback to the lower code | ||
| 2224 | * layer. It is invoked ONLY when there is at least one system-wide context | ||
| 2225 | * with at least one active event using taken branch sampling. | ||
| 2226 | */ | ||
| 2227 | static void perf_branch_stack_sched_in(struct task_struct *prev, | ||
| 2228 | struct task_struct *task) | ||
| 2229 | { | ||
| 2230 | struct perf_cpu_context *cpuctx; | ||
| 2231 | struct pmu *pmu; | ||
| 2232 | unsigned long flags; | ||
| 2233 | |||
| 2234 | /* no need to flush branch stack if not changing task */ | ||
| 2235 | if (prev == task) | ||
| 2236 | return; | ||
| 2237 | |||
| 2238 | local_irq_save(flags); | ||
| 2239 | |||
| 2240 | rcu_read_lock(); | ||
| 2241 | |||
| 2242 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
| 2243 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
| 2244 | |||
| 2245 | /* | ||
| 2246 | * check if the context has at least one | ||
| 2247 | * event using PERF_SAMPLE_BRANCH_STACK | ||
| 2248 | */ | ||
| 2249 | if (cpuctx->ctx.nr_branch_stack > 0 | ||
| 2250 | && pmu->flush_branch_stack) { | ||
| 2251 | |||
| 2252 | pmu = cpuctx->ctx.pmu; | ||
| 2253 | |||
| 2254 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
| 2255 | |||
| 2256 | perf_pmu_disable(pmu); | ||
| 2257 | |||
| 2258 | pmu->flush_branch_stack(); | ||
| 2259 | |||
| 2260 | perf_pmu_enable(pmu); | ||
| 2261 | |||
| 2262 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
| 2263 | } | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | rcu_read_unlock(); | ||
| 2267 | |||
| 2268 | local_irq_restore(flags); | ||
| 2269 | } | ||
| 2270 | |||
| 2271 | /* | ||
| 2198 | * Called from scheduler to add the events of the current task | 2272 | * Called from scheduler to add the events of the current task |
| 2199 | * with interrupts disabled. | 2273 | * with interrupts disabled. |
| 2200 | * | 2274 | * |
| @@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, | |||
| 2225 | */ | 2299 | */ |
| 2226 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2300 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
| 2227 | perf_cgroup_sched_in(prev, task); | 2301 | perf_cgroup_sched_in(prev, task); |
| 2302 | |||
| 2303 | /* check for system-wide branch_stack events */ | ||
| 2304 | if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) | ||
| 2305 | perf_branch_stack_sched_in(prev, task); | ||
| 2228 | } | 2306 | } |
| 2229 | 2307 | ||
| 2230 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2308 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
| @@ -2778,7 +2856,7 @@ static void free_event(struct perf_event *event) | |||
| 2778 | 2856 | ||
| 2779 | if (!event->parent) { | 2857 | if (!event->parent) { |
| 2780 | if (event->attach_state & PERF_ATTACH_TASK) | 2858 | if (event->attach_state & PERF_ATTACH_TASK) |
| 2781 | jump_label_dec_deferred(&perf_sched_events); | 2859 | static_key_slow_dec_deferred(&perf_sched_events); |
| 2782 | if (event->attr.mmap || event->attr.mmap_data) | 2860 | if (event->attr.mmap || event->attr.mmap_data) |
| 2783 | atomic_dec(&nr_mmap_events); | 2861 | atomic_dec(&nr_mmap_events); |
| 2784 | if (event->attr.comm) | 2862 | if (event->attr.comm) |
| @@ -2789,7 +2867,15 @@ static void free_event(struct perf_event *event) | |||
| 2789 | put_callchain_buffers(); | 2867 | put_callchain_buffers(); |
| 2790 | if (is_cgroup_event(event)) { | 2868 | if (is_cgroup_event(event)) { |
| 2791 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2869 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
| 2792 | jump_label_dec_deferred(&perf_sched_events); | 2870 | static_key_slow_dec_deferred(&perf_sched_events); |
| 2871 | } | ||
| 2872 | |||
| 2873 | if (has_branch_stack(event)) { | ||
| 2874 | static_key_slow_dec_deferred(&perf_sched_events); | ||
| 2875 | /* is system-wide event */ | ||
| 2876 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
| 2877 | atomic_dec(&per_cpu(perf_branch_stack_events, | ||
| 2878 | event->cpu)); | ||
| 2793 | } | 2879 | } |
| 2794 | } | 2880 | } |
| 2795 | 2881 | ||
| @@ -3238,10 +3324,6 @@ int perf_event_task_disable(void) | |||
| 3238 | return 0; | 3324 | return 0; |
| 3239 | } | 3325 | } |
| 3240 | 3326 | ||
| 3241 | #ifndef PERF_EVENT_INDEX_OFFSET | ||
| 3242 | # define PERF_EVENT_INDEX_OFFSET 0 | ||
| 3243 | #endif | ||
| 3244 | |||
| 3245 | static int perf_event_index(struct perf_event *event) | 3327 | static int perf_event_index(struct perf_event *event) |
| 3246 | { | 3328 | { |
| 3247 | if (event->hw.state & PERF_HES_STOPPED) | 3329 | if (event->hw.state & PERF_HES_STOPPED) |
| @@ -3250,21 +3332,26 @@ static int perf_event_index(struct perf_event *event) | |||
| 3250 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3332 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
| 3251 | return 0; | 3333 | return 0; |
| 3252 | 3334 | ||
| 3253 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; | 3335 | return event->pmu->event_idx(event); |
| 3254 | } | 3336 | } |
| 3255 | 3337 | ||
| 3256 | static void calc_timer_values(struct perf_event *event, | 3338 | static void calc_timer_values(struct perf_event *event, |
| 3339 | u64 *now, | ||
| 3257 | u64 *enabled, | 3340 | u64 *enabled, |
| 3258 | u64 *running) | 3341 | u64 *running) |
| 3259 | { | 3342 | { |
| 3260 | u64 now, ctx_time; | 3343 | u64 ctx_time; |
| 3261 | 3344 | ||
| 3262 | now = perf_clock(); | 3345 | *now = perf_clock(); |
| 3263 | ctx_time = event->shadow_ctx_time + now; | 3346 | ctx_time = event->shadow_ctx_time + *now; |
| 3264 | *enabled = ctx_time - event->tstamp_enabled; | 3347 | *enabled = ctx_time - event->tstamp_enabled; |
| 3265 | *running = ctx_time - event->tstamp_running; | 3348 | *running = ctx_time - event->tstamp_running; |
| 3266 | } | 3349 | } |
| 3267 | 3350 | ||
| 3351 | void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
| 3352 | { | ||
| 3353 | } | ||
| 3354 | |||
| 3268 | /* | 3355 | /* |
| 3269 | * Callers need to ensure there can be no nesting of this function, otherwise | 3356 | * Callers need to ensure there can be no nesting of this function, otherwise |
| 3270 | * the seqlock logic goes bad. We can not serialize this because the arch | 3357 | * the seqlock logic goes bad. We can not serialize this because the arch |
| @@ -3274,7 +3361,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3274 | { | 3361 | { |
| 3275 | struct perf_event_mmap_page *userpg; | 3362 | struct perf_event_mmap_page *userpg; |
| 3276 | struct ring_buffer *rb; | 3363 | struct ring_buffer *rb; |
| 3277 | u64 enabled, running; | 3364 | u64 enabled, running, now; |
| 3278 | 3365 | ||
| 3279 | rcu_read_lock(); | 3366 | rcu_read_lock(); |
| 3280 | /* | 3367 | /* |
| @@ -3286,7 +3373,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3286 | * because of locking issue as we can be called in | 3373 | * because of locking issue as we can be called in |
| 3287 | * NMI context | 3374 | * NMI context |
| 3288 | */ | 3375 | */ |
| 3289 | calc_timer_values(event, &enabled, &running); | 3376 | calc_timer_values(event, &now, &enabled, &running); |
| 3290 | rb = rcu_dereference(event->rb); | 3377 | rb = rcu_dereference(event->rb); |
| 3291 | if (!rb) | 3378 | if (!rb) |
| 3292 | goto unlock; | 3379 | goto unlock; |
| @@ -3302,7 +3389,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3302 | barrier(); | 3389 | barrier(); |
| 3303 | userpg->index = perf_event_index(event); | 3390 | userpg->index = perf_event_index(event); |
| 3304 | userpg->offset = perf_event_count(event); | 3391 | userpg->offset = perf_event_count(event); |
| 3305 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 3392 | if (userpg->index) |
| 3306 | userpg->offset -= local64_read(&event->hw.prev_count); | 3393 | userpg->offset -= local64_read(&event->hw.prev_count); |
| 3307 | 3394 | ||
| 3308 | userpg->time_enabled = enabled + | 3395 | userpg->time_enabled = enabled + |
| @@ -3311,6 +3398,8 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3311 | userpg->time_running = running + | 3398 | userpg->time_running = running + |
| 3312 | atomic64_read(&event->child_total_time_running); | 3399 | atomic64_read(&event->child_total_time_running); |
| 3313 | 3400 | ||
| 3401 | perf_update_user_clock(userpg, now); | ||
| 3402 | |||
| 3314 | barrier(); | 3403 | barrier(); |
| 3315 | ++userpg->lock; | 3404 | ++userpg->lock; |
| 3316 | preempt_enable(); | 3405 | preempt_enable(); |
| @@ -3568,6 +3657,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 3568 | event->mmap_user = get_current_user(); | 3657 | event->mmap_user = get_current_user(); |
| 3569 | vma->vm_mm->pinned_vm += event->mmap_locked; | 3658 | vma->vm_mm->pinned_vm += event->mmap_locked; |
| 3570 | 3659 | ||
| 3660 | perf_event_update_userpage(event); | ||
| 3661 | |||
| 3571 | unlock: | 3662 | unlock: |
| 3572 | if (!ret) | 3663 | if (!ret) |
| 3573 | atomic_inc(&event->mmap_count); | 3664 | atomic_inc(&event->mmap_count); |
| @@ -3799,7 +3890,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
| 3799 | static void perf_output_read(struct perf_output_handle *handle, | 3890 | static void perf_output_read(struct perf_output_handle *handle, |
| 3800 | struct perf_event *event) | 3891 | struct perf_event *event) |
| 3801 | { | 3892 | { |
| 3802 | u64 enabled = 0, running = 0; | 3893 | u64 enabled = 0, running = 0, now; |
| 3803 | u64 read_format = event->attr.read_format; | 3894 | u64 read_format = event->attr.read_format; |
| 3804 | 3895 | ||
| 3805 | /* | 3896 | /* |
| @@ -3812,7 +3903,7 @@ static void perf_output_read(struct perf_output_handle *handle, | |||
| 3812 | * NMI context | 3903 | * NMI context |
| 3813 | */ | 3904 | */ |
| 3814 | if (read_format & PERF_FORMAT_TOTAL_TIMES) | 3905 | if (read_format & PERF_FORMAT_TOTAL_TIMES) |
| 3815 | calc_timer_values(event, &enabled, &running); | 3906 | calc_timer_values(event, &now, &enabled, &running); |
| 3816 | 3907 | ||
| 3817 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3908 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
| 3818 | perf_output_read_group(handle, event, enabled, running); | 3909 | perf_output_read_group(handle, event, enabled, running); |
| @@ -3902,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 3902 | } | 3993 | } |
| 3903 | } | 3994 | } |
| 3904 | } | 3995 | } |
| 3996 | |||
| 3997 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 3998 | if (data->br_stack) { | ||
| 3999 | size_t size; | ||
| 4000 | |||
| 4001 | size = data->br_stack->nr | ||
| 4002 | * sizeof(struct perf_branch_entry); | ||
| 4003 | |||
| 4004 | perf_output_put(handle, data->br_stack->nr); | ||
| 4005 | perf_output_copy(handle, data->br_stack->entries, size); | ||
| 4006 | } else { | ||
| 4007 | /* | ||
| 4008 | * we always store at least the value of nr | ||
| 4009 | */ | ||
| 4010 | u64 nr = 0; | ||
| 4011 | perf_output_put(handle, nr); | ||
| 4012 | } | ||
| 4013 | } | ||
| 3905 | } | 4014 | } |
| 3906 | 4015 | ||
| 3907 | void perf_prepare_sample(struct perf_event_header *header, | 4016 | void perf_prepare_sample(struct perf_event_header *header, |
| @@ -3944,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
| 3944 | WARN_ON_ONCE(size & (sizeof(u64)-1)); | 4053 | WARN_ON_ONCE(size & (sizeof(u64)-1)); |
| 3945 | header->size += size; | 4054 | header->size += size; |
| 3946 | } | 4055 | } |
| 4056 | |||
| 4057 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 4058 | int size = sizeof(u64); /* nr */ | ||
| 4059 | if (data->br_stack) { | ||
| 4060 | size += data->br_stack->nr | ||
| 4061 | * sizeof(struct perf_branch_entry); | ||
| 4062 | } | ||
| 4063 | header->size += size; | ||
| 4064 | } | ||
| 3947 | } | 4065 | } |
| 3948 | 4066 | ||
| 3949 | static void perf_event_output(struct perf_event *event, | 4067 | static void perf_event_output(struct perf_event *event, |
| @@ -4986,7 +5104,7 @@ fail: | |||
| 4986 | return err; | 5104 | return err; |
| 4987 | } | 5105 | } |
| 4988 | 5106 | ||
| 4989 | struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 5107 | struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
| 4990 | 5108 | ||
| 4991 | static void sw_perf_event_destroy(struct perf_event *event) | 5109 | static void sw_perf_event_destroy(struct perf_event *event) |
| 4992 | { | 5110 | { |
| @@ -4994,7 +5112,7 @@ static void sw_perf_event_destroy(struct perf_event *event) | |||
| 4994 | 5112 | ||
| 4995 | WARN_ON(event->parent); | 5113 | WARN_ON(event->parent); |
| 4996 | 5114 | ||
| 4997 | jump_label_dec(&perf_swevent_enabled[event_id]); | 5115 | static_key_slow_dec(&perf_swevent_enabled[event_id]); |
| 4998 | swevent_hlist_put(event); | 5116 | swevent_hlist_put(event); |
| 4999 | } | 5117 | } |
| 5000 | 5118 | ||
| @@ -5005,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) | |||
| 5005 | if (event->attr.type != PERF_TYPE_SOFTWARE) | 5123 | if (event->attr.type != PERF_TYPE_SOFTWARE) |
| 5006 | return -ENOENT; | 5124 | return -ENOENT; |
| 5007 | 5125 | ||
| 5126 | /* | ||
| 5127 | * no branch sampling for software events | ||
| 5128 | */ | ||
| 5129 | if (has_branch_stack(event)) | ||
| 5130 | return -EOPNOTSUPP; | ||
| 5131 | |||
| 5008 | switch (event_id) { | 5132 | switch (event_id) { |
| 5009 | case PERF_COUNT_SW_CPU_CLOCK: | 5133 | case PERF_COUNT_SW_CPU_CLOCK: |
| 5010 | case PERF_COUNT_SW_TASK_CLOCK: | 5134 | case PERF_COUNT_SW_TASK_CLOCK: |
| @@ -5024,13 +5148,18 @@ static int perf_swevent_init(struct perf_event *event) | |||
| 5024 | if (err) | 5148 | if (err) |
| 5025 | return err; | 5149 | return err; |
| 5026 | 5150 | ||
| 5027 | jump_label_inc(&perf_swevent_enabled[event_id]); | 5151 | static_key_slow_inc(&perf_swevent_enabled[event_id]); |
| 5028 | event->destroy = sw_perf_event_destroy; | 5152 | event->destroy = sw_perf_event_destroy; |
| 5029 | } | 5153 | } |
| 5030 | 5154 | ||
| 5031 | return 0; | 5155 | return 0; |
| 5032 | } | 5156 | } |
| 5033 | 5157 | ||
| 5158 | static int perf_swevent_event_idx(struct perf_event *event) | ||
| 5159 | { | ||
| 5160 | return 0; | ||
| 5161 | } | ||
| 5162 | |||
| 5034 | static struct pmu perf_swevent = { | 5163 | static struct pmu perf_swevent = { |
| 5035 | .task_ctx_nr = perf_sw_context, | 5164 | .task_ctx_nr = perf_sw_context, |
| 5036 | 5165 | ||
| @@ -5040,6 +5169,8 @@ static struct pmu perf_swevent = { | |||
| 5040 | .start = perf_swevent_start, | 5169 | .start = perf_swevent_start, |
| 5041 | .stop = perf_swevent_stop, | 5170 | .stop = perf_swevent_stop, |
| 5042 | .read = perf_swevent_read, | 5171 | .read = perf_swevent_read, |
| 5172 | |||
| 5173 | .event_idx = perf_swevent_event_idx, | ||
| 5043 | }; | 5174 | }; |
| 5044 | 5175 | ||
| 5045 | #ifdef CONFIG_EVENT_TRACING | 5176 | #ifdef CONFIG_EVENT_TRACING |
| @@ -5108,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) | |||
| 5108 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 5239 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
| 5109 | return -ENOENT; | 5240 | return -ENOENT; |
| 5110 | 5241 | ||
| 5242 | /* | ||
| 5243 | * no branch sampling for tracepoint events | ||
| 5244 | */ | ||
| 5245 | if (has_branch_stack(event)) | ||
| 5246 | return -EOPNOTSUPP; | ||
| 5247 | |||
| 5111 | err = perf_trace_init(event); | 5248 | err = perf_trace_init(event); |
| 5112 | if (err) | 5249 | if (err) |
| 5113 | return err; | 5250 | return err; |
| @@ -5126,6 +5263,8 @@ static struct pmu perf_tracepoint = { | |||
| 5126 | .start = perf_swevent_start, | 5263 | .start = perf_swevent_start, |
| 5127 | .stop = perf_swevent_stop, | 5264 | .stop = perf_swevent_stop, |
| 5128 | .read = perf_swevent_read, | 5265 | .read = perf_swevent_read, |
| 5266 | |||
| 5267 | .event_idx = perf_swevent_event_idx, | ||
| 5129 | }; | 5268 | }; |
| 5130 | 5269 | ||
| 5131 | static inline void perf_tp_register(void) | 5270 | static inline void perf_tp_register(void) |
| @@ -5331,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
| 5331 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | 5470 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) |
| 5332 | return -ENOENT; | 5471 | return -ENOENT; |
| 5333 | 5472 | ||
| 5473 | /* | ||
| 5474 | * no branch sampling for software events | ||
| 5475 | */ | ||
| 5476 | if (has_branch_stack(event)) | ||
| 5477 | return -EOPNOTSUPP; | ||
| 5478 | |||
| 5334 | perf_swevent_init_hrtimer(event); | 5479 | perf_swevent_init_hrtimer(event); |
| 5335 | 5480 | ||
| 5336 | return 0; | 5481 | return 0; |
| @@ -5345,6 +5490,8 @@ static struct pmu perf_cpu_clock = { | |||
| 5345 | .start = cpu_clock_event_start, | 5490 | .start = cpu_clock_event_start, |
| 5346 | .stop = cpu_clock_event_stop, | 5491 | .stop = cpu_clock_event_stop, |
| 5347 | .read = cpu_clock_event_read, | 5492 | .read = cpu_clock_event_read, |
| 5493 | |||
| 5494 | .event_idx = perf_swevent_event_idx, | ||
| 5348 | }; | 5495 | }; |
| 5349 | 5496 | ||
| 5350 | /* | 5497 | /* |
| @@ -5403,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) | |||
| 5403 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | 5550 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) |
| 5404 | return -ENOENT; | 5551 | return -ENOENT; |
| 5405 | 5552 | ||
| 5553 | /* | ||
| 5554 | * no branch sampling for software events | ||
| 5555 | */ | ||
| 5556 | if (has_branch_stack(event)) | ||
| 5557 | return -EOPNOTSUPP; | ||
| 5558 | |||
| 5406 | perf_swevent_init_hrtimer(event); | 5559 | perf_swevent_init_hrtimer(event); |
| 5407 | 5560 | ||
| 5408 | return 0; | 5561 | return 0; |
| @@ -5417,6 +5570,8 @@ static struct pmu perf_task_clock = { | |||
| 5417 | .start = task_clock_event_start, | 5570 | .start = task_clock_event_start, |
| 5418 | .stop = task_clock_event_stop, | 5571 | .stop = task_clock_event_stop, |
| 5419 | .read = task_clock_event_read, | 5572 | .read = task_clock_event_read, |
| 5573 | |||
| 5574 | .event_idx = perf_swevent_event_idx, | ||
| 5420 | }; | 5575 | }; |
| 5421 | 5576 | ||
| 5422 | static void perf_pmu_nop_void(struct pmu *pmu) | 5577 | static void perf_pmu_nop_void(struct pmu *pmu) |
| @@ -5444,6 +5599,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) | |||
| 5444 | perf_pmu_enable(pmu); | 5599 | perf_pmu_enable(pmu); |
| 5445 | } | 5600 | } |
| 5446 | 5601 | ||
| 5602 | static int perf_event_idx_default(struct perf_event *event) | ||
| 5603 | { | ||
| 5604 | return event->hw.idx + 1; | ||
| 5605 | } | ||
| 5606 | |||
| 5447 | /* | 5607 | /* |
| 5448 | * Ensures all contexts with the same task_ctx_nr have the same | 5608 | * Ensures all contexts with the same task_ctx_nr have the same |
| 5449 | * pmu_cpu_context too. | 5609 | * pmu_cpu_context too. |
| @@ -5530,6 +5690,7 @@ static int pmu_dev_alloc(struct pmu *pmu) | |||
| 5530 | if (!pmu->dev) | 5690 | if (!pmu->dev) |
| 5531 | goto out; | 5691 | goto out; |
| 5532 | 5692 | ||
| 5693 | pmu->dev->groups = pmu->attr_groups; | ||
| 5533 | device_initialize(pmu->dev); | 5694 | device_initialize(pmu->dev); |
| 5534 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | 5695 | ret = dev_set_name(pmu->dev, "%s", pmu->name); |
| 5535 | if (ret) | 5696 | if (ret) |
| @@ -5633,6 +5794,9 @@ got_cpu_context: | |||
| 5633 | pmu->pmu_disable = perf_pmu_nop_void; | 5794 | pmu->pmu_disable = perf_pmu_nop_void; |
| 5634 | } | 5795 | } |
| 5635 | 5796 | ||
| 5797 | if (!pmu->event_idx) | ||
| 5798 | pmu->event_idx = perf_event_idx_default; | ||
| 5799 | |||
| 5636 | list_add_rcu(&pmu->entry, &pmus); | 5800 | list_add_rcu(&pmu->entry, &pmus); |
| 5637 | ret = 0; | 5801 | ret = 0; |
| 5638 | unlock: | 5802 | unlock: |
| @@ -5825,7 +5989,7 @@ done: | |||
| 5825 | 5989 | ||
| 5826 | if (!event->parent) { | 5990 | if (!event->parent) { |
| 5827 | if (event->attach_state & PERF_ATTACH_TASK) | 5991 | if (event->attach_state & PERF_ATTACH_TASK) |
| 5828 | jump_label_inc(&perf_sched_events.key); | 5992 | static_key_slow_inc(&perf_sched_events.key); |
| 5829 | if (event->attr.mmap || event->attr.mmap_data) | 5993 | if (event->attr.mmap || event->attr.mmap_data) |
| 5830 | atomic_inc(&nr_mmap_events); | 5994 | atomic_inc(&nr_mmap_events); |
| 5831 | if (event->attr.comm) | 5995 | if (event->attr.comm) |
| @@ -5839,6 +6003,12 @@ done: | |||
| 5839 | return ERR_PTR(err); | 6003 | return ERR_PTR(err); |
| 5840 | } | 6004 | } |
| 5841 | } | 6005 | } |
| 6006 | if (has_branch_stack(event)) { | ||
| 6007 | static_key_slow_inc(&perf_sched_events.key); | ||
| 6008 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
| 6009 | atomic_inc(&per_cpu(perf_branch_stack_events, | ||
| 6010 | event->cpu)); | ||
| 6011 | } | ||
| 5842 | } | 6012 | } |
| 5843 | 6013 | ||
| 5844 | return event; | 6014 | return event; |
| @@ -5908,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
| 5908 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) | 6078 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) |
| 5909 | return -EINVAL; | 6079 | return -EINVAL; |
| 5910 | 6080 | ||
| 6081 | if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 6082 | u64 mask = attr->branch_sample_type; | ||
| 6083 | |||
| 6084 | /* only using defined bits */ | ||
| 6085 | if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) | ||
| 6086 | return -EINVAL; | ||
| 6087 | |||
| 6088 | /* at least one branch bit must be set */ | ||
| 6089 | if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) | ||
| 6090 | return -EINVAL; | ||
| 6091 | |||
| 6092 | /* kernel level capture: check permissions */ | ||
| 6093 | if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) | ||
| 6094 | && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
| 6095 | return -EACCES; | ||
| 6096 | |||
| 6097 | /* propagate priv level, when not set for branch */ | ||
| 6098 | if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { | ||
| 6099 | |||
| 6100 | /* exclude_kernel checked on syscall entry */ | ||
| 6101 | if (!attr->exclude_kernel) | ||
| 6102 | mask |= PERF_SAMPLE_BRANCH_KERNEL; | ||
| 6103 | |||
| 6104 | if (!attr->exclude_user) | ||
| 6105 | mask |= PERF_SAMPLE_BRANCH_USER; | ||
| 6106 | |||
| 6107 | if (!attr->exclude_hv) | ||
| 6108 | mask |= PERF_SAMPLE_BRANCH_HV; | ||
| 6109 | /* | ||
| 6110 | * adjust user setting (for HW filter setup) | ||
| 6111 | */ | ||
| 6112 | attr->branch_sample_type = mask; | ||
| 6113 | } | ||
| 6114 | } | ||
| 5911 | out: | 6115 | out: |
| 5912 | return ret; | 6116 | return ret; |
| 5913 | 6117 | ||
| @@ -6063,7 +6267,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 6063 | * - that may need work on context switch | 6267 | * - that may need work on context switch |
| 6064 | */ | 6268 | */ |
| 6065 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); | 6269 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); |
| 6066 | jump_label_inc(&perf_sched_events.key); | 6270 | static_key_slow_inc(&perf_sched_events.key); |
| 6067 | } | 6271 | } |
| 6068 | 6272 | ||
| 6069 | /* | 6273 | /* |
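
The perf_copy_attr() additions validate branch_sample_type in three steps: reject undefined bits, require at least one branch-type bit beyond the privilege-level mask, and gate kernel/hv capture behind the usual paranoia check; if the user set no privilege bits at all, the event's own exclude_* settings are folded into the mask so the hardware filter setup sees explicit levels. A self-contained userspace model of that flow (bit values are illustrative, not the real ABI encoding):

    #include <stdbool.h>
    #include <stdio.h>

    /* Userspace model of the branch_sample_type checks; bit values are
     * illustrative, not the real ABI encoding. */
    enum {
        BR_USER   = 1U << 0,
        BR_KERNEL = 1U << 1,
        BR_HV     = 1U << 2,
        BR_ANY    = 1U << 3,    /* stand-in for a branch-type bit */
        BR_MAX    = 1U << 4,
    };
    #define BR_PLM_ALL  (BR_USER | BR_KERNEL | BR_HV)
    #define BR_PERM_PLM (BR_KERNEL | BR_HV)

    static int check_branch_mask(unsigned long long *maskp, bool excl_user,
                                 bool excl_kernel, bool excl_hv,
                                 bool privileged)
    {
        unsigned long long mask = *maskp;

        if (mask & ~(unsigned long long)(BR_MAX - 1))
            return -1;              /* undefined bits: -EINVAL */
        if (!(mask & ~(unsigned long long)BR_PLM_ALL))
            return -1;              /* no branch-type bit: -EINVAL */
        if ((mask & BR_PERM_PLM) && !privileged)
            return -1;              /* kernel/hv capture: -EACCES */

        if (!(mask & BR_PLM_ALL)) { /* propagate the event's priv levels */
            if (!excl_kernel)
                mask |= BR_KERNEL;
            if (!excl_user)
                mask |= BR_USER;
            if (!excl_hv)
                mask |= BR_HV;
            *maskp = mask;
        }
        return 0;
    }

    int main(void)
    {
        unsigned long long mask = BR_ANY;   /* user set no priv bits */

        /* unprivileged, user-only event: mask gains BR_USER */
        if (check_branch_mask(&mask, false, true, true, false) == 0)
            printf("adjusted mask: %#llx\n", mask);
        return 0;
    }
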
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index ee706ce44aa0..bb38c4d3ee12 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
| @@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) | |||
| 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) | 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) |
| 582 | return -ENOENT; | 582 | return -ENOENT; |
| 583 | 583 | ||
| 584 | /* | ||
| 585 | * no branch sampling for breakpoint events | ||
| 586 | */ | ||
| 587 | if (has_branch_stack(bp)) | ||
| 588 | return -EOPNOTSUPP; | ||
| 589 | |||
| 584 | err = register_perf_hw_breakpoint(bp); | 590 | err = register_perf_hw_breakpoint(bp); |
| 585 | if (err) | 591 | if (err) |
| 586 | return err; | 592 | return err; |
| @@ -613,6 +619,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) | |||
| 613 | bp->hw.state = PERF_HES_STOPPED; | 619 | bp->hw.state = PERF_HES_STOPPED; |
| 614 | } | 620 | } |
| 615 | 621 | ||
| 622 | static int hw_breakpoint_event_idx(struct perf_event *bp) | ||
| 623 | { | ||
| 624 | return 0; | ||
| 625 | } | ||
| 626 | |||
| 616 | static struct pmu perf_breakpoint = { | 627 | static struct pmu perf_breakpoint = { |
| 617 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ | 628 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ |
| 618 | 629 | ||
| @@ -622,6 +633,8 @@ static struct pmu perf_breakpoint = { | |||
| 622 | .start = hw_breakpoint_start, | 633 | .start = hw_breakpoint_start, |
| 623 | .stop = hw_breakpoint_stop, | 634 | .stop = hw_breakpoint_stop, |
| 624 | .read = hw_breakpoint_pmu_read, | 635 | .read = hw_breakpoint_pmu_read, |
| 636 | |||
| 637 | .event_idx = hw_breakpoint_event_idx, | ||
| 625 | }; | 638 | }; |
| 626 | 639 | ||
| 627 | int __init init_hw_breakpoint(void) | 640 | int __init init_hw_breakpoint(void) |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 25784d630a12..6080f6bc8c33 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 17 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
| 18 | 18 | ||
| 19 | #include <trace/events/irq.h> | ||
| 20 | |||
| 19 | #include "internals.h" | 21 | #include "internals.h" |
| 20 | 22 | ||
| 21 | /** | 23 | /** |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 01d3b70fc98a..43049192b5ec 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
| 14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
| 15 | #include <linux/jump_label.h> | 15 | #include <linux/static_key.h> |
| 16 | 16 | ||
| 17 | #ifdef HAVE_JUMP_LABEL | 17 | #ifdef HAVE_JUMP_LABEL |
| 18 | 18 | ||
| @@ -29,11 +29,6 @@ void jump_label_unlock(void) | |||
| 29 | mutex_unlock(&jump_label_mutex); | 29 | mutex_unlock(&jump_label_mutex); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | bool jump_label_enabled(struct jump_label_key *key) | ||
| 33 | { | ||
| 34 | return !!atomic_read(&key->enabled); | ||
| 35 | } | ||
| 36 | |||
| 37 | static int jump_label_cmp(const void *a, const void *b) | 32 | static int jump_label_cmp(const void *a, const void *b) |
| 38 | { | 33 | { |
| 39 | const struct jump_entry *jea = a; | 34 | const struct jump_entry *jea = a; |
| @@ -58,56 +53,66 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) | |||
| 58 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); | 53 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); |
| 59 | } | 54 | } |
| 60 | 55 | ||
| 61 | static void jump_label_update(struct jump_label_key *key, int enable); | 56 | static void jump_label_update(struct static_key *key, int enable); |
| 62 | 57 | ||
| 63 | void jump_label_inc(struct jump_label_key *key) | 58 | void static_key_slow_inc(struct static_key *key) |
| 64 | { | 59 | { |
| 65 | if (atomic_inc_not_zero(&key->enabled)) | 60 | if (atomic_inc_not_zero(&key->enabled)) |
| 66 | return; | 61 | return; |
| 67 | 62 | ||
| 68 | jump_label_lock(); | 63 | jump_label_lock(); |
| 69 | if (atomic_read(&key->enabled) == 0) | 64 | if (atomic_read(&key->enabled) == 0) { |
| 70 | jump_label_update(key, JUMP_LABEL_ENABLE); | 65 | if (!jump_label_get_branch_default(key)) |
| 66 | jump_label_update(key, JUMP_LABEL_ENABLE); | ||
| 67 | else | ||
| 68 | jump_label_update(key, JUMP_LABEL_DISABLE); | ||
| 69 | } | ||
| 71 | atomic_inc(&key->enabled); | 70 | atomic_inc(&key->enabled); |
| 72 | jump_label_unlock(); | 71 | jump_label_unlock(); |
| 73 | } | 72 | } |
| 74 | EXPORT_SYMBOL_GPL(jump_label_inc); | 73 | EXPORT_SYMBOL_GPL(static_key_slow_inc); |
| 75 | 74 | ||
| 76 | static void __jump_label_dec(struct jump_label_key *key, | 75 | static void __static_key_slow_dec(struct static_key *key, |
| 77 | unsigned long rate_limit, struct delayed_work *work) | 76 | unsigned long rate_limit, struct delayed_work *work) |
| 78 | { | 77 | { |
| 79 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) | 78 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { |
| 79 | WARN(atomic_read(&key->enabled) < 0, | ||
| 80 | "jump label: negative count!\n"); | ||
| 80 | return; | 81 | return; |
| 82 | } | ||
| 81 | 83 | ||
| 82 | if (rate_limit) { | 84 | if (rate_limit) { |
| 83 | atomic_inc(&key->enabled); | 85 | atomic_inc(&key->enabled); |
| 84 | schedule_delayed_work(work, rate_limit); | 86 | schedule_delayed_work(work, rate_limit); |
| 85 | } else | 87 | } else { |
| 86 | jump_label_update(key, JUMP_LABEL_DISABLE); | 88 | if (!jump_label_get_branch_default(key)) |
| 87 | 89 | jump_label_update(key, JUMP_LABEL_DISABLE); | |
| 90 | else | ||
| 91 | jump_label_update(key, JUMP_LABEL_ENABLE); | ||
| 92 | } | ||
| 88 | jump_label_unlock(); | 93 | jump_label_unlock(); |
| 89 | } | 94 | } |
| 90 | EXPORT_SYMBOL_GPL(jump_label_dec); | ||
| 91 | 95 | ||
| 92 | static void jump_label_update_timeout(struct work_struct *work) | 96 | static void jump_label_update_timeout(struct work_struct *work) |
| 93 | { | 97 | { |
| 94 | struct jump_label_key_deferred *key = | 98 | struct static_key_deferred *key = |
| 95 | container_of(work, struct jump_label_key_deferred, work.work); | 99 | container_of(work, struct static_key_deferred, work.work); |
| 96 | __jump_label_dec(&key->key, 0, NULL); | 100 | __static_key_slow_dec(&key->key, 0, NULL); |
| 97 | } | 101 | } |
| 98 | 102 | ||
| 99 | void jump_label_dec(struct jump_label_key *key) | 103 | void static_key_slow_dec(struct static_key *key) |
| 100 | { | 104 | { |
| 101 | __jump_label_dec(key, 0, NULL); | 105 | __static_key_slow_dec(key, 0, NULL); |
| 102 | } | 106 | } |
| 107 | EXPORT_SYMBOL_GPL(static_key_slow_dec); | ||
| 103 | 108 | ||
| 104 | void jump_label_dec_deferred(struct jump_label_key_deferred *key) | 109 | void static_key_slow_dec_deferred(struct static_key_deferred *key) |
| 105 | { | 110 | { |
| 106 | __jump_label_dec(&key->key, key->timeout, &key->work); | 111 | __static_key_slow_dec(&key->key, key->timeout, &key->work); |
| 107 | } | 112 | } |
| 113 | EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); | ||
| 108 | 114 | ||
| 109 | 115 | void jump_label_rate_limit(struct static_key_deferred *key, | |
| 110 | void jump_label_rate_limit(struct jump_label_key_deferred *key, | ||
| 111 | unsigned long rl) | 116 | unsigned long rl) |
| 112 | { | 117 | { |
| 113 | key->timeout = rl; | 118 | key->timeout = rl; |
| @@ -150,7 +155,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry | |||
| 150 | arch_jump_label_transform(entry, type); | 155 | arch_jump_label_transform(entry, type); |
| 151 | } | 156 | } |
| 152 | 157 | ||
| 153 | static void __jump_label_update(struct jump_label_key *key, | 158 | static void __jump_label_update(struct static_key *key, |
| 154 | struct jump_entry *entry, | 159 | struct jump_entry *entry, |
| 155 | struct jump_entry *stop, int enable) | 160 | struct jump_entry *stop, int enable) |
| 156 | { | 161 | { |
| @@ -167,27 +172,40 @@ static void __jump_label_update(struct jump_label_key *key, | |||
| 167 | } | 172 | } |
| 168 | } | 173 | } |
| 169 | 174 | ||
| 175 | static enum jump_label_type jump_label_type(struct static_key *key) | ||
| 176 | { | ||
| 177 | bool true_branch = jump_label_get_branch_default(key); | ||
| 178 | bool state = static_key_enabled(key); | ||
| 179 | |||
| 180 | if ((!true_branch && state) || (true_branch && !state)) | ||
| 181 | return JUMP_LABEL_ENABLE; | ||
| 182 | |||
| 183 | return JUMP_LABEL_DISABLE; | ||
| 184 | } | ||
| 185 | |||
| 170 | void __init jump_label_init(void) | 186 | void __init jump_label_init(void) |
| 171 | { | 187 | { |
| 172 | struct jump_entry *iter_start = __start___jump_table; | 188 | struct jump_entry *iter_start = __start___jump_table; |
| 173 | struct jump_entry *iter_stop = __stop___jump_table; | 189 | struct jump_entry *iter_stop = __stop___jump_table; |
| 174 | struct jump_label_key *key = NULL; | 190 | struct static_key *key = NULL; |
| 175 | struct jump_entry *iter; | 191 | struct jump_entry *iter; |
| 176 | 192 | ||
| 177 | jump_label_lock(); | 193 | jump_label_lock(); |
| 178 | jump_label_sort_entries(iter_start, iter_stop); | 194 | jump_label_sort_entries(iter_start, iter_stop); |
| 179 | 195 | ||
| 180 | for (iter = iter_start; iter < iter_stop; iter++) { | 196 | for (iter = iter_start; iter < iter_stop; iter++) { |
| 181 | struct jump_label_key *iterk; | 197 | struct static_key *iterk; |
| 182 | 198 | ||
| 183 | iterk = (struct jump_label_key *)(unsigned long)iter->key; | 199 | iterk = (struct static_key *)(unsigned long)iter->key; |
| 184 | arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? | 200 | arch_jump_label_transform_static(iter, jump_label_type(iterk)); |
| 185 | JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); | ||
| 186 | if (iterk == key) | 201 | if (iterk == key) |
| 187 | continue; | 202 | continue; |
| 188 | 203 | ||
| 189 | key = iterk; | 204 | key = iterk; |
| 190 | key->entries = iter; | 205 | /* |
| 206 | * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. | ||
| 207 | */ | ||
| 208 | *((unsigned long *)&key->entries) += (unsigned long)iter; | ||
| 191 | #ifdef CONFIG_MODULES | 209 | #ifdef CONFIG_MODULES |
| 192 | key->next = NULL; | 210 | key->next = NULL; |
| 193 | #endif | 211 | #endif |
| @@ -197,8 +215,8 @@ void __init jump_label_init(void) | |||
| 197 | 215 | ||
| 198 | #ifdef CONFIG_MODULES | 216 | #ifdef CONFIG_MODULES |
| 199 | 217 | ||
| 200 | struct jump_label_mod { | 218 | struct static_key_mod { |
| 201 | struct jump_label_mod *next; | 219 | struct static_key_mod *next; |
| 202 | struct jump_entry *entries; | 220 | struct jump_entry *entries; |
| 203 | struct module *mod; | 221 | struct module *mod; |
| 204 | }; | 222 | }; |
| @@ -218,9 +236,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end) | |||
| 218 | start, end); | 236 | start, end); |
| 219 | } | 237 | } |
| 220 | 238 | ||
| 221 | static void __jump_label_mod_update(struct jump_label_key *key, int enable) | 239 | static void __jump_label_mod_update(struct static_key *key, int enable) |
| 222 | { | 240 | { |
| 223 | struct jump_label_mod *mod = key->next; | 241 | struct static_key_mod *mod = key->next; |
| 224 | 242 | ||
| 225 | while (mod) { | 243 | while (mod) { |
| 226 | struct module *m = mod->mod; | 244 | struct module *m = mod->mod; |
| @@ -251,11 +269,7 @@ void jump_label_apply_nops(struct module *mod) | |||
| 251 | return; | 269 | return; |
| 252 | 270 | ||
| 253 | for (iter = iter_start; iter < iter_stop; iter++) { | 271 | for (iter = iter_start; iter < iter_stop; iter++) { |
| 254 | struct jump_label_key *iterk; | 272 | arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); |
| 255 | |||
| 256 | iterk = (struct jump_label_key *)(unsigned long)iter->key; | ||
| 257 | arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? | ||
| 258 | JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); | ||
| 259 | } | 273 | } |
| 260 | } | 274 | } |
| 261 | 275 | ||
| @@ -264,8 +278,8 @@ static int jump_label_add_module(struct module *mod) | |||
| 264 | struct jump_entry *iter_start = mod->jump_entries; | 278 | struct jump_entry *iter_start = mod->jump_entries; |
| 265 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; | 279 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; |
| 266 | struct jump_entry *iter; | 280 | struct jump_entry *iter; |
| 267 | struct jump_label_key *key = NULL; | 281 | struct static_key *key = NULL; |
| 268 | struct jump_label_mod *jlm; | 282 | struct static_key_mod *jlm; |
| 269 | 283 | ||
| 270 | /* if the module doesn't have jump label entries, just return */ | 284 | /* if the module doesn't have jump label entries, just return */ |
| 271 | if (iter_start == iter_stop) | 285 | if (iter_start == iter_stop) |
| @@ -274,28 +288,30 @@ static int jump_label_add_module(struct module *mod) | |||
| 274 | jump_label_sort_entries(iter_start, iter_stop); | 288 | jump_label_sort_entries(iter_start, iter_stop); |
| 275 | 289 | ||
| 276 | for (iter = iter_start; iter < iter_stop; iter++) { | 290 | for (iter = iter_start; iter < iter_stop; iter++) { |
| 277 | if (iter->key == (jump_label_t)(unsigned long)key) | 291 | struct static_key *iterk; |
| 278 | continue; | ||
| 279 | 292 | ||
| 280 | key = (struct jump_label_key *)(unsigned long)iter->key; | 293 | iterk = (struct static_key *)(unsigned long)iter->key; |
| 294 | if (iterk == key) | ||
| 295 | continue; | ||
| 281 | 296 | ||
| 297 | key = iterk; | ||
| 282 | if (__module_address(iter->key) == mod) { | 298 | if (__module_address(iter->key) == mod) { |
| 283 | atomic_set(&key->enabled, 0); | 299 | /* |
| 284 | key->entries = iter; | 300 | * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. |
| 301 | */ | ||
| 302 | *((unsigned long *)&key->entries) += (unsigned long)iter; | ||
| 285 | key->next = NULL; | 303 | key->next = NULL; |
| 286 | continue; | 304 | continue; |
| 287 | } | 305 | } |
| 288 | 306 | jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); | |
| 289 | jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); | ||
| 290 | if (!jlm) | 307 | if (!jlm) |
| 291 | return -ENOMEM; | 308 | return -ENOMEM; |
| 292 | |||
| 293 | jlm->mod = mod; | 309 | jlm->mod = mod; |
| 294 | jlm->entries = iter; | 310 | jlm->entries = iter; |
| 295 | jlm->next = key->next; | 311 | jlm->next = key->next; |
| 296 | key->next = jlm; | 312 | key->next = jlm; |
| 297 | 313 | ||
| 298 | if (jump_label_enabled(key)) | 314 | if (jump_label_type(key) == JUMP_LABEL_ENABLE) |
| 299 | __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); | 315 | __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); |
| 300 | } | 316 | } |
| 301 | 317 | ||
| @@ -307,14 +323,14 @@ static void jump_label_del_module(struct module *mod) | |||
| 307 | struct jump_entry *iter_start = mod->jump_entries; | 323 | struct jump_entry *iter_start = mod->jump_entries; |
| 308 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; | 324 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; |
| 309 | struct jump_entry *iter; | 325 | struct jump_entry *iter; |
| 310 | struct jump_label_key *key = NULL; | 326 | struct static_key *key = NULL; |
| 311 | struct jump_label_mod *jlm, **prev; | 327 | struct static_key_mod *jlm, **prev; |
| 312 | 328 | ||
| 313 | for (iter = iter_start; iter < iter_stop; iter++) { | 329 | for (iter = iter_start; iter < iter_stop; iter++) { |
| 314 | if (iter->key == (jump_label_t)(unsigned long)key) | 330 | if (iter->key == (jump_label_t)(unsigned long)key) |
| 315 | continue; | 331 | continue; |
| 316 | 332 | ||
| 317 | key = (struct jump_label_key *)(unsigned long)iter->key; | 333 | key = (struct static_key *)(unsigned long)iter->key; |
| 318 | 334 | ||
| 319 | if (__module_address(iter->key) == mod) | 335 | if (__module_address(iter->key) == mod) |
| 320 | continue; | 336 | continue; |
| @@ -416,12 +432,13 @@ int jump_label_text_reserved(void *start, void *end) | |||
| 416 | return ret; | 432 | return ret; |
| 417 | } | 433 | } |
| 418 | 434 | ||
| 419 | static void jump_label_update(struct jump_label_key *key, int enable) | 435 | static void jump_label_update(struct static_key *key, int enable) |
| 420 | { | 436 | { |
| 421 | struct jump_entry *entry = key->entries, *stop = __stop___jump_table; | 437 | struct jump_entry *stop = __stop___jump_table; |
| 438 | struct jump_entry *entry = jump_label_get_entries(key); | ||
| 422 | 439 | ||
| 423 | #ifdef CONFIG_MODULES | 440 | #ifdef CONFIG_MODULES |
| 424 | struct module *mod = __module_address((jump_label_t)key); | 441 | struct module *mod = __module_address((unsigned long)key); |
| 425 | 442 | ||
| 426 | __jump_label_mod_update(key, enable); | 443 | __jump_label_mod_update(key, enable); |
| 427 | 444 | ||
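Note on the jump_label.c hunks: besides the s/jump_label_key/static_key/ rename, the change packs the initial branch direction (JUMP_LABEL_TRUE_BRANCH) into the low bit of key->entries. That is why the module path adds iter into the pointer instead of assigning it, and why jump_label_update() now fetches entries through jump_label_get_entries(). A minimal standalone sketch of this low-bit packing idiom; the struct layouts are simplified and set_entries() is an illustrative stand-in for the module hunk:

	#include <stdio.h>

	#define JUMP_LABEL_TRUE_BRANCH 1UL	/* flag kept in the pointer's low bit */

	struct jump_entry { unsigned long code, target, key; };

	struct static_key {
		int enabled;
		struct jump_entry *entries;	/* low bit doubles as the branch flag */
	};

	/* mask the flag off to recover the real pointer, as the diff does */
	static struct jump_entry *jump_label_get_entries(struct static_key *key)
	{
		return (struct jump_entry *)
			((unsigned long)key->entries & ~JUMP_LABEL_TRUE_BRANCH);
	}

	/* attach entries while preserving the flag: addition is safe because
	 * jump entries are word-aligned, so the low bit of iter is zero */
	static void set_entries(struct static_key *key, struct jump_entry *iter)
	{
		*((unsigned long *)&key->entries) += (unsigned long)iter;
	}

	int main(void)
	{
		static struct jump_entry e[1];
		struct static_key key = {
			0, (struct jump_entry *)JUMP_LABEL_TRUE_BRANCH
		};

		set_entries(&key, e);
		printf("flag=%lu entries=%p e=%p\n",
		       (unsigned long)key.entries & JUMP_LABEL_TRUE_BRANCH,
		       (void *)jump_label_get_entries(&key), (void *)e);
		return 0;
	}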
diff --git a/kernel/printk.c b/kernel/printk.c index 32690a0b7a18..0b3ea2cbd5fb 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -44,6 +44,9 @@ | |||
| 44 | 44 | ||
| 45 | #include <asm/uaccess.h> | 45 | #include <asm/uaccess.h> |
| 46 | 46 | ||
| 47 | #define CREATE_TRACE_POINTS | ||
| 48 | #include <trace/events/printk.h> | ||
| 49 | |||
| 47 | /* | 50 | /* |
| 48 | * Architectures can override it: | 51 | * Architectures can override it: |
| 49 | */ | 52 | */ |
| @@ -542,6 +545,8 @@ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | |||
| 542 | static void _call_console_drivers(unsigned start, | 545 | static void _call_console_drivers(unsigned start, |
| 543 | unsigned end, int msg_log_level) | 546 | unsigned end, int msg_log_level) |
| 544 | { | 547 | { |
| 548 | trace_console(&LOG_BUF(0), start, end, log_buf_len); | ||
| 549 | |||
| 545 | if ((msg_log_level < console_loglevel || ignore_loglevel) && | 550 | if ((msg_log_level < console_loglevel || ignore_loglevel) && |
| 546 | console_drivers && start != end) { | 551 | console_drivers && start != end) { |
| 547 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { | 552 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { |
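Note on the printk.c hunks: the new trace_console event fires in _call_console_drivers() before the loglevel check, so every message headed for the consoles is visible to ftrace/perf even when the console itself filters it out. CREATE_TRACE_POINTS must be defined in exactly one compilation unit before the event header is included, which is why it appears here rather than in a header. A rough skeleton of such an event definition; the field layout is illustrative (guard and define_trace boilerplate omitted) and it deliberately ignores the log-buffer wraparound the real trace/events/printk.h has to handle:

	#include <linux/tracepoint.h>

	/* illustrative TRACE_EVENT skeleton, not the kernel's exact text */
	TRACE_EVENT(console,
		TP_PROTO(const char *log_buf, unsigned start, unsigned end,
			 unsigned log_buf_len),
		TP_ARGS(log_buf, start, end, log_buf_len),
		TP_STRUCT__entry(
			__dynamic_array(char, msg, end - start + 1)
		),
		TP_fast_assign(
			/* simplified: assumes [start, end) is contiguous */
			memcpy(__get_dynamic_array(msg), log_buf + start, end - start);
			((char *)__get_dynamic_array(msg))[end - start] = 0;
		),
		TP_printk("%s", __get_str(msg))
	);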
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b342f57879e6..6c41ba49767a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v) | |||
| 162 | 162 | ||
| 163 | #ifdef HAVE_JUMP_LABEL | 163 | #ifdef HAVE_JUMP_LABEL |
| 164 | 164 | ||
| 165 | #define jump_label_key__true jump_label_key_enabled | 165 | #define jump_label_key__true STATIC_KEY_INIT_TRUE |
| 166 | #define jump_label_key__false jump_label_key_disabled | 166 | #define jump_label_key__false STATIC_KEY_INIT_FALSE |
| 167 | 167 | ||
| 168 | #define SCHED_FEAT(name, enabled) \ | 168 | #define SCHED_FEAT(name, enabled) \ |
| 169 | jump_label_key__##enabled , | 169 | jump_label_key__##enabled , |
| 170 | 170 | ||
| 171 | struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | 171 | struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { |
| 172 | #include "features.h" | 172 | #include "features.h" |
| 173 | }; | 173 | }; |
| 174 | 174 | ||
| @@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | |||
| 176 | 176 | ||
| 177 | static void sched_feat_disable(int i) | 177 | static void sched_feat_disable(int i) |
| 178 | { | 178 | { |
| 179 | if (jump_label_enabled(&sched_feat_keys[i])) | 179 | if (static_key_enabled(&sched_feat_keys[i])) |
| 180 | jump_label_dec(&sched_feat_keys[i]); | 180 | static_key_slow_dec(&sched_feat_keys[i]); |
| 181 | } | 181 | } |
| 182 | 182 | ||
| 183 | static void sched_feat_enable(int i) | 183 | static void sched_feat_enable(int i) |
| 184 | { | 184 | { |
| 185 | if (!jump_label_enabled(&sched_feat_keys[i])) | 185 | if (!static_key_enabled(&sched_feat_keys[i])) |
| 186 | jump_label_inc(&sched_feat_keys[i]); | 186 | static_key_slow_inc(&sched_feat_keys[i]); |
| 187 | } | 187 | } |
| 188 | #else | 188 | #else |
| 189 | static void sched_feat_disable(int i) { }; | 189 | static void sched_feat_disable(int i) { }; |
| @@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 894 | delta -= irq_delta; | 894 | delta -= irq_delta; |
| 895 | #endif | 895 | #endif |
| 896 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | 896 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING |
| 897 | if (static_branch((¶virt_steal_rq_enabled))) { | 897 | if (static_key_false((¶virt_steal_rq_enabled))) { |
| 898 | u64 st; | 898 | u64 st; |
| 899 | 899 | ||
| 900 | steal = paravirt_steal_clock(cpu_of(rq)); | 900 | steal = paravirt_steal_clock(cpu_of(rq)); |
| @@ -2755,7 +2755,7 @@ void account_idle_time(cputime_t cputime) | |||
| 2755 | static __always_inline bool steal_account_process_tick(void) | 2755 | static __always_inline bool steal_account_process_tick(void) |
| 2756 | { | 2756 | { |
| 2757 | #ifdef CONFIG_PARAVIRT | 2757 | #ifdef CONFIG_PARAVIRT |
| 2758 | if (static_branch(¶virt_steal_enabled)) { | 2758 | if (static_key_false(¶virt_steal_enabled)) { |
| 2759 | u64 steal, st = 0; | 2759 | u64 steal, st = 0; |
| 2760 | 2760 | ||
| 2761 | steal = paravirt_steal_clock(smp_processor_id()); | 2761 | steal = paravirt_steal_clock(smp_processor_id()); |
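The static_branch() to static_key_false() conversions in this file are the consumer side of the rename: the new name states the key's default value, so a reader knows the guarded block is the unlikely, out-of-line path until the key is flipped at runtime. A hedged usage sketch of the renamed API (do_rare_accounting() is a placeholder, not a kernel function):

	#include <linux/static_key.h>

	static struct static_key my_feature = STATIC_KEY_INIT_FALSE;

	void hot_path(void)
	{
		/* compiles to a nop until the key is enabled, then a jump */
		if (static_key_false(&my_feature))
			do_rare_accounting();
	}

	void enable_feature(void)  { static_key_slow_inc(&my_feature); }
	void disable_feature(void) { static_key_slow_dec(&my_feature); }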
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aca16b843b7e..fd974faf467d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -1401,20 +1401,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
| 1401 | #ifdef CONFIG_CFS_BANDWIDTH | 1401 | #ifdef CONFIG_CFS_BANDWIDTH |
| 1402 | 1402 | ||
| 1403 | #ifdef HAVE_JUMP_LABEL | 1403 | #ifdef HAVE_JUMP_LABEL |
| 1404 | static struct jump_label_key __cfs_bandwidth_used; | 1404 | static struct static_key __cfs_bandwidth_used; |
| 1405 | 1405 | ||
| 1406 | static inline bool cfs_bandwidth_used(void) | 1406 | static inline bool cfs_bandwidth_used(void) |
| 1407 | { | 1407 | { |
| 1408 | return static_branch(&__cfs_bandwidth_used); | 1408 | return static_key_false(&__cfs_bandwidth_used); |
| 1409 | } | 1409 | } |
| 1410 | 1410 | ||
| 1411 | void account_cfs_bandwidth_used(int enabled, int was_enabled) | 1411 | void account_cfs_bandwidth_used(int enabled, int was_enabled) |
| 1412 | { | 1412 | { |
| 1413 | /* only need to count groups transitioning between enabled/!enabled */ | 1413 | /* only need to count groups transitioning between enabled/!enabled */ |
| 1414 | if (enabled && !was_enabled) | 1414 | if (enabled && !was_enabled) |
| 1415 | jump_label_inc(&__cfs_bandwidth_used); | 1415 | static_key_slow_inc(&__cfs_bandwidth_used); |
| 1416 | else if (!enabled && was_enabled) | 1416 | else if (!enabled && was_enabled) |
| 1417 | jump_label_dec(&__cfs_bandwidth_used); | 1417 | static_key_slow_dec(&__cfs_bandwidth_used); |
| 1418 | } | 1418 | } |
| 1419 | #else /* HAVE_JUMP_LABEL */ | 1419 | #else /* HAVE_JUMP_LABEL */ |
| 1420 | static bool cfs_bandwidth_used(void) | 1420 | static bool cfs_bandwidth_used(void) |
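The CFS bandwidth conversion also leans on the inc/dec API being reference counted: only the 0->1 and 1->0 transitions patch any code, so multiple task groups can toggle bandwidth control without repeated text modification. Sketched semantics (illustrative):

	/* Reference-count behavior of the slow inc/dec pair:
	 *
	 *   static_key_slow_inc(&k);   // 0 -> 1: jump sites patched in
	 *   static_key_slow_inc(&k);   // 1 -> 2: atomic count only, no patching
	 *   static_key_slow_dec(&k);   // 2 -> 1: atomic count only
	 *   static_key_slow_dec(&k);   // 1 -> 0: jump sites patched back to nops
	 */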
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..b4cd6d8ea150 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 611 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | 611 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
| 612 | */ | 612 | */ |
| 613 | #ifdef CONFIG_SCHED_DEBUG | 613 | #ifdef CONFIG_SCHED_DEBUG |
| 614 | # include <linux/jump_label.h> | 614 | # include <linux/static_key.h> |
| 615 | # define const_debug __read_mostly | 615 | # define const_debug __read_mostly |
| 616 | #else | 616 | #else |
| 617 | # define const_debug const | 617 | # define const_debug const |
| @@ -630,18 +630,18 @@ enum { | |||
| 630 | #undef SCHED_FEAT | 630 | #undef SCHED_FEAT |
| 631 | 631 | ||
| 632 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) | 632 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) |
| 633 | static __always_inline bool static_branch__true(struct jump_label_key *key) | 633 | static __always_inline bool static_branch__true(struct static_key *key) |
| 634 | { | 634 | { |
| 635 | return likely(static_branch(key)); /* Not out of line branch. */ | 635 | return static_key_true(key); /* Not out of line branch. */ |
| 636 | } | 636 | } |
| 637 | 637 | ||
| 638 | static __always_inline bool static_branch__false(struct jump_label_key *key) | 638 | static __always_inline bool static_branch__false(struct static_key *key) |
| 639 | { | 639 | { |
| 640 | return unlikely(static_branch(key)); /* Out of line branch. */ | 640 | return static_key_false(key); /* Out of line branch. */ |
| 641 | } | 641 | } |
| 642 | 642 | ||
| 643 | #define SCHED_FEAT(name, enabled) \ | 643 | #define SCHED_FEAT(name, enabled) \ |
| 644 | static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | 644 | static __always_inline bool static_branch_##name(struct static_key *key) \ |
| 645 | { \ | 645 | { \ |
| 646 | return static_branch__##enabled(key); \ | 646 | return static_branch__##enabled(key); \ |
| 647 | } | 647 | } |
| @@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | |||
| 650 | 650 | ||
| 651 | #undef SCHED_FEAT | 651 | #undef SCHED_FEAT |
| 652 | 652 | ||
| 653 | extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; | 653 | extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; |
| 654 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) | 654 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) |
| 655 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ | 655 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ |
| 656 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) | 656 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) |
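With the types renamed, the SCHED_FEAT X-macro still generates one always-inline predicate per feature; true-default features go through static_key_true() (in-line, likely) and false-default ones through static_key_false() (out-of-line, unlikely). Roughly what the preprocessor emits for a feature declared as SCHED_FEAT(LB_BIAS, true) (illustrative expansion, not literal output):

	static __always_inline bool static_branch_LB_BIAS(struct static_key *key)
	{
		return static_key_true(key);	/* defaults on: likely path */
	}

	/* call sites then read as a single patched branch: */
	#define sched_feat(x) \
		(static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))

	/* e.g.  if (sched_feat(LB_BIAS)) { ... } */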
diff --git a/kernel/signal.c b/kernel/signal.c index c73c4284160e..8511e39813c7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 1054 | struct sigpending *pending; | 1054 | struct sigpending *pending; |
| 1055 | struct sigqueue *q; | 1055 | struct sigqueue *q; |
| 1056 | int override_rlimit; | 1056 | int override_rlimit; |
| 1057 | 1057 | int ret = 0, result; | |
| 1058 | trace_signal_generate(sig, info, t); | ||
| 1059 | 1058 | ||
| 1060 | assert_spin_locked(&t->sighand->siglock); | 1059 | assert_spin_locked(&t->sighand->siglock); |
| 1061 | 1060 | ||
| 1061 | result = TRACE_SIGNAL_IGNORED; | ||
| 1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) | 1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) |
| 1063 | return 0; | 1063 | goto ret; |
| 1064 | 1064 | ||
| 1065 | pending = group ? &t->signal->shared_pending : &t->pending; | 1065 | pending = group ? &t->signal->shared_pending : &t->pending; |
| 1066 | /* | 1066 | /* |
| @@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 1068 | * exactly one non-rt signal, so that we can get more | 1068 | * exactly one non-rt signal, so that we can get more |
| 1069 | * detailed information about the cause of the signal. | 1069 | * detailed information about the cause of the signal. |
| 1070 | */ | 1070 | */ |
| 1071 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
| 1071 | if (legacy_queue(pending, sig)) | 1072 | if (legacy_queue(pending, sig)) |
| 1072 | return 0; | 1073 | goto ret; |
| 1074 | |||
| 1075 | result = TRACE_SIGNAL_DELIVERED; | ||
| 1073 | /* | 1076 | /* |
| 1074 | * fast-pathed signals for kernel-internal things like SIGSTOP | 1077 | * fast-pathed signals for kernel-internal things like SIGSTOP |
| 1075 | * or SIGKILL. | 1078 | * or SIGKILL. |
| @@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
| 1127 | * signal was rt and sent by user using something | 1130 | * signal was rt and sent by user using something |
| 1128 | * other than kill(). | 1131 | * other than kill(). |
| 1129 | */ | 1132 | */ |
| 1130 | trace_signal_overflow_fail(sig, group, info); | 1133 | result = TRACE_SIGNAL_OVERFLOW_FAIL; |
| 1131 | return -EAGAIN; | 1134 | ret = -EAGAIN; |
| 1135 | goto ret; | ||
| 1132 | } else { | 1136 | } else { |
| 1133 | /* | 1137 | /* |
| 1134 | * This is a silent loss of information. We still | 1138 | * This is a silent loss of information. We still |
| 1135 | * send the signal, but the *info bits are lost. | 1139 | * send the signal, but the *info bits are lost. |
| 1136 | */ | 1140 | */ |
| 1137 | trace_signal_lose_info(sig, group, info); | 1141 | result = TRACE_SIGNAL_LOSE_INFO; |
| 1138 | } | 1142 | } |
| 1139 | } | 1143 | } |
| 1140 | 1144 | ||
| @@ -1142,7 +1146,9 @@ out_set: | |||
| 1142 | signalfd_notify(t, sig); | 1146 | signalfd_notify(t, sig); |
| 1143 | sigaddset(&pending->signal, sig); | 1147 | sigaddset(&pending->signal, sig); |
| 1144 | complete_signal(sig, t, group); | 1148 | complete_signal(sig, t, group); |
| 1145 | return 0; | 1149 | ret: |
| 1150 | trace_signal_generate(sig, info, t, group, result); | ||
| 1151 | return ret; | ||
| 1146 | } | 1152 | } |
| 1147 | 1153 | ||
| 1148 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | 1154 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, |
| @@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
| 1585 | int sig = q->info.si_signo; | 1591 | int sig = q->info.si_signo; |
| 1586 | struct sigpending *pending; | 1592 | struct sigpending *pending; |
| 1587 | unsigned long flags; | 1593 | unsigned long flags; |
| 1588 | int ret; | 1594 | int ret, result; |
| 1589 | 1595 | ||
| 1590 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1596 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
| 1591 | 1597 | ||
| @@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
| 1594 | goto ret; | 1600 | goto ret; |
| 1595 | 1601 | ||
| 1596 | ret = 1; /* the signal is ignored */ | 1602 | ret = 1; /* the signal is ignored */ |
| 1603 | result = TRACE_SIGNAL_IGNORED; | ||
| 1597 | if (!prepare_signal(sig, t, 0)) | 1604 | if (!prepare_signal(sig, t, 0)) |
| 1598 | goto out; | 1605 | goto out; |
| 1599 | 1606 | ||
| @@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
| 1605 | */ | 1612 | */ |
| 1606 | BUG_ON(q->info.si_code != SI_TIMER); | 1613 | BUG_ON(q->info.si_code != SI_TIMER); |
| 1607 | q->info.si_overrun++; | 1614 | q->info.si_overrun++; |
| 1615 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
| 1608 | goto out; | 1616 | goto out; |
| 1609 | } | 1617 | } |
| 1610 | q->info.si_overrun = 0; | 1618 | q->info.si_overrun = 0; |
| @@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
| 1614 | list_add_tail(&q->list, &pending->list); | 1622 | list_add_tail(&q->list, &pending->list); |
| 1615 | sigaddset(&pending->signal, sig); | 1623 | sigaddset(&pending->signal, sig); |
| 1616 | complete_signal(sig, t, group); | 1624 | complete_signal(sig, t, group); |
| 1625 | result = TRACE_SIGNAL_DELIVERED; | ||
| 1617 | out: | 1626 | out: |
| 1627 | trace_signal_generate(sig, &q->info, t, group, result); | ||
| 1618 | unlock_task_sighand(t, &flags); | 1628 | unlock_task_sighand(t, &flags); |
| 1619 | ret: | 1629 | ret: |
| 1620 | return ret; | 1630 | return ret; |
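The signal.c rework trades three tracepoints (signal_generate on entry, signal_overflow_fail, signal_lose_info) for a single signal_generate call at a unified exit, carrying a result code plus the group flag, so even ignored and dropped signals are traced exactly once. The control-flow shape, reduced to a sketch (pending_queue_of() is a placeholder for the group/private selection done inline above):

	static int send_one(int sig, struct siginfo *info,
			    struct task_struct *t, int group)
	{
		int ret = 0, result = TRACE_SIGNAL_IGNORED;

		if (!prepare_signal(sig, t, 0))
			goto out;			/* traced as IGNORED */

		result = TRACE_SIGNAL_ALREADY_PENDING;
		if (legacy_queue(pending_queue_of(t, group), sig))
			goto out;			/* traced as ALREADY_PENDING */

		result = TRACE_SIGNAL_DELIVERED;
		/* ... queue the signal, possibly downgrading result to
		 *     TRACE_SIGNAL_OVERFLOW_FAIL or TRACE_SIGNAL_LOSE_INFO ... */
	out:
		trace_signal_generate(sig, info, t, group, result);
		return ret;
	}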
diff --git a/kernel/softirq.c b/kernel/softirq.c index c82d95a022ef..8afc6a8d4d7c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -375,6 +375,12 @@ void raise_softirq(unsigned int nr) | |||
| 375 | local_irq_restore(flags); | 375 | local_irq_restore(flags); |
| 376 | } | 376 | } |
| 377 | 377 | ||
| 378 | void __raise_softirq_irqoff(unsigned int nr) | ||
| 379 | { | ||
| 380 | trace_softirq_raise(nr); | ||
| 381 | or_softirq_pending(1UL << nr); | ||
| 382 | } | ||
| 383 | |||
| 378 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | 384 | void open_softirq(int nr, void (*action)(struct softirq_action *)) |
| 379 | { | 385 | { |
| 380 | softirq_vec[nr].action = action; | 386 | softirq_vec[nr].action = action; |
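__raise_softirq_irqoff() used to be a static inline in interrupt.h; making it a real function here lets it call trace_softirq_raise() without pulling tracepoint headers into every interrupt.h user. The _irqoff suffix is a contract, not a hint: the caller must already have interrupts disabled. Sketch of a correct call site (kick_tasklet() is a placeholder name):

	static void kick_tasklet(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);	/* traces, then sets the pending bit */
		local_irq_restore(flags);
	}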
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 683d559a0eef..867bd1dd2dd0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -62,6 +62,8 @@ | |||
| 62 | #define FTRACE_HASH_DEFAULT_BITS 10 | 62 | #define FTRACE_HASH_DEFAULT_BITS 10 |
| 63 | #define FTRACE_HASH_MAX_BITS 12 | 63 | #define FTRACE_HASH_MAX_BITS 12 |
| 64 | 64 | ||
| 65 | #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) | ||
| 66 | |||
| 65 | /* ftrace_enabled is a method to turn ftrace on or off */ | 67 | /* ftrace_enabled is a method to turn ftrace on or off */ |
| 66 | int ftrace_enabled __read_mostly; | 68 | int ftrace_enabled __read_mostly; |
| 67 | static int last_ftrace_enabled; | 69 | static int last_ftrace_enabled; |
| @@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { | |||
| 89 | }; | 91 | }; |
| 90 | 92 | ||
| 91 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; | 93 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; |
| 94 | static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; | ||
| 92 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; | 95 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; |
| 93 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | 96 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; |
| 94 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; | 97 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; |
| 95 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; | 98 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; |
| 96 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; | 99 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; |
| 97 | static struct ftrace_ops global_ops; | 100 | static struct ftrace_ops global_ops; |
| 101 | static struct ftrace_ops control_ops; | ||
| 98 | 102 | ||
| 99 | static void | 103 | static void |
| 100 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); | 104 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); |
| @@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) | |||
| 168 | } | 172 | } |
| 169 | #endif | 173 | #endif |
| 170 | 174 | ||
| 175 | static void control_ops_disable_all(struct ftrace_ops *ops) | ||
| 176 | { | ||
| 177 | int cpu; | ||
| 178 | |||
| 179 | for_each_possible_cpu(cpu) | ||
| 180 | *per_cpu_ptr(ops->disabled, cpu) = 1; | ||
| 181 | } | ||
| 182 | |||
| 183 | static int control_ops_alloc(struct ftrace_ops *ops) | ||
| 184 | { | ||
| 185 | int __percpu *disabled; | ||
| 186 | |||
| 187 | disabled = alloc_percpu(int); | ||
| 188 | if (!disabled) | ||
| 189 | return -ENOMEM; | ||
| 190 | |||
| 191 | ops->disabled = disabled; | ||
| 192 | control_ops_disable_all(ops); | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | static void control_ops_free(struct ftrace_ops *ops) | ||
| 197 | { | ||
| 198 | free_percpu(ops->disabled); | ||
| 199 | } | ||
| 200 | |||
| 171 | static void update_global_ops(void) | 201 | static void update_global_ops(void) |
| 172 | { | 202 | { |
| 173 | ftrace_func_t func; | 203 | ftrace_func_t func; |
| @@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) | |||
| 259 | return 0; | 289 | return 0; |
| 260 | } | 290 | } |
| 261 | 291 | ||
| 292 | static void add_ftrace_list_ops(struct ftrace_ops **list, | ||
| 293 | struct ftrace_ops *main_ops, | ||
| 294 | struct ftrace_ops *ops) | ||
| 295 | { | ||
| 296 | int first = *list == &ftrace_list_end; | ||
| 297 | add_ftrace_ops(list, ops); | ||
| 298 | if (first) | ||
| 299 | add_ftrace_ops(&ftrace_ops_list, main_ops); | ||
| 300 | } | ||
| 301 | |||
| 302 | static int remove_ftrace_list_ops(struct ftrace_ops **list, | ||
| 303 | struct ftrace_ops *main_ops, | ||
| 304 | struct ftrace_ops *ops) | ||
| 305 | { | ||
| 306 | int ret = remove_ftrace_ops(list, ops); | ||
| 307 | if (!ret && *list == &ftrace_list_end) | ||
| 308 | ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); | ||
| 309 | return ret; | ||
| 310 | } | ||
| 311 | |||
| 262 | static int __register_ftrace_function(struct ftrace_ops *ops) | 312 | static int __register_ftrace_function(struct ftrace_ops *ops) |
| 263 | { | 313 | { |
| 264 | if (ftrace_disabled) | 314 | if (ftrace_disabled) |
| @@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
| 270 | if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) | 320 | if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) |
| 271 | return -EBUSY; | 321 | return -EBUSY; |
| 272 | 322 | ||
| 323 | /* We don't support both control and global flags set. */ | ||
| 324 | if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) | ||
| 325 | return -EINVAL; | ||
| 326 | |||
| 273 | if (!core_kernel_data((unsigned long)ops)) | 327 | if (!core_kernel_data((unsigned long)ops)) |
| 274 | ops->flags |= FTRACE_OPS_FL_DYNAMIC; | 328 | ops->flags |= FTRACE_OPS_FL_DYNAMIC; |
| 275 | 329 | ||
| 276 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 330 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
| 277 | int first = ftrace_global_list == &ftrace_list_end; | 331 | add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); |
| 278 | add_ftrace_ops(&ftrace_global_list, ops); | ||
| 279 | ops->flags |= FTRACE_OPS_FL_ENABLED; | 332 | ops->flags |= FTRACE_OPS_FL_ENABLED; |
| 280 | if (first) | 333 | } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { |
| 281 | add_ftrace_ops(&ftrace_ops_list, &global_ops); | 334 | if (control_ops_alloc(ops)) |
| 335 | return -ENOMEM; | ||
| 336 | add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); | ||
| 282 | } else | 337 | } else |
| 283 | add_ftrace_ops(&ftrace_ops_list, ops); | 338 | add_ftrace_ops(&ftrace_ops_list, ops); |
| 284 | 339 | ||
| @@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
| 302 | return -EINVAL; | 357 | return -EINVAL; |
| 303 | 358 | ||
| 304 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 359 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
| 305 | ret = remove_ftrace_ops(&ftrace_global_list, ops); | 360 | ret = remove_ftrace_list_ops(&ftrace_global_list, |
| 306 | if (!ret && ftrace_global_list == &ftrace_list_end) | 361 | &global_ops, ops); |
| 307 | ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); | ||
| 308 | if (!ret) | 362 | if (!ret) |
| 309 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | 363 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; |
| 364 | } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { | ||
| 365 | ret = remove_ftrace_list_ops(&ftrace_control_list, | ||
| 366 | &control_ops, ops); | ||
| 367 | if (!ret) { | ||
| 368 | /* | ||
| 369 | * The ftrace_ops is now removed from the list, | ||
| 370 | * so there'll be no new users. We must ensure | ||
| 371 | * all current users are done before we free | ||
| 372 | * the control data. | ||
| 373 | */ | ||
| 374 | synchronize_sched(); | ||
| 375 | control_ops_free(ops); | ||
| 376 | } | ||
| 310 | } else | 377 | } else |
| 311 | ret = remove_ftrace_ops(&ftrace_ops_list, ops); | 378 | ret = remove_ftrace_ops(&ftrace_ops_list, ops); |
| 312 | 379 | ||
| @@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) | |||
| 1119 | call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); | 1186 | call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); |
| 1120 | } | 1187 | } |
| 1121 | 1188 | ||
| 1189 | void ftrace_free_filter(struct ftrace_ops *ops) | ||
| 1190 | { | ||
| 1191 | free_ftrace_hash(ops->filter_hash); | ||
| 1192 | free_ftrace_hash(ops->notrace_hash); | ||
| 1193 | } | ||
| 1194 | |||
| 1122 | static struct ftrace_hash *alloc_ftrace_hash(int size_bits) | 1195 | static struct ftrace_hash *alloc_ftrace_hash(int size_bits) |
| 1123 | { | 1196 | { |
| 1124 | struct ftrace_hash *hash; | 1197 | struct ftrace_hash *hash; |
| @@ -1129,7 +1202,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits) | |||
| 1129 | return NULL; | 1202 | return NULL; |
| 1130 | 1203 | ||
| 1131 | size = 1 << size_bits; | 1204 | size = 1 << size_bits; |
| 1132 | hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL); | 1205 | hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); |
| 1133 | 1206 | ||
| 1134 | if (!hash->buckets) { | 1207 | if (!hash->buckets) { |
| 1135 | kfree(hash); | 1208 | kfree(hash); |
| @@ -3146,8 +3219,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
| 3146 | mutex_lock(&ftrace_regex_lock); | 3219 | mutex_lock(&ftrace_regex_lock); |
| 3147 | if (reset) | 3220 | if (reset) |
| 3148 | ftrace_filter_reset(hash); | 3221 | ftrace_filter_reset(hash); |
| 3149 | if (buf) | 3222 | if (buf && !ftrace_match_records(hash, buf, len)) { |
| 3150 | ftrace_match_records(hash, buf, len); | 3223 | ret = -EINVAL; |
| 3224 | goto out_regex_unlock; | ||
| 3225 | } | ||
| 3151 | 3226 | ||
| 3152 | mutex_lock(&ftrace_lock); | 3227 | mutex_lock(&ftrace_lock); |
| 3153 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 3228 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
| @@ -3157,6 +3232,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
| 3157 | 3232 | ||
| 3158 | mutex_unlock(&ftrace_lock); | 3233 | mutex_unlock(&ftrace_lock); |
| 3159 | 3234 | ||
| 3235 | out_regex_unlock: | ||
| 3160 | mutex_unlock(&ftrace_regex_lock); | 3236 | mutex_unlock(&ftrace_regex_lock); |
| 3161 | 3237 | ||
| 3162 | free_ftrace_hash(hash); | 3238 | free_ftrace_hash(hash); |
| @@ -3173,10 +3249,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
| 3173 | * Filters denote which functions should be enabled when tracing is enabled. | 3249 | * Filters denote which functions should be enabled when tracing is enabled. |
| 3174 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. | 3250 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. |
| 3175 | */ | 3251 | */ |
| 3176 | void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, | 3252 | int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, |
| 3177 | int len, int reset) | 3253 | int len, int reset) |
| 3178 | { | 3254 | { |
| 3179 | ftrace_set_regex(ops, buf, len, reset, 1); | 3255 | return ftrace_set_regex(ops, buf, len, reset, 1); |
| 3180 | } | 3256 | } |
| 3181 | EXPORT_SYMBOL_GPL(ftrace_set_filter); | 3257 | EXPORT_SYMBOL_GPL(ftrace_set_filter); |
| 3182 | 3258 | ||
| @@ -3191,10 +3267,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); | |||
| 3191 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled | 3267 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled |
| 3192 | * for tracing. | 3268 | * for tracing. |
| 3193 | */ | 3269 | */ |
| 3194 | void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, | 3270 | int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, |
| 3195 | int len, int reset) | 3271 | int len, int reset) |
| 3196 | { | 3272 | { |
| 3197 | ftrace_set_regex(ops, buf, len, reset, 0); | 3273 | return ftrace_set_regex(ops, buf, len, reset, 0); |
| 3198 | } | 3274 | } |
| 3199 | EXPORT_SYMBOL_GPL(ftrace_set_notrace); | 3275 | EXPORT_SYMBOL_GPL(ftrace_set_notrace); |
| 3200 | /** | 3276 | /** |
| @@ -3871,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
| 3871 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 3947 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
| 3872 | 3948 | ||
| 3873 | static void | 3949 | static void |
| 3950 | ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) | ||
| 3951 | { | ||
| 3952 | struct ftrace_ops *op; | ||
| 3953 | |||
| 3954 | if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) | ||
| 3955 | return; | ||
| 3956 | |||
| 3957 | /* | ||
| 3958 | * Some of the ops may be dynamically allocated, | ||
| 3959 | * they must be freed after a synchronize_sched(). | ||
| 3960 | */ | ||
| 3961 | preempt_disable_notrace(); | ||
| 3962 | trace_recursion_set(TRACE_CONTROL_BIT); | ||
| 3963 | op = rcu_dereference_raw(ftrace_control_list); | ||
| 3964 | while (op != &ftrace_list_end) { | ||
| 3965 | if (!ftrace_function_local_disabled(op) && | ||
| 3966 | ftrace_ops_test(op, ip)) | ||
| 3967 | op->func(ip, parent_ip); | ||
| 3968 | |||
| 3969 | op = rcu_dereference_raw(op->next); | ||
| 3970 | }; | ||
| 3971 | trace_recursion_clear(TRACE_CONTROL_BIT); | ||
| 3972 | preempt_enable_notrace(); | ||
| 3973 | } | ||
| 3974 | |||
| 3975 | static struct ftrace_ops control_ops = { | ||
| 3976 | .func = ftrace_ops_control_func, | ||
| 3977 | }; | ||
| 3978 | |||
| 3979 | static void | ||
| 3874 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) | 3980 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) |
| 3875 | { | 3981 | { |
| 3876 | struct ftrace_ops *op; | 3982 | struct ftrace_ops *op; |
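The FTRACE_OPS_FL_CONTROL machinery above hangs a per-CPU 'disabled' counter off each registered ops: the shared control_ops trampoline walks ftrace_control_list with preemption disabled, skips any ops whose counter is non-zero on this CPU, and uses TRACE_CONTROL_BIT to break recursion if a callback itself hits a traced function. Consumers arm and disarm an ops per CPU with small helpers of roughly this shape (the real ones are declared in include/linux/ftrace.h; this is a sketch):

	static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
	{
		if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
			return;
		(*this_cpu_ptr(ops->disabled))--;
	}

	static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
	{
		if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
			return;
		(*this_cpu_ptr(ops->disabled))++;
	}

	/* the check used by ftrace_ops_control_func() above */
	static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
	{
		return *this_cpu_ptr(ops->disabled);
	}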
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3f1bc5d2a00..10d5503f0d04 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -2764,12 +2764,12 @@ static const char readme_msg[] = | |||
| 2764 | "tracing mini-HOWTO:\n\n" | 2764 | "tracing mini-HOWTO:\n\n" |
| 2765 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" | 2765 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" |
| 2766 | "# cat /sys/kernel/debug/tracing/available_tracers\n" | 2766 | "# cat /sys/kernel/debug/tracing/available_tracers\n" |
| 2767 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" | 2767 | "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" |
| 2768 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2768 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
| 2769 | "nop\n" | 2769 | "nop\n" |
| 2770 | "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" | 2770 | "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" |
| 2771 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2771 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
| 2772 | "sched_switch\n" | 2772 | "wakeup\n" |
| 2773 | "# cat /sys/kernel/debug/tracing/trace_options\n" | 2773 | "# cat /sys/kernel/debug/tracing/trace_options\n" |
| 2774 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 2774 | "noprint-parent nosym-offset nosym-addr noverbose\n" |
| 2775 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" | 2775 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b93ecbadad6d..54faec790bc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -56,17 +56,23 @@ enum trace_type { | |||
| 56 | #define F_STRUCT(args...) args | 56 | #define F_STRUCT(args...) args |
| 57 | 57 | ||
| 58 | #undef FTRACE_ENTRY | 58 | #undef FTRACE_ENTRY |
| 59 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 59 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
| 60 | struct struct_name { \ | 60 | struct struct_name { \ |
| 61 | struct trace_entry ent; \ | 61 | struct trace_entry ent; \ |
| 62 | tstruct \ | 62 | tstruct \ |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | #undef TP_ARGS | 65 | #undef TP_ARGS |
| 66 | #define TP_ARGS(args...) args | 66 | #define TP_ARGS(args...) args |
| 67 | 67 | ||
| 68 | #undef FTRACE_ENTRY_DUP | 68 | #undef FTRACE_ENTRY_DUP |
| 69 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) | 69 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) |
| 70 | |||
| 71 | #undef FTRACE_ENTRY_REG | ||
| 72 | #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ | ||
| 73 | filter, regfn) \ | ||
| 74 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ | ||
| 75 | filter) | ||
| 70 | 76 | ||
| 71 | #include "trace_entries.h" | 77 | #include "trace_entries.h" |
| 72 | 78 | ||
| @@ -288,6 +294,8 @@ struct tracer { | |||
| 288 | /* for function tracing recursion */ | 294 | /* for function tracing recursion */ |
| 289 | #define TRACE_INTERNAL_BIT (1<<11) | 295 | #define TRACE_INTERNAL_BIT (1<<11) |
| 290 | #define TRACE_GLOBAL_BIT (1<<12) | 296 | #define TRACE_GLOBAL_BIT (1<<12) |
| 297 | #define TRACE_CONTROL_BIT (1<<13) | ||
| 298 | |||
| 291 | /* | 299 | /* |
| 292 | * Abuse of the trace_recursion. | 300 | * Abuse of the trace_recursion. |
| 293 | * As we need a way to maintain state if we are tracing the function | 301 | * As we need a way to maintain state if we are tracing the function |
| @@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task) | |||
| 589 | static inline int ftrace_is_dead(void) { return 0; } | 597 | static inline int ftrace_is_dead(void) { return 0; } |
| 590 | #endif | 598 | #endif |
| 591 | 599 | ||
| 600 | int ftrace_event_is_function(struct ftrace_event_call *call); | ||
| 601 | |||
| 592 | /* | 602 | /* |
| 593 | * struct trace_parser - servers for reading the user input separated by spaces | 603 | * struct trace_parser - servers for reading the user input separated by spaces |
| 594 | * @cont: set if the input is not complete - no final space char was found | 604 | * @cont: set if the input is not complete - no final space char was found |
| @@ -766,9 +776,7 @@ struct filter_pred { | |||
| 766 | u64 val; | 776 | u64 val; |
| 767 | struct regex regex; | 777 | struct regex regex; |
| 768 | unsigned short *ops; | 778 | unsigned short *ops; |
| 769 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
| 770 | struct ftrace_event_field *field; | 779 | struct ftrace_event_field *field; |
| 771 | #endif | ||
| 772 | int offset; | 780 | int offset; |
| 773 | int not; | 781 | int not; |
| 774 | int op; | 782 | int op; |
| @@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[]; | |||
| 818 | extern const char *__stop___trace_bprintk_fmt[]; | 826 | extern const char *__stop___trace_bprintk_fmt[]; |
| 819 | 827 | ||
| 820 | #undef FTRACE_ENTRY | 828 | #undef FTRACE_ENTRY |
| 821 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ | 829 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ |
| 822 | extern struct ftrace_event_call \ | 830 | extern struct ftrace_event_call \ |
| 823 | __attribute__((__aligned__(4))) event_##call; | 831 | __attribute__((__aligned__(4))) event_##call; |
| 824 | #undef FTRACE_ENTRY_DUP | 832 | #undef FTRACE_ENTRY_DUP |
| 825 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ | 833 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ |
| 826 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 834 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ |
| 835 | filter) | ||
| 827 | #include "trace_entries.h" | 836 | #include "trace_entries.h" |
| 828 | 837 | ||
| 838 | #ifdef CONFIG_PERF_EVENTS | ||
| 839 | #ifdef CONFIG_FUNCTION_TRACER | ||
| 840 | int perf_ftrace_event_register(struct ftrace_event_call *call, | ||
| 841 | enum trace_reg type, void *data); | ||
| 842 | #else | ||
| 843 | #define perf_ftrace_event_register NULL | ||
| 844 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
| 845 | #endif /* CONFIG_PERF_EVENTS */ | ||
| 846 | |||
| 829 | #endif /* _LINUX_KERNEL_TRACE_H */ | 847 | #endif /* _LINUX_KERNEL_TRACE_H */ |
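Every FTRACE_ENTRY() variant gains a filter argument here, and the new FTRACE_ENTRY_REG() threads an extra regfn through as well. In this header the REG form simply decays to FTRACE_ENTRY(), but the event-export expansion pass can redefine it so regfn lands in the event class's reg callback; that is how the function event picks up perf_ftrace_event_register from trace_entries.h. Roughly (illustrative, condensed from the trace_export.c pattern rather than quoted from it):

	#undef FTRACE_ENTRY_REG
	#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print,	\
				 filter, regfn)					\
	struct ftrace_event_class __refdata event_class_ftrace_##call = {	\
		.system		= __stringify(TRACE_SYSTEM),			\
		.define_fields	= ftrace_define_fields_##call,			\
		.fields		= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
		.reg		= regfn,					\
	};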
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 93365907f219..d91eb0541b3a 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
| @@ -55,7 +55,7 @@ | |||
| 55 | /* | 55 | /* |
| 56 | * Function trace entry - function address and parent function address: | 56 | * Function trace entry - function address and parent function address: |
| 57 | */ | 57 | */ |
| 58 | FTRACE_ENTRY(function, ftrace_entry, | 58 | FTRACE_ENTRY_REG(function, ftrace_entry, |
| 59 | 59 | ||
| 60 | TRACE_FN, | 60 | TRACE_FN, |
| 61 | 61 | ||
| @@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry, | |||
| 64 | __field( unsigned long, parent_ip ) | 64 | __field( unsigned long, parent_ip ) |
| 65 | ), | 65 | ), |
| 66 | 66 | ||
| 67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) | 67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip), |
| 68 | |||
| 69 | FILTER_TRACE_FN, | ||
| 70 | |||
| 71 | perf_ftrace_event_register | ||
| 68 | ); | 72 | ); |
| 69 | 73 | ||
| 70 | /* Function call entry */ | 74 | /* Function call entry */ |
| @@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, | |||
| 78 | __field_desc( int, graph_ent, depth ) | 82 | __field_desc( int, graph_ent, depth ) |
| 79 | ), | 83 | ), |
| 80 | 84 | ||
| 81 | F_printk("--> %lx (%d)", __entry->func, __entry->depth) | 85 | F_printk("--> %lx (%d)", __entry->func, __entry->depth), |
| 86 | |||
| 87 | FILTER_OTHER | ||
| 82 | ); | 88 | ); |
| 83 | 89 | ||
| 84 | /* Function return entry */ | 90 | /* Function return entry */ |
| @@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, | |||
| 98 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", | 104 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", |
| 99 | __entry->func, __entry->depth, | 105 | __entry->func, __entry->depth, |
| 100 | __entry->calltime, __entry->rettime, | 106 | __entry->calltime, __entry->rettime, |
| 101 | __entry->depth) | 107 | __entry->depth), |
| 108 | |||
| 109 | FILTER_OTHER | ||
| 102 | ); | 110 | ); |
| 103 | 111 | ||
| 104 | /* | 112 | /* |
| @@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, | |||
| 127 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", | 135 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", |
| 128 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | 136 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, |
| 129 | __entry->next_pid, __entry->next_prio, __entry->next_state, | 137 | __entry->next_pid, __entry->next_prio, __entry->next_state, |
| 130 | __entry->next_cpu | 138 | __entry->next_cpu), |
| 131 | ) | 139 | |
| 140 | FILTER_OTHER | ||
| 132 | ); | 141 | ); |
| 133 | 142 | ||
| 134 | /* | 143 | /* |
| @@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, | |||
| 146 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", | 155 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", |
| 147 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | 156 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, |
| 148 | __entry->next_pid, __entry->next_prio, __entry->next_state, | 157 | __entry->next_pid, __entry->next_prio, __entry->next_state, |
| 149 | __entry->next_cpu | 158 | __entry->next_cpu), |
| 150 | ) | 159 | |
| 160 | FILTER_OTHER | ||
| 151 | ); | 161 | ); |
| 152 | 162 | ||
| 153 | /* | 163 | /* |
| @@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, | |||
| 169 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 179 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", |
| 170 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 180 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
| 171 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 181 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
| 172 | __entry->caller[6], __entry->caller[7]) | 182 | __entry->caller[6], __entry->caller[7]), |
| 183 | |||
| 184 | FILTER_OTHER | ||
| 173 | ); | 185 | ); |
| 174 | 186 | ||
| 175 | FTRACE_ENTRY(user_stack, userstack_entry, | 187 | FTRACE_ENTRY(user_stack, userstack_entry, |
| @@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, | |||
| 185 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 197 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", |
| 186 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 198 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
| 187 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 199 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
| 188 | __entry->caller[6], __entry->caller[7]) | 200 | __entry->caller[6], __entry->caller[7]), |
| 201 | |||
| 202 | FILTER_OTHER | ||
| 189 | ); | 203 | ); |
| 190 | 204 | ||
| 191 | /* | 205 | /* |
| @@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry, | |||
| 202 | ), | 216 | ), |
| 203 | 217 | ||
| 204 | F_printk("%08lx fmt:%p", | 218 | F_printk("%08lx fmt:%p", |
| 205 | __entry->ip, __entry->fmt) | 219 | __entry->ip, __entry->fmt), |
| 220 | |||
| 221 | FILTER_OTHER | ||
| 206 | ); | 222 | ); |
| 207 | 223 | ||
| 208 | FTRACE_ENTRY(print, print_entry, | 224 | FTRACE_ENTRY(print, print_entry, |
| @@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry, | |||
| 215 | ), | 231 | ), |
| 216 | 232 | ||
| 217 | F_printk("%08lx %s", | 233 | F_printk("%08lx %s", |
| 218 | __entry->ip, __entry->buf) | 234 | __entry->ip, __entry->buf), |
| 235 | |||
| 236 | FILTER_OTHER | ||
| 219 | ); | 237 | ); |
| 220 | 238 | ||
| 221 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | 239 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, |
| @@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | |||
| 234 | 252 | ||
| 235 | F_printk("%lx %lx %lx %d %x %x", | 253 | F_printk("%lx %lx %lx %d %x %x", |
| 236 | (unsigned long)__entry->phys, __entry->value, __entry->pc, | 254 | (unsigned long)__entry->phys, __entry->value, __entry->pc, |
| 237 | __entry->map_id, __entry->opcode, __entry->width) | 255 | __entry->map_id, __entry->opcode, __entry->width), |
| 256 | |||
| 257 | FILTER_OTHER | ||
| 238 | ); | 258 | ); |
| 239 | 259 | ||
| 240 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | 260 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, |
| @@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | |||
| 252 | 272 | ||
| 253 | F_printk("%lx %lx %lx %d %x", | 273 | F_printk("%lx %lx %lx %d %x", |
| 254 | (unsigned long)__entry->phys, __entry->virt, __entry->len, | 274 | (unsigned long)__entry->phys, __entry->virt, __entry->len, |
| 255 | __entry->map_id, __entry->opcode) | 275 | __entry->map_id, __entry->opcode), |
| 276 | |||
| 277 | FILTER_OTHER | ||
| 256 | ); | 278 | ); |
| 257 | 279 | ||
| 258 | 280 | ||
| @@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch, | |||
| 272 | 294 | ||
| 273 | F_printk("%u:%s:%s (%u)", | 295 | F_printk("%u:%s:%s (%u)", |
| 274 | __entry->line, | 296 | __entry->line, |
| 275 | __entry->func, __entry->file, __entry->correct) | 297 | __entry->func, __entry->file, __entry->correct), |
| 298 | |||
| 299 | FILTER_OTHER | ||
| 276 | ); | 300 | ); |
| 277 | 301 | ||
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 19a359d5e6d5..fee3752ae8f6 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
| @@ -24,6 +24,11 @@ static int total_ref_count; | |||
| 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, |
| 25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
| 26 | { | 26 | { |
| 27 | /* The ftrace function trace is allowed only for root. */ | ||
| 28 | if (ftrace_event_is_function(tp_event) && | ||
| 29 | perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
| 30 | return -EPERM; | ||
| 31 | |||
| 27 | /* No tracing, just counting, so no obvious leak */ | 32 | /* No tracing, just counting, so no obvious leak */ |
| 28 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) | 33 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) |
| 29 | return 0; | 34 | return 0; |
| @@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | |||
| 44 | return 0; | 49 | return 0; |
| 45 | } | 50 | } |
| 46 | 51 | ||
| 47 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | 52 | static int perf_trace_event_reg(struct ftrace_event_call *tp_event, |
| 48 | struct perf_event *p_event) | 53 | struct perf_event *p_event) |
| 49 | { | 54 | { |
| 50 | struct hlist_head __percpu *list; | 55 | struct hlist_head __percpu *list; |
| 51 | int ret; | 56 | int ret = -ENOMEM; |
| 52 | int cpu; | 57 | int cpu; |
| 53 | 58 | ||
| 54 | ret = perf_trace_event_perm(tp_event, p_event); | ||
| 55 | if (ret) | ||
| 56 | return ret; | ||
| 57 | |||
| 58 | p_event->tp_event = tp_event; | 59 | p_event->tp_event = tp_event; |
| 59 | if (tp_event->perf_refcount++ > 0) | 60 | if (tp_event->perf_refcount++ > 0) |
| 60 | return 0; | 61 | return 0; |
| 61 | 62 | ||
| 62 | ret = -ENOMEM; | ||
| 63 | |||
| 64 | list = alloc_percpu(struct hlist_head); | 63 | list = alloc_percpu(struct hlist_head); |
| 65 | if (!list) | 64 | if (!list) |
| 66 | goto fail; | 65 | goto fail; |
| @@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
| 83 | } | 82 | } |
| 84 | } | 83 | } |
| 85 | 84 | ||
| 86 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); | 85 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL); |
| 87 | if (ret) | 86 | if (ret) |
| 88 | goto fail; | 87 | goto fail; |
| 89 | 88 | ||
| @@ -108,6 +107,69 @@ fail: | |||
| 108 | return ret; | 107 | return ret; |
| 109 | } | 108 | } |
| 110 | 109 | ||
| 110 | static void perf_trace_event_unreg(struct perf_event *p_event) | ||
| 111 | { | ||
| 112 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
| 113 | int i; | ||
| 114 | |||
| 115 | if (--tp_event->perf_refcount > 0) | ||
| 116 | goto out; | ||
| 117 | |||
| 118 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL); | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Ensure our callback won't be called anymore. The buffers | ||
| 122 | * will be freed after that. | ||
| 123 | */ | ||
| 124 | tracepoint_synchronize_unregister(); | ||
| 125 | |||
| 126 | free_percpu(tp_event->perf_events); | ||
| 127 | tp_event->perf_events = NULL; | ||
| 128 | |||
| 129 | if (!--total_ref_count) { | ||
| 130 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
| 131 | free_percpu(perf_trace_buf[i]); | ||
| 132 | perf_trace_buf[i] = NULL; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | out: | ||
| 136 | module_put(tp_event->mod); | ||
| 137 | } | ||
| 138 | |||
| 139 | static int perf_trace_event_open(struct perf_event *p_event) | ||
| 140 | { | ||
| 141 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
| 142 | return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); | ||
| 143 | } | ||
| 144 | |||
| 145 | static void perf_trace_event_close(struct perf_event *p_event) | ||
| 146 | { | ||
| 147 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
| 148 | tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); | ||
| 149 | } | ||
| 150 | |||
| 151 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | ||
| 152 | struct perf_event *p_event) | ||
| 153 | { | ||
| 154 | int ret; | ||
| 155 | |||
| 156 | ret = perf_trace_event_perm(tp_event, p_event); | ||
| 157 | if (ret) | ||
| 158 | return ret; | ||
| 159 | |||
| 160 | ret = perf_trace_event_reg(tp_event, p_event); | ||
| 161 | if (ret) | ||
| 162 | return ret; | ||
| 163 | |||
| 164 | ret = perf_trace_event_open(p_event); | ||
| 165 | if (ret) { | ||
| 166 | perf_trace_event_unreg(p_event); | ||
| 167 | return ret; | ||
| 168 | } | ||
| 169 | |||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | |||
| 111 | int perf_trace_init(struct perf_event *p_event) | 173 | int perf_trace_init(struct perf_event *p_event) |
| 112 | { | 174 | { |
| 113 | struct ftrace_event_call *tp_event; | 175 | struct ftrace_event_call *tp_event; |
| @@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event) | |||
| 130 | return ret; | 192 | return ret; |
| 131 | } | 193 | } |
| 132 | 194 | ||
| 195 | void perf_trace_destroy(struct perf_event *p_event) | ||
| 196 | { | ||
| 197 | mutex_lock(&event_mutex); | ||
| 198 | perf_trace_event_close(p_event); | ||
| 199 | perf_trace_event_unreg(p_event); | ||
| 200 | mutex_unlock(&event_mutex); | ||
| 201 | } | ||
| 202 | |||
| 133 | int perf_trace_add(struct perf_event *p_event, int flags) | 203 | int perf_trace_add(struct perf_event *p_event, int flags) |
| 134 | { | 204 | { |
| 135 | struct ftrace_event_call *tp_event = p_event->tp_event; | 205 | struct ftrace_event_call *tp_event = p_event->tp_event; |
| @@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags) | |||
| 146 | list = this_cpu_ptr(pcpu_list); | 216 | list = this_cpu_ptr(pcpu_list); |
| 147 | hlist_add_head_rcu(&p_event->hlist_entry, list); | 217 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
| 148 | 218 | ||
| 149 | return 0; | 219 | return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); |
| 150 | } | 220 | } |
| 151 | 221 | ||
| 152 | void perf_trace_del(struct perf_event *p_event, int flags) | 222 | void perf_trace_del(struct perf_event *p_event, int flags) |
| 153 | { | 223 | { |
| 154 | hlist_del_rcu(&p_event->hlist_entry); | ||
| 155 | } | ||
| 156 | |||
| 157 | void perf_trace_destroy(struct perf_event *p_event) | ||
| 158 | { | ||
| 159 | struct ftrace_event_call *tp_event = p_event->tp_event; | 224 | struct ftrace_event_call *tp_event = p_event->tp_event; |
| 160 | int i; | 225 | hlist_del_rcu(&p_event->hlist_entry); |
| 161 | 226 | tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); | |
| 162 | mutex_lock(&event_mutex); | ||
| 163 | if (--tp_event->perf_refcount > 0) | ||
| 164 | goto out; | ||
| 165 | |||
| 166 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); | ||
| 167 | |||
| 168 | /* | ||
| 169 | * Ensure our callback won't be called anymore. The buffers | ||
| 170 | * will be freed after that. | ||
| 171 | */ | ||
| 172 | tracepoint_synchronize_unregister(); | ||
| 173 | |||
| 174 | free_percpu(tp_event->perf_events); | ||
| 175 | tp_event->perf_events = NULL; | ||
| 176 | |||
| 177 | if (!--total_ref_count) { | ||
| 178 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
| 179 | free_percpu(perf_trace_buf[i]); | ||
| 180 | perf_trace_buf[i] = NULL; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | out: | ||
| 184 | module_put(tp_event->mod); | ||
| 185 | mutex_unlock(&event_mutex); | ||
| 186 | } | 227 | } |
| 187 | 228 | ||
| 188 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 229 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
| @@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
| 214 | return raw_data; | 255 | return raw_data; |
| 215 | } | 256 | } |
| 216 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 257 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
| 258 | |||
| 259 | #ifdef CONFIG_FUNCTION_TRACER | ||
| 260 | static void | ||
| 261 | perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) | ||
| 262 | { | ||
| 263 | struct ftrace_entry *entry; | ||
| 264 | struct hlist_head *head; | ||
| 265 | struct pt_regs regs; | ||
| 266 | int rctx; | ||
| 267 | |||
| 268 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ | ||
| 269 | sizeof(u64)) - sizeof(u32)) | ||
| 270 | |||
| 271 | BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); | ||
| 272 | |||
| 273 | perf_fetch_caller_regs(®s); | ||
| 274 | |||
| 275 | entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); | ||
| 276 | if (!entry) | ||
| 277 | return; | ||
| 278 | |||
| 279 | entry->ip = ip; | ||
| 280 | entry->parent_ip = parent_ip; | ||
| 281 | |||
| 282 | head = this_cpu_ptr(event_function.perf_events); | ||
| 283 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, | ||
| 284 | 1, ®s, head); | ||
| 285 | |||
| 286 | #undef ENTRY_SIZE | ||
| 287 | } | ||
| 288 | |||
| 289 | static int perf_ftrace_function_register(struct perf_event *event) | ||
| 290 | { | ||
| 291 | struct ftrace_ops *ops = &event->ftrace_ops; | ||
| 292 | |||
| 293 | ops->flags |= FTRACE_OPS_FL_CONTROL; | ||
| 294 | ops->func = perf_ftrace_function_call; | ||
| 295 | return register_ftrace_function(ops); | ||
| 296 | } | ||
| 297 | |||
| 298 | static int perf_ftrace_function_unregister(struct perf_event *event) | ||
| 299 | { | ||
| 300 | struct ftrace_ops *ops = &event->ftrace_ops; | ||
| 301 | int ret = unregister_ftrace_function(ops); | ||
| 302 | ftrace_free_filter(ops); | ||
| 303 | return ret; | ||
| 304 | } | ||
| 305 | |||
| 306 | static void perf_ftrace_function_enable(struct perf_event *event) | ||
| 307 | { | ||
| 308 | ftrace_function_local_enable(&event->ftrace_ops); | ||
| 309 | } | ||
| 310 | |||
| 311 | static void perf_ftrace_function_disable(struct perf_event *event) | ||
| 312 | { | ||
| 313 | ftrace_function_local_disable(&event->ftrace_ops); | ||
| 314 | } | ||
| 315 | |||
| 316 | int perf_ftrace_event_register(struct ftrace_event_call *call, | ||
| 317 | enum trace_reg type, void *data) | ||
| 318 | { | ||
| 319 | switch (type) { | ||
| 320 | case TRACE_REG_REGISTER: | ||
| 321 | case TRACE_REG_UNREGISTER: | ||
| 322 | break; | ||
| 323 | case TRACE_REG_PERF_REGISTER: | ||
| 324 | case TRACE_REG_PERF_UNREGISTER: | ||
| 325 | return 0; | ||
| 326 | case TRACE_REG_PERF_OPEN: | ||
| 327 | return perf_ftrace_function_register(data); | ||
| 328 | case TRACE_REG_PERF_CLOSE: | ||
| 329 | return perf_ftrace_function_unregister(data); | ||
| 330 | case TRACE_REG_PERF_ADD: | ||
| 331 | perf_ftrace_function_enable(data); | ||
| 332 | return 0; | ||
| 333 | case TRACE_REG_PERF_DEL: | ||
| 334 | perf_ftrace_function_disable(data); | ||
| 335 | return 0; | ||
| 336 | } | ||
| 337 | |||
| 338 | return -EINVAL; | ||
| 339 | } | ||
| 340 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
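Read together with the ftrace.c hunks, perf_ftrace_event_register() maps the perf event lifecycle onto function tracing: OPEN registers a per-event ftrace_ops (FTRACE_OPS_FL_CONTROL, func = perf_ftrace_function_call), CLOSE unregisters it and frees its filter hashes, and ADD/DEL only touch the per-CPU disabled counter as the event is scheduled in and out, making enable/disable a per-CPU increment instead of a text patch. The call order, sketched from the code above:

	/*
	 * perf_trace_init()
	 *   perf_trace_event_reg()    TRACE_REG_PERF_REGISTER   refcounted buffers
	 *   perf_trace_event_open()   TRACE_REG_PERF_OPEN       register_ftrace_function()
	 * perf_trace_add()            TRACE_REG_PERF_ADD        local enable (this CPU)
	 *   ... perf_ftrace_function_call() samples each traced function ...
	 * perf_trace_del()            TRACE_REG_PERF_DEL        local disable (this CPU)
	 * perf_trace_destroy()
	 *   perf_trace_event_close()  TRACE_REG_PERF_CLOSE      unregister + free filter
	 *   perf_trace_event_unreg()  TRACE_REG_PERF_UNREGISTER free buffers on last ref
	 */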
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c212a7f934ec..079a93ae8a9d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call) | |||
| 147 | } | 147 | } |
| 148 | EXPORT_SYMBOL_GPL(trace_event_raw_init); | 148 | EXPORT_SYMBOL_GPL(trace_event_raw_init); |
| 149 | 149 | ||
| 150 | int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) | 150 | int ftrace_event_reg(struct ftrace_event_call *call, |
| 151 | enum trace_reg type, void *data) | ||
| 151 | { | 152 | { |
| 152 | switch (type) { | 153 | switch (type) { |
| 153 | case TRACE_REG_REGISTER: | 154 | case TRACE_REG_REGISTER: |
| @@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) | |||
| 170 | call->class->perf_probe, | 171 | call->class->perf_probe, |
| 171 | call); | 172 | call); |
| 172 | return 0; | 173 | return 0; |
| 174 | case TRACE_REG_PERF_OPEN: | ||
| 175 | case TRACE_REG_PERF_CLOSE: | ||
| 176 | case TRACE_REG_PERF_ADD: | ||
| 177 | case TRACE_REG_PERF_DEL: | ||
| 178 | return 0; | ||
| 173 | #endif | 179 | #endif |
| 174 | } | 180 | } |
| 175 | return 0; | 181 | return 0; |
| @@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
| 209 | tracing_stop_cmdline_record(); | 215 | tracing_stop_cmdline_record(); |
| 210 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | 216 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; |
| 211 | } | 217 | } |
| 212 | call->class->reg(call, TRACE_REG_UNREGISTER); | 218 | call->class->reg(call, TRACE_REG_UNREGISTER, NULL); |
| 213 | } | 219 | } |
| 214 | break; | 220 | break; |
| 215 | case 1: | 221 | case 1: |
| @@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
| 218 | tracing_start_cmdline_record(); | 224 | tracing_start_cmdline_record(); |
| 219 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; | 225 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; |
| 220 | } | 226 | } |
| 221 | ret = call->class->reg(call, TRACE_REG_REGISTER); | 227 | ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); |
| 222 | if (ret) { | 228 | if (ret) { |
| 223 | tracing_stop_cmdline_record(); | 229 | tracing_stop_cmdline_record(); |
| 224 | pr_info("event trace: Could not enable event " | 230 | pr_info("event trace: Could not enable event " |
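The third void *data argument added to class->reg() exists so the new perf OPEN/CLOSE/ADD/DEL operations can hand the struct perf_event through; the pre-existing REGISTER/UNREGISTER paths just pass NULL, and the stock ftrace_event_reg() answers 0 for the new ops. A minimal sketch of a reg callback written against the new signature (my_event_reg is illustrative):

	static int my_event_reg(struct ftrace_event_call *call,
				enum trace_reg type, void *data)
	{
		switch (type) {
		case TRACE_REG_REGISTER:
			return tracepoint_probe_register(call->name,
							 call->class->probe, call);
		case TRACE_REG_UNREGISTER:
			tracepoint_probe_unregister(call->name,
						    call->class->probe, call);
			return 0;
		default:	/* perf open/close/add/del: nothing extra to do */
			return 0;
		}
	}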
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 24aee7127451..431dba8b7542 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -81,6 +81,7 @@ enum { | |||
| 81 | FILT_ERR_TOO_MANY_PREDS, | 81 | FILT_ERR_TOO_MANY_PREDS, |
| 82 | FILT_ERR_MISSING_FIELD, | 82 | FILT_ERR_MISSING_FIELD, |
| 83 | FILT_ERR_INVALID_FILTER, | 83 | FILT_ERR_INVALID_FILTER, |
| 84 | FILT_ERR_IP_FIELD_ONLY, | ||
| 84 | }; | 85 | }; |
| 85 | 86 | ||
| 86 | static char *err_text[] = { | 87 | static char *err_text[] = { |
| @@ -96,6 +97,7 @@ static char *err_text[] = { | |||
| 96 | "Too many terms in predicate expression", | 97 | "Too many terms in predicate expression", |
| 97 | "Missing field name and/or value", | 98 | "Missing field name and/or value", |
| 98 | "Meaningless filter expression", | 99 | "Meaningless filter expression", |
| 100 | "Only 'ip' field is supported for function trace", | ||
| 99 | }; | 101 | }; |
| 100 | 102 | ||
| 101 | struct opstack_op { | 103 | struct opstack_op { |
| @@ -685,7 +687,7 @@ find_event_field(struct ftrace_event_call *call, char *name) | |||
| 685 | 687 | ||
| 686 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) | 688 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) |
| 687 | { | 689 | { |
| 688 | stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); | 690 | stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); |
| 689 | if (!stack->preds) | 691 | if (!stack->preds) |
| 690 | return -ENOMEM; | 692 | return -ENOMEM; |
| 691 | stack->index = n_preds; | 693 | stack->index = n_preds; |
| @@ -826,8 +828,7 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) | |||
| 826 | if (filter->preds) | 828 | if (filter->preds) |
| 827 | __free_preds(filter); | 829 | __free_preds(filter); |
| 828 | 830 | ||
| 829 | filter->preds = | 831 | filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL); |
| 830 | kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); | ||
| 831 | 832 | ||
| 832 | if (!filter->preds) | 833 | if (!filter->preds) |
| 833 | return -ENOMEM; | 834 | return -ENOMEM; |
| @@ -900,6 +901,11 @@ int filter_assign_type(const char *type) | |||
| 900 | return FILTER_OTHER; | 901 | return FILTER_OTHER; |
| 901 | } | 902 | } |
| 902 | 903 | ||
| 904 | static bool is_function_field(struct ftrace_event_field *field) | ||
| 905 | { | ||
| 906 | return field->filter_type == FILTER_TRACE_FN; | ||
| 907 | } | ||
| 908 | |||
| 903 | static bool is_string_field(struct ftrace_event_field *field) | 909 | static bool is_string_field(struct ftrace_event_field *field) |
| 904 | { | 910 | { |
| 905 | return field->filter_type == FILTER_DYN_STRING || | 911 | return field->filter_type == FILTER_DYN_STRING || |
| @@ -987,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps, | |||
| 987 | fn = filter_pred_strloc; | 993 | fn = filter_pred_strloc; |
| 988 | else | 994 | else |
| 989 | fn = filter_pred_pchar; | 995 | fn = filter_pred_pchar; |
| 996 | } else if (is_function_field(field)) { | ||
| 997 | if (strcmp(field->name, "ip")) { | ||
| 998 | parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0); | ||
| 999 | return -EINVAL; | ||
| 1000 | } | ||
| 990 | } else { | 1001 | } else { |
| 991 | if (field->is_signed) | 1002 | if (field->is_signed) |
| 992 | ret = strict_strtoll(pred->regex.pattern, 0, &val); | 1003 | ret = strict_strtoll(pred->regex.pattern, 0, &val); |
| @@ -1334,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps, | |||
| 1334 | 1345 | ||
| 1335 | strcpy(pred.regex.pattern, operand2); | 1346 | strcpy(pred.regex.pattern, operand2); |
| 1336 | pred.regex.len = strlen(pred.regex.pattern); | 1347 | pred.regex.len = strlen(pred.regex.pattern); |
| 1337 | |||
| 1338 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
| 1339 | pred.field = field; | 1348 | pred.field = field; |
| 1340 | #endif | ||
| 1341 | return init_pred(ps, field, &pred) ? NULL : &pred; | 1349 | return init_pred(ps, field, &pred) ? NULL : &pred; |
| 1342 | } | 1350 | } |
| 1343 | 1351 | ||
| @@ -1486,7 +1494,7 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | |||
| 1486 | children = count_leafs(preds, &preds[root->left]); | 1494 | children = count_leafs(preds, &preds[root->left]); |
| 1487 | children += count_leafs(preds, &preds[root->right]); | 1495 | children += count_leafs(preds, &preds[root->right]); |
| 1488 | 1496 | ||
| 1489 | root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL); | 1497 | root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL); |
| 1490 | if (!root->ops) | 1498 | if (!root->ops) |
| 1491 | return -ENOMEM; | 1499 | return -ENOMEM; |
| 1492 | 1500 | ||
| @@ -1950,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event) | |||
| 1950 | __free_filter(filter); | 1958 | __free_filter(filter); |
| 1951 | } | 1959 | } |
| 1952 | 1960 | ||
| 1961 | struct function_filter_data { | ||
| 1962 | struct ftrace_ops *ops; | ||
| 1963 | int first_filter; | ||
| 1964 | int first_notrace; | ||
| 1965 | }; | ||
| 1966 | |||
| 1967 | #ifdef CONFIG_FUNCTION_TRACER | ||
| 1968 | static char ** | ||
| 1969 | ftrace_function_filter_re(char *buf, int len, int *count) | ||
| 1970 | { | ||
| 1971 | char *str, *sep, **re; | ||
| 1972 | |||
| 1973 | str = kstrndup(buf, len, GFP_KERNEL); | ||
| 1974 | if (!str) | ||
| 1975 | return NULL; | ||
| 1976 | |||
| 1977 | /* | ||
| 1978 | * The argv_split function takes white space | ||
| 1979 | * as a separator, so convert ',' into spaces. | ||
| 1980 | */ | ||
| 1981 | while ((sep = strchr(str, ','))) | ||
| 1982 | *sep = ' '; | ||
| 1983 | |||
| 1984 | re = argv_split(GFP_KERNEL, str, count); | ||
| 1985 | kfree(str); | ||
| 1986 | return re; | ||
| 1987 | } | ||
| 1988 | |||
| 1989 | static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, | ||
| 1990 | int reset, char *re, int len) | ||
| 1991 | { | ||
| 1992 | int ret; | ||
| 1993 | |||
| 1994 | if (filter) | ||
| 1995 | ret = ftrace_set_filter(ops, re, len, reset); | ||
| 1996 | else | ||
| 1997 | ret = ftrace_set_notrace(ops, re, len, reset); | ||
| 1998 | |||
| 1999 | return ret; | ||
| 2000 | } | ||
| 2001 | |||
| 2002 | static int __ftrace_function_set_filter(int filter, char *buf, int len, | ||
| 2003 | struct function_filter_data *data) | ||
| 2004 | { | ||
| 2005 | int i, re_cnt, ret; | ||
| 2006 | int *reset; | ||
| 2007 | char **re; | ||
| 2008 | |||
| 2009 | reset = filter ? &data->first_filter : &data->first_notrace; | ||
| 2010 | |||
| 2011 | /* | ||
| 2012 | * The 'ip' field could have multiple filters set, separated | ||
| 2013 | * either by space or comma. We first split the filter and apply | ||
| 2014 | * all pieces separately. | ||
| 2015 | */ | ||
| 2016 | re = ftrace_function_filter_re(buf, len, &re_cnt); | ||
| 2017 | if (!re) | ||
| 2018 | return -EINVAL; | ||
| 2019 | |||
| 2020 | for (i = 0; i < re_cnt; i++) { | ||
| 2021 | ret = ftrace_function_set_regexp(data->ops, filter, *reset, | ||
| 2022 | re[i], strlen(re[i])); | ||
| 2023 | if (ret) | ||
| 2024 | break; | ||
| 2025 | |||
| 2026 | if (*reset) | ||
| 2027 | *reset = 0; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | argv_free(re); | ||
| 2031 | return ret; | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | static int ftrace_function_check_pred(struct filter_pred *pred, int leaf) | ||
| 2035 | { | ||
| 2036 | struct ftrace_event_field *field = pred->field; | ||
| 2037 | |||
| 2038 | if (leaf) { | ||
| 2039 | /* | ||
| 2040 | * Check the leaf predicate for function trace, verify: | ||
| 2041 | * - only '==' and '!=' are used | ||
| 2042 | * - the 'ip' field is used | ||
| 2043 | */ | ||
| 2044 | if ((pred->op != OP_EQ) && (pred->op != OP_NE)) | ||
| 2045 | return -EINVAL; | ||
| 2046 | |||
| 2047 | if (strcmp(field->name, "ip")) | ||
| 2048 | return -EINVAL; | ||
| 2049 | } else { | ||
| 2050 | /* | ||
| 2051 | * Check the non-leaf predicate for function trace, verify: | ||
| 2052 | * - only '||' is used | ||
| 2053 | */ | ||
| 2054 | if (pred->op != OP_OR) | ||
| 2055 | return -EINVAL; | ||
| 2056 | } | ||
| 2057 | |||
| 2058 | return 0; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | static int ftrace_function_set_filter_cb(enum move_type move, | ||
| 2062 | struct filter_pred *pred, | ||
| 2063 | int *err, void *data) | ||
| 2064 | { | ||
| 2065 | /* Check that the node is valid for function trace. */ | ||
| 2066 | if ((move != MOVE_DOWN) || | ||
| 2067 | (pred->left != FILTER_PRED_INVALID)) { | ||
| 2068 | *err = ftrace_function_check_pred(pred, 0); | ||
| 2069 | } else { | ||
| 2070 | *err = ftrace_function_check_pred(pred, 1); | ||
| 2071 | if (*err) | ||
| 2072 | return WALK_PRED_ABORT; | ||
| 2073 | |||
| 2074 | *err = __ftrace_function_set_filter(pred->op == OP_EQ, | ||
| 2075 | pred->regex.pattern, | ||
| 2076 | pred->regex.len, | ||
| 2077 | data); | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT; | ||
| 2081 | } | ||
| 2082 | |||
| 2083 | static int ftrace_function_set_filter(struct perf_event *event, | ||
| 2084 | struct event_filter *filter) | ||
| 2085 | { | ||
| 2086 | struct function_filter_data data = { | ||
| 2087 | .first_filter = 1, | ||
| 2088 | .first_notrace = 1, | ||
| 2089 | .ops = &event->ftrace_ops, | ||
| 2090 | }; | ||
| 2091 | |||
| 2092 | return walk_pred_tree(filter->preds, filter->root, | ||
| 2093 | ftrace_function_set_filter_cb, &data); | ||
| 2094 | } | ||
| 2095 | #else | ||
| 2096 | static int ftrace_function_set_filter(struct perf_event *event, | ||
| 2097 | struct event_filter *filter) | ||
| 2098 | { | ||
| 2099 | return -ENODEV; | ||
| 2100 | } | ||
| 2101 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
| 2102 | |||
| 1953 | int ftrace_profile_set_filter(struct perf_event *event, int event_id, | 2103 | int ftrace_profile_set_filter(struct perf_event *event, int event_id, |
| 1954 | char *filter_str) | 2104 | char *filter_str) |
| 1955 | { | 2105 | { |
| @@ -1970,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
| 1970 | goto out_unlock; | 2120 | goto out_unlock; |
| 1971 | 2121 | ||
| 1972 | err = create_filter(call, filter_str, false, &filter); | 2122 | err = create_filter(call, filter_str, false, &filter); |
| 1973 | if (!err) | 2123 | if (err) |
| 1974 | event->filter = filter; | 2124 | goto free_filter; |
| 2125 | |||
| 2126 | if (ftrace_event_is_function(call)) | ||
| 2127 | err = ftrace_function_set_filter(event, filter); | ||
| 1975 | else | 2128 | else |
| 2129 | event->filter = filter; | ||
| 2130 | |||
| 2131 | free_filter: | ||
| 2132 | if (err || ftrace_event_is_function(call)) | ||
| 1976 | __free_filter(filter); | 2133 | __free_filter(filter); |
| 1977 | 2134 | ||
| 1978 | out_unlock: | 2135 | out_unlock: |
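
For function events the filter grammar is deliberately narrow: only '==' and '!=' on the 'ip' field, joined by '||'. An '==' leaf feeds ftrace_set_filter() and a '!=' leaf feeds ftrace_set_notrace(), and each leaf's pattern may itself contain several function patterns separated by spaces or commas, which ftrace_function_filter_re() normalizes and splits. A user-space sketch of that splitting, with strdup()/strtok_r() standing in for the kernel's kstrndup()/argv_split():

    /* Sketch of the comma/space splitting done above; the pattern
     * string is hypothetical. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            const char *pattern = "do_sys_open,vfs_read sys_nanosleep";
            char *str = strdup(pattern);
            char *sep, *tok, *save;

            if (!str)
                    return 1;

            while ((sep = strchr(str, ',')))        /* ',' -> ' ' */
                    *sep = ' ';

            for (tok = strtok_r(str, " ", &save); tok;
                 tok = strtok_r(NULL, " ", &save))
                    printf("apply piece: %s\n", tok);

            free(str);
            return 0;
    }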
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index bbeec31e0ae3..7b46c9bd22ae 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -18,6 +18,16 @@ | |||
| 18 | #undef TRACE_SYSTEM | 18 | #undef TRACE_SYSTEM |
| 19 | #define TRACE_SYSTEM ftrace | 19 | #define TRACE_SYSTEM ftrace |
| 20 | 20 | ||
| 21 | /* | ||
| 22 | * The FTRACE_ENTRY_REG macro allows an ftrace entry to define a register | ||
| 23 | * function and thus become accessible via perf. | ||
| 24 | */ | ||
| 25 | #undef FTRACE_ENTRY_REG | ||
| 26 | #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ | ||
| 27 | filter, regfn) \ | ||
| 28 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ | ||
| 29 | filter) | ||
| 30 | |||
| 21 | /* not needed for this file */ | 31 | /* not needed for this file */ |
| 22 | #undef __field_struct | 32 | #undef __field_struct |
| 23 | #define __field_struct(type, item) | 33 | #define __field_struct(type, item) |
| @@ -44,21 +54,22 @@ | |||
| 44 | #define F_printk(fmt, args...) fmt, args | 54 | #define F_printk(fmt, args...) fmt, args |
| 45 | 55 | ||
| 46 | #undef FTRACE_ENTRY | 56 | #undef FTRACE_ENTRY |
| 47 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 57 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
| 48 | struct ____ftrace_##name { \ | 58 | struct ____ftrace_##name { \ |
| 49 | tstruct \ | 59 | tstruct \ |
| 50 | }; \ | 60 | }; \ |
| 51 | static void __always_unused ____ftrace_check_##name(void) \ | 61 | static void __always_unused ____ftrace_check_##name(void) \ |
| 52 | { \ | 62 | { \ |
| 53 | struct ____ftrace_##name *__entry = NULL; \ | 63 | struct ____ftrace_##name *__entry = NULL; \ |
| 54 | \ | 64 | \ |
| 55 | /* force compile-time check on F_printk() */ \ | 65 | /* force compile-time check on F_printk() */ \ |
| 56 | printk(print); \ | 66 | printk(print); \ |
| 57 | } | 67 | } |
| 58 | 68 | ||
| 59 | #undef FTRACE_ENTRY_DUP | 69 | #undef FTRACE_ENTRY_DUP |
| 60 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ | 70 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ |
| 61 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 71 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ |
| 72 | filter) | ||
| 62 | 73 | ||
| 63 | #include "trace_entries.h" | 74 | #include "trace_entries.h" |
| 64 | 75 | ||
| @@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 67 | ret = trace_define_field(event_call, #type, #item, \ | 78 | ret = trace_define_field(event_call, #type, #item, \ |
| 68 | offsetof(typeof(field), item), \ | 79 | offsetof(typeof(field), item), \ |
| 69 | sizeof(field.item), \ | 80 | sizeof(field.item), \ |
| 70 | is_signed_type(type), FILTER_OTHER); \ | 81 | is_signed_type(type), filter_type); \ |
| 71 | if (ret) \ | 82 | if (ret) \ |
| 72 | return ret; | 83 | return ret; |
| 73 | 84 | ||
| @@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 77 | offsetof(typeof(field), \ | 88 | offsetof(typeof(field), \ |
| 78 | container.item), \ | 89 | container.item), \ |
| 79 | sizeof(field.container.item), \ | 90 | sizeof(field.container.item), \ |
| 80 | is_signed_type(type), FILTER_OTHER); \ | 91 | is_signed_type(type), filter_type); \ |
| 81 | if (ret) \ | 92 | if (ret) \ |
| 82 | return ret; | 93 | return ret; |
| 83 | 94 | ||
| @@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 91 | ret = trace_define_field(event_call, event_storage, #item, \ | 102 | ret = trace_define_field(event_call, event_storage, #item, \ |
| 92 | offsetof(typeof(field), item), \ | 103 | offsetof(typeof(field), item), \ |
| 93 | sizeof(field.item), \ | 104 | sizeof(field.item), \ |
| 94 | is_signed_type(type), FILTER_OTHER); \ | 105 | is_signed_type(type), filter_type); \ |
| 95 | mutex_unlock(&event_storage_mutex); \ | 106 | mutex_unlock(&event_storage_mutex); \ |
| 96 | if (ret) \ | 107 | if (ret) \ |
| 97 | return ret; \ | 108 | return ret; \ |
| @@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 104 | offsetof(typeof(field), \ | 115 | offsetof(typeof(field), \ |
| 105 | container.item), \ | 116 | container.item), \ |
| 106 | sizeof(field.container.item), \ | 117 | sizeof(field.container.item), \ |
| 107 | is_signed_type(type), FILTER_OTHER); \ | 118 | is_signed_type(type), filter_type); \ |
| 108 | if (ret) \ | 119 | if (ret) \ |
| 109 | return ret; | 120 | return ret; |
| 110 | 121 | ||
| @@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
| 112 | #define __dynamic_array(type, item) \ | 123 | #define __dynamic_array(type, item) \ |
| 113 | ret = trace_define_field(event_call, #type, #item, \ | 124 | ret = trace_define_field(event_call, #type, #item, \ |
| 114 | offsetof(typeof(field), item), \ | 125 | offsetof(typeof(field), item), \ |
| 115 | 0, is_signed_type(type), FILTER_OTHER);\ | 126 | 0, is_signed_type(type), filter_type);\ |
| 116 | if (ret) \ | 127 | if (ret) \ |
| 117 | return ret; | 128 | return ret; |
| 118 | 129 | ||
| 119 | #undef FTRACE_ENTRY | 130 | #undef FTRACE_ENTRY |
| 120 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 131 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
| 121 | int \ | 132 | int \ |
| 122 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | 133 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ |
| 123 | { \ | 134 | { \ |
| 124 | struct struct_name field; \ | 135 | struct struct_name field; \ |
| 125 | int ret; \ | 136 | int ret; \ |
| 137 | int filter_type = filter; \ | ||
| 126 | \ | 138 | \ |
| 127 | tstruct; \ | 139 | tstruct; \ |
| 128 | \ | 140 | \ |
| @@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
| 152 | #undef F_printk | 164 | #undef F_printk |
| 153 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) | 165 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) |
| 154 | 166 | ||
| 155 | #undef FTRACE_ENTRY | 167 | #undef FTRACE_ENTRY_REG |
| 156 | #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ | 168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |
| 169 | regfn) \ | ||
| 157 | \ | 170 | \ |
| 158 | struct ftrace_event_class event_class_ftrace_##call = { \ | 171 | struct ftrace_event_class event_class_ftrace_##call = { \ |
| 159 | .system = __stringify(TRACE_SYSTEM), \ | 172 | .system = __stringify(TRACE_SYSTEM), \ |
| 160 | .define_fields = ftrace_define_fields_##call, \ | 173 | .define_fields = ftrace_define_fields_##call, \ |
| 161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | 174 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ |
| 175 | .reg = regfn, \ | ||
| 162 | }; \ | 176 | }; \ |
| 163 | \ | 177 | \ |
| 164 | struct ftrace_event_call __used event_##call = { \ | 178 | struct ftrace_event_call __used event_##call = { \ |
| @@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = { \ | |||
| 170 | struct ftrace_event_call __used \ | 184 | struct ftrace_event_call __used \ |
| 171 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; | 185 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; |
| 172 | 186 | ||
| 187 | #undef FTRACE_ENTRY | ||
| 188 | #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ | ||
| 189 | FTRACE_ENTRY_REG(call, struct_name, etype, \ | ||
| 190 | PARAMS(tstruct), PARAMS(print), filter, NULL) | ||
| 191 | |||
| 192 | int ftrace_event_is_function(struct ftrace_event_call *call) | ||
| 193 | { | ||
| 194 | return call == &event_function; | ||
| 195 | } | ||
| 196 | |||
| 173 | #include "trace_entries.h" | 197 | #include "trace_entries.h" |
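
The macro layering above keeps most entries untouched: FTRACE_ENTRY_REG() carries the extra regfn argument into the event class, and plain FTRACE_ENTRY() is redefined in terms of it with regfn defaulting to NULL, so only the function event has to name perf_ftrace_event_register. A toy sketch of the same two-level macro idiom (names invented):

    /* Sketch: the plain macro delegates to the _REG variant with a
     * NULL callback, so only special entries spell one out. */
    #include <stdio.h>

    #define ENTRY_REG(name, regfn) \
            struct name##_class { void *reg; } name##_class = { regfn };

    #define ENTRY(name) ENTRY_REG(name, NULL)

    static int demo_regfn;          /* stand-in for a register function */

    ENTRY_REG(function, &demo_regfn)
    ENTRY(wakeup)

    int main(void)
    {
            printf("function reg=%p wakeup reg=%p\n",
                   function_class.reg, wakeup_class.reg);
            return 0;
    }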
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 00d527c945a4..580a05ec926b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
| 1892 | #endif /* CONFIG_PERF_EVENTS */ | 1892 | #endif /* CONFIG_PERF_EVENTS */ |
| 1893 | 1893 | ||
| 1894 | static __kprobes | 1894 | static __kprobes |
| 1895 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | 1895 | int kprobe_register(struct ftrace_event_call *event, |
| 1896 | enum trace_reg type, void *data) | ||
| 1896 | { | 1897 | { |
| 1897 | struct trace_probe *tp = (struct trace_probe *)event->data; | 1898 | struct trace_probe *tp = (struct trace_probe *)event->data; |
| 1898 | 1899 | ||
| @@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | |||
| 1909 | case TRACE_REG_PERF_UNREGISTER: | 1910 | case TRACE_REG_PERF_UNREGISTER: |
| 1910 | disable_trace_probe(tp, TP_FLAG_PROFILE); | 1911 | disable_trace_probe(tp, TP_FLAG_PROFILE); |
| 1911 | return 0; | 1912 | return 0; |
| 1913 | case TRACE_REG_PERF_OPEN: | ||
| 1914 | case TRACE_REG_PERF_CLOSE: | ||
| 1915 | case TRACE_REG_PERF_ADD: | ||
| 1916 | case TRACE_REG_PERF_DEL: | ||
| 1917 | return 0; | ||
| 1912 | #endif | 1918 | #endif |
| 1913 | } | 1919 | } |
| 1914 | return 0; | 1920 | return 0; |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0d6ff3555942..c5a01873567d 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
| 300 | unsigned long mask; | 300 | unsigned long mask; |
| 301 | const char *str; | 301 | const char *str; |
| 302 | const char *ret = p->buffer + p->len; | 302 | const char *ret = p->buffer + p->len; |
| 303 | int i; | 303 | int i, first = 1; |
| 304 | 304 | ||
| 305 | for (i = 0; flag_array[i].name && flags; i++) { | 305 | for (i = 0; flag_array[i].name && flags; i++) { |
| 306 | 306 | ||
| @@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
| 310 | 310 | ||
| 311 | str = flag_array[i].name; | 311 | str = flag_array[i].name; |
| 312 | flags &= ~mask; | 312 | flags &= ~mask; |
| 313 | if (p->len && delim) | 313 | if (!first && delim) |
| 314 | trace_seq_puts(p, delim); | 314 | trace_seq_puts(p, delim); |
| 315 | else | ||
| 316 | first = 0; | ||
| 315 | trace_seq_puts(p, str); | 317 | trace_seq_puts(p, str); |
| 316 | } | 318 | } |
| 317 | 319 | ||
| 318 | /* check for left over flags */ | 320 | /* check for left over flags */ |
| 319 | if (flags) { | 321 | if (flags) { |
| 320 | if (p->len && delim) | 322 | if (!first && delim) |
| 321 | trace_seq_puts(p, delim); | 323 | trace_seq_puts(p, delim); |
| 322 | trace_seq_printf(p, "0x%lx", flags); | 324 | trace_seq_printf(p, "0x%lx", flags); |
| 323 | } | 325 | } |
| @@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
| 344 | break; | 346 | break; |
| 345 | } | 347 | } |
| 346 | 348 | ||
| 347 | if (!p->len) | 349 | if (ret == (const char *)(p->buffer + p->len)) |
| 348 | trace_seq_printf(p, "0x%lx", val); | 350 | trace_seq_printf(p, "0x%lx", val); |
| 349 | 351 | ||
| 350 | trace_seq_putc(p, 0); | 352 | trace_seq_putc(p, 0); |
| @@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, | |||
| 370 | break; | 372 | break; |
| 371 | } | 373 | } |
| 372 | 374 | ||
| 373 | if (!p->len) | 375 | if (ret == (const char *)(p->buffer + p->len)) |
| 374 | trace_seq_printf(p, "0x%llx", val); | 376 | trace_seq_printf(p, "0x%llx", val); |
| 375 | 377 | ||
| 376 | trace_seq_putc(p, 0); | 378 | trace_seq_putc(p, 0); |
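
The ftrace_print_flags_seq() change replaces the "is the buffer non-empty?" test with a local first flag: when the trace_seq already holds earlier output, p->len is non-zero before the first flag name is written, so the old test emitted a stray leading delimiter. A user-space sketch of the corrected loop:

    /* Sketch: join flag names with a delimiter, tracking locally
     * whether anything has been printed by this call. */
    #include <stdio.h>

    static void print_flags(unsigned long flags, const char *delim,
                            const char *const names[])
    {
            int i, first = 1;

            for (i = 0; names[i] && flags; i++) {
                    unsigned long mask = 1UL << i;

                    if (!(flags & mask))
                            continue;
                    flags &= ~mask;
                    if (!first && delim)
                            fputs(delim, stdout);
                    else
                            first = 0;
                    fputs(names[i], stdout);
            }
            if (flags) {                    /* leftover unnamed bits */
                    if (!first && delim)
                            fputs(delim, stdout);
                    printf("0x%lx", flags);
            }
            putchar('\n');
    }

    int main(void)
    {
            const char *const names[] = { "READ", "WRITE", "SYNC", NULL };

            fputs("earlier output: ", stdout);      /* buffer not empty */
            print_flags(0x5 | 0x8, "|", names);     /* READ|SYNC|0x8 */
            return 0;
    }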
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cb654542c1a1..96fc73369099 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); | |||
| 17 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); | 17 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); |
| 18 | 18 | ||
| 19 | static int syscall_enter_register(struct ftrace_event_call *event, | 19 | static int syscall_enter_register(struct ftrace_event_call *event, |
| 20 | enum trace_reg type); | 20 | enum trace_reg type, void *data); |
| 21 | static int syscall_exit_register(struct ftrace_event_call *event, | 21 | static int syscall_exit_register(struct ftrace_event_call *event, |
| 22 | enum trace_reg type); | 22 | enum trace_reg type, void *data); |
| 23 | 23 | ||
| 24 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | 24 | static int syscall_enter_define_fields(struct ftrace_event_call *call); |
| 25 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | 25 | static int syscall_exit_define_fields(struct ftrace_event_call *call); |
| @@ -468,8 +468,8 @@ int __init init_ftrace_syscalls(void) | |||
| 468 | unsigned long addr; | 468 | unsigned long addr; |
| 469 | int i; | 469 | int i; |
| 470 | 470 | ||
| 471 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 471 | syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata), |
| 472 | NR_syscalls, GFP_KERNEL); | 472 | GFP_KERNEL); |
| 473 | if (!syscalls_metadata) { | 473 | if (!syscalls_metadata) { |
| 474 | WARN_ON(1); | 474 | WARN_ON(1); |
| 475 | return -ENOMEM; | 475 | return -ENOMEM; |
| @@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call) | |||
| 649 | #endif /* CONFIG_PERF_EVENTS */ | 649 | #endif /* CONFIG_PERF_EVENTS */ |
| 650 | 650 | ||
| 651 | static int syscall_enter_register(struct ftrace_event_call *event, | 651 | static int syscall_enter_register(struct ftrace_event_call *event, |
| 652 | enum trace_reg type) | 652 | enum trace_reg type, void *data) |
| 653 | { | 653 | { |
| 654 | switch (type) { | 654 | switch (type) { |
| 655 | case TRACE_REG_REGISTER: | 655 | case TRACE_REG_REGISTER: |
| @@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event, | |||
| 664 | case TRACE_REG_PERF_UNREGISTER: | 664 | case TRACE_REG_PERF_UNREGISTER: |
| 665 | perf_sysenter_disable(event); | 665 | perf_sysenter_disable(event); |
| 666 | return 0; | 666 | return 0; |
| 667 | case TRACE_REG_PERF_OPEN: | ||
| 668 | case TRACE_REG_PERF_CLOSE: | ||
| 669 | case TRACE_REG_PERF_ADD: | ||
| 670 | case TRACE_REG_PERF_DEL: | ||
| 671 | return 0; | ||
| 667 | #endif | 672 | #endif |
| 668 | } | 673 | } |
| 669 | return 0; | 674 | return 0; |
| 670 | } | 675 | } |
| 671 | 676 | ||
| 672 | static int syscall_exit_register(struct ftrace_event_call *event, | 677 | static int syscall_exit_register(struct ftrace_event_call *event, |
| 673 | enum trace_reg type) | 678 | enum trace_reg type, void *data) |
| 674 | { | 679 | { |
| 675 | switch (type) { | 680 | switch (type) { |
| 676 | case TRACE_REG_REGISTER: | 681 | case TRACE_REG_REGISTER: |
| @@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event, | |||
| 685 | case TRACE_REG_PERF_UNREGISTER: | 690 | case TRACE_REG_PERF_UNREGISTER: |
| 686 | perf_sysexit_disable(event); | 691 | perf_sysexit_disable(event); |
| 687 | return 0; | 692 | return 0; |
| 693 | case TRACE_REG_PERF_OPEN: | ||
| 694 | case TRACE_REG_PERF_CLOSE: | ||
| 695 | case TRACE_REG_PERF_ADD: | ||
| 696 | case TRACE_REG_PERF_DEL: | ||
| 697 | return 0; | ||
| 688 | #endif | 698 | #endif |
| 689 | } | 699 | } |
| 690 | return 0; | 700 | return 0; |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f1539decd99d..d96ba22dabfa 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
| 26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/jump_label.h> | 28 | #include <linux/static_key.h> |
| 29 | 29 | ||
| 30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; | 30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; |
| 31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; | 31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; |
| @@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
| 256 | { | 256 | { |
| 257 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | 257 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
| 258 | 258 | ||
| 259 | if (elem->regfunc && !jump_label_enabled(&elem->key) && active) | 259 | if (elem->regfunc && !static_key_enabled(&elem->key) && active) |
| 260 | elem->regfunc(); | 260 | elem->regfunc(); |
| 261 | else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) | 261 | else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) |
| 262 | elem->unregfunc(); | 262 | elem->unregfunc(); |
| 263 | 263 | ||
| 264 | /* | 264 | /* |
| @@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
| 269 | * is used. | 269 | * is used. |
| 270 | */ | 270 | */ |
| 271 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | 271 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); |
| 272 | if (active && !jump_label_enabled(&elem->key)) | 272 | if (active && !static_key_enabled(&elem->key)) |
| 273 | jump_label_inc(&elem->key); | 273 | static_key_slow_inc(&elem->key); |
| 274 | else if (!active && jump_label_enabled(&elem->key)) | 274 | else if (!active && static_key_enabled(&elem->key)) |
| 275 | jump_label_dec(&elem->key); | 275 | static_key_slow_dec(&elem->key); |
| 276 | } | 276 | } |
| 277 | 277 | ||
| 278 | /* | 278 | /* |
| @@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
| 283 | */ | 283 | */ |
| 284 | static void disable_tracepoint(struct tracepoint *elem) | 284 | static void disable_tracepoint(struct tracepoint *elem) |
| 285 | { | 285 | { |
| 286 | if (elem->unregfunc && jump_label_enabled(&elem->key)) | 286 | if (elem->unregfunc && static_key_enabled(&elem->key)) |
| 287 | elem->unregfunc(); | 287 | elem->unregfunc(); |
| 288 | 288 | ||
| 289 | if (jump_label_enabled(&elem->key)) | 289 | if (static_key_enabled(&elem->key)) |
| 290 | jump_label_dec(&elem->key); | 290 | static_key_slow_dec(&elem->key); |
| 291 | rcu_assign_pointer(elem->funcs, NULL); | 291 | rcu_assign_pointer(elem->funcs, NULL); |
| 292 | } | 292 | } |
| 293 | 293 | ||
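
The tracepoint.c hunks are a mechanical rename from the jump_label API to the static_key API (jump_label_inc/dec become static_key_slow_inc/dec, jump_label_enabled becomes static_key_enabled); the reg/unreg callbacks still fire on the 0<->1 edges of the key's count. A user-space model of those semantics only: the real implementation patches branch instructions in place rather than reading a counter.

    /* Model, not the kernel implementation: a static key behaves like
     * a reference-counted boolean. */
    #include <stdatomic.h>
    #include <stdio.h>

    struct static_key { atomic_int enabled; };

    static int static_key_enabled(struct static_key *key)
    {
            return atomic_load(&key->enabled) > 0;
    }

    static void static_key_slow_inc(struct static_key *key)
    {
            atomic_fetch_add(&key->enabled, 1);
    }

    static void static_key_slow_dec(struct static_key *key)
    {
            atomic_fetch_sub(&key->enabled, 1);
    }

    int main(void)
    {
            struct static_key key = { 0 };

            if (!static_key_enabled(&key))  /* 0 -> 1 edge: regfunc() */
                    printf("regfunc\n");
            static_key_slow_inc(&key);
            printf("enabled=%d\n", static_key_enabled(&key));
            static_key_slow_dec(&key);      /* 1 -> 0 edge: unregfunc() */
            printf("enabled=%d\n", static_key_enabled(&key));
            return 0;
    }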
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d117262deba3..14bc092fb12c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -3,12 +3,9 @@ | |||
| 3 | * | 3 | * |
| 4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | 4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. |
| 5 | * | 5 | * |
| 6 | * this code detects hard lockups: incidents in where on a CPU | 6 | * Note: Most of this code is borrowed heavily from the original softlockup |
| 7 | * the kernel does not respond to anything except NMI. | 7 | * detector, so thanks to Ingo for the initial implementation. |
| 8 | * | 8 | * Some chunks also taken from the old x86-specific nmi watchdog code, thanks |
| 9 | * Note: Most of this code is borrowed heavily from softlockup.c, | ||
| 10 | * so thanks to Ingo for the initial implementation. | ||
| 11 | * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks | ||
| 12 | * to those contributors as well. | 9 | * to those contributors as well. |
| 13 | */ | 10 | */ |
| 14 | 11 | ||
| @@ -117,9 +114,10 @@ static unsigned long get_sample_period(void) | |||
| 117 | { | 114 | { |
| 118 | /* | 115 | /* |
| 119 | * convert watchdog_thresh from seconds to ns | 116 | * convert watchdog_thresh from seconds to ns |
| 120 | * the divide by 5 is to give hrtimer 5 chances to | 117 | * the divide by 5 is to give hrtimer several chances (two |
| 121 | * increment before the hardlockup detector generates | 118 | * or three with the current relation between the soft |
| 122 | * a warning | 119 | * and hard thresholds) to increment before the |
| 120 | * hardlockup detector generates a warning | ||
| 123 | */ | 121 | */ |
| 124 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); | 122 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); |
| 125 | } | 123 | } |
| @@ -336,9 +334,11 @@ static int watchdog(void *unused) | |||
| 336 | 334 | ||
| 337 | set_current_state(TASK_INTERRUPTIBLE); | 335 | set_current_state(TASK_INTERRUPTIBLE); |
| 338 | /* | 336 | /* |
| 339 | * Run briefly once per second to reset the softlockup timestamp. | 337 | * Run briefly (kicked by the hrtimer callback function) once every |
| 340 | * If this gets delayed for more than 60 seconds then the | 338 | * get_sample_period() seconds (4 seconds by default) to reset the |
| 341 | * debug-printout triggers in watchdog_timer_fn(). | 339 | * softlockup timestamp. If this gets delayed for more than |
| 340 | * 2*watchdog_thresh seconds then the debug-printout triggers in | ||
| 341 | * watchdog_timer_fn(). | ||
| 342 | */ | 342 | */ |
| 343 | while (!kthread_should_stop()) { | 343 | while (!kthread_should_stop()) { |
| 344 | __touch_watchdog(); | 344 | __touch_watchdog(); |
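
Worked numbers for the comment above, assuming the default watchdog_thresh of 10: the softlockup threshold is 2 * watchdog_thresh = 20 s, and dividing by 5 gives the 4 s sample period quoted in the Kconfig help text below. Changing watchdog_thresh through its sysctl rescales all three together.

    /* Sketch of the get_sample_period() arithmetic with the default
     * threshold. */
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    int main(void)
    {
            unsigned int watchdog_thresh = 10;              /* default */
            unsigned int soft_thresh = 2 * watchdog_thresh; /* 20 s */
            unsigned long long period =
                    soft_thresh * (NSEC_PER_SEC / 5);

            printf("sample period: %llu ns (%llu s)\n",
                   period, period / NSEC_PER_SEC);  /* 4000000000, 4 */
            return 0;
    }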
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d27a2aa3e815..05037dc9bde7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -166,18 +166,21 @@ config LOCKUP_DETECTOR | |||
| 166 | hard and soft lockups. | 166 | hard and soft lockups. |
| 167 | 167 | ||
| 168 | Softlockups are bugs that cause the kernel to loop in kernel | 168 | Softlockups are bugs that cause the kernel to loop in kernel |
| 169 | mode for more than 60 seconds, without giving other tasks a | 169 | mode for more than 20 seconds, without giving other tasks a |
| 170 | chance to run. The current stack trace is displayed upon | 170 | chance to run. The current stack trace is displayed upon |
| 171 | detection and the system will stay locked up. | 171 | detection and the system will stay locked up. |
| 172 | 172 | ||
| 173 | Hardlockups are bugs that cause the CPU to loop in kernel mode | 173 | Hardlockups are bugs that cause the CPU to loop in kernel mode |
| 174 | for more than 60 seconds, without letting other interrupts have a | 174 | for more than 10 seconds, without letting other interrupts have a |
| 175 | chance to run. The current stack trace is displayed upon detection | 175 | chance to run. The current stack trace is displayed upon detection |
| 176 | and the system will stay locked up. | 176 | and the system will stay locked up. |
| 177 | 177 | ||
| 178 | The overhead should be minimal. A periodic hrtimer runs to | 178 | The overhead should be minimal. A periodic hrtimer runs to |
| 179 | generate interrupts and kick the watchdog task every 10-12 seconds. | 179 | generate interrupts and kick the watchdog task every 4 seconds. |
| 180 | An NMI is generated every 60 seconds or so to check for hardlockups. | 180 | An NMI is generated every 10 seconds or so to check for hardlockups. |
| 181 | |||
| 182 | The frequency of hrtimer and NMI events and the soft and hard lockup | ||
| 183 | thresholds can be controlled through the sysctl watchdog_thresh. | ||
| 181 | 184 | ||
| 182 | config HARDLOCKUP_DETECTOR | 185 | config HARDLOCKUP_DETECTOR |
| 183 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ | 186 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ |
| @@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC | |||
| 189 | help | 192 | help |
| 190 | Say Y here to enable the kernel to panic on "hard lockups", | 193 | Say Y here to enable the kernel to panic on "hard lockups", |
| 191 | which are bugs that cause the kernel to loop in kernel | 194 | which are bugs that cause the kernel to loop in kernel |
| 192 | mode with interrupts disabled for more than 60 seconds. | 195 | mode with interrupts disabled for more than 10 seconds (configurable |
| 196 | using the watchdog_thresh sysctl). | ||
| 193 | 197 | ||
| 194 | Say N if unsure. | 198 | Say N if unsure. |
| 195 | 199 | ||
| @@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC | |||
| 206 | help | 210 | help |
| 207 | Say Y here to enable the kernel to panic on "soft lockups", | 211 | Say Y here to enable the kernel to panic on "soft lockups", |
| 208 | which are bugs that cause the kernel to loop in kernel | 212 | which are bugs that cause the kernel to loop in kernel |
| 209 | mode for more than 60 seconds, without giving other tasks a | 213 | mode for more than 20 seconds (configurable using the watchdog_thresh |
| 210 | chance to run. | 214 | sysctl), without giving other tasks a chance to run. |
| 211 | 215 | ||
| 212 | The panic can be used in combination with panic_timeout, | 216 | The panic can be used in combination with panic_timeout, |
| 213 | to cause the system to reboot automatically after a | 217 | to cause the system to reboot automatically after a |
diff --git a/net/core/dev.c b/net/core/dev.c index 6ca32f6b3105..6982bfd6a781 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -134,7 +134,7 @@ | |||
| 134 | #include <linux/inetdevice.h> | 134 | #include <linux/inetdevice.h> |
| 135 | #include <linux/cpu_rmap.h> | 135 | #include <linux/cpu_rmap.h> |
| 136 | #include <linux/net_tstamp.h> | 136 | #include <linux/net_tstamp.h> |
| 137 | #include <linux/jump_label.h> | 137 | #include <linux/static_key.h> |
| 138 | #include <net/flow_keys.h> | 138 | #include <net/flow_keys.h> |
| 139 | 139 | ||
| 140 | #include "net-sysfs.h" | 140 | #include "net-sysfs.h" |
| @@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | |||
| 1441 | } | 1441 | } |
| 1442 | EXPORT_SYMBOL(call_netdevice_notifiers); | 1442 | EXPORT_SYMBOL(call_netdevice_notifiers); |
| 1443 | 1443 | ||
| 1444 | static struct jump_label_key netstamp_needed __read_mostly; | 1444 | static struct static_key netstamp_needed __read_mostly; |
| 1445 | #ifdef HAVE_JUMP_LABEL | 1445 | #ifdef HAVE_JUMP_LABEL |
| 1446 | /* We are not allowed to call jump_label_dec() from irq context | 1446 | /* We are not allowed to call static_key_slow_dec() from irq context |
| 1447 | * If net_disable_timestamp() is called from irq context, defer the | 1447 | * If net_disable_timestamp() is called from irq context, defer the |
| 1448 | * jump_label_dec() calls. | 1448 | * static_key_slow_dec() calls. |
| 1449 | */ | 1449 | */ |
| 1450 | static atomic_t netstamp_needed_deferred; | 1450 | static atomic_t netstamp_needed_deferred; |
| 1451 | #endif | 1451 | #endif |
| @@ -1457,12 +1457,12 @@ void net_enable_timestamp(void) | |||
| 1457 | 1457 | ||
| 1458 | if (deferred) { | 1458 | if (deferred) { |
| 1459 | while (--deferred) | 1459 | while (--deferred) |
| 1460 | jump_label_dec(&netstamp_needed); | 1460 | static_key_slow_dec(&netstamp_needed); |
| 1461 | return; | 1461 | return; |
| 1462 | } | 1462 | } |
| 1463 | #endif | 1463 | #endif |
| 1464 | WARN_ON(in_interrupt()); | 1464 | WARN_ON(in_interrupt()); |
| 1465 | jump_label_inc(&netstamp_needed); | 1465 | static_key_slow_inc(&netstamp_needed); |
| 1466 | } | 1466 | } |
| 1467 | EXPORT_SYMBOL(net_enable_timestamp); | 1467 | EXPORT_SYMBOL(net_enable_timestamp); |
| 1468 | 1468 | ||
| @@ -1474,19 +1474,19 @@ void net_disable_timestamp(void) | |||
| 1474 | return; | 1474 | return; |
| 1475 | } | 1475 | } |
| 1476 | #endif | 1476 | #endif |
| 1477 | jump_label_dec(&netstamp_needed); | 1477 | static_key_slow_dec(&netstamp_needed); |
| 1478 | } | 1478 | } |
| 1479 | EXPORT_SYMBOL(net_disable_timestamp); | 1479 | EXPORT_SYMBOL(net_disable_timestamp); |
| 1480 | 1480 | ||
| 1481 | static inline void net_timestamp_set(struct sk_buff *skb) | 1481 | static inline void net_timestamp_set(struct sk_buff *skb) |
| 1482 | { | 1482 | { |
| 1483 | skb->tstamp.tv64 = 0; | 1483 | skb->tstamp.tv64 = 0; |
| 1484 | if (static_branch(&netstamp_needed)) | 1484 | if (static_key_false(&netstamp_needed)) |
| 1485 | __net_timestamp(skb); | 1485 | __net_timestamp(skb); |
| 1486 | } | 1486 | } |
| 1487 | 1487 | ||
| 1488 | #define net_timestamp_check(COND, SKB) \ | 1488 | #define net_timestamp_check(COND, SKB) \ |
| 1489 | if (static_branch(&netstamp_needed)) { \ | 1489 | if (static_key_false(&netstamp_needed)) { \ |
| 1490 | if ((COND) && !(SKB)->tstamp.tv64) \ | 1490 | if ((COND) && !(SKB)->tstamp.tv64) \ |
| 1491 | __net_timestamp(SKB); \ | 1491 | __net_timestamp(SKB); \ |
| 1492 | } \ | 1492 | } \ |
| @@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash); | |||
| 2660 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 2660 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
| 2661 | EXPORT_SYMBOL(rps_sock_flow_table); | 2661 | EXPORT_SYMBOL(rps_sock_flow_table); |
| 2662 | 2662 | ||
| 2663 | struct jump_label_key rps_needed __read_mostly; | 2663 | struct static_key rps_needed __read_mostly; |
| 2664 | 2664 | ||
| 2665 | static struct rps_dev_flow * | 2665 | static struct rps_dev_flow * |
| 2666 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2666 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, |
| @@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb) | |||
| 2945 | 2945 | ||
| 2946 | trace_netif_rx(skb); | 2946 | trace_netif_rx(skb); |
| 2947 | #ifdef CONFIG_RPS | 2947 | #ifdef CONFIG_RPS |
| 2948 | if (static_branch(&rps_needed)) { | 2948 | if (static_key_false(&rps_needed)) { |
| 2949 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2949 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
| 2950 | int cpu; | 2950 | int cpu; |
| 2951 | 2951 | ||
| @@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb) | |||
| 3309 | return NET_RX_SUCCESS; | 3309 | return NET_RX_SUCCESS; |
| 3310 | 3310 | ||
| 3311 | #ifdef CONFIG_RPS | 3311 | #ifdef CONFIG_RPS |
| 3312 | if (static_branch(&rps_needed)) { | 3312 | if (static_key_false(&rps_needed)) { |
| 3313 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 3313 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
| 3314 | int cpu, ret; | 3314 | int cpu, ret; |
| 3315 | 3315 | ||
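
The HAVE_JUMP_LABEL block above keeps the old deferral trick under the new names: static_key_slow_dec() must not run in irq context, so net_disable_timestamp() counts the decrement into an atomic instead and the next process-context net_enable_timestamp() drains it. A user-space model of the pattern, with in_interrupt() faked by a flag and the key modeled as a plain counter:

    /* Model only: deferring an operation that is unsafe in irq
     * context until the next safe call site. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int key_count;    /* models netstamp_needed */
    static atomic_int deferred;     /* models netstamp_needed_deferred */
    static int in_irq_flag;         /* stand-in for in_interrupt() */

    static void net_disable_timestamp(void)
    {
            if (in_irq_flag) {      /* unsafe to dec here: defer */
                    atomic_fetch_add(&deferred, 1);
                    return;
            }
            atomic_fetch_sub(&key_count, 1);
    }

    static void net_enable_timestamp(void)
    {
            int d = atomic_exchange(&deferred, 0);

            while (d--)             /* drain deferred decrements */
                    atomic_fetch_sub(&key_count, 1);
            atomic_fetch_add(&key_count, 1);
    }

    int main(void)
    {
            net_enable_timestamp();         /* count 1 */
            in_irq_flag = 1;
            net_disable_timestamp();        /* deferred 1 */
            in_irq_flag = 0;
            net_enable_timestamp();         /* drain, then inc: count 1 */
            printf("count=%d deferred=%d\n",
                   atomic_load(&key_count), atomic_load(&deferred));
            return 0;
    }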
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a1727cda03d7..495586232aa1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
| @@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, | |||
| 608 | spin_unlock(&rps_map_lock); | 608 | spin_unlock(&rps_map_lock); |
| 609 | 609 | ||
| 610 | if (map) | 610 | if (map) |
| 611 | jump_label_inc(&rps_needed); | 611 | static_key_slow_inc(&rps_needed); |
| 612 | if (old_map) { | 612 | if (old_map) { |
| 613 | kfree_rcu(old_map, rcu); | 613 | kfree_rcu(old_map, rcu); |
| 614 | jump_label_dec(&rps_needed); | 614 | static_key_slow_dec(&rps_needed); |
| 615 | } | 615 | } |
| 616 | free_cpumask_var(mask); | 616 | free_cpumask_var(mask); |
| 617 | return len; | 617 | return len; |
diff --git a/net/core/sock.c b/net/core/sock.c index 02f8dfe320b7..95aff9c7419b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -111,7 +111,7 @@ | |||
| 111 | #include <linux/init.h> | 111 | #include <linux/init.h> |
| 112 | #include <linux/highmem.h> | 112 | #include <linux/highmem.h> |
| 113 | #include <linux/user_namespace.h> | 113 | #include <linux/user_namespace.h> |
| 114 | #include <linux/jump_label.h> | 114 | #include <linux/static_key.h> |
| 115 | #include <linux/memcontrol.h> | 115 | #include <linux/memcontrol.h> |
| 116 | 116 | ||
| 117 | #include <asm/uaccess.h> | 117 | #include <asm/uaccess.h> |
| @@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
| 184 | static struct lock_class_key af_family_keys[AF_MAX]; | 184 | static struct lock_class_key af_family_keys[AF_MAX]; |
| 185 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 185 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
| 186 | 186 | ||
| 187 | struct jump_label_key memcg_socket_limit_enabled; | 187 | struct static_key memcg_socket_limit_enabled; |
| 188 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | 188 | EXPORT_SYMBOL(memcg_socket_limit_enabled); |
| 189 | 189 | ||
| 190 | /* | 190 | /* |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d05559d4d9cd..0c2850874254 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
| @@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, | |||
| 69 | if (sock_table != orig_sock_table) { | 69 | if (sock_table != orig_sock_table) { |
| 70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | 70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); |
| 71 | if (sock_table) | 71 | if (sock_table) |
| 72 | jump_label_inc(&rps_needed); | 72 | static_key_slow_inc(&rps_needed); |
| 73 | if (orig_sock_table) { | 73 | if (orig_sock_table) { |
| 74 | jump_label_dec(&rps_needed); | 74 | static_key_slow_dec(&rps_needed); |
| 75 | synchronize_rcu(); | 75 | synchronize_rcu(); |
| 76 | vfree(orig_sock_table); | 76 | vfree(orig_sock_table); |
| 77 | } | 77 | } |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..602fb305365f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c | |||
| @@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
| 111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | 111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); |
| 112 | 112 | ||
| 113 | if (val != RESOURCE_MAX) | 113 | if (val != RESOURCE_MAX) |
| 114 | jump_label_dec(&memcg_socket_limit_enabled); | 114 | static_key_slow_dec(&memcg_socket_limit_enabled); |
| 115 | } | 115 | } |
| 116 | EXPORT_SYMBOL(tcp_destroy_cgroup); | 116 | EXPORT_SYMBOL(tcp_destroy_cgroup); |
| 117 | 117 | ||
| @@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |||
| 143 | net->ipv4.sysctl_tcp_mem[i]); | 143 | net->ipv4.sysctl_tcp_mem[i]); |
| 144 | 144 | ||
| 145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) | 145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) |
| 146 | jump_label_dec(&memcg_socket_limit_enabled); | 146 | static_key_slow_dec(&memcg_socket_limit_enabled); |
| 147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) | 147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) |
| 148 | jump_label_inc(&memcg_socket_limit_enabled); | 148 | static_key_slow_inc(&memcg_socket_limit_enabled); |
| 149 | 149 | ||
| 150 | return 0; | 150 | return 0; |
| 151 | } | 151 | } |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b4e8ff05b301..e1b7e051332e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
| @@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; | |||
| 56 | EXPORT_SYMBOL(nf_hooks); | 56 | EXPORT_SYMBOL(nf_hooks); |
| 57 | 57 | ||
| 58 | #if defined(CONFIG_JUMP_LABEL) | 58 | #if defined(CONFIG_JUMP_LABEL) |
| 59 | struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 59 | struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
| 60 | EXPORT_SYMBOL(nf_hooks_needed); | 60 | EXPORT_SYMBOL(nf_hooks_needed); |
| 61 | #endif | 61 | #endif |
| 62 | 62 | ||
| @@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg) | |||
| 77 | list_add_rcu(®->list, elem->list.prev); | 77 | list_add_rcu(®->list, elem->list.prev); |
| 78 | mutex_unlock(&nf_hook_mutex); | 78 | mutex_unlock(&nf_hook_mutex); |
| 79 | #if defined(CONFIG_JUMP_LABEL) | 79 | #if defined(CONFIG_JUMP_LABEL) |
| 80 | jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); | 80 | static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); |
| 81 | #endif | 81 | #endif |
| 82 | return 0; | 82 | return 0; |
| 83 | } | 83 | } |
| @@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) | |||
| 89 | list_del_rcu(®->list); | 89 | list_del_rcu(®->list); |
| 90 | mutex_unlock(&nf_hook_mutex); | 90 | mutex_unlock(&nf_hook_mutex); |
| 91 | #if defined(CONFIG_JUMP_LABEL) | 91 | #if defined(CONFIG_JUMP_LABEL) |
| 92 | jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); | 92 | static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); |
| 93 | #endif | 93 | #endif |
| 94 | synchronize_net(); | 94 | synchronize_net(); |
| 95 | } | 95 | } |
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 4626a398836a..ca600e09c8d4 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile | |||
| @@ -1,3 +1,10 @@ | |||
| 1 | OUTPUT := ./ | ||
| 2 | ifeq ("$(origin O)", "command line") | ||
| 3 | ifneq ($(O),) | ||
| 4 | OUTPUT := $(O)/ | ||
| 5 | endif | ||
| 6 | endif | ||
| 7 | |||
| 1 | MAN1_TXT= \ | 8 | MAN1_TXT= \ |
| 2 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ | 9 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ |
| 3 | $(wildcard perf-*.txt)) \ | 10 | $(wildcard perf-*.txt)) \ |
| @@ -6,10 +13,11 @@ MAN5_TXT= | |||
| 6 | MAN7_TXT= | 13 | MAN7_TXT= |
| 7 | 14 | ||
| 8 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) | 15 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) |
| 9 | MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) | 16 | _MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) |
| 10 | MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) | 17 | _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) |
| 11 | 18 | ||
| 12 | DOC_HTML=$(MAN_HTML) | 19 | MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) |
| 20 | MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) | ||
| 13 | 21 | ||
| 14 | ARTICLES = | 22 | ARTICLES = |
| 15 | # with their own formatting rules. | 23 | # with their own formatting rules. |
| @@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica | |||
| 18 | SP_ARTICLES += $(API_DOCS) | 26 | SP_ARTICLES += $(API_DOCS) |
| 19 | SP_ARTICLES += technical/api-index | 27 | SP_ARTICLES += technical/api-index |
| 20 | 28 | ||
| 21 | DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | 29 | _DOC_HTML = $(_MAN_HTML) |
| 30 | _DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | ||
| 31 | DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML)) | ||
| 22 | 32 | ||
| 23 | DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) | 33 | _DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) |
| 24 | DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) | 34 | _DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) |
| 25 | DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) | 35 | _DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) |
| 36 | |||
| 37 | DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) | ||
| 38 | DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) | ||
| 39 | DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7)) | ||
| 26 | 40 | ||
| 27 | # Make the path relative to DESTDIR, not prefix | 41 | # Make the path relative to DESTDIR, not prefix |
| 28 | ifndef DESTDIR | 42 | ifndef DESTDIR |
| @@ -150,9 +164,9 @@ man1: $(DOC_MAN1) | |||
| 150 | man5: $(DOC_MAN5) | 164 | man5: $(DOC_MAN5) |
| 151 | man7: $(DOC_MAN7) | 165 | man7: $(DOC_MAN7) |
| 152 | 166 | ||
| 153 | info: perf.info perfman.info | 167 | info: $(OUTPUT)perf.info $(OUTPUT)perfman.info |
| 154 | 168 | ||
| 155 | pdf: user-manual.pdf | 169 | pdf: $(OUTPUT)user-manual.pdf |
| 156 | 170 | ||
| 157 | install: install-man | 171 | install: install-man |
| 158 | 172 | ||
| @@ -166,7 +180,7 @@ install-man: man | |||
| 166 | 180 | ||
| 167 | install-info: info | 181 | install-info: info |
| 168 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) | 182 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) |
| 169 | $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) | 183 | $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) |
| 170 | if test -r $(DESTDIR)$(infodir)/dir; then \ | 184 | if test -r $(DESTDIR)$(infodir)/dir; then \ |
| 171 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ | 185 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ |
| 172 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ | 186 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ |
| @@ -176,7 +190,7 @@ install-info: info | |||
| 176 | 190 | ||
| 177 | install-pdf: pdf | 191 | install-pdf: pdf |
| 178 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) | 192 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) |
| 179 | $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) | 193 | $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) |
| 180 | 194 | ||
| 181 | #install-html: html | 195 | #install-html: html |
| 182 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) | 196 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) |
| @@ -189,14 +203,14 @@ install-pdf: pdf | |||
| 189 | # | 203 | # |
| 190 | # Determine "include::" file references in asciidoc files. | 204 | # Determine "include::" file references in asciidoc files. |
| 191 | # | 205 | # |
| 192 | doc.dep : $(wildcard *.txt) build-docdep.perl | 206 | $(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl |
| 193 | $(QUIET_GEN)$(RM) $@+ $@ && \ | 207 | $(QUIET_GEN)$(RM) $@+ $@ && \ |
| 194 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ | 208 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ |
| 195 | mv $@+ $@ | 209 | mv $@+ $@ |
| 196 | 210 | ||
| 197 | -include doc.dep | 211 | -include $(OUTPUT)doc.dep |
| 198 | 212 | ||
| 199 | cmds_txt = cmds-ancillaryinterrogators.txt \ | 213 | _cmds_txt = cmds-ancillaryinterrogators.txt \ |
| 200 | cmds-ancillarymanipulators.txt \ | 214 | cmds-ancillarymanipulators.txt \ |
| 201 | cmds-mainporcelain.txt \ | 215 | cmds-mainporcelain.txt \ |
| 202 | cmds-plumbinginterrogators.txt \ | 216 | cmds-plumbinginterrogators.txt \ |
| @@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ | |||
| 205 | cmds-synchelpers.txt \ | 219 | cmds-synchelpers.txt \ |
| 206 | cmds-purehelpers.txt \ | 220 | cmds-purehelpers.txt \ |
| 207 | cmds-foreignscminterface.txt | 221 | cmds-foreignscminterface.txt |
| 222 | cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt)) | ||
| 208 | 223 | ||
| 209 | $(cmds_txt): cmd-list.made | 224 | $(cmds_txt): $(OUTPUT)cmd-list.made |
| 210 | 225 | ||
| 211 | cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) | 226 | $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) |
| 212 | $(QUIET_GEN)$(RM) $@ && \ | 227 | $(QUIET_GEN)$(RM) $@ && \ |
| 213 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ | 228 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ |
| 214 | date >$@ | 229 | date >$@ |
| 215 | 230 | ||
| 216 | clean: | 231 | clean: |
| 217 | $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 | 232 | $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) |
| 218 | $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info | 233 | $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) |
| 219 | $(RM) howto-index.txt howto/*.html doc.dep | 234 | $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) |
| 220 | $(RM) technical/api-*.html technical/api-index.txt | 235 | $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ |
| 221 | $(RM) $(cmds_txt) *.made | 236 | $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info |
| 222 | 237 | $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep | |
| 223 | $(MAN_HTML): %.html : %.txt | 238 | $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt |
| 239 | $(RM) $(cmds_txt) $(OUTPUT)*.made | ||
| 240 | |||
| 241 | $(MAN_HTML): $(OUTPUT)%.html : %.txt | ||
| 224 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 242 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
| 225 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ | 243 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ |
| 226 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 244 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
| 227 | mv $@+ $@ | 245 | mv $@+ $@ |
| 228 | 246 | ||
| 229 | %.1 %.5 %.7 : %.xml | 247 | $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml |
| 230 | $(QUIET_XMLTO)$(RM) $@ && \ | 248 | $(QUIET_XMLTO)$(RM) $@ && \ |
| 231 | xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< | 249 | xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< |
| 232 | 250 | ||
| 233 | %.xml : %.txt | 251 | $(OUTPUT)%.xml : %.txt |
| 234 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 252 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
| 235 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ | 253 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ |
| 236 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 254 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
| @@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt | |||
| 239 | XSLT = docbook.xsl | 257 | XSLT = docbook.xsl |
| 240 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css | 258 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css |
| 241 | 259 | ||
| 242 | user-manual.html: user-manual.xml | 260 | $(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml |
| 243 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< | 261 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< |
| 244 | 262 | ||
| 245 | perf.info: user-manual.texi | 263 | $(OUTPUT)perf.info: $(OUTPUT)user-manual.texi |
| 246 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi | 264 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi |
| 247 | 265 | ||
| 248 | user-manual.texi: user-manual.xml | 266 | $(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml |
| 249 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 267 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
| 250 | $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ | 268 | $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ |
| 251 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ | 269 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ |
| 252 | rm $@++ && \ | 270 | rm $@++ && \ |
| 253 | mv $@+ $@ | 271 | mv $@+ $@ |
| 254 | 272 | ||
| 255 | user-manual.pdf: user-manual.xml | 273 | $(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml |
| 256 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ | 274 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ |
| 257 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ | 275 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ |
| 258 | mv $@+ $@ | 276 | mv $@+ $@ |
| 259 | 277 | ||
| 260 | perfman.texi: $(MAN_XML) cat-texi.perl | 278 | $(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl |
| 261 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 279 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
| 262 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ | 280 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ |
| 263 | --to-stdout $(xml) &&) true) > $@++ && \ | 281 | --to-stdout $(xml) &&) true) > $@++ && \ |
| @@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl | |||
| 265 | rm $@++ && \ | 283 | rm $@++ && \ |
| 266 | mv $@+ $@ | 284 | mv $@+ $@ |
| 267 | 285 | ||
| 268 | perfman.info: perfman.texi | 286 | $(OUTPUT)perfman.info: $(OUTPUT)perfman.texi |
| 269 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi | 287 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi |
| 270 | 288 | ||
| 271 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml | 289 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml |
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index d6b2a4f2108b..c7f5f55634ac 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt | |||
| @@ -8,7 +8,7 @@ perf-lock - Analyze lock events | |||
| 8 | SYNOPSIS | 8 | SYNOPSIS |
| 9 | -------- | 9 | -------- |
| 10 | [verse] | 10 | [verse] |
| 11 | 'perf lock' {record|report|trace} | 11 | 'perf lock' {record|report|script|info} |
| 12 | 12 | ||
| 13 | DESCRIPTION | 13 | DESCRIPTION |
| 14 | ----------- | 14 | ----------- |
| @@ -20,10 +20,13 @@ and statistics with this 'perf lock' command. | |||
| 20 | produces the file "perf.data" which contains tracing | 20 | produces the file "perf.data" which contains tracing |
| 21 | results of lock events. | 21 | results of lock events. |
| 22 | 22 | ||
| 23 | 'perf lock trace' shows raw lock events. | ||
| 24 | |||
| 25 | 'perf lock report' reports statistical data. | 23 | 'perf lock report' reports statistical data. |
| 26 | 24 | ||
| 25 | 'perf lock script' shows raw lock events. | ||
| 26 | |||
| 27 | 'perf lock info' shows metadata like threads or addresses | ||
| 28 | of lock instances. | ||
| 29 | |||
| 27 | COMMON OPTIONS | 30 | COMMON OPTIONS |
| 28 | -------------- | 31 | -------------- |
| 29 | 32 | ||
| @@ -47,6 +50,17 @@ REPORT OPTIONS | |||
| 47 | Sorting key. Possible values: acquired (default), contended, | 50 | Sorting key. Possible values: acquired (default), contended, |
| 48 | wait_total, wait_max, wait_min. | 51 | wait_total, wait_max, wait_min. |
| 49 | 52 | ||
| 53 | INFO OPTIONS | ||
| 54 | ------------ | ||
| 55 | |||
| 56 | -t:: | ||
| 57 | --threads:: | ||
| 58 | dump thread list in perf.data | ||
| 59 | |||
| 60 | -m:: | ||
| 61 | --map:: | ||
| 62 | dump map of lock instances (address:name table) | ||
| 63 | |||
| 50 | SEE ALSO | 64 | SEE ALSO |
| 51 | -------- | 65 | -------- |
| 52 | linkperf:perf[1] | 66 | linkperf:perf[1] |
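The four subcommands in the updated synopsis chain together naturally. A sketch of the workflow, with a hypothetical workload standing in for a real command:

    perf lock record -- ./my-workload    # capture lock events into perf.data
    perf lock report -k wait_max         # statistics, sorted by longest wait
    perf lock script                     # raw lock events (replaces 'perf lock trace')
    perf lock info -t -m                 # thread list plus address:name map of lock instances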
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2937f7e14bb7..a1386b2fff00 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
| @@ -52,11 +52,15 @@ OPTIONS | |||
| 52 | 52 | ||
| 53 | -p:: | 53 | -p:: |
| 54 | --pid=:: | 54 | --pid=:: |
| 55 | Record events on existing process ID. | 55 | Record events on existing process ID (comma separated list). |
| 56 | 56 | ||
| 57 | -t:: | 57 | -t:: |
| 58 | --tid=:: | 58 | --tid=:: |
| 59 | Record events on existing thread ID. | 59 | Record events on existing thread ID (comma separated list). |
| 60 | |||
| 61 | -u:: | ||
| 62 | --uid=:: | ||
| 63 | Record events in threads owned by uid. Name or number. | ||
| 60 | 64 | ||
| 61 | -r:: | 65 | -r:: |
| 62 | --realtime=:: | 66 | --realtime=:: |
| @@ -148,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha | |||
| 148 | corresponding events, i.e., they always refer to events defined earlier on the command | 152 | corresponding events, i.e., they always refer to events defined earlier on the command |
| 149 | line. | 153 | line. |
| 150 | 154 | ||
| 155 | -b:: | ||
| 156 | --branch-any:: | ||
| 157 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
| 158 | This is a shortcut for --branch-filter any. See --branch-filter for more info. | ||
| 159 | |||
| 160 | -j:: | ||
| 161 | --branch-filter:: | ||
| 162 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
| 163 | taken branches. The number of branches captured with each sample depends on the | ||
| 164 | underlying hardware, the type of branches of interest, and the executed code. | ||
| 165 | It is possible to select the types of branches captured by enabling filters. The | ||
| 166 | following filters are defined: | ||
| 167 | |||
| 168 | - any: any type of branches | ||
| 169 | - any_call: any function call or system call | ||
| 170 | - any_ret: any function return or system call return | ||
| 171 | - ind_call: any indirect branch | ||
| 172 | - u: only when the branch target is at the user level | ||
| 173 | - k: only when the branch target is in the kernel | ||
| 174 | - hv: only when the target is at the hypervisor level | ||
| 175 | |||
| 176 | + | ||
| 177 | The option requires at least one branch type among any, any_call, any_ret, ind_call. | ||
| 178 | The privilege levels may be omitted, in which case the privilege levels of the associated | ||
| 179 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
| 180 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
| 181 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
| 182 | The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k | ||
| 183 | Note that this feature may not be available on all processors. | ||
| 184 | |||
| 151 | SEE ALSO | 185 | SEE ALSO |
| 152 | -------- | 186 | -------- |
| 153 | linkperf:perf-stat[1], linkperf:perf-list[1] | 187 | linkperf:perf-stat[1], linkperf:perf-list[1] |
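At the perf_event_open() level, a filter list such as --branch-filter any_ret,u,k is folded into the new branch_sample_type field of the event attribute. A minimal C sketch of the equivalent setup, assuming headers that carry the PERF_SAMPLE_BRANCH_* definitions added in this cycle (constants as used in builtin-record.c later in this series):

    #include <string.h>
    #include <linux/perf_event.h>

    /* Roughly what "perf record --branch-filter any_ret,u,k -e cycles" requests. */
    static void setup_branch_sampling(struct perf_event_attr *attr)
    {
    	memset(attr, 0, sizeof(*attr));
    	attr->size        = sizeof(*attr);
    	attr->type        = PERF_TYPE_HARDWARE;
    	attr->config      = PERF_COUNT_HW_CPU_CYCLES;
    	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
    	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY_RETURN |
    	                           PERF_SAMPLE_BRANCH_USER |
    	                           PERF_SAMPLE_BRANCH_KERNEL;
    }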
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712e..87feeee8b90c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
| @@ -153,6 +153,16 @@ OPTIONS | |||
| 153 | information which may be very large and thus may clutter the display. | 153 | information which may be very large and thus may clutter the display. |
| 154 | It currently includes: cpu and numa topology of the host system. | 154 | It currently includes: cpu and numa topology of the host system. |
| 155 | 155 | ||
| 156 | -b:: | ||
| 157 | --branch-stack:: | ||
| 158 | Use the addresses of sampled taken branches instead of the instruction | ||
| 159 | address to build the histograms. To generate meaningful output, the | ||
| 160 | perf.data file must have been obtained using perf record -b or | ||
| 161 | perf record --branch-filter xxx where xxx is a branch filter option. | ||
| 162 | perf report is able to auto-detect whether a perf.data file contains | ||
| 163 | branch stacks and it will automatically switch to the branch view mode, | ||
| 164 | unless --no-branch-stack is used. | ||
| 165 | |||
| 156 | SEE ALSO | 166 | SEE ALSO |
| 157 | -------- | 167 | -------- |
| 158 | linkperf:perf-stat[1], linkperf:perf-annotate[1] | 168 | linkperf:perf-stat[1], linkperf:perf-annotate[1] |
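Record and report pair up as follows (workload name hypothetical):

    perf record -b ./my-workload      # -b is shorthand for --branch-filter any
    perf report                       # branch stacks detected, branch view chosen
    perf report --no-branch-stack     # force the usual instruction-address view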
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2f6cef43da25..e9cbfcddfa3f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
| @@ -115,7 +115,7 @@ OPTIONS | |||
| 115 | -f:: | 115 | -f:: |
| 116 | --fields:: | 116 | --fields:: |
| 117 | Comma separated list of fields to print. Options are: | 117 | Comma separated list of fields to print. Options are: |
| 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. | 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff. |
| 119 | Field list can be prepended with the type, trace, sw or hw, | 119 | Field list can be prepended with the type, trace, sw or hw, |
| 120 | to indicate to which event type the field list applies. | 120 | to indicate to which event type the field list applies. |
| 121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace | 121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace |
| @@ -200,6 +200,9 @@ OPTIONS | |||
| 200 | It currently includes: cpu and numa topology of the host system. | 200 | It currently includes: cpu and numa topology of the host system. |
| 201 | It can only be used with the perf script report mode. | 201 | It can only be used with the perf script report mode. |
| 202 | 202 | ||
| 203 | --show-kernel-path:: | ||
| 204 | Try to resolve the path of [kernel.kallsyms] | ||
| 205 | |||
| 203 | SEE ALSO | 206 | SEE ALSO |
| 204 | -------- | 207 | -------- |
| 205 | linkperf:perf-record[1], linkperf:perf-script-perl[1], | 208 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
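The new field and option compose with the existing syntax, e.g. (event-type prefix optional, as documented above):

    perf script -f comm,tid,time,ip,sym,symoff --show-kernel-path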
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8966b9ab2014..2fa173b51970 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
| @@ -35,11 +35,11 @@ OPTIONS | |||
| 35 | child tasks do not inherit counters | 35 | child tasks do not inherit counters |
| 36 | -p:: | 36 | -p:: |
| 37 | --pid=<pid>:: | 37 | --pid=<pid>:: |
| 38 | stat events on existing process id | 38 | stat events on existing process id (comma separated list) |
| 39 | 39 | ||
| 40 | -t:: | 40 | -t:: |
| 41 | --tid=<tid>:: | 41 | --tid=<tid>:: |
| 42 | stat events on existing thread id | 42 | stat events on existing thread id (comma separated list) |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | -a:: | 45 | -a:: |
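With the comma-separated form a single run can count across several existing tasks (pids hypothetical):

    perf stat -p 1234,5678     # aggregate counts for two processes until interrupted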
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b1a5bbbfebef..4a5680cb242e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
| @@ -72,11 +72,15 @@ Default is to monitor all CPUS. | |||
| 72 | 72 | ||
| 73 | -p <pid>:: | 73 | -p <pid>:: |
| 74 | --pid=<pid>:: | 74 | --pid=<pid>:: |
| 75 | Profile events on existing Process ID. | 75 | Profile events on existing Process ID (comma separated list). |
| 76 | 76 | ||
| 77 | -t <tid>:: | 77 | -t <tid>:: |
| 78 | --tid=<tid>:: | 78 | --tid=<tid>:: |
| 79 | Profile events on existing thread ID. | 79 | Profile events on existing thread ID (comma separated list). |
| 80 | |||
| 81 | -u:: | ||
| 82 | --uid=:: | ||
| 83 | Profile events in threads owned by uid. Name or number. | ||
| 80 | 84 | ||
| 81 | -r <priority>:: | 85 | -r <priority>:: |
| 82 | --realtime=<priority>:: | 86 | --realtime=<priority>:: |
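As with record and stat, the thread set is fixed before counters are opened; the uid may be numeric or symbolic (values hypothetical):

    perf top -u 1000
    perf top -u postgres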
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 1078c5fadd5b..5476bc0a1eac 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST | |||
| @@ -9,6 +9,7 @@ lib/rbtree.c | |||
| 9 | include/linux/swab.h | 9 | include/linux/swab.h |
| 10 | arch/*/include/asm/unistd*.h | 10 | arch/*/include/asm/unistd*.h |
| 11 | arch/*/lib/memcpy*.S | 11 | arch/*/lib/memcpy*.S |
| 12 | arch/*/lib/memset*.S | ||
| 12 | include/linux/poison.h | 13 | include/linux/poison.h |
| 13 | include/linux/magic.h | 14 | include/linux/magic.h |
| 14 | include/linux/hw_breakpoint.h | 15 | include/linux/hw_breakpoint.h |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8a4b9bccf8b2..74fd7f89208a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
| @@ -15,6 +15,16 @@ endif | |||
| 15 | 15 | ||
| 16 | # Define V to have a more verbose compile. | 16 | # Define V to have a more verbose compile. |
| 17 | # | 17 | # |
| 18 | # Define O to save output files in a separate directory. | ||
| 19 | # | ||
| 20 | # Define ARCH as name of target architecture if you want cross-builds. | ||
| 21 | # | ||
| 22 | # Define CROSS_COMPILE as prefix name of compiler if you want cross-builds. | ||
| 23 | # | ||
| 24 | # Define NO_LIBPERL to disable perl script extension. | ||
| 25 | # | ||
| 26 | # Define NO_LIBPYTHON to disable python script extension. | ||
| 27 | # | ||
| 18 | # Define PYTHON to point to the python binary if the default | 28 | # Define PYTHON to point to the python binary if the default |
| 19 | # `python' is not correct; for example: PYTHON=python2 | 29 | # `python' is not correct; for example: PYTHON=python2 |
| 20 | # | 30 | # |
| @@ -32,6 +42,10 @@ endif | |||
| 32 | # Define NO_DWARF if you do not want debug-info analysis feature at all. | 42 | # Define NO_DWARF if you do not want debug-info analysis feature at all. |
| 33 | # | 43 | # |
| 34 | # Define WERROR=0 to disable treating any warnings as errors. | 44 | # Define WERROR=0 to disable treating any warnings as errors. |
| 45 | # | ||
| 46 | # Define NO_NEWT if you do not want TUI support. | ||
| 47 | # | ||
| 48 | # Define NO_DEMANGLE if you do not want C++ symbol demangling. | ||
| 35 | 49 | ||
| 36 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | 50 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE |
| 37 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) | 51 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) |
| @@ -61,7 +75,7 @@ ifeq ($(ARCH),x86_64) | |||
| 61 | ifeq (${IS_X86_64}, 1) | 75 | ifeq (${IS_X86_64}, 1) |
| 62 | RAW_ARCH := x86_64 | 76 | RAW_ARCH := x86_64 |
| 63 | ARCH_CFLAGS := -DARCH_X86_64 | 77 | ARCH_CFLAGS := -DARCH_X86_64 |
| 64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S | 78 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S |
| 65 | endif | 79 | endif |
| 66 | endif | 80 | endif |
| 67 | 81 | ||
| @@ -183,7 +197,10 @@ SCRIPT_SH += perf-archive.sh | |||
| 183 | grep-libs = $(filter -l%,$(1)) | 197 | grep-libs = $(filter -l%,$(1)) |
| 184 | strip-libs = $(filter-out -l%,$(1)) | 198 | strip-libs = $(filter-out -l%,$(1)) |
| 185 | 199 | ||
| 186 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) | 200 | PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) |
| 201 | PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py | ||
| 202 | |||
| 203 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) | ||
| 187 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ | 204 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ |
| 188 | --quiet build_ext; \ | 205 | --quiet build_ext; \ |
| 189 | mkdir -p $(OUTPUT)python && \ | 206 | mkdir -p $(OUTPUT)python && \ |
| @@ -258,6 +275,7 @@ LIB_H += util/callchain.h | |||
| 258 | LIB_H += util/build-id.h | 275 | LIB_H += util/build-id.h |
| 259 | LIB_H += util/debug.h | 276 | LIB_H += util/debug.h |
| 260 | LIB_H += util/debugfs.h | 277 | LIB_H += util/debugfs.h |
| 278 | LIB_H += util/sysfs.h | ||
| 261 | LIB_H += util/event.h | 279 | LIB_H += util/event.h |
| 262 | LIB_H += util/evsel.h | 280 | LIB_H += util/evsel.h |
| 263 | LIB_H += util/evlist.h | 281 | LIB_H += util/evlist.h |
| @@ -304,6 +322,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o | |||
| 304 | LIB_OBJS += $(OUTPUT)util/config.o | 322 | LIB_OBJS += $(OUTPUT)util/config.o |
| 305 | LIB_OBJS += $(OUTPUT)util/ctype.o | 323 | LIB_OBJS += $(OUTPUT)util/ctype.o |
| 306 | LIB_OBJS += $(OUTPUT)util/debugfs.o | 324 | LIB_OBJS += $(OUTPUT)util/debugfs.o |
| 325 | LIB_OBJS += $(OUTPUT)util/sysfs.o | ||
| 307 | LIB_OBJS += $(OUTPUT)util/environment.o | 326 | LIB_OBJS += $(OUTPUT)util/environment.o |
| 308 | LIB_OBJS += $(OUTPUT)util/event.o | 327 | LIB_OBJS += $(OUTPUT)util/event.o |
| 309 | LIB_OBJS += $(OUTPUT)util/evlist.o | 328 | LIB_OBJS += $(OUTPUT)util/evlist.o |
| @@ -361,8 +380,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o | |||
| 361 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o | 380 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o |
| 362 | ifeq ($(RAW_ARCH),x86_64) | 381 | ifeq ($(RAW_ARCH),x86_64) |
| 363 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | 382 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o |
| 383 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o | ||
| 364 | endif | 384 | endif |
| 365 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 385 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
| 386 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o | ||
| 366 | 387 | ||
| 367 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o | 388 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o |
| 368 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o | 389 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o |
| @@ -794,7 +815,6 @@ help: | |||
| 794 | @echo ' quick-install-html - install the html documentation quickly' | 815 | @echo ' quick-install-html - install the html documentation quickly' |
| 795 | @echo '' | 816 | @echo '' |
| 796 | @echo 'Perf maintainer targets:' | 817 | @echo 'Perf maintainer targets:' |
| 797 | @echo ' distclean - alias to clean' | ||
| 798 | @echo ' clean - clean all binary objects and build output' | 818 | @echo ' clean - clean all binary objects and build output' |
| 799 | 819 | ||
| 800 | doc: | 820 | doc: |
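All of the newly documented knobs are ordinary make variables, for example (paths and cross prefix hypothetical):

    make O=/tmp/perf-build                              # keep objects out of the tree
    make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi-      # cross build
    make NO_NEWT=1 NO_LIBPERL=1 WERROR=0                # trim features, warnings not fatal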
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
| 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
| 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); | 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); |
| 7 | extern int bench_mem_memset(int argc, const char **argv, const char *prefix); | ||
| 7 | 8 | ||
| 8 | #define BENCH_FORMAT_DEFAULT_STR "default" | 9 | #define BENCH_FORMAT_DEFAULT_STR "default" |
| 9 | #define BENCH_FORMAT_DEFAULT 0 | 10 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696fc..d66ab799b35f 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h | |||
| @@ -2,3 +2,11 @@ | |||
| 2 | MEMCPY_FN(__memcpy, | 2 | MEMCPY_FN(__memcpy, |
| 3 | "x86-64-unrolled", | 3 | "x86-64-unrolled", |
| 4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") | 4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") |
| 5 | |||
| 6 | MEMCPY_FN(memcpy_c, | ||
| 7 | "x86-64-movsq", | ||
| 8 | "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
| 9 | |||
| 10 | MEMCPY_FN(memcpy_c_e, | ||
| 11 | "x86-64-movsb", | ||
| 12 | "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 185a96d66dd1..fcd9cf00600a 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S | |||
| @@ -1,4 +1,8 @@ | |||
| 1 | 1 | #define memcpy MEMCPY /* don't hide glibc's memcpy() */ | |
| 2 | #define altinstr_replacement text | ||
| 3 | #define globl p2align 4; .globl | ||
| 4 | #define Lmemcpy_c globl memcpy_c; memcpy_c | ||
| 5 | #define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e | ||
| 2 | #include "../../../arch/x86/lib/memcpy_64.S" | 6 | #include "../../../arch/x86/lib/memcpy_64.S" |
| 3 | /* | 7 | /* |
| 4 | * We need to provide note.GNU-stack section, saying that we want | 8 | * We need to provide note.GNU-stack section, saying that we want |
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b91..71557225bf92 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * | 5 | * |
| 6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | 6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> |
| 7 | */ | 7 | */ |
| 8 | #include <ctype.h> | ||
| 9 | 8 | ||
| 10 | #include "../perf.h" | 9 | #include "../perf.h" |
| 11 | #include "../util/util.h" | 10 | #include "../util/util.h" |
| @@ -24,6 +23,7 @@ | |||
| 24 | 23 | ||
| 25 | static const char *length_str = "1MB"; | 24 | static const char *length_str = "1MB"; |
| 26 | static const char *routine = "default"; | 25 | static const char *routine = "default"; |
| 26 | static int iterations = 1; | ||
| 27 | static bool use_clock; | 27 | static bool use_clock; |
| 28 | static int clock_fd; | 28 | static int clock_fd; |
| 29 | static bool only_prefault; | 29 | static bool only_prefault; |
| @@ -35,6 +35,8 @@ static const struct option options[] = { | |||
| 35 | "available unit: B, MB, GB (upper and lower)"), | 35 | "available unit: B, MB, GB (upper and lower)"), |
| 36 | OPT_STRING('r', "routine", &routine, "default", | 36 | OPT_STRING('r', "routine", &routine, "default", |
| 37 | "Specify routine to copy"), | 37 | "Specify routine to copy"), |
| 38 | OPT_INTEGER('i', "iterations", &iterations, | ||
| 39 | "repeat memcpy() invocation this number of times"), | ||
| 38 | OPT_BOOLEAN('c', "clock", &use_clock, | 40 | OPT_BOOLEAN('c', "clock", &use_clock, |
| 39 | "Use CPU clock for measuring"), | 41 | "Use CPU clock for measuring"), |
| 40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | 42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
| @@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
| 121 | { | 123 | { |
| 122 | u64 clock_start = 0ULL, clock_end = 0ULL; | 124 | u64 clock_start = 0ULL, clock_end = 0ULL; |
| 123 | void *src = NULL, *dst = NULL; | 125 | void *src = NULL, *dst = NULL; |
| 126 | int i; | ||
| 124 | 127 | ||
| 125 | alloc_mem(&src, &dst, len); | 128 | alloc_mem(&src, &dst, len); |
| 126 | 129 | ||
| @@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
| 128 | fn(dst, src, len); | 131 | fn(dst, src, len); |
| 129 | 132 | ||
| 130 | clock_start = get_clock(); | 133 | clock_start = get_clock(); |
| 131 | fn(dst, src, len); | 134 | for (i = 0; i < iterations; ++i) |
| 135 | fn(dst, src, len); | ||
| 132 | clock_end = get_clock(); | 136 | clock_end = get_clock(); |
| 133 | 137 | ||
| 134 | free(src); | 138 | free(src); |
| @@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
| 140 | { | 144 | { |
| 141 | struct timeval tv_start, tv_end, tv_diff; | 145 | struct timeval tv_start, tv_end, tv_diff; |
| 142 | void *src = NULL, *dst = NULL; | 146 | void *src = NULL, *dst = NULL; |
| 147 | int i; | ||
| 143 | 148 | ||
| 144 | alloc_mem(&src, &dst, len); | 149 | alloc_mem(&src, &dst, len); |
| 145 | 150 | ||
| @@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
| 147 | fn(dst, src, len); | 152 | fn(dst, src, len); |
| 148 | 153 | ||
| 149 | BUG_ON(gettimeofday(&tv_start, NULL)); | 154 | BUG_ON(gettimeofday(&tv_start, NULL)); |
| 150 | fn(dst, src, len); | 155 | for (i = 0; i < iterations; ++i) |
| 156 | fn(dst, src, len); | ||
| 151 | BUG_ON(gettimeofday(&tv_end, NULL)); | 157 | BUG_ON(gettimeofday(&tv_end, NULL)); |
| 152 | 158 | ||
| 153 | timersub(&tv_end, &tv_start, &tv_diff); | 159 | timersub(&tv_end, &tv_start, &tv_diff); |
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | |||
| 2 | #ifdef ARCH_X86_64 | ||
| 3 | |||
| 4 | #define MEMSET_FN(fn, name, desc) \ | ||
| 5 | extern void *fn(void *, int, size_t); | ||
| 6 | |||
| 7 | #include "mem-memset-x86-64-asm-def.h" | ||
| 8 | |||
| 9 | #undef MEMSET_FN | ||
| 10 | |||
| 11 | #endif | ||
| 12 | |||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h | |||
| @@ -0,0 +1,12 @@ | |||
| 1 | |||
| 2 | MEMSET_FN(__memset, | ||
| 3 | "x86-64-unrolled", | ||
| 4 | "unrolled memset() in arch/x86/lib/memset_64.S") | ||
| 5 | |||
| 6 | MEMSET_FN(memset_c, | ||
| 7 | "x86-64-stosq", | ||
| 8 | "movsq-based memset() in arch/x86/lib/memset_64.S") | ||
| 9 | |||
| 10 | MEMSET_FN(memset_c_e, | ||
| 11 | "x86-64-stosb", | ||
| 12 | "movsb-based memset() in arch/x86/lib/memset_64.S") | ||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..9e5af89ed13a --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | #define memset MEMSET /* don't hide glibc's memset() */ | ||
| 2 | #define altinstr_replacement text | ||
| 3 | #define globl p2align 4; .globl | ||
| 4 | #define Lmemset_c globl memset_c; memset_c | ||
| 5 | #define Lmemset_c_e globl memset_c_e; memset_c_e | ||
| 6 | #include "../../../arch/x86/lib/memset_64.S" | ||
| 7 | |||
| 8 | /* | ||
| 9 | * We need to provide note.GNU-stack section, saying that we want | ||
| 10 | * NOT executable stack. Otherwise the final linking will assume that | ||
| 11 | * the ELF stack should not be restricted at all and set it RWX. | ||
| 12 | */ | ||
| 13 | .section .note.GNU-stack,"",@progbits | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..e9079185bd72 --- /dev/null +++ b/tools/perf/bench/mem-memset.c | |||
| @@ -0,0 +1,297 @@ | |||
| 1 | /* | ||
| 2 | * mem-memset.c | ||
| 3 | * | ||
| 4 | * memset: Simple memory set in various ways | ||
| 5 | * | ||
| 6 | * Trivial clone of mem-memcpy.c. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "../perf.h" | ||
| 10 | #include "../util/util.h" | ||
| 11 | #include "../util/parse-options.h" | ||
| 12 | #include "../util/header.h" | ||
| 13 | #include "bench.h" | ||
| 14 | #include "mem-memset-arch.h" | ||
| 15 | |||
| 16 | #include <stdio.h> | ||
| 17 | #include <stdlib.h> | ||
| 18 | #include <string.h> | ||
| 19 | #include <sys/time.h> | ||
| 20 | #include <errno.h> | ||
| 21 | |||
| 22 | #define K 1024 | ||
| 23 | |||
| 24 | static const char *length_str = "1MB"; | ||
| 25 | static const char *routine = "default"; | ||
| 26 | static int iterations = 1; | ||
| 27 | static bool use_clock; | ||
| 28 | static int clock_fd; | ||
| 29 | static bool only_prefault; | ||
| 30 | static bool no_prefault; | ||
| 31 | |||
| 32 | static const struct option options[] = { | ||
| 33 | OPT_STRING('l', "length", &length_str, "1MB", | ||
| 34 | "Specify length of memory to copy. " | ||
| 35 | "available unit: B, MB, GB (upper and lower)"), | ||
| 36 | OPT_STRING('r', "routine", &routine, "default", | ||
| 37 | "Specify routine to copy"), | ||
| 38 | OPT_INTEGER('i', "iterations", &iterations, | ||
| 39 | "repeat memset() invocation this number of times"), | ||
| 40 | OPT_BOOLEAN('c', "clock", &use_clock, | ||
| 41 | "Use CPU clock for measuring"), | ||
| 42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
| 43 | "Show only the result with page faults before memset()"), | ||
| 44 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
| 45 | "Show only the result without page faults before memset()"), | ||
| 46 | OPT_END() | ||
| 47 | }; | ||
| 48 | |||
| 49 | typedef void *(*memset_t)(void *, int, size_t); | ||
| 50 | |||
| 51 | struct routine { | ||
| 52 | const char *name; | ||
| 53 | const char *desc; | ||
| 54 | memset_t fn; | ||
| 55 | }; | ||
| 56 | |||
| 57 | static const struct routine routines[] = { | ||
| 58 | { "default", | ||
| 59 | "Default memset() provided by glibc", | ||
| 60 | memset }, | ||
| 61 | #ifdef ARCH_X86_64 | ||
| 62 | |||
| 63 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | ||
| 64 | #include "mem-memset-x86-64-asm-def.h" | ||
| 65 | #undef MEMSET_FN | ||
| 66 | |||
| 67 | #endif | ||
| 68 | |||
| 69 | { NULL, | ||
| 70 | NULL, | ||
| 71 | NULL } | ||
| 72 | }; | ||
| 73 | |||
| 74 | static const char * const bench_mem_memset_usage[] = { | ||
| 75 | "perf bench mem memset <options>", | ||
| 76 | NULL | ||
| 77 | }; | ||
| 78 | |||
| 79 | static struct perf_event_attr clock_attr = { | ||
| 80 | .type = PERF_TYPE_HARDWARE, | ||
| 81 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
| 82 | }; | ||
| 83 | |||
| 84 | static void init_clock(void) | ||
| 85 | { | ||
| 86 | clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); | ||
| 87 | |||
| 88 | if (clock_fd < 0 && errno == ENOSYS) | ||
| 89 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
| 90 | else | ||
| 91 | BUG_ON(clock_fd < 0); | ||
| 92 | } | ||
| 93 | |||
| 94 | static u64 get_clock(void) | ||
| 95 | { | ||
| 96 | int ret; | ||
| 97 | u64 clk; | ||
| 98 | |||
| 99 | ret = read(clock_fd, &clk, sizeof(u64)); | ||
| 100 | BUG_ON(ret != sizeof(u64)); | ||
| 101 | |||
| 102 | return clk; | ||
| 103 | } | ||
| 104 | |||
| 105 | static double timeval2double(struct timeval *ts) | ||
| 106 | { | ||
| 107 | return (double)ts->tv_sec + | ||
| 108 | (double)ts->tv_usec / (double)1000000; | ||
| 109 | } | ||
| 110 | |||
| 111 | static void alloc_mem(void **dst, size_t length) | ||
| 112 | { | ||
| 113 | *dst = zalloc(length); | ||
| 114 | if (!*dst) | ||
| 115 | die("memory allocation failed - maybe length is too large?\n"); | ||
| 116 | } | ||
| 117 | |||
| 118 | static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) | ||
| 119 | { | ||
| 120 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
| 121 | void *dst = NULL; | ||
| 122 | int i; | ||
| 123 | |||
| 124 | alloc_mem(&dst, len); | ||
| 125 | |||
| 126 | if (prefault) | ||
| 127 | fn(dst, -1, len); | ||
| 128 | |||
| 129 | clock_start = get_clock(); | ||
| 130 | for (i = 0; i < iterations; ++i) | ||
| 131 | fn(dst, i, len); | ||
| 132 | clock_end = get_clock(); | ||
| 133 | |||
| 134 | free(dst); | ||
| 135 | return clock_end - clock_start; | ||
| 136 | } | ||
| 137 | |||
| 138 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
| 139 | { | ||
| 140 | struct timeval tv_start, tv_end, tv_diff; | ||
| 141 | void *dst = NULL; | ||
| 142 | int i; | ||
| 143 | |||
| 144 | alloc_mem(&dst, len); | ||
| 145 | |||
| 146 | if (prefault) | ||
| 147 | fn(dst, -1, len); | ||
| 148 | |||
| 149 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
| 150 | for (i = 0; i < iterations; ++i) | ||
| 151 | fn(dst, i, len); | ||
| 152 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
| 153 | |||
| 154 | timersub(&tv_end, &tv_start, &tv_diff); | ||
| 155 | |||
| 156 | free(dst); | ||
| 157 | return (double)((double)len / timeval2double(&tv_diff)); | ||
| 158 | } | ||
| 159 | |||
| 160 | #define pf (no_prefault ? 0 : 1) | ||
| 161 | |||
| 162 | #define print_bps(x) do { \ | ||
| 163 | if (x < K) \ | ||
| 164 | printf(" %14lf B/Sec", x); \ | ||
| 165 | else if (x < K * K) \ | ||
| 166 | printf(" %14lfd KB/Sec", x / K); \ | ||
| 167 | else if (x < K * K * K) \ | ||
| 168 | printf(" %14lf MB/Sec", x / K / K); \ | ||
| 169 | else \ | ||
| 170 | printf(" %14lf GB/Sec", x / K / K / K); \ | ||
| 171 | } while (0) | ||
| 172 | |||
| 173 | int bench_mem_memset(int argc, const char **argv, | ||
| 174 | const char *prefix __used) | ||
| 175 | { | ||
| 176 | int i; | ||
| 177 | size_t len; | ||
| 178 | double result_bps[2]; | ||
| 179 | u64 result_clock[2]; | ||
| 180 | |||
| 181 | argc = parse_options(argc, argv, options, | ||
| 182 | bench_mem_memset_usage, 0); | ||
| 183 | |||
| 184 | if (use_clock) | ||
| 185 | init_clock(); | ||
| 186 | |||
| 187 | len = (size_t)perf_atoll((char *)length_str); | ||
| 188 | |||
| 189 | result_clock[0] = result_clock[1] = 0ULL; | ||
| 190 | result_bps[0] = result_bps[1] = 0.0; | ||
| 191 | |||
| 192 | if ((s64)len <= 0) { | ||
| 193 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
| 194 | return 1; | ||
| 195 | } | ||
| 196 | |||
| 197 | /* same as specifying neither prefault nor no-prefault */ | ||
| 198 | if (only_prefault && no_prefault) | ||
| 199 | only_prefault = no_prefault = false; | ||
| 200 | |||
| 201 | for (i = 0; routines[i].name; i++) { | ||
| 202 | if (!strcmp(routines[i].name, routine)) | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | if (!routines[i].name) { | ||
| 206 | printf("Unknown routine:%s\n", routine); | ||
| 207 | printf("Available routines...\n"); | ||
| 208 | for (i = 0; routines[i].name; i++) { | ||
| 209 | printf("\t%s ... %s\n", | ||
| 210 | routines[i].name, routines[i].desc); | ||
| 211 | } | ||
| 212 | return 1; | ||
| 213 | } | ||
| 214 | |||
| 215 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
| 216 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
| 217 | |||
| 218 | if (!only_prefault && !no_prefault) { | ||
| 219 | /* show both of results */ | ||
| 220 | if (use_clock) { | ||
| 221 | result_clock[0] = | ||
| 222 | do_memset_clock(routines[i].fn, len, false); | ||
| 223 | result_clock[1] = | ||
| 224 | do_memset_clock(routines[i].fn, len, true); | ||
| 225 | } else { | ||
| 226 | result_bps[0] = | ||
| 227 | do_memset_gettimeofday(routines[i].fn, | ||
| 228 | len, false); | ||
| 229 | result_bps[1] = | ||
| 230 | do_memset_gettimeofday(routines[i].fn, | ||
| 231 | len, true); | ||
| 232 | } | ||
| 233 | } else { | ||
| 234 | if (use_clock) { | ||
| 235 | result_clock[pf] = | ||
| 236 | do_memset_clock(routines[i].fn, | ||
| 237 | len, only_prefault); | ||
| 238 | } else { | ||
| 239 | result_bps[pf] = | ||
| 240 | do_memset_gettimeofday(routines[i].fn, | ||
| 241 | len, only_prefault); | ||
| 242 | } | ||
| 243 | } | ||
| 244 | |||
| 245 | switch (bench_format) { | ||
| 246 | case BENCH_FORMAT_DEFAULT: | ||
| 247 | if (!only_prefault && !no_prefault) { | ||
| 248 | if (use_clock) { | ||
| 249 | printf(" %14lf Clock/Byte\n", | ||
| 250 | (double)result_clock[0] | ||
| 251 | / (double)len); | ||
| 252 | printf(" %14lf Clock/Byte (with prefault)\n ", | ||
| 253 | (double)result_clock[1] | ||
| 254 | / (double)len); | ||
| 255 | } else { | ||
| 256 | print_bps(result_bps[0]); | ||
| 257 | printf("\n"); | ||
| 258 | print_bps(result_bps[1]); | ||
| 259 | printf(" (with prefault)\n"); | ||
| 260 | } | ||
| 261 | } else { | ||
| 262 | if (use_clock) { | ||
| 263 | printf(" %14lf Clock/Byte", | ||
| 264 | (double)result_clock[pf] | ||
| 265 | / (double)len); | ||
| 266 | } else | ||
| 267 | print_bps(result_bps[pf]); | ||
| 268 | |||
| 269 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
| 270 | } | ||
| 271 | break; | ||
| 272 | case BENCH_FORMAT_SIMPLE: | ||
| 273 | if (!only_prefault && !no_prefault) { | ||
| 274 | if (use_clock) { | ||
| 275 | printf("%lf %lf\n", | ||
| 276 | (double)result_clock[0] / (double)len, | ||
| 277 | (double)result_clock[1] / (double)len); | ||
| 278 | } else { | ||
| 279 | printf("%lf %lf\n", | ||
| 280 | result_bps[0], result_bps[1]); | ||
| 281 | } | ||
| 282 | } else { | ||
| 283 | if (use_clock) { | ||
| 284 | printf("%lf\n", (double)result_clock[pf] | ||
| 285 | / (double)len); | ||
| 286 | } else | ||
| 287 | printf("%lf\n", result_bps[pf]); | ||
| 288 | } | ||
| 289 | break; | ||
| 290 | default: | ||
| 291 | /* reaching this means there's some disaster: */ | ||
| 292 | die("unknown format: %d\n", bench_format); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | |||
| 296 | return 0; | ||
| 297 | } | ||
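Once built, the benchmark is driven like its memcpy sibling, e.g. (routine names from the x86-64 table above):

    perf bench mem memset -l 1GB -r x86-64-stosq -i 10
    perf bench mem memset -l 1MB -c -o      # cycle counts, prefaulted run only

Note that with -i greater than one the timed region spans every iteration while the printed figures still divide by a single length, so the reported numbers scale with the iteration count.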
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..b0e74ab2d7a2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
| @@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { | |||
| 52 | { "memcpy", | 52 | { "memcpy", |
| 53 | "Simple memory copy in various ways", | 53 | "Simple memory copy in various ways", |
| 54 | bench_mem_memcpy }, | 54 | bench_mem_memcpy }, |
| 55 | { "memset", | ||
| 56 | "Simple memory set in various ways", | ||
| 57 | bench_mem_memset }, | ||
| 55 | suite_all, | 58 | suite_all, |
| 56 | { NULL, | 59 | { NULL, |
| 57 | NULL, | 60 | NULL, |
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2296c391d0f5..12c814838993 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
| @@ -922,12 +922,12 @@ static const struct option info_options[] = { | |||
| 922 | OPT_BOOLEAN('t', "threads", &info_threads, | 922 | OPT_BOOLEAN('t', "threads", &info_threads, |
| 923 | "dump thread list in perf.data"), | 923 | "dump thread list in perf.data"), |
| 924 | OPT_BOOLEAN('m', "map", &info_map, | 924 | OPT_BOOLEAN('m', "map", &info_map, |
| 925 | "map of lock instances (name:address table)"), | 925 | "map of lock instances (address:name table)"), |
| 926 | OPT_END() | 926 | OPT_END() |
| 927 | }; | 927 | }; |
| 928 | 928 | ||
| 929 | static const char * const lock_usage[] = { | 929 | static const char * const lock_usage[] = { |
| 930 | "perf lock [<options>] {record|trace|report}", | 930 | "perf lock [<options>] {record|report|script|info}", |
| 931 | NULL | 931 | NULL |
| 932 | }; | 932 | }; |
| 933 | 933 | ||
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index fb8566181f27..4935c09dd5b5 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
| @@ -58,7 +58,7 @@ static struct { | |||
| 58 | struct perf_probe_event events[MAX_PROBES]; | 58 | struct perf_probe_event events[MAX_PROBES]; |
| 59 | struct strlist *dellist; | 59 | struct strlist *dellist; |
| 60 | struct line_range line_range; | 60 | struct line_range line_range; |
| 61 | const char *target_module; | 61 | const char *target; |
| 62 | int max_probe_points; | 62 | int max_probe_points; |
| 63 | struct strfilter *filter; | 63 | struct strfilter *filter; |
| 64 | } params; | 64 | } params; |
| @@ -246,7 +246,7 @@ static const struct option options[] = { | |||
| 246 | "file", "vmlinux pathname"), | 246 | "file", "vmlinux pathname"), |
| 247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, | 247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, |
| 248 | "directory", "path to kernel source"), | 248 | "directory", "path to kernel source"), |
| 249 | OPT_STRING('m', "module", ¶ms.target_module, | 249 | OPT_STRING('m', "module", ¶ms.target, |
| 250 | "modname|path", | 250 | "modname|path", |
| 251 | "target module name (for online) or path (for offline)"), | 251 | "target module name (for online) or path (for offline)"), |
| 252 | #endif | 252 | #endif |
| @@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
| 333 | if (!params.filter) | 333 | if (!params.filter) |
| 334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, | 334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, |
| 335 | NULL); | 335 | NULL); |
| 336 | ret = show_available_funcs(params.target_module, | 336 | ret = show_available_funcs(params.target, |
| 337 | params.filter); | 337 | params.filter); |
| 338 | strfilter__delete(params.filter); | 338 | strfilter__delete(params.filter); |
| 339 | if (ret < 0) | 339 | if (ret < 0) |
| @@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
| 354 | usage_with_options(probe_usage, options); | 354 | usage_with_options(probe_usage, options); |
| 355 | } | 355 | } |
| 356 | 356 | ||
| 357 | ret = show_line_range(¶ms.line_range, params.target_module); | 357 | ret = show_line_range(¶ms.line_range, params.target); |
| 358 | if (ret < 0) | 358 | if (ret < 0) |
| 359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); | 359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); |
| 360 | return ret; | 360 | return ret; |
| @@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
| 371 | 371 | ||
| 372 | ret = show_available_vars(params.events, params.nevents, | 372 | ret = show_available_vars(params.events, params.nevents, |
| 373 | params.max_probe_points, | 373 | params.max_probe_points, |
| 374 | params.target_module, | 374 | params.target, |
| 375 | params.filter, | 375 | params.filter, |
| 376 | params.show_ext_vars); | 376 | params.show_ext_vars); |
| 377 | strfilter__delete(params.filter); | 377 | strfilter__delete(params.filter); |
| @@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
| 393 | if (params.nevents) { | 393 | if (params.nevents) { |
| 394 | ret = add_perf_probe_events(params.events, params.nevents, | 394 | ret = add_perf_probe_events(params.events, params.nevents, |
| 395 | params.max_probe_points, | 395 | params.max_probe_points, |
| 396 | params.target_module, | 396 | params.target, |
| 397 | params.force_add); | 397 | params.force_add); |
| 398 | if (ret < 0) { | 398 | if (ret < 0) { |
| 399 | pr_err(" Error: Failed to add events. (%d)\n", ret); | 399 | pr_err(" Error: Failed to add events. (%d)\n", ret); |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 227b6ae99785..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
| @@ -44,6 +44,7 @@ struct perf_record { | |||
| 44 | struct perf_evlist *evlist; | 44 | struct perf_evlist *evlist; |
| 45 | struct perf_session *session; | 45 | struct perf_session *session; |
| 46 | const char *progname; | 46 | const char *progname; |
| 47 | const char *uid_str; | ||
| 47 | int output; | 48 | int output; |
| 48 | unsigned int page_size; | 49 | unsigned int page_size; |
| 49 | int realtime_prio; | 50 | int realtime_prio; |
| @@ -208,7 +209,7 @@ fallback_missing_features: | |||
| 208 | if (opts->exclude_guest_missing) | 209 | if (opts->exclude_guest_missing) |
| 209 | attr->exclude_guest = attr->exclude_host = 0; | 210 | attr->exclude_guest = attr->exclude_host = 0; |
| 210 | retry_sample_id: | 211 | retry_sample_id: |
| 211 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 212 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
| 212 | try_again: | 213 | try_again: |
| 213 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, | 214 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, |
| 214 | opts->group, group_fd) < 0) { | 215 | opts->group, group_fd) < 0) { |
| @@ -227,11 +228,11 @@ try_again: | |||
| 227 | "guest or host samples.\n"); | 228 | "guest or host samples.\n"); |
| 228 | opts->exclude_guest_missing = true; | 229 | opts->exclude_guest_missing = true; |
| 229 | goto fallback_missing_features; | 230 | goto fallback_missing_features; |
| 230 | } else if (opts->sample_id_all_avail) { | 231 | } else if (!opts->sample_id_all_missing) { |
| 231 | /* | 232 | /* |
| 232 | * Old kernel, no attr->sample_id_type_all field | 233 | * Old kernel, no attr->sample_id_type_all field |
| 233 | */ | 234 | */ |
| 234 | opts->sample_id_all_avail = false; | 235 | opts->sample_id_all_missing = true; |
| 235 | if (!opts->sample_time && !opts->raw_samples && !time_needed) | 236 | if (!opts->sample_time && !opts->raw_samples && !time_needed) |
| 236 | attr->sample_type &= ~PERF_SAMPLE_TIME; | 237 | attr->sample_type &= ~PERF_SAMPLE_TIME; |
| 237 | 238 | ||
| @@ -396,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
| 396 | { | 397 | { |
| 397 | struct stat st; | 398 | struct stat st; |
| 398 | int flags; | 399 | int flags; |
| 399 | int err, output; | 400 | int err, output, feat; |
| 400 | unsigned long waking = 0; | 401 | unsigned long waking = 0; |
| 401 | const bool forks = argc > 0; | 402 | const bool forks = argc > 0; |
| 402 | struct machine *machine; | 403 | struct machine *machine; |
| @@ -463,8 +464,17 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
| 463 | 464 | ||
| 464 | rec->session = session; | 465 | rec->session = session; |
| 465 | 466 | ||
| 466 | if (!rec->no_buildid) | 467 | for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) |
| 467 | perf_header__set_feat(&session->header, HEADER_BUILD_ID); | 468 | perf_header__set_feat(&session->header, feat); |
| 469 | |||
| 470 | if (rec->no_buildid) | ||
| 471 | perf_header__clear_feat(&session->header, HEADER_BUILD_ID); | ||
| 472 | |||
| 473 | if (!have_tracepoints(&evsel_list->entries)) | ||
| 474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); | ||
| 475 | |||
| 476 | if (!rec->opts.branch_stack) | ||
| 477 | perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); | ||
| 468 | 478 | ||
| 469 | if (!rec->file_new) { | 479 | if (!rec->file_new) { |
| 470 | err = perf_session__read_header(session, output); | 480 | err = perf_session__read_header(session, output); |
| @@ -472,22 +482,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
| 472 | goto out_delete_session; | 482 | goto out_delete_session; |
| 473 | } | 483 | } |
| 474 | 484 | ||
| 475 | if (have_tracepoints(&evsel_list->entries)) | ||
| 476 | perf_header__set_feat(&session->header, HEADER_TRACE_INFO); | ||
| 477 | |||
| 478 | perf_header__set_feat(&session->header, HEADER_HOSTNAME); | ||
| 479 | perf_header__set_feat(&session->header, HEADER_OSRELEASE); | ||
| 480 | perf_header__set_feat(&session->header, HEADER_ARCH); | ||
| 481 | perf_header__set_feat(&session->header, HEADER_CPUDESC); | ||
| 482 | perf_header__set_feat(&session->header, HEADER_NRCPUS); | ||
| 483 | perf_header__set_feat(&session->header, HEADER_EVENT_DESC); | ||
| 484 | perf_header__set_feat(&session->header, HEADER_CMDLINE); | ||
| 485 | perf_header__set_feat(&session->header, HEADER_VERSION); | ||
| 486 | perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); | ||
| 487 | perf_header__set_feat(&session->header, HEADER_TOTAL_MEM); | ||
| 488 | perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); | ||
| 489 | perf_header__set_feat(&session->header, HEADER_CPUID); | ||
| 490 | |||
| 491 | if (forks) { | 485 | if (forks) { |
| 492 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); | 486 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); |
| 493 | if (err < 0) { | 487 | if (err < 0) { |
| @@ -647,6 +641,90 @@ out_delete_session: | |||
| 647 | return err; | 641 | return err; |
| 648 | } | 642 | } |
| 649 | 643 | ||
| 644 | #define BRANCH_OPT(n, m) \ | ||
| 645 | { .name = n, .mode = (m) } | ||
| 646 | |||
| 647 | #define BRANCH_END { .name = NULL } | ||
| 648 | |||
| 649 | struct branch_mode { | ||
| 650 | const char *name; | ||
| 651 | int mode; | ||
| 652 | }; | ||
| 653 | |||
| 654 | static const struct branch_mode branch_modes[] = { | ||
| 655 | BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), | ||
| 656 | BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), | ||
| 657 | BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), | ||
| 658 | BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), | ||
| 659 | BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), | ||
| 660 | BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), | ||
| 661 | BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), | ||
| 662 | BRANCH_END | ||
| 663 | }; | ||
| 664 | |||
| 665 | static int | ||
| 666 | parse_branch_stack(const struct option *opt, const char *str, int unset) | ||
| 667 | { | ||
| 668 | #define ONLY_PLM \ | ||
| 669 | (PERF_SAMPLE_BRANCH_USER |\ | ||
| 670 | PERF_SAMPLE_BRANCH_KERNEL |\ | ||
| 671 | PERF_SAMPLE_BRANCH_HV) | ||
| 672 | |||
| 673 | uint64_t *mode = (uint64_t *)opt->value; | ||
| 674 | const struct branch_mode *br; | ||
| 675 | char *s, *os = NULL, *p; | ||
| 676 | int ret = -1; | ||
| 677 | |||
| 678 | if (unset) | ||
| 679 | return 0; | ||
| 680 | |||
| 681 | /* | ||
| 682 | * cannot set it twice, -b + --branch-filter for instance | ||
| 683 | */ | ||
| 684 | if (*mode) | ||
| 685 | return -1; | ||
| 686 | |||
| 687 | /* str may be NULL in case no arg is passed to -b */ | ||
| 688 | if (str) { | ||
| 689 | /* because str is read-only */ | ||
| 690 | s = os = strdup(str); | ||
| 691 | if (!s) | ||
| 692 | return -1; | ||
| 693 | |||
| 694 | for (;;) { | ||
| 695 | p = strchr(s, ','); | ||
| 696 | if (p) | ||
| 697 | *p = '\0'; | ||
| 698 | |||
| 699 | for (br = branch_modes; br->name; br++) { | ||
| 700 | if (!strcasecmp(s, br->name)) | ||
| 701 | break; | ||
| 702 | } | ||
| 703 | if (!br->name) { | ||
| 704 | ui__warning("unknown branch filter %s," | ||
| 705 | " check man page\n", s); | ||
| 706 | goto error; | ||
| 707 | } | ||
| 708 | |||
| 709 | *mode |= br->mode; | ||
| 710 | |||
| 711 | if (!p) | ||
| 712 | break; | ||
| 713 | |||
| 714 | s = p + 1; | ||
| 715 | } | ||
| 716 | } | ||
| 717 | ret = 0; | ||
| 718 | |||
| 719 | /* default to any branch */ | ||
| 720 | if ((*mode & ~ONLY_PLM) == 0) { | ||
| 721 | *mode = PERF_SAMPLE_BRANCH_ANY; | ||
| 722 | } | ||
| 723 | error: | ||
| 724 | free(os); | ||
| 725 | return ret; | ||
| 726 | } | ||
| 727 | |||
| 650 | static const char * const record_usage[] = { | 728 | static const char * const record_usage[] = { |
| 651 | "perf record [<options>] [<command>]", | 729 | "perf record [<options>] [<command>]", |
| 652 | "perf record [<options>] -- <command> [<options>]", | 730 | "perf record [<options>] -- <command> [<options>]", |
| @@ -665,13 +743,10 @@ static const char * const record_usage[] = { | |||
| 665 | */ | 743 | */ |
| 666 | static struct perf_record record = { | 744 | static struct perf_record record = { |
| 667 | .opts = { | 745 | .opts = { |
| 668 | .target_pid = -1, | ||
| 669 | .target_tid = -1, | ||
| 670 | .mmap_pages = UINT_MAX, | 746 | .mmap_pages = UINT_MAX, |
| 671 | .user_freq = UINT_MAX, | 747 | .user_freq = UINT_MAX, |
| 672 | .user_interval = ULLONG_MAX, | 748 | .user_interval = ULLONG_MAX, |
| 673 | .freq = 1000, | 749 | .freq = 1000, |
| 674 | .sample_id_all_avail = true, | ||
| 675 | }, | 750 | }, |
| 676 | .write_mode = WRITE_FORCE, | 751 | .write_mode = WRITE_FORCE, |
| 677 | .file_new = true, | 752 | .file_new = true, |
| @@ -690,9 +765,9 @@ const struct option record_options[] = { | |||
| 690 | parse_events_option), | 765 | parse_events_option), |
| 691 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", | 766 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", |
| 692 | "event filter", parse_filter), | 767 | "event filter", parse_filter), |
| 693 | OPT_INTEGER('p', "pid", &record.opts.target_pid, | 768 | OPT_STRING('p', "pid", &record.opts.target_pid, "pid", |
| 694 | "record events on existing process id"), | 769 | "record events on existing process id"), |
| 695 | OPT_INTEGER('t', "tid", &record.opts.target_tid, | 770 | OPT_STRING('t', "tid", &record.opts.target_tid, "tid", |
| 696 | "record events on existing thread id"), | 771 | "record events on existing thread id"), |
| 697 | OPT_INTEGER('r', "realtime", &record.realtime_prio, | 772 | OPT_INTEGER('r', "realtime", &record.realtime_prio, |
| 698 | "collect data with this RT SCHED_FIFO priority"), | 773 | "collect data with this RT SCHED_FIFO priority"), |
| @@ -738,6 +813,15 @@ const struct option record_options[] = { | |||
| 738 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", | 813 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", |
| 739 | "monitor event in cgroup name only", | 814 | "monitor event in cgroup name only", |
| 740 | parse_cgroups), | 815 | parse_cgroups), |
| 816 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), | ||
| 817 | |||
| 818 | OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, | ||
| 819 | "branch any", "sample any taken branches", | ||
| 820 | parse_branch_stack), | ||
| 821 | |||
| 822 | OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, | ||
| 823 | "branch filter mask", "branch stack filter modes", | ||
| 824 | parse_branch_stack), | ||
| 741 | OPT_END() | 825 | OPT_END() |
| 742 | }; | 826 | }; |
| 743 | 827 | ||
| @@ -758,8 +842,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
| 758 | 842 | ||
| 759 | argc = parse_options(argc, argv, record_options, record_usage, | 843 | argc = parse_options(argc, argv, record_options, record_usage, |
| 760 | PARSE_OPT_STOP_AT_NON_OPTION); | 844 | PARSE_OPT_STOP_AT_NON_OPTION); |
| 761 | if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && | 845 | if (!argc && !rec->opts.target_pid && !rec->opts.target_tid && |
| 762 | !rec->opts.system_wide && !rec->opts.cpu_list) | 846 | !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) |
| 763 | usage_with_options(record_usage, record_options); | 847 | usage_with_options(record_usage, record_options); |
| 764 | 848 | ||
| 765 | if (rec->force && rec->append_file) { | 849 | if (rec->force && rec->append_file) { |
| @@ -799,11 +883,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
| 799 | goto out_symbol_exit; | 883 | goto out_symbol_exit; |
| 800 | } | 884 | } |
| 801 | 885 | ||
| 802 | if (rec->opts.target_pid != -1) | 886 | rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid, |
| 887 | rec->opts.target_pid); | ||
| 888 | if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) | ||
| 889 | goto out_free_fd; | ||
| 890 | |||
| 891 | if (rec->opts.target_pid) | ||
| 803 | rec->opts.target_tid = rec->opts.target_pid; | 892 | rec->opts.target_tid = rec->opts.target_pid; |
| 804 | 893 | ||
| 805 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, | 894 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, |
| 806 | rec->opts.target_tid, rec->opts.cpu_list) < 0) | 895 | rec->opts.target_tid, rec->opts.uid, |
| 896 | rec->opts.cpu_list) < 0) | ||
| 807 | usage_with_options(record_usage, record_options); | 897 | usage_with_options(record_usage, record_options); |
| 808 | 898 | ||
| 809 | list_for_each_entry(pos, &evsel_list->entries, node) { | 899 | list_for_each_entry(pos, &evsel_list->entries, node) { |
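Two properties of parse_branch_stack() above are worth spelling out: the mask may only be set once, so -b cannot be combined with --branch-filter, and a list naming only privilege levels falls back to PERF_SAMPLE_BRANCH_ANY. In command terms (workload hypothetical):

    perf record -j any_call,u ./a.out     # user-level calls only
    perf record -j u,k ./a.out            # no branch type named: mask becomes "any"
    perf record -b -j any_call ./a.out    # rejected: -b already set the mask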
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
| @@ -53,6 +53,82 @@ struct perf_report { | |||
| 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | ||
| 57 | struct addr_location *al, | ||
| 58 | struct perf_sample *sample, | ||
| 59 | struct perf_evsel *evsel, | ||
| 60 | struct machine *machine) | ||
| 61 | { | ||
| 62 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
| 63 | struct symbol *parent = NULL; | ||
| 64 | int err = 0; | ||
| 65 | unsigned i; | ||
| 66 | struct hist_entry *he; | ||
| 67 | struct branch_info *bi, *bx; | ||
| 68 | |||
| 69 | if ((sort__has_parent || symbol_conf.use_callchain) | ||
| 70 | && sample->callchain) { | ||
| 71 | err = machine__resolve_callchain(machine, evsel, al->thread, | ||
| 72 | sample->callchain, &parent); | ||
| 73 | if (err) | ||
| 74 | return err; | ||
| 75 | } | ||
| 76 | |||
| 77 | bi = machine__resolve_bstack(machine, al->thread, | ||
| 78 | sample->branch_stack); | ||
| 79 | if (!bi) | ||
| 80 | return -ENOMEM; | ||
| 81 | |||
| 82 | for (i = 0; i < sample->branch_stack->nr; i++) { | ||
| 83 | if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) | ||
| 84 | continue; | ||
| 85 | /* | ||
| 86 | * The report shows the percentage of total branches captured | ||
| 87 | * and not events sampled. Thus we use a pseudo period of 1. | ||
| 88 | */ | ||
| 89 | he = __hists__add_branch_entry(&evsel->hists, al, parent, | ||
| 90 | &bi[i], 1); | ||
| 91 | if (he) { | ||
| 92 | struct annotation *notes; | ||
| 93 | err = -ENOMEM; | ||
| 94 | bx = he->branch_info; | ||
| 95 | if (bx->from.sym && use_browser > 0) { | ||
| 96 | notes = symbol__annotation(bx->from.sym); | ||
| 97 | if (!notes->src | ||
| 98 | && symbol__alloc_hist(bx->from.sym) < 0) | ||
| 99 | goto out; | ||
| 100 | |||
| 101 | err = symbol__inc_addr_samples(bx->from.sym, | ||
| 102 | bx->from.map, | ||
| 103 | evsel->idx, | ||
| 104 | bx->from.al_addr); | ||
| 105 | if (err) | ||
| 106 | goto out; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (bx->to.sym && use_browser > 0) { | ||
| 110 | notes = symbol__annotation(bx->to.sym); | ||
| 111 | if (!notes->src | ||
| 112 | && symbol__alloc_hist(bx->to.sym) < 0) | ||
| 113 | goto out; | ||
| 114 | |||
| 115 | err = symbol__inc_addr_samples(bx->to.sym, | ||
| 116 | bx->to.map, | ||
| 117 | evsel->idx, | ||
| 118 | bx->to.al_addr); | ||
| 119 | if (err) | ||
| 120 | goto out; | ||
| 121 | } | ||
| 122 | evsel->hists.stats.total_period += 1; | ||
| 123 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | ||
| 124 | err = 0; | ||
| 125 | } else | ||
| 126 | return -ENOMEM; | ||
| 127 | } | ||
| 128 | out: | ||
| 129 | return err; | ||
| 130 | } | ||
| 131 | |||
| 56 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 132 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, |
| 57 | struct addr_location *al, | 133 | struct addr_location *al, |
| 58 | struct perf_sample *sample, | 134 | struct perf_sample *sample, |
| @@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, | |||
| 126 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) | 202 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) |
| 127 | return 0; | 203 | return 0; |
| 128 | 204 | ||
| 129 | if (al.map != NULL) | 205 | if (sort__branch_mode == 1) { |
| 130 | al.map->dso->hit = 1; | 206 | if (perf_report__add_branch_hist_entry(tool, &al, sample, |
| 207 | evsel, machine)) { | ||
| 208 | pr_debug("problem adding lbr entry, skipping event\n"); | ||
| 209 | return -1; | ||
| 210 | } | ||
| 211 | } else { | ||
| 212 | if (al.map != NULL) | ||
| 213 | al.map->dso->hit = 1; | ||
| 131 | 214 | ||
| 132 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { | 215 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { |
| 133 | pr_debug("problem incrementing symbol period, skipping event\n"); | 216 | pr_debug("problem incrementing symbol period, skipping event\n"); |
| 134 | return -1; | 217 | return -1; |
| 218 | } | ||
| 135 | } | 219 | } |
| 136 | |||
| 137 | return 0; | 220 | return 0; |
| 138 | } | 221 | } |
| 139 | 222 | ||
| @@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) | |||
| 188 | } | 271 | } |
| 189 | } | 272 | } |
| 190 | 273 | ||
| 274 | if (sort__branch_mode == 1) { | ||
| 275 | if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { | ||
| 276 | fprintf(stderr, "selected -b but no branch data." | ||
| 277 | " Did you call perf record without" | ||
| 278 | " -b?\n"); | ||
| 279 | return -1; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 191 | return 0; | 283 | return 0; |
| 192 | } | 284 | } |
| 193 | 285 | ||
| @@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) | |||
| 246 | { | 338 | { |
| 247 | int ret = -EINVAL; | 339 | int ret = -EINVAL; |
| 248 | u64 nr_samples; | 340 | u64 nr_samples; |
| 249 | struct perf_session *session; | 341 | struct perf_session *session = rep->session; |
| 250 | struct perf_evsel *pos; | 342 | struct perf_evsel *pos; |
| 251 | struct map *kernel_map; | 343 | struct map *kernel_map; |
| 252 | struct kmap *kernel_kmap; | 344 | struct kmap *kernel_kmap; |
| @@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) | |||
| 254 | 346 | ||
| 255 | signal(SIGINT, sig_handler); | 347 | signal(SIGINT, sig_handler); |
| 256 | 348 | ||
| 257 | session = perf_session__new(rep->input_name, O_RDONLY, | ||
| 258 | rep->force, false, &rep->tool); | ||
| 259 | if (session == NULL) | ||
| 260 | return -ENOMEM; | ||
| 261 | |||
| 262 | rep->session = session; | ||
| 263 | |||
| 264 | if (rep->cpu_list) { | 349 | if (rep->cpu_list) { |
| 265 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, | 350 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, |
| 266 | rep->cpu_bitmap); | 351 | rep->cpu_bitmap); |
| @@ -427,9 +512,19 @@ setup: | |||
| 427 | return 0; | 512 | return 0; |
| 428 | } | 513 | } |
| 429 | 514 | ||
| 515 | static int | ||
| 516 | parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) | ||
| 517 | { | ||
| 518 | sort__branch_mode = !unset; | ||
| 519 | return 0; | ||
| 520 | } | ||
| 521 | |||
| 430 | int cmd_report(int argc, const char **argv, const char *prefix __used) | 522 | int cmd_report(int argc, const char **argv, const char *prefix __used) |
| 431 | { | 523 | { |
| 524 | struct perf_session *session; | ||
| 432 | struct stat st; | 525 | struct stat st; |
| 526 | bool has_br_stack = false; | ||
| 527 | int ret = -1; | ||
| 433 | char callchain_default_opt[] = "fractal,0.5,callee"; | 528 | char callchain_default_opt[] = "fractal,0.5,callee"; |
| 434 | const char * const report_usage[] = { | 529 | const char * const report_usage[] = { |
| 435 | "perf report [<options>]", | 530 | "perf report [<options>]", |
| @@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 477 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, | 572 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, |
| 478 | "Use the stdio interface"), | 573 | "Use the stdio interface"), |
| 479 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 574 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
| 480 | "sort by key(s): pid, comm, dso, symbol, parent"), | 575 | "sort by key(s): pid, comm, dso, symbol, parent, dso_to," |
| 576 | " dso_from, symbol_to, symbol_from, mispredict"), | ||
| 481 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 577 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
| 482 | "Show sample percentage for different cpu modes"), | 578 | "Show sample percentage for different cpu modes"), |
| 483 | OPT_STRING('p', "parent", &parent_pattern, "regex", | 579 | OPT_STRING('p', "parent", &parent_pattern, "regex", |
| @@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 517 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 613 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
| 518 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, | 614 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, |
| 519 | "Show a column with the sum of periods"), | 615 | "Show a column with the sum of periods"), |
| 616 | OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", | ||
| 617 | "use branch records for histogram filling", parse_branch_mode), | ||
| 520 | OPT_END() | 618 | OPT_END() |
| 521 | }; | 619 | }; |
| 522 | 620 | ||
| @@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 536 | else | 634 | else |
| 537 | report.input_name = "perf.data"; | 635 | report.input_name = "perf.data"; |
| 538 | } | 636 | } |
| 637 | session = perf_session__new(report.input_name, O_RDONLY, | ||
| 638 | report.force, false, &report.tool); | ||
| 639 | if (session == NULL) | ||
| 640 | return -ENOMEM; | ||
| 539 | 641 | ||
| 540 | if (strcmp(report.input_name, "-") != 0) | 642 | report.session = session; |
| 643 | |||
| 644 | has_br_stack = perf_header__has_feat(&session->header, | ||
| 645 | HEADER_BRANCH_STACK); | ||
| 646 | |||
| 647 | if (sort__branch_mode == -1 && has_br_stack) | ||
| 648 | sort__branch_mode = 1; | ||
| 649 | |||
| 650 | /* sort__branch_mode could be 0 if --no-branch-stack */ | ||
| 651 | if (sort__branch_mode == 1) { | ||
| 652 | /* | ||
| 653 | * if no sort_order is provided, then specify | ||
| 654 | * branch-mode specific order | ||
| 655 | */ | ||
| 656 | if (sort_order == default_sort_order) | ||
| 657 | sort_order = "comm,dso_from,symbol_from," | ||
| 658 | "dso_to,symbol_to"; | ||
| 659 | |||
| 660 | } | ||
| 661 | |||
| 662 | if (strcmp(report.input_name, "-") != 0) { | ||
| 541 | setup_browser(true); | 663 | setup_browser(true); |
| 542 | else | 664 | } else { |
| 543 | use_browser = 0; | 665 | use_browser = 0; |
| 666 | } | ||
| 544 | 667 | ||
| 545 | /* | 668 | /* |
| 546 | * Only in the newt browser we are doing integrated annotation, | 669 | * Only in the newt browser we are doing integrated annotation, |
| @@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 568 | } | 691 | } |
| 569 | 692 | ||
| 570 | if (symbol__init() < 0) | 693 | if (symbol__init() < 0) |
| 571 | return -1; | 694 | goto error; |
| 572 | 695 | ||
| 573 | setup_sorting(report_usage, options); | 696 | setup_sorting(report_usage, options); |
| 574 | 697 | ||
| 575 | if (parent_pattern != default_parent_pattern) { | 698 | if (parent_pattern != default_parent_pattern) { |
| 576 | if (sort_dimension__add("parent") < 0) | 699 | if (sort_dimension__add("parent") < 0) |
| 577 | return -1; | 700 | goto error; |
| 578 | 701 | ||
| 579 | /* | 702 | /* |
| 580 | * Only show the parent fields if we explicitly | 703 | * Only show the parent fields if we explicitly |
| @@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 592 | if (argc) | 715 | if (argc) |
| 593 | usage_with_options(report_usage, options); | 716 | usage_with_options(report_usage, options); |
| 594 | 717 | ||
| 595 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
| 596 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); | 718 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); |
| 597 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
| 598 | 719 | ||
| 599 | return __cmd_report(&report); | 720 | if (sort__branch_mode == 1) { |
| 721 | sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); | ||
| 722 | sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); | ||
| 723 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); | ||
| 724 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); | ||
| 725 | } else { | ||
| 726 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
| 727 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
| 728 | } | ||
| 729 | |||
| 730 | ret = __cmd_report(&report); | ||
| 731 | error: | ||
| 732 | perf_session__delete(session); | ||
| 733 | return ret; | ||
| 600 | } | 734 | } |
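The branch-stack plumbing above rests on sort__branch_mode being a tri-state: -1 while unset, 0 after --no-branch-stack, 1 after -b, with the perf.data header (HEADER_BRANCH_STACK) deciding the unset case. A minimal standalone restatement of that decision, under a hypothetical name (resolve_branch_mode is not a perf function):

    /* -1 = user said nothing, 0 = --no-branch-stack, 1 = -b.
     * When unset, follow what the perf.data header advertises. */
    static int resolve_branch_mode(int opt_mode, int file_has_branch_stack)
    {
            if (opt_mode == -1)
                    return file_has_branch_stack ? 1 : 0;
            return opt_mode;
    }

The intended workflow is to record with perf record -b first; perf_report__setup_sample_type() errors out when -b is requested but PERF_SAMPLE_BRANCH_STACK is missing from the file's sample type.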
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb68ddf257b7..d4ce733b9eba 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
| @@ -40,6 +40,7 @@ enum perf_output_field { | |||
| 40 | PERF_OUTPUT_SYM = 1U << 8, | 40 | PERF_OUTPUT_SYM = 1U << 8, |
| 41 | PERF_OUTPUT_DSO = 1U << 9, | 41 | PERF_OUTPUT_DSO = 1U << 9, |
| 42 | PERF_OUTPUT_ADDR = 1U << 10, | 42 | PERF_OUTPUT_ADDR = 1U << 10, |
| 43 | PERF_OUTPUT_SYMOFFSET = 1U << 11, | ||
| 43 | }; | 44 | }; |
| 44 | 45 | ||
| 45 | struct output_option { | 46 | struct output_option { |
| @@ -57,6 +58,7 @@ struct output_option { | |||
| 57 | {.str = "sym", .field = PERF_OUTPUT_SYM}, | 58 | {.str = "sym", .field = PERF_OUTPUT_SYM}, |
| 58 | {.str = "dso", .field = PERF_OUTPUT_DSO}, | 59 | {.str = "dso", .field = PERF_OUTPUT_DSO}, |
| 59 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, | 60 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, |
| 61 | {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, | ||
| 60 | }; | 62 | }; |
| 61 | 63 | ||
| 62 | /* default set to maintain compatibility with current format */ | 64 | /* default set to maintain compatibility with current format */ |
| @@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
| 193 | "to symbols.\n"); | 195 | "to symbols.\n"); |
| 194 | return -EINVAL; | 196 | return -EINVAL; |
| 195 | } | 197 | } |
| 198 | if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) { | ||
| 199 | pr_err("Display of offsets requested but symbol is not" | ||
| 200 | "selected.\n"); | ||
| 201 | return -EINVAL; | ||
| 202 | } | ||
| 196 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { | 203 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { |
| 197 | pr_err("Display of DSO requested but neither sample IP nor " | 204 | pr_err("Display of DSO requested but neither sample IP nor " |
| 198 | "sample address\nis selected. Hence, no addresses to convert " | 205 | "sample address\nis selected. Hence, no addresses to convert " |
| @@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample, | |||
| 300 | } else | 307 | } else |
| 301 | evname = __event_name(attr->type, attr->config); | 308 | evname = __event_name(attr->type, attr->config); |
| 302 | 309 | ||
| 303 | printf("%s: ", evname ? evname : "(unknown)"); | 310 | printf("%s: ", evname ? evname : "[unknown]"); |
| 304 | } | 311 | } |
| 305 | } | 312 | } |
| 306 | 313 | ||
| 314 | static bool is_bts_event(struct perf_event_attr *attr) | ||
| 315 | { | ||
| 316 | return ((attr->type == PERF_TYPE_HARDWARE) && | ||
| 317 | (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
| 318 | (attr->sample_period == 1)); | ||
| 319 | } | ||
| 320 | |||
| 307 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | 321 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) |
| 308 | { | 322 | { |
| 309 | if ((attr->type == PERF_TYPE_SOFTWARE) && | 323 | if ((attr->type == PERF_TYPE_SOFTWARE) && |
| @@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | |||
| 312 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) | 326 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) |
| 313 | return true; | 327 | return true; |
| 314 | 328 | ||
| 329 | if (is_bts_event(attr)) | ||
| 330 | return true; | ||
| 331 | |||
| 315 | return false; | 332 | return false; |
| 316 | } | 333 | } |
| 317 | 334 | ||
| @@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event, | |||
| 323 | { | 340 | { |
| 324 | struct addr_location al; | 341 | struct addr_location al; |
| 325 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 342 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
| 326 | const char *symname, *dsoname; | ||
| 327 | 343 | ||
| 328 | printf("%16" PRIx64, sample->addr); | 344 | printf("%16" PRIx64, sample->addr); |
| 329 | 345 | ||
| @@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event, | |||
| 343 | al.sym = map__find_symbol(al.map, al.addr, NULL); | 359 | al.sym = map__find_symbol(al.map, al.addr, NULL); |
| 344 | 360 | ||
| 345 | if (PRINT_FIELD(SYM)) { | 361 | if (PRINT_FIELD(SYM)) { |
| 346 | if (al.sym && al.sym->name) | 362 | printf(" "); |
| 347 | symname = al.sym->name; | 363 | if (PRINT_FIELD(SYMOFFSET)) |
| 364 | symbol__fprintf_symname_offs(al.sym, &al, stdout); | ||
| 348 | else | 365 | else |
| 349 | symname = ""; | 366 | symbol__fprintf_symname(al.sym, stdout); |
| 350 | |||
| 351 | printf(" %16s", symname); | ||
| 352 | } | 367 | } |
| 353 | 368 | ||
| 354 | if (PRINT_FIELD(DSO)) { | 369 | if (PRINT_FIELD(DSO)) { |
| 355 | if (al.map && al.map->dso && al.map->dso->name) | 370 | printf(" ("); |
| 356 | dsoname = al.map->dso->name; | 371 | map__fprintf_dsoname(al.map, stdout); |
| 357 | else | 372 | printf(")"); |
| 358 | dsoname = ""; | 373 | } |
| 374 | } | ||
| 359 | 375 | ||
| 360 | printf(" (%s)", dsoname); | 376 | static void print_sample_bts(union perf_event *event, |
| 377 | struct perf_sample *sample, | ||
| 378 | struct perf_evsel *evsel, | ||
| 379 | struct machine *machine, | ||
| 380 | struct thread *thread) | ||
| 381 | { | ||
| 382 | struct perf_event_attr *attr = &evsel->attr; | ||
| 383 | |||
| 384 | /* print branch_from information */ | ||
| 385 | if (PRINT_FIELD(IP)) { | ||
| 386 | if (!symbol_conf.use_callchain) | ||
| 387 | printf(" "); | ||
| 388 | else | ||
| 389 | printf("\n"); | ||
| 390 | perf_event__print_ip(event, sample, machine, evsel, | ||
| 391 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), | ||
| 392 | PRINT_FIELD(SYMOFFSET)); | ||
| 361 | } | 393 | } |
| 394 | |||
| 395 | printf(" => "); | ||
| 396 | |||
| 397 | /* print branch_to information */ | ||
| 398 | if (PRINT_FIELD(ADDR)) | ||
| 399 | print_sample_addr(event, sample, machine, thread, attr); | ||
| 400 | |||
| 401 | printf("\n"); | ||
| 362 | } | 402 | } |
| 363 | 403 | ||
| 364 | static void process_event(union perf_event *event __unused, | 404 | static void process_event(union perf_event *event __unused, |
| @@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused, | |||
| 374 | 414 | ||
| 375 | print_sample_start(sample, thread, attr); | 415 | print_sample_start(sample, thread, attr); |
| 376 | 416 | ||
| 417 | if (is_bts_event(attr)) { | ||
| 418 | print_sample_bts(event, sample, evsel, machine, thread); | ||
| 419 | return; | ||
| 420 | } | ||
| 421 | |||
| 377 | if (PRINT_FIELD(TRACE)) | 422 | if (PRINT_FIELD(TRACE)) |
| 378 | print_trace_event(sample->cpu, sample->raw_data, | 423 | print_trace_event(sample->cpu, sample->raw_data, |
| 379 | sample->raw_size); | 424 | sample->raw_size); |
| @@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused, | |||
| 387 | else | 432 | else |
| 388 | printf("\n"); | 433 | printf("\n"); |
| 389 | perf_event__print_ip(event, sample, machine, evsel, | 434 | perf_event__print_ip(event, sample, machine, evsel, |
| 390 | PRINT_FIELD(SYM), PRINT_FIELD(DSO)); | 435 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), |
| 436 | PRINT_FIELD(SYMOFFSET)); | ||
| 391 | } | 437 | } |
| 392 | 438 | ||
| 393 | printf("\n"); | 439 | printf("\n"); |
| @@ -1097,7 +1143,10 @@ static const struct option options[] = { | |||
| 1097 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", | 1143 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", |
| 1098 | "Look for files with symbols relative to this directory"), | 1144 | "Look for files with symbols relative to this directory"), |
| 1099 | OPT_CALLBACK('f', "fields", NULL, "str", | 1145 | OPT_CALLBACK('f', "fields", NULL, "str", |
| 1100 | "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", | 1146 | "comma separated output fields prepend with 'type:'. " |
| 1147 | "Valid types: hw,sw,trace,raw. " | ||
| 1148 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | ||
| 1149 | "addr,symoff", | ||
| 1101 | parse_output_fields), | 1150 | parse_output_fields), |
| 1102 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1151 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
| 1103 | "system-wide collection from all CPUs"), | 1152 | "system-wide collection from all CPUs"), |
| @@ -1106,6 +1155,9 @@ static const struct option options[] = { | |||
| 1106 | "only display events for these comms"), | 1155 | "only display events for these comms"), |
| 1107 | OPT_BOOLEAN('I', "show-info", &show_full_info, | 1156 | OPT_BOOLEAN('I', "show-info", &show_full_info, |
| 1108 | "display extended information from perf.data file"), | 1157 | "display extended information from perf.data file"), |
| 1158 | OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, | ||
| 1159 | "Show the path of [kernel.kallsyms]"), | ||
| 1160 | |||
| 1109 | OPT_END() | 1161 | OPT_END() |
| 1110 | }; | 1162 | }; |
| 1111 | 1163 | ||
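The new symoff field changes only how a resolved symbol is rendered: sym prints the bare name, symoff appends the sampled address's offset into the symbol. A self-contained sketch of that distinction (print_sym and its signature are illustrative, not the symbol__fprintf_symname_offs() API):

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    static void print_sym(const char *name, uint64_t addr,
                          uint64_t sym_start, int want_offset)
    {
            if (want_offset)
                    printf("%s+0x%" PRIx64, name, addr - sym_start); /* symoff */
            else
                    printf("%s", name);                              /* sym */
    }

For BTS events, detected above as hardware branch samples with a period of 1, print_sample_bts() then joins the IP and ADDR fields as "from => to", so an invocation along the lines of perf script -f ip,addr,sym,symoff should show both branch endpoints with offsets.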
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5d2a63eba66..ea40e4e8b227 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -182,8 +182,8 @@ static int run_count = 1; | |||
| 182 | static bool no_inherit = false; | 182 | static bool no_inherit = false; |
| 183 | static bool scale = true; | 183 | static bool scale = true; |
| 184 | static bool no_aggr = false; | 184 | static bool no_aggr = false; |
| 185 | static pid_t target_pid = -1; | 185 | static const char *target_pid; |
| 186 | static pid_t target_tid = -1; | 186 | static const char *target_tid; |
| 187 | static pid_t child_pid = -1; | 187 | static pid_t child_pid = -1; |
| 188 | static bool null_run = false; | 188 | static bool null_run = false; |
| 189 | static int detailed_run = 0; | 189 | static int detailed_run = 0; |
| @@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, | |||
| 296 | if (system_wide) | 296 | if (system_wide) |
| 297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, | 297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, |
| 298 | group, group_fd); | 298 | group, group_fd); |
| 299 | if (target_pid == -1 && target_tid == -1) { | 299 | if (!target_pid && !target_tid) { |
| 300 | attr->disabled = 1; | 300 | attr->disabled = 1; |
| 301 | attr->enable_on_exec = 1; | 301 | attr->enable_on_exec = 1; |
| 302 | } | 302 | } |
| @@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 446 | exit(-1); | 446 | exit(-1); |
| 447 | } | 447 | } |
| 448 | 448 | ||
| 449 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 449 | if (!target_tid && !target_pid && !system_wide) |
| 450 | evsel_list->threads->map[0] = child_pid; | 450 | evsel_list->threads->map[0] = child_pid; |
| 451 | 451 | ||
| 452 | /* | 452 | /* |
| @@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
| 576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
| 577 | fprintf(output, " # %8.3f CPUs utilized ", | 577 | fprintf(output, " # %8.3f CPUs utilized ", |
| 578 | avg / avg_stats(&walltime_nsecs_stats)); | 578 | avg / avg_stats(&walltime_nsecs_stats)); |
| 579 | else | ||
| 580 | fprintf(output, " "); | ||
| 579 | } | 581 | } |
| 580 | 582 | ||
| 581 | /* used for get_ratio_color() */ | 583 | /* used for get_ratio_color() */ |
| @@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
| 844 | 846 | ||
| 845 | fprintf(output, " # %8.3f GHz ", ratio); | 847 | fprintf(output, " # %8.3f GHz ", ratio); |
| 846 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 848 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
| 849 | char unit = 'M'; | ||
| 850 | |||
| 847 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 851 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
| 848 | 852 | ||
| 849 | if (total) | 853 | if (total) |
| 850 | ratio = 1000.0 * avg / total; | 854 | ratio = 1000.0 * avg / total; |
| 855 | if (ratio < 0.001) { | ||
| 856 | ratio *= 1000; | ||
| 857 | unit = 'K'; | ||
| 858 | } | ||
| 851 | 859 | ||
| 852 | fprintf(output, " # %8.3f M/sec ", ratio); | 860 | fprintf(output, " # %8.3f %c/sec ", ratio, unit); |
| 853 | } else { | 861 | } else { |
| 854 | fprintf(output, " "); | 862 | fprintf(output, " "); |
| 855 | } | 863 | } |
| @@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv) | |||
| 960 | if (!csv_output) { | 968 | if (!csv_output) { |
| 961 | fprintf(output, "\n"); | 969 | fprintf(output, "\n"); |
| 962 | fprintf(output, " Performance counter stats for "); | 970 | fprintf(output, " Performance counter stats for "); |
| 963 | if(target_pid == -1 && target_tid == -1) { | 971 | if (!target_pid && !target_tid) { |
| 964 | fprintf(output, "\'%s", argv[0]); | 972 | fprintf(output, "\'%s", argv[0]); |
| 965 | for (i = 1; i < argc; i++) | 973 | for (i = 1; i < argc; i++) |
| 966 | fprintf(output, " %s", argv[i]); | 974 | fprintf(output, " %s", argv[i]); |
| 967 | } else if (target_pid != -1) | 975 | } else if (target_pid) |
| 968 | fprintf(output, "process id \'%d", target_pid); | 976 | fprintf(output, "process id \'%s", target_pid); |
| 969 | else | 977 | else |
| 970 | fprintf(output, "thread id \'%d", target_tid); | 978 | fprintf(output, "thread id \'%s", target_tid); |
| 971 | 979 | ||
| 972 | fprintf(output, "\'"); | 980 | fprintf(output, "\'"); |
| 973 | if (run_count > 1) | 981 | if (run_count > 1) |
| @@ -1041,10 +1049,10 @@ static const struct option options[] = { | |||
| 1041 | "event filter", parse_filter), | 1049 | "event filter", parse_filter), |
| 1042 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 1050 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
| 1043 | "child tasks do not inherit counters"), | 1051 | "child tasks do not inherit counters"), |
| 1044 | OPT_INTEGER('p', "pid", &target_pid, | 1052 | OPT_STRING('p', "pid", &target_pid, "pid", |
| 1045 | "stat events on existing process id"), | 1053 | "stat events on existing process id"), |
| 1046 | OPT_INTEGER('t', "tid", &target_tid, | 1054 | OPT_STRING('t', "tid", &target_tid, "tid", |
| 1047 | "stat events on existing thread id"), | 1055 | "stat events on existing thread id"), |
| 1048 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1056 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
| 1049 | "system-wide collection from all CPUs"), | 1057 | "system-wide collection from all CPUs"), |
| 1050 | OPT_BOOLEAN('g', "group", &group, | 1058 | OPT_BOOLEAN('g', "group", &group, |
| @@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 1182 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | 1190 | } else if (big_num_opt == 0) /* User passed --no-big-num */ |
| 1183 | big_num = false; | 1191 | big_num = false; |
| 1184 | 1192 | ||
| 1185 | if (!argc && target_pid == -1 && target_tid == -1) | 1193 | if (!argc && !target_pid && !target_tid) |
| 1186 | usage_with_options(stat_usage, options); | 1194 | usage_with_options(stat_usage, options); |
| 1187 | if (run_count <= 0) | 1195 | if (run_count <= 0) |
| 1188 | usage_with_options(stat_usage, options); | 1196 | usage_with_options(stat_usage, options); |
| @@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 1198 | if (add_default_attributes()) | 1206 | if (add_default_attributes()) |
| 1199 | goto out; | 1207 | goto out; |
| 1200 | 1208 | ||
| 1201 | if (target_pid != -1) | 1209 | if (target_pid) |
| 1202 | target_tid = target_pid; | 1210 | target_tid = target_pid; |
| 1203 | 1211 | ||
| 1204 | evsel_list->threads = thread_map__new(target_pid, target_tid); | 1212 | evsel_list->threads = thread_map__new_str(target_pid, |
| 1213 | target_tid, UINT_MAX); | ||
| 1205 | if (evsel_list->threads == NULL) { | 1214 | if (evsel_list->threads == NULL) { |
| 1206 | pr_err("Problems finding threads of monitor\n"); | 1215 | pr_err("Problems finding threads of monitor\n"); |
| 1207 | usage_with_options(stat_usage, options); | 1216 | usage_with_options(stat_usage, options); |
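Two user-visible changes land in builtin-stat.c: target pid/tid become strings handed to thread_map__new_str() (presumably so more than a single plain numeric id can be accepted), and event rates below 0.001 M/sec are rescaled to K/sec rather than printing as 0.000 M/sec. A compilable sketch of the rescaling:

    #include <stdio.h>

    /* avg_events counted over total_nsecs nanoseconds, as M/sec or K/sec. */
    static void print_rate(double avg_events, double total_nsecs)
    {
            double ratio = 1000.0 * avg_events / total_nsecs; /* M/sec */
            char unit = 'M';

            if (ratio < 0.001) {
                    ratio *= 1000;                            /* -> K/sec */
                    unit = 'K';
            }
            printf(" #  %8.3f %c/sec\n", ratio, unit);
    }

For example, 500 events over one second give 1000.0 * 500 / 1e9 = 0.0005, which now prints as 0.500 K/sec instead of 0.000 M/sec.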
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 3854e869dce1..3e087ce8daa6 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | #include "util/thread_map.h" | 15 | #include "util/thread_map.h" |
| 16 | #include "../../include/linux/hw_breakpoint.h" | 16 | #include "../../include/linux/hw_breakpoint.h" |
| 17 | 17 | ||
| 18 | #include <sys/mman.h> | ||
| 19 | |||
| 18 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) | 20 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) |
| 19 | { | 21 | { |
| 20 | bool *visited = symbol__priv(sym); | 22 | bool *visited = symbol__priv(sym); |
| @@ -276,7 +278,7 @@ static int test__open_syscall_event(void) | |||
| 276 | return -1; | 278 | return -1; |
| 277 | } | 279 | } |
| 278 | 280 | ||
| 279 | threads = thread_map__new(-1, getpid()); | 281 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
| 280 | if (threads == NULL) { | 282 | if (threads == NULL) { |
| 281 | pr_debug("thread_map__new\n"); | 283 | pr_debug("thread_map__new\n"); |
| 282 | return -1; | 284 | return -1; |
| @@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void) | |||
| 342 | return -1; | 344 | return -1; |
| 343 | } | 345 | } |
| 344 | 346 | ||
| 345 | threads = thread_map__new(-1, getpid()); | 347 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
| 346 | if (threads == NULL) { | 348 | if (threads == NULL) { |
| 347 | pr_debug("thread_map__new\n"); | 349 | pr_debug("thread_map__new\n"); |
| 348 | return -1; | 350 | return -1; |
| @@ -490,7 +492,7 @@ static int test__basic_mmap(void) | |||
| 490 | expected_nr_events[i] = random() % 257; | 492 | expected_nr_events[i] = random() % 257; |
| 491 | } | 493 | } |
| 492 | 494 | ||
| 493 | threads = thread_map__new(-1, getpid()); | 495 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
| 494 | if (threads == NULL) { | 496 | if (threads == NULL) { |
| 495 | pr_debug("thread_map__new\n"); | 497 | pr_debug("thread_map__new\n"); |
| 496 | return -1; | 498 | return -1; |
| @@ -1008,12 +1010,9 @@ realloc: | |||
| 1008 | static int test__PERF_RECORD(void) | 1010 | static int test__PERF_RECORD(void) |
| 1009 | { | 1011 | { |
| 1010 | struct perf_record_opts opts = { | 1012 | struct perf_record_opts opts = { |
| 1011 | .target_pid = -1, | ||
| 1012 | .target_tid = -1, | ||
| 1013 | .no_delay = true, | 1013 | .no_delay = true, |
| 1014 | .freq = 10, | 1014 | .freq = 10, |
| 1015 | .mmap_pages = 256, | 1015 | .mmap_pages = 256, |
| 1016 | .sample_id_all_avail = true, | ||
| 1017 | }; | 1016 | }; |
| 1018 | cpu_set_t *cpu_mask = NULL; | 1017 | cpu_set_t *cpu_mask = NULL; |
| 1019 | size_t cpu_mask_size = 0; | 1018 | size_t cpu_mask_size = 0; |
| @@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void) | |||
| 1054 | * we're monitoring, the one forked there. | 1053 | * we're monitoring, the one forked there. |
| 1055 | */ | 1054 | */ |
| 1056 | err = perf_evlist__create_maps(evlist, opts.target_pid, | 1055 | err = perf_evlist__create_maps(evlist, opts.target_pid, |
| 1057 | opts.target_tid, opts.cpu_list); | 1056 | opts.target_tid, UINT_MAX, opts.cpu_list); |
| 1058 | if (err < 0) { | 1057 | if (err < 0) { |
| 1059 | pr_debug("Not enough memory to create thread/cpu maps\n"); | 1058 | pr_debug("Not enough memory to create thread/cpu maps\n"); |
| 1060 | goto out_delete_evlist; | 1059 | goto out_delete_evlist; |
| @@ -1296,6 +1295,173 @@ out: | |||
| 1296 | return (err < 0 || errs > 0) ? -1 : 0; | 1295 | return (err < 0 || errs > 0) ? -1 : 0; |
| 1297 | } | 1296 | } |
| 1298 | 1297 | ||
| 1298 | |||
| 1299 | #if defined(__x86_64__) || defined(__i386__) | ||
| 1300 | |||
| 1301 | #define barrier() asm volatile("" ::: "memory") | ||
| 1302 | |||
| 1303 | static u64 rdpmc(unsigned int counter) | ||
| 1304 | { | ||
| 1305 | unsigned int low, high; | ||
| 1306 | |||
| 1307 | asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); | ||
| 1308 | |||
| 1309 | return low | ((u64)high) << 32; | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | static u64 rdtsc(void) | ||
| 1313 | { | ||
| 1314 | unsigned int low, high; | ||
| 1315 | |||
| 1316 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); | ||
| 1317 | |||
| 1318 | return low | ((u64)high) << 32; | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | static u64 mmap_read_self(void *addr) | ||
| 1322 | { | ||
| 1323 | struct perf_event_mmap_page *pc = addr; | ||
| 1324 | u32 seq, idx, time_mult = 0, time_shift = 0; | ||
| 1325 | u64 count, cyc = 0, time_offset = 0, enabled, running, delta; | ||
| 1326 | |||
| 1327 | do { | ||
| 1328 | seq = pc->lock; | ||
| 1329 | barrier(); | ||
| 1330 | |||
| 1331 | enabled = pc->time_enabled; | ||
| 1332 | running = pc->time_running; | ||
| 1333 | |||
| 1334 | if (enabled != running) { | ||
| 1335 | cyc = rdtsc(); | ||
| 1336 | time_mult = pc->time_mult; | ||
| 1337 | time_shift = pc->time_shift; | ||
| 1338 | time_offset = pc->time_offset; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | idx = pc->index; | ||
| 1342 | count = pc->offset; | ||
| 1343 | if (idx) | ||
| 1344 | count += rdpmc(idx - 1); | ||
| 1345 | |||
| 1346 | barrier(); | ||
| 1347 | } while (pc->lock != seq); | ||
| 1348 | |||
| 1349 | if (enabled != running) { | ||
| 1350 | u64 quot, rem; | ||
| 1351 | |||
| 1352 | quot = (cyc >> time_shift); | ||
| 1353 | rem = cyc & ((1 << time_shift) - 1); | ||
| 1354 | delta = time_offset + quot * time_mult + | ||
| 1355 | ((rem * time_mult) >> time_shift); | ||
| 1356 | |||
| 1357 | enabled += delta; | ||
| 1358 | if (idx) | ||
| 1359 | running += delta; | ||
| 1360 | |||
| 1361 | quot = count / running; | ||
| 1362 | rem = count % running; | ||
| 1363 | count = quot * enabled + (rem * enabled) / running; | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | return count; | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | /* | ||
| 1370 | * If the RDPMC instruction faults then signal this back to the test parent task: | ||
| 1371 | */ | ||
| 1372 | static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) | ||
| 1373 | { | ||
| 1374 | exit(-1); | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | static int __test__rdpmc(void) | ||
| 1378 | { | ||
| 1379 | long page_size = sysconf(_SC_PAGE_SIZE); | ||
| 1380 | volatile int tmp = 0; | ||
| 1381 | u64 i, loops = 1000; | ||
| 1382 | int n; | ||
| 1383 | int fd; | ||
| 1384 | void *addr; | ||
| 1385 | struct perf_event_attr attr = { | ||
| 1386 | .type = PERF_TYPE_HARDWARE, | ||
| 1387 | .config = PERF_COUNT_HW_INSTRUCTIONS, | ||
| 1388 | .exclude_kernel = 1, | ||
| 1389 | }; | ||
| 1390 | u64 delta_sum = 0; | ||
| 1391 | struct sigaction sa; | ||
| 1392 | |||
| 1393 | sigfillset(&sa.sa_mask); | ||
| 1394 | sa.sa_sigaction = segfault_handler; | ||
| 1395 | sigaction(SIGSEGV, &sa, NULL); | ||
| 1396 | |||
| 1397 | fprintf(stderr, "\n\n"); | ||
| 1398 | |||
| 1399 | fd = sys_perf_event_open(&attr, 0, -1, -1, 0); | ||
| 1400 | if (fd < 0) { | ||
| 1401 | die("Error: sys_perf_event_open() syscall returned " | ||
| 1402 | "with %d (%s)\n", fd, strerror(errno)); | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); | ||
| 1406 | if (addr == (void *)(-1)) { | ||
| 1407 | die("Error: mmap() syscall returned " | ||
| 1408 | "with (%s)\n", strerror(errno)); | ||
| 1409 | } | ||
| 1410 | |||
| 1411 | for (n = 0; n < 6; n++) { | ||
| 1412 | u64 stamp, now, delta; | ||
| 1413 | |||
| 1414 | stamp = mmap_read_self(addr); | ||
| 1415 | |||
| 1416 | for (i = 0; i < loops; i++) | ||
| 1417 | tmp++; | ||
| 1418 | |||
| 1419 | now = mmap_read_self(addr); | ||
| 1420 | loops *= 10; | ||
| 1421 | |||
| 1422 | delta = now - stamp; | ||
| 1423 | fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta); | ||
| 1424 | |||
| 1425 | delta_sum += delta; | ||
| 1426 | } | ||
| 1427 | |||
| 1428 | munmap(addr, page_size); | ||
| 1429 | close(fd); | ||
| 1430 | |||
| 1431 | fprintf(stderr, " "); | ||
| 1432 | |||
| 1433 | if (!delta_sum) | ||
| 1434 | return -1; | ||
| 1435 | |||
| 1436 | return 0; | ||
| 1437 | } | ||
| 1438 | |||
| 1439 | static int test__rdpmc(void) | ||
| 1440 | { | ||
| 1441 | int status = 0; | ||
| 1442 | int wret = 0; | ||
| 1443 | int ret; | ||
| 1444 | int pid; | ||
| 1445 | |||
| 1446 | pid = fork(); | ||
| 1447 | if (pid < 0) | ||
| 1448 | return -1; | ||
| 1449 | |||
| 1450 | if (!pid) { | ||
| 1451 | ret = __test__rdpmc(); | ||
| 1452 | |||
| 1453 | exit(ret); | ||
| 1454 | } | ||
| 1455 | |||
| 1456 | wret = waitpid(pid, &status, 0); | ||
| 1457 | if (wret < 0 || status) | ||
| 1458 | return -1; | ||
| 1459 | |||
| 1460 | return 0; | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | #endif | ||
| 1464 | |||
| 1299 | static struct test { | 1465 | static struct test { |
| 1300 | const char *desc; | 1466 | const char *desc; |
| 1301 | int (*func)(void); | 1467 | int (*func)(void); |
| @@ -1320,6 +1486,12 @@ static struct test { | |||
| 1320 | .desc = "parse events tests", | 1486 | .desc = "parse events tests", |
| 1321 | .func = test__parse_events, | 1487 | .func = test__parse_events, |
| 1322 | }, | 1488 | }, |
| 1489 | #if defined(__x86_64__) || defined(__i386__) | ||
| 1490 | { | ||
| 1491 | .desc = "x86 rdpmc test", | ||
| 1492 | .func = test__rdpmc, | ||
| 1493 | }, | ||
| 1494 | #endif | ||
| 1323 | { | 1495 | { |
| 1324 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", | 1496 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", |
| 1325 | .func = test__PERF_RECORD, | 1497 | .func = test__PERF_RECORD, |
| @@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) | |||
| 1412 | if (symbol__init() < 0) | 1584 | if (symbol__init() < 0) |
| 1413 | return -1; | 1585 | return -1; |
| 1414 | 1586 | ||
| 1415 | setup_pager(); | ||
| 1416 | |||
| 1417 | return __cmd_test(argc, argv); | 1587 | return __cmd_test(argc, argv); |
| 1418 | } | 1588 | } |
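The heart of the new x86 test is mmap_read_self(): a seqlock-style loop (retry while pc->lock changes underfoot) snapshots the counter through the perf mmap control page, and when time_enabled differs from time_running the raw count is scaled up to compensate for multiplexing. The scaling step in isolation, with the quot/rem split that keeps the 64-bit multiply from overflowing:

    #include <stdint.h>

    /* count * enabled / running, computed as
     * quot * enabled + (rem * enabled) / running to stay within u64. */
    static uint64_t scale_count(uint64_t count, uint64_t enabled,
                                uint64_t running)
    {
            uint64_t quot = count / running;
            uint64_t rem  = count % running;

            return quot * enabled + (rem * enabled) / running;
    }

The test forks before touching RDPMC, so a fault (e.g. on a kernel that forbids user-space counter reads) kills only the child; the parent treats a non-zero exit status from waitpid() as a test failure.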
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecff31257eb3..e3c63aef8efc 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
| @@ -64,7 +64,6 @@ | |||
| 64 | #include <linux/unistd.h> | 64 | #include <linux/unistd.h> |
| 65 | #include <linux/types.h> | 65 | #include <linux/types.h> |
| 66 | 66 | ||
| 67 | |||
| 68 | void get_term_dimensions(struct winsize *ws) | 67 | void get_term_dimensions(struct winsize *ws) |
| 69 | { | 68 | { |
| 70 | char *s = getenv("LINES"); | 69 | char *s = getenv("LINES"); |
| @@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg) | |||
| 544 | 543 | ||
| 545 | static void *display_thread_tui(void *arg) | 544 | static void *display_thread_tui(void *arg) |
| 546 | { | 545 | { |
| 546 | struct perf_evsel *pos; | ||
| 547 | struct perf_top *top = arg; | 547 | struct perf_top *top = arg; |
| 548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; | 548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; |
| 549 | 549 | ||
| 550 | perf_top__sort_new_samples(top); | 550 | perf_top__sort_new_samples(top); |
| 551 | |||
| 552 | /* | ||
| 553 | * Initialize the uid_filter_str, in the future the TUI will allow | ||
| 554 | * zooming in/out on UIDs. For now just use whatever the user passed | ||
| 555 | * via --uid. | ||
| 556 | */ | ||
| 557 | list_for_each_entry(pos, &top->evlist->entries, node) | ||
| 558 | pos->hists.uid_filter_str = top->uid_str; | ||
| 559 | |||
| 551 | perf_evlist__tui_browse_hists(top->evlist, help, | 560 | perf_evlist__tui_browse_hists(top->evlist, help, |
| 552 | perf_top__sort_new_samples, | 561 | perf_top__sort_new_samples, |
| 553 | top, top->delay_secs); | 562 | top, top->delay_secs); |
| @@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool, | |||
| 668 | return; | 677 | return; |
| 669 | } | 678 | } |
| 670 | 679 | ||
| 680 | if (!machine) { | ||
| 681 | pr_err("%u unprocessable samples recorded.", | ||
| 682 | top->session->hists.stats.nr_unprocessable_samples++); | ||
| 683 | return; | ||
| 684 | } | ||
| 685 | |||
| 671 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) | 686 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) |
| 672 | top->exact_samples++; | 687 | top->exact_samples++; |
| 673 | 688 | ||
| @@ -861,7 +876,7 @@ fallback_missing_features: | |||
| 861 | if (top->exclude_guest_missing) | 876 | if (top->exclude_guest_missing) |
| 862 | attr->exclude_guest = attr->exclude_host = 0; | 877 | attr->exclude_guest = attr->exclude_host = 0; |
| 863 | retry_sample_id: | 878 | retry_sample_id: |
| 864 | attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; | 879 | attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; |
| 865 | try_again: | 880 | try_again: |
| 866 | if (perf_evsel__open(counter, top->evlist->cpus, | 881 | if (perf_evsel__open(counter, top->evlist->cpus, |
| 867 | top->evlist->threads, top->group, | 882 | top->evlist->threads, top->group, |
| @@ -878,11 +893,11 @@ try_again: | |||
| 878 | "guest or host samples.\n"); | 893 | "guest or host samples.\n"); |
| 879 | top->exclude_guest_missing = true; | 894 | top->exclude_guest_missing = true; |
| 880 | goto fallback_missing_features; | 895 | goto fallback_missing_features; |
| 881 | } else if (top->sample_id_all_avail) { | 896 | } else if (!top->sample_id_all_missing) { |
| 882 | /* | 897 | /* |
| 883 | * Old kernel, no attr->sample_id_type_all field | 898 | * Old kernel, no attr->sample_id_type_all field |
| 884 | */ | 899 | */ |
| 885 | top->sample_id_all_avail = false; | 900 | top->sample_id_all_missing = true; |
| 886 | goto retry_sample_id; | 901 | goto retry_sample_id; |
| 887 | } | 902 | } |
| 888 | } | 903 | } |
| @@ -967,7 +982,7 @@ static int __cmd_top(struct perf_top *top) | |||
| 967 | if (ret) | 982 | if (ret) |
| 968 | goto out_delete; | 983 | goto out_delete; |
| 969 | 984 | ||
| 970 | if (top->target_tid != -1) | 985 | if (top->target_tid || top->uid != UINT_MAX) |
| 971 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, | 986 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, |
| 972 | perf_event__process, | 987 | perf_event__process, |
| 973 | &top->session->host_machine); | 988 | &top->session->host_machine); |
| @@ -1105,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 1105 | struct perf_top top = { | 1120 | struct perf_top top = { |
| 1106 | .count_filter = 5, | 1121 | .count_filter = 5, |
| 1107 | .delay_secs = 2, | 1122 | .delay_secs = 2, |
| 1108 | .target_pid = -1, | 1123 | .uid = UINT_MAX, |
| 1109 | .target_tid = -1, | ||
| 1110 | .freq = 1000, /* 1 KHz */ | 1124 | .freq = 1000, /* 1 KHz */ |
| 1111 | .sample_id_all_avail = true, | ||
| 1112 | .mmap_pages = 128, | 1125 | .mmap_pages = 128, |
| 1113 | .sym_pcnt_filter = 5, | 1126 | .sym_pcnt_filter = 5, |
| 1114 | }; | 1127 | }; |
| @@ -1119,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 1119 | parse_events_option), | 1132 | parse_events_option), |
| 1120 | OPT_INTEGER('c', "count", &top.default_interval, | 1133 | OPT_INTEGER('c', "count", &top.default_interval, |
| 1121 | "event period to sample"), | 1134 | "event period to sample"), |
| 1122 | OPT_INTEGER('p', "pid", &top.target_pid, | 1135 | OPT_STRING('p', "pid", &top.target_pid, "pid", |
| 1123 | "profile events on existing process id"), | 1136 | "profile events on existing process id"), |
| 1124 | OPT_INTEGER('t', "tid", &top.target_tid, | 1137 | OPT_STRING('t', "tid", &top.target_tid, "tid", |
| 1125 | "profile events on existing thread id"), | 1138 | "profile events on existing thread id"), |
| 1126 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, | 1139 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, |
| 1127 | "system-wide collection from all CPUs"), | 1140 | "system-wide collection from all CPUs"), |
| @@ -1180,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 1180 | "Display raw encoding of assembly instructions (default)"), | 1193 | "Display raw encoding of assembly instructions (default)"), |
| 1181 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", | 1194 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", |
| 1182 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 1195 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
| 1196 | OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), | ||
| 1183 | OPT_END() | 1197 | OPT_END() |
| 1184 | }; | 1198 | }; |
| 1185 | 1199 | ||
| @@ -1205,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 1205 | 1219 | ||
| 1206 | setup_browser(false); | 1220 | setup_browser(false); |
| 1207 | 1221 | ||
| 1222 | top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid); | ||
| 1223 | if (top.uid_str != NULL && top.uid == UINT_MAX - 1) | ||
| 1224 | goto out_delete_evlist; | ||
| 1225 | |||
| 1208 | /* CPU and PID are mutually exclusive */ | 1226 | /* CPU and PID are mutually exclusive */ |
| 1209 | if (top.target_tid > 0 && top.cpu_list) { | 1227 | if (top.target_tid && top.cpu_list) { |
| 1210 | printf("WARNING: PID switch overriding CPU\n"); | 1228 | printf("WARNING: PID switch overriding CPU\n"); |
| 1211 | sleep(1); | 1229 | sleep(1); |
| 1212 | top.cpu_list = NULL; | 1230 | top.cpu_list = NULL; |
| 1213 | } | 1231 | } |
| 1214 | 1232 | ||
| 1215 | if (top.target_pid != -1) | 1233 | if (top.target_pid) |
| 1216 | top.target_tid = top.target_pid; | 1234 | top.target_tid = top.target_pid; |
| 1217 | 1235 | ||
| 1218 | if (perf_evlist__create_maps(top.evlist, top.target_pid, | 1236 | if (perf_evlist__create_maps(top.evlist, top.target_pid, |
| 1219 | top.target_tid, top.cpu_list) < 0) | 1237 | top.target_tid, top.uid, top.cpu_list) < 0) |
| 1220 | usage_with_options(top_usage, options); | 1238 | usage_with_options(top_usage, options); |
| 1221 | 1239 | ||
| 1222 | if (!top.evlist->nr_entries && | 1240 | if (!top.evlist->nr_entries && |
| @@ -1280,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
| 1280 | 1298 | ||
| 1281 | status = __cmd_top(&top); | 1299 | status = __cmd_top(&top); |
| 1282 | 1300 | ||
| 1301 | out_delete_evlist: | ||
| 1283 | perf_evlist__delete(top.evlist); | 1302 | perf_evlist__delete(top.evlist); |
| 1284 | 1303 | ||
| 1285 | return status; | 1304 | return status; |
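perf top grows a --uid filter: top.uid defaults to the UINT_MAX sentinel meaning "no filter", parse_target_uid() converts the --uid string, and UINT_MAX - 1 evidently marks a parse failure, since exactly that value sends control to out_delete_evlist. A hedged sketch of what such a helper plausibly does (resolve_uid is illustrative; the real parse_target_uid() also receives the tid/pid strings, presumably to reject conflicting filters):

    #include <limits.h>
    #include <pwd.h>
    #include <stdlib.h>

    static unsigned int resolve_uid(const char *str)
    {
            struct passwd *pw;
            char *endp;
            unsigned long v;

            if (str == NULL)
                    return UINT_MAX;               /* no uid filter */

            v = strtoul(str, &endp, 10);
            if (*endp == '\0')
                    return (unsigned int)v;        /* numeric uid */

            pw = getpwnam(str);                    /* user name lookup */
            return pw ? pw->pw_uid : UINT_MAX - 1; /* UINT_MAX - 1: error */
    }

Typical use would then be perf top -u <user> to profile only that user's tasks.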
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 3afa39ac1d40..89e3355ab173 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
| @@ -173,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr, | |||
| 173 | pid_t pid, int cpu, int group_fd, | 173 | pid_t pid, int cpu, int group_fd, |
| 174 | unsigned long flags) | 174 | unsigned long flags) |
| 175 | { | 175 | { |
| 176 | attr->size = sizeof(*attr); | ||
| 177 | return syscall(__NR_perf_event_open, attr, pid, cpu, | 176 | return syscall(__NR_perf_event_open, attr, pid, cpu, |
| 178 | group_fd, flags); | 177 | group_fd, flags); |
| 179 | } | 178 | } |
| @@ -186,14 +185,32 @@ struct ip_callchain { | |||
| 186 | u64 ips[0]; | 185 | u64 ips[0]; |
| 187 | }; | 186 | }; |
| 188 | 187 | ||
| 188 | struct branch_flags { | ||
| 189 | u64 mispred:1; | ||
| 190 | u64 predicted:1; | ||
| 191 | u64 reserved:62; | ||
| 192 | }; | ||
| 193 | |||
| 194 | struct branch_entry { | ||
| 195 | u64 from; | ||
| 196 | u64 to; | ||
| 197 | struct branch_flags flags; | ||
| 198 | }; | ||
| 199 | |||
| 200 | struct branch_stack { | ||
| 201 | u64 nr; | ||
| 202 | struct branch_entry entries[0]; | ||
| 203 | }; | ||
| 204 | |||
| 189 | extern bool perf_host, perf_guest; | 205 | extern bool perf_host, perf_guest; |
| 190 | extern const char perf_version_string[]; | 206 | extern const char perf_version_string[]; |
| 191 | 207 | ||
| 192 | void pthread__unblock_sigwinch(void); | 208 | void pthread__unblock_sigwinch(void); |
| 193 | 209 | ||
| 194 | struct perf_record_opts { | 210 | struct perf_record_opts { |
| 195 | pid_t target_pid; | 211 | const char *target_pid; |
| 196 | pid_t target_tid; | 212 | const char *target_tid; |
| 213 | uid_t uid; | ||
| 197 | bool call_graph; | 214 | bool call_graph; |
| 198 | bool group; | 215 | bool group; |
| 199 | bool inherit_stat; | 216 | bool inherit_stat; |
| @@ -204,13 +221,14 @@ struct perf_record_opts { | |||
| 204 | bool raw_samples; | 221 | bool raw_samples; |
| 205 | bool sample_address; | 222 | bool sample_address; |
| 206 | bool sample_time; | 223 | bool sample_time; |
| 207 | bool sample_id_all_avail; | 224 | bool sample_id_all_missing; |
| 208 | bool exclude_guest_missing; | 225 | bool exclude_guest_missing; |
| 209 | bool system_wide; | 226 | bool system_wide; |
| 210 | bool period; | 227 | bool period; |
| 211 | unsigned int freq; | 228 | unsigned int freq; |
| 212 | unsigned int mmap_pages; | 229 | unsigned int mmap_pages; |
| 213 | unsigned int user_freq; | 230 | unsigned int user_freq; |
| 231 | int branch_stack; | ||
| 214 | u64 default_interval; | 232 | u64 default_interval; |
| 215 | u64 user_interval; | 233 | u64 user_interval; |
| 216 | const char *cpu_list; | 234 | const char *cpu_list; |
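The branch_stack layout added to perf.h mirrors what a PERF_SAMPLE_BRANCH_STACK sample carries: a count followed by from/to address pairs, each with mispredicted/predicted flag bits. A self-contained consumer sketch (the structs are restated locally so the snippet compiles on its own, with a C99 flexible array member in place of entries[0]):

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    struct branch_flags { uint64_t mispred:1, predicted:1, reserved:62; };
    struct branch_entry { uint64_t from, to; struct branch_flags flags; };
    struct branch_stack { uint64_t nr; struct branch_entry entries[]; };

    static void dump_branches(const struct branch_stack *bs)
    {
            uint64_t i;

            for (i = 0; i < bs->nr; i++)
                    printf("%#" PRIx64 " -> %#" PRIx64 "%s\n",
                           bs->entries[i].from, bs->entries[i].to,
                           bs->entries[i].flags.mispred ? " (mispredicted)" : "");
    }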
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index df638c438a9f..b11cca584238 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py | |||
| @@ -19,7 +19,7 @@ def main(): | |||
| 19 | cpus = perf.cpu_map() | 19 | cpus = perf.cpu_map() |
| 20 | threads = perf.thread_map() | 20 | threads = perf.thread_map() |
| 21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, | 21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, |
| 22 | wakeup_events = 1, sample_period = 1, | 22 | wakeup_events = 1, watermark = 1, |
| 23 | sample_id_all = 1, | 23 | sample_id_all = 1, |
| 24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) | 24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) |
| 25 | evsel.open(cpus = cpus, threads = threads); | 25 | evsel.open(cpus = cpus, threads = threads); |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 011ed2676604..e5a462f1d07c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
| @@ -315,7 +315,7 @@ fallback: | |||
| 315 | "Please use:\n\n" | 315 | "Please use:\n\n" |
| 316 | " perf buildid-cache -av vmlinux\n\n" | 316 | " perf buildid-cache -av vmlinux\n\n" |
| 317 | "or:\n\n" | 317 | "or:\n\n" |
| 318 | " --vmlinux vmlinux", | 318 | " --vmlinux vmlinux\n", |
| 319 | sym->name, build_id_msg ?: ""); | 319 | sym->name, build_id_msg ?: ""); |
| 320 | goto out_free_filename; | 320 | goto out_free_filename; |
| 321 | } | 321 | } |
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c index 5e230acae1e9..0a1adc1111fd 100644 --- a/tools/perf/util/bitmap.c +++ b/tools/perf/util/bitmap.c | |||
| @@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) | |||
| 19 | 19 | ||
| 20 | return w; | 20 | return w; |
| 21 | } | 21 | } |
| 22 | |||
| 23 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
| 24 | const unsigned long *bitmap2, int bits) | ||
| 25 | { | ||
| 26 | int k; | ||
| 27 | int nr = BITS_TO_LONGS(bits); | ||
| 28 | |||
| 29 | for (k = 0; k < nr; k++) | ||
| 30 | dst[k] = bitmap1[k] | bitmap2[k]; | ||
| 31 | } | ||
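__bitmap_or() works a long at a time, so BITS_TO_LONGS() rounds the bit count up to whole words; bits beyond `bits` in the last word get ORed along for the ride, and callers are expected to keep them clear. A small usage sketch (MAX_NR_CPUS is a placeholder for perf's real constant, and the macros are restated for self-containment):

    #include <limits.h>

    #define MAX_NR_CPUS      256  /* placeholder; the real value is in perf */
    #define BITS_PER_LONG    (CHAR_BIT * sizeof(long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
                     const unsigned long *bitmap2, int bits);

    static unsigned long seen[BITS_TO_LONGS(MAX_NR_CPUS)];
    static unsigned long wanted[BITS_TO_LONGS(MAX_NR_CPUS)];
    static unsigned long either[BITS_TO_LONGS(MAX_NR_CPUS)];

    static void combine(void)
    {
            __bitmap_or(either, seen, wanted, MAX_NR_CPUS);
    }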
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693ab..adc72f09914d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c | |||
| @@ -166,6 +166,17 @@ out: | |||
| 166 | return cpus; | 166 | return cpus; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) | ||
| 170 | { | ||
| 171 | int i; | ||
| 172 | size_t printed = fprintf(fp, "%d cpu%s: ", | ||
| 173 | map->nr, map->nr > 1 ? "s" : ""); | ||
| 174 | for (i = 0; i < map->nr; ++i) | ||
| 175 | printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); | ||
| 176 | |||
| 177 | return printed + fprintf(fp, "\n"); | ||
| 178 | } | ||
| 179 | |||
| 169 | struct cpu_map *cpu_map__dummy_new(void) | 180 | struct cpu_map *cpu_map__dummy_new(void) |
| 170 | { | 181 | { |
| 171 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); | 182 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); |
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a374794..c41518573c6a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | #ifndef __PERF_CPUMAP_H | 1 | #ifndef __PERF_CPUMAP_H |
| 2 | #define __PERF_CPUMAP_H | 2 | #define __PERF_CPUMAP_H |
| 3 | 3 | ||
| 4 | #include <stdio.h> | ||
| 5 | |||
| 4 | struct cpu_map { | 6 | struct cpu_map { |
| 5 | int nr; | 7 | int nr; |
| 6 | int map[]; | 8 | int map[]; |
| @@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); | |||
| 10 | struct cpu_map *cpu_map__dummy_new(void); | 12 | struct cpu_map *cpu_map__dummy_new(void); |
| 11 | void cpu_map__delete(struct cpu_map *map); | 13 | void cpu_map__delete(struct cpu_map *map); |
| 12 | 14 | ||
| 15 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); | ||
| 16 | |||
| 13 | #endif /* __PERF_CPUMAP_H */ | 17 | #endif /* __PERF_CPUMAP_H */ |
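cpu_map__fprintf() renders a map as a count plus a comma-separated CPU list and, like perf's other *__fprintf helpers, returns the number of characters written. A short usage sketch on top of the constructors this header already declares:

    #include <stdio.h>
    #include "cpumap.h"

    static void show_cpus(void)
    {
            struct cpu_map *map = cpu_map__new("0-3"); /* CPUs 0,1,2,3 */

            if (map != NULL) {
                    cpu_map__fprintf(map, stdout); /* "4 cpus: 0, 1, 2, 3" */
                    cpu_map__delete(map);
            }
    }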
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 35073621e5de..aada3ac5e891 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | * | 3 | * |
| 4 | * No surprises, and works with signed and unsigned chars. | 4 | * No surprises, and works with signed and unsigned chars. |
| 5 | */ | 5 | */ |
| 6 | #include "cache.h" | 6 | #include "util.h" |
| 7 | 7 | ||
| 8 | enum { | 8 | enum { |
| 9 | S = GIT_SPACE, | 9 | S = GIT_SPACE, |
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index ffc35e748e89..dd8b19319c03 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c | |||
| @@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = { | |||
| 15 | 0, | 15 | 0, |
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | /* use this to force a umount */ | ||
| 19 | void debugfs_force_cleanup(void) | ||
| 20 | { | ||
| 21 | debugfs_find_mountpoint(); | ||
| 22 | debugfs_premounted = 0; | ||
| 23 | debugfs_umount(); | ||
| 24 | } | ||
| 25 | |||
| 26 | /* construct a full path to a debugfs element */ | ||
| 27 | int debugfs_make_path(const char *element, char *buffer, int size) | ||
| 28 | { | ||
| 29 | int len; | ||
| 30 | |||
| 31 | if (strlen(debugfs_mountpoint) == 0) { | ||
| 32 | buffer[0] = '\0'; | ||
| 33 | return -1; | ||
| 34 | } | ||
| 35 | |||
| 36 | len = strlen(debugfs_mountpoint) + strlen(element) + 1; | ||
| 37 | if (len >= size) | ||
| 38 | return len+1; | ||
| 39 | |||
| 40 | snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); | ||
| 41 | return 0; | ||
| 42 | } | ||
| 43 | |||
| 44 | static int debugfs_found; | 18 | static int debugfs_found; |
| 45 | 19 | ||
| 46 | /* find the path to the mounted debugfs */ | 20 | /* find the path to the mounted debugfs */ |
| @@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs) | |||
| 97 | return 0; | 71 | return 0; |
| 98 | } | 72 | } |
| 99 | 73 | ||
| 100 | |||
| 101 | int debugfs_valid_entry(const char *path) | ||
| 102 | { | ||
| 103 | struct stat st; | ||
| 104 | |||
| 105 | if (stat(path, &st)) | ||
| 106 | return -errno; | ||
| 107 | |||
| 108 | return 0; | ||
| 109 | } | ||
| 110 | |||
| 111 | static void debugfs_set_tracing_events_path(const char *mountpoint) | 74 | static void debugfs_set_tracing_events_path(const char *mountpoint) |
| 112 | { | 75 | { |
| 113 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", | 76 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", |
| @@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint) | |||
| 149 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); | 112 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); |
| 150 | debugfs_set_tracing_events_path(mountpoint); | 113 | debugfs_set_tracing_events_path(mountpoint); |
| 151 | } | 114 | } |
| 152 | |||
| 153 | /* umount the debugfs */ | ||
| 154 | |||
| 155 | int debugfs_umount(void) | ||
| 156 | { | ||
| 157 | char umountcmd[128]; | ||
| 158 | int ret; | ||
| 159 | |||
| 160 | /* if it was already mounted, leave it */ | ||
| 161 | if (debugfs_premounted) | ||
| 162 | return 0; | ||
| 163 | |||
| 164 | /* make sure it's a valid mount point */ | ||
| 165 | ret = debugfs_valid_mountpoint(debugfs_mountpoint); | ||
| 166 | if (ret) | ||
| 167 | return ret; | ||
| 168 | |||
| 169 | snprintf(umountcmd, sizeof(umountcmd), | ||
| 170 | "/bin/umount %s", debugfs_mountpoint); | ||
| 171 | return system(umountcmd); | ||
| 172 | } | ||
| 173 | |||
| 174 | int debugfs_write(const char *entry, const char *value) | ||
| 175 | { | ||
| 176 | char path[PATH_MAX + 1]; | ||
| 177 | int ret, count; | ||
| 178 | int fd; | ||
| 179 | |||
| 180 | /* construct the path */ | ||
| 181 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
| 182 | |||
| 183 | /* verify that it exists */ | ||
| 184 | ret = debugfs_valid_entry(path); | ||
| 185 | if (ret) | ||
| 186 | return ret; | ||
| 187 | |||
| 188 | /* get how many chars we're going to write */ | ||
| 189 | count = strlen(value); | ||
| 190 | |||
| 191 | /* open the debugfs entry */ | ||
| 192 | fd = open(path, O_RDWR); | ||
| 193 | if (fd < 0) | ||
| 194 | return -errno; | ||
| 195 | |||
| 196 | while (count > 0) { | ||
| 197 | /* write it */ | ||
| 198 | ret = write(fd, value, count); | ||
| 199 | if (ret <= 0) { | ||
| 200 | if (ret == EAGAIN) | ||
| 201 | continue; | ||
| 202 | close(fd); | ||
| 203 | return -errno; | ||
| 204 | } | ||
| 205 | count -= ret; | ||
| 206 | } | ||
| 207 | |||
| 208 | /* close it */ | ||
| 209 | close(fd); | ||
| 210 | |||
| 211 | /* return success */ | ||
| 212 | return 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | /* | ||
| 216 | * read a debugfs entry | ||
| 217 | * returns the number of chars read or a negative errno | ||
| 218 | */ | ||
| 219 | int debugfs_read(const char *entry, char *buffer, size_t size) | ||
| 220 | { | ||
| 221 | char path[PATH_MAX + 1]; | ||
| 222 | int ret; | ||
| 223 | int fd; | ||
| 224 | |||
| 225 | /* construct the path */ | ||
| 226 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
| 227 | |||
| 228 | /* verify that it exists */ | ||
| 229 | ret = debugfs_valid_entry(path); | ||
| 230 | if (ret) | ||
| 231 | return ret; | ||
| 232 | |||
| 233 | /* open the debugfs entry */ | ||
| 234 | fd = open(path, O_RDONLY); | ||
| 235 | if (fd < 0) | ||
| 236 | return -errno; | ||
| 237 | |||
| 238 | do { | ||
| 239 | /* read it */ | ||
| 240 | ret = read(fd, buffer, size); | ||
| 241 | if (ret == 0) { | ||
| 242 | close(fd); | ||
| 243 | return EOF; | ||
| 244 | } | ||
| 245 | } while (ret < 0 && errno == EAGAIN); | ||
| 246 | |||
| 247 | /* close it */ | ||
| 248 | close(fd); | ||
| 249 | |||
| 250 | /* make *sure* there's a null character at the end */ | ||
| 251 | buffer[ret] = '\0'; | ||
| 252 | |||
| 253 | /* return the number of chars read */ | ||
| 254 | return ret; | ||
| 255 | } | ||
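The debugfs_write() loop deleted above is a common pattern worth getting right: write() reports would-block through errno, a short write must advance the buffer pointer (the loop above resent the start of the buffer on every partial write), and the companion read path must reserve room for its terminating NUL. A self-contained sketch of a correct write-all loop under plain POSIX semantics (the helper name is illustrative, not part of the perf API):

#include <errno.h>
#include <unistd.h>

/* Write all 'count' bytes of 'buf' to 'fd', retrying transient failures. */
static int write_all(int fd, const char *buf, size_t count)
{
	while (count > 0) {
		ssize_t ret = write(fd, buf, count);

		if (ret < 0) {
			if (errno == EAGAIN || errno == EINTR)
				continue;	/* transient: retry */
			return -errno;		/* hard error */
		}
		buf += ret;	/* advance past the bytes already written */
		count -= ret;
	}
	return 0;
}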
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h index 4a878f735eb0..68f3e87ec57f 100644 --- a/tools/perf/util/debugfs.h +++ b/tools/perf/util/debugfs.h | |||
| @@ -3,14 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | const char *debugfs_find_mountpoint(void); | 4 | const char *debugfs_find_mountpoint(void); |
| 5 | int debugfs_valid_mountpoint(const char *debugfs); | 5 | int debugfs_valid_mountpoint(const char *debugfs); |
| 6 | int debugfs_valid_entry(const char *path); | ||
| 7 | char *debugfs_mount(const char *mountpoint); | 6 | char *debugfs_mount(const char *mountpoint); |
| 8 | int debugfs_umount(void); | ||
| 9 | void debugfs_set_path(const char *mountpoint); | 7 | void debugfs_set_path(const char *mountpoint); |
| 10 | int debugfs_write(const char *entry, const char *value); | ||
| 11 | int debugfs_read(const char *entry, char *buffer, size_t size); | ||
| 12 | void debugfs_force_cleanup(void); | ||
| 13 | int debugfs_make_path(const char *element, char *buffer, int size); | ||
| 14 | 8 | ||
| 15 | extern char debugfs_mountpoint[]; | 9 | extern char debugfs_mountpoint[]; |
| 16 | extern char tracing_events_path[]; | 10 | extern char tracing_events_path[]; |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
| @@ -81,6 +81,7 @@ struct perf_sample { | |||
| 81 | u32 raw_size; | 81 | u32 raw_size; |
| 82 | void *raw_data; | 82 | void *raw_data; |
| 83 | struct ip_callchain *callchain; | 83 | struct ip_callchain *callchain; |
| 84 | struct branch_stack *branch_stack; | ||
| 84 | }; | 85 | }; |
| 85 | 86 | ||
| 86 | #define BUILD_ID_SIZE 20 | 87 | #define BUILD_ID_SIZE 20 |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ea32a061f1c8..159263d17c2d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
| @@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) | |||
| 97 | ++evlist->nr_entries; | 97 | ++evlist->nr_entries; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | 100 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, |
| 101 | struct list_head *list, | 101 | struct list_head *list, |
| 102 | int nr_entries) | 102 | int nr_entries) |
| 103 | { | 103 | { |
| 104 | list_splice_tail(list, &evlist->entries); | 104 | list_splice_tail(list, &evlist->entries); |
| 105 | evlist->nr_entries += nr_entries; | 105 | evlist->nr_entries += nr_entries; |
| @@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, | |||
| 597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); | 597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); |
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 600 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
| 601 | pid_t target_tid, const char *cpu_list) | 601 | const char *target_tid, uid_t uid, const char *cpu_list) |
| 602 | { | 602 | { |
| 603 | evlist->threads = thread_map__new(target_pid, target_tid); | 603 | evlist->threads = thread_map__new_str(target_pid, target_tid, uid); |
| 604 | 604 | ||
| 605 | if (evlist->threads == NULL) | 605 | if (evlist->threads == NULL) |
| 606 | return -1; | 606 | return -1; |
| 607 | 607 | ||
| 608 | if (cpu_list == NULL && target_tid != -1) | 608 | if (uid != UINT_MAX || (cpu_list == NULL && target_tid)) |
| 609 | evlist->cpus = cpu_map__dummy_new(); | 609 | evlist->cpus = cpu_map__dummy_new(); |
| 610 | else | 610 | else |
| 611 | evlist->cpus = cpu_map__new(cpu_list); | 611 | evlist->cpus = cpu_map__new(cpu_list); |
| @@ -765,6 +765,7 @@ out_err: | |||
| 765 | list_for_each_entry_reverse(evsel, &evlist->entries, node) | 765 | list_for_each_entry_reverse(evsel, &evlist->entries, node) |
| 766 | perf_evsel__close(evsel, ncpus, nthreads); | 766 | perf_evsel__close(evsel, ncpus, nthreads); |
| 767 | 767 | ||
| 768 | errno = -err; | ||
| 768 | return err; | 769 | return err; |
| 769 | } | 770 | } |
| 770 | 771 | ||
| @@ -824,7 +825,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, | |||
| 824 | exit(-1); | 825 | exit(-1); |
| 825 | } | 826 | } |
| 826 | 827 | ||
| 827 | if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) | 828 | if (!opts->system_wide && !opts->target_tid && !opts->target_pid) |
| 828 | evlist->threads->map[0] = evlist->workload.pid; | 829 | evlist->threads->map[0] = evlist->workload.pid; |
| 829 | 830 | ||
| 830 | close(child_ready_pipe[1]); | 831 | close(child_ready_pipe[1]); |
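With target_pid/target_tid switched from pid_t (where -1 was the "unset" sentinel) to strings, an absent target is now a NULL pointer, and the new uid filter uses UINT_MAX as its "unset" value. A hypothetical caller under the new signature (values are illustrative):

#include <limits.h>	/* UINT_MAX */

/* Attach to an existing process; no tid, uid or cpu filtering. */
if (perf_evlist__create_maps(evlist, "1234" /* target_pid */,
			     NULL /* target_tid */,
			     UINT_MAX /* no uid filter */,
			     NULL /* cpu_list */) < 0) {
	fprintf(stderr, "failed to create thread/cpu maps\n");
	return -1;
}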
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8922aeed0467..21f1c9e57f13 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
| @@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, | |||
| 106 | evlist->threads = threads; | 106 | evlist->threads = threads; |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 109 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
| 110 | pid_t target_tid, const char *cpu_list); | 110 | const char *tid, uid_t uid, const char *cpu_list); |
| 111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); | 111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); |
| 112 | int perf_evlist__set_filters(struct perf_evlist *evlist); | 112 | int perf_evlist__set_filters(struct perf_evlist *evlist); |
| 113 | 113 | ||
| @@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); | |||
| 117 | 117 | ||
| 118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); | 118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); |
| 119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); | 119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); |
| 120 | |||
| 121 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | ||
| 122 | struct list_head *list, | ||
| 123 | int nr_entries); | ||
| 124 | |||
| 120 | #endif /* __PERF_EVLIST_H */ | 125 | #endif /* __PERF_EVLIST_H */ |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7132ee834e0e..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
| @@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
| 68 | struct perf_event_attr *attr = &evsel->attr; | 68 | struct perf_event_attr *attr = &evsel->attr; |
| 69 | int track = !evsel->idx; /* only the first counter needs these */ | 69 | int track = !evsel->idx; /* only the first counter needs these */ |
| 70 | 70 | ||
| 71 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 71 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
| 72 | attr->inherit = !opts->no_inherit; | 72 | attr->inherit = !opts->no_inherit; |
| 73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
| 74 | PERF_FORMAT_TOTAL_TIME_RUNNING | | 74 | PERF_FORMAT_TOTAL_TIME_RUNNING | |
| @@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
| 111 | if (opts->period) | 111 | if (opts->period) |
| 112 | attr->sample_type |= PERF_SAMPLE_PERIOD; | 112 | attr->sample_type |= PERF_SAMPLE_PERIOD; |
| 113 | 113 | ||
| 114 | if (opts->sample_id_all_avail && | 114 | if (!opts->sample_id_all_missing && |
| 115 | (opts->sample_time || opts->system_wide || | 115 | (opts->sample_time || opts->system_wide || |
| 116 | !opts->no_inherit || opts->cpu_list)) | 116 | !opts->no_inherit || opts->cpu_list)) |
| 117 | attr->sample_type |= PERF_SAMPLE_TIME; | 117 | attr->sample_type |= PERF_SAMPLE_TIME; |
| @@ -126,11 +126,15 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
| 126 | attr->watermark = 0; | 126 | attr->watermark = 0; |
| 127 | attr->wakeup_events = 1; | 127 | attr->wakeup_events = 1; |
| 128 | } | 128 | } |
| 129 | if (opts->branch_stack) { | ||
| 130 | attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
| 131 | attr->branch_sample_type = opts->branch_stack; | ||
| 132 | } | ||
| 129 | 133 | ||
| 130 | attr->mmap = track; | 134 | attr->mmap = track; |
| 131 | attr->comm = track; | 135 | attr->comm = track; |
| 132 | 136 | ||
| 133 | if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { | 137 | if (!opts->target_pid && !opts->target_tid && !opts->system_wide) { |
| 134 | attr->disabled = 1; | 138 | attr->disabled = 1; |
| 135 | attr->enable_on_exec = 1; | 139 | attr->enable_on_exec = 1; |
| 136 | } | 140 | } |
| @@ -536,7 +540,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
| 536 | } | 540 | } |
| 537 | 541 | ||
| 538 | if (type & PERF_SAMPLE_READ) { | 542 | if (type & PERF_SAMPLE_READ) { |
| 539 | fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); | 543 | fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); |
| 540 | return -1; | 544 | return -1; |
| 541 | } | 545 | } |
| 542 | 546 | ||
| @@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
| 576 | data->raw_data = (void *) pdata; | 580 | data->raw_data = (void *) pdata; |
| 577 | } | 581 | } |
| 578 | 582 | ||
| 583 | if (type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 584 | u64 sz; | ||
| 585 | |||
| 586 | data->branch_stack = (struct branch_stack *)array; | ||
| 587 | array++; /* nr */ | ||
| 588 | |||
| 589 | sz = data->branch_stack->nr * sizeof(struct branch_entry); | ||
| 590 | sz /= sizeof(u64); | ||
| 591 | array += sz; | ||
| 592 | } | ||
| 579 | return 0; | 593 | return 0; |
| 580 | } | 594 | } |
| 581 | 595 | ||
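The branch-stack payload parsed above is a leading u64 count followed by densely packed entries, which is why the cursor advances by nr * sizeof(struct branch_entry) / sizeof(u64) words. Once perf_event__parse_sample() has pointed sample->branch_stack at it, a consumer can walk the entries; a sketch assuming the tool's struct branch_entry carries u64 from/to fields and a flags.mispred bit, per the perf_event branch-sampling ABI:

#include <inttypes.h>
#include <stdio.h>

static void dump_branch_stack(const struct perf_sample *sample)
{
	const struct branch_stack *bs = sample->branch_stack;
	u64 i;

	for (i = 0; i < bs->nr; i++) {
		const struct branch_entry *e = &bs->entries[i];

		printf("branch %" PRIu64 ": %#" PRIx64 " -> %#" PRIx64 "%s\n",
		       i, e->from, e->to,
		       e->flags.mispred ? " (mispredicted)" : "");
	}
}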
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 14bb035c5fd9..fcd9cf3ea63e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
| @@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id) | |||
| 63 | return NULL; | 63 | return NULL; |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | static const char *__perf_magic = "PERFFILE"; | 66 | /* |
| 67 | * magic2 = "PERFILE2" | ||
| 68 | * must be a numerical value so that its byte order | ||
| 69 | * depends on the endianness of the host that wrote | ||
| 70 | * the file. That way we can detect the endianness | ||
| 71 | * when reading the perf.data file back. | ||
| 72 | * | ||
| 73 | * We also check for the legacy (PERFFILE) format. | ||
| 74 | */ | ||
| 75 | static const char *__perf_magic1 = "PERFFILE"; | ||
| 76 | static const u64 __perf_magic2 = 0x32454c4946524550ULL; | ||
| 77 | static const u64 __perf_magic2_sw = 0x50455246494c4532ULL; | ||
| 67 | 78 | ||
| 68 | #define PERF_MAGIC (*(u64 *)__perf_magic) | 79 | #define PERF_MAGIC __perf_magic2 |
| 69 | 80 | ||
| 70 | struct perf_file_attr { | 81 | struct perf_file_attr { |
| 71 | struct perf_event_attr attr; | 82 | struct perf_event_attr attr; |
| @@ -1012,6 +1023,12 @@ write_it: | |||
| 1012 | return do_write_string(fd, buffer); | 1023 | return do_write_string(fd, buffer); |
| 1013 | } | 1024 | } |
| 1014 | 1025 | ||
| 1026 | static int write_branch_stack(int fd __used, struct perf_header *h __used, | ||
| 1027 | struct perf_evlist *evlist __used) | ||
| 1028 | { | ||
| 1029 | return 0; | ||
| 1030 | } | ||
| 1031 | |||
| 1015 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) | 1032 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) |
| 1016 | { | 1033 | { |
| 1017 | char *str = do_read_string(fd, ph); | 1034 | char *str = do_read_string(fd, ph); |
| @@ -1133,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
| 1133 | uint64_t id; | 1150 | uint64_t id; |
| 1134 | void *buf = NULL; | 1151 | void *buf = NULL; |
| 1135 | char *str; | 1152 | char *str; |
| 1136 | u32 nre, sz, nr, i, j, msz; | 1153 | u32 nre, sz, nr, i, j; |
| 1137 | int ret; | 1154 | ssize_t ret; |
| 1155 | size_t msz; | ||
| 1138 | 1156 | ||
| 1139 | /* number of events */ | 1157 | /* number of events */ |
| 1140 | ret = read(fd, &nre, sizeof(nre)); | 1158 | ret = read(fd, &nre, sizeof(nre)); |
| @@ -1151,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
| 1151 | if (ph->needs_swap) | 1169 | if (ph->needs_swap) |
| 1152 | sz = bswap_32(sz); | 1170 | sz = bswap_32(sz); |
| 1153 | 1171 | ||
| 1154 | /* | ||
| 1155 | * ensure it is at least to our ABI rev | ||
| 1156 | */ | ||
| 1157 | if (sz < (u32)sizeof(attr)) | ||
| 1158 | goto error; | ||
| 1159 | |||
| 1160 | memset(&attr, 0, sizeof(attr)); | 1172 | memset(&attr, 0, sizeof(attr)); |
| 1161 | 1173 | ||
| 1162 | /* read entire region to sync up to next field */ | 1174 | /* buffer to hold the on-file attr struct */ |
| 1163 | buf = malloc(sz); | 1175 | buf = malloc(sz); |
| 1164 | if (!buf) | 1176 | if (!buf) |
| 1165 | goto error; | 1177 | goto error; |
| 1166 | 1178 | ||
| 1167 | msz = sizeof(attr); | 1179 | msz = sizeof(attr); |
| 1168 | if (sz < msz) | 1180 | if (sz < (ssize_t)msz) |
| 1169 | msz = sz; | 1181 | msz = sz; |
| 1170 | 1182 | ||
| 1171 | for (i = 0 ; i < nre; i++) { | 1183 | for (i = 0 ; i < nre; i++) { |
| 1172 | 1184 | ||
| 1185 | /* | ||
| 1186 | * must read entire on-file attr struct to | ||
| 1187 | * sync up with layout. | ||
| 1188 | */ | ||
| 1173 | ret = read(fd, buf, sz); | 1189 | ret = read(fd, buf, sz); |
| 1174 | if (ret != (ssize_t)sz) | 1190 | if (ret != (ssize_t)sz) |
| 1175 | goto error; | 1191 | goto error; |
| @@ -1305,25 +1321,204 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) | |||
| 1305 | free(str); | 1321 | free(str); |
| 1306 | } | 1322 | } |
| 1307 | 1323 | ||
| 1324 | static void print_branch_stack(struct perf_header *ph __used, int fd __used, | ||
| 1325 | FILE *fp) | ||
| 1326 | { | ||
| 1327 | fprintf(fp, "# contains samples with branch stack\n"); | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | static int __event_process_build_id(struct build_id_event *bev, | ||
| 1331 | char *filename, | ||
| 1332 | struct perf_session *session) | ||
| 1333 | { | ||
| 1334 | int err = -1; | ||
| 1335 | struct list_head *head; | ||
| 1336 | struct machine *machine; | ||
| 1337 | u16 misc; | ||
| 1338 | struct dso *dso; | ||
| 1339 | enum dso_kernel_type dso_type; | ||
| 1340 | |||
| 1341 | machine = perf_session__findnew_machine(session, bev->pid); | ||
| 1342 | if (!machine) | ||
| 1343 | goto out; | ||
| 1344 | |||
| 1345 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
| 1346 | |||
| 1347 | switch (misc) { | ||
| 1348 | case PERF_RECORD_MISC_KERNEL: | ||
| 1349 | dso_type = DSO_TYPE_KERNEL; | ||
| 1350 | head = &machine->kernel_dsos; | ||
| 1351 | break; | ||
| 1352 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
| 1353 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
| 1354 | head = &machine->kernel_dsos; | ||
| 1355 | break; | ||
| 1356 | case PERF_RECORD_MISC_USER: | ||
| 1357 | case PERF_RECORD_MISC_GUEST_USER: | ||
| 1358 | dso_type = DSO_TYPE_USER; | ||
| 1359 | head = &machine->user_dsos; | ||
| 1360 | break; | ||
| 1361 | default: | ||
| 1362 | goto out; | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | dso = __dsos__findnew(head, filename); | ||
| 1366 | if (dso != NULL) { | ||
| 1367 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
| 1368 | |||
| 1369 | dso__set_build_id(dso, &bev->build_id); | ||
| 1370 | |||
| 1371 | if (filename[0] == '[') | ||
| 1372 | dso->kernel = dso_type; | ||
| 1373 | |||
| 1374 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
| 1375 | sbuild_id); | ||
| 1376 | pr_debug("build id event received for %s: %s\n", | ||
| 1377 | dso->long_name, sbuild_id); | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | err = 0; | ||
| 1381 | out: | ||
| 1382 | return err; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
| 1386 | int input, u64 offset, u64 size) | ||
| 1387 | { | ||
| 1388 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
| 1389 | struct { | ||
| 1390 | struct perf_event_header header; | ||
| 1391 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
| 1392 | char filename[0]; | ||
| 1393 | } old_bev; | ||
| 1394 | struct build_id_event bev; | ||
| 1395 | char filename[PATH_MAX]; | ||
| 1396 | u64 limit = offset + size; | ||
| 1397 | |||
| 1398 | while (offset < limit) { | ||
| 1399 | ssize_t len; | ||
| 1400 | |||
| 1401 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
| 1402 | return -1; | ||
| 1403 | |||
| 1404 | if (header->needs_swap) | ||
| 1405 | perf_event_header__bswap(&old_bev.header); | ||
| 1406 | |||
| 1407 | len = old_bev.header.size - sizeof(old_bev); | ||
| 1408 | if (read(input, filename, len) != len) | ||
| 1409 | return -1; | ||
| 1410 | |||
| 1411 | bev.header = old_bev.header; | ||
| 1412 | |||
| 1413 | /* | ||
| 1414 | * As the pid is the missing value, we need to fill | ||
| 1415 | * it properly. The header.misc value give us nice hint. | ||
| 1416 | */ | ||
| 1417 | bev.pid = HOST_KERNEL_ID; | ||
| 1418 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
| 1419 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
| 1420 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
| 1421 | |||
| 1422 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
| 1423 | __event_process_build_id(&bev, filename, session); | ||
| 1424 | |||
| 1425 | offset += bev.header.size; | ||
| 1426 | } | ||
| 1427 | |||
| 1428 | return 0; | ||
| 1429 | } | ||
| 1430 | |||
| 1431 | static int perf_header__read_build_ids(struct perf_header *header, | ||
| 1432 | int input, u64 offset, u64 size) | ||
| 1433 | { | ||
| 1434 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
| 1435 | struct build_id_event bev; | ||
| 1436 | char filename[PATH_MAX]; | ||
| 1437 | u64 limit = offset + size, orig_offset = offset; | ||
| 1438 | int err = -1; | ||
| 1439 | |||
| 1440 | while (offset < limit) { | ||
| 1441 | ssize_t len; | ||
| 1442 | |||
| 1443 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
| 1444 | goto out; | ||
| 1445 | |||
| 1446 | if (header->needs_swap) | ||
| 1447 | perf_event_header__bswap(&bev.header); | ||
| 1448 | |||
| 1449 | len = bev.header.size - sizeof(bev); | ||
| 1450 | if (read(input, filename, len) != len) | ||
| 1451 | goto out; | ||
| 1452 | /* | ||
| 1453 | * The a1645ce1 changeset: | ||
| 1454 | * | ||
| 1455 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
| 1456 | * | ||
| 1457 | * Added a field to struct build_id_event that broke the file | ||
| 1458 | * format. | ||
| 1459 | * | ||
| 1460 | * Since the kernel build-id is the first entry, process the | ||
| 1461 | * table using the old format if the well known | ||
| 1462 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
| 1463 | * first 4 characters chopped off (where the pid_t sits). | ||
| 1464 | */ | ||
| 1465 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
| 1466 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
| 1467 | return -1; | ||
| 1468 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | __event_process_build_id(&bev, filename, session); | ||
| 1472 | |||
| 1473 | offset += bev.header.size; | ||
| 1474 | } | ||
| 1475 | err = 0; | ||
| 1476 | out: | ||
| 1477 | return err; | ||
| 1478 | } | ||
| 1479 | |||
| 1480 | static int process_trace_info(struct perf_file_section *section __unused, | ||
| 1481 | struct perf_header *ph __unused, | ||
| 1482 | int feat __unused, int fd) | ||
| 1483 | { | ||
| 1484 | trace_report(fd, false); | ||
| 1485 | return 0; | ||
| 1486 | } | ||
| 1487 | |||
| 1488 | static int process_build_id(struct perf_file_section *section, | ||
| 1489 | struct perf_header *ph, | ||
| 1490 | int feat __unused, int fd) | ||
| 1491 | { | ||
| 1492 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
| 1493 | pr_debug("Failed to read buildids, continuing...\n"); | ||
| 1494 | return 0; | ||
| 1495 | } | ||
| 1496 | |||
| 1308 | struct feature_ops { | 1497 | struct feature_ops { |
| 1309 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); | 1498 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); |
| 1310 | void (*print)(struct perf_header *h, int fd, FILE *fp); | 1499 | void (*print)(struct perf_header *h, int fd, FILE *fp); |
| 1500 | int (*process)(struct perf_file_section *section, | ||
| 1501 | struct perf_header *h, int feat, int fd); | ||
| 1311 | const char *name; | 1502 | const char *name; |
| 1312 | bool full_only; | 1503 | bool full_only; |
| 1313 | }; | 1504 | }; |
| 1314 | 1505 | ||
| 1315 | #define FEAT_OPA(n, func) \ | 1506 | #define FEAT_OPA(n, func) \ |
| 1316 | [n] = { .name = #n, .write = write_##func, .print = print_##func } | 1507 | [n] = { .name = #n, .write = write_##func, .print = print_##func } |
| 1508 | #define FEAT_OPP(n, func) \ | ||
| 1509 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ | ||
| 1510 | .process = process_##func } | ||
| 1317 | #define FEAT_OPF(n, func) \ | 1511 | #define FEAT_OPF(n, func) \ |
| 1318 | [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } | 1512 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ |
| 1513 | .full_only = true } | ||
| 1319 | 1514 | ||
| 1320 | /* feature_ops not implemented: */ | 1515 | /* feature_ops not implemented: */ |
| 1321 | #define print_trace_info NULL | 1516 | #define print_trace_info NULL |
| 1322 | #define print_build_id NULL | 1517 | #define print_build_id NULL |
| 1323 | 1518 | ||
| 1324 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | 1519 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { |
| 1325 | FEAT_OPA(HEADER_TRACE_INFO, trace_info), | 1520 | FEAT_OPP(HEADER_TRACE_INFO, trace_info), |
| 1326 | FEAT_OPA(HEADER_BUILD_ID, build_id), | 1521 | FEAT_OPP(HEADER_BUILD_ID, build_id), |
| 1327 | FEAT_OPA(HEADER_HOSTNAME, hostname), | 1522 | FEAT_OPA(HEADER_HOSTNAME, hostname), |
| 1328 | FEAT_OPA(HEADER_OSRELEASE, osrelease), | 1523 | FEAT_OPA(HEADER_OSRELEASE, osrelease), |
| 1329 | FEAT_OPA(HEADER_VERSION, version), | 1524 | FEAT_OPA(HEADER_VERSION, version), |
| @@ -1336,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | |||
| 1336 | FEAT_OPA(HEADER_CMDLINE, cmdline), | 1531 | FEAT_OPA(HEADER_CMDLINE, cmdline), |
| 1337 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), | 1532 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), |
| 1338 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), | 1533 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), |
| 1534 | FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), | ||
| 1339 | }; | 1535 | }; |
| 1340 | 1536 | ||
| 1341 | struct header_print_data { | 1537 | struct header_print_data { |
| @@ -1620,24 +1816,128 @@ out_free: | |||
| 1620 | return err; | 1816 | return err; |
| 1621 | } | 1817 | } |
| 1622 | 1818 | ||
| 1819 | static const int attr_file_abi_sizes[] = { | ||
| 1820 | [0] = PERF_ATTR_SIZE_VER0, | ||
| 1821 | [1] = PERF_ATTR_SIZE_VER1, | ||
| 1822 | 0, | ||
| 1823 | }; | ||
| 1824 | |||
| 1825 | /* | ||
| 1826 | * In the legacy file format, the magic number does not encode endianness; | ||
| 1827 | * the header size (hdr_sz) was used instead. But given that hdr_sz can vary | ||
| 1828 | * based on ABI revisions, we need to try every known size in both byte | ||
| 1829 | * orders to detect the endianness. | ||
| 1830 | */ | ||
| 1831 | static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
| 1832 | { | ||
| 1833 | uint64_t ref_size, attr_size; | ||
| 1834 | int i; | ||
| 1835 | |||
| 1836 | for (i = 0 ; attr_file_abi_sizes[i]; i++) { | ||
| 1837 | ref_size = attr_file_abi_sizes[i] | ||
| 1838 | + sizeof(struct perf_file_section); | ||
| 1839 | if (hdr_sz != ref_size) { | ||
| 1840 | attr_size = bswap_64(hdr_sz); | ||
| 1841 | if (attr_size != ref_size) | ||
| 1842 | continue; | ||
| 1843 | |||
| 1844 | ph->needs_swap = true; | ||
| 1845 | } | ||
| 1846 | pr_debug("ABI%d perf.data file detected, need_swap=%d\n", | ||
| 1847 | i, | ||
| 1848 | ph->needs_swap); | ||
| 1849 | return 0; | ||
| 1850 | } | ||
| 1851 | /* could not determine endianness */ | ||
| 1852 | return -1; | ||
| 1853 | } | ||
| 1854 | |||
| 1855 | #define PERF_PIPE_HDR_VER0 16 | ||
| 1856 | |||
| 1857 | static const size_t attr_pipe_abi_sizes[] = { | ||
| 1858 | [0] = PERF_PIPE_HDR_VER0, | ||
| 1859 | 0, | ||
| 1860 | }; | ||
| 1861 | |||
| 1862 | /* | ||
| 1863 | * In the legacy pipe format, there is an implicit assumption that the host | ||
| 1864 | * recording the samples and the host parsing them share the same endianness. | ||
| 1865 | * This is not always the case, since the pipe output can be redirected into | ||
| 1866 | * a file and analyzed on a different machine, possibly with a different | ||
| 1867 | * endianness and a different perf_event ABI revision in the perf tool itself. | ||
| 1868 | */ | ||
| 1869 | static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
| 1870 | { | ||
| 1871 | u64 attr_size; | ||
| 1872 | int i; | ||
| 1873 | |||
| 1874 | for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { | ||
| 1875 | if (hdr_sz != attr_pipe_abi_sizes[i]) { | ||
| 1876 | attr_size = bswap_64(hdr_sz); | ||
| 1877 | if (attr_size != hdr_sz) | ||
| 1878 | continue; | ||
| 1879 | |||
| 1880 | ph->needs_swap = true; | ||
| 1881 | } | ||
| 1882 | pr_debug("Pipe ABI%d perf.data file detected\n", i); | ||
| 1883 | return 0; | ||
| 1884 | } | ||
| 1885 | return -1; | ||
| 1886 | } | ||
| 1887 | |||
| 1888 | static int check_magic_endian(u64 magic, uint64_t hdr_sz, | ||
| 1889 | bool is_pipe, struct perf_header *ph) | ||
| 1890 | { | ||
| 1891 | int ret; | ||
| 1892 | |||
| 1893 | /* check for legacy format */ | ||
| 1894 | ret = memcmp(&magic, __perf_magic1, sizeof(magic)); | ||
| 1895 | if (ret == 0) { | ||
| 1896 | pr_debug("legacy perf.data format\n"); | ||
| 1897 | if (is_pipe) | ||
| 1898 | return try_all_pipe_abis(hdr_sz, ph); | ||
| 1899 | |||
| 1900 | return try_all_file_abis(hdr_sz, ph); | ||
| 1901 | } | ||
| 1902 | /* | ||
| 1903 | * the new magic number serves two purposes: | ||
| 1904 | * - unique number to identify actual perf.data files | ||
| 1905 | * - encode endianness of file | ||
| 1906 | */ | ||
| 1907 | |||
| 1908 | /* check magic number with one endianness */ | ||
| 1909 | if (magic == __perf_magic2) | ||
| 1910 | return 0; | ||
| 1911 | |||
| 1912 | /* check magic number with opposite endianness */ | ||
| 1913 | if (magic != __perf_magic2_sw) | ||
| 1914 | return -1; | ||
| 1915 | |||
| 1916 | ph->needs_swap = true; | ||
| 1917 | |||
| 1918 | return 0; | ||
| 1919 | } | ||
| 1920 | |||
| 1623 | int perf_file_header__read(struct perf_file_header *header, | 1921 | int perf_file_header__read(struct perf_file_header *header, |
| 1624 | struct perf_header *ph, int fd) | 1922 | struct perf_header *ph, int fd) |
| 1625 | { | 1923 | { |
| 1924 | int ret; | ||
| 1925 | |||
| 1626 | lseek(fd, 0, SEEK_SET); | 1926 | lseek(fd, 0, SEEK_SET); |
| 1627 | 1927 | ||
| 1628 | if (readn(fd, header, sizeof(*header)) <= 0 || | 1928 | ret = readn(fd, header, sizeof(*header)); |
| 1629 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | 1929 | if (ret <= 0) |
| 1630 | return -1; | 1930 | return -1; |
| 1631 | 1931 | ||
| 1632 | if (header->attr_size != sizeof(struct perf_file_attr)) { | 1932 | if (check_magic_endian(header->magic, |
| 1633 | u64 attr_size = bswap_64(header->attr_size); | 1933 | header->attr_size, false, ph) < 0) { |
| 1634 | 1934 | pr_debug("magic/endian check failed\n"); | |
| 1635 | if (attr_size != sizeof(struct perf_file_attr)) | 1935 | return -1; |
| 1636 | return -1; | 1936 | } |
| 1637 | 1937 | ||
| 1938 | if (ph->needs_swap) { | ||
| 1638 | mem_bswap_64(header, offsetof(struct perf_file_header, | 1939 | mem_bswap_64(header, offsetof(struct perf_file_header, |
| 1639 | adds_features)); | 1940 | adds_features)); |
| 1640 | ph->needs_swap = true; | ||
| 1641 | } | 1941 | } |
| 1642 | 1942 | ||
| 1643 | if (header->size != sizeof(*header)) { | 1943 | if (header->size != sizeof(*header)) { |
| @@ -1689,156 +1989,6 @@ int perf_file_header__read(struct perf_file_header *header, | |||
| 1689 | return 0; | 1989 | return 0; |
| 1690 | } | 1990 | } |
| 1691 | 1991 | ||
| 1692 | static int __event_process_build_id(struct build_id_event *bev, | ||
| 1693 | char *filename, | ||
| 1694 | struct perf_session *session) | ||
| 1695 | { | ||
| 1696 | int err = -1; | ||
| 1697 | struct list_head *head; | ||
| 1698 | struct machine *machine; | ||
| 1699 | u16 misc; | ||
| 1700 | struct dso *dso; | ||
| 1701 | enum dso_kernel_type dso_type; | ||
| 1702 | |||
| 1703 | machine = perf_session__findnew_machine(session, bev->pid); | ||
| 1704 | if (!machine) | ||
| 1705 | goto out; | ||
| 1706 | |||
| 1707 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
| 1708 | |||
| 1709 | switch (misc) { | ||
| 1710 | case PERF_RECORD_MISC_KERNEL: | ||
| 1711 | dso_type = DSO_TYPE_KERNEL; | ||
| 1712 | head = &machine->kernel_dsos; | ||
| 1713 | break; | ||
| 1714 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
| 1715 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
| 1716 | head = &machine->kernel_dsos; | ||
| 1717 | break; | ||
| 1718 | case PERF_RECORD_MISC_USER: | ||
| 1719 | case PERF_RECORD_MISC_GUEST_USER: | ||
| 1720 | dso_type = DSO_TYPE_USER; | ||
| 1721 | head = &machine->user_dsos; | ||
| 1722 | break; | ||
| 1723 | default: | ||
| 1724 | goto out; | ||
| 1725 | } | ||
| 1726 | |||
| 1727 | dso = __dsos__findnew(head, filename); | ||
| 1728 | if (dso != NULL) { | ||
| 1729 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
| 1730 | |||
| 1731 | dso__set_build_id(dso, &bev->build_id); | ||
| 1732 | |||
| 1733 | if (filename[0] == '[') | ||
| 1734 | dso->kernel = dso_type; | ||
| 1735 | |||
| 1736 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
| 1737 | sbuild_id); | ||
| 1738 | pr_debug("build id event received for %s: %s\n", | ||
| 1739 | dso->long_name, sbuild_id); | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | err = 0; | ||
| 1743 | out: | ||
| 1744 | return err; | ||
| 1745 | } | ||
| 1746 | |||
| 1747 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
| 1748 | int input, u64 offset, u64 size) | ||
| 1749 | { | ||
| 1750 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
| 1751 | struct { | ||
| 1752 | struct perf_event_header header; | ||
| 1753 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
| 1754 | char filename[0]; | ||
| 1755 | } old_bev; | ||
| 1756 | struct build_id_event bev; | ||
| 1757 | char filename[PATH_MAX]; | ||
| 1758 | u64 limit = offset + size; | ||
| 1759 | |||
| 1760 | while (offset < limit) { | ||
| 1761 | ssize_t len; | ||
| 1762 | |||
| 1763 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
| 1764 | return -1; | ||
| 1765 | |||
| 1766 | if (header->needs_swap) | ||
| 1767 | perf_event_header__bswap(&old_bev.header); | ||
| 1768 | |||
| 1769 | len = old_bev.header.size - sizeof(old_bev); | ||
| 1770 | if (read(input, filename, len) != len) | ||
| 1771 | return -1; | ||
| 1772 | |||
| 1773 | bev.header = old_bev.header; | ||
| 1774 | |||
| 1775 | /* | ||
| 1776 | * As the pid is the missing value, we need to fill | ||
| 1777 | * it properly. The header.misc value give us nice hint. | ||
| 1778 | */ | ||
| 1779 | bev.pid = HOST_KERNEL_ID; | ||
| 1780 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
| 1781 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
| 1782 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
| 1783 | |||
| 1784 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
| 1785 | __event_process_build_id(&bev, filename, session); | ||
| 1786 | |||
| 1787 | offset += bev.header.size; | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | return 0; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | static int perf_header__read_build_ids(struct perf_header *header, | ||
| 1794 | int input, u64 offset, u64 size) | ||
| 1795 | { | ||
| 1796 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
| 1797 | struct build_id_event bev; | ||
| 1798 | char filename[PATH_MAX]; | ||
| 1799 | u64 limit = offset + size, orig_offset = offset; | ||
| 1800 | int err = -1; | ||
| 1801 | |||
| 1802 | while (offset < limit) { | ||
| 1803 | ssize_t len; | ||
| 1804 | |||
| 1805 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
| 1806 | goto out; | ||
| 1807 | |||
| 1808 | if (header->needs_swap) | ||
| 1809 | perf_event_header__bswap(&bev.header); | ||
| 1810 | |||
| 1811 | len = bev.header.size - sizeof(bev); | ||
| 1812 | if (read(input, filename, len) != len) | ||
| 1813 | goto out; | ||
| 1814 | /* | ||
| 1815 | * The a1645ce1 changeset: | ||
| 1816 | * | ||
| 1817 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
| 1818 | * | ||
| 1819 | * Added a field to struct build_id_event that broke the file | ||
| 1820 | * format. | ||
| 1821 | * | ||
| 1822 | * Since the kernel build-id is the first entry, process the | ||
| 1823 | * table using the old format if the well known | ||
| 1824 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
| 1825 | * first 4 characters chopped off (where the pid_t sits). | ||
| 1826 | */ | ||
| 1827 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
| 1828 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
| 1829 | return -1; | ||
| 1830 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
| 1831 | } | ||
| 1832 | |||
| 1833 | __event_process_build_id(&bev, filename, session); | ||
| 1834 | |||
| 1835 | offset += bev.header.size; | ||
| 1836 | } | ||
| 1837 | err = 0; | ||
| 1838 | out: | ||
| 1839 | return err; | ||
| 1840 | } | ||
| 1841 | |||
| 1842 | static int perf_file_section__process(struct perf_file_section *section, | 1992 | static int perf_file_section__process(struct perf_file_section *section, |
| 1843 | struct perf_header *ph, | 1993 | struct perf_header *ph, |
| 1844 | int feat, int fd, void *data __used) | 1994 | int feat, int fd, void *data __used) |
| @@ -1854,40 +2004,32 @@ static int perf_file_section__process(struct perf_file_section *section, | |||
| 1854 | return 0; | 2004 | return 0; |
| 1855 | } | 2005 | } |
| 1856 | 2006 | ||
| 1857 | switch (feat) { | 2007 | if (!feat_ops[feat].process) |
| 1858 | case HEADER_TRACE_INFO: | 2008 | return 0; |
| 1859 | trace_report(fd, false); | ||
| 1860 | break; | ||
| 1861 | case HEADER_BUILD_ID: | ||
| 1862 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
| 1863 | pr_debug("Failed to read buildids, continuing...\n"); | ||
| 1864 | break; | ||
| 1865 | default: | ||
| 1866 | break; | ||
| 1867 | } | ||
| 1868 | 2009 | ||
| 1869 | return 0; | 2010 | return feat_ops[feat].process(section, ph, feat, fd); |
| 1870 | } | 2011 | } |
| 1871 | 2012 | ||
| 1872 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, | 2013 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, |
| 1873 | struct perf_header *ph, int fd, | 2014 | struct perf_header *ph, int fd, |
| 1874 | bool repipe) | 2015 | bool repipe) |
| 1875 | { | 2016 | { |
| 1876 | if (readn(fd, header, sizeof(*header)) <= 0 || | 2017 | int ret; |
| 1877 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | ||
| 1878 | return -1; | ||
| 1879 | 2018 | ||
| 1880 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) | 2019 | ret = readn(fd, header, sizeof(*header)); |
| 2020 | if (ret <= 0) | ||
| 1881 | return -1; | 2021 | return -1; |
| 1882 | 2022 | ||
| 1883 | if (header->size != sizeof(*header)) { | 2023 | if (check_magic_endian(header->magic, header->size, true, ph) < 0) { |
| 1884 | u64 size = bswap_64(header->size); | 2024 | pr_debug("endian/magic failed\n"); |
| 2025 | return -1; | ||
| 2026 | } | ||
| 1885 | 2027 | ||
| 1886 | if (size != sizeof(*header)) | 2028 | if (ph->needs_swap) |
| 1887 | return -1; | 2029 | header->size = bswap_64(header->size); |
| 1888 | 2030 | ||
| 1889 | ph->needs_swap = true; | 2031 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) |
| 1890 | } | 2032 | return -1; |
| 1891 | 2033 | ||
| 1892 | return 0; | 2034 | return 0; |
| 1893 | } | 2035 | } |
| @@ -1908,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) | |||
| 1908 | return 0; | 2050 | return 0; |
| 1909 | } | 2051 | } |
| 1910 | 2052 | ||
| 2053 | static int read_attr(int fd, struct perf_header *ph, | ||
| 2054 | struct perf_file_attr *f_attr) | ||
| 2055 | { | ||
| 2056 | struct perf_event_attr *attr = &f_attr->attr; | ||
| 2057 | size_t sz, left; | ||
| 2058 | size_t our_sz = sizeof(f_attr->attr); | ||
| 2059 | int ret; | ||
| 2060 | |||
| 2061 | memset(f_attr, 0, sizeof(*f_attr)); | ||
| 2062 | |||
| 2063 | /* read minimal guaranteed structure */ | ||
| 2064 | ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); | ||
| 2065 | if (ret <= 0) { | ||
| 2066 | pr_debug("cannot read %d bytes of header attr\n", | ||
| 2067 | PERF_ATTR_SIZE_VER0); | ||
| 2068 | return -1; | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | /* on file perf_event_attr size */ | ||
| 2072 | sz = attr->size; | ||
| 2073 | |||
| 2074 | if (ph->needs_swap) | ||
| 2075 | sz = bswap_32(sz); | ||
| 2076 | |||
| 2077 | if (sz == 0) { | ||
| 2078 | /* assume ABI0 */ | ||
| 2079 | sz = PERF_ATTR_SIZE_VER0; | ||
| 2080 | } else if (sz > our_sz) { | ||
| 2081 | pr_debug("file uses a more recent and unsupported ABI" | ||
| 2082 | " (%zu bytes extra)\n", sz - our_sz); | ||
| 2083 | return -1; | ||
| 2084 | } | ||
| 2085 | /* what we have not yet read and that we know about */ | ||
| 2086 | left = sz - PERF_ATTR_SIZE_VER0; | ||
| 2087 | if (left) { | ||
| 2088 | void *ptr = attr; | ||
| 2089 | ptr += PERF_ATTR_SIZE_VER0; | ||
| 2090 | |||
| 2091 | ret = readn(fd, ptr, left); | ||
| 2092 | } | ||
| 2093 | /* read perf_file_section, ids are read in caller */ | ||
| 2094 | ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); | ||
| 2095 | |||
| 2096 | return ret <= 0 ? -1 : 0; | ||
| 2097 | } | ||
| 2098 | |||
| 1911 | int perf_session__read_header(struct perf_session *session, int fd) | 2099 | int perf_session__read_header(struct perf_session *session, int fd) |
| 1912 | { | 2100 | { |
| 1913 | struct perf_header *header = &session->header; | 2101 | struct perf_header *header = &session->header; |
| @@ -1923,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) | |||
| 1923 | if (session->fd_pipe) | 2111 | if (session->fd_pipe) |
| 1924 | return perf_header__read_pipe(session, fd); | 2112 | return perf_header__read_pipe(session, fd); |
| 1925 | 2113 | ||
| 1926 | if (perf_file_header__read(&f_header, header, fd) < 0) { | 2114 | if (perf_file_header__read(&f_header, header, fd) < 0) |
| 1927 | pr_debug("incompatible file format\n"); | ||
| 1928 | return -EINVAL; | 2115 | return -EINVAL; |
| 1929 | } | ||
| 1930 | 2116 | ||
| 1931 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | 2117 | nr_attrs = f_header.attrs.size / f_header.attr_size; |
| 1932 | lseek(fd, f_header.attrs.offset, SEEK_SET); | 2118 | lseek(fd, f_header.attrs.offset, SEEK_SET); |
| 1933 | 2119 | ||
| 1934 | for (i = 0; i < nr_attrs; i++) { | 2120 | for (i = 0; i < nr_attrs; i++) { |
| 1935 | struct perf_evsel *evsel; | 2121 | struct perf_evsel *evsel; |
| 1936 | off_t tmp; | 2122 | off_t tmp; |
| 1937 | 2123 | ||
| 1938 | if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) | 2124 | if (read_attr(fd, header, &f_attr) < 0) |
| 1939 | goto out_errno; | 2125 | goto out_errno; |
| 1940 | 2126 | ||
| 1941 | if (header->needs_swap) | 2127 | if (header->needs_swap) |
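Because the v2 magic is written as a u64, a reader with the writer's endianness sees 0x32454c4946524550 (the bytes "PERFILE2" in little-endian order), while a cross-endian reader sees the byte-swapped constant, so detection reduces to two comparisons. A standalone illustration of the idea:

#include <stdbool.h>
#include <stdint.h>

#define PERF_MAGIC2    0x32454c4946524550ULL	/* "PERFILE2", native order */
#define PERF_MAGIC2_SW 0x50455246494c4532ULL	/* same bytes, byte-swapped */

/* Returns 0 on a v2 header and sets *needs_swap; -1 otherwise. */
static int check_magic(uint64_t magic, bool *needs_swap)
{
	if (magic == PERF_MAGIC2) {
		*needs_swap = false;	/* writer and reader agree */
		return 0;
	}
	if (magic == PERF_MAGIC2_SW) {
		*needs_swap = true;	/* opposite endianness */
		return 0;
	}
	return -1;			/* legacy or foreign file */
}

Legacy "PERFFILE" headers carry no such hint, which is why check_magic_endian() above falls back to probing the known header sizes in both byte orders.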
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ac4ec956024e..21a6be09c129 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | enum { | 12 | enum { |
| 13 | HEADER_RESERVED = 0, /* always cleared */ | 13 | HEADER_RESERVED = 0, /* always cleared */ |
| 14 | HEADER_FIRST_FEATURE = 1, | ||
| 14 | HEADER_TRACE_INFO = 1, | 15 | HEADER_TRACE_INFO = 1, |
| 15 | HEADER_BUILD_ID, | 16 | HEADER_BUILD_ID, |
| 16 | 17 | ||
| @@ -26,7 +27,7 @@ enum { | |||
| 26 | HEADER_EVENT_DESC, | 27 | HEADER_EVENT_DESC, |
| 27 | HEADER_CPU_TOPOLOGY, | 28 | HEADER_CPU_TOPOLOGY, |
| 28 | HEADER_NUMA_TOPOLOGY, | 29 | HEADER_NUMA_TOPOLOGY, |
| 29 | 30 | HEADER_BRANCH_STACK, | |
| 30 | HEADER_LAST_FEATURE, | 31 | HEADER_LAST_FEATURE, |
| 31 | HEADER_FEAT_BITS = 256, | 32 | HEADER_FEAT_BITS = 256, |
| 32 | }; | 33 | }; |
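HEADER_FIRST_FEATURE deliberately aliases the first real feature bit (HEADER_TRACE_INFO = 1) so that code iterating the feature range no longer hardcodes a specific feature name; the intended loop shape is roughly (illustrative, not quoted from the tree):

int feat;

for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) {
	if (!perf_header__has_feat(header, feat))
		continue;	/* feature bit not set in this file */
	/* ... locate and process this feature's section ... */
}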
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e11e482bd185..3dc99a9b71f5 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
| @@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) | |||
| 50 | hists__set_col_len(hists, col, 0); | 50 | hists__set_col_len(hists, col, 0); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | static void hists__set_unres_dso_col_len(struct hists *hists, int dso) | ||
| 54 | { | ||
| 55 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
| 56 | |||
| 57 | if (hists__col_len(hists, dso) < unresolved_col_width && | ||
| 58 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
| 59 | !symbol_conf.dso_list) | ||
| 60 | hists__set_col_len(hists, dso, unresolved_col_width); | ||
| 61 | } | ||
| 62 | |||
| 53 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | 63 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) |
| 54 | { | 64 | { |
| 65 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
| 55 | u16 len; | 66 | u16 len; |
| 56 | 67 | ||
| 57 | if (h->ms.sym) | 68 | if (h->ms.sym) |
| 58 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); | 69 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); |
| 59 | else { | 70 | else |
| 60 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | 71 | hists__set_unres_dso_col_len(hists, HISTC_DSO); |
| 61 | |||
| 62 | if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && | ||
| 63 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
| 64 | !symbol_conf.dso_list) | ||
| 65 | hists__set_col_len(hists, HISTC_DSO, | ||
| 66 | unresolved_col_width); | ||
| 67 | } | ||
| 68 | 72 | ||
| 69 | len = thread__comm_len(h->thread); | 73 | len = thread__comm_len(h->thread); |
| 70 | if (hists__new_col_len(hists, HISTC_COMM, len)) | 74 | if (hists__new_col_len(hists, HISTC_COMM, len)) |
| @@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
| 74 | len = dso__name_len(h->ms.map->dso); | 78 | len = dso__name_len(h->ms.map->dso); |
| 75 | hists__new_col_len(hists, HISTC_DSO, len); | 79 | hists__new_col_len(hists, HISTC_DSO, len); |
| 76 | } | 80 | } |
| 81 | |||
| 82 | if (h->branch_info) { | ||
| 83 | int symlen; | ||
| 84 | /* | ||
| 85 | * +4 accounts for '[x] ' priv level info | ||
| 86 | * +2 accounts for the 0x prefix on raw addresses | ||
| 87 | */ | ||
| 88 | if (h->branch_info->from.sym) { | ||
| 89 | symlen = (int)h->branch_info->from.sym->namelen + 4; | ||
| 90 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
| 91 | |||
| 92 | symlen = dso__name_len(h->branch_info->from.map->dso); | ||
| 93 | hists__new_col_len(hists, HISTC_DSO_FROM, symlen); | ||
| 94 | } else { | ||
| 95 | symlen = unresolved_col_width + 4 + 2; | ||
| 96 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
| 97 | hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); | ||
| 98 | } | ||
| 99 | |||
| 100 | if (h->branch_info->to.sym) { | ||
| 101 | symlen = (int)h->branch_info->to.sym->namelen + 4; | ||
| 102 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
| 103 | |||
| 104 | symlen = dso__name_len(h->branch_info->to.map->dso); | ||
| 105 | hists__new_col_len(hists, HISTC_DSO_TO, symlen); | ||
| 106 | } else { | ||
| 107 | symlen = unresolved_col_width + 4 + 2; | ||
| 108 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
| 109 | hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); | ||
| 110 | } | ||
| 111 | } | ||
| 77 | } | 112 | } |
| 78 | 113 | ||
| 79 | static void hist_entry__add_cpumode_period(struct hist_entry *he, | 114 | static void hist_entry__add_cpumode_period(struct hist_entry *he, |
| @@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) | |||
| 195 | return 0; | 230 | return 0; |
| 196 | } | 231 | } |
| 197 | 232 | ||
| 198 | struct hist_entry *__hists__add_entry(struct hists *hists, | 233 | static struct hist_entry *add_hist_entry(struct hists *hists, |
| 234 | struct hist_entry *entry, | ||
| 199 | struct addr_location *al, | 235 | struct addr_location *al, |
| 200 | struct symbol *sym_parent, u64 period) | 236 | u64 period) |
| 201 | { | 237 | { |
| 202 | struct rb_node **p; | 238 | struct rb_node **p; |
| 203 | struct rb_node *parent = NULL; | 239 | struct rb_node *parent = NULL; |
| 204 | struct hist_entry *he; | 240 | struct hist_entry *he; |
| 205 | struct hist_entry entry = { | ||
| 206 | .thread = al->thread, | ||
| 207 | .ms = { | ||
| 208 | .map = al->map, | ||
| 209 | .sym = al->sym, | ||
| 210 | }, | ||
| 211 | .cpu = al->cpu, | ||
| 212 | .ip = al->addr, | ||
| 213 | .level = al->level, | ||
| 214 | .period = period, | ||
| 215 | .parent = sym_parent, | ||
| 216 | .filtered = symbol__parent_filter(sym_parent), | ||
| 217 | }; | ||
| 218 | int cmp; | 241 | int cmp; |
| 219 | 242 | ||
| 220 | pthread_mutex_lock(&hists->lock); | 243 | pthread_mutex_lock(&hists->lock); |
| @@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
| 225 | parent = *p; | 248 | parent = *p; |
| 226 | he = rb_entry(parent, struct hist_entry, rb_node_in); | 249 | he = rb_entry(parent, struct hist_entry, rb_node_in); |
| 227 | 250 | ||
| 228 | cmp = hist_entry__cmp(&entry, he); | 251 | cmp = hist_entry__cmp(entry, he); |
| 229 | 252 | ||
| 230 | if (!cmp) { | 253 | if (!cmp) { |
| 231 | he->period += period; | 254 | he->period += period; |
| @@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
| 239 | p = &(*p)->rb_right; | 262 | p = &(*p)->rb_right; |
| 240 | } | 263 | } |
| 241 | 264 | ||
| 242 | he = hist_entry__new(&entry); | 265 | he = hist_entry__new(entry); |
| 243 | if (!he) | 266 | if (!he) |
| 244 | goto out_unlock; | 267 | goto out_unlock; |
| 245 | 268 | ||
| @@ -252,6 +275,51 @@ out_unlock: | |||
| 252 | return he; | 275 | return he; |
| 253 | } | 276 | } |
| 254 | 277 | ||
| 278 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
| 279 | struct addr_location *al, | ||
| 280 | struct symbol *sym_parent, | ||
| 281 | struct branch_info *bi, | ||
| 282 | u64 period) | ||
| 283 | { | ||
| 284 | struct hist_entry entry = { | ||
| 285 | .thread = al->thread, | ||
| 286 | .ms = { | ||
| 287 | .map = bi->to.map, | ||
| 288 | .sym = bi->to.sym, | ||
| 289 | }, | ||
| 290 | .cpu = al->cpu, | ||
| 291 | .ip = bi->to.addr, | ||
| 292 | .level = al->level, | ||
| 293 | .period = period, | ||
| 294 | .parent = sym_parent, | ||
| 295 | .filtered = symbol__parent_filter(sym_parent), | ||
| 296 | .branch_info = bi, | ||
| 297 | }; | ||
| 298 | |||
| 299 | return add_hist_entry(self, &entry, al, period); | ||
| 300 | } | ||
| 301 | |||
| 302 | struct hist_entry *__hists__add_entry(struct hists *self, | ||
| 303 | struct addr_location *al, | ||
| 304 | struct symbol *sym_parent, u64 period) | ||
| 305 | { | ||
| 306 | struct hist_entry entry = { | ||
| 307 | .thread = al->thread, | ||
| 308 | .ms = { | ||
| 309 | .map = al->map, | ||
| 310 | .sym = al->sym, | ||
| 311 | }, | ||
| 312 | .cpu = al->cpu, | ||
| 313 | .ip = al->addr, | ||
| 314 | .level = al->level, | ||
| 315 | .period = period, | ||
| 316 | .parent = sym_parent, | ||
| 317 | .filtered = symbol__parent_filter(sym_parent), | ||
| 318 | }; | ||
| 319 | |||
| 320 | return add_hist_entry(self, &entry, al, period); | ||
| 321 | } | ||
| 322 | |||
| 255 | int64_t | 323 | int64_t |
| 256 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | 324 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) |
| 257 | { | 325 | { |
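Both entry points now build a template hist_entry on the stack and funnel it through the shared add_hist_entry(), so the rbtree insertion and period accounting live in one place; note that a branch entry is keyed by its target (bi->to supplies the map, symbol and ip). A condensed view of a caller, assuming an already-resolved branch_info:

/* bi points at a resolved branch (from/to maps and symbols filled in). */
struct hist_entry *he;

he = __hists__add_branch_entry(&evsel->hists, &al, parent, bi,
			       sample->period);
if (he == NULL)
	return -ENOMEM;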
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f55f0a8d1f81..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
| @@ -32,6 +32,7 @@ struct events_stats { | |||
| 32 | u32 nr_unknown_events; | 32 | u32 nr_unknown_events; |
| 33 | u32 nr_invalid_chains; | 33 | u32 nr_invalid_chains; |
| 34 | u32 nr_unknown_id; | 34 | u32 nr_unknown_id; |
| 35 | u32 nr_unprocessable_samples; | ||
| 35 | }; | 36 | }; |
| 36 | 37 | ||
| 37 | enum hist_column { | 38 | enum hist_column { |
| @@ -41,6 +42,11 @@ enum hist_column { | |||
| 41 | HISTC_COMM, | 42 | HISTC_COMM, |
| 42 | HISTC_PARENT, | 43 | HISTC_PARENT, |
| 43 | HISTC_CPU, | 44 | HISTC_CPU, |
| 45 | HISTC_MISPREDICT, | ||
| 46 | HISTC_SYMBOL_FROM, | ||
| 47 | HISTC_SYMBOL_TO, | ||
| 48 | HISTC_DSO_FROM, | ||
| 49 | HISTC_DSO_TO, | ||
| 44 | HISTC_NR_COLS, /* Last entry */ | 50 | HISTC_NR_COLS, /* Last entry */ |
| 45 | }; | 51 | }; |
| 46 | 52 | ||
| @@ -55,6 +61,7 @@ struct hists { | |||
| 55 | u64 nr_entries; | 61 | u64 nr_entries; |
| 56 | const struct thread *thread_filter; | 62 | const struct thread *thread_filter; |
| 57 | const struct dso *dso_filter; | 63 | const struct dso *dso_filter; |
| 64 | const char *uid_filter_str; | ||
| 58 | pthread_mutex_t lock; | 65 | pthread_mutex_t lock; |
| 59 | struct events_stats stats; | 66 | struct events_stats stats; |
| 60 | u64 event_stream; | 67 | u64 event_stream; |
| @@ -72,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, | |||
| 72 | struct hists *hists); | 79 | struct hists *hists); |
| 73 | void hist_entry__free(struct hist_entry *); | 80 | void hist_entry__free(struct hist_entry *); |
| 74 | 81 | ||
| 82 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
| 83 | struct addr_location *al, | ||
| 84 | struct symbol *sym_parent, | ||
| 85 | struct branch_info *bi, | ||
| 86 | u64 period); | ||
| 87 | |||
| 75 | void hists__output_resort(struct hists *self); | 88 | void hists__output_resort(struct hists *self); |
| 76 | void hists__output_resort_threaded(struct hists *hists); | 89 | void hists__output_resort_threaded(struct hists *hists); |
| 77 | void hists__collapse_resort(struct hists *self); | 90 | void hists__collapse_resort(struct hists *self); |
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h | |||
| @@ -2,10 +2,12 @@ | |||
| 2 | #ifndef PERF_DWARF2_H | 2 | #ifndef PERF_DWARF2_H |
| 3 | #define PERF_DWARF2_H | 3 | #define PERF_DWARF2_H |
| 4 | 4 | ||
| 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ |
| 6 | 6 | ||
| 7 | #define CFI_STARTPROC | 7 | #define CFI_STARTPROC |
| 8 | #define CFI_ENDPROC | 8 | #define CFI_ENDPROC |
| 9 | #define CFI_REMEMBER_STATE | ||
| 10 | #define CFI_RESTORE_STATE | ||
| 9 | 11 | ||
| 10 | #endif /* PERF_DWARF2_H */ | 12 | #endif /* PERF_DWARF2_H */ |
| 11 | 13 | ||
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index eda4416efa0a..bb162e40c76c 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <linux/bitops.h> | 5 | #include <linux/bitops.h> |
| 6 | 6 | ||
| 7 | int __bitmap_weight(const unsigned long *bitmap, int bits); | 7 | int __bitmap_weight(const unsigned long *bitmap, int bits); |
| 8 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
| 9 | const unsigned long *bitmap2, int bits); | ||
| 8 | 10 | ||
| 9 | #define BITMAP_LAST_WORD_MASK(nbits) \ | 11 | #define BITMAP_LAST_WORD_MASK(nbits) \ |
| 10 | ( \ | 12 | ( \ |
| @@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) | |||
| 32 | return __bitmap_weight(src, nbits); | 34 | return __bitmap_weight(src, nbits); |
| 33 | } | 35 | } |
| 34 | 36 | ||
| 37 | static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, | ||
| 38 | const unsigned long *src2, int nbits) | ||
| 39 | { | ||
| 40 | if (small_const_nbits(nbits)) | ||
| 41 | *dst = *src1 | *src2; | ||
| 42 | else | ||
| 43 | __bitmap_or(dst, src1, src2, nbits); | ||
| 44 | } | ||
| 45 | |||
| 35 | #endif /* _PERF_BITOPS_H */ | 46 | #endif /* _PERF_BITOPS_H */ |
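As with the kernel's own bitmap_or(), single-word masks are OR'ed inline and anything larger falls through to the out-of-line __bitmap_or(). A minimal usage sketch:

unsigned long online[2], offline[2], all[2];	/* room for 128 bits */

/* ... populate online/offline ... */
bitmap_or(all, online, offline, 128);	/* all = online | offline */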
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 316aa0ab7122..dea6d1c1a954 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
| @@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp) | |||
| 212 | self->start, self->end, self->pgoff, self->dso->name); | 212 | self->start, self->end, self->pgoff, self->dso->name); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | size_t map__fprintf_dsoname(struct map *map, FILE *fp) | ||
| 216 | { | ||
| 217 | const char *dsoname; | ||
| 218 | |||
| 219 | if (map && map->dso && (map->dso->name || map->dso->long_name)) { | ||
| 220 | if (symbol_conf.show_kernel_path && map->dso->long_name) | ||
| 221 | dsoname = map->dso->long_name; | ||
| 222 | else if (map->dso->name) | ||
| 223 | dsoname = map->dso->name; | ||
| 224 | } else | ||
| 225 | dsoname = "[unknown]"; | ||
| 226 | |||
| 227 | return fprintf(fp, "%s", dsoname); | ||
| 228 | } | ||
| 229 | |||
| 215 | /* | 230 | /* |
| 216 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. | 231 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. |
| 217 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. | 232 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2b8017f8a930..b100c20b7f94 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
| @@ -118,6 +118,7 @@ void map__delete(struct map *self); | |||
| 118 | struct map *map__clone(struct map *self); | 118 | struct map *map__clone(struct map *self); |
| 119 | int map__overlap(struct map *l, struct map *r); | 119 | int map__overlap(struct map *l, struct map *r); |
| 120 | size_t map__fprintf(struct map *self, FILE *fp); | 120 | size_t map__fprintf(struct map *self, FILE *fp); |
| 121 | size_t map__fprintf_dsoname(struct map *map, FILE *fp); | ||
| 121 | 122 | ||
| 122 | int map__load(struct map *self, symbol_filter_t filter); | 123 | int map__load(struct map *self, symbol_filter_t filter); |
| 123 | struct symbol *map__find_symbol(struct map *self, | 124 | struct symbol *map__find_symbol(struct map *self, |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e33554a562b3..8a8ee64e72d1 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
| @@ -34,7 +34,6 @@ | |||
| 34 | 34 | ||
| 35 | #include "util.h" | 35 | #include "util.h" |
| 36 | #include "event.h" | 36 | #include "event.h" |
| 37 | #include "string.h" | ||
| 38 | #include "strlist.h" | 37 | #include "strlist.h" |
| 39 | #include "debug.h" | 38 | #include "debug.h" |
| 40 | #include "cache.h" | 39 | #include "cache.h" |
| @@ -273,10 +272,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, | |||
| 273 | /* Try to find perf_probe_event with debuginfo */ | 272 | /* Try to find perf_probe_event with debuginfo */ |
| 274 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | 273 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, |
| 275 | struct probe_trace_event **tevs, | 274 | struct probe_trace_event **tevs, |
| 276 | int max_tevs, const char *module) | 275 | int max_tevs, const char *target) |
| 277 | { | 276 | { |
| 278 | bool need_dwarf = perf_probe_event_need_dwarf(pev); | 277 | bool need_dwarf = perf_probe_event_need_dwarf(pev); |
| 279 | struct debuginfo *dinfo = open_debuginfo(module); | 278 | struct debuginfo *dinfo = open_debuginfo(target); |
| 280 | int ntevs, ret = 0; | 279 | int ntevs, ret = 0; |
| 281 | 280 | ||
| 282 | if (!dinfo) { | 281 | if (!dinfo) { |
| @@ -295,9 +294,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
| 295 | 294 | ||
| 296 | if (ntevs > 0) { /* Succeeded to find trace events */ | 295 | if (ntevs > 0) { /* Succeeded to find trace events */ |
| 297 | pr_debug("find %d probe_trace_events.\n", ntevs); | 296 | pr_debug("find %d probe_trace_events.\n", ntevs); |
| 298 | if (module) | 297 | if (target) |
| 299 | ret = add_module_to_probe_trace_events(*tevs, ntevs, | 298 | ret = add_module_to_probe_trace_events(*tevs, ntevs, |
| 300 | module); | 299 | target); |
| 301 | return ret < 0 ? ret : ntevs; | 300 | return ret < 0 ? ret : ntevs; |
| 302 | } | 301 | } |
| 303 | 302 | ||
| @@ -1729,7 +1728,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
| 1729 | } | 1728 | } |
| 1730 | 1729 | ||
| 1731 | ret = 0; | 1730 | ret = 0; |
| 1732 | printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); | 1731 | printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); |
| 1733 | for (i = 0; i < ntevs; i++) { | 1732 | for (i = 0; i < ntevs; i++) { |
| 1734 | tev = &tevs[i]; | 1733 | tev = &tevs[i]; |
| 1735 | if (pev->event) | 1734 | if (pev->event) |
| @@ -1784,7 +1783,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
| 1784 | 1783 | ||
| 1785 | if (ret >= 0) { | 1784 | if (ret >= 0) { |
| 1786 | /* Show how to use the event. */ | 1785 | /* Show how to use the event. */ |
| 1787 | printf("\nYou can now use it on all perf tools, such as:\n\n"); | 1786 | printf("\nYou can now use it in all perf tools, such as:\n\n"); |
| 1788 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, | 1787 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, |
| 1789 | tev->event); | 1788 | tev->event); |
| 1790 | } | 1789 | } |
| @@ -1796,14 +1795,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
| 1796 | 1795 | ||
| 1797 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, | 1796 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, |
| 1798 | struct probe_trace_event **tevs, | 1797 | struct probe_trace_event **tevs, |
| 1799 | int max_tevs, const char *module) | 1798 | int max_tevs, const char *target) |
| 1800 | { | 1799 | { |
| 1801 | struct symbol *sym; | 1800 | struct symbol *sym; |
| 1802 | int ret = 0, i; | 1801 | int ret = 0, i; |
| 1803 | struct probe_trace_event *tev; | 1802 | struct probe_trace_event *tev; |
| 1804 | 1803 | ||
| 1805 | /* Convert perf_probe_event with debuginfo */ | 1804 | /* Convert perf_probe_event with debuginfo */ |
| 1806 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); | 1805 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); |
| 1807 | if (ret != 0) | 1806 | if (ret != 0) |
| 1808 | return ret; /* Found in debuginfo or got an error */ | 1807 | return ret; /* Found in debuginfo or got an error */ |
| 1809 | 1808 | ||
| @@ -1819,8 +1818,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, | |||
| 1819 | goto error; | 1818 | goto error; |
| 1820 | } | 1819 | } |
| 1821 | 1820 | ||
| 1822 | if (module) { | 1821 | if (target) { |
| 1823 | tev->point.module = strdup(module); | 1822 | tev->point.module = strdup(target); |
| 1824 | if (tev->point.module == NULL) { | 1823 | if (tev->point.module == NULL) { |
| 1825 | ret = -ENOMEM; | 1824 | ret = -ENOMEM; |
| 1826 | goto error; | 1825 | goto error; |
| @@ -1890,7 +1889,7 @@ struct __event_package { | |||
| 1890 | }; | 1889 | }; |
| 1891 | 1890 | ||
| 1892 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | 1891 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, |
| 1893 | int max_tevs, const char *module, bool force_add) | 1892 | int max_tevs, const char *target, bool force_add) |
| 1894 | { | 1893 | { |
| 1895 | int i, j, ret; | 1894 | int i, j, ret; |
| 1896 | struct __event_package *pkgs; | 1895 | struct __event_package *pkgs; |
| @@ -1913,7 +1912,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | |||
| 1913 | ret = convert_to_probe_trace_events(pkgs[i].pev, | 1912 | ret = convert_to_probe_trace_events(pkgs[i].pev, |
| 1914 | &pkgs[i].tevs, | 1913 | &pkgs[i].tevs, |
| 1915 | max_tevs, | 1914 | max_tevs, |
| 1916 | module); | 1915 | target); |
| 1917 | if (ret < 0) | 1916 | if (ret < 0) |
| 1918 | goto end; | 1917 | goto end; |
| 1919 | pkgs[i].ntevs = ret; | 1918 | pkgs[i].ntevs = ret; |
| @@ -1965,7 +1964,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) | |||
| 1965 | goto error; | 1964 | goto error; |
| 1966 | } | 1965 | } |
| 1967 | 1966 | ||
| 1968 | printf("Remove event: %s\n", ent->s); | 1967 | printf("Removed event: %s\n", ent->s); |
| 1969 | return 0; | 1968 | return 0; |
| 1970 | error: | 1969 | error: |
| 1971 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); | 1970 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); |
| @@ -2069,7 +2068,7 @@ static int filter_available_functions(struct map *map __unused, | |||
| 2069 | return 1; | 2068 | return 1; |
| 2070 | } | 2069 | } |
| 2071 | 2070 | ||
| 2072 | int show_available_funcs(const char *module, struct strfilter *_filter) | 2071 | int show_available_funcs(const char *target, struct strfilter *_filter) |
| 2073 | { | 2072 | { |
| 2074 | struct map *map; | 2073 | struct map *map; |
| 2075 | int ret; | 2074 | int ret; |
| @@ -2080,9 +2079,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter) | |||
| 2080 | if (ret < 0) | 2079 | if (ret < 0) |
| 2081 | return ret; | 2080 | return ret; |
| 2082 | 2081 | ||
| 2083 | map = kernel_get_module_map(module); | 2082 | map = kernel_get_module_map(target); |
| 2084 | if (!map) { | 2083 | if (!map) { |
| 2085 | pr_err("Failed to find %s map.\n", (module) ? : "kernel"); | 2084 | pr_err("Failed to find %s map.\n", (target) ? : "kernel"); |
| 2086 | return -EINVAL; | 2085 | return -EINVAL; |
| 2087 | } | 2086 | } |
| 2088 | available_func_filter = _filter; | 2087 | available_func_filter = _filter; |
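With the message fixes above, an add-probe session would read roughly as follows (hypothetical probe name; the per-event listing between the two messages is elided):

	# perf probe --add tcp_sendmsg
	Added new event:
	  ...

	You can now use it in all perf tools, such as:

		perf record -e probe:tcp_sendmsg -aR sleep 1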
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 74bd2e63c4b4..2cc162d3b78c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include <stdlib.h> | 30 | #include <stdlib.h> |
| 31 | #include <string.h> | 31 | #include <string.h> |
| 32 | #include <stdarg.h> | 32 | #include <stdarg.h> |
| 33 | #include <ctype.h> | ||
| 34 | #include <dwarf-regs.h> | 33 | #include <dwarf-regs.h> |
| 35 | 34 | ||
| 36 | #include <linux/bitops.h> | 35 | #include <linux/bitops.h> |
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources new file mode 100644 index 000000000000..2884e67ee625 --- /dev/null +++ b/tools/perf/util/python-ext-sources | |||
| @@ -0,0 +1,19 @@ | |||
| 1 | # | ||
| 2 | # List of files needed by the perf python extension | ||
| 3 | # | ||
| 4 | # Each source file must be placed on its own line so that it can be | ||
| 5 | # processed by the Makefile and util/setup.py. | ||
| 6 | # | ||
| 7 | |||
| 8 | util/python.c | ||
| 9 | util/ctype.c | ||
| 10 | util/evlist.c | ||
| 11 | util/evsel.c | ||
| 12 | util/cpumap.c | ||
| 13 | util/thread_map.c | ||
| 14 | util/util.c | ||
| 15 | util/xyarray.c | ||
| 16 | util/cgroup.c | ||
| 17 | util/debugfs.c | ||
| 18 | util/strlist.c | ||
| 19 | ../../lib/rbtree.c | ||
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9dd47a4f2596..e03b58a48424 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c | |||
| @@ -425,14 +425,14 @@ struct pyrf_thread_map { | |||
| 425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, | 425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, |
| 426 | PyObject *args, PyObject *kwargs) | 426 | PyObject *args, PyObject *kwargs) |
| 427 | { | 427 | { |
| 428 | static char *kwlist[] = { "pid", "tid", NULL }; | 428 | static char *kwlist[] = { "pid", "tid", "uid", NULL }; |
| 429 | int pid = -1, tid = -1; | 429 | int pid = -1, tid = -1, uid = UINT_MAX; |
| 430 | 430 | ||
| 431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", | 431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", |
| 432 | kwlist, &pid, &tid)) | 432 | kwlist, &pid, &tid, &uid)) |
| 433 | return -1; | 433 | return -1; |
| 434 | 434 | ||
| 435 | pthreads->threads = thread_map__new(pid, tid); | 435 | pthreads->threads = thread_map__new(pid, tid, uid); |
| 436 | if (pthreads->threads == NULL) | 436 | if (pthreads->threads == NULL) |
| 437 | return -1; | 437 | return -1; |
| 438 | return 0; | 438 | return 0; |
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0b2a48783172..c2623c6f9b51 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include <stdio.h> | 24 | #include <stdio.h> |
| 25 | #include <stdlib.h> | 25 | #include <stdlib.h> |
| 26 | #include <string.h> | 26 | #include <string.h> |
| 27 | #include <ctype.h> | ||
| 28 | #include <errno.h> | 27 | #include <errno.h> |
| 29 | 28 | ||
| 30 | #include "../../perf.h" | 29 | #include "../../perf.h" |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b5ca2558c7bb..002ebbf59f48 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
| @@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
| 24 | self->fd = STDIN_FILENO; | 24 | self->fd = STDIN_FILENO; |
| 25 | 25 | ||
| 26 | if (perf_session__read_header(self, self->fd) < 0) | 26 | if (perf_session__read_header(self, self->fd) < 0) |
| 27 | pr_err("incompatible file format"); | 27 | pr_err("incompatible file format (rerun with -v to learn more)"); |
| 28 | 28 | ||
| 29 | return 0; | 29 | return 0; |
| 30 | } | 30 | } |
| @@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | if (perf_session__read_header(self, self->fd) < 0) { | 58 | if (perf_session__read_header(self, self->fd) < 0) { |
| 59 | pr_err("incompatible file format"); | 59 | pr_err("incompatible file format (rerun with -v to learn more)"); |
| 60 | goto out_close; | 60 | goto out_close; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| @@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) | |||
| 229 | return 0; | 229 | return 0; |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | static const u8 cpumodes[] = { | ||
| 233 | PERF_RECORD_MISC_USER, | ||
| 234 | PERF_RECORD_MISC_KERNEL, | ||
| 235 | PERF_RECORD_MISC_GUEST_USER, | ||
| 236 | PERF_RECORD_MISC_GUEST_KERNEL | ||
| 237 | }; | ||
| 238 | #define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) | ||
| 239 | |||
| 240 | static void ip__resolve_ams(struct machine *self, struct thread *thread, | ||
| 241 | struct addr_map_symbol *ams, | ||
| 242 | u64 ip) | ||
| 243 | { | ||
| 244 | struct addr_location al; | ||
| 245 | size_t i; | ||
| 246 | u8 m; | ||
| 247 | |||
| 248 | memset(&al, 0, sizeof(al)); | ||
| 249 | |||
| 250 | for (i = 0; i < NCPUMODES; i++) { | ||
| 251 | m = cpumodes[i]; | ||
| 252 | /* | ||
| 253 | * We cannot use the header.misc hint to determine whether a | ||
| 254 | * branch stack address is user, kernel, guest or hypervisor. | ||
| 255 | * Branches may straddle the kernel/user/hypervisor boundaries. | ||
| 256 | * Thus, we try each cpumode in turn until we find a match; | ||
| 257 | * otherwise the symbol remains unknown. | ||
| 258 | */ | ||
| 259 | thread__find_addr_location(thread, self, m, MAP__FUNCTION, | ||
| 260 | ip, &al, NULL); | ||
| 261 | if (al.sym) | ||
| 262 | goto found; | ||
| 263 | } | ||
| 264 | found: | ||
| 265 | ams->addr = ip; | ||
| 266 | ams->al_addr = al.addr; | ||
| 267 | ams->sym = al.sym; | ||
| 268 | ams->map = al.map; | ||
| 269 | } | ||
| 270 | |||
| 271 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
| 272 | struct thread *thr, | ||
| 273 | struct branch_stack *bs) | ||
| 274 | { | ||
| 275 | struct branch_info *bi; | ||
| 276 | unsigned int i; | ||
| 277 | |||
| 278 | bi = calloc(bs->nr, sizeof(struct branch_info)); | ||
| 279 | if (!bi) | ||
| 280 | return NULL; | ||
| 281 | |||
| 282 | for (i = 0; i < bs->nr; i++) { | ||
| 283 | ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); | ||
| 284 | ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); | ||
| 285 | bi[i].flags = bs->entries[i].flags; | ||
| 286 | } | ||
| 287 | return bi; | ||
| 288 | } | ||
| 289 | |||
| 232 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | 290 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, |
| 233 | struct thread *thread, | 291 | struct thread *thread, |
| 234 | struct ip_callchain *chain, | 292 | struct ip_callchain *chain, |
| @@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) | |||
| 697 | i, sample->callchain->ips[i]); | 755 | i, sample->callchain->ips[i]); |
| 698 | } | 756 | } |
| 699 | 757 | ||
| 758 | static void branch_stack__printf(struct perf_sample *sample) | ||
| 759 | { | ||
| 760 | uint64_t i; | ||
| 761 | |||
| 762 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | ||
| 763 | |||
| 764 | for (i = 0; i < sample->branch_stack->nr; i++) | ||
| 765 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | ||
| 766 | i, sample->branch_stack->entries[i].from, | ||
| 767 | sample->branch_stack->entries[i].to); | ||
| 768 | } | ||
| 769 | |||
| 700 | static void perf_session__print_tstamp(struct perf_session *session, | 770 | static void perf_session__print_tstamp(struct perf_session *session, |
| 701 | union perf_event *event, | 771 | union perf_event *event, |
| 702 | struct perf_sample *sample) | 772 | struct perf_sample *sample) |
| @@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, | |||
| 744 | 814 | ||
| 745 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) | 815 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) |
| 746 | callchain__printf(sample); | 816 | callchain__printf(sample); |
| 817 | |||
| 818 | if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
| 819 | branch_stack__printf(sample); | ||
| 747 | } | 820 | } |
| 748 | 821 | ||
| 749 | static struct machine * | 822 | static struct machine * |
| @@ -796,6 +869,10 @@ static int perf_session_deliver_event(struct perf_session *session, | |||
| 796 | ++session->hists.stats.nr_unknown_id; | 869 | ++session->hists.stats.nr_unknown_id; |
| 797 | return -1; | 870 | return -1; |
| 798 | } | 871 | } |
| 872 | if (machine == NULL) { | ||
| 873 | ++session->hists.stats.nr_unprocessable_samples; | ||
| 874 | return -1; | ||
| 875 | } | ||
| 799 | return tool->sample(tool, event, sample, evsel, machine); | 876 | return tool->sample(tool, event, sample, evsel, machine); |
| 800 | case PERF_RECORD_MMAP: | 877 | case PERF_RECORD_MMAP: |
| 801 | return tool->mmap(tool, event, sample, machine); | 878 | return tool->mmap(tool, event, sample, machine); |
| @@ -964,6 +1041,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session, | |||
| 964 | session->hists.stats.nr_invalid_chains, | 1041 | session->hists.stats.nr_invalid_chains, |
| 965 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); | 1042 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); |
| 966 | } | 1043 | } |
| 1044 | |||
| 1045 | if (session->hists.stats.nr_unprocessable_samples != 0) { | ||
| 1046 | ui__warning("%u unprocessable samples recorded.\n" | ||
| 1047 | "Do you have a KVM guest running and not using 'perf kvm'?\n", | ||
| 1048 | session->hists.stats.nr_unprocessable_samples); | ||
| 1049 | } | ||
| 967 | } | 1050 | } |
| 968 | 1051 | ||
| 969 | #define session_done() (*(volatile int *)(&session_done)) | 1052 | #define session_done() (*(volatile int *)(&session_done)) |
| @@ -1293,10 +1376,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
| 1293 | 1376 | ||
| 1294 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 1377 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
| 1295 | struct machine *machine, struct perf_evsel *evsel, | 1378 | struct machine *machine, struct perf_evsel *evsel, |
| 1296 | int print_sym, int print_dso) | 1379 | int print_sym, int print_dso, int print_symoffset) |
| 1297 | { | 1380 | { |
| 1298 | struct addr_location al; | 1381 | struct addr_location al; |
| 1299 | const char *symname, *dsoname; | ||
| 1300 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; | 1382 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; |
| 1301 | struct callchain_cursor_node *node; | 1383 | struct callchain_cursor_node *node; |
| 1302 | 1384 | ||
| @@ -1324,20 +1406,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
| 1324 | 1406 | ||
| 1325 | printf("\t%16" PRIx64, node->ip); | 1407 | printf("\t%16" PRIx64, node->ip); |
| 1326 | if (print_sym) { | 1408 | if (print_sym) { |
| 1327 | if (node->sym && node->sym->name) | 1409 | printf(" "); |
| 1328 | symname = node->sym->name; | 1410 | symbol__fprintf_symname(node->sym, stdout); |
| 1329 | else | ||
| 1330 | symname = ""; | ||
| 1331 | |||
| 1332 | printf(" %s", symname); | ||
| 1333 | } | 1411 | } |
| 1334 | if (print_dso) { | 1412 | if (print_dso) { |
| 1335 | if (node->map && node->map->dso && node->map->dso->name) | 1413 | printf(" ("); |
| 1336 | dsoname = node->map->dso->name; | 1414 | map__fprintf_dsoname(al.map, stdout); |
| 1337 | else | 1415 | printf(")"); |
| 1338 | dsoname = ""; | ||
| 1339 | |||
| 1340 | printf(" (%s)", dsoname); | ||
| 1341 | } | 1416 | } |
| 1342 | printf("\n"); | 1417 | printf("\n"); |
| 1343 | 1418 | ||
| @@ -1347,21 +1422,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
| 1347 | } else { | 1422 | } else { |
| 1348 | printf("%16" PRIx64, sample->ip); | 1423 | printf("%16" PRIx64, sample->ip); |
| 1349 | if (print_sym) { | 1424 | if (print_sym) { |
| 1350 | if (al.sym && al.sym->name) | 1425 | printf(" "); |
| 1351 | symname = al.sym->name; | 1426 | if (print_symoffset) |
| 1427 | symbol__fprintf_symname_offs(al.sym, &al, | ||
| 1428 | stdout); | ||
| 1352 | else | 1429 | else |
| 1353 | symname = ""; | 1430 | symbol__fprintf_symname(al.sym, stdout); |
| 1354 | |||
| 1355 | printf(" %s", symname); | ||
| 1356 | } | 1431 | } |
| 1357 | 1432 | ||
| 1358 | if (print_dso) { | 1433 | if (print_dso) { |
| 1359 | if (al.map && al.map->dso && al.map->dso->name) | 1434 | printf(" ("); |
| 1360 | dsoname = al.map->dso->name; | 1435 | map__fprintf_dsoname(al.map, stdout); |
| 1361 | else | 1436 | printf(")"); |
| 1362 | dsoname = ""; | ||
| 1363 | |||
| 1364 | printf(" (%s)", dsoname); | ||
| 1365 | } | 1437 | } |
| 1366 | } | 1438 | } |
| 1367 | } | 1439 | } |
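A sketch of how a consumer might drive the new branch-stack resolution (fprintf_resolved_branches is an invented name; assumes the sample is already parsed and machine/thread resolved):

	/* Sketch: resolve each branch entry to symbols and print one
	 * "from -> to" line, flagging mispredicted branches. */
	static void fprintf_resolved_branches(struct machine *machine,
					      struct thread *thread,
					      struct perf_sample *sample, FILE *fp)
	{
		struct branch_info *bi;
		u64 i;

		if (sample->branch_stack == NULL)
			return;

		bi = machine__resolve_bstack(machine, thread, sample->branch_stack);
		if (bi == NULL)
			return;

		for (i = 0; i < sample->branch_stack->nr; i++) {
			symbol__fprintf_symname(bi[i].from.sym, fp);
			fprintf(fp, " -> ");
			symbol__fprintf_symname(bi[i].to.sym, fp);
			fprintf(fp, "%s\n", bi[i].flags.mispred ?
				" (mispredicted)" : "");
		}
		free(bi);
	}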
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 37bc38381fb6..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
| @@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel | |||
| 73 | struct ip_callchain *chain, | 73 | struct ip_callchain *chain, |
| 74 | struct symbol **parent); | 74 | struct symbol **parent); |
| 75 | 75 | ||
| 76 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
| 77 | struct thread *thread, | ||
| 78 | struct branch_stack *bs); | ||
| 79 | |||
| 76 | bool perf_session__has_traces(struct perf_session *self, const char *msg); | 80 | bool perf_session__has_traces(struct perf_session *self, const char *msg); |
| 77 | 81 | ||
| 78 | void mem_bswap_64(void *src, int byte_size); | 82 | void mem_bswap_64(void *src, int byte_size); |
| @@ -147,7 +151,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
| 147 | 151 | ||
| 148 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 152 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
| 149 | struct machine *machine, struct perf_evsel *evsel, | 153 | struct machine *machine, struct perf_evsel *evsel, |
| 150 | int print_sym, int print_dso); | 154 | int print_sym, int print_dso, int print_symoffset); |
| 151 | 155 | ||
| 152 | int perf_session__cpu_bitmap(struct perf_session *session, | 156 | int perf_session__cpu_bitmap(struct perf_session *session, |
| 153 | const char *cpu_list, unsigned long *cpu_bitmap); | 157 | const char *cpu_list, unsigned long *cpu_bitmap); |
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 36d4c5619575..d0f9f29cf181 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py | |||
| @@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split() | |||
| 24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') | 24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') |
| 25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') | 25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') |
| 26 | 26 | ||
| 27 | ext_sources = [f.strip() for f in file('util/python-ext-sources') | ||
| 28 | if len(f.strip()) > 0 and f[0] != '#'] | ||
| 29 | |||
| 27 | perf = Extension('perf', | 30 | perf = Extension('perf', |
| 28 | sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', | 31 | sources = ext_sources, |
| 29 | 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', | ||
| 30 | 'util/util.c', 'util/xyarray.c', 'util/cgroup.c', | ||
| 31 | 'util/debugfs.c'], | ||
| 32 | include_dirs = ['util/include'], | 32 | include_dirs = ['util/include'], |
| 33 | extra_compile_args = cflags, | 33 | extra_compile_args = cflags, |
| 34 | ) | 34 | ) |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 076c9d4e1ea4..a27237430c5f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
| @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; | |||
| 8 | const char *sort_order = default_sort_order; | 8 | const char *sort_order = default_sort_order; |
| 9 | int sort__need_collapse = 0; | 9 | int sort__need_collapse = 0; |
| 10 | int sort__has_parent = 0; | 10 | int sort__has_parent = 0; |
| 11 | int sort__branch_mode = -1; /* -1 = not set */ | ||
| 11 | 12 | ||
| 12 | enum sort_type sort__first_dimension; | 13 | enum sort_type sort__first_dimension; |
| 13 | 14 | ||
| @@ -97,6 +98,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, | |||
| 97 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); | 98 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); |
| 98 | } | 99 | } |
| 99 | 100 | ||
| 101 | static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) | ||
| 102 | { | ||
| 103 | struct dso *dso_l = map_l ? map_l->dso : NULL; | ||
| 104 | struct dso *dso_r = map_r ? map_r->dso : NULL; | ||
| 105 | const char *dso_name_l, *dso_name_r; | ||
| 106 | |||
| 107 | if (!dso_l || !dso_r) | ||
| 108 | return cmp_null(dso_l, dso_r); | ||
| 109 | |||
| 110 | if (verbose) { | ||
| 111 | dso_name_l = dso_l->long_name; | ||
| 112 | dso_name_r = dso_r->long_name; | ||
| 113 | } else { | ||
| 114 | dso_name_l = dso_l->short_name; | ||
| 115 | dso_name_r = dso_r->short_name; | ||
| 116 | } | ||
| 117 | |||
| 118 | return strcmp(dso_name_l, dso_name_r); | ||
| 119 | } | ||
| 120 | |||
| 100 | struct sort_entry sort_comm = { | 121 | struct sort_entry sort_comm = { |
| 101 | .se_header = "Command", | 122 | .se_header = "Command", |
| 102 | .se_cmp = sort__comm_cmp, | 123 | .se_cmp = sort__comm_cmp, |
| @@ -110,36 +131,74 @@ struct sort_entry sort_comm = { | |||
| 110 | static int64_t | 131 | static int64_t |
| 111 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | 132 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) |
| 112 | { | 133 | { |
| 113 | struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; | 134 | return _sort__dso_cmp(left->ms.map, right->ms.map); |
| 114 | struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; | 135 | } |
| 115 | const char *dso_name_l, *dso_name_r; | ||
| 116 | 136 | ||
| 117 | if (!dso_l || !dso_r) | ||
| 118 | return cmp_null(dso_l, dso_r); | ||
| 119 | 137 | ||
| 120 | if (verbose) { | 138 | static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, |
| 121 | dso_name_l = dso_l->long_name; | 139 | u64 ip_l, u64 ip_r) |
| 122 | dso_name_r = dso_r->long_name; | 140 | { |
| 123 | } else { | 141 | if (!sym_l || !sym_r) |
| 124 | dso_name_l = dso_l->short_name; | 142 | return cmp_null(sym_l, sym_r); |
| 125 | dso_name_r = dso_r->short_name; | 143 | |
| 144 | if (sym_l == sym_r) | ||
| 145 | return 0; | ||
| 146 | |||
| 147 | if (sym_l) | ||
| 148 | ip_l = sym_l->start; | ||
| 149 | if (sym_r) | ||
| 150 | ip_r = sym_r->start; | ||
| 151 | |||
| 152 | return (int64_t)(ip_r - ip_l); | ||
| 153 | } | ||
| 154 | |||
| 155 | static int _hist_entry__dso_snprintf(struct map *map, char *bf, | ||
| 156 | size_t size, unsigned int width) | ||
| 157 | { | ||
| 158 | if (map && map->dso) { | ||
| 159 | const char *dso_name = !verbose ? map->dso->short_name : | ||
| 160 | map->dso->long_name; | ||
| 161 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | ||
| 126 | } | 162 | } |
| 127 | 163 | ||
| 128 | return strcmp(dso_name_l, dso_name_r); | 164 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); |
| 129 | } | 165 | } |
| 130 | 166 | ||
| 131 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, | 167 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, |
| 132 | size_t size, unsigned int width) | 168 | size_t size, unsigned int width) |
| 133 | { | 169 | { |
| 134 | if (self->ms.map && self->ms.map->dso) { | 170 | return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); |
| 135 | const char *dso_name = !verbose ? self->ms.map->dso->short_name : | 171 | } |
| 136 | self->ms.map->dso->long_name; | 172 | |
| 137 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | 173 | static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, |
| 174 | u64 ip, char level, char *bf, size_t size, | ||
| 175 | unsigned int width __used) | ||
| 176 | { | ||
| 177 | size_t ret = 0; | ||
| 178 | |||
| 179 | if (verbose) { | ||
| 180 | char o = map ? dso__symtab_origin(map->dso) : '!'; | ||
| 181 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
| 182 | BITS_PER_LONG / 4, ip, o); | ||
| 138 | } | 183 | } |
| 139 | 184 | ||
| 140 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); | 185 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); |
| 186 | if (sym) | ||
| 187 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
| 188 | width - ret, | ||
| 189 | sym->name); | ||
| 190 | else { | ||
| 191 | size_t len = BITS_PER_LONG / 4; | ||
| 192 | ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", | ||
| 193 | len, ip); | ||
| 194 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
| 195 | width - ret, ""); | ||
| 196 | } | ||
| 197 | |||
| 198 | return ret; | ||
| 141 | } | 199 | } |
| 142 | 200 | ||
| 201 | |||
| 143 | struct sort_entry sort_dso = { | 202 | struct sort_entry sort_dso = { |
| 144 | .se_header = "Shared Object", | 203 | .se_header = "Shared Object", |
| 145 | .se_cmp = sort__dso_cmp, | 204 | .se_cmp = sort__dso_cmp, |
| @@ -147,8 +206,14 @@ struct sort_entry sort_dso = { | |||
| 147 | .se_width_idx = HISTC_DSO, | 206 | .se_width_idx = HISTC_DSO, |
| 148 | }; | 207 | }; |
| 149 | 208 | ||
| 150 | /* --sort symbol */ | 209 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, |
| 210 | size_t size, unsigned int width __used) | ||
| 211 | { | ||
| 212 | return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, | ||
| 213 | self->level, bf, size, width); | ||
| 214 | } | ||
| 151 | 215 | ||
| 216 | /* --sort symbol */ | ||
| 152 | static int64_t | 217 | static int64_t |
| 153 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 218 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
| 154 | { | 219 | { |
| @@ -166,31 +231,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | |||
| 166 | ip_l = left->ms.sym->start; | 231 | ip_l = left->ms.sym->start; |
| 167 | ip_r = right->ms.sym->start; | 232 | ip_r = right->ms.sym->start; |
| 168 | 233 | ||
| 169 | return (int64_t)(ip_r - ip_l); | 234 | return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); |
| 170 | } | ||
| 171 | |||
| 172 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | ||
| 173 | size_t size, unsigned int width __used) | ||
| 174 | { | ||
| 175 | size_t ret = 0; | ||
| 176 | |||
| 177 | if (verbose) { | ||
| 178 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; | ||
| 179 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
| 180 | BITS_PER_LONG / 4, self->ip, o); | ||
| 181 | } | ||
| 182 | |||
| 183 | if (!sort_dso.elide) | ||
| 184 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); | ||
| 185 | |||
| 186 | if (self->ms.sym) | ||
| 187 | ret += repsep_snprintf(bf + ret, size - ret, "%s", | ||
| 188 | self->ms.sym->name); | ||
| 189 | else | ||
| 190 | ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", | ||
| 191 | BITS_PER_LONG / 4, self->ip); | ||
| 192 | |||
| 193 | return ret; | ||
| 194 | } | 235 | } |
| 195 | 236 | ||
| 196 | struct sort_entry sort_sym = { | 237 | struct sort_entry sort_sym = { |
| @@ -249,19 +290,155 @@ struct sort_entry sort_cpu = { | |||
| 249 | .se_width_idx = HISTC_CPU, | 290 | .se_width_idx = HISTC_CPU, |
| 250 | }; | 291 | }; |
| 251 | 292 | ||
| 293 | static int64_t | ||
| 294 | sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 295 | { | ||
| 296 | return _sort__dso_cmp(left->branch_info->from.map, | ||
| 297 | right->branch_info->from.map); | ||
| 298 | } | ||
| 299 | |||
| 300 | static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, | ||
| 301 | size_t size, unsigned int width) | ||
| 302 | { | ||
| 303 | return _hist_entry__dso_snprintf(self->branch_info->from.map, | ||
| 304 | bf, size, width); | ||
| 305 | } | ||
| 306 | |||
| 307 | struct sort_entry sort_dso_from = { | ||
| 308 | .se_header = "Source Shared Object", | ||
| 309 | .se_cmp = sort__dso_from_cmp, | ||
| 310 | .se_snprintf = hist_entry__dso_from_snprintf, | ||
| 311 | .se_width_idx = HISTC_DSO_FROM, | ||
| 312 | }; | ||
| 313 | |||
| 314 | static int64_t | ||
| 315 | sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 316 | { | ||
| 317 | return _sort__dso_cmp(left->branch_info->to.map, | ||
| 318 | right->branch_info->to.map); | ||
| 319 | } | ||
| 320 | |||
| 321 | static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, | ||
| 322 | size_t size, unsigned int width) | ||
| 323 | { | ||
| 324 | return _hist_entry__dso_snprintf(self->branch_info->to.map, | ||
| 325 | bf, size, width); | ||
| 326 | } | ||
| 327 | |||
| 328 | static int64_t | ||
| 329 | sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 330 | { | ||
| 331 | struct addr_map_symbol *from_l = &left->branch_info->from; | ||
| 332 | struct addr_map_symbol *from_r = &right->branch_info->from; | ||
| 333 | |||
| 334 | if (!from_l->sym && !from_r->sym) | ||
| 335 | return right->level - left->level; | ||
| 336 | |||
| 337 | return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, | ||
| 338 | from_r->addr); | ||
| 339 | } | ||
| 340 | |||
| 341 | static int64_t | ||
| 342 | sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 343 | { | ||
| 344 | struct addr_map_symbol *to_l = &left->branch_info->to; | ||
| 345 | struct addr_map_symbol *to_r = &right->branch_info->to; | ||
| 346 | |||
| 347 | if (!to_l->sym && !to_r->sym) | ||
| 348 | return right->level - left->level; | ||
| 349 | |||
| 350 | return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); | ||
| 351 | } | ||
| 352 | |||
| 353 | static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, | ||
| 354 | size_t size, unsigned int width __used) | ||
| 355 | { | ||
| 356 | struct addr_map_symbol *from = &self->branch_info->from; | ||
| 357 | return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, | ||
| 358 | self->level, bf, size, width); | ||
| 359 | |||
| 360 | } | ||
| 361 | |||
| 362 | static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, | ||
| 363 | size_t size, unsigned int width __used) | ||
| 364 | { | ||
| 365 | struct addr_map_symbol *to = &self->branch_info->to; | ||
| 366 | return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, | ||
| 367 | self->level, bf, size, width); | ||
| 368 | |||
| 369 | } | ||
| 370 | |||
| 371 | struct sort_entry sort_dso_to = { | ||
| 372 | .se_header = "Target Shared Object", | ||
| 373 | .se_cmp = sort__dso_to_cmp, | ||
| 374 | .se_snprintf = hist_entry__dso_to_snprintf, | ||
| 375 | .se_width_idx = HISTC_DSO_TO, | ||
| 376 | }; | ||
| 377 | |||
| 378 | struct sort_entry sort_sym_from = { | ||
| 379 | .se_header = "Source Symbol", | ||
| 380 | .se_cmp = sort__sym_from_cmp, | ||
| 381 | .se_snprintf = hist_entry__sym_from_snprintf, | ||
| 382 | .se_width_idx = HISTC_SYMBOL_FROM, | ||
| 383 | }; | ||
| 384 | |||
| 385 | struct sort_entry sort_sym_to = { | ||
| 386 | .se_header = "Target Symbol", | ||
| 387 | .se_cmp = sort__sym_to_cmp, | ||
| 388 | .se_snprintf = hist_entry__sym_to_snprintf, | ||
| 389 | .se_width_idx = HISTC_SYMBOL_TO, | ||
| 390 | }; | ||
| 391 | |||
| 392 | static int64_t | ||
| 393 | sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 394 | { | ||
| 395 | const unsigned char mp = left->branch_info->flags.mispred != | ||
| 396 | right->branch_info->flags.mispred; | ||
| 397 | const unsigned char p = left->branch_info->flags.predicted != | ||
| 398 | right->branch_info->flags.predicted; | ||
| 399 | |||
| 400 | return mp || p; | ||
| 401 | } | ||
| 402 | |||
| 403 | static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, | ||
| 404 | size_t size, unsigned int width) { | ||
| 405 | const char *out = "N/A"; | ||
| 406 | |||
| 407 | if (self->branch_info->flags.predicted) | ||
| 408 | out = "N"; | ||
| 409 | else if (self->branch_info->flags.mispred) | ||
| 410 | out = "Y"; | ||
| 411 | |||
| 412 | return repsep_snprintf(bf, size, "%-*s", width, out); | ||
| 413 | } | ||
| 414 | |||
| 415 | struct sort_entry sort_mispredict = { | ||
| 416 | .se_header = "Branch Mispredicted", | ||
| 417 | .se_cmp = sort__mispredict_cmp, | ||
| 418 | .se_snprintf = hist_entry__mispredict_snprintf, | ||
| 419 | .se_width_idx = HISTC_MISPREDICT, | ||
| 420 | }; | ||
| 421 | |||
| 252 | struct sort_dimension { | 422 | struct sort_dimension { |
| 253 | const char *name; | 423 | const char *name; |
| 254 | struct sort_entry *entry; | 424 | struct sort_entry *entry; |
| 255 | int taken; | 425 | int taken; |
| 256 | }; | 426 | }; |
| 257 | 427 | ||
| 428 | #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } | ||
| 429 | |||
| 258 | static struct sort_dimension sort_dimensions[] = { | 430 | static struct sort_dimension sort_dimensions[] = { |
| 259 | { .name = "pid", .entry = &sort_thread, }, | 431 | DIM(SORT_PID, "pid", sort_thread), |
| 260 | { .name = "comm", .entry = &sort_comm, }, | 432 | DIM(SORT_COMM, "comm", sort_comm), |
| 261 | { .name = "dso", .entry = &sort_dso, }, | 433 | DIM(SORT_DSO, "dso", sort_dso), |
| 262 | { .name = "symbol", .entry = &sort_sym, }, | 434 | DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), |
| 263 | { .name = "parent", .entry = &sort_parent, }, | 435 | DIM(SORT_DSO_TO, "dso_to", sort_dso_to), |
| 264 | { .name = "cpu", .entry = &sort_cpu, }, | 436 | DIM(SORT_SYM, "symbol", sort_sym), |
| 437 | DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), | ||
| 438 | DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), | ||
| 439 | DIM(SORT_PARENT, "parent", sort_parent), | ||
| 440 | DIM(SORT_CPU, "cpu", sort_cpu), | ||
| 441 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | ||
| 265 | }; | 442 | }; |
| 266 | 443 | ||
| 267 | int sort_dimension__add(const char *tok) | 444 | int sort_dimension__add(const char *tok) |
| @@ -273,7 +450,6 @@ int sort_dimension__add(const char *tok) | |||
| 273 | 450 | ||
| 274 | if (strncasecmp(tok, sd->name, strlen(tok))) | 451 | if (strncasecmp(tok, sd->name, strlen(tok))) |
| 275 | continue; | 452 | continue; |
| 276 | |||
| 277 | if (sd->entry == &sort_parent) { | 453 | if (sd->entry == &sort_parent) { |
| 278 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); | 454 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); |
| 279 | if (ret) { | 455 | if (ret) { |
| @@ -305,6 +481,16 @@ int sort_dimension__add(const char *tok) | |||
| 305 | sort__first_dimension = SORT_PARENT; | 481 | sort__first_dimension = SORT_PARENT; |
| 306 | else if (!strcmp(sd->name, "cpu")) | 482 | else if (!strcmp(sd->name, "cpu")) |
| 307 | sort__first_dimension = SORT_CPU; | 483 | sort__first_dimension = SORT_CPU; |
| 484 | else if (!strcmp(sd->name, "symbol_from")) | ||
| 485 | sort__first_dimension = SORT_SYM_FROM; | ||
| 486 | else if (!strcmp(sd->name, "symbol_to")) | ||
| 487 | sort__first_dimension = SORT_SYM_TO; | ||
| 488 | else if (!strcmp(sd->name, "dso_from")) | ||
| 489 | sort__first_dimension = SORT_DSO_FROM; | ||
| 490 | else if (!strcmp(sd->name, "dso_to")) | ||
| 491 | sort__first_dimension = SORT_DSO_TO; | ||
| 492 | else if (!strcmp(sd->name, "mispredict")) | ||
| 493 | sort__first_dimension = SORT_MISPREDICT; | ||
| 308 | } | 494 | } |
| 309 | 495 | ||
| 310 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | 496 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); |
| @@ -312,7 +498,6 @@ int sort_dimension__add(const char *tok) | |||
| 312 | 498 | ||
| 313 | return 0; | 499 | return 0; |
| 314 | } | 500 | } |
| 315 | |||
| 316 | return -ESRCH; | 501 | return -ESRCH; |
| 317 | } | 502 | } |
| 318 | 503 | ||
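A sketch of how the new dimensions get registered (setup_branch_sort_keys is an invented driver; the real callers feed -s/--sort tokens from the report/top builtins into sort_dimension__add()):

	/* Sketch: register the branch-oriented sort keys added above. */
	static int setup_branch_sort_keys(void)
	{
		static const char * const keys[] = {
			"dso_from", "symbol_from", "dso_to", "symbol_to", "mispredict",
		};
		size_t i;

		for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
			if (sort_dimension__add(keys[i]) < 0)
				return -1;
		return 0;
	}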
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..472aa5a63a58 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
| @@ -31,11 +31,16 @@ extern const char *parent_pattern; | |||
| 31 | extern const char default_sort_order[]; | 31 | extern const char default_sort_order[]; |
| 32 | extern int sort__need_collapse; | 32 | extern int sort__need_collapse; |
| 33 | extern int sort__has_parent; | 33 | extern int sort__has_parent; |
| 34 | extern int sort__branch_mode; | ||
| 34 | extern char *field_sep; | 35 | extern char *field_sep; |
| 35 | extern struct sort_entry sort_comm; | 36 | extern struct sort_entry sort_comm; |
| 36 | extern struct sort_entry sort_dso; | 37 | extern struct sort_entry sort_dso; |
| 37 | extern struct sort_entry sort_sym; | 38 | extern struct sort_entry sort_sym; |
| 38 | extern struct sort_entry sort_parent; | 39 | extern struct sort_entry sort_parent; |
| 40 | extern struct sort_entry sort_dso_from; | ||
| 41 | extern struct sort_entry sort_dso_to; | ||
| 42 | extern struct sort_entry sort_sym_from; | ||
| 43 | extern struct sort_entry sort_sym_to; | ||
| 39 | extern enum sort_type sort__first_dimension; | 44 | extern enum sort_type sort__first_dimension; |
| 40 | 45 | ||
| 41 | /** | 46 | /** |
| @@ -72,6 +77,7 @@ struct hist_entry { | |||
| 72 | struct hist_entry *pair; | 77 | struct hist_entry *pair; |
| 73 | struct rb_root sorted_chain; | 78 | struct rb_root sorted_chain; |
| 74 | }; | 79 | }; |
| 80 | struct branch_info *branch_info; | ||
| 75 | struct callchain_root callchain[0]; | 81 | struct callchain_root callchain[0]; |
| 76 | }; | 82 | }; |
| 77 | 83 | ||
| @@ -82,6 +88,11 @@ enum sort_type { | |||
| 82 | SORT_SYM, | 88 | SORT_SYM, |
| 83 | SORT_PARENT, | 89 | SORT_PARENT, |
| 84 | SORT_CPU, | 90 | SORT_CPU, |
| 91 | SORT_DSO_FROM, | ||
| 92 | SORT_DSO_TO, | ||
| 93 | SORT_SYM_FROM, | ||
| 94 | SORT_SYM_TO, | ||
| 95 | SORT_MISPREDICT, | ||
| 85 | }; | 96 | }; |
| 86 | 97 | ||
| 87 | /* | 98 | /* |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0975438c3e72..5dd83c3e2c0c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
| @@ -1,4 +1,3 @@ | |||
| 1 | #include <ctype.h> | ||
| 2 | #include <dirent.h> | 1 | #include <dirent.h> |
| 3 | #include <errno.h> | 2 | #include <errno.h> |
| 4 | #include <libgen.h> | 3 | #include <libgen.h> |
| @@ -12,6 +11,7 @@ | |||
| 12 | #include <unistd.h> | 11 | #include <unistd.h> |
| 13 | #include <inttypes.h> | 12 | #include <inttypes.h> |
| 14 | #include "build-id.h" | 13 | #include "build-id.h" |
| 14 | #include "util.h" | ||
| 15 | #include "debug.h" | 15 | #include "debug.h" |
| 16 | #include "symbol.h" | 16 | #include "symbol.h" |
| 17 | #include "strlist.h" | 17 | #include "strlist.h" |
| @@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp) | |||
| 263 | sym->name); | 263 | sym->name); |
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
| 267 | const struct addr_location *al, FILE *fp) | ||
| 268 | { | ||
| 269 | unsigned long offset; | ||
| 270 | size_t length; | ||
| 271 | |||
| 272 | if (sym && sym->name) { | ||
| 273 | length = fprintf(fp, "%s", sym->name); | ||
| 274 | if (al) { | ||
| 275 | offset = al->addr - sym->start; | ||
| 276 | length += fprintf(fp, "+0x%lx", offset); | ||
| 277 | } | ||
| 278 | return length; | ||
| 279 | } else | ||
| 280 | return fprintf(fp, "[unknown]"); | ||
| 281 | } | ||
| 282 | |||
| 283 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) | ||
| 284 | { | ||
| 285 | return symbol__fprintf_symname_offs(sym, NULL, fp); | ||
| 286 | } | ||
| 287 | |||
| 266 | void dso__set_long_name(struct dso *dso, char *name) | 288 | void dso__set_long_name(struct dso *dso, char *name) |
| 267 | { | 289 | { |
| 268 | if (name == NULL) | 290 | if (name == NULL) |
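A minimal sketch of the intended call pattern for the two new printers (print_resolved_ip is an invented name; assumes a resolved struct addr_location):

	/* Sketch: both helpers fall back to "[unknown]" on a NULL
	 * symbol, so callers need no extra NULL checks. */
	static void print_resolved_ip(const struct addr_location *al,
				      bool with_offs)
	{
		if (with_offs)
			symbol__fprintf_symname_offs(al->sym, al, stdout);
		else
			symbol__fprintf_symname(al->sym, stdout);
		fputc('\n', stdout);
	}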
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 123c2e14353e..ac49ef208a5f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
| 6 | #include <stdint.h> | 6 | #include <stdint.h> |
| 7 | #include "map.h" | 7 | #include "map.h" |
| 8 | #include "../perf.h" | ||
| 8 | #include <linux/list.h> | 9 | #include <linux/list.h> |
| 9 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
| 10 | #include <stdio.h> | 11 | #include <stdio.h> |
| @@ -70,6 +71,7 @@ struct symbol_conf { | |||
| 70 | unsigned short priv_size; | 71 | unsigned short priv_size; |
| 71 | unsigned short nr_events; | 72 | unsigned short nr_events; |
| 72 | bool try_vmlinux_path, | 73 | bool try_vmlinux_path, |
| 74 | show_kernel_path, | ||
| 73 | use_modules, | 75 | use_modules, |
| 74 | sort_by_name, | 76 | sort_by_name, |
| 75 | show_nr_samples, | 77 | show_nr_samples, |
| @@ -95,7 +97,11 @@ struct symbol_conf { | |||
| 95 | *col_width_list_str; | 97 | *col_width_list_str; |
| 96 | struct strlist *dso_list, | 98 | struct strlist *dso_list, |
| 97 | *comm_list, | 99 | *comm_list, |
| 98 | *sym_list; | 100 | *sym_list, |
| 101 | *dso_from_list, | ||
| 102 | *dso_to_list, | ||
| 103 | *sym_from_list, | ||
| 104 | *sym_to_list; | ||
| 99 | const char *symfs; | 105 | const char *symfs; |
| 100 | }; | 106 | }; |
| 101 | 107 | ||
| @@ -119,6 +125,19 @@ struct map_symbol { | |||
| 119 | bool has_children; | 125 | bool has_children; |
| 120 | }; | 126 | }; |
| 121 | 127 | ||
| 128 | struct addr_map_symbol { | ||
| 129 | struct map *map; | ||
| 130 | struct symbol *sym; | ||
| 131 | u64 addr; | ||
| 132 | u64 al_addr; | ||
| 133 | }; | ||
| 134 | |||
| 135 | struct branch_info { | ||
| 136 | struct addr_map_symbol from; | ||
| 137 | struct addr_map_symbol to; | ||
| 138 | struct branch_flags flags; | ||
| 139 | }; | ||
| 140 | |||
| 122 | struct addr_location { | 141 | struct addr_location { |
| 123 | struct thread *thread; | 142 | struct thread *thread; |
| 124 | struct map *map; | 143 | struct map *map; |
| @@ -241,6 +260,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); | |||
| 241 | 260 | ||
| 242 | int symbol__init(void); | 261 | int symbol__init(void); |
| 243 | void symbol__exit(void); | 262 | void symbol__exit(void); |
| 263 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
| 264 | const struct addr_location *al, FILE *fp); | ||
| 265 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); | ||
| 244 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); | 266 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); |
| 245 | 267 | ||
| 246 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); | 268 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); |
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c new file mode 100644 index 000000000000..48c6902e749f --- /dev/null +++ b/tools/perf/util/sysfs.c | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | |||
| 2 | #include "util.h" | ||
| 3 | #include "sysfs.h" | ||
| 4 | |||
| 5 | static const char * const sysfs_known_mountpoints[] = { | ||
| 6 | "/sys", | ||
| 7 | NULL, | ||
| 8 | }; | ||
| 9 | |||
| 10 | static int sysfs_found; | ||
| 11 | char sysfs_mountpoint[PATH_MAX]; | ||
| 12 | |||
| 13 | static int sysfs_valid_mountpoint(const char *sysfs) | ||
| 14 | { | ||
| 15 | struct statfs st_fs; | ||
| 16 | |||
| 17 | if (statfs(sysfs, &st_fs) < 0) | ||
| 18 | return -ENOENT; | ||
| 19 | else if (st_fs.f_type != (long) SYSFS_MAGIC) | ||
| 20 | return -ENOENT; | ||
| 21 | |||
| 22 | return 0; | ||
| 23 | } | ||
| 24 | |||
| 25 | const char *sysfs_find_mountpoint(void) | ||
| 26 | { | ||
| 27 | const char * const *ptr; | ||
| 28 | char type[100]; | ||
| 29 | FILE *fp; | ||
| 30 | |||
| 31 | if (sysfs_found) | ||
| 32 | return (const char *) sysfs_mountpoint; | ||
| 33 | |||
| 34 | ptr = sysfs_known_mountpoints; | ||
| 35 | while (*ptr) { | ||
| 36 | if (sysfs_valid_mountpoint(*ptr) == 0) { | ||
| 37 | sysfs_found = 1; | ||
| 38 | strcpy(sysfs_mountpoint, *ptr); | ||
| 39 | return sysfs_mountpoint; | ||
| 40 | } | ||
| 41 | ptr++; | ||
| 42 | } | ||
| 43 | |||
| 44 | /* give up and parse /proc/mounts */ | ||
| 45 | fp = fopen("/proc/mounts", "r"); | ||
| 46 | if (fp == NULL) | ||
| 47 | return NULL; | ||
| 48 | |||
| 49 | while (!sysfs_found && | ||
| 50 | fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", | ||
| 51 | sysfs_mountpoint, type) == 2) { | ||
| 52 | |||
| 53 | if (strcmp(type, "sysfs") == 0) | ||
| 54 | sysfs_found = 1; | ||
| 55 | } | ||
| 56 | |||
| 57 | fclose(fp); | ||
| 58 | |||
| 59 | return sysfs_found ? sysfs_mountpoint : NULL; | ||
| 60 | } | ||
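A sketch of the expected use (sysfs_open is an invented name; open(2), O_RDONLY and PATH_MAX come from the usual fcntl.h/limits.h headers):

	/* Sketch: build a path below sysfs, wherever it happens
	 * to be mounted. */
	static int sysfs_open(const char *relpath)
	{
		const char *mnt = sysfs_find_mountpoint();
		char path[PATH_MAX];

		if (mnt == NULL)
			return -ENOENT;

		snprintf(path, sizeof(path), "%s/%s", mnt, relpath);
		return open(path, O_RDONLY);
	}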
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h new file mode 100644 index 000000000000..a813b7203938 --- /dev/null +++ b/tools/perf/util/sysfs.h | |||
| @@ -0,0 +1,6 @@ | |||
| 1 | #ifndef __SYSFS_H__ | ||
| 2 | #define __SYSFS_H__ | ||
| 3 | |||
| 4 | const char *sysfs_find_mountpoint(void); | ||
| 5 | |||
| 6 | #endif /* __SYSFS_H__ */ | ||
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c3..84d9bd782004 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c | |||
| @@ -1,6 +1,13 @@ | |||
| 1 | #include <dirent.h> | 1 | #include <dirent.h> |
| 2 | #include <limits.h> | ||
| 3 | #include <stdbool.h> | ||
| 2 | #include <stdlib.h> | 4 | #include <stdlib.h> |
| 3 | #include <stdio.h> | 5 | #include <stdio.h> |
| 6 | #include <sys/types.h> | ||
| 7 | #include <sys/stat.h> | ||
| 8 | #include <unistd.h> | ||
| 9 | #include "strlist.h" | ||
| 10 | #include <string.h> | ||
| 4 | #include "thread_map.h" | 11 | #include "thread_map.h" |
| 5 | 12 | ||
| 6 | /* Skip "." and ".." directories */ | 13 | /* Skip "." and ".." directories */ |
| @@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) | |||
| 23 | sprintf(name, "/proc/%d/task", pid); | 30 | sprintf(name, "/proc/%d/task", pid); |
| 24 | items = scandir(name, &namelist, filter, NULL); | 31 | items = scandir(name, &namelist, filter, NULL); |
| 25 | if (items <= 0) | 32 | if (items <= 0) |
| 26 | return NULL; | 33 | return NULL; |
| 27 | 34 | ||
| 28 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); | 35 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); |
| 29 | if (threads != NULL) { | 36 | if (threads != NULL) { |
| @@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) | |||
| 51 | return threads; | 58 | return threads; |
| 52 | } | 59 | } |
| 53 | 60 | ||
| 54 | struct thread_map *thread_map__new(pid_t pid, pid_t tid) | 61 | struct thread_map *thread_map__new_by_uid(uid_t uid) |
| 62 | { | ||
| 63 | DIR *proc; | ||
| 64 | int max_threads = 32, items, i; | ||
| 65 | char path[256]; | ||
| 66 | struct dirent dirent, *next, **namelist = NULL; | ||
| 67 | struct thread_map *threads = malloc(sizeof(*threads) + | ||
| 68 | max_threads * sizeof(pid_t)); | ||
| 69 | if (threads == NULL) | ||
| 70 | goto out; | ||
| 71 | |||
| 72 | proc = opendir("/proc"); | ||
| 73 | if (proc == NULL) | ||
| 74 | goto out_free_threads; | ||
| 75 | |||
| 76 | threads->nr = 0; | ||
| 77 | |||
| 78 | while (!readdir_r(proc, &dirent, &next) && next) { | ||
| 79 | char *end; | ||
| 80 | bool grow = false; | ||
| 81 | struct stat st; | ||
| 82 | pid_t pid = strtol(dirent.d_name, &end, 10); | ||
| 83 | |||
| 84 | if (*end) /* only interested in proper numerical dirents */ | ||
| 85 | continue; | ||
| 86 | |||
| 87 | snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); | ||
| 88 | |||
| 89 | if (stat(path, &st) != 0) | ||
| 90 | continue; | ||
| 91 | |||
| 92 | if (st.st_uid != uid) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | snprintf(path, sizeof(path), "/proc/%d/task", pid); | ||
| 96 | items = scandir(path, &namelist, filter, NULL); | ||
| 97 | if (items <= 0) | ||
| 98 | goto out_free_closedir; | ||
| 99 | |||
| 100 | while (threads->nr + items >= max_threads) { | ||
| 101 | max_threads *= 2; | ||
| 102 | grow = true; | ||
| 103 | } | ||
| 104 | |||
| 105 | if (grow) { | ||
| 106 | struct thread_map *tmp; | ||
| 107 | |||
| 108 | tmp = realloc(threads, (sizeof(*threads) + | ||
| 109 | max_threads * sizeof(pid_t))); | ||
| 110 | if (tmp == NULL) | ||
| 111 | goto out_free_namelist; | ||
| 112 | |||
| 113 | threads = tmp; | ||
| 114 | } | ||
| 115 | |||
| 116 | for (i = 0; i < items; i++) | ||
| 117 | threads->map[threads->nr + i] = atoi(namelist[i]->d_name); | ||
| 118 | |||
| 119 | for (i = 0; i < items; i++) | ||
| 120 | free(namelist[i]); | ||
| 121 | free(namelist); | ||
| 122 | |||
| 123 | threads->nr += items; | ||
| 124 | } | ||
| 125 | |||
| 126 | out_closedir: | ||
| 127 | closedir(proc); | ||
| 128 | out: | ||
| 129 | return threads; | ||
| 130 | |||
| 131 | out_free_threads: | ||
| 132 | free(threads); | ||
| 133 | return NULL; | ||
| 134 | |||
| 135 | out_free_namelist: | ||
| 136 | for (i = 0; i < items; i++) | ||
| 137 | free(namelist[i]); | ||
| 138 | free(namelist); | ||
| 139 | |||
| 140 | out_free_closedir: | ||
| 141 | free(threads); | ||
| 142 | threads = NULL; | ||
| 143 | goto out_closedir; | ||
| 144 | } | ||
| 145 | |||
| 146 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) | ||
| 55 | { | 147 | { |
| 56 | if (pid != -1) | 148 | if (pid != -1) |
| 57 | return thread_map__new_by_pid(pid); | 149 | return thread_map__new_by_pid(pid); |
| 150 | |||
| 151 | if (tid == -1 && uid != UINT_MAX) | ||
| 152 | return thread_map__new_by_uid(uid); | ||
| 153 | |||
| 58 | return thread_map__new_by_tid(tid); | 154 | return thread_map__new_by_tid(tid); |
| 59 | } | 155 | } |
| 60 | 156 | ||
| 157 | static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) | ||
| 158 | { | ||
| 159 | struct thread_map *threads = NULL, *nt; | ||
| 160 | char name[256]; | ||
| 161 | int items, total_tasks = 0; | ||
| 162 | struct dirent **namelist = NULL; | ||
| 163 | int i, j = 0; | ||
| 164 | pid_t pid, prev_pid = INT_MAX; | ||
| 165 | char *end_ptr; | ||
| 166 | struct str_node *pos; | ||
| 167 | struct strlist *slist = strlist__new(false, pid_str); | ||
| 168 | |||
| 169 | if (!slist) | ||
| 170 | return NULL; | ||
| 171 | |||
| 172 | strlist__for_each(pos, slist) { | ||
| 173 | pid = strtol(pos->s, &end_ptr, 10); | ||
| 174 | |||
| 175 | if (pid == INT_MIN || pid == INT_MAX || | ||
| 176 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
| 177 | goto out_free_threads; | ||
| 178 | |||
| 179 | if (pid == prev_pid) | ||
| 180 | continue; | ||
| 181 | prev_pid = pid; | ||
| 182 | sprintf(name, "/proc/%d/task", pid); | ||
| 183 | items = scandir(name, &namelist, filter, NULL); | ||
| 184 | if (items <= 0) | ||
| 185 | goto out_free_threads; | ||
| 186 | |||
| 187 | total_tasks += items; | ||
| 188 | nt = realloc(threads, (sizeof(*threads) + | ||
| 189 | sizeof(pid_t) * total_tasks)); | ||
| 190 | if (nt == NULL) | ||
| 191 | goto out_free_threads; | ||
| 192 | |||
| 193 | threads = nt; | ||
| 194 | |||
| 195 | if (threads) { | ||
| 196 | for (i = 0; i < items; i++) | ||
| 197 | threads->map[j++] = atoi(namelist[i]->d_name); | ||
| 198 | threads->nr = total_tasks; | ||
| 199 | } | ||
| 200 | |||
| 201 | for (i = 0; i < items; i++) | ||
| 202 | free(namelist[i]); | ||
| 203 | free(namelist); | ||
| 204 | |||
| 205 | if (!threads) | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | |||
| 209 | out: | ||
| 210 | strlist__delete(slist); | ||
| 211 | return threads; | ||
| 212 | |||
| 213 | out_free_threads: | ||
| 214 | free(threads); | ||
| 215 | threads = NULL; | ||
| 216 | goto out; | ||
| 217 | } | ||
| 218 | |||
| 219 | static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) | ||
| 220 | { | ||
| 221 | struct thread_map *threads = NULL, *nt; | ||
| 222 | int ntasks = 0; | ||
| 223 | pid_t tid, prev_tid = INT_MAX; | ||
| 224 | char *end_ptr; | ||
| 225 | struct str_node *pos; | ||
| 226 | struct strlist *slist; | ||
| 227 | |||
| 228 | /* perf-stat expects threads to be generated even if tid not given */ | ||
| 229 | if (!tid_str) { | ||
| 230 | threads = malloc(sizeof(*threads) + sizeof(pid_t)); | ||
| 231 | if (threads != NULL) { | ||
| 232 | threads->map[0] = -1; | ||
| 233 | threads->nr = 1; | ||
| 234 | } | ||
| 235 | return threads; | ||
| 236 | } | ||
| 237 | |||
| 238 | slist = strlist__new(false, tid_str); | ||
| 239 | if (!slist) | ||
| 240 | return NULL; | ||
| 241 | |||
| 242 | strlist__for_each(pos, slist) { | ||
| 243 | tid = strtol(pos->s, &end_ptr, 10); | ||
| 244 | |||
| 245 | if (tid == INT_MIN || tid == INT_MAX || | ||
| 246 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
| 247 | goto out_free_threads; | ||
| 248 | |||
| 249 | if (tid == prev_tid) | ||
| 250 | continue; | ||
| 251 | prev_tid = tid; | ||
| 252 | ntasks++; | ||
| 253 | nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); | ||
| 254 | |||
| 255 | if (nt == NULL) | ||
| 256 | goto out_free_threads; | ||
| 257 | |||
| 258 | threads = nt; | ||
| 259 | threads->map[ntasks - 1] = tid; | ||
| 260 | threads->nr = ntasks; | ||
| 261 | } | ||
| 262 | out: | ||
| 263 | return threads; | ||
| 264 | |||
| 265 | out_free_threads: | ||
| 266 | free(threads); | ||
| 267 | threads = NULL; | ||
| 268 | goto out; | ||
| 269 | } | ||
| 270 | |||
| 271 | struct thread_map *thread_map__new_str(const char *pid, const char *tid, | ||
| 272 | uid_t uid) | ||
| 273 | { | ||
| 274 | if (pid) | ||
| 275 | return thread_map__new_by_pid_str(pid); | ||
| 276 | |||
| 277 | if (!tid && uid != UINT_MAX) | ||
| 278 | return thread_map__new_by_uid(uid); | ||
| 279 | |||
| 280 | return thread_map__new_by_tid_str(tid); | ||
| 281 | } | ||
| 282 | |||
| 61 | void thread_map__delete(struct thread_map *threads) | 283 | void thread_map__delete(struct thread_map *threads) |
| 62 | { | 284 | { |
| 63 | free(threads); | 285 | free(threads); |
| 64 | } | 286 | } |
| 287 | |||
| 288 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) | ||
| 289 | { | ||
| 290 | int i; | ||
| 291 | size_t printed = fprintf(fp, "%d thread%s: ", | ||
| 292 | threads->nr, threads->nr > 1 ? "s" : ""); | ||
| 293 | for (i = 0; i < threads->nr; ++i) | ||
| 294 | printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); | ||
| 295 | |||
| 296 | return printed + fprintf(fp, "\n"); | ||
| 297 | } | ||
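A plausible caller sketch for the uid path (fprintf_uid_threads is an invented name):

	/* Sketch: turn a uid into a thread map, print it, and clean up.
	 * thread_map__new_str() falls back to the uid lookup when no
	 * pid/tid strings are given and uid != UINT_MAX. */
	static int fprintf_uid_threads(uid_t uid, FILE *fp)
	{
		struct thread_map *threads = thread_map__new_str(NULL, NULL, uid);

		if (threads == NULL)
			return -1;

		thread_map__fprintf(threads, fp);
		thread_map__delete(threads);
		return 0;
	}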
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb907311409..7da80f14418b 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #define __PERF_THREAD_MAP_H | 2 | #define __PERF_THREAD_MAP_H |
| 3 | 3 | ||
| 4 | #include <sys/types.h> | 4 | #include <sys/types.h> |
| 5 | #include <stdio.h> | ||
| 5 | 6 | ||
| 6 | struct thread_map { | 7 | struct thread_map { |
| 7 | int nr; | 8 | int nr; |
| @@ -10,6 +11,14 @@ struct thread_map { | |||
| 10 | 11 | ||
| 11 | struct thread_map *thread_map__new_by_pid(pid_t pid); | 12 | struct thread_map *thread_map__new_by_pid(pid_t pid); |
| 12 | struct thread_map *thread_map__new_by_tid(pid_t tid); | 13 | struct thread_map *thread_map__new_by_tid(pid_t tid); |
| 13 | struct thread_map *thread_map__new(pid_t pid, pid_t tid); | 14 | struct thread_map *thread_map__new_by_uid(uid_t uid); |
| 15 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); | ||
| 16 | |||
| 17 | struct thread_map *thread_map__new_str(const char *pid, | ||
| 18 | const char *tid, uid_t uid); | ||
| 19 | |||
| 14 | void thread_map__delete(struct thread_map *threads); | 20 | void thread_map__delete(struct thread_map *threads); |
| 21 | |||
| 22 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); | ||
| 23 | |||
| 15 | #endif /* __PERF_THREAD_MAP_H */ | 24 | #endif /* __PERF_THREAD_MAP_H */ |
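Taken together, the header now spells out a small create/print/destroy lifecycle. A hypothetical caller, assuming it is compiled inside tools/perf where thread_map.h resolves:

#include <limits.h>
#include <stdio.h>
#include "thread_map.h"

int main(void)
{
	/* pid string set, tid unset, no uid filter (UINT_MAX means "none") */
	struct thread_map *threads = thread_map__new_str("1234", NULL, UINT_MAX);

	if (threads == NULL)
		return 1;

	thread_map__fprintf(threads, stdout);	/* e.g. "1 thread: 1234" */
	thread_map__delete(threads);
	return 0;
}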
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 500471dffa4f..09fe579ccafb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c | |||
| @@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
| 69 | 69 | ||
| 70 | ret += SNPRINTF(bf + ret, size - ret, "], "); | 70 | ret += SNPRINTF(bf + ret, size - ret, "], "); |
| 71 | 71 | ||
| 72 | if (top->target_pid != -1) | 72 | if (top->target_pid) |
| 73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", | 73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", |
| 74 | top->target_pid); | 74 | top->target_pid); |
| 75 | else if (top->target_tid != -1) | 75 | else if (top->target_tid) |
| 76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", | 76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", |
| 77 | top->target_tid); | 77 | top->target_tid); |
| 78 | else if (top->uid_str != NULL) | ||
| 79 | ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", | ||
| 80 | top->uid_str); | ||
| 78 | else | 81 | else |
| 79 | ret += SNPRINTF(bf + ret, size - ret, " (all"); | 82 | ret += SNPRINTF(bf + ret, size - ret, " (all"); |
| 80 | 83 | ||
| @@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
| 82 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", | 85 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", |
| 83 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); | 86 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); |
| 84 | else { | 87 | else { |
| 85 | if (top->target_tid != -1) | 88 | if (top->target_tid) |
| 86 | ret += SNPRINTF(bf + ret, size - ret, ")"); | 89 | ret += SNPRINTF(bf + ret, size - ret, ")"); |
| 87 | else | 90 | else |
| 88 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", | 91 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", |
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f2eab81435ae..ce61cb2d1acf 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h | |||
| @@ -23,7 +23,8 @@ struct perf_top { | |||
| 23 | u64 guest_us_samples, guest_kernel_samples; | 23 | u64 guest_us_samples, guest_kernel_samples; |
| 24 | int print_entries, count_filter, delay_secs; | 24 | int print_entries, count_filter, delay_secs; |
| 25 | int freq; | 25 | int freq; |
| 26 | pid_t target_pid, target_tid; | 26 | const char *target_pid, *target_tid; |
| 27 | uid_t uid; | ||
| 27 | bool hide_kernel_symbols, hide_user_symbols, zero; | 28 | bool hide_kernel_symbols, hide_user_symbols, zero; |
| 28 | bool system_wide; | 29 | bool system_wide; |
| 29 | bool use_tui, use_stdio; | 30 | bool use_tui, use_stdio; |
| @@ -33,7 +34,7 @@ struct perf_top { | |||
| 33 | bool vmlinux_warned; | 34 | bool vmlinux_warned; |
| 34 | bool inherit; | 35 | bool inherit; |
| 35 | bool group; | 36 | bool group; |
| 36 | bool sample_id_all_avail; | 37 | bool sample_id_all_missing; |
| 37 | bool exclude_guest_missing; | 38 | bool exclude_guest_missing; |
| 38 | bool dump_symtab; | 39 | bool dump_symtab; |
| 39 | const char *cpu_list; | 40 | const char *cpu_list; |
| @@ -46,6 +47,7 @@ struct perf_top { | |||
| 46 | int realtime_prio; | 47 | int realtime_prio; |
| 47 | int sym_pcnt_filter; | 48 | int sym_pcnt_filter; |
| 48 | const char *sym_filter; | 49 | const char *sym_filter; |
| 50 | const char *uid_str; | ||
| 49 | }; | 51 | }; |
| 50 | 52 | ||
| 51 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); | 53 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); |
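The top.c and top.h hunks are two halves of one change: target_pid/target_tid become strings so a comma-separated list can flow straight through to thread_map__new_str(), which is why the header formatting switches from %d to %s and the sentinel tests from != -1 to plain NULL checks; uid_str keeps the user's original spelling for display next to the resolved uid. A condensed, hypothetical view of the targeting fields as the diff suggests they are meant to be used:

#include <sys/types.h>

/* hypothetical condensed view of the targeting fields in struct perf_top */
struct target_sketch {
	const char *target_pid;	/* e.g. "1234,1235"; NULL when unset */
	const char *target_tid;	/* e.g. "42"; NULL when unset */
	uid_t uid;		/* resolved by parse_target_uid(); UINT_MAX when unset */
	const char *uid_str;	/* the user's original spelling, shown in the header */
};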
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1a8d4dc4f386..a4088ced1e64 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #include <stdio.h> | 25 | #include <stdio.h> |
| 26 | #include <stdlib.h> | 26 | #include <stdlib.h> |
| 27 | #include <string.h> | 27 | #include <string.h> |
| 28 | #include <ctype.h> | ||
| 29 | #include <errno.h> | 28 | #include <errno.h> |
| 30 | 29 | ||
| 31 | #include "../perf.h" | 30 | #include "../perf.h" |
| @@ -1424,6 +1423,11 @@ static long long arg_num_eval(struct print_arg *arg) | |||
| 1424 | die("unknown op '%s'", arg->op.op); | 1423 | die("unknown op '%s'", arg->op.op); |
| 1425 | } | 1424 | } |
| 1426 | break; | 1425 | break; |
| 1426 | case '+': | ||
| 1427 | left = arg_num_eval(arg->op.left); | ||
| 1428 | right = arg_num_eval(arg->op.right); | ||
| 1429 | val = left + right; | ||
| 1430 | break; | ||
| 1427 | default: | 1431 | default: |
| 1428 | die("unknown op '%s'", arg->op.op); | 1432 | die("unknown op '%s'", arg->op.op); |
| 1429 | } | 1433 | } |
| @@ -1484,6 +1488,13 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) | |||
| 1484 | 1488 | ||
| 1485 | free_token(token); | 1489 | free_token(token); |
| 1486 | type = process_arg(event, arg, &token); | 1490 | type = process_arg(event, arg, &token); |
| 1491 | |||
| 1492 | if (type == EVENT_OP) | ||
| 1493 | type = process_op(event, arg, &token); | ||
| 1494 | |||
| 1495 | if (type == EVENT_ERROR) | ||
| 1496 | goto out_free; | ||
| 1497 | |||
| 1487 | if (test_type_token(type, token, EVENT_DELIM, ",")) | 1498 | if (test_type_token(type, token, EVENT_DELIM, ",")) |
| 1488 | goto out_free; | 1499 | goto out_free; |
| 1489 | 1500 | ||
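Both hunks harden the event-format parser: arg_num_eval() learns to fold a '+' node the way it already folds other binary operators (evaluate both subtrees recursively, then combine), and process_fields() now lets an operator token continue through process_op() instead of erroring out. A stripped-down sketch of the recursive fold, with hypothetical types:

#include <stdio.h>
#include <stdlib.h>

/* hypothetical miniature of the print_arg tree walked by arg_num_eval() */
struct num_node {
	char op;			/* 0 marks a leaf */
	long long atom;			/* leaf value */
	struct num_node *left, *right;
};

static long long num_eval(struct num_node *n)
{
	long long left, right;

	if (n->op == 0)
		return n->atom;

	left = num_eval(n->left);
	right = num_eval(n->right);

	switch (n->op) {
	case '+':
		return left + right;	/* the case the patch adds */
	case '-':
		return left - right;
	default:
		fprintf(stderr, "unknown op '%c'\n", n->op);
		exit(1);
	}
}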
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f55cc3a765a1..b9592e0de8d7 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c | |||
| @@ -33,7 +33,6 @@ | |||
| 33 | #include <pthread.h> | 33 | #include <pthread.h> |
| 34 | #include <fcntl.h> | 34 | #include <fcntl.h> |
| 35 | #include <unistd.h> | 35 | #include <unistd.h> |
| 36 | #include <ctype.h> | ||
| 37 | #include <errno.h> | 36 | #include <errno.h> |
| 38 | 37 | ||
| 39 | #include "../perf.h" | 38 | #include "../perf.h" |
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index a3fdf55f317b..18ae6c1831d3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include <stdio.h> | 22 | #include <stdio.h> |
| 23 | #include <stdlib.h> | 23 | #include <stdlib.h> |
| 24 | #include <string.h> | 24 | #include <string.h> |
| 25 | #include <ctype.h> | ||
| 26 | #include <errno.h> | 25 | #include <errno.h> |
| 27 | 26 | ||
| 28 | #include "../perf.h" | 27 | #include "../perf.h" |
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 295a9c93f945..57a4c6ef3fd2 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c | |||
| @@ -69,14 +69,17 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro | |||
| 69 | if (!self->navkeypressed) | 69 | if (!self->navkeypressed) |
| 70 | width += 1; | 70 | width += 1; |
| 71 | 71 | ||
| 72 | if (!ab->hide_src_code && ol->offset != -1) | ||
| 73 | if (!current_entry || (self->use_navkeypressed && | ||
| 74 | !self->navkeypressed)) | ||
| 75 | ui_browser__set_color(self, HE_COLORSET_CODE); | ||
| 76 | |||
| 72 | if (!*ol->line) | 77 | if (!*ol->line) |
| 73 | slsmg_write_nstring(" ", width - 18); | 78 | slsmg_write_nstring(" ", width - 18); |
| 74 | else | 79 | else |
| 75 | slsmg_write_nstring(ol->line, width - 18); | 80 | slsmg_write_nstring(ol->line, width - 18); |
| 76 | 81 | ||
| 77 | if (!current_entry) | 82 | if (current_entry) |
| 78 | ui_browser__set_color(self, HE_COLORSET_CODE); | ||
| 79 | else | ||
| 80 | ab->selection = ol; | 83 | ab->selection = ol; |
| 81 | } | 84 | } |
| 82 | 85 | ||
| @@ -230,9 +233,9 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
| 230 | struct rb_node *nd = NULL; | 233 | struct rb_node *nd = NULL; |
| 231 | struct map_symbol *ms = self->b.priv; | 234 | struct map_symbol *ms = self->b.priv; |
| 232 | struct symbol *sym = ms->sym; | 235 | struct symbol *sym = ms->sym; |
| 233 | const char *help = "<-, ESC: exit, TAB/shift+TAB: cycle hottest lines, " | 236 | const char *help = "<-/ESC: Exit, TAB/shift+TAB: Cycle hot lines, " |
| 234 | "H: Hottest, -> Line action, S -> Toggle source " | 237 | "H: Go to hottest line, ->/ENTER: Line action, " |
| 235 | "code view"; | 238 | "S: Toggle source code view"; |
| 236 | int key; | 239 | int key; |
| 237 | 240 | ||
| 238 | if (ui_browser__show(&self->b, sym->name, help) < 0) | 241 | if (ui_browser__show(&self->b, sym->name, help) < 0) |
| @@ -284,9 +287,11 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
| 284 | nd = self->curr_hot; | 287 | nd = self->curr_hot; |
| 285 | break; | 288 | break; |
| 286 | case 'H': | 289 | case 'H': |
| 290 | case 'h': | ||
| 287 | nd = self->curr_hot; | 291 | nd = self->curr_hot; |
| 288 | break; | 292 | break; |
| 289 | case 'S': | 293 | case 'S': |
| 294 | case 's': | ||
| 290 | if (annotate_browser__toggle_source(self)) | 295 | if (annotate_browser__toggle_source(self)) |
| 291 | ui_helpline__puts(help); | 296 | ui_helpline__puts(help); |
| 292 | continue; | 297 | continue; |
| @@ -338,6 +343,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
| 338 | pthread_mutex_unlock(¬es->lock); | 343 | pthread_mutex_unlock(¬es->lock); |
| 339 | symbol__tui_annotate(target, ms->map, evidx, | 344 | symbol__tui_annotate(target, ms->map, evidx, |
| 340 | timer, arg, delay_secs); | 345 | timer, arg, delay_secs); |
| 346 | ui_browser__show_title(&self->b, sym->name); | ||
| 341 | } | 347 | } |
| 342 | continue; | 348 | continue; |
| 343 | case K_LEFT: | 349 | case K_LEFT: |
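Besides restoring the browser title via ui_browser__show_title() when a nested annotation returns, this hunk makes the hotkeys case-insensitive by letting the upper- and lower-case labels share one switch arm. A minimal sketch of that dispatch, with stubbed-out actions standing in for the real browser operations:

/* stubs standing in for the real browser actions */
static void go_to_hottest_line(void) { }
static void toggle_source_view(void) { }

static int handle_key(int key)
{
	switch (key) {
	case 'H':
	case 'h':		/* either case jumps to the hottest line */
		go_to_hottest_line();
		return 1;
	case 'S':
	case 's':		/* either case toggles the source view */
		toggle_source_view();
		return 1;
	default:
		return 0;	/* key not handled here */
	}
}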
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bb9197c9c4a4..fa530fcc764a 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
| @@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) | |||
| 805 | self->hists = hists; | 805 | self->hists = hists; |
| 806 | self->b.refresh = hist_browser__refresh; | 806 | self->b.refresh = hist_browser__refresh; |
| 807 | self->b.seek = ui_browser__hists_seek; | 807 | self->b.seek = ui_browser__hists_seek; |
| 808 | self->b.use_navkeypressed = true, | 808 | self->b.use_navkeypressed = true; |
| 809 | self->has_symbols = sort_sym.list.next != NULL; | 809 | if (sort__branch_mode == 1) |
| 810 | self->has_symbols = sort_sym_from.list.next != NULL; | ||
| 811 | else | ||
| 812 | self->has_symbols = sort_sym.list.next != NULL; | ||
| 810 | } | 813 | } |
| 811 | 814 | ||
| 812 | return self; | 815 | return self; |
| @@ -839,6 +842,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
| 839 | nr_events = convert_unit(nr_events, &unit); | 842 | nr_events = convert_unit(nr_events, &unit); |
| 840 | printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); | 843 | printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); |
| 841 | 844 | ||
| 845 | if (self->uid_filter_str) | ||
| 846 | printed += snprintf(bf + printed, size - printed, | ||
| 847 | ", UID: %s", self->uid_filter_str); | ||
| 842 | if (thread) | 848 | if (thread) |
| 843 | printed += scnprintf(bf + printed, size - printed, | 849 | printed += scnprintf(bf + printed, size - printed, |
| 844 | ", Thread: %s(%d)", | 850 | ", Thread: %s(%d)", |
| @@ -850,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
| 850 | return printed; | 856 | return printed; |
| 851 | } | 857 | } |
| 852 | 858 | ||
| 859 | static inline void free_popup_options(char **options, int n) | ||
| 860 | { | ||
| 861 | int i; | ||
| 862 | |||
| 863 | for (i = 0; i < n; ++i) { | ||
| 864 | free(options[i]); | ||
| 865 | options[i] = NULL; | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 853 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | 869 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, |
| 854 | const char *helpline, const char *ev_name, | 870 | const char *helpline, const char *ev_name, |
| 855 | bool left_exits, | 871 | bool left_exits, |
| @@ -858,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 858 | { | 874 | { |
| 859 | struct hists *self = &evsel->hists; | 875 | struct hists *self = &evsel->hists; |
| 860 | struct hist_browser *browser = hist_browser__new(self); | 876 | struct hist_browser *browser = hist_browser__new(self); |
| 877 | struct branch_info *bi; | ||
| 861 | struct pstack *fstack; | 878 | struct pstack *fstack; |
| 879 | char *options[16]; | ||
| 880 | int nr_options = 0; | ||
| 862 | int key = -1; | 881 | int key = -1; |
| 863 | 882 | ||
| 864 | if (browser == NULL) | 883 | if (browser == NULL) |
| @@ -870,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 870 | 889 | ||
| 871 | ui_helpline__push(helpline); | 890 | ui_helpline__push(helpline); |
| 872 | 891 | ||
| 892 | memset(options, 0, sizeof(options)); | ||
| 893 | |||
| 873 | while (1) { | 894 | while (1) { |
| 874 | const struct thread *thread = NULL; | 895 | const struct thread *thread = NULL; |
| 875 | const struct dso *dso = NULL; | 896 | const struct dso *dso = NULL; |
| 876 | char *options[16]; | 897 | int choice = 0, |
| 877 | int nr_options = 0, choice = 0, i, | ||
| 878 | annotate = -2, zoom_dso = -2, zoom_thread = -2, | 898 | annotate = -2, zoom_dso = -2, zoom_thread = -2, |
| 879 | browse_map = -2; | 899 | annotate_f = -2, annotate_t = -2, browse_map = -2; |
| 900 | |||
| 901 | nr_options = 0; | ||
| 880 | 902 | ||
| 881 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); | 903 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); |
| 882 | 904 | ||
| @@ -884,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 884 | thread = hist_browser__selected_thread(browser); | 906 | thread = hist_browser__selected_thread(browser); |
| 885 | dso = browser->selection->map ? browser->selection->map->dso : NULL; | 907 | dso = browser->selection->map ? browser->selection->map->dso : NULL; |
| 886 | } | 908 | } |
| 887 | |||
| 888 | switch (key) { | 909 | switch (key) { |
| 889 | case K_TAB: | 910 | case K_TAB: |
| 890 | case K_UNTAB: | 911 | case K_UNTAB: |
| @@ -899,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 899 | if (!browser->has_symbols) { | 920 | if (!browser->has_symbols) { |
| 900 | ui_browser__warning(&browser->b, delay_secs * 2, | 921 | ui_browser__warning(&browser->b, delay_secs * 2, |
| 901 | "Annotation is only available for symbolic views, " | 922 | "Annotation is only available for symbolic views, " |
| 902 | "include \"sym\" in --sort to use it."); | 923 | "include \"sym*\" in --sort to use it."); |
| 903 | continue; | 924 | continue; |
| 904 | } | 925 | } |
| 905 | 926 | ||
| @@ -969,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 969 | if (!browser->has_symbols) | 990 | if (!browser->has_symbols) |
| 970 | goto add_exit_option; | 991 | goto add_exit_option; |
| 971 | 992 | ||
| 972 | if (browser->selection != NULL && | 993 | if (sort__branch_mode == 1) { |
| 973 | browser->selection->sym != NULL && | 994 | bi = browser->he_selection->branch_info; |
| 974 | !browser->selection->map->dso->annotate_warned && | 995 | if (browser->selection != NULL && |
| 975 | asprintf(&options[nr_options], "Annotate %s", | 996 | bi && |
| 976 | browser->selection->sym->name) > 0) | 997 | bi->from.sym != NULL && |
| 977 | annotate = nr_options++; | 998 | !bi->from.map->dso->annotate_warned && |
| 999 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1000 | bi->from.sym->name) > 0) | ||
| 1001 | annotate_f = nr_options++; | ||
| 1002 | |||
| 1003 | if (browser->selection != NULL && | ||
| 1004 | bi && | ||
| 1005 | bi->to.sym != NULL && | ||
| 1006 | !bi->to.map->dso->annotate_warned && | ||
| 1007 | (bi->to.sym != bi->from.sym || | ||
| 1008 | bi->to.map->dso != bi->from.map->dso) && | ||
| 1009 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1010 | bi->to.sym->name) > 0) | ||
| 1011 | annotate_t = nr_options++; | ||
| 1012 | } else { | ||
| 1013 | |||
| 1014 | if (browser->selection != NULL && | ||
| 1015 | browser->selection->sym != NULL && | ||
| 1016 | !browser->selection->map->dso->annotate_warned && | ||
| 1017 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1018 | browser->selection->sym->name) > 0) | ||
| 1019 | annotate = nr_options++; | ||
| 1020 | } | ||
| 978 | 1021 | ||
| 979 | if (thread != NULL && | 1022 | if (thread != NULL && |
| 980 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", | 1023 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", |
| @@ -995,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 995 | browse_map = nr_options++; | 1038 | browse_map = nr_options++; |
| 996 | add_exit_option: | 1039 | add_exit_option: |
| 997 | options[nr_options++] = (char *)"Exit"; | 1040 | options[nr_options++] = (char *)"Exit"; |
| 998 | 1041 | retry_popup_menu: | |
| 999 | choice = ui__popup_menu(nr_options, options); | 1042 | choice = ui__popup_menu(nr_options, options); |
| 1000 | 1043 | ||
| 1001 | for (i = 0; i < nr_options - 1; ++i) | ||
| 1002 | free(options[i]); | ||
| 1003 | |||
| 1004 | if (choice == nr_options - 1) | 1044 | if (choice == nr_options - 1) |
| 1005 | break; | 1045 | break; |
| 1006 | 1046 | ||
| 1007 | if (choice == -1) | 1047 | if (choice == -1) { |
| 1048 | free_popup_options(options, nr_options - 1); | ||
| 1008 | continue; | 1049 | continue; |
| 1050 | } | ||
| 1009 | 1051 | ||
| 1010 | if (choice == annotate) { | 1052 | if (choice == annotate || choice == annotate_t || choice == annotate_f) { |
| 1011 | struct hist_entry *he; | 1053 | struct hist_entry *he; |
| 1012 | int err; | 1054 | int err; |
| 1013 | do_annotate: | 1055 | do_annotate: |
| 1014 | he = hist_browser__selected_entry(browser); | 1056 | he = hist_browser__selected_entry(browser); |
| 1015 | if (he == NULL) | 1057 | if (he == NULL) |
| 1016 | continue; | 1058 | continue; |
| 1059 | |||
| 1060 | /* | ||
| 1061 | * we stash the branch_info symbol + map into | ||
| 1062 | * the ms so we don't have to rewrite all the annotation | ||
| 1063 | * code to use branch_info. | ||
| 1064 | * in branch mode, the ms struct is not used | ||
| 1065 | */ | ||
| 1066 | if (choice == annotate_f) { | ||
| 1067 | he->ms.sym = he->branch_info->from.sym; | ||
| 1068 | he->ms.map = he->branch_info->from.map; | ||
| 1069 | } else if (choice == annotate_t) { | ||
| 1070 | he->ms.sym = he->branch_info->to.sym; | ||
| 1071 | he->ms.map = he->branch_info->to.map; | ||
| 1072 | } | ||
| 1073 | |||
| 1017 | /* | 1074 | /* |
| 1018 | * Don't let this be freed, say, by hists__decay_entry. | 1075 | * Don't let this be freed, say, by hists__decay_entry. |
| 1019 | */ | 1076 | */ |
| @@ -1021,9 +1078,18 @@ do_annotate: | |||
| 1021 | err = hist_entry__tui_annotate(he, evsel->idx, | 1078 | err = hist_entry__tui_annotate(he, evsel->idx, |
| 1022 | timer, arg, delay_secs); | 1079 | timer, arg, delay_secs); |
| 1023 | he->used = false; | 1080 | he->used = false; |
| 1081 | /* | ||
| 1082 | * offer option to annotate the other branch source or target | ||
| 1083 | * (if they exist) when returning from annotate | ||
| 1084 | */ | ||
| 1085 | if ((err == 'q' || err == CTRL('c')) | ||
| 1086 | && annotate_t != -2 && annotate_f != -2) | ||
| 1087 | goto retry_popup_menu; | ||
| 1088 | |||
| 1024 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); | 1089 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); |
| 1025 | if (err) | 1090 | if (err) |
| 1026 | ui_browser__handle_resize(&browser->b); | 1091 | ui_browser__handle_resize(&browser->b); |
| 1092 | |||
| 1027 | } else if (choice == browse_map) | 1093 | } else if (choice == browse_map) |
| 1028 | map__browse(browser->selection->map); | 1094 | map__browse(browser->selection->map); |
| 1029 | else if (choice == zoom_dso) { | 1095 | else if (choice == zoom_dso) { |
| @@ -1069,6 +1135,7 @@ out_free_stack: | |||
| 1069 | pstack__delete(fstack); | 1135 | pstack__delete(fstack); |
| 1070 | out: | 1136 | out: |
| 1071 | hist_browser__delete(browser); | 1137 | hist_browser__delete(browser); |
| 1138 | free_popup_options(options, nr_options - 1); | ||
| 1072 | return key; | 1139 | return key; |
| 1073 | } | 1140 | } |
| 1074 | 1141 | ||
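The larger rework lets branch mode offer separate "from"/"to" annotation entries and re-offers the menu when the user quits one of them; the memory-management half is the part worth isolating. The popup entries are asprintf()-allocated strings plus one static "Exit" slot, and hoisting options[] out of the loop means every exit path can release them through the same helper. A self-contained sketch of that ownership scheme (menu_demo and free_options are hypothetical stand-ins):

#define _GNU_SOURCE		/* for asprintf() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void free_options(char **options, int n)
{
	int i;

	for (i = 0; i < n; ++i) {
		free(options[i]);
		options[i] = NULL;
	}
}

static void menu_demo(const char *sym_name)
{
	char *options[16];
	int nr_options = 0, annotate = -2;

	memset(options, 0, sizeof(options));

	/* remember which slot maps to which action as entries are added */
	if (sym_name != NULL &&
	    asprintf(&options[nr_options], "Annotate %s", sym_name) > 0)
		annotate = nr_options++;

	options[nr_options++] = (char *)"Exit";	/* static string, never freed */

	/* ... display the menu, compare the chosen index with 'annotate' ... */
	(void)annotate;

	free_options(options, nr_options - 1);	/* nr_options - 1 skips "Exit" */
}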
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 6905bcc8be2d..eca6575abfd0 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c | |||
| @@ -3,9 +3,9 @@ | |||
| 3 | #include <newt.h> | 3 | #include <newt.h> |
| 4 | #include <inttypes.h> | 4 | #include <inttypes.h> |
| 5 | #include <sys/ttydefaults.h> | 5 | #include <sys/ttydefaults.h> |
| 6 | #include <ctype.h> | ||
| 7 | #include <string.h> | 6 | #include <string.h> |
| 8 | #include <linux/bitops.h> | 7 | #include <linux/bitops.h> |
| 8 | #include "../../util.h" | ||
| 9 | #include "../../debug.h" | 9 | #include "../../debug.h" |
| 10 | #include "../../symbol.h" | 10 | #include "../../symbol.h" |
| 11 | #include "../browser.h" | 11 | #include "../browser.h" |
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d76d1c0ff98f..52bb07c6442a 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | * Copyright (C) Linus Torvalds, 2005 | 7 | * Copyright (C) Linus Torvalds, 2005 |
| 8 | */ | 8 | */ |
| 9 | #include "util.h" | 9 | #include "util.h" |
| 10 | #include "debug.h" | ||
| 10 | 11 | ||
| 11 | static void report(const char *prefix, const char *err, va_list params) | 12 | static void report(const char *prefix, const char *err, va_list params) |
| 12 | { | 13 | { |
| @@ -81,3 +82,41 @@ void warning(const char *warn, ...) | |||
| 81 | warn_routine(warn, params); | 82 | warn_routine(warn, params); |
| 82 | va_end(params); | 83 | va_end(params); |
| 83 | } | 84 | } |
| 85 | |||
| 86 | uid_t parse_target_uid(const char *str, const char *tid, const char *pid) | ||
| 87 | { | ||
| 88 | struct passwd pwd, *result; | ||
| 89 | char buf[1024]; | ||
| 90 | |||
| 91 | if (str == NULL) | ||
| 92 | return UINT_MAX; | ||
| 93 | |||
| 94 | /* UID and PID are mutually exclusive */ | ||
| 95 | if (tid || pid) { | ||
| 96 | ui__warning("PID/TID switch overriding UID\n"); | ||
| 97 | sleep(1); | ||
| 98 | return UINT_MAX; | ||
| 99 | } | ||
| 100 | |||
| 101 | getpwnam_r(str, &pwd, buf, sizeof(buf), &result); | ||
| 102 | |||
| 103 | if (result == NULL) { | ||
| 104 | char *endptr; | ||
| 105 | int uid = strtol(str, &endptr, 10); | ||
| 106 | |||
| 107 | if (*endptr != '\0') { | ||
| 108 | ui__error("Invalid user %s\n", str); | ||
| 109 | return UINT_MAX - 1; | ||
| 110 | } | ||
| 111 | |||
| 112 | getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); | ||
| 113 | |||
| 114 | if (result == NULL) { | ||
| 115 | ui__error("Problems obtaining information for user %s\n", | ||
| 116 | str); | ||
| 117 | return UINT_MAX - 1; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | return result->pw_uid; | ||
| 122 | } | ||
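parse_target_uid() resolves the --uid argument in two steps: try the string as a user name via getpwnam_r(), and only if that fails reinterpret it as a numeric uid and confirm it exists with getpwuid_r(). A standalone sketch of that lookup order (resolve_user is a hypothetical name; the real helper also arbitrates against --pid/--tid and distinguishes "unset" from "error" with two sentinels, collapsed into one here for brevity):

#include <limits.h>
#include <pwd.h>
#include <stdlib.h>
#include <sys/types.h>

static uid_t resolve_user(const char *str)
{
	struct passwd pwd, *result;
	char buf[1024];
	char *endptr;
	long uid;

	/* first try the string as a user name */
	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
	if (result != NULL)
		return result->pw_uid;

	/* fall back to a numeric uid, rejecting trailing junk */
	uid = strtol(str, &endptr, 10);
	if (*endptr != '\0')
		return UINT_MAX;

	/* confirm the numeric uid actually exists */
	getpwuid_r((uid_t)uid, &pwd, buf, sizeof(buf), &result);
	return result != NULL ? result->pw_uid : UINT_MAX;
}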
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index fb25d1329218..8109a907841e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
| @@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr) | |||
| 14 | attr->exclude_host = 1; | 14 | attr->exclude_host = 1; |
| 15 | if (!perf_guest) | 15 | if (!perf_guest) |
| 16 | attr->exclude_guest = 1; | 16 | attr->exclude_guest = 1; |
| 17 | /* to capture ABI version */ | ||
| 18 | attr->size = sizeof(*attr); | ||
| 17 | } | 19 | } |
| 18 | 20 | ||
| 19 | int mkdir_p(char *path, mode_t mode) | 21 | int mkdir_p(char *path, mode_t mode) |
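Setting attr->size in event_attr_init() stamps the perf_event_attr with the structure size the tool was compiled against, which is how the perf_event_open() ABI tells attribute-structure revisions apart. A hedged, Linux-only sketch of the idea using the uapi header:

#include <string.h>
#include <linux/perf_event.h>

static void attr_init_sketch(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	/* the kernel reads this field to learn which revision of the
	 * struct the caller was built with */
	attr->size = sizeof(*attr);
}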
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ecf9898169c8..0f99f394d8e0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
| @@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext) | |||
| 199 | #undef isalpha | 199 | #undef isalpha |
| 200 | #undef isprint | 200 | #undef isprint |
| 201 | #undef isalnum | 201 | #undef isalnum |
| 202 | #undef islower | ||
| 203 | #undef isupper | ||
| 202 | #undef tolower | 204 | #undef tolower |
| 203 | #undef toupper | 205 | #undef toupper |
| 204 | 206 | ||
| @@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256]; | |||
| 219 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | 221 | #define isalpha(x) sane_istest(x,GIT_ALPHA) |
| 220 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | 222 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) |
| 221 | #define isprint(x) sane_istest(x,GIT_PRINT) | 223 | #define isprint(x) sane_istest(x,GIT_PRINT) |
| 224 | #define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20)) | ||
| 225 | #define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20)) | ||
| 222 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | 226 | #define tolower(x) sane_case((unsigned char)(x), 0x20) |
| 223 | #define toupper(x) sane_case((unsigned char)(x), 0) | 227 | #define toupper(x) sane_case((unsigned char)(x), 0) |
| 224 | 228 | ||
| @@ -245,6 +249,8 @@ struct perf_event_attr; | |||
| 245 | 249 | ||
| 246 | void event_attr_init(struct perf_event_attr *attr); | 250 | void event_attr_init(struct perf_event_attr *attr); |
| 247 | 251 | ||
| 252 | uid_t parse_target_uid(const char *str, const char *tid, const char *pid); | ||
| 253 | |||
| 248 | #define _STR(x) #x | 254 | #define _STR(x) #x |
| 249 | #define STR(x) _STR(x) | 255 | #define STR(x) _STR(x) |
| 250 | 256 | ||
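The new islower()/isupper() macros lean on the ASCII layout: an alphabetic byte is lower case exactly when bit 0x20 is set, the same bit the existing tolower()/toupper() macros flip; this sane_ctype table is also why the <ctype.h> includes are dropped elsewhere in the patch. A standalone illustration of the bit trick (the ascii_* names are hypothetical, not the perf macros):

static int ascii_isalpha(unsigned char c)
{
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

static int ascii_islower(unsigned char c)
{
	/* once the byte is known alphabetic, bit 0x20 decides the case */
	return ascii_isalpha(c) && (c & 0x20);
}

static unsigned char ascii_toupper(unsigned char c)
{
	return ascii_islower(c) ? c & ~0x20 : c;
}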
