165 files changed, 6107 insertions, 1984 deletions
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt new file mode 100644 index 000000000000..d2a36602ca8d --- /dev/null +++ b/Documentation/lockup-watchdogs.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | =============================================================== | ||
2 | Softlockup detector and hardlockup detector (aka nmi_watchdog) | ||
3 | =============================================================== | ||
4 | |||
5 | The Linux kernel can act as a watchdog to detect both soft and hard | ||
6 | lockups. | ||
7 | |||
8 | A 'softlockup' is defined as a bug that causes the kernel to loop in | ||
9 | kernel mode for more than 20 seconds (see "Implementation" below for | ||
10 | details), without giving other tasks a chance to run. The current | ||
11 | stack trace is displayed upon detection and, by default, the system | ||
12 | will stay locked up. Alternatively, the kernel can be configured to | ||
13 | panic; a sysctl, "kernel.softlockup_panic", a kernel parameter, | ||
14 | "softlockup_panic" (see "Documentation/kernel-parameters.txt" for | ||
15 | details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are | ||
16 | provided for this. | ||
17 | |||
18 | A 'hardlockup' is defined as a bug that causes the CPU to loop in | ||
19 | kernel mode for more than 10 seconds (see "Implementation" below for | ||
20 | details), without letting other interrupts have a chance to run. | ||
21 | Similarly to the softlockup case, the current stack trace is displayed | ||
22 | upon detection and the system will stay locked up unless the default | ||
23 | behavior is changed, which can be done through a compile time knob, | ||
24 | "BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog" | ||
25 | (see "Documentation/kernel-parameters.txt" for details). | ||
26 | |||
27 | The panic option can be used in combination with panic_timeout (this | ||
28 | timeout is set through the confusingly named "kernel.panic" sysctl), | ||
29 | to cause the system to reboot automatically after a specified amount | ||
30 | of time. | ||
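
For example, a sysctl configuration fragment (illustrative values, using
the knobs named above) that panics on a softlockup and reboots 30
seconds later:

  kernel.softlockup_panic = 1
  kernel.panic = 30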
31 | |||
32 | === Implementation === | ||
33 | |||
34 | The soft and hard lockup detectors are built on top of the hrtimer and | ||
35 | perf subsystems, respectively. A direct consequence of this is that, | ||
36 | in principle, they should work on any architecture where these | ||
37 | subsystems are present. | ||
38 | |||
39 | A periodic hrtimer runs to generate interrupts and kick the watchdog | ||
40 | task. An NMI perf event is generated every "watchdog_thresh" | ||
41 | (compile-time initialized to 10 and configurable through sysctl of the | ||
42 | same name) seconds to check for hardlockups. If any CPU in the system | ||
43 | does not receive any hrtimer interrupt during that time the | ||
44 | 'hardlockup detector' (the handler for the NMI perf event) will | ||
45 | generate a kernel warning or call panic, depending on the | ||
46 | configuration. | ||
47 | |||
48 | The watchdog task is a high priority kernel thread that updates a | ||
49 | timestamp every time it is scheduled. If that timestamp is not updated | ||
50 | for 2*watchdog_thresh seconds (the softlockup threshold) the | ||
51 | 'softlockup detector' (coded inside the hrtimer callback function) | ||
52 | will dump useful debug information to the system log, after which it | ||
53 | will call panic if it was instructed to do so or resume execution of | ||
54 | other kernel code. | ||
55 | |||
56 | The period of the hrtimer is 2*watchdog_thresh/5, which means it has | ||
57 | two or three chances to generate an interrupt before the hardlockup | ||
58 | detector kicks in. | ||
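
For example, with the default watchdog_thresh of 10:

  hrtimer period       = 2*10/5 = 4 seconds
  hardlockup window    = 10 seconds (two or three hrtimer ticks)
  softlockup threshold = 2*10 = 20 seconds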
59 | |||
60 | As explained above, a kernel knob is provided that allows | ||
61 | administrators to configure the period of the hrtimer and the perf | ||
62 | event. The right value for a particular environment is a trade-off | ||
63 | between fast response to lockups and detection overhead. | ||
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt deleted file mode 100644 index bf9f80a98282..000000000000 --- a/Documentation/nmi_watchdog.txt +++ /dev/null | |||
@@ -1,83 +0,0 @@ | |||
1 | |||
2 | [NMI watchdog is available for x86 and x86-64 architectures] | ||
3 | |||
4 | Is your system locking up unpredictably? No keyboard activity, just | ||
5 | a frustrating complete hard lockup? Do you want to help us debug | ||
6 | such lockups? If yes to all, then this document is definitely for you. | ||
7 | |||
8 | On many x86/x86-64 type hardware there is a feature that enables | ||
9 | us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt | ||
10 | which get executed even if the system is otherwise locked up hard). | ||
11 | This can be used to debug hard kernel lockups. By executing periodic | ||
12 | NMI interrupts, the kernel can monitor whether any CPU has locked up, | ||
13 | and print out debugging messages if so. | ||
14 | |||
15 | In order to use the NMI watchdog, you need to have APIC support in your | ||
16 | kernel. For SMP kernels, APIC support gets compiled in automatically. For | ||
17 | UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local | ||
18 | APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and | ||
19 | features -> IO-APIC support on uniprocessors) in your kernel config. | ||
20 | CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC. | ||
21 | CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain | ||
22 | kernel debugging options, such as Kernel Stack Meter or Kernel Tracer, | ||
23 | may implicitly disable the NMI watchdog.] | ||
24 | |||
25 | For x86-64, the needed APIC is always compiled in. | ||
26 | |||
27 | Using local APIC (nmi_watchdog=2) needs the first performance register, so | ||
28 | you can't use it for other purposes (such as high precision performance | ||
29 | profiling.) However, at least oprofile and the perfctr driver disable the | ||
30 | local APIC NMI watchdog automatically. | ||
31 | |||
32 | To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot | ||
33 | parameter. Eg. the relevant lilo.conf entry: | ||
34 | |||
35 | append="nmi_watchdog=1" | ||
36 | |||
37 | For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1. | ||
38 | For UP machines without an IO-APIC use nmi_watchdog=2, this only works | ||
39 | for some processor types. If in doubt, boot with nmi_watchdog=1 and | ||
40 | check the NMI count in /proc/interrupts; if the count is zero then | ||
41 | reboot with nmi_watchdog=2 and check the NMI count. If it is still | ||
42 | zero then log a problem, you probably have a processor that needs to be | ||
43 | added to the nmi code. | ||
44 | |||
45 | A 'lockup' is the following scenario: if any CPU in the system does not | ||
46 | execute the periodic local timer interrupt for more than 5 seconds, then | ||
47 | the NMI handler generates an oops and kills the process. This | ||
48 | 'controlled crash' (and the resulting kernel messages) can be used to | ||
49 | debug the lockup. Thus whenever the lockup happens, wait 5 seconds and | ||
50 | the oops will show up automatically. If the kernel produces no messages | ||
51 | then the system has crashed so hard (eg. hardware-wise) that either it | ||
52 | cannot even accept NMI interrupts, or the crash has made the kernel | ||
53 | unable to print messages. | ||
54 | |||
55 | Be aware that when using local APIC, the frequency of NMI interrupts | ||
56 | it generates, depends on the system load. The local APIC NMI watchdog, | ||
57 | lacking a better source, uses the "cycles unhalted" event. As you may | ||
58 | guess it doesn't tick when the CPU is in the halted state (which happens | ||
59 | when the system is idle), but if your system locks up on anything but the | ||
60 | "hlt" processor instruction, the watchdog will trigger very soon as the | ||
61 | "cycles unhalted" event will happen every clock tick. If it locks up on | ||
62 | "hlt", then you are out of luck -- the event will not happen at all and the | ||
63 | watchdog won't trigger. This is a shortcoming of the local APIC watchdog | ||
64 | -- unfortunately there is no "clock ticks" event that would work all the | ||
65 | time. The I/O APIC watchdog is driven externally and has no such shortcoming. | ||
66 | But its NMI frequency is much higher, resulting in a more significant hit | ||
67 | to the overall system performance. | ||
68 | |||
69 | On x86 nmi_watchdog is disabled by default so you have to enable it with | ||
70 | a boot time parameter. | ||
71 | |||
72 | It's possible to disable the NMI watchdog in run-time by writing "0" to | ||
73 | /proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable | ||
74 | the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter | ||
75 | at boot time. | ||
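
For example, to disable and then re-enable it at run-time:

  echo 0 > /proc/sys/kernel/nmi_watchdog
  echo 1 > /proc/sys/kernel/nmi_watchdog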
76 | |||
77 | NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally | ||
78 | on x86 SMP boxes. | ||
79 | |||
80 | [ feel free to send bug reports, suggestions and patches to | ||
81 | Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing | ||
82 | list at <linux-smp@vger.kernel.org> ] | ||
83 | |||
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt new file mode 100644 index 000000000000..d93f3c00f245 --- /dev/null +++ b/Documentation/static-keys.txt | |||
@@ -0,0 +1,286 @@ | |||
1 | Static Keys | ||
2 | ----------- | ||
3 | |||
4 | By: Jason Baron <jbaron@redhat.com> | ||
5 | |||
6 | 0) Abstract | ||
7 | |||
8 | Static keys allow the inclusion of seldom-used features in | ||
9 | performance-sensitive fast-path kernel code, via a GCC feature and a code | ||
10 | patching technique. A quick example: | ||
11 | |||
12 | struct static_key key = STATIC_KEY_INIT_FALSE; | ||
13 | |||
14 | ... | ||
15 | |||
16 | if (static_key_false(&key)) | ||
17 | do unlikely code | ||
18 | else | ||
19 | do likely code | ||
20 | |||
21 | ... | ||
22 | static_key_slow_inc(&key); | ||
23 | ... | ||
24 | static_key_slow_dec(&key); | ||
25 | ... | ||
26 | |||
27 | The static_key_false() branch will be generated into the code with as little | ||
28 | impact on the likely code path as possible. | ||
29 | |||
30 | |||
31 | 1) Motivation | ||
32 | |||
33 | |||
34 | Currently, tracepoints are implemented using a conditional branch. The | ||
35 | conditional check requires checking a global variable for each tracepoint. | ||
36 | Although the overhead of this check is small, it increases when the memory | ||
37 | cache comes under pressure (memory cache lines for these global variables may | ||
38 | be shared with other memory accesses). As we increase the number of tracepoints | ||
39 | in the kernel this overhead may become more of an issue. In addition, | ||
40 | tracepoints are often dormant (disabled) and provide no direct kernel | ||
41 | functionality. Thus, it is highly desirable to reduce their impact as much as | ||
42 | possible. Although tracepoints are the original motivation for this work, other | ||
43 | kernel code paths should be able to make use of the static keys facility. | ||
44 | |||
45 | |||
46 | 2) Solution | ||
47 | |||
48 | |||
49 | gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label: | ||
50 | |||
51 | http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html | ||
52 | |||
53 | Using the 'asm goto', we can create branches that are either taken or not taken | ||
54 | by default, without the need to check memory. Then, at run-time, we can patch | ||
55 | the branch site to change the branch direction. | ||
56 | |||
57 | For example, if we have a simple branch that is disabled by default: | ||
58 | |||
59 | if (static_key_false(&key)) | ||
60 | printk("I am the true branch\n"); | ||
61 | |||
62 | Thus, by default the 'printk' will not be emitted, and the generated code will | ||
63 | consist of a single atomic 'no-op' instruction (5 bytes on x86) in the | ||
64 | straight-line code path. When the branch is 'flipped', we will patch the | ||
65 | 'no-op' in the straight-line codepath with a 'jump' instruction to the | ||
66 | out-of-line true branch. Thus, changing branch direction is expensive but | ||
67 | branch selection is basically 'free'. That is the basic tradeoff of this | ||
68 | optimization. | ||
69 | |||
70 | This low-level patching mechanism is called 'jump label patching', and it forms | ||
71 | the basis for the static keys facility. | ||
72 | |||
73 | 3) Static key label API, usage and examples: | ||
74 | |||
75 | |||
76 | In order to make use of this optimization you must first define a key: | ||
77 | |||
78 | struct static_key key; | ||
79 | |||
80 | Which is initialized as: | ||
81 | |||
82 | struct static_key key = STATIC_KEY_INIT_TRUE; | ||
83 | |||
84 | or: | ||
85 | |||
86 | struct static_key key = STATIC_KEY_INIT_FALSE; | ||
87 | |||
88 | If the key is not initialized, it defaults to false. The 'struct static_key' | ||
89 | must be a 'global'. That is, it can't be allocated on the stack or dynamically | ||
90 | allocated at run-time. | ||
91 | |||
92 | The key is then used in code as: | ||
93 | |||
94 | if (static_key_false(&key)) | ||
95 | do unlikely code | ||
96 | else | ||
97 | do likely code | ||
98 | |||
99 | Or: | ||
100 | |||
101 | if (static_key_true(&key)) | ||
102 | do likely code | ||
103 | else | ||
104 | do unlikely code | ||
105 | |||
106 | A key that is initialized via 'STATIC_KEY_INIT_FALSE' must be used in a | ||
107 | 'static_key_false()' construct. Likewise, a key initialized via | ||
108 | 'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A | ||
109 | single key can be used in many branches, but all the branches must match the | ||
110 | way that the key has been initialized. | ||
111 | |||
112 | The branch(es) can then be switched via: | ||
113 | |||
114 | static_key_slow_inc(&key); | ||
115 | ... | ||
116 | static_key_slow_dec(&key); | ||
117 | |||
118 | Thus, 'static_key_slow_inc()' means 'make the branch true', and | ||
119 | 'static_key_slow_dec()' means 'make the branch false', with appropriate | ||
120 | reference counting. For example, if the key is initialized true, a | ||
121 | static_key_slow_dec() will switch the branch to false, and a subsequent | ||
122 | static_key_slow_inc() will change the branch back to true. Likewise, if the | ||
123 | key is initialized false, a 'static_key_slow_inc()' will change the branch to | ||
124 | true, and a 'static_key_slow_dec()' will again make the branch false. | ||
125 | |||
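As a sketch of the reference counting (hypothetical call sites, using the
API above on a key initialized false):

static struct static_key key = STATIC_KEY_INIT_FALSE;

/* two independent users enable the same key */
static_key_slow_inc(&key);      /* count 0 -> 1: branch patched to true */
static_key_slow_inc(&key);      /* count 1 -> 2: branch stays true */

static_key_slow_dec(&key);      /* count 2 -> 1: branch still true */
static_key_slow_dec(&key);      /* count 1 -> 0: branch patched back to false */
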
126 | An example usage in the kernel is the implementation of tracepoints: | ||
127 | |||
128 | static inline void trace_##name(proto) \ | ||
129 | { \ | ||
130 | if (static_key_false(&__tracepoint_##name.key)) \ | ||
131 | __DO_TRACE(&__tracepoint_##name, \ | ||
132 | TP_PROTO(data_proto), \ | ||
133 | TP_ARGS(data_args), \ | ||
134 | TP_CONDITION(cond)); \ | ||
135 | } | ||
136 | |||
137 | Tracepoints are disabled by default, and can be placed in performance critical | ||
138 | pieces of the kernel. Thus, by using a static key, the tracepoints can have | ||
139 | absolutely minimal impact when not in use. | ||
140 | |||
141 | |||
142 | 4) Architecture level code patching interface, 'jump labels' | ||
143 | |||
144 | |||
145 | There are a few functions and macros that architectures must implement in order | ||
146 | to take advantage of this optimization. If there is no architecture support, we | ||
147 | simply fall back to a traditional load, test, and jump sequence. | ||
148 | |||
149 | * select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig | ||
150 | |||
151 | * #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h | ||
152 | |||
153 | * __always_inline bool arch_static_branch(struct static_key *key), see: | ||
154 | arch/x86/include/asm/jump_label.h | ||
155 | |||
156 | * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type), | ||
157 | see: arch/x86/kernel/jump_label.c | ||
158 | |||
159 | * __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type), | ||
160 | see: arch/x86/kernel/jump_label.c | ||
161 | |||
162 | |||
163 | * struct jump_entry, see: arch/x86/include/asm/jump_label.h | ||
164 | |||
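For reference, the x86 arch_static_branch() looks roughly like the
following (slightly simplified; see arch/x86/include/asm/jump_label.h
for the authoritative version):

static __always_inline bool arch_static_branch(struct static_key *key)
{
	asm goto("1:"
		STATIC_KEY_INITIAL_NOP			/* 5-byte no-op by default */
		".pushsection __jump_table, \"aw\" \n\t"
		_ASM_ALIGN "\n\t"
		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"	/* record site, target, key */
		".popsection \n\t"
		: : "i" (key) : : l_yes);
	return false;		/* straight-line path: branch not taken */
l_yes:
	return true;		/* out-of-line path: reached once patched */
}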
165 | |||
166 | 5) Static keys / jump label analysis, results (x86_64): | ||
167 | |||
168 | |||
169 | As an example, let's add the following branch to 'getppid()', such that the | ||
170 | system call now looks like: | ||
171 | |||
172 | SYSCALL_DEFINE0(getppid) | ||
173 | { | ||
174 | int pid; | ||
175 | |||
176 | + if (static_key_false(&key)) | ||
177 | + printk("I am the true branch\n"); | ||
178 | |||
179 | rcu_read_lock(); | ||
180 | pid = task_tgid_vnr(rcu_dereference(current->real_parent)); | ||
181 | rcu_read_unlock(); | ||
182 | |||
183 | return pid; | ||
184 | } | ||
185 | |||
186 | The resulting instructions, with jump labels generated by GCC, are: | ||
187 | |||
188 | ffffffff81044290 <sys_getppid>: | ||
189 | ffffffff81044290: 55 push %rbp | ||
190 | ffffffff81044291: 48 89 e5 mov %rsp,%rbp | ||
191 | ffffffff81044294: e9 00 00 00 00 jmpq ffffffff81044299 <sys_getppid+0x9> | ||
192 | ffffffff81044299: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax | ||
193 | ffffffff810442a0: 00 00 | ||
194 | ffffffff810442a2: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax | ||
195 | ffffffff810442a9: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax | ||
196 | ffffffff810442b0: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi | ||
197 | ffffffff810442b7: e8 f4 d9 00 00 callq ffffffff81051cb0 <pid_vnr> | ||
198 | ffffffff810442bc: 5d pop %rbp | ||
199 | ffffffff810442bd: 48 98 cltq | ||
200 | ffffffff810442bf: c3 retq | ||
201 | ffffffff810442c0: 48 c7 c7 e3 54 98 81 mov $0xffffffff819854e3,%rdi | ||
202 | ffffffff810442c7: 31 c0 xor %eax,%eax | ||
203 | ffffffff810442c9: e8 71 13 6d 00 callq ffffffff8171563f <printk> | ||
204 | ffffffff810442ce: eb c9 jmp ffffffff81044299 <sys_getppid+0x9> | ||
205 | |||
206 | Without the jump label optimization it looks like: | ||
207 | |||
208 | ffffffff810441f0 <sys_getppid>: | ||
209 | ffffffff810441f0: 8b 05 8a 52 d8 00 mov 0xd8528a(%rip),%eax # ffffffff81dc9480 <key> | ||
210 | ffffffff810441f6: 55 push %rbp | ||
211 | ffffffff810441f7: 48 89 e5 mov %rsp,%rbp | ||
212 | ffffffff810441fa: 85 c0 test %eax,%eax | ||
213 | ffffffff810441fc: 75 27 jne ffffffff81044225 <sys_getppid+0x35> | ||
214 | ffffffff810441fe: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax | ||
215 | ffffffff81044205: 00 00 | ||
216 | ffffffff81044207: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax | ||
217 | ffffffff8104420e: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax | ||
218 | ffffffff81044215: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi | ||
219 | ffffffff8104421c: e8 2f da 00 00 callq ffffffff81051c50 <pid_vnr> | ||
220 | ffffffff81044221: 5d pop %rbp | ||
221 | ffffffff81044222: 48 98 cltq | ||
222 | ffffffff81044224: c3 retq | ||
223 | ffffffff81044225: 48 c7 c7 13 53 98 81 mov $0xffffffff81985313,%rdi | ||
224 | ffffffff8104422c: 31 c0 xor %eax,%eax | ||
225 | ffffffff8104422e: e8 60 0f 6d 00 callq ffffffff81715193 <printk> | ||
226 | ffffffff81044233: eb c9 jmp ffffffff810441fe <sys_getppid+0xe> | ||
227 | ffffffff81044235: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1) | ||
228 | ffffffff8104423c: 00 00 00 00 | ||
229 | |||
230 | Thus, the disabled jump label case adds a 'mov', 'test' and 'jne' instruction | ||
231 | sequence, whereas the jump label case has just a 'no-op' or 'jmp 0'. (The | ||
232 | 'jmp 0' is patched to a 5-byte atomic no-op instruction at boot-time.) Thus, | ||
233 | the disabled jump label case adds: | ||
234 | |||
235 | 6 (mov) + 2 (test) + 2 (jne) = 10 bytes, versus the 5-byte 'jmp 0': 5 additional bytes. | ||
236 | |||
237 | If we then include the padding bytes, the jump label code saves 16 total bytes | ||
238 | of instruction memory for this small function. In this case the non-jump label | ||
239 | function is 80 bytes long. Thus, we have saved 20% of the instruction | ||
240 | footprint. We could in fact improve this even further: the 5-byte no-op could | ||
241 | really be a 2-byte no-op, since we can reach the branch with a 2-byte jmp. | ||
242 | However, we have not yet implemented optimal no-op sizes (they are currently | ||
243 | hard-coded). | ||
244 | |||
245 | Since there are a number of static key API uses in the scheduler paths, | ||
246 | 'pipe-test' (also known as 'perf bench sched pipe') can be used to show the | ||
247 | performance improvement. Testing done on 3.3.0-rc2: | ||
248 | |||
249 | jump label disabled: | ||
250 | |||
251 | Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs): | ||
252 | |||
253 | 855.700314 task-clock # 0.534 CPUs utilized ( +- 0.11% ) | ||
254 | 200,003 context-switches # 0.234 M/sec ( +- 0.00% ) | ||
255 | 0 CPU-migrations # 0.000 M/sec ( +- 39.58% ) | ||
256 | 487 page-faults # 0.001 M/sec ( +- 0.02% ) | ||
257 | 1,474,374,262 cycles # 1.723 GHz ( +- 0.17% ) | ||
258 | <not supported> stalled-cycles-frontend | ||
259 | <not supported> stalled-cycles-backend | ||
260 | 1,178,049,567 instructions # 0.80 insns per cycle ( +- 0.06% ) | ||
261 | 208,368,926 branches # 243.507 M/sec ( +- 0.06% ) | ||
262 | 5,569,188 branch-misses # 2.67% of all branches ( +- 0.54% ) | ||
263 | |||
264 | 1.601607384 seconds time elapsed ( +- 0.07% ) | ||
265 | |||
266 | jump label enabled: | ||
267 | |||
268 | Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs): | ||
269 | |||
270 | 841.043185 task-clock # 0.533 CPUs utilized ( +- 0.12% ) | ||
271 | 200,004 context-switches # 0.238 M/sec ( +- 0.00% ) | ||
272 | 0 CPU-migrations # 0.000 M/sec ( +- 40.87% ) | ||
273 | 487 page-faults # 0.001 M/sec ( +- 0.05% ) | ||
274 | 1,432,559,428 cycles # 1.703 GHz ( +- 0.18% ) | ||
275 | <not supported> stalled-cycles-frontend | ||
276 | <not supported> stalled-cycles-backend | ||
277 | 1,175,363,994 instructions # 0.82 insns per cycle ( +- 0.04% ) | ||
278 | 206,859,359 branches # 245.956 M/sec ( +- 0.04% ) | ||
279 | 4,884,119 branch-misses # 2.36% of all branches ( +- 0.85% ) | ||
280 | |||
281 | 1.579384366 seconds time elapsed | ||
282 | |||
283 | The percentage of saved branches is 0.7%, and we've saved 12% on | ||
284 | 'branch-misses'. This is where we would expect to get the most savings, since | ||
285 | this optimization is about reducing the number of branches. In addition, we've | ||
286 | saved 0.2% on instructions, 2.8% on cycles, and 1.4% on elapsed time. | ||
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 1ebc24cf9a55..6f51fed45f2d 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt | |||
@@ -226,6 +226,13 @@ Here is the list of current tracers that may be configured. | |||
226 | Traces and records the max latency that it takes for | 226 | Traces and records the max latency that it takes for |
227 | the highest priority task to get scheduled after | 227 | the highest priority task to get scheduled after |
228 | it has been woken up. | 228 | it has been woken up. |
229 | Traces all tasks as an average developer would expect. | ||
230 | |||
231 | "wakeup_rt" | ||
232 | |||
233 | Traces and records the max latency that it takes for just | ||
234 | RT tasks (as the current "wakeup" does). This is useful | ||
235 | for those interested in wake up timings of RT tasks. | ||
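
For example (assuming debugfs is mounted at /sys/kernel/debug, as
described earlier in this document):

  echo wakeup_rt > /sys/kernel/debug/tracing/current_tracer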
229 | 236 | ||
230 | "hw-branch-tracer" | 237 | "hw-branch-tracer" |
231 | 238 | ||
diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be11..5b448a74d0f7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -47,18 +47,29 @@ config KPROBES | |||
47 | If in doubt, say "N". | 47 | If in doubt, say "N". |
48 | 48 | ||
49 | config JUMP_LABEL | 49 | config JUMP_LABEL |
50 | bool "Optimize trace point call sites" | 50 | bool "Optimize very unlikely/likely branches" |
51 | depends on HAVE_ARCH_JUMP_LABEL | 51 | depends on HAVE_ARCH_JUMP_LABEL |
52 | help | 52 | help |
53 | This option enables a transparent branch optimization that | ||
54 | makes certain almost-always-true or almost-always-false branch | ||
55 | conditions even cheaper to execute within the kernel. | ||
56 | |||
57 | Certain performance-sensitive kernel code, such as trace points, | ||
58 | scheduler functionality, networking code and KVM have such | ||
59 | branches and include support for this optimization technique. | ||
60 | |||
53 | If it is detected that the compiler has support for "asm goto", | 61 | If it is detected that the compiler has support for "asm goto", |
54 | the kernel will compile trace point locations with just a | 62 | the kernel will compile such branches with just a nop |
55 | nop instruction. When trace points are enabled, the nop will | 63 | instruction. When the condition flag is toggled to true, the |
56 | be converted to a jump to the trace function. This technique | 64 | nop will be converted to a jump instruction to execute the |
57 | lowers overhead and stress on the branch prediction of the | 65 | conditional block of instructions. |
58 | processor. | 66 | |
59 | 67 | This technique lowers overhead and stress on the branch prediction | |
60 | On i386, options added to the compiler flags may increase | 68 | of the processor and generally makes the kernel faster. The update |
61 | the size of the kernel slightly. | 69 | of the condition is slower, but those are always very rare. |
70 | |||
71 | ( On 32-bit x86, the necessary options added to the compiler | ||
72 | flags may increase the size of the kernel slightly. ) | ||
62 | 73 | ||
63 | config OPTPROBES | 74 | config OPTPROBES |
64 | def_bool y | 75 | def_bool y |
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 8143cd7cdbfb..0dae252f7a33 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c | |||
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event) | |||
685 | { | 685 | { |
686 | int err; | 686 | int err; |
687 | 687 | ||
688 | /* does not support taken branch sampling */ | ||
689 | if (has_branch_stack(event)) | ||
690 | return -EOPNOTSUPP; | ||
691 | |||
688 | switch (event->attr.type) { | 692 | switch (event->attr.type) { |
689 | case PERF_TYPE_RAW: | 693 | case PERF_TYPE_RAW: |
690 | case PERF_TYPE_HARDWARE: | 694 | case PERF_TYPE_HARDWARE: |
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 99cfe3607989..7523340afb8a 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h | |||
@@ -12,10 +12,6 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the | ||
16 | * same indexes here for consistency. */ | ||
17 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
18 | |||
19 | /* ARM perf PMU IDs for use by internal perf clients. */ | 15 | /* ARM perf PMU IDs for use by internal perf clients. */ |
20 | enum arm_perf_pmu_ids { | 16 | enum arm_perf_pmu_ids { |
21 | ARM_PERF_PMU_ID_XSCALE1 = 0, | 17 | ARM_PERF_PMU_ID_XSCALE1 = 0, |
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index b2abfa18f137..8a89d3b7626b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event) | |||
539 | int err = 0; | 539 | int err = 0; |
540 | atomic_t *active_events = &armpmu->active_events; | 540 | atomic_t *active_events = &armpmu->active_events; |
541 | 541 | ||
542 | /* does not support taken branch sampling */ | ||
543 | if (has_branch_stack(event)) | ||
544 | return -EOPNOTSUPP; | ||
545 | |||
542 | if (armpmu->map_event(event) == -ENOENT) | 546 | if (armpmu->map_event(event) == -ENOENT) |
543 | return -ENOENT; | 547 | return -ENOENT; |
544 | 548 | ||
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h index a69e0155d146..c52ea5546b5b 100644 --- a/arch/frv/include/asm/perf_event.h +++ b/arch/frv/include/asm/perf_event.h | |||
@@ -12,6 +12,4 @@ | |||
12 | #ifndef _ASM_PERF_EVENT_H | 12 | #ifndef _ASM_PERF_EVENT_H |
13 | #define _ASM_PERF_EVENT_H | 13 | #define _ASM_PERF_EVENT_H |
14 | 14 | ||
15 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
16 | |||
17 | #endif /* _ASM_PERF_EVENT_H */ | 15 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h index 6c2910f91180..8b8526b491c7 100644 --- a/arch/hexagon/include/asm/perf_event.h +++ b/arch/hexagon/include/asm/perf_event.h | |||
@@ -19,6 +19,4 @@ | |||
19 | #ifndef _ASM_PERF_EVENT_H | 19 | #ifndef _ASM_PERF_EVENT_H |
20 | #define _ASM_PERF_EVENT_H | 20 | #define _ASM_PERF_EVENT_H |
21 | 21 | ||
22 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
23 | |||
24 | #endif /* _ASM_PERF_EVENT_H */ | 22 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 32551d304cd7..b149b88ea795 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h | |||
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) | |||
281 | pv_time_ops.init_missing_ticks_accounting(cpu); | 281 | pv_time_ops.init_missing_ticks_accounting(cpu); |
282 | } | 282 | } |
283 | 283 | ||
284 | struct jump_label_key; | 284 | struct static_key; |
285 | extern struct jump_label_key paravirt_steal_enabled; | 285 | extern struct static_key paravirt_steal_enabled; |
286 | extern struct jump_label_key paravirt_steal_rq_enabled; | 286 | extern struct static_key paravirt_steal_rq_enabled; |
287 | 287 | ||
288 | static inline int | 288 | static inline int |
289 | paravirt_do_steal_accounting(unsigned long *new_itm) | 289 | paravirt_do_steal_accounting(unsigned long *new_itm) |
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 100868216c55..1b22f6de2932 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c | |||
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { | |||
634 | * pv_time_ops | 634 | * pv_time_ops |
635 | * time operations | 635 | * time operations |
636 | */ | 636 | */ |
637 | struct jump_label_key paravirt_steal_enabled; | 637 | struct static_key paravirt_steal_enabled; |
638 | struct jump_label_key paravirt_steal_rq_enabled; | 638 | struct static_key paravirt_steal_rq_enabled; |
639 | 639 | ||
640 | static int | 640 | static int |
641 | ia64_native_do_steal_accounting(unsigned long *new_itm) | 641 | ia64_native_do_steal_accounting(unsigned long *new_itm) |
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 1881b316ca45..4d6d77ed9b9d 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h | |||
@@ -20,7 +20,7 @@ | |||
20 | #define WORD_INSN ".word" | 20 | #define WORD_INSN ".word" |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 23 | static __always_inline bool arch_static_branch(struct static_key *key) |
24 | { | 24 | { |
25 | asm goto("1:\tnop\n\t" | 25 | asm goto("1:\tnop\n\t" |
26 | "nop\n\t" | 26 | "nop\n\t" |
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index e3b897acfbc0..811084f4e422 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c | |||
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event) | |||
606 | { | 606 | { |
607 | int err = 0; | 607 | int err = 0; |
608 | 608 | ||
609 | /* does not support taken branch sampling */ | ||
610 | if (has_branch_stack(event)) | ||
611 | return -EOPNOTSUPP; | ||
612 | |||
609 | switch (event->attr.type) { | 613 | switch (event->attr.type) { |
610 | case PERF_TYPE_RAW: | 614 | case PERF_TYPE_RAW: |
611 | case PERF_TYPE_HARDWARE: | 615 | case PERF_TYPE_HARDWARE: |
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 938986e412f1..ae098c438f00 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h | |||
@@ -17,7 +17,7 @@ | |||
17 | #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) | 17 | #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) |
18 | #define JUMP_LABEL_NOP_SIZE 4 | 18 | #define JUMP_LABEL_NOP_SIZE 4 |
19 | 19 | ||
20 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 20 | static __always_inline bool arch_static_branch(struct static_key *key) |
21 | { | 21 | { |
22 | asm goto("1:\n\t" | 22 | asm goto("1:\n\t" |
23 | "nop\n\t" | 23 | "nop\n\t" |
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8f1df1208d23..1a8093fa8f71 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h | |||
@@ -61,8 +61,6 @@ struct pt_regs; | |||
61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
63 | 63 | ||
64 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
65 | |||
66 | /* | 64 | /* |
67 | * Only override the default definitions in include/linux/perf_event.h | 65 | * Only override the default definitions in include/linux/perf_event.h |
68 | * if we have hardware PMU support. | 66 | * if we have hardware PMU support. |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 64483fde95c6..c2e27ede07ec 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1084 | if (!ppmu) | 1084 | if (!ppmu) |
1085 | return -ENOENT; | 1085 | return -ENOENT; |
1086 | 1086 | ||
1087 | /* does not support taken branch sampling */ | ||
1088 | if (has_branch_stack(event)) | ||
1089 | return -EOPNOTSUPP; | ||
1090 | |||
1087 | switch (event->attr.type) { | 1091 | switch (event->attr.type) { |
1088 | case PERF_TYPE_HARDWARE: | 1092 | case PERF_TYPE_HARDWARE: |
1089 | ev = event->attr.config; | 1093 | ev = event->attr.config; |
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1193 | return err; | 1197 | return err; |
1194 | } | 1198 | } |
1195 | 1199 | ||
1200 | static int power_pmu_event_idx(struct perf_event *event) | ||
1201 | { | ||
1202 | return event->hw.idx; | ||
1203 | } | ||
1204 | |||
1196 | struct pmu power_pmu = { | 1205 | struct pmu power_pmu = { |
1197 | .pmu_enable = power_pmu_enable, | 1206 | .pmu_enable = power_pmu_enable, |
1198 | .pmu_disable = power_pmu_disable, | 1207 | .pmu_disable = power_pmu_disable, |
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = { | |||
1205 | .start_txn = power_pmu_start_txn, | 1214 | .start_txn = power_pmu_start_txn, |
1206 | .cancel_txn = power_pmu_cancel_txn, | 1215 | .cancel_txn = power_pmu_cancel_txn, |
1207 | .commit_txn = power_pmu_commit_txn, | 1216 | .commit_txn = power_pmu_commit_txn, |
1217 | .event_idx = power_pmu_event_idx, | ||
1208 | }; | 1218 | }; |
1209 | 1219 | ||
1210 | /* | 1220 | /* |
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2b5b67..6c32190dc73e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h | |||
@@ -13,7 +13,7 @@ | |||
13 | #define ASM_ALIGN ".balign 4" | 13 | #define ASM_ALIGN ".balign 4" |
14 | #endif | 14 | #endif |
15 | 15 | ||
16 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 16 | static __always_inline bool arch_static_branch(struct static_key *key) |
17 | { | 17 | { |
18 | asm goto("0: brcl 0,0\n" | 18 | asm goto("0: brcl 0,0\n" |
19 | ".pushsection __jump_table, \"aw\"\n" | 19 | ".pushsection __jump_table, \"aw\"\n" |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d2718..4eb444edbe49 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h | |||
@@ -6,4 +6,3 @@ | |||
6 | 6 | ||
7 | /* Empty, just to avoid compiling error */ | 7 | /* Empty, just to avoid compiling error */ |
8 | 8 | ||
9 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 10b14e3a7eb8..068b8a2759b5 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c | |||
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event) | |||
310 | { | 310 | { |
311 | int err; | 311 | int err; |
312 | 312 | ||
313 | /* does not support taken branch sampling */ | ||
314 | if (has_branch_stack(event)) | ||
315 | return -EOPNOTSUPP; | ||
316 | |||
313 | switch (event->attr.type) { | 317 | switch (event->attr.type) { |
314 | case PERF_TYPE_RAW: | 318 | case PERF_TYPE_RAW: |
315 | case PERF_TYPE_HW_CACHE: | 319 | case PERF_TYPE_HW_CACHE: |
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index fc73a82366f8..5080d16a832f 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #define JUMP_LABEL_NOP_SIZE 4 | 8 | #define JUMP_LABEL_NOP_SIZE 4 |
9 | 9 | ||
10 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 10 | static __always_inline bool arch_static_branch(struct static_key *key) |
11 | { | 11 | { |
12 | asm goto("1:\n\t" | 12 | asm goto("1:\n\t" |
13 | "nop\n\t" | 13 | "nop\n\t" |
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 614da624330c..8e16a4a21582 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event) | |||
1105 | if (atomic_read(&nmi_active) < 0) | 1105 | if (atomic_read(&nmi_active) < 0) |
1106 | return -ENODEV; | 1106 | return -ENODEV; |
1107 | 1107 | ||
1108 | /* does not support taken branch sampling */ | ||
1109 | if (has_branch_stack(event)) | ||
1110 | return -EOPNOTSUPP; | ||
1111 | |||
1108 | switch (attr->type) { | 1112 | switch (attr->type) { |
1109 | case PERF_TYPE_HARDWARE: | 1113 | case PERF_TYPE_HARDWARE: |
1110 | if (attr->config >= sparc_pmu->max_events) | 1114 | if (attr->config >= sparc_pmu->max_events) |
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 205b063e3e32..74a2e312e8a2 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h | |||
@@ -97,11 +97,12 @@ | |||
97 | 97 | ||
98 | /* Attribute search APIs */ | 98 | /* Attribute search APIs */ |
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | 99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); |
100 | extern int inat_get_last_prefix_id(insn_byte_t last_pfx); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | 101 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, |
101 | insn_byte_t last_pfx, | 102 | int lpfx_id, |
102 | insn_attr_t esc_attr); | 103 | insn_attr_t esc_attr); |
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | 104 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, |
104 | insn_byte_t last_pfx, | 105 | int lpfx_id, |
105 | insn_attr_t esc_attr); | 106 | insn_attr_t esc_attr); |
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | 107 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, |
107 | insn_byte_t vex_m, | 108 | insn_byte_t vex_m, |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 74df3f1eddfd..48eb30a86062 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -96,12 +96,6 @@ struct insn { | |||
96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | 96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ |
97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | 97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ |
98 | 98 | ||
99 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
100 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
101 | { | ||
102 | return insn->prefixes.bytes[3]; | ||
103 | } | ||
104 | |||
105 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | 99 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); |
106 | extern void insn_get_prefixes(struct insn *insn); | 100 | extern void insn_get_prefixes(struct insn *insn); |
107 | extern void insn_get_opcode(struct insn *insn); | 101 | extern void insn_get_opcode(struct insn *insn); |
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | |||
160 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | 154 | return X86_VEX_P(insn->vex_prefix.bytes[2]); |
161 | } | 155 | } |
162 | 156 | ||
157 | /* Get the last prefix id from last prefix or VEX prefix */ | ||
158 | static inline int insn_last_prefix_id(struct insn *insn) | ||
159 | { | ||
160 | if (insn_is_avx(insn)) | ||
161 | return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ | ||
162 | |||
163 | if (insn->prefixes.bytes[3]) | ||
164 | return inat_get_last_prefix_id(insn->prefixes.bytes[3]); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
163 | /* Offset of each field from kaddr */ | 169 | /* Offset of each field from kaddr */ |
164 | static inline int insn_offset_rex_prefix(struct insn *insn) | 170 | static inline int insn_offset_rex_prefix(struct insn *insn) |
165 | { | 171 | { |
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18ce6ead..3a16c1483b45 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h | |||
@@ -9,12 +9,12 @@ | |||
9 | 9 | ||
10 | #define JUMP_LABEL_NOP_SIZE 5 | 10 | #define JUMP_LABEL_NOP_SIZE 5 |
11 | 11 | ||
12 | #define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" | 12 | #define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" |
13 | 13 | ||
14 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 14 | static __always_inline bool arch_static_branch(struct static_key *key) |
15 | { | 15 | { |
16 | asm goto("1:" | 16 | asm goto("1:" |
17 | JUMP_LABEL_INITIAL_NOP | 17 | STATIC_KEY_INITIAL_NOP |
18 | ".pushsection __jump_table, \"aw\" \n\t" | 18 | ".pushsection __jump_table, \"aw\" \n\t" |
19 | _ASM_ALIGN "\n\t" | 19 | _ASM_ALIGN "\n\t" |
20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" | 20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a6962d9161a0..ccb805966f68 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -56,6 +56,13 @@ | |||
56 | #define MSR_OFFCORE_RSP_0 0x000001a6 | 56 | #define MSR_OFFCORE_RSP_0 0x000001a6 |
57 | #define MSR_OFFCORE_RSP_1 0x000001a7 | 57 | #define MSR_OFFCORE_RSP_1 0x000001a7 |
58 | 58 | ||
59 | #define MSR_LBR_SELECT 0x000001c8 | ||
60 | #define MSR_LBR_TOS 0x000001c9 | ||
61 | #define MSR_LBR_NHM_FROM 0x00000680 | ||
62 | #define MSR_LBR_NHM_TO 0x000006c0 | ||
63 | #define MSR_LBR_CORE_FROM 0x00000040 | ||
64 | #define MSR_LBR_CORE_TO 0x00000060 | ||
65 | |||
59 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 | 66 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 |
60 | #define MSR_IA32_DS_AREA 0x00000600 | 67 | #define MSR_IA32_DS_AREA 0x00000600 |
61 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 | 68 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a7d2db9a74fb..c0180fd372d2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void) | |||
230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
231 | } | 231 | } |
232 | 232 | ||
233 | struct jump_label_key; | 233 | struct static_key; |
234 | extern struct jump_label_key paravirt_steal_enabled; | 234 | extern struct static_key paravirt_steal_enabled; |
235 | extern struct jump_label_key paravirt_steal_rq_enabled; | 235 | extern struct static_key paravirt_steal_rq_enabled; |
236 | 236 | ||
237 | static inline u64 paravirt_steal_clock(int cpu) | 237 | static inline u64 paravirt_steal_clock(int cpu) |
238 | { | 238 | { |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 461ce432b1c2..e8fb2c7a5f4f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); | |||
188 | #ifdef CONFIG_PERF_EVENTS | 188 | #ifdef CONFIG_PERF_EVENTS |
189 | extern void perf_events_lapic_init(void); | 189 | extern void perf_events_lapic_init(void); |
190 | 190 | ||
191 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
192 | |||
193 | /* | 191 | /* |
194 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | 192 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. |
195 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | 193 | * This flag is otherwise unused and ABI specified to be 0, so nobody should |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059c07a9..532d2e090e6f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | |||
69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
71 | obj-$(CONFIG_KPROBES) += kprobes.o | 71 | obj-$(CONFIG_KPROBES) += kprobes.o |
72 | obj-$(CONFIG_OPTPROBES) += kprobes-opt.o | ||
72 | obj-$(CONFIG_MODULES) += module.o | 73 | obj-$(CONFIG_MODULES) += module.o |
73 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | 74 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o |
74 | obj-$(CONFIG_KGDB) += kgdb.o | 75 | obj-$(CONFIG_KGDB) += kgdb.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4773f4aae35..0a44b90602b0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | #include <linux/io.h> | 7 | #include <linux/io.h> |
8 | #include <linux/sched.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
456 | if (c->x86_power & (1 << 8)) { | 457 | if (c->x86_power & (1 << 8)) { |
457 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 458 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
458 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 459 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
460 | if (!check_tsc_unstable()) | ||
461 | sched_clock_stable = 1; | ||
459 | } | 462 | } |
460 | 463 | ||
461 | #ifdef CONFIG_X86_64 | 464 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce1040b11..0a18d16cb58d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/device.h> | ||
27 | 28 | ||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
@@ -31,6 +32,7 @@ | |||
31 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
32 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
33 | #include <asm/alternative.h> | 34 | #include <asm/alternative.h> |
35 | #include <asm/timer.h> | ||
34 | 36 | ||
35 | #include "perf_event.h" | 37 | #include "perf_event.h" |
36 | 38 | ||
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) | |||
351 | return 0; | 353 | return 0; |
352 | } | 354 | } |
353 | 355 | ||
356 | /* | ||
357 | * check that branch_sample_type is compatible with | ||
358 | * settings needed for precise_ip > 1 which implies | ||
359 | * using the LBR to capture ALL taken branches at the | ||
360 | * priv levels of the measurement | ||
361 | */ | ||
362 | static inline int precise_br_compat(struct perf_event *event) | ||
363 | { | ||
364 | u64 m = event->attr.branch_sample_type; | ||
365 | u64 b = 0; | ||
366 | |||
367 | /* must capture all branches */ | ||
368 | if (!(m & PERF_SAMPLE_BRANCH_ANY)) | ||
369 | return 0; | ||
370 | |||
371 | m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; | ||
372 | |||
373 | if (!event->attr.exclude_user) | ||
374 | b |= PERF_SAMPLE_BRANCH_USER; | ||
375 | |||
376 | if (!event->attr.exclude_kernel) | ||
377 | b |= PERF_SAMPLE_BRANCH_KERNEL; | ||
378 | |||
379 | /* | ||
380 | * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 | ||
381 | */ | ||
382 | |||
383 | return m == b; | ||
384 | } | ||
385 | |||
354 | int x86_pmu_hw_config(struct perf_event *event) | 386 | int x86_pmu_hw_config(struct perf_event *event) |
355 | { | 387 | { |
356 | if (event->attr.precise_ip) { | 388 | if (event->attr.precise_ip) { |
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
367 | 399 | ||
368 | if (event->attr.precise_ip > precise) | 400 | if (event->attr.precise_ip > precise) |
369 | return -EOPNOTSUPP; | 401 | return -EOPNOTSUPP; |
402 | /* | ||
403 | * check that PEBS LBR correction does not conflict with | ||
404 | * whatever the user is asking with attr->branch_sample_type | ||
405 | */ | ||
406 | if (event->attr.precise_ip > 1) { | ||
407 | u64 *br_type = &event->attr.branch_sample_type; | ||
408 | |||
409 | if (has_branch_stack(event)) { | ||
410 | if (!precise_br_compat(event)) | ||
411 | return -EOPNOTSUPP; | ||
412 | |||
413 | /* branch_sample_type is compatible */ | ||
414 | |||
415 | } else { | ||
416 | /* | ||
417 | * user did not specify branch_sample_type | ||
418 | * | ||
419 | * For PEBS fixups, we capture all | ||
420 | * the branches at the priv level of the | ||
421 | * event. | ||
422 | */ | ||
423 | *br_type = PERF_SAMPLE_BRANCH_ANY; | ||
424 | |||
425 | if (!event->attr.exclude_user) | ||
426 | *br_type |= PERF_SAMPLE_BRANCH_USER; | ||
427 | |||
428 | if (!event->attr.exclude_kernel) | ||
429 | *br_type |= PERF_SAMPLE_BRANCH_KERNEL; | ||
430 | } | ||
431 | } | ||
370 | } | 432 | } |
371 | 433 | ||
372 | /* | 434 | /* |
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
424 | /* mark unused */ | 486 | /* mark unused */ |
425 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 487 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
426 | 488 | ||
489 | /* mark not used */ | ||
490 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
491 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
492 | |||
427 | return x86_pmu.hw_config(event); | 493 | return x86_pmu.hw_config(event); |
428 | } | 494 | } |
429 | 495 | ||
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1210 | break; | 1276 | break; |
1211 | 1277 | ||
1212 | case CPU_STARTING: | 1278 | case CPU_STARTING: |
1279 | if (x86_pmu.attr_rdpmc) | ||
1280 | set_in_cr4(X86_CR4_PCE); | ||
1213 | if (x86_pmu.cpu_starting) | 1281 | if (x86_pmu.cpu_starting) |
1214 | x86_pmu.cpu_starting(cpu); | 1282 | x86_pmu.cpu_starting(cpu); |
1215 | break; | 1283 | break; |
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void) | |||
1319 | } | 1387 | } |
1320 | } | 1388 | } |
1321 | 1389 | ||
1390 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1391 | |||
1322 | pr_info("... version: %d\n", x86_pmu.version); | 1392 | pr_info("... version: %d\n", x86_pmu.version); |
1323 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1393 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1324 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1394 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1542 | return err; | 1612 | return err; |
1543 | } | 1613 | } |
1544 | 1614 | ||
1615 | static int x86_pmu_event_idx(struct perf_event *event) | ||
1616 | { | ||
1617 | int idx = event->hw.idx; | ||
1618 | |||
1619 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | ||
1620 | idx -= X86_PMC_IDX_FIXED; | ||
1621 | idx |= 1 << 30; | ||
1622 | } | ||
1623 | |||
1624 | return idx + 1; | ||
1625 | } | ||
1626 | |||
1627 | static ssize_t get_attr_rdpmc(struct device *cdev, | ||
1628 | struct device_attribute *attr, | ||
1629 | char *buf) | ||
1630 | { | ||
1631 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | ||
1632 | } | ||
1633 | |||
1634 | static void change_rdpmc(void *info) | ||
1635 | { | ||
1636 | bool enable = !!(unsigned long)info; | ||
1637 | |||
1638 | if (enable) | ||
1639 | set_in_cr4(X86_CR4_PCE); | ||
1640 | else | ||
1641 | clear_in_cr4(X86_CR4_PCE); | ||
1642 | } | ||
1643 | |||
1644 | static ssize_t set_attr_rdpmc(struct device *cdev, | ||
1645 | struct device_attribute *attr, | ||
1646 | const char *buf, size_t count) | ||
1647 | { | ||
1648 | unsigned long val = simple_strtoul(buf, NULL, 0); | ||
1649 | |||
1650 | if (!!val != !!x86_pmu.attr_rdpmc) { | ||
1651 | x86_pmu.attr_rdpmc = !!val; | ||
1652 | smp_call_function(change_rdpmc, (void *)val, 1); | ||
1653 | } | ||
1654 | |||
1655 | return count; | ||
1656 | } | ||
1657 | |||
1658 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); | ||
1659 | |||
1660 | static struct attribute *x86_pmu_attrs[] = { | ||
1661 | &dev_attr_rdpmc.attr, | ||
1662 | NULL, | ||
1663 | }; | ||
1664 | |||
1665 | static struct attribute_group x86_pmu_attr_group = { | ||
1666 | .attrs = x86_pmu_attrs, | ||
1667 | }; | ||
1668 | |||
1669 | static const struct attribute_group *x86_pmu_attr_groups[] = { | ||
1670 | &x86_pmu_attr_group, | ||
1671 | NULL, | ||
1672 | }; | ||
1673 | |||
1674 | static void x86_pmu_flush_branch_stack(void) | ||
1675 | { | ||
1676 | if (x86_pmu.flush_branch_stack) | ||
1677 | x86_pmu.flush_branch_stack(); | ||
1678 | } | ||
1679 | |||
1545 | static struct pmu pmu = { | 1680 | static struct pmu pmu = { |
1546 | .pmu_enable = x86_pmu_enable, | 1681 | .pmu_enable = x86_pmu_enable, |
1547 | .pmu_disable = x86_pmu_disable, | 1682 | .pmu_disable = x86_pmu_disable, |
1683 | |||
1684 | .attr_groups = x86_pmu_attr_groups, | ||
1548 | 1685 | ||
1549 | .event_init = x86_pmu_event_init, | 1686 | .event_init = x86_pmu_event_init, |
1550 | 1687 | ||
1551 | .add = x86_pmu_add, | 1688 | .add = x86_pmu_add, |
1552 | .del = x86_pmu_del, | 1689 | .del = x86_pmu_del, |
1553 | .start = x86_pmu_start, | 1690 | .start = x86_pmu_start, |
1554 | .stop = x86_pmu_stop, | 1691 | .stop = x86_pmu_stop, |
1555 | .read = x86_pmu_read, | 1692 | .read = x86_pmu_read, |
1556 | 1693 | ||
1557 | .start_txn = x86_pmu_start_txn, | 1694 | .start_txn = x86_pmu_start_txn, |
1558 | .cancel_txn = x86_pmu_cancel_txn, | 1695 | .cancel_txn = x86_pmu_cancel_txn, |
1559 | .commit_txn = x86_pmu_commit_txn, | 1696 | .commit_txn = x86_pmu_commit_txn, |
1697 | |||
1698 | .event_idx = x86_pmu_event_idx, | ||
1699 | .flush_branch_stack = x86_pmu_flush_branch_stack, | ||
1560 | }; | 1700 | }; |
1561 | 1701 | ||
1702 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
1703 | { | ||
1704 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
1705 | return; | ||
1706 | |||
1707 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
1708 | return; | ||
1709 | |||
1710 | userpg->time_mult = this_cpu_read(cyc2ns); | ||
1711 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | ||
1712 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | ||
1713 | } | ||
1714 | |||
1562 | /* | 1715 | /* |
1563 | * callchain support | 1716 | * callchain support |
1564 | */ | 1717 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc72..8484e77c211e 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -33,6 +33,7 @@ enum extra_reg_type { | |||
33 | 33 | ||
34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
36 | EXTRA_REG_LBR = 2, /* lbr_select */ | ||
36 | 37 | ||
37 | EXTRA_REG_MAX /* number of entries needed */ | 38 | EXTRA_REG_MAX /* number of entries needed */ |
38 | }; | 39 | }; |
@@ -130,6 +131,8 @@ struct cpu_hw_events { | |||
130 | void *lbr_context; | 131 | void *lbr_context; |
131 | struct perf_branch_stack lbr_stack; | 132 | struct perf_branch_stack lbr_stack; |
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 133 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
134 | struct er_account *lbr_sel; | ||
135 | u64 br_sel; | ||
133 | 136 | ||
134 | /* | 137 | /* |
135 | * Intel host/guest exclude bits | 138 | * Intel host/guest exclude bits |
@@ -268,6 +271,29 @@ struct x86_pmu_quirk { | |||
268 | void (*func)(void); | 271 | void (*func)(void); |
269 | }; | 272 | }; |
270 | 273 | ||
274 | union x86_pmu_config { | ||
275 | struct { | ||
276 | u64 event:8, | ||
277 | umask:8, | ||
278 | usr:1, | ||
279 | os:1, | ||
280 | edge:1, | ||
281 | pc:1, | ||
282 | interrupt:1, | ||
283 | __reserved1:1, | ||
284 | en:1, | ||
285 | inv:1, | ||
286 | cmask:8, | ||
287 | event2:4, | ||
288 | __reserved2:4, | ||
289 | go:1, | ||
290 | ho:1; | ||
291 | } bits; | ||
292 | u64 value; | ||
293 | }; | ||
294 | |||
295 | #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value | ||
296 | |||
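X86_CONFIG() lets raw PERFEVTSEL encodings be spelled out field by field instead of as magic constants. As a worked check against the bitfield layout above, the INST_RETIRED.TOTAL_CYCLES alias used later in this series expands to

        X86_CONFIG(.event=0xc0, .inv=1, .cmask=16)
                = (16 << 24) | (1 << 23) | 0xc0   /* cmask | inv | event */
                = 0x108000c0

which is exactly the literal it replaces in intel_pmu_hw_config().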
271 | /* | 297 | /* |
272 | * struct x86_pmu - generic x86 pmu | 298 | * struct x86_pmu - generic x86 pmu |
273 | */ | 299 | */ |
@@ -309,10 +335,19 @@ struct x86_pmu { | |||
309 | struct x86_pmu_quirk *quirks; | 335 | struct x86_pmu_quirk *quirks; |
310 | int perfctr_second_write; | 336 | int perfctr_second_write; |
311 | 337 | ||
338 | /* | ||
339 | * sysfs attrs | ||
340 | */ | ||
341 | int attr_rdpmc; | ||
342 | |||
343 | /* | ||
344 | * CPU Hotplug hooks | ||
345 | */ | ||
312 | int (*cpu_prepare)(int cpu); | 346 | int (*cpu_prepare)(int cpu); |
313 | void (*cpu_starting)(int cpu); | 347 | void (*cpu_starting)(int cpu); |
314 | void (*cpu_dying)(int cpu); | 348 | void (*cpu_dying)(int cpu); |
315 | void (*cpu_dead)(int cpu); | 349 | void (*cpu_dead)(int cpu); |
350 | void (*flush_branch_stack)(void); | ||
316 | 351 | ||
317 | /* | 352 | /* |
318 | * Intel Arch Perfmon v2+ | 353 | * Intel Arch Perfmon v2+ |
@@ -334,6 +369,8 @@ struct x86_pmu { | |||
334 | */ | 369 | */ |
335 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | 370 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
336 | int lbr_nr; /* hardware stack size */ | 371 | int lbr_nr; /* hardware stack size */ |
372 | u64 lbr_sel_mask; /* LBR_SELECT valid bits */ | ||
373 | const int *lbr_sel_map; /* lbr_select mappings */ | ||
337 | 374 | ||
338 | /* | 375 | /* |
339 | * Extra registers for events | 376 | * Extra registers for events |
@@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint; | |||
447 | 484 | ||
448 | extern struct event_constraint unconstrained; | 485 | extern struct event_constraint unconstrained; |
449 | 486 | ||
487 | static inline bool kernel_ip(unsigned long ip) | ||
488 | { | ||
489 | #ifdef CONFIG_X86_32 | ||
490 | return ip > PAGE_OFFSET; | ||
491 | #else | ||
492 | return (long)ip < 0; | ||
493 | #endif | ||
494 | } | ||
495 | |||
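kernel_ip() moves to the shared header so both the PEBS and the new LBR code can classify addresses. On 64-bit, kernel text lives in the upper canonical half of the address space, so the sign bit doubles as the privilege indicator; two illustrative values (assuming the usual x86-64 layout):

        kernel_ip(0xffffffff81000000UL);  /* (long) < 0 -> true: kernel */
        kernel_ip(0x0000000000400000UL);  /* (long) > 0 -> false: user  */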
450 | #ifdef CONFIG_CPU_SUP_AMD | 496 | #ifdef CONFIG_CPU_SUP_AMD |
451 | 497 | ||
452 | int amd_pmu_init(void); | 498 | int amd_pmu_init(void); |
@@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); | |||
527 | 573 | ||
528 | void intel_pmu_lbr_init_atom(void); | 574 | void intel_pmu_lbr_init_atom(void); |
529 | 575 | ||
576 | void intel_pmu_lbr_init_snb(void); | ||
577 | |||
578 | int intel_pmu_setup_lbr_filter(struct perf_event *event); | ||
579 | |||
530 | int p4_pmu_init(void); | 580 | int p4_pmu_init(void); |
531 | 581 | ||
532 | int p6_pmu_init(void); | 582 | int p6_pmu_init(void); |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67250a52430b..dd002faff7a6 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
139 | if (ret) | 139 | if (ret) |
140 | return ret; | 140 | return ret; |
141 | 141 | ||
142 | if (has_branch_stack(event)) | ||
143 | return -EOPNOTSUPP; | ||
144 | |||
142 | if (event->attr.exclude_host && event->attr.exclude_guest) | 145 | if (event->attr.exclude_host && event->attr.exclude_guest) |
143 | /* | 146 | /* |
144 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | 147 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 61d4f79a550e..6a84e7f28f05 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids | |||
728 | }, | 728 | }, |
729 | }; | 729 | }; |
730 | 730 | ||
731 | static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) | ||
732 | { | ||
733 | /* user explicitly requested branch sampling */ | ||
734 | if (has_branch_stack(event)) | ||
735 | return true; | ||
736 | |||
737 | /* implicit branch sampling to correct PEBS skid */ | ||
738 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
739 | return true; | ||
740 | |||
741 | return false; | ||
742 | } | ||
743 | |||
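Note the second case: a PEBS event with precise_ip > 1 now pulls in the LBR implicitly, even when the user never asked for branch sampling, because the LBR is what lets intel_pmu_pebs_fixup_ip() correct the off-by-one skid. A sketch of an attr that triggers this path:

        struct perf_event_attr attr = {
                .type       = PERF_TYPE_HARDWARE,
                .config     = PERF_COUNT_HW_CPU_CYCLES,
                .precise_ip = 2,  /* what "perf record -e cycles:pp" requests */
        };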
731 | static void intel_pmu_disable_all(void) | 744 | static void intel_pmu_disable_all(void) |
732 | { | 745 | { |
733 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 746 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
882 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); | 895 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
883 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); | 896 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
884 | 897 | ||
898 | /* | ||
899 | * the LBR must be disabled before any actual event | ||
900 | * because any event may be combined with LBR | ||
901 | */ | ||
902 | if (intel_pmu_needs_lbr_smpl(event)) | ||
903 | intel_pmu_lbr_disable(event); | ||
904 | |||
885 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 905 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
886 | intel_pmu_disable_fixed(hwc); | 906 | intel_pmu_disable_fixed(hwc); |
887 | return; | 907 | return; |
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
936 | intel_pmu_enable_bts(hwc->config); | 956 | intel_pmu_enable_bts(hwc->config); |
937 | return; | 957 | return; |
938 | } | 958 | } |
959 | /* | ||
960 | * the LBR must be enabled before any actual event | ||
961 | * because any event may be combined with LBR | ||
962 | */ | ||
963 | if (intel_pmu_needs_lbr_smpl(event)) | ||
964 | intel_pmu_lbr_enable(event); | ||
939 | 965 | ||
940 | if (event->attr.exclude_host) | 966 | if (event->attr.exclude_host) |
941 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); | 967 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); |
@@ -1058,6 +1084,9 @@ again: | |||
1058 | 1084 | ||
1059 | data.period = event->hw.last_period; | 1085 | data.period = event->hw.last_period; |
1060 | 1086 | ||
1087 | if (has_branch_stack(event)) | ||
1088 | data.br_stack = &cpuc->lbr_stack; | ||
1089 | |||
1061 | if (perf_event_overflow(event, &data, regs)) | 1090 | if (perf_event_overflow(event, &data, regs)) |
1062 | x86_pmu_stop(event, 0); | 1091 | x86_pmu_stop(event, 0); |
1063 | } | 1092 | } |
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | |||
1124 | */ | 1153 | */ |
1125 | static struct event_constraint * | 1154 | static struct event_constraint * |
1126 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | 1155 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1127 | struct perf_event *event) | 1156 | struct perf_event *event, |
1157 | struct hw_perf_event_extra *reg) | ||
1128 | { | 1158 | { |
1129 | struct event_constraint *c = &emptyconstraint; | 1159 | struct event_constraint *c = &emptyconstraint; |
1130 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; | ||
1131 | struct er_account *era; | 1160 | struct er_account *era; |
1132 | unsigned long flags; | 1161 | unsigned long flags; |
1133 | int orig_idx = reg->idx; | 1162 | int orig_idx = reg->idx; |
1134 | 1163 | ||
1135 | /* already allocated shared msr */ | 1164 | /* already allocated shared msr */ |
1136 | if (reg->alloc) | 1165 | if (reg->alloc) |
1137 | return &unconstrained; | 1166 | return NULL; /* call x86_get_event_constraint() */ |
1138 | 1167 | ||
1139 | again: | 1168 | again: |
1140 | era = &cpuc->shared_regs->regs[reg->idx]; | 1169 | era = &cpuc->shared_regs->regs[reg->idx]; |
@@ -1157,14 +1186,10 @@ again: | |||
1157 | reg->alloc = 1; | 1186 | reg->alloc = 1; |
1158 | 1187 | ||
1159 | /* | 1188 | /* |
1160 | * All events using extra_reg are unconstrained. | 1189 | * need to call x86_get_event_constraint() |
1161 | * Avoids calling x86_get_event_constraints() | 1190 | * to check if associated event has constraints |
1162 | * | ||
1163 | * Must revisit if extra_reg controlling events | ||
1164 | * ever have constraints. Worst case we go through | ||
1165 | * the regular event constraint table. | ||
1166 | */ | 1191 | */ |
1167 | c = &unconstrained; | 1192 | c = NULL; |
1168 | } else if (intel_try_alt_er(event, orig_idx)) { | 1193 | } else if (intel_try_alt_er(event, orig_idx)) { |
1169 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1194 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1170 | goto again; | 1195 | goto again; |
@@ -1201,11 +1226,23 @@ static struct event_constraint * | |||
1201 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | 1226 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
1202 | struct perf_event *event) | 1227 | struct perf_event *event) |
1203 | { | 1228 | { |
1204 | struct event_constraint *c = NULL; | 1229 | struct event_constraint *c = NULL, *d; |
1205 | 1230 | struct hw_perf_event_extra *xreg, *breg; | |
1206 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | 1231 | |
1207 | c = __intel_shared_reg_get_constraints(cpuc, event); | 1232 | xreg = &event->hw.extra_reg; |
1208 | 1233 | if (xreg->idx != EXTRA_REG_NONE) { | |
1234 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); | ||
1235 | if (c == &emptyconstraint) | ||
1236 | return c; | ||
1237 | } | ||
1238 | breg = &event->hw.branch_reg; | ||
1239 | if (breg->idx != EXTRA_REG_NONE) { | ||
1240 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); | ||
1241 | if (d == &emptyconstraint) { | ||
1242 | __intel_shared_reg_put_constraints(cpuc, xreg); | ||
1243 | c = d; | ||
1244 | } | ||
1245 | } | ||
1209 | return c; | 1246 | return c; |
1210 | } | 1247 | } |
1211 | 1248 | ||
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | |||
1253 | reg = &event->hw.extra_reg; | 1290 | reg = &event->hw.extra_reg; |
1254 | if (reg->idx != EXTRA_REG_NONE) | 1291 | if (reg->idx != EXTRA_REG_NONE) |
1255 | __intel_shared_reg_put_constraints(cpuc, reg); | 1292 | __intel_shared_reg_put_constraints(cpuc, reg); |
1293 | |||
1294 | reg = &event->hw.branch_reg; | ||
1295 | if (reg->idx != EXTRA_REG_NONE) | ||
1296 | __intel_shared_reg_put_constraints(cpuc, reg); | ||
1256 | } | 1297 | } |
1257 | 1298 | ||
1258 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1299 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
@@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
1288 | * | 1329 | * |
1289 | * Thereby we gain a PEBS capable cycle counter. | 1330 | * Thereby we gain a PEBS capable cycle counter. |
1290 | */ | 1331 | */ |
1291 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | 1332 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
1333 | |||
1292 | 1334 | ||
1293 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | 1335 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
1294 | event->hw.config = alt_config; | 1336 | event->hw.config = alt_config; |
1295 | } | 1337 | } |
1296 | 1338 | ||
1339 | if (intel_pmu_needs_lbr_smpl(event)) { | ||
1340 | ret = intel_pmu_setup_lbr_filter(event); | ||
1341 | if (ret) | ||
1342 | return ret; | ||
1343 | } | ||
1344 | |||
1297 | if (event->attr.type != PERF_TYPE_RAW) | 1345 | if (event->attr.type != PERF_TYPE_RAW) |
1298 | return 0; | 1346 | return 0; |
1299 | 1347 | ||
@@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) | |||
1432 | { | 1480 | { |
1433 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1481 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1434 | 1482 | ||
1435 | if (!x86_pmu.extra_regs) | 1483 | if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) |
1436 | return NOTIFY_OK; | 1484 | return NOTIFY_OK; |
1437 | 1485 | ||
1438 | cpuc->shared_regs = allocate_shared_regs(cpu); | 1486 | cpuc->shared_regs = allocate_shared_regs(cpu); |
@@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1454 | */ | 1502 | */ |
1455 | intel_pmu_lbr_reset(); | 1503 | intel_pmu_lbr_reset(); |
1456 | 1504 | ||
1457 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) | 1505 | cpuc->lbr_sel = NULL; |
1506 | |||
1507 | if (!cpuc->shared_regs) | ||
1458 | return; | 1508 | return; |
1459 | 1509 | ||
1460 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1510 | if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { |
1461 | struct intel_shared_regs *pc; | 1511 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1512 | struct intel_shared_regs *pc; | ||
1462 | 1513 | ||
1463 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 1514 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
1464 | if (pc && pc->core_id == core_id) { | 1515 | if (pc && pc->core_id == core_id) { |
1465 | cpuc->kfree_on_online = cpuc->shared_regs; | 1516 | cpuc->kfree_on_online = cpuc->shared_regs; |
1466 | cpuc->shared_regs = pc; | 1517 | cpuc->shared_regs = pc; |
1467 | break; | 1518 | break; |
1519 | } | ||
1468 | } | 1520 | } |
1521 | cpuc->shared_regs->core_id = core_id; | ||
1522 | cpuc->shared_regs->refcnt++; | ||
1469 | } | 1523 | } |
1470 | 1524 | ||
1471 | cpuc->shared_regs->core_id = core_id; | 1525 | if (x86_pmu.lbr_sel_map) |
1472 | cpuc->shared_regs->refcnt++; | 1526 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
1473 | } | 1527 | } |
1474 | 1528 | ||
1475 | static void intel_pmu_cpu_dying(int cpu) | 1529 | static void intel_pmu_cpu_dying(int cpu) |
@@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) | |||
1487 | fini_debug_store_on_cpu(cpu); | 1541 | fini_debug_store_on_cpu(cpu); |
1488 | } | 1542 | } |
1489 | 1543 | ||
1544 | static void intel_pmu_flush_branch_stack(void) | ||
1545 | { | ||
1546 | /* | ||
1547 | * Intel LBR does not tag entries with the | ||
1548 | * PID of the current task, so we need to | ||
1549 | * flush it on a context switch. | ||
1550 | * For now, we simply reset it. | ||
1551 | */ | ||
1552 | if (x86_pmu.lbr_nr) | ||
1553 | intel_pmu_lbr_reset(); | ||
1554 | } | ||
1555 | |||
1490 | static __initconst const struct x86_pmu intel_pmu = { | 1556 | static __initconst const struct x86_pmu intel_pmu = { |
1491 | .name = "Intel", | 1557 | .name = "Intel", |
1492 | .handle_irq = intel_pmu_handle_irq, | 1558 | .handle_irq = intel_pmu_handle_irq, |
@@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1514 | .cpu_starting = intel_pmu_cpu_starting, | 1580 | .cpu_starting = intel_pmu_cpu_starting, |
1515 | .cpu_dying = intel_pmu_cpu_dying, | 1581 | .cpu_dying = intel_pmu_cpu_dying, |
1516 | .guest_get_msrs = intel_guest_get_msrs, | 1582 | .guest_get_msrs = intel_guest_get_msrs, |
1583 | .flush_branch_stack = intel_pmu_flush_branch_stack, | ||
1517 | }; | 1584 | }; |
1518 | 1585 | ||
1519 | static __init void intel_clovertown_quirk(void) | 1586 | static __init void intel_clovertown_quirk(void) |
@@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void) | |||
1690 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1757 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1691 | 1758 | ||
1692 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1759 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1693 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1760 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1761 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1694 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1762 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1695 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1763 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1764 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
1696 | 1765 | ||
1697 | x86_add_quirk(intel_nehalem_quirk); | 1766 | x86_add_quirk(intel_nehalem_quirk); |
1698 | 1767 | ||
@@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void) | |||
1727 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1796 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |
1728 | 1797 | ||
1729 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1798 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1730 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1799 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1800 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1731 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1801 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1732 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1802 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1803 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
1733 | 1804 | ||
1734 | pr_cont("Westmere events, "); | 1805 | pr_cont("Westmere events, "); |
1735 | break; | 1806 | break; |
@@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void) | |||
1740 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1811 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1741 | sizeof(hw_cache_event_ids)); | 1812 | sizeof(hw_cache_event_ids)); |
1742 | 1813 | ||
1743 | intel_pmu_lbr_init_nhm(); | 1814 | intel_pmu_lbr_init_snb(); |
1744 | 1815 | ||
1745 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1816 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1746 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; | 1817 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
@@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void) | |||
1750 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | 1821 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; |
1751 | 1822 | ||
1752 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | 1823 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
1753 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1824 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1825 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1754 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ | 1826 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ |
1755 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; | 1827 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1828 | X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); | ||
1756 | 1829 | ||
1757 | pr_cont("SandyBridge events, "); | 1830 | pr_cont("SandyBridge events, "); |
1758 | break; | 1831 | break; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index d6bd49faa40c..7f64df19e7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | 4 | ||
5 | #include <asm/perf_event.h> | 5 | #include <asm/perf_event.h> |
6 | #include <asm/insn.h> | ||
6 | 7 | ||
7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
8 | 9 | ||
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) | |||
439 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | 440 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; |
440 | 441 | ||
441 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | 442 | cpuc->pebs_enabled |= 1ULL << hwc->idx; |
442 | |||
443 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
444 | intel_pmu_lbr_enable(event); | ||
445 | } | 443 | } |
446 | 444 | ||
447 | void intel_pmu_pebs_disable(struct perf_event *event) | 445 | void intel_pmu_pebs_disable(struct perf_event *event) |
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) | |||
454 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | 452 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
455 | 453 | ||
456 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | 454 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; |
457 | |||
458 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
459 | intel_pmu_lbr_disable(event); | ||
460 | } | 455 | } |
461 | 456 | ||
462 | void intel_pmu_pebs_enable_all(void) | 457 | void intel_pmu_pebs_enable_all(void) |
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) | |||
475 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | 470 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); |
476 | } | 471 | } |
477 | 472 | ||
478 | #include <asm/insn.h> | ||
479 | |||
480 | static inline bool kernel_ip(unsigned long ip) | ||
481 | { | ||
482 | #ifdef CONFIG_X86_32 | ||
483 | return ip > PAGE_OFFSET; | ||
484 | #else | ||
485 | return (long)ip < 0; | ||
486 | #endif | ||
487 | } | ||
488 | |||
489 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | 473 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) |
490 | { | 474 | { |
491 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 475 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
572 | * both formats and we don't use the other fields in this | 556 | * both formats and we don't use the other fields in this |
573 | * routine. | 557 | * routine. |
574 | */ | 558 | */ |
559 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
575 | struct pebs_record_core *pebs = __pebs; | 560 | struct pebs_record_core *pebs = __pebs; |
576 | struct perf_sample_data data; | 561 | struct perf_sample_data data; |
577 | struct pt_regs regs; | 562 | struct pt_regs regs; |
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
602 | else | 587 | else |
603 | regs.flags &= ~PERF_EFLAGS_EXACT; | 588 | regs.flags &= ~PERF_EFLAGS_EXACT; |
604 | 589 | ||
590 | if (has_branch_stack(event)) | ||
591 | data.br_stack = &cpuc->lbr_stack; | ||
592 | |||
605 | if (perf_event_overflow(event, &data, ®s)) | 593 | if (perf_event_overflow(event, &data, ®s)) |
606 | x86_pmu_stop(event, 0); | 594 | x86_pmu_stop(event, 0); |
607 | } | 595 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 47a7e63bfe54..520b4265fcd2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <asm/perf_event.h> | 4 | #include <asm/perf_event.h> |
5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | #include <asm/insn.h> | ||
6 | 7 | ||
7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
8 | 9 | ||
@@ -14,6 +15,100 @@ enum { | |||
14 | }; | 15 | }; |
15 | 16 | ||
16 | /* | 17 | /* |
18 | * Intel LBR_SELECT bits | ||
19 | * Intel Vol3a, April 2011, Section 16.7 Table 16-10 | ||
20 | * | ||
21 | * Hardware branch filter (not available on all CPUs) | ||
22 | */ | ||
23 | #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ | ||
24 | #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ | ||
25 | #define LBR_JCC_BIT 2 /* do not capture conditional branches */ | ||
26 | #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ | ||
27 | #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ | ||
28 | #define LBR_RETURN_BIT 5 /* do not capture near returns */ | ||
29 | #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ | ||
30 | #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ | ||
31 | #define LBR_FAR_BIT 8 /* do not capture far branches */ | ||
32 | |||
33 | #define LBR_KERNEL (1 << LBR_KERNEL_BIT) | ||
34 | #define LBR_USER (1 << LBR_USER_BIT) | ||
35 | #define LBR_JCC (1 << LBR_JCC_BIT) | ||
36 | #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) | ||
37 | #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) | ||
38 | #define LBR_RETURN (1 << LBR_RETURN_BIT) | ||
39 | #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) | ||
40 | #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) | ||
41 | #define LBR_FAR (1 << LBR_FAR_BIT) | ||
42 | |||
43 | #define LBR_PLM (LBR_KERNEL | LBR_USER) | ||
44 | |||
45 | #define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ | ||
46 | #define LBR_NOT_SUPP -1 /* LBR filter not supported */ | ||
47 | #define LBR_IGN 0 /* ignored */ | ||
48 | |||
49 | #define LBR_ANY \ | ||
50 | (LBR_JCC |\ | ||
51 | LBR_REL_CALL |\ | ||
52 | LBR_IND_CALL |\ | ||
53 | LBR_RETURN |\ | ||
54 | LBR_REL_JMP |\ | ||
55 | LBR_IND_JMP |\ | ||
56 | LBR_FAR) | ||
57 | |||
58 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
59 | |||
60 | #define for_each_branch_sample_type(x) \ | ||
61 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ | ||
62 | (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) | ||
63 | |||
64 | /* | ||
65 | * x86 control flow change classification | ||
66 | * x86 control flow changes include branches, interrupts, traps and faults | ||
67 | */ | ||
68 | enum { | ||
69 | X86_BR_NONE = 0, /* unknown */ | ||
70 | |||
71 | X86_BR_USER = 1 << 0, /* branch target is user */ | ||
72 | X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ | ||
73 | |||
74 | X86_BR_CALL = 1 << 2, /* call */ | ||
75 | X86_BR_RET = 1 << 3, /* return */ | ||
76 | X86_BR_SYSCALL = 1 << 4, /* syscall */ | ||
77 | X86_BR_SYSRET = 1 << 5, /* syscall return */ | ||
78 | X86_BR_INT = 1 << 6, /* sw interrupt */ | ||
79 | X86_BR_IRET = 1 << 7, /* return from interrupt */ | ||
80 | X86_BR_JCC = 1 << 8, /* conditional */ | ||
81 | X86_BR_JMP = 1 << 9, /* jump */ | ||
82 | X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ | ||
83 | X86_BR_IND_CALL = 1 << 11,/* indirect calls */ | ||
84 | }; | ||
85 | |||
86 | #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) | ||
87 | |||
88 | #define X86_BR_ANY \ | ||
89 | (X86_BR_CALL |\ | ||
90 | X86_BR_RET |\ | ||
91 | X86_BR_SYSCALL |\ | ||
92 | X86_BR_SYSRET |\ | ||
93 | X86_BR_INT |\ | ||
94 | X86_BR_IRET |\ | ||
95 | X86_BR_JCC |\ | ||
96 | X86_BR_JMP |\ | ||
97 | X86_BR_IRQ |\ | ||
98 | X86_BR_IND_CALL) | ||
99 | |||
100 | #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) | ||
101 | |||
102 | #define X86_BR_ANY_CALL \ | ||
103 | (X86_BR_CALL |\ | ||
104 | X86_BR_IND_CALL |\ | ||
105 | X86_BR_SYSCALL |\ | ||
106 | X86_BR_IRQ |\ | ||
107 | X86_BR_INT) | ||
108 | |||
109 | static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); | ||
110 | |||
111 | /* | ||
17 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | 112 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI |
18 | * otherwise it becomes near impossible to get a reliable stack. | 113 | * otherwise it becomes near impossible to get a reliable stack. |
19 | */ | 114 | */ |
@@ -21,6 +116,10 @@ enum { | |||
21 | static void __intel_pmu_lbr_enable(void) | 116 | static void __intel_pmu_lbr_enable(void) |
22 | { | 117 | { |
23 | u64 debugctl; | 118 | u64 debugctl; |
119 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
120 | |||
121 | if (cpuc->lbr_sel) | ||
122 | wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); | ||
24 | 123 | ||
25 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | 124 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); |
26 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | 125 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); |
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) | |||
76 | * Reset the LBR stack if we changed task context to | 175 | * Reset the LBR stack if we changed task context to |
77 | * avoid data leaks. | 176 | * avoid data leaks. |
78 | */ | 177 | */ |
79 | |||
80 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | 178 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { |
81 | intel_pmu_lbr_reset(); | 179 | intel_pmu_lbr_reset(); |
82 | cpuc->lbr_context = event->ctx; | 180 | cpuc->lbr_context = event->ctx; |
83 | } | 181 | } |
182 | cpuc->br_sel = event->hw.branch_reg.reg; | ||
84 | 183 | ||
85 | cpuc->lbr_users++; | 184 | cpuc->lbr_users++; |
86 | } | 185 | } |
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) | |||
95 | cpuc->lbr_users--; | 194 | cpuc->lbr_users--; |
96 | WARN_ON_ONCE(cpuc->lbr_users < 0); | 195 | WARN_ON_ONCE(cpuc->lbr_users < 0); |
97 | 196 | ||
98 | if (cpuc->enabled && !cpuc->lbr_users) | 197 | if (cpuc->enabled && !cpuc->lbr_users) { |
99 | __intel_pmu_lbr_disable(); | 198 | __intel_pmu_lbr_disable(); |
199 | /* avoid stale pointer */ | ||
200 | cpuc->lbr_context = NULL; | ||
201 | } | ||
100 | } | 202 | } |
101 | 203 | ||
102 | void intel_pmu_lbr_enable_all(void) | 204 | void intel_pmu_lbr_enable_all(void) |
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) | |||
115 | __intel_pmu_lbr_disable(); | 217 | __intel_pmu_lbr_disable(); |
116 | } | 218 | } |
117 | 219 | ||
220 | /* | ||
221 | * TOS = most recently recorded branch | ||
222 | */ | ||
118 | static inline u64 intel_pmu_lbr_tos(void) | 223 | static inline u64 intel_pmu_lbr_tos(void) |
119 | { | 224 | { |
120 | u64 tos; | 225 | u64 tos; |
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | |||
142 | 247 | ||
143 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | 248 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); |
144 | 249 | ||
145 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | 250 | cpuc->lbr_entries[i].from = msr_lastbranch.from; |
146 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | 251 | cpuc->lbr_entries[i].to = msr_lastbranch.to; |
147 | cpuc->lbr_entries[i].flags = 0; | 252 | cpuc->lbr_entries[i].mispred = 0; |
253 | cpuc->lbr_entries[i].predicted = 0; | ||
254 | cpuc->lbr_entries[i].reserved = 0; | ||
148 | } | 255 | } |
149 | cpuc->lbr_stack.nr = i; | 256 | cpuc->lbr_stack.nr = i; |
150 | } | 257 | } |
151 | 258 | ||
152 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
153 | |||
154 | /* | 259 | /* |
155 | * Due to lack of segmentation in Linux the effective address (offset) | 260 | * Due to lack of segmentation in Linux the effective address (offset) |
156 | * is the same as the linear address, allowing us to merge the LIP and EIP | 261 | * is the same as the linear address, allowing us to merge the LIP and EIP |
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
165 | 270 | ||
166 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | 271 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
167 | unsigned long lbr_idx = (tos - i) & mask; | 272 | unsigned long lbr_idx = (tos - i) & mask; |
168 | u64 from, to, flags = 0; | 273 | u64 from, to, mis = 0, pred = 0; |
169 | 274 | ||
170 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | 275 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); |
171 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | 276 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); |
172 | 277 | ||
173 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | 278 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { |
174 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | 279 | mis = !!(from & LBR_FROM_FLAG_MISPRED); |
280 | pred = !mis; | ||
175 | from = (u64)((((s64)from) << 1) >> 1); | 281 | from = (u64)((((s64)from) << 1) >> 1); |
176 | } | 282 | } |
177 | 283 | ||
178 | cpuc->lbr_entries[i].from = from; | 284 | cpuc->lbr_entries[i].from = from; |
179 | cpuc->lbr_entries[i].to = to; | 285 | cpuc->lbr_entries[i].to = to; |
180 | cpuc->lbr_entries[i].flags = flags; | 286 | cpuc->lbr_entries[i].mispred = mis; |
287 | cpuc->lbr_entries[i].predicted = pred; | ||
288 | cpuc->lbr_entries[i].reserved = 0; | ||
181 | } | 289 | } |
182 | cpuc->lbr_stack.nr = i; | 290 | cpuc->lbr_stack.nr = i; |
183 | } | 291 | } |
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) | |||
193 | intel_pmu_lbr_read_32(cpuc); | 301 | intel_pmu_lbr_read_32(cpuc); |
194 | else | 302 | else |
195 | intel_pmu_lbr_read_64(cpuc); | 303 | intel_pmu_lbr_read_64(cpuc); |
304 | |||
305 | intel_pmu_lbr_filter(cpuc); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * SW filter is used: | ||
310 | * - in case there is no HW filter | ||
311 | * - in case the HW filter has errata or limitations | ||
312 | */ | ||
313 | static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | ||
314 | { | ||
315 | u64 br_type = event->attr.branch_sample_type; | ||
316 | int mask = 0; | ||
317 | |||
318 | if (br_type & PERF_SAMPLE_BRANCH_USER) | ||
319 | mask |= X86_BR_USER; | ||
320 | |||
321 | if (br_type & PERF_SAMPLE_BRANCH_KERNEL) | ||
322 | mask |= X86_BR_KERNEL; | ||
323 | |||
324 | /* we ignore BRANCH_HV here */ | ||
325 | |||
326 | if (br_type & PERF_SAMPLE_BRANCH_ANY) | ||
327 | mask |= X86_BR_ANY; | ||
328 | |||
329 | if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) | ||
330 | mask |= X86_BR_ANY_CALL; | ||
331 | |||
332 | if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) | ||
333 | mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; | ||
334 | |||
335 | if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) | ||
336 | mask |= X86_BR_IND_CALL; | ||
337 | /* | ||
338 | * stash the actual user request into reg; it may | ||
339 | * be used by fixup code on some CPUs | ||
340 | */ | ||
341 | event->hw.branch_reg.reg = mask; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * set up the HW LBR filter | ||
346 | * Used only when available; it may not be enough to disambiguate | ||
347 | * all branches and may need the help of the SW filter | ||
348 | */ | ||
349 | static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | ||
350 | { | ||
351 | struct hw_perf_event_extra *reg; | ||
352 | u64 br_type = event->attr.branch_sample_type; | ||
353 | u64 mask = 0, m; | ||
354 | u64 v; | ||
355 | |||
356 | for_each_branch_sample_type(m) { | ||
357 | if (!(br_type & m)) | ||
358 | continue; | ||
359 | |||
360 | v = x86_pmu.lbr_sel_map[m]; | ||
361 | if (v == LBR_NOT_SUPP) | ||
362 | return -EOPNOTSUPP; | ||
363 | |||
364 | if (v != LBR_IGN) | ||
365 | mask |= v; | ||
366 | } | ||
367 | reg = &event->hw.branch_reg; | ||
368 | reg->idx = EXTRA_REG_LBR; | ||
369 | |||
370 | /* LBR_SELECT operates in suppress mode so invert mask */ | ||
371 | reg->config = ~mask & x86_pmu.lbr_sel_mask; | ||
372 | |||
373 | return 0; | ||
374 | } | ||
375 | |||
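Because LBR_SELECT operates in suppress mode, the accumulated mask is inverted before being stored. A worked example using the bit definitions above: a request that maps to LBR_USER | LBR_IND_CALL gives mask = 0x12, so reg->config = ~0x12 & 0x1ff = 0x1ed, i.e. ring-0 capture and every other branch class are suppressed, and only user-level indirect calls are recorded.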
376 | int intel_pmu_setup_lbr_filter(struct perf_event *event) | ||
377 | { | ||
378 | int ret = 0; | ||
379 | |||
380 | /* | ||
381 | * no LBR on this PMU | ||
382 | */ | ||
383 | if (!x86_pmu.lbr_nr) | ||
384 | return -EOPNOTSUPP; | ||
385 | |||
386 | /* | ||
387 | * setup SW LBR filter | ||
388 | */ | ||
389 | intel_pmu_setup_sw_lbr_filter(event); | ||
390 | |||
391 | /* | ||
392 | * setup HW LBR filter, if any | ||
393 | */ | ||
394 | if (x86_pmu.lbr_sel_map) | ||
395 | ret = intel_pmu_setup_hw_lbr_filter(event); | ||
396 | |||
397 | return ret; | ||
196 | } | 398 | } |
197 | 399 | ||
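From userspace the whole path is driven by the new branch_sample_type field. A hedged sketch of an attr that exercises the filters set up here (constants as introduced by this series):

        struct perf_event_attr attr = {
                .type          = PERF_TYPE_HARDWARE,
                .config        = PERF_COUNT_HW_CPU_CYCLES,
                .sample_period = 100000,
                .sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
                .branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
                                      PERF_SAMPLE_BRANCH_IND_CALL,
        };

intel_pmu_setup_lbr_filter() then records X86_BR_KERNEL | X86_BR_IND_CALL as the SW mask and, when lbr_sel_map is present, derives the matching LBR_SELECT value.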
400 | /* | ||
401 | * return the type of control flow change at address "from" | ||
402 | * The instruction is not necessarily a branch (e.g., in case of interrupt). | ||
403 | * | ||
404 | * The branch type returned also includes the priv level of the | ||
405 | * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). | ||
406 | * | ||
407 | * If a branch type is unknown OR the instruction cannot be | ||
408 | * decoded (e.g., text page not present), then X86_BR_NONE is | ||
409 | * returned. | ||
410 | */ | ||
411 | static int branch_type(unsigned long from, unsigned long to) | ||
412 | { | ||
413 | struct insn insn; | ||
414 | void *addr; | ||
415 | int bytes, size = MAX_INSN_SIZE; | ||
416 | int ret = X86_BR_NONE; | ||
417 | int ext, to_plm, from_plm; | ||
418 | u8 buf[MAX_INSN_SIZE]; | ||
419 | int is64 = 0; | ||
420 | |||
421 | to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; | ||
422 | from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; | ||
423 | |||
424 | /* | ||
425 | * may be zero if the LBR did not fill up after a reset by the time | ||
426 | * we get a PMU interrupt | ||
427 | */ | ||
428 | if (from == 0 || to == 0) | ||
429 | return X86_BR_NONE; | ||
430 | |||
431 | if (from_plm == X86_BR_USER) { | ||
432 | /* | ||
433 | * can happen if measuring at the user level only | ||
434 | * and we interrupt in a kernel thread, e.g., idle. | ||
435 | */ | ||
436 | if (!current->mm) | ||
437 | return X86_BR_NONE; | ||
438 | |||
439 | /* may fail if text not present */ | ||
440 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); | ||
441 | if (bytes != size) | ||
442 | return X86_BR_NONE; | ||
443 | |||
444 | addr = buf; | ||
445 | } else | ||
446 | addr = (void *)from; | ||
447 | |||
448 | /* | ||
449 | * decoder needs to know the ABI especially | ||
450 | * on 64-bit systems running 32-bit apps | ||
451 | */ | ||
452 | #ifdef CONFIG_X86_64 | ||
453 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); | ||
454 | #endif | ||
455 | insn_init(&insn, addr, is64); | ||
456 | insn_get_opcode(&insn); | ||
457 | |||
458 | switch (insn.opcode.bytes[0]) { | ||
459 | case 0xf: | ||
460 | switch (insn.opcode.bytes[1]) { | ||
461 | case 0x05: /* syscall */ | ||
462 | case 0x34: /* sysenter */ | ||
463 | ret = X86_BR_SYSCALL; | ||
464 | break; | ||
465 | case 0x07: /* sysret */ | ||
466 | case 0x35: /* sysexit */ | ||
467 | ret = X86_BR_SYSRET; | ||
468 | break; | ||
469 | case 0x80 ... 0x8f: /* conditional */ | ||
470 | ret = X86_BR_JCC; | ||
471 | break; | ||
472 | default: | ||
473 | ret = X86_BR_NONE; | ||
474 | } | ||
475 | break; | ||
476 | case 0x70 ... 0x7f: /* conditional */ | ||
477 | ret = X86_BR_JCC; | ||
478 | break; | ||
479 | case 0xc2: /* near ret */ | ||
480 | case 0xc3: /* near ret */ | ||
481 | case 0xca: /* far ret */ | ||
482 | case 0xcb: /* far ret */ | ||
483 | ret = X86_BR_RET; | ||
484 | break; | ||
485 | case 0xcf: /* iret */ | ||
486 | ret = X86_BR_IRET; | ||
487 | break; | ||
488 | case 0xcc ... 0xce: /* int */ | ||
489 | ret = X86_BR_INT; | ||
490 | break; | ||
491 | case 0xe8: /* call near rel */ | ||
492 | case 0x9a: /* call far absolute */ | ||
493 | ret = X86_BR_CALL; | ||
494 | break; | ||
495 | case 0xe0 ... 0xe3: /* loop jmp */ | ||
496 | ret = X86_BR_JCC; | ||
497 | break; | ||
498 | case 0xe9 ... 0xeb: /* jmp */ | ||
499 | ret = X86_BR_JMP; | ||
500 | break; | ||
501 | case 0xff: /* call near absolute, call far absolute ind */ | ||
502 | insn_get_modrm(&insn); | ||
503 | ext = (insn.modrm.bytes[0] >> 3) & 0x7; | ||
504 | switch (ext) { | ||
505 | case 2: /* near ind call */ | ||
506 | case 3: /* far ind call */ | ||
507 | ret = X86_BR_IND_CALL; | ||
508 | break; | ||
509 | case 4: | ||
510 | case 5: | ||
511 | ret = X86_BR_JMP; | ||
512 | break; | ||
513 | } | ||
514 | break; | ||
515 | default: | ||
516 | ret = X86_BR_NONE; | ||
517 | } | ||
518 | /* | ||
519 | * interrupts, traps, faults (and thus ring transitions) may | ||
520 | * occur on any instruction. Thus, to classify them correctly, | ||
521 | * we need to first look at the from and to priv levels. If they | ||
522 | * are different and to is in the kernel, then it indicates | ||
523 | * a ring transition. If the from instruction is not a ring | ||
524 | * transition instr (syscall, sysenter, int), then it means | ||
525 | * it was an irq, trap or fault. | ||
526 | * | ||
527 | * we have no way of detecting kernel to kernel faults. | ||
528 | */ | ||
529 | if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL | ||
530 | && ret != X86_BR_SYSCALL && ret != X86_BR_INT) | ||
531 | ret = X86_BR_IRQ; | ||
532 | |||
533 | /* | ||
534 | * branch priv level determined by target as | ||
535 | * is done by HW when LBR_SELECT is implemented | ||
536 | */ | ||
537 | if (ret != X86_BR_NONE) | ||
538 | ret |= to_plm; | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
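A concrete case: a 0xc3 near "ret" executed in user space whose target is also in user space decodes to X86_BR_RET; the target privilege level is OR-ed in at the end, so branch_type() returns X86_BR_RET | X86_BR_USER.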
543 | /* | ||
544 | * implement the actual branch filter based on user demand. | ||
545 | * Hardware may not exactly satisfy that request, thus | ||
546 | * we need to inspect opcodes. Mismatched branches are | ||
547 | * discarded. Therefore, the number of branches returned | ||
548 | * in a PERF_SAMPLE_BRANCH_STACK sample may vary. | ||
549 | */ | ||
550 | static void | ||
551 | intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) | ||
552 | { | ||
553 | u64 from, to; | ||
554 | int br_sel = cpuc->br_sel; | ||
555 | int i, j, type; | ||
556 | bool compress = false; | ||
557 | |||
558 | /* if sampling all branches, then nothing to filter */ | ||
559 | if ((br_sel & X86_BR_ALL) == X86_BR_ALL) | ||
560 | return; | ||
561 | |||
562 | for (i = 0; i < cpuc->lbr_stack.nr; i++) { | ||
563 | |||
564 | from = cpuc->lbr_entries[i].from; | ||
565 | to = cpuc->lbr_entries[i].to; | ||
566 | |||
567 | type = branch_type(from, to); | ||
568 | |||
569 | /* if type does not correspond, then discard */ | ||
570 | if (type == X86_BR_NONE || (br_sel & type) != type) { | ||
571 | cpuc->lbr_entries[i].from = 0; | ||
572 | compress = true; | ||
573 | } | ||
574 | } | ||
575 | |||
576 | if (!compress) | ||
577 | return; | ||
578 | |||
579 | /* remove all entries with from=0 */ | ||
580 | for (i = 0; i < cpuc->lbr_stack.nr; ) { | ||
581 | if (!cpuc->lbr_entries[i].from) { | ||
582 | j = i; | ||
583 | while (++j < cpuc->lbr_stack.nr) | ||
584 | cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; | ||
585 | cpuc->lbr_stack.nr--; | ||
586 | if (!cpuc->lbr_entries[i].from) | ||
587 | continue; | ||
588 | } | ||
589 | i++; | ||
590 | } | ||
591 | } | ||
592 | |||
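The compaction pass is an in-place filter: every discarded record was marked with from = 0 above, and the inner while-loop shifts the tail down over it. For example, a 5-entry stack [A, 0, B, 0, C] ends up as [A, B, C] with lbr_stack.nr dropping from 5 to 3; the re-test of entries[i].from after the shift handles the case where the entry moved into slot i is itself marked.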
593 | /* | ||
594 | * Map interface branch filters onto LBR filters | ||
595 | */ | ||
596 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
597 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
598 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
599 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
600 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
601 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP | ||
602 | | LBR_IND_JMP | LBR_FAR, | ||
603 | /* | ||
604 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches | ||
605 | */ | ||
606 | [PERF_SAMPLE_BRANCH_ANY_CALL] = | ||
607 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, | ||
608 | /* | ||
609 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL | ||
610 | */ | ||
611 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, | ||
612 | }; | ||
613 | |||
614 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
615 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
616 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
617 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
618 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
619 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, | ||
620 | [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | ||
621 | | LBR_FAR, | ||
622 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, | ||
623 | }; | ||
624 | |||
625 | /* core */ | ||
198 | void intel_pmu_lbr_init_core(void) | 626 | void intel_pmu_lbr_init_core(void) |
199 | { | 627 | { |
200 | x86_pmu.lbr_nr = 4; | 628 | x86_pmu.lbr_nr = 4; |
201 | x86_pmu.lbr_tos = 0x01c9; | 629 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
202 | x86_pmu.lbr_from = 0x40; | 630 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
203 | x86_pmu.lbr_to = 0x60; | 631 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
632 | |||
633 | /* | ||
634 | * SW branch filter usage: | ||
635 | * - compensate for lack of HW filter | ||
636 | */ | ||
637 | pr_cont("4-deep LBR, "); | ||
204 | } | 638 | } |
205 | 639 | ||
640 | /* nehalem/westmere */ | ||
206 | void intel_pmu_lbr_init_nhm(void) | 641 | void intel_pmu_lbr_init_nhm(void) |
207 | { | 642 | { |
208 | x86_pmu.lbr_nr = 16; | 643 | x86_pmu.lbr_nr = 16; |
209 | x86_pmu.lbr_tos = 0x01c9; | 644 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
210 | x86_pmu.lbr_from = 0x680; | 645 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; |
211 | x86_pmu.lbr_to = 0x6c0; | 646 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; |
647 | |||
648 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
649 | x86_pmu.lbr_sel_map = nhm_lbr_sel_map; | ||
650 | |||
651 | /* | ||
652 | * SW branch filter usage: | ||
653 | * - workaround LBR_SEL errata (see above) | ||
654 | * - support syscall, sysret capture. | ||
655 | * That requires LBR_FAR but that means far | ||
656 | * jmps need to be filtered out | ||
657 | */ | ||
658 | pr_cont("16-deep LBR, "); | ||
659 | } | ||
660 | |||
661 | /* sandy bridge */ | ||
662 | void intel_pmu_lbr_init_snb(void) | ||
663 | { | ||
664 | x86_pmu.lbr_nr = 16; | ||
665 | x86_pmu.lbr_tos = MSR_LBR_TOS; | ||
666 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; | ||
667 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; | ||
668 | |||
669 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
670 | x86_pmu.lbr_sel_map = snb_lbr_sel_map; | ||
671 | |||
672 | /* | ||
673 | * SW branch filter usage: | ||
674 | * - support syscall, sysret capture. | ||
675 | * That requires LBR_FAR but that means far | ||
676 | * jmps need to be filtered out | ||
677 | */ | ||
678 | pr_cont("16-deep LBR, "); | ||
212 | } | 679 | } |
213 | 680 | ||
681 | /* atom */ | ||
214 | void intel_pmu_lbr_init_atom(void) | 682 | void intel_pmu_lbr_init_atom(void) |
215 | { | 683 | { |
684 | /* | ||
685 | * only models starting at stepping 10 seem | ||
686 | * to have an operational LBR which can freeze | ||
687 | * on PMU interrupt | ||
688 | */ | ||
689 | if (boot_cpu_data.x86_mask < 10) { | ||
690 | pr_cont("LBR disabled due to erratum"); | ||
691 | return; | ||
692 | } | ||
693 | |||
216 | x86_pmu.lbr_nr = 8; | 694 | x86_pmu.lbr_nr = 8; |
217 | x86_pmu.lbr_tos = 0x01c9; | 695 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
218 | x86_pmu.lbr_from = 0x40; | 696 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
219 | x86_pmu.lbr_to = 0x60; | 697 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
698 | |||
699 | /* | ||
700 | * SW branch filter usage: | ||
701 | * - compensate for lack of HW filter | ||
702 | */ | ||
703 | pr_cont("8-deep LBR, "); | ||
220 | } | 704 | } |
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h new file mode 100644 index 000000000000..3230b68ef29a --- /dev/null +++ b/arch/x86/kernel/kprobes-common.h | |||
@@ -0,0 +1,102 @@ | |||
1 | #ifndef __X86_KERNEL_KPROBES_COMMON_H | ||
2 | #define __X86_KERNEL_KPROBES_COMMON_H | ||
3 | |||
4 | /* Kprobes and Optprobes common header */ | ||
5 | |||
6 | #ifdef CONFIG_X86_64 | ||
7 | #define SAVE_REGS_STRING \ | ||
8 | /* Skip cs, ip, orig_ax. */ \ | ||
9 | " subq $24, %rsp\n" \ | ||
10 | " pushq %rdi\n" \ | ||
11 | " pushq %rsi\n" \ | ||
12 | " pushq %rdx\n" \ | ||
13 | " pushq %rcx\n" \ | ||
14 | " pushq %rax\n" \ | ||
15 | " pushq %r8\n" \ | ||
16 | " pushq %r9\n" \ | ||
17 | " pushq %r10\n" \ | ||
18 | " pushq %r11\n" \ | ||
19 | " pushq %rbx\n" \ | ||
20 | " pushq %rbp\n" \ | ||
21 | " pushq %r12\n" \ | ||
22 | " pushq %r13\n" \ | ||
23 | " pushq %r14\n" \ | ||
24 | " pushq %r15\n" | ||
25 | #define RESTORE_REGS_STRING \ | ||
26 | " popq %r15\n" \ | ||
27 | " popq %r14\n" \ | ||
28 | " popq %r13\n" \ | ||
29 | " popq %r12\n" \ | ||
30 | " popq %rbp\n" \ | ||
31 | " popq %rbx\n" \ | ||
32 | " popq %r11\n" \ | ||
33 | " popq %r10\n" \ | ||
34 | " popq %r9\n" \ | ||
35 | " popq %r8\n" \ | ||
36 | " popq %rax\n" \ | ||
37 | " popq %rcx\n" \ | ||
38 | " popq %rdx\n" \ | ||
39 | " popq %rsi\n" \ | ||
40 | " popq %rdi\n" \ | ||
41 | /* Skip orig_ax, ip, cs */ \ | ||
42 | " addq $24, %rsp\n" | ||
43 | #else | ||
44 | #define SAVE_REGS_STRING \ | ||
45 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
46 | " subl $16, %esp\n" \ | ||
47 | " pushl %fs\n" \ | ||
48 | " pushl %es\n" \ | ||
49 | " pushl %ds\n" \ | ||
50 | " pushl %eax\n" \ | ||
51 | " pushl %ebp\n" \ | ||
52 | " pushl %edi\n" \ | ||
53 | " pushl %esi\n" \ | ||
54 | " pushl %edx\n" \ | ||
55 | " pushl %ecx\n" \ | ||
56 | " pushl %ebx\n" | ||
57 | #define RESTORE_REGS_STRING \ | ||
58 | " popl %ebx\n" \ | ||
59 | " popl %ecx\n" \ | ||
60 | " popl %edx\n" \ | ||
61 | " popl %esi\n" \ | ||
62 | " popl %edi\n" \ | ||
63 | " popl %ebp\n" \ | ||
64 | " popl %eax\n" \ | ||
65 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here */\ | ||
66 | " addl $24, %esp\n" | ||
67 | #endif | ||
68 | |||
69 | /* Check whether the instruction is boostable */ | ||
70 | extern int can_boost(kprobe_opcode_t *instruction); | ||
71 | /* Recover instruction if given address is probed */ | ||
72 | extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, | ||
73 | unsigned long addr); | ||
74 | /* | ||
75 | * Copy an instruction and adjust the displacement if the instruction | ||
76 | * uses the %rip-relative addressing mode. | ||
77 | */ | ||
78 | extern int __copy_instruction(u8 *dest, u8 *src); | ||
79 | |||
80 | /* Generate a relative-jump/call instruction */ | ||
81 | extern void synthesize_reljump(void *from, void *to); | ||
82 | extern void synthesize_relcall(void *from, void *to); | ||
83 | |||
84 | #ifdef CONFIG_OPTPROBES | ||
85 | extern int arch_init_optprobes(void); | ||
86 | extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); | ||
87 | extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); | ||
88 | #else /* !CONFIG_OPTPROBES */ | ||
89 | static inline int arch_init_optprobes(void) | ||
90 | { | ||
91 | return 0; | ||
92 | } | ||
93 | static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
97 | static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
98 | { | ||
99 | return addr; | ||
100 | } | ||
101 | #endif | ||
102 | #endif | ||
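The synthesize_reljump()/synthesize_relcall() helpers shared through this header emit the 5-byte relative forms that RELATIVEJUMP_SIZE accounts for. A sketch of the jump encoding, assuming the standard x86 "jmp rel32" form (displacement relative to the next instruction; kernel u8/s32 types):

        static void emit_reljump(u8 *from, void *to)
        {
                from[0] = 0xe9;  /* JMP rel32 opcode */
                *(s32 *)(from + 1) = (s32)((long)to - (long)(from + 5));
        }

synthesize_relcall() differs only in the opcode byte (0xe8, the near call).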
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c new file mode 100644 index 000000000000..c5e410eed403 --- /dev/null +++ b/arch/x86/kernel/kprobes-opt.c | |||
@@ -0,0 +1,512 @@ | |||
1 | /* | ||
2 | * Kernel Probes Jump Optimization (Optprobes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * Copyright (C) Hitachi Ltd., 2012 | ||
20 | */ | ||
21 | #include <linux/kprobes.h> | ||
22 | #include <linux/ptrace.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/preempt.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/kallsyms.h> | ||
30 | #include <linux/ftrace.h> | ||
31 | |||
32 | #include <asm/cacheflush.h> | ||
33 | #include <asm/desc.h> | ||
34 | #include <asm/pgtable.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <asm/alternative.h> | ||
37 | #include <asm/insn.h> | ||
38 | #include <asm/debugreg.h> | ||
39 | |||
40 | #include "kprobes-common.h" | ||
41 | |||
42 | unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
43 | { | ||
44 | struct optimized_kprobe *op; | ||
45 | struct kprobe *kp; | ||
46 | long offs; | ||
47 | int i; | ||
48 | |||
49 | for (i = 0; i < RELATIVEJUMP_SIZE; i++) { | ||
50 | kp = get_kprobe((void *)addr - i); | ||
51 | /* This function only handles jump-optimized kprobes */ | ||
52 | if (kp && kprobe_optimized(kp)) { | ||
53 | op = container_of(kp, struct optimized_kprobe, kp); | ||
54 | /* If op->list is not empty, op is being optimized */ | ||
55 | if (list_empty(&op->list)) | ||
56 | goto found; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | return addr; | ||
61 | found: | ||
62 | /* | ||
63 | * If the kprobe is optimized, the original bytes may have been | ||
64 | * overwritten by the jump destination address. In this case, the original | ||
65 | * bytes must be recovered from the op->optinsn.copied_insn buffer. | ||
66 | */ | ||
67 | memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
68 | if (addr == (unsigned long)kp->addr) { | ||
69 | buf[0] = kp->opcode; | ||
70 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
71 | } else { | ||
72 | offs = addr - (unsigned long)kp->addr - 1; | ||
73 | memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); | ||
74 | } | ||
75 | |||
76 | return (unsigned long)buf; | ||
77 | } | ||
78 | |||
79 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
80 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) | ||
81 | { | ||
82 | #ifdef CONFIG_X86_64 | ||
83 | *addr++ = 0x48; | ||
84 | *addr++ = 0xbf; | ||
85 | #else | ||
86 | *addr++ = 0xb8; | ||
87 | #endif | ||
88 | *(unsigned long *)addr = val; | ||
89 | } | ||
90 | |||
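Concretely, the bytes emitted are 48 bf <imm64> on x86-64, i.e. "movabs $val, %rdi" (the first argument register of the 64-bit calling convention), and b8 <imm32> on 32-bit, i.e. "mov $val, %eax" (the first argument register under the kernel's regparm(3) convention).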
91 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
92 | { | ||
93 | asm volatile ( | ||
94 | ".global optprobe_template_entry\n" | ||
95 | "optprobe_template_entry:\n" | ||
96 | #ifdef CONFIG_X86_64 | ||
97 | /* We don't bother saving the ss register */ | ||
98 | " pushq %rsp\n" | ||
99 | " pushfq\n" | ||
100 | SAVE_REGS_STRING | ||
101 | " movq %rsp, %rsi\n" | ||
102 | ".global optprobe_template_val\n" | ||
103 | "optprobe_template_val:\n" | ||
104 | ASM_NOP5 | ||
105 | ASM_NOP5 | ||
106 | ".global optprobe_template_call\n" | ||
107 | "optprobe_template_call:\n" | ||
108 | ASM_NOP5 | ||
109 | /* Move flags to rsp */ | ||
110 | " movq 144(%rsp), %rdx\n" | ||
111 | " movq %rdx, 152(%rsp)\n" | ||
112 | RESTORE_REGS_STRING | ||
113 | /* Skip flags entry */ | ||
114 | " addq $8, %rsp\n" | ||
115 | " popfq\n" | ||
116 | #else /* CONFIG_X86_32 */ | ||
117 | " pushf\n" | ||
118 | SAVE_REGS_STRING | ||
119 | " movl %esp, %edx\n" | ||
120 | ".global optprobe_template_val\n" | ||
121 | "optprobe_template_val:\n" | ||
122 | ASM_NOP5 | ||
123 | ".global optprobe_template_call\n" | ||
124 | "optprobe_template_call:\n" | ||
125 | ASM_NOP5 | ||
126 | RESTORE_REGS_STRING | ||
127 | " addl $4, %esp\n" /* skip cs */ | ||
128 | " popf\n" | ||
129 | #endif | ||
130 | ".global optprobe_template_end\n" | ||
131 | "optprobe_template_end:\n"); | ||
132 | } | ||
133 | |||
134 | #define TMPL_MOVE_IDX \ | ||
135 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
136 | #define TMPL_CALL_IDX \ | ||
137 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
138 | #define TMPL_END_IDX \ | ||
139 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
140 | |||
141 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
142 | |||
143 | /* Optimized kprobe callback function: called from optinsn */ | ||
144 | static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) | ||
145 | { | ||
146 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
147 | unsigned long flags; | ||
148 | |||
149 | /* This is possible if op is in the middle of delayed unoptimization */ | ||
150 | if (kprobe_disabled(&op->kp)) | ||
151 | return; | ||
152 | |||
153 | local_irq_save(flags); | ||
154 | if (kprobe_running()) { | ||
155 | kprobes_inc_nmissed_count(&op->kp); | ||
156 | } else { | ||
157 | /* Save skipped registers */ | ||
158 | #ifdef CONFIG_X86_64 | ||
159 | regs->cs = __KERNEL_CS; | ||
160 | #else | ||
161 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
162 | regs->gs = 0; | ||
163 | #endif | ||
164 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
165 | regs->orig_ax = ~0UL; | ||
166 | |||
167 | __this_cpu_write(current_kprobe, &op->kp); | ||
168 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
169 | opt_pre_handler(&op->kp, regs); | ||
170 | __this_cpu_write(current_kprobe, NULL); | ||
171 | } | ||
172 | local_irq_restore(flags); | ||
173 | } | ||
174 | |||
175 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
176 | { | ||
177 | int len = 0, ret; | ||
178 | |||
179 | while (len < RELATIVEJUMP_SIZE) { | ||
180 | ret = __copy_instruction(dest + len, src + len); | ||
181 | if (!ret || !can_boost(dest + len)) | ||
182 | return -EINVAL; | ||
183 | len += ret; | ||
184 | } | ||
185 | /* Check whether the address range is reserved */ | ||
186 | if (ftrace_text_reserved(src, src + len - 1) || | ||
187 | alternatives_text_reserved(src, src + len - 1) || | ||
188 | jump_label_text_reserved(src, src + len - 1)) | ||
189 | return -EBUSY; | ||
190 | |||
191 | return len; | ||
192 | } | ||
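
Relocating whole instructions until at least RELATIVEJUMP_SIZE bytes are covered is the heart of the loop above: the 5-byte jump must never cut an instruction in half. A self-contained userspace sketch, with a toy length decoder standing in for the kernel's insn framework (opcode table and buffers are illustrative):

    #include <stdio.h>
    #include <string.h>

    #define JMP_SIZE 5      /* size of the jmp rel32 that will overwrite src */

    /* Toy length decoder, just enough opcodes for the demo (GNU C case ranges). */
    static size_t insn_len(const unsigned char *p)
    {
            switch (p[0]) {
            case 0x90:              return 1;       /* nop */
            case 0x50 ... 0x57:     return 1;       /* push reg */
            case 0xb8 ... 0xbf:     return 5;       /* mov reg, imm32 */
            default:                return 0;       /* unknown: refuse */
            }
    }

    static long copy_whole_insns(unsigned char *dst, const unsigned char *src)
    {
            size_t len = 0;

            while (len < JMP_SIZE) {
                    size_t n = insn_len(src + len);
                    if (n == 0)
                            return -1;              /* undecodable: give up */
                    memcpy(dst + len, src + len, n);
                    len += n;
            }
            return (long)len;       /* may exceed JMP_SIZE; the tail is still copied whole */
    }

    int main(void)
    {
            /* push %rax; nop; mov $1, %eax: 1 + 1 + 5 = 7 bytes copied */
            unsigned char src[] = { 0x50, 0x90, 0xb8, 1, 0, 0, 0, 0 };
            unsigned char dst[16];

            printf("copied %ld bytes\n", copy_whole_insns(dst, src));
            return 0;
    }
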
193 | |||
194 | /* Check whether insn is an indirect jump */ | ||
195 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
196 | { | ||
197 | return ((insn->opcode.bytes[0] == 0xff && | ||
198 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
199 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
200 | } | ||
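
For opcode 0xff the ModRM reg field (bits 5:3) selects the operation; /4 is "jmp r/m" and /5 is "jmp far m", and (reg & 6) == 4 matches exactly those two values. A quick check of that mask trick:

    #include <stdio.h>

    static int modrm_is_indirect_jmp(unsigned char modrm)
    {
            unsigned int reg = (modrm >> 3) & 7;    /* what X86_MODRM_REG extracts */

            return (reg & 6) == 4;  /* true only for /4 (jmp) and /5 (jmp far) */
    }

    int main(void)
    {
            for (unsigned int reg = 0; reg < 8; reg++)      /* prints 1 for /4, /5 */
                    printf("/%u -> %d\n", reg,
                           modrm_is_indirect_jmp((unsigned char)(reg << 3)));
            return 0;
    }
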
201 | |||
202 | /* Check whether insn jumps into the specified address range */ | ||
203 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
204 | { | ||
205 | unsigned long target = 0; | ||
206 | |||
207 | switch (insn->opcode.bytes[0]) { | ||
208 | case 0xe0: /* loopne */ | ||
209 | case 0xe1: /* loope */ | ||
210 | case 0xe2: /* loop */ | ||
211 | case 0xe3: /* jcxz */ | ||
212 | case 0xe9: /* near relative jump */ | ||
213 | case 0xeb: /* short relative jump */ | ||
214 | break; | ||
215 | case 0x0f: | ||
216 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
217 | break; | ||
218 | return 0; | ||
219 | default: | ||
220 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
221 | break; | ||
222 | return 0; | ||
223 | } | ||
224 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
225 | |||
226 | return (start <= target && target <= start + len); | ||
227 | } | ||
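
x86 relative branches encode a signed displacement from the first byte after the instruction, which is why the target above is computed from next_byte rather than from kaddr. A sketch of the same arithmetic (addresses are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t kaddr = 0x1000;        /* illustrative, not a real address */
            unsigned len   = 5;             /* e9 xx xx xx xx: jmp rel32 */
            int32_t  imm   = -16;           /* signed, may jump backward */

            uint64_t target = kaddr + len + (int64_t)imm;   /* next_byte + imm */
            printf("target = %#llx\n", (unsigned long long)target); /* 0xff5 */
            return 0;
    }
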
228 | |||
229 | /* Decode the whole function to ensure no instruction jumps into the target */ | ||
230 | static int __kprobes can_optimize(unsigned long paddr) | ||
231 | { | ||
232 | unsigned long addr, size = 0, offset = 0; | ||
233 | struct insn insn; | ||
234 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
235 | |||
236 | /* Lookup symbol including addr */ | ||
237 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
238 | return 0; | ||
239 | |||
240 | /* | ||
241 | * Do not optimize in the entry code due to the unstable | ||
242 | * stack handling. | ||
243 | */ | ||
244 | if ((paddr >= (unsigned long)__entry_text_start) && | ||
245 | (paddr < (unsigned long)__entry_text_end)) | ||
246 | return 0; | ||
247 | |||
248 | /* Check there is enough space for a relative jump. */ | ||
249 | if (size - offset < RELATIVEJUMP_SIZE) | ||
250 | return 0; | ||
251 | |||
252 | /* Decode instructions */ | ||
253 | addr = paddr - offset; | ||
254 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
255 | if (search_exception_tables(addr)) | ||
256 | /* | ||
257 | * Since some fixup code may jump into this function, | ||
258 | * we can't optimize a kprobe in this function. | ||
259 | */ | ||
260 | return 0; | ||
261 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); | ||
262 | insn_get_length(&insn); | ||
263 | /* Another subsystem has put a breakpoint here */ | ||
264 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
265 | return 0; | ||
266 | /* Recover address */ | ||
267 | insn.kaddr = (void *)addr; | ||
268 | insn.next_byte = (void *)(addr + insn.length); | ||
269 | /* Check that no instruction jumps into the target */ | ||
270 | if (insn_is_indirect_jump(&insn) || | ||
271 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
272 | RELATIVE_ADDR_SIZE)) | ||
273 | return 0; | ||
274 | addr += insn.length; | ||
275 | } | ||
276 | |||
277 | return 1; | ||
278 | } | ||
279 | |||
280 | /* Check whether the optimized_kprobe can actually be optimized. */ | ||
281 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
282 | { | ||
283 | int i; | ||
284 | struct kprobe *p; | ||
285 | |||
286 | for (i = 1; i < op->optinsn.size; i++) { | ||
287 | p = get_kprobe(op->kp.addr + i); | ||
288 | if (p && !kprobe_disabled(p)) | ||
289 | return -EEXIST; | ||
290 | } | ||
291 | |||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | /* Check whether addr is within the optimized instructions. */ | ||
296 | int __kprobes | ||
297 | arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) | ||
298 | { | ||
299 | return ((unsigned long)op->kp.addr <= addr && | ||
300 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
301 | } | ||
302 | |||
303 | /* Free optimized instruction slot */ | ||
304 | static __kprobes | ||
305 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
306 | { | ||
307 | if (op->optinsn.insn) { | ||
308 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
309 | op->optinsn.insn = NULL; | ||
310 | op->optinsn.size = 0; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
315 | { | ||
316 | __arch_remove_optimized_kprobe(op, 1); | ||
317 | } | ||
318 | |||
319 | /* | ||
320 | * Copy the instructions that the jump will replace. | ||
321 | * Target instructions MUST be relocatable (checked inside). | ||
322 | * This is called when a new aggr(opt)probe is allocated or reused. | ||
323 | */ | ||
324 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
325 | { | ||
326 | u8 *buf; | ||
327 | int ret; | ||
328 | long rel; | ||
329 | |||
330 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
331 | return -EILSEQ; | ||
332 | |||
333 | op->optinsn.insn = get_optinsn_slot(); | ||
334 | if (!op->optinsn.insn) | ||
335 | return -ENOMEM; | ||
336 | |||
337 | /* | ||
338 | * Verify that the address gap is within the +/-2GB range | ||
339 | * reachable by a relative jump. | ||
340 | */ | ||
341 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
342 | if (abs(rel) > 0x7fffffff) | ||
343 | return -ERANGE; | ||
344 | |||
345 | buf = (u8 *)op->optinsn.insn; | ||
346 | |||
347 | /* Copy instructions into the out-of-line buffer */ | ||
348 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
349 | if (ret < 0) { | ||
350 | __arch_remove_optimized_kprobe(op, 0); | ||
351 | return ret; | ||
352 | } | ||
353 | op->optinsn.size = ret; | ||
354 | |||
355 | /* Copy arch-dep-instance from template */ | ||
356 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
357 | |||
358 | /* Set probe information */ | ||
359 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
360 | |||
361 | /* Set probe function call */ | ||
362 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
363 | |||
364 | /* Set the returning jmp instruction at the tail of the out-of-line buffer */ | ||
365 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
366 | (u8 *)op->kp.addr + op->optinsn.size); | ||
367 | |||
368 | flush_icache_range((unsigned long) buf, | ||
369 | (unsigned long) buf + TMPL_END_IDX + | ||
370 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
371 | return 0; | ||
372 | } | ||
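
Assembled, the out-of-line buffer has a three-part layout; a sketch of the offsets as named in this file, plus the resume-address rule the tail jump implements (helper name is illustrative):

    /*
     * [ template              | relocated original insns | jmp rel32 back ]
     *   buf                     buf + TMPL_END_IDX         buf + TMPL_END_IDX
     *                                                          + optinsn.size
     */
    static unsigned long detour_resume_addr(unsigned long probe_addr,
                                            unsigned long copied_size)
    {
            /* First original byte that was not relocated into the buffer. */
            return probe_addr + copied_size;
    }
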
373 | |||
374 | #define MAX_OPTIMIZE_PROBES 256 | ||
375 | static struct text_poke_param *jump_poke_params; | ||
376 | static struct jump_poke_buffer { | ||
377 | u8 buf[RELATIVEJUMP_SIZE]; | ||
378 | } *jump_poke_bufs; | ||
379 | |||
380 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
381 | u8 *insn_buf, | ||
382 | struct optimized_kprobe *op) | ||
383 | { | ||
384 | s32 rel = (s32)((long)op->optinsn.insn - | ||
385 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
386 | |||
387 | /* Back up the instructions that the jump's relative address will overwrite */ | ||
388 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
389 | RELATIVE_ADDR_SIZE); | ||
390 | |||
391 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
392 | *(s32 *)(&insn_buf[1]) = rel; | ||
393 | |||
394 | tprm->addr = op->kp.addr; | ||
395 | tprm->opcode = insn_buf; | ||
396 | tprm->len = RELATIVEJUMP_SIZE; | ||
397 | } | ||
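
The displacement above follows the jmp rel32 rule: the offset is relative to the address of the next instruction, hence the "+ RELATIVEJUMP_SIZE" term. A sketch of the full 5-byte encoding (helper name is illustrative):

    #include <stdint.h>
    #include <string.h>

    static void encode_jmp_rel32(uint8_t buf[5], uint64_t src, uint64_t dst)
    {
            /* Displacement is measured from the end of the 5-byte jmp;
             * assumes dst is within +/-2GB of src, as checked earlier. */
            int32_t rel = (int32_t)(dst - (src + 5));

            buf[0] = 0xe9;                          /* RELATIVEJUMP_OPCODE */
            memcpy(&buf[1], &rel, sizeof(rel));     /* little-endian disp */
    }
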
398 | |||
399 | /* | ||
400 | * Replace breakpoints (int3) with relative jumps. | ||
401 | * Caller must hold kprobe_mutex and text_mutex. | ||
402 | */ | ||
403 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
404 | { | ||
405 | struct optimized_kprobe *op, *tmp; | ||
406 | int c = 0; | ||
407 | |||
408 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
409 | WARN_ON(kprobe_disabled(&op->kp)); | ||
410 | /* Setup param */ | ||
411 | setup_optimize_kprobe(&jump_poke_params[c], | ||
412 | jump_poke_bufs[c].buf, op); | ||
413 | list_del_init(&op->list); | ||
414 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
415 | break; | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * text_poke_smp doesn't support modifying code that may run in | ||
420 | * NMI/MCE context. However, since kprobes itself also doesn't | ||
421 | * support probing NMI/MCE code, this is not a problem. | ||
422 | */ | ||
423 | text_poke_smp_batch(jump_poke_params, c); | ||
424 | } | ||
425 | |||
426 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
427 | u8 *insn_buf, | ||
428 | struct optimized_kprobe *op) | ||
429 | { | ||
430 | /* Set int3 to first byte for kprobes */ | ||
431 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
432 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
433 | |||
434 | tprm->addr = op->kp.addr; | ||
435 | tprm->opcode = insn_buf; | ||
436 | tprm->len = RELATIVEJUMP_SIZE; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Recover original instructions and breakpoints from relative jumps. | ||
441 | * Caller must hold kprobe_mutex. | ||
442 | */ | ||
443 | void arch_unoptimize_kprobes(struct list_head *oplist, | ||
444 | struct list_head *done_list) | ||
445 | { | ||
446 | struct optimized_kprobe *op, *tmp; | ||
447 | int c = 0; | ||
448 | |||
449 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
450 | /* Setup param */ | ||
451 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
452 | jump_poke_bufs[c].buf, op); | ||
453 | list_move(&op->list, done_list); | ||
454 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
455 | break; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * text_poke_smp doesn't support modifying code that may run in | ||
460 | * NMI/MCE context. However, since kprobes itself also doesn't | ||
461 | * support probing NMI/MCE code, this is not a problem. | ||
462 | */ | ||
463 | text_poke_smp_batch(jump_poke_params, c); | ||
464 | } | ||
465 | |||
466 | /* Replace a relative jump with a breakpoint (int3). */ | ||
467 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
468 | { | ||
469 | u8 buf[RELATIVEJUMP_SIZE]; | ||
470 | |||
471 | /* Set int3 to first byte for kprobes */ | ||
472 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
473 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
474 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
475 | } | ||
476 | |||
477 | int __kprobes | ||
478 | setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
479 | { | ||
480 | struct optimized_kprobe *op; | ||
481 | |||
482 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
483 | /* This kprobe really can run the optimized path. */ | ||
484 | op = container_of(p, struct optimized_kprobe, kp); | ||
485 | /* Detour through copied instructions */ | ||
486 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
487 | if (!reenter) | ||
488 | reset_current_kprobe(); | ||
489 | preempt_enable_no_resched(); | ||
490 | return 1; | ||
491 | } | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | int __kprobes arch_init_optprobes(void) | ||
496 | { | ||
497 | /* Allocate code buffer and parameter array */ | ||
498 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
499 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
500 | if (!jump_poke_bufs) | ||
501 | return -ENOMEM; | ||
502 | |||
503 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
504 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
505 | if (!jump_poke_params) { | ||
506 | kfree(jump_poke_bufs); | ||
507 | jump_poke_bufs = NULL; | ||
508 | return -ENOMEM; | ||
509 | } | ||
510 | |||
511 | return 0; | ||
512 | } | ||
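
Both allocations above are n * size kmallocs with a fixed n of 256, so overflow is not a live risk here; still, the overflow-checked spelling would be kcalloc, sketched below (same semantics, plus zeroing):

    jump_poke_bufs = kcalloc(MAX_OPTIMIZE_PROBES,
                             sizeof(struct jump_poke_buffer), GFP_KERNEL);
    jump_poke_params = kcalloc(MAX_OPTIMIZE_PROBES,
                               sizeof(struct text_poke_param), GFP_KERNEL);
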
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d8b64c..e213fc8408d2 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -30,16 +30,15 @@ | |||
30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi |
31 | * <prasanna@in.ibm.com> added function-return probes. | 31 | * <prasanna@in.ibm.com> added function-return probes. |
32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | 32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> |
33 | * Added function return probes functionality | 33 | * Added function return probes functionality |
34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | 34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added |
35 | * kprobe-booster and kretprobe-booster for i386. | 35 | * kprobe-booster and kretprobe-booster for i386. |
36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | 36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster |
37 | * and kretprobe-booster for x86-64 | 37 | * and kretprobe-booster for x86-64 |
38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | 38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven |
39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | 39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> |
40 | * unified x86 kprobes code. | 40 | * unified x86 kprobes code. |
41 | */ | 41 | */ |
42 | |||
43 | #include <linux/kprobes.h> | 42 | #include <linux/kprobes.h> |
44 | #include <linux/ptrace.h> | 43 | #include <linux/ptrace.h> |
45 | #include <linux/string.h> | 44 | #include <linux/string.h> |
@@ -59,6 +58,8 @@ | |||
59 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
60 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
61 | 60 | ||
61 | #include "kprobes-common.h" | ||
62 | |||
62 | void jprobe_return_end(void); | 63 | void jprobe_return_end(void); |
63 | 64 | ||
64 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 65 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
108 | doesn't switch kernel stack.*/ | 109 | doesn't switch kernel stack.*/ |
109 | {NULL, NULL} /* Terminator */ | 110 | {NULL, NULL} /* Terminator */ |
110 | }; | 111 | }; |
112 | |||
111 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 113 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
112 | 114 | ||
113 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | 115 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | |||
123 | } | 125 | } |
124 | 126 | ||
125 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 127 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ |
126 | static void __kprobes synthesize_reljump(void *from, void *to) | 128 | void __kprobes synthesize_reljump(void *from, void *to) |
127 | { | 129 | { |
128 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | 130 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); |
129 | } | 131 | } |
130 | 132 | ||
133 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
134 | void __kprobes synthesize_relcall(void *from, void *to) | ||
135 | { | ||
136 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
137 | } | ||
138 | |||
131 | /* | 139 | /* |
132 | * Skip the prefixes of the instruction. | 140 | * Skip the prefixes of the instruction. |
133 | */ | 141 | */ |
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) | |||
151 | * Returns non-zero if opcode is boostable. | 159 | * Returns non-zero if opcode is boostable. |
152 | * RIP relative instructions are adjusted at copying time in 64 bits mode | 160 | * RIP relative instructions are adjusted at copying time in 64 bits mode |
153 | */ | 161 | */ |
154 | static int __kprobes can_boost(kprobe_opcode_t *opcodes) | 162 | int __kprobes can_boost(kprobe_opcode_t *opcodes) |
155 | { | 163 | { |
156 | kprobe_opcode_t opcode; | 164 | kprobe_opcode_t opcode; |
157 | kprobe_opcode_t *orig_opcodes = opcodes; | 165 | kprobe_opcode_t *orig_opcodes = opcodes; |
@@ -207,13 +215,15 @@ retry: | |||
207 | } | 215 | } |
208 | } | 216 | } |
209 | 217 | ||
210 | /* Recover the probed instruction at addr for further analysis. */ | 218 | static unsigned long |
211 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 219 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) |
212 | { | 220 | { |
213 | struct kprobe *kp; | 221 | struct kprobe *kp; |
222 | |||
214 | kp = get_kprobe((void *)addr); | 223 | kp = get_kprobe((void *)addr); |
224 | /* There is no probe, return original address */ | ||
215 | if (!kp) | 225 | if (!kp) |
216 | return -EINVAL; | 226 | return addr; |
217 | 227 | ||
218 | /* | 228 | /* |
219 | * Basically, kp->ainsn.insn has an original instruction. | 229 | * Basically, kp->ainsn.insn has an original instruction. |
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
230 | */ | 240 | */ |
231 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 241 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
232 | buf[0] = kp->opcode; | 242 | buf[0] = kp->opcode; |
233 | return 0; | 243 | return (unsigned long)buf; |
244 | } | ||
245 | |||
246 | /* | ||
247 | * Recover the probed instruction at addr for further analysis. | ||
248 | * Caller must hold kprobe_mutex, or disable preemption, | ||
249 | * to prevent the referenced kprobes from being released. | ||
250 | */ | ||
251 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
252 | { | ||
253 | unsigned long __addr; | ||
254 | |||
255 | __addr = __recover_optprobed_insn(buf, addr); | ||
256 | if (__addr != addr) | ||
257 | return __addr; | ||
258 | |||
259 | return __recover_probed_insn(buf, addr); | ||
234 | } | 260 | } |
235 | 261 | ||
236 | /* Check if paddr is at an instruction boundary */ | 262 | /* Check if paddr is at an instruction boundary */ |
237 | static int __kprobes can_probe(unsigned long paddr) | 263 | static int __kprobes can_probe(unsigned long paddr) |
238 | { | 264 | { |
239 | int ret; | 265 | unsigned long addr, __addr, offset = 0; |
240 | unsigned long addr, offset = 0; | ||
241 | struct insn insn; | 266 | struct insn insn; |
242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 267 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
243 | 268 | ||
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) | |||
247 | /* Decode instructions */ | 272 | /* Decode instructions */ |
248 | addr = paddr - offset; | 273 | addr = paddr - offset; |
249 | while (addr < paddr) { | 274 | while (addr < paddr) { |
250 | kernel_insn_init(&insn, (void *)addr); | ||
251 | insn_get_opcode(&insn); | ||
252 | |||
253 | /* | 275 | /* |
254 | * Check if the instruction has been modified by another | 276 | * Check if the instruction has been modified by another |
255 | * kprobe, in which case we replace the breakpoint by the | 277 | * kprobe, in which case we replace the breakpoint by the |
256 | * original instruction in our buffer. | 278 | * original instruction in our buffer. |
279 | * Also, jump optimization will change the breakpoint to a | ||
280 | * relative-jump. Since a relative-jump is itself a normal | ||
281 | * instruction, we simply pass it through if there is no kprobe. | ||
257 | */ | 282 | */ |
258 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | 283 | __addr = recover_probed_instruction(buf, addr); |
259 | ret = recover_probed_instruction(buf, addr); | 284 | kernel_insn_init(&insn, (void *)__addr); |
260 | if (ret) | ||
261 | /* | ||
262 | * Another debugging subsystem might insert | ||
263 | * this breakpoint. In that case, we can't | ||
264 | * recover it. | ||
265 | */ | ||
266 | return 0; | ||
267 | kernel_insn_init(&insn, buf); | ||
268 | } | ||
269 | insn_get_length(&insn); | 285 | insn_get_length(&insn); |
286 | |||
287 | /* | ||
288 | * Another debugging subsystem might insert this breakpoint. | ||
289 | * In that case, we can't recover it. | ||
290 | */ | ||
291 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
292 | return 0; | ||
270 | addr += insn.length; | 293 | addr += insn.length; |
271 | } | 294 | } |
272 | 295 | ||
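
Concretely, "recovering" an instruction undoes the one-byte int3 overwrite: kprobes keeps the displaced first byte in kp->opcode, so the original instruction is a copy plus a single-byte patch. A simplified sketch of that idea (types and the 16-byte copy are illustrative):

    #include <string.h>

    static const unsigned char *
    rebuild_insn(unsigned char *buf, const unsigned char *text,
                 int probed, unsigned char saved_first_byte)
    {
            if (!probed)
                    return text;            /* nothing to undo, decode in place */

            memcpy(buf, text, 16);          /* roughly MAX_INSN_SIZE bytes */
            buf[0] = saved_first_byte;      /* undo the int3 overwrite */
            return buf;
    }
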
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
299 | * If not, return null. | 322 | * If not, return null. |
300 | * Only applicable to 64-bit x86. | 323 | * Only applicable to 64-bit x86. |
301 | */ | 324 | */ |
302 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | 325 | int __kprobes __copy_instruction(u8 *dest, u8 *src) |
303 | { | 326 | { |
304 | struct insn insn; | 327 | struct insn insn; |
305 | int ret; | ||
306 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 328 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
307 | 329 | ||
308 | kernel_insn_init(&insn, src); | 330 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); |
309 | if (recover) { | ||
310 | insn_get_opcode(&insn); | ||
311 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
312 | ret = recover_probed_instruction(buf, | ||
313 | (unsigned long)src); | ||
314 | if (ret) | ||
315 | return 0; | ||
316 | kernel_insn_init(&insn, buf); | ||
317 | } | ||
318 | } | ||
319 | insn_get_length(&insn); | 331 | insn_get_length(&insn); |
332 | /* Another subsystem has put a breakpoint; we failed to recover it */ | ||
333 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
334 | return 0; | ||
320 | memcpy(dest, insn.kaddr, insn.length); | 335 | memcpy(dest, insn.kaddr, insn.length); |
321 | 336 | ||
322 | #ifdef CONFIG_X86_64 | 337 | #ifdef CONFIG_X86_64 |
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
337 | * extension of the original signed 32-bit displacement would | 352 | * extension of the original signed 32-bit displacement would |
338 | * have given. | 353 | * have given. |
339 | */ | 354 | */ |
340 | newdisp = (u8 *) src + (s64) insn.displacement.value - | 355 | newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; |
341 | (u8 *) dest; | ||
342 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 356 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
343 | disp = (u8 *) dest + insn_offset_displacement(&insn); | 357 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
344 | *(s32 *) disp = (s32) newdisp; | 358 | *(s32 *) disp = (s32) newdisp; |
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
349 | 363 | ||
350 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 364 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
351 | { | 365 | { |
366 | /* Copy an instruction, recovering it if another optprobe has modified it. */ | ||
367 | __copy_instruction(p->ainsn.insn, p->addr); | ||
368 | |||
352 | /* | 369 | /* |
353 | * Copy an instruction without recovering int3, because it will be | 370 | * __copy_instruction can modify the displacement of the instruction, |
354 | * put by another subsystem. | 371 | * but it doesn't affect the boostable check. |
355 | */ | 372 | */ |
356 | __copy_instruction(p->ainsn.insn, p->addr, 0); | 373 | if (can_boost(p->ainsn.insn)) |
357 | |||
358 | if (can_boost(p->addr)) | ||
359 | p->ainsn.boostable = 0; | 374 | p->ainsn.boostable = 0; |
360 | else | 375 | else |
361 | p->ainsn.boostable = -1; | 376 | p->ainsn.boostable = -1; |
362 | 377 | ||
363 | p->opcode = *p->addr; | 378 | /* Also, displacement change doesn't affect the first byte */ |
379 | p->opcode = p->ainsn.insn[0]; | ||
364 | } | 380 | } |
365 | 381 | ||
366 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 382 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) | |||
442 | } | 458 | } |
443 | } | 459 | } |
444 | 460 | ||
445 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 461 | void __kprobes |
446 | struct pt_regs *regs) | 462 | arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) |
447 | { | 463 | { |
448 | unsigned long *sara = stack_addr(regs); | 464 | unsigned long *sara = stack_addr(regs); |
449 | 465 | ||
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
453 | *sara = (unsigned long) &kretprobe_trampoline; | 469 | *sara = (unsigned long) &kretprobe_trampoline; |
454 | } | 470 | } |
455 | 471 | ||
456 | #ifdef CONFIG_OPTPROBES | 472 | static void __kprobes |
457 | static int __kprobes setup_detour_execution(struct kprobe *p, | 473 | setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) |
458 | struct pt_regs *regs, | ||
459 | int reenter); | ||
460 | #else | ||
461 | #define setup_detour_execution(p, regs, reenter) (0) | ||
462 | #endif | ||
463 | |||
464 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
465 | struct kprobe_ctlblk *kcb, int reenter) | ||
466 | { | 474 | { |
467 | if (setup_detour_execution(p, regs, reenter)) | 475 | if (setup_detour_execution(p, regs, reenter)) |
468 | return; | 476 | return; |
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | |||
504 | * within the handler. We save the original kprobes variables and just single | 512 | * within the handler. We save the original kprobes variables and just single |
505 | * step on the instruction of the new probe without calling any user handlers. | 513 | * step on the instruction of the new probe without calling any user handlers. |
506 | */ | 514 | */ |
507 | static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | 515 | static int __kprobes |
508 | struct kprobe_ctlblk *kcb) | 516 | reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
509 | { | 517 | { |
510 | switch (kcb->kprobe_status) { | 518 | switch (kcb->kprobe_status) { |
511 | case KPROBE_HIT_SSDONE: | 519 | case KPROBE_HIT_SSDONE: |
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
600 | return 0; | 608 | return 0; |
601 | } | 609 | } |
602 | 610 | ||
603 | #ifdef CONFIG_X86_64 | ||
604 | #define SAVE_REGS_STRING \ | ||
605 | /* Skip cs, ip, orig_ax. */ \ | ||
606 | " subq $24, %rsp\n" \ | ||
607 | " pushq %rdi\n" \ | ||
608 | " pushq %rsi\n" \ | ||
609 | " pushq %rdx\n" \ | ||
610 | " pushq %rcx\n" \ | ||
611 | " pushq %rax\n" \ | ||
612 | " pushq %r8\n" \ | ||
613 | " pushq %r9\n" \ | ||
614 | " pushq %r10\n" \ | ||
615 | " pushq %r11\n" \ | ||
616 | " pushq %rbx\n" \ | ||
617 | " pushq %rbp\n" \ | ||
618 | " pushq %r12\n" \ | ||
619 | " pushq %r13\n" \ | ||
620 | " pushq %r14\n" \ | ||
621 | " pushq %r15\n" | ||
622 | #define RESTORE_REGS_STRING \ | ||
623 | " popq %r15\n" \ | ||
624 | " popq %r14\n" \ | ||
625 | " popq %r13\n" \ | ||
626 | " popq %r12\n" \ | ||
627 | " popq %rbp\n" \ | ||
628 | " popq %rbx\n" \ | ||
629 | " popq %r11\n" \ | ||
630 | " popq %r10\n" \ | ||
631 | " popq %r9\n" \ | ||
632 | " popq %r8\n" \ | ||
633 | " popq %rax\n" \ | ||
634 | " popq %rcx\n" \ | ||
635 | " popq %rdx\n" \ | ||
636 | " popq %rsi\n" \ | ||
637 | " popq %rdi\n" \ | ||
638 | /* Skip orig_ax, ip, cs */ \ | ||
639 | " addq $24, %rsp\n" | ||
640 | #else | ||
641 | #define SAVE_REGS_STRING \ | ||
642 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
643 | " subl $16, %esp\n" \ | ||
644 | " pushl %fs\n" \ | ||
645 | " pushl %es\n" \ | ||
646 | " pushl %ds\n" \ | ||
647 | " pushl %eax\n" \ | ||
648 | " pushl %ebp\n" \ | ||
649 | " pushl %edi\n" \ | ||
650 | " pushl %esi\n" \ | ||
651 | " pushl %edx\n" \ | ||
652 | " pushl %ecx\n" \ | ||
653 | " pushl %ebx\n" | ||
654 | #define RESTORE_REGS_STRING \ | ||
655 | " popl %ebx\n" \ | ||
656 | " popl %ecx\n" \ | ||
657 | " popl %edx\n" \ | ||
658 | " popl %esi\n" \ | ||
659 | " popl %edi\n" \ | ||
660 | " popl %ebp\n" \ | ||
661 | " popl %eax\n" \ | ||
662 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
663 | " addl $24, %esp\n" | ||
664 | #endif | ||
665 | |||
666 | /* | 611 | /* |
667 | * When a retprobed function returns, this code saves registers and | 612 | * When a retprobed function returns, this code saves registers and |
668 | * calls trampoline_handler(), which calls the kretprobe's handler. | 613 | * calls trampoline_handler(), which calls the kretprobe's handler. |
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
816 | * jump instruction after the copied instruction, that jumps to the next | 761 | * jump instruction after the copied instruction, that jumps to the next |
817 | * instruction after the probepoint. | 762 | * instruction after the probepoint. |
818 | */ | 763 | */ |
819 | static void __kprobes resume_execution(struct kprobe *p, | 764 | static void __kprobes |
820 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | 765 | resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
821 | { | 766 | { |
822 | unsigned long *tos = stack_addr(regs); | 767 | unsigned long *tos = stack_addr(regs); |
823 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | 768 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; |
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
996 | /* | 941 | /* |
997 | * Wrapper routine for handling exceptions. | 942 | * Wrapper routine for handling exceptions. |
998 | */ | 943 | */ |
999 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | 944 | int __kprobes |
1000 | unsigned long val, void *data) | 945 | kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) |
1001 | { | 946 | { |
1002 | struct die_args *args = data; | 947 | struct die_args *args = data; |
1003 | int ret = NOTIFY_DONE; | 948 | int ret = NOTIFY_DONE; |
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1107 | return 0; | 1052 | return 0; |
1108 | } | 1053 | } |
1109 | 1054 | ||
1110 | |||
1111 | #ifdef CONFIG_OPTPROBES | ||
1112 | |||
1113 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1114 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1115 | { | ||
1116 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1117 | } | ||
1118 | |||
1119 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1120 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1121 | unsigned long val) | ||
1122 | { | ||
1123 | #ifdef CONFIG_X86_64 | ||
1124 | *addr++ = 0x48; | ||
1125 | *addr++ = 0xbf; | ||
1126 | #else | ||
1127 | *addr++ = 0xb8; | ||
1128 | #endif | ||
1129 | *(unsigned long *)addr = val; | ||
1130 | } | ||
1131 | |||
1132 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
1133 | { | ||
1134 | asm volatile ( | ||
1135 | ".global optprobe_template_entry\n" | ||
1136 | "optprobe_template_entry: \n" | ||
1137 | #ifdef CONFIG_X86_64 | ||
1138 | /* We don't bother saving the ss register */ | ||
1139 | " pushq %rsp\n" | ||
1140 | " pushfq\n" | ||
1141 | SAVE_REGS_STRING | ||
1142 | " movq %rsp, %rsi\n" | ||
1143 | ".global optprobe_template_val\n" | ||
1144 | "optprobe_template_val: \n" | ||
1145 | ASM_NOP5 | ||
1146 | ASM_NOP5 | ||
1147 | ".global optprobe_template_call\n" | ||
1148 | "optprobe_template_call: \n" | ||
1149 | ASM_NOP5 | ||
1150 | /* Move flags to rsp */ | ||
1151 | " movq 144(%rsp), %rdx\n" | ||
1152 | " movq %rdx, 152(%rsp)\n" | ||
1153 | RESTORE_REGS_STRING | ||
1154 | /* Skip flags entry */ | ||
1155 | " addq $8, %rsp\n" | ||
1156 | " popfq\n" | ||
1157 | #else /* CONFIG_X86_32 */ | ||
1158 | " pushf\n" | ||
1159 | SAVE_REGS_STRING | ||
1160 | " movl %esp, %edx\n" | ||
1161 | ".global optprobe_template_val\n" | ||
1162 | "optprobe_template_val: \n" | ||
1163 | ASM_NOP5 | ||
1164 | ".global optprobe_template_call\n" | ||
1165 | "optprobe_template_call: \n" | ||
1166 | ASM_NOP5 | ||
1167 | RESTORE_REGS_STRING | ||
1168 | " addl $4, %esp\n" /* skip cs */ | ||
1169 | " popf\n" | ||
1170 | #endif | ||
1171 | ".global optprobe_template_end\n" | ||
1172 | "optprobe_template_end: \n"); | ||
1173 | } | ||
1174 | |||
1175 | #define TMPL_MOVE_IDX \ | ||
1176 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1177 | #define TMPL_CALL_IDX \ | ||
1178 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1179 | #define TMPL_END_IDX \ | ||
1180 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1181 | |||
1182 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1183 | |||
1184 | /* Optimized kprobe call back function: called from optinsn */ | ||
1185 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1186 | struct pt_regs *regs) | ||
1187 | { | ||
1188 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1189 | unsigned long flags; | ||
1190 | |||
1191 | /* This is possible if op is under delayed unoptimizing */ | ||
1192 | if (kprobe_disabled(&op->kp)) | ||
1193 | return; | ||
1194 | |||
1195 | local_irq_save(flags); | ||
1196 | if (kprobe_running()) { | ||
1197 | kprobes_inc_nmissed_count(&op->kp); | ||
1198 | } else { | ||
1199 | /* Save skipped registers */ | ||
1200 | #ifdef CONFIG_X86_64 | ||
1201 | regs->cs = __KERNEL_CS; | ||
1202 | #else | ||
1203 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1204 | regs->gs = 0; | ||
1205 | #endif | ||
1206 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1207 | regs->orig_ax = ~0UL; | ||
1208 | |||
1209 | __this_cpu_write(current_kprobe, &op->kp); | ||
1210 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1211 | opt_pre_handler(&op->kp, regs); | ||
1212 | __this_cpu_write(current_kprobe, NULL); | ||
1213 | } | ||
1214 | local_irq_restore(flags); | ||
1215 | } | ||
1216 | |||
1217 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1218 | { | ||
1219 | int len = 0, ret; | ||
1220 | |||
1221 | while (len < RELATIVEJUMP_SIZE) { | ||
1222 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1223 | if (!ret || !can_boost(dest + len)) | ||
1224 | return -EINVAL; | ||
1225 | len += ret; | ||
1226 | } | ||
1227 | /* Check whether the address range is reserved */ | ||
1228 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1229 | alternatives_text_reserved(src, src + len - 1) || | ||
1230 | jump_label_text_reserved(src, src + len - 1)) | ||
1231 | return -EBUSY; | ||
1232 | |||
1233 | return len; | ||
1234 | } | ||
1235 | |||
1236 | /* Check whether insn is indirect jump */ | ||
1237 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1238 | { | ||
1239 | return ((insn->opcode.bytes[0] == 0xff && | ||
1240 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1241 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1242 | } | ||
1243 | |||
1244 | /* Check whether insn jumps into specified address range */ | ||
1245 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1246 | { | ||
1247 | unsigned long target = 0; | ||
1248 | |||
1249 | switch (insn->opcode.bytes[0]) { | ||
1250 | case 0xe0: /* loopne */ | ||
1251 | case 0xe1: /* loope */ | ||
1252 | case 0xe2: /* loop */ | ||
1253 | case 0xe3: /* jcxz */ | ||
1254 | case 0xe9: /* near relative jump */ | ||
1255 | case 0xeb: /* short relative jump */ | ||
1256 | break; | ||
1257 | case 0x0f: | ||
1258 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1259 | break; | ||
1260 | return 0; | ||
1261 | default: | ||
1262 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1263 | break; | ||
1264 | return 0; | ||
1265 | } | ||
1266 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1267 | |||
1268 | return (start <= target && target <= start + len); | ||
1269 | } | ||
1270 | |||
1271 | /* Decode whole function to ensure any instructions don't jump into target */ | ||
1272 | static int __kprobes can_optimize(unsigned long paddr) | ||
1273 | { | ||
1274 | int ret; | ||
1275 | unsigned long addr, size = 0, offset = 0; | ||
1276 | struct insn insn; | ||
1277 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1278 | |||
1279 | /* Lookup symbol including addr */ | ||
1280 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
1281 | return 0; | ||
1282 | |||
1283 | /* | ||
1284 | * Do not optimize in the entry code due to the unstable | ||
1285 | * stack handling. | ||
1286 | */ | ||
1287 | if ((paddr >= (unsigned long )__entry_text_start) && | ||
1288 | (paddr < (unsigned long )__entry_text_end)) | ||
1289 | return 0; | ||
1290 | |||
1291 | /* Check there is enough space for a relative jump. */ | ||
1292 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1293 | return 0; | ||
1294 | |||
1295 | /* Decode instructions */ | ||
1296 | addr = paddr - offset; | ||
1297 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1298 | if (search_exception_tables(addr)) | ||
1299 | /* | ||
1300 | * Since some fixup code will jumps into this function, | ||
1301 | * we can't optimize kprobe in this function. | ||
1302 | */ | ||
1303 | return 0; | ||
1304 | kernel_insn_init(&insn, (void *)addr); | ||
1305 | insn_get_opcode(&insn); | ||
1306 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1307 | ret = recover_probed_instruction(buf, addr); | ||
1308 | if (ret) | ||
1309 | return 0; | ||
1310 | kernel_insn_init(&insn, buf); | ||
1311 | } | ||
1312 | insn_get_length(&insn); | ||
1313 | /* Recover address */ | ||
1314 | insn.kaddr = (void *)addr; | ||
1315 | insn.next_byte = (void *)(addr + insn.length); | ||
1316 | /* Check any instructions don't jump into target */ | ||
1317 | if (insn_is_indirect_jump(&insn) || | ||
1318 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1319 | RELATIVE_ADDR_SIZE)) | ||
1320 | return 0; | ||
1321 | addr += insn.length; | ||
1322 | } | ||
1323 | |||
1324 | return 1; | ||
1325 | } | ||
1326 | |||
1327 | /* Check optimized_kprobe can actually be optimized. */ | ||
1328 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1329 | { | ||
1330 | int i; | ||
1331 | struct kprobe *p; | ||
1332 | |||
1333 | for (i = 1; i < op->optinsn.size; i++) { | ||
1334 | p = get_kprobe(op->kp.addr + i); | ||
1335 | if (p && !kprobe_disabled(p)) | ||
1336 | return -EEXIST; | ||
1337 | } | ||
1338 | |||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | /* Check the addr is within the optimized instructions. */ | ||
1343 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1344 | unsigned long addr) | ||
1345 | { | ||
1346 | return ((unsigned long)op->kp.addr <= addr && | ||
1347 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1348 | } | ||
1349 | |||
1350 | /* Free optimized instruction slot */ | ||
1351 | static __kprobes | ||
1352 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1353 | { | ||
1354 | if (op->optinsn.insn) { | ||
1355 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1356 | op->optinsn.insn = NULL; | ||
1357 | op->optinsn.size = 0; | ||
1358 | } | ||
1359 | } | ||
1360 | |||
1361 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1362 | { | ||
1363 | __arch_remove_optimized_kprobe(op, 1); | ||
1364 | } | ||
1365 | |||
1366 | /* | ||
1367 | * Copy replacing target instructions | ||
1368 | * Target instructions MUST be relocatable (checked inside) | ||
1369 | */ | ||
1370 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1371 | { | ||
1372 | u8 *buf; | ||
1373 | int ret; | ||
1374 | long rel; | ||
1375 | |||
1376 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1377 | return -EILSEQ; | ||
1378 | |||
1379 | op->optinsn.insn = get_optinsn_slot(); | ||
1380 | if (!op->optinsn.insn) | ||
1381 | return -ENOMEM; | ||
1382 | |||
1383 | /* | ||
1384 | * Verify if the address gap is in 2GB range, because this uses | ||
1385 | * a relative jump. | ||
1386 | */ | ||
1387 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1388 | if (abs(rel) > 0x7fffffff) | ||
1389 | return -ERANGE; | ||
1390 | |||
1391 | buf = (u8 *)op->optinsn.insn; | ||
1392 | |||
1393 | /* Copy instructions into the out-of-line buffer */ | ||
1394 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1395 | if (ret < 0) { | ||
1396 | __arch_remove_optimized_kprobe(op, 0); | ||
1397 | return ret; | ||
1398 | } | ||
1399 | op->optinsn.size = ret; | ||
1400 | |||
1401 | /* Copy arch-dep-instance from template */ | ||
1402 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1403 | |||
1404 | /* Set probe information */ | ||
1405 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1406 | |||
1407 | /* Set probe function call */ | ||
1408 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1409 | |||
1410 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1411 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1412 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1413 | |||
1414 | flush_icache_range((unsigned long) buf, | ||
1415 | (unsigned long) buf + TMPL_END_IDX + | ||
1416 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1417 | return 0; | ||
1418 | } | ||
1419 | |||
1420 | #define MAX_OPTIMIZE_PROBES 256 | ||
1421 | static struct text_poke_param *jump_poke_params; | ||
1422 | static struct jump_poke_buffer { | ||
1423 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1424 | } *jump_poke_bufs; | ||
1425 | |||
1426 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1427 | u8 *insn_buf, | ||
1428 | struct optimized_kprobe *op) | ||
1429 | { | ||
1430 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1431 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1432 | |||
1433 | /* Backup instructions which will be replaced by jump address */ | ||
1434 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1435 | RELATIVE_ADDR_SIZE); | ||
1436 | |||
1437 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
1438 | *(s32 *)(&insn_buf[1]) = rel; | ||
1439 | |||
1440 | tprm->addr = op->kp.addr; | ||
1441 | tprm->opcode = insn_buf; | ||
1442 | tprm->len = RELATIVEJUMP_SIZE; | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * Replace breakpoints (int3) with relative jumps. | ||
1447 | * Caller must call with locking kprobe_mutex and text_mutex. | ||
1448 | */ | ||
1449 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1450 | { | ||
1451 | struct optimized_kprobe *op, *tmp; | ||
1452 | int c = 0; | ||
1453 | |||
1454 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1455 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1456 | /* Setup param */ | ||
1457 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1458 | jump_poke_bufs[c].buf, op); | ||
1459 | list_del_init(&op->list); | ||
1460 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1461 | break; | ||
1462 | } | ||
1463 | |||
1464 | /* | ||
1465 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1466 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1467 | * code probing, it's not a problem. | ||
1468 | */ | ||
1469 | text_poke_smp_batch(jump_poke_params, c); | ||
1470 | } | ||
1471 | |||
1472 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1473 | u8 *insn_buf, | ||
1474 | struct optimized_kprobe *op) | ||
1475 | { | ||
1476 | /* Set int3 to first byte for kprobes */ | ||
1477 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1478 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1479 | |||
1480 | tprm->addr = op->kp.addr; | ||
1481 | tprm->opcode = insn_buf; | ||
1482 | tprm->len = RELATIVEJUMP_SIZE; | ||
1483 | } | ||
1484 | |||
1485 | /* | ||
1486 | * Recover original instructions and breakpoints from relative jumps. | ||
1487 | * Caller must call with locking kprobe_mutex. | ||
1488 | */ | ||
1489 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
1490 | struct list_head *done_list) | ||
1491 | { | ||
1492 | struct optimized_kprobe *op, *tmp; | ||
1493 | int c = 0; | ||
1494 | |||
1495 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1496 | /* Setup param */ | ||
1497 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1498 | jump_poke_bufs[c].buf, op); | ||
1499 | list_move(&op->list, done_list); | ||
1500 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1501 | break; | ||
1502 | } | ||
1503 | |||
1504 | /* | ||
1505 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1506 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1507 | * code probing, it's not a problem. | ||
1508 | */ | ||
1509 | text_poke_smp_batch(jump_poke_params, c); | ||
1510 | } | ||
1511 | |||
1512 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1513 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1514 | { | ||
1515 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1516 | |||
1517 | /* Set int3 to first byte for kprobes */ | ||
1518 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1519 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1520 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1521 | } | ||
1522 | |||
1523 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1524 | struct pt_regs *regs, | ||
1525 | int reenter) | ||
1526 | { | ||
1527 | struct optimized_kprobe *op; | ||
1528 | |||
1529 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1530 | /* This kprobe is really able to run optimized path. */ | ||
1531 | op = container_of(p, struct optimized_kprobe, kp); | ||
1532 | /* Detour through copied instructions */ | ||
1533 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1534 | if (!reenter) | ||
1535 | reset_current_kprobe(); | ||
1536 | preempt_enable_no_resched(); | ||
1537 | return 1; | ||
1538 | } | ||
1539 | return 0; | ||
1540 | } | ||
1541 | |||
1542 | static int __kprobes init_poke_params(void) | ||
1543 | { | ||
1544 | /* Allocate code buffer and parameter array */ | ||
1545 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1546 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1547 | if (!jump_poke_bufs) | ||
1548 | return -ENOMEM; | ||
1549 | |||
1550 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1551 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1552 | if (!jump_poke_params) { | ||
1553 | kfree(jump_poke_bufs); | ||
1554 | jump_poke_bufs = NULL; | ||
1555 | return -ENOMEM; | ||
1556 | } | ||
1557 | |||
1558 | return 0; | ||
1559 | } | ||
1560 | #else /* !CONFIG_OPTPROBES */ | ||
1561 | static int __kprobes init_poke_params(void) | ||
1562 | { | ||
1563 | return 0; | ||
1564 | } | ||
1565 | #endif | ||
1566 | |||
1567 | int __init arch_init_kprobes(void) | 1055 | int __init arch_init_kprobes(void) |
1568 | { | 1056 | { |
1569 | return init_poke_params(); | 1057 | return arch_init_optprobes(); |
1570 | } | 1058 | } |
1571 | 1059 | ||
1572 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1060 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void) | |||
438 | static __init int activate_jump_labels(void) | 438 | static __init int activate_jump_labels(void) |
439 | { | 439 | { |
440 | if (has_steal_clock) { | 440 | if (has_steal_clock) { |
441 | jump_label_inc(¶virt_steal_enabled); | 441 | static_key_slow_inc(¶virt_steal_enabled); |
442 | if (steal_acc) | 442 | if (steal_acc) |
443 | jump_label_inc(¶virt_steal_rq_enabled); | 443 | static_key_slow_inc(¶virt_steal_rq_enabled); |
444 | } | 444 | } |
445 | 445 | ||
446 | return 0; | 446 | return 0; |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) | |||
202 | __native_flush_tlb_single(addr); | 202 | __native_flush_tlb_single(addr); |
203 | } | 203 | } |
204 | 204 | ||
205 | struct jump_label_key paravirt_steal_enabled; | 205 | struct static_key paravirt_steal_enabled; |
206 | struct jump_label_key paravirt_steal_rq_enabled; | 206 | struct static_key paravirt_steal_rq_enabled; |
207 | 207 | ||
208 | static u64 native_steal_clock(int cpu) | 208 | static u64 native_steal_clock(int cpu) |
209 | { | 209 | { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 15763af7bfe3..44eefde92109 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -377,8 +377,8 @@ static inline int hlt_use_halt(void) | |||
377 | void default_idle(void) | 377 | void default_idle(void) |
378 | { | 378 | { |
379 | if (hlt_use_halt()) { | 379 | if (hlt_use_halt()) { |
380 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 380 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
381 | trace_cpu_idle(1, smp_processor_id()); | 381 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
382 | current_thread_info()->status &= ~TS_POLLING; | 382 | current_thread_info()->status &= ~TS_POLLING; |
383 | /* | 383 | /* |
384 | * TS_POLLING-cleared state must be visible before we | 384 | * TS_POLLING-cleared state must be visible before we |
@@ -391,8 +391,8 @@ void default_idle(void) | |||
391 | else | 391 | else |
392 | local_irq_enable(); | 392 | local_irq_enable(); |
393 | current_thread_info()->status |= TS_POLLING; | 393 | current_thread_info()->status |= TS_POLLING; |
394 | trace_power_end(smp_processor_id()); | 394 | trace_power_end_rcuidle(smp_processor_id()); |
395 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 395 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
396 | } else { | 396 | } else { |
397 | local_irq_enable(); | 397 | local_irq_enable(); |
398 | /* loop is done by the caller */ | 398 | /* loop is done by the caller */ |
@@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
450 | static void mwait_idle(void) | 450 | static void mwait_idle(void) |
451 | { | 451 | { |
452 | if (!need_resched()) { | 452 | if (!need_resched()) { |
453 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 453 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
454 | trace_cpu_idle(1, smp_processor_id()); | 454 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | 455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
456 | clflush((void *)¤t_thread_info()->flags); | 456 | clflush((void *)¤t_thread_info()->flags); |
457 | 457 | ||
@@ -461,8 +461,8 @@ static void mwait_idle(void) | |||
461 | __sti_mwait(0, 0); | 461 | __sti_mwait(0, 0); |
462 | else | 462 | else |
463 | local_irq_enable(); | 463 | local_irq_enable(); |
464 | trace_power_end(smp_processor_id()); | 464 | trace_power_end_rcuidle(smp_processor_id()); |
465 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 465 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
466 | } else | 466 | } else |
467 | local_irq_enable(); | 467 | local_irq_enable(); |
468 | } | 468 | } |
@@ -474,13 +474,13 @@ static void mwait_idle(void) | |||
474 | */ | 474 | */ |
475 | static void poll_idle(void) | 475 | static void poll_idle(void) |
476 | { | 476 | { |
477 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); | 477 | trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); |
478 | trace_cpu_idle(0, smp_processor_id()); | 478 | trace_cpu_idle_rcuidle(0, smp_processor_id()); |
479 | local_irq_enable(); | 479 | local_irq_enable(); |
480 | while (!need_resched()) | 480 | while (!need_resched()) |
481 | cpu_relax(); | 481 | cpu_relax(); |
482 | trace_power_end(smp_processor_id()); | 482 | trace_power_end_rcuidle(smp_processor_id()); |
483 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 483 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
484 | } | 484 | } |
485 | 485 | ||
486 | /* | 486 | /* |
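
The _rcuidle tracepoint variants used above exist because these idle-loop callsites run after RCU has stopped watching the CPU, where a plain tracepoint's RCU-protected callback iteration is unsafe. Roughly, each generated wrapper brackets the normal probe call as in this sketch of the mechanism (not the exact generated code):

    static inline void trace_power_start_rcuidle_sketch(unsigned int type,
                                                        unsigned int state,
                                                        unsigned int cpu)
    {
            rcu_irq_enter();        /* make RCU watch this CPU again */
            trace_power_start(type, state, cpu);
            rcu_irq_exit();
    }
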
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
234 | } | 234 | } |
235 | 235 | ||
236 | static bool mmu_audit; | 236 | static bool mmu_audit; |
237 | static struct jump_label_key mmu_audit_key; | 237 | static struct static_key mmu_audit_key; |
238 | 238 | ||
239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
240 | { | 240 | { |
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | |||
250 | 250 | ||
251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
252 | { | 252 | { |
253 | if (static_branch((&mmu_audit_key))) | 253 | if (static_key_false((&mmu_audit_key))) |
254 | __kvm_mmu_audit(vcpu, point); | 254 | __kvm_mmu_audit(vcpu, point); |
255 | } | 255 | } |
256 | 256 | ||
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void) | |||
259 | if (mmu_audit) | 259 | if (mmu_audit) |
260 | return; | 260 | return; |
261 | 261 | ||
262 | jump_label_inc(&mmu_audit_key); | 262 | static_key_slow_inc(&mmu_audit_key); |
263 | mmu_audit = true; | 263 | mmu_audit = true; |
264 | } | 264 | } |
265 | 265 | ||
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void) | |||
268 | if (!mmu_audit) | 268 | if (!mmu_audit) |
269 | return; | 269 | return; |
270 | 270 | ||
271 | jump_label_dec(&mmu_audit_key); | 271 | static_key_slow_dec(&mmu_audit_key); |
272 | mmu_audit = false; | 272 | mmu_audit = false; |
273 | } | 273 | } |
274 | 274 | ||
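
The renames in this and the surrounding files track the jump_label to static_key API change: a static_key compiles the disabled side of the branch to a patched-out no-op instead of a load-and-test. Typical usage after this patch looks like the sketch below (key and function names are illustrative):

    static struct static_key my_debug_key;  /* zero-initialized: false */

    static inline void hot_path(void)
    {
            /* Out-of-line, patched-out branch while the key is false. */
            if (static_key_false(&my_debug_key))
                    do_expensive_debug_work();
    }

    static void debug_enable(void)          /* slow path */
    {
            static_key_slow_inc(&my_debug_key);     /* patches the branch in */
    }

    static void debug_disable(void)
    {
            static_key_slow_dec(&my_debug_key);
    }
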
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fbda6e1..c1f01a8e9f65 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | |||
29 | return inat_primary_table[opcode]; | 29 | return inat_primary_table[opcode]; |
30 | } | 30 | } |
31 | 31 | ||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | 32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) |
33 | { | ||
34 | insn_attr_t lpfx_attr; | ||
35 | |||
36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
37 | return inat_last_prefix_id(lpfx_attr); | ||
38 | } | ||
39 | |||
40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
33 | insn_attr_t esc_attr) | 41 | insn_attr_t esc_attr) |
34 | { | 42 | { |
35 | const insn_attr_t *table; | 43 | const insn_attr_t *table; |
36 | insn_attr_t lpfx_attr; | 44 | int n; |
37 | int n, m = 0; | ||
38 | 45 | ||
39 | n = inat_escape_id(esc_attr); | 46 | n = inat_escape_id(esc_attr); |
40 | if (last_pfx) { | 47 | |
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | 48 | table = inat_escape_tables[n][0]; |
45 | if (!table) | 49 | if (!table) |
46 | return 0; | 50 | return 0; |
47 | if (inat_has_variant(table[opcode]) && m) { | 51 | if (inat_has_variant(table[opcode]) && lpfx_id) { |
48 | table = inat_escape_tables[n][m]; | 52 | table = inat_escape_tables[n][lpfx_id]; |
49 | if (!table) | 53 | if (!table) |
50 | return 0; | 54 | return 0; |
51 | } | 55 | } |
52 | return table[opcode]; | 56 | return table[opcode]; |
53 | } | 57 | } |
54 | 58 | ||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | 59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, |
56 | insn_attr_t grp_attr) | 60 | insn_attr_t grp_attr) |
57 | { | 61 | { |
58 | const insn_attr_t *table; | 62 | const insn_attr_t *table; |
59 | insn_attr_t lpfx_attr; | 63 | int n; |
60 | int n, m = 0; | ||
61 | 64 | ||
62 | n = inat_group_id(grp_attr); | 65 | n = inat_group_id(grp_attr); |
63 | if (last_pfx) { | 66 | |
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | 67 | table = inat_group_tables[n][0]; |
68 | if (!table) | 68 | if (!table) |
69 | return inat_group_common_attribute(grp_attr); | 69 | return inat_group_common_attribute(grp_attr); |
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { |
71 | table = inat_group_tables[n][m]; | 71 | table = inat_group_tables[n][lpfx_id]; |
72 | if (!table) | 72 | if (!table) |
73 | return inat_group_common_attribute(grp_attr); | 73 | return inat_group_common_attribute(grp_attr); |
74 | } | 74 | } |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3e3fbb..25feb1ae71c5 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -185,7 +185,8 @@ err_out: | |||
185 | void insn_get_opcode(struct insn *insn) | 185 | void insn_get_opcode(struct insn *insn) |
186 | { | 186 | { |
187 | struct insn_field *opcode = &insn->opcode; | 187 | struct insn_field *opcode = &insn->opcode; |
188 | insn_byte_t op, pfx; | 188 | insn_byte_t op; |
189 | int pfx_id; | ||
189 | if (opcode->got) | 190 | if (opcode->got) |
190 | return; | 191 | return; |
191 | if (!insn->prefixes.got) | 192 | if (!insn->prefixes.got) |
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) | |||
212 | /* Get escaped opcode */ | 213 | /* Get escaped opcode */ |
213 | op = get_next(insn_byte_t, insn); | 214 | op = get_next(insn_byte_t, insn); |
214 | opcode->bytes[opcode->nbytes++] = op; | 215 | opcode->bytes[opcode->nbytes++] = op; |
215 | pfx = insn_last_prefix(insn); | 216 | pfx_id = insn_last_prefix_id(insn); |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | 217 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); |
217 | } | 218 | } |
218 | if (inat_must_vex(insn->attr)) | 219 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | 220 | insn->attr = 0; /* This instruction is bad */ |
@@ -235,7 +236,7 @@ err_out: | |||
235 | void insn_get_modrm(struct insn *insn) | 236 | void insn_get_modrm(struct insn *insn) |
236 | { | 237 | { |
237 | struct insn_field *modrm = &insn->modrm; | 238 | struct insn_field *modrm = &insn->modrm; |
238 | insn_byte_t pfx, mod; | 239 | insn_byte_t pfx_id, mod; |
239 | if (modrm->got) | 240 | if (modrm->got) |
240 | return; | 241 | return; |
241 | if (!insn->opcode.got) | 242 | if (!insn->opcode.got) |
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) | |||
246 | modrm->value = mod; | 247 | modrm->value = mod; |
247 | modrm->nbytes = 1; | 248 | modrm->nbytes = 1; |
248 | if (inat_is_group(insn->attr)) { | 249 | if (inat_is_group(insn->attr)) { |
249 | pfx = insn_last_prefix(insn); | 250 | pfx_id = insn_last_prefix_id(insn); |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | 251 | insn->attr = inat_get_group_attribute(mod, pfx_id, |
251 | insn->attr); | 252 | insn->attr); |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | 253 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
253 | insn->attr = 0; /* This is bad */ | 254 | insn->attr = 0; /* This is bad */ |
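For orientation, a sketch of a caller under the reworked API (example_decode() is invented for illustration): insn_get_modrm() now resolves the last-prefix id once via insn_last_prefix_id() and hands that id to the inat lookups above, instead of passing the raw prefix byte around.

#include <asm/insn.h>     /* struct insn, insn_init(), insn_get_modrm() */
#include <linux/errno.h>

/* Hypothetical caller: decode one instruction at kaddr and return its
 * ModRM byte count. insn_get_modrm() internally computes pfx_id and
 * feeds it to inat_get_group_attribute()/inat_get_escape_attribute(). */
static int example_decode(const void *kaddr)
{
	struct insn insn;

	insn_init(&insn, kaddr, 1 /* x86-64 */);
	insn_get_modrm(&insn);          /* pulls in prefixes and opcode too */
	if (!insn.modrm.got)
		return -EINVAL;
	return insn.modrm.nbytes;       /* 0 or 1 */
}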
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 59f4261c753a..6588f43017bd 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c | |||
@@ -94,13 +94,13 @@ int cpuidle_idle_call(void) | |||
94 | 94 | ||
95 | target_state = &drv->states[next_state]; | 95 | target_state = &drv->states[next_state]; |
96 | 96 | ||
97 | trace_power_start(POWER_CSTATE, next_state, dev->cpu); | 97 | trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu); |
98 | trace_cpu_idle(next_state, dev->cpu); | 98 | trace_cpu_idle_rcuidle(next_state, dev->cpu); |
99 | 99 | ||
100 | entered_state = target_state->enter(dev, drv, next_state); | 100 | entered_state = target_state->enter(dev, drv, next_state); |
101 | 101 | ||
102 | trace_power_end(dev->cpu); | 102 | trace_power_end_rcuidle(dev->cpu); |
103 | trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); | 103 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); |
104 | 104 | ||
105 | if (entered_state >= 0) { | 105 | if (entered_state >= 0) { |
106 | /* Update cpuidle counters */ | 106 | /* Update cpuidle counters */ |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -63,6 +63,8 @@ | |||
63 | #include <trace/events/task.h> | 63 | #include <trace/events/task.h> |
64 | #include "internal.h" | 64 | #include "internal.h" |
65 | 65 | ||
66 | #include <trace/events/sched.h> | ||
67 | |||
66 | int core_uses_pid; | 68 | int core_uses_pid; |
67 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 69 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
68 | unsigned int core_pipe_limit; | 70 | unsigned int core_pipe_limit; |
@@ -1402,9 +1404,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1402 | */ | 1404 | */ |
1403 | bprm->recursion_depth = depth; | 1405 | bprm->recursion_depth = depth; |
1404 | if (retval >= 0) { | 1406 | if (retval >= 0) { |
1405 | if (depth == 0) | 1407 | if (depth == 0) { |
1406 | ptrace_event(PTRACE_EVENT_EXEC, | 1408 | trace_sched_process_exec(current, old_pid, bprm); |
1407 | old_pid); | 1409 | ptrace_event(PTRACE_EVENT_EXEC, old_pid); |
1410 | } | ||
1408 | put_binfmt(fmt); | 1411 | put_binfmt(fmt); |
1409 | allow_write_access(bprm->file); | 1412 | allow_write_access(bprm->file); |
1410 | if (bprm->file) | 1413 | if (bprm->file) |
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 028e26f0bf08..72a6cabb4d5b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
@@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, | |||
31 | 31 | ||
32 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); | 32 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); |
33 | 33 | ||
34 | /* | ||
35 | * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are | ||
36 | * set in the flags member. | ||
37 | * | ||
38 | * ENABLED - set/unset when ftrace_ops is registered/unregistered | ||
39 | * GLOBAL - set manually by ftrace_ops user to denote the ftrace_ops | ||
40 | * is part of the global tracers sharing the same filter | ||
41 | * via set_ftrace_* debugfs files. | ||
42 | * DYNAMIC - set when ftrace_ops is registered to denote dynamically | ||
43 | * allocated ftrace_ops which need special care | ||
44 | * CONTROL - set manually by ftrace_ops user to denote the ftrace_ops | ||
45 | * could be controlled by the following calls: | ||
46 | * ftrace_function_local_enable | ||
47 | * ftrace_function_local_disable | ||
48 | */ | ||
34 | enum { | 49 | enum { |
35 | FTRACE_OPS_FL_ENABLED = 1 << 0, | 50 | FTRACE_OPS_FL_ENABLED = 1 << 0, |
36 | FTRACE_OPS_FL_GLOBAL = 1 << 1, | 51 | FTRACE_OPS_FL_GLOBAL = 1 << 1, |
37 | FTRACE_OPS_FL_DYNAMIC = 1 << 2, | 52 | FTRACE_OPS_FL_DYNAMIC = 1 << 2, |
53 | FTRACE_OPS_FL_CONTROL = 1 << 3, | ||
38 | }; | 54 | }; |
39 | 55 | ||
40 | struct ftrace_ops { | 56 | struct ftrace_ops { |
41 | ftrace_func_t func; | 57 | ftrace_func_t func; |
42 | struct ftrace_ops *next; | 58 | struct ftrace_ops *next; |
43 | unsigned long flags; | 59 | unsigned long flags; |
60 | int __percpu *disabled; | ||
44 | #ifdef CONFIG_DYNAMIC_FTRACE | 61 | #ifdef CONFIG_DYNAMIC_FTRACE |
45 | struct ftrace_hash *notrace_hash; | 62 | struct ftrace_hash *notrace_hash; |
46 | struct ftrace_hash *filter_hash; | 63 | struct ftrace_hash *filter_hash; |
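As a rough sketch of how the new CONTROL flag is meant to be used (my_ops, my_trace_func and my_attach are invented; the per-cpu 'disabled' counter behind the flag is expected to be managed by the ftrace core when such an ops is registered):

#include <linux/ftrace.h>
#include <linux/string.h>

static void my_trace_func(unsigned long ip, unsigned long parent_ip)
{
	/* runs for each traced function while enabled on this cpu */
}

static struct ftrace_ops my_ops = {
	.func  = my_trace_func,
	.flags = FTRACE_OPS_FL_CONTROL,	/* per-cpu on/off via the helpers below */
};

static int my_attach(void)
{
	/* with this series, ftrace_set_filter() reports errors (see below) */
	int ret = ftrace_set_filter(&my_ops, (unsigned char *)"schedule",
				    strlen("schedule"), 1);

	return ret ? ret : register_ftrace_function(&my_ops);
}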
@@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops); | |||
97 | int unregister_ftrace_function(struct ftrace_ops *ops); | 114 | int unregister_ftrace_function(struct ftrace_ops *ops); |
98 | void clear_ftrace_function(void); | 115 | void clear_ftrace_function(void); |
99 | 116 | ||
117 | /** | ||
118 | * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu | ||
119 | * | ||
120 | * This function enables tracing on current cpu by decreasing | ||
121 | * the per cpu control variable. | ||
122 | * It must be called with preemption disabled and only on ftrace_ops | ||
123 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
124 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
125 | */ | ||
126 | static inline void ftrace_function_local_enable(struct ftrace_ops *ops) | ||
127 | { | ||
128 | if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) | ||
129 | return; | ||
130 | |||
131 | (*this_cpu_ptr(ops->disabled))--; | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * ftrace_function_local_disable - disable controlled ftrace_ops on current cpu | ||
136 | * | ||
137 | * This function disables tracing on current cpu by increasing | ||
138 | * the per cpu control variable. | ||
139 | * It must be called with preemption disabled and only on ftrace_ops | ||
140 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
141 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
142 | */ | ||
143 | static inline void ftrace_function_local_disable(struct ftrace_ops *ops) | ||
144 | { | ||
145 | if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) | ||
146 | return; | ||
147 | |||
148 | (*this_cpu_ptr(ops->disabled))++; | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * ftrace_function_local_disabled - returns ftrace_ops disabled value | ||
153 | * on current cpu | ||
154 | * | ||
155 | * This function returns value of ftrace_ops::disabled on current cpu. | ||
156 | * It must be called with preemption disabled and only on ftrace_ops | ||
157 | * registered with FTRACE_OPS_FL_CONTROL. If called without preemption | ||
158 | * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. | ||
159 | */ | ||
160 | static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) | ||
161 | { | ||
162 | WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)); | ||
163 | return *this_cpu_ptr(ops->disabled); | ||
164 | } | ||
165 | |||
100 | extern void ftrace_stub(unsigned long a0, unsigned long a1); | 166 | extern void ftrace_stub(unsigned long a0, unsigned long a1); |
101 | 167 | ||
102 | #else /* !CONFIG_FUNCTION_TRACER */ | 168 | #else /* !CONFIG_FUNCTION_TRACER */ |
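The counter is biased so that zero means "enabled", which is why enable decrements and disable increments. The intended calling pattern, sketched (the handler body is invented); the function-trace dispatch path would skip any ops whose ftrace_function_local_disabled() value is non-zero:

/* preemption must already be disabled here */
static void my_sensitive_section(struct ftrace_ops *ops)
{
	ftrace_function_local_disable(ops);	/* ++disabled: mute this cpu */

	/* ... work that must not recurse into the tracer ... */

	ftrace_function_local_enable(ops);	/* --disabled: unmute */
}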
@@ -178,12 +244,13 @@ struct dyn_ftrace { | |||
178 | }; | 244 | }; |
179 | 245 | ||
180 | int ftrace_force_update(void); | 246 | int ftrace_force_update(void); |
181 | void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, | 247 | int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, |
182 | int len, int reset); | 248 | int len, int reset); |
183 | void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, | 249 | int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, |
184 | int len, int reset); | 250 | int len, int reset); |
185 | void ftrace_set_global_filter(unsigned char *buf, int len, int reset); | 251 | void ftrace_set_global_filter(unsigned char *buf, int len, int reset); |
186 | void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); | 252 | void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); |
253 | void ftrace_free_filter(struct ftrace_ops *ops); | ||
187 | 254 | ||
188 | int register_ftrace_command(struct ftrace_func_command *cmd); | 255 | int register_ftrace_command(struct ftrace_func_command *cmd); |
189 | int unregister_ftrace_command(struct ftrace_func_command *cmd); | 256 | int unregister_ftrace_command(struct ftrace_func_command *cmd); |
@@ -314,9 +381,6 @@ extern void ftrace_enable_daemon(void); | |||
314 | #else | 381 | #else |
315 | static inline int skip_trace(unsigned long ip) { return 0; } | 382 | static inline int skip_trace(unsigned long ip) { return 0; } |
316 | static inline int ftrace_force_update(void) { return 0; } | 383 | static inline int ftrace_force_update(void) { return 0; } |
317 | static inline void ftrace_set_filter(unsigned char *buf, int len, int reset) | ||
318 | { | ||
319 | } | ||
320 | static inline void ftrace_disable_daemon(void) { } | 384 | static inline void ftrace_disable_daemon(void) { } |
321 | static inline void ftrace_enable_daemon(void) { } | 385 | static inline void ftrace_enable_daemon(void) { } |
322 | static inline void ftrace_release_mod(struct module *mod) {} | 386 | static inline void ftrace_release_mod(struct module *mod) {} |
@@ -340,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end) | |||
340 | */ | 404 | */ |
341 | #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) | 405 | #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) |
342 | #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) | 406 | #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) |
407 | #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) | ||
408 | #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) | ||
409 | #define ftrace_free_filter(ops) do { } while (0) | ||
343 | 410 | ||
344 | static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, | 411 | static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, |
345 | size_t cnt, loff_t *ppos) { return -ENODEV; } | 412 | size_t cnt, loff_t *ppos) { return -ENODEV; } |
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c3da42dd22ba..dd478fc8f9f5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -146,6 +146,10 @@ enum trace_reg { | |||
146 | TRACE_REG_UNREGISTER, | 146 | TRACE_REG_UNREGISTER, |
147 | TRACE_REG_PERF_REGISTER, | 147 | TRACE_REG_PERF_REGISTER, |
148 | TRACE_REG_PERF_UNREGISTER, | 148 | TRACE_REG_PERF_UNREGISTER, |
149 | TRACE_REG_PERF_OPEN, | ||
150 | TRACE_REG_PERF_CLOSE, | ||
151 | TRACE_REG_PERF_ADD, | ||
152 | TRACE_REG_PERF_DEL, | ||
149 | }; | 153 | }; |
150 | 154 | ||
151 | struct ftrace_event_call; | 155 | struct ftrace_event_call; |
@@ -157,7 +161,7 @@ struct ftrace_event_class { | |||
157 | void *perf_probe; | 161 | void *perf_probe; |
158 | #endif | 162 | #endif |
159 | int (*reg)(struct ftrace_event_call *event, | 163 | int (*reg)(struct ftrace_event_call *event, |
160 | enum trace_reg type); | 164 | enum trace_reg type, void *data); |
161 | int (*define_fields)(struct ftrace_event_call *); | 165 | int (*define_fields)(struct ftrace_event_call *); |
162 | struct list_head *(*get_fields)(struct ftrace_event_call *); | 166 | struct list_head *(*get_fields)(struct ftrace_event_call *); |
163 | struct list_head fields; | 167 | struct list_head fields; |
@@ -165,7 +169,7 @@ struct ftrace_event_class { | |||
165 | }; | 169 | }; |
166 | 170 | ||
167 | extern int ftrace_event_reg(struct ftrace_event_call *event, | 171 | extern int ftrace_event_reg(struct ftrace_event_call *event, |
168 | enum trace_reg type); | 172 | enum trace_reg type, void *data); |
169 | 173 | ||
170 | enum { | 174 | enum { |
171 | TRACE_EVENT_FL_ENABLED_BIT, | 175 | TRACE_EVENT_FL_ENABLED_BIT, |
@@ -241,6 +245,7 @@ enum { | |||
241 | FILTER_STATIC_STRING, | 245 | FILTER_STATIC_STRING, |
242 | FILTER_DYN_STRING, | 246 | FILTER_DYN_STRING, |
243 | FILTER_PTR_STRING, | 247 | FILTER_PTR_STRING, |
248 | FILTER_TRACE_FN, | ||
244 | }; | 249 | }; |
245 | 250 | ||
246 | #define EVENT_STORAGE_SIZE 128 | 251 | #define EVENT_STORAGE_SIZE 128 |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a64b00e286f5..3f830e005118 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | #include <asm/ptrace.h> | 21 | #include <asm/ptrace.h> |
22 | #include <asm/system.h> | 22 | #include <asm/system.h> |
23 | #include <trace/events/irq.h> | ||
24 | 23 | ||
25 | /* | 24 | /* |
26 | * These correspond to the IORESOURCE_IRQ_* defines in | 25 | * These correspond to the IORESOURCE_IRQ_* defines in |
@@ -456,11 +455,7 @@ asmlinkage void do_softirq(void); | |||
456 | asmlinkage void __do_softirq(void); | 455 | asmlinkage void __do_softirq(void); |
457 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | 456 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); |
458 | extern void softirq_init(void); | 457 | extern void softirq_init(void); |
459 | static inline void __raise_softirq_irqoff(unsigned int nr) | 458 | extern void __raise_softirq_irqoff(unsigned int nr); |
460 | { | ||
461 | trace_softirq_raise(nr); | ||
462 | or_softirq_pending(1UL << nr); | ||
463 | } | ||
464 | 459 | ||
465 | extern void raise_softirq_irqoff(unsigned int nr); | 460 | extern void raise_softirq_irqoff(unsigned int nr); |
466 | extern void raise_softirq(unsigned int nr); | 461 | extern void raise_softirq(unsigned int nr); |
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5ce8b140428f..c513a40510f5 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
@@ -1,22 +1,69 @@ | |||
1 | #ifndef _LINUX_JUMP_LABEL_H | 1 | #ifndef _LINUX_JUMP_LABEL_H |
2 | #define _LINUX_JUMP_LABEL_H | 2 | #define _LINUX_JUMP_LABEL_H |
3 | 3 | ||
4 | /* | ||
5 | * Jump label support | ||
6 | * | ||
7 | * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> | ||
8 | * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | ||
10 | * Jump labels provide an interface to generate dynamic branches using | ||
11 | * self-modifying code. Assuming toolchain and architecture support, the result | ||
12 | * of an "if (static_key_false(&key))" statement is an unconditional branch (which | ||
13 | * defaults to false - the true block is placed out of line). | ||
14 | * | ||
15 | * However, at runtime we can change the branch target using | ||
16 | * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key | ||
17 | * object and for as long as there are references all branches referring to | ||
18 | * that particular key will point to the (out of line) true block. | ||
19 | * | ||
20 | * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions | ||
21 | * must be considered absolute slow paths (machine wide synchronization etc.). | ||
22 | * OTOH, since the affected branches are unconditional, their runtime overhead | ||
23 | * will be absolutely minimal, esp. in the default (off) case where the total | ||
24 | * effect is a single NOP of appropriate size. The on case will patch in a jump | ||
25 | * to the out-of-line block. | ||
26 | * | ||
27 | * When the control is directly exposed to userspace it is prudent to delay the | ||
28 | * decrement to avoid high frequency code modifications which can (and do) | ||
29 | * cause significant performance degradation. Struct static_key_deferred and | ||
30 | * static_key_slow_dec_deferred() provide for this. | ||
31 | * | ||
32 | * Lacking toolchain and/or architecture support, it falls back to a simple | ||
33 | * conditional branch. | ||
34 | * | ||
35 | * struct static_key my_key = STATIC_KEY_INIT_TRUE; | ||
36 | * | ||
37 | * if (static_key_true(&my_key)) { | ||
38 | * } | ||
39 | * | ||
40 | * will result in the true case being in-line and will start the key with a | ||
41 | * single reference. Mixing static_key_true() and static_key_false() on the | ||
42 | * same key is not allowed. | ||
43 | * | ||
44 | * Not initializing the key (static data is initialized to 0s anyway) is the | ||
45 | * same as using STATIC_KEY_INIT_FALSE and static_key_false() is | ||
46 | * equivalent to static_branch(). | ||
47 | * | ||
48 | */ | ||
49 | |||
4 | #include <linux/types.h> | 50 | #include <linux/types.h> |
5 | #include <linux/compiler.h> | 51 | #include <linux/compiler.h> |
6 | #include <linux/workqueue.h> | 52 | #include <linux/workqueue.h> |
7 | 53 | ||
8 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) | 54 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) |
9 | 55 | ||
10 | struct jump_label_key { | 56 | struct static_key { |
11 | atomic_t enabled; | 57 | atomic_t enabled; |
58 | /* Set lsb bit to 1 if branch is default true, 0 otherwise */ | ||
12 | struct jump_entry *entries; | 59 | struct jump_entry *entries; |
13 | #ifdef CONFIG_MODULES | 60 | #ifdef CONFIG_MODULES |
14 | struct jump_label_mod *next; | 61 | struct static_key_mod *next; |
15 | #endif | 62 | #endif |
16 | }; | 63 | }; |
17 | 64 | ||
18 | struct jump_label_key_deferred { | 65 | struct static_key_deferred { |
19 | struct jump_label_key key; | 66 | struct static_key key; |
20 | unsigned long timeout; | 67 | unsigned long timeout; |
21 | struct delayed_work work; | 68 | struct delayed_work work; |
22 | }; | 69 | }; |
@@ -34,13 +81,34 @@ struct module; | |||
34 | 81 | ||
35 | #ifdef HAVE_JUMP_LABEL | 82 | #ifdef HAVE_JUMP_LABEL |
36 | 83 | ||
37 | #ifdef CONFIG_MODULES | 84 | #define JUMP_LABEL_TRUE_BRANCH 1UL |
38 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} | 85 | |
39 | #else | 86 | static |
40 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} | 87 | inline struct jump_entry *jump_label_get_entries(struct static_key *key) |
41 | #endif | 88 | { |
89 | return (struct jump_entry *)((unsigned long)key->entries | ||
90 | & ~JUMP_LABEL_TRUE_BRANCH); | ||
91 | } | ||
42 | 92 | ||
43 | static __always_inline bool static_branch(struct jump_label_key *key) | 93 | static inline bool jump_label_get_branch_default(struct static_key *key) |
94 | { | ||
95 | if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) | ||
96 | return true; | ||
97 | return false; | ||
98 | } | ||
99 | |||
100 | static __always_inline bool static_key_false(struct static_key *key) | ||
101 | { | ||
102 | return arch_static_branch(key); | ||
103 | } | ||
104 | |||
105 | static __always_inline bool static_key_true(struct static_key *key) | ||
106 | { | ||
107 | return !static_key_false(key); | ||
108 | } | ||
109 | |||
110 | /* Deprecated. Please use 'static_key_false()' instead. */ | ||
111 | static __always_inline bool static_branch(struct static_key *key) | ||
44 | { | 112 | { |
45 | return arch_static_branch(key); | 113 | return arch_static_branch(key); |
46 | } | 114 | } |
@@ -56,21 +124,23 @@ extern void arch_jump_label_transform(struct jump_entry *entry, | |||
56 | extern void arch_jump_label_transform_static(struct jump_entry *entry, | 124 | extern void arch_jump_label_transform_static(struct jump_entry *entry, |
57 | enum jump_label_type type); | 125 | enum jump_label_type type); |
58 | extern int jump_label_text_reserved(void *start, void *end); | 126 | extern int jump_label_text_reserved(void *start, void *end); |
59 | extern void jump_label_inc(struct jump_label_key *key); | 127 | extern void static_key_slow_inc(struct static_key *key); |
60 | extern void jump_label_dec(struct jump_label_key *key); | 128 | extern void static_key_slow_dec(struct static_key *key); |
61 | extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); | 129 | extern void static_key_slow_dec_deferred(struct static_key_deferred *key); |
62 | extern bool jump_label_enabled(struct jump_label_key *key); | ||
63 | extern void jump_label_apply_nops(struct module *mod); | 130 | extern void jump_label_apply_nops(struct module *mod); |
64 | extern void jump_label_rate_limit(struct jump_label_key_deferred *key, | 131 | extern void |
65 | unsigned long rl); | 132 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); |
133 | |||
134 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | ||
135 | { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) | ||
136 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ | ||
137 | { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) | ||
66 | 138 | ||
67 | #else /* !HAVE_JUMP_LABEL */ | 139 | #else /* !HAVE_JUMP_LABEL */ |
68 | 140 | ||
69 | #include <linux/atomic.h> | 141 | #include <linux/atomic.h> |
70 | 142 | ||
71 | #define JUMP_LABEL_INIT {ATOMIC_INIT(0)} | 143 | struct static_key { |
72 | |||
73 | struct jump_label_key { | ||
74 | atomic_t enabled; | 144 | atomic_t enabled; |
75 | }; | 145 | }; |
76 | 146 | ||
@@ -78,30 +148,45 @@ static __always_inline void jump_label_init(void) | |||
78 | { | 148 | { |
79 | } | 149 | } |
80 | 150 | ||
81 | struct jump_label_key_deferred { | 151 | struct static_key_deferred { |
82 | struct jump_label_key key; | 152 | struct static_key key; |
83 | }; | 153 | }; |
84 | 154 | ||
85 | static __always_inline bool static_branch(struct jump_label_key *key) | 155 | static __always_inline bool static_key_false(struct static_key *key) |
156 | { | ||
157 | if (unlikely(atomic_read(&key->enabled) > 0)) | ||
158 | return true; | ||
159 | return false; | ||
160 | } | ||
161 | |||
162 | static __always_inline bool static_key_true(struct static_key *key) | ||
86 | { | 163 | { |
87 | if (unlikely(atomic_read(&key->enabled))) | 164 | if (likely(atomic_read(&key->enabled) > 0)) |
88 | return true; | 165 | return true; |
89 | return false; | 166 | return false; |
90 | } | 167 | } |
91 | 168 | ||
92 | static inline void jump_label_inc(struct jump_label_key *key) | 169 | /* Deprecated. Please use 'static_key_false()' instead. */ |
170 | static __always_inline bool static_branch(struct static_key *key) | ||
171 | { | ||
172 | if (unlikely(atomic_read(&key->enabled) > 0)) | ||
173 | return true; | ||
174 | return false; | ||
175 | } | ||
176 | |||
177 | static inline void static_key_slow_inc(struct static_key *key) | ||
93 | { | 178 | { |
94 | atomic_inc(&key->enabled); | 179 | atomic_inc(&key->enabled); |
95 | } | 180 | } |
96 | 181 | ||
97 | static inline void jump_label_dec(struct jump_label_key *key) | 182 | static inline void static_key_slow_dec(struct static_key *key) |
98 | { | 183 | { |
99 | atomic_dec(&key->enabled); | 184 | atomic_dec(&key->enabled); |
100 | } | 185 | } |
101 | 186 | ||
102 | static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) | 187 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) |
103 | { | 188 | { |
104 | jump_label_dec(&key->key); | 189 | static_key_slow_dec(&key->key); |
105 | } | 190 | } |
106 | 191 | ||
107 | static inline int jump_label_text_reserved(void *start, void *end) | 192 | static inline int jump_label_text_reserved(void *start, void *end) |
@@ -112,23 +197,30 @@ static inline int jump_label_text_reserved(void *start, void *end) | |||
112 | static inline void jump_label_lock(void) {} | 197 | static inline void jump_label_lock(void) {} |
113 | static inline void jump_label_unlock(void) {} | 198 | static inline void jump_label_unlock(void) {} |
114 | 199 | ||
115 | static inline bool jump_label_enabled(struct jump_label_key *key) | ||
116 | { | ||
117 | return !!atomic_read(&key->enabled); | ||
118 | } | ||
119 | |||
120 | static inline int jump_label_apply_nops(struct module *mod) | 200 | static inline int jump_label_apply_nops(struct module *mod) |
121 | { | 201 | { |
122 | return 0; | 202 | return 0; |
123 | } | 203 | } |
124 | 204 | ||
125 | static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, | 205 | static inline void |
206 | jump_label_rate_limit(struct static_key_deferred *key, | ||
126 | unsigned long rl) | 207 | unsigned long rl) |
127 | { | 208 | { |
128 | } | 209 | } |
210 | |||
211 | #define STATIC_KEY_INIT_TRUE ((struct static_key) \ | ||
212 | { .enabled = ATOMIC_INIT(1) }) | ||
213 | #define STATIC_KEY_INIT_FALSE ((struct static_key) \ | ||
214 | { .enabled = ATOMIC_INIT(0) }) | ||
215 | |||
129 | #endif /* HAVE_JUMP_LABEL */ | 216 | #endif /* HAVE_JUMP_LABEL */ |
130 | 217 | ||
131 | #define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) | 218 | #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE |
132 | #define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) | 219 | #define jump_label_enabled static_key_enabled |
220 | |||
221 | static inline bool static_key_enabled(struct static_key *key) | ||
222 | { | ||
223 | return (atomic_read(&key->enabled) > 0); | ||
224 | } | ||
133 | 225 | ||
134 | #endif /* _LINUX_JUMP_LABEL_H */ | 226 | #endif /* _LINUX_JUMP_LABEL_H */ |
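Tying the renamed pieces together, a minimal sketch of a static-key user under the new API (my_feature_key and the functions around it are invented for the example):

#include <linux/static_key.h>

static struct static_key my_feature_key = STATIC_KEY_INIT_FALSE;

static void my_hot_path(void)
{
	if (static_key_false(&my_feature_key)) {
		/* out-of-line true block, only reached after a slow_inc */
	}
	/* default case costs a single NOP where the branch would be */
}

static void my_feature_set(bool on)
{
	/* slow path: may patch code machine-wide, keep off hot paths */
	if (on)
		static_key_slow_inc(&my_feature_key);
	else
		static_key_slow_dec(&my_feature_key);
}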
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..7dfaae7846ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -214,8 +214,8 @@ enum { | |||
214 | #include <linux/skbuff.h> | 214 | #include <linux/skbuff.h> |
215 | 215 | ||
216 | #ifdef CONFIG_RPS | 216 | #ifdef CONFIG_RPS |
217 | #include <linux/jump_label.h> | 217 | #include <linux/static_key.h> |
218 | extern struct jump_label_key rps_needed; | 218 | extern struct static_key rps_needed; |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | struct neighbour; | 221 | struct neighbour; |
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d0..29734be334c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h | |||
@@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; | |||
163 | extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 163 | extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
164 | 164 | ||
165 | #if defined(CONFIG_JUMP_LABEL) | 165 | #if defined(CONFIG_JUMP_LABEL) |
166 | #include <linux/jump_label.h> | 166 | #include <linux/static_key.h> |
167 | extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 167 | extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
168 | static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) | 168 | static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) |
169 | { | 169 | { |
170 | if (__builtin_constant_p(pf) && | 170 | if (__builtin_constant_p(pf) && |
171 | __builtin_constant_p(hook)) | 171 | __builtin_constant_p(hook)) |
172 | return static_branch(&nf_hooks_needed[pf][hook]); | 172 | return static_key_false(&nf_hooks_needed[pf][hook]); |
173 | 173 | ||
174 | return !list_empty(&nf_hooks[pf][hook]); | 174 | return !list_empty(&nf_hooks[pf][hook]); |
175 | } | 175 | } |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index abb2776be1ba..bd9f55a5958d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -129,11 +129,40 @@ enum perf_event_sample_format { | |||
129 | PERF_SAMPLE_PERIOD = 1U << 8, | 129 | PERF_SAMPLE_PERIOD = 1U << 8, |
130 | PERF_SAMPLE_STREAM_ID = 1U << 9, | 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
131 | PERF_SAMPLE_RAW = 1U << 10, | 131 | PERF_SAMPLE_RAW = 1U << 10, |
132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, | ||
132 | 133 | ||
133 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | 134 | PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ |
134 | }; | 135 | }; |
135 | 136 | ||
136 | /* | 137 | /* |
138 | * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set | ||
139 | * | ||
140 | * If the user does not pass priv level information via branch_sample_type, | ||
141 | * the kernel uses the event's priv level. Branch and event priv levels do | ||
142 | * not have to match. Branch priv level is checked for permissions. | ||
143 | * | ||
144 | * The branch types can be combined, however BRANCH_ANY covers all types | ||
145 | * of branches and therefore it supersedes all the other types. | ||
146 | */ | ||
147 | enum perf_branch_sample_type { | ||
148 | PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ | ||
149 | PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ | ||
150 | PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ | ||
151 | |||
152 | PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ | ||
153 | PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ | ||
154 | PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ | ||
155 | PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ | ||
156 | |||
157 | PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ | ||
158 | }; | ||
159 | |||
160 | #define PERF_SAMPLE_BRANCH_PLM_ALL \ | ||
161 | (PERF_SAMPLE_BRANCH_USER|\ | ||
162 | PERF_SAMPLE_BRANCH_KERNEL|\ | ||
163 | PERF_SAMPLE_BRANCH_HV) | ||
164 | |||
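To illustrate the ABI addition, a user-space perf_event_attr would be populated along these lines (a sketch; only fields introduced or referenced in this patch are shown):

#include <linux/perf_event.h>
#include <string.h>

static void setup_branch_sampling(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size        = PERF_ATTR_SIZE_VER2;  /* covers branch_sample_type */
	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* no priv bits set: the kernel falls back to the event's priv level */
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL |
				   PERF_SAMPLE_BRANCH_ANY_RETURN;
}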
165 | /* | ||
137 | * The format of the data returned by read() on a perf event fd, | 166 | * The format of the data returned by read() on a perf event fd, |
138 | * as specified by attr.read_format: | 167 | * as specified by attr.read_format: |
139 | * | 168 | * |
@@ -163,6 +192,8 @@ enum perf_event_read_format { | |||
163 | }; | 192 | }; |
164 | 193 | ||
165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | ||
196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | ||
166 | 197 | ||
167 | /* | 198 | /* |
168 | * Hardware event_id to monitor via a performance monitoring event: | 199 | * Hardware event_id to monitor via a performance monitoring event: |
@@ -240,6 +271,7 @@ struct perf_event_attr { | |||
240 | __u64 bp_len; | 271 | __u64 bp_len; |
241 | __u64 config2; /* extension of config1 */ | 272 | __u64 config2; /* extension of config1 */ |
242 | }; | 273 | }; |
274 | __u64 branch_sample_type; /* enum branch_sample_type */ | ||
243 | }; | 275 | }; |
244 | 276 | ||
245 | /* | 277 | /* |
@@ -291,12 +323,14 @@ struct perf_event_mmap_page { | |||
291 | __s64 offset; /* add to hardware event value */ | 323 | __s64 offset; /* add to hardware event value */ |
292 | __u64 time_enabled; /* time event active */ | 324 | __u64 time_enabled; /* time event active */ |
293 | __u64 time_running; /* time event on cpu */ | 325 | __u64 time_running; /* time event on cpu */ |
326 | __u32 time_mult, time_shift; | ||
327 | __u64 time_offset; | ||
294 | 328 | ||
295 | /* | 329 | /* |
296 | * Hole for extension of the self monitor capabilities | 330 | * Hole for extension of the self monitor capabilities |
297 | */ | 331 | */ |
298 | 332 | ||
299 | __u64 __reserved[123]; /* align to 1k */ | 333 | __u64 __reserved[121]; /* align to 1k */ |
300 | 334 | ||
301 | /* | 335 | /* |
302 | * Control data for the mmap() data buffer. | 336 | * Control data for the mmap() data buffer. |
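The three new fields let a self-monitoring task convert a raw cycle count into event time without entering the kernel. The conversion these fields imply, sketched below; it ignores multiply overflow, which careful userspace would handle by splitting cyc into quotient and remainder:

#include <linux/perf_event.h>

static __u64 cyc_to_time(const struct perf_event_mmap_page *pg, __u64 cyc)
{
	/* time = offset + (cycles * mult) >> shift */
	return pg->time_offset + ((cyc * pg->time_mult) >> pg->time_shift);
}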
@@ -456,6 +490,8 @@ enum perf_event_type { | |||
456 | * | 490 | * |
457 | * { u32 size; | 491 | * { u32 size; |
458 | * char data[size];}&& PERF_SAMPLE_RAW | 492 | * char data[size];}&& PERF_SAMPLE_RAW |
493 | * | ||
494 | * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK | ||
459 | * }; | 495 | * }; |
460 | */ | 496 | */ |
461 | PERF_RECORD_SAMPLE = 9, | 497 | PERF_RECORD_SAMPLE = 9, |
@@ -512,7 +548,7 @@ struct perf_guest_info_callbacks { | |||
512 | #include <linux/ftrace.h> | 548 | #include <linux/ftrace.h> |
513 | #include <linux/cpu.h> | 549 | #include <linux/cpu.h> |
514 | #include <linux/irq_work.h> | 550 | #include <linux/irq_work.h> |
515 | #include <linux/jump_label.h> | 551 | #include <linux/static_key.h> |
516 | #include <linux/atomic.h> | 552 | #include <linux/atomic.h> |
517 | #include <asm/local.h> | 553 | #include <asm/local.h> |
518 | 554 | ||
@@ -528,12 +564,34 @@ struct perf_raw_record { | |||
528 | void *data; | 564 | void *data; |
529 | }; | 565 | }; |
530 | 566 | ||
567 | /* | ||
568 | * single taken branch record layout: | ||
569 | * | ||
570 | * from: source instruction (may not always be a branch insn) | ||
571 | * to: branch target | ||
572 | * mispred: branch target was mispredicted | ||
573 | * predicted: branch target was predicted | ||
574 | * | ||
575 | * support for mispred, predicted is optional. In case it | ||
576 | * is not supported, mispred = predicted = 0. | ||
577 | */ | ||
531 | struct perf_branch_entry { | 578 | struct perf_branch_entry { |
532 | __u64 from; | 579 | __u64 from; |
533 | __u64 to; | 580 | __u64 to; |
534 | __u64 flags; | 581 | __u64 mispred:1, /* target mispredicted */ |
582 | predicted:1,/* target predicted */ | ||
583 | reserved:62; | ||
535 | }; | 584 | }; |
536 | 585 | ||
586 | /* | ||
587 | * branch stack layout: | ||
588 | * nr: number of taken branches stored in entries[] | ||
589 | * | ||
590 | * Note that nr can vary from sample to sample. | ||
591 | * Branches (to, from) are stored from most recent | ||
592 | * to least recent, i.e., entries[0] contains the most | ||
593 | * recent branch. | ||
594 | */ | ||
537 | struct perf_branch_stack { | 595 | struct perf_branch_stack { |
538 | __u64 nr; | 596 | __u64 nr; |
539 | struct perf_branch_entry entries[0]; | 597 | struct perf_branch_entry entries[0]; |
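A consumer therefore walks the stack newest-first; a hedged kernel-side sketch (dump_branch_stack is invented):

#include <linux/kernel.h>
#include <linux/perf_event.h>

static void dump_branch_stack(const struct perf_branch_stack *bs)
{
	__u64 i;

	for (i = 0; i < bs->nr; i++) {	/* entries[0] = most recent branch */
		const struct perf_branch_entry *e = &bs->entries[i];

		pr_info("0x%llx -> 0x%llx %s\n",
			(unsigned long long)e->from,
			(unsigned long long)e->to,
			e->mispred ? "mispredicted" : "predicted");
	}
}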
@@ -564,7 +622,9 @@ struct hw_perf_event { | |||
564 | unsigned long event_base; | 622 | unsigned long event_base; |
565 | int idx; | 623 | int idx; |
566 | int last_cpu; | 624 | int last_cpu; |
625 | |||
567 | struct hw_perf_event_extra extra_reg; | 626 | struct hw_perf_event_extra extra_reg; |
627 | struct hw_perf_event_extra branch_reg; | ||
568 | }; | 628 | }; |
569 | struct { /* software */ | 629 | struct { /* software */ |
570 | struct hrtimer hrtimer; | 630 | struct hrtimer hrtimer; |
@@ -616,6 +676,7 @@ struct pmu { | |||
616 | struct list_head entry; | 676 | struct list_head entry; |
617 | 677 | ||
618 | struct device *dev; | 678 | struct device *dev; |
679 | const struct attribute_group **attr_groups; | ||
619 | char *name; | 680 | char *name; |
620 | int type; | 681 | int type; |
621 | 682 | ||
@@ -681,6 +742,17 @@ struct pmu { | |||
681 | * for each successful ->add() during the transaction. | 742 | * for each successful ->add() during the transaction. |
682 | */ | 743 | */ |
683 | void (*cancel_txn) (struct pmu *pmu); /* optional */ | 744 | void (*cancel_txn) (struct pmu *pmu); /* optional */ |
745 | |||
746 | /* | ||
747 | * Will return the value for perf_event_mmap_page::index for this event, | ||
748 | * if no implementation is provided it will default to: event->hw.idx + 1. | ||
749 | */ | ||
750 | int (*event_idx) (struct perf_event *event); /* optional */ | ||
751 | |||
752 | /* | ||
753 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
754 | */ | ||
755 | void (*flush_branch_stack) (void); | ||
684 | }; | 756 | }; |
685 | 757 | ||
686 | /** | 758 | /** |
@@ -850,6 +922,9 @@ struct perf_event { | |||
850 | #ifdef CONFIG_EVENT_TRACING | 922 | #ifdef CONFIG_EVENT_TRACING |
851 | struct ftrace_event_call *tp_event; | 923 | struct ftrace_event_call *tp_event; |
852 | struct event_filter *filter; | 924 | struct event_filter *filter; |
925 | #ifdef CONFIG_FUNCTION_TRACER | ||
926 | struct ftrace_ops ftrace_ops; | ||
927 | #endif | ||
853 | #endif | 928 | #endif |
854 | 929 | ||
855 | #ifdef CONFIG_CGROUP_PERF | 930 | #ifdef CONFIG_CGROUP_PERF |
@@ -911,7 +986,8 @@ struct perf_event_context { | |||
911 | u64 parent_gen; | 986 | u64 parent_gen; |
912 | u64 generation; | 987 | u64 generation; |
913 | int pin_count; | 988 | int pin_count; |
914 | int nr_cgroups; /* cgroup events present */ | 989 | int nr_cgroups; /* cgroup evts */ |
990 | int nr_branch_stack; /* branch_stack evt */ | ||
915 | struct rcu_head rcu_head; | 991 | struct rcu_head rcu_head; |
916 | }; | 992 | }; |
917 | 993 | ||
@@ -976,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
976 | extern u64 perf_event_read_value(struct perf_event *event, | 1052 | extern u64 perf_event_read_value(struct perf_event *event, |
977 | u64 *enabled, u64 *running); | 1053 | u64 *enabled, u64 *running); |
978 | 1054 | ||
1055 | |||
979 | struct perf_sample_data { | 1056 | struct perf_sample_data { |
980 | u64 type; | 1057 | u64 type; |
981 | 1058 | ||
@@ -995,12 +1072,14 @@ struct perf_sample_data { | |||
995 | u64 period; | 1072 | u64 period; |
996 | struct perf_callchain_entry *callchain; | 1073 | struct perf_callchain_entry *callchain; |
997 | struct perf_raw_record *raw; | 1074 | struct perf_raw_record *raw; |
1075 | struct perf_branch_stack *br_stack; | ||
998 | }; | 1076 | }; |
999 | 1077 | ||
1000 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) | 1078 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
1001 | { | 1079 | { |
1002 | data->addr = addr; | 1080 | data->addr = addr; |
1003 | data->raw = NULL; | 1081 | data->raw = NULL; |
1082 | data->br_stack = NULL; | ||
1004 | } | 1083 | } |
1005 | 1084 | ||
1006 | extern void perf_output_sample(struct perf_output_handle *handle, | 1085 | extern void perf_output_sample(struct perf_output_handle *handle, |
@@ -1029,7 +1108,7 @@ static inline int is_software_event(struct perf_event *event) | |||
1029 | return event->pmu->task_ctx_nr == perf_sw_context; | 1108 | return event->pmu->task_ctx_nr == perf_sw_context; |
1030 | } | 1109 | } |
1031 | 1110 | ||
1032 | extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 1111 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
1033 | 1112 | ||
1034 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); | 1113 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); |
1035 | 1114 | ||
@@ -1057,7 +1136,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | |||
1057 | { | 1136 | { |
1058 | struct pt_regs hot_regs; | 1137 | struct pt_regs hot_regs; |
1059 | 1138 | ||
1060 | if (static_branch(&perf_swevent_enabled[event_id])) { | 1139 | if (static_key_false(&perf_swevent_enabled[event_id])) { |
1061 | if (!regs) { | 1140 | if (!regs) { |
1062 | perf_fetch_caller_regs(&hot_regs); | 1141 | perf_fetch_caller_regs(&hot_regs); |
1063 | regs = &hot_regs; | 1142 | regs = &hot_regs; |
@@ -1066,12 +1145,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | |||
1066 | } | 1145 | } |
1067 | } | 1146 | } |
1068 | 1147 | ||
1069 | extern struct jump_label_key_deferred perf_sched_events; | 1148 | extern struct static_key_deferred perf_sched_events; |
1070 | 1149 | ||
1071 | static inline void perf_event_task_sched_in(struct task_struct *prev, | 1150 | static inline void perf_event_task_sched_in(struct task_struct *prev, |
1072 | struct task_struct *task) | 1151 | struct task_struct *task) |
1073 | { | 1152 | { |
1074 | if (static_branch(&perf_sched_events.key)) | 1153 | if (static_key_false(&perf_sched_events.key)) |
1075 | __perf_event_task_sched_in(prev, task); | 1154 | __perf_event_task_sched_in(prev, task); |
1076 | } | 1155 | } |
1077 | 1156 | ||
@@ -1080,7 +1159,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, | |||
1080 | { | 1159 | { |
1081 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); | 1160 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); |
1082 | 1161 | ||
1083 | if (static_branch(&perf_sched_events.key)) | 1162 | if (static_key_false(&perf_sched_events.key)) |
1084 | __perf_event_task_sched_out(prev, next); | 1163 | __perf_event_task_sched_out(prev, next); |
1085 | } | 1164 | } |
1086 | 1165 | ||
@@ -1139,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); | |||
1139 | # define perf_instruction_pointer(regs) instruction_pointer(regs) | 1218 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
1140 | #endif | 1219 | #endif |
1141 | 1220 | ||
1221 | static inline bool has_branch_stack(struct perf_event *event) | ||
1222 | { | ||
1223 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | ||
1224 | } | ||
1225 | |||
1142 | extern int perf_output_begin(struct perf_output_handle *handle, | 1226 | extern int perf_output_begin(struct perf_output_handle *handle, |
1143 | struct perf_event *event, unsigned int size); | 1227 | struct perf_event *event, unsigned int size); |
1144 | extern void perf_output_end(struct perf_output_handle *handle); | 1228 | extern void perf_output_end(struct perf_output_handle *handle); |
diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 000000000000..27bd3f8a0857 --- /dev/null +++ b/include/linux/static_key.h | |||
@@ -0,0 +1 @@ | |||
#include <linux/jump_label.h> | |||
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index df0a779c1bbd..bd96ecd0e05c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
18 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <linux/rcupdate.h> | 19 | #include <linux/rcupdate.h> |
20 | #include <linux/jump_label.h> | 20 | #include <linux/static_key.h> |
21 | 21 | ||
22 | struct module; | 22 | struct module; |
23 | struct tracepoint; | 23 | struct tracepoint; |
@@ -29,7 +29,7 @@ struct tracepoint_func { | |||
29 | 29 | ||
30 | struct tracepoint { | 30 | struct tracepoint { |
31 | const char *name; /* Tracepoint name */ | 31 | const char *name; /* Tracepoint name */ |
32 | struct jump_label_key key; | 32 | struct static_key key; |
33 | void (*regfunc)(void); | 33 | void (*regfunc)(void); |
34 | void (*unregfunc)(void); | 34 | void (*unregfunc)(void); |
35 | struct tracepoint_func __rcu *funcs; | 35 | struct tracepoint_func __rcu *funcs; |
@@ -114,7 +114,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
114 | * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just | 114 | * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just |
115 | * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". | 115 | * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". |
116 | */ | 116 | */ |
117 | #define __DO_TRACE(tp, proto, args, cond) \ | 117 | #define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ |
118 | do { \ | 118 | do { \ |
119 | struct tracepoint_func *it_func_ptr; \ | 119 | struct tracepoint_func *it_func_ptr; \ |
120 | void *it_func; \ | 120 | void *it_func; \ |
@@ -122,6 +122,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
122 | \ | 122 | \ |
123 | if (!(cond)) \ | 123 | if (!(cond)) \ |
124 | return; \ | 124 | return; \ |
125 | prercu; \ | ||
125 | rcu_read_lock_sched_notrace(); \ | 126 | rcu_read_lock_sched_notrace(); \ |
126 | it_func_ptr = rcu_dereference_sched((tp)->funcs); \ | 127 | it_func_ptr = rcu_dereference_sched((tp)->funcs); \ |
127 | if (it_func_ptr) { \ | 128 | if (it_func_ptr) { \ |
@@ -132,6 +133,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
132 | } while ((++it_func_ptr)->func); \ | 133 | } while ((++it_func_ptr)->func); \ |
133 | } \ | 134 | } \ |
134 | rcu_read_unlock_sched_notrace(); \ | 135 | rcu_read_unlock_sched_notrace(); \ |
136 | postrcu; \ | ||
135 | } while (0) | 137 | } while (0) |
136 | 138 | ||
137 | /* | 139 | /* |
@@ -139,15 +141,25 @@ static inline void tracepoint_synchronize_unregister(void) | |||
139 | * not add unwanted padding between the beginning of the section and the | 141 | * not add unwanted padding between the beginning of the section and the |
140 | * structure. Force alignment to the same alignment as the section start. | 142 | * structure. Force alignment to the same alignment as the section start. |
141 | */ | 143 | */ |
142 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ | 144 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ |
143 | extern struct tracepoint __tracepoint_##name; \ | 145 | extern struct tracepoint __tracepoint_##name; \ |
144 | static inline void trace_##name(proto) \ | 146 | static inline void trace_##name(proto) \ |
145 | { \ | 147 | { \ |
148 | if (static_key_false(&__tracepoint_##name.key)) \ | ||
149 | __DO_TRACE(&__tracepoint_##name, \ | ||
150 | TP_PROTO(data_proto), \ | ||
151 | TP_ARGS(data_args), \ | ||
152 | TP_CONDITION(cond),,); \ | ||
153 | } \ | ||
154 | static inline void trace_##name##_rcuidle(proto) \ | ||
155 | { \ | ||
146 | if (static_branch(&__tracepoint_##name.key)) \ | 156 | if (static_branch(&__tracepoint_##name.key)) \ |
147 | __DO_TRACE(&__tracepoint_##name, \ | 157 | __DO_TRACE(&__tracepoint_##name, \ |
148 | TP_PROTO(data_proto), \ | 158 | TP_PROTO(data_proto), \ |
149 | TP_ARGS(data_args), \ | 159 | TP_ARGS(data_args), \ |
150 | TP_CONDITION(cond)); \ | 160 | TP_CONDITION(cond), \ |
161 | rcu_idle_exit(), \ | ||
162 | rcu_idle_enter()); \ | ||
151 | } \ | 163 | } \ |
152 | static inline int \ | 164 | static inline int \ |
153 | register_trace_##name(void (*probe)(data_proto), void *data) \ | 165 | register_trace_##name(void (*probe)(data_proto), void *data) \ |
@@ -176,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) | |||
176 | __attribute__((section("__tracepoints_strings"))) = #name; \ | 188 | __attribute__((section("__tracepoints_strings"))) = #name; \ |
177 | struct tracepoint __tracepoint_##name \ | 189 | struct tracepoint __tracepoint_##name \ |
178 | __attribute__((section("__tracepoints"))) = \ | 190 | __attribute__((section("__tracepoints"))) = \ |
179 | { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ | 191 | { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ |
180 | static struct tracepoint * const __tracepoint_ptr_##name __used \ | 192 | static struct tracepoint * const __tracepoint_ptr_##name __used \ |
181 | __attribute__((section("__tracepoints_ptrs"))) = \ | 193 | __attribute__((section("__tracepoints_ptrs"))) = \ |
182 | &__tracepoint_##name; | 194 | &__tracepoint_##name; |
@@ -190,9 +202,11 @@ static inline void tracepoint_synchronize_unregister(void) | |||
190 | EXPORT_SYMBOL(__tracepoint_##name) | 202 | EXPORT_SYMBOL(__tracepoint_##name) |
191 | 203 | ||
192 | #else /* !CONFIG_TRACEPOINTS */ | 204 | #else /* !CONFIG_TRACEPOINTS */ |
193 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ | 205 | #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ |
194 | static inline void trace_##name(proto) \ | 206 | static inline void trace_##name(proto) \ |
195 | { } \ | 207 | { } \ |
208 | static inline void trace_##name##_rcuidle(proto) \ | ||
209 | { } \ | ||
196 | static inline int \ | 210 | static inline int \ |
197 | register_trace_##name(void (*probe)(data_proto), \ | 211 | register_trace_##name(void (*probe)(data_proto), \ |
198 | void *data) \ | 212 | void *data) \ |
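Call sites that run after rcu_idle_enter() must use the new _rcuidle flavor, exactly as the cpuidle conversion earlier in this series does. A sketch of such an idle hook (my_enter_idle is invented):

#include <trace/events/power.h>

static void my_enter_idle(unsigned int state, unsigned int cpu)
{
	/* plain trace_cpu_idle() would fire while RCU is not watching */
	trace_cpu_idle_rcuidle(state, cpu);

	/* ... architecture low-power entry ... */

	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, cpu);
}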
diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..dcde2d9268cd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -55,7 +55,7 @@ | |||
55 | #include <linux/uaccess.h> | 55 | #include <linux/uaccess.h> |
56 | #include <linux/memcontrol.h> | 56 | #include <linux/memcontrol.h> |
57 | #include <linux/res_counter.h> | 57 | #include <linux/res_counter.h> |
58 | #include <linux/jump_label.h> | 58 | #include <linux/static_key.h> |
59 | 59 | ||
60 | #include <linux/filter.h> | 60 | #include <linux/filter.h> |
61 | #include <linux/rculist_nulls.h> | 61 | #include <linux/rculist_nulls.h> |
@@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) | |||
924 | #endif /* SOCK_REFCNT_DEBUG */ | 924 | #endif /* SOCK_REFCNT_DEBUG */ |
925 | 925 | ||
926 | #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) | 926 | #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) |
927 | extern struct jump_label_key memcg_socket_limit_enabled; | 927 | extern struct static_key memcg_socket_limit_enabled; |
928 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | 928 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, |
929 | struct cg_proto *cg_proto) | 929 | struct cg_proto *cg_proto) |
930 | { | 930 | { |
931 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); | 931 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); |
932 | } | 932 | } |
933 | #define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) | 933 | #define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) |
934 | #else | 934 | #else |
935 | #define mem_cgroup_sockets_enabled 0 | 935 | #define mem_cgroup_sockets_enabled 0 |
936 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | 936 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, |
diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 1bcc2a8c00e2..14b38940062b 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h | |||
@@ -151,6 +151,8 @@ enum { | |||
151 | events get removed */ | 151 | events get removed */ |
152 | static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; | 152 | static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; |
153 | static inline void trace_power_end(u64 cpuid) {}; | 153 | static inline void trace_power_end(u64 cpuid) {}; |
154 | static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {}; | ||
155 | static inline void trace_power_end_rcuidle(u64 cpuid) {}; | ||
154 | static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; | 156 | static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; |
155 | #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ | 157 | #endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ |
156 | 158 | ||
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h new file mode 100644 index 000000000000..94ec79cc011a --- /dev/null +++ b/include/trace/events/printk.h | |||
@@ -0,0 +1,41 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM printk | ||
3 | |||
4 | #if !defined(_TRACE_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_PRINTK_H | ||
6 | |||
7 | #include <linux/tracepoint.h> | ||
8 | |||
9 | TRACE_EVENT_CONDITION(console, | ||
10 | TP_PROTO(const char *log_buf, unsigned start, unsigned end, | ||
11 | unsigned log_buf_len), | ||
12 | |||
13 | TP_ARGS(log_buf, start, end, log_buf_len), | ||
14 | |||
15 | TP_CONDITION(start != end), | ||
16 | |||
17 | TP_STRUCT__entry( | ||
18 | __dynamic_array(char, msg, end - start + 1) | ||
19 | ), | ||
20 | |||
21 | TP_fast_assign( | ||
22 | if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) { | ||
23 | memcpy(__get_dynamic_array(msg), | ||
24 | log_buf + (start & (log_buf_len - 1)), | ||
25 | log_buf_len - (start & (log_buf_len - 1))); | ||
26 | memcpy((char *)__get_dynamic_array(msg) + | ||
27 | log_buf_len - (start & (log_buf_len - 1)), | ||
28 | log_buf, end & (log_buf_len - 1)); | ||
29 | } else | ||
30 | memcpy(__get_dynamic_array(msg), | ||
31 | log_buf + (start & (log_buf_len - 1)), | ||
32 | end - start); | ||
33 | ((char *)__get_dynamic_array(msg))[end - start] = 0; | ||
34 | ), | ||
35 | |||
36 | TP_printk("%s", __get_str(msg)) | ||
37 | ); | ||
38 | #endif /* _TRACE_PRINTK_H */ | ||
39 | |||
40 | /* This part must be outside protection */ | ||
41 | #include <trace/define_trace.h> | ||
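The TP_fast_assign above linearizes a possibly wrapped region of the circular log buffer. The same copy logic extracted into plain C for reference (assumes buf_len is a power of two, as log_buf_len is, and that dst holds end - start + 1 bytes):

#include <string.h>

/* copy the [start, end) region out of a power-of-two sized ring buffer */
static void ring_copy(char *dst, const char *buf, unsigned int buf_len,
		      unsigned int start, unsigned int end)
{
	unsigned int s = start & (buf_len - 1);
	unsigned int e = end & (buf_len - 1);

	if (s > e) {			/* region wraps past the buffer end */
		memcpy(dst, buf + s, buf_len - s);
		memcpy(dst + (buf_len - s), buf, e);
	} else {
		memcpy(dst, buf + s, end - start);
	}
	dst[end - start] = '\0';
}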
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e33ed1bfa113..fbc7b1ad929b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/tracepoint.h> | 8 | #include <linux/tracepoint.h> |
9 | #include <linux/binfmts.h> | ||
9 | 10 | ||
10 | /* | 11 | /* |
11 | * Tracepoint for calling kthread_stop, performed to end a kthread: | 12 | * Tracepoint for calling kthread_stop, performed to end a kthread: |
@@ -276,6 +277,32 @@ TRACE_EVENT(sched_process_fork, | |||
276 | ); | 277 | ); |
277 | 278 | ||
278 | /* | 279 | /* |
280 | * Tracepoint for exec: | ||
281 | */ | ||
282 | TRACE_EVENT(sched_process_exec, | ||
283 | |||
284 | TP_PROTO(struct task_struct *p, pid_t old_pid, | ||
285 | struct linux_binprm *bprm), | ||
286 | |||
287 | TP_ARGS(p, old_pid, bprm), | ||
288 | |||
289 | TP_STRUCT__entry( | ||
290 | __string( filename, bprm->filename ) | ||
291 | __field( pid_t, pid ) | ||
292 | __field( pid_t, old_pid ) | ||
293 | ), | ||
294 | |||
295 | TP_fast_assign( | ||
296 | __assign_str(filename, bprm->filename); | ||
297 | __entry->pid = p->pid; | ||
298 | __entry->old_pid = old_pid; | ||
299 | ), | ||
300 | |||
301 | TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), | ||
302 | __entry->pid, __entry->old_pid) | ||
303 | ); | ||
304 | |||
305 | /* | ||
279 | * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE | 306 | * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE |
280 | * adding sched_stat support to SCHED_FIFO/RR would be welcome. | 307 | * adding sched_stat support to SCHED_FIFO/RR would be welcome. |
281 | */ | 308 | */ |
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index 17df43464df0..39a8a430d90f 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h | |||
@@ -23,11 +23,23 @@ | |||
23 | } \ | 23 | } \ |
24 | } while (0) | 24 | } while (0) |
25 | 25 | ||
26 | #ifndef TRACE_HEADER_MULTI_READ | ||
27 | enum { | ||
28 | TRACE_SIGNAL_DELIVERED, | ||
29 | TRACE_SIGNAL_IGNORED, | ||
30 | TRACE_SIGNAL_ALREADY_PENDING, | ||
31 | TRACE_SIGNAL_OVERFLOW_FAIL, | ||
32 | TRACE_SIGNAL_LOSE_INFO, | ||
33 | }; | ||
34 | #endif | ||
35 | |||
26 | /** | 36 | /** |
27 | * signal_generate - called when a signal is generated | 37 | * signal_generate - called when a signal is generated |
28 | * @sig: signal number | 38 | * @sig: signal number |
29 | * @info: pointer to struct siginfo | 39 | * @info: pointer to struct siginfo |
30 | * @task: pointer to struct task_struct | 40 | * @task: pointer to struct task_struct |
41 | * @group: shared or private | ||
42 | * @result: TRACE_SIGNAL_* | ||
31 | * | 43 | * |
32 | * Current process sends a 'sig' signal to 'task' process with | 44 | * Current process sends a 'sig' signal to 'task' process with |
33 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, | 45 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, |
@@ -37,9 +49,10 @@ | |||
37 | */ | 49 | */ |
38 | TRACE_EVENT(signal_generate, | 50 | TRACE_EVENT(signal_generate, |
39 | 51 | ||
40 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), | 52 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task, |
53 | int group, int result), | ||
41 | 54 | ||
42 | TP_ARGS(sig, info, task), | 55 | TP_ARGS(sig, info, task, group, result), |
43 | 56 | ||
44 | TP_STRUCT__entry( | 57 | TP_STRUCT__entry( |
45 | __field( int, sig ) | 58 | __field( int, sig ) |
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate, | |||
47 | __field( int, code ) | 60 | __field( int, code ) |
48 | __array( char, comm, TASK_COMM_LEN ) | 61 | __array( char, comm, TASK_COMM_LEN ) |
49 | __field( pid_t, pid ) | 62 | __field( pid_t, pid ) |
63 | __field( int, group ) | ||
64 | __field( int, result ) | ||
50 | ), | 65 | ), |
51 | 66 | ||
52 | TP_fast_assign( | 67 | TP_fast_assign( |
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate, | |||
54 | TP_STORE_SIGINFO(__entry, info); | 69 | TP_STORE_SIGINFO(__entry, info); |
55 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); | 70 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); |
56 | __entry->pid = task->pid; | 71 | __entry->pid = task->pid; |
72 | __entry->group = group; | ||
73 | __entry->result = result; | ||
57 | ), | 74 | ), |
58 | 75 | ||
59 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", | 76 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", |
60 | __entry->sig, __entry->errno, __entry->code, | 77 | __entry->sig, __entry->errno, __entry->code, |
61 | __entry->comm, __entry->pid) | 78 | __entry->comm, __entry->pid, __entry->group, |
79 | __entry->result) | ||
62 | ); | 80 | ); |
63 | 81 | ||
64 | /** | 82 | /** |
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver, | |||
101 | __entry->sa_handler, __entry->sa_flags) | 119 | __entry->sa_handler, __entry->sa_flags) |
102 | ); | 120 | ); |
103 | 121 | ||
104 | DECLARE_EVENT_CLASS(signal_queue_overflow, | ||
105 | |||
106 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
107 | |||
108 | TP_ARGS(sig, group, info), | ||
109 | |||
110 | TP_STRUCT__entry( | ||
111 | __field( int, sig ) | ||
112 | __field( int, group ) | ||
113 | __field( int, errno ) | ||
114 | __field( int, code ) | ||
115 | ), | ||
116 | |||
117 | TP_fast_assign( | ||
118 | __entry->sig = sig; | ||
119 | __entry->group = group; | ||
120 | TP_STORE_SIGINFO(__entry, info); | ||
121 | ), | ||
122 | |||
123 | TP_printk("sig=%d group=%d errno=%d code=%d", | ||
124 | __entry->sig, __entry->group, __entry->errno, __entry->code) | ||
125 | ); | ||
126 | |||
127 | /** | ||
128 | * signal_overflow_fail - called when signal queue is overflow | ||
129 | * @sig: signal number | ||
130 | * @group: signal to process group or not (bool) | ||
131 | * @info: pointer to struct siginfo | ||
132 | * | ||
133 | * Kernel fails to generate 'sig' signal with 'info' siginfo, because | ||
134 | * siginfo queue is overflow, and the signal is dropped. | ||
135 | * 'group' is not 0 if the signal will be sent to a process group. | ||
136 | * 'sig' is always one of RT signals. | ||
137 | */ | ||
138 | DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, | ||
139 | |||
140 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
141 | |||
142 | TP_ARGS(sig, group, info) | ||
143 | ); | ||
144 | |||
145 | /** | ||
146 | * signal_lose_info - called when siginfo is lost | ||
147 | * @sig: signal number | ||
148 | * @group: signal to process group or not (bool) | ||
149 | * @info: pointer to struct siginfo | ||
150 | * | ||
151 | * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo | ||
152 | * queue is overflow. | ||
153 | * 'group' is not 0 if the signal will be sent to a process group. | ||
154 | * 'sig' is always one of non-RT signals. | ||
155 | */ | ||
156 | DEFINE_EVENT(signal_queue_overflow, signal_lose_info, | ||
157 | |||
158 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
159 | |||
160 | TP_ARGS(sig, group, info) | ||
161 | ); | ||
162 | |||
163 | #endif /* _TRACE_SIGNAL_H */ | 122 | #endif /* _TRACE_SIGNAL_H */ |
164 | 123 | ||
165 | /* This part must be outside protection */ | 124 | /* This part must be outside protection */ |
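The TRACE_SIGNAL_* enum above folds the two deleted overflow tracepoints into a single result field on signal_generate. When postprocessing raw events, a small decoder along these lines maps the field back to a name; this helper is hypothetical, not part of the tree.

#include <linux/kernel.h>

static const char * const sig_result_name[] = {
	[TRACE_SIGNAL_DELIVERED]       = "delivered",
	[TRACE_SIGNAL_IGNORED]         = "ignored",
	[TRACE_SIGNAL_ALREADY_PENDING] = "already_pending",
	[TRACE_SIGNAL_OVERFLOW_FAIL]   = "overflow_fail",
	[TRACE_SIGNAL_LOSE_INFO]       = "lose_info",
};

static const char *sig_result(int result)
{
	if (result < 0 || result >= ARRAY_SIZE(sig_result_name))
		return "unknown";
	return sig_result_name[result];
}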
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1b5c081d8b9f..c61234b1a988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) | |||
118 | PERF_FLAG_FD_OUTPUT |\ | 118 | PERF_FLAG_FD_OUTPUT |\ |
119 | PERF_FLAG_PID_CGROUP) | 119 | PERF_FLAG_PID_CGROUP) |
120 | 120 | ||
121 | /* | ||
122 | * branch priv levels that need permission checks | ||
123 | */ | ||
124 | #define PERF_SAMPLE_BRANCH_PERM_PLM \ | ||
125 | (PERF_SAMPLE_BRANCH_KERNEL |\ | ||
126 | PERF_SAMPLE_BRANCH_HV) | ||
127 | |||
121 | enum event_type_t { | 128 | enum event_type_t { |
122 | EVENT_FLEXIBLE = 0x1, | 129 | EVENT_FLEXIBLE = 0x1, |
123 | EVENT_PINNED = 0x2, | 130 | EVENT_PINNED = 0x2, |
@@ -128,8 +135,9 @@ enum event_type_t { | |||
128 | * perf_sched_events : >0 events exist | 135 | * perf_sched_events : >0 events exist |
129 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu | 136 | * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu |
130 | */ | 137 | */ |
131 | struct jump_label_key_deferred perf_sched_events __read_mostly; | 138 | struct static_key_deferred perf_sched_events __read_mostly; |
132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 139 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
140 | static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); | ||
133 | 141 | ||
134 | static atomic_t nr_mmap_events __read_mostly; | 142 | static atomic_t nr_mmap_events __read_mostly; |
135 | static atomic_t nr_comm_events __read_mostly; | 143 | static atomic_t nr_comm_events __read_mostly; |
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
881 | if (is_cgroup_event(event)) | 889 | if (is_cgroup_event(event)) |
882 | ctx->nr_cgroups++; | 890 | ctx->nr_cgroups++; |
883 | 891 | ||
892 | if (has_branch_stack(event)) | ||
893 | ctx->nr_branch_stack++; | ||
894 | |||
884 | list_add_rcu(&event->event_entry, &ctx->event_list); | 895 | list_add_rcu(&event->event_entry, &ctx->event_list); |
885 | if (!ctx->nr_events) | 896 | if (!ctx->nr_events) |
886 | perf_pmu_rotate_start(ctx->pmu); | 897 | perf_pmu_rotate_start(ctx->pmu); |
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1020 | cpuctx->cgrp = NULL; | 1031 | cpuctx->cgrp = NULL; |
1021 | } | 1032 | } |
1022 | 1033 | ||
1034 | if (has_branch_stack(event)) | ||
1035 | ctx->nr_branch_stack--; | ||
1036 | |||
1023 | ctx->nr_events--; | 1037 | ctx->nr_events--; |
1024 | if (event->attr.inherit_stat) | 1038 | if (event->attr.inherit_stat) |
1025 | ctx->nr_stat--; | 1039 | ctx->nr_stat--; |
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2195 | } | 2209 | } |
2196 | 2210 | ||
2197 | /* | 2211 | /* |
2212 | * When sampling the branch stack in system-wide mode, it may be | ||
2213 | * necessary to flush the stack on context switch. This happens when | ||
2214 | * the branch stack does not tag its entries with the pid of the | ||
2215 | * current task; without a flush, it becomes impossible to associate a | ||
2216 | * branch entry with a task. This ambiguity is more likely when the | ||
2217 | * branch stack supports priv level filtering and the user sets it to | ||
2218 | * monitor only at the user level (which could be a useful measurement | ||
2219 | * in system-wide mode). In that case, the risk is high of ending up | ||
2220 | * with branch entries from multiple tasks. Flushing may mean dropping | ||
2221 | * the existing entries or stashing them in the PMU specific code layer. | ||
2222 | * | ||
2223 | * This function provides the context switch callback to the lower code | ||
2224 | * layer. It is invoked ONLY when there is at least one system-wide context | ||
2225 | * with at least one active event using taken branch sampling. | ||
2226 | */ | ||
2227 | static void perf_branch_stack_sched_in(struct task_struct *prev, | ||
2228 | struct task_struct *task) | ||
2229 | { | ||
2230 | struct perf_cpu_context *cpuctx; | ||
2231 | struct pmu *pmu; | ||
2232 | unsigned long flags; | ||
2233 | |||
2234 | /* no need to flush branch stack if not changing task */ | ||
2235 | if (prev == task) | ||
2236 | return; | ||
2237 | |||
2238 | local_irq_save(flags); | ||
2239 | |||
2240 | rcu_read_lock(); | ||
2241 | |||
2242 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
2243 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
2244 | |||
2245 | /* | ||
2246 | * check if the context has at least one | ||
2247 | * event using PERF_SAMPLE_BRANCH_STACK | ||
2248 | */ | ||
2249 | if (cpuctx->ctx.nr_branch_stack > 0 | ||
2250 | && pmu->flush_branch_stack) { | ||
2251 | |||
2252 | pmu = cpuctx->ctx.pmu; | ||
2253 | |||
2254 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
2255 | |||
2256 | perf_pmu_disable(pmu); | ||
2257 | |||
2258 | pmu->flush_branch_stack(); | ||
2259 | |||
2260 | perf_pmu_enable(pmu); | ||
2261 | |||
2262 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
2263 | } | ||
2264 | } | ||
2265 | |||
2266 | rcu_read_unlock(); | ||
2267 | |||
2268 | local_irq_restore(flags); | ||
2269 | } | ||
2270 | |||
2271 | /* | ||
2198 | * Called from scheduler to add the events of the current task | 2272 | * Called from scheduler to add the events of the current task |
2199 | * with interrupts disabled. | 2273 | * with interrupts disabled. |
2200 | * | 2274 | * |
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, | |||
2225 | */ | 2299 | */ |
2226 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2300 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
2227 | perf_cgroup_sched_in(prev, task); | 2301 | perf_cgroup_sched_in(prev, task); |
2302 | |||
2303 | /* check for system-wide branch_stack events */ | ||
2304 | if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) | ||
2305 | perf_branch_stack_sched_in(prev, task); | ||
2228 | } | 2306 | } |
2229 | 2307 | ||
2230 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2308 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
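The new callback is invoked only on real task switches, and only while a system-wide branch-stack event is active on the CPU. A PMU whose branch buffer cannot tag entries with a pid would wire it up roughly as below; the mypmu_* names are illustrative, not from the tree.

static void mypmu_flush_branch_stack(void)
{
	/*
	 * Called with the PMU disabled and the context locked (see
	 * perf_branch_stack_sched_in above): drop or stash any branch
	 * entries that belong to the outgoing task.
	 */
}

static struct pmu mypmu = {
	.task_ctx_nr		= perf_hw_context,
	.flush_branch_stack	= mypmu_flush_branch_stack,
	/* .add, .del, .start, .stop, .read, ... elided */
};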
@@ -2778,7 +2856,7 @@ static void free_event(struct perf_event *event) | |||
2778 | 2856 | ||
2779 | if (!event->parent) { | 2857 | if (!event->parent) { |
2780 | if (event->attach_state & PERF_ATTACH_TASK) | 2858 | if (event->attach_state & PERF_ATTACH_TASK) |
2781 | jump_label_dec_deferred(&perf_sched_events); | 2859 | static_key_slow_dec_deferred(&perf_sched_events); |
2782 | if (event->attr.mmap || event->attr.mmap_data) | 2860 | if (event->attr.mmap || event->attr.mmap_data) |
2783 | atomic_dec(&nr_mmap_events); | 2861 | atomic_dec(&nr_mmap_events); |
2784 | if (event->attr.comm) | 2862 | if (event->attr.comm) |
@@ -2789,7 +2867,15 @@ static void free_event(struct perf_event *event) | |||
2789 | put_callchain_buffers(); | 2867 | put_callchain_buffers(); |
2790 | if (is_cgroup_event(event)) { | 2868 | if (is_cgroup_event(event)) { |
2791 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2869 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
2792 | jump_label_dec_deferred(&perf_sched_events); | 2870 | static_key_slow_dec_deferred(&perf_sched_events); |
2871 | } | ||
2872 | |||
2873 | if (has_branch_stack(event)) { | ||
2874 | static_key_slow_dec_deferred(&perf_sched_events); | ||
2875 | /* is system-wide event */ | ||
2876 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
2877 | atomic_dec(&per_cpu(perf_branch_stack_events, | ||
2878 | event->cpu)); | ||
2793 | } | 2879 | } |
2794 | } | 2880 | } |
2795 | 2881 | ||
@@ -3238,10 +3324,6 @@ int perf_event_task_disable(void) | |||
3238 | return 0; | 3324 | return 0; |
3239 | } | 3325 | } |
3240 | 3326 | ||
3241 | #ifndef PERF_EVENT_INDEX_OFFSET | ||
3242 | # define PERF_EVENT_INDEX_OFFSET 0 | ||
3243 | #endif | ||
3244 | |||
3245 | static int perf_event_index(struct perf_event *event) | 3327 | static int perf_event_index(struct perf_event *event) |
3246 | { | 3328 | { |
3247 | if (event->hw.state & PERF_HES_STOPPED) | 3329 | if (event->hw.state & PERF_HES_STOPPED) |
@@ -3250,21 +3332,26 @@ static int perf_event_index(struct perf_event *event) | |||
3250 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3332 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
3251 | return 0; | 3333 | return 0; |
3252 | 3334 | ||
3253 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; | 3335 | return event->pmu->event_idx(event); |
3254 | } | 3336 | } |
3255 | 3337 | ||
3256 | static void calc_timer_values(struct perf_event *event, | 3338 | static void calc_timer_values(struct perf_event *event, |
3339 | u64 *now, | ||
3257 | u64 *enabled, | 3340 | u64 *enabled, |
3258 | u64 *running) | 3341 | u64 *running) |
3259 | { | 3342 | { |
3260 | u64 now, ctx_time; | 3343 | u64 ctx_time; |
3261 | 3344 | ||
3262 | now = perf_clock(); | 3345 | *now = perf_clock(); |
3263 | ctx_time = event->shadow_ctx_time + now; | 3346 | ctx_time = event->shadow_ctx_time + *now; |
3264 | *enabled = ctx_time - event->tstamp_enabled; | 3347 | *enabled = ctx_time - event->tstamp_enabled; |
3265 | *running = ctx_time - event->tstamp_running; | 3348 | *running = ctx_time - event->tstamp_running; |
3266 | } | 3349 | } |
3267 | 3350 | ||
3351 | void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
3352 | { | ||
3353 | } | ||
3354 | |||
3268 | /* | 3355 | /* |
3269 | * Callers need to ensure there can be no nesting of this function, otherwise | 3356 | * Callers need to ensure there can be no nesting of this function, otherwise |
3270 | * the seqlock logic goes bad. We can not serialize this because the arch | 3357 | * the seqlock logic goes bad. We can not serialize this because the arch |
@@ -3274,7 +3361,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3274 | { | 3361 | { |
3275 | struct perf_event_mmap_page *userpg; | 3362 | struct perf_event_mmap_page *userpg; |
3276 | struct ring_buffer *rb; | 3363 | struct ring_buffer *rb; |
3277 | u64 enabled, running; | 3364 | u64 enabled, running, now; |
3278 | 3365 | ||
3279 | rcu_read_lock(); | 3366 | rcu_read_lock(); |
3280 | /* | 3367 | /* |
@@ -3286,7 +3373,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3286 | * because of locking issue as we can be called in | 3373 | * because of locking issue as we can be called in |
3287 | * NMI context | 3374 | * NMI context |
3288 | */ | 3375 | */ |
3289 | calc_timer_values(event, &enabled, &running); | 3376 | calc_timer_values(event, &now, &enabled, &running); |
3290 | rb = rcu_dereference(event->rb); | 3377 | rb = rcu_dereference(event->rb); |
3291 | if (!rb) | 3378 | if (!rb) |
3292 | goto unlock; | 3379 | goto unlock; |
@@ -3302,7 +3389,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3302 | barrier(); | 3389 | barrier(); |
3303 | userpg->index = perf_event_index(event); | 3390 | userpg->index = perf_event_index(event); |
3304 | userpg->offset = perf_event_count(event); | 3391 | userpg->offset = perf_event_count(event); |
3305 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 3392 | if (userpg->index) |
3306 | userpg->offset -= local64_read(&event->hw.prev_count); | 3393 | userpg->offset -= local64_read(&event->hw.prev_count); |
3307 | 3394 | ||
3308 | userpg->time_enabled = enabled + | 3395 | userpg->time_enabled = enabled + |
@@ -3311,6 +3398,8 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3311 | userpg->time_running = running + | 3398 | userpg->time_running = running + |
3312 | atomic64_read(&event->child_total_time_running); | 3399 | atomic64_read(&event->child_total_time_running); |
3313 | 3400 | ||
3401 | perf_update_user_clock(userpg, now); | ||
3402 | |||
3314 | barrier(); | 3403 | barrier(); |
3315 | ++userpg->lock; | 3404 | ++userpg->lock; |
3316 | preempt_enable(); | 3405 | preempt_enable(); |
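perf_event_update_userpage() publishes index/offset under a seqcount-style lock, and with the event_idx change an index of 0 now means "no hardware counter to read" rather than being inferred from event state. A self-monitoring reader retries until the lock word is stable, as in this user-space sketch (error handling trimmed for brevity):

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *pg;
	uint64_t offset, enabled, running;
	uint32_t seq, idx;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;
	pg = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
	if (pg == MAP_FAILED)
		return 1;

	do {	/* retry until the seqcount is stable */
		seq = pg->lock;
		__sync_synchronize();	/* pairs with the kernel's barrier() */
		idx     = pg->index;	/* 0 now means: no counter to rdpmc */
		offset  = pg->offset;	/* add rdpmc(idx - 1) when idx != 0 */
		enabled = pg->time_enabled;
		running = pg->time_running;
		__sync_synchronize();
	} while (pg->lock != seq);

	printf("count=%llu enabled=%llu running=%llu\n",
	       (unsigned long long)offset, (unsigned long long)enabled,
	       (unsigned long long)running);
	return 0;
}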
@@ -3568,6 +3657,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3568 | event->mmap_user = get_current_user(); | 3657 | event->mmap_user = get_current_user(); |
3569 | vma->vm_mm->pinned_vm += event->mmap_locked; | 3658 | vma->vm_mm->pinned_vm += event->mmap_locked; |
3570 | 3659 | ||
3660 | perf_event_update_userpage(event); | ||
3661 | |||
3571 | unlock: | 3662 | unlock: |
3572 | if (!ret) | 3663 | if (!ret) |
3573 | atomic_inc(&event->mmap_count); | 3664 | atomic_inc(&event->mmap_count); |
@@ -3799,7 +3890,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3799 | static void perf_output_read(struct perf_output_handle *handle, | 3890 | static void perf_output_read(struct perf_output_handle *handle, |
3800 | struct perf_event *event) | 3891 | struct perf_event *event) |
3801 | { | 3892 | { |
3802 | u64 enabled = 0, running = 0; | 3893 | u64 enabled = 0, running = 0, now; |
3803 | u64 read_format = event->attr.read_format; | 3894 | u64 read_format = event->attr.read_format; |
3804 | 3895 | ||
3805 | /* | 3896 | /* |
@@ -3812,7 +3903,7 @@ static void perf_output_read(struct perf_output_handle *handle, | |||
3812 | * NMI context | 3903 | * NMI context |
3813 | */ | 3904 | */ |
3814 | if (read_format & PERF_FORMAT_TOTAL_TIMES) | 3905 | if (read_format & PERF_FORMAT_TOTAL_TIMES) |
3815 | calc_timer_values(event, &enabled, &running); | 3906 | calc_timer_values(event, &now, &enabled, &running); |
3816 | 3907 | ||
3817 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3908 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3818 | perf_output_read_group(handle, event, enabled, running); | 3909 | perf_output_read_group(handle, event, enabled, running); |
@@ -3902,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
3902 | } | 3993 | } |
3903 | } | 3994 | } |
3904 | } | 3995 | } |
3996 | |||
3997 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
3998 | if (data->br_stack) { | ||
3999 | size_t size; | ||
4000 | |||
4001 | size = data->br_stack->nr | ||
4002 | * sizeof(struct perf_branch_entry); | ||
4003 | |||
4004 | perf_output_put(handle, data->br_stack->nr); | ||
4005 | perf_output_copy(handle, data->br_stack->entries, size); | ||
4006 | } else { | ||
4007 | /* | ||
4008 | * we always store at least the value of nr | ||
4009 | */ | ||
4010 | u64 nr = 0; | ||
4011 | perf_output_put(handle, nr); | ||
4012 | } | ||
4013 | } | ||
3905 | } | 4014 | } |
3906 | 4015 | ||
3907 | void perf_prepare_sample(struct perf_event_header *header, | 4016 | void perf_prepare_sample(struct perf_event_header *header, |
@@ -3944,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3944 | WARN_ON_ONCE(size & (sizeof(u64)-1)); | 4053 | WARN_ON_ONCE(size & (sizeof(u64)-1)); |
3945 | header->size += size; | 4054 | header->size += size; |
3946 | } | 4055 | } |
4056 | |||
4057 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
4058 | int size = sizeof(u64); /* nr */ | ||
4059 | if (data->br_stack) { | ||
4060 | size += data->br_stack->nr | ||
4061 | * sizeof(struct perf_branch_entry); | ||
4062 | } | ||
4063 | header->size += size; | ||
4064 | } | ||
3947 | } | 4065 | } |
3948 | 4066 | ||
3949 | static void perf_event_output(struct perf_event *event, | 4067 | static void perf_event_output(struct perf_event *event, |
@@ -4986,7 +5104,7 @@ fail: | |||
4986 | return err; | 5104 | return err; |
4987 | } | 5105 | } |
4988 | 5106 | ||
4989 | struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 5107 | struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
4990 | 5108 | ||
4991 | static void sw_perf_event_destroy(struct perf_event *event) | 5109 | static void sw_perf_event_destroy(struct perf_event *event) |
4992 | { | 5110 | { |
@@ -4994,7 +5112,7 @@ static void sw_perf_event_destroy(struct perf_event *event) | |||
4994 | 5112 | ||
4995 | WARN_ON(event->parent); | 5113 | WARN_ON(event->parent); |
4996 | 5114 | ||
4997 | jump_label_dec(&perf_swevent_enabled[event_id]); | 5115 | static_key_slow_dec(&perf_swevent_enabled[event_id]); |
4998 | swevent_hlist_put(event); | 5116 | swevent_hlist_put(event); |
4999 | } | 5117 | } |
5000 | 5118 | ||
@@ -5005,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) | |||
5005 | if (event->attr.type != PERF_TYPE_SOFTWARE) | 5123 | if (event->attr.type != PERF_TYPE_SOFTWARE) |
5006 | return -ENOENT; | 5124 | return -ENOENT; |
5007 | 5125 | ||
5126 | /* | ||
5127 | * no branch sampling for software events | ||
5128 | */ | ||
5129 | if (has_branch_stack(event)) | ||
5130 | return -EOPNOTSUPP; | ||
5131 | |||
5008 | switch (event_id) { | 5132 | switch (event_id) { |
5009 | case PERF_COUNT_SW_CPU_CLOCK: | 5133 | case PERF_COUNT_SW_CPU_CLOCK: |
5010 | case PERF_COUNT_SW_TASK_CLOCK: | 5134 | case PERF_COUNT_SW_TASK_CLOCK: |
@@ -5024,13 +5148,18 @@ static int perf_swevent_init(struct perf_event *event) | |||
5024 | if (err) | 5148 | if (err) |
5025 | return err; | 5149 | return err; |
5026 | 5150 | ||
5027 | jump_label_inc(&perf_swevent_enabled[event_id]); | 5151 | static_key_slow_inc(&perf_swevent_enabled[event_id]); |
5028 | event->destroy = sw_perf_event_destroy; | 5152 | event->destroy = sw_perf_event_destroy; |
5029 | } | 5153 | } |
5030 | 5154 | ||
5031 | return 0; | 5155 | return 0; |
5032 | } | 5156 | } |
5033 | 5157 | ||
5158 | static int perf_swevent_event_idx(struct perf_event *event) | ||
5159 | { | ||
5160 | return 0; | ||
5161 | } | ||
5162 | |||
5034 | static struct pmu perf_swevent = { | 5163 | static struct pmu perf_swevent = { |
5035 | .task_ctx_nr = perf_sw_context, | 5164 | .task_ctx_nr = perf_sw_context, |
5036 | 5165 | ||
@@ -5040,6 +5169,8 @@ static struct pmu perf_swevent = { | |||
5040 | .start = perf_swevent_start, | 5169 | .start = perf_swevent_start, |
5041 | .stop = perf_swevent_stop, | 5170 | .stop = perf_swevent_stop, |
5042 | .read = perf_swevent_read, | 5171 | .read = perf_swevent_read, |
5172 | |||
5173 | .event_idx = perf_swevent_event_idx, | ||
5043 | }; | 5174 | }; |
5044 | 5175 | ||
5045 | #ifdef CONFIG_EVENT_TRACING | 5176 | #ifdef CONFIG_EVENT_TRACING |
@@ -5108,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) | |||
5108 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 5239 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
5109 | return -ENOENT; | 5240 | return -ENOENT; |
5110 | 5241 | ||
5242 | /* | ||
5243 | * no branch sampling for tracepoint events | ||
5244 | */ | ||
5245 | if (has_branch_stack(event)) | ||
5246 | return -EOPNOTSUPP; | ||
5247 | |||
5111 | err = perf_trace_init(event); | 5248 | err = perf_trace_init(event); |
5112 | if (err) | 5249 | if (err) |
5113 | return err; | 5250 | return err; |
@@ -5126,6 +5263,8 @@ static struct pmu perf_tracepoint = { | |||
5126 | .start = perf_swevent_start, | 5263 | .start = perf_swevent_start, |
5127 | .stop = perf_swevent_stop, | 5264 | .stop = perf_swevent_stop, |
5128 | .read = perf_swevent_read, | 5265 | .read = perf_swevent_read, |
5266 | |||
5267 | .event_idx = perf_swevent_event_idx, | ||
5129 | }; | 5268 | }; |
5130 | 5269 | ||
5131 | static inline void perf_tp_register(void) | 5270 | static inline void perf_tp_register(void) |
@@ -5331,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
5331 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | 5470 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) |
5332 | return -ENOENT; | 5471 | return -ENOENT; |
5333 | 5472 | ||
5473 | /* | ||
5474 | * no branch sampling for software events | ||
5475 | */ | ||
5476 | if (has_branch_stack(event)) | ||
5477 | return -EOPNOTSUPP; | ||
5478 | |||
5334 | perf_swevent_init_hrtimer(event); | 5479 | perf_swevent_init_hrtimer(event); |
5335 | 5480 | ||
5336 | return 0; | 5481 | return 0; |
@@ -5345,6 +5490,8 @@ static struct pmu perf_cpu_clock = { | |||
5345 | .start = cpu_clock_event_start, | 5490 | .start = cpu_clock_event_start, |
5346 | .stop = cpu_clock_event_stop, | 5491 | .stop = cpu_clock_event_stop, |
5347 | .read = cpu_clock_event_read, | 5492 | .read = cpu_clock_event_read, |
5493 | |||
5494 | .event_idx = perf_swevent_event_idx, | ||
5348 | }; | 5495 | }; |
5349 | 5496 | ||
5350 | /* | 5497 | /* |
@@ -5403,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) | |||
5403 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | 5550 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) |
5404 | return -ENOENT; | 5551 | return -ENOENT; |
5405 | 5552 | ||
5553 | /* | ||
5554 | * no branch sampling for software events | ||
5555 | */ | ||
5556 | if (has_branch_stack(event)) | ||
5557 | return -EOPNOTSUPP; | ||
5558 | |||
5406 | perf_swevent_init_hrtimer(event); | 5559 | perf_swevent_init_hrtimer(event); |
5407 | 5560 | ||
5408 | return 0; | 5561 | return 0; |
@@ -5417,6 +5570,8 @@ static struct pmu perf_task_clock = { | |||
5417 | .start = task_clock_event_start, | 5570 | .start = task_clock_event_start, |
5418 | .stop = task_clock_event_stop, | 5571 | .stop = task_clock_event_stop, |
5419 | .read = task_clock_event_read, | 5572 | .read = task_clock_event_read, |
5573 | |||
5574 | .event_idx = perf_swevent_event_idx, | ||
5420 | }; | 5575 | }; |
5421 | 5576 | ||
5422 | static void perf_pmu_nop_void(struct pmu *pmu) | 5577 | static void perf_pmu_nop_void(struct pmu *pmu) |
@@ -5444,6 +5599,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) | |||
5444 | perf_pmu_enable(pmu); | 5599 | perf_pmu_enable(pmu); |
5445 | } | 5600 | } |
5446 | 5601 | ||
5602 | static int perf_event_idx_default(struct perf_event *event) | ||
5603 | { | ||
5604 | return event->hw.idx + 1; | ||
5605 | } | ||
5606 | |||
5447 | /* | 5607 | /* |
5448 | * Ensures all contexts with the same task_ctx_nr have the same | 5608 | * Ensures all contexts with the same task_ctx_nr have the same |
5449 | * pmu_cpu_context too. | 5609 | * pmu_cpu_context too. |
@@ -5530,6 +5690,7 @@ static int pmu_dev_alloc(struct pmu *pmu) | |||
5530 | if (!pmu->dev) | 5690 | if (!pmu->dev) |
5531 | goto out; | 5691 | goto out; |
5532 | 5692 | ||
5693 | pmu->dev->groups = pmu->attr_groups; | ||
5533 | device_initialize(pmu->dev); | 5694 | device_initialize(pmu->dev); |
5534 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | 5695 | ret = dev_set_name(pmu->dev, "%s", pmu->name); |
5535 | if (ret) | 5696 | if (ret) |
@@ -5633,6 +5794,9 @@ got_cpu_context: | |||
5633 | pmu->pmu_disable = perf_pmu_nop_void; | 5794 | pmu->pmu_disable = perf_pmu_nop_void; |
5634 | } | 5795 | } |
5635 | 5796 | ||
5797 | if (!pmu->event_idx) | ||
5798 | pmu->event_idx = perf_event_idx_default; | ||
5799 | |||
5636 | list_add_rcu(&pmu->entry, &pmus); | 5800 | list_add_rcu(&pmu->entry, &pmus); |
5637 | ret = 0; | 5801 | ret = 0; |
5638 | unlock: | 5802 | unlock: |
@@ -5825,7 +5989,7 @@ done: | |||
5825 | 5989 | ||
5826 | if (!event->parent) { | 5990 | if (!event->parent) { |
5827 | if (event->attach_state & PERF_ATTACH_TASK) | 5991 | if (event->attach_state & PERF_ATTACH_TASK) |
5828 | jump_label_inc(&perf_sched_events.key); | 5992 | static_key_slow_inc(&perf_sched_events.key); |
5829 | if (event->attr.mmap || event->attr.mmap_data) | 5993 | if (event->attr.mmap || event->attr.mmap_data) |
5830 | atomic_inc(&nr_mmap_events); | 5994 | atomic_inc(&nr_mmap_events); |
5831 | if (event->attr.comm) | 5995 | if (event->attr.comm) |
@@ -5839,6 +6003,12 @@ done: | |||
5839 | return ERR_PTR(err); | 6003 | return ERR_PTR(err); |
5840 | } | 6004 | } |
5841 | } | 6005 | } |
6006 | if (has_branch_stack(event)) { | ||
6007 | static_key_slow_inc(&perf_sched_events.key); | ||
6008 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
6009 | atomic_inc(&per_cpu(perf_branch_stack_events, | ||
6010 | event->cpu)); | ||
6011 | } | ||
5842 | } | 6012 | } |
5843 | 6013 | ||
5844 | return event; | 6014 | return event; |
@@ -5908,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
5908 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) | 6078 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) |
5909 | return -EINVAL; | 6079 | return -EINVAL; |
5910 | 6080 | ||
6081 | if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
6082 | u64 mask = attr->branch_sample_type; | ||
6083 | |||
6084 | /* only using defined bits */ | ||
6085 | if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) | ||
6086 | return -EINVAL; | ||
6087 | |||
6088 | /* at least one branch bit must be set */ | ||
6089 | if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) | ||
6090 | return -EINVAL; | ||
6091 | |||
6092 | /* kernel level capture: check permissions */ | ||
6093 | if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) | ||
6094 | && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
6095 | return -EACCES; | ||
6096 | |||
6097 | /* propagate priv level, when not set for branch */ | ||
6098 | if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { | ||
6099 | |||
6100 | /* exclude_kernel checked on syscall entry */ | ||
6101 | if (!attr->exclude_kernel) | ||
6102 | mask |= PERF_SAMPLE_BRANCH_KERNEL; | ||
6103 | |||
6104 | if (!attr->exclude_user) | ||
6105 | mask |= PERF_SAMPLE_BRANCH_USER; | ||
6106 | |||
6107 | if (!attr->exclude_hv) | ||
6108 | mask |= PERF_SAMPLE_BRANCH_HV; | ||
6109 | /* | ||
6110 | * adjust user setting (for HW filter setup) | ||
6111 | */ | ||
6112 | attr->branch_sample_type = mask; | ||
6113 | } | ||
6114 | } | ||
5911 | out: | 6115 | out: |
5912 | return ret; | 6116 | return ret; |
5913 | 6117 | ||
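From user space, requesting taken-branch sampling means setting PERF_SAMPLE_BRANCH_STACK in sample_type plus a branch_sample_type mask built from the bits validated above. A minimal sketch of the attr setup, leaving the priv-level bits to the kernel's propagation logic:

#include <linux/perf_event.h>
#include <string.h>

static void setup_branch_sampling(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size          = sizeof(*attr);
	attr->type          = PERF_TYPE_HARDWARE;
	attr->config        = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 100000;
	attr->sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/*
	 * Only a branch-type bit is set here; since no priv-level bit is
	 * given, perf_copy_attr() ORs in USER/KERNEL/HV according to the
	 * exclude_* flags (and explicitly asking for KERNEL or HV needs
	 * CAP_SYS_ADMIN under perf_paranoid_kernel()).
	 */
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
}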
@@ -6063,7 +6267,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6063 | * - that may need work on context switch | 6267 | * - that may need work on context switch |
6064 | */ | 6268 | */ |
6065 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); | 6269 | atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); |
6066 | jump_label_inc(&perf_sched_events.key); | 6270 | static_key_slow_inc(&perf_sched_events.key); |
6067 | } | 6271 | } |
6068 | 6272 | ||
6069 | /* | 6273 | /* |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index ee706ce44aa0..bb38c4d3ee12 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) | |||
581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) | 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) |
582 | return -ENOENT; | 582 | return -ENOENT; |
583 | 583 | ||
584 | /* | ||
585 | * no branch sampling for breakpoint events | ||
586 | */ | ||
587 | if (has_branch_stack(bp)) | ||
588 | return -EOPNOTSUPP; | ||
589 | |||
584 | err = register_perf_hw_breakpoint(bp); | 590 | err = register_perf_hw_breakpoint(bp); |
585 | if (err) | 591 | if (err) |
586 | return err; | 592 | return err; |
@@ -613,6 +619,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) | |||
613 | bp->hw.state = PERF_HES_STOPPED; | 619 | bp->hw.state = PERF_HES_STOPPED; |
614 | } | 620 | } |
615 | 621 | ||
622 | static int hw_breakpoint_event_idx(struct perf_event *bp) | ||
623 | { | ||
624 | return 0; | ||
625 | } | ||
626 | |||
616 | static struct pmu perf_breakpoint = { | 627 | static struct pmu perf_breakpoint = { |
617 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ | 628 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ |
618 | 629 | ||
@@ -622,6 +633,8 @@ static struct pmu perf_breakpoint = { | |||
622 | .start = hw_breakpoint_start, | 633 | .start = hw_breakpoint_start, |
623 | .stop = hw_breakpoint_stop, | 634 | .stop = hw_breakpoint_stop, |
624 | .read = hw_breakpoint_pmu_read, | 635 | .read = hw_breakpoint_pmu_read, |
636 | |||
637 | .event_idx = hw_breakpoint_event_idx, | ||
625 | }; | 638 | }; |
626 | 639 | ||
627 | int __init init_hw_breakpoint(void) | 640 | int __init init_hw_breakpoint(void) |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 25784d630a12..6080f6bc8c33 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
18 | 18 | ||
19 | #include <trace/events/irq.h> | ||
20 | |||
19 | #include "internals.h" | 21 | #include "internals.h" |
20 | 22 | ||
21 | /** | 23 | /** |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 01d3b70fc98a..43049192b5ec 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | #include <linux/jump_label.h> | 15 | #include <linux/static_key.h> |
16 | 16 | ||
17 | #ifdef HAVE_JUMP_LABEL | 17 | #ifdef HAVE_JUMP_LABEL |
18 | 18 | ||
@@ -29,11 +29,6 @@ void jump_label_unlock(void) | |||
29 | mutex_unlock(&jump_label_mutex); | 29 | mutex_unlock(&jump_label_mutex); |
30 | } | 30 | } |
31 | 31 | ||
32 | bool jump_label_enabled(struct jump_label_key *key) | ||
33 | { | ||
34 | return !!atomic_read(&key->enabled); | ||
35 | } | ||
36 | |||
37 | static int jump_label_cmp(const void *a, const void *b) | 32 | static int jump_label_cmp(const void *a, const void *b) |
38 | { | 33 | { |
39 | const struct jump_entry *jea = a; | 34 | const struct jump_entry *jea = a; |
@@ -58,56 +53,66 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) | |||
58 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); | 53 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); |
59 | } | 54 | } |
60 | 55 | ||
61 | static void jump_label_update(struct jump_label_key *key, int enable); | 56 | static void jump_label_update(struct static_key *key, int enable); |
62 | 57 | ||
63 | void jump_label_inc(struct jump_label_key *key) | 58 | void static_key_slow_inc(struct static_key *key) |
64 | { | 59 | { |
65 | if (atomic_inc_not_zero(&key->enabled)) | 60 | if (atomic_inc_not_zero(&key->enabled)) |
66 | return; | 61 | return; |
67 | 62 | ||
68 | jump_label_lock(); | 63 | jump_label_lock(); |
69 | if (atomic_read(&key->enabled) == 0) | 64 | if (atomic_read(&key->enabled) == 0) { |
70 | jump_label_update(key, JUMP_LABEL_ENABLE); | 65 | if (!jump_label_get_branch_default(key)) |
66 | jump_label_update(key, JUMP_LABEL_ENABLE); | ||
67 | else | ||
68 | jump_label_update(key, JUMP_LABEL_DISABLE); | ||
69 | } | ||
71 | atomic_inc(&key->enabled); | 70 | atomic_inc(&key->enabled); |
72 | jump_label_unlock(); | 71 | jump_label_unlock(); |
73 | } | 72 | } |
74 | EXPORT_SYMBOL_GPL(jump_label_inc); | 73 | EXPORT_SYMBOL_GPL(static_key_slow_inc); |
75 | 74 | ||
76 | static void __jump_label_dec(struct jump_label_key *key, | 75 | static void __static_key_slow_dec(struct static_key *key, |
77 | unsigned long rate_limit, struct delayed_work *work) | 76 | unsigned long rate_limit, struct delayed_work *work) |
78 | { | 77 | { |
79 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) | 78 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { |
79 | WARN(atomic_read(&key->enabled) < 0, | ||
80 | "jump label: negative count!\n"); | ||
80 | return; | 81 | return; |
82 | } | ||
81 | 83 | ||
82 | if (rate_limit) { | 84 | if (rate_limit) { |
83 | atomic_inc(&key->enabled); | 85 | atomic_inc(&key->enabled); |
84 | schedule_delayed_work(work, rate_limit); | 86 | schedule_delayed_work(work, rate_limit); |
85 | } else | 87 | } else { |
86 | jump_label_update(key, JUMP_LABEL_DISABLE); | 88 | if (!jump_label_get_branch_default(key)) |
87 | 89 | jump_label_update(key, JUMP_LABEL_DISABLE); | |
90 | else | ||
91 | jump_label_update(key, JUMP_LABEL_ENABLE); | ||
92 | } | ||
88 | jump_label_unlock(); | 93 | jump_label_unlock(); |
89 | } | 94 | } |
90 | EXPORT_SYMBOL_GPL(jump_label_dec); | ||
91 | 95 | ||
92 | static void jump_label_update_timeout(struct work_struct *work) | 96 | static void jump_label_update_timeout(struct work_struct *work) |
93 | { | 97 | { |
94 | struct jump_label_key_deferred *key = | 98 | struct static_key_deferred *key = |
95 | container_of(work, struct jump_label_key_deferred, work.work); | 99 | container_of(work, struct static_key_deferred, work.work); |
96 | __jump_label_dec(&key->key, 0, NULL); | 100 | __static_key_slow_dec(&key->key, 0, NULL); |
97 | } | 101 | } |
98 | 102 | ||
99 | void jump_label_dec(struct jump_label_key *key) | 103 | void static_key_slow_dec(struct static_key *key) |
100 | { | 104 | { |
101 | __jump_label_dec(key, 0, NULL); | 105 | __static_key_slow_dec(key, 0, NULL); |
102 | } | 106 | } |
107 | EXPORT_SYMBOL_GPL(static_key_slow_dec); | ||
103 | 108 | ||
104 | void jump_label_dec_deferred(struct jump_label_key_deferred *key) | 109 | void static_key_slow_dec_deferred(struct static_key_deferred *key) |
105 | { | 110 | { |
106 | __jump_label_dec(&key->key, key->timeout, &key->work); | 111 | __static_key_slow_dec(&key->key, key->timeout, &key->work); |
107 | } | 112 | } |
113 | EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); | ||
108 | 114 | ||
109 | 115 | void jump_label_rate_limit(struct static_key_deferred *key, | |
110 | void jump_label_rate_limit(struct jump_label_key_deferred *key, | ||
111 | unsigned long rl) | 116 | unsigned long rl) |
112 | { | 117 | { |
113 | key->timeout = rl; | 118 | key->timeout = rl; |
@@ -150,7 +155,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry | |||
150 | arch_jump_label_transform(entry, type); | 155 | arch_jump_label_transform(entry, type); |
151 | } | 156 | } |
152 | 157 | ||
153 | static void __jump_label_update(struct jump_label_key *key, | 158 | static void __jump_label_update(struct static_key *key, |
154 | struct jump_entry *entry, | 159 | struct jump_entry *entry, |
155 | struct jump_entry *stop, int enable) | 160 | struct jump_entry *stop, int enable) |
156 | { | 161 | { |
@@ -167,27 +172,40 @@ static void __jump_label_update(struct jump_label_key *key, | |||
167 | } | 172 | } |
168 | } | 173 | } |
169 | 174 | ||
175 | static enum jump_label_type jump_label_type(struct static_key *key) | ||
176 | { | ||
177 | bool true_branch = jump_label_get_branch_default(key); | ||
178 | bool state = static_key_enabled(key); | ||
179 | |||
180 | if ((!true_branch && state) || (true_branch && !state)) | ||
181 | return JUMP_LABEL_ENABLE; | ||
182 | |||
183 | return JUMP_LABEL_DISABLE; | ||
184 | } | ||
185 | |||
170 | void __init jump_label_init(void) | 186 | void __init jump_label_init(void) |
171 | { | 187 | { |
172 | struct jump_entry *iter_start = __start___jump_table; | 188 | struct jump_entry *iter_start = __start___jump_table; |
173 | struct jump_entry *iter_stop = __stop___jump_table; | 189 | struct jump_entry *iter_stop = __stop___jump_table; |
174 | struct jump_label_key *key = NULL; | 190 | struct static_key *key = NULL; |
175 | struct jump_entry *iter; | 191 | struct jump_entry *iter; |
176 | 192 | ||
177 | jump_label_lock(); | 193 | jump_label_lock(); |
178 | jump_label_sort_entries(iter_start, iter_stop); | 194 | jump_label_sort_entries(iter_start, iter_stop); |
179 | 195 | ||
180 | for (iter = iter_start; iter < iter_stop; iter++) { | 196 | for (iter = iter_start; iter < iter_stop; iter++) { |
181 | struct jump_label_key *iterk; | 197 | struct static_key *iterk; |
182 | 198 | ||
183 | iterk = (struct jump_label_key *)(unsigned long)iter->key; | 199 | iterk = (struct static_key *)(unsigned long)iter->key; |
184 | arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? | 200 | arch_jump_label_transform_static(iter, jump_label_type(iterk)); |
185 | JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); | ||
186 | if (iterk == key) | 201 | if (iterk == key) |
187 | continue; | 202 | continue; |
188 | 203 | ||
189 | key = iterk; | 204 | key = iterk; |
190 | key->entries = iter; | 205 | /* |
206 | * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. | ||
207 | */ | ||
208 | *((unsigned long *)&key->entries) += (unsigned long)iter; | ||
191 | #ifdef CONFIG_MODULES | 209 | #ifdef CONFIG_MODULES |
192 | key->next = NULL; | 210 | key->next = NULL; |
193 | #endif | 211 | #endif |
@@ -197,8 +215,8 @@ void __init jump_label_init(void) | |||
197 | 215 | ||
198 | #ifdef CONFIG_MODULES | 216 | #ifdef CONFIG_MODULES |
199 | 217 | ||
200 | struct jump_label_mod { | 218 | struct static_key_mod { |
201 | struct jump_label_mod *next; | 219 | struct static_key_mod *next; |
202 | struct jump_entry *entries; | 220 | struct jump_entry *entries; |
203 | struct module *mod; | 221 | struct module *mod; |
204 | }; | 222 | }; |
@@ -218,9 +236,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end) | |||
218 | start, end); | 236 | start, end); |
219 | } | 237 | } |
220 | 238 | ||
221 | static void __jump_label_mod_update(struct jump_label_key *key, int enable) | 239 | static void __jump_label_mod_update(struct static_key *key, int enable) |
222 | { | 240 | { |
223 | struct jump_label_mod *mod = key->next; | 241 | struct static_key_mod *mod = key->next; |
224 | 242 | ||
225 | while (mod) { | 243 | while (mod) { |
226 | struct module *m = mod->mod; | 244 | struct module *m = mod->mod; |
@@ -251,11 +269,7 @@ void jump_label_apply_nops(struct module *mod) | |||
251 | return; | 269 | return; |
252 | 270 | ||
253 | for (iter = iter_start; iter < iter_stop; iter++) { | 271 | for (iter = iter_start; iter < iter_stop; iter++) { |
254 | struct jump_label_key *iterk; | 272 | arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); |
255 | |||
256 | iterk = (struct jump_label_key *)(unsigned long)iter->key; | ||
257 | arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? | ||
258 | JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); | ||
259 | } | 273 | } |
260 | } | 274 | } |
261 | 275 | ||
@@ -264,8 +278,8 @@ static int jump_label_add_module(struct module *mod) | |||
264 | struct jump_entry *iter_start = mod->jump_entries; | 278 | struct jump_entry *iter_start = mod->jump_entries; |
265 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; | 279 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; |
266 | struct jump_entry *iter; | 280 | struct jump_entry *iter; |
267 | struct jump_label_key *key = NULL; | 281 | struct static_key *key = NULL; |
268 | struct jump_label_mod *jlm; | 282 | struct static_key_mod *jlm; |
269 | 283 | ||
270 | /* if the module doesn't have jump label entries, just return */ | 284 | /* if the module doesn't have jump label entries, just return */ |
271 | if (iter_start == iter_stop) | 285 | if (iter_start == iter_stop) |
@@ -274,28 +288,30 @@ static int jump_label_add_module(struct module *mod) | |||
274 | jump_label_sort_entries(iter_start, iter_stop); | 288 | jump_label_sort_entries(iter_start, iter_stop); |
275 | 289 | ||
276 | for (iter = iter_start; iter < iter_stop; iter++) { | 290 | for (iter = iter_start; iter < iter_stop; iter++) { |
277 | if (iter->key == (jump_label_t)(unsigned long)key) | 291 | struct static_key *iterk; |
278 | continue; | ||
279 | 292 | ||
280 | key = (struct jump_label_key *)(unsigned long)iter->key; | 293 | iterk = (struct static_key *)(unsigned long)iter->key; |
294 | if (iterk == key) | ||
295 | continue; | ||
281 | 296 | ||
297 | key = iterk; | ||
282 | if (__module_address(iter->key) == mod) { | 298 | if (__module_address(iter->key) == mod) { |
283 | atomic_set(&key->enabled, 0); | 299 | /* |
284 | key->entries = iter; | 300 | * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. |
301 | */ | ||
302 | *((unsigned long *)&key->entries) += (unsigned long)iter; | ||
285 | key->next = NULL; | 303 | key->next = NULL; |
286 | continue; | 304 | continue; |
287 | } | 305 | } |
288 | 306 | jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); | |
289 | jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); | ||
290 | if (!jlm) | 307 | if (!jlm) |
291 | return -ENOMEM; | 308 | return -ENOMEM; |
292 | |||
293 | jlm->mod = mod; | 309 | jlm->mod = mod; |
294 | jlm->entries = iter; | 310 | jlm->entries = iter; |
295 | jlm->next = key->next; | 311 | jlm->next = key->next; |
296 | key->next = jlm; | 312 | key->next = jlm; |
297 | 313 | ||
298 | if (jump_label_enabled(key)) | 314 | if (jump_label_type(key) == JUMP_LABEL_ENABLE) |
299 | __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); | 315 | __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); |
300 | } | 316 | } |
301 | 317 | ||
@@ -307,14 +323,14 @@ static void jump_label_del_module(struct module *mod) | |||
307 | struct jump_entry *iter_start = mod->jump_entries; | 323 | struct jump_entry *iter_start = mod->jump_entries; |
308 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; | 324 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; |
309 | struct jump_entry *iter; | 325 | struct jump_entry *iter; |
310 | struct jump_label_key *key = NULL; | 326 | struct static_key *key = NULL; |
311 | struct jump_label_mod *jlm, **prev; | 327 | struct static_key_mod *jlm, **prev; |
312 | 328 | ||
313 | for (iter = iter_start; iter < iter_stop; iter++) { | 329 | for (iter = iter_start; iter < iter_stop; iter++) { |
314 | if (iter->key == (jump_label_t)(unsigned long)key) | 330 | if (iter->key == (jump_label_t)(unsigned long)key) |
315 | continue; | 331 | continue; |
316 | 332 | ||
317 | key = (struct jump_label_key *)(unsigned long)iter->key; | 333 | key = (struct static_key *)(unsigned long)iter->key; |
318 | 334 | ||
319 | if (__module_address(iter->key) == mod) | 335 | if (__module_address(iter->key) == mod) |
320 | continue; | 336 | continue; |
@@ -416,12 +432,13 @@ int jump_label_text_reserved(void *start, void *end) | |||
416 | return ret; | 432 | return ret; |
417 | } | 433 | } |
418 | 434 | ||
419 | static void jump_label_update(struct jump_label_key *key, int enable) | 435 | static void jump_label_update(struct static_key *key, int enable) |
420 | { | 436 | { |
421 | struct jump_entry *entry = key->entries, *stop = __stop___jump_table; | 437 | struct jump_entry *stop = __stop___jump_table; |
438 | struct jump_entry *entry = jump_label_get_entries(key); | ||
422 | 439 | ||
423 | #ifdef CONFIG_MODULES | 440 | #ifdef CONFIG_MODULES |
424 | struct module *mod = __module_address((jump_label_t)key); | 441 | struct module *mod = __module_address((unsigned long)key); |
425 | 442 | ||
426 | __jump_label_mod_update(key, enable); | 443 | __jump_label_mod_update(key, enable); |
427 | 444 | ||
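After the rename, the pattern at a call site looks as follows; my_feature and the surrounding functions are illustrative, but the API names (STATIC_KEY_INIT_FALSE, static_key_false, static_key_slow_inc/dec) are the ones introduced by this patch.

#include <linux/printk.h>
#include <linux/static_key.h>
#include <linux/types.h>

static struct static_key my_feature = STATIC_KEY_INIT_FALSE;

static void hot_path(void)
{
	if (static_key_false(&my_feature))	/* out of line while disabled */
		pr_debug("rare feature path\n");
}

static void feature_set(bool on)
{
	if (on)
		static_key_slow_inc(&my_feature);  /* patches sites to branch in */
	else
		static_key_slow_dec(&my_feature);
}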
diff --git a/kernel/printk.c b/kernel/printk.c index 32690a0b7a18..0b3ea2cbd5fb 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -44,6 +44,9 @@ | |||
44 | 44 | ||
45 | #include <asm/uaccess.h> | 45 | #include <asm/uaccess.h> |
46 | 46 | ||
47 | #define CREATE_TRACE_POINTS | ||
48 | #include <trace/events/printk.h> | ||
49 | |||
47 | /* | 50 | /* |
48 | * Architectures can override it: | 51 | * Architectures can override it: |
49 | */ | 52 | */ |
@@ -542,6 +545,8 @@ MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to" | |||
542 | static void _call_console_drivers(unsigned start, | 545 | static void _call_console_drivers(unsigned start, |
543 | unsigned end, int msg_log_level) | 546 | unsigned end, int msg_log_level) |
544 | { | 547 | { |
548 | trace_console(&LOG_BUF(0), start, end, log_buf_len); | ||
549 | |||
545 | if ((msg_log_level < console_loglevel || ignore_loglevel) && | 550 | if ((msg_log_level < console_loglevel || ignore_loglevel) && |
546 | console_drivers && start != end) { | 551 | console_drivers && start != end) { |
547 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { | 552 | if ((start & LOG_BUF_MASK) > (end & LOG_BUF_MASK)) { |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b342f57879e6..6c41ba49767a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v) | |||
162 | 162 | ||
163 | #ifdef HAVE_JUMP_LABEL | 163 | #ifdef HAVE_JUMP_LABEL |
164 | 164 | ||
165 | #define jump_label_key__true jump_label_key_enabled | 165 | #define jump_label_key__true STATIC_KEY_INIT_TRUE |
166 | #define jump_label_key__false jump_label_key_disabled | 166 | #define jump_label_key__false STATIC_KEY_INIT_FALSE |
167 | 167 | ||
168 | #define SCHED_FEAT(name, enabled) \ | 168 | #define SCHED_FEAT(name, enabled) \ |
169 | jump_label_key__##enabled , | 169 | jump_label_key__##enabled , |
170 | 170 | ||
171 | struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | 171 | struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { |
172 | #include "features.h" | 172 | #include "features.h" |
173 | }; | 173 | }; |
174 | 174 | ||
@@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | |||
176 | 176 | ||
177 | static void sched_feat_disable(int i) | 177 | static void sched_feat_disable(int i) |
178 | { | 178 | { |
179 | if (jump_label_enabled(&sched_feat_keys[i])) | 179 | if (static_key_enabled(&sched_feat_keys[i])) |
180 | jump_label_dec(&sched_feat_keys[i]); | 180 | static_key_slow_dec(&sched_feat_keys[i]); |
181 | } | 181 | } |
182 | 182 | ||
183 | static void sched_feat_enable(int i) | 183 | static void sched_feat_enable(int i) |
184 | { | 184 | { |
185 | if (!jump_label_enabled(&sched_feat_keys[i])) | 185 | if (!static_key_enabled(&sched_feat_keys[i])) |
186 | jump_label_inc(&sched_feat_keys[i]); | 186 | static_key_slow_inc(&sched_feat_keys[i]); |
187 | } | 187 | } |
188 | #else | 188 | #else |
189 | static void sched_feat_disable(int i) { }; | 189 | static void sched_feat_disable(int i) { }; |
@@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
894 | delta -= irq_delta; | 894 | delta -= irq_delta; |
895 | #endif | 895 | #endif |
896 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | 896 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING |
897 | if (static_branch((¶virt_steal_rq_enabled))) { | 897 | if (static_key_false((¶virt_steal_rq_enabled))) { |
898 | u64 st; | 898 | u64 st; |
899 | 899 | ||
900 | steal = paravirt_steal_clock(cpu_of(rq)); | 900 | steal = paravirt_steal_clock(cpu_of(rq)); |
@@ -2755,7 +2755,7 @@ void account_idle_time(cputime_t cputime) | |||
2755 | static __always_inline bool steal_account_process_tick(void) | 2755 | static __always_inline bool steal_account_process_tick(void) |
2756 | { | 2756 | { |
2757 | #ifdef CONFIG_PARAVIRT | 2757 | #ifdef CONFIG_PARAVIRT |
2758 | if (static_branch(¶virt_steal_enabled)) { | 2758 | if (static_key_false(¶virt_steal_enabled)) { |
2759 | u64 steal, st = 0; | 2759 | u64 steal, st = 0; |
2760 | 2760 | ||
2761 | steal = paravirt_steal_clock(smp_processor_id()); | 2761 | steal = paravirt_steal_clock(smp_processor_id()); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aca16b843b7e..fd974faf467d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1401,20 +1401,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
1401 | #ifdef CONFIG_CFS_BANDWIDTH | 1401 | #ifdef CONFIG_CFS_BANDWIDTH |
1402 | 1402 | ||
1403 | #ifdef HAVE_JUMP_LABEL | 1403 | #ifdef HAVE_JUMP_LABEL |
1404 | static struct jump_label_key __cfs_bandwidth_used; | 1404 | static struct static_key __cfs_bandwidth_used; |
1405 | 1405 | ||
1406 | static inline bool cfs_bandwidth_used(void) | 1406 | static inline bool cfs_bandwidth_used(void) |
1407 | { | 1407 | { |
1408 | return static_branch(&__cfs_bandwidth_used); | 1408 | return static_key_false(&__cfs_bandwidth_used); |
1409 | } | 1409 | } |
1410 | 1410 | ||
1411 | void account_cfs_bandwidth_used(int enabled, int was_enabled) | 1411 | void account_cfs_bandwidth_used(int enabled, int was_enabled) |
1412 | { | 1412 | { |
1413 | /* only need to count groups transitioning between enabled/!enabled */ | 1413 | /* only need to count groups transitioning between enabled/!enabled */ |
1414 | if (enabled && !was_enabled) | 1414 | if (enabled && !was_enabled) |
1415 | jump_label_inc(&__cfs_bandwidth_used); | 1415 | static_key_slow_inc(&__cfs_bandwidth_used); |
1416 | else if (!enabled && was_enabled) | 1416 | else if (!enabled && was_enabled) |
1417 | jump_label_dec(&__cfs_bandwidth_used); | 1417 | static_key_slow_dec(&__cfs_bandwidth_used); |
1418 | } | 1418 | } |
1419 | #else /* HAVE_JUMP_LABEL */ | 1419 | #else /* HAVE_JUMP_LABEL */ |
1420 | static bool cfs_bandwidth_used(void) | 1420 | static bool cfs_bandwidth_used(void) |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..b4cd6d8ea150 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
611 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | 611 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
612 | */ | 612 | */ |
613 | #ifdef CONFIG_SCHED_DEBUG | 613 | #ifdef CONFIG_SCHED_DEBUG |
614 | # include <linux/jump_label.h> | 614 | # include <linux/static_key.h> |
615 | # define const_debug __read_mostly | 615 | # define const_debug __read_mostly |
616 | #else | 616 | #else |
617 | # define const_debug const | 617 | # define const_debug const |
@@ -630,18 +630,18 @@ enum { | |||
630 | #undef SCHED_FEAT | 630 | #undef SCHED_FEAT |
631 | 631 | ||
632 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) | 632 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) |
633 | static __always_inline bool static_branch__true(struct jump_label_key *key) | 633 | static __always_inline bool static_branch__true(struct static_key *key) |
634 | { | 634 | { |
635 | return likely(static_branch(key)); /* Not out of line branch. */ | 635 | return static_key_true(key); /* Not out of line branch. */ |
636 | } | 636 | } |
637 | 637 | ||
638 | static __always_inline bool static_branch__false(struct jump_label_key *key) | 638 | static __always_inline bool static_branch__false(struct static_key *key) |
639 | { | 639 | { |
640 | return unlikely(static_branch(key)); /* Out of line branch. */ | 640 | return static_key_false(key); /* Out of line branch. */ |
641 | } | 641 | } |
642 | 642 | ||
643 | #define SCHED_FEAT(name, enabled) \ | 643 | #define SCHED_FEAT(name, enabled) \ |
644 | static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | 644 | static __always_inline bool static_branch_##name(struct static_key *key) \ |
645 | { \ | 645 | { \ |
646 | return static_branch__##enabled(key); \ | 646 | return static_branch__##enabled(key); \ |
647 | } | 647 | } |
@@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | |||
650 | 650 | ||
651 | #undef SCHED_FEAT | 651 | #undef SCHED_FEAT |
652 | 652 | ||
653 | extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; | 653 | extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; |
654 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) | 654 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) |
655 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ | 655 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ |
656 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) | 656 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) |
diff --git a/kernel/signal.c b/kernel/signal.c index c73c4284160e..8511e39813c7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1054 | struct sigpending *pending; | 1054 | struct sigpending *pending; |
1055 | struct sigqueue *q; | 1055 | struct sigqueue *q; |
1056 | int override_rlimit; | 1056 | int override_rlimit; |
1057 | 1057 | int ret = 0, result; | |
1058 | trace_signal_generate(sig, info, t); | ||
1059 | 1058 | ||
1060 | assert_spin_locked(&t->sighand->siglock); | 1059 | assert_spin_locked(&t->sighand->siglock); |
1061 | 1060 | ||
1061 | result = TRACE_SIGNAL_IGNORED; | ||
1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) | 1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) |
1063 | return 0; | 1063 | goto ret; |
1064 | 1064 | ||
1065 | pending = group ? &t->signal->shared_pending : &t->pending; | 1065 | pending = group ? &t->signal->shared_pending : &t->pending; |
1066 | /* | 1066 | /* |
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1068 | * exactly one non-rt signal, so that we can get more | 1068 | * exactly one non-rt signal, so that we can get more |
1069 | * detailed information about the cause of the signal. | 1069 | * detailed information about the cause of the signal. |
1070 | */ | 1070 | */ |
1071 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
1071 | if (legacy_queue(pending, sig)) | 1072 | if (legacy_queue(pending, sig)) |
1072 | return 0; | 1073 | goto ret; |
1074 | |||
1075 | result = TRACE_SIGNAL_DELIVERED; | ||
1073 | /* | 1076 | /* |
1074 | * fast-pathed signals for kernel-internal things like SIGSTOP | 1077 | * fast-pathed signals for kernel-internal things like SIGSTOP |
1075 | * or SIGKILL. | 1078 | * or SIGKILL. |
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1127 | * signal was rt and sent by user using something | 1130 | * signal was rt and sent by user using something |
1128 | * other than kill(). | 1131 | * other than kill(). |
1129 | */ | 1132 | */ |
1130 | trace_signal_overflow_fail(sig, group, info); | 1133 | result = TRACE_SIGNAL_OVERFLOW_FAIL; |
1131 | return -EAGAIN; | 1134 | ret = -EAGAIN; |
1135 | goto ret; | ||
1132 | } else { | 1136 | } else { |
1133 | /* | 1137 | /* |
1134 | * This is a silent loss of information. We still | 1138 | * This is a silent loss of information. We still |
1135 | * send the signal, but the *info bits are lost. | 1139 | * send the signal, but the *info bits are lost. |
1136 | */ | 1140 | */ |
1137 | trace_signal_lose_info(sig, group, info); | 1141 | result = TRACE_SIGNAL_LOSE_INFO; |
1138 | } | 1142 | } |
1139 | } | 1143 | } |
1140 | 1144 | ||
@@ -1142,7 +1146,9 @@ out_set: | |||
1142 | signalfd_notify(t, sig); | 1146 | signalfd_notify(t, sig); |
1143 | sigaddset(&pending->signal, sig); | 1147 | sigaddset(&pending->signal, sig); |
1144 | complete_signal(sig, t, group); | 1148 | complete_signal(sig, t, group); |
1145 | return 0; | 1149 | ret: |
1150 | trace_signal_generate(sig, info, t, group, result); | ||
1151 | return ret; | ||
1146 | } | 1152 | } |
1147 | 1153 | ||
1148 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | 1154 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, |
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1585 | int sig = q->info.si_signo; | 1591 | int sig = q->info.si_signo; |
1586 | struct sigpending *pending; | 1592 | struct sigpending *pending; |
1587 | unsigned long flags; | 1593 | unsigned long flags; |
1588 | int ret; | 1594 | int ret, result; |
1589 | 1595 | ||
1590 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1596 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1591 | 1597 | ||
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1594 | goto ret; | 1600 | goto ret; |
1595 | 1601 | ||
1596 | ret = 1; /* the signal is ignored */ | 1602 | ret = 1; /* the signal is ignored */ |
1603 | result = TRACE_SIGNAL_IGNORED; | ||
1597 | if (!prepare_signal(sig, t, 0)) | 1604 | if (!prepare_signal(sig, t, 0)) |
1598 | goto out; | 1605 | goto out; |
1599 | 1606 | ||
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1605 | */ | 1612 | */ |
1606 | BUG_ON(q->info.si_code != SI_TIMER); | 1613 | BUG_ON(q->info.si_code != SI_TIMER); |
1607 | q->info.si_overrun++; | 1614 | q->info.si_overrun++; |
1615 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
1608 | goto out; | 1616 | goto out; |
1609 | } | 1617 | } |
1610 | q->info.si_overrun = 0; | 1618 | q->info.si_overrun = 0; |
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1614 | list_add_tail(&q->list, &pending->list); | 1622 | list_add_tail(&q->list, &pending->list); |
1615 | sigaddset(&pending->signal, sig); | 1623 | sigaddset(&pending->signal, sig); |
1616 | complete_signal(sig, t, group); | 1624 | complete_signal(sig, t, group); |
1625 | result = TRACE_SIGNAL_DELIVERED; | ||
1617 | out: | 1626 | out: |
1627 | trace_signal_generate(sig, &q->info, t, group, result); | ||
1618 | unlock_task_sighand(t, &flags); | 1628 | unlock_task_sighand(t, &flags); |
1619 | ret: | 1629 | ret: |
1620 | return ret; | 1630 | return ret; |
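The signal.c hunks above collapse three tracepoints (trace_signal_generate at entry, trace_signal_overflow_fail, trace_signal_lose_info) into a single trace_signal_generate() fired once at a common exit label, now also carrying the group flag and a result code. A standalone sketch of that single-exit pattern, with all names hypothetical:

/* Result codes standing in for the TRACE_SIGNAL_* values. */
enum sig_result { RES_IGNORED, RES_ALREADY_PENDING, RES_DELIVERED };

static void trace_result(int sig, enum sig_result result)
{
	/* stand-in for the tracepoint */
}

static int send_example(int sig, int ignored, int already_pending)
{
	int ret = 0;
	enum sig_result result;

	result = RES_IGNORED;
	if (ignored)
		goto out;

	result = RES_ALREADY_PENDING;
	if (already_pending)
		goto out;

	result = RES_DELIVERED;
out:
	trace_result(sig, result);	/* fires exactly once per call */
	return ret;
}

Because every early return funnels through the exit label, the tracepoint now records ignored and coalesced signals too, not just delivered ones.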
diff --git a/kernel/softirq.c b/kernel/softirq.c index c82d95a022ef..8afc6a8d4d7c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -375,6 +375,12 @@ void raise_softirq(unsigned int nr) | |||
375 | local_irq_restore(flags); | 375 | local_irq_restore(flags); |
376 | } | 376 | } |
377 | 377 | ||
378 | void __raise_softirq_irqoff(unsigned int nr) | ||
379 | { | ||
380 | trace_softirq_raise(nr); | ||
381 | or_softirq_pending(1UL << nr); | ||
382 | } | ||
383 | |||
378 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | 384 | void open_softirq(int nr, void (*action)(struct softirq_action *)) |
379 | { | 385 | { |
380 | softirq_vec[nr].action = action; | 386 | softirq_vec[nr].action = action; |
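The softirq.c hunk above un-inlines __raise_softirq_irqoff() so it can emit trace_softirq_raise() before setting the pending bit. A hypothetical caller, for illustration only; interrupts must already be off, hence the _irqoff suffix:

#include <linux/interrupt.h>

static void kick_net_rx(void)
{
	unsigned long flags;

	local_irq_save(flags);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);	/* traced, then marked pending */
	local_irq_restore(flags);
}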
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 683d559a0eef..867bd1dd2dd0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -62,6 +62,8 @@ | |||
62 | #define FTRACE_HASH_DEFAULT_BITS 10 | 62 | #define FTRACE_HASH_DEFAULT_BITS 10 |
63 | #define FTRACE_HASH_MAX_BITS 12 | 63 | #define FTRACE_HASH_MAX_BITS 12 |
64 | 64 | ||
65 | #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) | ||
66 | |||
65 | /* ftrace_enabled is a method to turn ftrace on or off */ | 67 | /* ftrace_enabled is a method to turn ftrace on or off */ |
66 | int ftrace_enabled __read_mostly; | 68 | int ftrace_enabled __read_mostly; |
67 | static int last_ftrace_enabled; | 69 | static int last_ftrace_enabled; |
@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = { | |||
89 | }; | 91 | }; |
90 | 92 | ||
91 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; | 93 | static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; |
94 | static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; | ||
92 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; | 95 | static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; |
93 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; | 96 | ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; |
94 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; | 97 | static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; |
95 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; | 98 | ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; |
96 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; | 99 | ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; |
97 | static struct ftrace_ops global_ops; | 100 | static struct ftrace_ops global_ops; |
101 | static struct ftrace_ops control_ops; | ||
98 | 102 | ||
99 | static void | 103 | static void |
100 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); | 104 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); |
@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) | |||
168 | } | 172 | } |
169 | #endif | 173 | #endif |
170 | 174 | ||
175 | static void control_ops_disable_all(struct ftrace_ops *ops) | ||
176 | { | ||
177 | int cpu; | ||
178 | |||
179 | for_each_possible_cpu(cpu) | ||
180 | *per_cpu_ptr(ops->disabled, cpu) = 1; | ||
181 | } | ||
182 | |||
183 | static int control_ops_alloc(struct ftrace_ops *ops) | ||
184 | { | ||
185 | int __percpu *disabled; | ||
186 | |||
187 | disabled = alloc_percpu(int); | ||
188 | if (!disabled) | ||
189 | return -ENOMEM; | ||
190 | |||
191 | ops->disabled = disabled; | ||
192 | control_ops_disable_all(ops); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static void control_ops_free(struct ftrace_ops *ops) | ||
197 | { | ||
198 | free_percpu(ops->disabled); | ||
199 | } | ||
200 | |||
171 | static void update_global_ops(void) | 201 | static void update_global_ops(void) |
172 | { | 202 | { |
173 | ftrace_func_t func; | 203 | ftrace_func_t func; |
@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) | |||
259 | return 0; | 289 | return 0; |
260 | } | 290 | } |
261 | 291 | ||
292 | static void add_ftrace_list_ops(struct ftrace_ops **list, | ||
293 | struct ftrace_ops *main_ops, | ||
294 | struct ftrace_ops *ops) | ||
295 | { | ||
296 | int first = *list == &ftrace_list_end; | ||
297 | add_ftrace_ops(list, ops); | ||
298 | if (first) | ||
299 | add_ftrace_ops(&ftrace_ops_list, main_ops); | ||
300 | } | ||
301 | |||
302 | static int remove_ftrace_list_ops(struct ftrace_ops **list, | ||
303 | struct ftrace_ops *main_ops, | ||
304 | struct ftrace_ops *ops) | ||
305 | { | ||
306 | int ret = remove_ftrace_ops(list, ops); | ||
307 | if (!ret && *list == &ftrace_list_end) | ||
308 | ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); | ||
309 | return ret; | ||
310 | } | ||
311 | |||
262 | static int __register_ftrace_function(struct ftrace_ops *ops) | 312 | static int __register_ftrace_function(struct ftrace_ops *ops) |
263 | { | 313 | { |
264 | if (ftrace_disabled) | 314 | if (ftrace_disabled) |
@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
270 | if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) | 320 | if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED)) |
271 | return -EBUSY; | 321 | return -EBUSY; |
272 | 322 | ||
323 | /* We don't support both control and global flags set. */ | ||
324 | if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) | ||
325 | return -EINVAL; | ||
326 | |||
273 | if (!core_kernel_data((unsigned long)ops)) | 327 | if (!core_kernel_data((unsigned long)ops)) |
274 | ops->flags |= FTRACE_OPS_FL_DYNAMIC; | 328 | ops->flags |= FTRACE_OPS_FL_DYNAMIC; |
275 | 329 | ||
276 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 330 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
277 | int first = ftrace_global_list == &ftrace_list_end; | 331 | add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops); |
278 | add_ftrace_ops(&ftrace_global_list, ops); | ||
279 | ops->flags |= FTRACE_OPS_FL_ENABLED; | 332 | ops->flags |= FTRACE_OPS_FL_ENABLED; |
280 | if (first) | 333 | } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { |
281 | add_ftrace_ops(&ftrace_ops_list, &global_ops); | 334 | if (control_ops_alloc(ops)) |
335 | return -ENOMEM; | ||
336 | add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); | ||
282 | } else | 337 | } else |
283 | add_ftrace_ops(&ftrace_ops_list, ops); | 338 | add_ftrace_ops(&ftrace_ops_list, ops); |
284 | 339 | ||
@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
302 | return -EINVAL; | 357 | return -EINVAL; |
303 | 358 | ||
304 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { | 359 | if (ops->flags & FTRACE_OPS_FL_GLOBAL) { |
305 | ret = remove_ftrace_ops(&ftrace_global_list, ops); | 360 | ret = remove_ftrace_list_ops(&ftrace_global_list, |
306 | if (!ret && ftrace_global_list == &ftrace_list_end) | 361 | &global_ops, ops); |
307 | ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops); | ||
308 | if (!ret) | 362 | if (!ret) |
309 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | 363 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; |
364 | } else if (ops->flags & FTRACE_OPS_FL_CONTROL) { | ||
365 | ret = remove_ftrace_list_ops(&ftrace_control_list, | ||
366 | &control_ops, ops); | ||
367 | if (!ret) { | ||
368 | /* | ||
369 | * The ftrace_ops is now removed from the list, | ||
370 | * so there'll be no new users. We must ensure | ||
371 | * all current users are done before we free | ||
372 | * the control data. | ||
373 | */ | ||
374 | synchronize_sched(); | ||
375 | control_ops_free(ops); | ||
376 | } | ||
310 | } else | 377 | } else |
311 | ret = remove_ftrace_ops(&ftrace_ops_list, ops); | 378 | ret = remove_ftrace_ops(&ftrace_ops_list, ops); |
312 | 379 | ||
@@ -1119,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) | |||
1119 | call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); | 1186 | call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); |
1120 | } | 1187 | } |
1121 | 1188 | ||
1189 | void ftrace_free_filter(struct ftrace_ops *ops) | ||
1190 | { | ||
1191 | free_ftrace_hash(ops->filter_hash); | ||
1192 | free_ftrace_hash(ops->notrace_hash); | ||
1193 | } | ||
1194 | |||
1122 | static struct ftrace_hash *alloc_ftrace_hash(int size_bits) | 1195 | static struct ftrace_hash *alloc_ftrace_hash(int size_bits) |
1123 | { | 1196 | { |
1124 | struct ftrace_hash *hash; | 1197 | struct ftrace_hash *hash; |
@@ -1129,7 +1202,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits) | |||
1129 | return NULL; | 1202 | return NULL; |
1130 | 1203 | ||
1131 | size = 1 << size_bits; | 1204 | size = 1 << size_bits; |
1132 | hash->buckets = kzalloc(sizeof(*hash->buckets) * size, GFP_KERNEL); | 1205 | hash->buckets = kcalloc(size, sizeof(*hash->buckets), GFP_KERNEL); |
1133 | 1206 | ||
1134 | if (!hash->buckets) { | 1207 | if (!hash->buckets) { |
1135 | kfree(hash); | 1208 | kfree(hash); |
@@ -3146,8 +3219,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
3146 | mutex_lock(&ftrace_regex_lock); | 3219 | mutex_lock(&ftrace_regex_lock); |
3147 | if (reset) | 3220 | if (reset) |
3148 | ftrace_filter_reset(hash); | 3221 | ftrace_filter_reset(hash); |
3149 | if (buf) | 3222 | if (buf && !ftrace_match_records(hash, buf, len)) { |
3150 | ftrace_match_records(hash, buf, len); | 3223 | ret = -EINVAL; |
3224 | goto out_regex_unlock; | ||
3225 | } | ||
3151 | 3226 | ||
3152 | mutex_lock(&ftrace_lock); | 3227 | mutex_lock(&ftrace_lock); |
3153 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 3228 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
@@ -3157,6 +3232,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
3157 | 3232 | ||
3158 | mutex_unlock(&ftrace_lock); | 3233 | mutex_unlock(&ftrace_lock); |
3159 | 3234 | ||
3235 | out_regex_unlock: | ||
3160 | mutex_unlock(&ftrace_regex_lock); | 3236 | mutex_unlock(&ftrace_regex_lock); |
3161 | 3237 | ||
3162 | free_ftrace_hash(hash); | 3238 | free_ftrace_hash(hash); |
@@ -3173,10 +3249,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
3173 | * Filters denote which functions should be enabled when tracing is enabled. | 3249 | * Filters denote which functions should be enabled when tracing is enabled. |
3174 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. | 3250 | * If @buf is NULL and reset is set, all functions will be enabled for tracing. |
3175 | */ | 3251 | */ |
3176 | void ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, | 3252 | int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, |
3177 | int len, int reset) | 3253 | int len, int reset) |
3178 | { | 3254 | { |
3179 | ftrace_set_regex(ops, buf, len, reset, 1); | 3255 | return ftrace_set_regex(ops, buf, len, reset, 1); |
3180 | } | 3256 | } |
3181 | EXPORT_SYMBOL_GPL(ftrace_set_filter); | 3257 | EXPORT_SYMBOL_GPL(ftrace_set_filter); |
3182 | 3258 | ||
@@ -3191,10 +3267,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); | |||
3191 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled | 3267 | * is enabled. If @buf is NULL and reset is set, all functions will be enabled |
3192 | * for tracing. | 3268 | * for tracing. |
3193 | */ | 3269 | */ |
3194 | void ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, | 3270 | int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, |
3195 | int len, int reset) | 3271 | int len, int reset) |
3196 | { | 3272 | { |
3197 | ftrace_set_regex(ops, buf, len, reset, 0); | 3273 | return ftrace_set_regex(ops, buf, len, reset, 0); |
3198 | } | 3274 | } |
3199 | EXPORT_SYMBOL_GPL(ftrace_set_notrace); | 3275 | EXPORT_SYMBOL_GPL(ftrace_set_notrace); |
3200 | /** | 3276 | /** |
@@ -3871,6 +3947,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
3871 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 3947 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
3872 | 3948 | ||
3873 | static void | 3949 | static void |
3950 | ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) | ||
3951 | { | ||
3952 | struct ftrace_ops *op; | ||
3953 | |||
3954 | if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) | ||
3955 | return; | ||
3956 | |||
3957 | /* | ||
3958 | * Some of the ops may be dynamically allocated; | ||
3959 | * they must be freed after a synchronize_sched(). | ||
3960 | */ | ||
3961 | preempt_disable_notrace(); | ||
3962 | trace_recursion_set(TRACE_CONTROL_BIT); | ||
3963 | op = rcu_dereference_raw(ftrace_control_list); | ||
3964 | while (op != &ftrace_list_end) { | ||
3965 | if (!ftrace_function_local_disabled(op) && | ||
3966 | ftrace_ops_test(op, ip)) | ||
3967 | op->func(ip, parent_ip); | ||
3968 | |||
3969 | op = rcu_dereference_raw(op->next); | ||
3970 | } | ||
3971 | trace_recursion_clear(TRACE_CONTROL_BIT); | ||
3972 | preempt_enable_notrace(); | ||
3973 | } | ||
3974 | |||
3975 | static struct ftrace_ops control_ops = { | ||
3976 | .func = ftrace_ops_control_func, | ||
3977 | }; | ||
3978 | |||
3979 | static void | ||
3874 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) | 3980 | ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) |
3875 | { | 3981 | { |
3876 | struct ftrace_ops *op; | 3982 | struct ftrace_ops *op; |
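The ftrace.c changes above introduce "control" ops: registering an ftrace_ops with FTRACE_OPS_FL_CONTROL allocates a per-cpu disabled counter (initially disabled on every CPU, see control_ops_alloc()) and dispatches it through ftrace_ops_control_func(), which skips locally disabled ops with preemption off. A hypothetical client of the interface, sketched under the assumption of the ftrace_function_local_enable()/ftrace_function_local_disable() helpers this series adds:

#include <linux/ftrace.h>

static void my_tracer(unsigned long ip, unsigned long parent_ip)
{
	/* runs for every traced function while locally enabled */
}

static struct ftrace_ops my_ops = {
	.func	= my_tracer,
	.flags	= FTRACE_OPS_FL_CONTROL,
};

static int my_start(void)
{
	int ret;

	ret = register_ftrace_function(&my_ops);	/* allocates ->disabled */
	if (ret)
		return ret;

	ftrace_function_local_enable(&my_ops);		/* this CPU only */
	return 0;
}

static void my_stop(void)
{
	ftrace_function_local_disable(&my_ops);
	unregister_ftrace_function(&my_ops);	/* frees ->disabled after RCU */
}

The callback list is walked under rcu_dereference_raw() with preemption disabled, which is why __unregister_ftrace_function() waits in synchronize_sched() before calling control_ops_free().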
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a3f1bc5d2a00..10d5503f0d04 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -2764,12 +2764,12 @@ static const char readme_msg[] = | |||
2764 | "tracing mini-HOWTO:\n\n" | 2764 | "tracing mini-HOWTO:\n\n" |
2765 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" | 2765 | "# mount -t debugfs nodev /sys/kernel/debug\n\n" |
2766 | "# cat /sys/kernel/debug/tracing/available_tracers\n" | 2766 | "# cat /sys/kernel/debug/tracing/available_tracers\n" |
2767 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" | 2767 | "wakeup wakeup_rt preemptirqsoff preemptoff irqsoff function nop\n\n" |
2768 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2768 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2769 | "nop\n" | 2769 | "nop\n" |
2770 | "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n" | 2770 | "# echo wakeup > /sys/kernel/debug/tracing/current_tracer\n" |
2771 | "# cat /sys/kernel/debug/tracing/current_tracer\n" | 2771 | "# cat /sys/kernel/debug/tracing/current_tracer\n" |
2772 | "sched_switch\n" | 2772 | "wakeup\n" |
2773 | "# cat /sys/kernel/debug/tracing/trace_options\n" | 2773 | "# cat /sys/kernel/debug/tracing/trace_options\n" |
2774 | "noprint-parent nosym-offset nosym-addr noverbose\n" | 2774 | "noprint-parent nosym-offset nosym-addr noverbose\n" |
2775 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" | 2775 | "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b93ecbadad6d..54faec790bc1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -56,17 +56,23 @@ enum trace_type { | |||
56 | #define F_STRUCT(args...) args | 56 | #define F_STRUCT(args...) args |
57 | 57 | ||
58 | #undef FTRACE_ENTRY | 58 | #undef FTRACE_ENTRY |
59 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 59 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
60 | struct struct_name { \ | 60 | struct struct_name { \ |
61 | struct trace_entry ent; \ | 61 | struct trace_entry ent; \ |
62 | tstruct \ | 62 | tstruct \ |
63 | } | 63 | } |
64 | 64 | ||
65 | #undef TP_ARGS | 65 | #undef TP_ARGS |
66 | #define TP_ARGS(args...) args | 66 | #define TP_ARGS(args...) args |
67 | 67 | ||
68 | #undef FTRACE_ENTRY_DUP | 68 | #undef FTRACE_ENTRY_DUP |
69 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk) | 69 | #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter) |
70 | |||
71 | #undef FTRACE_ENTRY_REG | ||
72 | #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ | ||
73 | filter, regfn) \ | ||
74 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ | ||
75 | filter) | ||
70 | 76 | ||
71 | #include "trace_entries.h" | 77 | #include "trace_entries.h" |
72 | 78 | ||
@@ -288,6 +294,8 @@ struct tracer { | |||
288 | /* for function tracing recursion */ | 294 | /* for function tracing recursion */ |
289 | #define TRACE_INTERNAL_BIT (1<<11) | 295 | #define TRACE_INTERNAL_BIT (1<<11) |
290 | #define TRACE_GLOBAL_BIT (1<<12) | 296 | #define TRACE_GLOBAL_BIT (1<<12) |
297 | #define TRACE_CONTROL_BIT (1<<13) | ||
298 | |||
291 | /* | 299 | /* |
292 | * Abuse of the trace_recursion. | 300 | * Abuse of the trace_recursion. |
293 | * As we need a way to maintain state if we are tracing the function | 301 | * As we need a way to maintain state if we are tracing the function |
@@ -589,6 +597,8 @@ static inline int ftrace_trace_task(struct task_struct *task) | |||
589 | static inline int ftrace_is_dead(void) { return 0; } | 597 | static inline int ftrace_is_dead(void) { return 0; } |
590 | #endif | 598 | #endif |
591 | 599 | ||
600 | int ftrace_event_is_function(struct ftrace_event_call *call); | ||
601 | |||
592 | /* | 602 | /* |
593 | * struct trace_parser - serves for reading the user input separated by spaces | 603 | * struct trace_parser - serves for reading the user input separated by spaces |
594 | * @cont: set if the input is not complete - no final space char was found | 604 | * @cont: set if the input is not complete - no final space char was found |
@@ -766,9 +776,7 @@ struct filter_pred { | |||
766 | u64 val; | 776 | u64 val; |
767 | struct regex regex; | 777 | struct regex regex; |
768 | unsigned short *ops; | 778 | unsigned short *ops; |
769 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
770 | struct ftrace_event_field *field; | 779 | struct ftrace_event_field *field; |
771 | #endif | ||
772 | int offset; | 780 | int offset; |
773 | int not; | 781 | int not; |
774 | int op; | 782 | int op; |
@@ -818,12 +826,22 @@ extern const char *__start___trace_bprintk_fmt[]; | |||
818 | extern const char *__stop___trace_bprintk_fmt[]; | 826 | extern const char *__stop___trace_bprintk_fmt[]; |
819 | 827 | ||
820 | #undef FTRACE_ENTRY | 828 | #undef FTRACE_ENTRY |
821 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ | 829 | #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ |
822 | extern struct ftrace_event_call \ | 830 | extern struct ftrace_event_call \ |
823 | __attribute__((__aligned__(4))) event_##call; | 831 | __attribute__((__aligned__(4))) event_##call; |
824 | #undef FTRACE_ENTRY_DUP | 832 | #undef FTRACE_ENTRY_DUP |
825 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ | 833 | #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ |
826 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 834 | FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \ |
835 | filter) | ||
827 | #include "trace_entries.h" | 836 | #include "trace_entries.h" |
828 | 837 | ||
838 | #ifdef CONFIG_PERF_EVENTS | ||
839 | #ifdef CONFIG_FUNCTION_TRACER | ||
840 | int perf_ftrace_event_register(struct ftrace_event_call *call, | ||
841 | enum trace_reg type, void *data); | ||
842 | #else | ||
843 | #define perf_ftrace_event_register NULL | ||
844 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
845 | #endif /* CONFIG_PERF_EVENTS */ | ||
846 | |||
829 | #endif /* _LINUX_KERNEL_TRACE_H */ | 847 | #endif /* _LINUX_KERNEL_TRACE_H */ |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 93365907f219..d91eb0541b3a 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -55,7 +55,7 @@ | |||
55 | /* | 55 | /* |
56 | * Function trace entry - function address and parent function address: | 56 | * Function trace entry - function address and parent function address: |
57 | */ | 57 | */ |
58 | FTRACE_ENTRY(function, ftrace_entry, | 58 | FTRACE_ENTRY_REG(function, ftrace_entry, |
59 | 59 | ||
60 | TRACE_FN, | 60 | TRACE_FN, |
61 | 61 | ||
@@ -64,7 +64,11 @@ FTRACE_ENTRY(function, ftrace_entry, | |||
64 | __field( unsigned long, parent_ip ) | 64 | __field( unsigned long, parent_ip ) |
65 | ), | 65 | ), |
66 | 66 | ||
67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip) | 67 | F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip), |
68 | |||
69 | FILTER_TRACE_FN, | ||
70 | |||
71 | perf_ftrace_event_register | ||
68 | ); | 72 | ); |
69 | 73 | ||
70 | /* Function call entry */ | 74 | /* Function call entry */ |
@@ -78,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, | |||
78 | __field_desc( int, graph_ent, depth ) | 82 | __field_desc( int, graph_ent, depth ) |
79 | ), | 83 | ), |
80 | 84 | ||
81 | F_printk("--> %lx (%d)", __entry->func, __entry->depth) | 85 | F_printk("--> %lx (%d)", __entry->func, __entry->depth), |
86 | |||
87 | FILTER_OTHER | ||
82 | ); | 88 | ); |
83 | 89 | ||
84 | /* Function return entry */ | 90 | /* Function return entry */ |
@@ -98,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, | |||
98 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", | 104 | F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d", |
99 | __entry->func, __entry->depth, | 105 | __entry->func, __entry->depth, |
100 | __entry->calltime, __entry->rettime, | 106 | __entry->calltime, __entry->rettime, |
101 | __entry->depth) | 107 | __entry->depth), |
108 | |||
109 | FILTER_OTHER | ||
102 | ); | 110 | ); |
103 | 111 | ||
104 | /* | 112 | /* |
@@ -127,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry, | |||
127 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", | 135 | F_printk("%u:%u:%u ==> %u:%u:%u [%03u]", |
128 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | 136 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, |
129 | __entry->next_pid, __entry->next_prio, __entry->next_state, | 137 | __entry->next_pid, __entry->next_prio, __entry->next_state, |
130 | __entry->next_cpu | 138 | __entry->next_cpu), |
131 | ) | 139 | |
140 | FILTER_OTHER | ||
132 | ); | 141 | ); |
133 | 142 | ||
134 | /* | 143 | /* |
@@ -146,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, | |||
146 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", | 155 | F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]", |
147 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, | 156 | __entry->prev_pid, __entry->prev_prio, __entry->prev_state, |
148 | __entry->next_pid, __entry->next_prio, __entry->next_state, | 157 | __entry->next_pid, __entry->next_prio, __entry->next_state, |
149 | __entry->next_cpu | 158 | __entry->next_cpu), |
150 | ) | 159 | |
160 | FILTER_OTHER | ||
151 | ); | 161 | ); |
152 | 162 | ||
153 | /* | 163 | /* |
@@ -169,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, | |||
169 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 179 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", |
170 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 180 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
171 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 181 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
172 | __entry->caller[6], __entry->caller[7]) | 182 | __entry->caller[6], __entry->caller[7]), |
183 | |||
184 | FILTER_OTHER | ||
173 | ); | 185 | ); |
174 | 186 | ||
175 | FTRACE_ENTRY(user_stack, userstack_entry, | 187 | FTRACE_ENTRY(user_stack, userstack_entry, |
@@ -185,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, | |||
185 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", | 197 | "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", |
186 | __entry->caller[0], __entry->caller[1], __entry->caller[2], | 198 | __entry->caller[0], __entry->caller[1], __entry->caller[2], |
187 | __entry->caller[3], __entry->caller[4], __entry->caller[5], | 199 | __entry->caller[3], __entry->caller[4], __entry->caller[5], |
188 | __entry->caller[6], __entry->caller[7]) | 200 | __entry->caller[6], __entry->caller[7]), |
201 | |||
202 | FILTER_OTHER | ||
189 | ); | 203 | ); |
190 | 204 | ||
191 | /* | 205 | /* |
@@ -202,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry, | |||
202 | ), | 216 | ), |
203 | 217 | ||
204 | F_printk("%08lx fmt:%p", | 218 | F_printk("%08lx fmt:%p", |
205 | __entry->ip, __entry->fmt) | 219 | __entry->ip, __entry->fmt), |
220 | |||
221 | FILTER_OTHER | ||
206 | ); | 222 | ); |
207 | 223 | ||
208 | FTRACE_ENTRY(print, print_entry, | 224 | FTRACE_ENTRY(print, print_entry, |
@@ -215,7 +231,9 @@ FTRACE_ENTRY(print, print_entry, | |||
215 | ), | 231 | ), |
216 | 232 | ||
217 | F_printk("%08lx %s", | 233 | F_printk("%08lx %s", |
218 | __entry->ip, __entry->buf) | 234 | __entry->ip, __entry->buf), |
235 | |||
236 | FILTER_OTHER | ||
219 | ); | 237 | ); |
220 | 238 | ||
221 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | 239 | FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, |
@@ -234,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw, | |||
234 | 252 | ||
235 | F_printk("%lx %lx %lx %d %x %x", | 253 | F_printk("%lx %lx %lx %d %x %x", |
236 | (unsigned long)__entry->phys, __entry->value, __entry->pc, | 254 | (unsigned long)__entry->phys, __entry->value, __entry->pc, |
237 | __entry->map_id, __entry->opcode, __entry->width) | 255 | __entry->map_id, __entry->opcode, __entry->width), |
256 | |||
257 | FILTER_OTHER | ||
238 | ); | 258 | ); |
239 | 259 | ||
240 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | 260 | FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, |
@@ -252,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map, | |||
252 | 272 | ||
253 | F_printk("%lx %lx %lx %d %x", | 273 | F_printk("%lx %lx %lx %d %x", |
254 | (unsigned long)__entry->phys, __entry->virt, __entry->len, | 274 | (unsigned long)__entry->phys, __entry->virt, __entry->len, |
255 | __entry->map_id, __entry->opcode) | 275 | __entry->map_id, __entry->opcode), |
276 | |||
277 | FILTER_OTHER | ||
256 | ); | 278 | ); |
257 | 279 | ||
258 | 280 | ||
@@ -272,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch, | |||
272 | 294 | ||
273 | F_printk("%u:%s:%s (%u)", | 295 | F_printk("%u:%s:%s (%u)", |
274 | __entry->line, | 296 | __entry->line, |
275 | __entry->func, __entry->file, __entry->correct) | 297 | __entry->func, __entry->file, __entry->correct), |
298 | |||
299 | FILTER_OTHER | ||
276 | ); | 300 | ); |
277 | 301 | ||
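For orientation, running the FTRACE_ENTRY_REG(function, ftrace_entry, TRACE_FN, ...) entry above through the struct-defining FTRACE_ENTRY variant in trace.h reduces to roughly the following; a simplified expansion, with the __field() bookkeeping elided:

/* Approximate expansion of FTRACE_ENTRY_REG(function, ...) in trace.h */
struct ftrace_entry {
	struct trace_entry	ent;
	unsigned long		ip;		/* __field(unsigned long, ip) */
	unsigned long		parent_ip;	/* __field(unsigned long, parent_ip) */
};

The two new arguments are consumed elsewhere: the filter type (FILTER_TRACE_FN here, FILTER_OTHER for the rest) flows into ftrace_define_fields_function() via trace_export.c, and the perf_ftrace_event_register callback becomes the event's ->reg() hook.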
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 19a359d5e6d5..fee3752ae8f6 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -24,6 +24,11 @@ static int total_ref_count; | |||
24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, |
25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
26 | { | 26 | { |
27 | /* The ftrace function trace is allowed only for root. */ | ||
28 | if (ftrace_event_is_function(tp_event) && | ||
29 | perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
30 | return -EPERM; | ||
31 | |||
27 | /* No tracing, just counting, so no obvious leak */ | 32 | /* No tracing, just counting, so no obvious leak */ |
28 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) | 33 | if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) |
29 | return 0; | 34 | return 0; |
@@ -44,23 +49,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | |||
44 | return 0; | 49 | return 0; |
45 | } | 50 | } |
46 | 51 | ||
47 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | 52 | static int perf_trace_event_reg(struct ftrace_event_call *tp_event, |
48 | struct perf_event *p_event) | 53 | struct perf_event *p_event) |
49 | { | 54 | { |
50 | struct hlist_head __percpu *list; | 55 | struct hlist_head __percpu *list; |
51 | int ret; | 56 | int ret = -ENOMEM; |
52 | int cpu; | 57 | int cpu; |
53 | 58 | ||
54 | ret = perf_trace_event_perm(tp_event, p_event); | ||
55 | if (ret) | ||
56 | return ret; | ||
57 | |||
58 | p_event->tp_event = tp_event; | 59 | p_event->tp_event = tp_event; |
59 | if (tp_event->perf_refcount++ > 0) | 60 | if (tp_event->perf_refcount++ > 0) |
60 | return 0; | 61 | return 0; |
61 | 62 | ||
62 | ret = -ENOMEM; | ||
63 | |||
64 | list = alloc_percpu(struct hlist_head); | 63 | list = alloc_percpu(struct hlist_head); |
65 | if (!list) | 64 | if (!list) |
66 | goto fail; | 65 | goto fail; |
@@ -83,7 +82,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
83 | } | 82 | } |
84 | } | 83 | } |
85 | 84 | ||
86 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); | 85 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL); |
87 | if (ret) | 86 | if (ret) |
88 | goto fail; | 87 | goto fail; |
89 | 88 | ||
@@ -108,6 +107,69 @@ fail: | |||
108 | return ret; | 107 | return ret; |
109 | } | 108 | } |
110 | 109 | ||
110 | static void perf_trace_event_unreg(struct perf_event *p_event) | ||
111 | { | ||
112 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
113 | int i; | ||
114 | |||
115 | if (--tp_event->perf_refcount > 0) | ||
116 | goto out; | ||
117 | |||
118 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL); | ||
119 | |||
120 | /* | ||
121 | * Ensure our callback won't be called anymore. The buffers | ||
122 | * will be freed after that. | ||
123 | */ | ||
124 | tracepoint_synchronize_unregister(); | ||
125 | |||
126 | free_percpu(tp_event->perf_events); | ||
127 | tp_event->perf_events = NULL; | ||
128 | |||
129 | if (!--total_ref_count) { | ||
130 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
131 | free_percpu(perf_trace_buf[i]); | ||
132 | perf_trace_buf[i] = NULL; | ||
133 | } | ||
134 | } | ||
135 | out: | ||
136 | module_put(tp_event->mod); | ||
137 | } | ||
138 | |||
139 | static int perf_trace_event_open(struct perf_event *p_event) | ||
140 | { | ||
141 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
142 | return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); | ||
143 | } | ||
144 | |||
145 | static void perf_trace_event_close(struct perf_event *p_event) | ||
146 | { | ||
147 | struct ftrace_event_call *tp_event = p_event->tp_event; | ||
148 | tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); | ||
149 | } | ||
150 | |||
151 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, | ||
152 | struct perf_event *p_event) | ||
153 | { | ||
154 | int ret; | ||
155 | |||
156 | ret = perf_trace_event_perm(tp_event, p_event); | ||
157 | if (ret) | ||
158 | return ret; | ||
159 | |||
160 | ret = perf_trace_event_reg(tp_event, p_event); | ||
161 | if (ret) | ||
162 | return ret; | ||
163 | |||
164 | ret = perf_trace_event_open(p_event); | ||
165 | if (ret) { | ||
166 | perf_trace_event_unreg(p_event); | ||
167 | return ret; | ||
168 | } | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | |||
111 | int perf_trace_init(struct perf_event *p_event) | 173 | int perf_trace_init(struct perf_event *p_event) |
112 | { | 174 | { |
113 | struct ftrace_event_call *tp_event; | 175 | struct ftrace_event_call *tp_event; |
@@ -130,6 +192,14 @@ int perf_trace_init(struct perf_event *p_event) | |||
130 | return ret; | 192 | return ret; |
131 | } | 193 | } |
132 | 194 | ||
195 | void perf_trace_destroy(struct perf_event *p_event) | ||
196 | { | ||
197 | mutex_lock(&event_mutex); | ||
198 | perf_trace_event_close(p_event); | ||
199 | perf_trace_event_unreg(p_event); | ||
200 | mutex_unlock(&event_mutex); | ||
201 | } | ||
202 | |||
133 | int perf_trace_add(struct perf_event *p_event, int flags) | 203 | int perf_trace_add(struct perf_event *p_event, int flags) |
134 | { | 204 | { |
135 | struct ftrace_event_call *tp_event = p_event->tp_event; | 205 | struct ftrace_event_call *tp_event = p_event->tp_event; |
@@ -146,43 +216,14 @@ int perf_trace_add(struct perf_event *p_event, int flags) | |||
146 | list = this_cpu_ptr(pcpu_list); | 216 | list = this_cpu_ptr(pcpu_list); |
147 | hlist_add_head_rcu(&p_event->hlist_entry, list); | 217 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
148 | 218 | ||
149 | return 0; | 219 | return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); |
150 | } | 220 | } |
151 | 221 | ||
152 | void perf_trace_del(struct perf_event *p_event, int flags) | 222 | void perf_trace_del(struct perf_event *p_event, int flags) |
153 | { | 223 | { |
154 | hlist_del_rcu(&p_event->hlist_entry); | ||
155 | } | ||
156 | |||
157 | void perf_trace_destroy(struct perf_event *p_event) | ||
158 | { | ||
159 | struct ftrace_event_call *tp_event = p_event->tp_event; | 224 | struct ftrace_event_call *tp_event = p_event->tp_event; |
160 | int i; | 225 | hlist_del_rcu(&p_event->hlist_entry); |
161 | 226 | tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); | |
162 | mutex_lock(&event_mutex); | ||
163 | if (--tp_event->perf_refcount > 0) | ||
164 | goto out; | ||
165 | |||
166 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); | ||
167 | |||
168 | /* | ||
169 | * Ensure our callback won't be called anymore. The buffers | ||
170 | * will be freed after that. | ||
171 | */ | ||
172 | tracepoint_synchronize_unregister(); | ||
173 | |||
174 | free_percpu(tp_event->perf_events); | ||
175 | tp_event->perf_events = NULL; | ||
176 | |||
177 | if (!--total_ref_count) { | ||
178 | for (i = 0; i < PERF_NR_CONTEXTS; i++) { | ||
179 | free_percpu(perf_trace_buf[i]); | ||
180 | perf_trace_buf[i] = NULL; | ||
181 | } | ||
182 | } | ||
183 | out: | ||
184 | module_put(tp_event->mod); | ||
185 | mutex_unlock(&event_mutex); | ||
186 | } | 227 | } |
187 | 228 | ||
188 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 229 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
@@ -214,3 +255,86 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | |||
214 | return raw_data; | 255 | return raw_data; |
215 | } | 256 | } |
216 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 257 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
258 | |||
259 | #ifdef CONFIG_FUNCTION_TRACER | ||
260 | static void | ||
261 | perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) | ||
262 | { | ||
263 | struct ftrace_entry *entry; | ||
264 | struct hlist_head *head; | ||
265 | struct pt_regs regs; | ||
266 | int rctx; | ||
267 | |||
268 | #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ | ||
269 | sizeof(u64)) - sizeof(u32)) | ||
270 | |||
271 | BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE); | ||
272 | |||
273 | perf_fetch_caller_regs(®s); | ||
274 | |||
275 | entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx); | ||
276 | if (!entry) | ||
277 | return; | ||
278 | |||
279 | entry->ip = ip; | ||
280 | entry->parent_ip = parent_ip; | ||
281 | |||
282 | head = this_cpu_ptr(event_function.perf_events); | ||
283 | perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0, | ||
284 | 1, ®s, head); | ||
285 | |||
286 | #undef ENTRY_SIZE | ||
287 | } | ||
288 | |||
289 | static int perf_ftrace_function_register(struct perf_event *event) | ||
290 | { | ||
291 | struct ftrace_ops *ops = &event->ftrace_ops; | ||
292 | |||
293 | ops->flags |= FTRACE_OPS_FL_CONTROL; | ||
294 | ops->func = perf_ftrace_function_call; | ||
295 | return register_ftrace_function(ops); | ||
296 | } | ||
297 | |||
298 | static int perf_ftrace_function_unregister(struct perf_event *event) | ||
299 | { | ||
300 | struct ftrace_ops *ops = &event->ftrace_ops; | ||
301 | int ret = unregister_ftrace_function(ops); | ||
302 | ftrace_free_filter(ops); | ||
303 | return ret; | ||
304 | } | ||
305 | |||
306 | static void perf_ftrace_function_enable(struct perf_event *event) | ||
307 | { | ||
308 | ftrace_function_local_enable(&event->ftrace_ops); | ||
309 | } | ||
310 | |||
311 | static void perf_ftrace_function_disable(struct perf_event *event) | ||
312 | { | ||
313 | ftrace_function_local_disable(&event->ftrace_ops); | ||
314 | } | ||
315 | |||
316 | int perf_ftrace_event_register(struct ftrace_event_call *call, | ||
317 | enum trace_reg type, void *data) | ||
318 | { | ||
319 | switch (type) { | ||
320 | case TRACE_REG_REGISTER: | ||
321 | case TRACE_REG_UNREGISTER: | ||
322 | break; | ||
323 | case TRACE_REG_PERF_REGISTER: | ||
324 | case TRACE_REG_PERF_UNREGISTER: | ||
325 | return 0; | ||
326 | case TRACE_REG_PERF_OPEN: | ||
327 | return perf_ftrace_function_register(data); | ||
328 | case TRACE_REG_PERF_CLOSE: | ||
329 | return perf_ftrace_function_unregister(data); | ||
330 | case TRACE_REG_PERF_ADD: | ||
331 | perf_ftrace_function_enable(data); | ||
332 | return 0; | ||
333 | case TRACE_REG_PERF_DEL: | ||
334 | perf_ftrace_function_disable(data); | ||
335 | return 0; | ||
336 | } | ||
337 | |||
338 | return -EINVAL; | ||
339 | } | ||
340 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
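Taken together, these trace_event_perf.c changes let perf open ftrace:function like any other tracepoint: TRACE_REG_PERF_OPEN/CLOSE map to registering and unregistering a per-event control ftrace_ops, and ADD/DEL toggle its per-cpu disable flag on sched-in/out. A hypothetical userspace sketch of the resulting interface; the debugfs path and filter string are examples, and error handling is minimal:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long id = 0;
	FILE *f;
	int fd;

	/* Event id of ftrace:function (debugfs mount point may differ). */
	f = fopen("/sys/kernel/debug/tracing/events/ftrace/function/id", "r");
	if (!f || fscanf(f, "%ld", &id) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = id;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* Only '=='/'!=' on 'ip', OR-ed together, pass the new checks. */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER, "ip == schedule") < 0)
		return 1;

	sleep(1);	/* the event samples while enabled */
	close(fd);
	return 0;
}

With the perf tool this would correspond to something like "perf record -e ftrace:function --filter 'ip == schedule' -a sleep 1" (hypothetical invocation). Note the new perf_trace_event_perm() check above: under perf_paranoid_kernel(), ftrace:function is restricted to CAP_SYS_ADMIN.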
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c212a7f934ec..079a93ae8a9d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call) | |||
147 | } | 147 | } |
148 | EXPORT_SYMBOL_GPL(trace_event_raw_init); | 148 | EXPORT_SYMBOL_GPL(trace_event_raw_init); |
149 | 149 | ||
150 | int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) | 150 | int ftrace_event_reg(struct ftrace_event_call *call, |
151 | enum trace_reg type, void *data) | ||
151 | { | 152 | { |
152 | switch (type) { | 153 | switch (type) { |
153 | case TRACE_REG_REGISTER: | 154 | case TRACE_REG_REGISTER: |
@@ -170,6 +171,11 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) | |||
170 | call->class->perf_probe, | 171 | call->class->perf_probe, |
171 | call); | 172 | call); |
172 | return 0; | 173 | return 0; |
174 | case TRACE_REG_PERF_OPEN: | ||
175 | case TRACE_REG_PERF_CLOSE: | ||
176 | case TRACE_REG_PERF_ADD: | ||
177 | case TRACE_REG_PERF_DEL: | ||
178 | return 0; | ||
173 | #endif | 179 | #endif |
174 | } | 180 | } |
175 | return 0; | 181 | return 0; |
@@ -209,7 +215,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
209 | tracing_stop_cmdline_record(); | 215 | tracing_stop_cmdline_record(); |
210 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; | 216 | call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; |
211 | } | 217 | } |
212 | call->class->reg(call, TRACE_REG_UNREGISTER); | 218 | call->class->reg(call, TRACE_REG_UNREGISTER, NULL); |
213 | } | 219 | } |
214 | break; | 220 | break; |
215 | case 1: | 221 | case 1: |
@@ -218,7 +224,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call, | |||
218 | tracing_start_cmdline_record(); | 224 | tracing_start_cmdline_record(); |
219 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; | 225 | call->flags |= TRACE_EVENT_FL_RECORDED_CMD; |
220 | } | 226 | } |
221 | ret = call->class->reg(call, TRACE_REG_REGISTER); | 227 | ret = call->class->reg(call, TRACE_REG_REGISTER, NULL); |
222 | if (ret) { | 228 | if (ret) { |
223 | tracing_stop_cmdline_record(); | 229 | tracing_stop_cmdline_record(); |
224 | pr_info("event trace: Could not enable event " | 230 | pr_info("event trace: Could not enable event " |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 24aee7127451..431dba8b7542 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -81,6 +81,7 @@ enum { | |||
81 | FILT_ERR_TOO_MANY_PREDS, | 81 | FILT_ERR_TOO_MANY_PREDS, |
82 | FILT_ERR_MISSING_FIELD, | 82 | FILT_ERR_MISSING_FIELD, |
83 | FILT_ERR_INVALID_FILTER, | 83 | FILT_ERR_INVALID_FILTER, |
84 | FILT_ERR_IP_FIELD_ONLY, | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | static char *err_text[] = { | 87 | static char *err_text[] = { |
@@ -96,6 +97,7 @@ static char *err_text[] = { | |||
96 | "Too many terms in predicate expression", | 97 | "Too many terms in predicate expression", |
97 | "Missing field name and/or value", | 98 | "Missing field name and/or value", |
98 | "Meaningless filter expression", | 99 | "Meaningless filter expression", |
100 | "Only 'ip' field is supported for function trace", | ||
99 | }; | 101 | }; |
100 | 102 | ||
101 | struct opstack_op { | 103 | struct opstack_op { |
@@ -685,7 +687,7 @@ find_event_field(struct ftrace_event_call *call, char *name) | |||
685 | 687 | ||
686 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) | 688 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) |
687 | { | 689 | { |
688 | stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); | 690 | stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); |
689 | if (!stack->preds) | 691 | if (!stack->preds) |
690 | return -ENOMEM; | 692 | return -ENOMEM; |
691 | stack->index = n_preds; | 693 | stack->index = n_preds; |
@@ -826,8 +828,7 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) | |||
826 | if (filter->preds) | 828 | if (filter->preds) |
827 | __free_preds(filter); | 829 | __free_preds(filter); |
828 | 830 | ||
829 | filter->preds = | 831 | filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL); |
830 | kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); | ||
831 | 832 | ||
832 | if (!filter->preds) | 833 | if (!filter->preds) |
833 | return -ENOMEM; | 834 | return -ENOMEM; |
@@ -900,6 +901,11 @@ int filter_assign_type(const char *type) | |||
900 | return FILTER_OTHER; | 901 | return FILTER_OTHER; |
901 | } | 902 | } |
902 | 903 | ||
904 | static bool is_function_field(struct ftrace_event_field *field) | ||
905 | { | ||
906 | return field->filter_type == FILTER_TRACE_FN; | ||
907 | } | ||
908 | |||
903 | static bool is_string_field(struct ftrace_event_field *field) | 909 | static bool is_string_field(struct ftrace_event_field *field) |
904 | { | 910 | { |
905 | return field->filter_type == FILTER_DYN_STRING || | 911 | return field->filter_type == FILTER_DYN_STRING || |
@@ -987,6 +993,11 @@ static int init_pred(struct filter_parse_state *ps, | |||
987 | fn = filter_pred_strloc; | 993 | fn = filter_pred_strloc; |
988 | else | 994 | else |
989 | fn = filter_pred_pchar; | 995 | fn = filter_pred_pchar; |
996 | } else if (is_function_field(field)) { | ||
997 | if (strcmp(field->name, "ip")) { | ||
998 | parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0); | ||
999 | return -EINVAL; | ||
1000 | } | ||
990 | } else { | 1001 | } else { |
991 | if (field->is_signed) | 1002 | if (field->is_signed) |
992 | ret = strict_strtoll(pred->regex.pattern, 0, &val); | 1003 | ret = strict_strtoll(pred->regex.pattern, 0, &val); |
@@ -1334,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps, | |||
1334 | 1345 | ||
1335 | strcpy(pred.regex.pattern, operand2); | 1346 | strcpy(pred.regex.pattern, operand2); |
1336 | pred.regex.len = strlen(pred.regex.pattern); | 1347 | pred.regex.len = strlen(pred.regex.pattern); |
1337 | |||
1338 | #ifdef CONFIG_FTRACE_STARTUP_TEST | ||
1339 | pred.field = field; | 1348 | pred.field = field; |
1340 | #endif | ||
1341 | return init_pred(ps, field, &pred) ? NULL : &pred; | 1349 | return init_pred(ps, field, &pred) ? NULL : &pred; |
1342 | } | 1350 | } |
1343 | 1351 | ||
@@ -1486,7 +1494,7 @@ static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | |||
1486 | children = count_leafs(preds, &preds[root->left]); | 1494 | children = count_leafs(preds, &preds[root->left]); |
1487 | children += count_leafs(preds, &preds[root->right]); | 1495 | children += count_leafs(preds, &preds[root->right]); |
1488 | 1496 | ||
1489 | root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL); | 1497 | root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL); |
1490 | if (!root->ops) | 1498 | if (!root->ops) |
1491 | return -ENOMEM; | 1499 | return -ENOMEM; |
1492 | 1500 | ||
@@ -1950,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event) | |||
1950 | __free_filter(filter); | 1958 | __free_filter(filter); |
1951 | } | 1959 | } |
1952 | 1960 | ||
1961 | struct function_filter_data { | ||
1962 | struct ftrace_ops *ops; | ||
1963 | int first_filter; | ||
1964 | int first_notrace; | ||
1965 | }; | ||
1966 | |||
1967 | #ifdef CONFIG_FUNCTION_TRACER | ||
1968 | static char ** | ||
1969 | ftrace_function_filter_re(char *buf, int len, int *count) | ||
1970 | { | ||
1971 | char *str, *sep, **re; | ||
1972 | |||
1973 | str = kstrndup(buf, len, GFP_KERNEL); | ||
1974 | if (!str) | ||
1975 | return NULL; | ||
1976 | |||
1977 | /* | ||
1978 | * The argv_split function takes white space | ||
1979 | * as a separator, so convert ',' into spaces. | ||
1980 | */ | ||
1981 | while ((sep = strchr(str, ','))) | ||
1982 | *sep = ' '; | ||
1983 | |||
1984 | re = argv_split(GFP_KERNEL, str, count); | ||
1985 | kfree(str); | ||
1986 | return re; | ||
1987 | } | ||
1988 | |||
1989 | static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, | ||
1990 | int reset, char *re, int len) | ||
1991 | { | ||
1992 | int ret; | ||
1993 | |||
1994 | if (filter) | ||
1995 | ret = ftrace_set_filter(ops, re, len, reset); | ||
1996 | else | ||
1997 | ret = ftrace_set_notrace(ops, re, len, reset); | ||
1998 | |||
1999 | return ret; | ||
2000 | } | ||
2001 | |||
2002 | static int __ftrace_function_set_filter(int filter, char *buf, int len, | ||
2003 | struct function_filter_data *data) | ||
2004 | { | ||
2005 | int i, re_cnt, ret; | ||
2006 | int *reset; | ||
2007 | char **re; | ||
2008 | |||
2009 | reset = filter ? &data->first_filter : &data->first_notrace; | ||
2010 | |||
2011 | /* | ||
2012 | * The 'ip' field could have multiple filters set, separated | ||
2013 | * either by space or comma. We first cut the filter and apply | ||
2014 | * all pieces separately. | ||
2015 | */ | ||
2016 | re = ftrace_function_filter_re(buf, len, &re_cnt); | ||
2017 | if (!re) | ||
2018 | return -EINVAL; | ||
2019 | |||
2020 | for (i = 0; i < re_cnt; i++) { | ||
2021 | ret = ftrace_function_set_regexp(data->ops, filter, *reset, | ||
2022 | re[i], strlen(re[i])); | ||
2023 | if (ret) | ||
2024 | break; | ||
2025 | |||
2026 | if (*reset) | ||
2027 | *reset = 0; | ||
2028 | } | ||
2029 | |||
2030 | argv_free(re); | ||
2031 | return ret; | ||
2032 | } | ||
2033 | |||
2034 | static int ftrace_function_check_pred(struct filter_pred *pred, int leaf) | ||
2035 | { | ||
2036 | struct ftrace_event_field *field = pred->field; | ||
2037 | |||
2038 | if (leaf) { | ||
2039 | /* | ||
2040 | * Check the leaf predicate for function trace, verify: | ||
2041 | * - only '==' and '!=' are used | ||
2042 | * - the 'ip' field is used | ||
2043 | */ | ||
2044 | if ((pred->op != OP_EQ) && (pred->op != OP_NE)) | ||
2045 | return -EINVAL; | ||
2046 | |||
2047 | if (strcmp(field->name, "ip")) | ||
2048 | return -EINVAL; | ||
2049 | } else { | ||
2050 | /* | ||
2051 | * Check the non-leaf predicate for function trace, verify: | ||
2052 | * - only '||' is used | ||
2053 | */ | ||
2054 | if (pred->op != OP_OR) | ||
2055 | return -EINVAL; | ||
2056 | } | ||
2057 | |||
2058 | return 0; | ||
2059 | } | ||
2060 | |||
2061 | static int ftrace_function_set_filter_cb(enum move_type move, | ||
2062 | struct filter_pred *pred, | ||
2063 | int *err, void *data) | ||
2064 | { | ||
2065 | /* Checking the node is valid for function trace. */ | ||
2066 | if ((move != MOVE_DOWN) || | ||
2067 | (pred->left != FILTER_PRED_INVALID)) { | ||
2068 | *err = ftrace_function_check_pred(pred, 0); | ||
2069 | } else { | ||
2070 | *err = ftrace_function_check_pred(pred, 1); | ||
2071 | if (*err) | ||
2072 | return WALK_PRED_ABORT; | ||
2073 | |||
2074 | *err = __ftrace_function_set_filter(pred->op == OP_EQ, | ||
2075 | pred->regex.pattern, | ||
2076 | pred->regex.len, | ||
2077 | data); | ||
2078 | } | ||
2079 | |||
2080 | return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT; | ||
2081 | } | ||
2082 | |||
2083 | static int ftrace_function_set_filter(struct perf_event *event, | ||
2084 | struct event_filter *filter) | ||
2085 | { | ||
2086 | struct function_filter_data data = { | ||
2087 | .first_filter = 1, | ||
2088 | .first_notrace = 1, | ||
2089 | .ops = &event->ftrace_ops, | ||
2090 | }; | ||
2091 | |||
2092 | return walk_pred_tree(filter->preds, filter->root, | ||
2093 | ftrace_function_set_filter_cb, &data); | ||
2094 | } | ||
2095 | #else | ||
2096 | static int ftrace_function_set_filter(struct perf_event *event, | ||
2097 | struct event_filter *filter) | ||
2098 | { | ||
2099 | return -ENODEV; | ||
2100 | } | ||
2101 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
2102 | |||
1953 | int ftrace_profile_set_filter(struct perf_event *event, int event_id, | 2103 | int ftrace_profile_set_filter(struct perf_event *event, int event_id, |
1954 | char *filter_str) | 2104 | char *filter_str) |
1955 | { | 2105 | { |
@@ -1970,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
1970 | goto out_unlock; | 2120 | goto out_unlock; |
1971 | 2121 | ||
1972 | err = create_filter(call, filter_str, false, &filter); | 2122 | err = create_filter(call, filter_str, false, &filter); |
1973 | if (!err) | 2123 | if (err) |
1974 | event->filter = filter; | 2124 | goto free_filter; |
2125 | |||
2126 | if (ftrace_event_is_function(call)) | ||
2127 | err = ftrace_function_set_filter(event, filter); | ||
1975 | else | 2128 | else |
2129 | event->filter = filter; | ||
2130 | |||
2131 | free_filter: | ||
2132 | if (err || ftrace_event_is_function(call)) | ||
1976 | __free_filter(filter); | 2133 | __free_filter(filter); |
1977 | 2134 | ||
1978 | out_unlock: | 2135 | out_unlock: |
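To make the constraints enforced by init_pred() and ftrace_function_check_pred() above concrete, here are hypothetical filter strings and how the code treats them:

/* Hypothetical examples against the checks above: */
/* "ip == schedule"                   ok: '==' on the 'ip' field           */
/* "ip == do_fork || ip == schedule"  ok: leaves joined only by '||'       */
/* "ip == sys_read,sys_write"         ok: commas are split into patterns   */
/* "ip != mutex_*"                    ok: '!=' feeds the notrace hash      */
/* "ip == schedule && ip == do_fork"  rejected: non-leaf op must be '||'   */
/* "parent_ip == schedule"            rejected: only 'ip' is supported     */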
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index bbeec31e0ae3..7b46c9bd22ae 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -18,6 +18,16 @@ | |||
18 | #undef TRACE_SYSTEM | 18 | #undef TRACE_SYSTEM |
19 | #define TRACE_SYSTEM ftrace | 19 | #define TRACE_SYSTEM ftrace |
20 | 20 | ||
21 | /* | ||
22 | * The FTRACE_ENTRY_REG macro allows an ftrace entry to define a register | ||
23 | * function and thus become accessible via perf. | ||
24 | */ | ||
25 | #undef FTRACE_ENTRY_REG | ||
26 | #define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \ | ||
27 | filter, regfn) \ | ||
28 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ | ||
29 | filter) | ||
30 | |||
21 | /* not needed for this file */ | 31 | /* not needed for this file */ |
22 | #undef __field_struct | 32 | #undef __field_struct |
23 | #define __field_struct(type, item) | 33 | #define __field_struct(type, item) |
@@ -44,21 +54,22 @@ | |||
44 | #define F_printk(fmt, args...) fmt, args | 54 | #define F_printk(fmt, args...) fmt, args |
45 | 55 | ||
46 | #undef FTRACE_ENTRY | 56 | #undef FTRACE_ENTRY |
47 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 57 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
48 | struct ____ftrace_##name { \ | 58 | struct ____ftrace_##name { \ |
49 | tstruct \ | 59 | tstruct \ |
50 | }; \ | 60 | }; \ |
51 | static void __always_unused ____ftrace_check_##name(void) \ | 61 | static void __always_unused ____ftrace_check_##name(void) \ |
52 | { \ | 62 | { \ |
53 | struct ____ftrace_##name *__entry = NULL; \ | 63 | struct ____ftrace_##name *__entry = NULL; \ |
54 | \ | 64 | \ |
55 | /* force compile-time check on F_printk() */ \ | 65 | /* force compile-time check on F_printk() */ \ |
56 | printk(print); \ | 66 | printk(print); \ |
57 | } | 67 | } |
58 | 68 | ||
59 | #undef FTRACE_ENTRY_DUP | 69 | #undef FTRACE_ENTRY_DUP |
60 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \ | 70 | #define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter) \ |
61 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print)) | 71 | FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \ |
72 | filter) | ||
62 | 73 | ||
63 | #include "trace_entries.h" | 74 | #include "trace_entries.h" |
64 | 75 | ||
@@ -67,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
67 | ret = trace_define_field(event_call, #type, #item, \ | 78 | ret = trace_define_field(event_call, #type, #item, \ |
68 | offsetof(typeof(field), item), \ | 79 | offsetof(typeof(field), item), \ |
69 | sizeof(field.item), \ | 80 | sizeof(field.item), \ |
70 | is_signed_type(type), FILTER_OTHER); \ | 81 | is_signed_type(type), filter_type); \ |
71 | if (ret) \ | 82 | if (ret) \ |
72 | return ret; | 83 | return ret; |
73 | 84 | ||
@@ -77,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
77 | offsetof(typeof(field), \ | 88 | offsetof(typeof(field), \ |
78 | container.item), \ | 89 | container.item), \ |
79 | sizeof(field.container.item), \ | 90 | sizeof(field.container.item), \ |
80 | is_signed_type(type), FILTER_OTHER); \ | 91 | is_signed_type(type), filter_type); \ |
81 | if (ret) \ | 92 | if (ret) \ |
82 | return ret; | 93 | return ret; |
83 | 94 | ||
@@ -91,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
91 | ret = trace_define_field(event_call, event_storage, #item, \ | 102 | ret = trace_define_field(event_call, event_storage, #item, \ |
92 | offsetof(typeof(field), item), \ | 103 | offsetof(typeof(field), item), \ |
93 | sizeof(field.item), \ | 104 | sizeof(field.item), \ |
94 | is_signed_type(type), FILTER_OTHER); \ | 105 | is_signed_type(type), filter_type); \ |
95 | mutex_unlock(&event_storage_mutex); \ | 106 | mutex_unlock(&event_storage_mutex); \ |
96 | if (ret) \ | 107 | if (ret) \ |
97 | return ret; \ | 108 | return ret; \ |
@@ -104,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
104 | offsetof(typeof(field), \ | 115 | offsetof(typeof(field), \ |
105 | container.item), \ | 116 | container.item), \ |
106 | sizeof(field.container.item), \ | 117 | sizeof(field.container.item), \ |
107 | is_signed_type(type), FILTER_OTHER); \ | 118 | is_signed_type(type), filter_type); \ |
108 | if (ret) \ | 119 | if (ret) \ |
109 | return ret; | 120 | return ret; |
110 | 121 | ||
@@ -112,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void) \ | |||
112 | #define __dynamic_array(type, item) \ | 123 | #define __dynamic_array(type, item) \ |
113 | ret = trace_define_field(event_call, #type, #item, \ | 124 | ret = trace_define_field(event_call, #type, #item, \ |
114 | offsetof(typeof(field), item), \ | 125 | offsetof(typeof(field), item), \ |
115 | 0, is_signed_type(type), FILTER_OTHER);\ | 126 | 0, is_signed_type(type), filter_type);\ |
116 | if (ret) \ | 127 | if (ret) \ |
117 | return ret; | 128 | return ret; |
118 | 129 | ||
119 | #undef FTRACE_ENTRY | 130 | #undef FTRACE_ENTRY |
120 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ | 131 | #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ |
121 | int \ | 132 | int \ |
122 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | 133 | ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ |
123 | { \ | 134 | { \ |
124 | struct struct_name field; \ | 135 | struct struct_name field; \ |
125 | int ret; \ | 136 | int ret; \ |
137 | int filter_type = filter; \ | ||
126 | \ | 138 | \ |
127 | tstruct; \ | 139 | tstruct; \ |
128 | \ | 140 | \ |
@@ -152,13 +164,15 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ | |||
152 | #undef F_printk | 164 | #undef F_printk |
153 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) | 165 | #define F_printk(fmt, args...) #fmt ", " __stringify(args) |
154 | 166 | ||
155 | #undef FTRACE_ENTRY | 167 | #undef FTRACE_ENTRY_REG |
156 | #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \ | 168 | #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |
169 | regfn) \ | ||
157 | \ | 170 | \ |
158 | struct ftrace_event_class event_class_ftrace_##call = { \ | 171 | struct ftrace_event_class event_class_ftrace_##call = { \ |
159 | .system = __stringify(TRACE_SYSTEM), \ | 172 | .system = __stringify(TRACE_SYSTEM), \ |
160 | .define_fields = ftrace_define_fields_##call, \ | 173 | .define_fields = ftrace_define_fields_##call, \ |
161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | 174 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ |
175 | .reg = regfn, \ | ||
162 | }; \ | 176 | }; \ |
163 | \ | 177 | \ |
164 | struct ftrace_event_call __used event_##call = { \ | 178 | struct ftrace_event_call __used event_##call = { \ |
@@ -170,4 +184,14 @@ struct ftrace_event_call __used event_##call = { \ | |||
170 | struct ftrace_event_call __used \ | 184 | struct ftrace_event_call __used \ |
171 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; | 185 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; |
172 | 186 | ||
187 | #undef FTRACE_ENTRY | ||
188 | #define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter) \ | ||
189 | FTRACE_ENTRY_REG(call, struct_name, etype, \ | ||
190 | PARAMS(tstruct), PARAMS(print), filter, NULL) | ||
191 | |||
192 | int ftrace_event_is_function(struct ftrace_event_call *call) | ||
193 | { | ||
194 | return call == &event_function; | ||
195 | } | ||
196 | |||
173 | #include "trace_entries.h" | 197 | #include "trace_entries.h" |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 00d527c945a4..580a05ec926b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1892 | #endif /* CONFIG_PERF_EVENTS */ | 1892 | #endif /* CONFIG_PERF_EVENTS */ |
1893 | 1893 | ||
1894 | static __kprobes | 1894 | static __kprobes |
1895 | int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | 1895 | int kprobe_register(struct ftrace_event_call *event, |
1896 | enum trace_reg type, void *data) | ||
1896 | { | 1897 | { |
1897 | struct trace_probe *tp = (struct trace_probe *)event->data; | 1898 | struct trace_probe *tp = (struct trace_probe *)event->data; |
1898 | 1899 | ||
@@ -1909,6 +1910,11 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) | |||
1909 | case TRACE_REG_PERF_UNREGISTER: | 1910 | case TRACE_REG_PERF_UNREGISTER: |
1910 | disable_trace_probe(tp, TP_FLAG_PROFILE); | 1911 | disable_trace_probe(tp, TP_FLAG_PROFILE); |
1911 | return 0; | 1912 | return 0; |
1913 | case TRACE_REG_PERF_OPEN: | ||
1914 | case TRACE_REG_PERF_CLOSE: | ||
1915 | case TRACE_REG_PERF_ADD: | ||
1916 | case TRACE_REG_PERF_DEL: | ||
1917 | return 0; | ||
1912 | #endif | 1918 | #endif |
1913 | } | 1919 | } |
1914 | return 0; | 1920 | return 0; |
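
The same mechanical change lands in the syscall register callbacks below: the .reg signature gains a third void *data argument, and the switch must explicitly cover the new perf lifecycle values. In isolation (the enum names are copied from the diff; the callback body is illustrative):

    enum trace_reg {
            TRACE_REG_REGISTER,
            TRACE_REG_UNREGISTER,
            TRACE_REG_PERF_REGISTER,
            TRACE_REG_PERF_UNREGISTER,
            TRACE_REG_PERF_OPEN,
            TRACE_REG_PERF_CLOSE,
            TRACE_REG_PERF_ADD,
            TRACE_REG_PERF_DEL,
    };

    /* New shape of a .reg callback: it takes (event, type, data) and must
     * return 0 for any perf lifecycle op it has no work for. */
    static int example_register(void *event, enum trace_reg type, void *data)
    {
            (void)event; (void)data;

            switch (type) {
            case TRACE_REG_REGISTER:        /* enable the trace event */
            case TRACE_REG_UNREGISTER:      /* disable the trace event */
            case TRACE_REG_PERF_REGISTER:
            case TRACE_REG_PERF_UNREGISTER:
                    return 0;
            case TRACE_REG_PERF_OPEN:
            case TRACE_REG_PERF_CLOSE:
            case TRACE_REG_PERF_ADD:
            case TRACE_REG_PERF_DEL:
                    return 0;               /* nothing to do for this event type */
            }
            return 0;
    }
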
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0d6ff3555942..c5a01873567d 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
300 | unsigned long mask; | 300 | unsigned long mask; |
301 | const char *str; | 301 | const char *str; |
302 | const char *ret = p->buffer + p->len; | 302 | const char *ret = p->buffer + p->len; |
303 | int i; | 303 | int i, first = 1; |
304 | 304 | ||
305 | for (i = 0; flag_array[i].name && flags; i++) { | 305 | for (i = 0; flag_array[i].name && flags; i++) { |
306 | 306 | ||
@@ -310,14 +310,16 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, | |||
310 | 310 | ||
311 | str = flag_array[i].name; | 311 | str = flag_array[i].name; |
312 | flags &= ~mask; | 312 | flags &= ~mask; |
313 | if (p->len && delim) | 313 | if (!first && delim) |
314 | trace_seq_puts(p, delim); | 314 | trace_seq_puts(p, delim); |
315 | else | ||
316 | first = 0; | ||
315 | trace_seq_puts(p, str); | 317 | trace_seq_puts(p, str); |
316 | } | 318 | } |
317 | 319 | ||
318 | /* check for left over flags */ | 320 | /* check for left over flags */ |
319 | if (flags) { | 321 | if (flags) { |
320 | if (p->len && delim) | 322 | if (!first && delim) |
321 | trace_seq_puts(p, delim); | 323 | trace_seq_puts(p, delim); |
322 | trace_seq_printf(p, "0x%lx", flags); | 324 | trace_seq_printf(p, "0x%lx", flags); |
323 | } | 325 | } |
@@ -344,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
344 | break; | 346 | break; |
345 | } | 347 | } |
346 | 348 | ||
347 | if (!p->len) | 349 | if (ret == (const char *)(p->buffer + p->len)) |
348 | trace_seq_printf(p, "0x%lx", val); | 350 | trace_seq_printf(p, "0x%lx", val); |
349 | 351 | ||
350 | trace_seq_putc(p, 0); | 352 | trace_seq_putc(p, 0); |
@@ -370,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, | |||
370 | break; | 372 | break; |
371 | } | 373 | } |
372 | 374 | ||
373 | if (!p->len) | 375 | if (ret == (const char *)(p->buffer + p->len)) |
374 | trace_seq_printf(p, "0x%llx", val); | 376 | trace_seq_printf(p, "0x%llx", val); |
375 | 377 | ||
376 | trace_seq_putc(p, 0); | 378 | trace_seq_putc(p, 0); |
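
Both trace_output.c fixes replace an "is the buffer empty?" test with call-local state: a first flag decides when the delimiter is printed, and a start pointer saved on entry detects that this call printed nothing, so the functions behave even when the seq buffer already holds earlier output. A compact userspace analogue (the buffer type and helpers are simplified assumptions):

    #include <stdio.h>
    #include <string.h>

    struct seq { char buf[256]; size_t len; };

    static void seq_puts(struct seq *p, const char *s)
    {
            size_t n = strlen(s);
            memcpy(p->buf + p->len, s, n);
            p->len += n;
            p->buf[p->len] = '\0';
    }

    static void print_flags(struct seq *p, unsigned long flags,
                            const char *names[], const char *delim)
    {
            const char *start = p->buf + p->len;    /* where *we* begin */
            int first = 1;

            for (int i = 0; names[i] && flags; i++) {
                    unsigned long mask = 1UL << i;

                    if (!(flags & mask))
                            continue;
                    flags &= ~mask;
                    if (!first && delim)
                            seq_puts(p, delim);     /* only between our items */
                    else
                            first = 0;
                    seq_puts(p, names[i]);
            }
            /* "printed nothing" == still at our own start, not len == 0 */
            if (p->buf + p->len == start)
                    seq_puts(p, "0x0");
    }

    int main(void)
    {
            struct seq s = { "irq: ", 5 };          /* buffer already has text */
            const char *names[] = { "A", "B", NULL };

            print_flags(&s, 0x3, names, "|");
            puts(s.buf);                            /* prints: irq: A|B */
            return 0;
    }
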
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cb654542c1a1..96fc73369099 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); | |||
17 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); | 17 | static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); |
18 | 18 | ||
19 | static int syscall_enter_register(struct ftrace_event_call *event, | 19 | static int syscall_enter_register(struct ftrace_event_call *event, |
20 | enum trace_reg type); | 20 | enum trace_reg type, void *data); |
21 | static int syscall_exit_register(struct ftrace_event_call *event, | 21 | static int syscall_exit_register(struct ftrace_event_call *event, |
22 | enum trace_reg type); | 22 | enum trace_reg type, void *data); |
23 | 23 | ||
24 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | 24 | static int syscall_enter_define_fields(struct ftrace_event_call *call); |
25 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | 25 | static int syscall_exit_define_fields(struct ftrace_event_call *call); |
@@ -468,8 +468,8 @@ int __init init_ftrace_syscalls(void) | |||
468 | unsigned long addr; | 468 | unsigned long addr; |
469 | int i; | 469 | int i; |
470 | 470 | ||
471 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 471 | syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata), |
472 | NR_syscalls, GFP_KERNEL); | 472 | GFP_KERNEL); |
473 | if (!syscalls_metadata) { | 473 | if (!syscalls_metadata) { |
474 | WARN_ON(1); | 474 | WARN_ON(1); |
475 | return -ENOMEM; | 475 | return -ENOMEM; |
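
The kzalloc(sizeof(*x) * n) to kcalloc(n, sizeof(*x)) conversion above is the standard overflow-safe idiom: kcalloc refuses a byte count that would wrap before handing back zeroed memory. A userspace sketch of the same guard (names hypothetical):

    #include <stdlib.h>
    #include <stdint.h>

    /* What kcalloc() adds over a bare kzalloc(n * size). */
    static void *alloc_array(size_t n, size_t size)
    {
            if (size && n > SIZE_MAX / size)
                    return NULL;            /* n * size would overflow */
            return calloc(n, size);         /* zeroed, like kzalloc */
    }

    /* usage, matching the hunk:
     *   metadata = alloc_array(nr_syscalls, sizeof(*metadata)); */
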
@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call) | |||
649 | #endif /* CONFIG_PERF_EVENTS */ | 649 | #endif /* CONFIG_PERF_EVENTS */ |
650 | 650 | ||
651 | static int syscall_enter_register(struct ftrace_event_call *event, | 651 | static int syscall_enter_register(struct ftrace_event_call *event, |
652 | enum trace_reg type) | 652 | enum trace_reg type, void *data) |
653 | { | 653 | { |
654 | switch (type) { | 654 | switch (type) { |
655 | case TRACE_REG_REGISTER: | 655 | case TRACE_REG_REGISTER: |
@@ -664,13 +664,18 @@ static int syscall_enter_register(struct ftrace_event_call *event, | |||
664 | case TRACE_REG_PERF_UNREGISTER: | 664 | case TRACE_REG_PERF_UNREGISTER: |
665 | perf_sysenter_disable(event); | 665 | perf_sysenter_disable(event); |
666 | return 0; | 666 | return 0; |
667 | case TRACE_REG_PERF_OPEN: | ||
668 | case TRACE_REG_PERF_CLOSE: | ||
669 | case TRACE_REG_PERF_ADD: | ||
670 | case TRACE_REG_PERF_DEL: | ||
671 | return 0; | ||
667 | #endif | 672 | #endif |
668 | } | 673 | } |
669 | return 0; | 674 | return 0; |
670 | } | 675 | } |
671 | 676 | ||
672 | static int syscall_exit_register(struct ftrace_event_call *event, | 677 | static int syscall_exit_register(struct ftrace_event_call *event, |
673 | enum trace_reg type) | 678 | enum trace_reg type, void *data) |
674 | { | 679 | { |
675 | switch (type) { | 680 | switch (type) { |
676 | case TRACE_REG_REGISTER: | 681 | case TRACE_REG_REGISTER: |
@@ -685,6 +690,11 @@ static int syscall_exit_register(struct ftrace_event_call *event, | |||
685 | case TRACE_REG_PERF_UNREGISTER: | 690 | case TRACE_REG_PERF_UNREGISTER: |
686 | perf_sysexit_disable(event); | 691 | perf_sysexit_disable(event); |
687 | return 0; | 692 | return 0; |
693 | case TRACE_REG_PERF_OPEN: | ||
694 | case TRACE_REG_PERF_CLOSE: | ||
695 | case TRACE_REG_PERF_ADD: | ||
696 | case TRACE_REG_PERF_DEL: | ||
697 | return 0; | ||
688 | #endif | 698 | #endif |
689 | } | 699 | } |
690 | return 0; | 700 | return 0; |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f1539decd99d..d96ba22dabfa 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/jump_label.h> | 28 | #include <linux/static_key.h> |
29 | 29 | ||
30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; | 30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; |
31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; | 31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; |
@@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
256 | { | 256 | { |
257 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | 257 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
258 | 258 | ||
259 | if (elem->regfunc && !jump_label_enabled(&elem->key) && active) | 259 | if (elem->regfunc && !static_key_enabled(&elem->key) && active) |
260 | elem->regfunc(); | 260 | elem->regfunc(); |
261 | else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) | 261 | else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) |
262 | elem->unregfunc(); | 262 | elem->unregfunc(); |
263 | 263 | ||
264 | /* | 264 | /* |
@@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
269 | * is used. | 269 | * is used. |
270 | */ | 270 | */ |
271 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | 271 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); |
272 | if (active && !jump_label_enabled(&elem->key)) | 272 | if (active && !static_key_enabled(&elem->key)) |
273 | jump_label_inc(&elem->key); | 273 | static_key_slow_inc(&elem->key); |
274 | else if (!active && jump_label_enabled(&elem->key)) | 274 | else if (!active && static_key_enabled(&elem->key)) |
275 | jump_label_dec(&elem->key); | 275 | static_key_slow_dec(&elem->key); |
276 | } | 276 | } |
277 | 277 | ||
278 | /* | 278 | /* |
@@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, | |||
283 | */ | 283 | */ |
284 | static void disable_tracepoint(struct tracepoint *elem) | 284 | static void disable_tracepoint(struct tracepoint *elem) |
285 | { | 285 | { |
286 | if (elem->unregfunc && jump_label_enabled(&elem->key)) | 286 | if (elem->unregfunc && static_key_enabled(&elem->key)) |
287 | elem->unregfunc(); | 287 | elem->unregfunc(); |
288 | 288 | ||
289 | if (jump_label_enabled(&elem->key)) | 289 | if (static_key_enabled(&elem->key)) |
290 | jump_label_dec(&elem->key); | 290 | static_key_slow_dec(&elem->key); |
291 | rcu_assign_pointer(elem->funcs, NULL); | 291 | rcu_assign_pointer(elem->funcs, NULL); |
292 | } | 292 | } |
293 | 293 | ||
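
The tracepoint.c hunks are part of a tree-wide rename that repeats in net/core, netfilter and elsewhere below: struct jump_label_key becomes struct static_key, jump_label_inc()/jump_label_dec() become static_key_slow_inc()/static_key_slow_dec(), and state checks use static_key_enabled(). The idiom, sketched with plain-C stand-ins (the real primitives patch branch instructions at runtime; this only shows the semantics):

    struct static_key { int enabled; };

    static void static_key_slow_inc(struct static_key *k) { k->enabled++; }
    static void static_key_slow_dec(struct static_key *k) { k->enabled--; }
    static int  static_key_enabled(struct static_key *k)  { return k->enabled > 0; }

    static struct static_key tp_key;

    /* Mirrors set_tracepoint(): toggle the key only on state changes. */
    static void set_active(int active)
    {
            if (active && !static_key_enabled(&tp_key))
                    static_key_slow_inc(&tp_key);
            else if (!active && static_key_enabled(&tp_key))
                    static_key_slow_dec(&tp_key);
    }
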
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d117262deba3..14bc092fb12c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -3,12 +3,9 @@ | |||
3 | * | 3 | * |
4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | 4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. |
5 | * | 5 | * |
6 | * this code detects hard lockups: incidents in where on a CPU | 6 | * Note: Most of this code is borrowed heavily from the original softlockup |
7 | * the kernel does not respond to anything except NMI. | 7 | * detector, so thanks to Ingo for the initial implementation. |
8 | * | 8 | * Some chunks also taken from the old x86-specific nmi watchdog code, thanks |
9 | * Note: Most of this code is borrowed heavily from softlockup.c, | ||
10 | * so thanks to Ingo for the initial implementation. | ||
11 | * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks | ||
12 | * to those contributors as well. | 9 | * to those contributors as well. |
13 | */ | 10 | */ |
14 | 11 | ||
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void) | |||
117 | { | 114 | { |
118 | /* | 115 | /* |
119 | * convert watchdog_thresh from seconds to ns | 116 | * convert watchdog_thresh from seconds to ns |
120 | * the divide by 5 is to give hrtimer 5 chances to | 117 | * the divide by 5 is to give hrtimer several chances (two |
121 | * increment before the hardlockup detector generates | 118 | * or three with the current relation between the soft |
122 | * a warning | 119 | * and hard thresholds) to increment before the |
120 | * hardlockup detector generates a warning | ||
123 | */ | 121 | */ |
124 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); | 122 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); |
125 | } | 123 | } |
@@ -336,9 +334,11 @@ static int watchdog(void *unused) | |||
336 | 334 | ||
337 | set_current_state(TASK_INTERRUPTIBLE); | 335 | set_current_state(TASK_INTERRUPTIBLE); |
338 | /* | 336 | /* |
339 | * Run briefly once per second to reset the softlockup timestamp. | 337 | * Run briefly (kicked by the hrtimer callback function) once every |
340 | * If this gets delayed for more than 60 seconds then the | 338 | * get_sample_period() seconds (4 seconds by default) to reset the |
341 | * debug-printout triggers in watchdog_timer_fn(). | 339 | * softlockup timestamp. If this gets delayed for more than |
340 | * 2*watchdog_thresh seconds then the debug-printout triggers in | ||
341 | * watchdog_timer_fn(). | ||
342 | */ | 342 | */ |
343 | while (!kthread_should_stop()) { | 343 | while (!kthread_should_stop()) { |
344 | __touch_watchdog(); | 344 | __touch_watchdog(); |
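
To make the numbers in the two comments above concrete: with the default watchdog_thresh of 10, the hrtimer fires every 2*watchdog_thresh/5 = 4 seconds, the hardlockup check covers 10 seconds, and a softlockup triggers after 2*watchdog_thresh = 20 seconds. As a compile-checked sketch of that arithmetic:

    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000UL

    int main(void)
    {
            unsigned long watchdog_thresh = 10;                /* default */
            unsigned long soft_thresh = 2 * watchdog_thresh;   /* 20 s */
            unsigned long sample_ns = soft_thresh * (NSEC_PER_SEC / 5);

            printf("hrtimer period : %lu s\n", sample_ns / NSEC_PER_SEC); /* 4 */
            printf("hard threshold : %lu s\n", watchdog_thresh);          /* 10 */
            printf("soft threshold : %lu s\n", soft_thresh);              /* 20 */
            return 0;
    }
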
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d27a2aa3e815..05037dc9bde7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR | |||
166 | hard and soft lockups. | 166 | hard and soft lockups. |
167 | 167 | ||
168 | Softlockups are bugs that cause the kernel to loop in kernel | 168 | Softlockups are bugs that cause the kernel to loop in kernel |
169 | mode for more than 60 seconds, without giving other tasks a | 169 | mode for more than 20 seconds, without giving other tasks a |
170 | chance to run. The current stack trace is displayed upon | 170 | chance to run. The current stack trace is displayed upon |
171 | detection and the system will stay locked up. | 171 | detection and the system will stay locked up. |
172 | 172 | ||
173 | Hardlockups are bugs that cause the CPU to loop in kernel mode | 173 | Hardlockups are bugs that cause the CPU to loop in kernel mode |
174 | for more than 60 seconds, without letting other interrupts have a | 174 | for more than 10 seconds, without letting other interrupts have a |
175 | chance to run. The current stack trace is displayed upon detection | 175 | chance to run. The current stack trace is displayed upon detection |
176 | and the system will stay locked up. | 176 | and the system will stay locked up. |
177 | 177 | ||
178 | The overhead should be minimal. A periodic hrtimer runs to | 178 | The overhead should be minimal. A periodic hrtimer runs to |
179 | generate interrupts and kick the watchdog task every 10-12 seconds. | 179 | generate interrupts and kick the watchdog task every 4 seconds. |
180 | An NMI is generated every 60 seconds or so to check for hardlockups. | 180 | An NMI is generated every 10 seconds or so to check for hardlockups. |
181 | |||
182 | The frequency of hrtimer and NMI events and the soft and hard lockup | ||
183 | thresholds can be controlled through the sysctl watchdog_thresh. | ||
181 | 184 | ||
182 | config HARDLOCKUP_DETECTOR | 185 | config HARDLOCKUP_DETECTOR |
183 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ | 186 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ |
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC | |||
189 | help | 192 | help |
190 | Say Y here to enable the kernel to panic on "hard lockups", | 193 | Say Y here to enable the kernel to panic on "hard lockups", |
191 | which are bugs that cause the kernel to loop in kernel | 194 | which are bugs that cause the kernel to loop in kernel |
192 | mode with interrupts disabled for more than 60 seconds. | 195 | mode with interrupts disabled for more than 10 seconds (configurable |
196 | using the watchdog_thresh sysctl). | ||
193 | 197 | ||
194 | Say N if unsure. | 198 | Say N if unsure. |
195 | 199 | ||
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC | |||
206 | help | 210 | help |
207 | Say Y here to enable the kernel to panic on "soft lockups", | 211 | Say Y here to enable the kernel to panic on "soft lockups", |
208 | which are bugs that cause the kernel to loop in kernel | 212 | which are bugs that cause the kernel to loop in kernel |
209 | mode for more than 60 seconds, without giving other tasks a | 213 | mode for more than 20 seconds (configurable using the watchdog_thresh |
210 | chance to run. | 214 | sysctl), without giving other tasks a chance to run. |
211 | 215 | ||
212 | The panic can be used in combination with panic_timeout, | 216 | The panic can be used in combination with panic_timeout, |
213 | to cause the system to reboot automatically after a | 217 | to cause the system to reboot automatically after a |
diff --git a/net/core/dev.c b/net/core/dev.c index 6ca32f6b3105..6982bfd6a781 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -134,7 +134,7 @@ | |||
134 | #include <linux/inetdevice.h> | 134 | #include <linux/inetdevice.h> |
135 | #include <linux/cpu_rmap.h> | 135 | #include <linux/cpu_rmap.h> |
136 | #include <linux/net_tstamp.h> | 136 | #include <linux/net_tstamp.h> |
137 | #include <linux/jump_label.h> | 137 | #include <linux/static_key.h> |
138 | #include <net/flow_keys.h> | 138 | #include <net/flow_keys.h> |
139 | 139 | ||
140 | #include "net-sysfs.h" | 140 | #include "net-sysfs.h" |
@@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | |||
1441 | } | 1441 | } |
1442 | EXPORT_SYMBOL(call_netdevice_notifiers); | 1442 | EXPORT_SYMBOL(call_netdevice_notifiers); |
1443 | 1443 | ||
1444 | static struct jump_label_key netstamp_needed __read_mostly; | 1444 | static struct static_key netstamp_needed __read_mostly; |
1445 | #ifdef HAVE_JUMP_LABEL | 1445 | #ifdef HAVE_JUMP_LABEL |
1446 | /* We are not allowed to call jump_label_dec() from irq context | 1446 | /* We are not allowed to call static_key_slow_dec() from irq context |
1447 | * If net_disable_timestamp() is called from irq context, defer the | 1447 | * If net_disable_timestamp() is called from irq context, defer the |
1448 | * jump_label_dec() calls. | 1448 | * static_key_slow_dec() calls. |
1449 | */ | 1449 | */ |
1450 | static atomic_t netstamp_needed_deferred; | 1450 | static atomic_t netstamp_needed_deferred; |
1451 | #endif | 1451 | #endif |
@@ -1457,12 +1457,12 @@ void net_enable_timestamp(void) | |||
1457 | 1457 | ||
1458 | if (deferred) { | 1458 | if (deferred) { |
1459 | while (--deferred) | 1459 | while (--deferred) |
1460 | jump_label_dec(&netstamp_needed); | 1460 | static_key_slow_dec(&netstamp_needed); |
1461 | return; | 1461 | return; |
1462 | } | 1462 | } |
1463 | #endif | 1463 | #endif |
1464 | WARN_ON(in_interrupt()); | 1464 | WARN_ON(in_interrupt()); |
1465 | jump_label_inc(&netstamp_needed); | 1465 | static_key_slow_inc(&netstamp_needed); |
1466 | } | 1466 | } |
1467 | EXPORT_SYMBOL(net_enable_timestamp); | 1467 | EXPORT_SYMBOL(net_enable_timestamp); |
1468 | 1468 | ||
@@ -1474,19 +1474,19 @@ void net_disable_timestamp(void) | |||
1474 | return; | 1474 | return; |
1475 | } | 1475 | } |
1476 | #endif | 1476 | #endif |
1477 | jump_label_dec(&netstamp_needed); | 1477 | static_key_slow_dec(&netstamp_needed); |
1478 | } | 1478 | } |
1479 | EXPORT_SYMBOL(net_disable_timestamp); | 1479 | EXPORT_SYMBOL(net_disable_timestamp); |
1480 | 1480 | ||
1481 | static inline void net_timestamp_set(struct sk_buff *skb) | 1481 | static inline void net_timestamp_set(struct sk_buff *skb) |
1482 | { | 1482 | { |
1483 | skb->tstamp.tv64 = 0; | 1483 | skb->tstamp.tv64 = 0; |
1484 | if (static_branch(&netstamp_needed)) | 1484 | if (static_key_false(&netstamp_needed)) |
1485 | __net_timestamp(skb); | 1485 | __net_timestamp(skb); |
1486 | } | 1486 | } |
1487 | 1487 | ||
1488 | #define net_timestamp_check(COND, SKB) \ | 1488 | #define net_timestamp_check(COND, SKB) \ |
1489 | if (static_branch(&netstamp_needed)) { \ | 1489 | if (static_key_false(&netstamp_needed)) { \ |
1490 | if ((COND) && !(SKB)->tstamp.tv64) \ | 1490 | if ((COND) && !(SKB)->tstamp.tv64) \ |
1491 | __net_timestamp(SKB); \ | 1491 | __net_timestamp(SKB); \ |
1492 | } \ | 1492 | } \ |
@@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash); | |||
2660 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; | 2660 | struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; |
2661 | EXPORT_SYMBOL(rps_sock_flow_table); | 2661 | EXPORT_SYMBOL(rps_sock_flow_table); |
2662 | 2662 | ||
2663 | struct jump_label_key rps_needed __read_mostly; | 2663 | struct static_key rps_needed __read_mostly; |
2664 | 2664 | ||
2665 | static struct rps_dev_flow * | 2665 | static struct rps_dev_flow * |
2666 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, | 2666 | set_rps_cpu(struct net_device *dev, struct sk_buff *skb, |
@@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb) | |||
2945 | 2945 | ||
2946 | trace_netif_rx(skb); | 2946 | trace_netif_rx(skb); |
2947 | #ifdef CONFIG_RPS | 2947 | #ifdef CONFIG_RPS |
2948 | if (static_branch(&rps_needed)) { | 2948 | if (static_key_false(&rps_needed)) { |
2949 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 2949 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
2950 | int cpu; | 2950 | int cpu; |
2951 | 2951 | ||
@@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb) | |||
3309 | return NET_RX_SUCCESS; | 3309 | return NET_RX_SUCCESS; |
3310 | 3310 | ||
3311 | #ifdef CONFIG_RPS | 3311 | #ifdef CONFIG_RPS |
3312 | if (static_branch(&rps_needed)) { | 3312 | if (static_key_false(&rps_needed)) { |
3313 | struct rps_dev_flow voidflow, *rflow = &voidflow; | 3313 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
3314 | int cpu, ret; | 3314 | int cpu, ret; |
3315 | 3315 | ||
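
The dev.c hunks above also preserve a subtlety worth noting: static_key_slow_dec() may not run in irq context, so net_disable_timestamp() only bumps an atomic counter there, and the next net_enable_timestamp() drains the deferred decrements (one of them cancelling against the enable itself). A plain-C sketch of that hand-off, with illustrative stand-ins for the kernel primitives:

    #include <stdatomic.h>

    struct static_key { _Atomic int enabled; };

    static void static_key_slow_inc(struct static_key *k) { k->enabled++; }
    static void static_key_slow_dec(struct static_key *k) { k->enabled--; }

    static struct static_key needed;
    static _Atomic int deferred;    /* decrements postponed from irq context */

    void disable_timestamp(int in_irq)
    {
            if (in_irq) {
                    deferred++;     /* too slow to do here; defer it */
                    return;
            }
            static_key_slow_dec(&needed);
    }

    void enable_timestamp(void)
    {
            int d = atomic_exchange(&deferred, 0);

            /* one deferred dec cancels against this enable; pay out the rest */
            while (--d > 0)
                    static_key_slow_dec(&needed);
            if (d < 0)              /* nothing was deferred: plain enable */
                    static_key_slow_inc(&needed);
    }
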
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a1727cda03d7..495586232aa1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, | |||
608 | spin_unlock(&rps_map_lock); | 608 | spin_unlock(&rps_map_lock); |
609 | 609 | ||
610 | if (map) | 610 | if (map) |
611 | jump_label_inc(&rps_needed); | 611 | static_key_slow_inc(&rps_needed); |
612 | if (old_map) { | 612 | if (old_map) { |
613 | kfree_rcu(old_map, rcu); | 613 | kfree_rcu(old_map, rcu); |
614 | jump_label_dec(&rps_needed); | 614 | static_key_slow_dec(&rps_needed); |
615 | } | 615 | } |
616 | free_cpumask_var(mask); | 616 | free_cpumask_var(mask); |
617 | return len; | 617 | return len; |
diff --git a/net/core/sock.c b/net/core/sock.c index 02f8dfe320b7..95aff9c7419b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -111,7 +111,7 @@ | |||
111 | #include <linux/init.h> | 111 | #include <linux/init.h> |
112 | #include <linux/highmem.h> | 112 | #include <linux/highmem.h> |
113 | #include <linux/user_namespace.h> | 113 | #include <linux/user_namespace.h> |
114 | #include <linux/jump_label.h> | 114 | #include <linux/static_key.h> |
115 | #include <linux/memcontrol.h> | 115 | #include <linux/memcontrol.h> |
116 | 116 | ||
117 | #include <asm/uaccess.h> | 117 | #include <asm/uaccess.h> |
@@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
184 | static struct lock_class_key af_family_keys[AF_MAX]; | 184 | static struct lock_class_key af_family_keys[AF_MAX]; |
185 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 185 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
186 | 186 | ||
187 | struct jump_label_key memcg_socket_limit_enabled; | 187 | struct static_key memcg_socket_limit_enabled; |
188 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | 188 | EXPORT_SYMBOL(memcg_socket_limit_enabled); |
189 | 189 | ||
190 | /* | 190 | /* |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d05559d4d9cd..0c2850874254 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, | |||
69 | if (sock_table != orig_sock_table) { | 69 | if (sock_table != orig_sock_table) { |
70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | 70 | rcu_assign_pointer(rps_sock_flow_table, sock_table); |
71 | if (sock_table) | 71 | if (sock_table) |
72 | jump_label_inc(&rps_needed); | 72 | static_key_slow_inc(&rps_needed); |
73 | if (orig_sock_table) { | 73 | if (orig_sock_table) { |
74 | jump_label_dec(&rps_needed); | 74 | static_key_slow_dec(&rps_needed); |
75 | synchronize_rcu(); | 75 | synchronize_rcu(); |
76 | vfree(orig_sock_table); | 76 | vfree(orig_sock_table); |
77 | } | 77 | } |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..602fb305365f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c | |||
@@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | 111 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); |
112 | 112 | ||
113 | if (val != RESOURCE_MAX) | 113 | if (val != RESOURCE_MAX) |
114 | jump_label_dec(&memcg_socket_limit_enabled); | 114 | static_key_slow_dec(&memcg_socket_limit_enabled); |
115 | } | 115 | } |
116 | EXPORT_SYMBOL(tcp_destroy_cgroup); | 116 | EXPORT_SYMBOL(tcp_destroy_cgroup); |
117 | 117 | ||
@@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |||
143 | net->ipv4.sysctl_tcp_mem[i]); | 143 | net->ipv4.sysctl_tcp_mem[i]); |
144 | 144 | ||
145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) | 145 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) |
146 | jump_label_dec(&memcg_socket_limit_enabled); | 146 | static_key_slow_dec(&memcg_socket_limit_enabled); |
147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) | 147 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) |
148 | jump_label_inc(&memcg_socket_limit_enabled); | 148 | static_key_slow_inc(&memcg_socket_limit_enabled); |
149 | 149 | ||
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b4e8ff05b301..e1b7e051332e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c | |||
@@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; | |||
56 | EXPORT_SYMBOL(nf_hooks); | 56 | EXPORT_SYMBOL(nf_hooks); |
57 | 57 | ||
58 | #if defined(CONFIG_JUMP_LABEL) | 58 | #if defined(CONFIG_JUMP_LABEL) |
59 | struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; | 59 | struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; |
60 | EXPORT_SYMBOL(nf_hooks_needed); | 60 | EXPORT_SYMBOL(nf_hooks_needed); |
61 | #endif | 61 | #endif |
62 | 62 | ||
@@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg) | |||
77 | list_add_rcu(®->list, elem->list.prev); | 77 | list_add_rcu(®->list, elem->list.prev); |
78 | mutex_unlock(&nf_hook_mutex); | 78 | mutex_unlock(&nf_hook_mutex); |
79 | #if defined(CONFIG_JUMP_LABEL) | 79 | #if defined(CONFIG_JUMP_LABEL) |
80 | jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); | 80 | static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); |
81 | #endif | 81 | #endif |
82 | return 0; | 82 | return 0; |
83 | } | 83 | } |
@@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) | |||
89 | list_del_rcu(®->list); | 89 | list_del_rcu(®->list); |
90 | mutex_unlock(&nf_hook_mutex); | 90 | mutex_unlock(&nf_hook_mutex); |
91 | #if defined(CONFIG_JUMP_LABEL) | 91 | #if defined(CONFIG_JUMP_LABEL) |
92 | jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); | 92 | static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); |
93 | #endif | 93 | #endif |
94 | synchronize_net(); | 94 | synchronize_net(); |
95 | } | 95 | } |
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 4626a398836a..ca600e09c8d4 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile | |||
@@ -1,3 +1,10 @@ | |||
1 | OUTPUT := ./ | ||
2 | ifeq ("$(origin O)", "command line") | ||
3 | ifneq ($(O),) | ||
4 | OUTPUT := $(O)/ | ||
5 | endif | ||
6 | endif | ||
7 | |||
1 | MAN1_TXT= \ | 8 | MAN1_TXT= \ |
2 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ | 9 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ |
3 | $(wildcard perf-*.txt)) \ | 10 | $(wildcard perf-*.txt)) \ |
@@ -6,10 +13,11 @@ MAN5_TXT= | |||
6 | MAN7_TXT= | 13 | MAN7_TXT= |
7 | 14 | ||
8 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) | 15 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) |
9 | MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) | 16 | _MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) |
10 | MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) | 17 | _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) |
11 | 18 | ||
12 | DOC_HTML=$(MAN_HTML) | 19 | MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) |
20 | MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) | ||
13 | 21 | ||
14 | ARTICLES = | 22 | ARTICLES = |
15 | # with their own formatting rules. | 23 | # with their own formatting rules. |
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica | |||
18 | SP_ARTICLES += $(API_DOCS) | 26 | SP_ARTICLES += $(API_DOCS) |
19 | SP_ARTICLES += technical/api-index | 27 | SP_ARTICLES += technical/api-index |
20 | 28 | ||
21 | DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | 29 | _DOC_HTML = $(_MAN_HTML) |
30 | _DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | ||
31 | DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML)) | ||
22 | 32 | ||
23 | DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) | 33 | _DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) |
24 | DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) | 34 | _DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) |
25 | DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) | 35 | _DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) |
36 | |||
37 | DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) | ||
38 | DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) | ||
39 | DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7)) | ||
26 | 40 | ||
27 | # Make the path relative to DESTDIR, not prefix | 41 | # Make the path relative to DESTDIR, not prefix |
28 | ifndef DESTDIR | 42 | ifndef DESTDIR |
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1) | |||
150 | man5: $(DOC_MAN5) | 164 | man5: $(DOC_MAN5) |
151 | man7: $(DOC_MAN7) | 165 | man7: $(DOC_MAN7) |
152 | 166 | ||
153 | info: perf.info perfman.info | 167 | info: $(OUTPUT)perf.info $(OUTPUT)perfman.info |
154 | 168 | ||
155 | pdf: user-manual.pdf | 169 | pdf: $(OUTPUT)user-manual.pdf |
156 | 170 | ||
157 | install: install-man | 171 | install: install-man |
158 | 172 | ||
@@ -166,7 +180,7 @@ install-man: man | |||
166 | 180 | ||
167 | install-info: info | 181 | install-info: info |
168 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) | 182 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) |
169 | $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) | 183 | $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) |
170 | if test -r $(DESTDIR)$(infodir)/dir; then \ | 184 | if test -r $(DESTDIR)$(infodir)/dir; then \ |
171 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ | 185 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ |
172 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ | 186 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ |
@@ -176,7 +190,7 @@ install-info: info | |||
176 | 190 | ||
177 | install-pdf: pdf | 191 | install-pdf: pdf |
178 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) | 192 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) |
179 | $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) | 193 | $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) |
180 | 194 | ||
181 | #install-html: html | 195 | #install-html: html |
182 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) | 196 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) |
@@ -189,14 +203,14 @@ install-pdf: pdf | |||
189 | # | 203 | # |
190 | # Determine "include::" file references in asciidoc files. | 204 | # Determine "include::" file references in asciidoc files. |
191 | # | 205 | # |
192 | doc.dep : $(wildcard *.txt) build-docdep.perl | 206 | $(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl |
193 | $(QUIET_GEN)$(RM) $@+ $@ && \ | 207 | $(QUIET_GEN)$(RM) $@+ $@ && \ |
194 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ | 208 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ |
195 | mv $@+ $@ | 209 | mv $@+ $@ |
196 | 210 | ||
197 | -include doc.dep | 211 | -include $(OUTPUT)doc.dep |
198 | 212 | ||
199 | cmds_txt = cmds-ancillaryinterrogators.txt \ | 213 | _cmds_txt = cmds-ancillaryinterrogators.txt \ |
200 | cmds-ancillarymanipulators.txt \ | 214 | cmds-ancillarymanipulators.txt \ |
201 | cmds-mainporcelain.txt \ | 215 | cmds-mainporcelain.txt \ |
202 | cmds-plumbinginterrogators.txt \ | 216 | cmds-plumbinginterrogators.txt \ |
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ | |||
205 | cmds-synchelpers.txt \ | 219 | cmds-synchelpers.txt \ |
206 | cmds-purehelpers.txt \ | 220 | cmds-purehelpers.txt \ |
207 | cmds-foreignscminterface.txt | 221 | cmds-foreignscminterface.txt |
222 | cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt)) | ||
208 | 223 | ||
209 | $(cmds_txt): cmd-list.made | 224 | $(cmds_txt): $(OUTPUT)cmd-list.made |
210 | 225 | ||
211 | cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) | 226 | $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) |
212 | $(QUIET_GEN)$(RM) $@ && \ | 227 | $(QUIET_GEN)$(RM) $@ && \ |
213 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ | 228 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ |
214 | date >$@ | 229 | date >$@ |
215 | 230 | ||
216 | clean: | 231 | clean: |
217 | $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 | 232 | $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) |
218 | $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info | 233 | $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) |
219 | $(RM) howto-index.txt howto/*.html doc.dep | 234 | $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) |
220 | $(RM) technical/api-*.html technical/api-index.txt | 235 | $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ |
221 | $(RM) $(cmds_txt) *.made | 236 | $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info |
222 | 237 | $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep | |
223 | $(MAN_HTML): %.html : %.txt | 238 | $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt |
239 | $(RM) $(cmds_txt) $(OUTPUT)*.made | ||
240 | |||
241 | $(MAN_HTML): $(OUTPUT)%.html : %.txt | ||
224 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 242 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
225 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ | 243 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ |
226 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 244 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
227 | mv $@+ $@ | 245 | mv $@+ $@ |
228 | 246 | ||
229 | %.1 %.5 %.7 : %.xml | 247 | $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml |
230 | $(QUIET_XMLTO)$(RM) $@ && \ | 248 | $(QUIET_XMLTO)$(RM) $@ && \ |
231 | xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< | 249 | xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< |
232 | 250 | ||
233 | %.xml : %.txt | 251 | $(OUTPUT)%.xml : %.txt |
234 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 252 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
235 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ | 253 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ |
236 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 254 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt | |||
239 | XSLT = docbook.xsl | 257 | XSLT = docbook.xsl |
240 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css | 258 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css |
241 | 259 | ||
242 | user-manual.html: user-manual.xml | 260 | $(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml |
243 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< | 261 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< |
244 | 262 | ||
245 | perf.info: user-manual.texi | 263 | $(OUTPUT)perf.info: $(OUTPUT)user-manual.texi |
246 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi | 264 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi |
247 | 265 | ||
248 | user-manual.texi: user-manual.xml | 266 | $(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml |
249 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 267 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
250 | $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ | 268 | $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ |
251 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ | 269 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ |
252 | rm $@++ && \ | 270 | rm $@++ && \ |
253 | mv $@+ $@ | 271 | mv $@+ $@ |
254 | 272 | ||
255 | user-manual.pdf: user-manual.xml | 273 | $(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml |
256 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ | 274 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ |
257 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ | 275 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ |
258 | mv $@+ $@ | 276 | mv $@+ $@ |
259 | 277 | ||
260 | perfman.texi: $(MAN_XML) cat-texi.perl | 278 | $(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl |
261 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 279 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
262 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ | 280 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ |
263 | --to-stdout $(xml) &&) true) > $@++ && \ | 281 | --to-stdout $(xml) &&) true) > $@++ && \ |
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl | |||
265 | rm $@++ && \ | 283 | rm $@++ && \ |
266 | mv $@+ $@ | 284 | mv $@+ $@ |
267 | 285 | ||
268 | perfman.info: perfman.texi | 286 | $(OUTPUT)perfman.info: $(OUTPUT)perfman.texi |
269 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi | 287 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi |
270 | 288 | ||
271 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml | 289 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml |
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index d6b2a4f2108b..c7f5f55634ac 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt | |||
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf lock' {record|report|trace} | 11 | 'perf lock' {record|report|script|info} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command. | |||
20 | produces the file "perf.data" which contains tracing | 20 | produces the file "perf.data" which contains tracing |
21 | results of lock events. | 21 | results of lock events. |
22 | 22 | ||
23 | 'perf lock trace' shows raw lock events. | ||
24 | |||
25 | 'perf lock report' reports statistical data. | 23 | 'perf lock report' reports statistical data. |
26 | 24 | ||
25 | 'perf lock script' shows raw lock events. | ||
26 | |||
27 | 'perf lock info' shows metadata like threads or addresses | ||
28 | of lock instances. | ||
29 | |||
27 | COMMON OPTIONS | 30 | COMMON OPTIONS |
28 | -------------- | 31 | -------------- |
29 | 32 | ||
@@ -47,6 +50,17 @@ REPORT OPTIONS | |||
47 | Sorting key. Possible values: acquired (default), contended, | 50 | Sorting key. Possible values: acquired (default), contended, |
48 | wait_total, wait_max, wait_min. | 51 | wait_total, wait_max, wait_min. |
49 | 52 | ||
53 | INFO OPTIONS | ||
54 | ------------ | ||
55 | |||
56 | -t:: | ||
57 | --threads:: | ||
58 | dump thread list in perf.data | ||
59 | |||
60 | -m:: | ||
61 | --map:: | ||
62 | dump map of lock instances (address:name table) | ||
63 | |||
50 | SEE ALSO | 64 | SEE ALSO |
51 | -------- | 65 | -------- |
52 | linkperf:perf[1] | 66 | linkperf:perf[1] |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2937f7e14bb7..a1386b2fff00 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -52,11 +52,15 @@ OPTIONS | |||
52 | 52 | ||
53 | -p:: | 53 | -p:: |
54 | --pid=:: | 54 | --pid=:: |
55 | Record events on existing process ID. | 55 | Record events on existing process ID (comma separated list). |
56 | 56 | ||
57 | -t:: | 57 | -t:: |
58 | --tid=:: | 58 | --tid=:: |
59 | Record events on existing thread ID. | 59 | Record events on existing thread ID (comma separated list). |
60 | |||
61 | -u:: | ||
62 | --uid=:: | ||
63 | Record events in threads owned by uid. Name or number. | ||
60 | 64 | ||
61 | -r:: | 65 | -r:: |
62 | --realtime=:: | 66 | --realtime=:: |
@@ -148,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha | |||
148 | corresponding events, i.e., they always refer to events defined earlier on the command | 152 | corresponding events, i.e., they always refer to events defined earlier on the command |
149 | line. | 153 | line. |
150 | 154 | ||
155 | -b:: | ||
156 | --branch-any:: | ||
157 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
158 | This is a shortcut for --branch-filter any. See --branch-filter for more info. | ||
159 | |||
160 | -j:: | ||
161 | --branch-filter:: | ||
162 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
163 | taken branches. The number of branches captured with each sample depends on the | ||
164 | underlying hardware, the type of branches of interest, and the executed code. | ||
165 | It is possible to select the types of branches captured by enabling filters. The | ||
166 | following filters are defined: | ||
167 | |||
168 | - any: any type of branches | ||
169 | - any_call: any function call or system call | ||
170 | - any_ret: any function return or system call return | ||
171 | - any_ind: any indirect branch | ||
172 | - u: only when the branch target is at the user level | ||
173 | - k: only when the branch target is in the kernel | ||
174 | - hv: only when the target is at the hypervisor level | ||
175 | |||
176 | + | ||
177 | The option requires at least one branch type among any, any_call, any_ret, any_ind. | ||
178 | The privilege levels may be omitted, in which case the privilege levels of the associated | ||
179 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
180 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
181 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
182 | The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k | ||
183 | Note that this feature may not be available on all processors. | ||
184 | |||
151 | SEE ALSO | 185 | SEE ALSO |
152 | -------- | 186 | -------- |
153 | linkperf:perf-stat[1], linkperf:perf-list[1] | 187 | linkperf:perf-stat[1], linkperf:perf-list[1] |
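
Under the hood, these record options map onto the branch_sample_type field of struct perf_event_attr, the kernel ABI introduced alongside this tooling. A hedged sketch of roughly what `perf record -j any_call,u` asks the kernel for (not perf's own source; the PERF_SAMPLE_BRANCH_* flags come from linux/perf_event.h):

    #include <linux/perf_event.h>
    #include <string.h>

    static void setup_branch_sampling(struct perf_event_attr *attr)
    {
            memset(attr, 0, sizeof(*attr));
            attr->size        = sizeof(*attr);
            attr->type        = PERF_TYPE_HARDWARE;
            attr->config      = PERF_COUNT_HW_CPU_CYCLES;
            /* ask for a branch stack with each sample ... */
            attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
            /* ... filtered to user-level call branches */
            attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL |
                                       PERF_SAMPLE_BRANCH_USER;
    }
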
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712e..87feeee8b90c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -153,6 +153,16 @@ OPTIONS | |||
153 | information which may be very large and thus may clutter the display. | 153 | information which may be very large and thus may clutter the display. |
154 | It currently includes: cpu and numa topology of the host system. | 154 | It currently includes: cpu and numa topology of the host system. |
155 | 155 | ||
156 | -b:: | ||
157 | --branch-stack:: | ||
158 | Use the addresses of sampled taken branches instead of the instruction | ||
159 | address to build the histograms. To generate meaningful output, the | ||
160 | perf.data file must have been obtained using perf record -b or | ||
161 | perf record --branch-filter xxx where xxx is a branch filter option. | ||
162 | perf report is able to auto-detect whether a perf.data file contains | ||
163 | branch stacks and it will automatically switch to the branch view mode, | ||
164 | unless --no-branch-stack is used. | ||
165 | |||
156 | SEE ALSO | 166 | SEE ALSO |
157 | -------- | 167 | -------- |
158 | linkperf:perf-stat[1], linkperf:perf-annotate[1] | 168 | linkperf:perf-stat[1], linkperf:perf-annotate[1] |
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2f6cef43da25..e9cbfcddfa3f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
@@ -115,7 +115,7 @@ OPTIONS | |||
115 | -f:: | 115 | -f:: |
116 | --fields:: | 116 | --fields:: |
117 | Comma separated list of fields to print. Options are: | 117 | Comma separated list of fields to print. Options are: |
118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. | 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff. |
119 | Field list can be prepended with the type, trace, sw or hw, | 119 | Field list can be prepended with the type, trace, sw or hw, |
120 | to indicate to which event type the field list applies. | 120 | to indicate to which event type the field list applies. |
121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace | 121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace |
@@ -200,6 +200,9 @@ OPTIONS | |||
200 | It currently includes: cpu and numa topology of the host system. | 200 | It currently includes: cpu and numa topology of the host system. |
201 | It can only be used with the perf script report mode. | 201 | It can only be used with the perf script report mode. |
202 | 202 | ||
203 | --show-kernel-path:: | ||
204 | Try to resolve the path of [kernel.kallsyms] | ||
205 | |||
203 | SEE ALSO | 206 | SEE ALSO |
204 | -------- | 207 | -------- |
205 | linkperf:perf-record[1], linkperf:perf-script-perl[1], | 208 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8966b9ab2014..2fa173b51970 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -35,11 +35,11 @@ OPTIONS | |||
35 | child tasks do not inherit counters | 35 | child tasks do not inherit counters |
36 | -p:: | 36 | -p:: |
37 | --pid=<pid>:: | 37 | --pid=<pid>:: |
38 | stat events on existing process id | 38 | stat events on existing process id (comma separated list) |
39 | 39 | ||
40 | -t:: | 40 | -t:: |
41 | --tid=<tid>:: | 41 | --tid=<tid>:: |
42 | stat events on existing thread id | 42 | stat events on existing thread id (comma separated list) |
43 | 43 | ||
44 | 44 | ||
45 | -a:: | 45 | -a:: |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b1a5bbbfebef..4a5680cb242e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
@@ -72,11 +72,15 @@ Default is to monitor all CPUS. | |||
72 | 72 | ||
73 | -p <pid>:: | 73 | -p <pid>:: |
74 | --pid=<pid>:: | 74 | --pid=<pid>:: |
75 | Profile events on existing Process ID. | 75 | Profile events on existing Process ID (comma separated list). |
76 | 76 | ||
77 | -t <tid>:: | 77 | -t <tid>:: |
78 | --tid=<tid>:: | 78 | --tid=<tid>:: |
79 | Profile events on existing thread ID. | 79 | Profile events on existing thread ID (comma separated list). |
80 | |||
81 | -u:: | ||
82 | --uid=:: | ||
83 | Profile events in threads owned by uid. Name or number. | ||
80 | 84 | ||
81 | -r <priority>:: | 85 | -r <priority>:: |
82 | --realtime=<priority>:: | 86 | --realtime=<priority>:: |
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 1078c5fadd5b..5476bc0a1eac 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST | |||
@@ -9,6 +9,7 @@ lib/rbtree.c | |||
9 | include/linux/swab.h | 9 | include/linux/swab.h |
10 | arch/*/include/asm/unistd*.h | 10 | arch/*/include/asm/unistd*.h |
11 | arch/*/lib/memcpy*.S | 11 | arch/*/lib/memcpy*.S |
12 | arch/*/lib/memset*.S | ||
12 | include/linux/poison.h | 13 | include/linux/poison.h |
13 | include/linux/magic.h | 14 | include/linux/magic.h |
14 | include/linux/hw_breakpoint.h | 15 | include/linux/hw_breakpoint.h |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8a4b9bccf8b2..74fd7f89208a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -15,6 +15,16 @@ endif | |||
15 | 15 | ||
16 | # Define V to have a more verbose compile. | 16 | # Define V to have a more verbose compile. |
17 | # | 17 | # |
18 | # Define O to save output files in a separate directory. | ||
19 | # | ||
20 | # Define ARCH as name of target architecture if you want cross-builds. | ||
21 | # | ||
22 | # Define CROSS_COMPILE as prefix name of compiler if you want cross-builds. | ||
23 | # | ||
24 | # Define NO_LIBPERL to disable perl script extension. | ||
25 | # | ||
26 | # Define NO_LIBPYTHON to disable python script extension. | ||
27 | # | ||
18 | # Define PYTHON to point to the python binary if the default | 28 | # Define PYTHON to point to the python binary if the default |
19 | # `python' is not correct; for example: PYTHON=python2 | 29 | # `python' is not correct; for example: PYTHON=python2 |
20 | # | 30 | # |
@@ -32,6 +42,10 @@ endif | |||
32 | # Define NO_DWARF if you do not want debug-info analysis feature at all. | 42 | # Define NO_DWARF if you do not want debug-info analysis feature at all. |
33 | # | 43 | # |
34 | # Define WERROR=0 to disable treating any warnings as errors. | 44 | # Define WERROR=0 to disable treating any warnings as errors. |
45 | # | ||
46 | # Define NO_NEWT if you do not want TUI support. | ||
47 | # | ||
48 | # Define NO_DEMANGLE if you do not want C++ symbol demangling. | ||
35 | 49 | ||
36 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | 50 | $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE |
37 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) | 51 | @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) |
@@ -61,7 +75,7 @@ ifeq ($(ARCH),x86_64) | |||
61 | ifeq (${IS_X86_64}, 1) | 75 | ifeq (${IS_X86_64}, 1) |
62 | RAW_ARCH := x86_64 | 76 | RAW_ARCH := x86_64 |
63 | ARCH_CFLAGS := -DARCH_X86_64 | 77 | ARCH_CFLAGS := -DARCH_X86_64 |
64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S | 78 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S |
65 | endif | 79 | endif |
66 | endif | 80 | endif |
67 | 81 | ||
@@ -183,7 +197,10 @@ SCRIPT_SH += perf-archive.sh | |||
183 | grep-libs = $(filter -l%,$(1)) | 197 | grep-libs = $(filter -l%,$(1)) |
184 | strip-libs = $(filter-out -l%,$(1)) | 198 | strip-libs = $(filter-out -l%,$(1)) |
185 | 199 | ||
186 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) | 200 | PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) |
201 | PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py | ||
202 | |||
203 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) | ||
187 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ | 204 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ |
188 | --quiet build_ext; \ | 205 | --quiet build_ext; \ |
189 | mkdir -p $(OUTPUT)python && \ | 206 | mkdir -p $(OUTPUT)python && \ |
@@ -258,6 +275,7 @@ LIB_H += util/callchain.h | |||
258 | LIB_H += util/build-id.h | 275 | LIB_H += util/build-id.h |
259 | LIB_H += util/debug.h | 276 | LIB_H += util/debug.h |
260 | LIB_H += util/debugfs.h | 277 | LIB_H += util/debugfs.h |
278 | LIB_H += util/sysfs.h | ||
261 | LIB_H += util/event.h | 279 | LIB_H += util/event.h |
262 | LIB_H += util/evsel.h | 280 | LIB_H += util/evsel.h |
263 | LIB_H += util/evlist.h | 281 | LIB_H += util/evlist.h |
@@ -304,6 +322,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o | |||
304 | LIB_OBJS += $(OUTPUT)util/config.o | 322 | LIB_OBJS += $(OUTPUT)util/config.o |
305 | LIB_OBJS += $(OUTPUT)util/ctype.o | 323 | LIB_OBJS += $(OUTPUT)util/ctype.o |
306 | LIB_OBJS += $(OUTPUT)util/debugfs.o | 324 | LIB_OBJS += $(OUTPUT)util/debugfs.o |
325 | LIB_OBJS += $(OUTPUT)util/sysfs.o | ||
307 | LIB_OBJS += $(OUTPUT)util/environment.o | 326 | LIB_OBJS += $(OUTPUT)util/environment.o |
308 | LIB_OBJS += $(OUTPUT)util/event.o | 327 | LIB_OBJS += $(OUTPUT)util/event.o |
309 | LIB_OBJS += $(OUTPUT)util/evlist.o | 328 | LIB_OBJS += $(OUTPUT)util/evlist.o |
@@ -361,8 +380,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o | |||
361 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o | 380 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o |
362 | ifeq ($(RAW_ARCH),x86_64) | 381 | ifeq ($(RAW_ARCH),x86_64) |
363 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | 382 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o |
383 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o | ||
364 | endif | 384 | endif |
365 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 385 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
386 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o | ||
366 | 387 | ||
367 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o | 388 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o |
368 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o | 389 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o |
@@ -794,7 +815,6 @@ help: | |||
794 | @echo ' quick-install-html - install the html documentation quickly' | 815 | @echo ' quick-install-html - install the html documentation quickly' |
795 | @echo '' | 816 | @echo '' |
796 | @echo 'Perf maintainer targets:' | 817 | @echo 'Perf maintainer targets:' |
797 | @echo ' distclean - alias to clean' | ||
798 | @echo ' clean - clean all binary objects and build output' | 818 | @echo ' clean - clean all binary objects and build output' |
799 | 819 | ||
800 | doc: | 820 | doc: |
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
@@ -4,6 +4,7 @@ | |||
4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); | 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); |
7 | extern int bench_mem_memset(int argc, const char **argv, const char *prefix); | ||
7 | 8 | ||
8 | #define BENCH_FORMAT_DEFAULT_STR "default" | 9 | #define BENCH_FORMAT_DEFAULT_STR "default" |
9 | #define BENCH_FORMAT_DEFAULT 0 | 10 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696fc..d66ab799b35f 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h | |||
@@ -2,3 +2,11 @@ | |||
2 | MEMCPY_FN(__memcpy, | 2 | MEMCPY_FN(__memcpy, |
3 | "x86-64-unrolled", | 3 | "x86-64-unrolled", |
4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") | 4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") |
5 | |||
6 | MEMCPY_FN(memcpy_c, | ||
7 | "x86-64-movsq", | ||
8 | "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
9 | |||
10 | MEMCPY_FN(memcpy_c_e, | ||
11 | "x86-64-movsb", | ||
12 | "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 185a96d66dd1..fcd9cf00600a 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S | |||
@@ -1,4 +1,8 @@ | |||
1 | 1 | #define memcpy MEMCPY /* don't hide glibc's memcpy() */ | |
2 | #define altinstr_replacement text | ||
3 | #define globl p2align 4; .globl | ||
4 | #define Lmemcpy_c globl memcpy_c; memcpy_c | ||
5 | #define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e | ||
2 | #include "../../../arch/x86/lib/memcpy_64.S" | 6 | #include "../../../arch/x86/lib/memcpy_64.S" |
3 | /* | 7 | /* |
4 | * We need to provide note.GNU-stack section, saying that we want | 8 | * We need to provide note.GNU-stack section, saying that we want |
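[Editor's note] The define block above is the interesting part: the kernel's memcpy_64.S keeps its alternative-instruction variants behind assembler-local labels (.Lmemcpy_c, .Lmemcpy_c_e), so the wrapper redefines those tokens to emit global symbols before textually including the file, and renames memcpy itself so glibc's version stays callable as the "default" routine. A minimal C sketch of the same include-and-rename technique; all names here are hypothetical, and the inline function stands in for the #include'd foreign source:

	/* Rename the symbol before textually pulling in foreign source so
	 * it can coexist with the libc function of the same name. */
	#define memcpy bench_memcpy	/* don't shadow glibc's memcpy() */
	void *memcpy(void *dst, const void *src, unsigned long n)
	{				/* stands in for the #include'd file */
		char *d = dst;
		const char *s = src;
		while (n--)
			*d++ = *s++;
		return dst;
	}
	#undef memcpy

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char a[8] = "abcdefg", b[8];

		bench_memcpy(b, a, sizeof(a));	/* the renamed copy */
		printf("match: %d\n", !memcmp(a, b, sizeof(a)));
		return 0;
	}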
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b91..71557225bf92 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | 6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> |
7 | */ | 7 | */ |
8 | #include <ctype.h> | ||
9 | 8 | ||
10 | #include "../perf.h" | 9 | #include "../perf.h" |
11 | #include "../util/util.h" | 10 | #include "../util/util.h" |
@@ -24,6 +23,7 @@ | |||
24 | 23 | ||
25 | static const char *length_str = "1MB"; | 24 | static const char *length_str = "1MB"; |
26 | static const char *routine = "default"; | 25 | static const char *routine = "default"; |
26 | static int iterations = 1; | ||
27 | static bool use_clock; | 27 | static bool use_clock; |
28 | static int clock_fd; | 28 | static int clock_fd; |
29 | static bool only_prefault; | 29 | static bool only_prefault; |
@@ -35,6 +35,8 @@ static const struct option options[] = { | |||
35 | "available unit: B, MB, GB (upper and lower)"), | 35 | "available unit: B, MB, GB (upper and lower)"), |
36 | OPT_STRING('r', "routine", &routine, "default", | 36 | OPT_STRING('r', "routine", &routine, "default", |
37 | "Specify routine to copy"), | 37 | "Specify routine to copy"), |
38 | OPT_INTEGER('i', "iterations", &iterations, | ||
39 | "repeat memcpy() invocation this number of times"), | ||
38 | OPT_BOOLEAN('c', "clock", &use_clock, | 40 | OPT_BOOLEAN('c', "clock", &use_clock, |
39 | "Use CPU clock for measuring"), | 41 | "Use CPU clock for measuring"), |
40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | 42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
121 | { | 123 | { |
122 | u64 clock_start = 0ULL, clock_end = 0ULL; | 124 | u64 clock_start = 0ULL, clock_end = 0ULL; |
123 | void *src = NULL, *dst = NULL; | 125 | void *src = NULL, *dst = NULL; |
126 | int i; | ||
124 | 127 | ||
125 | alloc_mem(&src, &dst, len); | 128 | alloc_mem(&src, &dst, len); |
126 | 129 | ||
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
128 | fn(dst, src, len); | 131 | fn(dst, src, len); |
129 | 132 | ||
130 | clock_start = get_clock(); | 133 | clock_start = get_clock(); |
131 | fn(dst, src, len); | 134 | for (i = 0; i < iterations; ++i) |
135 | fn(dst, src, len); | ||
132 | clock_end = get_clock(); | 136 | clock_end = get_clock(); |
133 | 137 | ||
134 | free(src); | 138 | free(src); |
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
140 | { | 144 | { |
141 | struct timeval tv_start, tv_end, tv_diff; | 145 | struct timeval tv_start, tv_end, tv_diff; |
142 | void *src = NULL, *dst = NULL; | 146 | void *src = NULL, *dst = NULL; |
147 | int i; | ||
143 | 148 | ||
144 | alloc_mem(&src, &dst, len); | 149 | alloc_mem(&src, &dst, len); |
145 | 150 | ||
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
147 | fn(dst, src, len); | 152 | fn(dst, src, len); |
148 | 153 | ||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | 154 | BUG_ON(gettimeofday(&tv_start, NULL)); |
150 | fn(dst, src, len); | 155 | for (i = 0; i < iterations; ++i) |
156 | fn(dst, src, len); | ||
151 | BUG_ON(gettimeofday(&tv_end, NULL)); | 157 | BUG_ON(gettimeofday(&tv_end, NULL)); |
152 | 158 | ||
153 | timersub(&tv_end, &tv_start, &tv_diff); | 159 | timersub(&tv_end, &tv_start, &tv_diff); |
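[Editor's note] The new -i/--iterations option exists because a single memcpy() of a modest buffer can finish in less time than gettimeofday() can resolve; looping amortizes the timer overhead across many calls. A minimal sketch of the measurement pattern, using the same glibc timersub() helper as the benchmark (buffer size and iteration count are arbitrary):

	#include <stdio.h>
	#include <string.h>
	#include <sys/time.h>

	int main(void)
	{
		static char src[1 << 20], dst[1 << 20];
		struct timeval start, end, diff;
		const int iterations = 100;

		memcpy(dst, src, sizeof(dst));	/* prefault the pages once */

		gettimeofday(&start, NULL);
		for (int i = 0; i < iterations; i++)
			memcpy(dst, src, sizeof(dst));
		gettimeofday(&end, NULL);

		timersub(&end, &start, &diff);
		/* bytes moved = iterations * 1MB, so MB/s falls out directly */
		printf("%.1f MB/s\n",
		       iterations / (diff.tv_sec + diff.tv_usec / 1e6));
		return 0;
	}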
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | #ifdef ARCH_X86_64 | ||
3 | |||
4 | #define MEMSET_FN(fn, name, desc) \ | ||
5 | extern void *fn(void *, int, size_t); | ||
6 | |||
7 | #include "mem-memset-x86-64-asm-def.h" | ||
8 | |||
9 | #undef MEMSET_FN | ||
10 | |||
11 | #endif | ||
12 | |||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | MEMSET_FN(__memset, | ||
3 | "x86-64-unrolled", | ||
4 | "unrolled memset() in arch/x86/lib/memset_64.S") | ||
5 | |||
6 | MEMSET_FN(memset_c, | ||
7 | "x86-64-stosq", | ||
8 | "movsq-based memset() in arch/x86/lib/memset_64.S") | ||
9 | |||
10 | MEMSET_FN(memset_c_e, | ||
11 | "x86-64-stosb", | ||
12 | "movsb-based memset() in arch/x86/lib/memset_64.S") | ||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..9e5af89ed13a --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S | |||
@@ -0,0 +1,13 @@ | |||
1 | #define memset MEMSET /* don't hide glibc's memset() */ | ||
2 | #define altinstr_replacement text | ||
3 | #define globl p2align 4; .globl | ||
4 | #define Lmemset_c globl memset_c; memset_c | ||
5 | #define Lmemset_c_e globl memset_c_e; memset_c_e | ||
6 | #include "../../../arch/x86/lib/memset_64.S" | ||
7 | |||
8 | /* | ||
9 | * We need to provide note.GNU-stack section, saying that we want | ||
10 | * NOT executable stack. Otherwise the final linking will assume that | ||
11 | * the ELF stack should not be restricted at all and set it RWX. | ||
12 | */ | ||
13 | .section .note.GNU-stack,"",@progbits | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..e9079185bd72 --- /dev/null +++ b/tools/perf/bench/mem-memset.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* | ||
2 | * mem-memset.c | ||
3 | * | ||
4 | * memset: Simple memory set in various ways | ||
5 | * | ||
6 | * Trivial clone of mem-memcpy.c. | ||
7 | */ | ||
8 | |||
9 | #include "../perf.h" | ||
10 | #include "../util/util.h" | ||
11 | #include "../util/parse-options.h" | ||
12 | #include "../util/header.h" | ||
13 | #include "bench.h" | ||
14 | #include "mem-memset-arch.h" | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <stdlib.h> | ||
18 | #include <string.h> | ||
19 | #include <sys/time.h> | ||
20 | #include <errno.h> | ||
21 | |||
22 | #define K 1024 | ||
23 | |||
24 | static const char *length_str = "1MB"; | ||
25 | static const char *routine = "default"; | ||
26 | static int iterations = 1; | ||
27 | static bool use_clock; | ||
28 | static int clock_fd; | ||
29 | static bool only_prefault; | ||
30 | static bool no_prefault; | ||
31 | |||
32 | static const struct option options[] = { | ||
33 | OPT_STRING('l', "length", &length_str, "1MB", | ||
34 | "Specify length of memory to copy. " | ||
35 | "available unit: B, MB, GB (upper and lower)"), | ||
36 | OPT_STRING('r', "routine", &routine, "default", | ||
37 | "Specify routine to copy"), | ||
38 | OPT_INTEGER('i', "iterations", &iterations, | ||
39 | "repeat memset() invocation this number of times"), | ||
40 | OPT_BOOLEAN('c', "clock", &use_clock, | ||
41 | "Use CPU clock for measuring"), | ||
42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
43 | "Show only the result with page faults before memset()"), | ||
44 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
45 | "Show only the result without page faults before memset()"), | ||
46 | OPT_END() | ||
47 | }; | ||
48 | |||
49 | typedef void *(*memset_t)(void *, int, size_t); | ||
50 | |||
51 | struct routine { | ||
52 | const char *name; | ||
53 | const char *desc; | ||
54 | memset_t fn; | ||
55 | }; | ||
56 | |||
57 | static const struct routine routines[] = { | ||
58 | { "default", | ||
59 | "Default memset() provided by glibc", | ||
60 | memset }, | ||
61 | #ifdef ARCH_X86_64 | ||
62 | |||
63 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | ||
64 | #include "mem-memset-x86-64-asm-def.h" | ||
65 | #undef MEMSET_FN | ||
66 | |||
67 | #endif | ||
68 | |||
69 | { NULL, | ||
70 | NULL, | ||
71 | NULL } | ||
72 | }; | ||
73 | |||
74 | static const char * const bench_mem_memset_usage[] = { | ||
75 | "perf bench mem memset <options>", | ||
76 | NULL | ||
77 | }; | ||
78 | |||
79 | static struct perf_event_attr clock_attr = { | ||
80 | .type = PERF_TYPE_HARDWARE, | ||
81 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
82 | }; | ||
83 | |||
84 | static void init_clock(void) | ||
85 | { | ||
86 | clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); | ||
87 | |||
88 | if (clock_fd < 0 && errno == ENOSYS) | ||
89 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
90 | else | ||
91 | BUG_ON(clock_fd < 0); | ||
92 | } | ||
93 | |||
94 | static u64 get_clock(void) | ||
95 | { | ||
96 | int ret; | ||
97 | u64 clk; | ||
98 | |||
99 | ret = read(clock_fd, &clk, sizeof(u64)); | ||
100 | BUG_ON(ret != sizeof(u64)); | ||
101 | |||
102 | return clk; | ||
103 | } | ||
104 | |||
105 | static double timeval2double(struct timeval *ts) | ||
106 | { | ||
107 | return (double)ts->tv_sec + | ||
108 | (double)ts->tv_usec / (double)1000000; | ||
109 | } | ||
110 | |||
111 | static void alloc_mem(void **dst, size_t length) | ||
112 | { | ||
113 | *dst = zalloc(length); | ||
114 | if (!*dst) | ||
115 | die("memory allocation failed - maybe length is too large?\n"); | ||
116 | } | ||
117 | |||
118 | static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) | ||
119 | { | ||
120 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
121 | void *dst = NULL; | ||
122 | int i; | ||
123 | |||
124 | alloc_mem(&dst, len); | ||
125 | |||
126 | if (prefault) | ||
127 | fn(dst, -1, len); | ||
128 | |||
129 | clock_start = get_clock(); | ||
130 | for (i = 0; i < iterations; ++i) | ||
131 | fn(dst, i, len); | ||
132 | clock_end = get_clock(); | ||
133 | |||
134 | free(dst); | ||
135 | return clock_end - clock_start; | ||
136 | } | ||
137 | |||
138 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
139 | { | ||
140 | struct timeval tv_start, tv_end, tv_diff; | ||
141 | void *dst = NULL; | ||
142 | int i; | ||
143 | |||
144 | alloc_mem(&dst, len); | ||
145 | |||
146 | if (prefault) | ||
147 | fn(dst, -1, len); | ||
148 | |||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
150 | for (i = 0; i < iterations; ++i) | ||
151 | fn(dst, i, len); | ||
152 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
153 | |||
154 | timersub(&tv_end, &tv_start, &tv_diff); | ||
155 | |||
156 | free(dst); | ||
157 | return (double)((double)len / timeval2double(&tv_diff)); | ||
158 | } | ||
159 | |||
160 | #define pf (no_prefault ? 0 : 1) | ||
161 | |||
162 | #define print_bps(x) do { \ | ||
163 | if (x < K) \ | ||
164 | printf(" %14lf B/Sec", x); \ | ||
165 | else if (x < K * K) \ | ||
166 | printf(" %14lfd KB/Sec", x / K); \ | ||
167 | else if (x < K * K * K) \ | ||
168 | printf(" %14lf MB/Sec", x / K / K); \ | ||
169 | else \ | ||
170 | printf(" %14lf GB/Sec", x / K / K / K); \ | ||
171 | } while (0) | ||
172 | |||
173 | int bench_mem_memset(int argc, const char **argv, | ||
174 | const char *prefix __used) | ||
175 | { | ||
176 | int i; | ||
177 | size_t len; | ||
178 | double result_bps[2]; | ||
179 | u64 result_clock[2]; | ||
180 | |||
181 | argc = parse_options(argc, argv, options, | ||
182 | bench_mem_memset_usage, 0); | ||
183 | |||
184 | if (use_clock) | ||
185 | init_clock(); | ||
186 | |||
187 | len = (size_t)perf_atoll((char *)length_str); | ||
188 | |||
189 | result_clock[0] = result_clock[1] = 0ULL; | ||
190 | result_bps[0] = result_bps[1] = 0.0; | ||
191 | |||
192 | if ((s64)len <= 0) { | ||
193 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | /* the same as specifying neither prefault nor no-prefault */ | ||
198 | if (only_prefault && no_prefault) | ||
199 | only_prefault = no_prefault = false; | ||
200 | |||
201 | for (i = 0; routines[i].name; i++) { | ||
202 | if (!strcmp(routines[i].name, routine)) | ||
203 | break; | ||
204 | } | ||
205 | if (!routines[i].name) { | ||
206 | printf("Unknown routine:%s\n", routine); | ||
207 | printf("Available routines...\n"); | ||
208 | for (i = 0; routines[i].name; i++) { | ||
209 | printf("\t%s ... %s\n", | ||
210 | routines[i].name, routines[i].desc); | ||
211 | } | ||
212 | return 1; | ||
213 | } | ||
214 | |||
215 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
216 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
217 | |||
218 | if (!only_prefault && !no_prefault) { | ||
219 | /* show both of results */ | ||
220 | if (use_clock) { | ||
221 | result_clock[0] = | ||
222 | do_memset_clock(routines[i].fn, len, false); | ||
223 | result_clock[1] = | ||
224 | do_memset_clock(routines[i].fn, len, true); | ||
225 | } else { | ||
226 | result_bps[0] = | ||
227 | do_memset_gettimeofday(routines[i].fn, | ||
228 | len, false); | ||
229 | result_bps[1] = | ||
230 | do_memset_gettimeofday(routines[i].fn, | ||
231 | len, true); | ||
232 | } | ||
233 | } else { | ||
234 | if (use_clock) { | ||
235 | result_clock[pf] = | ||
236 | do_memset_clock(routines[i].fn, | ||
237 | len, only_prefault); | ||
238 | } else { | ||
239 | result_bps[pf] = | ||
240 | do_memset_gettimeofday(routines[i].fn, | ||
241 | len, only_prefault); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | switch (bench_format) { | ||
246 | case BENCH_FORMAT_DEFAULT: | ||
247 | if (!only_prefault && !no_prefault) { | ||
248 | if (use_clock) { | ||
249 | printf(" %14lf Clock/Byte\n", | ||
250 | (double)result_clock[0] | ||
251 | / (double)len); | ||
252 | printf(" %14lf Clock/Byte (with prefault)\n ", | ||
253 | (double)result_clock[1] | ||
254 | / (double)len); | ||
255 | } else { | ||
256 | print_bps(result_bps[0]); | ||
257 | printf("\n"); | ||
258 | print_bps(result_bps[1]); | ||
259 | printf(" (with prefault)\n"); | ||
260 | } | ||
261 | } else { | ||
262 | if (use_clock) { | ||
263 | printf(" %14lf Clock/Byte", | ||
264 | (double)result_clock[pf] | ||
265 | / (double)len); | ||
266 | } else | ||
267 | print_bps(result_bps[pf]); | ||
268 | |||
269 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
270 | } | ||
271 | break; | ||
272 | case BENCH_FORMAT_SIMPLE: | ||
273 | if (!only_prefault && !no_prefault) { | ||
274 | if (use_clock) { | ||
275 | printf("%lf %lf\n", | ||
276 | (double)result_clock[0] / (double)len, | ||
277 | (double)result_clock[1] / (double)len); | ||
278 | } else { | ||
279 | printf("%lf %lf\n", | ||
280 | result_bps[0], result_bps[1]); | ||
281 | } | ||
282 | } else { | ||
283 | if (use_clock) { | ||
284 | printf("%lf\n", (double)result_clock[pf] | ||
285 | / (double)len); | ||
286 | } else | ||
287 | printf("%lf\n", result_bps[pf]); | ||
288 | } | ||
289 | break; | ||
290 | default: | ||
291 | /* reaching this means there's some disaster: */ | ||
292 | die("unknown format: %d\n", bench_format); | ||
293 | break; | ||
294 | } | ||
295 | |||
296 | return 0; | ||
297 | } | ||
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..b0e74ab2d7a2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { | |||
52 | { "memcpy", | 52 | { "memcpy", |
53 | "Simple memory copy in various ways", | 53 | "Simple memory copy in various ways", |
54 | bench_mem_memcpy }, | 54 | bench_mem_memcpy }, |
55 | { "memset", | ||
56 | "Simple memory set in various ways", | ||
57 | bench_mem_memset }, | ||
55 | suite_all, | 58 | suite_all, |
56 | { NULL, | 59 | { NULL, |
57 | NULL, | 60 | NULL, |
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2296c391d0f5..12c814838993 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
@@ -922,12 +922,12 @@ static const struct option info_options[] = { | |||
922 | OPT_BOOLEAN('t', "threads", &info_threads, | 922 | OPT_BOOLEAN('t', "threads", &info_threads, |
923 | "dump thread list in perf.data"), | 923 | "dump thread list in perf.data"), |
924 | OPT_BOOLEAN('m', "map", &info_map, | 924 | OPT_BOOLEAN('m', "map", &info_map, |
925 | "map of lock instances (name:address table)"), | 925 | "map of lock instances (address:name table)"), |
926 | OPT_END() | 926 | OPT_END() |
927 | }; | 927 | }; |
928 | 928 | ||
929 | static const char * const lock_usage[] = { | 929 | static const char * const lock_usage[] = { |
930 | "perf lock [<options>] {record|trace|report}", | 930 | "perf lock [<options>] {record|report|script|info}", |
931 | NULL | 931 | NULL |
932 | }; | 932 | }; |
933 | 933 | ||
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index fb8566181f27..4935c09dd5b5 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
@@ -58,7 +58,7 @@ static struct { | |||
58 | struct perf_probe_event events[MAX_PROBES]; | 58 | struct perf_probe_event events[MAX_PROBES]; |
59 | struct strlist *dellist; | 59 | struct strlist *dellist; |
60 | struct line_range line_range; | 60 | struct line_range line_range; |
61 | const char *target_module; | 61 | const char *target; |
62 | int max_probe_points; | 62 | int max_probe_points; |
63 | struct strfilter *filter; | 63 | struct strfilter *filter; |
64 | } params; | 64 | } params; |
@@ -246,7 +246,7 @@ static const struct option options[] = { | |||
246 | "file", "vmlinux pathname"), | 246 | "file", "vmlinux pathname"), |
247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, | 247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, |
248 | "directory", "path to kernel source"), | 248 | "directory", "path to kernel source"), |
249 | OPT_STRING('m', "module", ¶ms.target_module, | 249 | OPT_STRING('m', "module", ¶ms.target, |
250 | "modname|path", | 250 | "modname|path", |
251 | "target module name (for online) or path (for offline)"), | 251 | "target module name (for online) or path (for offline)"), |
252 | #endif | 252 | #endif |
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
333 | if (!params.filter) | 333 | if (!params.filter) |
334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, | 334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, |
335 | NULL); | 335 | NULL); |
336 | ret = show_available_funcs(params.target_module, | 336 | ret = show_available_funcs(params.target, |
337 | params.filter); | 337 | params.filter); |
338 | strfilter__delete(params.filter); | 338 | strfilter__delete(params.filter); |
339 | if (ret < 0) | 339 | if (ret < 0) |
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
354 | usage_with_options(probe_usage, options); | 354 | usage_with_options(probe_usage, options); |
355 | } | 355 | } |
356 | 356 | ||
357 | ret = show_line_range(¶ms.line_range, params.target_module); | 357 | ret = show_line_range(¶ms.line_range, params.target); |
358 | if (ret < 0) | 358 | if (ret < 0) |
359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); | 359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); |
360 | return ret; | 360 | return ret; |
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
371 | 371 | ||
372 | ret = show_available_vars(params.events, params.nevents, | 372 | ret = show_available_vars(params.events, params.nevents, |
373 | params.max_probe_points, | 373 | params.max_probe_points, |
374 | params.target_module, | 374 | params.target, |
375 | params.filter, | 375 | params.filter, |
376 | params.show_ext_vars); | 376 | params.show_ext_vars); |
377 | strfilter__delete(params.filter); | 377 | strfilter__delete(params.filter); |
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
393 | if (params.nevents) { | 393 | if (params.nevents) { |
394 | ret = add_perf_probe_events(params.events, params.nevents, | 394 | ret = add_perf_probe_events(params.events, params.nevents, |
395 | params.max_probe_points, | 395 | params.max_probe_points, |
396 | params.target_module, | 396 | params.target, |
397 | params.force_add); | 397 | params.force_add); |
398 | if (ret < 0) { | 398 | if (ret < 0) { |
399 | pr_err(" Error: Failed to add events. (%d)\n", ret); | 399 | pr_err(" Error: Failed to add events. (%d)\n", ret); |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 227b6ae99785..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -44,6 +44,7 @@ struct perf_record { | |||
44 | struct perf_evlist *evlist; | 44 | struct perf_evlist *evlist; |
45 | struct perf_session *session; | 45 | struct perf_session *session; |
46 | const char *progname; | 46 | const char *progname; |
47 | const char *uid_str; | ||
47 | int output; | 48 | int output; |
48 | unsigned int page_size; | 49 | unsigned int page_size; |
49 | int realtime_prio; | 50 | int realtime_prio; |
@@ -208,7 +209,7 @@ fallback_missing_features: | |||
208 | if (opts->exclude_guest_missing) | 209 | if (opts->exclude_guest_missing) |
209 | attr->exclude_guest = attr->exclude_host = 0; | 210 | attr->exclude_guest = attr->exclude_host = 0; |
210 | retry_sample_id: | 211 | retry_sample_id: |
211 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 212 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
212 | try_again: | 213 | try_again: |
213 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, | 214 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, |
214 | opts->group, group_fd) < 0) { | 215 | opts->group, group_fd) < 0) { |
@@ -227,11 +228,11 @@ try_again: | |||
227 | "guest or host samples.\n"); | 228 | "guest or host samples.\n"); |
228 | opts->exclude_guest_missing = true; | 229 | opts->exclude_guest_missing = true; |
229 | goto fallback_missing_features; | 230 | goto fallback_missing_features; |
230 | } else if (opts->sample_id_all_avail) { | 231 | } else if (!opts->sample_id_all_missing) { |
231 | /* | 232 | /* |
232 | * Old kernel, no attr->sample_id_type_all field | 233 | * Old kernel, no attr->sample_id_type_all field |
233 | */ | 234 | */ |
234 | opts->sample_id_all_avail = false; | 235 | opts->sample_id_all_missing = true; |
235 | if (!opts->sample_time && !opts->raw_samples && !time_needed) | 236 | if (!opts->sample_time && !opts->raw_samples && !time_needed) |
236 | attr->sample_type &= ~PERF_SAMPLE_TIME; | 237 | attr->sample_type &= ~PERF_SAMPLE_TIME; |
237 | 238 | ||
@@ -396,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
396 | { | 397 | { |
397 | struct stat st; | 398 | struct stat st; |
398 | int flags; | 399 | int flags; |
399 | int err, output; | 400 | int err, output, feat; |
400 | unsigned long waking = 0; | 401 | unsigned long waking = 0; |
401 | const bool forks = argc > 0; | 402 | const bool forks = argc > 0; |
402 | struct machine *machine; | 403 | struct machine *machine; |
@@ -463,8 +464,17 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
463 | 464 | ||
464 | rec->session = session; | 465 | rec->session = session; |
465 | 466 | ||
466 | if (!rec->no_buildid) | 467 | for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) |
467 | perf_header__set_feat(&session->header, HEADER_BUILD_ID); | 468 | perf_header__set_feat(&session->header, feat); |
469 | |||
470 | if (rec->no_buildid) | ||
471 | perf_header__clear_feat(&session->header, HEADER_BUILD_ID); | ||
472 | |||
473 | if (!have_tracepoints(&evsel_list->entries)) | ||
474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); | ||
475 | |||
476 | if (!rec->opts.branch_stack) | ||
477 | perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); | ||
468 | 478 | ||
469 | if (!rec->file_new) { | 479 | if (!rec->file_new) { |
470 | err = perf_session__read_header(session, output); | 480 | err = perf_session__read_header(session, output); |
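[Editor's note] This hunk flips the default for header features: every feature bit is set up front, then the ones that do not apply (build-ids disabled, no tracepoints, no branch stack) are cleared, instead of opting each one in individually. A toy sketch of the same set-all-then-clear bitmap idiom; the enum values and helper names are hypothetical:

	#include <stdio.h>

	enum {
		FEAT_FIRST,
		FEAT_BUILD_ID = FEAT_FIRST,
		FEAT_TRACE_INFO,
		FEAT_BRANCH_STACK,
		FEAT_LAST
	};

	static unsigned long feat_bits;

	static void set_feat(int f)   { feat_bits |=  1UL << f; }
	static void clear_feat(int f) { feat_bits &= ~(1UL << f); }
	static int  has_feat(int f)   { return !!(feat_bits & (1UL << f)); }

	int main(void)
	{
		int no_buildid = 1;		/* say, --no-buildid was given */

		for (int f = FEAT_FIRST; f < FEAT_LAST; f++)
			set_feat(f);		/* everything on by default... */
		if (no_buildid)
			clear_feat(FEAT_BUILD_ID);	/* ...then opt out */

		printf("build-id: %d, branch-stack: %d\n",
		       has_feat(FEAT_BUILD_ID), has_feat(FEAT_BRANCH_STACK));
		return 0;
	}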
@@ -472,22 +482,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
472 | goto out_delete_session; | 482 | goto out_delete_session; |
473 | } | 483 | } |
474 | 484 | ||
475 | if (have_tracepoints(&evsel_list->entries)) | ||
476 | perf_header__set_feat(&session->header, HEADER_TRACE_INFO); | ||
477 | |||
478 | perf_header__set_feat(&session->header, HEADER_HOSTNAME); | ||
479 | perf_header__set_feat(&session->header, HEADER_OSRELEASE); | ||
480 | perf_header__set_feat(&session->header, HEADER_ARCH); | ||
481 | perf_header__set_feat(&session->header, HEADER_CPUDESC); | ||
482 | perf_header__set_feat(&session->header, HEADER_NRCPUS); | ||
483 | perf_header__set_feat(&session->header, HEADER_EVENT_DESC); | ||
484 | perf_header__set_feat(&session->header, HEADER_CMDLINE); | ||
485 | perf_header__set_feat(&session->header, HEADER_VERSION); | ||
486 | perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); | ||
487 | perf_header__set_feat(&session->header, HEADER_TOTAL_MEM); | ||
488 | perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); | ||
489 | perf_header__set_feat(&session->header, HEADER_CPUID); | ||
490 | |||
491 | if (forks) { | 485 | if (forks) { |
492 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); | 486 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); |
493 | if (err < 0) { | 487 | if (err < 0) { |
@@ -647,6 +641,90 @@ out_delete_session: | |||
647 | return err; | 641 | return err; |
648 | } | 642 | } |
649 | 643 | ||
644 | #define BRANCH_OPT(n, m) \ | ||
645 | { .name = n, .mode = (m) } | ||
646 | |||
647 | #define BRANCH_END { .name = NULL } | ||
648 | |||
649 | struct branch_mode { | ||
650 | const char *name; | ||
651 | int mode; | ||
652 | }; | ||
653 | |||
654 | static const struct branch_mode branch_modes[] = { | ||
655 | BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), | ||
656 | BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), | ||
657 | BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), | ||
658 | BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), | ||
659 | BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), | ||
660 | BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), | ||
661 | BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), | ||
662 | BRANCH_END | ||
663 | }; | ||
664 | |||
665 | static int | ||
666 | parse_branch_stack(const struct option *opt, const char *str, int unset) | ||
667 | { | ||
668 | #define ONLY_PLM \ | ||
669 | (PERF_SAMPLE_BRANCH_USER |\ | ||
670 | PERF_SAMPLE_BRANCH_KERNEL |\ | ||
671 | PERF_SAMPLE_BRANCH_HV) | ||
672 | |||
673 | uint64_t *mode = (uint64_t *)opt->value; | ||
674 | const struct branch_mode *br; | ||
675 | char *s, *os = NULL, *p; | ||
676 | int ret = -1; | ||
677 | |||
678 | if (unset) | ||
679 | return 0; | ||
680 | |||
681 | /* | ||
682 | * cannot set it twice, -b + --branch-filter for instance | ||
683 | */ | ||
684 | if (*mode) | ||
685 | return -1; | ||
686 | |||
687 | /* str may be NULL in case no arg is passed to -b */ | ||
688 | if (str) { | ||
689 | /* because str is read-only */ | ||
690 | s = os = strdup(str); | ||
691 | if (!s) | ||
692 | return -1; | ||
693 | |||
694 | for (;;) { | ||
695 | p = strchr(s, ','); | ||
696 | if (p) | ||
697 | *p = '\0'; | ||
698 | |||
699 | for (br = branch_modes; br->name; br++) { | ||
700 | if (!strcasecmp(s, br->name)) | ||
701 | break; | ||
702 | } | ||
703 | if (!br->name) { | ||
704 | ui__warning("unknown branch filter %s," | ||
705 | " check man page\n", s); | ||
706 | goto error; | ||
707 | } | ||
708 | |||
709 | *mode |= br->mode; | ||
710 | |||
711 | if (!p) | ||
712 | break; | ||
713 | |||
714 | s = p + 1; | ||
715 | } | ||
716 | } | ||
717 | ret = 0; | ||
718 | |||
719 | /* default to any branch */ | ||
720 | if ((*mode & ~ONLY_PLM) == 0) { | ||
721 | *mode = PERF_SAMPLE_BRANCH_ANY; | ||
722 | } | ||
723 | error: | ||
724 | free(os); | ||
725 | return ret; | ||
726 | } | ||
727 | |||
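[Editor's note] parse_branch_stack() above follows a common option-parsing shape: duplicate the read-only argument, split the copy destructively on ',', match each token against a table, and OR the hits into a mask. A stripped-down sketch with hypothetical mode names and bit values:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <strings.h>

	static const struct { const char *name; unsigned mode; } modes[] = {
		{ "u", 1 }, { "k", 2 }, { "any", 4 }, { NULL, 0 }
	};

	static int parse_modes(const char *str, unsigned *mask)
	{
		char *os, *s, *p;
		int i;

		s = os = strdup(str);		/* str may be read-only */
		if (!s)
			return -1;
		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';
			for (i = 0; modes[i].name; i++)
				if (!strcasecmp(s, modes[i].name))
					break;
			if (!modes[i].name) {	/* unknown token */
				free(os);
				return -1;
			}
			*mask |= modes[i].mode;
			if (!p)
				break;
			s = p + 1;
		}
		free(os);
		return 0;
	}

	int main(void)
	{
		unsigned mask = 0;

		if (parse_modes("u,any", &mask) == 0)
			printf("mask = 0x%x\n", mask);	/* prints 0x5 */
		return 0;
	}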
650 | static const char * const record_usage[] = { | 728 | static const char * const record_usage[] = { |
651 | "perf record [<options>] [<command>]", | 729 | "perf record [<options>] [<command>]", |
652 | "perf record [<options>] -- <command> [<options>]", | 730 | "perf record [<options>] -- <command> [<options>]", |
@@ -665,13 +743,10 @@ static const char * const record_usage[] = { | |||
665 | */ | 743 | */ |
666 | static struct perf_record record = { | 744 | static struct perf_record record = { |
667 | .opts = { | 745 | .opts = { |
668 | .target_pid = -1, | ||
669 | .target_tid = -1, | ||
670 | .mmap_pages = UINT_MAX, | 746 | .mmap_pages = UINT_MAX, |
671 | .user_freq = UINT_MAX, | 747 | .user_freq = UINT_MAX, |
672 | .user_interval = ULLONG_MAX, | 748 | .user_interval = ULLONG_MAX, |
673 | .freq = 1000, | 749 | .freq = 1000, |
674 | .sample_id_all_avail = true, | ||
675 | }, | 750 | }, |
676 | .write_mode = WRITE_FORCE, | 751 | .write_mode = WRITE_FORCE, |
677 | .file_new = true, | 752 | .file_new = true, |
@@ -690,9 +765,9 @@ const struct option record_options[] = { | |||
690 | parse_events_option), | 765 | parse_events_option), |
691 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", | 766 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", |
692 | "event filter", parse_filter), | 767 | "event filter", parse_filter), |
693 | OPT_INTEGER('p', "pid", &record.opts.target_pid, | 768 | OPT_STRING('p', "pid", &record.opts.target_pid, "pid", |
694 | "record events on existing process id"), | 769 | "record events on existing process id"), |
695 | OPT_INTEGER('t', "tid", &record.opts.target_tid, | 770 | OPT_STRING('t', "tid", &record.opts.target_tid, "tid", |
696 | "record events on existing thread id"), | 771 | "record events on existing thread id"), |
697 | OPT_INTEGER('r', "realtime", &record.realtime_prio, | 772 | OPT_INTEGER('r', "realtime", &record.realtime_prio, |
698 | "collect data with this RT SCHED_FIFO priority"), | 773 | "collect data with this RT SCHED_FIFO priority"), |
@@ -738,6 +813,15 @@ const struct option record_options[] = { | |||
738 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", | 813 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", |
739 | "monitor event in cgroup name only", | 814 | "monitor event in cgroup name only", |
740 | parse_cgroups), | 815 | parse_cgroups), |
816 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), | ||
817 | |||
818 | OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, | ||
819 | "branch any", "sample any taken branches", | ||
820 | parse_branch_stack), | ||
821 | |||
822 | OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, | ||
823 | "branch filter mask", "branch stack filter modes", | ||
824 | parse_branch_stack), | ||
741 | OPT_END() | 825 | OPT_END() |
742 | }; | 826 | }; |
743 | 827 | ||
@@ -758,8 +842,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
758 | 842 | ||
759 | argc = parse_options(argc, argv, record_options, record_usage, | 843 | argc = parse_options(argc, argv, record_options, record_usage, |
760 | PARSE_OPT_STOP_AT_NON_OPTION); | 844 | PARSE_OPT_STOP_AT_NON_OPTION); |
761 | if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && | 845 | if (!argc && !rec->opts.target_pid && !rec->opts.target_tid && |
762 | !rec->opts.system_wide && !rec->opts.cpu_list) | 846 | !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) |
763 | usage_with_options(record_usage, record_options); | 847 | usage_with_options(record_usage, record_options); |
764 | 848 | ||
765 | if (rec->force && rec->append_file) { | 849 | if (rec->force && rec->append_file) { |
@@ -799,11 +883,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
799 | goto out_symbol_exit; | 883 | goto out_symbol_exit; |
800 | } | 884 | } |
801 | 885 | ||
802 | if (rec->opts.target_pid != -1) | 886 | rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid, |
887 | rec->opts.target_pid); | ||
888 | if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) | ||
889 | goto out_free_fd; | ||
890 | |||
891 | if (rec->opts.target_pid) | ||
803 | rec->opts.target_tid = rec->opts.target_pid; | 892 | rec->opts.target_tid = rec->opts.target_pid; |
804 | 893 | ||
805 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, | 894 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, |
806 | rec->opts.target_tid, rec->opts.cpu_list) < 0) | 895 | rec->opts.target_tid, rec->opts.uid, |
896 | rec->opts.cpu_list) < 0) | ||
807 | usage_with_options(record_usage, record_options); | 897 | usage_with_options(record_usage, record_options); |
808 | 898 | ||
809 | list_for_each_entry(pos, &evsel_list->entries, node) { | 899 | list_for_each_entry(pos, &evsel_list->entries, node) { |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -53,6 +53,82 @@ struct perf_report { | |||
53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
54 | }; | 54 | }; |
55 | 55 | ||
56 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | ||
57 | struct addr_location *al, | ||
58 | struct perf_sample *sample, | ||
59 | struct perf_evsel *evsel, | ||
60 | struct machine *machine) | ||
61 | { | ||
62 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
63 | struct symbol *parent = NULL; | ||
64 | int err = 0; | ||
65 | unsigned i; | ||
66 | struct hist_entry *he; | ||
67 | struct branch_info *bi, *bx; | ||
68 | |||
69 | if ((sort__has_parent || symbol_conf.use_callchain) | ||
70 | && sample->callchain) { | ||
71 | err = machine__resolve_callchain(machine, evsel, al->thread, | ||
72 | sample->callchain, &parent); | ||
73 | if (err) | ||
74 | return err; | ||
75 | } | ||
76 | |||
77 | bi = machine__resolve_bstack(machine, al->thread, | ||
78 | sample->branch_stack); | ||
79 | if (!bi) | ||
80 | return -ENOMEM; | ||
81 | |||
82 | for (i = 0; i < sample->branch_stack->nr; i++) { | ||
83 | if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) | ||
84 | continue; | ||
85 | /* | ||
86 | * The report shows the percentage of total branches captured | ||
87 | * and not events sampled. Thus we use a pseudo period of 1. | ||
88 | */ | ||
89 | he = __hists__add_branch_entry(&evsel->hists, al, parent, | ||
90 | &bi[i], 1); | ||
91 | if (he) { | ||
92 | struct annotation *notes; | ||
93 | err = -ENOMEM; | ||
94 | bx = he->branch_info; | ||
95 | if (bx->from.sym && use_browser > 0) { | ||
96 | notes = symbol__annotation(bx->from.sym); | ||
97 | if (!notes->src | ||
98 | && symbol__alloc_hist(bx->from.sym) < 0) | ||
99 | goto out; | ||
100 | |||
101 | err = symbol__inc_addr_samples(bx->from.sym, | ||
102 | bx->from.map, | ||
103 | evsel->idx, | ||
104 | bx->from.al_addr); | ||
105 | if (err) | ||
106 | goto out; | ||
107 | } | ||
108 | |||
109 | if (bx->to.sym && use_browser > 0) { | ||
110 | notes = symbol__annotation(bx->to.sym); | ||
111 | if (!notes->src | ||
112 | && symbol__alloc_hist(bx->to.sym) < 0) | ||
113 | goto out; | ||
114 | |||
115 | err = symbol__inc_addr_samples(bx->to.sym, | ||
116 | bx->to.map, | ||
117 | evsel->idx, | ||
118 | bx->to.al_addr); | ||
119 | if (err) | ||
120 | goto out; | ||
121 | } | ||
122 | evsel->hists.stats.total_period += 1; | ||
123 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | ||
124 | err = 0; | ||
125 | } else | ||
126 | return -ENOMEM; | ||
127 | } | ||
128 | out: | ||
129 | return err; | ||
130 | } | ||
131 | |||
56 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 132 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, |
57 | struct addr_location *al, | 133 | struct addr_location *al, |
58 | struct perf_sample *sample, | 134 | struct perf_sample *sample, |
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, | |||
126 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) | 202 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) |
127 | return 0; | 203 | return 0; |
128 | 204 | ||
129 | if (al.map != NULL) | 205 | if (sort__branch_mode == 1) { |
130 | al.map->dso->hit = 1; | 206 | if (perf_report__add_branch_hist_entry(tool, &al, sample, |
207 | evsel, machine)) { | ||
208 | pr_debug("problem adding lbr entry, skipping event\n"); | ||
209 | return -1; | ||
210 | } | ||
211 | } else { | ||
212 | if (al.map != NULL) | ||
213 | al.map->dso->hit = 1; | ||
131 | 214 | ||
132 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { | 215 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { |
133 | pr_debug("problem incrementing symbol period, skipping event\n"); | 216 | pr_debug("problem incrementing symbol period, skipping event\n"); |
134 | return -1; | 217 | return -1; |
218 | } | ||
135 | } | 219 | } |
136 | |||
137 | return 0; | 220 | return 0; |
138 | } | 221 | } |
139 | 222 | ||
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) | |||
188 | } | 271 | } |
189 | } | 272 | } |
190 | 273 | ||
274 | if (sort__branch_mode == 1) { | ||
275 | if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { | ||
276 | fprintf(stderr, "selected -b but no branch data." | ||
277 | " Did you call perf record without" | ||
278 | " -b?\n"); | ||
279 | return -1; | ||
280 | } | ||
281 | } | ||
282 | |||
191 | return 0; | 283 | return 0; |
192 | } | 284 | } |
193 | 285 | ||
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) | |||
246 | { | 338 | { |
247 | int ret = -EINVAL; | 339 | int ret = -EINVAL; |
248 | u64 nr_samples; | 340 | u64 nr_samples; |
249 | struct perf_session *session; | 341 | struct perf_session *session = rep->session; |
250 | struct perf_evsel *pos; | 342 | struct perf_evsel *pos; |
251 | struct map *kernel_map; | 343 | struct map *kernel_map; |
252 | struct kmap *kernel_kmap; | 344 | struct kmap *kernel_kmap; |
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) | |||
254 | 346 | ||
255 | signal(SIGINT, sig_handler); | 347 | signal(SIGINT, sig_handler); |
256 | 348 | ||
257 | session = perf_session__new(rep->input_name, O_RDONLY, | ||
258 | rep->force, false, &rep->tool); | ||
259 | if (session == NULL) | ||
260 | return -ENOMEM; | ||
261 | |||
262 | rep->session = session; | ||
263 | |||
264 | if (rep->cpu_list) { | 349 | if (rep->cpu_list) { |
265 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, | 350 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, |
266 | rep->cpu_bitmap); | 351 | rep->cpu_bitmap); |
@@ -427,9 +512,19 @@ setup: | |||
427 | return 0; | 512 | return 0; |
428 | } | 513 | } |
429 | 514 | ||
515 | static int | ||
516 | parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) | ||
517 | { | ||
518 | sort__branch_mode = !unset; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
430 | int cmd_report(int argc, const char **argv, const char *prefix __used) | 522 | int cmd_report(int argc, const char **argv, const char *prefix __used) |
431 | { | 523 | { |
524 | struct perf_session *session; | ||
432 | struct stat st; | 525 | struct stat st; |
526 | bool has_br_stack = false; | ||
527 | int ret = -1; | ||
433 | char callchain_default_opt[] = "fractal,0.5,callee"; | 528 | char callchain_default_opt[] = "fractal,0.5,callee"; |
434 | const char * const report_usage[] = { | 529 | const char * const report_usage[] = { |
435 | "perf report [<options>]", | 530 | "perf report [<options>]", |
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
477 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, | 572 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, |
478 | "Use the stdio interface"), | 573 | "Use the stdio interface"), |
479 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 574 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
480 | "sort by key(s): pid, comm, dso, symbol, parent"), | 575 | "sort by key(s): pid, comm, dso, symbol, parent, dso_to," |
576 | " dso_from, symbol_to, symbol_from, mispredict"), | ||
481 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 577 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
482 | "Show sample percentage for different cpu modes"), | 578 | "Show sample percentage for different cpu modes"), |
483 | OPT_STRING('p', "parent", &parent_pattern, "regex", | 579 | OPT_STRING('p', "parent", &parent_pattern, "regex", |
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
517 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 613 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
518 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, | 614 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, |
519 | "Show a column with the sum of periods"), | 615 | "Show a column with the sum of periods"), |
616 | OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", | ||
617 | "use branch records for histogram filling", parse_branch_mode), | ||
520 | OPT_END() | 618 | OPT_END() |
521 | }; | 619 | }; |
522 | 620 | ||
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
536 | else | 634 | else |
537 | report.input_name = "perf.data"; | 635 | report.input_name = "perf.data"; |
538 | } | 636 | } |
637 | session = perf_session__new(report.input_name, O_RDONLY, | ||
638 | report.force, false, &report.tool); | ||
639 | if (session == NULL) | ||
640 | return -ENOMEM; | ||
539 | 641 | ||
540 | if (strcmp(report.input_name, "-") != 0) | 642 | report.session = session; |
643 | |||
644 | has_br_stack = perf_header__has_feat(&session->header, | ||
645 | HEADER_BRANCH_STACK); | ||
646 | |||
647 | if (sort__branch_mode == -1 && has_br_stack) | ||
648 | sort__branch_mode = 1; | ||
649 | |||
650 | /* sort__branch_mode could be 0 if --no-branch-stack */ | ||
651 | if (sort__branch_mode == 1) { | ||
652 | /* | ||
653 | * if no sort_order is provided, then specify | ||
654 | * branch-mode specific order | ||
655 | */ | ||
656 | if (sort_order == default_sort_order) | ||
657 | sort_order = "comm,dso_from,symbol_from," | ||
658 | "dso_to,symbol_to"; | ||
659 | |||
660 | } | ||
661 | |||
662 | if (strcmp(report.input_name, "-") != 0) { | ||
541 | setup_browser(true); | 663 | setup_browser(true); |
542 | else | 664 | } else { |
543 | use_browser = 0; | 665 | use_browser = 0; |
666 | } | ||
544 | 667 | ||
545 | /* | 668 | /* |
546 | * Only in the newt browser we are doing integrated annotation, | 669 | * Only in the newt browser we are doing integrated annotation, |
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
568 | } | 691 | } |
569 | 692 | ||
570 | if (symbol__init() < 0) | 693 | if (symbol__init() < 0) |
571 | return -1; | 694 | goto error; |
572 | 695 | ||
573 | setup_sorting(report_usage, options); | 696 | setup_sorting(report_usage, options); |
574 | 697 | ||
575 | if (parent_pattern != default_parent_pattern) { | 698 | if (parent_pattern != default_parent_pattern) { |
576 | if (sort_dimension__add("parent") < 0) | 699 | if (sort_dimension__add("parent") < 0) |
577 | return -1; | 700 | goto error; |
578 | 701 | ||
579 | /* | 702 | /* |
580 | * Only show the parent fields if we explicitly | 703 | * Only show the parent fields if we explicitly |
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
592 | if (argc) | 715 | if (argc) |
593 | usage_with_options(report_usage, options); | 716 | usage_with_options(report_usage, options); |
594 | 717 | ||
595 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
596 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); | 718 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); |
597 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
598 | 719 | ||
599 | return __cmd_report(&report); | 720 | if (sort__branch_mode == 1) { |
721 | sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); | ||
722 | sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); | ||
723 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); | ||
724 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); | ||
725 | } else { | ||
726 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
727 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
728 | } | ||
729 | |||
730 | ret = __cmd_report(&report); | ||
731 | error: | ||
732 | perf_session__delete(session); | ||
733 | return ret; | ||
600 | } | 734 | } |
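[Editor's note] Moving session creation from __cmd_report() into cmd_report() also forces the single-exit cleanup seen above: every failure after perf_session__new() now funnels through the error label, so the session is deleted exactly once on all paths. The shape, as a small self-contained sketch with illustrative names:

	#include <stdio.h>
	#include <stdlib.h>

	struct session { int dummy; };

	static struct session *session_new(void)
	{
		return calloc(1, sizeof(struct session));
	}
	static void session_delete(struct session *s) { free(s); }
	static int step_one(struct session *s) { (void)s; return 0; }
	static int step_two(struct session *s) { (void)s; return -1; }

	static int cmd_report_like(void)
	{
		struct session *session = session_new();
		int ret = -1;

		if (!session)
			return -1;
		if (step_one(session) < 0)
			goto error;		/* all failures land here */
		if (step_two(session) < 0)
			goto error;
		ret = 0;
	error:
		session_delete(session);	/* freed exactly once */
		return ret;
	}

	int main(void)
	{
		printf("ret = %d\n", cmd_report_like());
		return 0;
	}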
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb68ddf257b7..d4ce733b9eba 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -40,6 +40,7 @@ enum perf_output_field { | |||
40 | PERF_OUTPUT_SYM = 1U << 8, | 40 | PERF_OUTPUT_SYM = 1U << 8, |
41 | PERF_OUTPUT_DSO = 1U << 9, | 41 | PERF_OUTPUT_DSO = 1U << 9, |
42 | PERF_OUTPUT_ADDR = 1U << 10, | 42 | PERF_OUTPUT_ADDR = 1U << 10, |
43 | PERF_OUTPUT_SYMOFFSET = 1U << 11, | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | struct output_option { | 46 | struct output_option { |
@@ -57,6 +58,7 @@ struct output_option { | |||
57 | {.str = "sym", .field = PERF_OUTPUT_SYM}, | 58 | {.str = "sym", .field = PERF_OUTPUT_SYM}, |
58 | {.str = "dso", .field = PERF_OUTPUT_DSO}, | 59 | {.str = "dso", .field = PERF_OUTPUT_DSO}, |
59 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, | 60 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, |
61 | {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | /* default set to maintain compatibility with current format */ | 64 | /* default set to maintain compatibility with current format */ |
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
193 | "to symbols.\n"); | 195 | "to symbols.\n"); |
194 | return -EINVAL; | 196 | return -EINVAL; |
195 | } | 197 | } |
198 | if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) { | ||
199 | pr_err("Display of offsets requested but symbol is not" | ||
200 | "selected.\n"); | ||
201 | return -EINVAL; | ||
202 | } | ||
196 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { | 203 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { |
197 | pr_err("Display of DSO requested but neither sample IP nor " | 204 | pr_err("Display of DSO requested but neither sample IP nor " |
198 | "sample address\nis selected. Hence, no addresses to convert " | 205 | "sample address\nis selected. Hence, no addresses to convert " |
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample, | |||
300 | } else | 307 | } else |
301 | evname = __event_name(attr->type, attr->config); | 308 | evname = __event_name(attr->type, attr->config); |
302 | 309 | ||
303 | printf("%s: ", evname ? evname : "(unknown)"); | 310 | printf("%s: ", evname ? evname : "[unknown]"); |
304 | } | 311 | } |
305 | } | 312 | } |
306 | 313 | ||
314 | static bool is_bts_event(struct perf_event_attr *attr) | ||
315 | { | ||
316 | return ((attr->type == PERF_TYPE_HARDWARE) && | ||
317 | (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
318 | (attr->sample_period == 1)); | ||
319 | } | ||
320 | |||
307 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | 321 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) |
308 | { | 322 | { |
309 | if ((attr->type == PERF_TYPE_SOFTWARE) && | 323 | if ((attr->type == PERF_TYPE_SOFTWARE) && |
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | |||
312 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) | 326 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) |
313 | return true; | 327 | return true; |
314 | 328 | ||
329 | if (is_bts_event(attr)) | ||
330 | return true; | ||
331 | |||
315 | return false; | 332 | return false; |
316 | } | 333 | } |
317 | 334 | ||
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event, | |||
323 | { | 340 | { |
324 | struct addr_location al; | 341 | struct addr_location al; |
325 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 342 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
326 | const char *symname, *dsoname; | ||
327 | 343 | ||
328 | printf("%16" PRIx64, sample->addr); | 344 | printf("%16" PRIx64, sample->addr); |
329 | 345 | ||
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event, | |||
343 | al.sym = map__find_symbol(al.map, al.addr, NULL); | 359 | al.sym = map__find_symbol(al.map, al.addr, NULL); |
344 | 360 | ||
345 | if (PRINT_FIELD(SYM)) { | 361 | if (PRINT_FIELD(SYM)) { |
346 | if (al.sym && al.sym->name) | 362 | printf(" "); |
347 | symname = al.sym->name; | 363 | if (PRINT_FIELD(SYMOFFSET)) |
364 | symbol__fprintf_symname_offs(al.sym, &al, stdout); | ||
348 | else | 365 | else |
349 | symname = ""; | 366 | symbol__fprintf_symname(al.sym, stdout); |
350 | |||
351 | printf(" %16s", symname); | ||
352 | } | 367 | } |
353 | 368 | ||
354 | if (PRINT_FIELD(DSO)) { | 369 | if (PRINT_FIELD(DSO)) { |
355 | if (al.map && al.map->dso && al.map->dso->name) | 370 | printf(" ("); |
356 | dsoname = al.map->dso->name; | 371 | map__fprintf_dsoname(al.map, stdout); |
357 | else | 372 | printf(")"); |
358 | dsoname = ""; | 373 | } |
374 | } | ||
359 | 375 | ||
360 | printf(" (%s)", dsoname); | 376 | static void print_sample_bts(union perf_event *event, |
377 | struct perf_sample *sample, | ||
378 | struct perf_evsel *evsel, | ||
379 | struct machine *machine, | ||
380 | struct thread *thread) | ||
381 | { | ||
382 | struct perf_event_attr *attr = &evsel->attr; | ||
383 | |||
384 | /* print branch_from information */ | ||
385 | if (PRINT_FIELD(IP)) { | ||
386 | if (!symbol_conf.use_callchain) | ||
387 | printf(" "); | ||
388 | else | ||
389 | printf("\n"); | ||
390 | perf_event__print_ip(event, sample, machine, evsel, | ||
391 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), | ||
392 | PRINT_FIELD(SYMOFFSET)); | ||
361 | } | 393 | } |
394 | |||
395 | printf(" => "); | ||
396 | |||
397 | /* print branch_to information */ | ||
398 | if (PRINT_FIELD(ADDR)) | ||
399 | print_sample_addr(event, sample, machine, thread, attr); | ||
400 | |||
401 | printf("\n"); | ||
362 | } | 402 | } |
363 | 403 | ||
364 | static void process_event(union perf_event *event __unused, | 404 | static void process_event(union perf_event *event __unused, |
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused, | |||
374 | 414 | ||
375 | print_sample_start(sample, thread, attr); | 415 | print_sample_start(sample, thread, attr); |
376 | 416 | ||
417 | if (is_bts_event(attr)) { | ||
418 | print_sample_bts(event, sample, evsel, machine, thread); | ||
419 | return; | ||
420 | } | ||
421 | |||
377 | if (PRINT_FIELD(TRACE)) | 422 | if (PRINT_FIELD(TRACE)) |
378 | print_trace_event(sample->cpu, sample->raw_data, | 423 | print_trace_event(sample->cpu, sample->raw_data, |
379 | sample->raw_size); | 424 | sample->raw_size); |
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused, | |||
387 | else | 432 | else |
388 | printf("\n"); | 433 | printf("\n"); |
389 | perf_event__print_ip(event, sample, machine, evsel, | 434 | perf_event__print_ip(event, sample, machine, evsel, |
390 | PRINT_FIELD(SYM), PRINT_FIELD(DSO)); | 435 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), |
436 | PRINT_FIELD(SYMOFFSET)); | ||
391 | } | 437 | } |
392 | 438 | ||
393 | printf("\n"); | 439 | printf("\n"); |
@@ -1097,7 +1143,10 @@ static const struct option options[] = { | |||
1097 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", | 1143 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", |
1098 | "Look for files with symbols relative to this directory"), | 1144 | "Look for files with symbols relative to this directory"), |
1099 | OPT_CALLBACK('f', "fields", NULL, "str", | 1145 | OPT_CALLBACK('f', "fields", NULL, "str", |
1100 | "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", | 1146 | "comma separated output fields prepend with 'type:'. " |
1147 | "Valid types: hw,sw,trace,raw. " | ||
1148 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | ||
1149 | "addr,symoff", | ||
1101 | parse_output_fields), | 1150 | parse_output_fields), |
1102 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1151 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1103 | "system-wide collection from all CPUs"), | 1152 | "system-wide collection from all CPUs"), |
@@ -1106,6 +1155,9 @@ static const struct option options[] = { | |||
1106 | "only display events for these comms"), | 1155 | "only display events for these comms"), |
1107 | OPT_BOOLEAN('I', "show-info", &show_full_info, | 1156 | OPT_BOOLEAN('I', "show-info", &show_full_info, |
1108 | "display extended information from perf.data file"), | 1157 | "display extended information from perf.data file"), |
1158 | OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, | ||
1159 | "Show the path of [kernel.kallsyms]"), | ||
1160 | |||
1109 | OPT_END() | 1161 | OPT_END() |
1110 | }; | 1162 | }; |
1111 | 1163 | ||
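
The print_sample_bts() path above renders one branch-trace record per line as a "source => target" pair: the from-side goes through the regular IP printer (with symbol and DSO resolution) and the to-side through print_sample_addr(). A minimal standalone sketch of that output shape, using a hypothetical bts_sample struct in place of the real perf_sample fields:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the fields print_sample_bts() reads
     * from struct perf_sample (sample->ip and sample->addr). */
    struct bts_sample {
            uint64_t ip;    /* branch_from */
            uint64_t addr;  /* branch_to */
    };

    static void print_bts_pair(const struct bts_sample *s)
    {
            printf("%16" PRIx64, s->ip);            /* branch_from side */
            printf(" => ");
            printf("%16" PRIx64 "\n", s->addr);     /* branch_to side */
    }

    int main(void)
    {
            struct bts_sample s = { 0xffffffff8103cbd0ULL, 0x3b7ae09e70ULL };

            print_bts_pair(&s);
            return 0;
    }

perf script takes this path whenever is_bts_event() matches the evsel's attributes, bypassing the generic field printing entirely.
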
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5d2a63eba66..ea40e4e8b227 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -182,8 +182,8 @@ static int run_count = 1; | |||
182 | static bool no_inherit = false; | 182 | static bool no_inherit = false; |
183 | static bool scale = true; | 183 | static bool scale = true; |
184 | static bool no_aggr = false; | 184 | static bool no_aggr = false; |
185 | static pid_t target_pid = -1; | 185 | static const char *target_pid; |
186 | static pid_t target_tid = -1; | 186 | static const char *target_tid; |
187 | static pid_t child_pid = -1; | 187 | static pid_t child_pid = -1; |
188 | static bool null_run = false; | 188 | static bool null_run = false; |
189 | static int detailed_run = 0; | 189 | static int detailed_run = 0; |
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, | |||
296 | if (system_wide) | 296 | if (system_wide) |
297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, | 297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, |
298 | group, group_fd); | 298 | group, group_fd); |
299 | if (target_pid == -1 && target_tid == -1) { | 299 | if (!target_pid && !target_tid) { |
300 | attr->disabled = 1; | 300 | attr->disabled = 1; |
301 | attr->enable_on_exec = 1; | 301 | attr->enable_on_exec = 1; |
302 | } | 302 | } |
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
446 | exit(-1); | 446 | exit(-1); |
447 | } | 447 | } |
448 | 448 | ||
449 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 449 | if (!target_tid && !target_pid && !system_wide) |
450 | evsel_list->threads->map[0] = child_pid; | 450 | evsel_list->threads->map[0] = child_pid; |
451 | 451 | ||
452 | /* | 452 | /* |
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
577 | fprintf(output, " # %8.3f CPUs utilized ", | 577 | fprintf(output, " # %8.3f CPUs utilized ", |
578 | avg / avg_stats(&walltime_nsecs_stats)); | 578 | avg / avg_stats(&walltime_nsecs_stats)); |
579 | else | ||
580 | fprintf(output, " "); | ||
579 | } | 581 | } |
580 | 582 | ||
581 | /* used for get_ratio_color() */ | 583 | /* used for get_ratio_color() */ |
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
844 | 846 | ||
845 | fprintf(output, " # %8.3f GHz ", ratio); | 847 | fprintf(output, " # %8.3f GHz ", ratio); |
846 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 848 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
849 | char unit = 'M'; | ||
850 | |||
847 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 851 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
848 | 852 | ||
849 | if (total) | 853 | if (total) |
850 | ratio = 1000.0 * avg / total; | 854 | ratio = 1000.0 * avg / total; |
855 | if (ratio < 0.001) { | ||
856 | ratio *= 1000; | ||
857 | unit = 'K'; | ||
858 | } | ||
851 | 859 | ||
852 | fprintf(output, " # %8.3f M/sec ", ratio); | 860 | fprintf(output, " # %8.3f %c/sec ", ratio, unit); |
853 | } else { | 861 | } else { |
854 | fprintf(output, " "); | 862 | fprintf(output, " "); |
855 | } | 863 | } |
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv) | |||
960 | if (!csv_output) { | 968 | if (!csv_output) { |
961 | fprintf(output, "\n"); | 969 | fprintf(output, "\n"); |
962 | fprintf(output, " Performance counter stats for "); | 970 | fprintf(output, " Performance counter stats for "); |
963 | if(target_pid == -1 && target_tid == -1) { | 971 | if (!target_pid && !target_tid) { |
964 | fprintf(output, "\'%s", argv[0]); | 972 | fprintf(output, "\'%s", argv[0]); |
965 | for (i = 1; i < argc; i++) | 973 | for (i = 1; i < argc; i++) |
966 | fprintf(output, " %s", argv[i]); | 974 | fprintf(output, " %s", argv[i]); |
967 | } else if (target_pid != -1) | 975 | } else if (target_pid) |
968 | fprintf(output, "process id \'%d", target_pid); | 976 | fprintf(output, "process id \'%s", target_pid); |
969 | else | 977 | else |
970 | fprintf(output, "thread id \'%d", target_tid); | 978 | fprintf(output, "thread id \'%s", target_tid); |
971 | 979 | ||
972 | fprintf(output, "\'"); | 980 | fprintf(output, "\'"); |
973 | if (run_count > 1) | 981 | if (run_count > 1) |
@@ -1041,10 +1049,10 @@ static const struct option options[] = { | |||
1041 | "event filter", parse_filter), | 1049 | "event filter", parse_filter), |
1042 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 1050 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
1043 | "child tasks do not inherit counters"), | 1051 | "child tasks do not inherit counters"), |
1044 | OPT_INTEGER('p', "pid", &target_pid, | 1052 | OPT_STRING('p', "pid", &target_pid, "pid", |
1045 | "stat events on existing process id"), | 1053 | "stat events on existing process id"), |
1046 | OPT_INTEGER('t', "tid", &target_tid, | 1054 | OPT_STRING('t', "tid", &target_tid, "tid", |
1047 | "stat events on existing thread id"), | 1055 | "stat events on existing thread id"), |
1048 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1056 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1049 | "system-wide collection from all CPUs"), | 1057 | "system-wide collection from all CPUs"), |
1050 | OPT_BOOLEAN('g', "group", &group, | 1058 | OPT_BOOLEAN('g', "group", &group, |
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
1182 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | 1190 | } else if (big_num_opt == 0) /* User passed --no-big-num */ |
1183 | big_num = false; | 1191 | big_num = false; |
1184 | 1192 | ||
1185 | if (!argc && target_pid == -1 && target_tid == -1) | 1193 | if (!argc && !target_pid && !target_tid) |
1186 | usage_with_options(stat_usage, options); | 1194 | usage_with_options(stat_usage, options); |
1187 | if (run_count <= 0) | 1195 | if (run_count <= 0) |
1188 | usage_with_options(stat_usage, options); | 1196 | usage_with_options(stat_usage, options); |
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
1198 | if (add_default_attributes()) | 1206 | if (add_default_attributes()) |
1199 | goto out; | 1207 | goto out; |
1200 | 1208 | ||
1201 | if (target_pid != -1) | 1209 | if (target_pid) |
1202 | target_tid = target_pid; | 1210 | target_tid = target_pid; |
1203 | 1211 | ||
1204 | evsel_list->threads = thread_map__new(target_pid, target_tid); | 1212 | evsel_list->threads = thread_map__new_str(target_pid, |
1213 | target_tid, UINT_MAX); | ||
1205 | if (evsel_list->threads == NULL) { | 1214 | if (evsel_list->threads == NULL) { |
1206 | pr_err("Problems finding threads of monitor\n"); | 1215 | pr_err("Problems finding threads of monitor\n"); |
1207 | usage_with_options(stat_usage, options); | 1216 | usage_with_options(stat_usage, options); |
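
With target_pid/target_tid now plain strings, perf stat can hand a comma-separated list of ids to thread_map__new_str() instead of a single pid_t, and the "is a target given?" tests become simple NULL checks. A hedged sketch of the parsing such a helper implies; parse_id_list() here is illustrative, not the real thread_map code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>

    /* Illustrative only: split "1234,1235,1240" into an id array, roughly
     * the job a thread_map__new_str()-style helper has to do internally. */
    static int parse_id_list(const char *str, pid_t *ids, int max)
    {
            char *copy = strdup(str), *tok, *saveptr = NULL;
            int n = 0;

            if (copy == NULL)
                    return -1;

            for (tok = strtok_r(copy, ",", &saveptr);
                 tok != NULL && n < max;
                 tok = strtok_r(NULL, ",", &saveptr))
                    ids[n++] = (pid_t)strtol(tok, NULL, 10);

            free(copy);
            return n;
    }

    int main(void)
    {
            pid_t ids[16];
            int i, n = parse_id_list("1234,1235,1240", ids, 16);

            for (i = 0; i < n; i++)
                    printf("monitoring pid %d\n", (int)ids[i]);
            return 0;
    }
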
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 3854e869dce1..3e087ce8daa6 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c | |||
@@ -15,6 +15,8 @@ | |||
15 | #include "util/thread_map.h" | 15 | #include "util/thread_map.h" |
16 | #include "../../include/linux/hw_breakpoint.h" | 16 | #include "../../include/linux/hw_breakpoint.h" |
17 | 17 | ||
18 | #include <sys/mman.h> | ||
19 | |||
18 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) | 20 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) |
19 | { | 21 | { |
20 | bool *visited = symbol__priv(sym); | 22 | bool *visited = symbol__priv(sym); |
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void) | |||
276 | return -1; | 278 | return -1; |
277 | } | 279 | } |
278 | 280 | ||
279 | threads = thread_map__new(-1, getpid()); | 281 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
280 | if (threads == NULL) { | 282 | if (threads == NULL) { |
281 | pr_debug("thread_map__new\n"); | 283 | pr_debug("thread_map__new\n"); |
282 | return -1; | 284 | return -1; |
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void) | |||
342 | return -1; | 344 | return -1; |
343 | } | 345 | } |
344 | 346 | ||
345 | threads = thread_map__new(-1, getpid()); | 347 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
346 | if (threads == NULL) { | 348 | if (threads == NULL) { |
347 | pr_debug("thread_map__new\n"); | 349 | pr_debug("thread_map__new\n"); |
348 | return -1; | 350 | return -1; |
@@ -490,7 +492,7 @@ static int test__basic_mmap(void) | |||
490 | expected_nr_events[i] = random() % 257; | 492 | expected_nr_events[i] = random() % 257; |
491 | } | 493 | } |
492 | 494 | ||
493 | threads = thread_map__new(-1, getpid()); | 495 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
494 | if (threads == NULL) { | 496 | if (threads == NULL) { |
495 | pr_debug("thread_map__new\n"); | 497 | pr_debug("thread_map__new\n"); |
496 | return -1; | 498 | return -1; |
@@ -1008,12 +1010,9 @@ realloc: | |||
1008 | static int test__PERF_RECORD(void) | 1010 | static int test__PERF_RECORD(void) |
1009 | { | 1011 | { |
1010 | struct perf_record_opts opts = { | 1012 | struct perf_record_opts opts = { |
1011 | .target_pid = -1, | ||
1012 | .target_tid = -1, | ||
1013 | .no_delay = true, | 1013 | .no_delay = true, |
1014 | .freq = 10, | 1014 | .freq = 10, |
1015 | .mmap_pages = 256, | 1015 | .mmap_pages = 256, |
1016 | .sample_id_all_avail = true, | ||
1017 | }; | 1016 | }; |
1018 | cpu_set_t *cpu_mask = NULL; | 1017 | cpu_set_t *cpu_mask = NULL; |
1019 | size_t cpu_mask_size = 0; | 1018 | size_t cpu_mask_size = 0; |
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void) | |||
1054 | * we're monitoring, the one forked there. | 1053 | * we're monitoring, the one forked there. |
1055 | */ | 1054 | */ |
1056 | err = perf_evlist__create_maps(evlist, opts.target_pid, | 1055 | err = perf_evlist__create_maps(evlist, opts.target_pid, |
1057 | opts.target_tid, opts.cpu_list); | 1056 | opts.target_tid, UINT_MAX, opts.cpu_list); |
1058 | if (err < 0) { | 1057 | if (err < 0) { |
1059 | pr_debug("Not enough memory to create thread/cpu maps\n"); | 1058 | pr_debug("Not enough memory to create thread/cpu maps\n"); |
1060 | goto out_delete_evlist; | 1059 | goto out_delete_evlist; |
@@ -1296,6 +1295,173 @@ out: | |||
1296 | return (err < 0 || errs > 0) ? -1 : 0; | 1295 | return (err < 0 || errs > 0) ? -1 : 0; |
1297 | } | 1296 | } |
1298 | 1297 | ||
1298 | |||
1299 | #if defined(__x86_64__) || defined(__i386__) | ||
1300 | |||
1301 | #define barrier() asm volatile("" ::: "memory") | ||
1302 | |||
1303 | static u64 rdpmc(unsigned int counter) | ||
1304 | { | ||
1305 | unsigned int low, high; | ||
1306 | |||
1307 | asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); | ||
1308 | |||
1309 | return low | ((u64)high) << 32; | ||
1310 | } | ||
1311 | |||
1312 | static u64 rdtsc(void) | ||
1313 | { | ||
1314 | unsigned int low, high; | ||
1315 | |||
1316 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); | ||
1317 | |||
1318 | return low | ((u64)high) << 32; | ||
1319 | } | ||
1320 | |||
1321 | static u64 mmap_read_self(void *addr) | ||
1322 | { | ||
1323 | struct perf_event_mmap_page *pc = addr; | ||
1324 | u32 seq, idx, time_mult = 0, time_shift = 0; | ||
1325 | u64 count, cyc = 0, time_offset = 0, enabled, running, delta; | ||
1326 | |||
1327 | do { | ||
1328 | seq = pc->lock; | ||
1329 | barrier(); | ||
1330 | |||
1331 | enabled = pc->time_enabled; | ||
1332 | running = pc->time_running; | ||
1333 | |||
1334 | if (enabled != running) { | ||
1335 | cyc = rdtsc(); | ||
1336 | time_mult = pc->time_mult; | ||
1337 | time_shift = pc->time_shift; | ||
1338 | time_offset = pc->time_offset; | ||
1339 | } | ||
1340 | |||
1341 | idx = pc->index; | ||
1342 | count = pc->offset; | ||
1343 | if (idx) | ||
1344 | count += rdpmc(idx - 1); | ||
1345 | |||
1346 | barrier(); | ||
1347 | } while (pc->lock != seq); | ||
1348 | |||
1349 | if (enabled != running) { | ||
1350 | u64 quot, rem; | ||
1351 | |||
1352 | quot = (cyc >> time_shift); | ||
1353 | rem = cyc & ((1 << time_shift) - 1); | ||
1354 | delta = time_offset + quot * time_mult + | ||
1355 | ((rem * time_mult) >> time_shift); | ||
1356 | |||
1357 | enabled += delta; | ||
1358 | if (idx) | ||
1359 | running += delta; | ||
1360 | |||
1361 | quot = count / running; | ||
1362 | rem = count % running; | ||
1363 | count = quot * enabled + (rem * enabled) / running; | ||
1364 | } | ||
1365 | |||
1366 | return count; | ||
1367 | } | ||
1368 | |||
1369 | /* | ||
1370 | * If the RDPMC instruction faults, signal this back to the test's parent task: | ||
1371 | */ | ||
1372 | static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) | ||
1373 | { | ||
1374 | exit(-1); | ||
1375 | } | ||
1376 | |||
1377 | static int __test__rdpmc(void) | ||
1378 | { | ||
1379 | long page_size = sysconf(_SC_PAGE_SIZE); | ||
1380 | volatile int tmp = 0; | ||
1381 | u64 i, loops = 1000; | ||
1382 | int n; | ||
1383 | int fd; | ||
1384 | void *addr; | ||
1385 | struct perf_event_attr attr = { | ||
1386 | .type = PERF_TYPE_HARDWARE, | ||
1387 | .config = PERF_COUNT_HW_INSTRUCTIONS, | ||
1388 | .exclude_kernel = 1, | ||
1389 | }; | ||
1390 | u64 delta_sum = 0; | ||
1391 | struct sigaction sa; | ||
1392 | |||
1393 | sigfillset(&sa.sa_mask); | ||
1394 | sa.sa_sigaction = segfault_handler; | ||
1395 | sigaction(SIGSEGV, &sa, NULL); | ||
1396 | |||
1397 | fprintf(stderr, "\n\n"); | ||
1398 | |||
1399 | fd = sys_perf_event_open(&attr, 0, -1, -1, 0); | ||
1400 | if (fd < 0) { | ||
1401 | die("Error: sys_perf_event_open() syscall returned " | ||
1402 | "with %d (%s)\n", fd, strerror(errno)); | ||
1403 | } | ||
1404 | |||
1405 | addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); | ||
1406 | if (addr == (void *)(-1)) { | ||
1407 | die("Error: mmap() syscall returned " | ||
1408 | "with (%s)\n", strerror(errno)); | ||
1409 | } | ||
1410 | |||
1411 | for (n = 0; n < 6; n++) { | ||
1412 | u64 stamp, now, delta; | ||
1413 | |||
1414 | stamp = mmap_read_self(addr); | ||
1415 | |||
1416 | for (i = 0; i < loops; i++) | ||
1417 | tmp++; | ||
1418 | |||
1419 | now = mmap_read_self(addr); | ||
1420 | loops *= 10; | ||
1421 | |||
1422 | delta = now - stamp; | ||
1423 | fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta); | ||
1424 | |||
1425 | delta_sum += delta; | ||
1426 | } | ||
1427 | |||
1428 | munmap(addr, page_size); | ||
1429 | close(fd); | ||
1430 | |||
1431 | fprintf(stderr, " "); | ||
1432 | |||
1433 | if (!delta_sum) | ||
1434 | return -1; | ||
1435 | |||
1436 | return 0; | ||
1437 | } | ||
1438 | |||
1439 | static int test__rdpmc(void) | ||
1440 | { | ||
1441 | int status = 0; | ||
1442 | int wret = 0; | ||
1443 | int ret; | ||
1444 | int pid; | ||
1445 | |||
1446 | pid = fork(); | ||
1447 | if (pid < 0) | ||
1448 | return -1; | ||
1449 | |||
1450 | if (!pid) { | ||
1451 | ret = __test__rdpmc(); | ||
1452 | |||
1453 | exit(ret); | ||
1454 | } | ||
1455 | |||
1456 | wret = waitpid(pid, &status, 0); | ||
1457 | if (wret < 0 || status) | ||
1458 | return -1; | ||
1459 | |||
1460 | return 0; | ||
1461 | } | ||
1462 | |||
1463 | #endif | ||
1464 | |||
1299 | static struct test { | 1465 | static struct test { |
1300 | const char *desc; | 1466 | const char *desc; |
1301 | int (*func)(void); | 1467 | int (*func)(void); |
@@ -1320,6 +1486,12 @@ static struct test { | |||
1320 | .desc = "parse events tests", | 1486 | .desc = "parse events tests", |
1321 | .func = test__parse_events, | 1487 | .func = test__parse_events, |
1322 | }, | 1488 | }, |
1489 | #if defined(__x86_64__) || defined(__i386__) | ||
1490 | { | ||
1491 | .desc = "x86 rdpmc test", | ||
1492 | .func = test__rdpmc, | ||
1493 | }, | ||
1494 | #endif | ||
1323 | { | 1495 | { |
1324 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", | 1496 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", |
1325 | .func = test__PERF_RECORD, | 1497 | .func = test__PERF_RECORD, |
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) | |||
1412 | if (symbol__init() < 0) | 1584 | if (symbol__init() < 0) |
1413 | return -1; | 1585 | return -1; |
1414 | 1586 | ||
1415 | setup_pager(); | ||
1416 | |||
1417 | return __cmd_test(argc, argv); | 1587 | return __cmd_test(argc, argv); |
1418 | } | 1588 | } |
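
The mmap_read_self() helper in the new rdpmc test reads counter state the kernel publishes in the perf_event_mmap_page using a seqlock-style protocol: snapshot pc->lock, read the fields between compiler barriers, and retry if the lock word changed; when the event was multiplexed (enabled != running) it then scales the raw count by the enabled/running time ratio. A minimal sketch of just the retry loop, against a trimmed-down view of the page (the real layout is struct perf_event_mmap_page from <linux/perf_event.h>):

    #include <stdint.h>

    #define barrier() asm volatile("" ::: "memory")

    /* Trimmed-down view of the fields the test reads. */
    struct mmap_page_view {
            volatile uint32_t lock;
            volatile uint64_t time_enabled;
            volatile uint64_t time_running;
    };

    /* Seqlock-style consistent read: the kernel bumps ->lock around its
     * updates, so retry whenever the lock word changed underneath us. */
    static void read_times(struct mmap_page_view *pc,
                           uint64_t *enabled, uint64_t *running)
    {
            uint32_t seq;

            do {
                    seq = pc->lock;
                    barrier();      /* field reads stay after the lock load */

                    *enabled = pc->time_enabled;
                    *running = pc->time_running;

                    barrier();      /* field reads finish before the re-check */
            } while (pc->lock != seq);
    }

    int main(void)
    {
            static struct mmap_page_view page = { 0, 1000000, 250000 };
            uint64_t enabled, running;

            read_times(&page, &enabled, &running);
            /* event ran 25% of the time => counts get scaled up by 4 */
            return !(enabled == 1000000 && running == 250000);
    }
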
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecff31257eb3..e3c63aef8efc 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -64,7 +64,6 @@ | |||
64 | #include <linux/unistd.h> | 64 | #include <linux/unistd.h> |
65 | #include <linux/types.h> | 65 | #include <linux/types.h> |
66 | 66 | ||
67 | |||
68 | void get_term_dimensions(struct winsize *ws) | 67 | void get_term_dimensions(struct winsize *ws) |
69 | { | 68 | { |
70 | char *s = getenv("LINES"); | 69 | char *s = getenv("LINES"); |
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg) | |||
544 | 543 | ||
545 | static void *display_thread_tui(void *arg) | 544 | static void *display_thread_tui(void *arg) |
546 | { | 545 | { |
546 | struct perf_evsel *pos; | ||
547 | struct perf_top *top = arg; | 547 | struct perf_top *top = arg; |
548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; | 548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; |
549 | 549 | ||
550 | perf_top__sort_new_samples(top); | 550 | perf_top__sort_new_samples(top); |
551 | |||
552 | /* | ||
553 | * Initialize the uid_filter_str; in the future the TUI will allow | ||
554 | * zooming in/out UIDs. For now just use whatever the user passed | ||
555 | * via --uid. | ||
556 | */ | ||
557 | list_for_each_entry(pos, &top->evlist->entries, node) | ||
558 | pos->hists.uid_filter_str = top->uid_str; | ||
559 | |||
551 | perf_evlist__tui_browse_hists(top->evlist, help, | 560 | perf_evlist__tui_browse_hists(top->evlist, help, |
552 | perf_top__sort_new_samples, | 561 | perf_top__sort_new_samples, |
553 | top, top->delay_secs); | 562 | top, top->delay_secs); |
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool, | |||
668 | return; | 677 | return; |
669 | } | 678 | } |
670 | 679 | ||
680 | if (!machine) { | ||
681 | pr_err("%u unprocessable samples recorded.", | ||
682 | top->session->hists.stats.nr_unprocessable_samples++); | ||
683 | return; | ||
684 | } | ||
685 | |||
671 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) | 686 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) |
672 | top->exact_samples++; | 687 | top->exact_samples++; |
673 | 688 | ||
@@ -861,7 +876,7 @@ fallback_missing_features: | |||
861 | if (top->exclude_guest_missing) | 876 | if (top->exclude_guest_missing) |
862 | attr->exclude_guest = attr->exclude_host = 0; | 877 | attr->exclude_guest = attr->exclude_host = 0; |
863 | retry_sample_id: | 878 | retry_sample_id: |
864 | attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; | 879 | attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; |
865 | try_again: | 880 | try_again: |
866 | if (perf_evsel__open(counter, top->evlist->cpus, | 881 | if (perf_evsel__open(counter, top->evlist->cpus, |
867 | top->evlist->threads, top->group, | 882 | top->evlist->threads, top->group, |
@@ -878,11 +893,11 @@ try_again: | |||
878 | "guest or host samples.\n"); | 893 | "guest or host samples.\n"); |
879 | top->exclude_guest_missing = true; | 894 | top->exclude_guest_missing = true; |
880 | goto fallback_missing_features; | 895 | goto fallback_missing_features; |
881 | } else if (top->sample_id_all_avail) { | 896 | } else if (!top->sample_id_all_missing) { |
882 | /* | 897 | /* |
883 | * Old kernel, no attr->sample_id_type_all field | 898 | * Old kernel, no attr->sample_id_type_all field |
884 | */ | 899 | */ |
885 | top->sample_id_all_avail = false; | 900 | top->sample_id_all_missing = true; |
886 | goto retry_sample_id; | 901 | goto retry_sample_id; |
887 | } | 902 | } |
888 | } | 903 | } |
@@ -967,7 +982,7 @@ static int __cmd_top(struct perf_top *top) | |||
967 | if (ret) | 982 | if (ret) |
968 | goto out_delete; | 983 | goto out_delete; |
969 | 984 | ||
970 | if (top->target_tid != -1) | 985 | if (top->target_tid || top->uid != UINT_MAX) |
971 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, | 986 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, |
972 | perf_event__process, | 987 | perf_event__process, |
973 | &top->session->host_machine); | 988 | &top->session->host_machine); |
@@ -1105,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1105 | struct perf_top top = { | 1120 | struct perf_top top = { |
1106 | .count_filter = 5, | 1121 | .count_filter = 5, |
1107 | .delay_secs = 2, | 1122 | .delay_secs = 2, |
1108 | .target_pid = -1, | 1123 | .uid = UINT_MAX, |
1109 | .target_tid = -1, | ||
1110 | .freq = 1000, /* 1 KHz */ | 1124 | .freq = 1000, /* 1 KHz */ |
1111 | .sample_id_all_avail = true, | ||
1112 | .mmap_pages = 128, | 1125 | .mmap_pages = 128, |
1113 | .sym_pcnt_filter = 5, | 1126 | .sym_pcnt_filter = 5, |
1114 | }; | 1127 | }; |
@@ -1119,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1119 | parse_events_option), | 1132 | parse_events_option), |
1120 | OPT_INTEGER('c', "count", &top.default_interval, | 1133 | OPT_INTEGER('c', "count", &top.default_interval, |
1121 | "event period to sample"), | 1134 | "event period to sample"), |
1122 | OPT_INTEGER('p', "pid", &top.target_pid, | 1135 | OPT_STRING('p', "pid", &top.target_pid, "pid", |
1123 | "profile events on existing process id"), | 1136 | "profile events on existing process id"), |
1124 | OPT_INTEGER('t', "tid", &top.target_tid, | 1137 | OPT_STRING('t', "tid", &top.target_tid, "tid", |
1125 | "profile events on existing thread id"), | 1138 | "profile events on existing thread id"), |
1126 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, | 1139 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, |
1127 | "system-wide collection from all CPUs"), | 1140 | "system-wide collection from all CPUs"), |
@@ -1180,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1180 | "Display raw encoding of assembly instructions (default)"), | 1193 | "Display raw encoding of assembly instructions (default)"), |
1181 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", | 1194 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", |
1182 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 1195 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
1196 | OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), | ||
1183 | OPT_END() | 1197 | OPT_END() |
1184 | }; | 1198 | }; |
1185 | 1199 | ||
@@ -1205,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1205 | 1219 | ||
1206 | setup_browser(false); | 1220 | setup_browser(false); |
1207 | 1221 | ||
1222 | top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid); | ||
1223 | if (top.uid_str != NULL && top.uid == UINT_MAX - 1) | ||
1224 | goto out_delete_evlist; | ||
1225 | |||
1208 | /* CPU and PID are mutually exclusive */ | 1226 | /* CPU and PID are mutually exclusive */ |
1209 | if (top.target_tid > 0 && top.cpu_list) { | 1227 | if (top.target_tid && top.cpu_list) { |
1210 | printf("WARNING: PID switch overriding CPU\n"); | 1228 | printf("WARNING: PID switch overriding CPU\n"); |
1211 | sleep(1); | 1229 | sleep(1); |
1212 | top.cpu_list = NULL; | 1230 | top.cpu_list = NULL; |
1213 | } | 1231 | } |
1214 | 1232 | ||
1215 | if (top.target_pid != -1) | 1233 | if (top.target_pid) |
1216 | top.target_tid = top.target_pid; | 1234 | top.target_tid = top.target_pid; |
1217 | 1235 | ||
1218 | if (perf_evlist__create_maps(top.evlist, top.target_pid, | 1236 | if (perf_evlist__create_maps(top.evlist, top.target_pid, |
1219 | top.target_tid, top.cpu_list) < 0) | 1237 | top.target_tid, top.uid, top.cpu_list) < 0) |
1220 | usage_with_options(top_usage, options); | 1238 | usage_with_options(top_usage, options); |
1221 | 1239 | ||
1222 | if (!top.evlist->nr_entries && | 1240 | if (!top.evlist->nr_entries && |
@@ -1280,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1280 | 1298 | ||
1281 | status = __cmd_top(&top); | 1299 | status = __cmd_top(&top); |
1282 | 1300 | ||
1301 | out_delete_evlist: | ||
1283 | perf_evlist__delete(top.evlist); | 1302 | perf_evlist__delete(top.evlist); |
1284 | 1303 | ||
1285 | return status; | 1304 | return status; |
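
perf top now accepts --uid as either a login name or a numeric id; parse_target_uid() (which, per the code above, signals failure by returning UINT_MAX - 1) has to resolve that string against the passwd database. A hedged sketch of such a resolution; this mirrors what the helper plausibly does, not its actual implementation:

    #include <pwd.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>

    /* Assumption: a "--uid" argument may be a login name or a numeric id. */
    static uid_t resolve_uid(const char *str)
    {
            struct passwd *pw = getpwnam(str);
            char *endptr;
            long val;

            if (pw != NULL)
                    return pw->pw_uid;

            val = strtol(str, &endptr, 10);
            if (*endptr != '\0')            /* neither a user nor a number */
                    return (uid_t)-1;

            return (uid_t)val;
    }

    int main(int argc, char **argv)
    {
            if (argc > 1)
                    printf("%s -> uid %u\n", argv[1],
                           (unsigned int)resolve_uid(argv[1]));
            return 0;
    }
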
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 3afa39ac1d40..89e3355ab173 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -173,7 +173,6 @@ sys_perf_event_open(struct perf_event_attr *attr, | |||
173 | pid_t pid, int cpu, int group_fd, | 173 | pid_t pid, int cpu, int group_fd, |
174 | unsigned long flags) | 174 | unsigned long flags) |
175 | { | 175 | { |
176 | attr->size = sizeof(*attr); | ||
177 | return syscall(__NR_perf_event_open, attr, pid, cpu, | 176 | return syscall(__NR_perf_event_open, attr, pid, cpu, |
178 | group_fd, flags); | 177 | group_fd, flags); |
179 | } | 178 | } |
@@ -186,14 +185,32 @@ struct ip_callchain { | |||
186 | u64 ips[0]; | 185 | u64 ips[0]; |
187 | }; | 186 | }; |
188 | 187 | ||
188 | struct branch_flags { | ||
189 | u64 mispred:1; | ||
190 | u64 predicted:1; | ||
191 | u64 reserved:62; | ||
192 | }; | ||
193 | |||
194 | struct branch_entry { | ||
195 | u64 from; | ||
196 | u64 to; | ||
197 | struct branch_flags flags; | ||
198 | }; | ||
199 | |||
200 | struct branch_stack { | ||
201 | u64 nr; | ||
202 | struct branch_entry entries[0]; | ||
203 | }; | ||
204 | |||
189 | extern bool perf_host, perf_guest; | 205 | extern bool perf_host, perf_guest; |
190 | extern const char perf_version_string[]; | 206 | extern const char perf_version_string[]; |
191 | 207 | ||
192 | void pthread__unblock_sigwinch(void); | 208 | void pthread__unblock_sigwinch(void); |
193 | 209 | ||
194 | struct perf_record_opts { | 210 | struct perf_record_opts { |
195 | pid_t target_pid; | 211 | const char *target_pid; |
196 | pid_t target_tid; | 212 | const char *target_tid; |
213 | uid_t uid; | ||
197 | bool call_graph; | 214 | bool call_graph; |
198 | bool group; | 215 | bool group; |
199 | bool inherit_stat; | 216 | bool inherit_stat; |
@@ -204,13 +221,14 @@ struct perf_record_opts { | |||
204 | bool raw_samples; | 221 | bool raw_samples; |
205 | bool sample_address; | 222 | bool sample_address; |
206 | bool sample_time; | 223 | bool sample_time; |
207 | bool sample_id_all_avail; | 224 | bool sample_id_all_missing; |
208 | bool exclude_guest_missing; | 225 | bool exclude_guest_missing; |
209 | bool system_wide; | 226 | bool system_wide; |
210 | bool period; | 227 | bool period; |
211 | unsigned int freq; | 228 | unsigned int freq; |
212 | unsigned int mmap_pages; | 229 | unsigned int mmap_pages; |
213 | unsigned int user_freq; | 230 | unsigned int user_freq; |
231 | int branch_stack; | ||
214 | u64 default_interval; | 232 | u64 default_interval; |
215 | u64 user_interval; | 233 | u64 user_interval; |
216 | const char *cpu_list; | 234 | const char *cpu_list; |
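
The structs added to perf.h describe a branch stack as a counted array of from/to address pairs, each carrying one-bit mispredicted/predicted flags packed into a u64. A small sketch that walks a decoded stack and tallies mispredictions, reusing the same layout (the bitfield order is assumed little-endian, as on x86):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef uint64_t u64;

    /* Same layout as the structs added to perf.h above. */
    struct branch_flags {
            u64 mispred:1;
            u64 predicted:1;
            u64 reserved:62;
    };

    struct branch_entry {
            u64 from;
            u64 to;
            struct branch_flags flags;
    };

    struct branch_stack {
            u64 nr;
            struct branch_entry entries[];
    };

    /* Count how many entries the hardware flagged as mispredicted. */
    static void summarize_branches(const struct branch_stack *bs)
    {
            u64 i, mispred = 0;

            for (i = 0; i < bs->nr; i++)
                    if (bs->entries[i].flags.mispred)
                            mispred++;

            printf("%" PRIu64 " branches, %" PRIu64 " mispredicted\n",
                   bs->nr, mispred);
    }

    int main(void)
    {
            struct branch_stack *bs =
                    malloc(sizeof(*bs) + 2 * sizeof(struct branch_entry));

            bs->nr = 2;
            bs->entries[0] = (struct branch_entry){ .from = 0x400100,
                    .to = 0x400200, .flags = { .mispred = 1 } };
            bs->entries[1] = (struct branch_entry){ .from = 0x400300,
                    .to = 0x400400, .flags = { .predicted = 1 } };
            summarize_branches(bs);
            free(bs);
            return 0;
    }
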
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index df638c438a9f..b11cca584238 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py | |||
@@ -19,7 +19,7 @@ def main(): | |||
19 | cpus = perf.cpu_map() | 19 | cpus = perf.cpu_map() |
20 | threads = perf.thread_map() | 20 | threads = perf.thread_map() |
21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, | 21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, |
22 | wakeup_events = 1, sample_period = 1, | 22 | wakeup_events = 1, watermark = 1, |
23 | sample_id_all = 1, | 23 | sample_id_all = 1, |
24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) | 24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) |
25 | evsel.open(cpus = cpus, threads = threads); | 25 | evsel.open(cpus = cpus, threads = threads); |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 011ed2676604..e5a462f1d07c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
@@ -315,7 +315,7 @@ fallback: | |||
315 | "Please use:\n\n" | 315 | "Please use:\n\n" |
316 | " perf buildid-cache -av vmlinux\n\n" | 316 | " perf buildid-cache -av vmlinux\n\n" |
317 | "or:\n\n" | 317 | "or:\n\n" |
318 | " --vmlinux vmlinux", | 318 | " --vmlinux vmlinux\n", |
319 | sym->name, build_id_msg ?: ""); | 319 | sym->name, build_id_msg ?: ""); |
320 | goto out_free_filename; | 320 | goto out_free_filename; |
321 | } | 321 | } |
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c index 5e230acae1e9..0a1adc1111fd 100644 --- a/tools/perf/util/bitmap.c +++ b/tools/perf/util/bitmap.c | |||
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) | |||
19 | 19 | ||
20 | return w; | 20 | return w; |
21 | } | 21 | } |
22 | |||
23 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
24 | const unsigned long *bitmap2, int bits) | ||
25 | { | ||
26 | int k; | ||
27 | int nr = BITS_TO_LONGS(bits); | ||
28 | |||
29 | for (k = 0; k < nr; k++) | ||
30 | dst[k] = bitmap1[k] | bitmap2[k]; | ||
31 | } | ||
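
__bitmap_or() works a long-word at a time over BITS_TO_LONGS(bits) words, so ORing two CPU masks costs a handful of word operations rather than a per-bit loop. A self-contained sketch; the BITS_TO_LONGS definition is restated here, as in the kernel headers, so the example runs outside the perf tree:

    #include <stdio.h>

    #define BITS_PER_LONG   (8 * sizeof(long))
    #define BITS_TO_LONGS(bits) \
            (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
            /* e.g. merge two 128-bit CPU masks into one */
            unsigned long a[BITS_TO_LONGS(128)] = { 0x5UL };  /* cpus 0,2 */
            unsigned long b[BITS_TO_LONGS(128)] = { 0xaUL };  /* cpus 1,3 */
            unsigned long dst[BITS_TO_LONGS(128)];
            unsigned int k;

            for (k = 0; k < BITS_TO_LONGS(128); k++)  /* __bitmap_or() body */
                    dst[k] = a[k] | b[k];

            printf("merged word 0: 0x%lx\n", dst[0]); /* prints 0xf */
            return 0;
    }
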
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693ab..adc72f09914d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c | |||
@@ -166,6 +166,17 @@ out: | |||
166 | return cpus; | 166 | return cpus; |
167 | } | 167 | } |
168 | 168 | ||
169 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) | ||
170 | { | ||
171 | int i; | ||
172 | size_t printed = fprintf(fp, "%d cpu%s: ", | ||
173 | map->nr, map->nr > 1 ? "s" : ""); | ||
174 | for (i = 0; i < map->nr; ++i) | ||
175 | printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); | ||
176 | |||
177 | return printed + fprintf(fp, "\n"); | ||
178 | } | ||
179 | |||
169 | struct cpu_map *cpu_map__dummy_new(void) | 180 | struct cpu_map *cpu_map__dummy_new(void) |
170 | { | 181 | { |
171 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); | 182 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); |
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a374794..c41518573c6a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __PERF_CPUMAP_H | 1 | #ifndef __PERF_CPUMAP_H |
2 | #define __PERF_CPUMAP_H | 2 | #define __PERF_CPUMAP_H |
3 | 3 | ||
4 | #include <stdio.h> | ||
5 | |||
4 | struct cpu_map { | 6 | struct cpu_map { |
5 | int nr; | 7 | int nr; |
6 | int map[]; | 8 | int map[]; |
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); | |||
10 | struct cpu_map *cpu_map__dummy_new(void); | 12 | struct cpu_map *cpu_map__dummy_new(void); |
11 | void cpu_map__delete(struct cpu_map *map); | 13 | void cpu_map__delete(struct cpu_map *map); |
12 | 14 | ||
15 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); | ||
16 | |||
13 | #endif /* __PERF_CPUMAP_H */ | 17 | #endif /* __PERF_CPUMAP_H */ |
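
cpu_map__fprintf() gives tools a one-line, human-readable dump of a CPU map. The snippet below restates the same output format over a fixed map so it runs standalone; in-tree callers would simply pass a struct cpu_map from cpu_map__new() together with a FILE pointer:

    #include <stdio.h>

    /* Standalone re-statement of cpu_map__fprintf()'s output format. */
    struct cpu_map_view {
            int nr;
            int map[4];
    };

    static size_t cpu_map_print(const struct cpu_map_view *m, FILE *fp)
    {
            size_t printed = fprintf(fp, "%d cpu%s: ",
                                     m->nr, m->nr > 1 ? "s" : "");
            int i;

            for (i = 0; i < m->nr; ++i)
                    printed += fprintf(fp, "%s%d", i ? ", " : "", m->map[i]);

            return printed + fprintf(fp, "\n");
    }

    int main(void)
    {
            struct cpu_map_view m = { 4, { 0, 1, 2, 3 } };

            cpu_map_print(&m, stdout);      /* "4 cpus: 0, 1, 2, 3" */
            return 0;
    }
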
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 35073621e5de..aada3ac5e891 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * No surprises, and works with signed and unsigned chars. | 4 | * No surprises, and works with signed and unsigned chars. |
5 | */ | 5 | */ |
6 | #include "cache.h" | 6 | #include "util.h" |
7 | 7 | ||
8 | enum { | 8 | enum { |
9 | S = GIT_SPACE, | 9 | S = GIT_SPACE, |
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index ffc35e748e89..dd8b19319c03 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c | |||
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = { | |||
15 | 0, | 15 | 0, |
16 | }; | 16 | }; |
17 | 17 | ||
18 | /* use this to force a umount */ | ||
19 | void debugfs_force_cleanup(void) | ||
20 | { | ||
21 | debugfs_find_mountpoint(); | ||
22 | debugfs_premounted = 0; | ||
23 | debugfs_umount(); | ||
24 | } | ||
25 | |||
26 | /* construct a full path to a debugfs element */ | ||
27 | int debugfs_make_path(const char *element, char *buffer, int size) | ||
28 | { | ||
29 | int len; | ||
30 | |||
31 | if (strlen(debugfs_mountpoint) == 0) { | ||
32 | buffer[0] = '\0'; | ||
33 | return -1; | ||
34 | } | ||
35 | |||
36 | len = strlen(debugfs_mountpoint) + strlen(element) + 1; | ||
37 | if (len >= size) | ||
38 | return len+1; | ||
39 | |||
40 | snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static int debugfs_found; | 18 | static int debugfs_found; |
45 | 19 | ||
46 | /* find the path to the mounted debugfs */ | 20 | /* find the path to the mounted debugfs */ |
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs) | |||
97 | return 0; | 71 | return 0; |
98 | } | 72 | } |
99 | 73 | ||
100 | |||
101 | int debugfs_valid_entry(const char *path) | ||
102 | { | ||
103 | struct stat st; | ||
104 | |||
105 | if (stat(path, &st)) | ||
106 | return -errno; | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static void debugfs_set_tracing_events_path(const char *mountpoint) | 74 | static void debugfs_set_tracing_events_path(const char *mountpoint) |
112 | { | 75 | { |
113 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", | 76 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", |
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint) | |||
149 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); | 112 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); |
150 | debugfs_set_tracing_events_path(mountpoint); | 113 | debugfs_set_tracing_events_path(mountpoint); |
151 | } | 114 | } |
152 | |||
153 | /* umount the debugfs */ | ||
154 | |||
155 | int debugfs_umount(void) | ||
156 | { | ||
157 | char umountcmd[128]; | ||
158 | int ret; | ||
159 | |||
160 | /* if it was already mounted, leave it */ | ||
161 | if (debugfs_premounted) | ||
162 | return 0; | ||
163 | |||
164 | /* make sure it's a valid mount point */ | ||
165 | ret = debugfs_valid_mountpoint(debugfs_mountpoint); | ||
166 | if (ret) | ||
167 | return ret; | ||
168 | |||
169 | snprintf(umountcmd, sizeof(umountcmd), | ||
170 | "/bin/umount %s", debugfs_mountpoint); | ||
171 | return system(umountcmd); | ||
172 | } | ||
173 | |||
174 | int debugfs_write(const char *entry, const char *value) | ||
175 | { | ||
176 | char path[PATH_MAX + 1]; | ||
177 | int ret, count; | ||
178 | int fd; | ||
179 | |||
180 | /* construct the path */ | ||
181 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
182 | |||
183 | /* verify that it exists */ | ||
184 | ret = debugfs_valid_entry(path); | ||
185 | if (ret) | ||
186 | return ret; | ||
187 | |||
188 | /* get how many chars we're going to write */ | ||
189 | count = strlen(value); | ||
190 | |||
191 | /* open the debugfs entry */ | ||
192 | fd = open(path, O_RDWR); | ||
193 | if (fd < 0) | ||
194 | return -errno; | ||
195 | |||
196 | while (count > 0) { | ||
197 | /* write it */ | ||
198 | ret = write(fd, value, count); | ||
199 | if (ret <= 0) { | ||
200 | if (ret == EAGAIN) | ||
201 | continue; | ||
202 | close(fd); | ||
203 | return -errno; | ||
204 | } | ||
205 | count -= ret; | ||
206 | } | ||
207 | |||
208 | /* close it */ | ||
209 | close(fd); | ||
210 | |||
211 | /* return success */ | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * read a debugfs entry | ||
217 | * returns the number of chars read or a negative errno | ||
218 | */ | ||
219 | int debugfs_read(const char *entry, char *buffer, size_t size) | ||
220 | { | ||
221 | char path[PATH_MAX + 1]; | ||
222 | int ret; | ||
223 | int fd; | ||
224 | |||
225 | /* construct the path */ | ||
226 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
227 | |||
228 | /* verify that it exists */ | ||
229 | ret = debugfs_valid_entry(path); | ||
230 | if (ret) | ||
231 | return ret; | ||
232 | |||
233 | /* open the debugfs entry */ | ||
234 | fd = open(path, O_RDONLY); | ||
235 | if (fd < 0) | ||
236 | return -errno; | ||
237 | |||
238 | do { | ||
239 | /* read it */ | ||
240 | ret = read(fd, buffer, size); | ||
241 | if (ret == 0) { | ||
242 | close(fd); | ||
243 | return EOF; | ||
244 | } | ||
245 | } while (ret < 0 && errno == EAGAIN); | ||
246 | |||
247 | /* close it */ | ||
248 | close(fd); | ||
249 | |||
250 | /* make *sure* there's a null character at the end */ | ||
251 | buffer[ret] = '\0'; | ||
252 | |||
253 | /* return the number of chars read */ | ||
254 | return ret; | ||
255 | } | ||
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h index 4a878f735eb0..68f3e87ec57f 100644 --- a/tools/perf/util/debugfs.h +++ b/tools/perf/util/debugfs.h | |||
@@ -3,14 +3,8 @@ | |||
3 | 3 | ||
4 | const char *debugfs_find_mountpoint(void); | 4 | const char *debugfs_find_mountpoint(void); |
5 | int debugfs_valid_mountpoint(const char *debugfs); | 5 | int debugfs_valid_mountpoint(const char *debugfs); |
6 | int debugfs_valid_entry(const char *path); | ||
7 | char *debugfs_mount(const char *mountpoint); | 6 | char *debugfs_mount(const char *mountpoint); |
8 | int debugfs_umount(void); | ||
9 | void debugfs_set_path(const char *mountpoint); | 7 | void debugfs_set_path(const char *mountpoint); |
10 | int debugfs_write(const char *entry, const char *value); | ||
11 | int debugfs_read(const char *entry, char *buffer, size_t size); | ||
12 | void debugfs_force_cleanup(void); | ||
13 | int debugfs_make_path(const char *element, char *buffer, int size); | ||
14 | 8 | ||
15 | extern char debugfs_mountpoint[]; | 9 | extern char debugfs_mountpoint[]; |
16 | extern char tracing_events_path[]; | 10 | extern char tracing_events_path[]; |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -81,6 +81,7 @@ struct perf_sample { | |||
81 | u32 raw_size; | 81 | u32 raw_size; |
82 | void *raw_data; | 82 | void *raw_data; |
83 | struct ip_callchain *callchain; | 83 | struct ip_callchain *callchain; |
84 | struct branch_stack *branch_stack; | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | #define BUILD_ID_SIZE 20 | 87 | #define BUILD_ID_SIZE 20 |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ea32a061f1c8..159263d17c2d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) | |||
97 | ++evlist->nr_entries; | 97 | ++evlist->nr_entries; |
98 | } | 98 | } |
99 | 99 | ||
100 | static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | 100 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, |
101 | struct list_head *list, | 101 | struct list_head *list, |
102 | int nr_entries) | 102 | int nr_entries) |
103 | { | 103 | { |
104 | list_splice_tail(list, &evlist->entries); | 104 | list_splice_tail(list, &evlist->entries); |
105 | evlist->nr_entries += nr_entries; | 105 | evlist->nr_entries += nr_entries; |
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, | |||
597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); | 597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); |
598 | } | 598 | } |
599 | 599 | ||
600 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 600 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
601 | pid_t target_tid, const char *cpu_list) | 601 | const char *target_tid, uid_t uid, const char *cpu_list) |
602 | { | 602 | { |
603 | evlist->threads = thread_map__new(target_pid, target_tid); | 603 | evlist->threads = thread_map__new_str(target_pid, target_tid, uid); |
604 | 604 | ||
605 | if (evlist->threads == NULL) | 605 | if (evlist->threads == NULL) |
606 | return -1; | 606 | return -1; |
607 | 607 | ||
608 | if (cpu_list == NULL && target_tid != -1) | 608 | if (uid != UINT_MAX || (cpu_list == NULL && target_tid)) |
609 | evlist->cpus = cpu_map__dummy_new(); | 609 | evlist->cpus = cpu_map__dummy_new(); |
610 | else | 610 | else |
611 | evlist->cpus = cpu_map__new(cpu_list); | 611 | evlist->cpus = cpu_map__new(cpu_list); |
@@ -765,6 +765,7 @@ out_err: | |||
765 | list_for_each_entry_reverse(evsel, &evlist->entries, node) | 765 | list_for_each_entry_reverse(evsel, &evlist->entries, node) |
766 | perf_evsel__close(evsel, ncpus, nthreads); | 766 | perf_evsel__close(evsel, ncpus, nthreads); |
767 | 767 | ||
768 | errno = -err; | ||
768 | return err; | 769 | return err; |
769 | } | 770 | } |
770 | 771 | ||
@@ -824,7 +825,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, | |||
824 | exit(-1); | 825 | exit(-1); |
825 | } | 826 | } |
826 | 827 | ||
827 | if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) | 828 | if (!opts->system_wide && !opts->target_tid && !opts->target_pid) |
828 | evlist->threads->map[0] = evlist->workload.pid; | 829 | evlist->threads->map[0] = evlist->workload.pid; |
829 | 830 | ||
830 | close(child_ready_pipe[1]); | 831 | close(child_ready_pipe[1]); |
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8922aeed0467..21f1c9e57f13 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, | |||
106 | evlist->threads = threads; | 106 | evlist->threads = threads; |
107 | } | 107 | } |
108 | 108 | ||
109 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 109 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
110 | pid_t target_tid, const char *cpu_list); | 110 | const char *tid, uid_t uid, const char *cpu_list); |
111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); | 111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); |
112 | int perf_evlist__set_filters(struct perf_evlist *evlist); | 112 | int perf_evlist__set_filters(struct perf_evlist *evlist); |
113 | 113 | ||
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); | |||
117 | 117 | ||
118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); | 118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); |
119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); | 119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); |
120 | |||
121 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | ||
122 | struct list_head *list, | ||
123 | int nr_entries); | ||
124 | |||
120 | #endif /* __PERF_EVLIST_H */ | 125 | #endif /* __PERF_EVLIST_H */ |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7132ee834e0e..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
68 | struct perf_event_attr *attr = &evsel->attr; | 68 | struct perf_event_attr *attr = &evsel->attr; |
69 | int track = !evsel->idx; /* only the first counter needs these */ | 69 | int track = !evsel->idx; /* only the first counter needs these */ |
70 | 70 | ||
71 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 71 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
72 | attr->inherit = !opts->no_inherit; | 72 | attr->inherit = !opts->no_inherit; |
73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
74 | PERF_FORMAT_TOTAL_TIME_RUNNING | | 74 | PERF_FORMAT_TOTAL_TIME_RUNNING | |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
111 | if (opts->period) | 111 | if (opts->period) |
112 | attr->sample_type |= PERF_SAMPLE_PERIOD; | 112 | attr->sample_type |= PERF_SAMPLE_PERIOD; |
113 | 113 | ||
114 | if (opts->sample_id_all_avail && | 114 | if (!opts->sample_id_all_missing && |
115 | (opts->sample_time || opts->system_wide || | 115 | (opts->sample_time || opts->system_wide || |
116 | !opts->no_inherit || opts->cpu_list)) | 116 | !opts->no_inherit || opts->cpu_list)) |
117 | attr->sample_type |= PERF_SAMPLE_TIME; | 117 | attr->sample_type |= PERF_SAMPLE_TIME; |
@@ -126,11 +126,15 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
126 | attr->watermark = 0; | 126 | attr->watermark = 0; |
127 | attr->wakeup_events = 1; | 127 | attr->wakeup_events = 1; |
128 | } | 128 | } |
129 | if (opts->branch_stack) { | ||
130 | attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
131 | attr->branch_sample_type = opts->branch_stack; | ||
132 | } | ||
129 | 133 | ||
130 | attr->mmap = track; | 134 | attr->mmap = track; |
131 | attr->comm = track; | 135 | attr->comm = track; |
132 | 136 | ||
133 | if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { | 137 | if (!opts->target_pid && !opts->target_tid && !opts->system_wide) { |
134 | attr->disabled = 1; | 138 | attr->disabled = 1; |
135 | attr->enable_on_exec = 1; | 139 | attr->enable_on_exec = 1; |
136 | } | 140 | } |
@@ -536,7 +540,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
536 | } | 540 | } |
537 | 541 | ||
538 | if (type & PERF_SAMPLE_READ) { | 542 | if (type & PERF_SAMPLE_READ) { |
539 | fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); | 543 | fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); |
540 | return -1; | 544 | return -1; |
541 | } | 545 | } |
542 | 546 | ||
@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
576 | data->raw_data = (void *) pdata; | 580 | data->raw_data = (void *) pdata; |
577 | } | 581 | } |
578 | 582 | ||
583 | if (type & PERF_SAMPLE_BRANCH_STACK) { | ||
584 | u64 sz; | ||
585 | |||
586 | data->branch_stack = (struct branch_stack *)array; | ||
587 | array++; /* nr */ | ||
588 | |||
589 | sz = data->branch_stack->nr * sizeof(struct branch_entry); | ||
590 | sz /= sizeof(u64); | ||
591 | array += sz; | ||
592 | } | ||
579 | return 0; | 593 | return 0; |
580 | } | 594 | } |
581 | 595 | ||
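
The PERF_SAMPLE_BRANCH_STACK block in perf_event__parse_sample() shows the on-record layout: one u64 holding nr, followed by nr entries of three u64s each (from, to, flags), so the cursor advances by 1 + nr * sizeof(struct branch_entry) / sizeof(u64) words. A sketch of that walk in isolation, with the flags bitfield flattened to a plain u64:

    #include <stdint.h>

    typedef uint64_t u64;

    struct branch_entry {           /* three u64s, flags flattened */
            u64 from;
            u64 to;
            u64 flags;
    };

    struct branch_stack {
            u64 nr;
            struct branch_entry entries[];
    };

    /* Consume a PERF_SAMPLE_BRANCH_STACK payload from the raw u64 sample
     * stream and return the advanced cursor, mirroring the math above. */
    static const u64 *parse_branch_stack(const u64 *array,
                                         const struct branch_stack **out)
    {
            const struct branch_stack *bs = (const void *)array;

            *out = bs;
            array++;                                /* skip 'nr' itself */
            array += bs->nr * (sizeof(struct branch_entry) / sizeof(u64));
            return array;
    }

    int main(void)
    {
            u64 raw[] = { 1, 0x400100, 0x400200, 0 };  /* nr=1, one entry */
            const struct branch_stack *bs;
            const u64 *end = parse_branch_stack(raw, &bs);

            return end - raw == 4 ? 0 : 1;
    }
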
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 14bb035c5fd9..fcd9cf3ea63e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id) | |||
63 | return NULL; | 63 | return NULL; |
64 | } | 64 | } |
65 | 65 | ||
66 | static const char *__perf_magic = "PERFFILE"; | 66 | /* |
67 | * magic2 = "PERFILE2" | ||
68 | * must be a numerical value to let the endianness | ||
69 | * determine the memory layout. That way we are able | ||
70 | * to detect endianness when reading the perf.data file | ||
71 | * back. | ||
72 | * | ||
73 | * we check for legacy (PERFFILE) format. | ||
74 | */ | ||
75 | static const char *__perf_magic1 = "PERFFILE"; | ||
76 | static const u64 __perf_magic2 = 0x32454c4946524550ULL; | ||
77 | static const u64 __perf_magic2_sw = 0x50455246494c4532ULL; | ||
67 | 78 | ||
68 | #define PERF_MAGIC (*(u64 *)__perf_magic) | 79 | #define PERF_MAGIC __perf_magic2 |
69 | 80 | ||
70 | struct perf_file_attr { | 81 | struct perf_file_attr { |
71 | struct perf_event_attr attr; | 82 | struct perf_event_attr attr; |
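
Because magic2 is stored as a number rather than a string, a reader on an opposite-endian machine observes the byte-swapped constant, so one comparison both validates the file and tells the reader whether every header field needs swapping; the legacy "PERFFILE" string is still recognized for old files. A sketch of that detection logic under those assumptions (check_magic() is illustrative, not the real header routine):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint64_t u64;

    static const u64 magic2    = 0x32454c4946524550ULL; /* "PERFILE2" */
    static const u64 magic2_sw = 0x50455246494c4532ULL; /* byte-swapped */

    static int check_magic(u64 magic, bool *needs_swap, bool *legacy)
    {
            *needs_swap = false;
            *legacy = false;

            if (magic == magic2)
                    return 0;
            if (magic == magic2_sw) {
                    *needs_swap = true;     /* bswap every header field */
                    return 0;
            }
            if (memcmp(&magic, "PERFFILE", sizeof(magic)) == 0) {
                    *legacy = true;         /* pre-magic2 file format */
                    return 0;
            }
            return -1;                      /* not a perf.data file */
    }

    int main(void)
    {
            bool swap, legacy;

            /* cross-endian file: accepted, with swap flagged */
            return check_magic(magic2_sw, &swap, &legacy);
    }
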
@@ -1012,6 +1023,12 @@ write_it: | |||
1012 | return do_write_string(fd, buffer); | 1023 | return do_write_string(fd, buffer); |
1013 | } | 1024 | } |
1014 | 1025 | ||
1026 | static int write_branch_stack(int fd __used, struct perf_header *h __used, | ||
1027 | struct perf_evlist *evlist __used) | ||
1028 | { | ||
1029 | return 0; | ||
1030 | } | ||
1031 | |||
1015 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) | 1032 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) |
1016 | { | 1033 | { |
1017 | char *str = do_read_string(fd, ph); | 1034 | char *str = do_read_string(fd, ph); |
@@ -1133,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
1133 | uint64_t id; | 1150 | uint64_t id; |
1134 | void *buf = NULL; | 1151 | void *buf = NULL; |
1135 | char *str; | 1152 | char *str; |
1136 | u32 nre, sz, nr, i, j, msz; | 1153 | u32 nre, sz, nr, i, j; |
1137 | int ret; | 1154 | ssize_t ret; |
1155 | size_t msz; | ||
1138 | 1156 | ||
1139 | /* number of events */ | 1157 | /* number of events */ |
1140 | ret = read(fd, &nre, sizeof(nre)); | 1158 | ret = read(fd, &nre, sizeof(nre)); |
@@ -1151,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
1151 | if (ph->needs_swap) | 1169 | if (ph->needs_swap) |
1152 | sz = bswap_32(sz); | 1170 | sz = bswap_32(sz); |
1153 | 1171 | ||
1154 | /* | ||
1155 | * ensure it is at least to our ABI rev | ||
1156 | */ | ||
1157 | if (sz < (u32)sizeof(attr)) | ||
1158 | goto error; | ||
1159 | |||
1160 | memset(&attr, 0, sizeof(attr)); | 1172 | memset(&attr, 0, sizeof(attr)); |
1161 | 1173 | ||
1162 | /* read entire region to sync up to next field */ | 1174 | /* buffer to hold on file attr struct */ |
1163 | buf = malloc(sz); | 1175 | buf = malloc(sz); |
1164 | if (!buf) | 1176 | if (!buf) |
1165 | goto error; | 1177 | goto error; |
1166 | 1178 | ||
1167 | msz = sizeof(attr); | 1179 | msz = sizeof(attr); |
1168 | if (sz < msz) | 1180 | if (sz < (ssize_t)msz) |
1169 | msz = sz; | 1181 | msz = sz; |
1170 | 1182 | ||
1171 | for (i = 0 ; i < nre; i++) { | 1183 | for (i = 0 ; i < nre; i++) { |
1172 | 1184 | ||
1185 | /* | ||
1186 | * must read entire on-file attr struct to | ||
1187 | * sync up with layout. | ||
1188 | */ | ||
1173 | ret = read(fd, buf, sz); | 1189 | ret = read(fd, buf, sz); |
1174 | if (ret != (ssize_t)sz) | 1190 | if (ret != (ssize_t)sz) |
1175 | goto error; | 1191 | goto error; |
@@ -1305,25 +1321,204 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) | |||
1305 | free(str); | 1321 | free(str); |
1306 | } | 1322 | } |
1307 | 1323 | ||
1324 | static void print_branch_stack(struct perf_header *ph __used, int fd __used, | ||
1325 | FILE *fp) | ||
1326 | { | ||
1327 | fprintf(fp, "# contains samples with branch stack\n"); | ||
1328 | } | ||
1329 | |||
1330 | static int __event_process_build_id(struct build_id_event *bev, | ||
1331 | char *filename, | ||
1332 | struct perf_session *session) | ||
1333 | { | ||
1334 | int err = -1; | ||
1335 | struct list_head *head; | ||
1336 | struct machine *machine; | ||
1337 | u16 misc; | ||
1338 | struct dso *dso; | ||
1339 | enum dso_kernel_type dso_type; | ||
1340 | |||
1341 | machine = perf_session__findnew_machine(session, bev->pid); | ||
1342 | if (!machine) | ||
1343 | goto out; | ||
1344 | |||
1345 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
1346 | |||
1347 | switch (misc) { | ||
1348 | case PERF_RECORD_MISC_KERNEL: | ||
1349 | dso_type = DSO_TYPE_KERNEL; | ||
1350 | head = &machine->kernel_dsos; | ||
1351 | break; | ||
1352 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
1353 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
1354 | head = &machine->kernel_dsos; | ||
1355 | break; | ||
1356 | case PERF_RECORD_MISC_USER: | ||
1357 | case PERF_RECORD_MISC_GUEST_USER: | ||
1358 | dso_type = DSO_TYPE_USER; | ||
1359 | head = &machine->user_dsos; | ||
1360 | break; | ||
1361 | default: | ||
1362 | goto out; | ||
1363 | } | ||
1364 | |||
1365 | dso = __dsos__findnew(head, filename); | ||
1366 | if (dso != NULL) { | ||
1367 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
1368 | |||
1369 | dso__set_build_id(dso, &bev->build_id); | ||
1370 | |||
1371 | if (filename[0] == '[') | ||
1372 | dso->kernel = dso_type; | ||
1373 | |||
1374 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
1375 | sbuild_id); | ||
1376 | pr_debug("build id event received for %s: %s\n", | ||
1377 | dso->long_name, sbuild_id); | ||
1378 | } | ||
1379 | |||
1380 | err = 0; | ||
1381 | out: | ||
1382 | return err; | ||
1383 | } | ||
1384 | |||
1385 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
1386 | int input, u64 offset, u64 size) | ||
1387 | { | ||
1388 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1389 | struct { | ||
1390 | struct perf_event_header header; | ||
1391 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
1392 | char filename[0]; | ||
1393 | } old_bev; | ||
1394 | struct build_id_event bev; | ||
1395 | char filename[PATH_MAX]; | ||
1396 | u64 limit = offset + size; | ||
1397 | |||
1398 | while (offset < limit) { | ||
1399 | ssize_t len; | ||
1400 | |||
1401 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
1402 | return -1; | ||
1403 | |||
1404 | if (header->needs_swap) | ||
1405 | perf_event_header__bswap(&old_bev.header); | ||
1406 | |||
1407 | len = old_bev.header.size - sizeof(old_bev); | ||
1408 | if (read(input, filename, len) != len) | ||
1409 | return -1; | ||
1410 | |||
1411 | bev.header = old_bev.header; | ||
1412 | |||
1413 | /* | ||
1414 | * As the pid is the missing value, we need to fill | ||
1415 | * it properly. The header.misc value gives us a nice hint. | ||
1416 | */ | ||
1417 | bev.pid = HOST_KERNEL_ID; | ||
1418 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
1419 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
1420 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
1421 | |||
1422 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
1423 | __event_process_build_id(&bev, filename, session); | ||
1424 | |||
1425 | offset += bev.header.size; | ||
1426 | } | ||
1427 | |||
1428 | return 0; | ||
1429 | } | ||
1430 | |||
1431 | static int perf_header__read_build_ids(struct perf_header *header, | ||
1432 | int input, u64 offset, u64 size) | ||
1433 | { | ||
1434 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1435 | struct build_id_event bev; | ||
1436 | char filename[PATH_MAX]; | ||
1437 | u64 limit = offset + size, orig_offset = offset; | ||
1438 | int err = -1; | ||
1439 | |||
1440 | while (offset < limit) { | ||
1441 | ssize_t len; | ||
1442 | |||
1443 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
1444 | goto out; | ||
1445 | |||
1446 | if (header->needs_swap) | ||
1447 | perf_event_header__bswap(&bev.header); | ||
1448 | |||
1449 | len = bev.header.size - sizeof(bev); | ||
1450 | if (read(input, filename, len) != len) | ||
1451 | goto out; | ||
1452 | /* | ||
1453 | * The a1645ce1 changeset: | ||
1454 | * | ||
1455 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
1456 | * | ||
1457 | * Added a field to struct build_id_event that broke the file | ||
1458 | * format. | ||
1459 | * | ||
1460 | * Since the kernel build-id is the first entry, process the | ||
1461 | * table using the old format if the well known | ||
1462 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
1463 | * first 4 characters chopped off (where the pid_t sits). | ||
1464 | */ | ||
1465 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
1466 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
1467 | return -1; | ||
1468 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
1469 | } | ||
1470 | |||
1471 | __event_process_build_id(&bev, filename, session); | ||
1472 | |||
1473 | offset += bev.header.size; | ||
1474 | } | ||
1475 | err = 0; | ||
1476 | out: | ||
1477 | return err; | ||
1478 | } | ||
1479 | |||
1480 | static int process_trace_info(struct perf_file_section *section __unused, | ||
1481 | struct perf_header *ph __unused, | ||
1482 | int feat __unused, int fd) | ||
1483 | { | ||
1484 | trace_report(fd, false); | ||
1485 | return 0; | ||
1486 | } | ||
1487 | |||
1488 | static int process_build_id(struct perf_file_section *section, | ||
1489 | struct perf_header *ph, | ||
1490 | int feat __unused, int fd) | ||
1491 | { | ||
1492 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
1493 | pr_debug("Failed to read buildids, continuing...\n"); | ||
1494 | return 0; | ||
1495 | } | ||
1496 | |||
1308 | struct feature_ops { | 1497 | struct feature_ops { |
1309 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); | 1498 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); |
1310 | void (*print)(struct perf_header *h, int fd, FILE *fp); | 1499 | void (*print)(struct perf_header *h, int fd, FILE *fp); |
1500 | int (*process)(struct perf_file_section *section, | ||
1501 | struct perf_header *h, int feat, int fd); | ||
1311 | const char *name; | 1502 | const char *name; |
1312 | bool full_only; | 1503 | bool full_only; |
1313 | }; | 1504 | }; |
1314 | 1505 | ||
1315 | #define FEAT_OPA(n, func) \ | 1506 | #define FEAT_OPA(n, func) \ |
1316 | [n] = { .name = #n, .write = write_##func, .print = print_##func } | 1507 | [n] = { .name = #n, .write = write_##func, .print = print_##func } |
1508 | #define FEAT_OPP(n, func) \ | ||
1509 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ | ||
1510 | .process = process_##func } | ||
1317 | #define FEAT_OPF(n, func) \ | 1511 | #define FEAT_OPF(n, func) \ |
1318 | [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } | 1512 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ |
1513 | .full_only = true } | ||
1319 | 1514 | ||
1320 | /* feature_ops not implemented: */ | 1515 | /* feature_ops not implemented: */ |
1321 | #define print_trace_info NULL | 1516 | #define print_trace_info NULL |
1322 | #define print_build_id NULL | 1517 | #define print_build_id NULL |
1323 | 1518 | ||
1324 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | 1519 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { |
1325 | FEAT_OPA(HEADER_TRACE_INFO, trace_info), | 1520 | FEAT_OPP(HEADER_TRACE_INFO, trace_info), |
1326 | FEAT_OPA(HEADER_BUILD_ID, build_id), | 1521 | FEAT_OPP(HEADER_BUILD_ID, build_id), |
1327 | FEAT_OPA(HEADER_HOSTNAME, hostname), | 1522 | FEAT_OPA(HEADER_HOSTNAME, hostname), |
1328 | FEAT_OPA(HEADER_OSRELEASE, osrelease), | 1523 | FEAT_OPA(HEADER_OSRELEASE, osrelease), |
1329 | FEAT_OPA(HEADER_VERSION, version), | 1524 | FEAT_OPA(HEADER_VERSION, version), |
@@ -1336,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | |||
1336 | FEAT_OPA(HEADER_CMDLINE, cmdline), | 1531 | FEAT_OPA(HEADER_CMDLINE, cmdline), |
1337 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), | 1532 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), |
1338 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), | 1533 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), |
1534 | FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), | ||
1339 | }; | 1535 | }; |
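The new .process hook turns header parsing into pure table dispatch: a feature either supplies a reader via FEAT_OPP(), or its slot stays NULL and perf_file_section__process (later in this diff) skips it. A compact, hypothetical model of that pattern:

#include <stdio.h>

enum { FEAT_A, FEAT_B, FEAT_MAX };

struct ops {
	const char *name;
	int (*process)(int fd);     /* optional reader, may be NULL */
};

static int process_a(int fd __attribute__((unused))) { return 0; }

static const struct ops ops_table[FEAT_MAX] = {
	[FEAT_A] = { .name = "A", .process = process_a },
	[FEAT_B] = { .name = "B" },             /* no reader: skipped */
};

static int dispatch(int feat, int fd)
{
	if (!ops_table[feat].process)
		return 0;                       /* nothing to do */
	return ops_table[feat].process(fd);
}

int main(void)
{
	printf("%d %d\n", dispatch(FEAT_A, 0), dispatch(FEAT_B, 0));
	return 0;
}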
1340 | 1536 | ||
1341 | struct header_print_data { | 1537 | struct header_print_data { |
@@ -1620,24 +1816,128 @@ out_free: | |||
1620 | return err; | 1816 | return err; |
1621 | } | 1817 | } |
1622 | 1818 | ||
1819 | static const int attr_file_abi_sizes[] = { | ||
1820 | [0] = PERF_ATTR_SIZE_VER0, | ||
1821 | [1] = PERF_ATTR_SIZE_VER1, | ||
1822 | 0, | ||
1823 | }; | ||
1824 | |||
1825 | /* | ||
1826 | * In the legacy file format, the magic number does not encode endianness; | ||
1827 | * hdr_sz was used for that instead. But given that hdr_sz can vary with the | ||
1828 | * ABI revision, we need to try all known sizes in both byte orders to | ||
1829 | * detect the endianness. | ||
1830 | */ | ||
1831 | static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
1832 | { | ||
1833 | uint64_t ref_size, attr_size; | ||
1834 | int i; | ||
1835 | |||
1836 | for (i = 0 ; attr_file_abi_sizes[i]; i++) { | ||
1837 | ref_size = attr_file_abi_sizes[i] | ||
1838 | + sizeof(struct perf_file_section); | ||
1839 | if (hdr_sz != ref_size) { | ||
1840 | attr_size = bswap_64(hdr_sz); | ||
1841 | if (attr_size != ref_size) | ||
1842 | continue; | ||
1843 | |||
1844 | ph->needs_swap = true; | ||
1845 | } | ||
1846 | pr_debug("ABI%d perf.data file detected, need_swap=%d\n", | ||
1847 | i, | ||
1848 | ph->needs_swap); | ||
1849 | return 0; | ||
1850 | } | ||
1851 | /* could not determine endianness */ | ||
1852 | return -1; | ||
1853 | } | ||
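Put differently, the only endianness signal a legacy file offers is whether the stored header size matches a known ABI size directly or only after a byte swap. A standalone sketch of the probe (the candidate sizes are illustrative assumptions; the real values are PERF_ATTR_SIZE_VER0/VER1 plus sizeof(struct perf_file_section)):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <byteswap.h>           /* bswap_64(), glibc */

/* Hypothetical known per-ABI header sizes, zero-terminated. */
static const uint64_t known_sizes[] = { 64 + 16, 72 + 16, 0 };

static bool detect_byte_order(uint64_t hdr_sz, bool *needs_swap)
{
	for (int i = 0; known_sizes[i]; i++) {
		if (hdr_sz == known_sizes[i]) {
			*needs_swap = false;    /* native byte order  */
			return true;
		}
		if (bswap_64(hdr_sz) == known_sizes[i]) {
			*needs_swap = true;     /* foreign byte order */
			return true;
		}
	}
	return false;   /* no known ABI matches in either byte order */
}

int main(void)
{
	bool swap;

	/* A size written by an opposite-endian host arrives byte-swapped. */
	if (detect_byte_order(bswap_64(64 + 16), &swap))
		printf("detected, needs_swap=%d\n", swap);
	return 0;
}

If neither byte order yields a known size, the file is rejected, mirroring the -1 return above.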
1854 | |||
1855 | #define PERF_PIPE_HDR_VER0 16 | ||
1856 | |||
1857 | static const size_t attr_pipe_abi_sizes[] = { | ||
1858 | [0] = PERF_PIPE_HDR_VER0, | ||
1859 | 0, | ||
1860 | }; | ||
1861 | |||
1862 | /* | ||
1863 | * In the legacy pipe format, there is an implicit assumption that the endianness | ||
1864 | * of the host recording the samples and the host parsing them is the | ||
1865 | * same. This is not always the case, given that the pipe output may be | ||
1866 | * redirected into a file and analyzed on a different machine with possibly a | ||
1867 | * different endianness and perf_event ABI revision in the perf tool itself. | ||
1868 | */ | ||
1869 | static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
1870 | { | ||
1871 | u64 attr_size; | ||
1872 | int i; | ||
1873 | |||
1874 | for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { | ||
1875 | if (hdr_sz != attr_pipe_abi_sizes[i]) { | ||
1876 | attr_size = bswap_64(hdr_sz); | ||
1877 | if (attr_size != hdr_sz) | ||
1878 | continue; | ||
1879 | |||
1880 | ph->needs_swap = true; | ||
1881 | } | ||
1882 | pr_debug("Pipe ABI%d perf.data file detected\n", i); | ||
1883 | return 0; | ||
1884 | } | ||
1885 | return -1; | ||
1886 | } | ||
1887 | |||
1888 | static int check_magic_endian(u64 magic, uint64_t hdr_sz, | ||
1889 | bool is_pipe, struct perf_header *ph) | ||
1890 | { | ||
1891 | int ret; | ||
1892 | |||
1893 | /* check for legacy format */ | ||
1894 | ret = memcmp(&magic, __perf_magic1, sizeof(magic)); | ||
1895 | if (ret == 0) { | ||
1896 | pr_debug("legacy perf.data format\n"); | ||
1897 | if (is_pipe) | ||
1898 | return try_all_pipe_abis(hdr_sz, ph); | ||
1899 | |||
1900 | return try_all_file_abis(hdr_sz, ph); | ||
1901 | } | ||
1902 | /* | ||
1903 | * the new magic number serves two purposes: | ||
1904 | * - unique number to identify actual perf.data files | ||
1905 | * - encode endianness of file | ||
1906 | */ | ||
1907 | |||
1908 | /* check magic number with one endianness */ | ||
1909 | if (magic == __perf_magic2) | ||
1910 | return 0; | ||
1911 | |||
1912 | /* check magic number with opposite endianness */ | ||
1913 | if (magic != __perf_magic2_sw) | ||
1914 | return -1; | ||
1915 | |||
1916 | ph->needs_swap = true; | ||
1917 | |||
1918 | return 0; | ||
1919 | } | ||
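For the current format the check is far cheaper: the 64-bit magic flips under byte swapping, so comparing against the constant and its swapped twin settles file identity and endianness in one step. A self-contained illustration (treating the magic as the byte string "PERFILE2", which is an assumption about __perf_magic2's spelling):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <byteswap.h>

int main(void)
{
	uint64_t magic2, magic2_sw, on_disk;

	memcpy(&magic2, "PERFILE2", sizeof(magic2));
	magic2_sw = bswap_64(magic2);

	on_disk = magic2_sw;    /* pretend an opposite-endian host wrote it */

	if (on_disk == magic2)
		printf("perf.data, native byte order\n");
	else if (on_disk == magic2_sw)
		printf("perf.data, foreign byte order: set needs_swap\n");
	else
		printf("not a perf.data file\n");
	return 0;
}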
1920 | |||
1623 | int perf_file_header__read(struct perf_file_header *header, | 1921 | int perf_file_header__read(struct perf_file_header *header, |
1624 | struct perf_header *ph, int fd) | 1922 | struct perf_header *ph, int fd) |
1625 | { | 1923 | { |
1924 | int ret; | ||
1925 | |||
1626 | lseek(fd, 0, SEEK_SET); | 1926 | lseek(fd, 0, SEEK_SET); |
1627 | 1927 | ||
1628 | if (readn(fd, header, sizeof(*header)) <= 0 || | 1928 | ret = readn(fd, header, sizeof(*header)); |
1629 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | 1929 | if (ret <= 0) |
1630 | return -1; | 1930 | return -1; |
1631 | 1931 | ||
1632 | if (header->attr_size != sizeof(struct perf_file_attr)) { | 1932 | if (check_magic_endian(header->magic, |
1633 | u64 attr_size = bswap_64(header->attr_size); | 1933 | header->attr_size, false, ph) < 0) { |
1634 | 1934 | pr_debug("magic/endian check failed\n"); | |
1635 | if (attr_size != sizeof(struct perf_file_attr)) | 1935 | return -1; |
1636 | return -1; | 1936 | } |
1637 | 1937 | ||
1938 | if (ph->needs_swap) { | ||
1638 | mem_bswap_64(header, offsetof(struct perf_file_header, | 1939 | mem_bswap_64(header, offsetof(struct perf_file_header, |
1639 | adds_features)); | 1940 | adds_features)); |
1640 | ph->needs_swap = true; | ||
1641 | } | 1941 | } |
1642 | 1942 | ||
1643 | if (header->size != sizeof(*header)) { | 1943 | if (header->size != sizeof(*header)) { |
@@ -1689,156 +1989,6 @@ int perf_file_header__read(struct perf_file_header *header, | |||
1689 | return 0; | 1989 | return 0; |
1690 | } | 1990 | } |
1691 | 1991 | ||
1692 | static int __event_process_build_id(struct build_id_event *bev, | ||
1693 | char *filename, | ||
1694 | struct perf_session *session) | ||
1695 | { | ||
1696 | int err = -1; | ||
1697 | struct list_head *head; | ||
1698 | struct machine *machine; | ||
1699 | u16 misc; | ||
1700 | struct dso *dso; | ||
1701 | enum dso_kernel_type dso_type; | ||
1702 | |||
1703 | machine = perf_session__findnew_machine(session, bev->pid); | ||
1704 | if (!machine) | ||
1705 | goto out; | ||
1706 | |||
1707 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
1708 | |||
1709 | switch (misc) { | ||
1710 | case PERF_RECORD_MISC_KERNEL: | ||
1711 | dso_type = DSO_TYPE_KERNEL; | ||
1712 | head = &machine->kernel_dsos; | ||
1713 | break; | ||
1714 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
1715 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
1716 | head = &machine->kernel_dsos; | ||
1717 | break; | ||
1718 | case PERF_RECORD_MISC_USER: | ||
1719 | case PERF_RECORD_MISC_GUEST_USER: | ||
1720 | dso_type = DSO_TYPE_USER; | ||
1721 | head = &machine->user_dsos; | ||
1722 | break; | ||
1723 | default: | ||
1724 | goto out; | ||
1725 | } | ||
1726 | |||
1727 | dso = __dsos__findnew(head, filename); | ||
1728 | if (dso != NULL) { | ||
1729 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
1730 | |||
1731 | dso__set_build_id(dso, &bev->build_id); | ||
1732 | |||
1733 | if (filename[0] == '[') | ||
1734 | dso->kernel = dso_type; | ||
1735 | |||
1736 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
1737 | sbuild_id); | ||
1738 | pr_debug("build id event received for %s: %s\n", | ||
1739 | dso->long_name, sbuild_id); | ||
1740 | } | ||
1741 | |||
1742 | err = 0; | ||
1743 | out: | ||
1744 | return err; | ||
1745 | } | ||
1746 | |||
1747 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
1748 | int input, u64 offset, u64 size) | ||
1749 | { | ||
1750 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1751 | struct { | ||
1752 | struct perf_event_header header; | ||
1753 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
1754 | char filename[0]; | ||
1755 | } old_bev; | ||
1756 | struct build_id_event bev; | ||
1757 | char filename[PATH_MAX]; | ||
1758 | u64 limit = offset + size; | ||
1759 | |||
1760 | while (offset < limit) { | ||
1761 | ssize_t len; | ||
1762 | |||
1763 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
1764 | return -1; | ||
1765 | |||
1766 | if (header->needs_swap) | ||
1767 | perf_event_header__bswap(&old_bev.header); | ||
1768 | |||
1769 | len = old_bev.header.size - sizeof(old_bev); | ||
1770 | if (read(input, filename, len) != len) | ||
1771 | return -1; | ||
1772 | |||
1773 | bev.header = old_bev.header; | ||
1774 | |||
1775 | /* | ||
1776 | * Since the old record lacks the pid field, we have to fill | ||
1777 | * it in ourselves. The header.misc value gives us a nice hint. | ||
1778 | */ | ||
1779 | bev.pid = HOST_KERNEL_ID; | ||
1780 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
1781 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
1782 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
1783 | |||
1784 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
1785 | __event_process_build_id(&bev, filename, session); | ||
1786 | |||
1787 | offset += bev.header.size; | ||
1788 | } | ||
1789 | |||
1790 | return 0; | ||
1791 | } | ||
1792 | |||
1793 | static int perf_header__read_build_ids(struct perf_header *header, | ||
1794 | int input, u64 offset, u64 size) | ||
1795 | { | ||
1796 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1797 | struct build_id_event bev; | ||
1798 | char filename[PATH_MAX]; | ||
1799 | u64 limit = offset + size, orig_offset = offset; | ||
1800 | int err = -1; | ||
1801 | |||
1802 | while (offset < limit) { | ||
1803 | ssize_t len; | ||
1804 | |||
1805 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
1806 | goto out; | ||
1807 | |||
1808 | if (header->needs_swap) | ||
1809 | perf_event_header__bswap(&bev.header); | ||
1810 | |||
1811 | len = bev.header.size - sizeof(bev); | ||
1812 | if (read(input, filename, len) != len) | ||
1813 | goto out; | ||
1814 | /* | ||
1815 | * The a1645ce1 changeset: | ||
1816 | * | ||
1817 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
1818 | * | ||
1819 | * Added a field to struct build_id_event that broke the file | ||
1820 | * format. | ||
1821 | * | ||
1822 | * Since the kernel build-id is the first entry, process the | ||
1823 | * table using the old format if the well known | ||
1824 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
1825 | * first 4 characters chopped off (where the pid_t sits). | ||
1826 | */ | ||
1827 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
1828 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
1829 | return -1; | ||
1830 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
1831 | } | ||
1832 | |||
1833 | __event_process_build_id(&bev, filename, session); | ||
1834 | |||
1835 | offset += bev.header.size; | ||
1836 | } | ||
1837 | err = 0; | ||
1838 | out: | ||
1839 | return err; | ||
1840 | } | ||
1841 | |||
1842 | static int perf_file_section__process(struct perf_file_section *section, | 1992 | static int perf_file_section__process(struct perf_file_section *section, |
1843 | struct perf_header *ph, | 1993 | struct perf_header *ph, |
1844 | int feat, int fd, void *data __used) | 1994 | int feat, int fd, void *data __used) |
@@ -1854,40 +2004,32 @@ static int perf_file_section__process(struct perf_file_section *section, | |||
1854 | return 0; | 2004 | return 0; |
1855 | } | 2005 | } |
1856 | 2006 | ||
1857 | switch (feat) { | 2007 | if (!feat_ops[feat].process) |
1858 | case HEADER_TRACE_INFO: | 2008 | return 0; |
1859 | trace_report(fd, false); | ||
1860 | break; | ||
1861 | case HEADER_BUILD_ID: | ||
1862 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
1863 | pr_debug("Failed to read buildids, continuing...\n"); | ||
1864 | break; | ||
1865 | default: | ||
1866 | break; | ||
1867 | } | ||
1868 | 2009 | ||
1869 | return 0; | 2010 | return feat_ops[feat].process(section, ph, feat, fd); |
1870 | } | 2011 | } |
1871 | 2012 | ||
1872 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, | 2013 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, |
1873 | struct perf_header *ph, int fd, | 2014 | struct perf_header *ph, int fd, |
1874 | bool repipe) | 2015 | bool repipe) |
1875 | { | 2016 | { |
1876 | if (readn(fd, header, sizeof(*header)) <= 0 || | 2017 | int ret; |
1877 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | ||
1878 | return -1; | ||
1879 | 2018 | ||
1880 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) | 2019 | ret = readn(fd, header, sizeof(*header)); |
2020 | if (ret <= 0) | ||
1881 | return -1; | 2021 | return -1; |
1882 | 2022 | ||
1883 | if (header->size != sizeof(*header)) { | 2023 | if (check_magic_endian(header->magic, header->size, true, ph) < 0) { |
1884 | u64 size = bswap_64(header->size); | 2024 | pr_debug("endian/magic failed\n"); |
2025 | return -1; | ||
2026 | } | ||
1885 | 2027 | ||
1886 | if (size != sizeof(*header)) | 2028 | if (ph->needs_swap) |
1887 | return -1; | 2029 | header->size = bswap_64(header->size); |
1888 | 2030 | ||
1889 | ph->needs_swap = true; | 2031 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) |
1890 | } | 2032 | return -1; |
1891 | 2033 | ||
1892 | return 0; | 2034 | return 0; |
1893 | } | 2035 | } |
@@ -1908,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) | |||
1908 | return 0; | 2050 | return 0; |
1909 | } | 2051 | } |
1910 | 2052 | ||
2053 | static int read_attr(int fd, struct perf_header *ph, | ||
2054 | struct perf_file_attr *f_attr) | ||
2055 | { | ||
2056 | struct perf_event_attr *attr = &f_attr->attr; | ||
2057 | size_t sz, left; | ||
2058 | size_t our_sz = sizeof(f_attr->attr); | ||
2059 | int ret; | ||
2060 | |||
2061 | memset(f_attr, 0, sizeof(*f_attr)); | ||
2062 | |||
2063 | /* read minimal guaranteed structure */ | ||
2064 | ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); | ||
2065 | if (ret <= 0) { | ||
2066 | pr_debug("cannot read %d bytes of header attr\n", | ||
2067 | PERF_ATTR_SIZE_VER0); | ||
2068 | return -1; | ||
2069 | } | ||
2070 | |||
2071 | /* on-file perf_event_attr size */ | ||
2072 | sz = attr->size; | ||
2073 | |||
2074 | if (ph->needs_swap) | ||
2075 | sz = bswap_32(sz); | ||
2076 | |||
2077 | if (sz == 0) { | ||
2078 | /* assume ABI0 */ | ||
2079 | sz = PERF_ATTR_SIZE_VER0; | ||
2080 | } else if (sz > our_sz) { | ||
2081 | pr_debug("file uses a more recent and unsupported ABI" | ||
2082 | " (%zu bytes extra)\n", sz - our_sz); | ||
2083 | return -1; | ||
2084 | } | ||
2085 | /* what we have not yet read and that we know about */ | ||
2086 | left = sz - PERF_ATTR_SIZE_VER0; | ||
2087 | if (left) { | ||
2088 | void *ptr = attr; | ||
2089 | ptr += PERF_ATTR_SIZE_VER0; | ||
2090 | |||
2091 | ret = readn(fd, ptr, left); | ||
2092 | } | ||
2093 | /* read perf_file_section, ids are read in caller */ | ||
2094 | ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); | ||
2095 | |||
2096 | return ret <= 0 ? -1 : 0; | ||
2097 | } | ||
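read_attr() follows the general recipe for size-prefixed, forward-compatible records: read the guaranteed minimum, trust the embedded size for the remainder, and refuse records larger than the structure this reader was compiled against. A compilable sketch of the recipe with made-up sizes and types:

#include <string.h>
#include <stdint.h>
#include <unistd.h>

#define MIN_SZ 64                       /* assumed ABI0 size */

struct record {
	uint32_t size;                  /* on-disk size of this record */
	char     body[252];
};

static int read_record(int fd, struct record *r)
{
	memset(r, 0, sizeof(*r));       /* zero-fill fields the file predates */

	if (read(fd, r, MIN_SZ) != MIN_SZ)
		return -1;              /* short read: truncated file */

	uint32_t sz = r->size ? r->size : MIN_SZ;   /* 0 means oldest ABI */
	if (sz > sizeof(*r))
		return -1;              /* record from a newer, unknown ABI */

	if (sz > MIN_SZ &&
	    read(fd, (char *)r + MIN_SZ, sz - MIN_SZ) != (ssize_t)(sz - MIN_SZ))
		return -1;

	return 0;
}

The up-front memset is what guarantees that fields known to this reader but absent from an older file read back as zero.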
2098 | |||
1911 | int perf_session__read_header(struct perf_session *session, int fd) | 2099 | int perf_session__read_header(struct perf_session *session, int fd) |
1912 | { | 2100 | { |
1913 | struct perf_header *header = &session->header; | 2101 | struct perf_header *header = &session->header; |
@@ -1923,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) | |||
1923 | if (session->fd_pipe) | 2111 | if (session->fd_pipe) |
1924 | return perf_header__read_pipe(session, fd); | 2112 | return perf_header__read_pipe(session, fd); |
1925 | 2113 | ||
1926 | if (perf_file_header__read(&f_header, header, fd) < 0) { | 2114 | if (perf_file_header__read(&f_header, header, fd) < 0) |
1927 | pr_debug("incompatible file format\n"); | ||
1928 | return -EINVAL; | 2115 | return -EINVAL; |
1929 | } | ||
1930 | 2116 | ||
1931 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | 2117 | nr_attrs = f_header.attrs.size / f_header.attr_size; |
1932 | lseek(fd, f_header.attrs.offset, SEEK_SET); | 2118 | lseek(fd, f_header.attrs.offset, SEEK_SET); |
1933 | 2119 | ||
1934 | for (i = 0; i < nr_attrs; i++) { | 2120 | for (i = 0; i < nr_attrs; i++) { |
1935 | struct perf_evsel *evsel; | 2121 | struct perf_evsel *evsel; |
1936 | off_t tmp; | 2122 | off_t tmp; |
1937 | 2123 | ||
1938 | if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) | 2124 | if (read_attr(fd, header, &f_attr) < 0) |
1939 | goto out_errno; | 2125 | goto out_errno; |
1940 | 2126 | ||
1941 | if (header->needs_swap) | 2127 | if (header->needs_swap) |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ac4ec956024e..21a6be09c129 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | enum { | 12 | enum { |
13 | HEADER_RESERVED = 0, /* always cleared */ | 13 | HEADER_RESERVED = 0, /* always cleared */ |
14 | HEADER_FIRST_FEATURE = 1, | ||
14 | HEADER_TRACE_INFO = 1, | 15 | HEADER_TRACE_INFO = 1, |
15 | HEADER_BUILD_ID, | 16 | HEADER_BUILD_ID, |
16 | 17 | ||
@@ -26,7 +27,7 @@ enum { | |||
26 | HEADER_EVENT_DESC, | 27 | HEADER_EVENT_DESC, |
27 | HEADER_CPU_TOPOLOGY, | 28 | HEADER_CPU_TOPOLOGY, |
28 | HEADER_NUMA_TOPOLOGY, | 29 | HEADER_NUMA_TOPOLOGY, |
29 | 30 | HEADER_BRANCH_STACK, | |
30 | HEADER_LAST_FEATURE, | 31 | HEADER_LAST_FEATURE, |
31 | HEADER_FEAT_BITS = 256, | 32 | HEADER_FEAT_BITS = 256, |
32 | }; | 33 | }; |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e11e482bd185..3dc99a9b71f5 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) | |||
50 | hists__set_col_len(hists, col, 0); | 50 | hists__set_col_len(hists, col, 0); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void hists__set_unres_dso_col_len(struct hists *hists, int dso) | ||
54 | { | ||
55 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
56 | |||
57 | if (hists__col_len(hists, dso) < unresolved_col_width && | ||
58 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
59 | !symbol_conf.dso_list) | ||
60 | hists__set_col_len(hists, dso, unresolved_col_width); | ||
61 | } | ||
62 | |||
53 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | 63 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) |
54 | { | 64 | { |
65 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
55 | u16 len; | 66 | u16 len; |
56 | 67 | ||
57 | if (h->ms.sym) | 68 | if (h->ms.sym) |
58 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); | 69 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); |
59 | else { | 70 | else |
60 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | 71 | hists__set_unres_dso_col_len(hists, HISTC_DSO); |
61 | |||
62 | if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && | ||
63 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
64 | !symbol_conf.dso_list) | ||
65 | hists__set_col_len(hists, HISTC_DSO, | ||
66 | unresolved_col_width); | ||
67 | } | ||
68 | 72 | ||
69 | len = thread__comm_len(h->thread); | 73 | len = thread__comm_len(h->thread); |
70 | if (hists__new_col_len(hists, HISTC_COMM, len)) | 74 | if (hists__new_col_len(hists, HISTC_COMM, len)) |
@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
74 | len = dso__name_len(h->ms.map->dso); | 78 | len = dso__name_len(h->ms.map->dso); |
75 | hists__new_col_len(hists, HISTC_DSO, len); | 79 | hists__new_col_len(hists, HISTC_DSO, len); |
76 | } | 80 | } |
81 | |||
82 | if (h->branch_info) { | ||
83 | int symlen; | ||
84 | /* | ||
85 | * +4 accounts for '[x] ' priv level info | ||
86 | * +2 account of 0x prefix on raw addresses | ||
87 | */ | ||
88 | if (h->branch_info->from.sym) { | ||
89 | symlen = (int)h->branch_info->from.sym->namelen + 4; | ||
90 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
91 | |||
92 | symlen = dso__name_len(h->branch_info->from.map->dso); | ||
93 | hists__new_col_len(hists, HISTC_DSO_FROM, symlen); | ||
94 | } else { | ||
95 | symlen = unresolved_col_width + 4 + 2; | ||
96 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
97 | hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); | ||
98 | } | ||
99 | |||
100 | if (h->branch_info->to.sym) { | ||
101 | symlen = (int)h->branch_info->to.sym->namelen + 4; | ||
102 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
103 | |||
104 | symlen = dso__name_len(h->branch_info->to.map->dso); | ||
105 | hists__new_col_len(hists, HISTC_DSO_TO, symlen); | ||
106 | } else { | ||
107 | symlen = unresolved_col_width + 4 + 2; | ||
108 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
109 | hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); | ||
110 | } | ||
111 | } | ||
77 | } | 112 | } |
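The padding constants in the branch-column sizing are easy to sanity-check: an unresolved address prints as BITS_PER_LONG/4 hex digits, plus 4 for the "[k] " privilege prefix and 2 for "0x". A tiny standalone check:

#include <stdio.h>

int main(void)
{
	const unsigned int unresolved_col_width = sizeof(long) * 8 / 4;

	/* On 64-bit: "[k] 0xffffffff8103d0a0" = 4 + 2 + 16 = 22 columns. */
	printf("width = %u\n", unresolved_col_width + 4 + 2);
	return 0;
}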
78 | 113 | ||
79 | static void hist_entry__add_cpumode_period(struct hist_entry *he, | 114 | static void hist_entry__add_cpumode_period(struct hist_entry *he, |
@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) | |||
195 | return 0; | 230 | return 0; |
196 | } | 231 | } |
197 | 232 | ||
198 | struct hist_entry *__hists__add_entry(struct hists *hists, | 233 | static struct hist_entry *add_hist_entry(struct hists *hists, |
234 | struct hist_entry *entry, | ||
199 | struct addr_location *al, | 235 | struct addr_location *al, |
200 | struct symbol *sym_parent, u64 period) | 236 | u64 period) |
201 | { | 237 | { |
202 | struct rb_node **p; | 238 | struct rb_node **p; |
203 | struct rb_node *parent = NULL; | 239 | struct rb_node *parent = NULL; |
204 | struct hist_entry *he; | 240 | struct hist_entry *he; |
205 | struct hist_entry entry = { | ||
206 | .thread = al->thread, | ||
207 | .ms = { | ||
208 | .map = al->map, | ||
209 | .sym = al->sym, | ||
210 | }, | ||
211 | .cpu = al->cpu, | ||
212 | .ip = al->addr, | ||
213 | .level = al->level, | ||
214 | .period = period, | ||
215 | .parent = sym_parent, | ||
216 | .filtered = symbol__parent_filter(sym_parent), | ||
217 | }; | ||
218 | int cmp; | 241 | int cmp; |
219 | 242 | ||
220 | pthread_mutex_lock(&hists->lock); | 243 | pthread_mutex_lock(&hists->lock); |
@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
225 | parent = *p; | 248 | parent = *p; |
226 | he = rb_entry(parent, struct hist_entry, rb_node_in); | 249 | he = rb_entry(parent, struct hist_entry, rb_node_in); |
227 | 250 | ||
228 | cmp = hist_entry__cmp(&entry, he); | 251 | cmp = hist_entry__cmp(entry, he); |
229 | 252 | ||
230 | if (!cmp) { | 253 | if (!cmp) { |
231 | he->period += period; | 254 | he->period += period; |
@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
239 | p = &(*p)->rb_right; | 262 | p = &(*p)->rb_right; |
240 | } | 263 | } |
241 | 264 | ||
242 | he = hist_entry__new(&entry); | 265 | he = hist_entry__new(entry); |
243 | if (!he) | 266 | if (!he) |
244 | goto out_unlock; | 267 | goto out_unlock; |
245 | 268 | ||
@@ -252,6 +275,51 @@ out_unlock: | |||
252 | return he; | 275 | return he; |
253 | } | 276 | } |
254 | 277 | ||
278 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
279 | struct addr_location *al, | ||
280 | struct symbol *sym_parent, | ||
281 | struct branch_info *bi, | ||
282 | u64 period) | ||
283 | { | ||
284 | struct hist_entry entry = { | ||
285 | .thread = al->thread, | ||
286 | .ms = { | ||
287 | .map = bi->to.map, | ||
288 | .sym = bi->to.sym, | ||
289 | }, | ||
290 | .cpu = al->cpu, | ||
291 | .ip = bi->to.addr, | ||
292 | .level = al->level, | ||
293 | .period = period, | ||
294 | .parent = sym_parent, | ||
295 | .filtered = symbol__parent_filter(sym_parent), | ||
296 | .branch_info = bi, | ||
297 | }; | ||
298 | |||
299 | return add_hist_entry(self, &entry, al, period); | ||
300 | } | ||
301 | |||
302 | struct hist_entry *__hists__add_entry(struct hists *self, | ||
303 | struct addr_location *al, | ||
304 | struct symbol *sym_parent, u64 period) | ||
305 | { | ||
306 | struct hist_entry entry = { | ||
307 | .thread = al->thread, | ||
308 | .ms = { | ||
309 | .map = al->map, | ||
310 | .sym = al->sym, | ||
311 | }, | ||
312 | .cpu = al->cpu, | ||
313 | .ip = al->addr, | ||
314 | .level = al->level, | ||
315 | .period = period, | ||
316 | .parent = sym_parent, | ||
317 | .filtered = symbol__parent_filter(sym_parent), | ||
318 | }; | ||
319 | |||
320 | return add_hist_entry(self, &entry, al, period); | ||
321 | } | ||
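This refactor is the template-on-stack pattern: each public constructor fills a struct hist_entry on its own stack, and one helper owns the insert-or-merge walk. A miniature of the same shape with stand-in types (not the real perf structures):

struct entry {
	unsigned long      ip;
	unsigned long long period;
};

/* Shared inserter: merge on an existing key, else copy the template in.
 * The caller guarantees the pool has room. */
static struct entry *add_entry(struct entry *pool, int *nr,
			       const struct entry *tmpl)
{
	for (int i = 0; i < *nr; i++) {
		if (pool[i].ip == tmpl->ip) {
			pool[i].period += tmpl->period;
			return &pool[i];
		}
	}
	pool[*nr] = *tmpl;
	return &pool[(*nr)++];
}

/* Each entry flavor differs only in how it fills the template. */
struct entry *add_branch_entry(struct entry *pool, int *nr,
			       unsigned long to_ip,
			       unsigned long long period)
{
	struct entry tmpl = { .ip = to_ip, .period = period };
	return add_entry(pool, nr, &tmpl);
}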
322 | |||
255 | int64_t | 323 | int64_t |
256 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | 324 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) |
257 | { | 325 | { |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f55f0a8d1f81..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -32,6 +32,7 @@ struct events_stats { | |||
32 | u32 nr_unknown_events; | 32 | u32 nr_unknown_events; |
33 | u32 nr_invalid_chains; | 33 | u32 nr_invalid_chains; |
34 | u32 nr_unknown_id; | 34 | u32 nr_unknown_id; |
35 | u32 nr_unprocessable_samples; | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | enum hist_column { | 38 | enum hist_column { |
@@ -41,6 +42,11 @@ enum hist_column { | |||
41 | HISTC_COMM, | 42 | HISTC_COMM, |
42 | HISTC_PARENT, | 43 | HISTC_PARENT, |
43 | HISTC_CPU, | 44 | HISTC_CPU, |
45 | HISTC_MISPREDICT, | ||
46 | HISTC_SYMBOL_FROM, | ||
47 | HISTC_SYMBOL_TO, | ||
48 | HISTC_DSO_FROM, | ||
49 | HISTC_DSO_TO, | ||
44 | HISTC_NR_COLS, /* Last entry */ | 50 | HISTC_NR_COLS, /* Last entry */ |
45 | }; | 51 | }; |
46 | 52 | ||
@@ -55,6 +61,7 @@ struct hists { | |||
55 | u64 nr_entries; | 61 | u64 nr_entries; |
56 | const struct thread *thread_filter; | 62 | const struct thread *thread_filter; |
57 | const struct dso *dso_filter; | 63 | const struct dso *dso_filter; |
64 | const char *uid_filter_str; | ||
58 | pthread_mutex_t lock; | 65 | pthread_mutex_t lock; |
59 | struct events_stats stats; | 66 | struct events_stats stats; |
60 | u64 event_stream; | 67 | u64 event_stream; |
@@ -72,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, | |||
72 | struct hists *hists); | 79 | struct hists *hists); |
73 | void hist_entry__free(struct hist_entry *); | 80 | void hist_entry__free(struct hist_entry *); |
74 | 81 | ||
82 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
83 | struct addr_location *al, | ||
84 | struct symbol *sym_parent, | ||
85 | struct branch_info *bi, | ||
86 | u64 period); | ||
87 | |||
75 | void hists__output_resort(struct hists *self); | 88 | void hists__output_resort(struct hists *self); |
76 | void hists__output_resort_threaded(struct hists *hists); | 89 | void hists__output_resort_threaded(struct hists *hists); |
77 | void hists__collapse_resort(struct hists *self); | 90 | void hists__collapse_resort(struct hists *self); |
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h | |||
@@ -2,10 +2,12 @@ | |||
2 | #ifndef PERF_DWARF2_H | 2 | #ifndef PERF_DWARF2_H |
3 | #define PERF_DWARF2_H | 3 | #define PERF_DWARF2_H |
4 | 4 | ||
5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ |
6 | 6 | ||
7 | #define CFI_STARTPROC | 7 | #define CFI_STARTPROC |
8 | #define CFI_ENDPROC | 8 | #define CFI_ENDPROC |
9 | #define CFI_REMEMBER_STATE | ||
10 | #define CFI_RESTORE_STATE | ||
9 | 11 | ||
10 | #endif /* PERF_DWARF2_H */ | 12 | #endif /* PERF_DWARF2_H */ |
11 | 13 | ||
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index eda4416efa0a..bb162e40c76c 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #include <linux/bitops.h> | 5 | #include <linux/bitops.h> |
6 | 6 | ||
7 | int __bitmap_weight(const unsigned long *bitmap, int bits); | 7 | int __bitmap_weight(const unsigned long *bitmap, int bits); |
8 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
9 | const unsigned long *bitmap2, int bits); | ||
8 | 10 | ||
9 | #define BITMAP_LAST_WORD_MASK(nbits) \ | 11 | #define BITMAP_LAST_WORD_MASK(nbits) \ |
10 | ( \ | 12 | ( \ |
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) | |||
32 | return __bitmap_weight(src, nbits); | 34 | return __bitmap_weight(src, nbits); |
33 | } | 35 | } |
34 | 36 | ||
37 | static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, | ||
38 | const unsigned long *src2, int nbits) | ||
39 | { | ||
40 | if (small_const_nbits(nbits)) | ||
41 | *dst = *src1 | *src2; | ||
42 | else | ||
43 | __bitmap_or(dst, src1, src2, nbits); | ||
44 | } | ||
45 | |||
35 | #endif /* _PERF_BITOPS_H */ | 46 | #endif /* _PERF_BITOPS_H */ |
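The wrapper mirrors the kernel's bitmap API shape: a compile-time-small nbits collapses to a single word OR, anything larger goes out of line to __bitmap_or. A usage sketch, assuming this header is included:

unsigned long a = 0x5UL, b = 0x3UL, dst;

/* nbits <= BITS_PER_LONG, so the inline fast path runs: dst == 0x7 */
bitmap_or(&dst, &a, &b, 8);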
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 316aa0ab7122..dea6d1c1a954 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp) | |||
212 | self->start, self->end, self->pgoff, self->dso->name); | 212 | self->start, self->end, self->pgoff, self->dso->name); |
213 | } | 213 | } |
214 | 214 | ||
215 | size_t map__fprintf_dsoname(struct map *map, FILE *fp) | ||
216 | { | ||
217 | const char *dsoname = "[unknown]"; /* fallback when the dso has no usable name */ | ||
218 | |||
219 | if (map && map->dso && (map->dso->name || map->dso->long_name)) { | ||
220 | if (symbol_conf.show_kernel_path && map->dso->long_name) | ||
221 | dsoname = map->dso->long_name; | ||
222 | else if (map->dso->name) | ||
223 | dsoname = map->dso->name; | ||
224 | } else | ||
225 | dsoname = "[unknown]"; | ||
226 | |||
227 | return fprintf(fp, "%s", dsoname); | ||
228 | } | ||
229 | |||
215 | /* | 230 | /* |
216 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. | 231 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. |
217 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. | 232 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2b8017f8a930..b100c20b7f94 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
@@ -118,6 +118,7 @@ void map__delete(struct map *self); | |||
118 | struct map *map__clone(struct map *self); | 118 | struct map *map__clone(struct map *self); |
119 | int map__overlap(struct map *l, struct map *r); | 119 | int map__overlap(struct map *l, struct map *r); |
120 | size_t map__fprintf(struct map *self, FILE *fp); | 120 | size_t map__fprintf(struct map *self, FILE *fp); |
121 | size_t map__fprintf_dsoname(struct map *map, FILE *fp); | ||
121 | 122 | ||
122 | int map__load(struct map *self, symbol_filter_t filter); | 123 | int map__load(struct map *self, symbol_filter_t filter); |
123 | struct symbol *map__find_symbol(struct map *self, | 124 | struct symbol *map__find_symbol(struct map *self, |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e33554a562b3..8a8ee64e72d1 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -34,7 +34,6 @@ | |||
34 | 34 | ||
35 | #include "util.h" | 35 | #include "util.h" |
36 | #include "event.h" | 36 | #include "event.h" |
37 | #include "string.h" | ||
38 | #include "strlist.h" | 37 | #include "strlist.h" |
39 | #include "debug.h" | 38 | #include "debug.h" |
40 | #include "cache.h" | 39 | #include "cache.h" |
@@ -273,10 +272,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, | |||
273 | /* Try to find perf_probe_event with debuginfo */ | 272 | /* Try to find perf_probe_event with debuginfo */ |
274 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | 273 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, |
275 | struct probe_trace_event **tevs, | 274 | struct probe_trace_event **tevs, |
276 | int max_tevs, const char *module) | 275 | int max_tevs, const char *target) |
277 | { | 276 | { |
278 | bool need_dwarf = perf_probe_event_need_dwarf(pev); | 277 | bool need_dwarf = perf_probe_event_need_dwarf(pev); |
279 | struct debuginfo *dinfo = open_debuginfo(module); | 278 | struct debuginfo *dinfo = open_debuginfo(target); |
280 | int ntevs, ret = 0; | 279 | int ntevs, ret = 0; |
281 | 280 | ||
282 | if (!dinfo) { | 281 | if (!dinfo) { |
@@ -295,9 +294,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
295 | 294 | ||
296 | if (ntevs > 0) { /* Succeeded to find trace events */ | 295 | if (ntevs > 0) { /* Succeeded to find trace events */ |
297 | pr_debug("find %d probe_trace_events.\n", ntevs); | 296 | pr_debug("find %d probe_trace_events.\n", ntevs); |
298 | if (module) | 297 | if (target) |
299 | ret = add_module_to_probe_trace_events(*tevs, ntevs, | 298 | ret = add_module_to_probe_trace_events(*tevs, ntevs, |
300 | module); | 299 | target); |
301 | return ret < 0 ? ret : ntevs; | 300 | return ret < 0 ? ret : ntevs; |
302 | } | 301 | } |
303 | 302 | ||
@@ -1729,7 +1728,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1729 | } | 1728 | } |
1730 | 1729 | ||
1731 | ret = 0; | 1730 | ret = 0; |
1732 | printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); | 1731 | printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); |
1733 | for (i = 0; i < ntevs; i++) { | 1732 | for (i = 0; i < ntevs; i++) { |
1734 | tev = &tevs[i]; | 1733 | tev = &tevs[i]; |
1735 | if (pev->event) | 1734 | if (pev->event) |
@@ -1784,7 +1783,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1784 | 1783 | ||
1785 | if (ret >= 0) { | 1784 | if (ret >= 0) { |
1786 | /* Show how to use the event. */ | 1785 | /* Show how to use the event. */ |
1787 | printf("\nYou can now use it on all perf tools, such as:\n\n"); | 1786 | printf("\nYou can now use it in all perf tools, such as:\n\n"); |
1788 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, | 1787 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, |
1789 | tev->event); | 1788 | tev->event); |
1790 | } | 1789 | } |
@@ -1796,14 +1795,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1796 | 1795 | ||
1797 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, | 1796 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, |
1798 | struct probe_trace_event **tevs, | 1797 | struct probe_trace_event **tevs, |
1799 | int max_tevs, const char *module) | 1798 | int max_tevs, const char *target) |
1800 | { | 1799 | { |
1801 | struct symbol *sym; | 1800 | struct symbol *sym; |
1802 | int ret = 0, i; | 1801 | int ret = 0, i; |
1803 | struct probe_trace_event *tev; | 1802 | struct probe_trace_event *tev; |
1804 | 1803 | ||
1805 | /* Convert perf_probe_event with debuginfo */ | 1804 | /* Convert perf_probe_event with debuginfo */ |
1806 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); | 1805 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); |
1807 | if (ret != 0) | 1806 | if (ret != 0) |
1808 | return ret; /* Found in debuginfo or got an error */ | 1807 | return ret; /* Found in debuginfo or got an error */ |
1809 | 1808 | ||
@@ -1819,8 +1818,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, | |||
1819 | goto error; | 1818 | goto error; |
1820 | } | 1819 | } |
1821 | 1820 | ||
1822 | if (module) { | 1821 | if (target) { |
1823 | tev->point.module = strdup(module); | 1822 | tev->point.module = strdup(target); |
1824 | if (tev->point.module == NULL) { | 1823 | if (tev->point.module == NULL) { |
1825 | ret = -ENOMEM; | 1824 | ret = -ENOMEM; |
1826 | goto error; | 1825 | goto error; |
@@ -1890,7 +1889,7 @@ struct __event_package { | |||
1890 | }; | 1889 | }; |
1891 | 1890 | ||
1892 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | 1891 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, |
1893 | int max_tevs, const char *module, bool force_add) | 1892 | int max_tevs, const char *target, bool force_add) |
1894 | { | 1893 | { |
1895 | int i, j, ret; | 1894 | int i, j, ret; |
1896 | struct __event_package *pkgs; | 1895 | struct __event_package *pkgs; |
@@ -1913,7 +1912,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | |||
1913 | ret = convert_to_probe_trace_events(pkgs[i].pev, | 1912 | ret = convert_to_probe_trace_events(pkgs[i].pev, |
1914 | &pkgs[i].tevs, | 1913 | &pkgs[i].tevs, |
1915 | max_tevs, | 1914 | max_tevs, |
1916 | module); | 1915 | target); |
1917 | if (ret < 0) | 1916 | if (ret < 0) |
1918 | goto end; | 1917 | goto end; |
1919 | pkgs[i].ntevs = ret; | 1918 | pkgs[i].ntevs = ret; |
@@ -1965,7 +1964,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) | |||
1965 | goto error; | 1964 | goto error; |
1966 | } | 1965 | } |
1967 | 1966 | ||
1968 | printf("Remove event: %s\n", ent->s); | 1967 | printf("Removed event: %s\n", ent->s); |
1969 | return 0; | 1968 | return 0; |
1970 | error: | 1969 | error: |
1971 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); | 1970 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); |
@@ -2069,7 +2068,7 @@ static int filter_available_functions(struct map *map __unused, | |||
2069 | return 1; | 2068 | return 1; |
2070 | } | 2069 | } |
2071 | 2070 | ||
2072 | int show_available_funcs(const char *module, struct strfilter *_filter) | 2071 | int show_available_funcs(const char *target, struct strfilter *_filter) |
2073 | { | 2072 | { |
2074 | struct map *map; | 2073 | struct map *map; |
2075 | int ret; | 2074 | int ret; |
@@ -2080,9 +2079,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter) | |||
2080 | if (ret < 0) | 2079 | if (ret < 0) |
2081 | return ret; | 2080 | return ret; |
2082 | 2081 | ||
2083 | map = kernel_get_module_map(module); | 2082 | map = kernel_get_module_map(target); |
2084 | if (!map) { | 2083 | if (!map) { |
2085 | pr_err("Failed to find %s map.\n", (module) ? : "kernel"); | 2084 | pr_err("Failed to find %s map.\n", (target) ? : "kernel"); |
2086 | return -EINVAL; | 2085 | return -EINVAL; |
2087 | } | 2086 | } |
2088 | available_func_filter = _filter; | 2087 | available_func_filter = _filter; |
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 74bd2e63c4b4..2cc162d3b78c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <stdlib.h> | 30 | #include <stdlib.h> |
31 | #include <string.h> | 31 | #include <string.h> |
32 | #include <stdarg.h> | 32 | #include <stdarg.h> |
33 | #include <ctype.h> | ||
34 | #include <dwarf-regs.h> | 33 | #include <dwarf-regs.h> |
35 | 34 | ||
36 | #include <linux/bitops.h> | 35 | #include <linux/bitops.h> |
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources new file mode 100644 index 000000000000..2884e67ee625 --- /dev/null +++ b/tools/perf/util/python-ext-sources | |||
@@ -0,0 +1,19 @@ | |||
1 | # | ||
2 | # List of files needed by perf python extension | ||
3 | # | ||
4 | # Each source file must be placed on its own line so that it can be | ||
5 | # processed by Makefile and util/setup.py accordingly. | ||
6 | # | ||
7 | |||
8 | util/python.c | ||
9 | util/ctype.c | ||
10 | util/evlist.c | ||
11 | util/evsel.c | ||
12 | util/cpumap.c | ||
13 | util/thread_map.c | ||
14 | util/util.c | ||
15 | util/xyarray.c | ||
16 | util/cgroup.c | ||
17 | util/debugfs.c | ||
18 | util/strlist.c | ||
19 | ../../lib/rbtree.c | ||
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9dd47a4f2596..e03b58a48424 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c | |||
@@ -425,14 +425,14 @@ struct pyrf_thread_map { | |||
425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, | 425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, |
426 | PyObject *args, PyObject *kwargs) | 426 | PyObject *args, PyObject *kwargs) |
427 | { | 427 | { |
428 | static char *kwlist[] = { "pid", "tid", NULL }; | 428 | static char *kwlist[] = { "pid", "tid", "uid", NULL }; |
429 | int pid = -1, tid = -1; | 429 | int pid = -1, tid = -1, uid = UINT_MAX; |
430 | 430 | ||
431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", | 431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", |
432 | kwlist, &pid, &tid)) | 432 | kwlist, &pid, &tid, &uid)) |
433 | return -1; | 433 | return -1; |
434 | 434 | ||
435 | pthreads->threads = thread_map__new(pid, tid); | 435 | pthreads->threads = thread_map__new(pid, tid, uid); |
436 | if (pthreads->threads == NULL) | 436 | if (pthreads->threads == NULL) |
437 | return -1; | 437 | return -1; |
438 | return 0; | 438 | return 0; |
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0b2a48783172..c2623c6f9b51 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <stdio.h> | 24 | #include <stdio.h> |
25 | #include <stdlib.h> | 25 | #include <stdlib.h> |
26 | #include <string.h> | 26 | #include <string.h> |
27 | #include <ctype.h> | ||
28 | #include <errno.h> | 27 | #include <errno.h> |
29 | 28 | ||
30 | #include "../../perf.h" | 29 | #include "../../perf.h" |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b5ca2558c7bb..002ebbf59f48 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
24 | self->fd = STDIN_FILENO; | 24 | self->fd = STDIN_FILENO; |
25 | 25 | ||
26 | if (perf_session__read_header(self, self->fd) < 0) | 26 | if (perf_session__read_header(self, self->fd) < 0) |
27 | pr_err("incompatible file format"); | 27 | pr_err("incompatible file format (rerun with -v to learn more)"); |
28 | 28 | ||
29 | return 0; | 29 | return 0; |
30 | } | 30 | } |
@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
56 | } | 56 | } |
57 | 57 | ||
58 | if (perf_session__read_header(self, self->fd) < 0) { | 58 | if (perf_session__read_header(self, self->fd) < 0) { |
59 | pr_err("incompatible file format"); | 59 | pr_err("incompatible file format (rerun with -v to learn more)"); |
60 | goto out_close; | 60 | goto out_close; |
61 | } | 61 | } |
62 | 62 | ||
@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) | |||
229 | return 0; | 229 | return 0; |
230 | } | 230 | } |
231 | 231 | ||
232 | static const u8 cpumodes[] = { | ||
233 | PERF_RECORD_MISC_USER, | ||
234 | PERF_RECORD_MISC_KERNEL, | ||
235 | PERF_RECORD_MISC_GUEST_USER, | ||
236 | PERF_RECORD_MISC_GUEST_KERNEL | ||
237 | }; | ||
238 | #define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) | ||
239 | |||
240 | static void ip__resolve_ams(struct machine *self, struct thread *thread, | ||
241 | struct addr_map_symbol *ams, | ||
242 | u64 ip) | ||
243 | { | ||
244 | struct addr_location al; | ||
245 | size_t i; | ||
246 | u8 m; | ||
247 | |||
248 | memset(&al, 0, sizeof(al)); | ||
249 | |||
250 | for (i = 0; i < NCPUMODES; i++) { | ||
251 | m = cpumodes[i]; | ||
252 | /* | ||
253 | * We cannot use the header.misc hint to determine whether a | ||
254 | * branch stack address is user, kernel, guest, or hypervisor code. | ||
255 | * Branches may straddle the kernel/user/hypervisor boundaries. | ||
256 | * Thus, we have to try each cpumode in turn until we find a match; | ||
257 | * otherwise the symbol remains unknown. | ||
258 | */ | ||
259 | thread__find_addr_location(thread, self, m, MAP__FUNCTION, | ||
260 | ip, &al, NULL); | ||
261 | if (al.sym) | ||
262 | goto found; | ||
263 | } | ||
264 | found: | ||
265 | ams->addr = ip; | ||
266 | ams->al_addr = al.addr; | ||
267 | ams->sym = al.sym; | ||
268 | ams->map = al.map; | ||
269 | } | ||
270 | |||
271 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
272 | struct thread *thr, | ||
273 | struct branch_stack *bs) | ||
274 | { | ||
275 | struct branch_info *bi; | ||
276 | unsigned int i; | ||
277 | |||
278 | bi = calloc(bs->nr, sizeof(struct branch_info)); | ||
279 | if (!bi) | ||
280 | return NULL; | ||
281 | |||
282 | for (i = 0; i < bs->nr; i++) { | ||
283 | ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); | ||
284 | ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); | ||
285 | bi[i].flags = bs->entries[i].flags; | ||
286 | } | ||
287 | return bi; | ||
288 | } | ||
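A plausible consumer, sketched from the APIs this diff introduces (machine, al, parent, sample and evsel are assumed to come from the surrounding sample handler): resolve the branch stack once per sample, then add one hist entry per branch. Note that the hist entries store the &bi[i] pointers via .branch_info, so the array must stay allocated and is not freed here.

struct branch_info *bi = machine__resolve_bstack(machine, al.thread,
						 sample->branch_stack);
if (bi != NULL) {
	for (u64 i = 0; i < sample->branch_stack->nr; i++) {
		/* one entry per branch, keyed on the 'to' side */
		if (!__hists__add_branch_entry(&evsel->hists, &al, parent,
					       &bi[i], sample->period))
			break;          /* allocation failed: stop adding */
	}
}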
289 | |||
232 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | 290 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, |
233 | struct thread *thread, | 291 | struct thread *thread, |
234 | struct ip_callchain *chain, | 292 | struct ip_callchain *chain, |
@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) | |||
697 | i, sample->callchain->ips[i]); | 755 | i, sample->callchain->ips[i]); |
698 | } | 756 | } |
699 | 757 | ||
758 | static void branch_stack__printf(struct perf_sample *sample) | ||
759 | { | ||
760 | uint64_t i; | ||
761 | |||
762 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | ||
763 | |||
764 | for (i = 0; i < sample->branch_stack->nr; i++) | ||
765 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | ||
766 | i, sample->branch_stack->entries[i].from, | ||
767 | sample->branch_stack->entries[i].to); | ||
768 | } | ||
769 | |||
700 | static void perf_session__print_tstamp(struct perf_session *session, | 770 | static void perf_session__print_tstamp(struct perf_session *session, |
701 | union perf_event *event, | 771 | union perf_event *event, |
702 | struct perf_sample *sample) | 772 | struct perf_sample *sample) |
@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, | |||
744 | 814 | ||
745 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) | 815 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) |
746 | callchain__printf(sample); | 816 | callchain__printf(sample); |
817 | |||
818 | if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
819 | branch_stack__printf(sample); | ||
747 | } | 820 | } |
748 | 821 | ||
749 | static struct machine * | 822 | static struct machine * |
@@ -796,6 +869,10 @@ static int perf_session_deliver_event(struct perf_session *session, | |||
796 | ++session->hists.stats.nr_unknown_id; | 869 | ++session->hists.stats.nr_unknown_id; |
797 | return -1; | 870 | return -1; |
798 | } | 871 | } |
872 | if (machine == NULL) { | ||
873 | ++session->hists.stats.nr_unprocessable_samples; | ||
874 | return -1; | ||
875 | } | ||
799 | return tool->sample(tool, event, sample, evsel, machine); | 876 | return tool->sample(tool, event, sample, evsel, machine); |
800 | case PERF_RECORD_MMAP: | 877 | case PERF_RECORD_MMAP: |
801 | return tool->mmap(tool, event, sample, machine); | 878 | return tool->mmap(tool, event, sample, machine); |
@@ -964,6 +1041,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session, | |||
964 | session->hists.stats.nr_invalid_chains, | 1041 | session->hists.stats.nr_invalid_chains, |
965 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); | 1042 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); |
966 | } | 1043 | } |
1044 | |||
1045 | if (session->hists.stats.nr_unprocessable_samples != 0) { | ||
1046 | ui__warning("%u unprocessable samples recorded.\n" | ||
1047 | "Do you have a KVM guest running and not using 'perf kvm'?\n", | ||
1048 | session->hists.stats.nr_unprocessable_samples); | ||
1049 | } | ||
967 | } | 1050 | } |
968 | 1051 | ||
969 | #define session_done() (*(volatile int *)(&session_done)) | 1052 | #define session_done() (*(volatile int *)(&session_done)) |
@@ -1293,10 +1376,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
1293 | 1376 | ||
1294 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 1377 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
1295 | struct machine *machine, struct perf_evsel *evsel, | 1378 | struct machine *machine, struct perf_evsel *evsel, |
1296 | int print_sym, int print_dso) | 1379 | int print_sym, int print_dso, int print_symoffset) |
1297 | { | 1380 | { |
1298 | struct addr_location al; | 1381 | struct addr_location al; |
1299 | const char *symname, *dsoname; | ||
1300 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; | 1382 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; |
1301 | struct callchain_cursor_node *node; | 1383 | struct callchain_cursor_node *node; |
1302 | 1384 | ||
@@ -1324,20 +1406,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1324 | 1406 | ||
1325 | printf("\t%16" PRIx64, node->ip); | 1407 | printf("\t%16" PRIx64, node->ip); |
1326 | if (print_sym) { | 1408 | if (print_sym) { |
1327 | if (node->sym && node->sym->name) | 1409 | printf(" "); |
1328 | symname = node->sym->name; | 1410 | symbol__fprintf_symname(node->sym, stdout); |
1329 | else | ||
1330 | symname = ""; | ||
1331 | |||
1332 | printf(" %s", symname); | ||
1333 | } | 1411 | } |
1334 | if (print_dso) { | 1412 | if (print_dso) { |
1335 | if (node->map && node->map->dso && node->map->dso->name) | 1413 | printf(" ("); |
1336 | dsoname = node->map->dso->name; | 1414 | map__fprintf_dsoname(al.map, stdout); |
1337 | else | 1415 | printf(")"); |
1338 | dsoname = ""; | ||
1339 | |||
1340 | printf(" (%s)", dsoname); | ||
1341 | } | 1416 | } |
1342 | printf("\n"); | 1417 | printf("\n"); |
1343 | 1418 | ||
@@ -1347,21 +1422,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1347 | } else { | 1422 | } else { |
1348 | printf("%16" PRIx64, sample->ip); | 1423 | printf("%16" PRIx64, sample->ip); |
1349 | if (print_sym) { | 1424 | if (print_sym) { |
1350 | if (al.sym && al.sym->name) | 1425 | printf(" "); |
1351 | symname = al.sym->name; | 1426 | if (print_symoffset) |
1427 | symbol__fprintf_symname_offs(al.sym, &al, | ||
1428 | stdout); | ||
1352 | else | 1429 | else |
1353 | symname = ""; | 1430 | symbol__fprintf_symname(al.sym, stdout); |
1354 | |||
1355 | printf(" %s", symname); | ||
1356 | } | 1431 | } |
1357 | 1432 | ||
1358 | if (print_dso) { | 1433 | if (print_dso) { |
1359 | if (al.map && al.map->dso && al.map->dso->name) | 1434 | printf(" ("); |
1360 | dsoname = al.map->dso->name; | 1435 | map__fprintf_dsoname(al.map, stdout); |
1361 | else | 1436 | printf(")"); |
1362 | dsoname = ""; | ||
1363 | |||
1364 | printf(" (%s)", dsoname); | ||
1365 | } | 1437 | } |
1366 | } | 1438 | } |
1367 | } | 1439 | } |
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 37bc38381fb6..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel | |||
73 | struct ip_callchain *chain, | 73 | struct ip_callchain *chain, |
74 | struct symbol **parent); | 74 | struct symbol **parent); |
75 | 75 | ||
76 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
77 | struct thread *thread, | ||
78 | struct branch_stack *bs); | ||
79 | |||
76 | bool perf_session__has_traces(struct perf_session *self, const char *msg); | 80 | bool perf_session__has_traces(struct perf_session *self, const char *msg); |
77 | 81 | ||
78 | void mem_bswap_64(void *src, int byte_size); | 82 | void mem_bswap_64(void *src, int byte_size); |
@@ -147,7 +151,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
147 | 151 | ||
148 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 152 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
149 | struct machine *machine, struct perf_evsel *evsel, | 153 | struct machine *machine, struct perf_evsel *evsel, |
150 | int print_sym, int print_dso); | 154 | int print_sym, int print_dso, int print_symoffset); |
151 | 155 | ||
152 | int perf_session__cpu_bitmap(struct perf_session *session, | 156 | int perf_session__cpu_bitmap(struct perf_session *session, |
153 | const char *cpu_list, unsigned long *cpu_bitmap); | 157 | const char *cpu_list, unsigned long *cpu_bitmap); |
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 36d4c5619575..d0f9f29cf181 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py | |||
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split() | |||
24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') | 24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') |
25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') | 25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') |
26 | 26 | ||
27 | ext_sources = [f.strip() for f in file('util/python-ext-sources') | ||
28 | if len(f.strip()) > 0 and f[0] != '#'] | ||
29 | |||
27 | perf = Extension('perf', | 30 | perf = Extension('perf', |
28 | sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', | 31 | sources = ext_sources, |
29 | 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', | ||
30 | 'util/util.c', 'util/xyarray.c', 'util/cgroup.c', | ||
31 | 'util/debugfs.c'], | ||
32 | include_dirs = ['util/include'], | 32 | include_dirs = ['util/include'], |
33 | extra_compile_args = cflags, | 33 | extra_compile_args = cflags, |
34 | ) | 34 | ) |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 076c9d4e1ea4..a27237430c5f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; | |||
8 | const char *sort_order = default_sort_order; | 8 | const char *sort_order = default_sort_order; |
9 | int sort__need_collapse = 0; | 9 | int sort__need_collapse = 0; |
10 | int sort__has_parent = 0; | 10 | int sort__has_parent = 0; |
11 | int sort__branch_mode = -1; /* -1 = not set */ | ||
11 | 12 | ||
12 | enum sort_type sort__first_dimension; | 13 | enum sort_type sort__first_dimension; |
13 | 14 | ||
@@ -97,6 +98,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, | |||
97 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); | 98 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); |
98 | } | 99 | } |
99 | 100 | ||
101 | static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) | ||
102 | { | ||
103 | struct dso *dso_l = map_l ? map_l->dso : NULL; | ||
104 | struct dso *dso_r = map_r ? map_r->dso : NULL; | ||
105 | const char *dso_name_l, *dso_name_r; | ||
106 | |||
107 | if (!dso_l || !dso_r) | ||
108 | return cmp_null(dso_l, dso_r); | ||
109 | |||
110 | if (verbose) { | ||
111 | dso_name_l = dso_l->long_name; | ||
112 | dso_name_r = dso_r->long_name; | ||
113 | } else { | ||
114 | dso_name_l = dso_l->short_name; | ||
115 | dso_name_r = dso_r->short_name; | ||
116 | } | ||
117 | |||
118 | return strcmp(dso_name_l, dso_name_r); | ||
119 | } | ||
120 | |||
100 | struct sort_entry sort_comm = { | 121 | struct sort_entry sort_comm = { |
101 | .se_header = "Command", | 122 | .se_header = "Command", |
102 | .se_cmp = sort__comm_cmp, | 123 | .se_cmp = sort__comm_cmp, |
@@ -110,36 +131,74 @@ struct sort_entry sort_comm = { | |||
110 | static int64_t | 131 | static int64_t |
111 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | 132 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) |
112 | { | 133 | { |
113 | struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; | 134 | return _sort__dso_cmp(left->ms.map, right->ms.map); |
114 | struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; | 135 | } |
115 | const char *dso_name_l, *dso_name_r; | ||
116 | 136 | ||
117 | if (!dso_l || !dso_r) | ||
118 | return cmp_null(dso_l, dso_r); | ||
119 | 137 | ||
120 | if (verbose) { | 138 | static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, |
121 | dso_name_l = dso_l->long_name; | 139 | u64 ip_l, u64 ip_r) |
122 | dso_name_r = dso_r->long_name; | 140 | { |
123 | } else { | 141 | if (!sym_l || !sym_r) |
124 | dso_name_l = dso_l->short_name; | 142 | return cmp_null(sym_l, sym_r); |
125 | dso_name_r = dso_r->short_name; | 143 | |
144 | if (sym_l == sym_r) | ||
145 | return 0; | ||
146 | |||
147 | if (sym_l) | ||
148 | ip_l = sym_l->start; | ||
149 | if (sym_r) | ||
150 | ip_r = sym_r->start; | ||
151 | |||
152 | return (int64_t)(ip_r - ip_l); | ||
153 | } | ||
154 | |||
155 | static int _hist_entry__dso_snprintf(struct map *map, char *bf, | ||
156 | size_t size, unsigned int width) | ||
157 | { | ||
158 | if (map && map->dso) { | ||
159 | const char *dso_name = !verbose ? map->dso->short_name : | ||
160 | map->dso->long_name; | ||
161 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | ||
126 | } | 162 | } |
127 | 163 | ||
128 | return strcmp(dso_name_l, dso_name_r); | 164 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); |
129 | } | 165 | } |
130 | 166 | ||
131 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, | 167 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, |
132 | size_t size, unsigned int width) | 168 | size_t size, unsigned int width) |
133 | { | 169 | { |
134 | if (self->ms.map && self->ms.map->dso) { | 170 | return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); |
135 | const char *dso_name = !verbose ? self->ms.map->dso->short_name : | 171 | } |
136 | self->ms.map->dso->long_name; | 172 | |
137 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | 173 | static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, |
174 | u64 ip, char level, char *bf, size_t size, | ||
175 | unsigned int width __used) | ||
176 | { | ||
177 | size_t ret = 0; | ||
178 | |||
179 | if (verbose) { | ||
180 | char o = map ? dso__symtab_origin(map->dso) : '!'; | ||
181 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
182 | BITS_PER_LONG / 4, ip, o); | ||
138 | } | 183 | } |
139 | 184 | ||
140 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); | 185 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); |
186 | if (sym) | ||
187 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
188 | width - ret, | ||
189 | sym->name); | ||
190 | else { | ||
191 | size_t len = BITS_PER_LONG / 4; | ||
192 | ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", | ||
193 | len, ip); | ||
194 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
195 | width - ret, ""); | ||
196 | } | ||
197 | |||
198 | return ret; | ||
141 | } | 199 | } |
142 | 200 | ||
201 | |||
143 | struct sort_entry sort_dso = { | 202 | struct sort_entry sort_dso = { |
144 | .se_header = "Shared Object", | 203 | .se_header = "Shared Object", |
145 | .se_cmp = sort__dso_cmp, | 204 | .se_cmp = sort__dso_cmp, |
@@ -147,8 +206,14 @@ struct sort_entry sort_dso = { | |||
147 | .se_width_idx = HISTC_DSO, | 206 | .se_width_idx = HISTC_DSO, |
148 | }; | 207 | }; |
149 | 208 | ||
150 | /* --sort symbol */ | 209 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, |
210 | size_t size, unsigned int width __used) | ||
211 | { | ||
212 | return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, | ||
213 | self->level, bf, size, width); | ||
214 | } | ||
151 | 215 | ||
216 | /* --sort symbol */ | ||
152 | static int64_t | 217 | static int64_t |
153 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 218 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
154 | { | 219 | { |
@@ -166,31 +231,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | |||
166 | ip_l = left->ms.sym->start; | 231 | ip_l = left->ms.sym->start; |
167 | ip_r = right->ms.sym->start; | 232 | ip_r = right->ms.sym->start; |
168 | 233 | ||
169 | return (int64_t)(ip_r - ip_l); | 234 | return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); |
170 | } | ||
171 | |||
172 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | ||
173 | size_t size, unsigned int width __used) | ||
174 | { | ||
175 | size_t ret = 0; | ||
176 | |||
177 | if (verbose) { | ||
178 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; | ||
179 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
180 | BITS_PER_LONG / 4, self->ip, o); | ||
181 | } | ||
182 | |||
183 | if (!sort_dso.elide) | ||
184 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); | ||
185 | |||
186 | if (self->ms.sym) | ||
187 | ret += repsep_snprintf(bf + ret, size - ret, "%s", | ||
188 | self->ms.sym->name); | ||
189 | else | ||
190 | ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", | ||
191 | BITS_PER_LONG / 4, self->ip); | ||
192 | |||
193 | return ret; | ||
194 | } | 235 | } |
195 | 236 | ||
196 | struct sort_entry sort_sym = { | 237 | struct sort_entry sort_sym = { |
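
The refactor above splits the comparator out of sort__sym_cmp() so the branch from/to sort keys can reuse it. The ordering rule: entries are ranked by symbol start address, and two entries resolving to the same symbol compare equal so they collapse into one histogram row. A toy sketch of that rule under simplified types:

#include <stdint.h>
#include <stdio.h>

struct symbol { uint64_t start; };

/* rank entries by symbol start address; identical symbols compare equal */
static int64_t sym_cmp(const struct symbol *l, const struct symbol *r)
{
	if (!l || !r)
		return !!l - !!r;	/* unresolved sides get a stable slot */
	if (l == r)
		return 0;		/* same symbol: one histogram bucket */
	return (int64_t)(r->start - l->start);
}

int main(void)
{
	struct symbol a = { 0x1000 }, b = { 0x2000 };

	printf("%lld %lld\n", (long long)sym_cmp(&a, &b),
	       (long long)sym_cmp(&a, &a));	/* 4096 0 */
	return 0;
}
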
@@ -249,19 +290,155 @@ struct sort_entry sort_cpu = { | |||
249 | .se_width_idx = HISTC_CPU, | 290 | .se_width_idx = HISTC_CPU, |
250 | }; | 291 | }; |
251 | 292 | ||
293 | static int64_t | ||
294 | sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
295 | { | ||
296 | return _sort__dso_cmp(left->branch_info->from.map, | ||
297 | right->branch_info->from.map); | ||
298 | } | ||
299 | |||
300 | static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, | ||
301 | size_t size, unsigned int width) | ||
302 | { | ||
303 | return _hist_entry__dso_snprintf(self->branch_info->from.map, | ||
304 | bf, size, width); | ||
305 | } | ||
306 | |||
307 | struct sort_entry sort_dso_from = { | ||
308 | .se_header = "Source Shared Object", | ||
309 | .se_cmp = sort__dso_from_cmp, | ||
310 | .se_snprintf = hist_entry__dso_from_snprintf, | ||
311 | .se_width_idx = HISTC_DSO_FROM, | ||
312 | }; | ||
313 | |||
314 | static int64_t | ||
315 | sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
316 | { | ||
317 | return _sort__dso_cmp(left->branch_info->to.map, | ||
318 | right->branch_info->to.map); | ||
319 | } | ||
320 | |||
321 | static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, | ||
322 | size_t size, unsigned int width) | ||
323 | { | ||
324 | return _hist_entry__dso_snprintf(self->branch_info->to.map, | ||
325 | bf, size, width); | ||
326 | } | ||
327 | |||
328 | static int64_t | ||
329 | sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
330 | { | ||
331 | struct addr_map_symbol *from_l = &left->branch_info->from; | ||
332 | struct addr_map_symbol *from_r = &right->branch_info->from; | ||
333 | |||
334 | if (!from_l->sym && !from_r->sym) | ||
335 | return right->level - left->level; | ||
336 | |||
337 | return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, | ||
338 | from_r->addr); | ||
339 | } | ||
340 | |||
341 | static int64_t | ||
342 | sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
343 | { | ||
344 | struct addr_map_symbol *to_l = &left->branch_info->to; | ||
345 | struct addr_map_symbol *to_r = &right->branch_info->to; | ||
346 | |||
347 | if (!to_l->sym && !to_r->sym) | ||
348 | return right->level - left->level; | ||
349 | |||
350 | return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); | ||
351 | } | ||
352 | |||
353 | static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, | ||
354 | size_t size, unsigned int width __used) | ||
355 | { | ||
356 | struct addr_map_symbol *from = &self->branch_info->from; | ||
357 | return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, | ||
358 | self->level, bf, size, width); | ||
359 | |||
360 | } | ||
361 | |||
362 | static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, | ||
363 | size_t size, unsigned int width __used) | ||
364 | { | ||
365 | struct addr_map_symbol *to = &self->branch_info->to; | ||
366 | return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, | ||
367 | self->level, bf, size, width); | ||
368 | |||
369 | } | ||
370 | |||
371 | struct sort_entry sort_dso_to = { | ||
372 | .se_header = "Target Shared Object", | ||
373 | .se_cmp = sort__dso_to_cmp, | ||
374 | .se_snprintf = hist_entry__dso_to_snprintf, | ||
375 | .se_width_idx = HISTC_DSO_TO, | ||
376 | }; | ||
377 | |||
378 | struct sort_entry sort_sym_from = { | ||
379 | .se_header = "Source Symbol", | ||
380 | .se_cmp = sort__sym_from_cmp, | ||
381 | .se_snprintf = hist_entry__sym_from_snprintf, | ||
382 | .se_width_idx = HISTC_SYMBOL_FROM, | ||
383 | }; | ||
384 | |||
385 | struct sort_entry sort_sym_to = { | ||
386 | .se_header = "Target Symbol", | ||
387 | .se_cmp = sort__sym_to_cmp, | ||
388 | .se_snprintf = hist_entry__sym_to_snprintf, | ||
389 | .se_width_idx = HISTC_SYMBOL_TO, | ||
390 | }; | ||
391 | |||
392 | static int64_t | ||
393 | sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) | ||
394 | { | ||
395 | const unsigned char mp = left->branch_info->flags.mispred != | ||
396 | right->branch_info->flags.mispred; | ||
397 | const unsigned char p = left->branch_info->flags.predicted != | ||
398 | right->branch_info->flags.predicted; | ||
399 | |||
400 | return mp || p; | ||
401 | } | ||
402 | |||
403 | static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, | ||
404 | size_t size, unsigned int width){ | ||
405 | static const char *out = "N/A"; | ||
406 | |||
407 | if (self->branch_info->flags.predicted) | ||
408 | out = "N"; | ||
409 | else if (self->branch_info->flags.mispred) | ||
410 | out = "Y"; | ||
411 | |||
412 | return repsep_snprintf(bf, size, "%-*s", width, out); | ||
413 | } | ||
414 | |||
415 | struct sort_entry sort_mispredict = { | ||
416 | .se_header = "Branch Mispredicted", | ||
417 | .se_cmp = sort__mispredict_cmp, | ||
418 | .se_snprintf = hist_entry__mispredict_snprintf, | ||
419 | .se_width_idx = HISTC_MISPREDICT, | ||
420 | }; | ||
421 | |||
252 | struct sort_dimension { | 422 | struct sort_dimension { |
253 | const char *name; | 423 | const char *name; |
254 | struct sort_entry *entry; | 424 | struct sort_entry *entry; |
255 | int taken; | 425 | int taken; |
256 | }; | 426 | }; |
257 | 427 | ||
428 | #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } | ||
429 | |||
258 | static struct sort_dimension sort_dimensions[] = { | 430 | static struct sort_dimension sort_dimensions[] = { |
259 | { .name = "pid", .entry = &sort_thread, }, | 431 | DIM(SORT_PID, "pid", sort_thread), |
260 | { .name = "comm", .entry = &sort_comm, }, | 432 | DIM(SORT_COMM, "comm", sort_comm), |
261 | { .name = "dso", .entry = &sort_dso, }, | 433 | DIM(SORT_DSO, "dso", sort_dso), |
262 | { .name = "symbol", .entry = &sort_sym, }, | 434 | DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), |
263 | { .name = "parent", .entry = &sort_parent, }, | 435 | DIM(SORT_DSO_TO, "dso_to", sort_dso_to), |
264 | { .name = "cpu", .entry = &sort_cpu, }, | 436 | DIM(SORT_SYM, "symbol", sort_sym), |
437 | DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), | ||
438 | DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), | ||
439 | DIM(SORT_PARENT, "parent", sort_parent), | ||
440 | DIM(SORT_CPU, "cpu", sort_cpu), | ||
441 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | ||
265 | }; | 442 | }; |
266 | 443 | ||
267 | int sort_dimension__add(const char *tok) | 444 | int sort_dimension__add(const char *tok) |
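
The new DIM() macro switches the table to C99 designated array initializers, so each slot is pinned to its enum sort_type index and the listing order no longer has to mirror the enum. A small self-contained illustration of the idiom, with hypothetical names:

#include <stdio.h>

enum sort_type { SORT_PID, SORT_COMM, SORT_DSO, SORT_NR };

struct sort_dimension { const char *name; };

#define DIM(d, n) [d] = { .name = n }

static struct sort_dimension dims[SORT_NR] = {
	DIM(SORT_DSO,  "dso"),		/* listed out of order on purpose */
	DIM(SORT_PID,  "pid"),
	DIM(SORT_COMM, "comm"),
};

int main(void)
{
	printf("%s\n", dims[SORT_COMM].name);	/* "comm": slot follows the enum */
	return 0;
}
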
@@ -273,7 +450,6 @@ int sort_dimension__add(const char *tok) | |||
273 | 450 | ||
274 | if (strncasecmp(tok, sd->name, strlen(tok))) | 451 | if (strncasecmp(tok, sd->name, strlen(tok))) |
275 | continue; | 452 | continue; |
276 | |||
277 | if (sd->entry == &sort_parent) { | 453 | if (sd->entry == &sort_parent) { |
278 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); | 454 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); |
279 | if (ret) { | 455 | if (ret) { |
@@ -305,6 +481,16 @@ int sort_dimension__add(const char *tok) | |||
305 | sort__first_dimension = SORT_PARENT; | 481 | sort__first_dimension = SORT_PARENT; |
306 | else if (!strcmp(sd->name, "cpu")) | 482 | else if (!strcmp(sd->name, "cpu")) |
307 | sort__first_dimension = SORT_CPU; | 483 | sort__first_dimension = SORT_CPU; |
484 | else if (!strcmp(sd->name, "symbol_from")) | ||
485 | sort__first_dimension = SORT_SYM_FROM; | ||
486 | else if (!strcmp(sd->name, "symbol_to")) | ||
487 | sort__first_dimension = SORT_SYM_TO; | ||
488 | else if (!strcmp(sd->name, "dso_from")) | ||
489 | sort__first_dimension = SORT_DSO_FROM; | ||
490 | else if (!strcmp(sd->name, "dso_to")) | ||
491 | sort__first_dimension = SORT_DSO_TO; | ||
492 | else if (!strcmp(sd->name, "mispredict")) | ||
493 | sort__first_dimension = SORT_MISPREDICT; | ||
308 | } | 494 | } |
309 | 495 | ||
310 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | 496 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); |
@@ -312,7 +498,6 @@ int sort_dimension__add(const char *tok) | |||
312 | 498 | ||
313 | return 0; | 499 | return 0; |
314 | } | 500 | } |
315 | |||
316 | return -ESRCH; | 501 | return -ESRCH; |
317 | } | 502 | } |
318 | 503 | ||
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..472aa5a63a58 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -31,11 +31,16 @@ extern const char *parent_pattern; | |||
31 | extern const char default_sort_order[]; | 31 | extern const char default_sort_order[]; |
32 | extern int sort__need_collapse; | 32 | extern int sort__need_collapse; |
33 | extern int sort__has_parent; | 33 | extern int sort__has_parent; |
34 | extern int sort__branch_mode; | ||
34 | extern char *field_sep; | 35 | extern char *field_sep; |
35 | extern struct sort_entry sort_comm; | 36 | extern struct sort_entry sort_comm; |
36 | extern struct sort_entry sort_dso; | 37 | extern struct sort_entry sort_dso; |
37 | extern struct sort_entry sort_sym; | 38 | extern struct sort_entry sort_sym; |
38 | extern struct sort_entry sort_parent; | 39 | extern struct sort_entry sort_parent; |
40 | extern struct sort_entry sort_dso_from; | ||
41 | extern struct sort_entry sort_dso_to; | ||
42 | extern struct sort_entry sort_sym_from; | ||
43 | extern struct sort_entry sort_sym_to; | ||
39 | extern enum sort_type sort__first_dimension; | 44 | extern enum sort_type sort__first_dimension; |
40 | 45 | ||
41 | /** | 46 | /** |
@@ -72,6 +77,7 @@ struct hist_entry { | |||
72 | struct hist_entry *pair; | 77 | struct hist_entry *pair; |
73 | struct rb_root sorted_chain; | 78 | struct rb_root sorted_chain; |
74 | }; | 79 | }; |
80 | struct branch_info *branch_info; | ||
75 | struct callchain_root callchain[0]; | 81 | struct callchain_root callchain[0]; |
76 | }; | 82 | }; |
77 | 83 | ||
@@ -82,6 +88,11 @@ enum sort_type { | |||
82 | SORT_SYM, | 88 | SORT_SYM, |
83 | SORT_PARENT, | 89 | SORT_PARENT, |
84 | SORT_CPU, | 90 | SORT_CPU, |
91 | SORT_DSO_FROM, | ||
92 | SORT_DSO_TO, | ||
93 | SORT_SYM_FROM, | ||
94 | SORT_SYM_TO, | ||
95 | SORT_MISPREDICT, | ||
85 | }; | 96 | }; |
86 | 97 | ||
87 | /* | 98 | /* |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0975438c3e72..5dd83c3e2c0c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <ctype.h> | ||
2 | #include <dirent.h> | 1 | #include <dirent.h> |
3 | #include <errno.h> | 2 | #include <errno.h> |
4 | #include <libgen.h> | 3 | #include <libgen.h> |
@@ -12,6 +11,7 @@ | |||
12 | #include <unistd.h> | 11 | #include <unistd.h> |
13 | #include <inttypes.h> | 12 | #include <inttypes.h> |
14 | #include "build-id.h" | 13 | #include "build-id.h" |
14 | #include "util.h" | ||
15 | #include "debug.h" | 15 | #include "debug.h" |
16 | #include "symbol.h" | 16 | #include "symbol.h" |
17 | #include "strlist.h" | 17 | #include "strlist.h" |
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp) | |||
263 | sym->name); | 263 | sym->name); |
264 | } | 264 | } |
265 | 265 | ||
266 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
267 | const struct addr_location *al, FILE *fp) | ||
268 | { | ||
269 | unsigned long offset; | ||
270 | size_t length; | ||
271 | |||
272 | if (sym && sym->name) { | ||
273 | length = fprintf(fp, "%s", sym->name); | ||
274 | if (al) { | ||
275 | offset = al->addr - sym->start; | ||
276 | length += fprintf(fp, "+0x%lx", offset); | ||
277 | } | ||
278 | return length; | ||
279 | } else | ||
280 | return fprintf(fp, "[unknown]"); | ||
281 | } | ||
282 | |||
283 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) | ||
284 | { | ||
285 | return symbol__fprintf_symname_offs(sym, NULL, fp); | ||
286 | } | ||
287 | |||
266 | void dso__set_long_name(struct dso *dso, char *name) | 288 | void dso__set_long_name(struct dso *dso, char *name) |
267 | { | 289 | { |
268 | if (name == NULL) | 290 | if (name == NULL) |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 123c2e14353e..ac49ef208a5f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
6 | #include <stdint.h> | 6 | #include <stdint.h> |
7 | #include "map.h" | 7 | #include "map.h" |
8 | #include "../perf.h" | ||
8 | #include <linux/list.h> | 9 | #include <linux/list.h> |
9 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
10 | #include <stdio.h> | 11 | #include <stdio.h> |
@@ -70,6 +71,7 @@ struct symbol_conf { | |||
70 | unsigned short priv_size; | 71 | unsigned short priv_size; |
71 | unsigned short nr_events; | 72 | unsigned short nr_events; |
72 | bool try_vmlinux_path, | 73 | bool try_vmlinux_path, |
74 | show_kernel_path, | ||
73 | use_modules, | 75 | use_modules, |
74 | sort_by_name, | 76 | sort_by_name, |
75 | show_nr_samples, | 77 | show_nr_samples, |
@@ -95,7 +97,11 @@ struct symbol_conf { | |||
95 | *col_width_list_str; | 97 | *col_width_list_str; |
96 | struct strlist *dso_list, | 98 | struct strlist *dso_list, |
97 | *comm_list, | 99 | *comm_list, |
98 | *sym_list; | 100 | *sym_list, |
101 | *dso_from_list, | ||
102 | *dso_to_list, | ||
103 | *sym_from_list, | ||
104 | *sym_to_list; | ||
99 | const char *symfs; | 105 | const char *symfs; |
100 | }; | 106 | }; |
101 | 107 | ||
@@ -119,6 +125,19 @@ struct map_symbol { | |||
119 | bool has_children; | 125 | bool has_children; |
120 | }; | 126 | }; |
121 | 127 | ||
128 | struct addr_map_symbol { | ||
129 | struct map *map; | ||
130 | struct symbol *sym; | ||
131 | u64 addr; | ||
132 | u64 al_addr; | ||
133 | }; | ||
134 | |||
135 | struct branch_info { | ||
136 | struct addr_map_symbol from; | ||
137 | struct addr_map_symbol to; | ||
138 | struct branch_flags flags; | ||
139 | }; | ||
140 | |||
122 | struct addr_location { | 141 | struct addr_location { |
123 | struct thread *thread; | 142 | struct thread *thread; |
124 | struct map *map; | 143 | struct map *map; |
@@ -241,6 +260,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); | |||
241 | 260 | ||
242 | int symbol__init(void); | 261 | int symbol__init(void); |
243 | void symbol__exit(void); | 262 | void symbol__exit(void); |
263 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
264 | const struct addr_location *al, FILE *fp); | ||
265 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); | ||
244 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); | 266 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); |
245 | 267 | ||
246 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); | 268 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); |
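
struct branch_info pairs two resolved addresses, the branch source and target, with the prediction flags; the sort keys and browser changes in this patch all key off that triple. A toy illustration with simplified stand-in types, not the real perf definitions:

#include <stdint.h>
#include <stdio.h>

struct addr_map_symbol { const char *sym; uint64_t addr; };
struct branch_flags { unsigned mispred:1, predicted:1; };

struct branch_info {
	struct addr_map_symbol from, to;	/* where the branch left and landed */
	struct branch_flags flags;
};

int main(void)
{
	struct branch_info bi = {
		.from  = { "caller_fn", 0x401000 },
		.to    = { "callee_fn", 0x402000 },
		.flags = { .predicted = 1 },
	};

	printf("%s -> %s (%s)\n", bi.from.sym, bi.to.sym,
	       bi.flags.mispred ? "mispredicted" : "predicted");
	return 0;
}
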
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c new file mode 100644 index 000000000000..48c6902e749f --- /dev/null +++ b/tools/perf/util/sysfs.c | |||
@@ -0,0 +1,60 @@ | |||
1 | |||
2 | #include "util.h" | ||
3 | #include "sysfs.h" | ||
4 | |||
5 | static const char * const sysfs_known_mountpoints[] = { | ||
6 | "/sys", | ||
7 | 0, | ||
8 | }; | ||
9 | |||
10 | static int sysfs_found; | ||
11 | char sysfs_mountpoint[PATH_MAX]; | ||
12 | |||
13 | static int sysfs_valid_mountpoint(const char *sysfs) | ||
14 | { | ||
15 | struct statfs st_fs; | ||
16 | |||
17 | if (statfs(sysfs, &st_fs) < 0) | ||
18 | return -ENOENT; | ||
19 | else if (st_fs.f_type != (long) SYSFS_MAGIC) | ||
20 | return -ENOENT; | ||
21 | |||
22 | return 0; | ||
23 | } | ||
24 | |||
25 | const char *sysfs_find_mountpoint(void) | ||
26 | { | ||
27 | const char * const *ptr; | ||
28 | char type[100]; | ||
29 | FILE *fp; | ||
30 | |||
31 | if (sysfs_found) | ||
32 | return (const char *) sysfs_mountpoint; | ||
33 | |||
34 | ptr = sysfs_known_mountpoints; | ||
35 | while (*ptr) { | ||
36 | if (sysfs_valid_mountpoint(*ptr) == 0) { | ||
37 | sysfs_found = 1; | ||
38 | strcpy(sysfs_mountpoint, *ptr); | ||
39 | return sysfs_mountpoint; | ||
40 | } | ||
41 | ptr++; | ||
42 | } | ||
43 | |||
44 | /* give up and parse /proc/mounts */ | ||
45 | fp = fopen("/proc/mounts", "r"); | ||
46 | if (fp == NULL) | ||
47 | return NULL; | ||
48 | |||
49 | while (!sysfs_found && | ||
50 | fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", | ||
51 | sysfs_mountpoint, type) == 2) { | ||
52 | |||
53 | if (strcmp(type, "sysfs") == 0) | ||
54 | sysfs_found = 1; | ||
55 | } | ||
56 | |||
57 | fclose(fp); | ||
58 | |||
59 | return sysfs_found ? sysfs_mountpoint : NULL; | ||
60 | } | ||
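
sysfs_find_mountpoint() caches its answer: it probes the well-known /sys mount with statfs() and only falls back to scanning /proc/mounts when that fails. A hypothetical caller, assuming the declaration from tools/perf/util/sysfs.h and linking against the new object:

#include <stdio.h>

const char *sysfs_find_mountpoint(void);	/* from tools/perf/util/sysfs.h */

int main(void)
{
	const char *sysfs = sysfs_find_mountpoint();

	if (sysfs == NULL) {
		fprintf(stderr, "sysfs not mounted?\n");
		return 1;
	}
	/* build paths under whatever mountpoint was found */
	printf("%s/devices/system/cpu\n", sysfs);
	return 0;
}
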
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h new file mode 100644 index 000000000000..a813b7203938 --- /dev/null +++ b/tools/perf/util/sysfs.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef __SYSFS_H__ | ||
2 | #define __SYSFS_H__ | ||
3 | |||
4 | const char *sysfs_find_mountpoint(void); | ||
5 | |||
6 | #endif /* __SYSFS_H__ */ | ||
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c3..84d9bd782004 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c | |||
@@ -1,6 +1,13 @@ | |||
1 | #include <dirent.h> | 1 | #include <dirent.h> |
2 | #include <limits.h> | ||
3 | #include <stdbool.h> | ||
2 | #include <stdlib.h> | 4 | #include <stdlib.h> |
3 | #include <stdio.h> | 5 | #include <stdio.h> |
6 | #include <sys/types.h> | ||
7 | #include <sys/stat.h> | ||
8 | #include <unistd.h> | ||
9 | #include "strlist.h" | ||
10 | #include <string.h> | ||
4 | #include "thread_map.h" | 11 | #include "thread_map.h" |
5 | 12 | ||
6 | /* Skip "." and ".." directories */ | 13 | /* Skip "." and ".." directories */ |
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) | |||
23 | sprintf(name, "/proc/%d/task", pid); | 30 | sprintf(name, "/proc/%d/task", pid); |
24 | items = scandir(name, &namelist, filter, NULL); | 31 | items = scandir(name, &namelist, filter, NULL); |
25 | if (items <= 0) | 32 | if (items <= 0) |
26 | return NULL; | 33 | return NULL; |
27 | 34 | ||
28 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); | 35 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); |
29 | if (threads != NULL) { | 36 | if (threads != NULL) { |
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) | |||
51 | return threads; | 58 | return threads; |
52 | } | 59 | } |
53 | 60 | ||
54 | struct thread_map *thread_map__new(pid_t pid, pid_t tid) | 61 | struct thread_map *thread_map__new_by_uid(uid_t uid) |
62 | { | ||
63 | DIR *proc; | ||
64 | int max_threads = 32, items, i; | ||
65 | char path[256]; | ||
66 | struct dirent dirent, *next, **namelist = NULL; | ||
67 | struct thread_map *threads = malloc(sizeof(*threads) + | ||
68 | max_threads * sizeof(pid_t)); | ||
69 | if (threads == NULL) | ||
70 | goto out; | ||
71 | |||
72 | proc = opendir("/proc"); | ||
73 | if (proc == NULL) | ||
74 | goto out_free_threads; | ||
75 | |||
76 | threads->nr = 0; | ||
77 | |||
78 | while (!readdir_r(proc, &dirent, &next) && next) { | ||
79 | char *end; | ||
80 | bool grow = false; | ||
81 | struct stat st; | ||
82 | pid_t pid = strtol(dirent.d_name, &end, 10); | ||
83 | |||
84 | if (*end) /* only interested in proper numerical dirents */ | ||
85 | continue; | ||
86 | |||
87 | snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); | ||
88 | |||
89 | if (stat(path, &st) != 0) | ||
90 | continue; | ||
91 | |||
92 | if (st.st_uid != uid) | ||
93 | continue; | ||
94 | |||
95 | snprintf(path, sizeof(path), "/proc/%d/task", pid); | ||
96 | items = scandir(path, &namelist, filter, NULL); | ||
97 | if (items <= 0) | ||
98 | goto out_free_closedir; | ||
99 | |||
100 | while (threads->nr + items >= max_threads) { | ||
101 | max_threads *= 2; | ||
102 | grow = true; | ||
103 | } | ||
104 | |||
105 | if (grow) { | ||
106 | struct thread_map *tmp; | ||
107 | |||
108 | tmp = realloc(threads, (sizeof(*threads) + | ||
109 | max_threads * sizeof(pid_t))); | ||
110 | if (tmp == NULL) | ||
111 | goto out_free_namelist; | ||
112 | |||
113 | threads = tmp; | ||
114 | } | ||
115 | |||
116 | for (i = 0; i < items; i++) | ||
117 | threads->map[threads->nr + i] = atoi(namelist[i]->d_name); | ||
118 | |||
119 | for (i = 0; i < items; i++) | ||
120 | free(namelist[i]); | ||
121 | free(namelist); | ||
122 | |||
123 | threads->nr += items; | ||
124 | } | ||
125 | |||
126 | out_closedir: | ||
127 | closedir(proc); | ||
128 | out: | ||
129 | return threads; | ||
130 | |||
131 | out_free_threads: | ||
132 | free(threads); | ||
133 | return NULL; | ||
134 | |||
135 | out_free_namelist: | ||
136 | for (i = 0; i < items; i++) | ||
137 | free(namelist[i]); | ||
138 | free(namelist); | ||
139 | |||
140 | out_free_closedir: | ||
141 | free(threads); | ||
142 | threads = NULL; | ||
143 | goto out_closedir; | ||
144 | } | ||
145 | |||
146 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) | ||
55 | { | 147 | { |
56 | if (pid != -1) | 148 | if (pid != -1) |
57 | return thread_map__new_by_pid(pid); | 149 | return thread_map__new_by_pid(pid); |
150 | |||
151 | if (tid == -1 && uid != UINT_MAX) | ||
152 | return thread_map__new_by_uid(uid); | ||
153 | |||
58 | return thread_map__new_by_tid(tid); | 154 | return thread_map__new_by_tid(tid); |
59 | } | 155 | } |
60 | 156 | ||
157 | static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) | ||
158 | { | ||
159 | struct thread_map *threads = NULL, *nt; | ||
160 | char name[256]; | ||
161 | int items, total_tasks = 0; | ||
162 | struct dirent **namelist = NULL; | ||
163 | int i, j = 0; | ||
164 | pid_t pid, prev_pid = INT_MAX; | ||
165 | char *end_ptr; | ||
166 | struct str_node *pos; | ||
167 | struct strlist *slist = strlist__new(false, pid_str); | ||
168 | |||
169 | if (!slist) | ||
170 | return NULL; | ||
171 | |||
172 | strlist__for_each(pos, slist) { | ||
173 | pid = strtol(pos->s, &end_ptr, 10); | ||
174 | |||
175 | if (pid == INT_MIN || pid == INT_MAX || | ||
176 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
177 | goto out_free_threads; | ||
178 | |||
179 | if (pid == prev_pid) | ||
180 | continue; | ||
181 | |||
182 | sprintf(name, "/proc/%d/task", pid); | ||
183 | items = scandir(name, &namelist, filter, NULL); | ||
184 | if (items <= 0) | ||
185 | goto out_free_threads; | ||
186 | |||
187 | total_tasks += items; | ||
188 | nt = realloc(threads, (sizeof(*threads) + | ||
189 | sizeof(pid_t) * total_tasks)); | ||
190 | if (nt == NULL) | ||
191 | goto out_free_threads; | ||
192 | |||
193 | threads = nt; | ||
194 | |||
195 | if (threads) { | ||
196 | for (i = 0; i < items; i++) | ||
197 | threads->map[j++] = atoi(namelist[i]->d_name); | ||
198 | threads->nr = total_tasks; | ||
199 | } | ||
200 | |||
201 | for (i = 0; i < items; i++) | ||
202 | free(namelist[i]); | ||
203 | free(namelist); | ||
204 | |||
205 | if (!threads) | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | out: | ||
210 | strlist__delete(slist); | ||
211 | return threads; | ||
212 | |||
213 | out_free_threads: | ||
214 | free(threads); | ||
215 | threads = NULL; | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) | ||
220 | { | ||
221 | struct thread_map *threads = NULL, *nt; | ||
222 | int ntasks = 0; | ||
223 | pid_t tid, prev_tid = INT_MAX; | ||
224 | char *end_ptr; | ||
225 | struct str_node *pos; | ||
226 | struct strlist *slist; | ||
227 | |||
228 | /* perf-stat expects threads to be generated even if tid not given */ | ||
229 | if (!tid_str) { | ||
230 | threads = malloc(sizeof(*threads) + sizeof(pid_t)); | ||
231 | if (threads != NULL) { | ||
232 | threads->map[0] = -1; | ||
233 | threads->nr = 1; | ||
234 | } | ||
235 | return threads; | ||
236 | } | ||
237 | |||
238 | slist = strlist__new(false, tid_str); | ||
239 | if (!slist) | ||
240 | return NULL; | ||
241 | |||
242 | strlist__for_each(pos, slist) { | ||
243 | tid = strtol(pos->s, &end_ptr, 10); | ||
244 | |||
245 | if (tid == INT_MIN || tid == INT_MAX || | ||
246 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
247 | goto out_free_threads; | ||
248 | |||
249 | if (tid == prev_tid) | ||
250 | continue; | ||
251 | |||
252 | ntasks++; | ||
253 | nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); | ||
254 | |||
255 | if (nt == NULL) | ||
256 | goto out_free_threads; | ||
257 | |||
258 | threads = nt; | ||
259 | threads->map[ntasks - 1] = tid; | ||
260 | threads->nr = ntasks; | ||
261 | } | ||
262 | out: | ||
263 | return threads; | ||
264 | |||
265 | out_free_threads: | ||
266 | free(threads); | ||
267 | threads = NULL; | ||
268 | goto out; | ||
269 | } | ||
270 | |||
271 | struct thread_map *thread_map__new_str(const char *pid, const char *tid, | ||
272 | uid_t uid) | ||
273 | { | ||
274 | if (pid) | ||
275 | return thread_map__new_by_pid_str(pid); | ||
276 | |||
277 | if (!tid && uid != UINT_MAX) | ||
278 | return thread_map__new_by_uid(uid); | ||
279 | |||
280 | return thread_map__new_by_tid_str(tid); | ||
281 | } | ||
282 | |||
61 | void thread_map__delete(struct thread_map *threads) | 283 | void thread_map__delete(struct thread_map *threads) |
62 | { | 284 | { |
63 | free(threads); | 285 | free(threads); |
64 | } | 286 | } |
287 | |||
288 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) | ||
289 | { | ||
290 | int i; | ||
291 | size_t printed = fprintf(fp, "%d thread%s: ", | ||
292 | threads->nr, threads->nr > 1 ? "s" : ""); | ||
293 | for (i = 0; i < threads->nr; ++i) | ||
294 | printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); | ||
295 | |||
296 | return printed + fprintf(fp, "\n"); | ||
297 | } | ||
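
thread_map__new_str() accepts comma-separated pid/tid lists (parsed with strlist) and expands each pid to all of its /proc/<pid>/task entries, or falls back to the UID scan when only a uid is given. A usage sketch against the perf-internal API added here; it only builds inside the perf tree:

#include <limits.h>
#include <stdio.h>
#include "thread_map.h"

int main(void)
{
	/* "1,2,4"-style lists are parsed; each pid expands to all its tasks */
	struct thread_map *threads = thread_map__new_str("1", NULL, UINT_MAX);

	if (threads == NULL)
		return 1;
	thread_map__fprintf(threads, stdout);	/* "N threads: tid, tid, ..." */
	thread_map__delete(threads);
	return 0;
}
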
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb907311409..7da80f14418b 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __PERF_THREAD_MAP_H | 2 | #define __PERF_THREAD_MAP_H |
3 | 3 | ||
4 | #include <sys/types.h> | 4 | #include <sys/types.h> |
5 | #include <stdio.h> | ||
5 | 6 | ||
6 | struct thread_map { | 7 | struct thread_map { |
7 | int nr; | 8 | int nr; |
@@ -10,6 +11,14 @@ struct thread_map { | |||
10 | 11 | ||
11 | struct thread_map *thread_map__new_by_pid(pid_t pid); | 12 | struct thread_map *thread_map__new_by_pid(pid_t pid); |
12 | struct thread_map *thread_map__new_by_tid(pid_t tid); | 13 | struct thread_map *thread_map__new_by_tid(pid_t tid); |
13 | struct thread_map *thread_map__new(pid_t pid, pid_t tid); | 14 | struct thread_map *thread_map__new_by_uid(uid_t uid); |
15 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); | ||
16 | |||
17 | struct thread_map *thread_map__new_str(const char *pid, | ||
18 | const char *tid, uid_t uid); | ||
19 | |||
14 | void thread_map__delete(struct thread_map *threads); | 20 | void thread_map__delete(struct thread_map *threads); |
21 | |||
22 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); | ||
23 | |||
15 | #endif /* __PERF_THREAD_MAP_H */ | 24 | #endif /* __PERF_THREAD_MAP_H */ |
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 500471dffa4f..09fe579ccafb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c | |||
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
69 | 69 | ||
70 | ret += SNPRINTF(bf + ret, size - ret, "], "); | 70 | ret += SNPRINTF(bf + ret, size - ret, "], "); |
71 | 71 | ||
72 | if (top->target_pid != -1) | 72 | if (top->target_pid) |
73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", | 73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", |
74 | top->target_pid); | 74 | top->target_pid); |
75 | else if (top->target_tid != -1) | 75 | else if (top->target_tid) |
76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", | 76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", |
77 | top->target_tid); | 77 | top->target_tid); |
78 | else if (top->uid_str != NULL) | ||
79 | ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", | ||
80 | top->uid_str); | ||
78 | else | 81 | else |
79 | ret += SNPRINTF(bf + ret, size - ret, " (all"); | 82 | ret += SNPRINTF(bf + ret, size - ret, " (all"); |
80 | 83 | ||
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
82 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", | 85 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", |
83 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); | 86 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); |
84 | else { | 87 | else { |
85 | if (top->target_tid != -1) | 88 | if (top->target_tid) |
86 | ret += SNPRINTF(bf + ret, size - ret, ")"); | 89 | ret += SNPRINTF(bf + ret, size - ret, ")"); |
87 | else | 90 | else |
88 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", | 91 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", |
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f2eab81435ae..ce61cb2d1acf 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h | |||
@@ -23,7 +23,8 @@ struct perf_top { | |||
23 | u64 guest_us_samples, guest_kernel_samples; | 23 | u64 guest_us_samples, guest_kernel_samples; |
24 | int print_entries, count_filter, delay_secs; | 24 | int print_entries, count_filter, delay_secs; |
25 | int freq; | 25 | int freq; |
26 | pid_t target_pid, target_tid; | 26 | const char *target_pid, *target_tid; |
27 | uid_t uid; | ||
27 | bool hide_kernel_symbols, hide_user_symbols, zero; | 28 | bool hide_kernel_symbols, hide_user_symbols, zero; |
28 | bool system_wide; | 29 | bool system_wide; |
29 | bool use_tui, use_stdio; | 30 | bool use_tui, use_stdio; |
@@ -33,7 +34,7 @@ struct perf_top { | |||
33 | bool vmlinux_warned; | 34 | bool vmlinux_warned; |
34 | bool inherit; | 35 | bool inherit; |
35 | bool group; | 36 | bool group; |
36 | bool sample_id_all_avail; | 37 | bool sample_id_all_missing; |
37 | bool exclude_guest_missing; | 38 | bool exclude_guest_missing; |
38 | bool dump_symtab; | 39 | bool dump_symtab; |
39 | const char *cpu_list; | 40 | const char *cpu_list; |
@@ -46,6 +47,7 @@ struct perf_top { | |||
46 | int realtime_prio; | 47 | int realtime_prio; |
47 | int sym_pcnt_filter; | 48 | int sym_pcnt_filter; |
48 | const char *sym_filter; | 49 | const char *sym_filter; |
50 | const char *uid_str; | ||
49 | }; | 51 | }; |
50 | 52 | ||
51 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); | 53 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1a8d4dc4f386..a4088ced1e64 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <stdio.h> | 25 | #include <stdio.h> |
26 | #include <stdlib.h> | 26 | #include <stdlib.h> |
27 | #include <string.h> | 27 | #include <string.h> |
28 | #include <ctype.h> | ||
29 | #include <errno.h> | 28 | #include <errno.h> |
30 | 29 | ||
31 | #include "../perf.h" | 30 | #include "../perf.h" |
@@ -1424,6 +1423,11 @@ static long long arg_num_eval(struct print_arg *arg) | |||
1424 | die("unknown op '%s'", arg->op.op); | 1423 | die("unknown op '%s'", arg->op.op); |
1425 | } | 1424 | } |
1426 | break; | 1425 | break; |
1426 | case '+': | ||
1427 | left = arg_num_eval(arg->op.left); | ||
1428 | right = arg_num_eval(arg->op.right); | ||
1429 | val = left + right; | ||
1430 | break; | ||
1427 | default: | 1431 | default: |
1428 | die("unknown op '%s'", arg->op.op); | 1432 | die("unknown op '%s'", arg->op.op); |
1429 | } | 1433 | } |
@@ -1484,6 +1488,13 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) | |||
1484 | 1488 | ||
1485 | free_token(token); | 1489 | free_token(token); |
1486 | type = process_arg(event, arg, &token); | 1490 | type = process_arg(event, arg, &token); |
1491 | |||
1492 | if (type == EVENT_OP) | ||
1493 | type = process_op(event, arg, &token); | ||
1494 | |||
1495 | if (type == EVENT_ERROR) | ||
1496 | goto out_free; | ||
1497 | |||
1487 | if (test_type_token(type, token, EVENT_DELIM, ",")) | 1498 | if (test_type_token(type, token, EVENT_DELIM, ",")) |
1488 | goto out_free; | 1499 | goto out_free; |
1489 | 1500 | ||
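
arg_num_eval() gains a '+' case that recurses into both operands, the same constant-folding shape as the existing operators. A standalone sketch of that recursive evaluation over a toy expression tree (not the real struct print_arg):

#include <stdio.h>

struct expr {
	char op;		/* 0 for a leaf */
	long long val;		/* leaf value */
	struct expr *left, *right;
};

static long long eval(struct expr *e)
{
	if (e->op == 0)
		return e->val;
	switch (e->op) {
	case '+':
		return eval(e->left) + eval(e->right);
	case '-':
		return eval(e->left) - eval(e->right);
	default:
		return 0;	/* the real code die()s on unknown ops */
	}
}

int main(void)
{
	struct expr a = { .val = 2 }, b = { .val = 3 };
	struct expr sum = { .op = '+', .left = &a, .right = &b };

	printf("%lld\n", eval(&sum));	/* 5 */
	return 0;
}
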
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f55cc3a765a1..b9592e0de8d7 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <pthread.h> | 33 | #include <pthread.h> |
34 | #include <fcntl.h> | 34 | #include <fcntl.h> |
35 | #include <unistd.h> | 35 | #include <unistd.h> |
36 | #include <ctype.h> | ||
37 | #include <errno.h> | 36 | #include <errno.h> |
38 | 37 | ||
39 | #include "../perf.h" | 38 | #include "../perf.h" |
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index a3fdf55f317b..18ae6c1831d3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <stdio.h> | 22 | #include <stdio.h> |
23 | #include <stdlib.h> | 23 | #include <stdlib.h> |
24 | #include <string.h> | 24 | #include <string.h> |
25 | #include <ctype.h> | ||
26 | #include <errno.h> | 25 | #include <errno.h> |
27 | 26 | ||
28 | #include "../perf.h" | 27 | #include "../perf.h" |
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 295a9c93f945..57a4c6ef3fd2 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c | |||
@@ -69,14 +69,17 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro | |||
69 | if (!self->navkeypressed) | 69 | if (!self->navkeypressed) |
70 | width += 1; | 70 | width += 1; |
71 | 71 | ||
72 | if (!ab->hide_src_code && ol->offset != -1) | ||
73 | if (!current_entry || (self->use_navkeypressed && | ||
74 | !self->navkeypressed)) | ||
75 | ui_browser__set_color(self, HE_COLORSET_CODE); | ||
76 | |||
72 | if (!*ol->line) | 77 | if (!*ol->line) |
73 | slsmg_write_nstring(" ", width - 18); | 78 | slsmg_write_nstring(" ", width - 18); |
74 | else | 79 | else |
75 | slsmg_write_nstring(ol->line, width - 18); | 80 | slsmg_write_nstring(ol->line, width - 18); |
76 | 81 | ||
77 | if (!current_entry) | 82 | if (current_entry) |
78 | ui_browser__set_color(self, HE_COLORSET_CODE); | ||
79 | else | ||
80 | ab->selection = ol; | 83 | ab->selection = ol; |
81 | } | 84 | } |
82 | 85 | ||
@@ -230,9 +233,9 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
230 | struct rb_node *nd = NULL; | 233 | struct rb_node *nd = NULL; |
231 | struct map_symbol *ms = self->b.priv; | 234 | struct map_symbol *ms = self->b.priv; |
232 | struct symbol *sym = ms->sym; | 235 | struct symbol *sym = ms->sym; |
233 | const char *help = "<-, ESC: exit, TAB/shift+TAB: cycle hottest lines, " | 236 | const char *help = "<-/ESC: Exit, TAB/shift+TAB: Cycle hot lines, " |
234 | "H: Hottest, -> Line action, S -> Toggle source " | 237 | "H: Go to hottest line, ->/ENTER: Line action, " |
235 | "code view"; | 238 | "S: Toggle source code view"; |
236 | int key; | 239 | int key; |
237 | 240 | ||
238 | if (ui_browser__show(&self->b, sym->name, help) < 0) | 241 | if (ui_browser__show(&self->b, sym->name, help) < 0) |
@@ -284,9 +287,11 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
284 | nd = self->curr_hot; | 287 | nd = self->curr_hot; |
285 | break; | 288 | break; |
286 | case 'H': | 289 | case 'H': |
290 | case 'h': | ||
287 | nd = self->curr_hot; | 291 | nd = self->curr_hot; |
288 | break; | 292 | break; |
289 | case 'S': | 293 | case 'S': |
294 | case 's': | ||
290 | if (annotate_browser__toggle_source(self)) | 295 | if (annotate_browser__toggle_source(self)) |
291 | ui_helpline__puts(help); | 296 | ui_helpline__puts(help); |
292 | continue; | 297 | continue; |
@@ -338,6 +343,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx, | |||
338 | pthread_mutex_unlock(¬es->lock); | 343 | pthread_mutex_unlock(¬es->lock); |
339 | symbol__tui_annotate(target, ms->map, evidx, | 344 | symbol__tui_annotate(target, ms->map, evidx, |
340 | timer, arg, delay_secs); | 345 | timer, arg, delay_secs); |
346 | ui_browser__show_title(&self->b, sym->name); | ||
341 | } | 347 | } |
342 | continue; | 348 | continue; |
343 | case K_LEFT: | 349 | case K_LEFT: |
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bb9197c9c4a4..fa530fcc764a 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
@@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) | |||
805 | self->hists = hists; | 805 | self->hists = hists; |
806 | self->b.refresh = hist_browser__refresh; | 806 | self->b.refresh = hist_browser__refresh; |
807 | self->b.seek = ui_browser__hists_seek; | 807 | self->b.seek = ui_browser__hists_seek; |
808 | self->b.use_navkeypressed = true, | 808 | self->b.use_navkeypressed = true; |
809 | self->has_symbols = sort_sym.list.next != NULL; | 809 | if (sort__branch_mode == 1) |
810 | self->has_symbols = sort_sym_from.list.next != NULL; | ||
811 | else | ||
812 | self->has_symbols = sort_sym.list.next != NULL; | ||
810 | } | 813 | } |
811 | 814 | ||
812 | return self; | 815 | return self; |
@@ -839,6 +842,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
839 | nr_events = convert_unit(nr_events, &unit); | 842 | nr_events = convert_unit(nr_events, &unit); |
840 | printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); | 843 | printed = scnprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); |
841 | 844 | ||
845 | if (self->uid_filter_str) | ||
846 | printed += snprintf(bf + printed, size - printed, | ||
847 | ", UID: %s", self->uid_filter_str); | ||
842 | if (thread) | 848 | if (thread) |
843 | printed += scnprintf(bf + printed, size - printed, | 849 | printed += scnprintf(bf + printed, size - printed, |
844 | ", Thread: %s(%d)", | 850 | ", Thread: %s(%d)", |
@@ -850,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
850 | return printed; | 856 | return printed; |
851 | } | 857 | } |
852 | 858 | ||
859 | static inline void free_popup_options(char **options, int n) | ||
860 | { | ||
861 | int i; | ||
862 | |||
863 | for (i = 0; i < n; ++i) { | ||
864 | free(options[i]); | ||
865 | options[i] = NULL; | ||
866 | } | ||
867 | } | ||
868 | |||
853 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | 869 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, |
854 | const char *helpline, const char *ev_name, | 870 | const char *helpline, const char *ev_name, |
855 | bool left_exits, | 871 | bool left_exits, |
@@ -858,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
858 | { | 874 | { |
859 | struct hists *self = &evsel->hists; | 875 | struct hists *self = &evsel->hists; |
860 | struct hist_browser *browser = hist_browser__new(self); | 876 | struct hist_browser *browser = hist_browser__new(self); |
877 | struct branch_info *bi; | ||
861 | struct pstack *fstack; | 878 | struct pstack *fstack; |
879 | char *options[16]; | ||
880 | int nr_options = 0; | ||
862 | int key = -1; | 881 | int key = -1; |
863 | 882 | ||
864 | if (browser == NULL) | 883 | if (browser == NULL) |
@@ -870,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
870 | 889 | ||
871 | ui_helpline__push(helpline); | 890 | ui_helpline__push(helpline); |
872 | 891 | ||
892 | memset(options, 0, sizeof(options)); | ||
893 | |||
873 | while (1) { | 894 | while (1) { |
874 | const struct thread *thread = NULL; | 895 | const struct thread *thread = NULL; |
875 | const struct dso *dso = NULL; | 896 | const struct dso *dso = NULL; |
876 | char *options[16]; | 897 | int choice = 0, |
877 | int nr_options = 0, choice = 0, i, | ||
878 | annotate = -2, zoom_dso = -2, zoom_thread = -2, | 898 | annotate = -2, zoom_dso = -2, zoom_thread = -2, |
879 | browse_map = -2; | 899 | annotate_f = -2, annotate_t = -2, browse_map = -2; |
900 | |||
901 | nr_options = 0; | ||
880 | 902 | ||
881 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); | 903 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); |
882 | 904 | ||
@@ -884,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
884 | thread = hist_browser__selected_thread(browser); | 906 | thread = hist_browser__selected_thread(browser); |
885 | dso = browser->selection->map ? browser->selection->map->dso : NULL; | 907 | dso = browser->selection->map ? browser->selection->map->dso : NULL; |
886 | } | 908 | } |
887 | |||
888 | switch (key) { | 909 | switch (key) { |
889 | case K_TAB: | 910 | case K_TAB: |
890 | case K_UNTAB: | 911 | case K_UNTAB: |
@@ -899,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
899 | if (!browser->has_symbols) { | 920 | if (!browser->has_symbols) { |
900 | ui_browser__warning(&browser->b, delay_secs * 2, | 921 | ui_browser__warning(&browser->b, delay_secs * 2, |
901 | "Annotation is only available for symbolic views, " | 922 | "Annotation is only available for symbolic views, " |
902 | "include \"sym\" in --sort to use it."); | 923 | "include \"sym*\" in --sort to use it."); |
903 | continue; | 924 | continue; |
904 | } | 925 | } |
905 | 926 | ||
@@ -969,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
969 | if (!browser->has_symbols) | 990 | if (!browser->has_symbols) |
970 | goto add_exit_option; | 991 | goto add_exit_option; |
971 | 992 | ||
972 | if (browser->selection != NULL && | 993 | if (sort__branch_mode == 1) { |
973 | browser->selection->sym != NULL && | 994 | bi = browser->he_selection->branch_info; |
974 | !browser->selection->map->dso->annotate_warned && | 995 | if (browser->selection != NULL && |
975 | asprintf(&options[nr_options], "Annotate %s", | 996 | bi && |
976 | browser->selection->sym->name) > 0) | 997 | bi->from.sym != NULL && |
977 | annotate = nr_options++; | 998 | !bi->from.map->dso->annotate_warned && |
999 | asprintf(&options[nr_options], "Annotate %s", | ||
1000 | bi->from.sym->name) > 0) | ||
1001 | annotate_f = nr_options++; | ||
1002 | |||
1003 | if (browser->selection != NULL && | ||
1004 | bi && | ||
1005 | bi->to.sym != NULL && | ||
1006 | !bi->to.map->dso->annotate_warned && | ||
1007 | (bi->to.sym != bi->from.sym || | ||
1008 | bi->to.map->dso != bi->from.map->dso) && | ||
1009 | asprintf(&options[nr_options], "Annotate %s", | ||
1010 | bi->to.sym->name) > 0) | ||
1011 | annotate_t = nr_options++; | ||
1012 | } else { | ||
1013 | |||
1014 | if (browser->selection != NULL && | ||
1015 | browser->selection->sym != NULL && | ||
1016 | !browser->selection->map->dso->annotate_warned && | ||
1017 | asprintf(&options[nr_options], "Annotate %s", | ||
1018 | browser->selection->sym->name) > 0) | ||
1019 | annotate = nr_options++; | ||
1020 | } | ||
978 | 1021 | ||
979 | if (thread != NULL && | 1022 | if (thread != NULL && |
980 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", | 1023 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", |
@@ -995,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
995 | browse_map = nr_options++; | 1038 | browse_map = nr_options++; |
996 | add_exit_option: | 1039 | add_exit_option: |
997 | options[nr_options++] = (char *)"Exit"; | 1040 | options[nr_options++] = (char *)"Exit"; |
998 | 1041 | retry_popup_menu: | |
999 | choice = ui__popup_menu(nr_options, options); | 1042 | choice = ui__popup_menu(nr_options, options); |
1000 | 1043 | ||
1001 | for (i = 0; i < nr_options - 1; ++i) | ||
1002 | free(options[i]); | ||
1003 | |||
1004 | if (choice == nr_options - 1) | 1044 | if (choice == nr_options - 1) |
1005 | break; | 1045 | break; |
1006 | 1046 | ||
1007 | if (choice == -1) | 1047 | if (choice == -1) { |
1048 | free_popup_options(options, nr_options - 1); | ||
1008 | continue; | 1049 | continue; |
1050 | } | ||
1009 | 1051 | ||
1010 | if (choice == annotate) { | 1052 | if (choice == annotate || choice == annotate_t || choice == annotate_f) { |
1011 | struct hist_entry *he; | 1053 | struct hist_entry *he; |
1012 | int err; | 1054 | int err; |
1013 | do_annotate: | 1055 | do_annotate: |
1014 | he = hist_browser__selected_entry(browser); | 1056 | he = hist_browser__selected_entry(browser); |
1015 | if (he == NULL) | 1057 | if (he == NULL) |
1016 | continue; | 1058 | continue; |
1059 | |||
1060 | /* | ||
1061 | * we stash the branch_info symbol + map into | ||
1062 | * the ms so we don't have to rewrite all the annotation | ||
1063 | * code to use branch_info. | ||
1064 | * in branch mode, the ms struct is not used | ||
1065 | */ | ||
1066 | if (choice == annotate_f) { | ||
1067 | he->ms.sym = he->branch_info->from.sym; | ||
1068 | he->ms.map = he->branch_info->from.map; | ||
1069 | } else if (choice == annotate_t) { | ||
1070 | he->ms.sym = he->branch_info->to.sym; | ||
1071 | he->ms.map = he->branch_info->to.map; | ||
1072 | } | ||
1073 | |||
1017 | /* | 1074 | /* |
1018 | * Don't let this be freed, say, by hists__decay_entry. | 1075 | * Don't let this be freed, say, by hists__decay_entry. |
1019 | */ | 1076 | */ |
@@ -1021,9 +1078,18 @@ do_annotate: | |||
1021 | err = hist_entry__tui_annotate(he, evsel->idx, | 1078 | err = hist_entry__tui_annotate(he, evsel->idx, |
1022 | timer, arg, delay_secs); | 1079 | timer, arg, delay_secs); |
1023 | he->used = false; | 1080 | he->used = false; |
1081 | /* | ||
1082 | * offer option to annotate the other branch source or target | ||
1083 | * (if they exist) when returning from annotate | ||
1084 | */ | ||
1085 | if ((err == 'q' || err == CTRL('c')) | ||
1086 | && annotate_t != -2 && annotate_f != -2) | ||
1087 | goto retry_popup_menu; | ||
1088 | |||
1024 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); | 1089 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); |
1025 | if (err) | 1090 | if (err) |
1026 | ui_browser__handle_resize(&browser->b); | 1091 | ui_browser__handle_resize(&browser->b); |
1092 | |||
1027 | } else if (choice == browse_map) | 1093 | } else if (choice == browse_map) |
1028 | map__browse(browser->selection->map); | 1094 | map__browse(browser->selection->map); |
1029 | else if (choice == zoom_dso) { | 1095 | else if (choice == zoom_dso) { |
@@ -1069,6 +1135,7 @@ out_free_stack: | |||
1069 | pstack__delete(fstack); | 1135 | pstack__delete(fstack); |
1070 | out: | 1136 | out: |
1071 | hist_browser__delete(browser); | 1137 | hist_browser__delete(browser); |
1138 | free_popup_options(options, nr_options - 1); | ||
1072 | return key; | 1139 | return key; |
1073 | } | 1140 | } |
1074 | 1141 | ||
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 6905bcc8be2d..eca6575abfd0 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c | |||
@@ -3,9 +3,9 @@ | |||
3 | #include <newt.h> | 3 | #include <newt.h> |
4 | #include <inttypes.h> | 4 | #include <inttypes.h> |
5 | #include <sys/ttydefaults.h> | 5 | #include <sys/ttydefaults.h> |
6 | #include <ctype.h> | ||
7 | #include <string.h> | 6 | #include <string.h> |
8 | #include <linux/bitops.h> | 7 | #include <linux/bitops.h> |
8 | #include "../../util.h" | ||
9 | #include "../../debug.h" | 9 | #include "../../debug.h" |
10 | #include "../../symbol.h" | 10 | #include "../../symbol.h" |
11 | #include "../browser.h" | 11 | #include "../browser.h" |
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d76d1c0ff98f..52bb07c6442a 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (C) Linus Torvalds, 2005 | 7 | * Copyright (C) Linus Torvalds, 2005 |
8 | */ | 8 | */ |
9 | #include "util.h" | 9 | #include "util.h" |
10 | #include "debug.h" | ||
10 | 11 | ||
11 | static void report(const char *prefix, const char *err, va_list params) | 12 | static void report(const char *prefix, const char *err, va_list params) |
12 | { | 13 | { |
@@ -81,3 +82,41 @@ void warning(const char *warn, ...) | |||
81 | warn_routine(warn, params); | 82 | warn_routine(warn, params); |
82 | va_end(params); | 83 | va_end(params); |
83 | } | 84 | } |
85 | |||
86 | uid_t parse_target_uid(const char *str, const char *tid, const char *pid) | ||
87 | { | ||
88 | struct passwd pwd, *result; | ||
89 | char buf[1024]; | ||
90 | |||
91 | if (str == NULL) | ||
92 | return UINT_MAX; | ||
93 | |||
94 | /* UID and PID are mutually exclusive */ | ||
95 | if (tid || pid) { | ||
96 | ui__warning("PID/TID switch overriding UID\n"); | ||
97 | sleep(1); | ||
98 | return UINT_MAX; | ||
99 | } | ||
100 | |||
101 | getpwnam_r(str, &pwd, buf, sizeof(buf), &result); | ||
102 | |||
103 | if (result == NULL) { | ||
104 | char *endptr; | ||
105 | int uid = strtol(str, &endptr, 10); | ||
106 | |||
107 | if (*endptr != '\0') { | ||
108 | ui__error("Invalid user %s\n", str); | ||
109 | return UINT_MAX - 1; | ||
110 | } | ||
111 | |||
112 | getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); | ||
113 | |||
114 | if (result == NULL) { | ||
115 | ui__error("Problems obtaining information for user %s\n", | ||
116 | str); | ||
117 | return UINT_MAX - 1; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | return result->pw_uid; | ||
122 | } | ||
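
parse_target_uid() tries the string as a user name via getpwnam_r() first, and only when that fails re-reads it as a numeric uid for getpwuid_r(). A standalone sketch of the same two-step lookup, with hypothetical names:

#include <sys/types.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>

static int lookup_uid(const char *str, uid_t *uid)
{
	struct passwd pwd, *result;
	char buf[1024];

	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
	if (result == NULL) {
		char *end;
		long n = strtol(str, &end, 10);

		if (*end != '\0')
			return -1;	/* neither a name nor a number */
		getpwuid_r((uid_t)n, &pwd, buf, sizeof(buf), &result);
		if (result == NULL)
			return -1;	/* a number, but no such user */
	}
	*uid = result->pw_uid;
	return 0;
}

int main(void)
{
	uid_t uid;

	if (lookup_uid("root", &uid) == 0)
		printf("uid=%u\n", (unsigned)uid);
	return 0;
}
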
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index fb25d1329218..8109a907841e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr) | |||
14 | attr->exclude_host = 1; | 14 | attr->exclude_host = 1; |
15 | if (!perf_guest) | 15 | if (!perf_guest) |
16 | attr->exclude_guest = 1; | 16 | attr->exclude_guest = 1; |
17 | /* to capture ABI version */ | ||
18 | attr->size = sizeof(*attr); | ||
17 | } | 19 | } |
18 | 20 | ||
19 | int mkdir_p(char *path, mode_t mode) | 21 | int mkdir_p(char *path, mode_t mode) |
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ecf9898169c8..0f99f394d8e0 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext) | |||
199 | #undef isalpha | 199 | #undef isalpha |
200 | #undef isprint | 200 | #undef isprint |
201 | #undef isalnum | 201 | #undef isalnum |
202 | #undef islower | ||
203 | #undef isupper | ||
202 | #undef tolower | 204 | #undef tolower |
203 | #undef toupper | 205 | #undef toupper |
204 | 206 | ||
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256]; | |||
219 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | 221 | #define isalpha(x) sane_istest(x,GIT_ALPHA) |
220 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | 222 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) |
221 | #define isprint(x) sane_istest(x,GIT_PRINT) | 223 | #define isprint(x) sane_istest(x,GIT_PRINT) |
224 | #define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20)) | ||
225 | #define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20)) | ||
222 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | 226 | #define tolower(x) sane_case((unsigned char)(x), 0x20) |
223 | #define toupper(x) sane_case((unsigned char)(x), 0) | 227 | #define toupper(x) sane_case((unsigned char)(x), 0) |
224 | 228 | ||
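
The new islower()/isupper() macros lean on ASCII layout: upper- and lower-case letters differ only in bit 0x20, which is why each macro tests that bit in addition to GIT_ALPHA (the bit alone means nothing for non-letters). A quick demonstration, valid only for characters already known to be letters:

#include <stdio.h>

int main(void)
{
	char c = 'q';

	printf("lower? %d\n", (c & 0x20) != 0);		/* 1: bit set on lowercase */
	printf("toupper: %c\n", c & ~0x20);		/* 'Q' */
	printf("tolower: %c\n", (c & ~0x20) | 0x20);	/* 'q' */
	return 0;
}
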
@@ -245,6 +249,8 @@ struct perf_event_attr; | |||
245 | 249 | ||
246 | void event_attr_init(struct perf_event_attr *attr); | 250 | void event_attr_init(struct perf_event_attr *attr); |
247 | 251 | ||
252 | uid_t parse_target_uid(const char *str, const char *tid, const char *pid); | ||
253 | |||
248 | #define _STR(x) #x | 254 | #define _STR(x) #x |
249 | #define STR(x) _STR(x) | 255 | #define STR(x) _STR(x) |
250 | 256 | ||