diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-31 14:52:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-31 14:52:01 -0500 |
commit | b21c07040304b8716e38a4a0e4ab60f386357e61 (patch) | |
tree | eb2cf03ec35b9a5090c0adaab659a766e02c3b10 | |
parent | 4e58fb7305449cf8c5a86dd97dfc1812221be77c (diff) | |
parent | fb7ae981cb9fe8665b9da97e8734745e030c151d (diff) |
Merge branch 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
tracing: Fix sign fields in ftrace_define_fields_##call()
tracing/syscalls: Fix typo in SYSCALL_DEFINE0
tracing/kprobe: Show sign of fields in trace_kprobe format files
ksym_tracer: Remove trace_stat
ksym_tracer: Fix race when incrementing count
ksym_tracer: Fix to allow writing newline to ksym_trace_filter
ksym_tracer: Fix to make the tracer work
tracing: Kconfig spelling fixes and cleanups
tracing: Fix setting tracer specific options
Documentation: Update ftrace-design.txt
Documentation: Update tracepoint-analysis.txt
Documentation: Update mmiotrace.txt
-rw-r--r-- | Documentation/trace/ftrace-design.txt | 14 | ||||
-rw-r--r-- | Documentation/trace/mmiotrace.txt | 15 | ||||
-rw-r--r-- | Documentation/trace/tracepoint-analysis.txt | 60 | ||||
-rw-r--r-- | include/linux/syscalls.h | 2 | ||||
-rw-r--r-- | include/trace/ftrace.h | 7 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 10 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 112 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_ksym.c | 140 |
11 files changed, 179 insertions, 197 deletions
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 641a1ef2a7ff..239f14b2b55a 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt | |||
@@ -53,14 +53,14 @@ size of the mcount call that is embedded in the function). | |||
53 | For example, if the function foo() calls bar(), when the bar() function calls | 53 | For example, if the function foo() calls bar(), when the bar() function calls |
54 | mcount(), the arguments mcount() will pass to the tracer are: | 54 | mcount(), the arguments mcount() will pass to the tracer are: |
55 | "frompc" - the address bar() will use to return to foo() | 55 | "frompc" - the address bar() will use to return to foo() |
56 | "selfpc" - the address bar() (with _mcount() size adjustment) | 56 | "selfpc" - the address bar() (with mcount() size adjustment) |
57 | 57 | ||
58 | Also keep in mind that this mcount function will be called *a lot*, so | 58 | Also keep in mind that this mcount function will be called *a lot*, so |
59 | optimizing for the default case of no tracer will help the smooth running of | 59 | optimizing for the default case of no tracer will help the smooth running of |
60 | your system when tracing is disabled. So the start of the mcount function is | 60 | your system when tracing is disabled. So the start of the mcount function is |
61 | typically the bare min with checking things before returning. That also means | 61 | typically the bare minimum with checking things before returning. That also |
62 | the code flow should usually kept linear (i.e. no branching in the nop case). | 62 | means the code flow should usually be kept linear (i.e. no branching in the nop |
63 | This is of course an optimization and not a hard requirement. | 63 | case). This is of course an optimization and not a hard requirement. |
64 | 64 | ||
65 | Here is some pseudo code that should help (these functions should actually be | 65 | Here is some pseudo code that should help (these functions should actually be |
66 | implemented in assembly): | 66 | implemented in assembly): |
@@ -131,10 +131,10 @@ some functions to save (hijack) and restore the return address. | |||
131 | 131 | ||
132 | The mcount function should check the function pointers ftrace_graph_return | 132 | The mcount function should check the function pointers ftrace_graph_return |
133 | (compare to ftrace_stub) and ftrace_graph_entry (compare to | 133 | (compare to ftrace_stub) and ftrace_graph_entry (compare to |
134 | ftrace_graph_entry_stub). If either of those are not set to the relevant stub | 134 | ftrace_graph_entry_stub). If either of those is not set to the relevant stub |
135 | function, call the arch-specific function ftrace_graph_caller which in turn | 135 | function, call the arch-specific function ftrace_graph_caller which in turn |
136 | calls the arch-specific function prepare_ftrace_return. Neither of these | 136 | calls the arch-specific function prepare_ftrace_return. Neither of these |
137 | function names are strictly required, but you should use them anyways to stay | 137 | function names is strictly required, but you should use them anyway to stay |
138 | consistent across the architecture ports -- easier to compare & contrast | 138 | consistent across the architecture ports -- easier to compare & contrast |
139 | things. | 139 | things. |
140 | 140 | ||
@@ -144,7 +144,7 @@ but the first argument should be a pointer to the "frompc". Typically this is | |||
144 | located on the stack. This allows the function to hijack the return address | 144 | located on the stack. This allows the function to hijack the return address |
145 | temporarily to have it point to the arch-specific function return_to_handler. | 145 | temporarily to have it point to the arch-specific function return_to_handler. |
146 | That function will simply call the common ftrace_return_to_handler function and | 146 | That function will simply call the common ftrace_return_to_handler function and |
147 | that will return the original return address with which, you can return to the | 147 | that will return the original return address with which you can return to the |
148 | original call site. | 148 | original call site. |
149 | 149 | ||
150 | Here is the updated mcount pseudo code: | 150 | Here is the updated mcount pseudo code: |
diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt index 162effbfbdec..664e7386d89e 100644 --- a/Documentation/trace/mmiotrace.txt +++ b/Documentation/trace/mmiotrace.txt | |||
@@ -44,7 +44,8 @@ Check for lost events. | |||
44 | Usage | 44 | Usage |
45 | ----- | 45 | ----- |
46 | 46 | ||
47 | Make sure debugfs is mounted to /sys/kernel/debug. If not, (requires root privileges) | 47 | Make sure debugfs is mounted to /sys/kernel/debug. |
48 | If not (requires root privileges): | ||
48 | $ mount -t debugfs debugfs /sys/kernel/debug | 49 | $ mount -t debugfs debugfs /sys/kernel/debug |
49 | 50 | ||
50 | Check that the driver you are about to trace is not loaded. | 51 | Check that the driver you are about to trace is not loaded. |
@@ -91,7 +92,7 @@ $ dmesg > dmesg.txt | |||
91 | $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt | 92 | $ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt |
92 | and then send the .tar.gz file. The trace compresses considerably. Replace | 93 | and then send the .tar.gz file. The trace compresses considerably. Replace |
93 | "pciid" and "nick" with the PCI ID or model name of your piece of hardware | 94 | "pciid" and "nick" with the PCI ID or model name of your piece of hardware |
94 | under investigation and your nick name. | 95 | under investigation and your nickname. |
95 | 96 | ||
96 | 97 | ||
97 | How Mmiotrace Works | 98 | How Mmiotrace Works |
@@ -100,7 +101,7 @@ How Mmiotrace Works | |||
100 | Access to hardware IO-memory is gained by mapping addresses from PCI bus by | 101 | Access to hardware IO-memory is gained by mapping addresses from PCI bus by |
101 | calling one of the ioremap_*() functions. Mmiotrace is hooked into the | 102 | calling one of the ioremap_*() functions. Mmiotrace is hooked into the |
102 | __ioremap() function and gets called whenever a mapping is created. Mapping is | 103 | __ioremap() function and gets called whenever a mapping is created. Mapping is |
103 | an event that is recorded into the trace log. Note, that ISA range mappings | 104 | an event that is recorded into the trace log. Note that ISA range mappings |
104 | are not caught, since the mapping always exists and is returned directly. | 105 | are not caught, since the mapping always exists and is returned directly. |
105 | 106 | ||
106 | MMIO accesses are recorded via page faults. Just before __ioremap() returns, | 107 | MMIO accesses are recorded via page faults. Just before __ioremap() returns, |
@@ -122,11 +123,11 @@ Trace Log Format | |||
122 | ---------------- | 123 | ---------------- |
123 | 124 | ||
124 | The raw log is text and easily filtered with e.g. grep and awk. One record is | 125 | The raw log is text and easily filtered with e.g. grep and awk. One record is |
125 | one line in the log. A record starts with a keyword, followed by keyword | 126 | one line in the log. A record starts with a keyword, followed by keyword- |
126 | dependant arguments. Arguments are separated by a space, or continue until the | 127 | dependent arguments. Arguments are separated by a space, or continue until the |
127 | end of line. The format for version 20070824 is as follows: | 128 | end of line. The format for version 20070824 is as follows: |
128 | 129 | ||
129 | Explanation Keyword Space separated arguments | 130 | Explanation Keyword Space-separated arguments |
130 | --------------------------------------------------------------------------- | 131 | --------------------------------------------------------------------------- |
131 | 132 | ||
132 | read event R width, timestamp, map id, physical, value, PC, PID | 133 | read event R width, timestamp, map id, physical, value, PC, PID |
@@ -136,7 +137,7 @@ iounmap event UNMAP timestamp, map id, PC, PID | |||
136 | marker MARK timestamp, text | 137 | marker MARK timestamp, text |
137 | version VERSION the string "20070824" | 138 | version VERSION the string "20070824" |
138 | info for reader LSPCI one line from lspci -v | 139 | info for reader LSPCI one line from lspci -v |
139 | PCI address map PCIDEV space separated /proc/bus/pci/devices data | 140 | PCI address map PCIDEV space-separated /proc/bus/pci/devices data |
140 | unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID | 141 | unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID |
141 | 142 | ||
142 | Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual | 143 | Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual |
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt index 5eb4e487e667..87bee3c129ba 100644 --- a/Documentation/trace/tracepoint-analysis.txt +++ b/Documentation/trace/tracepoint-analysis.txt | |||
@@ -10,8 +10,8 @@ Tracepoints (see Documentation/trace/tracepoints.txt) can be used without | |||
10 | creating custom kernel modules to register probe functions using the event | 10 | creating custom kernel modules to register probe functions using the event |
11 | tracing infrastructure. | 11 | tracing infrastructure. |
12 | 12 | ||
13 | Simplistically, tracepoints will represent an important event that when can | 13 | Simplistically, tracepoints represent important events that can be |
14 | be taken in conjunction with other tracepoints to build a "Big Picture" of | 14 | taken in conjunction with other tracepoints to build a "Big Picture" of |
15 | what is going on within the system. There are a large number of methods for | 15 | what is going on within the system. There are a large number of methods for |
16 | gathering and interpreting these events. Lacking any current Best Practises, | 16 | gathering and interpreting these events. Lacking any current Best Practises, |
17 | this document describes some of the methods that can be used. | 17 | this document describes some of the methods that can be used. |
@@ -33,12 +33,12 @@ calling | |||
33 | 33 | ||
34 | will give a fair indication of the number of events available. | 34 | will give a fair indication of the number of events available. |
35 | 35 | ||
36 | 2.2 PCL | 36 | 2.2 PCL (Performance Counters for Linux) |
37 | ------- | 37 | ------- |
38 | 38 | ||
39 | Discovery and enumeration of all counters and events, including tracepoints | 39 | Discovery and enumeration of all counters and events, including tracepoints, |
40 | are available with the perf tool. Getting a list of available events is a | 40 | are available with the perf tool. Getting a list of available events is a |
41 | simple case of | 41 | simple case of: |
42 | 42 | ||
43 | $ perf list 2>&1 | grep Tracepoint | 43 | $ perf list 2>&1 | grep Tracepoint |
44 | ext4:ext4_free_inode [Tracepoint event] | 44 | ext4:ext4_free_inode [Tracepoint event] |
@@ -49,19 +49,19 @@ simple case of | |||
49 | [ .... remaining output snipped .... ] | 49 | [ .... remaining output snipped .... ] |
50 | 50 | ||
51 | 51 | ||
52 | 2. Enabling Events | 52 | 3. Enabling Events |
53 | ================== | 53 | ================== |
54 | 54 | ||
55 | 2.1 System-Wide Event Enabling | 55 | 3.1 System-Wide Event Enabling |
56 | ------------------------------ | 56 | ------------------------------ |
57 | 57 | ||
58 | See Documentation/trace/events.txt for a proper description on how events | 58 | See Documentation/trace/events.txt for a proper description on how events |
59 | can be enabled system-wide. A short example of enabling all events related | 59 | can be enabled system-wide. A short example of enabling all events related |
60 | to page allocation would look something like | 60 | to page allocation would look something like: |
61 | 61 | ||
62 | $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done | 62 | $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done |
63 | 63 | ||
64 | 2.2 System-Wide Event Enabling with SystemTap | 64 | 3.2 System-Wide Event Enabling with SystemTap |
65 | --------------------------------------------- | 65 | --------------------------------------------- |
66 | 66 | ||
67 | In SystemTap, tracepoints are accessible using the kernel.trace() function | 67 | In SystemTap, tracepoints are accessible using the kernel.trace() function |
@@ -86,7 +86,7 @@ were allocating the pages. | |||
86 | print_count() | 86 | print_count() |
87 | } | 87 | } |
88 | 88 | ||
89 | 2.3 System-Wide Event Enabling with PCL | 89 | 3.3 System-Wide Event Enabling with PCL |
90 | --------------------------------------- | 90 | --------------------------------------- |
91 | 91 | ||
92 | By specifying the -a switch and analysing sleep, the system-wide events | 92 | By specifying the -a switch and analysing sleep, the system-wide events |
@@ -107,16 +107,16 @@ for a duration of time can be examined. | |||
107 | Similarly, one could execute a shell and exit it as desired to get a report | 107 | Similarly, one could execute a shell and exit it as desired to get a report |
108 | at that point. | 108 | at that point. |
109 | 109 | ||
110 | 2.4 Local Event Enabling | 110 | 3.4 Local Event Enabling |
111 | ------------------------ | 111 | ------------------------ |
112 | 112 | ||
113 | Documentation/trace/ftrace.txt describes how to enable events on a per-thread | 113 | Documentation/trace/ftrace.txt describes how to enable events on a per-thread |
114 | basis using set_ftrace_pid. | 114 | basis using set_ftrace_pid. |
115 | 115 | ||
116 | 2.5 Local Event Enablement with PCL | 116 | 3.5 Local Event Enablement with PCL |
117 | ----------------------------------- | 117 | ----------------------------------- |
118 | 118 | ||
119 | Events can be activate and tracked for the duration of a process on a local | 119 | Events can be activated and tracked for the duration of a process on a local |
120 | basis using PCL such as follows. | 120 | basis using PCL such as follows. |
121 | 121 | ||
122 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 122 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ |
@@ -131,18 +131,18 @@ basis using PCL such as follows. | |||
131 | 131 | ||
132 | 0.973913387 seconds time elapsed | 132 | 0.973913387 seconds time elapsed |
133 | 133 | ||
134 | 3. Event Filtering | 134 | 4. Event Filtering |
135 | ================== | 135 | ================== |
136 | 136 | ||
137 | Documentation/trace/ftrace.txt covers in-depth how to filter events in | 137 | Documentation/trace/ftrace.txt covers in-depth how to filter events in |
138 | ftrace. Obviously using grep and awk of trace_pipe is an option as well | 138 | ftrace. Obviously using grep and awk of trace_pipe is an option as well |
139 | as any script reading trace_pipe. | 139 | as any script reading trace_pipe. |
140 | 140 | ||
141 | 4. Analysing Event Variances with PCL | 141 | 5. Analysing Event Variances with PCL |
142 | ===================================== | 142 | ===================================== |
143 | 143 | ||
144 | Any workload can exhibit variances between runs and it can be important | 144 | Any workload can exhibit variances between runs and it can be important |
145 | to know what the standard deviation in. By and large, this is left to the | 145 | to know what the standard deviation is. By and large, this is left to the |
146 | performance analyst to do it by hand. In the event that the discrete event | 146 | performance analyst to do it by hand. In the event that the discrete event |
147 | occurrences are useful to the performance analyst, then perf can be used. | 147 | occurrences are useful to the performance analyst, then perf can be used. |
148 | 148 | ||
@@ -166,7 +166,7 @@ In the event that some higher-level event is required that depends on some | |||
166 | aggregation of discrete events, then a script would need to be developed. | 166 | aggregation of discrete events, then a script would need to be developed. |
167 | 167 | ||
168 | Using --repeat, it is also possible to view how events are fluctuating over | 168 | Using --repeat, it is also possible to view how events are fluctuating over |
169 | time on a system wide basis using -a and sleep. | 169 | time on a system-wide basis using -a and sleep. |
170 | 170 | ||
171 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 171 | $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ |
172 | -e kmem:mm_pagevec_free \ | 172 | -e kmem:mm_pagevec_free \ |
@@ -180,7 +180,7 @@ time on a system wide basis using -a and sleep. | |||
180 | 180 | ||
181 | 1.002251757 seconds time elapsed ( +- 0.005% ) | 181 | 1.002251757 seconds time elapsed ( +- 0.005% ) |
182 | 182 | ||
183 | 5. Higher-Level Analysis with Helper Scripts | 183 | 6. Higher-Level Analysis with Helper Scripts |
184 | ============================================ | 184 | ============================================ |
185 | 185 | ||
186 | When events are enabled the events that are triggering can be read from | 186 | When events are enabled the events that are triggering can be read from |
@@ -190,11 +190,11 @@ be gathered on-line as appropriate. Examples of post-processing might include | |||
190 | 190 | ||
191 | o Reading information from /proc for the PID that triggered the event | 191 | o Reading information from /proc for the PID that triggered the event |
192 | o Deriving a higher-level event from a series of lower-level events. | 192 | o Deriving a higher-level event from a series of lower-level events. |
193 | o Calculate latencies between two events | 193 | o Calculating latencies between two events |
194 | 194 | ||
195 | Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example | 195 | Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example |
196 | script that can read trace_pipe from STDIN or a copy of a trace. When used | 196 | script that can read trace_pipe from STDIN or a copy of a trace. When used |
197 | on-line, it can be interrupted once to generate a report without existing | 197 | on-line, it can be interrupted once to generate a report without exiting |
198 | and twice to exit. | 198 | and twice to exit. |
199 | 199 | ||
200 | Simplistically, the script just reads STDIN and counts up events but it | 200 | Simplistically, the script just reads STDIN and counts up events but it |
@@ -212,12 +212,12 @@ also can do more such as | |||
212 | processes, the parent process responsible for creating all the helpers | 212 | processes, the parent process responsible for creating all the helpers |
213 | can be identified | 213 | can be identified |
214 | 214 | ||
215 | 6. Lower-Level Analysis with PCL | 215 | 7. Lower-Level Analysis with PCL |
216 | ================================ | 216 | ================================ |
217 | 217 | ||
218 | There may also be a requirement to identify what functions with a program | 218 | There may also be a requirement to identify what functions within a program |
219 | were generating events within the kernel. To begin this sort of analysis, the | 219 | were generating events within the kernel. To begin this sort of analysis, the |
220 | data must be recorded. At the time of writing, this required root | 220 | data must be recorded. At the time of writing, this required root: |
221 | 221 | ||
222 | $ perf record -c 1 \ | 222 | $ perf record -c 1 \ |
223 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 223 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ |
@@ -253,11 +253,11 @@ perf report. | |||
253 | # (For more details, try: perf report --sort comm,dso,symbol) | 253 | # (For more details, try: perf report --sort comm,dso,symbol) |
254 | # | 254 | # |
255 | 255 | ||
256 | According to this, the vast majority of events occured triggered on events | 256 | According to this, the vast majority of events triggered on events |
257 | within the VDSO. With simple binaries, this will often be the case so lets | 257 | within the VDSO. With simple binaries, this will often be the case so let's |
258 | take a slightly different example. In the course of writing this, it was | 258 | take a slightly different example. In the course of writing this, it was |
259 | noticed that X was generating an insane amount of page allocations so lets look | 259 | noticed that X was generating an insane amount of page allocations so let's look |
260 | at it | 260 | at it: |
261 | 261 | ||
262 | $ perf record -c 1 -f \ | 262 | $ perf record -c 1 -f \ |
263 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ | 263 | -e kmem:mm_page_alloc -e kmem:mm_page_free_direct \ |
@@ -280,8 +280,8 @@ This was interrupted after a few seconds and | |||
280 | # (For more details, try: perf report --sort comm,dso,symbol) | 280 | # (For more details, try: perf report --sort comm,dso,symbol) |
281 | # | 281 | # |
282 | 282 | ||
283 | So, almost half of the events are occuring in a library. To get an idea which | 283 | So, almost half of the events are occurring in a library. To get an idea which |
284 | symbol. | 284 | symbol: |
285 | 285 | ||
286 | $ perf report --sort comm,dso,symbol | 286 | $ perf report --sort comm,dso,symbol |
287 | # Samples: 27666 | 287 | # Samples: 27666 |
@@ -297,7 +297,7 @@ symbol. | |||
297 | 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path | 297 | 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path |
298 | 0.00% Xorg [kernel] [k] ftrace_trace_userstack | 298 | 0.00% Xorg [kernel] [k] ftrace_trace_userstack |
299 | 299 | ||
300 | To see where within the function pixmanFillsse2 things are going wrong | 300 | To see where within the function pixmanFillsse2 things are going wrong: |
301 | 301 | ||
302 | $ perf annotate pixmanFillsse2 | 302 | $ perf annotate pixmanFillsse2 |
303 | [ ... ] | 303 | [ ... ] |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -195,7 +195,7 @@ struct perf_event_attr; | |||
195 | static const struct syscall_metadata __used \ | 195 | static const struct syscall_metadata __used \ |
196 | __attribute__((__aligned__(4))) \ | 196 | __attribute__((__aligned__(4))) \ |
197 | __attribute__((section("__syscalls_metadata"))) \ | 197 | __attribute__((section("__syscalls_metadata"))) \ |
198 | __syscall_meta_##sname = { \ | 198 | __syscall_meta__##sname = { \ |
199 | .name = "sys_"#sname, \ | 199 | .name = "sys_"#sname, \ |
200 | .nb_args = 0, \ | 200 | .nb_args = 0, \ |
201 | .enter_event = &event_enter__##sname, \ | 201 | .enter_event = &event_enter__##sname, \ |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..c6fe03e902ca 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ | |||
414 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | 414 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ |
415 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 415 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
416 | offsetof(typeof(field), item), \ | 416 | offsetof(typeof(field), item), \ |
417 | sizeof(field.item), 0, FILTER_OTHER); \ | 417 | sizeof(field.item), \ |
418 | is_signed_type(type), FILTER_OTHER); \ | ||
418 | if (ret) \ | 419 | if (ret) \ |
419 | return ret; | 420 | return ret; |
420 | 421 | ||
@@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ | |||
422 | #define __dynamic_array(type, item, len) \ | 423 | #define __dynamic_array(type, item, len) \ |
423 | ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ | 424 | ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ |
424 | offsetof(typeof(field), __data_loc_##item), \ | 425 | offsetof(typeof(field), __data_loc_##item), \ |
425 | sizeof(field.__data_loc_##item), 0, \ | 426 | sizeof(field.__data_loc_##item), \ |
426 | FILTER_OTHER); | 427 | is_signed_type(type), FILTER_OTHER); |
427 | 428 | ||
428 | #undef __string | 429 | #undef __string |
429 | #define __string(item, src) __dynamic_array(char, item, -1) | 430 | #define __string(item, src) __dynamic_array(char, item, -1) |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index dbcbf6a33a08..50dbd5999588 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/percpu.h> | 40 | #include <linux/percpu.h> |
41 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/cpu.h> | ||
43 | #include <linux/smp.h> | 44 | #include <linux/smp.h> |
44 | 45 | ||
45 | #include <linux/hw_breakpoint.h> | 46 | #include <linux/hw_breakpoint.h> |
@@ -388,7 +389,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
388 | if (!cpu_events) | 389 | if (!cpu_events) |
389 | return ERR_PTR(-ENOMEM); | 390 | return ERR_PTR(-ENOMEM); |
390 | 391 | ||
391 | for_each_possible_cpu(cpu) { | 392 | get_online_cpus(); |
393 | for_each_online_cpu(cpu) { | ||
392 | pevent = per_cpu_ptr(cpu_events, cpu); | 394 | pevent = per_cpu_ptr(cpu_events, cpu); |
393 | bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); | 395 | bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); |
394 | 396 | ||
@@ -399,18 +401,20 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, | |||
399 | goto fail; | 401 | goto fail; |
400 | } | 402 | } |
401 | } | 403 | } |
404 | put_online_cpus(); | ||
402 | 405 | ||
403 | return cpu_events; | 406 | return cpu_events; |
404 | 407 | ||
405 | fail: | 408 | fail: |
406 | for_each_possible_cpu(cpu) { | 409 | for_each_online_cpu(cpu) { |
407 | pevent = per_cpu_ptr(cpu_events, cpu); | 410 | pevent = per_cpu_ptr(cpu_events, cpu); |
408 | if (IS_ERR(*pevent)) | 411 | if (IS_ERR(*pevent)) |
409 | break; | 412 | break; |
410 | unregister_hw_breakpoint(*pevent); | 413 | unregister_hw_breakpoint(*pevent); |
411 | } | 414 | } |
415 | put_online_cpus(); | ||
416 | |||
412 | free_percpu(cpu_events); | 417 | free_percpu(cpu_events); |
413 | /* return the error if any */ | ||
414 | return ERR_PTR(err); | 418 | return ERR_PTR(err); |
415 | } | 419 | } |
416 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); | 420 | EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d006554888dc..6c22d8a2f289 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -12,17 +12,17 @@ config NOP_TRACER | |||
12 | config HAVE_FTRACE_NMI_ENTER | 12 | config HAVE_FTRACE_NMI_ENTER |
13 | bool | 13 | bool |
14 | help | 14 | help |
15 | See Documentation/trace/ftrace-implementation.txt | 15 | See Documentation/trace/ftrace-design.txt |
16 | 16 | ||
17 | config HAVE_FUNCTION_TRACER | 17 | config HAVE_FUNCTION_TRACER |
18 | bool | 18 | bool |
19 | help | 19 | help |
20 | See Documentation/trace/ftrace-implementation.txt | 20 | See Documentation/trace/ftrace-design.txt |
21 | 21 | ||
22 | config HAVE_FUNCTION_GRAPH_TRACER | 22 | config HAVE_FUNCTION_GRAPH_TRACER |
23 | bool | 23 | bool |
24 | help | 24 | help |
25 | See Documentation/trace/ftrace-implementation.txt | 25 | See Documentation/trace/ftrace-design.txt |
26 | 26 | ||
27 | config HAVE_FUNCTION_GRAPH_FP_TEST | 27 | config HAVE_FUNCTION_GRAPH_FP_TEST |
28 | bool | 28 | bool |
@@ -34,17 +34,17 @@ config HAVE_FUNCTION_GRAPH_FP_TEST | |||
34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST | 34 | config HAVE_FUNCTION_TRACE_MCOUNT_TEST |
35 | bool | 35 | bool |
36 | help | 36 | help |
37 | See Documentation/trace/ftrace-implementation.txt | 37 | See Documentation/trace/ftrace-design.txt |
38 | 38 | ||
39 | config HAVE_DYNAMIC_FTRACE | 39 | config HAVE_DYNAMIC_FTRACE |
40 | bool | 40 | bool |
41 | help | 41 | help |
42 | See Documentation/trace/ftrace-implementation.txt | 42 | See Documentation/trace/ftrace-design.txt |
43 | 43 | ||
44 | config HAVE_FTRACE_MCOUNT_RECORD | 44 | config HAVE_FTRACE_MCOUNT_RECORD |
45 | bool | 45 | bool |
46 | help | 46 | help |
47 | See Documentation/trace/ftrace-implementation.txt | 47 | See Documentation/trace/ftrace-design.txt |
48 | 48 | ||
49 | config HAVE_HW_BRANCH_TRACER | 49 | config HAVE_HW_BRANCH_TRACER |
50 | bool | 50 | bool |
@@ -52,7 +52,7 @@ config HAVE_HW_BRANCH_TRACER | |||
52 | config HAVE_SYSCALL_TRACEPOINTS | 52 | config HAVE_SYSCALL_TRACEPOINTS |
53 | bool | 53 | bool |
54 | help | 54 | help |
55 | See Documentation/trace/ftrace-implementation.txt | 55 | See Documentation/trace/ftrace-design.txt |
56 | 56 | ||
57 | config TRACER_MAX_TRACE | 57 | config TRACER_MAX_TRACE |
58 | bool | 58 | bool |
@@ -83,7 +83,7 @@ config RING_BUFFER_ALLOW_SWAP | |||
83 | # This allows those options to appear when no other tracer is selected. But the | 83 | # This allows those options to appear when no other tracer is selected. But the |
84 | # options do not appear when something else selects it. We need the two options | 84 | # options do not appear when something else selects it. We need the two options |
85 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the | 85 | # GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the |
86 | # hidding of the automatic options. | 86 | # hiding of the automatic options. |
87 | 87 | ||
88 | config TRACING | 88 | config TRACING |
89 | bool | 89 | bool |
@@ -119,7 +119,7 @@ menuconfig FTRACE | |||
119 | bool "Tracers" | 119 | bool "Tracers" |
120 | default y if DEBUG_KERNEL | 120 | default y if DEBUG_KERNEL |
121 | help | 121 | help |
122 | Enable the kernel tracing infrastructure. | 122 | Enable the kernel tracing infrastructure. |
123 | 123 | ||
124 | if FTRACE | 124 | if FTRACE |
125 | 125 | ||
@@ -133,7 +133,7 @@ config FUNCTION_TRACER | |||
133 | help | 133 | help |
134 | Enable the kernel to trace every kernel function. This is done | 134 | Enable the kernel to trace every kernel function. This is done |
135 | by using a compiler feature to insert a small, 5-byte No-Operation | 135 | by using a compiler feature to insert a small, 5-byte No-Operation |
136 | instruction to the beginning of every kernel function, which NOP | 136 | instruction at the beginning of every kernel function, which NOP |
137 | sequence is then dynamically patched into a tracer call when | 137 | sequence is then dynamically patched into a tracer call when |
138 | tracing is enabled by the administrator. If it's runtime disabled | 138 | tracing is enabled by the administrator. If it's runtime disabled |
139 | (the bootup default), then the overhead of the instructions is very | 139 | (the bootup default), then the overhead of the instructions is very |
@@ -150,7 +150,7 @@ config FUNCTION_GRAPH_TRACER | |||
150 | and its entry. | 150 | and its entry. |
151 | Its first purpose is to trace the duration of functions and | 151 | Its first purpose is to trace the duration of functions and |
152 | draw a call graph for each thread with some information like | 152 | draw a call graph for each thread with some information like |
153 | the return value. This is done by setting the current return | 153 | the return value. This is done by setting the current return |
154 | address on the current task structure into a stack of calls. | 154 | address on the current task structure into a stack of calls. |
155 | 155 | ||
156 | 156 | ||
@@ -173,7 +173,7 @@ config IRQSOFF_TRACER | |||
173 | 173 | ||
174 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 174 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
175 | 175 | ||
176 | (Note that kernel size and overhead increases with this option | 176 | (Note that kernel size and overhead increase with this option |
177 | enabled. This option and the preempt-off timing option can be | 177 | enabled. This option and the preempt-off timing option can be |
178 | used together or separately.) | 178 | used together or separately.) |
179 | 179 | ||
@@ -186,7 +186,7 @@ config PREEMPT_TRACER | |||
186 | select TRACER_MAX_TRACE | 186 | select TRACER_MAX_TRACE |
187 | select RING_BUFFER_ALLOW_SWAP | 187 | select RING_BUFFER_ALLOW_SWAP |
188 | help | 188 | help |
189 | This option measures the time spent in preemption off critical | 189 | This option measures the time spent in preemption-off critical |
190 | sections, with microsecond accuracy. | 190 | sections, with microsecond accuracy. |
191 | 191 | ||
192 | The default measurement method is a maximum search, which is | 192 | The default measurement method is a maximum search, which is |
@@ -195,7 +195,7 @@ config PREEMPT_TRACER | |||
195 | 195 | ||
196 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency | 196 | echo 0 > /sys/kernel/debug/tracing/tracing_max_latency |
197 | 197 | ||
198 | (Note that kernel size and overhead increases with this option | 198 | (Note that kernel size and overhead increase with this option |
199 | enabled. This option and the irqs-off timing option can be | 199 | enabled. This option and the irqs-off timing option can be |
200 | used together or separately.) | 200 | used together or separately.) |
201 | 201 | ||
@@ -222,7 +222,7 @@ config ENABLE_DEFAULT_TRACERS | |||
222 | depends on !GENERIC_TRACER | 222 | depends on !GENERIC_TRACER |
223 | select TRACING | 223 | select TRACING |
224 | help | 224 | help |
225 | This tracer hooks to various trace points in the kernel | 225 | This tracer hooks to various trace points in the kernel, |
226 | allowing the user to pick and choose which trace point they | 226 | allowing the user to pick and choose which trace point they |
227 | want to trace. It also includes the sched_switch tracer plugin. | 227 | want to trace. It also includes the sched_switch tracer plugin. |
228 | 228 | ||
@@ -265,19 +265,19 @@ choice | |||
265 | The likely/unlikely profiler only looks at the conditions that | 265 | The likely/unlikely profiler only looks at the conditions that |
266 | are annotated with a likely or unlikely macro. | 266 | are annotated with a likely or unlikely macro. |
267 | 267 | ||
268 | The "all branch" profiler will profile every if statement in the | 268 | The "all branch" profiler will profile every if-statement in the |
269 | kernel. This profiler will also enable the likely/unlikely | 269 | kernel. This profiler will also enable the likely/unlikely |
270 | profiler as well. | 270 | profiler. |
271 | 271 | ||
272 | Either of the above profilers add a bit of overhead to the system. | 272 | Either of the above profilers adds a bit of overhead to the system. |
273 | If unsure choose "No branch profiling". | 273 | If unsure, choose "No branch profiling". |
274 | 274 | ||
275 | config BRANCH_PROFILE_NONE | 275 | config BRANCH_PROFILE_NONE |
276 | bool "No branch profiling" | 276 | bool "No branch profiling" |
277 | help | 277 | help |
278 | No branch profiling. Branch profiling adds a bit of overhead. | 278 | No branch profiling. Branch profiling adds a bit of overhead. |
279 | Only enable it if you want to analyse the branching behavior. | 279 | Only enable it if you want to analyse the branching behavior. |
280 | Otherwise keep it disabled. | 280 | Otherwise keep it disabled. |
281 | 281 | ||
282 | config PROFILE_ANNOTATED_BRANCHES | 282 | config PROFILE_ANNOTATED_BRANCHES |
283 | bool "Trace likely/unlikely profiler" | 283 | bool "Trace likely/unlikely profiler" |
@@ -288,7 +288,7 @@ config PROFILE_ANNOTATED_BRANCHES | |||
288 | 288 | ||
289 | /sys/kernel/debug/tracing/profile_annotated_branch | 289 | /sys/kernel/debug/tracing/profile_annotated_branch |
290 | 290 | ||
291 | Note: this will add a significant overhead, only turn this | 291 | Note: this will add a significant overhead; only turn this |
292 | on if you need to profile the system's use of these macros. | 292 | on if you need to profile the system's use of these macros. |
293 | 293 | ||
294 | config PROFILE_ALL_BRANCHES | 294 | config PROFILE_ALL_BRANCHES |
@@ -305,7 +305,7 @@ config PROFILE_ALL_BRANCHES | |||
305 | 305 | ||
306 | This configuration, when enabled, will impose a great overhead | 306 | This configuration, when enabled, will impose a great overhead |
307 | on the system. This should only be enabled when the system | 307 | on the system. This should only be enabled when the system |
308 | is to be analyzed | 308 | is to be analyzed in much detail. |
309 | endchoice | 309 | endchoice |
310 | 310 | ||
311 | config TRACING_BRANCHES | 311 | config TRACING_BRANCHES |
@@ -335,7 +335,7 @@ config POWER_TRACER | |||
335 | depends on X86 | 335 | depends on X86 |
336 | select GENERIC_TRACER | 336 | select GENERIC_TRACER |
337 | help | 337 | help |
338 | This tracer helps developers to analyze and optimize the kernels | 338 | This tracer helps developers to analyze and optimize the kernel's |
339 | power management decisions, specifically the C-state and P-state | 339 | power management decisions, specifically the C-state and P-state |
340 | behavior. | 340 | behavior. |
341 | 341 | ||
@@ -391,14 +391,14 @@ config HW_BRANCH_TRACER | |||
391 | select GENERIC_TRACER | 391 | select GENERIC_TRACER |
392 | help | 392 | help |
393 | This tracer records all branches on the system in a circular | 393 | This tracer records all branches on the system in a circular |
394 | buffer giving access to the last N branches for each cpu. | 394 | buffer, giving access to the last N branches for each cpu. |
395 | 395 | ||
396 | config KMEMTRACE | 396 | config KMEMTRACE |
397 | bool "Trace SLAB allocations" | 397 | bool "Trace SLAB allocations" |
398 | select GENERIC_TRACER | 398 | select GENERIC_TRACER |
399 | help | 399 | help |
400 | kmemtrace provides tracing for slab allocator functions, such as | 400 | kmemtrace provides tracing for slab allocator functions, such as |
401 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected | 401 | kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected |
402 | data is then fed to the userspace application in order to analyse | 402 | data is then fed to the userspace application in order to analyse |
403 | allocation hotspots, internal fragmentation and so on, making it | 403 | allocation hotspots, internal fragmentation and so on, making it |
404 | possible to see how well an allocator performs, as well as debug | 404 | possible to see how well an allocator performs, as well as debug |
@@ -417,15 +417,15 @@ config WORKQUEUE_TRACER | |||
417 | bool "Trace workqueues" | 417 | bool "Trace workqueues" |
418 | select GENERIC_TRACER | 418 | select GENERIC_TRACER |
419 | help | 419 | help |
420 | The workqueue tracer provides some statistical informations | 420 | The workqueue tracer provides some statistical information |
421 | about each cpu workqueue thread such as the number of the | 421 | about each cpu workqueue thread such as the number of the |
422 | works inserted and executed since their creation. It can help | 422 | works inserted and executed since their creation. It can help |
423 | to evaluate the amount of work each of them have to perform. | 423 | to evaluate the amount of work each of them has to perform. |
424 | For example it can help a developer to decide whether he should | 424 | For example it can help a developer to decide whether he should |
425 | choose a per cpu workqueue instead of a singlethreaded one. | 425 | choose a per-cpu workqueue instead of a singlethreaded one. |
426 | 426 | ||
427 | config BLK_DEV_IO_TRACE | 427 | config BLK_DEV_IO_TRACE |
428 | bool "Support for tracing block io actions" | 428 | bool "Support for tracing block IO actions" |
429 | depends on SYSFS | 429 | depends on SYSFS |
430 | depends on BLOCK | 430 | depends on BLOCK |
431 | select RELAY | 431 | select RELAY |
@@ -456,15 +456,15 @@ config KPROBE_EVENT | |||
456 | select TRACING | 456 | select TRACING |
457 | default y | 457 | default y |
458 | help | 458 | help |
459 | This allows the user to add tracing events (similar to tracepoints) on the fly | 459 | This allows the user to add tracing events (similar to tracepoints) |
460 | via the ftrace interface. See Documentation/trace/kprobetrace.txt | 460 | on the fly via the ftrace interface. See |
461 | for more details. | 461 | Documentation/trace/kprobetrace.txt for more details. |
462 | 462 | ||
463 | Those events can be inserted wherever kprobes can probe, and record | 463 | Those events can be inserted wherever kprobes can probe, and record |
464 | various register and memory values. | 464 | various register and memory values. |
465 | 465 | ||
466 | This option is also required by perf-probe subcommand of perf tools. If | 466 | This option is also required by perf-probe subcommand of perf tools. |
467 | you want to use perf tools, this option is strongly recommended. | 467 | If you want to use perf tools, this option is strongly recommended. |
468 | 468 | ||
469 | config DYNAMIC_FTRACE | 469 | config DYNAMIC_FTRACE |
470 | bool "enable/disable ftrace tracepoints dynamically" | 470 | bool "enable/disable ftrace tracepoints dynamically" |
@@ -472,32 +472,32 @@ config DYNAMIC_FTRACE | |||
472 | depends on HAVE_DYNAMIC_FTRACE | 472 | depends on HAVE_DYNAMIC_FTRACE |
473 | default y | 473 | default y |
474 | help | 474 | help |
475 | This option will modify all the calls to ftrace dynamically | 475 | This option will modify all the calls to ftrace dynamically |
476 | (will patch them out of the binary image and replaces them | 476 | (will patch them out of the binary image and replace them |
477 | with a No-Op instruction) as they are called. A table is | 477 | with a No-Op instruction) as they are called. A table is |
478 | created to dynamically enable them again. | 478 | created to dynamically enable them again. |
479 | 479 | ||
480 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise | 480 | This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but |
481 | has native performance as long as no tracing is active. | 481 | otherwise has native performance as long as no tracing is active. |
482 | 482 | ||
483 | The changes to the code are done by a kernel thread that | 483 | The changes to the code are done by a kernel thread that |
484 | wakes up once a second and checks to see if any ftrace calls | 484 | wakes up once a second and checks to see if any ftrace calls |
485 | were made. If so, it runs stop_machine (stops all CPUS) | 485 | were made. If so, it runs stop_machine (stops all CPUS) |
486 | and modifies the code to jump over the call to ftrace. | 486 | and modifies the code to jump over the call to ftrace. |
487 | 487 | ||
488 | config FUNCTION_PROFILER | 488 | config FUNCTION_PROFILER |
489 | bool "Kernel function profiler" | 489 | bool "Kernel function profiler" |
490 | depends on FUNCTION_TRACER | 490 | depends on FUNCTION_TRACER |
491 | default n | 491 | default n |
492 | help | 492 | help |
493 | This option enables the kernel function profiler. A file is created | 493 | This option enables the kernel function profiler. A file is created |
494 | in debugfs called function_profile_enabled which defaults to zero. | 494 | in debugfs called function_profile_enabled which defaults to zero. |
495 | When a 1 is echoed into this file profiling begins, and when a | 495 | When a 1 is echoed into this file profiling begins, and when a |
496 | zero is entered, profiling stops. A file in the trace_stats | 496 | zero is entered, profiling stops. A "functions" file is created in |
497 | directory called functions, that show the list of functions that | 497 | the trace_stats directory; this file shows the list of functions that |
498 | have been hit and their counters. | 498 | have been hit and their counters. |
499 | 499 | ||
500 | If in doubt, say N | 500 | If in doubt, say N. |
501 | 501 | ||
502 | config FTRACE_MCOUNT_RECORD | 502 | config FTRACE_MCOUNT_RECORD |
503 | def_bool y | 503 | def_bool y |
@@ -556,8 +556,8 @@ config RING_BUFFER_BENCHMARK | |||
556 | tristate "Ring buffer benchmark stress tester" | 556 | tristate "Ring buffer benchmark stress tester" |
557 | depends on RING_BUFFER | 557 | depends on RING_BUFFER |
558 | help | 558 | help |
559 | This option creates a test to stress the ring buffer and bench mark it. | 559 | This option creates a test to stress the ring buffer and benchmark it. |
560 | It creates its own ring buffer such that it will not interfer with | 560 | It creates its own ring buffer such that it will not interfere with |
561 | any other users of the ring buffer (such as ftrace). It then creates | 561 | any other users of the ring buffer (such as ftrace). It then creates |
562 | a producer and consumer that will run for 10 seconds and sleep for | 562 | a producer and consumer that will run for 10 seconds and sleep for |
563 | 10 seconds. Each interval it will print out the number of events | 563 | 10 seconds. Each interval it will print out the number of events |
@@ -566,7 +566,7 @@ config RING_BUFFER_BENCHMARK | |||
566 | It does not disable interrupts or raise its priority, so it may be | 566 | It does not disable interrupts or raise its priority, so it may be |
567 | affected by processes that are running. | 567 | affected by processes that are running. |
568 | 568 | ||
569 | If unsure, say N | 569 | If unsure, say N. |
570 | 570 | ||
571 | endif # FTRACE | 571 | endif # FTRACE |
572 | 572 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b9f20ab8eed..0df1b0f2cb9e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3949,7 +3949,7 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
3949 | if (!!(topt->flags->val & topt->opt->bit) != val) { | 3949 | if (!!(topt->flags->val & topt->opt->bit) != val) { |
3950 | mutex_lock(&trace_types_lock); | 3950 | mutex_lock(&trace_types_lock); |
3951 | ret = __set_tracer_option(current_trace, topt->flags, | 3951 | ret = __set_tracer_option(current_trace, topt->flags, |
3952 | topt->opt, val); | 3952 | topt->opt, !val); |
3953 | mutex_unlock(&trace_types_lock); | 3953 | mutex_unlock(&trace_types_lock); |
3954 | if (ret) | 3954 | if (ret) |
3955 | return ret; | 3955 | return ret; |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ | |||
158 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ | 158 | BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ |
159 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 159 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
160 | offsetof(typeof(field), item), \ | 160 | offsetof(typeof(field), item), \ |
161 | sizeof(field.item), 0, FILTER_OTHER); \ | 161 | sizeof(field.item), \ |
162 | is_signed_type(type), FILTER_OTHER); \ | ||
162 | if (ret) \ | 163 | if (ret) \ |
163 | return ret; | 164 | return ret; |
164 | 165 | ||
@@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ | |||
168 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ | 169 | ret = trace_define_field(event_call, #type "[" #len "]", #item, \ |
169 | offsetof(typeof(field), \ | 170 | offsetof(typeof(field), \ |
170 | container.item), \ | 171 | container.item), \ |
171 | sizeof(field.container.item), 0, \ | 172 | sizeof(field.container.item), \ |
172 | FILTER_OTHER); \ | 173 | is_signed_type(type), FILTER_OTHER); \ |
173 | if (ret) \ | 174 | if (ret) \ |
174 | return ret; | 175 | return ret; |
175 | 176 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 375f81a568dc..6ea90c0e2c96 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1201,10 +1201,11 @@ static int __probe_event_show_format(struct trace_seq *s, | |||
1201 | #undef SHOW_FIELD | 1201 | #undef SHOW_FIELD |
1202 | #define SHOW_FIELD(type, item, name) \ | 1202 | #define SHOW_FIELD(type, item, name) \ |
1203 | do { \ | 1203 | do { \ |
1204 | ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ | 1204 | ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ |
1205 | "offset:%u;\tsize:%u;\n", name, \ | 1205 | "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ |
1206 | (unsigned int)offsetof(typeof(field), item),\ | 1206 | (unsigned int)offsetof(typeof(field), item),\ |
1207 | (unsigned int)sizeof(type)); \ | 1207 | (unsigned int)sizeof(type), \ |
1208 | is_signed_type(type)); \ | ||
1208 | if (!ret) \ | 1209 | if (!ret) \ |
1209 | return 0; \ | 1210 | return 0; \ |
1210 | } while (0) | 1211 | } while (0) |
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index faf37fa4408c..94103cdcf9d8 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c | |||
@@ -26,12 +26,13 @@ | |||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | 27 | ||
28 | #include "trace_output.h" | 28 | #include "trace_output.h" |
29 | #include "trace_stat.h" | ||
30 | #include "trace.h" | 29 | #include "trace.h" |
31 | 30 | ||
32 | #include <linux/hw_breakpoint.h> | 31 | #include <linux/hw_breakpoint.h> |
33 | #include <asm/hw_breakpoint.h> | 32 | #include <asm/hw_breakpoint.h> |
34 | 33 | ||
34 | #include <asm/atomic.h> | ||
35 | |||
35 | /* | 36 | /* |
36 | * For now, let us restrict the no. of symbols traced simultaneously to number | 37 | * For now, let us restrict the no. of symbols traced simultaneously to number |
37 | * of available hardware breakpoint registers. | 38 | * of available hardware breakpoint registers. |
@@ -44,7 +45,7 @@ struct trace_ksym { | |||
44 | struct perf_event **ksym_hbp; | 45 | struct perf_event **ksym_hbp; |
45 | struct perf_event_attr attr; | 46 | struct perf_event_attr attr; |
46 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 47 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
47 | unsigned long counter; | 48 | atomic64_t counter; |
48 | #endif | 49 | #endif |
49 | struct hlist_node ksym_hlist; | 50 | struct hlist_node ksym_hlist; |
50 | }; | 51 | }; |
@@ -69,9 +70,8 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) | |||
69 | 70 | ||
70 | rcu_read_lock(); | 71 | rcu_read_lock(); |
71 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { | 72 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { |
72 | if ((entry->attr.bp_addr == hbp_hit_addr) && | 73 | if (entry->attr.bp_addr == hbp_hit_addr) { |
73 | (entry->counter <= MAX_UL_INT)) { | 74 | atomic64_inc(&entry->counter); |
74 | entry->counter++; | ||
75 | break; | 75 | break; |
76 | } | 76 | } |
77 | } | 77 | } |
@@ -197,7 +197,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) | |||
197 | entry->attr.bp_addr = addr; | 197 | entry->attr.bp_addr = addr; |
198 | entry->attr.bp_len = HW_BREAKPOINT_LEN_4; | 198 | entry->attr.bp_len = HW_BREAKPOINT_LEN_4; |
199 | 199 | ||
200 | ret = -EAGAIN; | ||
201 | entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, | 200 | entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, |
202 | ksym_hbp_handler); | 201 | ksym_hbp_handler); |
203 | 202 | ||
@@ -300,8 +299,8 @@ static ssize_t ksym_trace_filter_write(struct file *file, | |||
300 | * 2: echo 0 > ksym_trace_filter | 299 | * 2: echo 0 > ksym_trace_filter |
301 | * 3: echo "*:---" > ksym_trace_filter | 300 | * 3: echo "*:---" > ksym_trace_filter |
302 | */ | 301 | */ |
303 | if (!buf[0] || !strcmp(buf, "0") || | 302 | if (!input_string[0] || !strcmp(input_string, "0") || |
304 | !strcmp(buf, "*:---")) { | 303 | !strcmp(input_string, "*:---")) { |
305 | __ksym_trace_reset(); | 304 | __ksym_trace_reset(); |
306 | ret = 0; | 305 | ret = 0; |
307 | goto out; | 306 | goto out; |
@@ -444,102 +443,77 @@ struct tracer ksym_tracer __read_mostly = | |||
444 | .print_line = ksym_trace_output | 443 | .print_line = ksym_trace_output |
445 | }; | 444 | }; |
446 | 445 | ||
447 | __init static int init_ksym_trace(void) | ||
448 | { | ||
449 | struct dentry *d_tracer; | ||
450 | struct dentry *entry; | ||
451 | |||
452 | d_tracer = tracing_init_dentry(); | ||
453 | ksym_filter_entry_count = 0; | ||
454 | |||
455 | entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, | ||
456 | NULL, &ksym_tracing_fops); | ||
457 | if (!entry) | ||
458 | pr_warning("Could not create debugfs " | ||
459 | "'ksym_trace_filter' file\n"); | ||
460 | |||
461 | return register_tracer(&ksym_tracer); | ||
462 | } | ||
463 | device_initcall(init_ksym_trace); | ||
464 | |||
465 | |||
466 | #ifdef CONFIG_PROFILE_KSYM_TRACER | 446 | #ifdef CONFIG_PROFILE_KSYM_TRACER |
467 | static int ksym_tracer_stat_headers(struct seq_file *m) | 447 | static int ksym_profile_show(struct seq_file *m, void *v) |
468 | { | 448 | { |
449 | struct hlist_node *node; | ||
450 | struct trace_ksym *entry; | ||
451 | int access_type = 0; | ||
452 | char fn_name[KSYM_NAME_LEN]; | ||
453 | |||
469 | seq_puts(m, " Access Type "); | 454 | seq_puts(m, " Access Type "); |
470 | seq_puts(m, " Symbol Counter\n"); | 455 | seq_puts(m, " Symbol Counter\n"); |
471 | seq_puts(m, " ----------- "); | 456 | seq_puts(m, " ----------- "); |
472 | seq_puts(m, " ------ -------\n"); | 457 | seq_puts(m, " ------ -------\n"); |
473 | return 0; | ||
474 | } | ||
475 | 458 | ||
476 | static int ksym_tracer_stat_show(struct seq_file *m, void *v) | 459 | rcu_read_lock(); |
477 | { | 460 | hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { |
478 | struct hlist_node *stat = v; | ||
479 | struct trace_ksym *entry; | ||
480 | int access_type = 0; | ||
481 | char fn_name[KSYM_NAME_LEN]; | ||
482 | 461 | ||
483 | entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); | 462 | access_type = entry->attr.bp_type; |
484 | 463 | ||
485 | access_type = entry->attr.bp_type; | 464 | switch (access_type) { |
465 | case HW_BREAKPOINT_R: | ||
466 | seq_puts(m, " R "); | ||
467 | break; | ||
468 | case HW_BREAKPOINT_W: | ||
469 | seq_puts(m, " W "); | ||
470 | break; | ||
471 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
472 | seq_puts(m, " RW "); | ||
473 | break; | ||
474 | default: | ||
475 | seq_puts(m, " NA "); | ||
476 | } | ||
486 | 477 | ||
487 | switch (access_type) { | 478 | if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) |
488 | case HW_BREAKPOINT_R: | 479 | seq_printf(m, " %-36s", fn_name); |
489 | seq_puts(m, " R "); | 480 | else |
490 | break; | 481 | seq_printf(m, " %-36s", "<NA>"); |
491 | case HW_BREAKPOINT_W: | 482 | seq_printf(m, " %15llu\n", |
492 | seq_puts(m, " W "); | 483 | (unsigned long long)atomic64_read(&entry->counter)); |
493 | break; | ||
494 | case HW_BREAKPOINT_R | HW_BREAKPOINT_W: | ||
495 | seq_puts(m, " RW "); | ||
496 | break; | ||
497 | default: | ||
498 | seq_puts(m, " NA "); | ||
499 | } | 484 | } |
500 | 485 | rcu_read_unlock(); | |
501 | if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) | ||
502 | seq_printf(m, " %-36s", fn_name); | ||
503 | else | ||
504 | seq_printf(m, " %-36s", "<NA>"); | ||
505 | seq_printf(m, " %15lu\n", entry->counter); | ||
506 | 486 | ||
507 | return 0; | 487 | return 0; |
508 | } | 488 | } |
509 | 489 | ||
510 | static void *ksym_tracer_stat_start(struct tracer_stat *trace) | 490 | static int ksym_profile_open(struct inode *node, struct file *file) |
511 | { | 491 | { |
512 | return ksym_filter_head.first; | 492 | return single_open(file, ksym_profile_show, NULL); |
513 | } | ||
514 | |||
515 | static void * | ||
516 | ksym_tracer_stat_next(void *v, int idx) | ||
517 | { | ||
518 | struct hlist_node *stat = v; | ||
519 | |||
520 | return stat->next; | ||
521 | } | 493 | } |
522 | 494 | ||
523 | static struct tracer_stat ksym_tracer_stats = { | 495 | static const struct file_operations ksym_profile_fops = { |
524 | .name = "ksym_tracer", | 496 | .open = ksym_profile_open, |
525 | .stat_start = ksym_tracer_stat_start, | 497 | .read = seq_read, |
526 | .stat_next = ksym_tracer_stat_next, | 498 | .llseek = seq_lseek, |
527 | .stat_headers = ksym_tracer_stat_headers, | 499 | .release = single_release, |
528 | .stat_show = ksym_tracer_stat_show | ||
529 | }; | 500 | }; |
501 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||
530 | 502 | ||
531 | __init static int ksym_tracer_stat_init(void) | 503 | __init static int init_ksym_trace(void) |
532 | { | 504 | { |
533 | int ret; | 505 | struct dentry *d_tracer; |
534 | 506 | ||
535 | ret = register_stat_tracer(&ksym_tracer_stats); | 507 | d_tracer = tracing_init_dentry(); |
536 | if (ret) { | ||
537 | printk(KERN_WARNING "Warning: could not register " | ||
538 | "ksym tracer stats\n"); | ||
539 | return 1; | ||
540 | } | ||
541 | 508 | ||
542 | return 0; | 509 | trace_create_file("ksym_trace_filter", 0644, d_tracer, |
510 | NULL, &ksym_tracing_fops); | ||
511 | |||
512 | #ifdef CONFIG_PROFILE_KSYM_TRACER | ||
513 | trace_create_file("ksym_profile", 0444, d_tracer, | ||
514 | NULL, &ksym_profile_fops); | ||
515 | #endif | ||
516 | |||
517 | return register_tracer(&ksym_tracer); | ||
543 | } | 518 | } |
544 | fs_initcall(ksym_tracer_stat_init); | 519 | device_initcall(init_ksym_trace); |
545 | #endif /* CONFIG_PROFILE_KSYM_TRACER */ | ||