diff options
-rw-r--r-- | Documentation/ftrace.txt | 171 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 |
2 files changed, 74 insertions, 99 deletions
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index ea5a827395dd..9cc4d685dde5 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt | |||
@@ -8,7 +8,7 @@ Copyright 2008 Red Hat Inc. | |||
8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, | 8 | Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton, |
9 | John Kacur, and David Teigland. | 9 | John Kacur, and David Teigland. |
10 | 10 | ||
11 | Written for: 2.6.27-rc1 | 11 | Written for: 2.6.28-rc2 |
12 | 12 | ||
13 | Introduction | 13 | Introduction |
14 | ------------ | 14 | ------------ |
@@ -50,26 +50,26 @@ of ftrace. Here is a list of some of the key files: | |||
50 | 50 | ||
51 | Note: all time values are in microseconds. | 51 | Note: all time values are in microseconds. |
52 | 52 | ||
53 | current_tracer : This is used to set or display the current tracer | 53 | current_tracer: This is used to set or display the current tracer |
54 | that is configured. | 54 | that is configured. |
55 | 55 | ||
56 | available_tracers : This holds the different types of tracers that | 56 | available_tracers: This holds the different types of tracers that |
57 | have been compiled into the kernel. The tracers | 57 | have been compiled into the kernel. The tracers |
58 | listed here can be configured by echoing their name | 58 | listed here can be configured by echoing their name |
59 | into current_tracer. | 59 | into current_tracer. |
60 | 60 | ||
61 | tracing_enabled : This sets or displays whether the current_tracer | 61 | tracing_enabled: This sets or displays whether the current_tracer |
62 | is activated and tracing or not. Echo 0 into this | 62 | is activated and tracing or not. Echo 0 into this |
63 | file to disable the tracer or 1 to enable it. | 63 | file to disable the tracer or 1 to enable it. |
64 | 64 | ||
65 | trace : This file holds the output of the trace in a human readable | 65 | trace: This file holds the output of the trace in a human readable |
66 | format (described below). | 66 | format (described below). |
67 | 67 | ||
68 | latency_trace : This file shows the same trace but the information | 68 | latency_trace: This file shows the same trace but the information |
69 | is organized more to display possible latencies | 69 | is organized more to display possible latencies |
70 | in the system (described below). | 70 | in the system (described below). |
71 | 71 | ||
72 | trace_pipe : The output is the same as the "trace" file but this | 72 | trace_pipe: The output is the same as the "trace" file but this |
73 | file is meant to be streamed with live tracing. | 73 | file is meant to be streamed with live tracing. |
74 | Reads from this file will block until new data | 74 | Reads from this file will block until new data |
75 | is retrieved. Unlike the "trace" and "latency_trace" | 75 | is retrieved. Unlike the "trace" and "latency_trace" |
@@ -82,11 +82,11 @@ of ftrace. Here is a list of some of the key files: | |||
82 | tracer is not adding more data, they will display | 82 | tracer is not adding more data, they will display |
83 | the same information every time they are read. | 83 | the same information every time they are read. |
84 | 84 | ||
85 | iter_ctrl : This file lets the user control the amount of data | 85 | iter_ctrl: This file lets the user control the amount of data |
86 | that is displayed in one of the above output | 86 | that is displayed in one of the above output |
87 | files. | 87 | files. |
88 | 88 | ||
89 | trace_max_latency : Some of the tracers record the max latency. | 89 | trace_max_latency: Some of the tracers record the max latency. |
90 | For example, the time interrupts are disabled. | 90 | For example, the time interrupts are disabled. |
91 | This time is saved in this file. The max trace | 91 | This time is saved in this file. The max trace |
92 | will also be stored, and displayed by either | 92 | will also be stored, and displayed by either |
@@ -94,29 +94,26 @@ of ftrace. Here is a list of some of the key files: | |||
94 | only be recorded if the latency is greater than | 94 | only be recorded if the latency is greater than |
95 | the value in this file. (in microseconds) | 95 | the value in this file. (in microseconds) |
96 | 96 | ||
97 | trace_entries : This sets or displays the number of trace | 97 | trace_entries: This sets or displays the number of bytes each CPU |
98 | entries each CPU buffer can hold. The tracer buffers | 98 | buffer can hold. The tracer buffers are the same size |
99 | are the same size for each CPU. The displayed number | 99 | for each CPU. The displayed number is the size of the |
100 | is the size of the CPU buffer and not total size. The | 100 | CPU buffer and not total size of all buffers. The |
101 | trace buffers are allocated in pages (blocks of memory | 101 | trace buffers are allocated in pages (blocks of memory |
102 | that the kernel uses for allocation, usually 4 KB in size). | 102 | that the kernel uses for allocation, usually 4 KB in size). |
103 | Since each entry is smaller than a page, if the last | 103 | If the last page allocated has room for more bytes |
104 | allocated page has room for more entries than were | 104 | than requested, the rest of the page will be used, |
105 | requested, the rest of the page is used to allocate | 105 | making the actual allocation bigger than requested. |
106 | entries. | 106 | (Note, the size may not be a multiple of the page size due |
107 | to buffer managment overhead.) | ||
107 | 108 | ||
108 | This can only be updated when the current_tracer | 109 | This can only be updated when the current_tracer |
109 | is set to "none". | 110 | is set to "nop". |
110 | 111 | ||
111 | NOTE: It is planned on changing the allocated buffers | 112 | tracing_cpumask: This is a mask that lets the user only trace |
112 | from being the number of possible CPUS to | ||
113 | the number of online CPUS. | ||
114 | |||
115 | tracing_cpumask : This is a mask that lets the user only trace | ||
116 | on specified CPUS. The format is a hex string | 113 | on specified CPUS. The format is a hex string |
117 | representing the CPUS. | 114 | representing the CPUS. |
118 | 115 | ||
119 | set_ftrace_filter : When dynamic ftrace is configured in (see the | 116 | set_ftrace_filter: When dynamic ftrace is configured in (see the |
120 | section below "dynamic ftrace"), the code is dynamically | 117 | section below "dynamic ftrace"), the code is dynamically |
121 | modified (code text rewrite) to disable calling of the | 118 | modified (code text rewrite) to disable calling of the |
122 | function profiler (mcount). This lets tracing be configured | 119 | function profiler (mcount). This lets tracing be configured |
@@ -130,14 +127,11 @@ of ftrace. Here is a list of some of the key files: | |||
130 | be traced. If a function exists in both set_ftrace_filter | 127 | be traced. If a function exists in both set_ftrace_filter |
131 | and set_ftrace_notrace, the function will _not_ be traced. | 128 | and set_ftrace_notrace, the function will _not_ be traced. |
132 | 129 | ||
133 | available_filter_functions : When a function is encountered the first | 130 | available_filter_functions: This lists the functions that ftrace |
134 | time by the dynamic tracer, it is recorded and | 131 | has processed and can trace. These are the function |
135 | later the call is converted into a nop. This file | 132 | names that you can pass to "set_ftrace_filter" or |
136 | lists the functions that have been recorded | 133 | "set_ftrace_notrace". (See the section "dynamic ftrace" |
137 | by the dynamic tracer and these functions can | 134 | below for more details.) |
138 | be used to set the ftrace filter by the above | ||
139 | "set_ftrace_filter" file. (See the section "dynamic ftrace" | ||
140 | below for more details). | ||
141 | 135 | ||
142 | 136 | ||
143 | The Tracers | 137 | The Tracers |
@@ -145,7 +139,7 @@ The Tracers | |||
145 | 139 | ||
146 | Here is the list of current tracers that may be configured. | 140 | Here is the list of current tracers that may be configured. |
147 | 141 | ||
148 | ftrace - function tracer that uses mcount to trace all functions. | 142 | function - function tracer that uses mcount to trace all functions. |
149 | 143 | ||
150 | sched_switch - traces the context switches between tasks. | 144 | sched_switch - traces the context switches between tasks. |
151 | 145 | ||
@@ -166,8 +160,8 @@ Here is the list of current tracers that may be configured. | |||
166 | the highest priority task to get scheduled after | 160 | the highest priority task to get scheduled after |
167 | it has been woken up. | 161 | it has been woken up. |
168 | 162 | ||
169 | none - This is not a tracer. To remove all tracers from tracing | 163 | nop - This is not a tracer. To remove all tracers from tracing |
170 | simply echo "none" into current_tracer. | 164 | simply echo "nop" into current_tracer. |
171 | 165 | ||
172 | 166 | ||
173 | Examples of using the tracer | 167 | Examples of using the tracer |
@@ -182,7 +176,7 @@ Output format: | |||
182 | Here is an example of the output format of the file "trace" | 176 | Here is an example of the output format of the file "trace" |
183 | 177 | ||
184 | -------- | 178 | -------- |
185 | # tracer: ftrace | 179 | # tracer: function |
186 | # | 180 | # |
187 | # TASK-PID CPU# TIMESTAMP FUNCTION | 181 | # TASK-PID CPU# TIMESTAMP FUNCTION |
188 | # | | | | | | 182 | # | | | | | |
@@ -192,7 +186,7 @@ Here is an example of the output format of the file "trace" | |||
192 | -------- | 186 | -------- |
193 | 187 | ||
194 | A header is printed with the tracer name that is represented by the trace. | 188 | A header is printed with the tracer name that is represented by the trace. |
195 | In this case the tracer is "ftrace". Then a header showing the format. Task | 189 | In this case the tracer is "function". Then a header showing the format. Task |
196 | name "bash", the task PID "4251", the CPU that it was running on | 190 | name "bash", the task PID "4251", the CPU that it was running on |
197 | "01", the timestamp in <secs>.<usecs> format, the function name that was | 191 | "01", the timestamp in <secs>.<usecs> format, the function name that was |
198 | traced "path_put" and the parent function that called this function | 192 | traced "path_put" and the parent function that called this function |
@@ -1003,22 +997,20 @@ is the stack for the hard interrupt. This hides the fact that NEED_RESCHED | |||
1003 | has been set. We do not see the 'N' until we switch back to the task's | 997 | has been set. We do not see the 'N' until we switch back to the task's |
1004 | assigned stack. | 998 | assigned stack. |
1005 | 999 | ||
1006 | ftrace | 1000 | function |
1007 | ------ | 1001 | -------- |
1008 | 1002 | ||
1009 | ftrace is not only the name of the tracing infrastructure, but it | 1003 | This tracer is the function tracer. Enabling the function tracer |
1010 | is also a name of one of the tracers. The tracer is the function | 1004 | can be done from the debug file system. Make sure the ftrace_enabled is |
1011 | tracer. Enabling the function tracer can be done from the | 1005 | set; otherwise this tracer is a nop. |
1012 | debug file system. Make sure the ftrace_enabled is set otherwise | ||
1013 | this tracer is a nop. | ||
1014 | 1006 | ||
1015 | # sysctl kernel.ftrace_enabled=1 | 1007 | # sysctl kernel.ftrace_enabled=1 |
1016 | # echo ftrace > /debug/tracing/current_tracer | 1008 | # echo function > /debug/tracing/current_tracer |
1017 | # echo 1 > /debug/tracing/tracing_enabled | 1009 | # echo 1 > /debug/tracing/tracing_enabled |
1018 | # usleep 1 | 1010 | # usleep 1 |
1019 | # echo 0 > /debug/tracing/tracing_enabled | 1011 | # echo 0 > /debug/tracing/tracing_enabled |
1020 | # cat /debug/tracing/trace | 1012 | # cat /debug/tracing/trace |
1021 | # tracer: ftrace | 1013 | # tracer: function |
1022 | # | 1014 | # |
1023 | # TASK-PID CPU# TIMESTAMP FUNCTION | 1015 | # TASK-PID CPU# TIMESTAMP FUNCTION |
1024 | # | | | | | | 1016 | # | | | | | |
@@ -1040,10 +1032,10 @@ this tracer is a nop. | |||
1040 | [...] | 1032 | [...] |
1041 | 1033 | ||
1042 | 1034 | ||
1043 | Note: ftrace uses ring buffers to store the above entries. The newest data | 1035 | Note: function tracer uses ring buffers to store the above entries. |
1044 | may overwrite the oldest data. Sometimes using echo to stop the trace | 1036 | The newest data may overwrite the oldest data. Sometimes using echo to |
1045 | is not sufficient because the tracing could have overwritten the data | 1037 | stop the trace is not sufficient because the tracing could have overwritten |
1046 | that you wanted to record. For this reason, it is sometimes better to | 1038 | the data that you wanted to record. For this reason, it is sometimes better to |
1047 | disable tracing directly from a program. This allows you to stop the | 1039 | disable tracing directly from a program. This allows you to stop the |
1048 | tracing at the point that you hit the part that you are interested in. | 1040 | tracing at the point that you hit the part that you are interested in. |
1049 | To disable the tracing directly from a C program, something like following | 1041 | To disable the tracing directly from a C program, something like following |
@@ -1077,18 +1069,31 @@ every kernel function, produced by the -pg switch in gcc), starts | |||
1077 | of pointing to a simple return. (Enabling FTRACE will include the | 1069 | of pointing to a simple return. (Enabling FTRACE will include the |
1078 | -pg switch in the compiling of the kernel.) | 1070 | -pg switch in the compiling of the kernel.) |
1079 | 1071 | ||
1080 | When dynamic ftrace is initialized, it calls kstop_machine to make | 1072 | At compile time every C file object is run through the |
1081 | the machine act like a uniprocessor so that it can freely modify code | 1073 | recordmcount.pl script (located in the scripts directory). This |
1082 | without worrying about other processors executing that same code. At | 1074 | script will process the C object using objdump to find all the |
1083 | initialization, the mcount calls are changed to call a "record_ip" | 1075 | locations in the .text section that call mcount. (Note, only |
1084 | function. After this, the first time a kernel function is called, | 1076 | the .text section is processed, since processing other sections |
1085 | it has the calling address saved in a hash table. | 1077 | like .init.text may cause races due to those sections being freed). |
1086 | 1078 | ||
1087 | Later on the ftraced kernel thread is awoken and will again call | 1079 | A new section called "__mcount_loc" is created that holds references |
1088 | kstop_machine if new functions have been recorded. The ftraced thread | 1080 | to all the mcount call sites in the .text section. This section is |
1089 | will change all calls to mcount to "nop". Just calling mcount | 1081 | compiled back into the original object. The final linker will add |
1090 | and having mcount return has shown a 10% overhead. By converting | 1082 | all these references into a single table. |
1091 | it to a nop, there is no measurable overhead to the system. | 1083 | |
1084 | On boot up, before SMP is initialized, the dynamic ftrace code | ||
1085 | scans this table and updates all the locations into nops. It also | ||
1086 | records the locations, which are added to the available_filter_functions | ||
1087 | list. Modules are processed as they are loaded and before they are | ||
1088 | executed. When a module is unloaded, it also removes its functions from | ||
1089 | the ftrace function list. This is automatic in the module unload | ||
1090 | code, and the module author does not need to worry about it. | ||
1091 | |||
1092 | When tracing is enabled, kstop_machine is called to prevent races | ||
1093 | with the CPUS executing code being modified (which can cause the | ||
1094 | CPU to do undesireable things), and the nops are patched back | ||
1095 | to calls. But this time, they do not call mcount (which is just | ||
1096 | a function stub). They now call into the ftrace infrastructure. | ||
1092 | 1097 | ||
1093 | One special side-effect to the recording of the functions being | 1098 | One special side-effect to the recording of the functions being |
1094 | traced is that we can now selectively choose which functions we | 1099 | traced is that we can now selectively choose which functions we |
@@ -1251,36 +1256,6 @@ Produces: | |||
1251 | 1256 | ||
1252 | We can see that there's no more lock or preempt tracing. | 1257 | We can see that there's no more lock or preempt tracing. |
1253 | 1258 | ||
1254 | ftraced | ||
1255 | ------- | ||
1256 | |||
1257 | As mentioned above, when dynamic ftrace is configured in, a kernel | ||
1258 | thread wakes up once a second and checks to see if there are mcount | ||
1259 | calls that need to be converted into nops. If there are not any, then | ||
1260 | it simply goes back to sleep. But if there are some, it will call | ||
1261 | kstop_machine to convert the calls to nops. | ||
1262 | |||
1263 | There may be a case in which you do not want this added latency. | ||
1264 | Perhaps you are doing some audio recording and this activity might | ||
1265 | cause skips in the playback. There is an interface to disable | ||
1266 | and enable the "ftraced" kernel thread. | ||
1267 | |||
1268 | # echo 0 > /debug/tracing/ftraced_enabled | ||
1269 | |||
1270 | This will disable the calling of kstop_machine to update the | ||
1271 | mcount calls to nops. Remember that there is a large overhead | ||
1272 | to calling mcount. Without this kernel thread, that overhead will | ||
1273 | exist. | ||
1274 | |||
1275 | If there are recorded calls to mcount, any write to the ftraced_enabled | ||
1276 | file will cause the kstop_machine to run. This means that a | ||
1277 | user can manually perform the updates when they want to by simply | ||
1278 | echoing a '0' into the ftraced_enabled file. | ||
1279 | |||
1280 | The updates are also done at the beginning of enabling a tracer | ||
1281 | that uses ftrace function recording. | ||
1282 | |||
1283 | |||
1284 | trace_pipe | 1259 | trace_pipe |
1285 | ---------- | 1260 | ---------- |
1286 | 1261 | ||
@@ -1289,14 +1264,14 @@ on the tracing is different. Every read from trace_pipe is consumed. | |||
1289 | This means that subsequent reads will be different. The trace | 1264 | This means that subsequent reads will be different. The trace |
1290 | is live. | 1265 | is live. |
1291 | 1266 | ||
1292 | # echo ftrace > /debug/tracing/current_tracer | 1267 | # echo function > /debug/tracing/current_tracer |
1293 | # cat /debug/tracing/trace_pipe > /tmp/trace.out & | 1268 | # cat /debug/tracing/trace_pipe > /tmp/trace.out & |
1294 | [1] 4153 | 1269 | [1] 4153 |
1295 | # echo 1 > /debug/tracing/tracing_enabled | 1270 | # echo 1 > /debug/tracing/tracing_enabled |
1296 | # usleep 1 | 1271 | # usleep 1 |
1297 | # echo 0 > /debug/tracing/tracing_enabled | 1272 | # echo 0 > /debug/tracing/tracing_enabled |
1298 | # cat /debug/tracing/trace | 1273 | # cat /debug/tracing/trace |
1299 | # tracer: ftrace | 1274 | # tracer: function |
1300 | # | 1275 | # |
1301 | # TASK-PID CPU# TIMESTAMP FUNCTION | 1276 | # TASK-PID CPU# TIMESTAMP FUNCTION |
1302 | # | | | | | | 1277 | # | | | | | |
@@ -1317,7 +1292,7 @@ is live. | |||
1317 | 1292 | ||
1318 | Note, reading the trace_pipe file will block until more input is added. | 1293 | Note, reading the trace_pipe file will block until more input is added. |
1319 | By changing the tracer, trace_pipe will issue an EOF. We needed | 1294 | By changing the tracer, trace_pipe will issue an EOF. We needed |
1320 | to set the ftrace tracer _before_ cating the trace_pipe file. | 1295 | to set the function tracer _before_ we "cat" the trace_pipe file. |
1321 | 1296 | ||
1322 | 1297 | ||
1323 | trace entries | 1298 | trace entries |
@@ -1334,10 +1309,10 @@ number of entries. | |||
1334 | 65620 | 1309 | 65620 |
1335 | 1310 | ||
1336 | Note, to modify this, you must have tracing completely disabled. To do that, | 1311 | Note, to modify this, you must have tracing completely disabled. To do that, |
1337 | echo "none" into the current_tracer. If the current_tracer is not set | 1312 | echo "nop" into the current_tracer. If the current_tracer is not set |
1338 | to "none", an EINVAL error will be returned. | 1313 | to "nop", an EINVAL error will be returned. |
1339 | 1314 | ||
1340 | # echo none > /debug/tracing/current_tracer | 1315 | # echo nop > /debug/tracing/current_tracer |
1341 | # echo 100000 > /debug/tracing/trace_entries | 1316 | # echo 100000 > /debug/tracing/trace_entries |
1342 | # cat /debug/tracing/trace_entries | 1317 | # cat /debug/tracing/trace_entries |
1343 | 100045 | 1318 | 100045 |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index abfa8103d046..697eda36b86a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | |||
1755 | return TRACE_TYPE_HANDLED; | 1755 | return TRACE_TYPE_HANDLED; |
1756 | 1756 | ||
1757 | SEQ_PUT_FIELD_RET(s, entry->pid); | 1757 | SEQ_PUT_FIELD_RET(s, entry->pid); |
1758 | SEQ_PUT_FIELD_RET(s, iter->cpu); | 1758 | SEQ_PUT_FIELD_RET(s, entry->cpu); |
1759 | SEQ_PUT_FIELD_RET(s, iter->ts); | 1759 | SEQ_PUT_FIELD_RET(s, iter->ts); |
1760 | 1760 | ||
1761 | switch (entry->type) { | 1761 | switch (entry->type) { |