diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/DocBook/tracepoint.tmpl | 5 | ||||
-rw-r--r-- | Documentation/RCU/trace.txt | 254 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.txt | 2 | ||||
-rw-r--r-- | Documentation/dontdiff | 3 | ||||
-rw-r--r-- | Documentation/fb/framebuffer.txt | 6 | ||||
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 15 | ||||
-rw-r--r-- | Documentation/filesystems/caching/fscache.txt | 110 | ||||
-rw-r--r-- | Documentation/filesystems/caching/netfs-api.txt | 21 | ||||
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 6 | ||||
-rw-r--r-- | Documentation/filesystems/proc.txt | 3 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 39 | ||||
-rw-r--r-- | Documentation/pcmcia/driver-changes.txt | 12 | ||||
-rw-r--r-- | Documentation/slow-work.txt | 160 | ||||
-rw-r--r-- | Documentation/sysctl/ctl_unnumbered.txt | 22 | ||||
-rw-r--r-- | Documentation/trace/ftrace-design.txt | 13 | ||||
-rw-r--r-- | Documentation/trace/kprobetrace.txt | 149 |
16 files changed, 563 insertions, 257 deletions
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0fd579..8bca1d5cec09 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl | |||
@@ -86,4 +86,9 @@ | |||
86 | !Iinclude/trace/events/irq.h | 86 | !Iinclude/trace/events/irq.h |
87 | </chapter> | 87 | </chapter> |
88 | 88 | ||
89 | <chapter id="signal"> | ||
90 | <title>SIGNAL</title> | ||
91 | !Iinclude/trace/events/signal.h | ||
92 | </chapter> | ||
93 | |||
89 | </book> | 94 | </book> |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 187bbf10c923..8608fd85e921 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -1,185 +1,10 @@ | |||
1 | CONFIG_RCU_TRACE debugfs Files and Formats | 1 | CONFIG_RCU_TRACE debugfs Files and Formats |
2 | 2 | ||
3 | 3 | ||
4 | The rcupreempt and rcutree implementations of RCU provide debugfs trace | 4 | The rcutree implementation of RCU provides debugfs trace output that |
5 | output that summarizes counters and state. This information is useful for | 5 | summarizes counters and state. This information is useful for debugging |
6 | debugging RCU itself, and can sometimes also help to debug abuses of RCU. | 6 | RCU itself, and can sometimes also help to debug abuses of RCU. |
7 | Note that the rcuclassic implementation of RCU does not provide debugfs | 7 | The following sections describe the debugfs files and formats. |
8 | trace output. | ||
9 | |||
10 | The following sections describe the debugfs files and formats for | ||
11 | preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). | ||
12 | |||
13 | |||
14 | Preemptable RCU debugfs Files and Formats | ||
15 | |||
16 | This implementation of RCU provides three debugfs files under the | ||
17 | top-level directory RCU: rcu/rcuctrs (which displays the per-CPU | ||
18 | counters used by preemptable RCU) rcu/rcugp (which displays grace-period | ||
19 | counters), and rcu/rcustats (which displays internal counters for debugging RCU). | ||
20 | |||
21 | The output of "cat rcu/rcuctrs" looks as follows: | ||
22 | |||
23 | CPU last cur F M | ||
24 | 0 5 -5 0 0 | ||
25 | 1 -1 0 0 0 | ||
26 | 2 0 1 0 0 | ||
27 | 3 0 1 0 0 | ||
28 | 4 0 1 0 0 | ||
29 | 5 0 1 0 0 | ||
30 | 6 0 2 0 0 | ||
31 | 7 0 -1 0 0 | ||
32 | 8 0 1 0 0 | ||
33 | ggp = 26226, state = waitzero | ||
34 | |||
35 | The per-CPU fields are as follows: | ||
36 | |||
37 | o "CPU" gives the CPU number. Offline CPUs are not displayed. | ||
38 | |||
39 | o "last" gives the value of the counter that is being decremented | ||
40 | for the current grace period phase. In the example above, | ||
41 | the counters sum to 4, indicating that there are still four | ||
42 | RCU read-side critical sections still running that started | ||
43 | before the last counter flip. | ||
44 | |||
45 | o "cur" gives the value of the counter that is currently being | ||
46 | both incremented (by rcu_read_lock()) and decremented (by | ||
47 | rcu_read_unlock()). In the example above, the counters sum to | ||
48 | 1, indicating that there is only one RCU read-side critical section | ||
49 | still running that started after the last counter flip. | ||
50 | |||
51 | o "F" indicates whether RCU is waiting for this CPU to acknowledge | ||
52 | a counter flip. In the above example, RCU is not waiting on any, | ||
53 | which is consistent with the state being "waitzero" rather than | ||
54 | "waitack". | ||
55 | |||
56 | o "M" indicates whether RCU is waiting for this CPU to execute a | ||
57 | memory barrier. In the above example, RCU is not waiting on any, | ||
58 | which is consistent with the state being "waitzero" rather than | ||
59 | "waitmb". | ||
60 | |||
61 | o "ggp" is the global grace-period counter. | ||
62 | |||
63 | o "state" is the RCU state, which can be one of the following: | ||
64 | |||
65 | o "idle": there is no grace period in progress. | ||
66 | |||
67 | o "waitack": RCU just incremented the global grace-period | ||
68 | counter, which has the effect of reversing the roles of | ||
69 | the "last" and "cur" counters above, and is waiting for | ||
70 | all the CPUs to acknowledge the flip. Once the flip has | ||
71 | been acknowledged, CPUs will no longer be incrementing | ||
72 | what are now the "last" counters, so that their sum will | ||
73 | decrease monotonically down to zero. | ||
74 | |||
75 | o "waitzero": RCU is waiting for the sum of the "last" counters | ||
76 | to decrease to zero. | ||
77 | |||
78 | o "waitmb": RCU is waiting for each CPU to execute a memory | ||
79 | barrier, which ensures that instructions from a given CPU's | ||
80 | last RCU read-side critical section cannot be reordered | ||
81 | with instructions following the memory-barrier instruction. | ||
82 | |||
83 | The output of "cat rcu/rcugp" looks as follows: | ||
84 | |||
85 | oldggp=48870 newggp=48873 | ||
86 | |||
87 | Note that reading from this file provokes a synchronize_rcu(). The | ||
88 | "oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before | ||
89 | executing the synchronize_rcu(), and the "newggp" value is also the | ||
90 | "ggp" value, but taken after the synchronize_rcu() command returns. | ||
91 | |||
92 | |||
93 | The output of "cat rcu/rcustats" looks as follows: | ||
94 | |||
95 | na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 | ||
96 | 1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 | ||
97 | z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 | ||
98 | |||
99 | These are counters tracking internal preemptable-RCU events; however, | ||
100 | some of them may be useful for debugging algorithms using RCU. In | ||
101 | particular, the "nl", "wl", and "dl" values track the number of RCU | ||
102 | callbacks in various states. The fields are as follows: | ||
103 | |||
104 | o "na" is the total number of RCU callbacks that have been enqueued | ||
105 | since boot. | ||
106 | |||
107 | o "nl" is the number of RCU callbacks waiting for the previous | ||
108 | grace period to end so that they can start waiting on the next | ||
109 | grace period. | ||
110 | |||
111 | o "wa" is the total number of RCU callbacks that have started waiting | ||
112 | for a grace period since boot. "na" should be roughly equal to | ||
113 | "nl" plus "wa". | ||
114 | |||
115 | o "wl" is the number of RCU callbacks currently waiting for their | ||
116 | grace period to end. | ||
117 | |||
118 | o "da" is the total number of RCU callbacks whose grace periods | ||
119 | have completed since boot. "wa" should be roughly equal to | ||
120 | "wl" plus "da". | ||
121 | |||
122 | o "dr" is the total number of RCU callbacks that have been removed | ||
123 | from the list of callbacks ready to invoke. "dr" should be roughly | ||
124 | equal to "da". | ||
125 | |||
126 | o "di" is the total number of RCU callbacks that have been invoked | ||
127 | since boot. "di" should be roughly equal to "da", though some | ||
128 | early versions of preemptable RCU had a bug so that only the | ||
129 | last CPU's count of invocations was displayed, rather than the | ||
130 | sum of all CPUs' counts. | ||
131 | |||
132 | o "1" is the number of calls to rcu_try_flip(). This should be | ||
133 | roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" | ||
134 | described below. In other words, the number of times that | ||
135 | the state machine is visited should be equal to the sum of the | ||
136 | number of times that each state is visited plus the number of | ||
137 | times that the state-machine lock acquisition failed. | ||
138 | |||
139 | o "e1" is the number of times that rcu_try_flip() was unable to | ||
140 | acquire the fliplock. | ||
141 | |||
142 | o "i1" is the number of calls to rcu_try_flip_idle(). | ||
143 | |||
144 | o "ie1" is the number of times rcu_try_flip_idle() exited early | ||
145 | due to the calling CPU having no work for RCU. | ||
146 | |||
147 | o "g1" is the number of times that rcu_try_flip_idle() decided | ||
148 | to start a new grace period. "i1" should be roughly equal to | ||
149 | "ie1" plus "g1". | ||
150 | |||
151 | o "a1" is the number of calls to rcu_try_flip_waitack(). | ||
152 | |||
153 | o "ae1" is the number of times that rcu_try_flip_waitack() found | ||
154 | that at least one CPU had not yet acknowledged the new grace period | ||
155 | (AKA "counter flip"). | ||
156 | |||
157 | o "a2" is the number of times rcu_try_flip_waitack() found that | ||
158 | all CPUs had acknowledged. "a1" should be roughly equal to | ||
159 | "ae1" plus "a2". (This particular output was collected on | ||
160 | a 128-CPU machine, hence the smaller-than-usual fraction of | ||
161 | calls to rcu_try_flip_waitack() finding all CPUs having already | ||
162 | acknowledged.) | ||
163 | |||
164 | o "z1" is the number of calls to rcu_try_flip_waitzero(). | ||
165 | |||
166 | o "ze1" is the number of times that rcu_try_flip_waitzero() found | ||
167 | that not all of the old RCU read-side critical sections had | ||
168 | completed. | ||
169 | |||
170 | o "z2" is the number of times that rcu_try_flip_waitzero() finds | ||
171 | the sum of the counters equal to zero, in other words, that | ||
172 | all of the old RCU read-side critical sections had completed. | ||
173 | The value of "z1" should be roughly equal to "ze1" plus | ||
174 | "z2". | ||
175 | |||
176 | o "m1" is the number of calls to rcu_try_flip_waitmb(). | ||
177 | |||
178 | o "me1" is the number of times that rcu_try_flip_waitmb() finds | ||
179 | that at least one CPU has not yet executed a memory barrier. | ||
180 | |||
181 | o "m2" is the number of times that rcu_try_flip_waitmb() finds that | ||
182 | all CPUs have executed a memory barrier. | ||
183 | 8 | ||
184 | 9 | ||
185 | Hierarchical RCU debugfs Files and Formats | 10 | Hierarchical RCU debugfs Files and Formats |
@@ -210,9 +35,10 @@ rcu_bh: | |||
210 | 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 | 35 | 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
211 | 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 | 36 | 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
212 | 37 | ||
213 | The first section lists the rcu_data structures for rcu, the second for | 38 | The first section lists the rcu_data structures for rcu_sched, the second |
214 | rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. | 39 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an |
215 | The fields are as follows: | 40 | additional section for rcu_preempt. Each section has one line per CPU, |
41 | or eight for this 8-CPU system. The fields are as follows: | ||
216 | 42 | ||
217 | o The number at the beginning of each line is the CPU number. | 43 | o The number at the beginning of each line is the CPU number. |
218 | CPU numbers followed by an exclamation mark are offline, | 44 | CPU numbers followed by an exclamation mark are offline, |
@@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number. | |||
223 | 49 | ||
224 | o "c" is the count of grace periods that this CPU believes have | 50 | o "c" is the count of grace periods that this CPU believes have |
225 | completed. CPUs in dynticks idle mode may lag quite a ways | 51 | completed. CPUs in dynticks idle mode may lag quite a ways |
226 | behind, for example, CPU 4 under "rcu" above, which has slept | 52 | behind, for example, CPU 4 under "rcu_sched" above, which has |
227 | through the past 25 RCU grace periods. It is not unusual to | 53 | slept through the past 25 RCU grace periods. It is not unusual |
228 | see CPUs lagging by thousands of grace periods. | 54 | to see CPUs lagging by thousands of grace periods. |
229 | 55 | ||
230 | o "g" is the count of grace periods that this CPU believes have | 56 | o "g" is the count of grace periods that this CPU believes have |
231 | started. Again, CPUs in dynticks idle mode may lag behind. | 57 | started. Again, CPUs in dynticks idle mode may lag behind. |
@@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows: | |||
308 | rcu_sched: completed=33062 gpnum=33063 | 134 | rcu_sched: completed=33062 gpnum=33063 |
309 | rcu_bh: completed=464 gpnum=464 | 135 | rcu_bh: completed=464 gpnum=464 |
310 | 136 | ||
311 | Again, this output is for both "rcu" and "rcu_bh". The fields are | 137 | Again, this output is for both "rcu_sched" and "rcu_bh". Note that |
312 | taken from the rcu_state structure, and are as follows: | 138 | kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional |
139 | "rcu_preempt" line. The fields are taken from the rcu_state structure, | ||
140 | and are as follows: | ||
313 | 141 | ||
314 | o "completed" is the number of grace periods that have completed. | 142 | o "completed" is the number of grace periods that have completed. |
315 | It is comparable to the "c" field from rcu/rcudata in that a | 143 | It is comparable to the "c" field from rcu/rcudata in that a |
@@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is | |||
324 | If these two fields are equal (as they are for "rcu_bh" above), | 152 | If these two fields are equal (as they are for "rcu_bh" above), |
325 | then there is no grace period in progress, in other words, RCU | 153 | then there is no grace period in progress, in other words, RCU |
326 | is idle. On the other hand, if the two fields differ (as they | 154 | is idle. On the other hand, if the two fields differ (as they |
327 | do for "rcu" above), then an RCU grace period is in progress. | 155 | do for "rcu_sched" above), then an RCU grace period is in progress. |
328 | 156 | ||
329 | 157 | ||
330 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: | 158 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: |
331 | 159 | ||
332 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 | 160 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 |
333 | 1/1 0:127 ^0 | 161 | 1/1 .>. 0:127 ^0 |
334 | 3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 | 162 | 3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 |
335 | 3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 | 163 | 3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 |
336 | rcu_bh: | 164 | rcu_bh: |
337 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 | 165 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 |
338 | 0/1 0:127 ^0 | 166 | 0/1 .>. 0:127 ^0 |
339 | 0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 | 167 | 0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 |
340 | 0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 | 168 | 0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 |
341 | 169 | ||
342 | This is once again split into "rcu" and "rcu_bh" portions. The fields are | 170 | This is once again split into "rcu_sched" and "rcu_bh" portions, |
343 | as follows: | 171 | and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional |
172 | "rcu_preempt" section. The fields are as follows: | ||
344 | 173 | ||
345 | o "c" is exactly the same as "completed" under rcu/rcugp. | 174 | o "c" is exactly the same as "completed" under rcu/rcugp. |
346 | 175 | ||
@@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that | |||
372 | exited immediately (without even being counted in nfqs above) | 201 | exited immediately (without even being counted in nfqs above) |
373 | due to contention on ->fqslock. | 202 | due to contention on ->fqslock. |
374 | 203 | ||
204 | o "oqlen" is the number of callbacks on the "orphan" callback | ||
205 | list. RCU callbacks are placed on this list by CPUs going | ||
206 | offline, and are "adopted" either by the CPU helping the outgoing | ||
207 | CPU or by the next rcu_barrier*() call, whichever comes first. | ||
208 | |||
375 | o Each element of the form "1/1 0:127 ^0" represents one struct | 209 | o Each element of the form "1/1 0:127 ^0" represents one struct |
376 | rcu_node. Each line represents one level of the hierarchy, from | 210 | rcu_node. Each line represents one level of the hierarchy, from |
377 | root to leaves. It is best to think of the rcu_data structures | 211 | root to leaves. It is best to think of the rcu_data structures |
@@ -379,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
379 | might be either one, two, or three levels of rcu_node structures, | 213 | might be either one, two, or three levels of rcu_node structures, |
380 | depending on the relationship between CONFIG_RCU_FANOUT and | 214 | depending on the relationship between CONFIG_RCU_FANOUT and |
381 | CONFIG_NR_CPUS. | 215 | CONFIG_NR_CPUS. |
382 | 216 | ||
383 | o The numbers separated by the "/" are the qsmask followed | 217 | o The numbers separated by the "/" are the qsmask followed |
384 | by the qsmaskinit. The qsmask will have one bit | 218 | by the qsmaskinit. The qsmask will have one bit |
385 | set for each entity in the next lower level that | 219 | set for each entity in the next lower level that |
@@ -389,10 +223,19 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
389 | The value of qsmaskinit is assigned to that of qsmask | 223 | The value of qsmaskinit is assigned to that of qsmask |
390 | at the beginning of each grace period. | 224 | at the beginning of each grace period. |
391 | 225 | ||
392 | For example, for "rcu", the qsmask of the first entry | 226 | For example, for "rcu_sched", the qsmask of the first |
393 | of the lowest level is 0x14, meaning that we are still | 227 | entry of the lowest level is 0x14, meaning that we |
394 | waiting for CPUs 2 and 4 to check in for the current | 228 | are still waiting for CPUs 2 and 4 to check in for the |
395 | grace period. | 229 | current grace period. |
230 | |||
231 | o The characters separated by the ">" indicate the state | ||
232 | of the blocked-tasks lists. A "T" preceding the ">" | ||
233 | indicates that at least one task blocked in an RCU | ||
234 | read-side critical section blocks the current grace | ||
235 | period, while a "." preceding the ">" indicates otherwise. | ||
236 | The character following the ">" indicates similarly for | ||
237 | the next grace period. A "T" should appear in this | ||
238 | field only for rcu_preempt. | ||
396 | 239 | ||
397 | o The numbers separated by the ":" are the range of CPUs | 240 | o The numbers separated by the ":" are the range of CPUs |
398 | served by this struct rcu_node. This can be helpful | 241 | served by this struct rcu_node. This can be helpful |
@@ -431,8 +274,9 @@ rcu_bh: | |||
431 | 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 | 274 | 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 |
432 | 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 | 275 | 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 |
433 | 276 | ||
434 | As always, this is once again split into "rcu" and "rcu_bh" portions. | 277 | As always, this is once again split into "rcu_sched" and "rcu_bh" |
435 | The fields are as follows: | 278 | portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional |
279 | "rcu_preempt" section. The fields are as follows: | ||
436 | 280 | ||
437 | o "np" is the number of times that __rcu_pending() has been invoked | 281 | o "np" is the number of times that __rcu_pending() has been invoked |
438 | for the corresponding flavor of RCU. | 282 | for the corresponding flavor of RCU. |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e41a7fecf0d3..d542ca243b80 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -830,7 +830,7 @@ sched: Critical sections Grace period Barrier | |||
830 | SRCU: Critical sections Grace period Barrier | 830 | SRCU: Critical sections Grace period Barrier |
831 | 831 | ||
832 | srcu_read_lock synchronize_srcu N/A | 832 | srcu_read_lock synchronize_srcu N/A |
833 | srcu_read_unlock | 833 | srcu_read_unlock synchronize_srcu_expedited |
834 | 834 | ||
835 | SRCU: Initialization/cleanup | 835 | SRCU: Initialization/cleanup |
836 | init_srcu_struct | 836 | init_srcu_struct |
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index e1efc400bed6..e151b2a36267 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -65,6 +65,7 @@ aicdb.h* | |||
65 | asm-offsets.h | 65 | asm-offsets.h |
66 | asm_offsets.h | 66 | asm_offsets.h |
67 | autoconf.h* | 67 | autoconf.h* |
68 | av_permissions.h | ||
68 | bbootsect | 69 | bbootsect |
69 | bin2c | 70 | bin2c |
70 | binkernel.spec | 71 | binkernel.spec |
@@ -95,12 +96,14 @@ docproc | |||
95 | elf2ecoff | 96 | elf2ecoff |
96 | elfconfig.h* | 97 | elfconfig.h* |
97 | fixdep | 98 | fixdep |
99 | flask.h | ||
98 | fore200e_mkfirm | 100 | fore200e_mkfirm |
99 | fore200e_pca_fw.c* | 101 | fore200e_pca_fw.c* |
100 | gconf | 102 | gconf |
101 | gen-devlist | 103 | gen-devlist |
102 | gen_crc32table | 104 | gen_crc32table |
103 | gen_init_cpio | 105 | gen_init_cpio |
106 | genheaders | ||
104 | genksyms | 107 | genksyms |
105 | *_gray256.c | 108 | *_gray256.c |
106 | ihex2fw | 109 | ihex2fw |
diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt index b3e3a0356839..fe79e3c8847d 100644 --- a/Documentation/fb/framebuffer.txt +++ b/Documentation/fb/framebuffer.txt | |||
@@ -312,10 +312,8 @@ and to the following documentation: | |||
312 | 8. Mailing list | 312 | 8. Mailing list |
313 | --------------- | 313 | --------------- |
314 | 314 | ||
315 | There are several frame buffer device related mailing lists at SourceForge: | 315 | There is a frame buffer device related mailing list at kernel.org: |
316 | - linux-fbdev-announce@lists.sourceforge.net, for announcements, | 316 | linux-fbdev@vger.kernel.org. |
317 | - linux-fbdev-user@lists.sourceforge.net, for generic user support, | ||
318 | - linux-fbdev-devel@lists.sourceforge.net, for project developers. | ||
319 | 317 | ||
320 | Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for | 318 | Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for |
321 | subscription information and archive browsing. | 319 | subscription information and archive browsing. |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bc693fffabe0..f613df8ec7bf 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -6,6 +6,21 @@ be removed from this file. | |||
6 | 6 | ||
7 | --------------------------- | 7 | --------------------------- |
8 | 8 | ||
9 | What: USER_SCHED | ||
10 | When: 2.6.34 | ||
11 | |||
12 | Why: USER_SCHED was implemented as a proof of concept for group scheduling. | ||
13 | The effect of USER_SCHED can already be achieved from userspace with | ||
14 | the help of libcgroup. The removal of USER_SCHED will also simplify | ||
15 | the scheduler code with the removal of one major ifdef. There are also | ||
16 | issues USER_SCHED has with USER_NS. A decision was taken not to fix | ||
17 | those and instead remove USER_SCHED. Also new group scheduling | ||
18 | features will not be implemented for USER_SCHED. | ||
19 | |||
20 | Who: Dhaval Giani <dhaval@linux.vnet.ibm.com> | ||
21 | |||
22 | --------------------------- | ||
23 | |||
9 | What: PRISM54 | 24 | What: PRISM54 |
10 | When: 2.6.34 | 25 | When: 2.6.34 |
11 | 26 | ||
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 9e94b9491d89..a91e2e2095b0 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -235,6 +235,7 @@ proc files. | |||
235 | neg=N Number of negative lookups made | 235 | neg=N Number of negative lookups made |
236 | pos=N Number of positive lookups made | 236 | pos=N Number of positive lookups made |
237 | crt=N Number of objects created by lookup | 237 | crt=N Number of objects created by lookup |
238 | tmo=N Number of lookups timed out and requeued | ||
238 | Updates n=N Number of update cookie requests seen | 239 | Updates n=N Number of update cookie requests seen |
239 | nul=N Number of upd reqs given a NULL parent | 240 | nul=N Number of upd reqs given a NULL parent |
240 | run=N Number of upd reqs granted CPU time | 241 | run=N Number of upd reqs granted CPU time |
@@ -250,8 +251,10 @@ proc files. | |||
250 | ok=N Number of successful alloc reqs | 251 | ok=N Number of successful alloc reqs |
251 | wt=N Number of alloc reqs that waited on lookup completion | 252 | wt=N Number of alloc reqs that waited on lookup completion |
252 | nbf=N Number of alloc reqs rejected -ENOBUFS | 253 | nbf=N Number of alloc reqs rejected -ENOBUFS |
254 | int=N Number of alloc reqs aborted -ERESTARTSYS | ||
253 | ops=N Number of alloc reqs submitted | 255 | ops=N Number of alloc reqs submitted |
254 | owt=N Number of alloc reqs waited for CPU time | 256 | owt=N Number of alloc reqs waited for CPU time |
257 | abt=N Number of alloc reqs aborted due to object death | ||
255 | Retrvls n=N Number of retrieval (read) requests seen | 258 | Retrvls n=N Number of retrieval (read) requests seen |
256 | ok=N Number of successful retr reqs | 259 | ok=N Number of successful retr reqs |
257 | wt=N Number of retr reqs that waited on lookup completion | 260 | wt=N Number of retr reqs that waited on lookup completion |
@@ -261,6 +264,7 @@ proc files. | |||
261 | oom=N Number of retr reqs failed -ENOMEM | 264 | oom=N Number of retr reqs failed -ENOMEM |
262 | ops=N Number of retr reqs submitted | 265 | ops=N Number of retr reqs submitted |
263 | owt=N Number of retr reqs waited for CPU time | 266 | owt=N Number of retr reqs waited for CPU time |
267 | abt=N Number of retr reqs aborted due to object death | ||
264 | Stores n=N Number of storage (write) requests seen | 268 | Stores n=N Number of storage (write) requests seen |
265 | ok=N Number of successful store reqs | 269 | ok=N Number of successful store reqs |
266 | agn=N Number of store reqs on a page already pending storage | 270 | agn=N Number of store reqs on a page already pending storage |
@@ -268,12 +272,37 @@ proc files. | |||
268 | oom=N Number of store reqs failed -ENOMEM | 272 | oom=N Number of store reqs failed -ENOMEM |
269 | ops=N Number of store reqs submitted | 273 | ops=N Number of store reqs submitted |
270 | run=N Number of store reqs granted CPU time | 274 | run=N Number of store reqs granted CPU time |
275 | pgs=N Number of pages given store req processing time | ||
276 | rxd=N Number of store reqs deleted from tracking tree | ||
277 | olm=N Number of store reqs over store limit | ||
278 | VmScan nos=N Number of release reqs against pages with no pending store | ||
279 | gon=N Number of release reqs against pages stored by time lock granted | ||
280 | bsy=N Number of release reqs ignored due to in-progress store | ||
281 | can=N Number of page stores cancelled due to release req | ||
271 | Ops pend=N Number of times async ops added to pending queues | 282 | Ops pend=N Number of times async ops added to pending queues |
272 | run=N Number of times async ops given CPU time | 283 | run=N Number of times async ops given CPU time |
273 | enq=N Number of times async ops queued for processing | 284 | enq=N Number of times async ops queued for processing |
285 | can=N Number of async ops cancelled | ||
286 | rej=N Number of async ops rejected due to object lookup/create failure | ||
274 | dfr=N Number of async ops queued for deferred release | 287 | dfr=N Number of async ops queued for deferred release |
275 | rel=N Number of async ops released | 288 | rel=N Number of async ops released |
276 | gc=N Number of deferred-release async ops garbage collected | 289 | gc=N Number of deferred-release async ops garbage collected |
290 | CacheOp alo=N Number of in-progress alloc_object() cache ops | ||
291 | luo=N Number of in-progress lookup_object() cache ops | ||
292 | luc=N Number of in-progress lookup_complete() cache ops | ||
293 | gro=N Number of in-progress grab_object() cache ops | ||
294 | upo=N Number of in-progress update_object() cache ops | ||
295 | dro=N Number of in-progress drop_object() cache ops | ||
296 | pto=N Number of in-progress put_object() cache ops | ||
297 | syn=N Number of in-progress sync_cache() cache ops | ||
298 | atc=N Number of in-progress attr_changed() cache ops | ||
299 | rap=N Number of in-progress read_or_alloc_page() cache ops | ||
300 | ras=N Number of in-progress read_or_alloc_pages() cache ops | ||
301 | alp=N Number of in-progress allocate_page() cache ops | ||
302 | als=N Number of in-progress allocate_pages() cache ops | ||
303 | wrp=N Number of in-progress write_page() cache ops | ||
304 | ucp=N Number of in-progress uncache_page() cache ops | ||
305 | dsp=N Number of in-progress dissociate_pages() cache ops | ||
277 | 306 | ||
278 | 307 | ||
279 | (*) /proc/fs/fscache/histogram | 308 | (*) /proc/fs/fscache/histogram |
@@ -299,6 +328,87 @@ proc files. | |||
299 | jiffy range covered, and the SECS field the equivalent number of seconds. | 328 | jiffy range covered, and the SECS field the equivalent number of seconds. |
300 | 329 | ||
301 | 330 | ||
331 | =========== | ||
332 | OBJECT LIST | ||
333 | =========== | ||
334 | |||
335 | If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a | ||
336 | list of all the objects currently allocated and allow them to be viewed | ||
337 | through: | ||
338 | |||
339 | /proc/fs/fscache/objects | ||
340 | |||
341 | This will look something like: | ||
342 | |||
343 | [root@andromeda ~]# head /proc/fs/fscache/objects | ||
344 | OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA | ||
345 | ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ | ||
346 | 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a | ||
347 | 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a | ||
348 | |||
349 | where the first set of columns before the '|' describe the object: | ||
350 | |||
351 | COLUMN DESCRIPTION | ||
352 | ======= =============================================================== | ||
353 | OBJECT Object debugging ID (appears as OBJ%x in some debug messages) | ||
354 | PARENT Debugging ID of parent object | ||
355 | STAT Object state | ||
356 | CHLDN Number of child objects of this object | ||
357 | OPS Number of outstanding operations on this object | ||
358 | OOP Number of outstanding child object management operations | ||
359 | IPR | ||
360 | EX Number of outstanding exclusive operations | ||
361 | READS Number of outstanding read operations | ||
362 | EM Object's event mask | ||
363 | EV Events raised on this object | ||
364 | F Object flags | ||
365 | S Object slow-work work item flags | ||
366 | |||
367 | and the second set of columns describe the object's cookie, if present: | ||
368 | |||
369 | COLUMN DESCRIPTION | ||
370 | =============== ======================================================= | ||
371 | NETFS_COOKIE_DEF Name of netfs cookie definition | ||
372 | TY Cookie type (IX - index, DT - data, hex - special) | ||
373 | FL Cookie flags | ||
374 | NETFS_DATA Netfs private data stored in the cookie | ||
375 | OBJECT_KEY Object key } 1 column, with separating comma | ||
376 | AUX_DATA Object aux data } presence may be configured | ||
377 | |||
378 | The data shown may be filtered by attaching a key to an appropriate keyring | ||
379 | before viewing the file. Something like: | ||
380 | |||
381 | keyctl add user fscache:objlist <restrictions> @s | ||
382 | |||
383 | where <restrictions> are a selection of the following letters: | ||
384 | |||
385 | K Show hexdump of object key (don't show if not given) | ||
386 | A Show hexdump of object aux data (don't show if not given) | ||
387 | |||
388 | and the following paired letters: | ||
389 | |||
390 | C Show objects that have a cookie | ||
391 | c Show objects that don't have a cookie | ||
392 | B Show objects that are busy | ||
393 | b Show objects that aren't busy | ||
394 | W Show objects that have pending writes | ||
395 | w Show objects that don't have pending writes | ||
396 | R Show objects that have outstanding reads | ||
397 | r Show objects that don't have outstanding reads | ||
398 | S Show objects that have slow work queued | ||
399 | s Show objects that don't have slow work queued | ||
400 | |||
401 | If neither side of a letter pair is given, then both are implied. For example: | ||
402 | |||
403 | keyctl add user fscache:objlist KB @s | ||
404 | |||
405 | shows objects that are busy, and lists their object keys, but does not dump | ||
406 | their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is | ||
407 | not implied. | ||
408 | |||
409 | By default all objects and all fields will be shown. | ||
410 | |||
411 | |||
302 | ========= | 412 | ========= |
303 | DEBUGGING | 413 | DEBUGGING |
304 | ========= | 414 | ========= |
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 2666b1ed5e9e..1902c57b72ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below). | |||
641 | 641 | ||
642 | Furthermore, note that this does not cancel the asynchronous read or write | 642 | Furthermore, note that this does not cancel the asynchronous read or write |
643 | operation started by the read/alloc and write functions, so the page | 643 | operation started by the read/alloc and write functions, so the page |
644 | invalidation and release functions must use: | 644 | invalidation functions must use: |
645 | 645 | ||
646 | bool fscache_check_page_write(struct fscache_cookie *cookie, | 646 | bool fscache_check_page_write(struct fscache_cookie *cookie, |
647 | struct page *page); | 647 | struct page *page); |
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and: | |||
654 | to wait for it to finish if it is. | 654 | to wait for it to finish if it is. |
655 | 655 | ||
656 | 656 | ||
657 | When releasepage() is being implemented, a special FS-Cache function exists to | ||
658 | manage the heuristics of coping with vmscan trying to eject pages, which may | ||
659 | conflict with the cache trying to write pages to the cache (which may itself | ||
660 | need to allocate memory): | ||
661 | |||
662 | bool fscache_maybe_release_page(struct fscache_cookie *cookie, | ||
663 | struct page *page, | ||
664 | gfp_t gfp); | ||
665 | |||
666 | This takes the netfs cookie, and the page and gfp arguments as supplied to | ||
667 | releasepage(). It will return false if the page cannot be released yet for | ||
668 | some reason and if it returns true, the page has been uncached and can now be | ||
669 | released. | ||
670 | |||
671 | To make a page available for release, this function may wait for an outstanding | ||
672 | storage request to complete, or it may attempt to cancel the storage request - | ||
673 | in which case the page will not be stored in the cache this time. | ||
674 | |||
675 | |||
657 | ========================== | 676 | ========================== |
658 | INDEX AND DATA FILE UPDATE | 677 | INDEX AND DATA FILE UPDATE |
659 | ========================== | 678 | ========================== |
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c2a0871280a0..c58b9f5ba002 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -20,15 +20,16 @@ Lots of code taken from ext3 and other projects. | |||
20 | Authors in alphabetical order: | 20 | Authors in alphabetical order: |
21 | Joel Becker <joel.becker@oracle.com> | 21 | Joel Becker <joel.becker@oracle.com> |
22 | Zach Brown <zach.brown@oracle.com> | 22 | Zach Brown <zach.brown@oracle.com> |
23 | Mark Fasheh <mark.fasheh@oracle.com> | 23 | Mark Fasheh <mfasheh@suse.com> |
24 | Kurt Hackel <kurt.hackel@oracle.com> | 24 | Kurt Hackel <kurt.hackel@oracle.com> |
25 | Tao Ma <tao.ma@oracle.com> | ||
25 | Sunil Mushran <sunil.mushran@oracle.com> | 26 | Sunil Mushran <sunil.mushran@oracle.com> |
26 | Manish Singh <manish.singh@oracle.com> | 27 | Manish Singh <manish.singh@oracle.com> |
28 | Tiger Yang <tiger.yang@oracle.com> | ||
27 | 29 | ||
28 | Caveats | 30 | Caveats |
29 | ======= | 31 | ======= |
30 | Features which OCFS2 does not support yet: | 32 | Features which OCFS2 does not support yet: |
31 | - quotas | ||
32 | - Directory change notification (F_NOTIFY) | 33 | - Directory change notification (F_NOTIFY) |
33 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) | 34 | - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) |
34 | 35 | ||
@@ -70,7 +71,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata | |||
70 | performance. | 71 | performance. |
71 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too | 72 | localalloc=8(*) Allows custom localalloc size in MB. If the value is too |
72 | large, the fs will silently revert it to the default. | 73 | large, the fs will silently revert it to the default. |
73 | Localalloc is not enabled for local mounts. | ||
74 | localflocks This disables cluster aware flock. | 74 | localflocks This disables cluster aware flock. |
75 | inode64 Indicates that Ocfs2 is allowed to create inodes at | 75 | inode64 Indicates that Ocfs2 is allowed to create inodes at |
76 | any location in the filesystem, including those which | 76 | any location in the filesystem, including those which |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f945546b..4af0018533f2 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: | |||
1072 | - irq: servicing interrupts | 1072 | - irq: servicing interrupts |
1073 | - softirq: servicing softirqs | 1073 | - softirq: servicing softirqs |
1074 | - steal: involuntary wait | 1074 | - steal: involuntary wait |
1075 | - guest: running a guest | 1075 | - guest: running a normal guest |
1076 | - guest_nice: running a niced guest | ||
1076 | 1077 | ||
1077 | The "intr" line gives counts of interrupts serviced since boot time, for each | 1078 | The "intr" line gives counts of interrupts serviced since boot time, for each |
1078 | of the possible system interrupts. The first column is the total of all | 1079 | of the possible system interrupts. The first column is the total of all |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b387e91f..fce5b5e516cc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -85,7 +85,6 @@ parameter is applicable: | |||
85 | PPT Parallel port support is enabled. | 85 | PPT Parallel port support is enabled. |
86 | PS2 Appropriate PS/2 support is enabled. | 86 | PS2 Appropriate PS/2 support is enabled. |
87 | RAM RAM disk support is enabled. | 87 | RAM RAM disk support is enabled. |
88 | ROOTPLUG The example Root Plug LSM is enabled. | ||
89 | S390 S390 architecture is enabled. | 88 | S390 S390 architecture is enabled. |
90 | SCSI Appropriate SCSI support is enabled. | 89 | SCSI Appropriate SCSI support is enabled. |
91 | A lot of drivers has their options described inside of | 90 | A lot of drivers has their options described inside of |
@@ -345,6 +344,15 @@ and is between 256 and 4096 characters. It is defined in the file | |||
345 | Change the amount of debugging information output | 344 | Change the amount of debugging information output |
346 | when initialising the APIC and IO-APIC components. | 345 | when initialising the APIC and IO-APIC components. |
347 | 346 | ||
347 | show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller | ||
348 | Limit apic dumping. The parameter defines the maximal | ||
349 | number of local apics being dumped. It can also be | ||
350 | set to "all", meaning no limit. | ||
351 | Format: { 1 (default) | 2 | ... | all }. | ||
352 | The parameter is valid only if apic=debug or | ||
353 | apic=verbose is specified. | ||
354 | Example: apic=debug show_lapic=all | ||
355 | |||
348 | apm= [APM] Advanced Power Management | 356 | apm= [APM] Advanced Power Management |
349 | See header of arch/x86/kernel/apm_32.c. | 357 | See header of arch/x86/kernel/apm_32.c. |
350 | 358 | ||
@@ -779,6 +787,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
779 | by the set_ftrace_notrace file in the debugfs | 787 | by the set_ftrace_notrace file in the debugfs |
780 | tracing directory. | 788 | tracing directory. |
781 | 789 | ||
790 | ftrace_graph_filter=[function-list] | ||
791 | [FTRACE] Limit the top-level caller functions traced | ||
792 | by the function graph tracer at boot up. | ||
793 | function-list is a comma separated list of functions | ||
794 | that can be changed at run time by the | ||
795 | set_graph_function file in the debugfs tracing directory. | ||
796 | |||
782 | gamecon.map[2|3]= | 797 | gamecon.map[2|3]= |
783 | [HW,JOY] Multisystem joystick and NES/SNES/PSX pad | 798 | [HW,JOY] Multisystem joystick and NES/SNES/PSX pad |
784 | support via parallel port (up to 5 devices per port) | 799 | support via parallel port (up to 5 devices per port) |
@@ -2032,8 +2047,15 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2032 | 2047 | ||
2033 | print-fatal-signals= | 2048 | print-fatal-signals= |
2034 | [KNL] debug: print fatal signals | 2049 | [KNL] debug: print fatal signals |
2035 | print-fatal-signals=1: print segfault info to | 2050 | |
2036 | the kernel console. | 2051 | If enabled, warn about various signal handling |
2052 | related application anomalies: too many signals, | ||
2053 | too many POSIX.1 timers, fatal signals causing a | ||
2054 | coredump - etc. | ||
2055 | |||
2056 | If you hit the warning due to signal overflow, | ||
2057 | you might want to try "ulimit -i unlimited". | ||
2058 | |||
2037 | default: off. | 2059 | default: off. |
2038 | 2060 | ||
2039 | printk.time= Show timing data prefixed to each printk message line | 2061 | printk.time= Show timing data prefixed to each printk message line |
@@ -2164,15 +2186,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2164 | Useful for devices that are detected asynchronously | 2186 | Useful for devices that are detected asynchronously |
2165 | (e.g. USB and MMC devices). | 2187 | (e.g. USB and MMC devices). |
2166 | 2188 | ||
2167 | root_plug.vendor_id= | ||
2168 | [ROOTPLUG] Override the default vendor ID | ||
2169 | |||
2170 | root_plug.product_id= | ||
2171 | [ROOTPLUG] Override the default product ID | ||
2172 | |||
2173 | root_plug.debug= | ||
2174 | [ROOTPLUG] Enable debugging output | ||
2175 | |||
2176 | rw [KNL] Mount root device read-write on boot | 2189 | rw [KNL] Mount root device read-write on boot |
2177 | 2190 | ||
2178 | S [KNL] Run init in single mode | 2191 | S [KNL] Run init in single mode |
@@ -2182,6 +2195,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2182 | 2195 | ||
2183 | sbni= [NET] Granch SBNI12 leased line adapter | 2196 | sbni= [NET] Granch SBNI12 leased line adapter |
2184 | 2197 | ||
2198 | sched_debug [KNL] Enables verbose scheduler debug messages. | ||
2199 | |||
2185 | sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver | 2200 | sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver |
2186 | Format: <io>[,<timeout>[,<isapnp>]] | 2201 | Format: <io>[,<timeout>[,<isapnp>]] |
2187 | 2202 | ||
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 059934363caf..446f43b309df 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt | |||
@@ -1,5 +1,17 @@ | |||
1 | This file details changes in 2.6 which affect PCMCIA card driver authors: | 1 | This file details changes in 2.6 which affect PCMCIA card driver authors: |
2 | 2 | ||
3 | * no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33) | ||
4 | Instead of the cs_error() callback or the CS_CHECK() macro, please use | ||
5 | Linux-style checking of return values, and -- if necessary -- debug | ||
6 | messages using "dev_dbg()" or "pr_debug()". | ||
7 | |||
8 | * New CIS tuple access (as of 2.6.33) | ||
9 | Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and | ||
10 | pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is | ||
11 | only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is | ||
12 | interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE, | ||
13 | a new helper "pcmcia_get_mac_from_cis()" was added. | ||
14 | |||
3 | * New configuration loop helper (as of 2.6.28) | 15 | * New configuration loop helper (as of 2.6.28) |
4 | By calling pcmcia_loop_config(), a driver can iterate over all available | 16 | By calling pcmcia_loop_config(), a driver can iterate over all available |
5 | configuration options. During a driver's probe() phase, one doesn't need | 17 | configuration options. During a driver's probe() phase, one doesn't need |
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index ebc50f808ea4..9dbf4470c7e1 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt | |||
@@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long. | |||
41 | Operations of both types may sleep during execution, thus tying up the thread | 41 | Operations of both types may sleep during execution, thus tying up the thread |
42 | loaned to it. | 42 | loaned to it. |
43 | 43 | ||
44 | A further class of work item is available, based on the slow work item class: | ||
45 | |||
46 | (*) Delayed slow work items. | ||
47 | |||
48 | These are slow work items that have a timer to defer queueing of the item for | ||
49 | a while. | ||
50 | |||
44 | 51 | ||
45 | THREAD-TO-CLASS ALLOCATION | 52 | THREAD-TO-CLASS ALLOCATION |
46 | -------------------------- | 53 | -------------------------- |
@@ -64,9 +71,11 @@ USING SLOW WORK ITEMS | |||
64 | Firstly, a module or subsystem wanting to make use of slow work items must | 71 | Firstly, a module or subsystem wanting to make use of slow work items must |
65 | register its interest: | 72 | register its interest: |
66 | 73 | ||
67 | int ret = slow_work_register_user(); | 74 | int ret = slow_work_register_user(struct module *module); |
68 | 75 | ||
69 | This will return 0 if successful, or a -ve error upon failure. | 76 | This will return 0 if successful, or a -ve error upon failure. The module |
77 | pointer should be the module interested in using this facility (almost | ||
78 | certainly THIS_MODULE). | ||
70 | 79 | ||
71 | 80 | ||
72 | Slow work items may then be set up by: | 81 | Slow work items may then be set up by: |
@@ -93,6 +102,10 @@ Slow work items may then be set up by: | |||
93 | 102 | ||
94 | or: | 103 | or: |
95 | 104 | ||
105 | delayed_slow_work_init(&myitem, &myitem_ops); | ||
106 | |||
107 | or: | ||
108 | |||
96 | vslow_work_init(&myitem, &myitem_ops); | 109 | vslow_work_init(&myitem, &myitem_ops); |
97 | 110 | ||
98 | depending on its class. | 111 | depending on its class. |
@@ -102,15 +115,92 @@ A suitably set up work item can then be enqueued for processing: | |||
102 | int ret = slow_work_enqueue(&myitem); | 115 | int ret = slow_work_enqueue(&myitem); |
103 | 116 | ||
104 | This will return a -ve error if the thread pool is unable to gain a reference | 117 | This will return a -ve error if the thread pool is unable to gain a reference |
105 | on the item, 0 otherwise. | 118 | on the item, 0 otherwise, or (for delayed work): |
119 | |||
120 | int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); | ||
106 | 121 | ||
107 | 122 | ||
108 | The items are reference counted, so there ought to be no need for a flush | 123 | The items are reference counted, so there ought to be no need for a flush |
109 | operation. When all a module's slow work items have been processed, and the | 124 | operation. But as the reference counting is optional, means to cancel |
125 | existing work items are also included: | ||
126 | |||
127 | cancel_slow_work(&myitem); | ||
128 | cancel_delayed_slow_work(&myitem); | ||
129 | |||
130 | can be used to cancel pending work. The above cancel functions wait for | ||
131 | existing work to have been executed (or prevent its execution, depending | ||
132 | on timing). | ||
133 | |||
134 | |||
135 | When all a module's slow work items have been processed, and the | ||
110 | module has no further interest in the facility, it should unregister its | 136 | module has no further interest in the facility, it should unregister its |
111 | interest: | 137 | interest: |
112 | 138 | ||
113 | slow_work_unregister_user(); | 139 | slow_work_unregister_user(struct module *module); |
140 | |||
141 | The module pointer is used to wait for all outstanding work items for that | ||
142 | module before completing the unregistration. This prevents the put_ref() code | ||
143 | from being taken away before it completes. module should almost certainly be | ||
144 | THIS_MODULE. | ||
145 | |||
146 | |||
147 | ================ | ||
148 | HELPER FUNCTIONS | ||
149 | ================ | ||
150 | |||
151 | The slow-work facility provides a function by which it can be determined | ||
152 | whether or not an item is queued for later execution: | ||
153 | |||
154 | bool queued = slow_work_is_queued(struct slow_work *work); | ||
155 | |||
156 | If it returns false, then the item is not on the queue (it may be executing | ||
157 | with a requeue pending). This can be used to work out whether an item on which | ||
158 | another depends is on the queue, thus allowing a dependent item to be queued | ||
159 | after it. | ||
160 | |||
161 | If the above shows an item on which another depends not to be queued, then the | ||
162 | owner of the dependent item might need to wait. However, to avoid locking up | ||
163 | the threads unnecessarily by sleeping in them, it can make sense under some | ||
164 | circumstances to return the work item to the queue, thus deferring it until | ||
165 | some other items have had a chance to make use of the yielded thread. | ||
166 | |||
167 | To yield a thread and defer an item, the work function should simply enqueue | ||
168 | the work item again and return. However, this doesn't work if there's nothing | ||
169 | actually on the queue, as the thread just vacated will jump straight back into | ||
170 | the item's work function, thus busy waiting on a CPU. | ||
171 | |||
172 | Instead, the item should use the thread to wait for the dependency to go away, | ||
173 | but rather than using schedule() or schedule_timeout() to sleep, it should use | ||
174 | the following function: | ||
175 | |||
176 | bool requeue = slow_work_sleep_till_thread_needed( | ||
177 | struct slow_work *work, | ||
178 | signed long *_timeout); | ||
179 | |||
180 | This will add a second wait and then sleep, such that it will be woken up if | ||
181 | either something appears on the queue that could usefully make use of the | ||
182 | thread - and behind which this item can be queued, or if the event the caller | ||
183 | set up to wait for happens. True will be returned if something else appeared | ||
184 | on the queue and this work function should perhaps return, or false if | ||
185 | something else woke it up. The timeout is as for schedule_timeout(). | ||
186 | |||
187 | For example: | ||
188 | |||
189 | wq = bit_waitqueue(&my_flags, MY_BIT); | ||
190 | init_wait(&wait); | ||
191 | requeue = false; | ||
192 | do { | ||
193 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | ||
194 | if (!test_bit(MY_BIT, &my_flags)) | ||
195 | break; | ||
196 | requeue = slow_work_sleep_till_thread_needed(&my_work, | ||
197 | &timeout); | ||
198 | } while (timeout > 0 && !requeue); | ||
199 | finish_wait(wq, &wait); | ||
200 | if (!test_bit(MY_BIT, &my_flags)) | ||
201 | goto do_my_thing; | ||
202 | if (requeue) | ||
203 | return; // to slow_work | ||
114 | 204 | ||
115 | 205 | ||
116 | =============== | 206 | =============== |
@@ -118,7 +208,8 @@ ITEM OPERATIONS | |||
118 | =============== | 208 | =============== |
119 | 209 | ||
120 | Each work item requires a table of operations of type struct slow_work_ops. | 210 | Each work item requires a table of operations of type struct slow_work_ops. |
121 | All members are required: | 211 | Only ->execute() is required; the getting and putting of a reference and the |
212 | describing of an item are all optional. | ||
122 | 213 | ||
123 | (*) Get a reference on an item: | 214 | (*) Get a reference on an item: |
124 | 215 | ||
@@ -148,6 +239,16 @@ All members are required: | |||
148 | This should perform the work required of the item. It may sleep, it may | 239 | This should perform the work required of the item. It may sleep, it may |
149 | perform disk I/O and it may wait for locks. | 240 | perform disk I/O and it may wait for locks. |
150 | 241 | ||
242 | (*) View an item through /proc: | ||
243 | |||
244 | void (*desc)(struct slow_work *work, struct seq_file *m); | ||
245 | |||
246 | If supplied, this should print to 'm' a small string describing the work | ||
247 | the item is to do. This should be no more than about 40 characters, and | ||
248 | shouldn't include a newline character. | ||
249 | |||
250 | See the 'Viewing executing and queued items' section below. | ||
251 | |||
151 | 252 | ||
152 | ================== | 253 | ================== |
153 | POOL CONFIGURATION | 254 | POOL CONFIGURATION |
@@ -172,3 +273,50 @@ The slow-work thread pool has a number of configurables: | |||
172 | is bounded to between 1 and one fewer than the number of active threads. | 273 | is bounded to between 1 and one fewer than the number of active threads. |
173 | This ensures there is always at least one thread that can process very | 274 | This ensures there is always at least one thread that can process very |
174 | slow work items, and always at least one thread that won't. | 275 | slow work items, and always at least one thread that won't. |
276 | |||
277 | |||
278 | ================================== | ||
279 | VIEWING EXECUTING AND QUEUED ITEMS | ||
280 | ================================== | ||
281 | |||
282 | If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: | ||
283 | |||
284 | /sys/kernel/debug/slow_work/runqueue | ||
285 | |||
286 | through which the list of work items being executed and the queues of items to | ||
287 | be executed may be viewed. The owner of a work item is given the chance to | ||
288 | add some information of its own. | ||
289 | |||
290 | The contents look something like the following: | ||
291 | |||
292 | THR PID ITEM ADDR FL MARK DESC | ||
293 | === ===== ================ == ===== ========== | ||
294 | 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK | ||
295 | 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 | ||
296 | 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK | ||
297 | 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN | ||
298 | 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 | ||
299 | 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 | ||
300 | 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 | ||
301 | 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN | ||
302 | vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 | ||
303 | vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 | ||
304 | vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 | ||
305 | vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 | ||
306 | vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 | ||
307 | vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 | ||
308 | vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK | ||
309 | vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK | ||
310 | vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK | ||
311 | vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK | ||
312 | vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK | ||
313 | vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK | ||
314 | vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK | ||
315 | vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK | ||
316 | |||
317 | In the 'THR' column, executing items show the thread they're occupying and | ||
318 | queued items indicate which queue they're on. 'PID' shows the process ID of | ||
319 | a slow-work thread that's executing something. 'FL' shows the work item flags. | ||
320 | 'MARK' indicates how long since an item was queued or began executing. Lastly, | ||
321 | the 'DESC' column permits the owner of an item to give some information. | ||
322 | |||
diff --git a/Documentation/sysctl/ctl_unnumbered.txt b/Documentation/sysctl/ctl_unnumbered.txt deleted file mode 100644 index 23003a8ea3e7..000000000000 --- a/Documentation/sysctl/ctl_unnumbered.txt +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | |||
2 | Except for a few extremely rare exceptions user space applications do not use | ||
3 | the binary sysctl interface. Instead everyone uses /proc/sys/... with | ||
4 | readable ascii names. | ||
5 | |||
6 | Recently the kernel has started supporting setting the binary sysctl value to | ||
7 | CTL_UNNUMBERED so we no longer need to assign a binary sysctl path to allow | ||
8 | sysctls to show up in /proc/sys. | ||
9 | |||
10 | Assigning binary sysctl numbers is an endless source of conflicts in sysctl.h, | ||
11 | breaking of the user space ABI (because of those conflicts), and maintenance | ||
12 | problems. A complete pass through all of the sysctl users revealed multiple | ||
13 | instances where the sysctl binary interface was broken and had gone undetected | ||
14 | for years. | ||
15 | |||
16 | So please do not add new binary sysctl numbers. They are unneeded and | ||
17 | problematic. | ||
18 | |||
19 | If you really need a new binary sysctl number please first merge your sysctl | ||
20 | into the kernel and then as a separate patch allocate a binary sysctl number. | ||
21 | |||
22 | (ebiederm@xmission.com, June 2007) | ||
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 7003e10f10f5..641a1ef2a7ff 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt | |||
@@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option. | |||
213 | <details to be filled> | 213 | <details to be filled> |
214 | 214 | ||
215 | 215 | ||
216 | HAVE_FTRACE_SYSCALLS | 216 | HAVE_SYSCALL_TRACEPOINTS |
217 | --------------------- | 217 | --------------------- |
218 | 218 | ||
219 | <details to be filled> | 219 | You need very few things to get the syscalls tracing in an arch. |
220 | |||
221 | - Have a NR_syscalls variable in <asm/unistd.h> that provides the number | ||
222 | of syscalls supported by the arch. | ||
223 | - Implement arch_syscall_addr() that resolves a syscall address from a | ||
224 | syscall number. | ||
225 | - Support the TIF_SYSCALL_TRACEPOINT thread flags | ||
226 | - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace | ||
227 | in the ptrace syscalls tracing path. | ||
228 | - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. | ||
220 | 229 | ||
221 | 230 | ||
222 | HAVE_FTRACE_MCOUNT_RECORD | 231 | HAVE_FTRACE_MCOUNT_RECORD |
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 000000000000..47aabeebbdf6 --- /dev/null +++ b/Documentation/trace/kprobetrace.txt | |||
@@ -0,0 +1,149 @@ | |||
1 | Kprobe-based Event Tracing | ||
2 | ========================== | ||
3 | |||
4 | Documentation is written by Masami Hiramatsu | ||
5 | |||
6 | |||
7 | Overview | ||
8 | -------- | ||
9 | These events are similar to tracepoint based events. Instead of tracepoints, | ||
10 | this is based on kprobes (kprobe and kretprobe). So it can probe wherever | ||
11 | kprobes can probe (this means all function bodies except those of __kprobes | ||
12 | functions). Unlike tracepoint based events, these can be added and removed | ||
13 | dynamically, on the fly. | ||
14 | |||
15 | To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. | ||
16 | |||
17 | Similar to the events tracer, this doesn't need to be activated via | ||
18 | current_tracer. Instead, add probe points via | ||
19 | /sys/kernel/debug/tracing/kprobe_events, and enable them via | ||
20 | /sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable. | ||
21 | |||
22 | |||
23 | Synopsis of kprobe_events | ||
24 | ------------------------- | ||
25 | p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe | ||
26 | r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe | ||
27 | |||
28 | GRP : Group name. If omitted, use "kprobes" for it. | ||
29 | EVENT : Event name. If omitted, the event name is generated | ||
30 | based on SYMBOL+offs or MEMADDR. | ||
31 | SYMBOL[+offs] : Symbol+offset where the probe is inserted. | ||
32 | MEMADDR : Address where the probe is inserted. | ||
33 | |||
34 | FETCHARGS : Arguments. Each probe can have up to 128 args. | ||
35 | %REG : Fetch register REG | ||
36 | @ADDR : Fetch memory at ADDR (ADDR should be in kernel) | ||
37 | @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) | ||
38 | $stackN : Fetch Nth entry of stack (N >= 0) | ||
39 | $stack : Fetch stack address. | ||
40 | $argN : Fetch function argument. (N >= 0)(*) | ||
41 | $retval : Fetch return value.(**) | ||
42 | +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) | ||
43 | NAME=FETCHARG: Set NAME as the argument name of FETCHARG. | ||
44 | |||
45 | (*) $argN may not be correct on asmlinkage functions or in the middle of | ||
46 | a function body. | ||
47 | (**) only for return probe. | ||
48 | (***) this is useful for fetching a field of data structures. | ||
49 | |||
50 | |||
51 | Per-Probe Event Filtering | ||
52 | ------------------------- | ||
53 | The per-probe event filtering feature allows you to set a different filter on | ||
54 | each probe and to choose which arguments are shown in the trace buffer. If an | ||
55 | event name is specified right after 'p:' or 'r:' in kprobe_events, it adds an | ||
56 | event under tracing/events/kprobes/<EVENT>; in that directory you can see | ||
57 | 'id', 'enable', 'format' and 'filter'. | ||
58 | |||
59 | enable: | ||
60 | You can enable/disable the probe by writing 1 or 0 to it. | ||
61 | |||
62 | format: | ||
63 | This shows the format of this probe event. | ||
64 | |||
65 | filter: | ||
66 | You can write filtering rules of this event. | ||
67 | |||
68 | id: | ||
69 | This shows the id of this probe event. | ||
70 | |||
71 | |||
72 | Event Profiling | ||
73 | --------------- | ||
74 | You can check the total number of probe hits and probe miss-hits via | ||
75 | /sys/kernel/debug/tracing/kprobe_profile. | ||
76 | The first column is event name, the second is the number of probe hits, | ||
77 | the third is the number of probe miss-hits. | ||
78 | |||
79 | |||
80 | Usage examples | ||
81 | -------------- | ||
82 | To add a probe as a new event, write a new definition to kprobe_events | ||
83 | as below. | ||
84 | |||
85 | echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events | ||
86 | |||
87 | This sets a kprobe on the top of the do_sys_open() function, recording its | ||
88 | 1st to 4th arguments as the "myprobe" event. As this example shows, users can | ||
89 | choose more familiar names for each argument. | ||
90 | |||
91 | echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events | ||
92 | |||
93 | This sets a kretprobe on the return point of do_sys_open() function with | ||
94 | recording return value as "myretprobe" event. | ||
95 | You can see the format of these events via | ||
96 | /sys/kernel/debug/tracing/events/kprobes/<EVENT>/format. | ||
97 | |||
98 | cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format | ||
99 | name: myprobe | ||
100 | ID: 75 | ||
101 | format: | ||
102 | field:unsigned short common_type; offset:0; size:2; | ||
103 | field:unsigned char common_flags; offset:2; size:1; | ||
104 | field:unsigned char common_preempt_count; offset:3; size:1; | ||
105 | field:int common_pid; offset:4; size:4; | ||
106 | field:int common_tgid; offset:8; size:4; | ||
107 | |||
108 | field: unsigned long ip; offset:16; size:8; | ||
109 | field: int nargs; offset:24; size:4; | ||
110 | field: unsigned long dfd; offset:32; size:8; | ||
111 | field: unsigned long filename; offset:40; size:8; | ||
112 | field: unsigned long flags; offset:48; size:8; | ||
113 | field: unsigned long mode; offset:56; size:8; | ||
114 | |||
115 | print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode | ||
116 | |||
117 | |||
118 | You can see that the event has 4 arguments as in the expressions you specified. | ||
119 | |||
120 | echo > /sys/kernel/debug/tracing/kprobe_events | ||
121 | |||
122 | This clears all probe points. | ||
123 | |||
124 | Right after definition, each event is disabled by default. To trace these | ||
125 | events, you need to enable them. | ||
126 | |||
127 | echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable | ||
128 | echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable | ||
129 | |||
130 | And you can see the traced information via /sys/kernel/debug/tracing/trace. | ||
131 | |||
132 | cat /sys/kernel/debug/tracing/trace | ||
133 | # tracer: nop | ||
134 | # | ||
135 | # TASK-PID CPU# TIMESTAMP FUNCTION | ||
136 | # | | | | | | ||
137 | <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 | ||
138 | <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe | ||
139 | <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 | ||
140 | <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 | ||
141 | <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 | ||
142 | <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 | ||
143 | |||
144 | |||
145 | Each line shows when the kernel hits an event, and <- SYMBOL means the kernel | ||
146 | returns from SYMBOL (e.g. "sys_open+0x1b/0x1d <- do_sys_open" means the kernel | ||
147 | returns from do_sys_open to sys_open+0x1b). | ||
148 | |||
149 | |||