diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/arm/OMAP/omap_pm | 129 | ||||
-rw-r--r-- | Documentation/cpu-freq/user-guide.txt | 9 | ||||
-rw-r--r-- | Documentation/dontdiff | 1 | ||||
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 10 | ||||
-rw-r--r-- | Documentation/hwmon/pcf8591 | 28 | ||||
-rw-r--r-- | Documentation/hwmon/tmp421 | 36 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 4 | ||||
-rw-r--r-- | Documentation/trace/events.txt | 184 | ||||
-rw-r--r-- | Documentation/trace/ftrace-design.txt | 233 | ||||
-rw-r--r-- | Documentation/trace/ftrace.txt | 6 |
10 files changed, 613 insertions, 27 deletions
diff --git a/Documentation/arm/OMAP/omap_pm b/Documentation/arm/OMAP/omap_pm new file mode 100644 index 000000000000..5389440aade3 --- /dev/null +++ b/Documentation/arm/OMAP/omap_pm | |||
@@ -0,0 +1,129 @@ | |||
1 | |||
2 | The OMAP PM interface | ||
3 | ===================== | ||
4 | |||
5 | This document describes the temporary OMAP PM interface. Driver | ||
6 | authors use these functions to communicate minimum latency or | ||
7 | throughput constraints to the kernel power management code. | ||
8 | Over time, the intention is to merge features from the OMAP PM | ||
9 | interface into the Linux PM QoS code. | ||
10 | |||
11 | Drivers need to express PM parameters which: | ||
12 | |||
13 | - support the range of power management parameters present in the TI SRF; | ||
14 | |||
15 | - separate the drivers from the underlying PM parameter | ||
16 | implementation, whether it is the TI SRF or Linux PM QoS or Linux | ||
17 | latency framework or something else; | ||
18 | |||
19 | - specify PM parameters in terms of fundamental units, such as | ||
20 | latency and throughput, rather than units which are specific to OMAP | ||
21 | or to particular OMAP variants; | ||
22 | |||
23 | - allow drivers which are shared with other architectures (e.g., | ||
24 | DaVinci) to add these constraints in a way which won't affect non-OMAP | ||
25 | systems; | ||
26 | |||
27 | - can be implemented immediately with minimal disruption of other | ||
28 | architectures. | ||
29 | |||
30 | |||
31 | This document proposes the OMAP PM interface, including the following | ||
32 | five power management functions for driver code: | ||
33 | |||
34 | 1. Set the maximum MPU wakeup latency: | ||
35 | (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t) | ||
36 | |||
37 | 2. Set the maximum device wakeup latency: | ||
38 | (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t) | ||
39 | |||
40 | 3. Set the maximum system DMA transfer start latency (CORE pwrdm): | ||
41 | (*pdata->set_max_sdma_lat)(struct device *dev, long t) | ||
42 | |||
43 | 4. Set the minimum bus throughput needed by a device: | ||
44 | (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r) | ||
45 | |||
46 | 5. Return the number of times the device has lost context | ||
47 | (*pdata->get_dev_context_loss_count)(struct device *dev) | ||
48 | |||
49 | |||
50 | Further documentation for all OMAP PM interface functions can be | ||
51 | found in arch/arm/plat-omap/include/mach/omap-pm.h. | ||
52 | |||
53 | |||
54 | The OMAP PM layer is intended to be temporary | ||
55 | --------------------------------------------- | ||
56 | |||
57 | The intention is that eventually the Linux PM QoS layer should support | ||
58 | the range of power management features present in OMAP3. As this | ||
59 | happens, existing drivers using the OMAP PM interface can be modified | ||
60 | to use the Linux PM QoS code; and the OMAP PM interface can disappear. | ||
61 | |||
62 | |||
63 | Driver usage of the OMAP PM functions | ||
64 | ------------------------------------- | ||
65 | |||
66 | As the 'pdata' in the above examples indicates, these functions are | ||
67 | exposed to drivers through function pointers in driver .platform_data | ||
68 | structures. The function pointers are initialized by the board-*.c | ||
69 | files to point to the corresponding OMAP PM functions: | ||
70 | .set_max_dev_wakeup_lat will point to | ||
71 | omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do | ||
72 | not support these functions should leave these function pointers set | ||
73 | to NULL. Drivers should use the following idiom: | ||
74 | |||
75 | if (pdata->set_max_dev_wakeup_lat) | ||
76 | (*pdata->set_max_dev_wakeup_lat)(dev, t); | ||
77 | |||
78 | The most common usage of these functions will probably be to specify | ||
79 | the maximum time from when an interrupt occurs, to when the device | ||
80 | becomes accessible. To accomplish this, driver writers should use the | ||
81 | set_max_mpu_wakeup_lat() function to constrain the MPU wakeup | ||
82 | latency, and the set_max_dev_wakeup_lat() function to constrain the | ||
83 | device wakeup latency (from clk_enable() to accessibility). For | ||
84 | example, | ||
85 | |||
86 | /* Limit MPU wakeup latency */ | ||
87 | if (pdata->set_max_mpu_wakeup_lat) | ||
88 | (*pdata->set_max_mpu_wakeup_lat)(dev, tc); | ||
89 | |||
90 | /* Limit device powerdomain wakeup latency */ | ||
91 | if (pdata->set_max_dev_wakeup_lat) | ||
92 | (*pdata->set_max_dev_wakeup_lat)(dev, td); | ||
93 | |||
94 | /* total wakeup latency in this example: (tc + td) */ | ||
95 | |||
96 | The PM parameters can be overwritten by calling the function again | ||
97 | with the new value. The settings can be removed by calling the | ||
98 | function with a t argument of -1 (except in the case of | ||
99 | set_min_bus_tput(), which should be called with an r argument of 0). | ||
100 | |||
101 | The fifth function above, omap_pm_get_dev_context_loss_count(), | ||
102 | is intended as an optimization to allow drivers to determine whether the | ||
103 | device has lost its internal context. If context has been lost, the | ||
104 | driver must restore its internal context before proceeding. | ||
105 | |||
106 | |||
107 | Other specialized interface functions | ||
108 | ------------------------------------- | ||
109 | |||
110 | The five functions listed above are intended to be usable by any | ||
111 | device driver. DSPBridge and CPUFreq have a few special requirements. | ||
112 | DSPBridge expresses target DSP performance levels in terms of OPP IDs. | ||
113 | CPUFreq expresses target MPU performance levels in terms of MPU | ||
114 | frequency. The OMAP PM interface contains functions for these | ||
115 | specialized cases to convert that input information (OPPs/MPU | ||
116 | frequency) into the form that the underlying power management | ||
117 | implementation needs: | ||
118 | |||
119 | 6. (*pdata->dsp_get_opp_table)(void) | ||
120 | |||
121 | 7. (*pdata->dsp_set_min_opp)(u8 opp_id) | ||
122 | |||
123 | 8. (*pdata->dsp_get_opp)(void) | ||
124 | |||
125 | 9. (*pdata->cpu_get_freq_table)(void) | ||
126 | |||
127 | 10. (*pdata->cpu_set_freq)(unsigned long f) | ||
128 | |||
129 | 11. (*pdata->cpu_get_freq)(void) | ||
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 5d5f5fadd1c2..2a5b850847c0 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt | |||
@@ -176,7 +176,9 @@ scaling_governor, and by "echoing" the name of another | |||
176 | work on some specific architectures or | 176 | work on some specific architectures or |
177 | processors. | 177 | processors. |
178 | 178 | ||
179 | cpuinfo_cur_freq : Current speed of the CPU, in KHz. | 179 | cpuinfo_cur_freq : Current frequency of the CPU as obtained from |
180 | the hardware, in KHz. This is the frequency | ||
181 | the CPU actually runs at. | ||
180 | 182 | ||
181 | scaling_available_frequencies : List of available frequencies, in KHz. | 183 | scaling_available_frequencies : List of available frequencies, in KHz. |
182 | 184 | ||
@@ -196,7 +198,10 @@ related_cpus : List of CPUs that need some sort of frequency | |||
196 | 198 | ||
197 | scaling_driver : Hardware driver for cpufreq. | 199 | scaling_driver : Hardware driver for cpufreq. |
198 | 200 | ||
199 | scaling_cur_freq : Current frequency of the CPU, in KHz. | 201 | scaling_cur_freq : Current frequency of the CPU as determined by |
202 | the governor and cpufreq core, in KHz. This is | ||
203 | the frequency the kernel thinks the CPU runs | ||
204 | at. | ||
200 | 205 | ||
201 | If you have selected the "userspace" governor which allows you to | 206 | If you have selected the "userspace" governor which allows you to |
202 | set the CPU operating frequency to a specific value, you can read out | 207 | set the CPU operating frequency to a specific value, you can read out |
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 88519daab6e9..e1efc400bed6 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -152,7 +152,6 @@ piggy.gz | |||
152 | piggyback | 152 | piggyback |
153 | pnmtologo | 153 | pnmtologo |
154 | ppc_defs.h* | 154 | ppc_defs.h* |
155 | promcon_tbl.c | ||
156 | pss_boot.h | 155 | pss_boot.h |
157 | qconf | 156 | qconf |
158 | raid6altivec*.c | 157 | raid6altivec*.c |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 503d21216d58..fa75220f8d34 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -428,16 +428,6 @@ Who: Johannes Berg <johannes@sipsolutions.net> | |||
428 | 428 | ||
429 | ---------------------------- | 429 | ---------------------------- |
430 | 430 | ||
431 | What: CONFIG_X86_OLD_MCE | ||
432 | When: 2.6.32 | ||
433 | Why: Remove the old legacy 32bit machine check code. This has been | ||
434 | superseded by the newer machine check code from the 64bit port, | ||
435 | but the old version has been kept around for easier testing. Note this | ||
436 | doesn't impact the old P5 and WinChip machine check handlers. | ||
437 | Who: Andi Kleen <andi@firstfloor.org> | ||
438 | |||
439 | ---------------------------- | ||
440 | |||
441 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be | 431 | What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be |
442 | exported interface anymore. | 432 | exported interface anymore. |
443 | When: 2.6.33 | 433 | When: 2.6.33 |
diff --git a/Documentation/hwmon/pcf8591 b/Documentation/hwmon/pcf8591 index 5628fcf4207f..e76a7892f68e 100644 --- a/Documentation/hwmon/pcf8591 +++ b/Documentation/hwmon/pcf8591 | |||
@@ -2,11 +2,11 @@ Kernel driver pcf8591 | |||
2 | ===================== | 2 | ===================== |
3 | 3 | ||
4 | Supported chips: | 4 | Supported chips: |
5 | * Philips PCF8591 | 5 | * Philips/NXP PCF8591 |
6 | Prefix: 'pcf8591' | 6 | Prefix: 'pcf8591' |
7 | Addresses scanned: I2C 0x48 - 0x4f | 7 | Addresses scanned: I2C 0x48 - 0x4f |
8 | Datasheet: Publicly available at the Philips Semiconductor website | 8 | Datasheet: Publicly available at the NXP website |
9 | http://www.semiconductors.philips.com/pip/PCF8591P.html | 9 | http://www.nxp.com/pip/PCF8591_6.html |
10 | 10 | ||
11 | Authors: | 11 | Authors: |
12 | Aurelien Jarno <aurelien@aurel32.net> | 12 | Aurelien Jarno <aurelien@aurel32.net> |
@@ -16,9 +16,10 @@ Authors: | |||
16 | 16 | ||
17 | Description | 17 | Description |
18 | ----------- | 18 | ----------- |
19 | |||
19 | The PCF8591 is an 8-bit A/D and D/A converter (4 analog inputs and one | 20 | The PCF8591 is an 8-bit A/D and D/A converter (4 analog inputs and one |
20 | analog output) for the I2C bus produced by Philips Semiconductors. It | 21 | analog output) for the I2C bus produced by Philips Semiconductors (now NXP). |
21 | is designed to provide a byte I2C interface to up to 4 separate devices. | 22 | It is designed to provide a byte I2C interface to up to 4 separate devices. |
22 | 23 | ||
23 | The PCF8591 has 4 analog inputs programmable as single-ended or | 24 | The PCF8591 has 4 analog inputs programmable as single-ended or |
24 | differential inputs : | 25 | differential inputs : |
@@ -58,8 +59,8 @@ Accessing PCF8591 via /sys interface | |||
58 | ------------------------------------- | 59 | ------------------------------------- |
59 | 60 | ||
60 | ! Be careful ! | 61 | ! Be careful ! |
61 | The PCF8591 is plainly impossible to detect ! Stupid chip. | 62 | The PCF8591 is plainly impossible to detect! Stupid chip. |
62 | So every chip with address in the interval [48..4f] is | 63 | So every chip with address in the interval [0x48..0x4f] is |
63 | detected as PCF8591. If you have other chips in this address | 64 | detected as PCF8591. If you have other chips in this address |
64 | range, the workaround is to load this module after the one | 65 | range, the workaround is to load this module after the one |
65 | for your others chips. | 66 | for your others chips. |
@@ -67,19 +68,20 @@ for your others chips. | |||
67 | On detection (i.e. insmod, modprobe et al.), directories are being | 68 | On detection (i.e. insmod, modprobe et al.), directories are being |
68 | created for each detected PCF8591: | 69 | created for each detected PCF8591: |
69 | 70 | ||
70 | /sys/bus/devices/<0>-<1>/ | 71 | /sys/bus/i2c/devices/<0>-<1>/ |
71 | where <0> is the bus the chip was detected on (e. g. i2c-0) | 72 | where <0> is the bus the chip was detected on (e. g. i2c-0) |
72 | and <1> the chip address ([48..4f]) | 73 | and <1> the chip address ([48..4f]) |
73 | 74 | ||
74 | Inside these directories, there are such files: | 75 | Inside these directories, there are such files: |
75 | in0, in1, in2, in3, out0_enable, out0_output, name | 76 | in0_input, in1_input, in2_input, in3_input, out0_enable, out0_output, name |
76 | 77 | ||
77 | Name contains chip name. | 78 | Name contains chip name. |
78 | 79 | ||
79 | The in0, in1, in2 and in3 files are RO. Reading gives the value of the | 80 | The in0_input, in1_input, in2_input and in3_input files are RO. Reading gives |
80 | corresponding channel. Depending on the current analog inputs configuration, | 81 | the value of the corresponding channel. Depending on the current analog inputs |
81 | files in2 and/or in3 do not exist. Values range are from 0 to 255 for single | 82 | configuration, files in2_input and in3_input may not exist. Values range |
82 | ended inputs and -128 to +127 for differential inputs (8-bit ADC). | 83 | from 0 to 255 for single ended inputs and -128 to +127 for differential inputs |
84 | (8-bit ADC). | ||
83 | 85 | ||
84 | The out0_enable file is RW. Reading gives "1" for analog output enabled and | 86 | The out0_enable file is RW. Reading gives "1" for analog output enabled and |
85 | "0" for analog output disabled. Writing accepts "0" and "1" accordingly. | 87 | "0" for analog output disabled. Writing accepts "0" and "1" accordingly. |
diff --git a/Documentation/hwmon/tmp421 b/Documentation/hwmon/tmp421 new file mode 100644 index 000000000000..0cf07f824741 --- /dev/null +++ b/Documentation/hwmon/tmp421 | |||
@@ -0,0 +1,36 @@ | |||
1 | Kernel driver tmp421 | ||
2 | ==================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Texas Instruments TMP421 | ||
6 | Prefix: 'tmp421' | ||
7 | Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f | ||
8 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html | ||
9 | * Texas Instruments TMP422 | ||
10 | Prefix: 'tmp422' | ||
11 | Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f | ||
12 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html | ||
13 | * Texas Instruments TMP423 | ||
14 | Prefix: 'tmp423' | ||
15 | Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f | ||
16 | Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html | ||
17 | |||
18 | Authors: | ||
19 | Andre Prendel <andre.prendel@gmx.de> | ||
20 | |||
21 | Description | ||
22 | ----------- | ||
23 | |||
24 | This driver implements support for Texas Instruments TMP421, TMP422 | ||
25 | and TMP423 temperature sensor chips. These chips implement one local | ||
26 | and up to one (TMP421), up to two (TMP422) or up to three (TMP423) | ||
27 | remote sensors. Temperature is measured in degrees Celsius. The chips | ||
28 | are wired over I2C/SMBus and specified over a temperature range of -40 | ||
29 | to +125 degrees Celsius. Resolution for both the local and remote | ||
30 | channels is 0.0625 degree C. | ||
31 | |||
32 | The chips support only temperature measurement. The driver exports | ||
33 | the temperature values via the following sysfs files: | ||
34 | |||
35 | temp[1-4]_input | ||
36 | temp[2-4]_fault | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c12a290bee5..f45d0d8e71d8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1286,6 +1286,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1286 | (machvec) in a generic kernel. | 1286 | (machvec) in a generic kernel. |
1287 | Example: machvec=hpzx1_swiotlb | 1287 | Example: machvec=hpzx1_swiotlb |
1288 | 1288 | ||
1289 | machtype= [Loongson] Share the same kernel image file between different | ||
1290 | yeeloong laptops. | ||
1291 | Example: machtype=lemote-yeeloong-2f-7inch | ||
1292 | |||
1289 | max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater | 1293 | max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater |
1290 | than or equal to this physical address is ignored. | 1294 | than or equal to this physical address is ignored. |
1291 | 1295 | ||
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt index 90e8b3383ba2..78c45a87be57 100644 --- a/Documentation/trace/events.txt +++ b/Documentation/trace/events.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | Event Tracing | 1 | Event Tracing |
2 | 2 | ||
3 | Documentation written by Theodore Ts'o | 3 | Documentation written by Theodore Ts'o |
4 | Updated by Li Zefan | 4 | Updated by Li Zefan and Tom Zanussi |
5 | 5 | ||
6 | 1. Introduction | 6 | 1. Introduction |
7 | =============== | 7 | =============== |
@@ -97,3 +97,185 @@ The format of this boot option is the same as described in section 2.1. | |||
97 | 97 | ||
98 | See The example provided in samples/trace_events | 98 | See The example provided in samples/trace_events |
99 | 99 | ||
100 | 4. Event formats | ||
101 | ================ | ||
102 | |||
103 | Each trace event has a 'format' file associated with it that contains | ||
104 | a description of each field in a logged event. This information can | ||
105 | be used to parse the binary trace stream, and is also the place to | ||
106 | find the field names that can be used in event filters (see section 5). | ||
107 | |||
108 | It also displays the format string that will be used to print the | ||
109 | event in text mode, along with the event name and ID used for | ||
110 | profiling. | ||
111 | |||
112 | Every event has a set of 'common' fields associated with it; these are | ||
113 | the fields prefixed with 'common_'. The other fields vary between | ||
114 | events and correspond to the fields defined in the TRACE_EVENT | ||
115 | definition for that event. | ||
116 | |||
117 | Each field in the format has the form: | ||
118 | |||
119 | field:field-type field-name; offset:N; size:N; | ||
120 | |||
121 | where offset is the offset of the field in the trace record and size | ||
122 | is the size of the data item, in bytes. | ||
123 | |||
124 | For example, here's the information displayed for the 'sched_wakeup' | ||
125 | event: | ||
126 | |||
127 | # cat /debug/tracing/events/sched/sched_wakeup/format | ||
128 | |||
129 | name: sched_wakeup | ||
130 | ID: 60 | ||
131 | format: | ||
132 | field:unsigned short common_type; offset:0; size:2; | ||
133 | field:unsigned char common_flags; offset:2; size:1; | ||
134 | field:unsigned char common_preempt_count; offset:3; size:1; | ||
135 | field:int common_pid; offset:4; size:4; | ||
136 | field:int common_tgid; offset:8; size:4; | ||
137 | |||
138 | field:char comm[TASK_COMM_LEN]; offset:12; size:16; | ||
139 | field:pid_t pid; offset:28; size:4; | ||
140 | field:int prio; offset:32; size:4; | ||
141 | field:int success; offset:36; size:4; | ||
142 | field:int cpu; offset:40; size:4; | ||
143 | |||
144 | print fmt: "task %s:%d [%d] success=%d [%03d]", REC->comm, REC->pid, | ||
145 | REC->prio, REC->success, REC->cpu | ||
146 | |||
147 | This event contains 10 fields, the first 5 common and the remaining 5 | ||
148 | event-specific. All the fields for this event are numeric, except for | ||
149 | 'comm' which is a string, a distinction important for event filtering. | ||
150 | |||
151 | 5. Event filtering | ||
152 | ================== | ||
153 | |||
154 | Trace events can be filtered in the kernel by associating boolean | ||
155 | 'filter expressions' with them. As soon as an event is logged into | ||
156 | the trace buffer, its fields are checked against the filter expression | ||
157 | associated with that event type. An event with field values that | ||
158 | 'match' the filter will appear in the trace output, and an event whose | ||
159 | values don't match will be discarded. An event with no filter | ||
160 | associated with it matches everything, and is the default when no | ||
161 | filter has been set for an event. | ||
162 | |||
163 | 5.1 Expression syntax | ||
164 | --------------------- | ||
165 | |||
166 | A filter expression consists of one or more 'predicates' that can be | ||
167 | combined using the logical operators '&&' and '||'. A predicate is | ||
168 | simply a clause that compares the value of a field contained within a | ||
169 | logged event with a constant value and returns either 0 or 1 depending | ||
170 | on whether the field value matched (1) or didn't match (0): | ||
171 | |||
172 | field-name relational-operator value | ||
173 | |||
174 | Parentheses can be used to provide arbitrary logical groupings and | ||
175 | double-quotes can be used to prevent the shell from interpreting | ||
176 | operators as shell metacharacters. | ||
177 | |||
178 | The field-names available for use in filters can be found in the | ||
179 | 'format' files for trace events (see section 4). | ||
180 | |||
181 | The relational-operators depend on the type of the field being tested: | ||
182 | |||
183 | The operators available for numeric fields are: | ||
184 | |||
185 | ==, !=, <, <=, >, >= | ||
186 | |||
187 | And for string fields they are: | ||
188 | |||
189 | ==, != | ||
190 | |||
191 | Currently, only exact string matches are supported. | ||
192 | |||
193 | Currently, the maximum number of predicates in a filter is 16. | ||
194 | |||
195 | 5.2 Setting filters | ||
196 | ------------------- | ||
197 | |||
198 | A filter for an individual event is set by writing a filter expression | ||
199 | to the 'filter' file for the given event. | ||
200 | |||
201 | For example: | ||
202 | |||
203 | # cd /debug/tracing/events/sched/sched_wakeup | ||
204 | # echo "common_preempt_count > 4" > filter | ||
205 | |||
206 | A slightly more involved example: | ||
207 | |||
208 | # cd /debug/tracing/events/sched/sched_signal_send | ||
209 | # echo "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter | ||
210 | |||
211 | If there is an error in the expression, you'll get an 'Invalid | ||
212 | argument' error when setting it, and the erroneous string along with | ||
213 | an error message can be seen by looking at the filter e.g.: | ||
214 | |||
215 | # cd /debug/tracing/events/sched/sched_signal_send | ||
216 | # echo "((sig >= 10 && sig < 15) || dsig == 17) && comm != bash" > filter | ||
217 | -bash: echo: write error: Invalid argument | ||
218 | # cat filter | ||
219 | ((sig >= 10 && sig < 15) || dsig == 17) && comm != bash | ||
220 | ^ | ||
221 | parse_error: Field not found | ||
222 | |||
223 | Currently the caret ('^') for an error always appears at the beginning of | ||
224 | the filter string; the error message should still be useful though | ||
225 | even without more accurate position info. | ||
226 | |||
227 | 5.3 Clearing filters | ||
228 | -------------------- | ||
229 | |||
230 | To clear the filter for an event, write a '0' to the event's filter | ||
231 | file. | ||
232 | |||
233 | To clear the filters for all events in a subsystem, write a '0' to the | ||
234 | subsystem's filter file. | ||
235 | |||
236 | 5.4 Subsystem filters | ||
237 | --------------------- | ||
238 | |||
239 | For convenience, filters for every event in a subsystem can be set or | ||
240 | cleared as a group by writing a filter expression into the filter file | ||
241 | at the root of the subsystem. Note however, that if a filter for any | ||
242 | event within the subsystem lacks a field specified in the subsystem | ||
243 | filter, or if the filter can't be applied for any other reason, the | ||
244 | filter for that event will retain its previous setting. This can | ||
245 | result in an unintended mixture of filters which could lead to | ||
246 | confusing (to the user who might think different filters are in | ||
247 | effect) trace output. Only filters that reference just the common | ||
248 | fields can be guaranteed to propagate successfully to all events. | ||
249 | |||
250 | Here are a few subsystem filter examples that also illustrate the | ||
251 | above points: | ||
252 | |||
253 | Clear the filters on all events in the sched subsystem: | ||
254 | |||
255 | # cd /sys/kernel/debug/tracing/events/sched | ||
256 | # echo 0 > filter | ||
257 | # cat sched_switch/filter | ||
258 | none | ||
259 | # cat sched_wakeup/filter | ||
260 | none | ||
261 | |||
262 | Set a filter using only common fields for all events in the sched | ||
263 | subsystem (all events end up with the same filter): | ||
264 | |||
265 | # cd /sys/kernel/debug/tracing/events/sched | ||
266 | # echo common_pid == 0 > filter | ||
267 | # cat sched_switch/filter | ||
268 | common_pid == 0 | ||
269 | # cat sched_wakeup/filter | ||
270 | common_pid == 0 | ||
271 | |||
272 | Attempt to set a filter using a non-common field for all events in the | ||
273 | sched subsystem (all events but those that have a prev_pid field retain | ||
274 | their old filters): | ||
275 | |||
276 | # cd /sys/kernel/debug/tracing/events/sched | ||
277 | # echo prev_pid == 0 > filter | ||
278 | # cat sched_switch/filter | ||
279 | prev_pid == 0 | ||
280 | # cat sched_wakeup/filter | ||
281 | common_pid == 0 | ||
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt new file mode 100644 index 000000000000..7003e10f10f5 --- /dev/null +++ b/Documentation/trace/ftrace-design.txt | |||
@@ -0,0 +1,233 @@ | |||
1 | function tracer guts | ||
2 | ==================== | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | Here we will cover the architecture pieces that the common function tracing | ||
8 | code relies on for proper functioning. Things are broken down into increasing | ||
9 | complexity so that you can start simple and at least get basic functionality. | ||
10 | |||
11 | Note that this focuses on architecture implementation details only. If you | ||
12 | want more explanation of a feature in terms of common code, review the common | ||
13 | ftrace.txt file. | ||
14 | |||
15 | |||
16 | Prerequisites | ||
17 | ------------- | ||
18 | |||
19 | Ftrace relies on these features being implemented: | ||
20 | STACKTRACE_SUPPORT - implement save_stack_trace() | ||
21 | TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h | ||
22 | |||
23 | |||
24 | HAVE_FUNCTION_TRACER | ||
25 | -------------------- | ||
26 | |||
27 | You will need to implement the mcount and the ftrace_stub functions. | ||
28 | |||
29 | The exact mcount symbol name will depend on your toolchain. Some call it | ||
30 | "mcount", "_mcount", or even "__mcount". You can probably figure it out by | ||
31 | running something like: | ||
32 | $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount | ||
33 | call mcount | ||
34 | We'll make the assumption below that the symbol is "mcount" just to keep things | ||
35 | nice and simple in the examples. | ||
36 | |||
37 | Keep in mind that the ABI that is in effect inside of the mcount function is | ||
38 | *highly* architecture/toolchain specific. We cannot help you in this regard, | ||
39 | sorry. Dig up some old documentation and/or find someone more familiar than | ||
40 | you to bang ideas off of. Typically, register usage (argument/scratch/etc...) | ||
41 | is a major issue at this point, especially in relation to the location of the | ||
42 | mcount call (before/after function prologue). You might also want to look at | ||
43 | how glibc has implemented the mcount function for your architecture. It might | ||
44 | be (semi-)relevant. | ||
45 | |||
46 | The mcount function should check the function pointer ftrace_trace_function | ||
47 | to see if it is set to ftrace_stub. If it is, there is nothing for you to do, | ||
48 | so return immediately. If it isn't, then call that function in the same way | ||
49 | the mcount function normally calls __mcount_internal -- the first argument is | ||
50 | the "frompc" while the second argument is the "selfpc" (adjusted to remove the | ||
51 | size of the mcount call that is embedded in the function). | ||
52 | |||
53 | For example, if the function foo() calls bar(), when the bar() function calls | ||
54 | mcount(), the arguments mcount() will pass to the tracer are: | ||
55 | "frompc" - the address bar() will use to return to foo() | ||
56 | "selfpc" - the address of bar() (with mcount() size adjustment) | ||
57 | |||
58 | Also keep in mind that this mcount function will be called *a lot*, so | ||
59 | optimizing for the default case of no tracer will help the smooth running of | ||
60 | your system when tracing is disabled. So the start of the mcount function is | ||
61 | typically the bare min with checking things before returning. That also means | ||
62 | the code flow should usually be kept linear (i.e. no branching in the nop case). | ||
63 | This is of course an optimization and not a hard requirement. | ||
64 | |||
65 | Here is some pseudo code that should help (these functions should actually be | ||
66 | implemented in assembly): | ||
67 | |||
68 | void ftrace_stub(void) | ||
69 | { | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | void mcount(void) | ||
74 | { | ||
75 | /* save any bare state needed in order to do initial checking */ | ||
76 | |||
77 | extern void (*ftrace_trace_function)(unsigned long, unsigned long); | ||
78 | if (ftrace_trace_function != ftrace_stub) | ||
79 | goto do_trace; | ||
80 | |||
81 | /* restore any bare state */ | ||
82 | |||
83 | return; | ||
84 | |||
85 | do_trace: | ||
86 | |||
87 | /* save all state needed by the ABI (see paragraph above) */ | ||
88 | |||
89 | unsigned long frompc = ...; | ||
90 | unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE; | ||
91 | ftrace_trace_function(frompc, selfpc); | ||
92 | |||
93 | /* restore all state needed by the ABI */ | ||
94 | } | ||
95 | |||
96 | Don't forget to export mcount for modules ! | ||
97 | extern void mcount(void); | ||
98 | EXPORT_SYMBOL(mcount); | ||
99 | |||
100 | |||
101 | HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
102 | ------------------------------- | ||
103 | |||
104 | This is an optional optimization for the normal case when tracing is turned off | ||
105 | in the system. If you do not enable this Kconfig option, the common ftrace | ||
106 | code will take care of doing the checking for you. | ||
107 | |||
108 | To support this feature, you only need to check the function_trace_stop | ||
109 | variable in the mcount function. If it is non-zero, there is no tracing to be | ||
110 | done at all, so you can return. | ||
111 | |||
112 | This additional pseudo code would simply be: | ||
113 | void mcount(void) | ||
114 | { | ||
115 | /* save any bare state needed in order to do initial checking */ | ||
116 | |||
117 | + if (function_trace_stop) | ||
118 | + return; | ||
119 | |||
120 | extern void (*ftrace_trace_function)(unsigned long, unsigned long); | ||
121 | if (ftrace_trace_function != ftrace_stub) | ||
122 | ... | ||
123 | |||
124 | |||
125 | HAVE_FUNCTION_GRAPH_TRACER | ||
126 | -------------------------- | ||
127 | |||
128 | Deep breath ... time to do some real work. Here you will need to update the | ||
129 | mcount function to check ftrace graph function pointers, as well as implement | ||
130 | some functions to save (hijack) and restore the return address. | ||
131 | |||
132 | The mcount function should check the function pointers ftrace_graph_return | ||
133 | (compare to ftrace_stub) and ftrace_graph_entry (compare to | ||
134 | ftrace_graph_entry_stub). If either of those is not set to the relevant stub | ||
135 | function, call the arch-specific function ftrace_graph_caller, which in turn | ||
136 | calls the arch-specific function prepare_ftrace_return. Neither of these | ||
137 | function names is strictly required, but you should use them anyway to stay | ||
138 | consistent across the architecture ports -- easier to compare & contrast | ||
139 | things. | ||
140 | |||
141 | The arguments to prepare_ftrace_return are slightly different from those | ||
142 | passed to ftrace_trace_function. The second argument "selfpc" is the same, | ||
143 | but the first argument should be a pointer to the "frompc". Typically this is | ||
144 | located on the stack. This allows the function to hijack the return address | ||
145 | temporarily to have it point to the arch-specific function return_to_handler. | ||
146 | That function will simply call the common ftrace_return_to_handler function, | ||
147 | which returns the original return address; you can then use that address to | ||
148 | return to the original call site. | ||
149 | |||
150 | Here is the updated mcount pseudo code: | ||
151 | void mcount(void) | ||
152 | { | ||
153 | ... | ||
154 | if (ftrace_trace_function != ftrace_stub) | ||
155 | goto do_trace; | ||
156 | |||
157 | +#ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
158 | + extern void (*ftrace_graph_return)(...); | ||
159 | + extern void (*ftrace_graph_entry)(...); | ||
160 | + if (ftrace_graph_return != ftrace_stub || | ||
161 | + ftrace_graph_entry != ftrace_graph_entry_stub) | ||
162 | + ftrace_graph_caller(); | ||
163 | +#endif | ||
164 | |||
165 | /* restore any bare state */ | ||
166 | ... | ||
167 | |||
168 | Here is the pseudo code for the new ftrace_graph_caller assembly function: | ||
169 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
170 | void ftrace_graph_caller(void) | ||
171 | { | ||
172 | /* save all state needed by the ABI */ | ||
173 | |||
174 | unsigned long *frompc = &...; | ||
175 | unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE; | ||
176 | prepare_ftrace_return(frompc, selfpc); | ||
177 | |||
178 | /* restore all state needed by the ABI */ | ||
179 | } | ||
180 | #endif | ||
181 | |||
182 | For information on how to implement prepare_ftrace_return(), simply look at | ||
183 | the x86 version. The only architecture-specific piece in it is the setup of | ||
184 | the fault recovery table (the asm(...) code). The rest should be the same | ||
185 | across architectures. | ||
186 | |||
187 | Here is the pseudo code for the new return_to_handler assembly function. Note | ||
188 | that the ABI that applies here is different from what applies to the mcount | ||
189 | code. Since you are returning from a function (after the epilogue), you might | ||
190 | be able to skimp on things saved/restored (usually just registers used to pass | ||
191 | return values). | ||
192 | |||
193 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
194 | void return_to_handler(void) | ||
195 | { | ||
196 | /* save all state needed by the ABI (see paragraph above) */ | ||
197 | |||
198 | void (*original_return_point)(void) = ftrace_return_to_handler(); | ||
199 | |||
200 | /* restore all state needed by the ABI */ | ||
201 | |||
202 | /* this is usually either a return or a jump */ | ||
203 | original_return_point(); | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | |||
208 | HAVE_FTRACE_NMI_ENTER | ||
209 | --------------------- | ||
210 | |||
211 | If you can't trace NMI functions, then skip this option. | ||
212 | |||
213 | <details to be filled> | ||
214 | |||
215 | |||
216 | HAVE_FTRACE_SYSCALLS | ||
217 | -------------------- | ||
218 | |||
219 | <details to be filled> | ||
220 | |||
221 | |||
222 | HAVE_FTRACE_MCOUNT_RECORD | ||
223 | ------------------------- | ||
224 | |||
225 | See scripts/recordmcount.pl for more info. | ||
226 | |||
227 | <details to be filled> | ||
228 | |||
229 | |||
230 | HAVE_DYNAMIC_FTRACE | ||
231 | ------------------- | ||
232 | |||
233 | <details to be filled> | ||
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 355d0f1f8c50..1b6292bbdd6d 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt | |||
@@ -26,6 +26,12 @@ disabled, and more (ftrace allows for tracer plugins, which | |||
26 | means that the list of tracers can always grow). | 26 | means that the list of tracers can always grow). |
27 | 27 | ||
28 | 28 | ||
29 | Implementation Details | ||
30 | ---------------------- | ||
31 | |||
32 | See ftrace-design.txt for details for arch porters and such. | ||
33 | |||
34 | |||
29 | The File System | 35 | The File System |
30 | --------------- | 36 | --------------- |
31 | 37 | ||