aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/DMA-ISA-LPC.txt2
-rw-r--r--Documentation/DocBook/Makefile5
-rw-r--r--Documentation/DocBook/deviceiobook.tmpl323
-rw-r--r--Documentation/DocBook/iio.tmpl697
-rw-r--r--Documentation/DocBook/regulator.tmpl304
-rw-r--r--Documentation/Makefile.sphinx34
-rw-r--r--Documentation/admin-guide/README.rst4
-rw-r--r--Documentation/admin-guide/dynamic-debug-howto.rst4
-rw-r--r--Documentation/block/pr.txt2
-rw-r--r--Documentation/cgroup-v1/cpusets.txt2
-rw-r--r--Documentation/conf.py2
-rw-r--r--Documentation/core-api/cpu_hotplug.rst372
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/cpu-freq/user-guide.txt4
-rw-r--r--Documentation/cpu-hotplug.txt452
-rw-r--r--Documentation/dev-tools/sparse.rst6
-rw-r--r--Documentation/dontdiff7
-rw-r--r--Documentation/driver-api/device-io.rst201
-rw-r--r--Documentation/driver-api/device_link.rst18
-rw-r--r--Documentation/driver-api/iio/buffers.rst125
-rw-r--r--Documentation/driver-api/iio/core.rst182
-rw-r--r--Documentation/driver-api/iio/index.rst17
-rw-r--r--Documentation/driver-api/iio/intro.rst33
-rw-r--r--Documentation/driver-api/iio/triggered-buffers.rst69
-rw-r--r--Documentation/driver-api/iio/triggers.rst80
-rw-r--r--Documentation/driver-api/index.rst4
-rw-r--r--Documentation/driver-api/pm/conf.py10
-rw-r--r--Documentation/driver-api/pm/devices.rst736
-rw-r--r--Documentation/driver-api/pm/index.rst16
-rw-r--r--Documentation/driver-api/pm/notifiers.rst70
-rw-r--r--Documentation/driver-api/pm/types.rst5
-rw-r--r--Documentation/driver-api/regulator.rst170
-rw-r--r--Documentation/hwmon/ds16218
-rw-r--r--Documentation/index.rst10
-rw-r--r--Documentation/input/input.txt4
-rw-r--r--Documentation/ioctl/botching-up-ioctls.txt2
-rw-r--r--Documentation/livepatch/livepatch.txt2
-rw-r--r--Documentation/media/Makefile3
-rw-r--r--Documentation/networking/kcm.txt2
-rw-r--r--Documentation/power/00-INDEX2
-rw-r--r--Documentation/power/devices.txt716
-rw-r--r--Documentation/power/freezing-of-tasks.txt3
-rw-r--r--Documentation/power/notifiers.txt55
-rw-r--r--Documentation/power/pci.txt2
-rw-r--r--Documentation/pps/pps.txt18
-rw-r--r--Documentation/thermal/nouveau_thermal2
-rw-r--r--Documentation/translations/ja_JP/HOWTO2
-rw-r--r--Documentation/translations/ko_KR/howto.rst4
-rw-r--r--Documentation/translations/zh_CN/CodingStyle813
-rw-r--r--Documentation/translations/zh_CN/coding-style.rst950
-rw-r--r--Documentation/translations/zh_CN/index.rst12
-rw-r--r--Documentation/usb/power-management.txt2
-rw-r--r--Documentation/vm/transhuge.txt2
-rw-r--r--Makefile2
-rw-r--r--include/linux/pm.h110
-rwxr-xr-xscripts/kernel-doc115
56 files changed, 3288 insertions, 3510 deletions
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index b1a19835e907..c41331398752 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc.
42 42
43Unfortunately the memory available for ISA DMA is scarce so unless you 43Unfortunately the memory available for ISA DMA is scarce so unless you
44allocate the memory during boot-up it's a good idea to also pass 44allocate the memory during boot-up it's a good idea to also pass
45__GFP_REPEAT and __GFP_NOWARN to make the allocater try a bit harder. 45__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
46 46
47(This scarcity also means that you should allocate the buffer as 47(This scarcity also means that you should allocate the buffer as
48early as possible and not release it until the driver is unloaded.) 48early as possible and not release it until the driver is unloaded.)
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 5fd8f5effd0c..60a17b7da834 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,7 @@ DOCBOOKS := z8530book.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml scsi.xml \ 14 genericirq.xml s390-drivers.xml scsi.xml \
15 sh.xml regulator.xml w1.xml \ 15 sh.xml regulator.xml w1.xml \
16 writing_musb_glue_layer.xml iio.xml 16 writing_musb_glue_layer.xml
17 17
18ifeq ($(DOCBOOKS),) 18ifeq ($(DOCBOOKS),)
19 19
@@ -71,6 +71,7 @@ installmandocs: mandocs
71# no-op for the DocBook toolchain 71# no-op for the DocBook toolchain
72epubdocs: 72epubdocs:
73latexdocs: 73latexdocs:
74linkcheckdocs:
74 75
75### 76###
76#External programs used 77#External programs used
@@ -272,6 +273,6 @@ cleandocs:
272 $(Q)rm -rf $(call objectify, $(clean-dirs)) 273 $(Q)rm -rf $(call objectify, $(clean-dirs))
273 274
274# Declare the contents of the .PHONY variable as phony. We keep that 275# Declare the contents of the .PHONY variable as phony. We keep that
275# information in a variable se we can use it in if_changed and friends. 276# information in a variable so we can use it in if_changed and friends.
276 277
277.PHONY: $(PHONY) 278.PHONY: $(PHONY)
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl
deleted file mode 100644
index 54199a0dcf9a..000000000000
--- a/Documentation/DocBook/deviceiobook.tmpl
+++ /dev/null
@@ -1,323 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="DoingIO">
6 <bookinfo>
7 <title>Bus-Independent Device Accesses</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Matthew</firstname>
12 <surname>Wilcox</surname>
13 <affiliation>
14 <address>
15 <email>matthew@wil.cx</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <authorgroup>
22 <author>
23 <firstname>Alan</firstname>
24 <surname>Cox</surname>
25 <affiliation>
26 <address>
27 <email>alan@lxorguk.ukuu.org.uk</email>
28 </address>
29 </affiliation>
30 </author>
31 </authorgroup>
32
33 <copyright>
34 <year>2001</year>
35 <holder>Matthew Wilcox</holder>
36 </copyright>
37
38 <legalnotice>
39 <para>
40 This documentation is free software; you can redistribute
41 it and/or modify it under the terms of the GNU General Public
42 License as published by the Free Software Foundation; either
43 version 2 of the License, or (at your option) any later
44 version.
45 </para>
46
47 <para>
48 This program is distributed in the hope that it will be
49 useful, but WITHOUT ANY WARRANTY; without even the implied
50 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
51 See the GNU General Public License for more details.
52 </para>
53
54 <para>
55 You should have received a copy of the GNU General Public
56 License along with this program; if not, write to the Free
57 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
58 MA 02111-1307 USA
59 </para>
60
61 <para>
62 For more details see the file COPYING in the source
63 distribution of Linux.
64 </para>
65 </legalnotice>
66 </bookinfo>
67
68<toc></toc>
69
70 <chapter id="intro">
71 <title>Introduction</title>
72 <para>
73 Linux provides an API which abstracts performing IO across all busses
74 and devices, allowing device drivers to be written independently of
75 bus type.
76 </para>
77 </chapter>
78
79 <chapter id="bugs">
80 <title>Known Bugs And Assumptions</title>
81 <para>
82 None.
83 </para>
84 </chapter>
85
86 <chapter id="mmio">
87 <title>Memory Mapped IO</title>
88 <sect1 id="getting_access_to_the_device">
89 <title>Getting Access to the Device</title>
90 <para>
91 The most widely supported form of IO is memory mapped IO.
92 That is, a part of the CPU's address space is interpreted
93 not as accesses to memory, but as accesses to a device. Some
94 architectures define devices to be at a fixed address, but most
95 have some method of discovering devices. The PCI bus walk is a
96 good example of such a scheme. This document does not cover how
97 to receive such an address, but assumes you are starting with one.
98 Physical addresses are of type unsigned long.
99 </para>
100
101 <para>
102 This address should not be used directly. Instead, to get an
103 address suitable for passing to the accessor functions described
104 below, you should call <function>ioremap</function>.
105 An address suitable for accessing the device will be returned to you.
106 </para>
107
108 <para>
109 After you've finished using the device (say, in your module's
110 exit routine), call <function>iounmap</function> in order to return
111 the address space to the kernel. Most architectures allocate new
112 address space each time you call <function>ioremap</function>, and
113 they can run out unless you call <function>iounmap</function>.
114 </para>
115 </sect1>
116
117 <sect1 id="accessing_the_device">
118 <title>Accessing the device</title>
119 <para>
120 The part of the interface most used by drivers is reading and
121 writing memory-mapped registers on the device. Linux provides
122 interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
123 quantities. Due to a historical accident, these are named byte,
124 word, long and quad accesses. Both read and write accesses are
125 supported; there is no prefetch support at this time.
126 </para>
127
128 <para>
129 The functions are named <function>readb</function>,
130 <function>readw</function>, <function>readl</function>,
131 <function>readq</function>, <function>readb_relaxed</function>,
132 <function>readw_relaxed</function>, <function>readl_relaxed</function>,
133 <function>readq_relaxed</function>, <function>writeb</function>,
134 <function>writew</function>, <function>writel</function> and
135 <function>writeq</function>.
136 </para>
137
138 <para>
139 Some devices (such as framebuffers) would like to use larger
140 transfers than 8 bytes at a time. For these devices, the
141 <function>memcpy_toio</function>, <function>memcpy_fromio</function>
142 and <function>memset_io</function> functions are provided.
143 Do not use memset or memcpy on IO addresses; they
144 are not guaranteed to copy data in order.
145 </para>
146
147 <para>
148 The read and write functions are defined to be ordered. That is, the
149 compiler is not permitted to reorder the I/O sequence. When the
150 ordering can be compiler optimised, you can use <function>
151 __readb</function> and friends to indicate the relaxed ordering. Use
152 this with care.
153 </para>
154
155 <para>
156 While the basic functions are defined to be synchronous with respect
157 to each other and ordered with respect to each other the busses the
158 devices sit on may themselves have asynchronicity. In particular many
159 authors are burned by the fact that PCI bus writes are posted
160 asynchronously. A driver author must issue a read from the same
161 device to ensure that writes have occurred in the specific cases the
162 author cares about. This kind of property cannot be hidden from driver
163 writers in the API. In some cases, the read used to flush the device
164 may be expected to fail (if the card is resetting, for example). In
165 that case, the read should be done from config space, which is
166 guaranteed to soft-fail if the card doesn't respond.
167 </para>
168
169 <para>
170 The following is an example of flushing a write to a device when
171 the driver would like to ensure the write's effects are visible prior
172 to continuing execution.
173 </para>
174
175<programlisting>
176static inline void
177qla1280_disable_intrs(struct scsi_qla_host *ha)
178{
179 struct device_reg *reg;
180
181 reg = ha->iobase;
182 /* disable risc and host interrupts */
183 WRT_REG_WORD(&amp;reg->ictrl, 0);
184 /*
185 * The following read will ensure that the above write
186 * has been received by the device before we return from this
187 * function.
188 */
189 RD_REG_WORD(&amp;reg->ictrl);
190 ha->flags.ints_enabled = 0;
191}
192</programlisting>
193
194 <para>
195 In addition to write posting, on some large multiprocessing systems
196 (e.g. SGI Challenge, Origin and Altix machines) posted writes won't
197 be strongly ordered coming from different CPUs. Thus it's important
198 to properly protect parts of your driver that do memory-mapped writes
199 with locks and use the <function>mmiowb</function> to make sure they
200 arrive in the order intended. Issuing a regular <function>readX
201 </function> will also ensure write ordering, but should only be used
202 when the driver has to be sure that the write has actually arrived
203 at the device (not that it's simply ordered with respect to other
204 writes), since a full <function>readX</function> is a relatively
205 expensive operation.
206 </para>
207
208 <para>
209 Generally, one should use <function>mmiowb</function> prior to
210 releasing a spinlock that protects regions using <function>writeb
211 </function> or similar functions that aren't surrounded by <function>
212 readb</function> calls, which will ensure ordering and flushing. The
213 following pseudocode illustrates what might occur if write ordering
214 isn't guaranteed via <function>mmiowb</function> or one of the
215 <function>readX</function> functions.
216 </para>
217
218<programlisting>
219CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
220CPU A: ...
221CPU A: writel(newval, ring_ptr);
222CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
223 ...
224CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
225CPU B: writel(newval2, ring_ptr);
226CPU B: ...
227CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
228</programlisting>
229
230 <para>
231 In the case above, newval2 could be written to ring_ptr before
232 newval. Fixing it is easy though:
233 </para>
234
235<programlisting>
236CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
237CPU A: ...
238CPU A: writel(newval, ring_ptr);
239CPU A: mmiowb(); /* ensure no other writes beat us to the device */
240CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
241 ...
242CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
243CPU B: writel(newval2, ring_ptr);
244CPU B: ...
245CPU B: mmiowb();
246CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
247</programlisting>
248
249 <para>
250 See tg3.c for a real world example of how to use <function>mmiowb
251 </function>
252 </para>
253
254 <para>
255 PCI ordering rules also guarantee that PIO read responses arrive
256 after any outstanding DMA writes from that bus, since for some devices
257 the result of a <function>readb</function> call may signal to the
258 driver that a DMA transaction is complete. In many cases, however,
259 the driver may want to indicate that the next
260 <function>readb</function> call has no relation to any previous DMA
261 writes performed by the device. The driver can use
262 <function>readb_relaxed</function> for these cases, although only
263 some platforms will honor the relaxed semantics. Using the relaxed
264 read functions will provide significant performance benefits on
265 platforms that support it. The qla2xxx driver provides examples
266 of how to use <function>readX_relaxed</function>. In many cases,
267 a majority of the driver's <function>readX</function> calls can
268 safely be converted to <function>readX_relaxed</function> calls, since
269 only a few will indicate or depend on DMA completion.
270 </para>
271 </sect1>
272
273 </chapter>
274
275 <chapter id="port_space_accesses">
276 <title>Port Space Accesses</title>
277 <sect1 id="port_space_explained">
278 <title>Port Space Explained</title>
279
280 <para>
281 Another form of IO commonly supported is Port Space. This is a
282 range of addresses separate to the normal memory address space.
283 Access to these addresses is generally not as fast as accesses
284 to the memory mapped addresses, and it also has a potentially
285 smaller address space.
286 </para>
287
288 <para>
289 Unlike memory mapped IO, no preparation is required
290 to access port space.
291 </para>
292
293 </sect1>
294 <sect1 id="accessing_port_space">
295 <title>Accessing Port Space</title>
296 <para>
297 Accesses to this space are provided through a set of functions
298 which allow 8-bit, 16-bit and 32-bit accesses; also
299 known as byte, word and long. These functions are
300 <function>inb</function>, <function>inw</function>,
301 <function>inl</function>, <function>outb</function>,
302 <function>outw</function> and <function>outl</function>.
303 </para>
304
305 <para>
306 Some variants are provided for these functions. Some devices
307 require that accesses to their ports are slowed down. This
308 functionality is provided by appending a <function>_p</function>
309 to the end of the function. There are also equivalents to memcpy.
310 The <function>ins</function> and <function>outs</function>
311 functions copy bytes, words or longs to the given port.
312 </para>
313 </sect1>
314
315 </chapter>
316
317 <chapter id="pubfunctions">
318 <title>Public Functions Provided</title>
319!Iarch/x86/include/asm/io.h
320!Elib/pci_iomap.c
321 </chapter>
322
323</book>
diff --git a/Documentation/DocBook/iio.tmpl b/Documentation/DocBook/iio.tmpl
deleted file mode 100644
index e2ab6a1f223e..000000000000
--- a/Documentation/DocBook/iio.tmpl
+++ /dev/null
@@ -1,697 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="iioid">
6 <bookinfo>
7 <title>Industrial I/O driver developer's guide </title>
8
9 <authorgroup>
10 <author>
11 <firstname>Daniel</firstname>
12 <surname>Baluta</surname>
13 <affiliation>
14 <address>
15 <email>daniel.baluta@intel.com</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <copyright>
22 <year>2015</year>
23 <holder>Intel Corporation</holder>
24 </copyright>
25
26 <legalnotice>
27 <para>
28 This documentation is free software; you can redistribute
29 it and/or modify it under the terms of the GNU General Public
30 License version 2.
31 </para>
32 </legalnotice>
33 </bookinfo>
34
35 <toc></toc>
36
37 <chapter id="intro">
38 <title>Introduction</title>
39 <para>
40 The main purpose of the Industrial I/O subsystem (IIO) is to provide
41 support for devices that in some sense perform either analog-to-digital
42 conversion (ADC) or digital-to-analog conversion (DAC) or both. The aim
43 is to fill the gap between the somewhat similar hwmon and input
44 subsystems.
45 Hwmon is directed at low sample rate sensors used to monitor and
46 control the system itself, like fan speed control or temperature
47 measurement. Input is, as its name suggests, focused on human interaction
48 input devices (keyboard, mouse, touchscreen). In some cases there is
49 considerable overlap between these and IIO.
50 </para>
51 <para>
52 Devices that fall into this category include:
53 <itemizedlist>
54 <listitem>
55 analog to digital converters (ADCs)
56 </listitem>
57 <listitem>
58 accelerometers
59 </listitem>
60 <listitem>
61 capacitance to digital converters (CDCs)
62 </listitem>
63 <listitem>
64 digital to analog converters (DACs)
65 </listitem>
66 <listitem>
67 gyroscopes
68 </listitem>
69 <listitem>
70 inertial measurement units (IMUs)
71 </listitem>
72 <listitem>
73 color and light sensors
74 </listitem>
75 <listitem>
76 magnetometers
77 </listitem>
78 <listitem>
79 pressure sensors
80 </listitem>
81 <listitem>
82 proximity sensors
83 </listitem>
84 <listitem>
85 temperature sensors
86 </listitem>
87 </itemizedlist>
88 Usually these sensors are connected via SPI or I2C. A common use case of the
89 sensor devices is to have combined functionality (e.g. light plus proximity
90 sensor).
91 </para>
92 </chapter>
93 <chapter id='iiosubsys'>
94 <title>Industrial I/O core</title>
95 <para>
96 The Industrial I/O core offers:
97 <itemizedlist>
98 <listitem>
99 a unified framework for writing drivers for many different types of
100 embedded sensors.
101 </listitem>
102 <listitem>
103 a standard interface to user space applications manipulating sensors.
104 </listitem>
105 </itemizedlist>
106 The implementation can be found under <filename>
107 drivers/iio/industrialio-*</filename>
108 </para>
109 <sect1 id="iiodevice">
110 <title> Industrial I/O devices </title>
111
112!Finclude/linux/iio/iio.h iio_dev
113!Fdrivers/iio/industrialio-core.c iio_device_alloc
114!Fdrivers/iio/industrialio-core.c iio_device_free
115!Fdrivers/iio/industrialio-core.c iio_device_register
116!Fdrivers/iio/industrialio-core.c iio_device_unregister
117
118 <para>
119 An IIO device usually corresponds to a single hardware sensor and it
120 provides all the information needed by a driver handling a device.
121 Let's first have a look at the functionality embedded in an IIO
122 device then we will show how a device driver makes use of an IIO
123 device.
124 </para>
125 <para>
126 There are two ways for a user space application to interact
127 with an IIO driver.
128 <itemizedlist>
129 <listitem>
130 <filename>/sys/bus/iio/iio:deviceX/</filename>, this
131 represents a hardware sensor and groups together the data
132 channels of the same chip.
133 </listitem>
134 <listitem>
135 <filename>/dev/iio:deviceX</filename>, character device node
136 interface used for buffered data transfer and for events information
137 retrieval.
138 </listitem>
139 </itemizedlist>
140 </para>
141 A typical IIO driver will register itself as an I2C or SPI driver and will
142 create two routines, <function> probe </function> and <function> remove
143 </function>. At <function>probe</function>:
144 <itemizedlist>
145 <listitem>call <function>iio_device_alloc</function>, which allocates memory
146 for an IIO device.
147 </listitem>
148 <listitem> initialize IIO device fields with driver specific information
149 (e.g. device name, device channels).
150 </listitem>
151 <listitem>call <function> iio_device_register</function>, this registers the
152 device with the IIO core. After this call the device is ready to accept
153 requests from user space applications.
154 </listitem>
155 </itemizedlist>
156 At <function>remove</function>, we free the resources allocated in
157 <function>probe</function> in reverse order:
158 <itemizedlist>
159 <listitem><function>iio_device_unregister</function>, unregister the device
160 from the IIO core.
161 </listitem>
162 <listitem><function>iio_device_free</function>, free the memory allocated
163 for the IIO device.
164 </listitem>
165 </itemizedlist>
166
167 <sect2 id="iioattr"> <title> IIO device sysfs interface </title>
168 <para>
169 Attributes are sysfs files used to expose chip info and also allow
170 applications to set various configuration parameters. For device
171 with index X, attributes can be found under
172 <filename>/sys/bus/iio/iio:deviceX/ </filename> directory.
173 Common attributes are:
174 <itemizedlist>
175 <listitem><filename>name</filename>, description of the physical
176 chip.
177 </listitem>
178 <listitem><filename>dev</filename>, shows the major:minor pair
179 associated with <filename>/dev/iio:deviceX</filename> node.
180 </listitem>
181 <listitem><filename>sampling_frequency_available</filename>,
182 available discrete set of sampling frequency values for
183 device.
184 </listitem>
185 </itemizedlist>
186 Available standard attributes for IIO devices are described in the
187 <filename>Documentation/ABI/testing/sysfs-bus-iio </filename> file
188 in the Linux kernel sources.
189 </para>
190 </sect2>
191 <sect2 id="iiochannel"> <title> IIO device channels </title>
192!Finclude/linux/iio/iio.h iio_chan_spec structure.
193 <para>
194 An IIO device channel is a representation of a data channel. An
195 IIO device can have one or multiple channels. For example:
196 <itemizedlist>
197 <listitem>
198 a thermometer sensor has one channel representing the
199 temperature measurement.
200 </listitem>
201 <listitem>
202 a light sensor with two channels indicating the measurements in
203 the visible and infrared spectrum.
204 </listitem>
205 <listitem>
206 an accelerometer can have up to 3 channels representing
207 acceleration on X, Y and Z axes.
208 </listitem>
209 </itemizedlist>
210 An IIO channel is described by the <type> struct iio_chan_spec
211 </type>. A thermometer driver for the temperature sensor in the
212 example above would have to describe its channel as follows:
213 <programlisting>
214 static const struct iio_chan_spec temp_channel[] = {
215 {
216 .type = IIO_TEMP,
217 .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
218 },
219 };
220
221 </programlisting>
222 Channel sysfs attributes exposed to userspace are specified in
223 the form of <emphasis>bitmasks</emphasis>. Depending on their
224 shared info, attributes can be set in one of the following masks:
225 <itemizedlist>
226 <listitem><emphasis>info_mask_separate</emphasis>, attributes will
227 be specific to this channel</listitem>
228 <listitem><emphasis>info_mask_shared_by_type</emphasis>,
229 attributes are shared by all channels of the same type</listitem>
230 <listitem><emphasis>info_mask_shared_by_dir</emphasis>, attributes
231 are shared by all channels of the same direction </listitem>
232 <listitem><emphasis>info_mask_shared_by_all</emphasis>,
233 attributes are shared by all channels</listitem>
234 </itemizedlist>
235 When there are multiple data channels per channel type we have two
236 ways to distinguish between them:
237 <itemizedlist>
238 <listitem> set <emphasis> .modified</emphasis> field of <type>
239 iio_chan_spec</type> to 1. Modifiers are specified using
240 <emphasis>.channel2</emphasis> field of the same
241 <type>iio_chan_spec</type> structure and are used to indicate a
242 physically unique characteristic of the channel such as its direction
243 or spectral response. For example, a light sensor can have two channels,
244 one for infrared light and one for both infrared and visible light.
245 </listitem>
246 <listitem> set <emphasis>.indexed </emphasis> field of
247 <type>iio_chan_spec</type> to 1. In this case the channel is
248 simply another instance with an index specified by the
249 <emphasis>.channel</emphasis> field.
250 </listitem>
251 </itemizedlist>
252 Here is how we can make use of the channel's modifiers:
253 <programlisting>
254 static const struct iio_chan_spec light_channels[] = {
255 {
256 .type = IIO_INTENSITY,
257 .modified = 1,
258 .channel2 = IIO_MOD_LIGHT_IR,
259 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
260 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
261 },
262 {
263 .type = IIO_INTENSITY,
264 .modified = 1,
265 .channel2 = IIO_MOD_LIGHT_BOTH,
266 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
267 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
268 },
269 {
270 .type = IIO_LIGHT,
271 .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
272 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
273 },
274
275 }
276 </programlisting>
277 This channel's definition will generate two separate sysfs files
278 for raw data retrieval:
279 <itemizedlist>
280 <listitem>
281 <filename>/sys/bus/iio/iio:deviceX/in_intensity_ir_raw</filename>
282 </listitem>
283 <listitem>
284 <filename>/sys/bus/iio/iio:deviceX/in_intensity_both_raw</filename>
285 </listitem>
286 </itemizedlist>
287 one file for processed data:
288 <itemizedlist>
289 <listitem>
290 <filename>/sys/bus/iio/iio:deviceX/in_illuminance_input
291 </filename>
292 </listitem>
293 </itemizedlist>
294 and one shared sysfs file for sampling frequency:
295 <itemizedlist>
296 <listitem>
297 <filename>/sys/bus/iio/iio:deviceX/sampling_frequency.
298 </filename>
299 </listitem>
300 </itemizedlist>
301 </para>
302 <para>
303 Here is how we can make use of the channel's indexing:
304 <programlisting>
305 static const struct iio_chan_spec light_channels[] = {
306 {
307 .type = IIO_VOLTAGE,
308 .indexed = 1,
309 .channel = 0,
310 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
311 },
312 {
313 .type = IIO_VOLTAGE,
314 .indexed = 1,
315 .channel = 1,
316 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
317 },
318 }
319 </programlisting>
320 This will generate two separate attributes files for raw data
321 retrieval:
322 <itemizedlist>
323 <listitem>
324 <filename>/sys/bus/iio/devices/iio:deviceX/in_voltage0_raw</filename>,
325 representing voltage measurement for channel 0.
326 </listitem>
327 <listitem>
328 <filename>/sys/bus/iio/devices/iio:deviceX/in_voltage1_raw</filename>,
329 representing voltage measurement for channel 1.
330 </listitem>
331 </itemizedlist>
332 </para>
333 </sect2>
334 </sect1>
335
336 <sect1 id="iiobuffer"> <title> Industrial I/O buffers </title>
337!Finclude/linux/iio/buffer.h iio_buffer
338!Edrivers/iio/industrialio-buffer.c
339
340 <para>
341 The Industrial I/O core offers a way for continuous data capture
342 based on a trigger source. Multiple data channels can be read at once
343 from <filename>/dev/iio:deviceX</filename> character device node,
344 thus reducing the CPU load.
345 </para>
346
347 <sect2 id="iiobuffersysfs">
348 <title>IIO buffer sysfs interface </title>
349 <para>
350 An IIO buffer has an associated attributes directory under <filename>
351 /sys/bus/iio/iio:deviceX/buffer/</filename>. Here are the existing
352 attributes:
353 <itemizedlist>
354 <listitem>
355 <emphasis>length</emphasis>, the total number of data samples
356 (capacity) that can be stored by the buffer.
357 </listitem>
358 <listitem>
359 <emphasis>enable</emphasis>, activate buffer capture.
360 </listitem>
361 </itemizedlist>
362
363 </para>
364 </sect2>
365 <sect2 id="iiobuffersetup"> <title> IIO buffer setup </title>
366 <para>The meta information associated with a channel reading
367 placed in a buffer is called a <emphasis> scan element </emphasis>.
368 The important bits configuring scan elements are exposed to
369 userspace applications via the <filename>
370 /sys/bus/iio/iio:deviceX/scan_elements/</filename> directory. This
371 directory contains attributes of the following form:
372 <itemizedlist>
373 <listitem><emphasis>enable</emphasis>, used for enabling a channel.
374 If and only if its attribute is non zero, then a triggered capture
375 will contain data samples for this channel.
376 </listitem>
377 <listitem><emphasis>type</emphasis>, description of the scan element
378 data storage within the buffer and hence the form in which it is
379 read from user space. Format is <emphasis>
380 [be|le]:[s|u]bits/storagebitsXrepeat[>>shift] </emphasis>.
381 <itemizedlist>
382 <listitem> <emphasis>be</emphasis> or <emphasis>le</emphasis>, specifies
383 big or little endian.
384 </listitem>
385 <listitem>
386 <emphasis>s </emphasis>or <emphasis>u</emphasis>, specifies if
387 signed (2's complement) or unsigned.
388 </listitem>
389 <listitem><emphasis>bits</emphasis>, is the number of valid data
390 bits.
391 </listitem>
392 <listitem><emphasis>storagebits</emphasis>, is the number of bits
393 (after padding) that it occupies in the buffer.
394 </listitem>
395 <listitem>
396 <emphasis>shift</emphasis>, if specified, is the shift that needs
397 to be applied prior to masking out unused bits.
398 </listitem>
399 <listitem>
400 <emphasis>repeat</emphasis>, specifies the number of bits/storagebits
401 repetitions. When the repeat element is 0 or 1, then the repeat
402 value is omitted.
403 </listitem>
404 </itemizedlist>
405 </listitem>
406 </itemizedlist>
407 For example, a driver for a 3-axis accelerometer with 12 bit
408 resolution where data is stored in two 8-bit registers as
409 follows:
410 <programlisting>
411 7 6 5 4 3 2 1 0
412 +---+---+---+---+---+---+---+---+
413 |D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06)
414 +---+---+---+---+---+---+---+---+
415
416 7 6 5 4 3 2 1 0
417 +---+---+---+---+---+---+---+---+
418 |D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07)
419 +---+---+---+---+---+---+---+---+
420 </programlisting>
421
422 will have the following scan element type for each axis:
423 <programlisting>
424 $ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type
425 le:s12/16>>4
426 </programlisting>
427 A user space application will interpret data samples read from the
428 buffer as two byte little endian signed data, that needs a 4 bits
429 right shift before masking out the 12 valid bits of data.
430 </para>
431 <para>
432 For implementing buffer support a driver should initialize the following
433 fields in <type>iio_chan_spec</type> definition:
434 <programlisting>
435 struct iio_chan_spec {
436 /* other members */
437 int scan_index;
438 struct {
439 char sign;
440 u8 realbits;
441 u8 storagebits;
442 u8 shift;
443 u8 repeat;
444 enum iio_endian endianness;
445 } scan_type;
446 };
447 </programlisting>
448 The driver implementing the accelerometer described above will
449 have the following channel definition:
450 <programlisting>
451 struct iio_chan_spec accel_channels[] = {
452 {
453 .type = IIO_ACCEL,
454 .modified = 1,
455 .channel2 = IIO_MOD_X,
456 /* other stuff here */
457 .scan_index = 0,
458 .scan_type = {
459 .sign = 's',
460 .realbits = 12,
461 .storagebits = 16,
462 .shift = 4,
463 .endianness = IIO_LE,
464 },
465 }
466 /* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1)
467 * and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis
468 */
469 };
470 </programlisting>
471 </para>
472 <para>
473 Here <emphasis> scan_index </emphasis> defines the order in which
474 the enabled channels are placed inside the buffer. Channels with a lower
475 scan_index will be placed before channels with a higher index. Each
476 channel needs to have a unique scan_index.
477 </para>
478 <para>
479 Setting scan_index to -1 can be used to indicate that the specific
480 channel does not support buffered capture. In this case no entries will
481 be created for the channel in the scan_elements directory.
482 </para>
483 </sect2>
484 </sect1>
485
486 <sect1 id="iiotrigger"> <title> Industrial I/O triggers </title>
487!Finclude/linux/iio/trigger.h iio_trigger
488!Edrivers/iio/industrialio-trigger.c
489 <para>
490 In many situations it is useful for a driver to be able to
491 capture data based on some external event (trigger) as opposed
492 to periodically polling for data. An IIO trigger can be provided
493 by a device driver that also has an IIO device based on hardware
494 generated events (e.g. data ready or threshold exceeded) or
495 provided by a separate driver from an independent interrupt
496 source (e.g. GPIO line connected to some external system, timer
497 interrupt or user space writing a specific file in sysfs). A
498 trigger may initiate data capture for a number of sensors and
499 also it may be completely unrelated to the sensor itself.
500 </para>
501
502 <sect2 id="iiotrigsysfs"> <title> IIO trigger sysfs interface </title>
503 There are two locations in sysfs related to triggers:
504 <itemizedlist>
505 <listitem><filename>/sys/bus/iio/devices/triggerY</filename>,
506 this file is created once an IIO trigger is registered with
507 the IIO core and corresponds to trigger with index Y. Because
508 triggers can be very different depending on type there are few
509 standard attributes that we can describe here:
510 <itemizedlist>
511 <listitem>
512 <emphasis>name</emphasis>, trigger name that can be later
513 used for association with a device.
514 </listitem>
515 <listitem>
516 <emphasis>sampling_frequency</emphasis>, some timer based
517 triggers use this attribute to specify the frequency for
518 trigger calls.
519 </listitem>
520 </itemizedlist>
521 </listitem>
522 <listitem>
523 <filename>/sys/bus/iio/devices/iio:deviceX/trigger/</filename>, this
524 directory is created once the device supports a triggered
525 buffer. We can associate a trigger with our device by writing
526 the trigger's name in the <filename>current_trigger</filename> file.
527 </listitem>
528 </itemizedlist>
529 </sect2>
530
531 <sect2 id="iiotrigattr"> <title> IIO trigger setup</title>
532
533 <para>
534 Let's see a simple example of how to setup a trigger to be used
535 by a driver.
536
537 <programlisting>
538 struct iio_trigger_ops trigger_ops = {
539 .set_trigger_state = sample_trigger_state,
540 .validate_device = sample_validate_device,
541 };
542
543 struct iio_trigger *trig;
544
545 /* first, allocate memory for our trigger */
546 trig = iio_trigger_alloc(dev, "trig-%s-%d", name, idx);
547
548 /* setup trigger operations field */
549 trig->ops = &amp;trigger_ops;
550
551 /* now register the trigger with the IIO core */
552 iio_trigger_register(trig);
553 </programlisting>
554 </para>
555 </sect2>
556
557 <sect2 id="iiotrigsetup"> <title> IIO trigger ops</title>
558!Finclude/linux/iio/trigger.h iio_trigger_ops
559 <para>
560 Notice that a trigger has a set of operations attached:
561 <itemizedlist>
562 <listitem>
563 <function>set_trigger_state</function>, switch the trigger on/off
564 on demand.
565 </listitem>
566 <listitem>
567 <function>validate_device</function>, function to validate the
568 device when the current trigger gets changed.
569 </listitem>
570 </itemizedlist>
571 </para>
572 </sect2>
573 </sect1>
574 <sect1 id="iiotriggered_buffer">
575 <title> Industrial I/O triggered buffers </title>
576 <para>
577 Now that we know what buffers and triggers are let's see how they
578 work together.
579 </para>
580 <sect2 id="iiotrigbufsetup"> <title> IIO triggered buffer setup</title>
581!Edrivers/iio/buffer/industrialio-triggered-buffer.c
582!Finclude/linux/iio/iio.h iio_buffer_setup_ops
583
584
585 <para>
586 A typical triggered buffer setup looks like this:
587 <programlisting>
588 const struct iio_buffer_setup_ops sensor_buffer_setup_ops = {
589 .preenable = sensor_buffer_preenable,
590 .postenable = sensor_buffer_postenable,
591 .postdisable = sensor_buffer_postdisable,
592 .predisable = sensor_buffer_predisable,
593 };
594
595 irqreturn_t sensor_iio_pollfunc(int irq, void *p)
596 {
597 pf->timestamp = iio_get_time_ns((struct indio_dev *)p);
598 return IRQ_WAKE_THREAD;
599 }
600
601 irqreturn_t sensor_trigger_handler(int irq, void *p)
602 {
603 u16 buf[8];
604 int i = 0;
605
606 /* read data for each active channel */
607 for_each_set_bit(bit, active_scan_mask, masklength)
608 buf[i++] = sensor_get_data(bit);
609
610 iio_push_to_buffers_with_timestamp(indio_dev, buf, timestamp);
611
612 iio_trigger_notify_done(trigger);
613 return IRQ_HANDLED;
614 }
615
616 /* setup triggered buffer, usually in probe function */
617 iio_triggered_buffer_setup(indio_dev, sensor_iio_pollfunc,
618 sensor_trigger_handler,
619 sensor_buffer_setup_ops);
620 </programlisting>
621 </para>
622 The important things to notice here are:
623 <itemizedlist>
624 <listitem><function> iio_buffer_setup_ops</function>, the buffer setup
625 functions to be called at predefined points in the buffer configuration
626 sequence (e.g. before enable, after disable). If not specified, the
627 IIO core uses the default <type>iio_triggered_buffer_setup_ops</type>.
628 </listitem>
629 <listitem><function>sensor_iio_pollfunc</function>, the function that
630 will be used as top half of poll function. It should do as little
631 processing as possible, because it runs in interrupt context. The most
632 common operation is recording of the current timestamp and for this reason
633 one can use the IIO core defined <function>iio_pollfunc_store_time
634 </function> function.
635 </listitem>
636 <listitem><function>sensor_trigger_handler</function>, the function that
637 will be used as bottom half of the poll function. This runs in the
638 context of a kernel thread and all the processing takes place here.
639 It usually reads data from the device and stores it in the internal
640 buffer together with the timestamp recorded in the top half.
641 </listitem>
642 </itemizedlist>
643 </sect2>
644 </sect1>
645 </chapter>
646 <chapter id='iioresources'>
647 <title> Resources </title>
648 IIO core may change over time so the best documentation to read is the
649 source code. There are several locations where you should look:
650 <itemizedlist>
651 <listitem>
652 <filename>drivers/iio/</filename>, contains the IIO core plus
653 directories for each sensor type (e.g. accel, magnetometer,
654 etc.)
655 </listitem>
656 <listitem>
657 <filename>include/linux/iio/</filename>, contains the header
658 files, nice to read for the internal kernel interfaces.
659 </listitem>
660 <listitem>
661 <filename>include/uapi/linux/iio/</filename>, contains files to be
662 used by user space applications.
663 </listitem>
664 <listitem>
665 <filename>tools/iio/</filename>, contains tools for rapidly
666 testing buffers, events and device creation.
667 </listitem>
668 <listitem>
669 <filename>drivers/staging/iio/</filename>, contains code for some
670 drivers or experimental features that are not yet mature enough
671 to be moved out.
672 </listitem>
673 </itemizedlist>
674 <para>
675 Besides the code, there are some good online documentation sources:
676 <itemizedlist>
677 <listitem>
678 <ulink url="http://marc.info/?l=linux-iio"> Industrial I/O mailing
679 list </ulink>
680 </listitem>
681 <listitem>
682 <ulink url="http://wiki.analog.com/software/linux/docs/iio/iio">
683 Analog Devices IIO wiki page </ulink>
684 </listitem>
685 <listitem>
686 <ulink url="https://fosdem.org/2015/schedule/event/iiosdr/">
687 Using the Linux IIO framework for SDR, Lars-Peter Clausen's
688 presentation at FOSDEM </ulink>
689 </listitem>
690 </itemizedlist>
691 </para>
692 </chapter>
693</book>
694
695<!--
696vim: softtabstop=2:shiftwidth=2:expandtab:textwidth=72
697-->
diff --git a/Documentation/DocBook/regulator.tmpl b/Documentation/DocBook/regulator.tmpl
deleted file mode 100644
index 3b08a085d2c7..000000000000
--- a/Documentation/DocBook/regulator.tmpl
+++ /dev/null
@@ -1,304 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="regulator-api">
6 <bookinfo>
7 <title>Voltage and current regulator API</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Liam</firstname>
12 <surname>Girdwood</surname>
13 <affiliation>
14 <address>
15 <email>lrg@slimlogic.co.uk</email>
16 </address>
17 </affiliation>
18 </author>
19 <author>
20 <firstname>Mark</firstname>
21 <surname>Brown</surname>
22 <affiliation>
23 <orgname>Wolfson Microelectronics</orgname>
24 <address>
25 <email>broonie@opensource.wolfsonmicro.com</email>
26 </address>
27 </affiliation>
28 </author>
29 </authorgroup>
30
31 <copyright>
32 <year>2007-2008</year>
33 <holder>Wolfson Microelectronics</holder>
34 </copyright>
35 <copyright>
36 <year>2008</year>
37 <holder>Liam Girdwood</holder>
38 </copyright>
39
40 <legalnotice>
41 <para>
42 This documentation is free software; you can redistribute
43 it and/or modify it under the terms of the GNU General Public
44 License version 2 as published by the Free Software Foundation.
45 </para>
46
47 <para>
48 This program is distributed in the hope that it will be
49 useful, but WITHOUT ANY WARRANTY; without even the implied
50 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
51 See the GNU General Public License for more details.
52 </para>
53
54 <para>
55 You should have received a copy of the GNU General Public
56 License along with this program; if not, write to the Free
57 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
58 MA 02111-1307 USA
59 </para>
60
61 <para>
62 For more details see the file COPYING in the source
63 distribution of Linux.
64 </para>
65 </legalnotice>
66 </bookinfo>
67
68<toc></toc>
69
70 <chapter id="intro">
71 <title>Introduction</title>
72 <para>
73 This framework is designed to provide a standard kernel
74 interface to control voltage and current regulators.
75 </para>
76 <para>
77 The intention is to allow systems to dynamically control
78 regulator power output in order to save power and prolong
79 battery life. This applies to both voltage regulators (where
80 voltage output is controllable) and current sinks (where current
81 limit is controllable).
82 </para>
83 <para>
84 Note that additional (and currently more complete) documentation
85 is available in the Linux kernel source under
86 <filename>Documentation/power/regulator</filename>.
87 </para>
88
89 <sect1 id="glossary">
90 <title>Glossary</title>
91 <para>
92 The regulator API uses a number of terms which may not be
93 familiar:
94 </para>
95 <glossary>
96
97 <glossentry>
98 <glossterm>Regulator</glossterm>
99 <glossdef>
100 <para>
101 Electronic device that supplies power to other devices. Most
102 regulators can enable and disable their output and some can also
103 control their output voltage or current.
104 </para>
105 </glossdef>
106 </glossentry>
107
108 <glossentry>
109 <glossterm>Consumer</glossterm>
110 <glossdef>
111 <para>
112 Electronic device which consumes power provided by a regulator.
113 These may either be static, requiring only a fixed supply, or
114 dynamic, requiring active management of the regulator at
115 runtime.
116 </para>
117 </glossdef>
118 </glossentry>
119
120 <glossentry>
121 <glossterm>Power Domain</glossterm>
122 <glossdef>
123 <para>
124 The electronic circuit supplied by a given regulator, including
125 the regulator and all consumer devices. The configuration of
126 the regulator is shared between all the components in the
127 circuit.
128 </para>
129 </glossdef>
130 </glossentry>
131
132 <glossentry>
133 <glossterm>Power Management Integrated Circuit</glossterm>
134 <acronym>PMIC</acronym>
135 <glossdef>
136 <para>
137 An IC which contains numerous regulators and often also other
138 subsystems. In an embedded system the primary PMIC is often
139 equivalent to a combination of the PSU and southbridge in a
140 desktop system.
141 </para>
142 </glossdef>
143 </glossentry>
144 </glossary>
145 </sect1>
146 </chapter>
147
148 <chapter id="consumer">
149 <title>Consumer driver interface</title>
150 <para>
151 This offers a similar API to the kernel clock framework.
152 Consumer drivers use <link
153 linkend='API-regulator-get'>get</link> and <link
154 linkend='API-regulator-put'>put</link> operations to acquire and
155 release regulators. Functions are
156 provided to <link linkend='API-regulator-enable'>enable</link>
157 and <link linkend='API-regulator-disable'>disable</link> the
158 regulator and to get and set the runtime parameters of the
159 regulator.
160 </para>
161 <para>
162 When requesting regulators consumers use symbolic names for their
163 supplies, such as "Vcc", which are mapped into actual regulator
164 devices by the machine interface.
165 </para>
166 <para>
167 A stub version of this API is provided when the regulator
168 framework is not in use in order to minimise the need to use
169 ifdefs.
170 </para>
171
172 <sect1 id="consumer-enable">
173 <title>Enabling and disabling</title>
174 <para>
175 The regulator API provides reference counted enabling and
176 disabling of regulators. Consumer devices use the <function><link
177 linkend='API-regulator-enable'>regulator_enable</link></function>
178 and <function><link
179 linkend='API-regulator-disable'>regulator_disable</link>
180 </function> functions to enable and disable regulators. Calls
181 to the two functions must be balanced.
182 </para>
183 <para>
184 Note that since multiple consumers may be using a regulator and
185 machine constraints may not allow the regulator to be disabled
186 there is no guarantee that calling
187 <function>regulator_disable</function> will actually cause the
188 supply provided by the regulator to be disabled. Consumer
189 drivers should assume that the regulator may be enabled at all
190 times.
191 </para>
192 </sect1>
193
194 <sect1 id="consumer-config">
195 <title>Configuration</title>
196 <para>
197 Some consumer devices may need to be able to dynamically
198 configure their supplies. For example, MMC drivers may need to
199 select the correct operating voltage for their cards. This may
200 be done while the regulator is enabled or disabled.
201 </para>
202 <para>
203 The <function><link
204 linkend='API-regulator-set-voltage'>regulator_set_voltage</link>
205 </function> and <function><link
206 linkend='API-regulator-set-current-limit'
207 >regulator_set_current_limit</link>
208 </function> functions provide the primary interface for this.
209 Both take ranges of voltages and currents, supporting drivers
210 that do not require a specific value (eg, CPU frequency scaling
211 normally permits the CPU to use a wider range of supply
212 voltages at lower frequencies but does not require that the
213 supply voltage be lowered). Where an exact value is required
214 both minimum and maximum values should be identical.
215 </para>
216 </sect1>
217
218 <sect1 id="consumer-callback">
219 <title>Callbacks</title>
220 <para>
221 Callbacks may also be <link
222 linkend='API-regulator-register-notifier'>registered</link>
223 for events such as regulation failures.
224 </para>
225 </sect1>
226 </chapter>
227
228 <chapter id="driver">
229 <title>Regulator driver interface</title>
230 <para>
231 Drivers for regulator chips <link
232 linkend='API-regulator-register'>register</link> the regulators
233 with the regulator core, providing operations structures to the
234 core. A <link
235 linkend='API-regulator-notifier-call-chain'>notifier</link> interface
236 allows error conditions to be reported to the core.
237 </para>
238 <para>
239 Registration should be triggered by explicit setup done by the
240 platform, supplying a <link
241 linkend='API-struct-regulator-init-data'>struct
242 regulator_init_data</link> for the regulator containing
243 <link linkend='machine-constraint'>constraint</link> and
244 <link linkend='machine-supply'>supply</link> information.
245 </para>
246 </chapter>
247
248 <chapter id="machine">
249 <title>Machine interface</title>
250 <para>
251 This interface provides a way to define how regulators are
252 connected to consumers on a given system and what the valid
253 operating parameters are for the system.
254 </para>
255
256 <sect1 id="machine-supply">
257 <title>Supplies</title>
258 <para>
259 Regulator supplies are specified using <link
260 linkend='API-struct-regulator-consumer-supply'>struct
261 regulator_consumer_supply</link>. This is done at
262 <link linkend='driver'>driver registration
263 time</link> as part of the machine constraints.
264 </para>
265 </sect1>
266
267 <sect1 id="machine-constraint">
268 <title>Constraints</title>
269 <para>
270 As well as defining the connections the machine interface
271 also provides constraints defining the operations that
272 clients are allowed to perform and the parameters that may be
273 set. This is required since generally regulator devices will
274 offer more flexibility than it is safe to use on a given
275 system, for example supporting higher supply voltages than the
276 consumers are rated for.
277 </para>
278 <para>
279 This is done at <link linkend='driver'>driver
280 registration time</link> by providing a <link
281 linkend='API-struct-regulation-constraints'>struct
282 regulation_constraints</link>.
283 </para>
284 <para>
285 The constraints may also specify an initial configuration for the
286 regulator in the constraints, which is particularly useful for
287 use with static consumers.
288 </para>
289 </sect1>
290 </chapter>
291
292 <chapter id="api">
293 <title>API reference</title>
294 <para>
295 Due to limitations of the kernel documentation framework and the
296 existing layout of the source code the entire regulator API is
297 documented here.
298 </para>
299!Iinclude/linux/regulator/consumer.h
300!Iinclude/linux/regulator/machine.h
301!Iinclude/linux/regulator/driver.h
302!Edrivers/regulator/core.c
303 </chapter>
304</book>
diff --git a/Documentation/Makefile.sphinx b/Documentation/Makefile.sphinx
index 707c65337ebf..bcf529f6cf9b 100644
--- a/Documentation/Makefile.sphinx
+++ b/Documentation/Makefile.sphinx
@@ -43,7 +43,7 @@ ALLSPHINXOPTS = $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
43I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 43I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
44 44
45# commands; the 'cmd' from scripts/Kbuild.include is not *loopable* 45# commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
46loop_cmd = $(echo-cmd) $(cmd_$(1)) 46loop_cmd = $(echo-cmd) $(cmd_$(1)) || exit;
47 47
48# $2 sphinx builder e.g. "html" 48# $2 sphinx builder e.g. "html"
49# $3 name of the build subfolder / e.g. "media", used as: 49# $3 name of the build subfolder / e.g. "media", used as:
@@ -54,7 +54,8 @@ loop_cmd = $(echo-cmd) $(cmd_$(1))
54# e.g. "media" for the linux-tv book-set at ./Documentation/media 54# e.g. "media" for the linux-tv book-set at ./Documentation/media
55 55
56quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) 56quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
57 cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2;\ 57 cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media $2 && \
58 PYTHONDONTWRITEBYTECODE=1 \
58 BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \ 59 BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
59 $(SPHINXBUILD) \ 60 $(SPHINXBUILD) \
60 -b $2 \ 61 -b $2 \
@@ -63,13 +64,16 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
63 -D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \ 64 -D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
64 $(ALLSPHINXOPTS) \ 65 $(ALLSPHINXOPTS) \
65 $(abspath $(srctree)/$(src)/$5) \ 66 $(abspath $(srctree)/$(src)/$5) \
66 $(abspath $(BUILDDIR)/$3/$4); 67 $(abspath $(BUILDDIR)/$3/$4)
67 68
68htmldocs: 69htmldocs:
69 @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) 70 @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
71
72linkcheckdocs:
73 @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,linkcheck,$(var),,$(var)))
70 74
71latexdocs: 75latexdocs:
72 @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var))) 76 @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
73 77
74ifeq ($(HAVE_PDFLATEX),0) 78ifeq ($(HAVE_PDFLATEX),0)
75 79
@@ -80,27 +84,34 @@ pdfdocs:
80else # HAVE_PDFLATEX 84else # HAVE_PDFLATEX
81 85
82pdfdocs: latexdocs 86pdfdocs: latexdocs
83 $(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex;) 87 $(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=$(PDFLATEX) LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;)
84 88
85endif # HAVE_PDFLATEX 89endif # HAVE_PDFLATEX
86 90
87epubdocs: 91epubdocs:
88 @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var))) 92 @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
89 93
90xmldocs: 94xmldocs:
91 @$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var))) 95 @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
96
97endif # HAVE_SPHINX
98
99# The following targets are independent of HAVE_SPHINX, and the rules should
100# work or silently pass without Sphinx.
92 101
93# no-ops for the Sphinx toolchain 102# no-ops for the Sphinx toolchain
94sgmldocs: 103sgmldocs:
104 @:
95psdocs: 105psdocs:
106 @:
96mandocs: 107mandocs:
108 @:
97installmandocs: 109installmandocs:
110 @:
98 111
99cleandocs: 112cleandocs:
100 $(Q)rm -rf $(BUILDDIR) 113 $(Q)rm -rf $(BUILDDIR)
101 $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) -C Documentation/media clean 114 $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media clean
102
103endif # HAVE_SPHINX
104 115
105dochelp: 116dochelp:
106 @echo ' Linux kernel internal documentation in different formats (Sphinx):' 117 @echo ' Linux kernel internal documentation in different formats (Sphinx):'
@@ -109,6 +120,7 @@ dochelp:
109 @echo ' pdfdocs - PDF' 120 @echo ' pdfdocs - PDF'
110 @echo ' epubdocs - EPUB' 121 @echo ' epubdocs - EPUB'
111 @echo ' xmldocs - XML' 122 @echo ' xmldocs - XML'
123 @echo ' linkcheckdocs - check for broken external links (will connect to external hosts)'
112 @echo ' cleandocs - clean all generated files' 124 @echo ' cleandocs - clean all generated files'
113 @echo 125 @echo
114 @echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2' 126 @echo ' make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 1b6dfb2b3adb..697a00ccec25 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -17,7 +17,7 @@ What is Linux?
17 loading, shared copy-on-write executables, proper memory management, 17 loading, shared copy-on-write executables, proper memory management,
18 and multistack networking including IPv4 and IPv6. 18 and multistack networking including IPv4 and IPv6.
19 19
20 It is distributed under the GNU General Public License - see the 20 It is distributed under the GNU General Public License v2 - see the
21 accompanying COPYING file for more details. 21 accompanying COPYING file for more details.
22 22
23On what hardware does it run? 23On what hardware does it run?
@@ -236,7 +236,7 @@ Configuring the kernel
236 236
237 - Having unnecessary drivers will make the kernel bigger, and can 237 - Having unnecessary drivers will make the kernel bigger, and can
238 under some circumstances lead to problems: probing for a 238 under some circumstances lead to problems: probing for a
239 nonexistent controller card may confuse your other controllers 239 nonexistent controller card may confuse your other controllers.
240 240
241 - A kernel with math-emulation compiled in will still use the 241 - A kernel with math-emulation compiled in will still use the
242 coprocessor if one is present: the math emulation will just 242 coprocessor if one is present: the math emulation will just
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst
index 88adcfdf5b2b..12278a926370 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -93,9 +93,9 @@ Command Language Reference
93At the lexical level, a command comprises a sequence of words separated 93At the lexical level, a command comprises a sequence of words separated
94by spaces or tabs. So these are all equivalent:: 94by spaces or tabs. So these are all equivalent::
95 95
96 nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' > 96 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
97 <debugfs>/dynamic_debug/control 97 <debugfs>/dynamic_debug/control
98 nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' > 98 nullarbor:~ # echo -n ' file svcsock.c line 1603 +p ' >
99 <debugfs>/dynamic_debug/control 99 <debugfs>/dynamic_debug/control
100 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > 100 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
101 <debugfs>/dynamic_debug/control 101 <debugfs>/dynamic_debug/control
diff --git a/Documentation/block/pr.txt b/Documentation/block/pr.txt
index d3eb1ca65051..ac9b8e70e64b 100644
--- a/Documentation/block/pr.txt
+++ b/Documentation/block/pr.txt
@@ -90,7 +90,7 @@ and thus removes any access restriction implied by it.
904. IOC_PR_PREEMPT 904. IOC_PR_PREEMPT
91 91
92This ioctl command releases the existing reservation referred to by 92This ioctl command releases the existing reservation referred to by
93old_key and replaces it with a a new reservation of type for the 93old_key and replaces it with a new reservation of type for the
94reservation key new_key. 94reservation key new_key.
95 95
96 96
diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.txt
index e5ac5da86682..8402dd6de8df 100644
--- a/Documentation/cgroup-v1/cpusets.txt
+++ b/Documentation/cgroup-v1/cpusets.txt
@@ -615,7 +615,7 @@ to allocate a page of memory for that task.
615 615
616If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset 616If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
617will have its allowed CPU placement changed immediately. Similarly, 617will have its allowed CPU placement changed immediately. Similarly,
618if a task's pid is written to another cpusets 'cpuset.tasks' file, then its 618if a task's pid is written to another cpuset's 'tasks' file, then its
619allowed CPU placement is changed immediately. If such a task had been 619allowed CPU placement is changed immediately. If such a task had been
620bound to some subset of its cpuset using the sched_setaffinity() call, 620bound to some subset of its cpuset using the sched_setaffinity() call,
621the task will be allowed to run on any CPU allowed in its new cpuset, 621the task will be allowed to run on any CPU allowed in its new cpuset,
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 1ac958c0333d..f6823cf01275 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -58,7 +58,7 @@ master_doc = 'index'
58 58
59# General information about the project. 59# General information about the project.
60project = 'The Linux Kernel' 60project = 'The Linux Kernel'
61copyright = '2016, The kernel development community' 61copyright = 'The kernel development community'
62author = 'The kernel development community' 62author = 'The kernel development community'
63 63
64# The version info for the project you're documenting, acts as replacement for 64# The version info for the project you're documenting, acts as replacement for
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
new file mode 100644
index 000000000000..4a50ab7817f7
--- /dev/null
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -0,0 +1,372 @@
1=========================
2CPU hotplug in the Kernel
3=========================
4
5:Date: December, 2016
6:Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
7 Rusty Russell <rusty@rustcorp.com.au>,
8 Srivatsa Vaddagiri <vatsa@in.ibm.com>,
9 Ashok Raj <ashok.raj@intel.com>,
10 Joel Schopp <jschopp@austin.ibm.com>
11
12Introduction
13============
14
15Modern advances in system architectures have introduced advanced error
16reporting and correction capabilities in processors. There are a couple of OEMs that
17support NUMA hardware which are hot pluggable as well, where physical node
18insertion and removal require support for CPU hotplug.
19
20Such advances require CPUs available to a kernel to be removed either for
21provisioning reasons, or for RAS purposes to keep an offending CPU off
22system execution path. Hence the need for CPU hotplug support in the
23Linux kernel.
24
25A more novel use of CPU-hotplug support is its use today in suspend resume
26support for SMP. Dual-core and HT support makes even a laptop run SMP kernels
27which didn't support these methods.
28
29
30Command Line Switches
31=====================
32``maxcpus=n``
33 Restrict boot time CPUs to *n*. Say if you have four CPUs, using
34 ``maxcpus=2`` will only boot two. You can choose to bring the
35 other CPUs later online.
36
37``nr_cpus=n``
38 Restrict the total amount of CPUs the kernel will support. If the number
39 supplied here is lower than the number of physically available CPUs then
40 those CPUs can not be brought online later.
41
42``additional_cpus=n``
43 Use this to limit hotpluggable CPUs. This option sets
44 ``cpu_possible_mask = cpu_present_mask + additional_cpus``
45
46 This option is limited to the IA64 architecture.
47
48``possible_cpus=n``
49 This option sets ``possible_cpus`` bits in ``cpu_possible_mask``.
50
51 This option is limited to the X86 and S390 architecture.
52
53``cede_offline={"off","on"}``
54 Use this option to disable/enable putting offlined processors to an extended
55 ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
56 ``cede_offline`` is set to "on".
57
58 This option is limited to the PowerPC architecture.
59
60``cpu0_hotplug``
61 Allow to shutdown CPU0.
62
63 This option is limited to the X86 architecture.
64
65CPU maps
66========
67
68``cpu_possible_mask``
69 Bitmap of possible CPUs that can ever be available in the
70 system. This is used to allocate some boot time memory for per_cpu variables
71 that aren't designed to grow/shrink as CPUs are made available or removed.
72 Once set during boot time discovery phase, the map is static, i.e no bits
73 are added or removed anytime. Trimming it accurately for your system needs
74 upfront can save some boot time memory.
75
76``cpu_online_mask``
77 Bitmap of all CPUs currently online. It's set in ``__cpu_up()``
78 after a CPU is available for kernel scheduling and ready to receive
79 interrupts from devices. It's cleared when a CPU is brought down using
80 ``__cpu_disable()``, before which all OS services including interrupts are
81 migrated to another target CPU.
82
83``cpu_present_mask``
84 Bitmap of CPUs currently present in the system. Not all
85 of them may be online. When physical hotplug is processed by the relevant
86 subsystem (e.g ACPI) can change and new bit either be added or removed
87 from the map depending on the event is hot-add/hot-remove. There are currently
88 no locking rules as of now. Typical usage is to init topology during boot,
89 at which time hotplug is disabled.
90
91You really don't need to manipulate any of the system CPU maps. They should
92be read-only for most use. When setting up per-cpu resources almost always use
93``cpu_possible_mask`` or ``for_each_possible_cpu()`` to iterate. The macro
94``for_each_cpu()`` can be used to iterate over a custom CPU mask.
95
96Never use anything other than ``cpumask_t`` to represent bitmap of CPUs.
97
98
99Using CPU hotplug
100=================
101The kernel option *CONFIG_HOTPLUG_CPU* needs to be enabled. It is currently
102available on multiple architectures including ARM, MIPS, PowerPC and X86. The
103configuration is done via the sysfs interface: ::
104
105 $ ls -lh /sys/devices/system/cpu
106 total 0
107 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu0
108 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu1
109 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu2
110 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu3
111 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu4
112 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu5
113 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu6
114 drwxr-xr-x 9 root root 0 Dec 21 16:33 cpu7
115 drwxr-xr-x 2 root root 0 Dec 21 16:33 hotplug
116 -r--r--r-- 1 root root 4.0K Dec 21 16:33 offline
117 -r--r--r-- 1 root root 4.0K Dec 21 16:33 online
118 -r--r--r-- 1 root root 4.0K Dec 21 16:33 possible
119 -r--r--r-- 1 root root 4.0K Dec 21 16:33 present
120
121The files *offline*, *online*, *possible*, *present* represent the CPU masks.
122Each CPU folder contains an *online* file which controls the logical on (1) and
123off (0) state. To logically shutdown CPU4: ::
124
125 $ echo 0 > /sys/devices/system/cpu/cpu4/online
126 smpboot: CPU 4 is now offline
127
128Once the CPU is shutdown, it will be removed from */proc/interrupts*,
129*/proc/cpuinfo* and should also not be shown by the *top* command. To
130bring CPU4 back online: ::
131
132 $ echo 1 > /sys/devices/system/cpu/cpu4/online
133 smpboot: Booting Node 0 Processor 4 APIC 0x1
134
135The CPU is usable again. This should work on all CPUs. CPU0 is often special
136and excluded from CPU hotplug. On X86 the kernel option
137*CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
138shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
139used. Some known dependencies of CPU0:
140
141* Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
142* PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
143
144Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
145on CPU0.
146
147The CPU hotplug coordination
148============================
149
150The offline case
151----------------
152Once a CPU has been logically shutdown the teardown callbacks of registered
153hotplug states will be invoked, starting with ``CPUHP_ONLINE`` and terminating
154at state ``CPUHP_OFFLINE``. This includes:
155
156* If tasks are frozen due to a suspend operation then *cpuhp_tasks_frozen*
157 will be set to true.
158* All processes are migrated away from this outgoing CPU to new CPUs.
159 The new CPU is chosen from each process' current cpuset, which may be
160 a subset of all online CPUs.
161* All interrupts targeted to this CPU are migrated to a new CPU
162* timers are also migrated to a new CPU
163* Once all services are migrated, kernel calls an arch specific routine
164 ``__cpu_disable()`` to perform arch specific cleanup.
165
166Using the hotplug API
167---------------------
168It is possible to receive notifications once a CPU is offlined or onlined. This
169might be important to certain drivers which need to perform some kind of setup
170or clean up functions based on the number of available CPUs: ::
171
172 #include <linux/cpuhotplug.h>
173
174 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
175 Y_online, Y_prepare_down);
176
177*X* is the subsystem and *Y* the particular driver. The *Y_online* callback
178will be invoked during registration on all online CPUs. If an error
179occurs during the online callback the *Y_prepare_down* callback will be
180invoked on all CPUs on which the online callback was previously invoked.
181After registration completed, the *Y_online* callback will be invoked
182once a CPU is brought online and *Y_prepare_down* will be invoked when a
183CPU is shutdown. All resources which were previously allocated in
184*Y_online* should be released in *Y_prepare_down*.
185The return value *ret* is negative if an error occurred during the
186registration process. Otherwise a positive value is returned which
187contains the allocated hotplug for dynamically allocated states
188(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
189
190The callback can be removed by invoking ``cpuhp_remove_state()``. In case of a
191dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
192During the removal of a hotplug state the teardown callback will be invoked.
193
194Multiple instances
195~~~~~~~~~~~~~~~~~~
196If a driver has multiple instances and each instance needs to perform the
197callback independently then it is likely that a ''multi-state'' should be used.
198First a multi-state state needs to be registered: ::
199
200 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online",
201 Y_online, Y_prepare_down);
202 Y_hp_online = ret;
203
204The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()``
205except it prepares the callbacks for a multi state and does not invoke
206the callbacks. This is a one time setup.
207Once a new instance is allocated, you need to register this new instance: ::
208
209 ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
210
211This function will add this instance to your previously allocated
212*Y_hp_online* state and invoke the previously registered callback
213(*Y_online*) on all online CPUs. The *node* element is a ``struct
214hlist_node`` member of your per-instance data structure.
215
216On removal of the instance: ::
217 cpuhp_state_remove_instance(Y_hp_online, &d->node)
218
219should be invoked which will invoke the teardown callback on all online
220CPUs.
221
222Manual setup
223~~~~~~~~~~~~
224Usually it is handy to invoke setup and teardown callbacks on registration or
225removal of a state because usually the operation needs to be performed once a CPU
226goes online (offline) and during initial setup (shutdown) of the driver. However
227each registration and removal function is also available with a ``_nocalls``
228suffix which does not invoke the provided callbacks if the invocation of the
229callbacks is not desired. During the manual setup (or teardown) the functions
230``get_online_cpus()`` and ``put_online_cpus()`` should be used to inhibit CPU
231hotplug operations.
232
233
234The ordering of the events
235--------------------------
236The hotplug states are defined in ``include/linux/cpuhotplug.h``:
237
238* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
239 CPU is up.
240* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
241 just after the CPU has been brought up. The interrupts are off and
242 the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
243 the callbacks are invoked on the target CPU.
244* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
245 reserved for the dynamic allocation.
246* The states are invoked in the reverse order on CPU shutdown starting with
247 *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
248 invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
249
250A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
251However if an earlier invocation during the bring up or shutdown is required
252then an explicit state should be acquired. An explicit state might also be
253required if the hotplug event requires specific ordering in respect to
254another hotplug event.
255
256Testing of hotplug states
257=========================
258One way to verify whether a custom state is working as expected or not is to
259shutdown a CPU and then put it online again. It is also possible to put the CPU
260to a certain state (for instance *CPUHP_AP_ONLINE*) and then go back to
261*CPUHP_ONLINE*. This would simulate an error one state after *CPUHP_AP_ONLINE*
262which would lead to rollback to the online state.
263
264All registered states are enumerated in ``/sys/devices/system/cpu/hotplug/states``: ::
265
266 $ tail /sys/devices/system/cpu/hotplug/states
267 138: mm/vmscan:online
268 139: mm/vmstat:online
269 140: lib/percpu_cnt:online
270 141: acpi/cpu-drv:online
271 142: base/cacheinfo:online
272 143: virtio/net:online
273 144: x86/mce:online
274 145: printk:online
275 168: sched:active
276 169: online
277
278To rollback CPU4 to ``lib/percpu_cnt:online`` and back online just issue: ::
279
280 $ cat /sys/devices/system/cpu/cpu4/hotplug/state
281 169
282 $ echo 140 > /sys/devices/system/cpu/cpu4/hotplug/target
283 $ cat /sys/devices/system/cpu/cpu4/hotplug/state
284 140
285
286It is important to note that the teardown callback of state 140 has been
287invoked. And now get back online: ::
288
289 $ echo 169 > /sys/devices/system/cpu/cpu4/hotplug/target
290 $ cat /sys/devices/system/cpu/cpu4/hotplug/state
291 169
292
293With trace events enabled, the individual steps are visible, too: ::
294
295 # TASK-PID CPU# TIMESTAMP FUNCTION
296 # | | | | |
297 bash-394 [001] 22.976: cpuhp_enter: cpu: 0004 target: 140 step: 169 (cpuhp_kick_ap_work)
298 cpuhp/4-31 [004] 22.977: cpuhp_enter: cpu: 0004 target: 140 step: 168 (sched_cpu_deactivate)
299 cpuhp/4-31 [004] 22.990: cpuhp_exit: cpu: 0004 state: 168 step: 168 ret: 0
300 cpuhp/4-31 [004] 22.991: cpuhp_enter: cpu: 0004 target: 140 step: 144 (mce_cpu_pre_down)
301 cpuhp/4-31 [004] 22.992: cpuhp_exit: cpu: 0004 state: 144 step: 144 ret: 0
302 cpuhp/4-31 [004] 22.993: cpuhp_multi_enter: cpu: 0004 target: 140 step: 143 (virtnet_cpu_down_prep)
303 cpuhp/4-31 [004] 22.994: cpuhp_exit: cpu: 0004 state: 143 step: 143 ret: 0
304 cpuhp/4-31 [004] 22.995: cpuhp_enter: cpu: 0004 target: 140 step: 142 (cacheinfo_cpu_pre_down)
305 cpuhp/4-31 [004] 22.996: cpuhp_exit: cpu: 0004 state: 142 step: 142 ret: 0
306 bash-394 [001] 22.997: cpuhp_exit: cpu: 0004 state: 140 step: 169 ret: 0
307 bash-394 [005] 95.540: cpuhp_enter: cpu: 0004 target: 169 step: 140 (cpuhp_kick_ap_work)
308 cpuhp/4-31 [004] 95.541: cpuhp_enter: cpu: 0004 target: 169 step: 141 (acpi_soft_cpu_online)
309 cpuhp/4-31 [004] 95.542: cpuhp_exit: cpu: 0004 state: 141 step: 141 ret: 0
310 cpuhp/4-31 [004] 95.543: cpuhp_enter: cpu: 0004 target: 169 step: 142 (cacheinfo_cpu_online)
311 cpuhp/4-31 [004] 95.544: cpuhp_exit: cpu: 0004 state: 142 step: 142 ret: 0
312 cpuhp/4-31 [004] 95.545: cpuhp_multi_enter: cpu: 0004 target: 169 step: 143 (virtnet_cpu_online)
313 cpuhp/4-31 [004] 95.546: cpuhp_exit: cpu: 0004 state: 143 step: 143 ret: 0
314 cpuhp/4-31 [004] 95.547: cpuhp_enter: cpu: 0004 target: 169 step: 144 (mce_cpu_online)
315 cpuhp/4-31 [004] 95.548: cpuhp_exit: cpu: 0004 state: 144 step: 144 ret: 0
316 cpuhp/4-31 [004] 95.549: cpuhp_enter: cpu: 0004 target: 169 step: 145 (console_cpu_notify)
317 cpuhp/4-31 [004] 95.550: cpuhp_exit: cpu: 0004 state: 145 step: 145 ret: 0
318 cpuhp/4-31 [004] 95.551: cpuhp_enter: cpu: 0004 target: 169 step: 168 (sched_cpu_activate)
319 cpuhp/4-31 [004] 95.552: cpuhp_exit: cpu: 0004 state: 168 step: 168 ret: 0
320 bash-394 [005] 95.553: cpuhp_exit: cpu: 0004 state: 169 step: 140 ret: 0
321
322As it can be seen, CPU4 went down until timestamp 22.996 and then back up until
32395.552. All invoked callbacks including their return codes are visible in the
324trace.
325
326Architecture's requirements
327===========================
328The following functions and configurations are required:
329
330``CONFIG_HOTPLUG_CPU``
331 This entry needs to be enabled in Kconfig
332
333``__cpu_up()``
334 Arch interface to bring up a CPU
335
336``__cpu_disable()``
337 Arch interface to shutdown a CPU, no more interrupts can be handled by the
338 kernel after the routine returns. This includes the shutdown of the timer.
339
340``__cpu_die()``
341 This is actually supposed to ensure death of the CPU. Actually look at some
342 example code in other arch that implement CPU hotplug. The processor is taken
343 down from the ``idle()`` loop for that specific architecture. ``__cpu_die()``
344 typically waits for some per_cpu state to be set, to ensure the processor dead
345 routine is called to be sure positively.
346
347User Space Notification
348=======================
349After a CPU is successfully onlined or offlined, udev events are sent. A udev rule like: ::
350
351 SUBSYSTEM=="cpu", DRIVERS=="processor", DEVPATH=="/devices/system/cpu/*", RUN+="the_hotplug_receiver.sh"
352
353will receive all events. A script like: ::
354
355 #!/bin/sh
356
357 if [ "${ACTION}" = "offline" ]
358 then
359 echo "CPU ${DEVPATH##*/} offline"
360
361 elif [ "${ACTION}" = "online" ]
362 then
363 echo "CPU ${DEVPATH##*/} online"
364
365 fi
366
367can process the event further.
368
369Kernel Inline Documentations Reference
370======================================
371
372.. kernel-doc:: include/linux/cpuhotplug.h
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 2872ca1a52f1..0d93d8089136 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -13,6 +13,7 @@ Core utilities
13 13
14 assoc_array 14 assoc_array
15 atomic_ops 15 atomic_ops
16 cpu_hotplug
16 local_ops 17 local_ops
17 workqueue 18 workqueue
18 19
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 107f6fdd7d14..391da64e9492 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -82,7 +82,9 @@ UltraSPARC-III
82------- 82-------
83 83
84Several "PowerBook" and "iBook2" notebooks are supported. 84Several "PowerBook" and "iBook2" notebooks are supported.
85 85The following POWER processors are supported in powernv mode:
86POWER8
87POWER9
86 88
871.5 SuperH 891.5 SuperH
88---------- 90----------
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
deleted file mode 100644
index d02e8a451872..000000000000
--- a/Documentation/cpu-hotplug.txt
+++ /dev/null
@@ -1,452 +0,0 @@
1 CPU hotplug Support in Linux(tm) Kernel
2
3 Maintainers:
4 CPU Hotplug Core:
5 Rusty Russell <rusty@rustcorp.com.au>
6 Srivatsa Vaddagiri <vatsa@in.ibm.com>
7 i386:
8 Zwane Mwaikambo <zwanem@gmail.com>
9 ppc64:
10 Nathan Lynch <nathanl@austin.ibm.com>
11 Joel Schopp <jschopp@austin.ibm.com>
12 ia64/x86_64:
13 Ashok Raj <ashok.raj@intel.com>
14 s390:
15 Heiko Carstens <heiko.carstens@de.ibm.com>
16
17Authors: Ashok Raj <ashok.raj@intel.com>
18Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
19 Joel Schopp <jschopp@austin.ibm.com>
20
21Introduction
22
23Modern advances in system architectures have introduced advanced error
24reporting and correction capabilities in processors. CPU architectures permit
25partitioning support, where compute resources of a single CPU could be made
26available to virtual machine environments. There are couple OEMS that
27support NUMA hardware which are hot pluggable as well, where physical
28node insertion and removal require support for CPU hotplug.
29
30Such advances require CPUs available to a kernel to be removed either for
31provisioning reasons, or for RAS purposes to keep an offending CPU off
32system execution path. Hence the need for CPU hotplug support in the
33Linux kernel.
34
35A more novel use of CPU-hotplug support is its use today in suspend
36resume support for SMP. Dual-core and HT support makes even
37a laptop run SMP kernels which didn't support these methods. SMP support
38for suspend/resume is a work in progress.
39
40General Stuff about CPU Hotplug
41--------------------------------
42
43Command Line Switches
44---------------------
45maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
46 maxcpus=2 will only boot 2. You can choose to bring the
47 other cpus later online, read FAQ's for more info.
48
49additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
50 cpu_possible_mask = cpu_present_mask + additional_cpus
51
52cede_offline={"off","on"} Use this option to disable/enable putting offlined
53 processors to an extended H_CEDE state on
54 supported pseries platforms.
55 If nothing is specified,
56 cede_offline is set to "on".
57
58(*) Option valid only for following architectures
59- ia64
60
61ia64 uses the number of disabled local apics in ACPI tables MADT to
62determine the number of potentially hot-pluggable cpus. The implementation
63should only rely on this to count the # of cpus, but *MUST* not rely
64on the apicid values in those tables for disabled apics. In the event
65BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
66use this parameter "additional_cpus=x" to represent those cpus in the
67cpu_possible_mask.
68
69possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
70 This option sets possible_cpus bits in
71 cpu_possible_mask. Thus keeping the numbers of bits set
72 constant even if the machine gets rebooted.
73
74CPU maps and such
75-----------------
76[More on cpumaps and primitive to manipulate, please check
77include/linux/cpumask.h that has more descriptive text.]
78
79cpu_possible_mask: Bitmap of possible CPUs that can ever be available in the
80system. This is used to allocate some boot time memory for per_cpu variables
81that aren't designed to grow/shrink as CPUs are made available or removed.
82Once set during boot time discovery phase, the map is static, i.e no bits
83are added or removed anytime. Trimming it accurately for your system needs
84upfront can save some boot time memory. See below for how we use heuristics
85in x86_64 case to keep this under check.
86
87cpu_online_mask: Bitmap of all CPUs currently online. It's set in __cpu_up()
88after a CPU is available for kernel scheduling and ready to receive
89interrupts from devices. It's cleared when a CPU is brought down using
90__cpu_disable(), before which all OS services including interrupts are
91migrated to another target CPU.
92
93cpu_present_mask: Bitmap of CPUs currently present in the system. Not all
94of them may be online. When physical hotplug is processed by the relevant
95subsystem (e.g ACPI) can change and new bit either be added or removed
96from the map depending on the event is hot-add/hot-remove. There are currently
97no locking rules as of now. Typical usage is to init topology during boot,
98at which time hotplug is disabled.
99
100You really dont need to manipulate any of the system cpu maps. They should
101be read-only for most use. When setting up per-cpu resources almost always use
102cpu_possible_mask/for_each_possible_cpu() to iterate.
103
104Never use anything other than cpumask_t to represent bitmap of CPUs.
105
106 #include <linux/cpumask.h>
107
108 for_each_possible_cpu - Iterate over cpu_possible_mask
109 for_each_online_cpu - Iterate over cpu_online_mask
110 for_each_present_cpu - Iterate over cpu_present_mask
111 for_each_cpu(x,mask) - Iterate over some random collection of cpu mask.
112
113 #include <linux/cpu.h>
114 get_online_cpus() and put_online_cpus():
115
116The above calls are used to inhibit cpu hotplug operations. While the
117cpu_hotplug.refcount is non zero, the cpu_online_mask will not change.
118If you merely need to avoid cpus going away, you could also use
119preempt_disable() and preempt_enable() for those sections.
120Just remember the critical section cannot call any
121function that can sleep or schedule this process away. The preempt_disable()
122will work as long as stop_machine_run() is used to take a cpu down.
123
124CPU Hotplug - Frequently Asked Questions.
125
126Q: How to enable my kernel to support CPU hotplug?
127A: When doing make defconfig, Enable CPU hotplug support
128
129 "Processor type and Features" -> Support for Hotpluggable CPUs
130
131Make sure that you have CONFIG_SMP turned on as well.
132
133You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
134as well.
135
136Q: What architectures support CPU hotplug?
137A: As of 2.6.14, the following architectures support CPU hotplug.
138
139i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
140
141Q: How to test if hotplug is supported on the newly built kernel?
142A: You should now notice an entry in sysfs.
143
144Check if sysfs is mounted, using the "mount" command. You should notice
145an entry as shown below in the output.
146
147 ....
148 none on /sys type sysfs (rw)
149 ....
150
151If this is not mounted, do the following.
152
153 #mkdir /sys
154 #mount -t sysfs sys /sys
155
156Now you should see entries for all present cpu, the following is an example
157in a 8-way system.
158
159 #pwd
160 #/sys/devices/system/cpu
161 #ls -l
162 total 0
163 drwxr-xr-x 10 root root 0 Sep 19 07:44 .
164 drwxr-xr-x 13 root root 0 Sep 19 07:45 ..
165 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0
166 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1
167 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2
168 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3
169 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4
170 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5
171 drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6
172 drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7
173
174Under each directory you would find an "online" file which is the control
175file to logically online/offline a processor.
176
177Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
178A: The usage of hot-add/remove may not be very consistently used in the code.
179CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
180To support physical addition/removal, one would need some BIOS hooks and
181the platform should have something like an attention button in PCI hotplug.
182CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
183
184Q: How do I logically offline a CPU?
185A: Do the following.
186
187 #echo 0 > /sys/devices/system/cpu/cpuX/online
188
189Once the logical offline is successful, check
190
191 #cat /proc/interrupts
192
193You should now not see the CPU that you removed. Also online file will report
194the state as 0 when a CPU is offline and 1 when it's online.
195
196 #To display the current cpu state.
197 #cat /sys/devices/system/cpu/cpuX/online
198
199Q: Why can't I remove CPU0 on some systems?
200A: Some architectures may have some special dependency on a certain CPU.
201
202For e.g in IA64 platforms we have ability to send platform interrupts to the
203OS. a.k.a Corrected Platform Error Interrupts (CPEI). In current ACPI
204specifications, we didn't have a way to change the target CPU. Hence if the
205current ACPI version doesn't support such re-direction, we disable that CPU
206by making it not-removable.
207
208In such cases you will also notice that the online file is missing under cpu0.
209
210Q: Is CPU0 removable on X86?
211A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is
212removable by default. Otherwise, CPU0 is also removable by kernel option
213cpu0_hotplug.
214
215But some features depend on CPU0. Two known dependencies are:
216
2171. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if
218CPU0 is offline and you need to online CPU0 before hibernate/suspend can
219continue.
2202. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt
221is detected.
222
223It's said poweroff/reboot may depend on CPU0 on some machines although I haven't
224seen any poweroff/reboot failure so far after CPU0 is offline on a few tested
225machines.
226
227Please let me know if you know or see any other dependencies of CPU0.
228
229If the dependencies are under your control, you can turn on CPU0 hotplug feature
230either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug.
231
232--Fenghua Yu <fenghua.yu@intel.com>
233
234Q: How do I find out if a particular CPU is not removable?
235A: Depending on the implementation, some architectures may show this by the
236absence of the "online" file. This is done if it can be determined ahead of
237time that this CPU cannot be removed.
238
239In some situations, this can be a run time check, i.e if you try to remove the
240last CPU, this will not be permitted. You can find such failures by
241investigating the return value of the "echo" command.
242
243Q: What happens when a CPU is being logically offlined?
244A: The following happen, listed in no particular order :-)
245
246- A notification is sent to in-kernel registered modules by sending an event
247 CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
248 CPU is being offlined while tasks are frozen due to a suspend operation in
249 progress
250- All processes are migrated away from this outgoing CPU to new CPUs.
251 The new CPU is chosen from each process' current cpuset, which may be
252 a subset of all online CPUs.
253- All interrupts targeted to this CPU are migrated to a new CPU
254- timers/bottom half/task lets are also migrated to a new CPU
255- Once all services are migrated, kernel calls an arch specific routine
256 __cpu_disable() to perform arch specific cleanup.
257- Once this is successful, an event for successful cleanup is sent by an event
258 CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
259 CPU is being offlined).
260
261 "It is expected that each service cleans up when the CPU_DOWN_PREPARE
262 notifier is called, when CPU_DEAD is called it's expected there is nothing
263 running on behalf of this CPU that was offlined"
264
265Q: If I have some kernel code that needs to be aware of CPU arrival and
266 departure, how to i arrange for proper notification?
267A: This is what you would need in your kernel code to receive notifications.
268
269 #include <linux/cpu.h>
270 static int foobar_cpu_callback(struct notifier_block *nfb,
271 unsigned long action, void *hcpu)
272 {
273 unsigned int cpu = (unsigned long)hcpu;
274
275 switch (action) {
276 case CPU_ONLINE:
277 case CPU_ONLINE_FROZEN:
278 foobar_online_action(cpu);
279 break;
280 case CPU_DEAD:
281 case CPU_DEAD_FROZEN:
282 foobar_dead_action(cpu);
283 break;
284 }
285 return NOTIFY_OK;
286 }
287
288 static struct notifier_block foobar_cpu_notifier =
289 {
290 .notifier_call = foobar_cpu_callback,
291 };
292
293You need to call register_cpu_notifier() from your init function.
294Init functions could be of two types:
2951. early init (init function called when only the boot processor is online).
2962. late init (init function called _after_ all the CPUs are online).
297
298For the first case, you should add the following to your init function
299
300 register_cpu_notifier(&foobar_cpu_notifier);
301
302For the second case, you should add the following to your init function
303
304 register_hotcpu_notifier(&foobar_cpu_notifier);
305
306You can fail PREPARE notifiers if something doesn't work to prepare resources.
307This will stop the activity and send a following CANCELED event back.
308
309CPU_DEAD should not be failed, its just a goodness indication, but bad
310things will happen if a notifier in path sent a BAD notify code.
311
312Q: I don't see my action being called for all CPUs already up and running?
313A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
314 If you need to perform some action for each CPU already in the system, then
315 do this:
316
317 for_each_online_cpu(i) {
318 foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
319 foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
320 }
321
322 However, if you want to register a hotplug callback, as well as perform
323 some initialization for CPUs that are already online, then do this:
324
325 Version 1: (Correct)
326 ---------
327
328 cpu_notifier_register_begin();
329
330 for_each_online_cpu(i) {
331 foobar_cpu_callback(&foobar_cpu_notifier,
332 CPU_UP_PREPARE, i);
333 foobar_cpu_callback(&foobar_cpu_notifier,
334 CPU_ONLINE, i);
335 }
336
337 /* Note the use of the double underscored version of the API */
338 __register_cpu_notifier(&foobar_cpu_notifier);
339
340 cpu_notifier_register_done();
341
342 Note that the following code is *NOT* the right way to achieve this,
343 because it is prone to an ABBA deadlock between the cpu_add_remove_lock
344 and the cpu_hotplug.lock.
345
346 Version 2: (Wrong!)
347 ---------
348
349 get_online_cpus();
350
351 for_each_online_cpu(i) {
352 foobar_cpu_callback(&foobar_cpu_notifier,
353 CPU_UP_PREPARE, i);
354 foobar_cpu_callback(&foobar_cpu_notifier,
355 CPU_ONLINE, i);
356 }
357
358 register_cpu_notifier(&foobar_cpu_notifier);
359
360 put_online_cpus();
361
362 So always use the first version shown above when you want to register
363 callbacks as well as initialize the already online CPUs.
364
365
366Q: If I would like to develop CPU hotplug support for a new architecture,
367 what do I need at a minimum?
368A: The following are what is required for CPU hotplug infrastructure to work
369 correctly.
370
371 - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
372 - __cpu_up() - Arch interface to bring up a CPU
373 - __cpu_disable() - Arch interface to shutdown a CPU, no more interrupts
374 can be handled by the kernel after the routine
375 returns. Including local APIC timers etc are
376 shutdown.
377 - __cpu_die() - This actually supposed to ensure death of the CPU.
378 Actually look at some example code in other arch
379 that implement CPU hotplug. The processor is taken
380 down from the idle() loop for that specific
381 architecture. __cpu_die() typically waits for some
382 per_cpu state to be set, to ensure the processor
383 dead routine is called to be sure positively.
384
385Q: I need to ensure that a particular CPU is not removed when there is some
386 work specific to this CPU in progress.
387A: There are two ways. If your code can be run in interrupt context, use
388 smp_call_function_single(), otherwise use work_on_cpu(). Note that
389 work_on_cpu() is slow, and can fail due to out of memory:
390
391 int my_func_on_cpu(int cpu)
392 {
393 int err;
394 get_online_cpus();
395 if (!cpu_online(cpu))
396 err = -EINVAL;
397 else
398#if NEEDS_BLOCKING
399 err = work_on_cpu(cpu, __my_func_on_cpu, NULL);
400#else
401 smp_call_function_single(cpu, __my_func_on_cpu, &err,
402 true);
403#endif
404 put_online_cpus();
405 return err;
406 }
407
408Q: How do we determine how many CPUs are available for hotplug.
409A: There is no clear spec defined way from ACPI that can give us that
410 information today. Based on some input from Natalie of Unisys,
411 that the ACPI MADT (Multiple APIC Description Tables) marks those possible
412 CPUs in a system with disabled status.
413
414 Andi implemented some simple heuristics that count the number of disabled
415 CPUs in MADT as hotpluggable CPUS. In the case there are no disabled CPUS
416 we assume 1/2 the number of CPUs currently present can be hotplugged.
417
418 Caveat: ACPI MADT can only provide 256 entries in systems with only ACPI 2.0c
419 or earlier ACPI version supported, because the apicid field in MADT is only
420 8 bits. From ACPI 3.0, this limitation was removed since the apicid field
421 was extended to 32 bits with x2APIC introduced.
422
423User Space Notification
424
425Hotplug support for devices is common in Linux today. Its being used today to
426support automatic configuration of network, usb and pci devices. A hotplug
427event can be used to invoke an agent script to perform the configuration task.
428
429You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
430scripts.
431
432 #!/bin/bash
433 # $Id: cpu.agent
434 # Kernel hotplug params include:
435 #ACTION=%s [online or offline]
436 #DEVPATH=%s
437 #
438 cd /etc/hotplug
439 . ./hotplug.functions
440
441 case $ACTION in
442 online)
443 echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
444 ;;
445 offline)
446 echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
447 ;;
448 *)
449 debug_mesg CPU $ACTION event not supported
450 exit 1
451 ;;
452 esac
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index 78aa00a604a0..ffdcc97f6f5a 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -103,3 +103,9 @@ have already built it.
103 103
104The optional make variable CF can be used to pass arguments to sparse. The 104The optional make variable CF can be used to pass arguments to sparse. The
105build system passes -Wbitwise to sparse automatically. 105build system passes -Wbitwise to sparse automatically.
106
107Checking RCU annotations
108~~~~~~~~~~~~~~~~~~~~~~~~
109
110RCU annotations are not checked by default. To enable RCU annotation
111checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index a23edccd2059..77b92221f951 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -116,9 +116,11 @@ crc32table.h*
116cscope.* 116cscope.*
117defkeymap.c 117defkeymap.c
118devlist.h* 118devlist.h*
119devicetable-offsets.h
119dnotify_test 120dnotify_test
120docproc 121docproc
121dslm 122dslm
123dtc
122elf2ecoff 124elf2ecoff
123elfconfig.h* 125elfconfig.h*
124evergreen_reg_safe.h 126evergreen_reg_safe.h
@@ -153,8 +155,8 @@ keywords.c
153ksym.c* 155ksym.c*
154ksym.h* 156ksym.h*
155kxgettext 157kxgettext
156lex.c 158*lex.c
157lex.*.c 159*lex.*.c
158linux 160linux
159logo_*.c 161logo_*.c
160logo_*_clut224.c 162logo_*_clut224.c
@@ -215,6 +217,7 @@ series
215setup 217setup
216setup.bin 218setup.bin
217setup.elf 219setup.elf
220sortextable
218sImage 221sImage
219sm_tbl* 222sm_tbl*
220split-include 223split-include
diff --git a/Documentation/driver-api/device-io.rst b/Documentation/driver-api/device-io.rst
new file mode 100644
index 000000000000..b00b23903078
--- /dev/null
+++ b/Documentation/driver-api/device-io.rst
@@ -0,0 +1,201 @@
1.. Copyright 2001 Matthew Wilcox
2..
3.. This documentation is free software; you can redistribute
4.. it and/or modify it under the terms of the GNU General Public
5.. License as published by the Free Software Foundation; either
6.. version 2 of the License, or (at your option) any later
7.. version.
8
9===============================
10Bus-Independent Device Accesses
11===============================
12
13:Author: Matthew Wilcox
14:Author: Alan Cox
15
16Introduction
17============
18
19Linux provides an API which abstracts performing IO across all busses
20and devices, allowing device drivers to be written independently of bus
21type.
22
23Memory Mapped IO
24================
25
26Getting Access to the Device
27----------------------------
28
29The most widely supported form of IO is memory mapped IO. That is, a
30part of the CPU's address space is interpreted not as accesses to
31memory, but as accesses to a device. Some architectures define devices
32to be at a fixed address, but most have some method of discovering
33devices. The PCI bus walk is a good example of such a scheme. This
34document does not cover how to receive such an address, but assumes you
35are starting with one. Physical addresses are of type unsigned long.
36
37This address should not be used directly. Instead, to get an address
38suitable for passing to the accessor functions described below, you
39should call :c:func:`ioremap()`. An address suitable for accessing
40the device will be returned to you.
41
42After you've finished using the device (say, in your module's exit
43routine), call :c:func:`iounmap()` in order to return the address
44space to the kernel. Most architectures allocate new address space each
45time you call :c:func:`ioremap()`, and they can run out unless you
46call :c:func:`iounmap()`.
47
48Accessing the device
49--------------------
50
51The part of the interface most used by drivers is reading and writing
52memory-mapped registers on the device. Linux provides interfaces to read
53and write 8-bit, 16-bit, 32-bit and 64-bit quantities. Due to a
54historical accident, these are named byte, word, long and quad accesses.
55Both read and write accesses are supported; there is no prefetch support
56at this time.
57
58The functions are named readb(), readw(), readl(), readq(),
59readb_relaxed(), readw_relaxed(), readl_relaxed(), readq_relaxed(),
60writeb(), writew(), writel() and writeq().
61
62Some devices (such as framebuffers) would like to use larger transfers than
638 bytes at a time. For these devices, the :c:func:`memcpy_toio()`,
64:c:func:`memcpy_fromio()` and :c:func:`memset_io()` functions are
65provided. Do not use memset or memcpy on IO addresses; they are not
66guaranteed to copy data in order.
67
68The read and write functions are defined to be ordered. That is, the
69compiler is not permitted to reorder the I/O sequence. When the ordering
70can be compiler optimised, you can use __readb() and friends to
71indicate the relaxed ordering. Use this with care.
72
73While the basic functions are defined to be synchronous with respect to
74each other and ordered with respect to each other the busses the devices
75sit on may themselves have asynchronicity. In particular many authors
76are burned by the fact that PCI bus writes are posted asynchronously. A
77driver author must issue a read from the same device to ensure that
78writes have occurred in the specific cases the author cares about. This kind
79of property cannot be hidden from driver writers in the API. In some
80cases, the read used to flush the device may be expected to fail (if the
81card is resetting, for example). In that case, the read should be done
82from config space, which is guaranteed to soft-fail if the card doesn't
83respond.
84
85The following is an example of flushing a write to a device when the
86driver would like to ensure the write's effects are visible prior to
87continuing execution::
88
89 static inline void
90 qla1280_disable_intrs(struct scsi_qla_host *ha)
91 {
92 struct device_reg *reg;
93
94 reg = ha->iobase;
95 /* disable risc and host interrupts */
96 WRT_REG_WORD(&reg->ictrl, 0);
97 /*
98 * The following read will ensure that the above write
99 * has been received by the device before we return from this
100 * function.
101 */
102 RD_REG_WORD(&reg->ictrl);
103 ha->flags.ints_enabled = 0;
104 }
105
106In addition to write posting, on some large multiprocessing systems
107(e.g. SGI Challenge, Origin and Altix machines) posted writes won't be
108strongly ordered coming from different CPUs. Thus it's important to
109properly protect parts of your driver that do memory-mapped writes with
110locks and use the :c:func:`mmiowb()` to make sure they arrive in the
111order intended. Issuing a regular readX() will also ensure write ordering,
112but should only be used when the
113driver has to be sure that the write has actually arrived at the device
114(not that it's simply ordered with respect to other writes), since a
115full readX() is a relatively expensive operation.
116
117Generally, one should use :c:func:`mmiowb()` prior to releasing a spinlock
118that protects regions using :c:func:`writeb()` or similar functions that
119aren't surrounded by readb() calls, which will ensure ordering
120and flushing. The following pseudocode illustrates what might occur if
121write ordering isn't guaranteed via :c:func:`mmiowb()` or one of the
122readX() functions::
123
124 CPU A: spin_lock_irqsave(&dev_lock, flags)
125 CPU A: ...
126 CPU A: writel(newval, ring_ptr);
127 CPU A: spin_unlock_irqrestore(&dev_lock, flags)
128 ...
129 CPU B: spin_lock_irqsave(&dev_lock, flags)
130 CPU B: writel(newval2, ring_ptr);
131 CPU B: ...
132 CPU B: spin_unlock_irqrestore(&dev_lock, flags)
133
134In the case above, newval2 could be written to ring_ptr before newval.
135Fixing it is easy though::
136
137 CPU A: spin_lock_irqsave(&dev_lock, flags)
138 CPU A: ...
139 CPU A: writel(newval, ring_ptr);
140 CPU A: mmiowb(); /* ensure no other writes beat us to the device */
141 CPU A: spin_unlock_irqrestore(&dev_lock, flags)
142 ...
143 CPU B: spin_lock_irqsave(&dev_lock, flags)
144 CPU B: writel(newval2, ring_ptr);
145 CPU B: ...
146 CPU B: mmiowb();
147 CPU B: spin_unlock_irqrestore(&dev_lock, flags)
148
149See tg3.c for a real world example of how to use :c:func:`mmiowb()`.
150
151PCI ordering rules also guarantee that PIO read responses arrive after any
152outstanding DMA writes from that bus, since for some devices the result of
153a readb() call may signal to the driver that a DMA transaction is
154complete. In many cases, however, the driver may want to indicate that the
155next readb() call has no relation to any previous DMA writes
156performed by the device. The driver can use readb_relaxed() for
157these cases, although only some platforms will honor the relaxed
158semantics. Using the relaxed read functions will provide significant
159performance benefits on platforms that support it. The qla2xxx driver
160provides examples of how to use readX_relaxed(). In many cases, a majority
161of the driver's readX() calls can safely be converted to readX_relaxed()
162calls, since only a few will indicate or depend on DMA completion.
163
164Port Space Accesses
165===================
166
167Port Space Explained
168--------------------
169
170Another form of IO commonly supported is Port Space. This is a range of
171addresses separate to the normal memory address space. Access to these
172addresses is generally not as fast as accesses to the memory mapped
173addresses, and it also has a potentially smaller address space.
174
175Unlike memory mapped IO, no preparation is required to access port
176space.
177
178Accessing Port Space
179--------------------
180
181Accesses to this space are provided through a set of functions which
182allow 8-bit, 16-bit and 32-bit accesses; also known as byte, word and
183long. These functions are :c:func:`inb()`, :c:func:`inw()`,
184:c:func:`inl()`, :c:func:`outb()`, :c:func:`outw()` and
185:c:func:`outl()`.
186
187Some variants are provided for these functions. Some devices require
188that accesses to their ports are slowed down. This functionality is
189provided by appending a ``_p`` to the end of the function.
190There are also equivalents to memcpy. The :c:func:`ins()` and
191:c:func:`outs()` functions copy bytes, words or longs to the given
192port.
193
194Public Functions Provided
195=========================
196
197.. kernel-doc:: arch/x86/include/asm/io.h
198 :internal:
199
200.. kernel-doc:: lib/pci_iomap.c
201 :export:
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index 5f5713448703..70e328e16aad 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -1,3 +1,6 @@
1.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
2.. |struct generic_pm_domain| replace:: :c:type:`struct generic_pm_domain <generic_pm_domain>`
3
1============ 4============
2Device links 5Device links
3============ 6============
@@ -120,12 +123,11 @@ Examples
120 is the same as if the MMU was the parent of the master device. 123 is the same as if the MMU was the parent of the master device.
121 124
122 The fact that both devices share the same power domain would normally 125 The fact that both devices share the same power domain would normally
123 suggest usage of a :c:type:`struct dev_pm_domain` or :c:type:`struct 126 suggest usage of a |struct dev_pm_domain| or |struct generic_pm_domain|,
124 generic_pm_domain`, however these are not independent devices that 127 however these are not independent devices that happen to share a power
125 happen to share a power switch, but rather the MMU device serves the 128 switch, but rather the MMU device serves the busmaster device and is
126 busmaster device and is useless without it. A device link creates a 129 useless without it. A device link creates a synthetic hierarchical
127 synthetic hierarchical relationship between the devices and is thus 130 relationship between the devices and is thus more apt.
128 more apt.
129 131
130* A Thunderbolt host controller comprises a number of PCIe hotplug ports 132* A Thunderbolt host controller comprises a number of PCIe hotplug ports
131 and an NHI device to manage the PCIe switch. On resume from system sleep, 133 and an NHI device to manage the PCIe switch. On resume from system sleep,
@@ -157,7 +159,7 @@ Examples
157Alternatives 159Alternatives
158============ 160============
159 161
160* A :c:type:`struct dev_pm_domain` can be used to override the bus, 162* A |struct dev_pm_domain| can be used to override the bus,
161 class or device type callbacks. It is intended for devices sharing 163 class or device type callbacks. It is intended for devices sharing
162 a single on/off switch, however it does not guarantee a specific 164 a single on/off switch, however it does not guarantee a specific
163 suspend/resume ordering, this needs to be implemented separately. 165 suspend/resume ordering, this needs to be implemented separately.
@@ -166,7 +168,7 @@ Alternatives
166 suspended. Furthermore it cannot be used to enforce a specific shutdown 168 suspended. Furthermore it cannot be used to enforce a specific shutdown
167 ordering or a driver presence dependency. 169 ordering or a driver presence dependency.
168 170
169* A :c:type:`struct generic_pm_domain` is a lot more heavyweight than a 171* A |struct generic_pm_domain| is a lot more heavyweight than a
170 device link and does not allow for shutdown ordering or driver presence 172 device link and does not allow for shutdown ordering or driver presence
171 dependencies. It also cannot be used on ACPI systems. 173 dependencies. It also cannot be used on ACPI systems.
172 174
diff --git a/Documentation/driver-api/iio/buffers.rst b/Documentation/driver-api/iio/buffers.rst
new file mode 100644
index 000000000000..02c99a6bee18
--- /dev/null
+++ b/Documentation/driver-api/iio/buffers.rst
@@ -0,0 +1,125 @@
1=======
2Buffers
3=======
4
5* struct :c:type:`iio_buffer` — general buffer structure
6* :c:func:`iio_validate_scan_mask_onehot` — Validates that exactly one channel
7 is selected
8* :c:func:`iio_buffer_get` — Grab a reference to the buffer
9* :c:func:`iio_buffer_put` — Release the reference to the buffer
10
11The Industrial I/O core offers a way for continuous data capture based on a
12trigger source. Multiple data channels can be read at once from
13:file:`/dev/iio:device{X}` character device node, thus reducing the CPU load.
14
15IIO buffer sysfs interface
16==========================
17An IIO buffer has an associated attributes directory under
18:file:`/sys/bus/iio/iio:device{X}/buffer/*`. Here are some of the existing
19attributes:
20
21* :file:`length`, the total number of data samples (capacity) that can be
22 stored by the buffer.
23* :file:`enable`, activate buffer capture.
24
25IIO buffer setup
26================
27
28The meta information associated with a channel reading placed in a buffer is
29called a scan element. The important bits configuring scan elements are
30exposed to userspace applications via the
31:file:`/sys/bus/iio/iio:device{X}/scan_elements/*` directory. This directory contains
32attributes of the following form:
33
34* :file:`enable`, used for enabling a channel. If and only if its attribute
35 is non *zero*, then a triggered capture will contain data samples for this
36 channel.
37* :file:`type`, description of the scan element data storage within the buffer
38 and hence the form in which it is read from user space.
39 Format is [be|le]:[s|u]bits/storagebitsXrepeat[>>shift].
40 * *be* or *le*, specifies big or little endian.
41 * *s* or *u*, specifies if signed (2's complement) or unsigned.
42 * *bits*, is the number of valid data bits.
43 * *storagebits*, is the number of bits (after padding) that it occupies in the
44 buffer.
45 * *shift*, if specified, is the shift that needs to be applied prior to
46 masking out unused bits.
47 * *repeat*, specifies the number of bits/storagebits repetitions. When the
48 repeat element is 0 or 1, then the repeat value is omitted.
49
50For example, a driver for a 3-axis accelerometer with 12 bit resolution where
51data is stored in two 8-bits registers as follows::
52
53 7 6 5 4 3 2 1 0
54 +---+---+---+---+---+---+---+---+
55 |D3 |D2 |D1 |D0 | X | X | X | X | (LOW byte, address 0x06)
56 +---+---+---+---+---+---+---+---+
57
58 7 6 5 4 3 2 1 0
59 +---+---+---+---+---+---+---+---+
60 |D11|D10|D9 |D8 |D7 |D6 |D5 |D4 | (HIGH byte, address 0x07)
61 +---+---+---+---+---+---+---+---+
62
63will have the following scan element type for each axis::
64
65 $ cat /sys/bus/iio/devices/iio:device0/scan_elements/in_accel_y_type
66 le:s12/16>>4
67
68A user space application will interpret data samples read from the buffer as
69two byte little endian signed data, that needs a 4 bits right shift before
70masking out the 12 valid bits of data.
71
72For implementing buffer support a driver should initialize the following
73fields in iio_chan_spec definition::
74
75 struct iio_chan_spec {
76 /* other members */
77 int scan_index;
78 struct {
79 char sign;
80 u8 realbits;
81 u8 storagebits;
82 u8 shift;
83 u8 repeat;
84 enum iio_endian endianness;
85 } scan_type;
86 };
87
88The driver implementing the accelerometer described above will have the
89following channel definition::
90
91 struct iio_chan_spec accel_channels[] = {
92 {
93 .type = IIO_ACCEL,
94 .modified = 1,
95 .channel2 = IIO_MOD_X,
96 /* other stuff here */
97 .scan_index = 0,
98 .scan_type = {
99 .sign = 's',
100 .realbits = 12,
101 .storagebits = 16,
102 .shift = 4,
103 .endianness = IIO_LE,
104 },
105 }
106 /* similar for Y (with channel2 = IIO_MOD_Y, scan_index = 1)
107 * and Z (with channel2 = IIO_MOD_Z, scan_index = 2) axis
108 */
109 }
110
111Here **scan_index** defines the order in which the enabled channels are placed
112inside the buffer. Channels with a lower **scan_index** will be placed before
113channels with a higher index. Each channel needs to have a unique
114**scan_index**.
115
116Setting **scan_index** to -1 can be used to indicate that the specific channel
117does not support buffered capture. In this case no entries will be created for
118the channel in the scan_elements directory.
119
120More details
121============
122.. kernel-doc:: include/linux/iio/buffer.h
123.. kernel-doc:: drivers/iio/industrialio-buffer.c
124 :export:
125
diff --git a/Documentation/driver-api/iio/core.rst b/Documentation/driver-api/iio/core.rst
new file mode 100644
index 000000000000..9a34ae03b679
--- /dev/null
+++ b/Documentation/driver-api/iio/core.rst
@@ -0,0 +1,182 @@
1=============
2Core elements
3=============
4
5The Industrial I/O core offers a unified framework for writing drivers for
6many different types of embedded sensors. a standard interface to user space
7applications manipulating sensors. The implementation can be found under
8:file:`drivers/iio/industrialio-*`
9
10Industrial I/O Devices
11----------------------
12
13* struct :c:type:`iio_dev` - industrial I/O device
14* :c:func:`iio_device_alloc()` - allocate an :c:type:`iio_dev` from a driver
15* :c:func:`iio_device_free()` - free an :c:type:`iio_dev` from a driver
16* :c:func:`iio_device_register()` - register a device with the IIO subsystem
17* :c:func:`iio_device_unregister()` - unregister a device from the IIO
18 subsystem
19
20An IIO device usually corresponds to a single hardware sensor and it
21provides all the information needed by a driver handling a device.
22Let's first have a look at the functionality embedded in an IIO device
23then we will show how a device driver makes use of an IIO device.
24
25There are two ways for a user space application to interact with an IIO driver.
26
271. :file:`/sys/bus/iio/iio:device{X}/`, this represents a hardware sensor
28 and groups together the data channels of the same chip.
292. :file:`/dev/iio:device{X}`, character device node interface used for
30 buffered data transfer and for events information retrieval.
31
32A typical IIO driver will register itself as an :doc:`I2C <../i2c>` or
33:doc:`SPI <../spi>` driver and will create two routines, probe and remove.
34
35At probe:
36
371. Call :c:func:`iio_device_alloc()`, which allocates memory for an IIO device.
382. Initialize IIO device fields with driver specific information (e.g.
39 device name, device channels).
403. Call :c:func:`iio_device_register()`, this registers the device with the
41 IIO core. After this call the device is ready to accept requests from user
42 space applications.
43
44At remove, we free the resources allocated in probe in reverse order:
45
461. :c:func:`iio_device_unregister()`, unregister the device from the IIO core.
472. :c:func:`iio_device_free()`, free the memory allocated for the IIO device.
48
49IIO device sysfs interface
50==========================
51
52Attributes are sysfs files used to expose chip info and also to allow
53applications to set various configuration parameters. For device with
54index X, attributes can be found under /sys/bus/iio/iio:deviceX/ directory.
55Common attributes are:
56
57* :file:`name`, description of the physical chip.
58* :file:`dev`, shows the major:minor pair associated with
59 :file:`/dev/iio:deviceX` node.
60* :file:`sampling_frequency_available`, available discrete set of sampling
61 frequency values for device.
62* Available standard attributes for IIO devices are described in the
63 :file:`Documentation/ABI/testing/sysfs-bus-iio` file in the Linux kernel
64 sources.
65
66IIO device channels
67===================
68
69struct :c:type:`iio_chan_spec` - specification of a single channel
70
71An IIO device channel is a representation of a data channel. An IIO device can
72have one or multiple channels. For example:
73
74* a thermometer sensor has one channel representing the temperature measurement.
75* a light sensor with two channels indicating the measurements in the visible
76 and infrared spectrum.
77* an accelerometer can have up to 3 channels representing acceleration on X, Y
78 and Z axes.
79
80An IIO channel is described by the struct :c:type:`iio_chan_spec`.
81A thermometer driver for the temperature sensor in the example above would
82have to describe its channel as follows::
83
84 static const struct iio_chan_spec temp_channel[] = {
85 {
86 .type = IIO_TEMP,
87 .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
88 },
89 };
90
91Channel sysfs attributes exposed to userspace are specified in the form of
92bitmasks. Depending on their shared info, attributes can be set in one of the
93following masks:
94
95* **info_mask_separate**, attributes will be specific to
96 this channel
97* **info_mask_shared_by_type**, attributes are shared by all channels of the
98 same type
99* **info_mask_shared_by_dir**, attributes are shared by all channels of the same
100 direction
101* **info_mask_shared_by_all**, attributes are shared by all channels
102
103When there are multiple data channels per channel type we have two ways to
104distinguish between them:
105
106* set **.modified** field of :c:type:`iio_chan_spec` to 1. Modifiers are
107 specified using **.channel2** field of the same :c:type:`iio_chan_spec`
108 structure and are used to indicate a physically unique characteristic of the
109 channel such as its direction or spectral response. For example, a light
110 sensor can have two channels, one for infrared light and one for both
111 infrared and visible light.
112* set **.indexed** field of :c:type:`iio_chan_spec` to 1. In this case the
113 channel is simply another instance with an index specified by the **.channel**
114 field.
115
116Here is how we can make use of the channel's modifiers::
117
118 static const struct iio_chan_spec light_channels[] = {
119 {
120 .type = IIO_INTENSITY,
121 .modified = 1,
122 .channel2 = IIO_MOD_LIGHT_IR,
123 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
124 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
125 },
126 {
127 .type = IIO_INTENSITY,
128 .modified = 1,
129 .channel2 = IIO_MOD_LIGHT_BOTH,
130 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
131 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
132 },
133 {
134 .type = IIO_LIGHT,
135 .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
136 .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
137 },
138 }
139
140This channel's definition will generate two separate sysfs files for raw data
141retrieval:
142
143* :file:`/sys/bus/iio/iio:device{X}/in_intensity_ir_raw`
144* :file:`/sys/bus/iio/iio:device{X}/in_intensity_both_raw`
145
146one file for processed data:
147
148* :file:`/sys/bus/iio/iio:device{X}/in_illuminance_input`
149
150and one shared sysfs file for sampling frequency:
151
152* :file:`/sys/bus/iio/iio:device{X}/sampling_frequency`.
153
154Here is how we can make use of the channel's indexing::
155
156 static const struct iio_chan_spec light_channels[] = {
157 {
158 .type = IIO_VOLTAGE,
159 .indexed = 1,
160 .channel = 0,
161 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
162 },
163 {
164 .type = IIO_VOLTAGE,
165 .indexed = 1,
166 .channel = 1,
167 .info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
168 },
169 }
170
171This will generate two separate attributes files for raw data retrieval:
172
173* :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage0_raw`, representing
174 voltage measurement for channel 0.
175* :file:`/sys/bus/iio/devices/iio:device{X}/in_voltage1_raw`, representing
176 voltage measurement for channel 1.
177
178More details
179============
180.. kernel-doc:: include/linux/iio/iio.h
181.. kernel-doc:: drivers/iio/industrialio-core.c
182 :export:
diff --git a/Documentation/driver-api/iio/index.rst b/Documentation/driver-api/iio/index.rst
new file mode 100644
index 000000000000..e5c3922d1b6f
--- /dev/null
+++ b/Documentation/driver-api/iio/index.rst
@@ -0,0 +1,17 @@
1.. include:: <isonum.txt>
2
3Industrial I/O
4==============
5
6**Copyright** |copy| 2015 Intel Corporation
7
8Contents:
9
10.. toctree::
11 :maxdepth: 2
12
13 intro
14 core
15 buffers
16 triggers
17 triggered-buffers
diff --git a/Documentation/driver-api/iio/intro.rst b/Documentation/driver-api/iio/intro.rst
new file mode 100644
index 000000000000..3653fbd57069
--- /dev/null
+++ b/Documentation/driver-api/iio/intro.rst
@@ -0,0 +1,33 @@
1.. include:: <isonum.txt>
2
3============
4Introduction
5============
6
7The main purpose of the Industrial I/O subsystem (IIO) is to provide support
8for devices that in some sense perform either
9analog-to-digital conversion (ADC) or digital-to-analog conversion (DAC)
10or both. The aim is to fill the gap between the somewhat similar hwmon and
11:doc:`input <../input>` subsystems. Hwmon is directed at low sample rate
12sensors used to monitor and control the system itself, like fan speed control
13or temperature measurement. :doc:`Input <../input>` is, as its name suggests,
14focused on human interaction input devices (keyboard, mouse, touchscreen).
15In some cases there is considerable overlap between these and IIO.
16
17Devices that fall into this category include:
18
19* analog to digital converters (ADCs)
20* accelerometers
21* capacitance to digital converters (CDCs)
22* digital to analog converters (DACs)
23* gyroscopes
24* inertial measurement units (IMUs)
25* color and light sensors
26* magnetometers
27* pressure sensors
28* proximity sensors
29* temperature sensors
30
31Usually these sensors are connected via :doc:`SPI <../spi>` or
32:doc:`I2C <../i2c>`. A common use case of the sensors devices is to have
33combined functionality (e.g. light plus proximity sensor).
diff --git a/Documentation/driver-api/iio/triggered-buffers.rst b/Documentation/driver-api/iio/triggered-buffers.rst
new file mode 100644
index 000000000000..0db12660cc90
--- /dev/null
+++ b/Documentation/driver-api/iio/triggered-buffers.rst
@@ -0,0 +1,69 @@
1=================
2Triggered Buffers
3=================
4
5Now that we know what buffers and triggers are, let's see how they work together.
6
7IIO triggered buffer setup
8==========================
9
10* :c:func:`iio_triggered_buffer_setup` — Setup triggered buffer and pollfunc
11* :c:func:`iio_triggered_buffer_cleanup` — Free resources allocated by
12 :c:func:`iio_triggered_buffer_setup`
13* struct :c:type:`iio_buffer_setup_ops` — buffer setup related callbacks
14
15A typical triggered buffer setup looks like this::
16
17 const struct iio_buffer_setup_ops sensor_buffer_setup_ops = {
18 .preenable = sensor_buffer_preenable,
19 .postenable = sensor_buffer_postenable,
20 .postdisable = sensor_buffer_postdisable,
21 .predisable = sensor_buffer_predisable,
22 };
23
24 irqreturn_t sensor_iio_pollfunc(int irq, void *p)
25 {
26 pf->timestamp = iio_get_time_ns((struct indio_dev *)p);
27 return IRQ_WAKE_THREAD;
28 }
29
30 irqreturn_t sensor_trigger_handler(int irq, void *p)
31 {
32 u16 buf[8];
33 int i = 0;
34
35 /* read data for each active channel */
36 for_each_set_bit(bit, active_scan_mask, masklength)
37 buf[i++] = sensor_get_data(bit);
38
39 iio_push_to_buffers_with_timestamp(indio_dev, buf, timestamp);
40
41 iio_trigger_notify_done(trigger);
42 return IRQ_HANDLED;
43 }
44
45 /* setup triggered buffer, usually in probe function */
46 iio_triggered_buffer_setup(indio_dev, sensor_iio_pollfunc,
47 sensor_trigger_handler,
48 sensor_buffer_setup_ops);
49
50The important things to notice here are:
51
52* :c:type:`iio_buffer_setup_ops`, the buffer setup functions to be called at
53 predefined points in the buffer configuration sequence (e.g. before enable,
54 after disable). If not specified, the IIO core uses the default
55 iio_triggered_buffer_setup_ops.
56* **sensor_iio_pollfunc**, the function that will be used as top half of poll
57 function. It should do as little processing as possible, because it runs in
58 interrupt context. The most common operation is recording of the current
59 timestamp and for this reason one can use the IIO core defined
60 :c:func:`iio_pollfunc_store_time` function.
61* **sensor_trigger_handler**, the function that will be used as bottom half of
62 the poll function. This runs in the context of a kernel thread and all the
63 processing takes place here. It usually reads data from the device and
64 stores it in the internal buffer together with the timestamp recorded in the
65 top half.
66
67More details
68============
69.. kernel-doc:: drivers/iio/buffer/industrialio-triggered-buffer.c
diff --git a/Documentation/driver-api/iio/triggers.rst b/Documentation/driver-api/iio/triggers.rst
new file mode 100644
index 000000000000..f89d37e7dd82
--- /dev/null
+++ b/Documentation/driver-api/iio/triggers.rst
@@ -0,0 +1,80 @@
1========
2Triggers
3========
4
5* struct :c:type:`iio_trigger` — industrial I/O trigger device
6* :c:func:`devm_iio_trigger_alloc` — Resource-managed iio_trigger_alloc
7* :c:func:`devm_iio_trigger_free` — Resource-managed iio_trigger_free
8* :c:func:`devm_iio_trigger_register` — Resource-managed iio_trigger_register
9* :c:func:`devm_iio_trigger_unregister` — Resource-managed
10 iio_trigger_unregister
11* :c:func:`iio_trigger_validate_own_device` — Check if a trigger and IIO
12 device belong to the same device
13
14In many situations it is useful for a driver to be able to capture data based
15on some external event (trigger) as opposed to periodically polling for data.
16An IIO trigger can be provided by a device driver that also has an IIO device
17based on hardware generated events (e.g. data ready or threshold exceeded) or
18provided by a separate driver from an independent interrupt source (e.g. GPIO
19line connected to some external system, timer interrupt or user space writing
20a specific file in sysfs). A trigger may initiate data capture for a number of
21sensors and also it may be completely unrelated to the sensor itself.
22
23IIO trigger sysfs interface
24===========================
25
26There are two locations in sysfs related to triggers:
27
28* :file:`/sys/bus/iio/devices/trigger{Y}/*`, this directory is created once an
29 IIO trigger is registered with the IIO core and corresponds to trigger
30 with index Y.
31 Because triggers can be very different depending on type there are few
32 standard attributes that we can describe here:
33
34 * :file:`name`, trigger name that can be later used for association with a
35 device.
36 * :file:`sampling_frequency`, some timer based triggers use this attribute to
37 specify the frequency for trigger calls.
38
39* :file:`/sys/bus/iio/devices/iio:device{X}/trigger/*`, this directory is
40 created once the device supports a triggered buffer. We can associate a
41 trigger with our device by writing the trigger's name in the
42 :file:`current_trigger` file.
43
44IIO trigger setup
45=================
46
47Let's see a simple example of how to set up a trigger to be used by a driver::
48
49 struct iio_trigger_ops trigger_ops = {
50 .set_trigger_state = sample_trigger_state,
51 .validate_device = sample_validate_device,
52 };
53
54 struct iio_trigger *trig;
55
56 /* first, allocate memory for our trigger */
57 trig = iio_trigger_alloc(dev, "trig-%s-%d", name, idx);
58
59 /* setup trigger operations field */
60 trig->ops = &trigger_ops;
61
62 /* now register the trigger with the IIO core */
63 iio_trigger_register(trig);
64
65IIO trigger ops
66===============
67
68* struct :c:type:`iio_trigger_ops` — operations structure for an iio_trigger.
69
70Notice that a trigger has a set of operations attached:
71
72* :file:`set_trigger_state`, switch the trigger on/off on demand.
73* :file:`validate_device`, function to validate the device when the current
74 trigger gets changed.
75
76More details
77============
78.. kernel-doc:: include/linux/iio/trigger.h
79.. kernel-doc:: drivers/iio/industrialio-trigger.c
80 :export:
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index dbd34c9c1d93..60db00d1532b 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -16,11 +16,15 @@ available subsections can be seen below.
16 16
17 basics 17 basics
18 infrastructure 18 infrastructure
19 pm/index
20 device-io
19 dma-buf 21 dma-buf
20 device_link 22 device_link
21 message-based 23 message-based
22 sound 24 sound
23 frame-buffer 25 frame-buffer
26 regulator
27 iio/index
24 input 28 input
25 usb 29 usb
26 spi 30 spi
diff --git a/Documentation/driver-api/pm/conf.py b/Documentation/driver-api/pm/conf.py
new file mode 100644
index 000000000000..a89fac11272f
--- /dev/null
+++ b/Documentation/driver-api/pm/conf.py
@@ -0,0 +1,10 @@
1# -*- coding: utf-8; mode: python -*-
2
3project = "Device Power Management"
4
5tags.add("subproject")
6
7latex_documents = [
8 ('index', 'pm.tex', project,
9 'The kernel development community', 'manual'),
10]
diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
new file mode 100644
index 000000000000..bedd32388dac
--- /dev/null
+++ b/Documentation/driver-api/pm/devices.rst
@@ -0,0 +1,736 @@
1.. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>`
2.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
3.. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>`
4.. |struct device_type| replace:: :c:type:`struct device_type <device_type>`
5.. |struct class| replace:: :c:type:`struct class <class>`
6.. |struct wakeup_source| replace:: :c:type:`struct wakeup_source <wakeup_source>`
7.. |struct device| replace:: :c:type:`struct device <device>`
8
9==============================
10Device Power Management Basics
11==============================
12
13::
14
15 Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
16 Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
17 Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
18
19Most of the code in Linux is device drivers, so most of the Linux power
20management (PM) code is also driver-specific. Most drivers will do very
21little; others, especially for platforms with small batteries (like cell
22phones), will do a lot.
23
24This writeup gives an overview of how drivers interact with system-wide
25power management goals, emphasizing the models and interfaces that are
26shared by everything that hooks up to the driver model core. Read it as
27background for the domain-specific work you'd do with any specific driver.
28
29
30Two Models for Device Power Management
31======================================
32
33Drivers will use one or both of these models to put devices into low-power
34states:
35
36 System Sleep model:
37
38 Drivers can enter low-power states as part of entering system-wide
39 low-power states like "suspend" (also known as "suspend-to-RAM"), or
40 (mostly for systems with disks) "hibernation" (also known as
41 "suspend-to-disk").
42
43 This is something that device, bus, and class drivers collaborate on
44 by implementing various role-specific suspend and resume methods to
45 cleanly power down hardware and software subsystems, then reactivate
46 them without loss of data.
47
48 Some drivers can manage hardware wakeup events, which make the system
49 leave the low-power state. This feature may be enabled or disabled
50 using the relevant :file:`/sys/devices/.../power/wakeup` file (for
51 Ethernet drivers the ioctl interface used by ethtool may also be used
52 for this purpose); enabling it may cost some power usage, but lets the
53 whole system enter low-power states more often.
54
55 Runtime Power Management model:
56
57 Devices may also be put into low-power states while the system is
58 running, independently of other power management activity in principle.
59 However, devices are not generally independent of each other (for
60 example, a parent device cannot be suspended unless all of its child
61 devices have been suspended). Moreover, depending on the bus type the
62 device is on, it may be necessary to carry out some bus-specific
63 operations on the device for this purpose. Devices put into low power
64 states at run time may require special handling during system-wide power
65 transitions (suspend or hibernation).
66
67 For these reasons not only the device driver itself, but also the
68 appropriate subsystem (bus type, device type or device class) driver and
69 the PM core are involved in runtime power management. As in the system
70 sleep power management case, they need to collaborate by implementing
71 various role-specific suspend and resume methods, so that the hardware
72 is cleanly powered down and reactivated without data or service loss.
73
74There's not a lot to be said about those low-power states except that they are
75very system-specific, and often device-specific. Also, that if enough devices
76have been put into low-power states (at runtime), the effect may be very similar
77to entering some system-wide low-power state (system sleep) ... and that
78synergies exist, so that several drivers using runtime PM might put the system
79into a state where even deeper power saving options are available.
80
81Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except
82for wakeup events), no more data read or written, and requests from upstream
83drivers are no longer accepted. A given bus or platform may have different
84requirements though.
85
86Examples of hardware wakeup events include an alarm from a real time clock,
87network wake-on-LAN packets, keyboard or mouse activity, and media insertion
88or removal (for PCMCIA, MMC/SD, USB, and so on).
89
90Interfaces for Entering System Sleep States
91===========================================
92
93There are programming interfaces provided for subsystems (bus type, device type,
94device class) and device drivers to allow them to participate in the power
95management of devices they are concerned with. These interfaces cover both
96system sleep and runtime power management.
97
98
99Device Power Management Operations
100----------------------------------
101
102Device power management operations, at the subsystem level as well as at the
103device driver level, are implemented by defining and populating objects of type
104|struct dev_pm_ops| defined in :file:`include/linux/pm.h`. The roles of the
105methods included in it will be explained in what follows. For now, it should be
106sufficient to remember that the last three methods are specific to runtime power
107management while the remaining ones are used during system-wide power
108transitions.
109
110There also is a deprecated "old" or "legacy" interface for power management
111operations available at least for some subsystems. This approach does not use
112|struct dev_pm_ops| objects and it is suitable only for implementing system
113sleep power management methods in a limited way. Therefore it is not described
114in this document, so please refer directly to the source code for more
115information about it.
116
117
118Subsystem-Level Methods
119-----------------------
120
121The core methods to suspend and resume devices reside in
122|struct dev_pm_ops| pointed to by the :c:member:`ops` member of
123|struct dev_pm_domain|, or by the :c:member:`pm` member of |struct bus_type|,
124|struct device_type| and |struct class|. They are mostly of interest to the
125people writing infrastructure for platforms and buses, like PCI or USB, or
126device type and device class drivers. They also are relevant to the writers of
127device drivers whose subsystems (PM domains, device types, device classes and
128bus types) don't provide all power management methods.
129
130Bus drivers implement these methods as appropriate for the hardware and the
131drivers using it; PCI works differently from USB, and so on. Not many people
132write subsystem-level drivers; most driver code is a "device driver" that builds
133on top of bus-specific framework code.
134
135For more information on these driver calls, see the description later;
136they are called in phases for every device, respecting the parent-child
137sequencing in the driver model tree.
138
139
140:file:`/sys/devices/.../power/wakeup` files
141-------------------------------------------
142
143All device objects in the driver model contain fields that control the handling
144of system wakeup events (hardware signals that can force the system out of a
145sleep state). These fields are initialized by bus or device driver code using
146:c:func:`device_set_wakeup_capable()` and :c:func:`device_set_wakeup_enable()`,
147defined in :file:`include/linux/pm_wakeup.h`.
148
149The :c:member:`power.can_wakeup` flag just records whether the device (and its
150driver) can physically support wakeup events. The
151:c:func:`device_set_wakeup_capable()` routine affects this flag. The
152:c:member:`power.wakeup` field is a pointer to an object of type
153|struct wakeup_source| used for controlling whether or not the device should use
154its system wakeup mechanism and for notifying the PM core of system wakeup
155events signaled by the device. This object is only present for wakeup-capable
156devices (i.e. devices whose :c:member:`can_wakeup` flags are set) and is created
157(or removed) by :c:func:`device_set_wakeup_capable()`.
158
159Whether or not a device is capable of issuing wakeup events is a hardware
160matter, and the kernel is responsible for keeping track of it. By contrast,
161whether or not a wakeup-capable device should issue wakeup events is a policy
162decision, and it is managed by user space through a sysfs attribute: the
163:file:`power/wakeup` file. User space can write the "enabled" or "disabled"
164strings to it to indicate whether or not, respectively, the device is supposed
165to signal system wakeup. This file is only present if the
166:c:member:`power.wakeup` object exists for the given device and is created (or
167removed) along with that object, by :c:func:`device_set_wakeup_capable()`.
168Reads from the file will return the corresponding string.
169
170The initial value in the :file:`power/wakeup` file is "disabled" for the
171majority of devices; the major exceptions are power buttons, keyboards, and
172Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with ethtool.
173It should also default to "enabled" for devices that don't generate wakeup
174requests on their own but merely forward wakeup requests from one bus to another
175(like PCI Express ports).
176
177The :c:func:`device_may_wakeup()` routine returns true only if the
178:c:member:`power.wakeup` object exists and the corresponding :file:`power/wakeup`
179file contains the "enabled" string. This information is used by subsystems,
180like the PCI bus type code, to see whether or not to enable the devices' wakeup
181mechanisms. If device wakeup mechanisms are enabled or disabled directly by
182drivers, they also should use :c:func:`device_may_wakeup()` to decide what to do
183during a system sleep transition. Device drivers, however, are not expected to
184call :c:func:`device_set_wakeup_enable()` directly in any case.
185
186It ought to be noted that system wakeup is conceptually different from "remote
187wakeup" used by runtime power management, although it may be supported by the
188same physical mechanism. Remote wakeup is a feature allowing devices in
189low-power states to trigger specific interrupts to signal conditions in which
190they should be put into the full-power state. Those interrupts may or may not
191be used to signal system wakeup events, depending on the hardware design. On
192some systems it is impossible to trigger them from system sleep states. In any
193case, remote wakeup should always be enabled for runtime power management for
194all devices and drivers that support it.
195
196
197:file:`/sys/devices/.../power/control` files
198--------------------------------------------
199
200Each device in the driver model has a flag to control whether it is subject to
201runtime power management. This flag, :c:member:`runtime_auto`, is initialized
202by the bus type (or generally subsystem) code using :c:func:`pm_runtime_allow()`
203or :c:func:`pm_runtime_forbid()`; the default is to allow runtime power
204management.
205
206The setting can be adjusted by user space by writing either "on" or "auto" to
207the device's :file:`power/control` sysfs file. Writing "auto" calls
208:c:func:`pm_runtime_allow()`, setting the flag and allowing the device to be
209runtime power-managed by its driver. Writing "on" calls
210:c:func:`pm_runtime_forbid()`, clearing the flag, returning the device to full
211power if it was in a low-power state, and preventing the
212device from being runtime power-managed. User space can check the current value
213of the :c:member:`runtime_auto` flag by reading that file.
214
215The device's :c:member:`runtime_auto` flag has no effect on the handling of
216system-wide power transitions. In particular, the device can (and in the
217majority of cases should and will) be put into a low-power state during a
218system-wide transition to a sleep state even though its :c:member:`runtime_auto`
219flag is clear.
220
221For more information about the runtime power management framework, refer to
222:file:`Documentation/power/runtime_pm.txt`.
223
224
225Calling Drivers to Enter and Leave System Sleep States
226======================================================
227
228When the system goes into a sleep state, each device's driver is asked to
229suspend the device by putting it into a state compatible with the target
230system state. That's usually some version of "off", but the details are
231system-specific. Also, wakeup-enabled devices will usually stay partly
232functional in order to wake the system.
233
234When the system leaves that low-power state, the device's driver is asked to
235resume it by returning it to full power. The suspend and resume operations
236always go together, and both are multi-phase operations.
237
238For simple drivers, suspend might quiesce the device using class code
239and then turn its hardware as "off" as possible during suspend_noirq. The
240matching resume calls would then completely reinitialize the hardware
241before reactivating its class I/O queues.
242
243More power-aware drivers might prepare the devices for triggering system wakeup
244events.
245
246
247Call Sequence Guarantees
248------------------------
249
250To ensure that bridges and similar links needing to talk to a device are
251available when the device is suspended or resumed, the device hierarchy is
252walked in a bottom-up order to suspend devices. A top-down order is
253used to resume those devices.
254
255The ordering of the device hierarchy is defined by the order in which devices
256get registered: a child can never be registered, probed or resumed before
257its parent; and can't be removed or suspended after that parent.
258
259The policy is that the device hierarchy should match hardware bus topology.
260[Or at least the control bus, for devices which use multiple busses.]
261In particular, this means that a device registration may fail if the parent of
262the device is suspending (i.e. has been chosen by the PM core as the next
263device to suspend) or has already suspended, as well as after all of the other
264devices have been suspended. Device drivers must be prepared to cope with such
265situations.
266
267
268System Power Management Phases
269------------------------------
270
271Suspending or resuming the system is done in several phases. Different phases
272are used for suspend-to-idle, shallow (standby), and deep ("suspend-to-RAM")
273sleep states and the hibernation state ("suspend-to-disk"). Each phase involves
274executing callbacks for every device before the next phase begins. Not all
275buses or classes support all these callbacks and not all drivers use all the
276callbacks. The various phases always run after tasks have been frozen and
277before they are unfrozen. Furthermore, the ``*_noirq`` phases run at a time
278when IRQ handlers have been disabled (except for those marked with the
279IRQF_NO_SUSPEND flag).
280
281All phases use PM domain, bus, type, class or driver callbacks (that is, methods
282defined in ``dev->pm_domain->ops``, ``dev->bus->pm``, ``dev->type->pm``,
283``dev->class->pm`` or ``dev->driver->pm``). These callbacks are regarded by the
284PM core as mutually exclusive. Moreover, PM domain callbacks always take
285precedence over all of the other callbacks and, for example, type callbacks take
286precedence over bus, class and driver callbacks. To be precise, the following
287rules are used to determine which callback to execute in the given phase:
288
289 1. If ``dev->pm_domain`` is present, the PM core will choose the callback
290 provided by ``dev->pm_domain->ops`` for execution.
291
292 2. Otherwise, if both ``dev->type`` and ``dev->type->pm`` are present, the
293 callback provided by ``dev->type->pm`` will be chosen for execution.
294
295 3. Otherwise, if both ``dev->class`` and ``dev->class->pm`` are present,
296 the callback provided by ``dev->class->pm`` will be chosen for
297 execution.
298
299 4. Otherwise, if both ``dev->bus`` and ``dev->bus->pm`` are present, the
300 callback provided by ``dev->bus->pm`` will be chosen for execution.
301
302This allows PM domains and device types to override callbacks provided by bus
303types or device classes if necessary.
304
305The PM domain, type, class and bus callbacks may in turn invoke device- or
306driver-specific methods stored in ``dev->driver->pm``, but they don't have to do
307that.
308
309If the subsystem callback chosen for execution is not present, the PM core will
310execute the corresponding method from the ``dev->driver->pm`` set instead if
311there is one.
312
313
314Entering System Suspend
315-----------------------
316
317When the system goes into the freeze, standby or memory sleep state,
318the phases are: ``prepare``, ``suspend``, ``suspend_late``, ``suspend_noirq``.
319
320 1. The ``prepare`` phase is meant to prevent races by preventing new
321 devices from being registered; the PM core would never know that all the
322 children of a device had been suspended if new children could be
323 registered at will. [By contrast, from the PM core's perspective,
324 devices may be unregistered at any time.] Unlike the other
325 suspend-related phases, during the ``prepare`` phase the device
326 hierarchy is traversed top-down.
327
328 After the ``->prepare`` callback method returns, no new children may be
329 registered below the device. The method may also prepare the device or
330 driver in some way for the upcoming system power transition, but it
331 should not put the device into a low-power state.
332
333 For devices supporting runtime power management, the return value of the
334 prepare callback can be used to indicate to the PM core that it may
335 safely leave the device in runtime suspend (if runtime-suspended
336 already), provided that all of the device's descendants are also left in
337 runtime suspend. Namely, if the prepare callback returns a positive
338 number and that happens for all of the descendants of the device too,
339 and all of them (including the device itself) are runtime-suspended, the
340 PM core will skip the ``suspend``, ``suspend_late`` and
341 ``suspend_noirq`` phases as well as all of the corresponding phases of
342 the subsequent device resume for all of these devices. In that case,
343 the ``->complete`` callback will be invoked directly after the
344 ``->prepare`` callback and is entirely responsible for putting the
345 device into a consistent state as appropriate.
346
347 Note that this direct-complete procedure applies even if the device is
348 disabled for runtime PM; only the runtime-PM status matters. It follows
349 that if a device has system-sleep callbacks but does not support runtime
350 PM, then its prepare callback must never return a positive value. This
351 is because all such devices are initially set to runtime-suspended with
352 runtime PM disabled.
353
354 2. The ``->suspend`` methods should quiesce the device to stop it from
355 performing I/O. They also may save the device registers and put it into
356 the appropriate low-power state, depending on the bus type the device is
357 on, and they may enable wakeup events.
358
359 3. For a number of devices it is convenient to split suspend into the
360 "quiesce device" and "save device state" phases, in which cases
361 ``suspend_late`` is meant to do the latter. It is always executed after
362 runtime power management has been disabled for the device in question.
363
364 4. The ``suspend_noirq`` phase occurs after IRQ handlers have been disabled,
365 which means that the driver's interrupt handler will not be called while
366 the callback method is running. The ``->suspend_noirq`` methods should
367 save the values of the device's registers that weren't saved previously
368 and finally put the device into the appropriate low-power state.
369
370 The majority of subsystems and device drivers need not implement this
371 callback. However, bus types allowing devices to share interrupt
372 vectors, like PCI, generally need it; otherwise a driver might encounter
373 an error during the suspend phase by fielding a shared interrupt
374 generated by some other device after its own device had been set to low
375 power.
376
377At the end of these phases, drivers should have stopped all I/O transactions
378(DMA, IRQs), saved enough state that they can re-initialize or restore previous
379state (as needed by the hardware), and placed the device into a low-power state.
380On many platforms they will gate off one or more clock sources; sometimes they
381will also switch off power supplies or reduce voltages. [Drivers supporting
382runtime PM may already have performed some or all of these steps.]
383
384If :c:func:`device_may_wakeup()` returns ``true``, the device should be
385prepared for generating hardware wakeup signals to trigger a system wakeup event
386when the system is in the sleep state. For example, :c:func:`enable_irq_wake()`
387might identify GPIO signals hooked up to a switch or other external hardware,
388and :c:func:`pci_enable_wake()` does something similar for the PCI PME signal.
389
390If any of these callbacks returns an error, the system won't enter the desired
391low-power state. Instead, the PM core will unwind its actions by resuming all
392the devices that were suspended.
393
394
395Leaving System Suspend
396----------------------
397
398When resuming from freeze, standby or memory sleep, the phases are:
399``resume_noirq``, ``resume_early``, ``resume``, ``complete``.
400
401 1. The ``->resume_noirq`` callback methods should perform any actions
402 needed before the driver's interrupt handlers are invoked. This
403 generally means undoing the actions of the ``suspend_noirq`` phase. If
404 the bus type permits devices to share interrupt vectors, like PCI, the
405 method should bring the device and its driver into a state in which the
406 driver can recognize if the device is the source of incoming interrupts,
407 if any, and handle them correctly.
408
409 For example, the PCI bus type's ``->pm.resume_noirq()`` puts the device
410 into the full-power state (D0 in the PCI terminology) and restores the
411 standard configuration registers of the device. Then it calls the
412 device driver's ``->pm.resume_noirq()`` method to perform device-specific
413 actions.
414
415 2. The ``->resume_early`` methods should prepare devices for the execution
416 of the resume methods. This generally involves undoing the actions of
417 the preceding ``suspend_late`` phase.
418
419 3. The ``->resume`` methods should bring the device back to its operating
420 state, so that it can perform normal I/O. This generally involves
421 undoing the actions of the ``suspend`` phase.
422
423 4. The ``complete`` phase should undo the actions of the ``prepare`` phase.
424 For this reason, unlike the other resume-related phases, during the
425 ``complete`` phase the device hierarchy is traversed bottom-up.
426
427 Note, however, that new children may be registered below the device as
428 soon as the ``->resume`` callbacks occur; it's not necessary to wait
429 until the ``complete`` phase with that.
430
431 Moreover, if the preceding ``->prepare`` callback returned a positive
432 number, the device may have been left in runtime suspend throughout the
433 whole system suspend and resume (the ``suspend``, ``suspend_late``,
434 ``suspend_noirq`` phases of system suspend and the ``resume_noirq``,
435 ``resume_early``, ``resume`` phases of system resume may have been
436 skipped for it). In that case, the ``->complete`` callback is entirely
437 responsible for putting the device into a consistent state after system
438 suspend if necessary. [For example, it may need to queue up a runtime
439 resume request for the device for this purpose.] To check if that is
440 the case, the ``->complete`` callback can consult the device's
441 ``power.direct_complete`` flag. Namely, if that flag is set when the
442 ``->complete`` callback is being run, it has been called directly after
443 the preceding ``->prepare`` and special actions may be required
444 to make the device work correctly afterward.
445
446At the end of these phases, drivers should be as functional as they were before
447suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
448gated on.
449
450However, the details here may again be platform-specific. For example,
451some systems support multiple "run" states, and the mode in effect at
452the end of resume might not be the one which preceded suspension.
453That means availability of certain clocks or power supplies changed,
454which could easily affect how a driver works.
455
456Drivers need to be able to handle hardware which has been reset since all of the
457suspend methods were called, for example by complete reinitialization.
458This may be the hardest part, and the one most protected by NDA'd documents
459and chip errata. It's simplest if the hardware state hasn't changed since
460the suspend was carried out, but that can only be guaranteed if the target
461system sleep entered was suspend-to-idle. For the other system sleep states
462that may not be the case (and usually isn't for ACPI-defined system sleep
463states, like S3).
464
465Drivers must also be prepared to notice that the device has been removed
466while the system was powered down, whenever that's physically possible.
467PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
468where common Linux platforms will see such removal. Details of how drivers
469will notice and handle such removals are currently bus-specific, and often
470involve a separate thread.
471
472These callbacks may return an error value, but the PM core will ignore such
473errors since there's nothing it can do about them other than printing them in
474the system log.
475
476
477Entering Hibernation
478--------------------
479
480Hibernating the system is more complicated than putting it into sleep states,
481because it involves creating and saving a system image. Therefore there are
482more phases for hibernation, with a different set of callbacks. These phases
483always run after tasks have been frozen and enough memory has been freed.
484
485The general procedure for hibernation is to quiesce all devices ("freeze"),
486create an image of the system memory while everything is stable, reactivate all
487devices ("thaw"), write the image to permanent storage, and finally shut down
488the system ("power off"). The phases used to accomplish this are: ``prepare``,
489``freeze``, ``freeze_late``, ``freeze_noirq``, ``thaw_noirq``, ``thaw_early``,
490``thaw``, ``complete``, ``prepare``, ``poweroff``, ``poweroff_late``,
491``poweroff_noirq``.
492
493 1. The ``prepare`` phase is discussed in the "Entering System Suspend"
494 section above.
495
496 2. The ``->freeze`` methods should quiesce the device so that it doesn't
497 generate IRQs or DMA, and they may need to save the values of device
498 registers. However, the device does not have to be put in a low-power
499 state, and to save time it's best not to do so. Also, the device should
500 not be prepared to generate wakeup events.
501
502 3. The ``freeze_late`` phase is analogous to the ``suspend_late`` phase
503 described earlier, except that the device should not be put into a
504 low-power state and should not be allowed to generate wakeup events.
505
506 4. The ``freeze_noirq`` phase is analogous to the ``suspend_noirq`` phase
507 discussed earlier, except again that the device should not be put into
508 a low-power state and should not be allowed to generate wakeup events.
509
510At this point the system image is created. All devices should be inactive and
511the contents of memory should remain undisturbed while this happens, so that the
512image forms an atomic snapshot of the system state.
513
514 5. The ``thaw_noirq`` phase is analogous to the ``resume_noirq`` phase
515 discussed earlier. The main difference is that its methods can assume
516 the device is in the same state as at the end of the ``freeze_noirq``
517 phase.
518
519 6. The ``thaw_early`` phase is analogous to the ``resume_early`` phase
520 described above. Its methods should undo the actions of the preceding
521 ``freeze_late``, if necessary.
522
523 7. The ``thaw`` phase is analogous to the ``resume`` phase discussed
524 earlier. Its methods should bring the device back to an operating
525 state, so that it can be used for saving the image if necessary.
526
527 8. The ``complete`` phase is discussed in the "Leaving System Suspend"
528 section above.
529
530At this point the system image is saved, and the devices then need to be
531prepared for the upcoming system shutdown. This is much like suspending them
532before putting the system into the suspend-to-idle, shallow or deep sleep state,
533and the phases are similar.
534
535 9. The ``prepare`` phase is discussed above.
536
537 10. The ``poweroff`` phase is analogous to the ``suspend`` phase.
538
539 11. The ``poweroff_late`` phase is analogous to the ``suspend_late`` phase.
540
541 12. The ``poweroff_noirq`` phase is analogous to the ``suspend_noirq`` phase.
542
543The ``->poweroff``, ``->poweroff_late`` and ``->poweroff_noirq`` callbacks
544should do essentially the same things as the ``->suspend``, ``->suspend_late``
545and ``->suspend_noirq`` callbacks, respectively. The only notable difference is
546that they need not store the device register values, because the registers
547should already have been stored during the ``freeze``, ``freeze_late`` or
548``freeze_noirq`` phases.
549
550
551Leaving Hibernation
552-------------------
553
554Resuming from hibernation is, again, more complicated than resuming from a sleep
555state in which the contents of main memory are preserved, because it requires
556a system image to be loaded into memory and the pre-hibernation memory contents
557to be restored before control can be passed back to the image kernel.
558
559Although in principle the image might be loaded into memory and the
560pre-hibernation memory contents restored by the boot loader, in practice this
561can't be done because boot loaders aren't smart enough and there is no
562established protocol for passing the necessary information. So instead, the
563boot loader loads a fresh instance of the kernel, called "the restore kernel",
564into memory and passes control to it in the usual way. Then the restore kernel
565reads the system image, restores the pre-hibernation memory contents, and passes
566control to the image kernel. Thus two different kernel instances are involved
567in resuming from hibernation. In fact, the restore kernel may be completely
568different from the image kernel: a different configuration and even a different
569version. This has important consequences for device drivers and their
570subsystems.
571
572To be able to load the system image into memory, the restore kernel needs to
573include at least a subset of device drivers allowing it to access the storage
574medium containing the image, although it doesn't need to include all of the
575drivers present in the image kernel. After the image has been loaded, the
576devices managed by the restore kernel need to be prepared for passing control back
577to the image kernel. This is very similar to the initial steps involved in
578creating a system image, and it is accomplished in the same way, using
579``prepare``, ``freeze``, and ``freeze_noirq`` phases. However, the devices
580affected by these phases are only those having drivers in the restore kernel;
581other devices will still be in whatever state the boot loader left them.
582
583Should the restoration of the pre-hibernation memory contents fail, the restore
584kernel would go through the "thawing" procedure described above, using the
585``thaw_noirq``, ``thaw_early``, ``thaw``, and ``complete`` phases, and then
586continue running normally. This happens only rarely. Most often the
587pre-hibernation memory contents are restored successfully and control is passed
588to the image kernel, which then becomes responsible for bringing the system back
589to the working state.
590
591To achieve this, the image kernel must restore the devices' pre-hibernation
592functionality. The operation is much like waking up from a sleep state (with
593the memory contents preserved), although it involves different phases:
594``restore_noirq``, ``restore_early``, ``restore``, ``complete``.
595
596 1. The ``restore_noirq`` phase is analogous to the ``resume_noirq`` phase.
597
598 2. The ``restore_early`` phase is analogous to the ``resume_early`` phase.
599
600 3. The ``restore`` phase is analogous to the ``resume`` phase.
601
602 4. The ``complete`` phase is discussed above.
603
604The main difference from ``resume[_early|_noirq]`` is that
605``restore[_early|_noirq]`` must assume the device has been accessed and
606reconfigured by the boot loader or the restore kernel. Consequently, the state
607of the device may be different from the state remembered from the ``freeze``,
608``freeze_late`` and ``freeze_noirq`` phases. The device may even need to be
609reset and completely re-initialized. In many cases this difference doesn't
610matter, so the ``->resume[_early|_noirq]`` and ``->restore[_early|_noirq]``
611method pointers can be set to the same routines. Nevertheless, different
612callback pointers are used in case there is a situation where it actually does
613matter.
614
615
616Power Management Notifiers
617==========================
618
619There are some operations that cannot be carried out by the power management
620callbacks discussed above, because the callbacks occur too late or too early.
621To handle these cases, subsystems and device drivers may register power
622management notifiers that are called before tasks are frozen and after they have
623been thawed. Generally speaking, the PM notifiers are suitable for performing
624actions that either require user space to be available, or at least won't
625interfere with user space.
626
627For details refer to :doc:`notifiers`.
628
629
630Device Low-Power (suspend) States
631=================================
632
633Device low-power states aren't standard. One device might only handle
634"on" and "off", while another might support a dozen different versions of
635"on" (how many engines are active?), plus a state that gets back to "on"
636faster than from a full "off".
637
638Some buses define rules about what different suspend states mean. PCI
639gives one example: after the suspend sequence completes, a non-legacy
640PCI device may not perform DMA or issue IRQs, and any wakeup events it
641issues would be issued through the PME# bus signal. Plus, there are
642several PCI-standard device states, some of which are optional.
643
644In contrast, integrated system-on-chip processors often use IRQs as the
645wakeup event sources (so drivers would call :c:func:`enable_irq_wake`) and
646might be able to treat DMA completion as a wakeup event (sometimes DMA can stay
647active too, it'd only be the CPU and some peripherals that sleep).
648
649Some details here may be platform-specific. Systems may have devices that
650can be fully active in certain sleep states, such as an LCD display that's
651refreshed using DMA while most of the system is sleeping lightly ... and
652its frame buffer might even be updated by a DSP or other non-Linux CPU while
653the Linux control processor stays idle.
654
655Moreover, the specific actions taken may depend on the target system state.
656One target system state might allow a given device to be very operational;
657another might require a hard shut down with re-initialization on resume.
658And two different target systems might use the same device in different
659ways; the aforementioned LCD might be active in one product's "standby",
660but a different product using the same SOC might work differently.
661
662
663Device Power Management Domains
664===============================
665
666Sometimes devices share reference clocks or other power resources. In those
667cases it generally is not possible to put devices into low-power states
668individually. Instead, a set of devices sharing a power resource can be put
669into a low-power state together at the same time by turning off the shared
670power resource. Of course, they also need to be put into the full-power state
671together, by turning the shared power resource on. A set of devices with this
672property is often referred to as a power domain. A power domain may also be
673nested inside another power domain. The nested domain is referred to as the
674sub-domain of the parent domain.
675
676Support for power domains is provided through the :c:member:`pm_domain` field of
677|struct device|. This field is a pointer to an object of type
678|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
679of power management callbacks analogous to the subsystem-level and device driver
680callbacks that are executed for the given device during all power transitions,
681instead of the respective subsystem-level callbacks. Specifically, if a
682device's :c:member:`pm_domain` pointer is not NULL, the ``->suspend()`` callback
683from the object pointed to by it will be executed instead of its subsystem's
684(e.g. bus type's) ``->suspend()`` callback and analogously for all of the
685remaining callbacks. In other words, power management domain callbacks, if
686defined for the given device, always take precedence over the callbacks provided
687by the device's subsystem (e.g. bus type).
688
689The support for device power management domains is only relevant to platforms
690needing to use the same device driver power management callbacks in many
691different power domain configurations and wanting to avoid incorporating the
692support for power domains into subsystem-level callbacks, for example by
693modifying the platform bus type. Other platforms need not implement it or take
694it into account in any way.
695
696Devices may be defined as IRQ-safe which indicates to the PM core that their
697runtime PM callbacks may be invoked with disabled interrupts (see
698:file:`Documentation/power/runtime_pm.txt` for more information). If an
699IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be
700disallowed, unless the domain itself is defined as IRQ-safe. However, it
701makes sense to define a PM domain as IRQ-safe only if all the devices in it
702are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime
703PM of the parent is only allowed if the parent itself is IRQ-safe too with the
704additional restriction that all child domains of an IRQ-safe parent must also
705be IRQ-safe.
706
707
708Runtime Power Management
709========================
710
711Many devices are able to dynamically power down while the system is still
712running. This feature is useful for devices that are not being used, and
713can offer significant power savings on a running system. These devices
714often support a range of runtime power states, which might use names such
715as "off", "sleep", "idle", "active", and so on. Those states will in some
716cases (like PCI) be partially constrained by the bus the device uses, and will
717usually include hardware states that are also used in system sleep states.
718
719A system-wide power transition can be started while some devices are in low
720power states due to runtime power management. The system sleep PM callbacks
721should recognize such situations and react to them appropriately, but the
722necessary actions are subsystem-specific.
723
724In some cases the decision may be made at the subsystem level while in other
725cases the device driver may be left to decide. In some cases it may be
726desirable to leave a suspended device in that state during a system-wide power
727transition, but in other cases the device must be put back into the full-power
728state temporarily, for example so that its system wakeup capability can be
729disabled. This all depends on the hardware and the design of the subsystem and
730device driver in question.
731
732During system-wide resume from a sleep state it's easiest to put devices into
733the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`.
734Refer to that document for more information regarding this particular issue as
735well as for information on the device runtime power management framework in
736general.
diff --git a/Documentation/driver-api/pm/index.rst b/Documentation/driver-api/pm/index.rst
new file mode 100644
index 000000000000..2f6d0e9cf6b7
--- /dev/null
+++ b/Documentation/driver-api/pm/index.rst
@@ -0,0 +1,16 @@
1=======================
2Device Power Management
3=======================
4
5.. toctree::
6
7 devices
8 notifiers
9 types
10
11.. only:: subproject and html
12
13 Indices
14 =======
15
16 * :ref:`genindex`
diff --git a/Documentation/driver-api/pm/notifiers.rst b/Documentation/driver-api/pm/notifiers.rst
new file mode 100644
index 000000000000..62f860026992
--- /dev/null
+++ b/Documentation/driver-api/pm/notifiers.rst
@@ -0,0 +1,70 @@
1=============================
2Suspend/Hibernation Notifiers
3=============================
4
5::
6
7 Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
8
9There are some operations that subsystems or drivers may want to carry out
10before hibernation/suspend or after restore/resume, but they require the system
11to be fully functional, so the drivers' and subsystems' ``->suspend()`` and
12``->resume()`` or even ``->prepare()`` and ``->complete()`` callbacks are not
13suitable for this purpose.
14
15For example, device drivers may want to upload firmware to their devices after
16resume/restore, but they cannot do it by calling :c:func:`request_firmware()`
17from their ``->resume()`` or ``->complete()`` callback routines (user land
18processes are frozen at these points). The solution may be to load the firmware
19into memory before processes are frozen and upload it from there in the
20``->resume()`` routine. A suspend/hibernation notifier may be used for that.
21
22Subsystems or drivers having such needs can register suspend notifiers that
23will be called upon the following events by the PM core:
24
25``PM_HIBERNATION_PREPARE``
26 The system is going to hibernate, tasks will be frozen immediately. This
27 is different from ``PM_SUSPEND_PREPARE`` below, because in this case
28 additional work is done between the notifiers and the invocation of PM
29 callbacks for the "freeze" transition.
30
31``PM_POST_HIBERNATION``
32 The system memory state has been restored from a hibernation image or an
33 error occurred during hibernation. Device restore callbacks have been
34 executed and tasks have been thawed.
35
36``PM_RESTORE_PREPARE``
37 The system is going to restore a hibernation image. If all goes well,
38 the restored image kernel will issue a ``PM_POST_HIBERNATION``
39 notification.
40
41``PM_POST_RESTORE``
42 An error occurred during restore from hibernation. Device restore
43 callbacks have been executed and tasks have been thawed.
44
45``PM_SUSPEND_PREPARE``
46 The system is preparing for suspend.
47
48``PM_POST_SUSPEND``
49 The system has just resumed or an error occurred during suspend. Device
50 resume callbacks have been executed and tasks have been thawed.
51
52It is generally assumed that whatever the notifiers do for
53``PM_HIBERNATION_PREPARE`` should be undone for ``PM_POST_HIBERNATION``.
54Analogously, operations carried out for ``PM_SUSPEND_PREPARE`` should be
55reversed for ``PM_POST_SUSPEND``.
56
57Moreover, if one of the notifiers fails for the ``PM_HIBERNATION_PREPARE`` or
58``PM_SUSPEND_PREPARE`` event, the notifiers that have already succeeded for that
59event will be called for ``PM_POST_HIBERNATION`` or ``PM_POST_SUSPEND``,
60respectively.
61
62The hibernation and suspend notifiers are called with :c:data:`pm_mutex` held.
63They are defined in the usual way, but their last argument is meaningless (it is
64always NULL).
65
66To register and/or unregister a suspend notifier use
67:c:func:`register_pm_notifier()` and :c:func:`unregister_pm_notifier()`,
68respectively (both defined in :file:`include/linux/suspend.h`). If you don't
69need to unregister the notifier, you can also use the :c:func:`pm_notifier()`
70macro defined in :file:`include/linux/suspend.h`.
diff --git a/Documentation/driver-api/pm/types.rst b/Documentation/driver-api/pm/types.rst
new file mode 100644
index 000000000000..3ebdecc54104
--- /dev/null
+++ b/Documentation/driver-api/pm/types.rst
@@ -0,0 +1,5 @@
1==================================
2Device Power Management Data Types
3==================================
4
5.. kernel-doc:: include/linux/pm.h
diff --git a/Documentation/driver-api/regulator.rst b/Documentation/driver-api/regulator.rst
new file mode 100644
index 000000000000..520da0a5251d
--- /dev/null
+++ b/Documentation/driver-api/regulator.rst
@@ -0,0 +1,170 @@
1.. Copyright 2007-2008 Wolfson Microelectronics
2
3.. This documentation is free software; you can redistribute
4.. it and/or modify it under the terms of the GNU General Public
5.. License version 2 as published by the Free Software Foundation.
6
7=================================
8Voltage and current regulator API
9=================================
10
11:Author: Liam Girdwood
12:Author: Mark Brown
13
14Introduction
15============
16
17This framework is designed to provide a standard kernel interface to
18control voltage and current regulators.
19
20The intention is to allow systems to dynamically control regulator power
21output in order to save power and prolong battery life. This applies to
22both voltage regulators (where voltage output is controllable) and
23current sinks (where current limit is controllable).
24
25Note that additional (and currently more complete) documentation is
26available in the Linux kernel source under
27``Documentation/power/regulator``.
28
29Glossary
30--------
31
32The regulator API uses a number of terms which may not be familiar:
33
34Regulator
35
36 Electronic device that supplies power to other devices. Most regulators
37 can enable and disable their output and some can also control their
38 output voltage or current.
39
40Consumer
41
42 Electronic device which consumes power provided by a regulator. These
43 may either be static, requiring only a fixed supply, or dynamic,
44 requiring active management of the regulator at runtime.
45
46Power Domain
47
48 The electronic circuit supplied by a given regulator, including the
49 regulator and all consumer devices. The configuration of the regulator
50 is shared between all the components in the circuit.
51
52Power Management Integrated Circuit (PMIC)
53
54 An IC which contains numerous regulators and often also other
55 subsystems. In an embedded system the primary PMIC is often equivalent
56 to a combination of the PSU and southbridge in a desktop system.
57
58Consumer driver interface
59=========================
60
61This offers a similar API to the kernel clock framework. Consumer
62drivers use `get <#API-regulator-get>`__ and
63`put <#API-regulator-put>`__ operations to acquire and release
64regulators. Functions are provided to `enable <#API-regulator-enable>`__
65and `disable <#API-regulator-disable>`__ the regulator and to get and
66set the runtime parameters of the regulator.
67
68When requesting regulators consumers use symbolic names for their
69supplies, such as "Vcc", which are mapped into actual regulator devices
70by the machine interface.
71
72A stub version of this API is provided when the regulator framework is
73not in use in order to minimise the need to use ifdefs.
74
75Enabling and disabling
76----------------------
77
78The regulator API provides reference counted enabling and disabling of
79regulators. Consumer devices use the :c:func:`regulator_enable()` and
80:c:func:`regulator_disable()` functions to enable and disable
81regulators. Calls to the two functions must be balanced.
82
83Note that since multiple consumers may be using a regulator and machine
84constraints may not allow the regulator to be disabled there is no
85guarantee that calling :c:func:`regulator_disable()` will actually
86cause the supply provided by the regulator to be disabled. Consumer
87drivers should assume that the regulator may be enabled at all times.
88
89Configuration
90-------------
91
92Some consumer devices may need to be able to dynamically configure their
93supplies. For example, MMC drivers may need to select the correct
94operating voltage for their cards. This may be done while the regulator
95is enabled or disabled.
96
97The :c:func:`regulator_set_voltage()` and
98:c:func:`regulator_set_current_limit()` functions provide the primary
99interface for this. Both take ranges of voltages and currents, supporting
100drivers that do not require a specific value (eg, CPU frequency scaling
101normally permits the CPU to use a wider range of supply voltages at lower
102frequencies but does not require that the supply voltage be lowered). Where
103an exact value is required both minimum and maximum values should be
104identical.
105
106Callbacks
107---------
108
109Callbacks may also be registered for events such as regulation failures.
110
111Regulator driver interface
112==========================
113
114Drivers for regulator chips register the regulators with the regulator
115core, providing operations structures to the core. A notifier interface
116allows error conditions to be reported to the core.
117
118Registration should be triggered by explicit setup done by the platform,
119supplying a struct :c:type:`regulator_init_data` for the regulator
120containing constraint and supply information.
121
122Machine interface
123=================
124
125This interface provides a way to define how regulators are connected to
126consumers on a given system and what the valid operating parameters are
127for the system.
128
129Supplies
130--------
131
132Regulator supplies are specified using struct
133:c:type:`regulator_consumer_supply`. This is done at driver registration
134time as part of the machine constraints.
135
136Constraints
137-----------
138
139As well as defining the connections the machine interface also provides
140constraints defining the operations that clients are allowed to perform
141and the parameters that may be set. This is required since generally
142regulator devices will offer more flexibility than it is safe to use on
143a given system, for example supporting higher supply voltages than the
144consumers are rated for.
145
146This is done at driver registration time by providing a
147struct :c:type:`regulation_constraints`.
148
149The constraints may also specify an initial configuration for the
150regulator, which is particularly useful for use with
151static consumers.
152
153API reference
154=============
155
156Due to limitations of the kernel documentation framework and the
157existing layout of the source code the entire regulator API is
158documented here.
159
160.. kernel-doc:: include/linux/regulator/consumer.h
161 :internal:
162
163.. kernel-doc:: include/linux/regulator/machine.h
164 :internal:
165
166.. kernel-doc:: include/linux/regulator/driver.h
167 :internal:
168
169.. kernel-doc:: drivers/regulator/core.c
170 :export:
diff --git a/Documentation/hwmon/ds1621 b/Documentation/hwmon/ds1621
index f775e612f582..fa3407997795 100644
--- a/Documentation/hwmon/ds1621
+++ b/Documentation/hwmon/ds1621
@@ -117,10 +117,10 @@ support, which is achieved via the R0 and R1 config register bits, where:
117 117
118R0..R1 118R0..R1
119------ 119------
120 0 0 => 9 bits, 0.5 degrees Celcius 120 0 0 => 9 bits, 0.5 degrees Celsius
121 1 0 => 10 bits, 0.25 degrees Celcius 121 1 0 => 10 bits, 0.25 degrees Celsius
122 0 1 => 11 bits, 0.125 degrees Celcius 122 0 1 => 11 bits, 0.125 degrees Celsius
123 1 1 => 12 bits, 0.0625 degrees Celcius 123 1 1 => 12 bits, 0.0625 degrees Celsius
124 124
125Note: 125Note:
126At initial device power-on, the default resolution is set to 12-bits. 126At initial device power-on, the default resolution is set to 12-bits.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index cb5d77699c60..f6e641a54bbc 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -47,7 +47,7 @@ These books get into the details of how specific kernel subsystems work
47from the point of view of a kernel developer. Much of the information here 47from the point of view of a kernel developer. Much of the information here
48is taken directly from the kernel source, with supplemental material added 48is taken directly from the kernel source, with supplemental material added
49as needed (or at least as we managed to add it — probably *not* all that is 49as needed (or at least as we managed to add it — probably *not* all that is
50needed). 50needed).
51 51
52.. toctree:: 52.. toctree::
53 :maxdepth: 2 53 :maxdepth: 2
@@ -68,6 +68,14 @@ Korean translations
68 68
69 translations/ko_KR/index 69 translations/ko_KR/index
70 70
71Chinese translations
72--------------------
73
74.. toctree::
75 :maxdepth: 1
76
77 translations/zh_CN/index
78
71Indices and tables 79Indices and tables
72================== 80==================
73 81
diff --git a/Documentation/input/input.txt b/Documentation/input/input.txt
index 0acfddbe2028..7ebce100fe90 100644
--- a/Documentation/input/input.txt
+++ b/Documentation/input/input.txt
@@ -279,10 +279,10 @@ struct input_event {
279 279
280 'time' is the timestamp, it returns the time at which the event happened. 280 'time' is the timestamp, it returns the time at which the event happened.
281Type is for example EV_REL for relative moment, EV_KEY for a keypress or 281Type is for example EV_REL for relative moment, EV_KEY for a keypress or
282release. More types are defined in include/linux/input.h. 282release. More types are defined in include/uapi/linux/input-event-codes.h.
283 283
284 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete 284 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete
285list is in include/linux/input.h. 285list is in include/uapi/linux/input-event-codes.h.
286 286
287 'value' is the value the event carries. Either a relative change for 287 'value' is the value the event carries. Either a relative change for
288EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for 288EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
index 36138c632f7a..d02cfb48901c 100644
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@@ -24,7 +24,7 @@ Prerequisites
24------------- 24-------------
25 25
26First the prerequisites. Without these you have already failed, because you 26First the prerequisites. Without these you have already failed, because you
27will need to add a a 32-bit compat layer: 27will need to add a 32-bit compat layer:
28 28
29 * Only use fixed sized integers. To avoid conflicts with typedefs in userspace 29 * Only use fixed sized integers. To avoid conflicts with typedefs in userspace
30 the kernel has special types like __u32, __s64. Use them. 30 the kernel has special types like __u32, __s64. Use them.
diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
index 7f04e13ec53d..9d2096c7160d 100644
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@@ -358,7 +358,7 @@ The current Livepatch implementation has several limitations:
358 Each function has to handle TOC and save LR before it could call 358 Each function has to handle TOC and save LR before it could call
359 the ftrace handler. This operation has to be reverted on return. 359 the ftrace handler. This operation has to be reverted on return.
360 Fortunately, the generic ftrace code has the same problem and all 360 Fortunately, the generic ftrace code has the same problem and all
361 this is is handled on the ftrace level. 361 this is handled on the ftrace level.
362 362
363 363
364 + Kretprobes using the ftrace framework conflict with the patched 364 + Kretprobes using the ftrace framework conflict with the patched
diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile
index 32663602ff25..9b3e70b2cab2 100644
--- a/Documentation/media/Makefile
+++ b/Documentation/media/Makefile
@@ -36,7 +36,7 @@ quiet_cmd_genpdf = GENPDF $2
36 cmd_genpdf = convert $2 $3 36 cmd_genpdf = convert $2 $3
37 37
38quiet_cmd_gendot = DOT $2 38quiet_cmd_gendot = DOT $2
39 cmd_gendot = dot -Tsvg $2 > $3 39 cmd_gendot = dot -Tsvg $2 > $3 || { rm -f $3; exit 1; }
40 40
41%.pdf: %.svg 41%.pdf: %.svg
42 @$(call cmd,genpdf,$<,$@) 42 @$(call cmd,genpdf,$<,$@)
@@ -103,6 +103,7 @@ html: all
103epub: all 103epub: all
104xml: all 104xml: all
105latex: $(IMGPDF) all 105latex: $(IMGPDF) all
106linkcheck:
106 107
107clean: 108clean:
108 -rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null 109 -rm -f $(DOTTGT) $(IMGTGT) ${TARGETS} 2>/dev/null
diff --git a/Documentation/networking/kcm.txt b/Documentation/networking/kcm.txt
index 3476ede5bc2c..9a513295b07c 100644
--- a/Documentation/networking/kcm.txt
+++ b/Documentation/networking/kcm.txt
@@ -272,7 +272,7 @@ on the socket thus waking up the application thread. When the application
272sees the error (which may just be a disconnect) it should unattach the 272sees the error (which may just be a disconnect) it should unattach the
273socket from KCM and then close it. It is assumed that once an error is 273socket from KCM and then close it. It is assumed that once an error is
274posted on the TCP socket the data stream is unrecoverable (i.e. an error 274posted on the TCP socket the data stream is unrecoverable (i.e. an error
275may have occurred in in the middle of receiving a messssge). 275may have occurred in the middle of receiving a messssge).
276 276
277TCP connection monitoring 277TCP connection monitoring
278------------------------- 278-------------------------
diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX
index 7cb6085839f3..7f3c2def2cac 100644
--- a/Documentation/power/00-INDEX
+++ b/Documentation/power/00-INDEX
@@ -14,8 +14,6 @@ freezing-of-tasks.txt
14 - How processes and controlled during suspend 14 - How processes and controlled during suspend
15interface.txt 15interface.txt
16 - Power management user interface in /sys/power 16 - Power management user interface in /sys/power
17notifiers.txt
18 - Registering suspend notifiers in device drivers
19opp.txt 17opp.txt
20 - Operating Performance Point library 18 - Operating Performance Point library
21pci.txt 19pci.txt
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
deleted file mode 100644
index 73ddea39a9ce..000000000000
--- a/Documentation/power/devices.txt
+++ /dev/null
@@ -1,716 +0,0 @@
1Device Power Management
2
3Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
4Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
5Copyright (c) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
6
7
8Most of the code in Linux is device drivers, so most of the Linux power
9management (PM) code is also driver-specific. Most drivers will do very
10little; others, especially for platforms with small batteries (like cell
11phones), will do a lot.
12
13This writeup gives an overview of how drivers interact with system-wide
14power management goals, emphasizing the models and interfaces that are
15shared by everything that hooks up to the driver model core. Read it as
16background for the domain-specific work you'd do with any specific driver.
17
18
19Two Models for Device Power Management
20======================================
21Drivers will use one or both of these models to put devices into low-power
22states:
23
24 System Sleep model:
25 Drivers can enter low-power states as part of entering system-wide
26 low-power states like "suspend" (also known as "suspend-to-RAM"), or
27 (mostly for systems with disks) "hibernation" (also known as
28 "suspend-to-disk").
29
30 This is something that device, bus, and class drivers collaborate on
31 by implementing various role-specific suspend and resume methods to
32 cleanly power down hardware and software subsystems, then reactivate
33 them without loss of data.
34
35 Some drivers can manage hardware wakeup events, which make the system
36 leave the low-power state. This feature may be enabled or disabled
37 using the relevant /sys/devices/.../power/wakeup file (for Ethernet
38 drivers the ioctl interface used by ethtool may also be used for this
39 purpose); enabling it may cost some power usage, but let the whole
40 system enter low-power states more often.
41
42 Runtime Power Management model:
43 Devices may also be put into low-power states while the system is
44 running, independently of other power management activity in principle.
45 However, devices are not generally independent of each other (for
46 example, a parent device cannot be suspended unless all of its child
47 devices have been suspended). Moreover, depending on the bus type the
48 device is on, it may be necessary to carry out some bus-specific
49 operations on the device for this purpose. Devices put into low power
50 states at run time may require special handling during system-wide power
51 transitions (suspend or hibernation).
52
53 For these reasons not only the device driver itself, but also the
54 appropriate subsystem (bus type, device type or device class) driver and
55 the PM core are involved in runtime power management. As in the system
56 sleep power management case, they need to collaborate by implementing
57 various role-specific suspend and resume methods, so that the hardware
58 is cleanly powered down and reactivated without data or service loss.
59
60There's not a lot to be said about those low-power states except that they are
61very system-specific, and often device-specific. Also, that if enough devices
62have been put into low-power states (at runtime), the effect may be very similar
63to entering some system-wide low-power state (system sleep) ... and that
64synergies exist, so that several drivers using runtime PM might put the system
65into a state where even deeper power saving options are available.
66
67Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except
68for wakeup events), no more data read or written, and requests from upstream
69drivers are no longer accepted. A given bus or platform may have different
70requirements though.
71
72Examples of hardware wakeup events include an alarm from a real time clock,
73network wake-on-LAN packets, keyboard or mouse activity, and media insertion
74or removal (for PCMCIA, MMC/SD, USB, and so on).
75
76
77Interfaces for Entering System Sleep States
78===========================================
79There are programming interfaces provided for subsystems (bus type, device type,
80device class) and device drivers to allow them to participate in the power
81management of devices they are concerned with. These interfaces cover both
82system sleep and runtime power management.
83
84
85Device Power Management Operations
86----------------------------------
87Device power management operations, at the subsystem level as well as at the
88device driver level, are implemented by defining and populating objects of type
89struct dev_pm_ops:
90
91struct dev_pm_ops {
92 int (*prepare)(struct device *dev);
93 void (*complete)(struct device *dev);
94 int (*suspend)(struct device *dev);
95 int (*resume)(struct device *dev);
96 int (*freeze)(struct device *dev);
97 int (*thaw)(struct device *dev);
98 int (*poweroff)(struct device *dev);
99 int (*restore)(struct device *dev);
100 int (*suspend_late)(struct device *dev);
101 int (*resume_early)(struct device *dev);
102 int (*freeze_late)(struct device *dev);
103 int (*thaw_early)(struct device *dev);
104 int (*poweroff_late)(struct device *dev);
105 int (*restore_early)(struct device *dev);
106 int (*suspend_noirq)(struct device *dev);
107 int (*resume_noirq)(struct device *dev);
108 int (*freeze_noirq)(struct device *dev);
109 int (*thaw_noirq)(struct device *dev);
110 int (*poweroff_noirq)(struct device *dev);
111 int (*restore_noirq)(struct device *dev);
112 int (*runtime_suspend)(struct device *dev);
113 int (*runtime_resume)(struct device *dev);
114 int (*runtime_idle)(struct device *dev);
115};
116
117This structure is defined in include/linux/pm.h and the methods included in it
118are also described in that file. Their roles will be explained in what follows.
119For now, it should be sufficient to remember that the last three methods are
120specific to runtime power management while the remaining ones are used during
121system-wide power transitions.
122
123There also is a deprecated "old" or "legacy" interface for power management
124operations available at least for some subsystems. This approach does not use
125struct dev_pm_ops objects and it is suitable only for implementing system sleep
126power management methods. Therefore it is not described in this document, so
127please refer directly to the source code for more information about it.
128
129
130Subsystem-Level Methods
131-----------------------
132The core methods to suspend and resume devices reside in struct dev_pm_ops
133pointed to by the ops member of struct dev_pm_domain, or by the pm member of
134struct bus_type, struct device_type and struct class. They are mostly of
135interest to the people writing infrastructure for platforms and buses, like PCI
136or USB, or device type and device class drivers. They also are relevant to the
137writers of device drivers whose subsystems (PM domains, device types, device
138classes and bus types) don't provide all power management methods.
139
140Bus drivers implement these methods as appropriate for the hardware and the
141drivers using it; PCI works differently from USB, and so on. Not many people
142write subsystem-level drivers; most driver code is a "device driver" that builds
143on top of bus-specific framework code.
144
145For more information on these driver calls, see the description later;
146they are called in phases for every device, respecting the parent-child
147sequencing in the driver model tree.
148
149
150/sys/devices/.../power/wakeup files
151-----------------------------------
152All device objects in the driver model contain fields that control the handling
153of system wakeup events (hardware signals that can force the system out of a
154sleep state). These fields are initialized by bus or device driver code using
155device_set_wakeup_capable() and device_set_wakeup_enable(), defined in
156include/linux/pm_wakeup.h.
157
158The "power.can_wakeup" flag just records whether the device (and its driver) can
159physically support wakeup events. The device_set_wakeup_capable() routine
160affects this flag. The "power.wakeup" field is a pointer to an object of type
161struct wakeup_source used for controlling whether or not the device should use
162its system wakeup mechanism and for notifying the PM core of system wakeup
163events signaled by the device. This object is only present for wakeup-capable
164devices (i.e. devices whose "can_wakeup" flags are set) and is created (or
165removed) by device_set_wakeup_capable().
166
167Whether or not a device is capable of issuing wakeup events is a hardware
168matter, and the kernel is responsible for keeping track of it. By contrast,
169whether or not a wakeup-capable device should issue wakeup events is a policy
170decision, and it is managed by user space through a sysfs attribute: the
171"power/wakeup" file. User space can write the strings "enabled" or "disabled"
172to it to indicate whether or not, respectively, the device is supposed to signal
173system wakeup. This file is only present if the "power.wakeup" object exists
174for the given device and is created (or removed) along with that object, by
175device_set_wakeup_capable(). Reads from the file will return the corresponding
176string.
177
178The "power/wakeup" file is supposed to contain the "disabled" string initially
179for the majority of devices; the major exceptions are power buttons, keyboards,
180and Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with
181ethtool. It should also default to "enabled" for devices that don't generate
182wakeup requests on their own but merely forward wakeup requests from one bus to
183another (like PCI Express ports).
184
185The device_may_wakeup() routine returns true only if the "power.wakeup" object
186exists and the corresponding "power/wakeup" file contains the string "enabled".
187This information is used by subsystems, like the PCI bus type code, to see
188whether or not to enable the devices' wakeup mechanisms. If device wakeup
189mechanisms are enabled or disabled directly by drivers, they also should use
190device_may_wakeup() to decide what to do during a system sleep transition.
191Device drivers, however, are not supposed to call device_set_wakeup_enable()
192directly in any case.
193
194It ought to be noted that system wakeup is conceptually different from "remote
195wakeup" used by runtime power management, although it may be supported by the
196same physical mechanism. Remote wakeup is a feature allowing devices in
197low-power states to trigger specific interrupts to signal conditions in which
198they should be put into the full-power state. Those interrupts may or may not
199be used to signal system wakeup events, depending on the hardware design. On
200some systems it is impossible to trigger them from system sleep states. In any
201case, remote wakeup should always be enabled for runtime power management for
202all devices and drivers that support it.
203
204/sys/devices/.../power/control files
205------------------------------------
206Each device in the driver model has a flag to control whether it is subject to
207runtime power management. This flag, called runtime_auto, is initialized by the
208bus type (or generally subsystem) code using pm_runtime_allow() or
209pm_runtime_forbid(); the default is to allow runtime power management.
210
211The setting can be adjusted by user space by writing either "on" or "auto" to
212the device's power/control sysfs file. Writing "auto" calls pm_runtime_allow(),
213setting the flag and allowing the device to be runtime power-managed by its
214driver. Writing "on" calls pm_runtime_forbid(), clearing the flag, returning
215the device to full power if it was in a low-power state, and preventing the
216device from being runtime power-managed. User space can check the current value
217of the runtime_auto flag by reading the file.
218
219The device's runtime_auto flag has no effect on the handling of system-wide
220power transitions. In particular, the device can (and in the majority of cases
221should and will) be put into a low-power state during a system-wide transition
222to a sleep state even though its runtime_auto flag is clear.
223
224For more information about the runtime power management framework, refer to
225Documentation/power/runtime_pm.txt.
226
227
228Calling Drivers to Enter and Leave System Sleep States
229======================================================
230When the system goes into a sleep state, each device's driver is asked to
231suspend the device by putting it into a state compatible with the target
232system state. That's usually some version of "off", but the details are
233system-specific. Also, wakeup-enabled devices will usually stay partly
234functional in order to wake the system.
235
236When the system leaves that low-power state, the device's driver is asked to
237resume it by returning it to full power. The suspend and resume operations
238always go together, and both are multi-phase operations.
239
240For simple drivers, suspend might quiesce the device using class code
241and then turn its hardware as "off" as possible during suspend_noirq. The
242matching resume calls would then completely reinitialize the hardware
243before reactivating its class I/O queues.
244
245More power-aware drivers might prepare the devices for triggering system wakeup
246events.
247
248
249Call Sequence Guarantees
250------------------------
251To ensure that bridges and similar links needing to talk to a device are
252available when the device is suspended or resumed, the device tree is
253walked in a bottom-up order to suspend devices. A top-down order is
254used to resume those devices.
255
256The ordering of the device tree is defined by the order in which devices
257get registered: a child can never be registered, probed or resumed before
258its parent; and can't be removed or suspended after that parent.
259
260The policy is that the device tree should match hardware bus topology.
261(Or at least the control bus, for devices which use multiple busses.)
262In particular, this means that a device registration may fail if the parent of
263the device is suspending (i.e. has been chosen by the PM core as the next
264device to suspend) or has already suspended, as well as after all of the other
265devices have been suspended. Device drivers must be prepared to cope with such
266situations.
267
268
269System Power Management Phases
270------------------------------
271Suspending or resuming the system is done in several phases. Different phases
272are used for freeze, standby, and memory sleep states ("suspend-to-RAM") and the
273hibernation state ("suspend-to-disk"). Each phase involves executing callbacks
274for every device before the next phase begins. Not all busses or classes
275support all these callbacks and not all drivers use all the callbacks. The
276various phases always run after tasks have been frozen and before they are
277unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have
278been disabled (except for those marked with the IRQF_NO_SUSPEND flag).
279
280All phases use PM domain, bus, type, class or driver callbacks (that is, methods
281defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or
282dev->driver->pm). These callbacks are regarded by the PM core as mutually
283exclusive. Moreover, PM domain callbacks always take precedence over all of the
284other callbacks and, for example, type callbacks take precedence over bus, class
285and driver callbacks. To be precise, the following rules are used to determine
286which callback to execute in the given phase:
287
288 1. If dev->pm_domain is present, the PM core will choose the callback
289 included in dev->pm_domain->ops for execution.
290
291 2. Otherwise, if both dev->type and dev->type->pm are present, the callback
292 included in dev->type->pm will be chosen for execution.
293
294 3. Otherwise, if both dev->class and dev->class->pm are present, the
295 callback included in dev->class->pm will be chosen for execution.
296
297 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback
298 included in dev->bus->pm will be chosen for execution.
299
300This allows PM domains and device types to override callbacks provided by bus
301types or device classes if necessary.
302
303The PM domain, type, class and bus callbacks may in turn invoke device- or
304driver-specific methods stored in dev->driver->pm, but they don't have to do
305that.
306
307If the subsystem callback chosen for execution is not present, the PM core will
308execute the corresponding method from dev->driver->pm instead if there is one.
309
310
311Entering System Suspend
312-----------------------
313When the system goes into the freeze, standby or memory sleep state,
314the phases are:
315
316 prepare, suspend, suspend_late, suspend_noirq.
317
318 1. The prepare phase is meant to prevent races by preventing new devices
319 from being registered; the PM core would never know that all the
320 children of a device had been suspended if new children could be
321 registered at will. (By contrast, devices may be unregistered at any
322 time.) Unlike the other suspend-related phases, during the prepare
323 phase the device tree is traversed top-down.
324
325 After the prepare callback method returns, no new children may be
326 registered below the device. The method may also prepare the device or
327 driver in some way for the upcoming system power transition, but it
328 should not put the device into a low-power state.
329
330 For devices supporting runtime power management, the return value of the
331 prepare callback can be used to indicate to the PM core that it may
332 safely leave the device in runtime suspend (if runtime-suspended
333 already), provided that all of the device's descendants are also left in
334 runtime suspend. Namely, if the prepare callback returns a positive
335 number and that happens for all of the descendants of the device too,
336 and all of them (including the device itself) are runtime-suspended, the
337 PM core will skip the suspend, suspend_late and suspend_noirq suspend
338 phases as well as the resume_noirq, resume_early and resume phases of
339 the following system resume for all of these devices. In that case,
340 the complete callback will be called directly after the prepare callback
341 and is entirely responsible for bringing the device back to the
342 functional state as appropriate.
343
344 Note that this direct-complete procedure applies even if the device is
345 disabled for runtime PM; only the runtime-PM status matters. It follows
346 that if a device has system-sleep callbacks but does not support runtime
347 PM, then its prepare callback must never return a positive value. This
348 is because all devices are initially set to runtime-suspended with
349 runtime PM disabled.
350
351 2. The suspend methods should quiesce the device to stop it from performing
352 I/O. They also may save the device registers and put it into the
353 appropriate low-power state, depending on the bus type the device is on,
354 and they may enable wakeup events.
355
356 3. For a number of devices it is convenient to split suspend into the
357 "quiesce device" and "save device state" phases, in which cases
358 suspend_late is meant to do the latter. It is always executed after
359 runtime power management has been disabled for all devices.
360
361 4. The suspend_noirq phase occurs after IRQ handlers have been disabled,
362 which means that the driver's interrupt handler will not be called while
363 the callback method is running. The methods should save the values of
364 the device's registers that weren't saved previously and finally put the
365 device into the appropriate low-power state.
366
367 The majority of subsystems and device drivers need not implement this
368 callback. However, bus types allowing devices to share interrupt
369 vectors, like PCI, generally need it; otherwise a driver might encounter
370 an error during the suspend phase by fielding a shared interrupt
371 generated by some other device after its own device had been set to low
372 power.
373
374At the end of these phases, drivers should have stopped all I/O transactions
375(DMA, IRQs), saved enough state that they can re-initialize or restore previous
376state (as needed by the hardware), and placed the device into a low-power state.
377On many platforms they will gate off one or more clock sources; sometimes they
378will also switch off power supplies or reduce voltages. (Drivers supporting
379runtime PM may already have performed some or all of these steps.)
380
381If device_may_wakeup(dev) returns true, the device should be prepared for
382generating hardware wakeup signals to trigger a system wakeup event when the
383system is in the sleep state. For example, enable_irq_wake() might identify
384GPIO signals hooked up to a switch or other external hardware, and
385pci_enable_wake() does something similar for the PCI PME signal.
386
387If any of these callbacks returns an error, the system won't enter the desired
388low-power state. Instead the PM core will unwind its actions by resuming all
389the devices that were suspended.
390
391
392Leaving System Suspend
393----------------------
394When resuming from freeze, standby or memory sleep, the phases are:
395
396 resume_noirq, resume_early, resume, complete.
397
398 1. The resume_noirq callback methods should perform any actions needed
399 before the driver's interrupt handlers are invoked. This generally
400 means undoing the actions of the suspend_noirq phase. If the bus type
401 permits devices to share interrupt vectors, like PCI, the method should
402 bring the device and its driver into a state in which the driver can
403 recognize if the device is the source of incoming interrupts, if any,
404 and handle them correctly.
405
406 For example, the PCI bus type's ->pm.resume_noirq() puts the device into
407 the full-power state (D0 in the PCI terminology) and restores the
408 standard configuration registers of the device. Then it calls the
409 device driver's ->pm.resume_noirq() method to perform device-specific
410 actions.
411
412 2. The resume_early methods should prepare devices for the execution of
413 the resume methods. This generally involves undoing the actions of the
414 preceding suspend_late phase.
415
416 3. The resume methods should bring the device back to its operating
417 state, so that it can perform normal I/O. This generally involves
418 undoing the actions of the suspend phase.
419
420 4. The complete phase should undo the actions of the prepare phase. Note,
421 however, that new children may be registered below the device as soon as
422 the resume callbacks occur; it's not necessary to wait until the
423 complete phase.
424
425 Moreover, if the preceding prepare callback returned a positive number,
426 the device may have been left in runtime suspend throughout the whole
427 system suspend and resume (the suspend, suspend_late, suspend_noirq
428 phases of system suspend and the resume_noirq, resume_early, resume
429 phases of system resume may have been skipped for it). In that case,
430 the complete callback is entirely responsible for bringing the device
431 back to the functional state after system suspend if necessary. [For
432 example, it may need to queue up a runtime resume request for the device
433 for this purpose.] To check if that is the case, the complete callback
434 can consult the device's power.direct_complete flag. Namely, if that
435 flag is set when the complete callback is being run, it has been called
436 directly after the preceding prepare and special action may be required
437 to make the device work correctly afterward.
438
439At the end of these phases, drivers should be as functional as they were before
440suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
441gated on.
442
443However, the details here may again be platform-specific. For example,
444some systems support multiple "run" states, and the mode in effect at
445the end of resume might not be the one which preceded suspension.
446That means availability of certain clocks or power supplies changed,
447which could easily affect how a driver works.
448
449Drivers need to be able to handle hardware which has been reset since the
450suspend methods were called, for example by complete reinitialization.
451This may be the hardest part, and the one most protected by NDA'd documents
452and chip errata. It's simplest if the hardware state hasn't changed since
453the suspend was carried out, but that can't be guaranteed (in fact, it usually
454is not the case).
455
456Drivers must also be prepared to notice that the device has been removed
457while the system was powered down, whenever that's physically possible.
458PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
459where common Linux platforms will see such removal. Details of how drivers
460will notice and handle such removals are currently bus-specific, and often
461involve a separate thread.
462
463These callbacks may return an error value, but the PM core will ignore such
464errors since there's nothing it can do about them other than printing them in
465the system log.
466
467
468Entering Hibernation
469--------------------
470Hibernating the system is more complicated than putting it into the other
471sleep states, because it involves creating and saving a system image.
472Therefore there are more phases for hibernation, with a different set of
473callbacks. These phases always run after tasks have been frozen and memory has
474been freed.
475
476The general procedure for hibernation is to quiesce all devices (freeze), create
477an image of the system memory while everything is stable, reactivate all
478devices (thaw), write the image to permanent storage, and finally shut down the
479system (poweroff). The phases used to accomplish this are:
480
481 prepare, freeze, freeze_late, freeze_noirq, thaw_noirq, thaw_early,
482 thaw, complete, prepare, poweroff, poweroff_late, poweroff_noirq
483
484 1. The prepare phase is discussed in the "Entering System Suspend" section
485 above.
486
487 2. The freeze methods should quiesce the device so that it doesn't generate
488 IRQs or DMA, and they may need to save the values of device registers.
489 However the device does not have to be put in a low-power state, and to
490 save time it's best not to do so. Also, the device should not be
491 prepared to generate wakeup events.
492
493 3. The freeze_late phase is analogous to the suspend_late phase described
494 above, except that the device should not be put in a low-power state and
495 should not be allowed to generate wakeup events.
496
497 4. The freeze_noirq phase is analogous to the suspend_noirq phase discussed
498 above, except again that the device should not be put in a low-power
499 state and should not be allowed to generate wakeup events.
500
501At this point the system image is created. All devices should be inactive and
502the contents of memory should remain undisturbed while this happens, so that the
503image forms an atomic snapshot of the system state.
504
505 5. The thaw_noirq phase is analogous to the resume_noirq phase discussed
506 above. The main difference is that its methods can assume the device is
507 in the same state as at the end of the freeze_noirq phase.
508
509 6. The thaw_early phase is analogous to the resume_early phase described
510 above. Its methods should undo the actions of the preceding
511 freeze_late, if necessary.
512
513 7. The thaw phase is analogous to the resume phase discussed above. Its
514 methods should bring the device back to an operating state, so that it
515 can be used for saving the image if necessary.
516
517 8. The complete phase is discussed in the "Leaving System Suspend" section
518 above.
519
520At this point the system image is saved, and the devices then need to be
521prepared for the upcoming system shutdown. This is much like suspending them
522before putting the system into the freeze, standby or memory sleep state,
523and the phases are similar.
524
525 9. The prepare phase is discussed above.
526
527 10. The poweroff phase is analogous to the suspend phase.
528
529 11. The poweroff_late phase is analogous to the suspend_late phase.
530
531 12. The poweroff_noirq phase is analogous to the suspend_noirq phase.
532
533The poweroff, poweroff_late and poweroff_noirq callbacks should do essentially
534the same things as the suspend, suspend_late and suspend_noirq callbacks,
535respectively. The only notable difference is that they need not store the
536device register values, because the registers should already have been stored
537during the freeze, freeze_late or freeze_noirq phases.
538
539
540Leaving Hibernation
541-------------------
542Resuming from hibernation is, again, more complicated than resuming from a sleep
543state in which the contents of main memory are preserved, because it requires
544a system image to be loaded into memory and the pre-hibernation memory contents
545to be restored before control can be passed back to the image kernel.
546
547Although in principle, the image might be loaded into memory and the
548pre-hibernation memory contents restored by the boot loader, in practice this
549can't be done because boot loaders aren't smart enough and there is no
550established protocol for passing the necessary information. So instead, the
551boot loader loads a fresh instance of the kernel, called the boot kernel, into
552memory and passes control to it in the usual way. Then the boot kernel reads
553the system image, restores the pre-hibernation memory contents, and passes
554control to the image kernel. Thus two different kernels are involved in
555resuming from hibernation. In fact, the boot kernel may be completely different
556from the image kernel: a different configuration and even a different version.
557This has important consequences for device drivers and their subsystems.
558
559To be able to load the system image into memory, the boot kernel needs to
560include at least a subset of device drivers allowing it to access the storage
561medium containing the image, although it doesn't need to include all of the
562drivers present in the image kernel. After the image has been loaded, the
563devices managed by the boot kernel need to be prepared for passing control back
564to the image kernel. This is very similar to the initial steps involved in
565creating a system image, and it is accomplished in the same way, using prepare,
566freeze, and freeze_noirq phases. However the devices affected by these phases
567are only those having drivers in the boot kernel; other devices will still be in
568whatever state the boot loader left them.
569
570Should the restoration of the pre-hibernation memory contents fail, the boot
571kernel would go through the "thawing" procedure described above, using the
572thaw_noirq, thaw, and complete phases, and then continue running normally. This
573happens only rarely. Most often the pre-hibernation memory contents are
574restored successfully and control is passed to the image kernel, which then
575becomes responsible for bringing the system back to the working state.
576
577To achieve this, the image kernel must restore the devices' pre-hibernation
578functionality. The operation is much like waking up from the memory sleep
579state, although it involves different phases:
580
581 restore_noirq, restore_early, restore, complete
582
583 1. The restore_noirq phase is analogous to the resume_noirq phase.
584
585 2. The restore_early phase is analogous to the resume_early phase.
586
587 3. The restore phase is analogous to the resume phase.
588
589 4. The complete phase is discussed above.
590
591The main difference from resume[_early|_noirq] is that restore[_early|_noirq]
592must assume the device has been accessed and reconfigured by the boot loader or
593the boot kernel. Consequently the state of the device may be different from the
594state remembered from the freeze, freeze_late and freeze_noirq phases. The
595device may even need to be reset and completely re-initialized. In many cases
596this difference doesn't matter, so the resume[_early|_noirq] and
597restore[_early|_noirq] method pointers can be set to the same routines.
598Nevertheless, different callback pointers are used in case there is a situation
599where it actually does matter.
600
601
602Device Power Management Domains
603-------------------------------
604Sometimes devices share reference clocks or other power resources. In those
605cases it generally is not possible to put devices into low-power states
606individually. Instead, a set of devices sharing a power resource can be put
607into a low-power state together at the same time by turning off the shared
608power resource. Of course, they also need to be put into the full-power state
609together, by turning the shared power resource on. A set of devices with this
610property is often referred to as a power domain. A power domain may also be
611nested inside another power domain. The nested domain is referred to as the
612sub-domain of the parent domain.
613
614Support for power domains is provided through the pm_domain field of struct
615device. This field is a pointer to an object of type struct dev_pm_domain,
616defined in include/linux/pm.h, providing a set of power management callbacks
617analogous to the subsystem-level and device driver callbacks that are executed
618for the given device during all power transitions, instead of the respective
619subsystem-level callbacks. Specifically, if a device's pm_domain pointer is
620not NULL, the ->suspend() callback from the object pointed to by it will be
621executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
622analogously for all of the remaining callbacks. In other words, power
623management domain callbacks, if defined for the given device, always take
624precedence over the callbacks provided by the device's subsystem (e.g. bus
625type).
626
627The support for device power management domains is only relevant to platforms
628needing to use the same device driver power management callbacks in many
629different power domain configurations and wanting to avoid incorporating the
630support for power domains into subsystem-level callbacks, for example by
631modifying the platform bus type. Other platforms need not implement it or take
632it into account in any way.
633
634Devices may be defined as IRQ-safe which indicates to the PM core that their
635runtime PM callbacks may be invoked with disabled interrupts (see
636Documentation/power/runtime_pm.txt for more information). If an IRQ-safe
637device belongs to a PM domain, the runtime PM of the domain will be
638disallowed, unless the domain itself is defined as IRQ-safe. However, it
639makes sense to define a PM domain as IRQ-safe only if all the devices in it
640are IRQ-safe. Moreover, if an IRQ-safe domain has a parent domain, the runtime
641PM of the parent is only allowed if the parent itself is IRQ-safe too with the
642additional restriction that all child domains of an IRQ-safe parent must also
643be IRQ-safe.
644
645Device Low Power (suspend) States
646---------------------------------
647Device low-power states aren't standard. One device might only handle
648"on" and "off", while another might support a dozen different versions of
649"on" (how many engines are active?), plus a state that gets back to "on"
650faster than from a full "off".
651
652Some busses define rules about what different suspend states mean. PCI
653gives one example: after the suspend sequence completes, a non-legacy
654PCI device may not perform DMA or issue IRQs, and any wakeup events it
655issues would be issued through the PME# bus signal. Plus, there are
656several PCI-standard device states, some of which are optional.
657
658In contrast, integrated system-on-chip processors often use IRQs as the
659wakeup event sources (so drivers would call enable_irq_wake) and might
660be able to treat DMA completion as a wakeup event (sometimes DMA can stay
661active too, it'd only be the CPU and some peripherals that sleep).
662
663Some details here may be platform-specific. Systems may have devices that
664can be fully active in certain sleep states, such as an LCD display that's
665refreshed using DMA while most of the system is sleeping lightly ... and
666its frame buffer might even be updated by a DSP or other non-Linux CPU while
667the Linux control processor stays idle.
668
669Moreover, the specific actions taken may depend on the target system state.
670One target system state might allow a given device to be very operational;
671another might require a hard shut down with re-initialization on resume.
672And two different target systems might use the same device in different
673ways; the aforementioned LCD might be active in one product's "standby",
674but a different product using the same SOC might work differently.
675
676
677Power Management Notifiers
678--------------------------
679There are some operations that cannot be carried out by the power management
680callbacks discussed above, because the callbacks occur too late or too early.
681To handle these cases, subsystems and device drivers may register power
682management notifiers that are called before tasks are frozen and after they have
683been thawed. Generally speaking, the PM notifiers are suitable for performing
684actions that either require user space to be available, or at least won't
685interfere with user space.
686
687For details refer to Documentation/power/notifiers.txt.
688
689
690Runtime Power Management
691========================
692Many devices are able to dynamically power down while the system is still
693running. This feature is useful for devices that are not being used, and
694can offer significant power savings on a running system. These devices
695often support a range of runtime power states, which might use names such
696as "off", "sleep", "idle", "active", and so on. Those states will in some
697cases (like PCI) be partially constrained by the bus the device uses, and will
698usually include hardware states that are also used in system sleep states.
699
700A system-wide power transition can be started while some devices are in low
701power states due to runtime power management. The system sleep PM callbacks
702should recognize such situations and react to them appropriately, but the
703necessary actions are subsystem-specific.
704
705In some cases the decision may be made at the subsystem level while in other
706cases the device driver may be left to decide. In some cases it may be
707desirable to leave a suspended device in that state during a system-wide power
708transition, but in other cases the device must be put back into the full-power
709state temporarily, for example so that its system wakeup capability can be
710disabled. This all depends on the hardware and the design of the subsystem and
711device driver in question.
712
713During system-wide resume from a sleep state it's easiest to put devices into
714the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer
715to that document for more information regarding this particular issue as well as
716for information on the device runtime power management framework in general.
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 85894d83b352..af005770e767 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -197,7 +197,8 @@ tasks, since it generally exists anyway.
197 197
198A driver must have all firmwares it may need in RAM before suspend() is called. 198A driver must have all firmwares it may need in RAM before suspend() is called.
199If keeping them is not practical, for example due to their size, they must be 199If keeping them is not practical, for example due to their size, they must be
200requested early enough using the suspend notifier API described in notifiers.txt. 200requested early enough using the suspend notifier API described in
201Documentation/driver-api/pm/notifiers.rst.
201 202
202VI. Are there any precautions to be taken to prevent freezing failures? 203VI. Are there any precautions to be taken to prevent freezing failures?
203 204
diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt
deleted file mode 100644
index a81fa254303d..000000000000
--- a/Documentation/power/notifiers.txt
+++ /dev/null
@@ -1,55 +0,0 @@
1Suspend notifiers
2 (C) 2007-2011 Rafael J. Wysocki <rjw@sisk.pl>, GPL
3
4There are some operations that subsystems or drivers may want to carry out
5before hibernation/suspend or after restore/resume, but they require the system
6to be fully functional, so the drivers' and subsystems' .suspend() and .resume()
7or even .prepare() and .complete() callbacks are not suitable for this purpose.
8For example, device drivers may want to upload firmware to their devices after
9resume/restore, but they cannot do it by calling request_firmware() from their
10.resume() or .complete() routines (user land processes are frozen at these
11points). The solution may be to load the firmware into memory before processes
12are frozen and upload it from there in the .resume() routine.
13A suspend/hibernation notifier may be used for this purpose.
14
15The subsystems or drivers having such needs can register suspend notifiers that
16will be called upon the following events by the PM core:
17
18PM_HIBERNATION_PREPARE The system is going to hibernate, tasks will be frozen
19 immediately. This is different from PM_SUSPEND_PREPARE
20 below because here we do additional work between notifiers
21 and drivers freezing.
22
23PM_POST_HIBERNATION The system memory state has been restored from a
24 hibernation image or an error occurred during
25 hibernation. Device drivers' restore callbacks have
26 been executed and tasks have been thawed.
27
28PM_RESTORE_PREPARE The system is going to restore a hibernation image.
29 If all goes well, the restored kernel will issue a
30 PM_POST_HIBERNATION notification.
31
32PM_POST_RESTORE An error occurred during restore from hibernation.
33 Device drivers' restore callbacks have been executed
34 and tasks have been thawed.
35
36PM_SUSPEND_PREPARE The system is preparing for suspend.
37
38PM_POST_SUSPEND The system has just resumed or an error occurred during
39 suspend. Device drivers' resume callbacks have been
40 executed and tasks have been thawed.
41
42It is generally assumed that whatever the notifiers do for
43PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously,
44operations performed for PM_SUSPEND_PREPARE should be reversed for
45PM_POST_SUSPEND. Additionally, all of the notifiers are called for
46PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
47all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
48PM_SUSPEND_PREPARE.
49
50The hibernation and suspend notifiers are called with pm_mutex held. They are
51defined in the usual way, but their last argument is meaningless (it is always
52NULL). To register and/or unregister a suspend notifier use the functions
53register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
54include/linux/suspend.h . If you don't need to unregister the notifier, you can
55also use the pm_notifier() macro defined in include/linux/suspend.h .
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index 85c746cbab2c..a1b7f7158930 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -713,7 +713,7 @@ In addition to that the prepare() callback may carry out some operations
713preparing the device to be suspended, although it should not allocate memory 713preparing the device to be suspended, although it should not allocate memory
714(if additional memory is required to suspend the device, it has to be 714(if additional memory is required to suspend the device, it has to be
715preallocated earlier, for example in a suspend/hibernate notifier as described 715preallocated earlier, for example in a suspend/hibernate notifier as described
716in Documentation/power/notifiers.txt). 716in Documentation/driver-api/pm/notifiers.rst).
717 717
7183.1.2. suspend() 7183.1.2. suspend()
719 719
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
index 50022b3c8ebf..1fdbd5447216 100644
--- a/Documentation/pps/pps.txt
+++ b/Documentation/pps/pps.txt
@@ -63,7 +63,7 @@ for instance) is a PPS source too, and if not they should provide the
63possibility to open another device as PPS source. 63possibility to open another device as PPS source.
64 64
65In LinuxPPS the PPS sources are simply char devices usually mapped 65In LinuxPPS the PPS sources are simply char devices usually mapped
66into files /dev/pps0, /dev/pps1, etc.. 66into files /dev/pps0, /dev/pps1, etc.
67 67
68 68
69PPS with USB to serial devices 69PPS with USB to serial devices
@@ -71,9 +71,12 @@ PPS with USB to serial devices
71 71
72It is possible to grab the PPS from an USB to serial device. However, 72It is possible to grab the PPS from an USB to serial device. However,
73you should take into account the latencies and jitter introduced by 73you should take into account the latencies and jitter introduced by
74the USB stack. Users has reported clock instability around +-1ms when 74the USB stack. Users have reported clock instability around +-1ms when
75synchronized with PPS through USB. This isn't suited for time server 75synchronized with PPS through USB. With USB 2.0, jitter may decrease
76synchronization. 76down to the order of 125 microseconds.
77
78This may be suitable for time server synchronization with NTP because
79of its undersampling and algorithms.
77 80
78If your device doesn't report PPS, you can check that the feature is 81If your device doesn't report PPS, you can check that the feature is
79supported by its driver. Most of the time, you only need to add a call 82supported by its driver. Most of the time, you only need to add a call
@@ -166,7 +169,8 @@ Testing the PPS support
166 169
167In order to test the PPS support even without specific hardware you can use 170In order to test the PPS support even without specific hardware you can use
168the ktimer driver (see the client subsection in the PPS configuration menu) 171the ktimer driver (see the client subsection in the PPS configuration menu)
169and the userland tools provided in the Documentation/pps/ directory. 172and the userland tools available in your distribution's pps-tools package,
173http://linuxpps.org , or https://github.com/ago/pps-tools .
170 174
171Once you have enabled the compilation of ktimer just modprobe it (if 175Once you have enabled the compilation of ktimer just modprobe it (if
172not statically compiled): 176not statically compiled):
@@ -183,8 +187,8 @@ and the run ppstest as follow:
183 source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0 187 source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0
184 source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0 188 source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0
185 189
186Please, note that to compile userland programs you need the file timepps.h 190Please, note that to compile userland programs you need the file timepps.h .
187(see Documentation/pps/). 191This is available in the pps-tools repository mentioned above.
188 192
189 193
190Generators 194Generators
diff --git a/Documentation/thermal/nouveau_thermal b/Documentation/thermal/nouveau_thermal
index 60bc29357ac3..6e17a11efcb0 100644
--- a/Documentation/thermal/nouveau_thermal
+++ b/Documentation/thermal/nouveau_thermal
@@ -42,7 +42,7 @@ thresholds can be configured thanks to the following HWMON attributes:
42 * Critical: temp1_crit and temp1_crit_hyst; 42 * Critical: temp1_crit and temp1_crit_hyst;
43 * Shutdown: temp1_emergency and temp1_emergency_hyst. 43 * Shutdown: temp1_emergency and temp1_emergency_hyst.
44 44
45NOTE: Remember that the values are stored as milli degrees Celcius. Don't forget 45NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
46to multiply! 46to multiply!
47 47
48Fan management 48Fan management
diff --git a/Documentation/translations/ja_JP/HOWTO b/Documentation/translations/ja_JP/HOWTO
index b03fc8047f03..4ebd20750ef1 100644
--- a/Documentation/translations/ja_JP/HOWTO
+++ b/Documentation/translations/ja_JP/HOWTO
@@ -111,7 +111,7 @@ Linux カーãƒãƒ«ã‚½ãƒ¼ã‚¹ãƒ„リーã¯å¹…広ã„範囲ã®ãƒ‰ã‚­ãƒ¥ãƒ¡ãƒ³ãƒˆã‚’å
111カーãƒãƒ«ã®å¤‰æ›´ãŒã€ã‚«ãƒ¼ãƒãƒ«ãŒãƒ¦ãƒ¼ã‚¶ç©ºé–“ã«å…¬é–‹ã—ã¦ã„るインターフェイス㮠111カーãƒãƒ«ã®å¤‰æ›´ãŒã€ã‚«ãƒ¼ãƒãƒ«ãŒãƒ¦ãƒ¼ã‚¶ç©ºé–“ã«å…¬é–‹ã—ã¦ã„るインターフェイスã®
112変更を引ãèµ·ã“ã™å ´åˆã€ãã®å¤‰æ›´ã‚’説明ã™ã‚‹ãƒžãƒ‹ãƒ¥ã‚¢ãƒ«ãƒšãƒ¼ã‚¸ã®ãƒ‘ッãƒã‚„情報 112変更を引ãèµ·ã“ã™å ´åˆã€ãã®å¤‰æ›´ã‚’説明ã™ã‚‹ãƒžãƒ‹ãƒ¥ã‚¢ãƒ«ãƒšãƒ¼ã‚¸ã®ãƒ‘ッãƒã‚„情報
113をマニュアルページã®ãƒ¡ãƒ³ãƒ†ãƒŠ mtk.manpages@gmail.com ã«é€ã‚Šã€CC ã‚’ 113をマニュアルページã®ãƒ¡ãƒ³ãƒ†ãƒŠ mtk.manpages@gmail.com ã«é€ã‚Šã€CC ã‚’
114linux-api@ver.kernel.org に送ることを勧めます。 114linux-api@vger.kernel.org に送ることを勧めます。
115 115
116以下ã¯ã‚«ãƒ¼ãƒãƒ«ã‚½ãƒ¼ã‚¹ãƒ„リーã«å«ã¾ã‚Œã¦ã„る読んã§ãŠãã¹ãファイルã®ä¸€è¦§ã§ 116以下ã¯ã‚«ãƒ¼ãƒãƒ«ã‚½ãƒ¼ã‚¹ãƒ„リーã«å«ã¾ã‚Œã¦ã„る読んã§ãŠãã¹ãファイルã®ä¸€è¦§ã§
117ã™- 117ã™-
diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst
index 3b0c15b277e0..2333697251dd 100644
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@@ -289,8 +289,8 @@ pub/linux/kernel/v4.x/ 디렉토리ì—서 ì°¸ì¡°ë  ìˆ˜ 있다.개발 프로세ì
289Andrew Mortonì˜ ê¸€ì´ ìžˆë‹¤. 289Andrew Mortonì˜ ê¸€ì´ ìžˆë‹¤.
290 290
291 *"커ë„ì´ ì–¸ì œ ë°°í¬ë ì§€ëŠ” ì•„ë¬´ë„ ëª¨ë¥¸ë‹¤. 왜ëƒí•˜ë©´ ë°°í¬ëŠ” 알려진 291 *"커ë„ì´ ì–¸ì œ ë°°í¬ë ì§€ëŠ” ì•„ë¬´ë„ ëª¨ë¥¸ë‹¤. 왜ëƒí•˜ë©´ ë°°í¬ëŠ” 알려진
292 ë²„ê·¸ì˜ ìƒí™©ì— ë”°ë¼ ë°°í¬ë˜ëŠ” 것ì´ì§€ 미리정해 ë†“ì€ ì‹œê°„ì— ë”°ë¼ 292 ë²„ê·¸ì˜ ìƒí™©ì— ë”°ë¼ ë°°í¬ë˜ëŠ” 것ì´ì§€ 미리정해 ë†“ì€ ì‹œê°„ì— ë”°ë¼
293 ë°°í¬ë˜ëŠ” ê²ƒì€ ì•„ë‹ˆê¸° 때문ì´ë‹¤."* 293 ë°°í¬ë˜ëŠ” ê²ƒì€ ì•„ë‹ˆê¸° 때문ì´ë‹¤."*
294 294
2954.x.y - 안정 ì»¤ë„ íŠ¸ë¦¬ 2954.x.y - 안정 ì»¤ë„ íŠ¸ë¦¬
296~~~~~~~~~~~~~~~~~~~~~~ 296~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/translations/zh_CN/CodingStyle b/Documentation/translations/zh_CN/CodingStyle
deleted file mode 100644
index dc101f48e713..000000000000
--- a/Documentation/translations/zh_CN/CodingStyle
+++ /dev/null
@@ -1,813 +0,0 @@
1Chinese translated version of Documentation/process/coding-style.rst
2
3If you have any comment or update to the content, please post to LKML directly.
4However, if you have problem communicating in English you can also ask the
5Chinese maintainer for help. Contact the Chinese maintainer, if this
6translation is outdated or there is problem with translation.
7
8Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
9---------------------------------------------------------------------
10Documentation/process/coding-style.rst的中文翻译
11
12如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可
13以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。
14
15中文版维护者: 张乐 Zhang Le <r0bertz@gentoo.org>
16中文版翻译者: 张乐 Zhang Le <r0bertz@gentoo.org>
17中文版校译者: 王聪 Wang Cong <xiyou.wangcong@gmail.com>
18 wheelz <kernel.zeng@gmail.com>
19 管旭东 Xudong Guan <xudong.guan@gmail.com>
20 Li Zefan <lizf@cn.fujitsu.com>
21 Wang Chen <wangchen@cn.fujitsu.com>
22以下为正文
23---------------------------------------------------------------------
24
25 Linux内核代码风格
26
27这是一个简短的文档,描述了 linux 内核的首选代码风格。代码风格是因人而异的,而且我
28不愿意把自己的观点强加给任何人,但这就像我去做任何事情都必须遵循的原则那样,我也
29希望在绝大多数事上保持这种的态度。请(在写代码时)至少考虑一下这里的代码风格。
30
31首先,我建议你打印一份 GNU 代码规范,然后不要读。烧了它,这是一个具有重大象征性意义
32的动作。
33
34不管怎样,现在我们开始:
35
36
37 第一章:缩进
38
39制表符是 8 个字符,所以缩进也是 8 个字符。有些异端运动试图将缩进变为 4(甚至 2!)
40个字符深,这几乎相当于å°è¯•将圆周率的值定义为 3。
41
42理由:缩进的全部意义就在于清楚的定义一个控制块起止于何处。尤其是当你盯着你的屏幕
43连续看了 20 小时之后,你将会发现大一点的缩进会使你更容易分辨缩进。
44
45现在,有些人会抱怨 8 个字符的缩进会使代码向右边移动的太远,在 80 个字符的终端屏幕上
46就很难读这样的代码。这个问题的答案是,如果你需要 3 级以上的缩进,不管用何种方式你
47的代码已经有问题了,应该修正你的程序。
48
49简而言之,8 个字符的缩进可以让代码更容易阅读,还有一个好处是当你的函数嵌套太深的
50时候可以给你警告。留心这个警告。
51
52在 switch 语å¥ä¸­æ¶ˆé™¤å¤šçº§ç¼©è¿›çš„é¦–é€‰çš„æ–¹å¼æ˜¯è®© “switch†和从属于它的 “case†标签
53对é½äºŽåŒä¸€åˆ—,而ä¸è¦ “两次缩进†“case†标签。比如:
54
55 switch (suffix) {
56 case 'G':
57 case 'g':
58 mem <<= 30;
59 break;
60 case 'M':
61 case 'm':
62 mem <<= 20;
63 break;
64 case 'K':
65 case 'k':
66 mem <<= 10;
67 /* fall through */
68 default:
69 break;
70 }
71
72ä¸è¦æŠŠå¤šä¸ªè¯­å¥æ”¾åœ¨ä¸€è¡Œé‡Œï¼Œé™¤éžä½ æœ‰ä»€ä¹ˆä¸œè¥¿è¦éšè—:
73
74 if (condition) do_this;
75 do_something_everytime;
76
77也ä¸è¦åœ¨ä¸€è¡Œé‡Œæ”¾å¤šä¸ªèµ‹å€¼è¯­å¥ã€‚内核代ç é£Žæ ¼è¶…级简å•。就是é¿å…å¯èƒ½å¯¼è‡´åˆ«äººè¯¯è¯»çš„表
78è¾¾å¼ã€‚
79
80é™¤äº†æ³¨é‡Šã€æ–‡æ¡£å’Œ Kconfig 之外,ä¸è¦ä½¿ç”¨ç©ºæ ¼æ¥ç¼©è¿›ï¼Œå‰é¢çš„例孿˜¯ä¾‹å¤–,是有æ„为之。
81
82选用一个好的编辑器,ä¸è¦åœ¨è¡Œå°¾ç•™ç©ºæ ¼ã€‚
83
84
85 第二章:把长的行和字符串打散
86
87代ç é£Žæ ¼çš„æ„ä¹‰å°±åœ¨äºŽä½¿ç”¨å¹³å¸¸ä½¿ç”¨çš„å·¥å…·æ¥ç»´æŒä»£ç çš„å¯è¯»æ€§å’Œå¯ç»´æŠ¤æ€§ã€‚
88
89æ¯ä¸€è¡Œçš„长度的é™åˆ¶æ˜¯ 80 列,我们强烈建议您éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ã€‚
90
91长于 80 列的语å¥è¦æ‰“æ•£æˆæœ‰æ„义的片段。除éžè¶…过 80 列能显著增加å¯è¯»æ€§ï¼Œå¹¶ä¸”ä¸ä¼šéšè—
92ä¿¡æ¯ã€‚å­ç‰‡æ®µè¦æ˜Žæ˜¾çŸ­äºŽæ¯ç‰‡æ®µï¼Œå¹¶æ˜Žæ˜¾é å³ã€‚è¿™åŒæ ·é€‚用于有ç€å¾ˆé•¿å‚数列表的函数头。
93然而,ç»å¯¹ä¸è¦æ‰“散对用户å¯è§çš„字符串,例如 printk ä¿¡æ¯ï¼Œå› ä¸ºè¿™å°†å¯¼è‡´æ— æ³• grep 这些
94ä¿¡æ¯ã€‚
95
96 第三章:大括å·å’Œç©ºæ ¼çš„æ”¾ç½®
97
98C语言风格中å¦å¤–一个常è§é—®é¢˜æ˜¯å¤§æ‹¬å·çš„æ”¾ç½®ã€‚和缩进大å°ä¸åŒï¼Œé€‰æ‹©æˆ–弃用æŸç§æ”¾ç½®ç­–
99略并没有多少技术上的原因,ä¸è¿‡é¦–选的方å¼ï¼Œå°±åƒ Kernighan å’Œ Ritchie 展示给我们的,
100æ˜¯æŠŠèµ·å§‹å¤§æ‹¬å·æ”¾åœ¨è¡Œå°¾ï¼Œè€ŒæŠŠç»“æŸå¤§æ‹¬å·æ”¾åœ¨è¡Œé¦–,所以:
101
102 if (x is true) {
103 we do y
104 }
105
106这适用于所有的éžå‡½æ•°è¯­å¥å—(ifã€switchã€forã€whileã€do)。比如:
107
108 switch (action) {
109 case KOBJ_ADD:
110 return "add";
111 case KOBJ_REMOVE:
112 return "remove";
113 case KOBJ_CHANGE:
114 return "change";
115 default:
116 return NULL;
117 }
118
119ä¸è¿‡ï¼Œæœ‰ä¸€ä¸ªä¾‹å¤–ï¼Œé‚£å°±æ˜¯å‡½æ•°ï¼šå‡½æ•°çš„èµ·å§‹å¤§æ‹¬å·æ”¾ç½®äºŽä¸‹ä¸€è¡Œçš„开头,所以:
120
121 int function(int x)
122 {
123 body of function
124 }
125
126全世界的异端å¯èƒ½ä¼šæŠ±æ€¨è¿™ä¸ªä¸ä¸€è‡´æ€§æ˜¯â€¦â€¦å‘ƒâ€¦â€¦ä¸ä¸€è‡´çš„,ä¸è¿‡æ‰€æœ‰æ€ç»´å¥å…¨çš„人都知é“
127(a) K&R 是 _正确的_,并且 (b) K&R 是正确的。此外,ä¸ç®¡æ€Žæ ·å‡½æ•°éƒ½æ˜¯ç‰¹æ®Šçš„(C
128函数是ä¸èƒ½åµŒå¥—的)。
129
130注æ„结æŸå¤§æ‹¬å·ç‹¬è‡ªå æ®ä¸€è¡Œï¼Œé™¤éžå®ƒåŽé¢è·Ÿç€åŒä¸€ä¸ªè¯­å¥çš„剩余部分,也就是 do 语å¥ä¸­çš„
131“while†或者 if 语å¥ä¸­çš„ “elseâ€ï¼Œåƒè¿™æ ·ï¼š
132
133 do {
134 body of do-loop
135 } while (condition);
136
137和
138
139 if (x == y) {
140 ..
141 } else if (x > y) {
142 ...
143 } else {
144 ....
145 }
146
147ç†ç”±ï¼šK&R。
148
149也请注æ„è¿™ç§å¤§æ‹¬å·çš„æ”¾ç½®æ–¹å¼ä¹Ÿèƒ½ä½¿ç©ºï¼ˆæˆ–者差ä¸å¤šç©ºçš„ï¼‰è¡Œçš„æ•°é‡æœ€å°åŒ–ï¼ŒåŒæ—¶ä¸å¤±å¯
150读性。因此,由于你的å±å¹•上的新行是ä¸å¯å†ç”Ÿèµ„æºï¼ˆæƒ³æƒ³ 25 行的终端å±å¹•),你将会有更
151å¤šçš„ç©ºè¡Œæ¥æ”¾ç½®æ³¨é‡Šã€‚
152
153å½“åªæœ‰ä¸€ä¸ªå•独的语å¥çš„æ—¶å€™ï¼Œä¸ç”¨åŠ ä¸å¿…è¦çš„大括å·ã€‚
154
155 if (condition)
156 action();
157
158和
159
160 if (condition)
161 do_this();
162 else
163 do_that();
164
165这并ä¸é€‚ç”¨äºŽåªæœ‰ä¸€ä¸ªæ¡ä»¶åˆ†æ”¯æ˜¯å•语å¥çš„æƒ…况;这时所有分支都è¦ä½¿ç”¨å¤§æ‹¬å·ï¼š
166
167 if (condition) {
168 do_this();
169 do_that();
170 } else {
171 otherwise();
172 }
173
174 3.1:空格
175
176Linux 内核的空格使用方å¼ï¼ˆä¸»è¦ï¼‰å–决于它是用于函数还是关键字。(大多数)关键字åŽ
177è¦åŠ ä¸€ä¸ªç©ºæ ¼ã€‚å€¼å¾—æ³¨æ„的例外是 sizeofã€typeofã€alignof å’Œ __attribute__,这些
178关键字æŸäº›ç¨‹åº¦ä¸Šçœ‹èµ·æ¥æ›´åƒå‡½æ•°ï¼ˆå®ƒä»¬åœ¨ Linux 里也常常伴éšå°æ‹¬å·è€Œä½¿ç”¨ï¼Œå°½ç®¡åœ¨ C 里
179è¿™æ ·çš„å°æ‹¬å·ä¸æ˜¯å¿…éœ€çš„ï¼Œå°±åƒ â€œstruct fileinfo info†声明过åŽçš„ “sizeof infoâ€ï¼‰ã€‚
180
181æ‰€ä»¥åœ¨è¿™äº›å…³é”®å­—ä¹‹åŽæ”¾ä¸€ä¸ªç©ºæ ¼ï¼š
182
183 if, switch, case, for, do, while
184
185但是ä¸è¦åœ¨ sizeofã€typeofã€alignof 或者 __attribute__ è¿™äº›å…³é”®å­—ä¹‹åŽæ”¾ç©ºæ ¼ã€‚例如,
186
187 s = sizeof(struct file);
188
189ä¸è¦åœ¨å°æ‹¬å·é‡Œçš„表达å¼ä¸¤ä¾§åŠ ç©ºæ ¼ã€‚è¿™æ˜¯ä¸€ä¸ªå例:
190
191 s = sizeof( struct file );
192
193当声明指针类型或者返回指针类型的函数时,“*â€ çš„é¦–é€‰ä½¿ç”¨æ–¹å¼æ˜¯ä½¿ä¹‹é è¿‘å˜é‡å或者函
194æ•°åï¼Œè€Œä¸æ˜¯é è¿‘类型å。例å­ï¼š
195
196 char *linux_banner;
197 unsigned long long memparse(char *ptr, char **retptr);
198 char *match_strdup(substring_t *s);
199
200在大多数二元和三元æ“ä½œç¬¦ä¸¤ä¾§ä½¿ç”¨ä¸€ä¸ªç©ºæ ¼ï¼Œä¾‹å¦‚ä¸‹é¢æ‰€æœ‰è¿™äº›æ“作符:
201
202 = + - < > * / % | & ^ <= >= == != ? :
203
204但是一元æ“作符åŽä¸è¦åŠ ç©ºæ ¼ï¼š
205
206 & * + - ~ ! sizeof typeof alignof __attribute__ defined
207
208åŽç¼€è‡ªåŠ å’Œè‡ªå‡ä¸€å…ƒæ“作符å‰ä¸åŠ ç©ºæ ¼ï¼š
209
210 ++ --
211
212å‰ç¼€è‡ªåŠ å’Œè‡ªå‡ä¸€å…ƒæ“作符åŽä¸åŠ ç©ºæ ¼ï¼š
213
214 ++ --
215
216‘.’ å’Œ “->†结构体æˆå‘˜æ“作符å‰åŽä¸åŠ ç©ºæ ¼ã€‚
217
218ä¸è¦åœ¨è¡Œå°¾ç•™ç©ºç™½ã€‚有些å¯ä»¥è‡ªåŠ¨ç¼©è¿›çš„ç¼–è¾‘å™¨ä¼šåœ¨æ–°è¡Œçš„è¡Œé¦–åŠ å…¥é€‚é‡çš„空白,然åŽä½ 
219å°±å¯ä»¥ç›´æŽ¥åœ¨é‚£ä¸€è¡Œè¾“入代ç ã€‚ä¸è¿‡å‡å¦‚ä½ æœ€åŽæ²¡æœ‰åœ¨é‚£ä¸€è¡Œè¾“入代ç ï¼Œæœ‰äº›ç¼–辑器就ä¸
220会移除已ç»åŠ å…¥çš„ç©ºç™½ï¼Œå°±åƒä½ æ•…æ„ç•™ä¸‹ä¸€ä¸ªåªæœ‰ç©ºç™½çš„行。包å«è¡Œå°¾ç©ºç™½çš„行就这样产
221生了。
222
223当gitå‘现补ä¸åŒ…å«äº†è¡Œå°¾ç©ºç™½çš„æ—¶å€™ä¼šè­¦å‘Šä½ ï¼Œå¹¶ä¸”å¯ä»¥åº”ä½ çš„è¦æ±‚去掉行尾空白;ä¸è¿‡
224如果你是正在打一系列补ä¸ï¼Œè¿™æ ·åšä¼šå¯¼è‡´åŽé¢çš„è¡¥ä¸å¤±è´¥ï¼Œå› ä¸ºä½ æ”¹å˜äº†è¡¥ä¸çš„上下文。
225
226
227 第四章:命å
228
229C是一个简朴的语言,你的命å也应该这样。和 Modula-2 å’Œ Pascal 程åºå‘˜ä¸åŒï¼ŒC 程åºå‘˜
230ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter 这样åŽä¸½çš„å字。C 程åºå‘˜ä¼šç§°é‚£ä¸ªå˜é‡
231为 “tmpâ€ï¼Œè¿™æ ·å†™èµ·æ¥ä¼šæ›´å®¹æ˜“,而且至少ä¸ä¼šä»¤å…¶éš¾äºŽç†è§£ã€‚
232
233ä¸è¿‡ï¼Œè™½ç„¶æ··ç”¨å¤§å°å†™çš„åå­—æ˜¯ä¸æå€¡ä½¿ç”¨çš„ï¼Œä½†æ˜¯å…¨å±€å˜é‡è¿˜æ˜¯éœ€è¦ä¸€ä¸ªå…·æè¿°æ€§çš„åå­—
234。称一个全局函数为 “foo†是一个难以饶æ•的错误。
235
236全局å˜é‡ï¼ˆåªæœ‰å½“你真正需è¦å®ƒä»¬çš„æ—¶å€™å†ç”¨å®ƒï¼‰éœ€è¦æœ‰ä¸€ä¸ªå…·æè¿°æ€§çš„å字,就åƒå…¨å±€å‡½
237数。如果你有一个å¯ä»¥è®¡ç®—活动用户数é‡çš„函数,你应该å«å®ƒ “count_active_users()â€
238或者类似的å字,你ä¸åº”该å«å®ƒ “cntuser()â€ã€‚
239
240在函数å中包å«å‡½æ•°ç±»åž‹ï¼ˆæ‰€è°“çš„åŒˆç‰™åˆ©å‘½åæ³•)是脑å­å‡ºäº†é—®é¢˜â€”—编译器知é“那些类型而
241且能够检查那些类型,这样åšåªèƒ½æŠŠç¨‹åºå‘˜å¼„糊涂了。难怪微软总是制造出有问题的程åºã€‚
242
243本地å˜é‡å应该简短,而且能够表达相关的å«ä¹‰ã€‚å¦‚æžœä½ æœ‰ä¸€äº›éšæœºçš„æ•´æ•°åž‹çš„循环计数器
244,它应该被称为 “iâ€ã€‚å«å®ƒ “loop_counter†并无益处,如果它没有被误解的å¯èƒ½çš„è¯ã€‚
245类似的,“tmp†å¯ä»¥ç”¨æ¥ç§°å‘¼ä»»æ„类型的临时å˜é‡ã€‚
246
247如果你怕混淆了你的本地å˜é‡å,你就é‡åˆ°å¦ä¸€ä¸ªé—®é¢˜äº†ï¼Œå«åšå‡½æ•°å¢žé•¿è·å°”蒙失衡综åˆç—‡
248。请看第六章(函数)。
249
250
251 第五章:Typedef
252
253ä¸è¦ä½¿ç”¨ç±»ä¼¼ “vps_t†之类的东西。
254
255对结构体和指针使用 typedef 是一个错误。当你在代ç é‡Œçœ‹åˆ°ï¼š
256
257 vps_t a;
258
259è¿™ä»£è¡¨ä»€ä¹ˆæ„æ€å‘¢ï¼Ÿ
260
261相å,如果是这样
262
263 struct virtual_container *a;
264
265ä½ å°±çŸ¥é“ â€œa†是什么了。
266
267很多人认为 typedef “能æé«˜å¯è¯»æ€§â€ã€‚å®žé™…ä¸æ˜¯è¿™æ ·çš„。它们åªåœ¨ä¸‹åˆ—情况下有用:
268
269 (a) 完全ä¸é€æ˜Žçš„å¯¹è±¡ï¼ˆè¿™ç§æƒ…况下è¦ä¸»åŠ¨ä½¿ç”¨ typedef æ¥éšè—这个对象实际上是什么)。
270
271 例如:“pte_t†等ä¸é€æ˜Žå¯¹è±¡ï¼Œä½ åªèƒ½ç”¨åˆé€‚的访问函数æ¥è®¿é—®å®ƒä»¬ã€‚
272
273 注æ„ï¼ä¸é€æ˜Žæ€§å’Œâ€œè®¿é—®å‡½æ•°â€æœ¬èº«æ˜¯ä¸å¥½çš„。我们使用 pte_t 等类型的原因在于真的是
274 完全没有任何共用的å¯è®¿é—®ä¿¡æ¯ã€‚
275
276 (b) 清楚的整数类型,如此,这层抽象就å¯ä»¥å¸®åŠ©æ¶ˆé™¤åˆ°åº•æ˜¯ “int†还是 “long†的混淆。
277
278 u8/u16/u32 是完全没有问题的 typedef,ä¸è¿‡å®ƒä»¬æ›´ç¬¦åˆç±»åˆ« (d) è€Œä¸æ˜¯è¿™é‡Œã€‚
279
280 冿¬¡æ³¨æ„ï¼è¦è¿™æ ·åšï¼Œå¿…须事出有因。如果æŸä¸ªå˜é‡æ˜¯ “unsigned long“,那么没有必è¦
281
282 typedef unsigned long myflags_t;
283
284 ä¸è¿‡å¦‚果有一个明确的原因,比如它在æŸç§æƒ…况下å¯èƒ½ä¼šæ˜¯ä¸€ä¸ª “unsigned int†而在
285 其他情况下å¯èƒ½ä¸º “unsigned longâ€ï¼Œé‚£ä¹ˆå°±ä¸è¦çŠ¹è±«ï¼Œè¯·åŠ¡å¿…ä½¿ç”¨ typedef。
286
287 (c) 当你使用sparse按字é¢çš„创建一个新类型æ¥åšç±»åž‹æ£€æŸ¥çš„æ—¶å€™ã€‚
288
289 (d) 和标准C99类型相åŒçš„类型,在æŸäº›ä¾‹å¤–的情况下。
290
291 虽然让眼ç›å’Œè„‘ç­‹æ¥é€‚应新的标准类型比如 “uint32_t†ä¸éœ€è¦èŠ±å¾ˆå¤šæ—¶é—´ï¼Œå¯æ˜¯æœ‰äº›
292 人ä»ç„¶æ‹’ç»ä½¿ç”¨å®ƒä»¬ã€‚
293
294 因此,Linux 特有的等åŒäºŽæ ‡å‡†ç±»åž‹çš„ “u8/u16/u32/u64†类型和它们的有符å·ç±»åž‹æ˜¯è¢«
295 å…许的——尽管在你自己的新代ç ä¸­ï¼Œå®ƒä»¬ä¸æ˜¯å¼ºåˆ¶è¦æ±‚è¦ä½¿ç”¨çš„。
296
297 当编辑已ç»ä½¿ç”¨äº†æŸä¸ªç±»åž‹é›†çš„å·²æœ‰ä»£ç æ—¶ï¼Œä½ åº”该éµå¾ªé‚£äº›ä»£ç ä¸­å·²ç»åšå‡ºçš„选择。
298
299 (e) å¯ä»¥åœ¨ç”¨æˆ·ç©ºé—´å®‰å…¨ä½¿ç”¨çš„类型。
300
301 在æŸäº›ç”¨æˆ·ç©ºé—´å¯è§çš„结构体里,我们ä¸èƒ½è¦æ±‚C99类型而且ä¸èƒ½ç”¨ä¸Šé¢æåˆ°çš„ “u32â€
302 类型。因此,我们在与用户空间共享的所有结构体中使用 __u32 和类似的类型。
303
304å¯èƒ½è¿˜æœ‰å…¶ä»–的情况,ä¸è¿‡åŸºæœ¬çš„规则是永远ä¸è¦ä½¿ç”¨ typedef,除éžä½ å¯ä»¥æ˜Žç¡®çš„应用上
305è¿°æŸä¸ªè§„则中的一个。
306
307总的æ¥è¯´ï¼Œå¦‚果一个指针或者一个结构体里的元素å¯ä»¥åˆç†çš„被直接访问到,那么它们就ä¸
308应该是一个 typedef。
309
310
311 第六章:函数
312
313函数应该简短而漂亮,并且åªå®Œæˆä¸€ä»¶äº‹æƒ…。函数应该å¯ä»¥ä¸€å±æˆ–è€…ä¸¤å±æ˜¾ç¤ºå®Œï¼ˆæˆ‘们都知
314é“ ISO/ANSI å±å¹•大尿˜¯ 80x24),åªåšä¸€ä»¶äº‹æƒ…,而且把它åšå¥½ã€‚
315
316ä¸€ä¸ªå‡½æ•°çš„æœ€å¤§é•¿åº¦æ˜¯å’Œè¯¥å‡½æ•°çš„å¤æ‚度和缩进级数æˆå比的。所以,如果你有一个ç†è®ºä¸Š
317很简å•çš„åªæœ‰ä¸€ä¸ªå¾ˆé•¿ï¼ˆä½†æ˜¯ç®€å•)的 case 语å¥çš„函数,而且你需è¦åœ¨æ¯ä¸ª case 里åš
318很多很å°çš„事情,这样的函数尽管很长,但也是å¯ä»¥çš„。
319
320ä¸è¿‡ï¼Œå¦‚æžœä½ æœ‰ä¸€ä¸ªå¤æ‚çš„å‡½æ•°ï¼Œè€Œä¸”ä½ æ€€ç–‘ä¸€ä¸ªå¤©åˆ†ä¸æ˜¯å¾ˆé«˜çš„高中一年级学生å¯èƒ½ç”šè‡³
321æžä¸æ¸…楚这个函数的目的,你应该严格的éµå®ˆå‰é¢æåˆ°çš„长度é™åˆ¶ã€‚使用辅助函数,并为之
322å–个具æè¿°æ€§çš„å字(如果你觉得它们的性能很é‡è¦çš„è¯ï¼Œå¯ä»¥è®©ç¼–译器内è”它们,这样的
323æ•ˆæžœå¾€å¾€ä¼šæ¯”ä½ å†™ä¸€ä¸ªå¤æ‚函数的效果è¦å¥½ã€‚)
324
325函数的å¦å¤–ä¸€ä¸ªè¡¡é‡æ ‡å‡†æ˜¯æœ¬åœ°å˜é‡çš„æ•°é‡ã€‚此数é‡ä¸åº”超过 5ï¼10 个,å¦åˆ™ä½ çš„函数就有
326é—®é¢˜äº†ã€‚é‡æ–°è€ƒè™‘ä¸€ä¸‹ä½ çš„å‡½æ•°ï¼ŒæŠŠå®ƒåˆ†æ‹†æˆæ›´å°çš„函数。人的大脑一般å¯ä»¥è½»æ¾çš„åŒæ—¶è·Ÿ
327踪 7 个ä¸åŒçš„事物,如果å†å¢žå¤šçš„è¯ï¼Œå°±ä¼šç³Šæ¶‚了。å³ä¾¿ä½ èªé¢–过人,你也å¯èƒ½ä¼šè®°ä¸æ¸…ä½ 
3282 个星期å‰åšè¿‡çš„事情。
329
330åœ¨æºæ–‡ä»¶é‡Œï¼Œä½¿ç”¨ç©ºè¡Œéš”å¼€ä¸åŒçš„函数。如果该函数需è¦è¢«å¯¼å‡ºï¼Œå®ƒçš„ EXPORT* å®åº”该紧贴
331在它的结æŸå¤§æ‹¬å·ä¹‹ä¸‹ã€‚比如:
332
333 int system_is_up(void)
334 {
335 return system_state == SYSTEM_RUNNING;
336 }
337 EXPORT_SYMBOL(system_is_up);
338
339在函数原型中,包å«å‡½æ•°å和它们的数æ®ç±»åž‹ã€‚虽然Cè¯­è¨€é‡Œæ²¡æœ‰è¿™æ ·çš„è¦æ±‚,在 Linux 里这
340是æå€¡çš„åšæ³•,因为这样å¯ä»¥å¾ˆç®€å•的给读者æä¾›æ›´å¤šçš„æœ‰ä»·å€¼çš„ä¿¡æ¯ã€‚
341
342
343 第七章:集中的函数退出途径
344
345虽然被æŸäº›äººå£°ç§°å·²ç»è¿‡æ—¶ï¼Œä½†æ˜¯ goto 语å¥çš„等价物还是ç»å¸¸è¢«ç¼–è¯‘å™¨æ‰€ä½¿ç”¨ï¼Œå…·ä½“å½¢å¼æ˜¯
346æ— æ¡ä»¶è·³è½¬æŒ‡ä»¤ã€‚
347
348当一个函数从多个ä½ç½®é€€å‡ºï¼Œå¹¶ä¸”需è¦åšä¸€äº›ç±»ä¼¼æ¸…ç†çš„å¸¸è§æ“作时,goto 语å¥å°±å¾ˆæ–¹ä¾¿äº†ã€‚
349如果并ä¸éœ€è¦æ¸…ç†æ“作,那么直接 return å³å¯ã€‚
350
351ç†ç”±æ˜¯ï¼š
352
353- æ— æ¡ä»¶è¯­å¥å®¹æ˜“ç†è§£å’Œè·Ÿè¸ª
354- 嵌套程度å‡å°
355- å¯ä»¥é¿å…由于修改时忘记更新æŸä¸ªå•独的退出点而导致的错误
356- å‡è½»äº†ç¼–译器的工作,无需删除冗余代ç ;)
357
358 int fun(int a)
359 {
360 int result = 0;
361 char *buffer;
362
363 buffer = kmalloc(SIZE, GFP_KERNEL);
364 if (!buffer)
365 return -ENOMEM;
366
367 if (condition1) {
368 while (loop1) {
369 ...
370 }
371 result = 1;
372 goto out_buffer;
373 }
374 ...
375 out_buffer:
376 kfree(buffer);
377 return result;
378 }
379
380ä¸€ä¸ªéœ€è¦æ³¨æ„的常è§é”™è¯¯æ˜¯â€œä¸€ä¸ª err 错误â€ï¼Œå°±åƒè¿™æ ·ï¼š
381
382 err:
383 kfree(foo->bar);
384 kfree(foo);
385 return ret;
386
387这段代ç çš„错误是,在æŸäº›é€€å‡ºè·¯å¾„上 “foo†是 NULL。通常情况下,通过把它分离æˆä¸¤ä¸ª
388错误标签 “err_bar:†和 “err_foo:†æ¥ä¿®å¤è¿™ä¸ªé”™è¯¯ã€‚
389
390 第八章:注释
391
392注释是好的,ä¸è¿‡æœ‰è¿‡åº¦æ³¨é‡Šçš„å±é™©ã€‚永远ä¸è¦åœ¨æ³¨é‡Šé‡Œè§£é‡Šä½ çš„ä»£ç æ˜¯å¦‚何è¿ä½œçš„:更好
393çš„åšæ³•是让别人一看你的代ç å°±å¯ä»¥æ˜Žç™½ï¼Œè§£é‡Šå†™çš„å¾ˆå·®çš„ä»£ç æ˜¯æµªè´¹æ—¶é—´ã€‚
394
395一般的,你想è¦ä½ çš„æ³¨é‡Šå‘Šè¯‰åˆ«äººä½ çš„代ç åšäº†ä»€ä¹ˆï¼Œè€Œä¸æ˜¯æ€Žä¹ˆåšçš„。也请你ä¸è¦æŠŠæ³¨é‡Š
396æ”¾åœ¨ä¸€ä¸ªå‡½æ•°ä½“å†…éƒ¨ï¼šå¦‚æžœå‡½æ•°å¤æ‚到你需è¦ç‹¬ç«‹çš„æ³¨é‡Šå…¶ä¸­çš„一部分,你很å¯èƒ½éœ€è¦å›žåˆ°
397第六章看一看。你å¯ä»¥åšä¸€äº›å°æ³¨é‡Šæ¥æ³¨æ˜Žæˆ–警告æŸäº›å¾ˆèªæ˜Žï¼ˆæˆ–è€…æ§½ç³•ï¼‰çš„åšæ³•,但ä¸è¦
398加太多。你应该åšçš„,是把注释放在函数的头部,告诉人们它åšäº†ä»€ä¹ˆï¼Œä¹Ÿå¯ä»¥åŠ ä¸Šå®ƒåšè¿™
399些事情的原因。
400
401当注释内核API函数时,请使用 kernel-doc æ ¼å¼ã€‚请看
402Documentation/doc-guide/å’Œscripts/kernel-doc 以获得详细信æ¯ã€‚
403
404Linux的注释风格是 C89 “/* ... */†风格。ä¸è¦ä½¿ç”¨ C99 风格 “// ...†注释。
405
406长(多行)的首选注释风格是:
407
408 /*
409 * This is the preferred style for multi-line
410 * comments in the Linux kernel source code.
411 * Please use it consistently.
412 *
413 * Description: A column of asterisks on the left side,
414 * with beginning and ending almost-blank lines.
415 */
416
417对于在 net/ å’Œ drivers/net/ 的文件,首选的长(多行)注释风格有些ä¸åŒã€‚
418
419 /* The preferred comment style for files in net/ and drivers/net
420 * looks like this.
421 *
422 * It is nearly the same as the generally preferred comment style,
423 * but there is no initial almost-blank line.
424 */
425
426注释数æ®ä¹Ÿæ˜¯å¾ˆé‡è¦çš„,ä¸ç®¡æ˜¯åŸºæœ¬ç±»åž‹è¿˜æ˜¯è¡ç”Ÿç±»åž‹ã€‚为了方便实现这一点,æ¯ä¸€è¡Œåº”åª
427声明一个数æ®ï¼ˆä¸è¦ä½¿ç”¨é€—å·æ¥ä¸€æ¬¡å£°æ˜Žå¤šä¸ªæ•°æ®ï¼‰ã€‚这样你就有空间æ¥ä¸ºæ¯ä¸ªæ•°æ®å†™ä¸€æ®µ
428å°æ³¨é‡Šæ¥è§£é‡Šå®ƒä»¬çš„用途了。
429
430
431 第ä¹ç« ï¼šä½ å·²ç»æŠŠäº‹æƒ…弄糟了
432
433这没什么,我们都是这样。å¯èƒ½ä½ çš„使用了很长时间 Unix 的朋å‹å·²ç»å‘Šè¯‰ä½  “GNU emacs†能
434自动帮你格å¼åŒ– C æºä»£ç ï¼Œè€Œä¸”你也注æ„到了,确实是这样,ä¸è¿‡å®ƒæ‰€ä½¿ç”¨çš„默认值和我们
435想è¦çš„ç›¸åŽ»ç”šè¿œï¼ˆå®žé™…ä¸Šï¼Œç”šè‡³æ¯”éšæœºæ‰“的还è¦å·®â€”—无数个猴å­åœ¨ GNU emacs 里打字永远ä¸
436会创造出一个好程åºï¼‰ï¼ˆè¯‘注:请å‚考 Infinite Monkey Theorem)
437
438所以你è¦ä¹ˆæ”¾å¼ƒ GNU emacs,è¦ä¹ˆæ”¹å˜å®ƒè®©å®ƒä½¿ç”¨æ›´åˆç†çš„设定。è¦é‡‡ç”¨åŽä¸€ä¸ªæ–¹æ¡ˆï¼Œä½ å¯
439以把下é¢è¿™æ®µç²˜è´´åˆ°ä½ çš„ .emacs 文件里。
440
441(defun c-lineup-arglist-tabs-only (ignored)
442 "Line up argument lists by tabs, not spaces"
443 (let* ((anchor (c-langelem-pos c-syntactic-element))
444 (column (c-langelem-2nd-pos c-syntactic-element))
445 (offset (- (1+ column) anchor))
446 (steps (floor offset c-basic-offset)))
447 (* (max steps 1)
448 c-basic-offset)))
449
450(add-hook 'c-mode-common-hook
451 (lambda ()
452 ;; Add kernel style
453 (c-add-style
454 "linux-tabs-only"
455 '("linux" (c-offsets-alist
456 (arglist-cont-nonempty
457 c-lineup-gcc-asm-reg
458 c-lineup-arglist-tabs-only))))))
459
460(add-hook 'c-mode-hook
461 (lambda ()
462 (let ((filename (buffer-file-name)))
463 ;; Enable kernel mode for the appropriate files
464 (when (and filename
465 (string-match (expand-file-name "~/src/linux-trees")
466 filename))
467 (setq indent-tabs-mode t)
468 (setq show-trailing-whitespace t)
469 (c-set-style "linux-tabs-only")))))
470
471这会让 emacs 在 ~/src/linux-trees 目录下的 C æºæ–‡ä»¶èŽ·å¾—æ›´å¥½çš„å†…æ ¸ä»£ç é£Žæ ¼ã€‚
472
473ä¸è¿‡å°±ç®—ä½ å°è¯•让 emacs 正确的格å¼åŒ–代ç å¤±è´¥äº†ï¼Œä¹Ÿå¹¶ä¸æ„味ç€ä½ å¤±åŽ»äº†ä¸€åˆ‡ï¼šè¿˜å¯ä»¥ç”¨
474“indentâ€ã€‚
475
476ä¸è¿‡ï¼ŒGNU indent 也有和 GNU emacs 一样有问题的设定,所以你需è¦ç»™å®ƒä¸€äº›å‘½ä»¤é€‰é¡¹ã€‚ä¸
477过,这还ä¸ç®—太糟糕,因为就算是 GNU indent çš„ä½œè€…ä¹Ÿè®¤åŒ K&R çš„æƒå¨æ€§ï¼ˆGNU çš„äººå¹¶ä¸æ˜¯
478åäººï¼Œä»–ä»¬åªæ˜¯åœ¨è¿™ä¸ªé—®é¢˜ä¸Šè¢«ä¸¥é‡çš„误导了),所以你åªè¦ç»™ indent 指定选项 “-kr -i8â€
479(代表 “K&R,8 个字符缩进â€ï¼‰ï¼Œæˆ–者使用 “scripts/Lindentâ€ï¼Œè¿™æ ·å°±å¯ä»¥ä»¥æœ€æ—¶é«¦çš„æ–¹å¼
480缩进æºä»£ç ã€‚
481
482“indentâ€ æœ‰å¾ˆå¤šé€‰é¡¹ï¼Œç‰¹åˆ«æ˜¯é‡æ–°æ ¼å¼åŒ–注释的时候,你å¯èƒ½éœ€è¦çœ‹ä¸€ä¸‹å®ƒçš„æ‰‹å†Œé¡µã€‚ä¸è¿‡
483è®°ä½ï¼šâ€œindent†ä¸èƒ½ä¿®æ­£å的编程习惯。
484
485
486 第å章:Kconfig é…置文件
487
488对于é布æºç æ ‘的所有 Kconfig* é…置文件æ¥è¯´ï¼Œå®ƒä»¬ç¼©è¿›æ–¹å¼ä¸Ž C 代ç ç›¸æ¯”有所ä¸åŒã€‚紧挨
489在 “config†定义下é¢çš„行缩进一个制表符,帮助信æ¯åˆ™å†å¤šç¼©è¿› 2 个空格。比如:
490
491config AUDIT
492 bool "Auditing support"
493 depends on NET
494 help
495 Enable auditing infrastructure that can be used with another
496 kernel subsystem, such as SELinux (which requires this for
497 logging of avc messages output). Does not do system-call
498 auditing without CONFIG_AUDITSYSCALL.
499
500而那些å±é™©çš„功能(比如æŸäº›æ–‡ä»¶ç³»ç»Ÿçš„写支æŒï¼‰åº”该在它们的æç¤ºå­—符串里显著的声明这
501一点:
502
503config ADFS_FS_RW
504 bool "ADFS write support (DANGEROUS)"
505 depends on ADFS_FS
506 ...
507
508è¦æŸ¥çœ‹é…置文件的完整文档,请看 Documentation/kbuild/kconfig-language.txt。
509
510
511 第å一章:数æ®ç»“æž„
512
513如果一个数æ®ç»“构,在创建和销æ¯å®ƒçš„å•线执行环境之外å¯è§ï¼Œé‚£ä¹ˆå®ƒå¿…é¡»è¦æœ‰ä¸€ä¸ªå¼•用计
514数器。内核里没有垃圾收集(并且内核之外的垃圾收集慢且效率低下),这æ„味ç€ä½ ç»å¯¹éœ€
515è¦è®°å½•ä½ å¯¹è¿™ç§æ•°æ®ç»“构的使用情况。
516
517引用计数æ„味ç€ä½ èƒ½å¤Ÿé¿å…上é”,并且å…许多个用户并行访问这个数æ®ç»“构——而ä¸éœ€è¦æ‹…心
518这个数æ®ç»“构仅仅因为暂时ä¸è¢«ä½¿ç”¨å°±æ¶ˆå¤±äº†ï¼Œé‚£äº›ç”¨æˆ·å¯èƒ½ä¸è¿‡æ˜¯æ²‰ç¡äº†ä¸€é˜µæˆ–者åšäº†ä¸€
519些其他事情而已。
520
521注æ„上é”ä¸èƒ½å–ä»£å¼•ç”¨è®¡æ•°ã€‚ä¸Šé”æ˜¯ä¸ºäº†ä¿æŒæ•°æ®ç»“构的一致性,而引用计数是一个内存管
522ç†æŠ€å·§ã€‚é€šå¸¸äºŒè€…éƒ½éœ€è¦ï¼Œä¸è¦æŠŠä¸¤ä¸ªæžæ··äº†ã€‚
523
524很多数æ®ç»“构实际上有2级引用计数,它们通常有ä¸åŒâ€œç±»â€çš„用户。å­ç±»è®¡æ•°å™¨ç»Ÿè®¡å­ç±»ç”¨
525户的数é‡ï¼Œæ¯å½“å­ç±»è®¡æ•°å™¨å‡è‡³é›¶æ—¶ï¼Œå…¨å±€è®¡æ•°å™¨å‡ä¸€ã€‚
526
527è¿™ç§â€œå¤šçº§å¼•用计数â€çš„例å­å¯ä»¥åœ¨å†…存管ç†ï¼ˆâ€œstruct mm_structâ€ï¼šmm_users å’Œ mm_count)
528和文件系统(“struct super_blockâ€ï¼šs_countå’Œs_active)中找到。
529
530è®°ä½ï¼šå¦‚æžœå¦ä¸€ä¸ªæ‰§è¡Œçº¿ç´¢å¯ä»¥æ‰¾åˆ°ä½ çš„æ•°æ®ç»“构,但是这个数æ®ç»“构没有引用计数器,这
531里几乎肯定是一个 bug。
532
533
534 第å二章:å®ï¼Œæžšä¸¾å’ŒRTL
535
536用于定义常é‡çš„å®çš„åå­—åŠæžšä¸¾é‡Œçš„æ ‡ç­¾éœ€è¦å¤§å†™ã€‚
537
538#define CONSTANT 0x12345
539
540åœ¨å®šä¹‰å‡ ä¸ªç›¸å…³çš„å¸¸é‡æ—¶ï¼Œæœ€å¥½ç”¨æžšä¸¾ã€‚
541
542å®çš„å字请用大写字æ¯ï¼Œä¸è¿‡å½¢å¦‚函数的å®çš„åå­—å¯ä»¥ç”¨å°å†™å­—æ¯ã€‚
543
544一般的,如果能写æˆå†…è”函数就ä¸è¦å†™æˆåƒå‡½æ•°çš„å®ã€‚
545
546嫿œ‰å¤šä¸ªè¯­å¥çš„å®åº”该被包å«åœ¨ä¸€ä¸ª do-while 代ç å—里:
547
548 #define macrofun(a, b, c) \
549 do { \
550 if (a == 5) \
551 do_this(b, c); \
552 } while (0)
553
554使用å®çš„æ—¶å€™åº”é¿å…的事情:
555
5561) å½±å“æŽ§åˆ¶æµç¨‹çš„å®ï¼š
557
558 #define FOO(x) \
559 do { \
560 if (blah(x) < 0) \
561 return -EBUGGERED; \
562 } while (0)
563
564éžå¸¸ä¸å¥½ã€‚它看起æ¥åƒä¸€ä¸ªå‡½æ•°ï¼Œä¸è¿‡å´èƒ½å¯¼è‡´â€œè°ƒç”¨â€å®ƒçš„函数退出;ä¸è¦æ‰“乱读者大脑里
565的语法分æžå™¨ã€‚
566
5672) ä¾èµ–于一个固定å字的本地å˜é‡çš„å®ï¼š
568
569 #define FOO(val) bar(index, val)
570
571å¯èƒ½çœ‹èµ·æ¥åƒæ˜¯ä¸ªä¸é”™çš„东西,ä¸è¿‡å®ƒéžå¸¸å®¹æ˜“把读代ç çš„人æžç³Šæ¶‚,而且容易导致看起æ¥
572ä¸ç›¸å…³çš„æ”¹åЍ另æ¥é”™è¯¯ã€‚
573
5743) ä½œä¸ºå·¦å€¼çš„å¸¦å‚æ•°çš„å®ï¼š FOO(x) = y;如果有人把 FOO å˜æˆä¸€ä¸ªå†…è”函数的è¯ï¼Œè¿™ç§ç”¨
575法就会出错了。
576
5774) 忘记了优先级:使用表达å¼å®šä¹‰å¸¸é‡çš„å®å¿…须将表达å¼ç½®äºŽä¸€å¯¹å°æ‹¬å·ä¹‹å†…ã€‚å¸¦å‚æ•°çš„
578å®ä¹Ÿè¦æ³¨æ„此类问题。
579
580 #define CONSTANT 0x4000
581 #define CONSTEXP (CONSTANT | 3)
582
5835) 在å®é‡Œå®šä¹‰ç±»ä¼¼å‡½æ•°çš„æœ¬åœ°å˜é‡æ—¶å‘½å冲çªï¼š
584
585 #define FOO(x) \
586 ({ \
587 typeof(x) ret; \
588 ret = calc_ret(x); \
589 (ret); \
590 })
591
592ret 是本地å˜é‡çš„通用åå­— - __foo_ret æ›´ä¸å®¹æ˜“与一个已存在的å˜é‡å†²çªã€‚
593
594cpp 手册对å®çš„讲解很详细。gcc internals 手册也详细讲解了 RTL(译注:register
595transfer language),内核里的汇编语言ç»å¸¸ç”¨åˆ°å®ƒã€‚
596
597
598 第å三章:打å°å†…核消æ¯
599
600内核开å‘者应该是å—过良好教育的。请一定注æ„内核信æ¯çš„æ‹¼å†™ï¼Œä»¥ç»™äººä»¥å¥½çš„å°è±¡ã€‚ä¸è¦
601用ä¸è§„范的å•è¯æ¯”如 “dontâ€ï¼Œè€Œè¦ç”¨ “do notâ€æˆ–者 “don'tâ€ã€‚ä¿è¯è¿™äº›ä¿¡æ¯ç®€å•ã€æ˜Žäº†ã€
602无歧义。
603
604内核信æ¯ä¸å¿…以å¥å·ï¼ˆè¯‘注:英文å¥å·ï¼Œå³ç‚¹ï¼‰ç»“æŸã€‚
605
606åœ¨å°æ‹¬å·é‡Œæ‰“å°æ•°å­— (%d) 没有任何价值,应该é¿å…这样åšã€‚
607
608<linux/device.h> 里有一些驱动模型诊断å®ï¼Œä½ åº”该使用它们,以确ä¿ä¿¡æ¯å¯¹åº”于正确的
609设备和驱动,并且被标记了正确的消æ¯çº§åˆ«ã€‚è¿™äº›å®æœ‰ï¼šdev_err(),dev_warn(),
610dev_info() 等等。对于那些ä¸å’ŒæŸä¸ªç‰¹å®šè®¾å¤‡ç›¸å…³è¿žçš„ä¿¡æ¯ï¼Œ<linux/printk.h> 定义了
611pr_notice(),pr_info(),pr_warn(),pr_err() 和其他。
612
613写出好的调试信æ¯å¯ä»¥æ˜¯ä¸€ä¸ªå¾ˆå¤§çš„æŒ‘战;一旦你写出åŽï¼Œè¿™äº›ä¿¡æ¯åœ¨è¿œç¨‹é™¤é”™æ—¶èƒ½æä¾›æžå¤§
614的帮助。然而打å°è°ƒè¯•ä¿¡æ¯çš„å¤„ç†æ–¹å¼åŒæ‰“å°éžè°ƒè¯•ä¿¡æ¯ä¸åŒã€‚å…¶ä»– pr_XXX() 函数能无æ¡ä»¶åœ°
615打å°ï¼Œpr_debug() å´ä¸ï¼›é»˜è®¤æƒ…况下它ä¸ä¼šè¢«ç¼–译,除éžå®šä¹‰äº† DEBUG 或设定了
616CONFIG_DYNAMIC_DEBUGã€‚å®žé™…è¿™åŒæ ·æ˜¯ä¸ºäº† dev_dbg(),一个相关约定是在一个已ç»å¼€å¯äº†
617DEBUG 时,使用 VERBOSE_DEBUG æ¥æ·»åŠ  dev_vdbg()。
618
619许多å­ç³»ç»Ÿæ‹¥æœ‰ Kconfig 调试选项æ¥å¼€å¯ -DDEBUG 在对应的 Makefile 里é¢ï¼›åœ¨å…¶ä»–
620情况下,特殊文件使用 #define DEBUG。当一æ¡è°ƒè¯•ä¿¡æ¯éœ€è¦è¢«æ— æ¡ä»¶æ‰“å°æ—¶ï¼Œä¾‹å¦‚,如果
621å·²ç»åŒ…å«ä¸€ä¸ªè°ƒè¯•相关的 #ifdef æ¡ä»¶ï¼Œprintk(KERN_DEBUG ...) å°±å¯è¢«ä½¿ç”¨ã€‚
622
623
624 第å四章:分é…内存
625
626内核æä¾›äº†ä¸‹é¢çš„一般用途的内存分é…函数:
627kmalloc(),kzalloc(),kmalloc_array(),kcalloc(),vmalloc() 和 vzalloc()。
628请å‚考 API æ–‡æ¡£ä»¥èŽ·å–æœ‰å…³å®ƒä»¬çš„详细信æ¯ã€‚
629
630传递结构体大å°çš„首选形弿˜¯è¿™æ ·çš„:
631
632 p = kmalloc(sizeof(*p), ...);
633
634å¦å¤–一ç§ä¼ é€’æ–¹å¼ä¸­ï¼Œsizeof çš„æ“作数是结构体的å字,这样会é™ä½Žå¯è¯»æ€§ï¼Œå¹¶ä¸”å¯èƒ½ä¼šå¼•
635å…¥ bug。有å¯èƒ½æŒ‡é’ˆå˜é‡ç±»åž‹è¢«æ”¹å˜æ—¶ï¼Œè€Œå¯¹åº”的传递给内存分é…函数的 sizeof 的结果ä¸å˜ã€‚
636
637强制转æ¢ä¸€ä¸ª void 指针返回值是多余的。C 语言本身ä¿è¯äº†ä»Ž void 指针到其他任何指针类型
638çš„è½¬æ¢æ˜¯æ²¡æœ‰é—®é¢˜çš„。
639
640分é…ä¸€ä¸ªæ•°ç»„çš„é¦–é€‰å½¢å¼æ˜¯è¿™æ ·çš„:
641
642 p = kmalloc_array(n, sizeof(...), ...);
643
644分配一个清零数组的首选形式是这样的:
645
646 p = kcalloc(n, sizeof(...), ...);
647
648两ç§å½¢å¼æ£€æŸ¥åˆ†é…å¤§å° n * sizeof(...) 的溢出,如果溢出返回 NULL。
649
650
651 第å五章:内è”弊病
652
653有一个常è§çš„误解是内è”函数是 gcc æä¾›çš„å¯ä»¥è®©ä»£ç è¿è¡Œæ›´å¿«çš„一个选项。虽然使用内è”
654函数有时候是æ°å½“çš„ï¼ˆæ¯”å¦‚ä½œä¸ºä¸€ç§æ›¿ä»£å®çš„æ–¹å¼ï¼Œè¯·çœ‹ç¬¬å二章),ä¸è¿‡å¾ˆå¤šæƒ…况䏋䏿˜¯
655这样。inline 关键字的过度使用会使内核å˜å¤§ï¼Œä»Žè€Œä½¿æ•´ä¸ªç³»ç»Ÿè¿è¡Œé€Ÿåº¦å˜æ…¢ã€‚因为大内核
656会å ç”¨æ›´å¤šçš„æŒ‡ä»¤é«˜é€Ÿç¼“存(译注:一级缓存通常是指令缓存和数æ®ç¼“存分开的)而且会导
657致 pagecache çš„å¯ç”¨å†…å­˜å‡å°‘。想象一下,一次pagecache未命中就会导致一次ç£ç›˜å¯»å€ï¼Œ
658将耗时 5 毫秒。5 毫秒的时间内 CPU 能执行很多很多指令。
659
660一个基本的原则是如果一个函数有 3 行以上,就ä¸è¦æŠŠå®ƒå˜æˆå†…è”函数。这个原则的一个例
661å¤–æ˜¯ï¼Œå¦‚æžœä½ çŸ¥é“æŸä¸ªå‚数是一个编译时常é‡ï¼Œè€Œä¸”因为这个常é‡ä½ ç¡®å®šç¼–译器在编译时能
662优化掉你的函数的大部分代ç ï¼Œé‚£ä»ç„¶å¯ä»¥ç»™å®ƒåŠ ä¸Š inline 关键字。kmalloc() 内è”函数就
663是一个很好的例å­ã€‚
664
665人们ç»å¸¸ä¸»å¼ ç»™ static 的而且åªç”¨äº†ä¸€æ¬¡çš„函数加上 inline,如此ä¸ä¼šæœ‰ä»»ä½•æŸå¤±ï¼Œå› ä¸ºæ²¡
666有什么好æƒè¡¡çš„ã€‚è™½ç„¶ä»ŽæŠ€æœ¯ä¸Šè¯´è¿™æ˜¯æ­£ç¡®çš„ï¼Œä½†æ˜¯å®žé™…ä¸Šè¿™ç§æƒ…况下å³ä½¿ä¸åŠ  inline gcc
667也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 inline,由此而æ¥çš„争论会抵消 inline
668自身的潜在价值,得ä¸å¿å¤±ã€‚
669
670
671 第å六章:函数返回值åŠå‘½å
672
673函数å¯ä»¥è¿”回很多ç§ä¸åŒç±»åž‹çš„值,最常è§çš„ä¸€ç§æ˜¯è¡¨æ˜Žå‡½æ•°æ‰§è¡ŒæˆåŠŸæˆ–è€…å¤±è´¥çš„å€¼ã€‚è¿™æ ·
674的一个值å¯ä»¥è¡¨ç¤ºä¸ºä¸€ä¸ªé”™è¯¯ä»£ç æ•´æ•°ï¼ˆ-Exxxï¼å¤±è´¥ï¼Œ0ï¼æˆåŠŸï¼‰æˆ–è€…ä¸€ä¸ªâ€œæˆåŠŸâ€å¸ƒå°”值(
6750ï¼å¤±è´¥ï¼Œéž0ï¼æˆåŠŸï¼‰ã€‚
676
677æ··åˆä½¿ç”¨è¿™ä¸¤ç§è¡¨è¾¾æ–¹å¼æ˜¯éš¾äºŽå‘现的 bug çš„æ¥æºã€‚如果 C 语言本身严格区分整形和布尔型å˜
678é‡ï¼Œé‚£ä¹ˆç¼–译器就能够帮我们å‘现这些错误……ä¸è¿‡ C 语言ä¸åŒºåˆ†ã€‚为了é¿å…äº§ç”Ÿè¿™ç§ bug,请
679éµå¾ªä¸‹é¢çš„æƒ¯ä¾‹ï¼š
680
681 如果函数的åå­—æ˜¯ä¸€ä¸ªåŠ¨ä½œæˆ–è€…å¼ºåˆ¶æ€§çš„å‘½ä»¤ï¼Œé‚£ä¹ˆè¿™ä¸ªå‡½æ•°åº”è¯¥è¿”å›žé”™è¯¯ä»£ç æ•´
682 数。如果是一个判断,那么函数应该返回一个“æˆåŠŸâ€å¸ƒå°”值。
683
684比如,“add work†是一个命令,所以 add_work() 函数在æˆåŠŸæ—¶è¿”å›ž 0,在失败时返回 -EBUSY。
685类似的,因为 “PCI device present†是一个判断,所以 pci_dev_present() 函数在æˆåŠŸæ‰¾åˆ°
686一个匹é…的设备时应该返回 1,如果找ä¸åˆ°æ—¶åº”该返回 0。
687
688所有导出(译注:EXPORT)的函数都必须éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ï¼Œæ‰€æœ‰çš„公共函数也都应该如此。ç§
689有(static)函数ä¸éœ€è¦å¦‚此,但是我们也推è这样åšã€‚
690
691è¿”å›žå€¼æ˜¯å®žé™…è®¡ç®—ç»“æžœè€Œä¸æ˜¯è®¡ç®—æ˜¯å¦æˆåŠŸçš„æ ‡å¿—çš„å‡½æ•°ä¸å—此惯例的é™åˆ¶ã€‚一般的,他们
692通过返回一些正常值范围之外的结果æ¥è¡¨ç¤ºå‡ºé”™ã€‚å…¸åž‹çš„ä¾‹å­æ˜¯è¿”回指针的函数,他们使用
693NULL 或者 ERR_PTR æœºåˆ¶æ¥æŠ¥å‘Šé”™è¯¯ã€‚
694
695
696 第å七章:ä¸è¦é‡æ–°å‘明内核å®
697
698头文件 include/linux/kernel.h 包å«äº†ä¸€äº›å®ï¼Œä½ åº”该使用它们,而ä¸è¦è‡ªå·±å†™ä¸€äº›å®ƒä»¬çš„
699å˜ç§ã€‚比如,如果你需è¦è®¡ç®—一个数组的长度,使用这个å®
700
701 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
702
703类似的,如果你è¦è®¡ç®—æŸç»“构体æˆå‘˜çš„大å°ï¼Œä½¿ç”¨
704
705 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
706
707还有å¯ä»¥åšä¸¥æ ¼çš„类型检查的 min() å’Œ max() å®ï¼Œå¦‚果你需è¦å¯ä»¥ä½¿ç”¨å®ƒä»¬ã€‚ä½ å¯ä»¥è‡ªå·±çœ‹çœ‹
708那个头文件里还定义了什么你å¯ä»¥æ‹¿æ¥ç”¨çš„东西,如果有定义的è¯ï¼Œä½ å°±ä¸åº”在你的代ç é‡Œ
709è‡ªå·±é‡æ–°å®šä¹‰ã€‚
710
711
712 第å八章:编辑器模å¼è¡Œå’Œå…¶ä»–需è¦ç½—嗦的事情
713
714有一些编辑器å¯ä»¥è§£é‡ŠåµŒå…¥åœ¨æºæ–‡ä»¶é‡Œçš„由一些特殊标记标明的é…置信æ¯ã€‚比如,emacs
715能够解释被标记æˆè¿™æ ·çš„行:
716
717 -*- mode: c -*-
718
719或者这样的:
720
721 /*
722 Local Variables:
723 compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
724 End:
725 */
726
727Vim 能够解释这样的标记:
728
729 /* vim:set sw=8 noet */
730
731ä¸è¦åœ¨æºä»£ç ä¸­åŒ…å«ä»»ä½•这样的内容。æ¯ä¸ªäººéƒ½æœ‰ä»–自己的编辑器é…ç½®ï¼Œä½ çš„æºæ–‡ä»¶ä¸åº”
732该覆盖别人的é…置。这包括有关缩进和模å¼é…置的标记。人们å¯ä»¥ä½¿ç”¨ä»–们自己定制的模
733å¼ï¼Œæˆ–者使用其他å¯ä»¥äº§ç”Ÿæ­£ç¡®çš„缩进的巧妙方法。
734
735
736 第åä¹ç« ï¼šå†…è”æ±‡ç¼–
737
738在特定架构的代ç ä¸­ï¼Œä½ ä¹Ÿè®¸éœ€è¦å†…è”æ±‡ç¼–æ¥ä½¿ç”¨ CPU 接å£å’Œå¹³å°ç›¸å…³åŠŸèƒ½ã€‚åœ¨éœ€è¦
739è¿™ä¹ˆåšæ—¶ï¼Œä¸è¦çŠ¹è±«ã€‚ç„¶è€Œï¼Œå½“ C å¯ä»¥å®Œæˆå·¥ä½œæ—¶ï¼Œä¸è¦æ— ç«¯åœ°ä½¿ç”¨å†…è”æ±‡ç¼–。如果
740å¯èƒ½ï¼Œä½ å¯ä»¥å¹¶ä¸”应该用 C 和硬件交互。
741
742è€ƒè™‘åŽ»å†™é€šç”¨ä¸€ç‚¹çš„å†…è”æ±‡ç¼–ä½œä¸ºç®€æ˜Žçš„è¾…åŠ©å‡½æ•°ï¼Œè€Œä¸æ˜¯é‡å¤å†™ä¸‹å®ƒä»¬çš„细节。记ä½
743å†…è”æ±‡ç¼–å¯ä»¥ä½¿ç”¨ C 傿•°ã€‚
744
745大而特殊的汇编函数应该放在 .S 文件中,对应 C 的原型定义在 C 头文件中。汇编
746函数的 C 原型应该使用 “asmlinkageâ€ã€‚
747
748ä½ å¯èƒ½éœ€è¦å°†ä½ çš„æ±‡ç¼–è¯­å¥æ ‡è®°ä¸º volatile,æ¥é˜»æ­¢ GCC 在没å‘现任何副作用åŽå°±
749移除了它。你ä¸å¿…总是这样åšï¼Œè™½ç„¶ï¼Œè¿™æ ·å¯ä»¥é™åˆ¶ä¸å¿…è¦çš„优化。
750
751在写一个包å«å¤šæ¡æŒ‡ä»¤çš„å•ä¸ªå†…è”æ±‡ç¼–è¯­å¥æ—¶ï¼ŒæŠŠæ¯æ¡æŒ‡ä»¤ç”¨å¼•å·å­—符串分离,并写在
752å•独一行,在æ¯ä¸ªå­—符串结尾,除了 \n\t 结尾之外,在汇编输出中适当地缩进下
753ä¸€æ¡æŒ‡ä»¤ï¼š
754
755 asm ("magic %reg1, #42\n\t"
756 "more_magic %reg2, %reg3"
757 : /* outputs */ : /* inputs */ : /* clobbers */);
758
759
760 第二å章:æ¡ä»¶ç¼–译
761
762åªè¦å¯èƒ½ï¼Œå°±ä¸è¦åœ¨ .c 文件里é¢ä½¿ç”¨é¢„å¤„ç†æ¡ä»¶ï¼›è¿™æ ·åšè®©ä»£ç æ›´éš¾é˜…读并且逻辑难以
763跟踪。替代方案是,在头文件定义函数在这些 .c 文件中使用这类的æ¡ä»¶è¡¨è¾¾å¼ï¼Œæä¾›ç©º
764æ“作的桩版本(译注:桩程åºï¼Œæ˜¯æŒ‡ç”¨æ¥æ›¿æ¢ä¸€éƒ¨åˆ†åŠŸèƒ½çš„ç¨‹åºæ®µï¼‰åœ¨ #else 情况下,
765å†ä»Ž .c 文件中无æ¡ä»¶åœ°è°ƒç”¨è¿™äº›å‡½æ•°ã€‚编译器会é¿å…生æˆä»»ä½•桩调用的代ç ï¼Œäº§ç”Ÿä¸€è‡´
766的结果,但逻辑将更加清晰。
767
768å®å¯ç¼–è¯‘æ•´ä¸ªå‡½æ•°ï¼Œè€Œä¸æ˜¯éƒ¨åˆ†å‡½æ•°æˆ–部分表达å¼ã€‚è€Œä¸æ˜¯åœ¨ä¸€ä¸ªè¡¨è¾¾å¼æ·»åŠ  ifdef,
769è§£æžéƒ¨åˆ†æˆ–全部表达å¼åˆ°ä¸€ä¸ªå•独的辅助函数,并应用æ¡ä»¶åˆ°è¯¥å‡½æ•°å†…。
770
771如果你有一个在特定é…置中å¯èƒ½æ˜¯æœªä½¿ç”¨çš„函数或å˜é‡ï¼Œç¼–译器将警告它定义了但未使用,
772标记这个定义为 __maybe_unused è€Œä¸æ˜¯å°†å®ƒåŒ…å«åœ¨ä¸€ä¸ªé¢„å¤„ç†æ¡ä»¶ä¸­ã€‚(然而,如果
773一个函数或å˜é‡æ€»æ˜¯æœªä½¿ç”¨çš„,就直接删除它。)
774
775在代ç ä¸­ï¼Œå¯èƒ½çš„æƒ…况下,使用 IS_ENABLED 宿¥è½¬åŒ–æŸä¸ª Kconfig 标记为 C 的布尔
776表达å¼ï¼Œå¹¶åœ¨æ­£å¸¸çš„ C æ¡ä»¶ä¸­ä½¿ç”¨å®ƒï¼š
777
778 if (IS_ENABLED(CONFIG_SOMETHING)) {
779 ...
780 }
781
782编译器会无æ¡ä»¶åœ°åšå¸¸æ•°åˆå¹¶ï¼Œå°±åƒä½¿ç”¨ #ifdef é‚£æ ·ï¼ŒåŒ…å«æˆ–排除代ç å—,所以这ä¸ä¼š
783带æ¥ä»»ä½•è¿è¡Œæ—¶å¼€é”€ã€‚ç„¶è€Œï¼Œè¿™ç§æ–¹æ³•便—§å…许 C 编译器查看å—内的代ç ï¼Œå¹¶æ£€æŸ¥å®ƒçš„æ­£ç¡®
784性(语法,类型,符å·å¼•用,等等)。因此,如果æ¡ä»¶ä¸æ»¡è¶³ï¼Œä»£ç å—内的引用符å·å°†ä¸å­˜åœ¨ï¼Œ
785你必须继续使用 #ifdef。
786
787在任何有æ„义的 #if 或 #ifdef å—的末尾(超过几行),在 #endif åŒä¸€è¡Œçš„åŽé¢å†™ä¸‹
788注释,指出该æ¡ä»¶è¡¨è¾¾å¼è¢«ä½¿ç”¨ã€‚例如:
789
790 #ifdef CONFIG_SOMETHING
791 ...
792 #endif /* CONFIG_SOMETHING */
793
794
795 附录 I:å‚考
796
797The C Programming Language, 第二版
798作者:Brian W. Kernighan 和 Denni M. Ritchie.
799Prentice Hall, Inc., 1988.
800ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮).
801
802The Practice of Programming
803作者:Brian W. Kernighan 和 Rob Pike.
804Addison-Wesley, Inc., 1999.
805ISBN 0-201-61586-X.
806
807GNU 手册 - éµå¾ª K&R 标准和此文本 - cpp, gcc, gcc internals and indent,
808都å¯ä»¥ä»Ž http://www.gnu.org/manual/ 找到
809
810WG14是C语言的国际标准化工作组,URL: http://www.open-std.org/JTC1/SC22/WG14/
811
812Kernel process/coding-style.rst,作者 greg@kroah.com å‘表于OLS 2002:
813http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
diff --git a/Documentation/translations/zh_CN/coding-style.rst b/Documentation/translations/zh_CN/coding-style.rst
new file mode 100644
index 000000000000..1466aa64b8b4
--- /dev/null
+++ b/Documentation/translations/zh_CN/coding-style.rst
@@ -0,0 +1,950 @@
1Chinese translated version of Documentation/process/coding-style.rst
2
3If you have any comments or updates to the content, please post to LKML directly.
4However, if you have a problem communicating in English, you can also ask the
5Chinese maintainer for help. Contact the Chinese maintainer if this
6translation is outdated or if there is a problem with the translation.
7
8Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
9
10---------------------------------------------------------------------
11
12Documentation/process/coding-style.rst 的中文翻译
13
14如果想评论或更新本文的内容,请直接å‘信到LKMLã€‚å¦‚æžœä½ ä½¿ç”¨è‹±æ–‡äº¤æµæœ‰å›°éš¾çš„è¯ï¼Œ
15也å¯ä»¥å‘中文版维护者求助。如果本翻译更新ä¸åŠæ—¶æˆ–者翻译存在问题,请è”系中文版
16维护者::
17
18 中文版维护者: å¼ ä¹ Zhang Le <r0bertz@gentoo.org>
19 中文版翻译者: å¼ ä¹ Zhang Le <r0bertz@gentoo.org>
20 中文版校译者: çŽ‹èª Wang Cong <xiyou.wangcong@gmail.com>
21 wheelz <kernel.zeng@gmail.com>
22 管旭东 Xudong Guan <xudong.guan@gmail.com>
23 Li Zefan <lizf@cn.fujitsu.com>
24 Wang Chen <wangchen@cn.fujitsu.com>
25
26以下为正文
27
28---------------------------------------------------------------------
29
30Linux 内核代码风格
31=========================
32
33这是一个简短的文档,æè¿°äº† linux 内核的首选代ç é£Žæ ¼ã€‚代ç é£Žæ ¼æ˜¯å› äººè€Œå¼‚的,
34è€Œä¸”æˆ‘ä¸æ„¿æ„æŠŠè‡ªå·±çš„è§‚ç‚¹å¼ºåŠ ç»™ä»»ä½•äººï¼Œä½†è¿™å°±åƒæˆ‘去åšä»»ä½•事情都必须éµå¾ªçš„原则
35那样,我也希望在ç»å¤§å¤šæ•°äº‹ä¸Šä¿æŒè¿™ç§çš„æ€åº¦ã€‚è¯· (åœ¨å†™ä»£ç æ—¶) 至少考虑一下这里
36的代ç é£Žæ ¼ã€‚
37
38首先,我建议你打å°ä¸€ä»½ GNU 代ç è§„范,然åŽä¸è¦è¯»ã€‚烧了它,这是一个具有é‡å¤§è±¡å¾
39性æ„义的动作。
40
41ä¸ç®¡æ€Žæ ·ï¼ŒçŽ°åœ¨æˆ‘ä»¬å¼€å§‹ï¼š
42
43
441) 缩进
45--------------
46
47制表符是 8 个字符,所以缩进也是 8 个字符。有些异端è¿åŠ¨è¯•å›¾å°†ç¼©è¿›å˜ä¸º 4 (甚至
482ï¼) 字符深,这几乎相当于å°è¯•将圆周率的值定义为 3。
49
50ç†ç”±ï¼šç¼©è¿›çš„全部æ„义就在于清楚的定义一个控制å—起止于何处。尤其是当你盯ç€ä½ çš„
51å±å¹•连续看了 20 å°æ—¶ä¹‹åŽï¼Œä½ å°†ä¼šå‘现大一点的缩进会使你更容易分辨缩进。
52
53现在,有些人会抱怨 8 个字符的缩进会使代ç å‘å³è¾¹ç§»åŠ¨çš„å¤ªè¿œï¼Œåœ¨ 80 个字符的终端
54å±å¹•上就很难读这样的代ç ã€‚è¿™ä¸ªé—®é¢˜çš„ç­”æ¡ˆæ˜¯ï¼Œå¦‚æžœä½ éœ€è¦ 3 级以上的缩进,ä¸ç®¡ç”¨
55ä½•ç§æ–¹å¼ä½ çš„代ç å·²ç»æœ‰é—®é¢˜äº†ï¼Œåº”该修正你的程åºã€‚
56
57简而言之,8 个字符的缩进å¯ä»¥è®©ä»£ç æ›´å®¹æ˜“阅读,还有一个好处是当你的函数嵌套太
58深的时候å¯ä»¥ç»™ä½ è­¦å‘Šã€‚留心这个警告。
59
60在 switch 语å¥ä¸­æ¶ˆé™¤å¤šçº§ç¼©è¿›çš„é¦–é€‰çš„æ–¹å¼æ˜¯è®© ``switch`` 和从属于它的 ``case``
61标签对é½äºŽåŒä¸€åˆ—,而ä¸è¦ ``两次缩进`` ``case`` 标签。比如:
62
63.. code-block:: c
64
65 switch (suffix) {
66 case 'G':
67 case 'g':
68 mem <<= 30;
69 break;
70 case 'M':
71 case 'm':
72 mem <<= 20;
73 break;
74 case 'K':
75 case 'k':
76 mem <<= 10;
77 /* fall through */
78 default:
79 break;
80 }
81
82ä¸è¦æŠŠå¤šä¸ªè¯­å¥æ”¾åœ¨ä¸€è¡Œé‡Œï¼Œé™¤éžä½ æœ‰ä»€ä¹ˆä¸œè¥¿è¦éšè—:
83
84.. code-block:: c
85
86 if (condition) do_this;
87 do_something_everytime;
88
89也ä¸è¦åœ¨ä¸€è¡Œé‡Œæ”¾å¤šä¸ªèµ‹å€¼è¯­å¥ã€‚内核代ç é£Žæ ¼è¶…级简å•。就是é¿å…å¯èƒ½å¯¼è‡´åˆ«äººè¯¯è¯»
90的表达å¼ã€‚
91
92é™¤äº†æ³¨é‡Šã€æ–‡æ¡£å’Œ Kconfig 之外,ä¸è¦ä½¿ç”¨ç©ºæ ¼æ¥ç¼©è¿›ï¼Œå‰é¢çš„例孿˜¯ä¾‹å¤–,是有æ„为
93之。
94
95选用一个好的编辑器,ä¸è¦åœ¨è¡Œå°¾ç•™ç©ºæ ¼ã€‚
96
97
982) 把长的行和字符串打散
99------------------------------
100
101代ç é£Žæ ¼çš„æ„ä¹‰å°±åœ¨äºŽä½¿ç”¨å¹³å¸¸ä½¿ç”¨çš„å·¥å…·æ¥ç»´æŒä»£ç çš„å¯è¯»æ€§å’Œå¯ç»´æŠ¤æ€§ã€‚
102
103æ¯ä¸€è¡Œçš„长度的é™åˆ¶æ˜¯ 80 列,我们强烈建议您éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ã€‚
104
105长于 80 列的语å¥è¦æ‰“æ•£æˆæœ‰æ„义的片段。除éžè¶…过 80 列能显著增加å¯è¯»æ€§ï¼Œå¹¶ä¸”ä¸
106会éšè—ä¿¡æ¯ã€‚å­ç‰‡æ®µè¦æ˜Žæ˜¾çŸ­äºŽæ¯ç‰‡æ®µï¼Œå¹¶æ˜Žæ˜¾é å³ã€‚è¿™åŒæ ·é€‚用于有ç€å¾ˆé•¿å‚数列表
107的函数头。然而,ç»å¯¹ä¸è¦æ‰“散对用户å¯è§çš„字符串,例如 printk ä¿¡æ¯ï¼Œå› ä¸ºè¿™æ ·å°±
108很难对它们 grep。
109
110
1113) 大括å·å’Œç©ºæ ¼çš„æ”¾ç½®
112------------------------------
113
114C 语言风格中å¦å¤–一个常è§é—®é¢˜æ˜¯å¤§æ‹¬å·çš„æ”¾ç½®ã€‚和缩进大å°ä¸åŒï¼Œé€‰æ‹©æˆ–弃用æŸç§æ”¾
115置策略并没有多少技术上的原因,ä¸è¿‡é¦–选的方å¼ï¼Œå°±åƒ Kernighan å’Œ Ritchie 展示
116ç»™æˆ‘ä»¬çš„ï¼Œæ˜¯æŠŠèµ·å§‹å¤§æ‹¬å·æ”¾åœ¨è¡Œå°¾ï¼Œè€ŒæŠŠç»“æŸå¤§æ‹¬å·æ”¾åœ¨è¡Œé¦–,所以:
117
118.. code-block:: c
119
120 if (x is true) {
121 we do y
122 }
123
124这适用于所有的éžå‡½æ•°è¯­å¥å— (if, switch, for, while, do)。比如:
125
126.. code-block:: c
127
128 switch (action) {
129 case KOBJ_ADD:
130 return "add";
131 case KOBJ_REMOVE:
132 return "remove";
133 case KOBJ_CHANGE:
134 return "change";
135 default:
136 return NULL;
137 }
138
139ä¸è¿‡ï¼Œæœ‰ä¸€ä¸ªä¾‹å¤–ï¼Œé‚£å°±æ˜¯å‡½æ•°ï¼šå‡½æ•°çš„èµ·å§‹å¤§æ‹¬å·æ”¾ç½®äºŽä¸‹ä¸€è¡Œçš„开头,所以:
140
141.. code-block:: c
142
143 int function(int x)
144 {
145 body of function
146 }
147
148全世界的异端å¯èƒ½ä¼šæŠ±æ€¨è¿™ä¸ªä¸ä¸€è‡´æ€§æ˜¯... 呃... ä¸ä¸€è‡´çš„,ä¸è¿‡æ‰€æœ‰æ€ç»´å¥å…¨çš„人
149éƒ½çŸ¥é“ (a) K&R 是 **正确的** 并且 (b) K&R 是正确的。此外,ä¸ç®¡æ€Žæ ·å‡½æ•°éƒ½æ˜¯ç‰¹
150殊的 (C 函数是ä¸èƒ½åµŒå¥—çš„)。
151
152注æ„结æŸå¤§æ‹¬å·ç‹¬è‡ªå æ®ä¸€è¡Œï¼Œé™¤éžå®ƒåŽé¢è·Ÿç€åŒä¸€ä¸ªè¯­å¥çš„剩余部分,也就是 do 语
153å¥ä¸­çš„ "while" 或者 if 语å¥ä¸­çš„ "else",åƒè¿™æ ·ï¼š
154
155.. code-block:: c
156
157 do {
158 body of do-loop
159 } while (condition);
160
161和
162
163.. code-block:: c
164
165 if (x == y) {
166 ..
167 } else if (x > y) {
168 ...
169 } else {
170 ....
171 }
172
173ç†ç”±ï¼šK&R。
174
175也请注æ„è¿™ç§å¤§æ‹¬å·çš„æ”¾ç½®æ–¹å¼ä¹Ÿèƒ½ä½¿ç©º (或者差ä¸å¤šç©ºçš„) è¡Œçš„æ•°é‡æœ€å°åŒ–ï¼ŒåŒæ—¶ä¸
176失å¯è¯»æ€§ã€‚因此,由于你的å±å¹•上的新行是ä¸å¯å†ç”Ÿèµ„æº (想想 25 行的终端å±å¹•),你
177å°†ä¼šæœ‰æ›´å¤šçš„ç©ºè¡Œæ¥æ”¾ç½®æ³¨é‡Šã€‚
178
179å½“åªæœ‰ä¸€ä¸ªå•独的语å¥çš„æ—¶å€™ï¼Œä¸ç”¨åŠ ä¸å¿…è¦çš„大括å·ã€‚
180
181.. code-block:: c
182
183 if (condition)
184 action();
185
186和
187
188.. code-block:: c
189
190 if (condition)
191 do_this();
192 else
193 do_that();
194
195这并ä¸é€‚ç”¨äºŽåªæœ‰ä¸€ä¸ªæ¡ä»¶åˆ†æ”¯æ˜¯å•语å¥çš„æƒ…况;这时所有分支都è¦ä½¿ç”¨å¤§æ‹¬å·ï¼š
196
197.. code-block:: c
198
199 if (condition) {
200 do_this();
201 do_that();
202 } else {
203 otherwise();
204 }
205
2063.1) 空格
207********************
208
209Linux å†…æ ¸çš„ç©ºæ ¼ä½¿ç”¨æ–¹å¼ (主è¦) å–决于它是用于函数还是关键字。(大多数) 关键字
210åŽè¦åŠ ä¸€ä¸ªç©ºæ ¼ã€‚å€¼å¾—æ³¨æ„的例外是 sizeof, typeof, alignof å’Œ __attribute__,这
211些关键字æŸäº›ç¨‹åº¦ä¸Šçœ‹èµ·æ¥æ›´åƒå‡½æ•° (它们在 Linux 里也常常伴éšå°æ‹¬å·è€Œä½¿ç”¨ï¼Œå°½ç®¡
212在 C é‡Œè¿™æ ·çš„å°æ‹¬å·ä¸æ˜¯å¿…éœ€çš„ï¼Œå°±åƒ ``struct fileinfo info;`` 声明过åŽçš„
213``sizeof info``)。
214
215æ‰€ä»¥åœ¨è¿™äº›å…³é”®å­—ä¹‹åŽæ”¾ä¸€ä¸ªç©ºæ ¼::
216
217 if, switch, case, for, do, while
218
219但是ä¸è¦åœ¨ sizeof, typeof, alignof 或者 __attribute__ è¿™äº›å…³é”®å­—ä¹‹åŽæ”¾ç©ºæ ¼ã€‚
220例如,
221
222.. code-block:: c
223
224 s = sizeof(struct file);
225
226ä¸è¦åœ¨å°æ‹¬å·é‡Œçš„表达å¼ä¸¤ä¾§åŠ ç©ºæ ¼ã€‚è¿™æ˜¯ä¸€ä¸ª **å例** :
227
228.. code-block:: c
229
230 s = sizeof( struct file );
231
232当声明指针类型或者返回指针类型的函数时, ``*`` çš„é¦–é€‰ä½¿ç”¨æ–¹å¼æ˜¯ä½¿ä¹‹é è¿‘å˜é‡å
233或者函数åï¼Œè€Œä¸æ˜¯é è¿‘类型å。例å­ï¼š
234
235.. code-block:: c
236
237 char *linux_banner;
238 unsigned long long memparse(char *ptr, char **retptr);
239 char *match_strdup(substring_t *s);
240
241在大多数二元和三元æ“ä½œç¬¦ä¸¤ä¾§ä½¿ç”¨ä¸€ä¸ªç©ºæ ¼ï¼Œä¾‹å¦‚ä¸‹é¢æ‰€æœ‰è¿™äº›æ“作符::
242
243 = + - < > * / % | & ^ <= >= == != ? :
244
245但是一元æ“作符åŽä¸è¦åŠ ç©ºæ ¼::
246
247 & * + - ~ ! sizeof typeof alignof __attribute__ defined
248
249åŽç¼€è‡ªåŠ å’Œè‡ªå‡ä¸€å…ƒæ“作符å‰ä¸åŠ ç©ºæ ¼::
250
251 ++ --
252
253å‰ç¼€è‡ªåŠ å’Œè‡ªå‡ä¸€å…ƒæ“作符åŽä¸åŠ ç©ºæ ¼::
254
255 ++ --
256
257``.`` å’Œ ``->`` 结构体æˆå‘˜æ“作符å‰åŽä¸åŠ ç©ºæ ¼ã€‚
258
259ä¸è¦åœ¨è¡Œå°¾ç•™ç©ºç™½ã€‚有些å¯ä»¥è‡ªåŠ¨ç¼©è¿›çš„ç¼–è¾‘å™¨ä¼šåœ¨æ–°è¡Œçš„è¡Œé¦–åŠ å…¥é€‚é‡çš„空白,然åŽ
260ä½ å°±å¯ä»¥ç›´æŽ¥åœ¨é‚£ä¸€è¡Œè¾“入代ç ã€‚ä¸è¿‡å‡å¦‚ä½ æœ€åŽæ²¡æœ‰åœ¨é‚£ä¸€è¡Œè¾“入代ç ï¼Œæœ‰äº›ç¼–辑器
261å°±ä¸ä¼šç§»é™¤å·²ç»åŠ å…¥çš„ç©ºç™½ï¼Œå°±åƒä½ æ•…æ„ç•™ä¸‹ä¸€ä¸ªåªæœ‰ç©ºç™½çš„行。包å«è¡Œå°¾ç©ºç™½çš„行就
262这样产生了。
263
264当 git å‘现补ä¸åŒ…å«äº†è¡Œå°¾ç©ºç™½çš„æ—¶å€™ä¼šè­¦å‘Šä½ ï¼Œå¹¶ä¸”å¯ä»¥åº”ä½ çš„è¦æ±‚去掉行尾空白;
265ä¸è¿‡å¦‚果你是正在打一系列补ä¸ï¼Œè¿™æ ·åšä¼šå¯¼è‡´åŽé¢çš„è¡¥ä¸å¤±è´¥ï¼Œå› ä¸ºä½ æ”¹å˜äº†è¡¥ä¸çš„
266上下文。
267
268
2694) 命å
270------------------------------
271
272C 是一个简朴的语言,你的命å也应该这样。和 Modula-2 å’Œ Pascal 程åºå‘˜ä¸åŒï¼Œ
273C 程åºå‘˜ä¸ä½¿ç”¨ç±»ä¼¼ ThisVariableIsATemporaryCounter 这样åŽä¸½çš„å字。C 程åºå‘˜ä¼š
274称那个å˜é‡ä¸º ``tmp`` ,这样写起æ¥ä¼šæ›´å®¹æ˜“,而且至少ä¸ä¼šä»¤å…¶éš¾äºŽç†è§£ã€‚
275
276ä¸è¿‡ï¼Œè™½ç„¶æ··ç”¨å¤§å°å†™çš„åå­—æ˜¯ä¸æå€¡ä½¿ç”¨çš„ï¼Œä½†æ˜¯å…¨å±€å˜é‡è¿˜æ˜¯éœ€è¦ä¸€ä¸ªå…·æè¿°æ€§çš„
277å字。称一个全局函数为 ``foo`` 是一个难以饶æ•的错误。
278
279全局å˜é‡ (åªæœ‰å½“ä½  **真正** 需è¦å®ƒä»¬çš„æ—¶å€™å†ç”¨å®ƒ) éœ€è¦æœ‰ä¸€ä¸ªå…·æè¿°æ€§çš„å字,就
280åƒå…¨å±€å‡½æ•°ã€‚如果你有一个å¯ä»¥è®¡ç®—活动用户数é‡çš„函数,你应该å«å®ƒ
281``count_active_users()`` 或者类似的å字,你ä¸åº”该å«å®ƒ ``cntuser()`` 。
282
283在函数å中包å«å‡½æ•°ç±»åž‹ (æ‰€è°“çš„åŒˆç‰™åˆ©å‘½åæ³•) 是脑å­å‡ºäº†é—®é¢˜â€”—编译器知é“那些类
284型而且能够检查那些类型,这样åšåªèƒ½æŠŠç¨‹åºå‘˜å¼„糊涂了。难怪微软总是制造出有问题
285的程åºã€‚
286
287本地å˜é‡å应该简短,而且能够表达相关的å«ä¹‰ã€‚å¦‚æžœä½ æœ‰ä¸€äº›éšæœºçš„æ•´æ•°åž‹çš„循环计
288数器,它应该被称为 ``i`` 。å«å®ƒ ``loop_counter`` 并无益处,如果它没有被误解的
289å¯èƒ½çš„è¯ã€‚类似的, ``tmp`` å¯ä»¥ç”¨æ¥ç§°å‘¼ä»»æ„类型的临时å˜é‡ã€‚
290
291如果你怕混淆了你的本地å˜é‡å,你就é‡åˆ°å¦ä¸€ä¸ªé—®é¢˜äº†ï¼Œå«åšå‡½æ•°å¢žé•¿è·å°”蒙失衡综
292åˆç—‡ã€‚请看第六章 (函数)。
293
294
2955) Typedef
296-----------
297
298ä¸è¦ä½¿ç”¨ç±»ä¼¼ ``vps_t`` 之类的东西。
299
300对结构体和指针使用 typedef 是一个 **错误** 。当你在代ç é‡Œçœ‹åˆ°ï¼š
301
302.. code-block:: c
303
304 vps_t a;
305
306è¿™ä»£è¡¨ä»€ä¹ˆæ„æ€å‘¢ï¼Ÿ
307
308相å,如果是这样
309
310.. code-block:: c
311
312 struct virtual_container *a;
313
314ä½ å°±çŸ¥é“ ``a`` 是什么了。
315
316很多人认为 typedef ``能æé«˜å¯è¯»æ€§`` ã€‚å®žé™…ä¸æ˜¯è¿™æ ·çš„。它们åªåœ¨ä¸‹åˆ—情况下有用:
317
318 (a) 完全ä¸é€æ˜Žçš„对象 (è¿™ç§æƒ…况下è¦ä¸»åŠ¨ä½¿ç”¨ typedef æ¥ **éšè—** 这个对象实际上
319 是什么)。
320
321 例如: ``pte_t`` ç­‰ä¸é€æ˜Žå¯¹è±¡ï¼Œä½ åªèƒ½ç”¨åˆé€‚的访问函数æ¥è®¿é—®å®ƒä»¬ã€‚
322
323 .. note::
324
325 ä¸é€æ˜Žæ€§å’Œ "访问函数" 本身是ä¸å¥½çš„。我们使用 pte_t 等类型的原因在于真
326 的是完全没有任何共用的å¯è®¿é—®ä¿¡æ¯ã€‚
327
328 (b) 清楚的整数类型,如此,这层抽象就å¯ä»¥ **帮助** 消除到底是 ``int`` 还是
329 ``long`` 的混淆。
330
331 u8/u16/u32 是完全没有问题的 typedef,ä¸è¿‡å®ƒä»¬æ›´ç¬¦åˆç±»åˆ« (d) è€Œä¸æ˜¯è¿™é‡Œã€‚
332
333 .. note::
334
335 è¦è¿™æ ·åšï¼Œå¿…须事出有因。如果æŸä¸ªå˜é‡æ˜¯ ``unsigned long`` ,那么没有必è¦
336
337 typedef unsigned long myflags_t;
338
339 ä¸è¿‡å¦‚果有一个明确的原因,比如它在æŸç§æƒ…况下å¯èƒ½ä¼šæ˜¯ä¸€ä¸ª ``unsigned int``
340 而在其他情况下å¯èƒ½ä¸º ``unsigned long`` ,那么就ä¸è¦çŠ¹è±«ï¼Œè¯·åŠ¡å¿…ä½¿ç”¨
341 typedef。
342
343 (c) 当你使用 sparse 按字é¢çš„创建一个 **æ–°** 类型æ¥åšç±»åž‹æ£€æŸ¥çš„æ—¶å€™ã€‚
344
345 (d) 和标准 C99 类型相åŒçš„类型,在æŸäº›ä¾‹å¤–的情况下。
346
347 虽然让眼ç›å’Œè„‘ç­‹æ¥é€‚应新的标准类型比如 ``uint32_t`` ä¸éœ€è¦èŠ±å¾ˆå¤šæ—¶é—´ï¼Œå¯
348 是有些人ä»ç„¶æ‹’ç»ä½¿ç”¨å®ƒä»¬ã€‚
349
350 因此,Linux 特有的等åŒäºŽæ ‡å‡†ç±»åž‹çš„ ``u8/u16/u32/u64`` 类型和它们的有符å·
351 类型是被å…许的——尽管在你自己的新代ç ä¸­ï¼Œå®ƒä»¬ä¸æ˜¯å¼ºåˆ¶è¦æ±‚è¦ä½¿ç”¨çš„。
352
353 当编辑已ç»ä½¿ç”¨äº†æŸä¸ªç±»åž‹é›†çš„å·²æœ‰ä»£ç æ—¶ï¼Œä½ åº”该éµå¾ªé‚£äº›ä»£ç ä¸­å·²ç»åšå‡ºçš„选
354 择。
355
356 (e) å¯ä»¥åœ¨ç”¨æˆ·ç©ºé—´å®‰å…¨ä½¿ç”¨çš„类型。
357
358 在æŸäº›ç”¨æˆ·ç©ºé—´å¯è§çš„结构体里,我们ä¸èƒ½è¦æ±‚ C99 类型而且ä¸èƒ½ç”¨ä¸Šé¢æåˆ°çš„
359 ``u32`` 类型。因此,我们在与用户空间共享的所有结构体中使用 __u32 和类似
360 的类型。
361
362å¯èƒ½è¿˜æœ‰å…¶ä»–的情况,ä¸è¿‡åŸºæœ¬çš„规则是 **永远ä¸è¦** 使用 typedef,除éžä½ å¯ä»¥æ˜Ž
363确的应用上述æŸä¸ªè§„则中的一个。
364
365总的æ¥è¯´ï¼Œå¦‚果一个指针或者一个结构体里的元素å¯ä»¥åˆç†çš„被直接访问到,那么它们
366å°±ä¸åº”该是一个 typedef。
367
368
3696) 函数
370------------------------------
371
372函数应该简短而漂亮,并且åªå®Œæˆä¸€ä»¶äº‹æƒ…。函数应该å¯ä»¥ä¸€å±æˆ–è€…ä¸¤å±æ˜¾ç¤ºå®Œ (我们
373éƒ½çŸ¥é“ ISO/ANSI å±å¹•大尿˜¯ 80x24),åªåšä¸€ä»¶äº‹æƒ…,而且把它åšå¥½ã€‚
374
375ä¸€ä¸ªå‡½æ•°çš„æœ€å¤§é•¿åº¦æ˜¯å’Œè¯¥å‡½æ•°çš„å¤æ‚度和缩进级数æˆå比的。所以,如果你有一个ç†
376论上很简å•çš„åªæœ‰ä¸€ä¸ªå¾ˆé•¿ (但是简å•) çš„ case 语å¥çš„函数,而且你需è¦åœ¨æ¯ä¸ª case
377里åšå¾ˆå¤šå¾ˆå°çš„事情,这样的函数尽管很长,但也是å¯ä»¥çš„。
378
379ä¸è¿‡ï¼Œå¦‚æžœä½ æœ‰ä¸€ä¸ªå¤æ‚çš„å‡½æ•°ï¼Œè€Œä¸”ä½ æ€€ç–‘ä¸€ä¸ªå¤©åˆ†ä¸æ˜¯å¾ˆé«˜çš„高中一年级学生å¯èƒ½
380甚至æžä¸æ¸…楚这个函数的目的,你应该严格éµå®ˆå‰é¢æåˆ°çš„长度é™åˆ¶ã€‚使用辅助函数,
381并为之å–个具æè¿°æ€§çš„åå­— (如果你觉得它们的性能很é‡è¦çš„è¯ï¼Œå¯ä»¥è®©ç¼–译器内è”它
382ä»¬ï¼Œè¿™æ ·çš„æ•ˆæžœå¾€å¾€ä¼šæ¯”ä½ å†™ä¸€ä¸ªå¤æ‚函数的效果è¦å¥½ã€‚)
383
384函数的å¦å¤–ä¸€ä¸ªè¡¡é‡æ ‡å‡†æ˜¯æœ¬åœ°å˜é‡çš„æ•°é‡ã€‚此数é‡ä¸åº”超过 5ï¼10 个,å¦åˆ™ä½ çš„函数
385å°±æœ‰é—®é¢˜äº†ã€‚é‡æ–°è€ƒè™‘ä¸€ä¸‹ä½ çš„å‡½æ•°ï¼ŒæŠŠå®ƒåˆ†æ‹†æˆæ›´å°çš„函数。人的大脑一般å¯ä»¥è½»æ¾
386çš„åŒæ—¶è·Ÿè¸ª 7 个ä¸åŒçš„事物,如果å†å¢žå¤šçš„è¯ï¼Œå°±ä¼šç³Šæ¶‚了。å³ä¾¿ä½ èªé¢–过人,你也å¯
387èƒ½ä¼šè®°ä¸æ¸…ä½  2 个星期å‰åšè¿‡çš„事情。
388
389åœ¨æºæ–‡ä»¶é‡Œï¼Œä½¿ç”¨ç©ºè¡Œéš”å¼€ä¸åŒçš„函数。如果该函数需è¦è¢«å¯¼å‡ºï¼Œå®ƒçš„ **EXPORT** å®
390应该紧贴在它的结æŸå¤§æ‹¬å·ä¹‹ä¸‹ã€‚比如:
391
392.. code-block:: c
393
394 int system_is_up(void)
395 {
396 return system_state == SYSTEM_RUNNING;
397 }
398 EXPORT_SYMBOL(system_is_up);
399
400在函数原型中,包含参数名和它们的数据类型。虽然 C 语言里没有这样的要求,在
401Linux 里这是æå€¡çš„åšæ³•,因为这样å¯ä»¥å¾ˆç®€å•的给读者æä¾›æ›´å¤šçš„æœ‰ä»·å€¼çš„ä¿¡æ¯ã€‚
402
403
4047) 集中的函数退出途径
405------------------------------
406
407虽然被æŸäº›äººå£°ç§°å·²ç»è¿‡æ—¶ï¼Œä½†æ˜¯ goto 语å¥çš„等价物还是ç»å¸¸è¢«ç¼–译器所使用,具体
408形弿˜¯æ— æ¡ä»¶è·³è½¬æŒ‡ä»¤ã€‚
409
410当一个函数从多个ä½ç½®é€€å‡ºï¼Œå¹¶ä¸”需è¦åšä¸€äº›ç±»ä¼¼æ¸…ç†çš„å¸¸è§æ“作时,goto 语å¥å°±å¾ˆæ–¹
411便了。如果并ä¸éœ€è¦æ¸…ç†æ“作,那么直接 return å³å¯ã€‚
412
413选择一个能够说明 goto 行为或它为何存在的标签å。如果 goto è¦é‡Šæ”¾ ``buffer``,
414一个ä¸é”™çš„åå­—å¯ä»¥æ˜¯ ``out_free_buffer:`` ã€‚åˆ«åŽ»ä½¿ç”¨åƒ ``err1:`` å’Œ ``err2:``
415这样的GW_BASIC å称,因为一旦你添加或删除了 (函数的) 退出路径,你就必须对它们
416釿–°ç¼–å·ï¼Œè¿™æ ·ä¼šéš¾ä»¥åŽ»æ£€éªŒæ­£ç¡®æ€§ã€‚
417
418使用 goto çš„ç†ç”±æ˜¯ï¼š
419
420- æ— æ¡ä»¶è¯­å¥å®¹æ˜“ç†è§£å’Œè·Ÿè¸ª
421- 嵌套程度å‡å°
422- å¯ä»¥é¿å…由于修改时忘记更新个别的退出点而导致错误
423- 让编译器çœåŽ»åˆ é™¤å†—ä½™ä»£ç çš„工作 ;)
424
425.. code-block:: c
426
427 int fun(int a)
428 {
429 int result = 0;
430 char *buffer;
431
432 buffer = kmalloc(SIZE, GFP_KERNEL);
433 if (!buffer)
434 return -ENOMEM;
435
436 if (condition1) {
437 while (loop1) {
438 ...
439 }
440 result = 1;
441 goto out_free_buffer;
442 }
443 ...
444 out_free_buffer:
445 kfree(buffer);
446 return result;
447 }
448
449ä¸€ä¸ªéœ€è¦æ³¨æ„的常è§é”™è¯¯æ˜¯ ``一个 err 错误`` ,就åƒè¿™æ ·ï¼š
450
451.. code-block:: c
452
453 err:
454 kfree(foo->bar);
455 kfree(foo);
456 return ret;
457
458这段代ç çš„错误是,在æŸäº›é€€å‡ºè·¯å¾„上 ``foo`` 是 NULL。通常情况下,通过把它分离
459æˆä¸¤ä¸ªé”™è¯¯æ ‡ç­¾ ``err_free_bar:`` å’Œ ``err_free_foo:`` æ¥ä¿®å¤è¿™ä¸ªé”™è¯¯ï¼š
460
461.. code-block:: c
462
463 err_free_bar:
464 kfree(foo->bar);
465 err_free_foo:
466 kfree(foo);
467 return ret;
468
469ç†æƒ³æƒ…å†µä¸‹ï¼Œä½ åº”è¯¥æ¨¡æ‹Ÿé”™è¯¯æ¥æµ‹è¯•所有退出路径。
470
471
4728) 注释
473------------------------------
474
475注释是好的,ä¸è¿‡æœ‰è¿‡åº¦æ³¨é‡Šçš„å±é™©ã€‚永远ä¸è¦åœ¨æ³¨é‡Šé‡Œè§£é‡Šä½ çš„ä»£ç æ˜¯å¦‚何è¿ä½œçš„:
476æ›´å¥½çš„åšæ³•是让别人一看你的代ç å°±å¯ä»¥æ˜Žç™½ï¼Œè§£é‡Šå†™çš„å¾ˆå·®çš„ä»£ç æ˜¯æµªè´¹æ—¶é—´ã€‚
477
478一般的,你想è¦ä½ çš„æ³¨é‡Šå‘Šè¯‰åˆ«äººä½ çš„代ç åšäº†ä»€ä¹ˆï¼Œè€Œä¸æ˜¯æ€Žä¹ˆåšçš„。也请你ä¸è¦æŠŠ
479æ³¨é‡Šæ”¾åœ¨ä¸€ä¸ªå‡½æ•°ä½“å†…éƒ¨ï¼šå¦‚æžœå‡½æ•°å¤æ‚到你需è¦ç‹¬ç«‹çš„æ³¨é‡Šå…¶ä¸­çš„一部分,你很å¯èƒ½
480需要回到第六章看一看。你可以做一些小注释来注明或警告某些很聪明 (或者糟糕) 的
481åšæ³•,但ä¸è¦åŠ å¤ªå¤šã€‚ä½ åº”è¯¥åšçš„,是把注释放在函数的头部,告诉人们它åšäº†ä»€ä¹ˆï¼Œ
482也å¯ä»¥åŠ ä¸Šå®ƒåšè¿™äº›äº‹æƒ…的原因。
483
484当注释内核 API 函数时,请使用 kernel-doc æ ¼å¼ã€‚请看
485Documentation/doc-guide/ å’Œ scripts/kernel-doc 以获得详细信æ¯ã€‚
486
487长 (多行) 注释的首选风格是:
488
489.. code-block:: c
490
491 /*
492 * This is the preferred style for multi-line
493 * comments in the Linux kernel source code.
494 * Please use it consistently.
495 *
496 * Description: A column of asterisks on the left side,
497 * with beginning and ending almost-blank lines.
498 */
499
500对于在 net/ å’Œ drivers/net/ 的文件,首选的长 (多行) 注释风格有些ä¸åŒã€‚
501
502.. code-block:: c
503
504 /* The preferred comment style for files in net/ and drivers/net
505 * looks like this.
506 *
507 * It is nearly the same as the generally preferred comment style,
508 * but there is no initial almost-blank line.
509 */
510
511注释数æ®ä¹Ÿæ˜¯å¾ˆé‡è¦çš„,ä¸ç®¡æ˜¯åŸºæœ¬ç±»åž‹è¿˜æ˜¯è¡ç”Ÿç±»åž‹ã€‚为了方便实现这一点,æ¯ä¸€è¡Œ
512应åªå£°æ˜Žä¸€ä¸ªæ•°æ® (ä¸è¦ä½¿ç”¨é€—å·æ¥ä¸€æ¬¡å£°æ˜Žå¤šä¸ªæ•°æ®)。这样你就有空间æ¥ä¸ºæ¯ä¸ªæ•°æ®
513å†™ä¸€æ®µå°æ³¨é‡Šæ¥è§£é‡Šå®ƒä»¬çš„用途了。
514
515
5169) ä½ å·²ç»æŠŠäº‹æƒ…å¼„ç³Ÿäº†
517------------------------------
518
519这没什么,我们都是这样。å¯èƒ½ä½ çš„使用了很长时间 Unix 的朋å‹å·²ç»å‘Šè¯‰ä½ 
520``GNU emacs`` 能自动帮你格å¼åŒ– C æºä»£ç ï¼Œè€Œä¸”你也注æ„到了,确实是这样,ä¸è¿‡å®ƒ
521所使用的默认值和我们想è¦çš„相去甚远 (å®žé™…ä¸Šï¼Œç”šè‡³æ¯”éšæœºæ‰“的还è¦å·®â€”—无数个猴å­
522在 GNU emacs 里打字永远ä¸ä¼šåˆ›é€ å‡ºä¸€ä¸ªå¥½ç¨‹åº) (译注:Infinite Monkey Theorem)
523
524所以你è¦ä¹ˆæ”¾å¼ƒ GNU emacs,è¦ä¹ˆæ”¹å˜å®ƒè®©å®ƒä½¿ç”¨æ›´åˆç†çš„设定。è¦é‡‡ç”¨åŽä¸€ä¸ªæ–¹æ¡ˆï¼Œ
525ä½ å¯ä»¥æŠŠä¸‹é¢è¿™æ®µç²˜è´´åˆ°ä½ çš„ .emacs 文件里。
526
527.. code-block:: none
528
529 (defun c-lineup-arglist-tabs-only (ignored)
530 "Line up argument lists by tabs, not spaces"
531 (let* ((anchor (c-langelem-pos c-syntactic-element))
532 (column (c-langelem-2nd-pos c-syntactic-element))
533 (offset (- (1+ column) anchor))
534 (steps (floor offset c-basic-offset)))
535 (* (max steps 1)
536 c-basic-offset)))
537
538 (add-hook 'c-mode-common-hook
539 (lambda ()
540 ;; Add kernel style
541 (c-add-style
542 "linux-tabs-only"
543 '("linux" (c-offsets-alist
544 (arglist-cont-nonempty
545 c-lineup-gcc-asm-reg
546 c-lineup-arglist-tabs-only))))))
547
548 (add-hook 'c-mode-hook
549 (lambda ()
550 (let ((filename (buffer-file-name)))
551 ;; Enable kernel mode for the appropriate files
552 (when (and filename
553 (string-match (expand-file-name "~/src/linux-trees")
554 filename))
555 (setq indent-tabs-mode t)
556 (setq show-trailing-whitespace t)
557 (c-set-style "linux-tabs-only")))))
558
559这会让 emacs 在 ``~/src/linux-trees`` 下的 C æºæ–‡ä»¶èŽ·å¾—æ›´å¥½çš„å†…æ ¸ä»£ç é£Žæ ¼ã€‚
560
561ä¸è¿‡å°±ç®—ä½ å°è¯•让 emacs 正确的格å¼åŒ–代ç å¤±è´¥äº†ï¼Œä¹Ÿå¹¶ä¸æ„味ç€ä½ å¤±åŽ»äº†ä¸€åˆ‡ï¼šè¿˜å¯
562以用 ``indent`` 。
563
564ä¸è¿‡ï¼ŒGNU indent 也有和 GNU emacs 一样有问题的设定,所以你需è¦ç»™å®ƒä¸€äº›å‘½ä»¤é€‰
565项。ä¸è¿‡ï¼Œè¿™è¿˜ä¸ç®—太糟糕,因为就算是 GNU indent çš„ä½œè€…ä¹Ÿè®¤åŒ K&R çš„æƒå¨æ€§
566(GNU çš„äººå¹¶ä¸æ˜¯åäººï¼Œä»–ä»¬åªæ˜¯åœ¨è¿™ä¸ªé—®é¢˜ä¸Šè¢«ä¸¥é‡çš„误导了),所以你åªè¦ç»™ indent
567指定选项 ``-kr -i8`` (代表 ``K&R,8 字符缩进``),或使用 ``scripts/Lindent``,
568这样就å¯ä»¥ä»¥æœ€æ—¶é«¦çš„æ–¹å¼ç¼©è¿›æºä»£ç ã€‚
569
570``indent`` æœ‰å¾ˆå¤šé€‰é¡¹ï¼Œç‰¹åˆ«æ˜¯é‡æ–°æ ¼å¼åŒ–注释的时候,你å¯èƒ½éœ€è¦çœ‹ä¸€ä¸‹å®ƒçš„æ‰‹å†Œã€‚
571ä¸è¿‡è®°ä½ï¼š ``indent`` ä¸èƒ½ä¿®æ­£å的编程习惯。
572
573
57410) Kconfig é…置文件
575------------------------------
576
577对于é布æºç æ ‘的所有 Kconfig* é…置文件æ¥è¯´ï¼Œå®ƒä»¬ç¼©è¿›æ–¹å¼æœ‰æ‰€ä¸åŒã€‚紧挨ç€
578``config`` 定义的行,用一个制表符缩进,然而 help ä¿¡æ¯çš„缩进则é¢å¤–增加 2 个空
579格。举个例å­::
580
581 config AUDIT
582 bool "Auditing support"
583 depends on NET
584 help
585 Enable auditing infrastructure that can be used with another
586 kernel subsystem, such as SELinux (which requires this for
587 logging of avc messages output). Does not do system-call
588 auditing without CONFIG_AUDITSYSCALL.
589
590而那些å±é™©çš„功能 (比如æŸäº›æ–‡ä»¶ç³»ç»Ÿçš„写支æŒ) 应该在它们的æç¤ºå­—符串里显著的声
591明这一点::
592
593 config ADFS_FS_RW
594 bool "ADFS write support (DANGEROUS)"
595 depends on ADFS_FS
596 ...
597
598è¦æŸ¥çœ‹é…置文件的完整文档,请看 Documentation/kbuild/kconfig-language.txt。
599
600
60111) æ•°æ®ç»“æž„
602------------------------------
603
604如果一个数æ®ç»“构,在创建和销æ¯å®ƒçš„å•线执行环境之外å¯è§ï¼Œé‚£ä¹ˆå®ƒå¿…é¡»è¦æœ‰ä¸€ä¸ªå¼•
605用计数器。内核里没有垃圾收集 (并且内核之外的垃圾收集慢且效率低下),这æ„味ç€ä½ 
606ç»å¯¹éœ€è¦è®°å½•ä½ å¯¹è¿™ç§æ•°æ®ç»“构的使用情况。
607
608引用计数意味着你能够避免上锁,并且允许多个用户并行访问这个数据结构——而不需要
609担心这个数据结构仅仅因为暂时不被使用就消失了,那些用户可能不过是沉睡了一阵或
610者做了一些其他事情而已。
611
612注意上锁 **不能** 取代引用计数。上锁是为了保持数据结构的一致性,而引用计数是一
613个内存管理技巧。通常二者都需要,不要把两个搞混了。
614
615很多数据结构实际上有 2 级引用计数,它们通常有不同 ``类`` 的用户。子类计数器统
616计子类用户的数量,每当子类计数器减至零时,全局计数器减一。
617
618这种 ``多级引用计数`` 的例子可以在内存管理 (``struct mm_struct``: mm_users 和
619mm_count),和文件系统 (``struct super_block``: s_count 和 s_active) 中找到。
620
621记住:如果另一个执行线索可以找到你的数据结构,但这个数据结构没有引用计数器,
622这里几乎肯定是一个 bug。
623
624
62512) 宏,枚举和RTL
626------------------------------
627
628用于定义常量的宏的名字及枚举里的标签需要大写。
629
630.. code-block:: c
631
632 #define CONSTANT 0x12345
633
634在定义几个相关的常量时,最好用枚举。
635
636宏的名字请用大写字母,不过形如函数的宏的名字可以用小写字母。
637
638一般的,如果能写成内联函数就不要写成像函数的宏。
639
640含有多个语句的宏应该被包含在一个 do-while 代码块里:
641
642.. code-block:: c
643
644 #define macrofun(a, b, c) \
645 do { \
646 if (a == 5) \
647 do_this(b, c); \
648 } while (0)
649
650使用å®çš„æ—¶å€™åº”é¿å…的事情:
651
6521) å½±å“æŽ§åˆ¶æµç¨‹çš„å®ï¼š
653
654.. code-block:: c
655
656 #define FOO(x) \
657 do { \
658 if (blah(x) < 0) \
659 return -EBUGGERED; \
660 } while (0)
661
662**éžå¸¸** ä¸å¥½ã€‚它看起æ¥åƒä¸€ä¸ªå‡½æ•°ï¼Œä¸è¿‡å´èƒ½å¯¼è‡´ ``调用`` 它的函数退出;ä¸è¦æ‰“
663乱读者大脑里的语法分æžå™¨ã€‚
664
6652) ä¾èµ–于一个固定å字的本地å˜é‡çš„å®ï¼š
666
667.. code-block:: c
668
669 #define FOO(val) bar(index, val)
670
671å¯èƒ½çœ‹èµ·æ¥åƒæ˜¯ä¸ªä¸é”™çš„东西,ä¸è¿‡å®ƒéžå¸¸å®¹æ˜“把读代ç çš„人æžç³Šæ¶‚,而且容易导致看起
672æ¥ä¸ç›¸å…³çš„æ”¹åЍ另æ¥é”™è¯¯ã€‚
673
6743) ä½œä¸ºå·¦å€¼çš„å¸¦å‚æ•°çš„å®ï¼š FOO(x) = y;如果有人把 FOO å˜æˆä¸€ä¸ªå†…è”函数的è¯ï¼Œè¿™
675 ç§ç”¨æ³•就会出错了。
676
6774) 忘记了优先级:使用表达å¼å®šä¹‰å¸¸é‡çš„å®å¿…须将表达å¼ç½®äºŽä¸€å¯¹å°æ‹¬å·ä¹‹å†…ã€‚å¸¦å‚æ•°
678 çš„å®ä¹Ÿè¦æ³¨æ„此类问题。
679
680.. code-block:: c
681
682 #define CONSTANT 0x4000
683 #define CONSTEXP (CONSTANT | 3)
684
6855) 在å®é‡Œå®šä¹‰ç±»ä¼¼å‡½æ•°çš„æœ¬åœ°å˜é‡æ—¶å‘½å冲çªï¼š
686
687.. code-block:: c
688
689 #define FOO(x) \
690 ({ \
691 typeof(x) ret; \
692 ret = calc_ret(x); \
693 (ret); \
694 })
695
696ret 是本地å˜é‡çš„通用åå­— - __foo_ret æ›´ä¸å®¹æ˜“与一个已存在的å˜é‡å†²çªã€‚
697
698cpp 手册对å®çš„讲解很详细。gcc internals 手册也详细讲解了 RTL,内核里的汇编语
699言ç»å¸¸ç”¨åˆ°å®ƒã€‚
700
701
70213) 打印内核消息
703------------------------------
704
705内核开å‘者应该是å—过良好教育的。请一定注æ„内核信æ¯çš„æ‹¼å†™ï¼Œä»¥ç»™äººä»¥å¥½çš„å°è±¡ã€‚
706ä¸è¦ç”¨ä¸è§„范的å•è¯æ¯”如 ``dont``,而è¦ç”¨ ``do not`` 或者 ``don't`` 。ä¿è¯è¿™äº›ä¿¡
707æ¯ç®€å•明了,无歧义。
708
709内核信æ¯ä¸å¿…以英文å¥å·ç»“æŸã€‚
710
711åœ¨å°æ‹¬å·é‡Œæ‰“å°æ•°å­— (%d) 没有任何价值,应该é¿å…这样åšã€‚
712
713<linux/device.h> 里有一些驱动模型诊断å®ï¼Œä½ åº”该使用它们,以确ä¿ä¿¡æ¯å¯¹åº”于正确
714的设备和驱动,并且被标记了正确的消æ¯çº§åˆ«ã€‚è¿™äº›å®æœ‰ï¼šdev_err(), dev_warn(),
715dev_info() 等等。对于那些ä¸å’ŒæŸä¸ªç‰¹å®šè®¾å¤‡ç›¸å…³è¿žçš„ä¿¡æ¯ï¼Œ<linux/printk.h> 定义
716了 pr_notice(), pr_info(), pr_warn(), pr_err() 和其他。
717
718写出好的调试信æ¯å¯ä»¥æ˜¯ä¸€ä¸ªå¾ˆå¤§çš„æŒ‘战;一旦你写出åŽï¼Œè¿™äº›ä¿¡æ¯åœ¨è¿œç¨‹é™¤é”™æ—¶èƒ½æ
719ä¾›æžå¤§çš„帮助。然而打å°è°ƒè¯•ä¿¡æ¯çš„å¤„ç†æ–¹å¼åŒæ‰“å°éžè°ƒè¯•ä¿¡æ¯ä¸åŒã€‚å…¶ä»– pr_XXX()
720函数能无æ¡ä»¶åœ°æ‰“å°ï¼Œpr_debug() å´ä¸ï¼›é»˜è®¤æƒ…况下它ä¸ä¼šè¢«ç¼–译,除éžå®šä¹‰äº† DEBUG
721或设定了 CONFIG_DYNAMIC_DEBUGã€‚å®žé™…è¿™åŒæ ·æ˜¯ä¸ºäº† dev_dbg(),一个相关约定是在一
722个已ç»å¼€å¯äº† DEBUG 时,使用 VERBOSE_DEBUG æ¥æ·»åŠ  dev_vdbg()。
723
724许多å­ç³»ç»Ÿæ‹¥æœ‰ Kconfig 调试选项æ¥å¼€å¯ -DDEBUG 在对应的 Makefile 里é¢ï¼›åœ¨å…¶ä»–
725情况下,特殊文件使用 #define DEBUG。当一æ¡è°ƒè¯•ä¿¡æ¯éœ€è¦è¢«æ— æ¡ä»¶æ‰“å°æ—¶ï¼Œä¾‹å¦‚,
726如果已ç»åŒ…å«ä¸€ä¸ªè°ƒè¯•相关的 #ifdef æ¡ä»¶ï¼Œprintk(KERN_DEBUG ...) å°±å¯è¢«ä½¿ç”¨ã€‚
727
728
72914) 分配内存
730------------------------------
731
732内核æä¾›äº†ä¸‹é¢çš„一般用途的内存分é…函数:
733kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc() 和 vzalloc()。
734请å‚考 API æ–‡æ¡£ä»¥èŽ·å–æœ‰å…³å®ƒä»¬çš„详细信æ¯ã€‚
735
736传递结构体大å°çš„首选形弿˜¯è¿™æ ·çš„:
737
738.. code-block:: c
739
740 p = kmalloc(sizeof(*p), ...);
741
742å¦å¤–一ç§ä¼ é€’æ–¹å¼ä¸­ï¼Œsizeof çš„æ“作数是结构体的å字,这样会é™ä½Žå¯è¯»æ€§ï¼Œå¹¶ä¸”å¯èƒ½
743会引入 bug。有å¯èƒ½æŒ‡é’ˆå˜é‡ç±»åž‹è¢«æ”¹å˜æ—¶ï¼Œè€Œå¯¹åº”的传递给内存分é…函数的 sizeof
744的结果ä¸å˜ã€‚
745
746强制转æ¢ä¸€ä¸ª void 指针返回值是多余的。C 语言本身ä¿è¯äº†ä»Ž void 指针到其他任何
747æŒ‡é’ˆç±»åž‹çš„è½¬æ¢æ˜¯æ²¡æœ‰é—®é¢˜çš„。
748
749分é…ä¸€ä¸ªæ•°ç»„çš„é¦–é€‰å½¢å¼æ˜¯è¿™æ ·çš„:
750
751.. code-block:: c
752
753 p = kmalloc_array(n, sizeof(...), ...);
754
755分é…ä¸€ä¸ªé›¶é•¿æ•°ç»„çš„é¦–é€‰å½¢å¼æ˜¯è¿™æ ·çš„:
756
757.. code-block:: c
758
759 p = kcalloc(n, sizeof(...), ...);
760
761两ç§å½¢å¼æ£€æŸ¥åˆ†é…å¤§å° n * sizeof(...) 的溢出,如果溢出返回 NULL。
762
763
76415) 内联弊病
765------------------------------
766
767有一个常è§çš„误解是 ``内è”`` 是 gcc æä¾›çš„å¯ä»¥è®©ä»£ç è¿è¡Œæ›´å¿«çš„一个选项。虽然使
768用内è”函数有时候是æ°å½“çš„ (æ¯”å¦‚ä½œä¸ºä¸€ç§æ›¿ä»£å®çš„æ–¹å¼ï¼Œè¯·çœ‹ç¬¬å二章),ä¸è¿‡å¾ˆå¤šæƒ…
769况䏋䏿˜¯è¿™æ ·ã€‚inline 的过度使用会使内核å˜å¤§ï¼Œä»Žè€Œä½¿æ•´ä¸ªç³»ç»Ÿè¿è¡Œé€Ÿåº¦å˜æ…¢ã€‚
770因为体积大内核会å ç”¨æ›´å¤šçš„æŒ‡ä»¤é«˜é€Ÿç¼“存,而且会导致 pagecache çš„å¯ç”¨å†…å­˜å‡å°‘。
771想象一下,一次 pagecache 未命中就会导致一次ç£ç›˜å¯»å€ï¼Œå°†è€—æ—¶ 5 毫秒。5 毫秒的
772时间内 CPU 能执行很多很多指令。
773
774一个基本的原则是如果一个函数有 3 行以上,就ä¸è¦æŠŠå®ƒå˜æˆå†…è”函数。这个原则的一
775ä¸ªä¾‹å¤–æ˜¯ï¼Œå¦‚æžœä½ çŸ¥é“æŸä¸ªå‚数是一个编译时常é‡ï¼Œè€Œä¸”因为这个常é‡ä½ ç¡®å®šç¼–译器在
776编译时能优化掉你的函数的大部分代ç ï¼Œé‚£ä»ç„¶å¯ä»¥ç»™å®ƒåŠ ä¸Š inline 关键字。
777kmalloc() 内è”函数就是一个很好的例å­ã€‚
778
779人们ç»å¸¸ä¸»å¼ ç»™ static 的而且åªç”¨äº†ä¸€æ¬¡çš„函数加上 inline,如此ä¸ä¼šæœ‰ä»»ä½•æŸå¤±ï¼Œ
780因为没有什么好æƒè¡¡çš„ã€‚è™½ç„¶ä»ŽæŠ€æœ¯ä¸Šè¯´è¿™æ˜¯æ­£ç¡®çš„ï¼Œä½†æ˜¯å®žé™…ä¸Šè¿™ç§æƒ…况下å³ä½¿ä¸åŠ 
781inline gcc 也å¯ä»¥è‡ªåŠ¨ä½¿å…¶å†…è”。而且其他用户å¯èƒ½ä¼šè¦æ±‚移除 inline,由此而æ¥çš„
782争论会抵消 inline 自身的潜在价值,得ä¸å¿å¤±ã€‚
783
784
78516) 函数返回值及命名
786------------------------------
787
788函数å¯ä»¥è¿”回多ç§ä¸åŒç±»åž‹çš„值,最常è§çš„ä¸€ç§æ˜¯è¡¨æ˜Žå‡½æ•°æ‰§è¡ŒæˆåŠŸæˆ–è€…å¤±è´¥çš„å€¼ã€‚è¿™æ ·
789的一个值å¯ä»¥è¡¨ç¤ºä¸ºä¸€ä¸ªé”™è¯¯ä»£ç æ•´æ•° (-Exxxï¼å¤±è´¥ï¼Œ0ï¼æˆåŠŸ) 或者一个 ``æˆåŠŸ``
790布尔值 (0ï¼å¤±è´¥ï¼Œéž0ï¼æˆåŠŸ)。
791
792æ··åˆä½¿ç”¨è¿™ä¸¤ç§è¡¨è¾¾æ–¹å¼æ˜¯éš¾äºŽå‘现的 bug çš„æ¥æºã€‚如果 C 语言本身严格区分整形和
793布尔型å˜é‡ï¼Œé‚£ä¹ˆç¼–译器就能够帮我们å‘现这些错误... ä¸è¿‡ C 语言ä¸åŒºåˆ†ã€‚为了é¿å…
794äº§ç”Ÿè¿™ç§ bug,请éµå¾ªä¸‹é¢çš„æƒ¯ä¾‹::
795
796 如果函数的å字是一个动作或者强制性的命令,那么这个函数应该返回错误代
797 ç æ•´æ•°ã€‚如果是一个判断,那么函数应该返回一个 "æˆåŠŸ" 布尔值。
798
799比如, ``add work`` 是一个命令,所以 add_work() 在æˆåŠŸæ—¶è¿”å›ž 0,在失败时返回
800-EBUSY。类似的,因为 ``PCI device present`` 是一个判断,所以 pci_dev_present()
801在æˆåŠŸæ‰¾åˆ°ä¸€ä¸ªåŒ¹é…的设备时应该返回 1,如果找ä¸åˆ°æ—¶åº”该返回 0。
802
803所有 EXPORTed 函数都必须éµå®ˆè¿™ä¸ªæƒ¯ä¾‹ï¼Œæ‰€æœ‰çš„å…¬å…±å‡½æ•°ä¹Ÿéƒ½åº”è¯¥å¦‚æ­¤ã€‚ç§æœ‰
804(static) 函数ä¸éœ€è¦å¦‚此,但是我们也推è这样åšã€‚
805
806è¿”å›žå€¼æ˜¯å®žé™…è®¡ç®—ç»“æžœè€Œä¸æ˜¯è®¡ç®—æ˜¯å¦æˆåŠŸçš„æ ‡å¿—çš„å‡½æ•°ä¸å—此惯例的é™åˆ¶ã€‚一般的,
807他们通过返回一些正常值范围之外的结果æ¥è¡¨ç¤ºå‡ºé”™ã€‚å…¸åž‹çš„ä¾‹å­æ˜¯è¿”回指针的函数,
808他们使用 NULL 或者 ERR_PTR æœºåˆ¶æ¥æŠ¥å‘Šé”™è¯¯ã€‚
809
810
81117) 不要重新发明内核宏
812------------------------------
813
814头文件 include/linux/kernel.h 包å«äº†ä¸€äº›å®ï¼Œä½ åº”该使用它们,而ä¸è¦è‡ªå·±å†™ä¸€äº›
815它们的å˜ç§ã€‚比如,如果你需è¦è®¡ç®—一个数组的长度,使用这个å®
816
817.. code-block:: c
818
819 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
820
821类似的,如果你è¦è®¡ç®—æŸç»“构体æˆå‘˜çš„大å°ï¼Œä½¿ç”¨
822
823.. code-block:: c
824
825 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
826
827还有å¯ä»¥åšä¸¥æ ¼çš„类型检查的 min() å’Œ max() å®ï¼Œå¦‚果你需è¦å¯ä»¥ä½¿ç”¨å®ƒä»¬ã€‚ä½ å¯ä»¥
828自己看看那个头文件里还定义了什么你å¯ä»¥æ‹¿æ¥ç”¨çš„东西,如果有定义的è¯ï¼Œä½ å°±ä¸åº”
829在你的代ç é‡Œè‡ªå·±é‡æ–°å®šä¹‰ã€‚
830
831
83218) 编辑器模式行和其他需要罗嗦的事情
833--------------------------------------------------
834
835有一些编辑器å¯ä»¥è§£é‡ŠåµŒå…¥åœ¨æºæ–‡ä»¶é‡Œçš„由一些特殊标记标明的é…置信æ¯ã€‚比如,emacs
836能够解释被标记æˆè¿™æ ·çš„行:
837
838.. code-block:: c
839
840 -*- mode: c -*-
841
842或者这样的:
843
844.. code-block:: c
845
846 /*
847 Local Variables:
848 compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
849 End:
850 */
851
852Vim 能够解释这样的标记:
853
854.. code-block:: c
855
856 /* vim:set sw=8 noet */
857
858ä¸è¦åœ¨æºä»£ç ä¸­åŒ…å«ä»»ä½•这样的内容。æ¯ä¸ªäººéƒ½æœ‰ä»–自己的编辑器é…ç½®ï¼Œä½ çš„æºæ–‡ä»¶ä¸
859应该覆盖别人的é…置。这包括有关缩进和模å¼é…置的标记。人们å¯ä»¥ä½¿ç”¨ä»–们自己定制
860的模å¼ï¼Œæˆ–者使用其他å¯ä»¥äº§ç”Ÿæ­£ç¡®çš„缩进的巧妙方法。
861
862
86319) 内联汇编
864------------------------------
865
866在特定架构的代ç ä¸­ï¼Œä½ å¯èƒ½éœ€è¦å†…è”æ±‡ç¼–与 CPU 和平å°ç›¸å…³åŠŸèƒ½è¿žæŽ¥ã€‚éœ€è¦è¿™ä¹ˆåšæ—¶
867å°±ä¸è¦çŠ¹è±«ã€‚ç„¶è€Œï¼Œå½“ C å¯ä»¥å®Œæˆå·¥ä½œæ—¶ï¼Œä¸è¦å¹³ç™½æ— æ•…åœ°ä½¿ç”¨å†…è”æ±‡ç¼–。在å¯èƒ½çš„æƒ…
868况下,你å¯ä»¥å¹¶ä¸”应该用 C 和硬件沟通。
869
870请考虑去写æ†ç»‘通用ä½å…ƒ (wrap common bits) çš„å†…è”æ±‡ç¼–的简å•辅助函数,别去é‡å¤
871åœ°å†™ä¸‹åªæœ‰ç»†å¾®å·®å¼‚å†…è”æ±‡ç¼–。记ä½å†…è”æ±‡ç¼–å¯ä»¥ä½¿ç”¨ C 傿•°ã€‚
872
873å¤§åž‹ï¼Œæœ‰ä¸€å®šå¤æ‚度的汇编函数应该放在 .S 文件内,用相应的 C 原型定义在 C 头文
874件中。汇编函数的 C 原型应该使用 ``asmlinkage`` 。
875
876ä½ å¯èƒ½éœ€è¦æŠŠæ±‡ç¼–è¯­å¥æ ‡è®°ä¸º volatile,用æ¥é˜»æ­¢ GCC 在没å‘现任何副作用åŽå°±æŠŠå®ƒ
877移除了。你ä¸å¿…总是这样åšï¼Œå°½ç®¡ï¼Œè¿™ä¸å¿…è¦çš„举动会é™åˆ¶ä¼˜åŒ–。
878
879在写一个包å«å¤šæ¡æŒ‡ä»¤çš„å•ä¸ªå†…è”æ±‡ç¼–è¯­å¥æ—¶ï¼ŒæŠŠæ¯æ¡æŒ‡ä»¤ç”¨å¼•å·åˆ†å‰²è€Œä¸”å„å ä¸€è¡Œï¼Œ
880除了最åŽä¸€æ¡æŒ‡ä»¤å¤–,在æ¯ä¸ªæŒ‡ä»¤ç»“尾加上 \n\t,让汇编输出时å¯ä»¥æ­£ç¡®åœ°ç¼©è¿›ä¸‹ä¸€æ¡
881指令:
882
883.. code-block:: c
884
885 asm ("magic %reg1, #42\n\t"
886 "more_magic %reg2, %reg3"
887 : /* outputs */ : /* inputs */ : /* clobbers */);
888
889
89020) 条件编译
891------------------------------
892
893åªè¦å¯èƒ½ï¼Œå°±ä¸è¦åœ¨ .c 文件里é¢ä½¿ç”¨é¢„å¤„ç†æ¡ä»¶ (#if, #ifdef);这样åšè®©ä»£ç æ›´éš¾
894é˜…è¯»å¹¶ä¸”æ›´éš¾åŽ»è·Ÿè¸ªé€»è¾‘ã€‚æ›¿ä»£æ–¹æ¡ˆæ˜¯ï¼Œåœ¨å¤´æ–‡ä»¶ä¸­ç”¨é¢„å¤„ç†æ¡ä»¶æä¾›ç»™é‚£äº› .c 文件
895使用,å†ç»™ #else æä¾›ä¸€ä¸ªç©ºæ¡© (no-op stub) 版本,然åŽåœ¨ .c 文件内无æ¡ä»¶åœ°è°ƒç”¨
896那些 (定义在头文件内的) 函数。这样åšï¼Œç¼–译器会é¿å…为桩函数 (stub) 的调用生æˆ
897任何代ç ï¼Œäº§ç”Ÿçš„结果是相åŒçš„,但逻辑将更加清晰。
898
899最好倾å‘äºŽç¼–è¯‘æ•´ä¸ªå‡½æ•°ï¼Œè€Œä¸æ˜¯å‡½æ•°çš„一部分或表达å¼çš„一部分。与其放一个 ifdef
900在表达å¼å†…,ä¸å¦‚分解出部分或全部表达å¼ï¼Œæ”¾è¿›ä¸€ä¸ªå•独的辅助函数,并应用预处ç†
901æ¡ä»¶åˆ°è¿™ä¸ªè¾…助函数内。
902
903如果你有一个在特定é…置中,å¯èƒ½å˜æˆæœªä½¿ç”¨çš„函数或å˜é‡ï¼Œç¼–译器会警告它定义了但
904未使用,把它标记为 __maybe_unused è€Œä¸æ˜¯å°†å®ƒåŒ…å«åœ¨ä¸€ä¸ªé¢„å¤„ç†æ¡ä»¶ä¸­ã€‚(然而,如
905果一个函数或å˜é‡æ€»æ˜¯æœªä½¿ç”¨ï¼Œå°±ç›´æŽ¥åˆ é™¤å®ƒã€‚)
906
907在代ç ä¸­ï¼Œå°½å¯èƒ½åœ°ä½¿ç”¨ IS_ENABLED 宿¥è½¬åŒ–æŸä¸ª Kconfig 标记为 C 的布尔
908表达å¼ï¼Œå¹¶åœ¨ä¸€èˆ¬çš„ C æ¡ä»¶ä¸­ä½¿ç”¨å®ƒï¼š
909
910.. code-block:: c
911
912 if (IS_ENABLED(CONFIG_SOMETHING)) {
913 ...
914 }
915
916编译器会åšå¸¸é‡æŠ˜å ï¼Œç„¶åŽå°±åƒä½¿ç”¨ #ifdef é‚£æ ·åŽ»åŒ…å«æˆ–排除代ç å—,所以这ä¸ä¼šå¸¦
917æ¥ä»»ä½•è¿è¡Œæ—¶å¼€é”€ã€‚ç„¶è€Œï¼Œè¿™ç§æ–¹æ³•便—§å…许 C 编译器查看å—内的代ç ï¼Œå¹¶æ£€æŸ¥å®ƒçš„æ­£
918确性 (语法,类型,符å·å¼•用,等等)。因此,如果æ¡ä»¶ä¸æ»¡è¶³ï¼Œä»£ç å—内的引用符å·å°±
919ä¸å­˜åœ¨æ—¶ï¼Œä½ è¿˜æ˜¯å¿…须去用 #ifdef。
920
921在任何有æ„义的 #if 或 #ifdef å—的末尾 (超过几行的),在 #endif åŒä¸€è¡Œçš„åŽé¢å†™ä¸‹
922注解,注释这个æ¡ä»¶è¡¨è¾¾å¼ã€‚例如:
923
924.. code-block:: c
925
926 #ifdef CONFIG_SOMETHING
927 ...
928 #endif /* CONFIG_SOMETHING */
929
930
931附录 I) 参考
932-------------------
933
934The C Programming Language, 第二版
935作者:Brian W. Kernighan 和 Dennis M. Ritchie.
936Prentice Hall, Inc., 1988.
937ISBN 0-13-110362-8 (软皮), 0-13-110370-9 (硬皮).
938
939The Practice of Programming
940作者:Brian W. Kernighan 和 Rob Pike.
941Addison-Wesley, Inc., 1999.
942ISBN 0-201-61586-X.
943
944GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals and indent,
945都可以从 http://www.gnu.org/manual/ 找到
946
947WG14 是 C 语言的国际标准化工作组,URL: http://www.open-std.org/JTC1/SC22/WG14/
948
949Kernel process/coding-style.rst,作者 greg@kroah.com 发表于 OLS 2002:
950http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
new file mode 100644
index 000000000000..75956d669962
--- /dev/null
+++ b/Documentation/translations/zh_CN/index.rst
@@ -0,0 +1,12 @@
1.. raw:: latex
2
3 \renewcommand\thesection*
4 \renewcommand\thesubsection*
5
6Chinese translations
7====================
8
9.. toctree::
10 :maxdepth: 1
11
12 coding-style
diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index 0a94ffe17ab6..00e706997130 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -543,7 +543,7 @@ relevant attribute files are usb2_hardware_lpm and usb3_hardware_lpm.
543 When a USB 3.0 lpm-capable device is plugged in to a 543 When a USB 3.0 lpm-capable device is plugged in to a
544 xHCI host which supports link PM, it will check if U1 544 xHCI host which supports link PM, it will check if U1
545 and U2 exit latencies have been set in the BOS 545 and U2 exit latencies have been set in the BOS
546 descriptor; if the check is is passed and the host 546 descriptor; if the check is passed and the host
547 supports USB3 hardware LPM, USB3 hardware LPM will be 547 supports USB3 hardware LPM, USB3 hardware LPM will be
548 enabled for the device and these files will be created. 548 enabled for the device and these files will be created.
549 The files hold a string value (enable or disable) 549 The files hold a string value (enable or disable)
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index c4171e4519c2..f2e739545e74 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -296,7 +296,7 @@ thp_split_page is incremented every time a huge page is split into base
296 reason is that a huge page is old and is being reclaimed. 296 reason is that a huge page is old and is being reclaimed.
297 This action implies splitting all PMD the page mapped with. 297 This action implies splitting all PMD the page mapped with.
298 298
299thp_split_page_failed is is incremented if kernel fails to split huge 299thp_split_page_failed is incremented if kernel fails to split huge
300 page. This can happen if the page was pinned by somebody. 300 page. This can happen if the page was pinned by somebody.
301 301
302thp_deferred_split_page is incremented when a huge page is put onto split 302thp_deferred_split_page is incremented when a huge page is put onto split
diff --git a/Makefile b/Makefile
index 4e2abc36e14b..b83109b5d217 100644
--- a/Makefile
+++ b/Makefile
@@ -1446,7 +1446,7 @@ $(help-board-dirs): help-%:
1446 1446
1447# Documentation targets 1447# Documentation targets
1448# --------------------------------------------------------------------------- 1448# ---------------------------------------------------------------------------
1449DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs 1449DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs linkcheckdocs
1450PHONY += $(DOC_TARGETS) 1450PHONY += $(DOC_TARGETS)
1451$(DOC_TARGETS): scripts_basic FORCE 1451$(DOC_TARGETS): scripts_basic FORCE
1452 $(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype 1452 $(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype
diff --git a/include/linux/pm.h b/include/linux/pm.h
index f926af41e122..a0894bc52bb4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -64,24 +64,7 @@ typedef struct pm_message {
64} pm_message_t; 64} pm_message_t;
65 65
66/** 66/**
67 * struct dev_pm_ops - device PM callbacks 67 * struct dev_pm_ops - device PM callbacks.
68 *
69 * Several device power state transitions are externally visible, affecting
70 * the state of pending I/O queues and (for drivers that touch hardware)
71 * interrupts, wakeups, DMA, and other hardware state. There may also be
72 * internal transitions to various low-power modes which are transparent
73 * to the rest of the driver stack (such as a driver that's ON gating off
74 * clocks which are not in active use).
75 *
76 * The externally visible transitions are handled with the help of callbacks
77 * included in this structure in such a way that two levels of callbacks are
78 * involved. First, the PM core executes callbacks provided by PM domains,
79 * device types, classes and bus types. They are the subsystem-level callbacks
80 * supposed to execute callbacks provided by device drivers, although they may
81 * choose not to do that. If the driver callbacks are executed, they have to
82 * collaborate with the subsystem-level callbacks to achieve the goals
83 * appropriate for the given system transition, given transition phase and the
84 * subsystem the device belongs to.
85 * 68 *
86 * @prepare: The principal role of this callback is to prevent new children of 69 * @prepare: The principal role of this callback is to prevent new children of
87 * the device from being registered after it has returned (the driver's 70 * the device from being registered after it has returned (the driver's
@@ -240,34 +223,6 @@ typedef struct pm_message {
240 * driver's interrupt handler, which is guaranteed not to run while 223 * driver's interrupt handler, which is guaranteed not to run while
241 * @restore_noirq() is being executed. Analogous to @resume_noirq(). 224 * @restore_noirq() is being executed. Analogous to @resume_noirq().
242 * 225 *
243 * All of the above callbacks, except for @complete(), return error codes.
244 * However, the error codes returned by the resume operations, @resume(),
245 * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do
246 * not cause the PM core to abort the resume transition during which they are
247 * returned. The error codes returned in those cases are only printed by the PM
248 * core to the system logs for debugging purposes. Still, it is recommended
249 * that drivers only return error codes from their resume methods in case of an
250 * unrecoverable failure (i.e. when the device being handled refuses to resume
251 * and becomes unusable) to allow us to modify the PM core in the future, so
252 * that it can avoid attempting to handle devices that failed to resume and
253 * their children.
254 *
255 * It is allowed to unregister devices while the above callbacks are being
256 * executed. However, a callback routine must NOT try to unregister the device
257 * it was called for, although it may unregister children of that device (for
258 * example, if it detects that a child was unplugged while the system was
259 * asleep).
260 *
261 * Refer to Documentation/power/admin-guide/devices.rst for more information about the role
262 * of the above callbacks in the system suspend process.
263 *
264 * There also are callbacks related to runtime power management of devices.
265 * Again, these callbacks are executed by the PM core only for subsystems
266 * (PM domains, device types, classes and bus types) and the subsystem-level
267 * callbacks are supposed to invoke the driver callbacks. Moreover, the exact
268 * actions to be performed by a device driver's callbacks generally depend on
269 * the platform and subsystem the device belongs to.
270 *
271 * @runtime_suspend: Prepare the device for a condition in which it won't be 226 * @runtime_suspend: Prepare the device for a condition in which it won't be
272 * able to communicate with the CPU(s) and RAM due to power management. 227 * able to communicate with the CPU(s) and RAM due to power management.
273 * This need not mean that the device should be put into a low-power state. 228 * This need not mean that the device should be put into a low-power state.
@@ -287,11 +242,51 @@ typedef struct pm_message {
287 * Check these conditions, and return 0 if it's appropriate to let the PM 242 * Check these conditions, and return 0 if it's appropriate to let the PM
288 * core queue a suspend request for the device. 243 * core queue a suspend request for the device.
289 * 244 *
290 * Refer to Documentation/power/runtime_pm.txt for more information about the 245 * Several device power state transitions are externally visible, affecting
291 * role of the above callbacks in device runtime power management. 246 * the state of pending I/O queues and (for drivers that touch hardware)
247 * interrupts, wakeups, DMA, and other hardware state. There may also be
248 * internal transitions to various low-power modes which are transparent
249 * to the rest of the driver stack (such as a driver that's ON gating off
250 * clocks which are not in active use).
292 * 251 *
252 * The externally visible transitions are handled with the help of callbacks
253 * included in this structure in such a way that, typically, two levels of
254 * callbacks are involved. First, the PM core executes callbacks provided by PM
255 * domains, device types, classes and bus types. They are the subsystem-level
256 * callbacks expected to execute callbacks provided by device drivers, although
257 * they may choose not to do that. If the driver callbacks are executed, they
258 * have to collaborate with the subsystem-level callbacks to achieve the goals
259 * appropriate for the given system transition, given transition phase and the
260 * subsystem the device belongs to.
261 *
262 * All of the above callbacks, except for @complete(), return error codes.
263 * However, the error codes returned by @resume(), @thaw(), @restore(),
264 * @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do not cause the PM
265 * core to abort the resume transition during which they are returned. The
266 * error codes returned in those cases are only printed to the system logs for
267 * debugging purposes. Still, it is recommended that drivers only return error
268 * codes from their resume methods in case of an unrecoverable failure (i.e.
269 * when the device being handled refuses to resume and becomes unusable) to
270 * allow the PM core to be modified in the future, so that it can avoid
271 * attempting to handle devices that failed to resume and their children.
272 *
273 * It is allowed to unregister devices while the above callbacks are being
274 * executed. However, a callback routine MUST NOT try to unregister the device
275 * it was called for, although it may unregister children of that device (for
276 * example, if it detects that a child was unplugged while the system was
277 * asleep).
278 *
279 * There also are callbacks related to runtime power management of devices.
280 * Again, as a rule these callbacks are executed by the PM core for subsystems
281 * (PM domains, device types, classes and bus types) and the subsystem-level
282 * callbacks are expected to invoke the driver callbacks. Moreover, the exact
283 * actions to be performed by a device driver's callbacks generally depend on
284 * the platform and subsystem the device belongs to.
285 *
286 * Refer to Documentation/power/runtime_pm.txt for more information about the
287 * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle()
288 * callbacks in device runtime power management.
293 */ 289 */
294
295struct dev_pm_ops { 290struct dev_pm_ops {
296 int (*prepare)(struct device *dev); 291 int (*prepare)(struct device *dev);
297 void (*complete)(struct device *dev); 292 void (*complete)(struct device *dev);
@@ -391,7 +386,7 @@ const struct dev_pm_ops name = { \
391 SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ 386 SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
392} 387}
393 388
394/** 389/*
395 * PM_EVENT_ messages 390 * PM_EVENT_ messages
396 * 391 *
397 * The following PM_EVENT_ messages are defined for the internal use of the PM 392 * The following PM_EVENT_ messages are defined for the internal use of the PM
@@ -487,7 +482,7 @@ const struct dev_pm_ops name = { \
487 482
488#define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) 483#define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0)
489 484
490/** 485/*
491 * Device run-time power management status. 486 * Device run-time power management status.
492 * 487 *
493 * These status labels are used internally by the PM core to indicate the 488 * These status labels are used internally by the PM core to indicate the
@@ -517,7 +512,7 @@ enum rpm_status {
517 RPM_SUSPENDING, 512 RPM_SUSPENDING,
518}; 513};
519 514
520/** 515/*
521 * Device run-time power management request types. 516 * Device run-time power management request types.
522 * 517 *
523 * RPM_REQ_NONE Do nothing. 518 * RPM_REQ_NONE Do nothing.
@@ -616,15 +611,18 @@ extern void update_pm_runtime_accounting(struct device *dev);
616extern int dev_pm_get_subsys_data(struct device *dev); 611extern int dev_pm_get_subsys_data(struct device *dev);
617extern void dev_pm_put_subsys_data(struct device *dev); 612extern void dev_pm_put_subsys_data(struct device *dev);
618 613
619/* 614/**
620 * Power domains provide callbacks that are executed during system suspend, 615 * struct dev_pm_domain - power management domain representation.
621 * hibernation, system resume and during runtime PM transitions along with
622 * subsystem-level and driver-level callbacks.
623 * 616 *
617 * @ops: Power management operations associated with this domain.
624 * @detach: Called when removing a device from the domain. 618 * @detach: Called when removing a device from the domain.
625 * @activate: Called before executing probe routines for bus types and drivers. 619 * @activate: Called before executing probe routines for bus types and drivers.
626 * @sync: Called after successful driver probe. 620 * @sync: Called after successful driver probe.
627 * @dismiss: Called after unsuccessful driver probe and after driver removal. 621 * @dismiss: Called after unsuccessful driver probe and after driver removal.
622 *
623 * Power domains provide callbacks that are executed during system suspend,
624 * hibernation, system resume and during runtime PM transitions instead of
625 * subsystem-level and driver-level callbacks.
628 */ 626 */
629struct dev_pm_domain { 627struct dev_pm_domain {
630 struct dev_pm_ops ops; 628 struct dev_pm_ops ops;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 030fc633acd4..33c85dfdfce9 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -199,12 +199,12 @@ EOF
199# 'funcname()' - function 199# 'funcname()' - function
200# '$ENVVAR' - environmental variable 200# '$ENVVAR' - environmental variable
201# '&struct_name' - name of a structure (up to two words including 'struct') 201# '&struct_name' - name of a structure (up to two words including 'struct')
202# '&struct_name.member' - name of a structure member
202# '@parameter' - name of a parameter 203# '@parameter' - name of a parameter
203# '%CONST' - name of a constant. 204# '%CONST' - name of a constant.
204 205
205## init lots of data 206## init lots of data
206 207
207
208my $errors = 0; 208my $errors = 0;
209my $warnings = 0; 209my $warnings = 0;
210my $anon_struct_union = 0; 210my $anon_struct_union = 0;
@@ -214,14 +214,19 @@ my $type_constant = '\%([-_\w]+)';
214my $type_func = '(\w+)\(\)'; 214my $type_func = '(\w+)\(\)';
215my $type_param = '\@(\w+(\.\.\.)?)'; 215my $type_param = '\@(\w+(\.\.\.)?)';
216my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params 216my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params
217my $type_struct = '\&((struct\s*)*[_\w]+)';
218my $type_struct_xml = '\\&amp;((struct\s*)*[_\w]+)';
219my $type_env = '(\$\w+)'; 217my $type_env = '(\$\w+)';
220my $type_enum_full = '\&(enum)\s*([_\w]+)'; 218my $type_enum = '\&(enum\s*([_\w]+))';
221my $type_struct_full = '\&(struct)\s*([_\w]+)'; 219my $type_struct = '\&(struct\s*([_\w]+))';
222my $type_typedef_full = '\&(typedef)\s*([_\w]+)'; 220my $type_typedef = '\&(typedef\s*([_\w]+))';
223my $type_union_full = '\&(union)\s*([_\w]+)'; 221my $type_union = '\&(union\s*([_\w]+))';
224my $type_member = '\&([_\w]+)((\.|->)[_\w]+)'; 222my $type_member = '\&([_\w]+)(\.|->)([_\w]+)';
223my $type_fallback = '\&([_\w]+)';
224my $type_enum_xml = '\&amp;(enum\s*([_\w]+))';
225my $type_struct_xml = '\&amp;(struct\s*([_\w]+))';
226my $type_typedef_xml = '\&amp;(typedef\s*([_\w]+))';
227my $type_union_xml = '\&amp;(union\s*([_\w]+))';
228my $type_member_xml = '\&amp;([_\w]+)(\.|-\&gt;)([_\w]+)';
229my $type_fallback_xml = '\&amp([_\w]+)';
225my $type_member_func = $type_member . '\(\)'; 230my $type_member_func = $type_member . '\(\)';
226 231
227# Output conversion substitutions. 232# Output conversion substitutions.
@@ -231,9 +236,14 @@ my $type_member_func = $type_member . '\(\)';
231my @highlights_html = ( 236my @highlights_html = (
232 [$type_constant, "<i>\$1</i>"], 237 [$type_constant, "<i>\$1</i>"],
233 [$type_func, "<b>\$1</b>"], 238 [$type_func, "<b>\$1</b>"],
239 [$type_enum_xml, "<i>\$1</i>"],
234 [$type_struct_xml, "<i>\$1</i>"], 240 [$type_struct_xml, "<i>\$1</i>"],
241 [$type_typedef_xml, "<i>\$1</i>"],
242 [$type_union_xml, "<i>\$1</i>"],
235 [$type_env, "<b><i>\$1</i></b>"], 243 [$type_env, "<b><i>\$1</i></b>"],
236 [$type_param, "<tt><b>\$1</b></tt>"] 244 [$type_param, "<tt><b>\$1</b></tt>"],
245 [$type_member_xml, "<tt><i>\$1</i>\$2\$3</tt>"],
246 [$type_fallback_xml, "<i>\$1</i>"]
237 ); 247 );
238my $local_lt = "\\\\\\\\lt:"; 248my $local_lt = "\\\\\\\\lt:";
239my $local_gt = "\\\\\\\\gt:"; 249my $local_gt = "\\\\\\\\gt:";
@@ -243,9 +253,14 @@ my $blankline_html = $local_lt . "p" . $local_gt; # was "<p>"
243my @highlights_html5 = ( 253my @highlights_html5 = (
244 [$type_constant, "<span class=\"const\">\$1</span>"], 254 [$type_constant, "<span class=\"const\">\$1</span>"],
245 [$type_func, "<span class=\"func\">\$1</span>"], 255 [$type_func, "<span class=\"func\">\$1</span>"],
256 [$type_enum_xml, "<span class=\"enum\">\$1</span>"],
246 [$type_struct_xml, "<span class=\"struct\">\$1</span>"], 257 [$type_struct_xml, "<span class=\"struct\">\$1</span>"],
258 [$type_typedef_xml, "<span class=\"typedef\">\$1</span>"],
259 [$type_union_xml, "<span class=\"union\">\$1</span>"],
247 [$type_env, "<span class=\"env\">\$1</span>"], 260 [$type_env, "<span class=\"env\">\$1</span>"],
248 [$type_param, "<span class=\"param\">\$1</span>]"] 261 [$type_param, "<span class=\"param\">\$1</span>]"],
262 [$type_member_xml, "<span class=\"literal\"><span class=\"struct\">\$1</span>\$2<span class=\"member\">\$3</span></span>"],
263 [$type_fallback_xml, "<span class=\"struct\">\$1</span>"]
249 ); 264 );
250my $blankline_html5 = $local_lt . "br /" . $local_gt; 265my $blankline_html5 = $local_lt . "br /" . $local_gt;
251 266
@@ -253,10 +268,15 @@ my $blankline_html5 = $local_lt . "br /" . $local_gt;
253my @highlights_xml = ( 268my @highlights_xml = (
254 ["([^=])\\\"([^\\\"<]+)\\\"", "\$1<quote>\$2</quote>"], 269 ["([^=])\\\"([^\\\"<]+)\\\"", "\$1<quote>\$2</quote>"],
255 [$type_constant, "<constant>\$1</constant>"], 270 [$type_constant, "<constant>\$1</constant>"],
271 [$type_enum_xml, "<type>\$1</type>"],
256 [$type_struct_xml, "<structname>\$1</structname>"], 272 [$type_struct_xml, "<structname>\$1</structname>"],
273 [$type_typedef_xml, "<type>\$1</type>"],
274 [$type_union_xml, "<structname>\$1</structname>"],
257 [$type_param, "<parameter>\$1</parameter>"], 275 [$type_param, "<parameter>\$1</parameter>"],
258 [$type_func, "<function>\$1</function>"], 276 [$type_func, "<function>\$1</function>"],
259 [$type_env, "<envar>\$1</envar>"] 277 [$type_env, "<envar>\$1</envar>"],
278 [$type_member_xml, "<literal><structname>\$1</structname>\$2<structfield>\$3</structfield></literal>"],
279 [$type_fallback_xml, "<structname>\$1</structname>"]
260 ); 280 );
261my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $local_gt . "\n"; 281my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $local_gt . "\n";
262 282
@@ -264,9 +284,14 @@ my $blankline_xml = $local_lt . "/para" . $local_gt . $local_lt . "para" . $loca
264my @highlights_gnome = ( 284my @highlights_gnome = (
265 [$type_constant, "<replaceable class=\"option\">\$1</replaceable>"], 285 [$type_constant, "<replaceable class=\"option\">\$1</replaceable>"],
266 [$type_func, "<function>\$1</function>"], 286 [$type_func, "<function>\$1</function>"],
287 [$type_enum, "<type>\$1</type>"],
267 [$type_struct, "<structname>\$1</structname>"], 288 [$type_struct, "<structname>\$1</structname>"],
289 [$type_typedef, "<type>\$1</type>"],
290 [$type_union, "<structname>\$1</structname>"],
268 [$type_env, "<envar>\$1</envar>"], 291 [$type_env, "<envar>\$1</envar>"],
269 [$type_param, "<parameter>\$1</parameter>" ] 292 [$type_param, "<parameter>\$1</parameter>" ],
293 [$type_member, "<literal><structname>\$1</structname>\$2<structfield>\$3</structfield></literal>"],
294 [$type_fallback, "<structname>\$1</structname>"]
270 ); 295 );
271my $blankline_gnome = "</para><para>\n"; 296my $blankline_gnome = "</para><para>\n";
272 297
@@ -274,8 +299,13 @@ my $blankline_gnome = "</para><para>\n";
274my @highlights_man = ( 299my @highlights_man = (
275 [$type_constant, "\$1"], 300 [$type_constant, "\$1"],
276 [$type_func, "\\\\fB\$1\\\\fP"], 301 [$type_func, "\\\\fB\$1\\\\fP"],
302 [$type_enum, "\\\\fI\$1\\\\fP"],
277 [$type_struct, "\\\\fI\$1\\\\fP"], 303 [$type_struct, "\\\\fI\$1\\\\fP"],
278 [$type_param, "\\\\fI\$1\\\\fP"] 304 [$type_typedef, "\\\\fI\$1\\\\fP"],
305 [$type_union, "\\\\fI\$1\\\\fP"],
306 [$type_param, "\\\\fI\$1\\\\fP"],
307 [$type_member, "\\\\fI\$1\$2\$3\\\\fP"],
308 [$type_fallback, "\\\\fI\$1\\\\fP"]
279 ); 309 );
280my $blankline_man = ""; 310my $blankline_man = "";
281 311
@@ -283,8 +313,13 @@ my $blankline_man = "";
283my @highlights_text = ( 313my @highlights_text = (
284 [$type_constant, "\$1"], 314 [$type_constant, "\$1"],
285 [$type_func, "\$1"], 315 [$type_func, "\$1"],
316 [$type_enum, "\$1"],
286 [$type_struct, "\$1"], 317 [$type_struct, "\$1"],
287 [$type_param, "\$1"] 318 [$type_typedef, "\$1"],
319 [$type_union, "\$1"],
320 [$type_param, "\$1"],
321 [$type_member, "\$1\$2\$3"],
322 [$type_fallback, "\$1"]
288 ); 323 );
289my $blankline_text = ""; 324my $blankline_text = "";
290 325
@@ -292,16 +327,16 @@ my $blankline_text = "";
292my @highlights_rst = ( 327my @highlights_rst = (
293 [$type_constant, "``\$1``"], 328 [$type_constant, "``\$1``"],
294 # Note: need to escape () to avoid func matching later 329 # Note: need to escape () to avoid func matching later
295 [$type_member_func, "\\:c\\:type\\:`\$1\$2\\\\(\\\\) <\$1>`"], 330 [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"],
296 [$type_member, "\\:c\\:type\\:`\$1\$2 <\$1>`"], 331 [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"],
297 [$type_fp_param, "**\$1\\\\(\\\\)**"], 332 [$type_fp_param, "**\$1\\\\(\\\\)**"],
298 [$type_func, "\\:c\\:func\\:`\$1()`"], 333 [$type_func, "\\:c\\:func\\:`\$1()`"],
299 [$type_struct_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"], 334 [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"],
300 [$type_enum_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"], 335 [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"],
301 [$type_typedef_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"], 336 [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"],
302 [$type_union_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"], 337 [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"],
303 # in rst this can refer to any type 338 # in rst this can refer to any type
304 [$type_struct, "\\:c\\:type\\:`\$1`"], 339 [$type_fallback, "\\:c\\:type\\:`\$1`"],
305 [$type_param, "**\$1**"] 340 [$type_param, "**\$1**"]
306 ); 341 );
307my $blankline_rst = "\n"; 342my $blankline_rst = "\n";
@@ -310,8 +345,13 @@ my $blankline_rst = "\n";
310my @highlights_list = ( 345my @highlights_list = (
311 [$type_constant, "\$1"], 346 [$type_constant, "\$1"],
312 [$type_func, "\$1"], 347 [$type_func, "\$1"],
348 [$type_enum, "\$1"],
313 [$type_struct, "\$1"], 349 [$type_struct, "\$1"],
314 [$type_param, "\$1"] 350 [$type_typedef, "\$1"],
351 [$type_union, "\$1"],
352 [$type_param, "\$1"],
353 [$type_member, "\$1"],
354 [$type_fallback, "\$1"]
315 ); 355 );
316my $blankline_list = ""; 356my $blankline_list = "";
317 357
@@ -1131,8 +1171,9 @@ sub output_function_xml(%) {
1131 foreach $parameter (@{$args{'parameterlist'}}) { 1171 foreach $parameter (@{$args{'parameterlist'}}) {
1132 my $parameter_name = $parameter; 1172 my $parameter_name = $parameter;
1133 $parameter_name =~ s/\[.*//; 1173 $parameter_name =~ s/\[.*//;
1174 $type = $args{'parametertypes'}{$parameter};
1134 1175
1135 print " <varlistentry>\n <term><parameter>$parameter</parameter></term>\n"; 1176 print " <varlistentry>\n <term><parameter>$type $parameter</parameter></term>\n";
1136 print " <listitem>\n <para>\n"; 1177 print " <listitem>\n <para>\n";
1137 $lineprefix=" "; 1178 $lineprefix=" ";
1138 output_highlight($args{'parameterdescs'}{$parameter_name}); 1179 output_highlight($args{'parameterdescs'}{$parameter_name});
@@ -1223,8 +1264,9 @@ sub output_struct_xml(%) {
1223 1264
1224 defined($args{'parameterdescs'}{$parameter_name}) || next; 1265 defined($args{'parameterdescs'}{$parameter_name}) || next;
1225 ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; 1266 ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
1267 $type = $args{'parametertypes'}{$parameter};
1226 print " <varlistentry>"; 1268 print " <varlistentry>";
1227 print " <term>$parameter</term>\n"; 1269 print " <term><literal>$type $parameter</literal></term>\n";
1228 print " <listitem><para>\n"; 1270 print " <listitem><para>\n";
1229 output_highlight($args{'parameterdescs'}{$parameter_name}); 1271 output_highlight($args{'parameterdescs'}{$parameter_name});
1230 print " </para></listitem>\n"; 1272 print " </para></listitem>\n";
@@ -1883,7 +1925,7 @@ sub output_function_rst(%) {
1883 $lineprefix = " "; 1925 $lineprefix = " ";
1884 foreach $parameter (@{$args{'parameterlist'}}) { 1926 foreach $parameter (@{$args{'parameterlist'}}) {
1885 my $parameter_name = $parameter; 1927 my $parameter_name = $parameter;
1886 #$parameter_name =~ s/\[.*//; 1928 $parameter_name =~ s/\[.*//;
1887 $type = $args{'parametertypes'}{$parameter}; 1929 $type = $args{'parametertypes'}{$parameter};
1888 1930
1889 if ($type ne "") { 1931 if ($type ne "") {
@@ -2409,6 +2451,7 @@ sub push_parameter($$$) {
2409 # "[blah" in a parameter string; 2451 # "[blah" in a parameter string;
2410 ###$param =~ s/\s*//g; 2452 ###$param =~ s/\s*//g;
2411 push @parameterlist, $param; 2453 push @parameterlist, $param;
2454 $type =~ s/\s\s+/ /g;
2412 $parametertypes{$param} = $type; 2455 $parametertypes{$param} = $type;
2413} 2456}
2414 2457
@@ -2505,7 +2548,13 @@ sub dump_function($$) {
2505 $prototype =~ s/__must_check +//; 2548 $prototype =~ s/__must_check +//;
2506 $prototype =~ s/__weak +//; 2549 $prototype =~ s/__weak +//;
2507 my $define = $prototype =~ s/^#\s*define\s+//; #ak added 2550 my $define = $prototype =~ s/^#\s*define\s+//; #ak added
2508 $prototype =~ s/__attribute__\s*\(\([a-z,]*\)\)//; 2551 $prototype =~ s/__attribute__\s*\(\(
2552 (?:
2553 [\w\s]++ # attribute name
2554 (?:\([^)]*+\))? # attribute arguments
2555 \s*+,? # optional comma at the end
2556 )+
2557 \)\)\s+//x;
2509 2558
2510 # Yes, this truly is vile. We are looking for: 2559 # Yes, this truly is vile. We are looking for:
2511 # 1. Return type (may be nothing if we're looking at a macro) 2560 # 1. Return type (may be nothing if we're looking at a macro)
@@ -2533,21 +2582,21 @@ sub dump_function($$) {
2533 $noret = 1; 2582 $noret = 1;
2534 } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2583 } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2535 $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2584 $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2536 $prototype =~ m/^(\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2585 $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2537 $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2586 $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2538 $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2587 $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2539 $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2588 $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2540 $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || 2589 $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
2541 $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2590 $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2542 $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2591 $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2543 $prototype =~ m/^(\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2592 $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2544 $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2593 $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2545 $prototype =~ m/^(\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2594 $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2546 $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2595 $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2547 $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2596 $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2548 $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2597 $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2549 $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || 2598 $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
2550 $prototype =~ m/^(\w+\s+\w+\s*\*\s*\w+\s*\*\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { 2599 $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) {
2551 $return_type = $1; 2600 $return_type = $1;
2552 $declaration_name = $2; 2601 $declaration_name = $2;
2553 my $args = $3; 2602 my $args = $3;