diff options
Diffstat (limited to 'Documentation/DocBook')
24 files changed, 13728 insertions, 0 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile new file mode 100644 index 000000000000..a221039ee4c9 --- /dev/null +++ b/Documentation/DocBook/Makefile | |||
@@ -0,0 +1,195 @@ | |||
1 | ### | ||
2 | # This makefile is used to generate the kernel documentation, | ||
3 | # primarily based on in-line comments in various source files. | ||
4 | # See Documentation/kernel-doc-nano-HOWTO.txt for instructions on how | ||
5 | # to document the source - and how to read the result. | ||
6 | # To add a new book, the only step required is to add its .xml name to the | ||
7 | # DOCBOOKS list below. | ||
8 | |||
9 | DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ | ||
10 | kernel-hacking.xml kernel-locking.xml via-audio.xml \ | ||
11 | deviceiobook.xml procfs-guide.xml tulip-user.xml \ | ||
12 | writing_usb_driver.xml scsidrivers.xml sis900.xml \ | ||
13 | kernel-api.xml journal-api.xml lsm.xml usb.xml \ | ||
14 | gadget.xml libata.xml mtdnand.xml librs.xml | ||
15 | |||
16 | ### | ||
17 | # The build process is as follows (targets): | ||
18 | # (xmldocs) | ||
19 | # file.tmpl --> file.xml +--> file.ps (psdocs) | ||
20 | # +--> file.pdf (pdfdocs) | ||
21 | # +--> DIR=file (htmldocs) | ||
22 | # +--> man/ (mandocs) | ||
23 | |||
24 | ### | ||
25 | # The targets that may be used. None of them names a real file, so all are declared phony (dochelp included). | ||
26 | .PHONY: xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs dochelp | ||
27 | |||
28 | BOOKS := $(addprefix $(obj)/,$(DOCBOOKS)) | ||
29 | xmldocs: $(BOOKS) | ||
30 | sgmldocs: xmldocs | ||
31 | |||
32 | PS := $(patsubst %.xml, %.ps, $(BOOKS)) | ||
33 | psdocs: $(PS) | ||
34 | |||
35 | PDF := $(patsubst %.xml, %.pdf, $(BOOKS)) | ||
36 | pdfdocs: $(PDF) | ||
37 | |||
38 | HTML := $(patsubst %.xml, %.html, $(BOOKS)) | ||
39 | htmldocs: $(HTML) | ||
40 | |||
41 | MAN := $(patsubst %.xml, %.9, $(BOOKS)) | ||
42 | mandocs: $(MAN) | ||
43 | |||
44 | installmandocs: mandocs | ||
45 | $(MAKEMAN) install Documentation/DocBook/man | ||
46 | |||
47 | ### | ||
48 | # External programs used by the rules in this makefile | ||
49 | KERNELDOC = scripts/kernel-doc | ||
50 | DOCPROC = scripts/basic/docproc | ||
51 | SPLITMAN = $(PERL) $(srctree)/scripts/split-man | ||
52 | MAKEMAN = $(PERL) $(srctree)/scripts/makeman | ||
53 | |||
54 | ### | ||
55 | # DOCPROC is used for two purposes: | ||
56 | # 1) To generate a dependency list for a .tmpl file ("docproc depend") | ||
57 | # 2) To preprocess a .tmpl file and call kernel-doc with | ||
58 | # appropriate parameters ("docproc doc"). | ||
59 | # The following rules generate the .xml documentation required by the final | ||
60 | # targets (ps, pdf, html, man); rule_docproc also saves the command and the dependency list in .<name>.cmd. | ||
61 | quiet_cmd_docproc = DOCPROC $@ | ||
62 | cmd_docproc = SRCTREE=$(srctree)/ $(DOCPROC) doc $< >$@ | ||
63 | define rule_docproc | ||
64 | set -e; \ | ||
65 | $(if $($(quiet)cmd_$(1)),echo ' $($(quiet)cmd_$(1))';) \ | ||
66 | $(cmd_$(1)); \ | ||
67 | ( \ | ||
68 | echo 'cmd_$@ := $(cmd_$(1))'; \ | ||
69 | echo $@: `SRCTREE=$(srctree) $(DOCPROC) depend $<`; \ | ||
70 | ) > $(dir $@).$(notdir $@).cmd | ||
71 | endef | ||
72 | |||
73 | %.xml: %.tmpl FORCE | ||
74 | $(call if_changed_rule,docproc) | ||
75 | |||
76 | ### | ||
77 | # Read in all saved dependency files (one .<book>.cmd file per book, if it exists) | ||
78 | cmd_files := $(wildcard $(foreach f,$(BOOKS),$(dir $(f)).$(notdir $(f)).cmd)) | ||
79 | |||
80 | ifneq ($(cmd_files),) | ||
81 | include $(cmd_files) | ||
82 | endif | ||
83 | |||
84 | ### | ||
85 | # Changes in kernel-doc force a rebuild of all documentation | ||
86 | $(BOOKS): $(KERNELDOC) | ||
87 | |||
88 | ### | ||
89 | # The procfs guide uses a .c file as example code. | ||
90 | # This requires an explicit dependency on the .xml file generated from it. | ||
91 | C-procfs-example = procfs_example.xml | ||
92 | C-procfs-example2 = $(addprefix $(obj)/,$(C-procfs-example)) | ||
93 | $(obj)/procfs-guide.xml: $(C-procfs-example2) | ||
94 | |||
95 | ### | ||
96 | # Rules to generate postscript, PDF and HTML. Each rule first checks that its db2* tool is installed. | ||
97 | # db2html creates a directory, so a small .html file linking to it is written and serves as the timestamp. | ||
98 | |||
99 | quiet_cmd_db2ps = DB2PS $@ | ||
100 | cmd_db2ps = db2ps -o $(dir $@) $< | ||
101 | %.ps : %.xml | ||
102 | @(which db2ps > /dev/null 2>&1) || \ | ||
103 | (echo "*** You need to install DocBook stylesheets ***"; \ | ||
104 | exit 1) | ||
105 | $(call cmd,db2ps) | ||
106 | |||
107 | quiet_cmd_db2pdf = DB2PDF $@ | ||
108 | cmd_db2pdf = db2pdf -o $(dir $@) $< | ||
109 | %.pdf : %.xml | ||
110 | @(which db2pdf > /dev/null 2>&1) || \ | ||
111 | (echo "*** You need to install DocBook stylesheets ***"; \ | ||
112 | exit 1) | ||
113 | $(call cmd,db2pdf) | ||
114 | |||
115 | quiet_cmd_db2html = DB2HTML $@ | ||
116 | cmd_db2html = db2html -o $(patsubst %.html,%,$@) $< && \ | ||
117 | echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/book1.html"> \ | ||
118 | Goto $(patsubst %.html,%,$(notdir $@))</a><p>' > $@ | ||
119 | |||
120 | %.html: %.xml | ||
121 | @(which db2html > /dev/null 2>&1) || \ | ||
122 | (echo "*** You need to install DocBook stylesheets ***"; \ | ||
123 | exit 1) | ||
124 | @rm -rf $@ $(patsubst %.html,%,$@) | ||
125 | $(call cmd,db2html) | ||
126 | @if [ ! -z "$(PNG-$(basename $(notdir $@)))" ]; then \ | ||
127 | cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi | ||
128 | |||
129 | ### | ||
130 | # Rule to generate man files - output is placed in $(objtree)/Documentation/DocBook/man (created first when KBUILD_SRC is set) | ||
131 | |||
132 | %.9: %.xml | ||
133 | ifneq ($(KBUILD_SRC),) | ||
134 | $(Q)mkdir -p $(objtree)/Documentation/DocBook/man | ||
135 | endif | ||
136 | $(SPLITMAN) $< $(objtree)/Documentation/DocBook/man "$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)" | ||
137 | $(MAKEMAN) convert $(objtree)/Documentation/DocBook/man $< | ||
138 | |||
139 | ### | ||
140 | # Rules to generate EPS (postscript) and PNG images from .fig format files using fig2dev | ||
141 | quiet_cmd_fig2eps = FIG2EPS $@ | ||
142 | cmd_fig2eps = fig2dev -Leps $< $@ | ||
143 | |||
144 | %.eps: %.fig | ||
145 | @(which fig2dev > /dev/null 2>&1) || \ | ||
146 | (echo "*** You need to install transfig ***"; \ | ||
147 | exit 1) | ||
148 | $(call cmd,fig2eps) | ||
149 | |||
150 | quiet_cmd_fig2png = FIG2PNG $@ | ||
151 | cmd_fig2png = fig2dev -Lpng $< $@ | ||
152 | |||
153 | %.png: %.fig | ||
154 | @(which fig2dev > /dev/null 2>&1) || \ | ||
155 | (echo "*** You need to install transfig ***"; \ | ||
156 | exit 1) | ||
157 | $(call cmd,fig2png) | ||
158 | |||
159 | ### | ||
160 | # Rule to convert a .c file to inline XML documentation: tabs are expanded, &, < and > become XML entities, and the result is wrapped in <programlisting> | ||
161 | %.xml: %.c | ||
162 | @echo ' GEN $@' | ||
163 | @( \ | ||
164 | echo "<programlisting>"; \ | ||
165 | expand --tabs=8 < $< | \ | ||
166 | sed -e "s/&/\\&amp;/g" \ | ||
167 | -e "s/</\\&lt;/g" \ | ||
168 | -e "s/>/\\&gt;/g"; \ | ||
169 | echo "</programlisting>") > $@ | ||
170 | |||
171 | ### | ||
172 | # Help target as used by the top-level makefile: prints the available documentation targets | ||
173 | dochelp: | ||
174 | @echo ' Linux kernel internal documentation in different formats:' | ||
175 | @echo ' xmldocs (XML DocBook), psdocs (Postscript), pdfdocs (PDF)' | ||
176 | @echo ' htmldocs (HTML), mandocs (man pages, use installmandocs to install)' | ||
177 | |||
178 | ### | ||
179 | # Generated books and temporary files left by various tools | ||
180 | clean-files := $(DOCBOOKS) \ | ||
181 | $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \ | ||
182 | $(patsubst %.xml, %.aux, $(DOCBOOKS)) \ | ||
183 | $(patsubst %.xml, %.tex, $(DOCBOOKS)) \ | ||
184 | $(patsubst %.xml, %.log, $(DOCBOOKS)) \ | ||
185 | $(patsubst %.xml, %.out, $(DOCBOOKS)) \ | ||
186 | $(patsubst %.xml, %.ps, $(DOCBOOKS)) \ | ||
187 | $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \ | ||
188 | $(patsubst %.xml, %.html, $(DOCBOOKS)) \ | ||
189 | $(patsubst %.xml, %.9, $(DOCBOOKS)) \ | ||
190 | $(C-procfs-example) | ||
191 | |||
192 | clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) | ||
193 | |||
194 | # The man pages are placed in the man subdir - traverse down into it | ||
195 | subdir- := man/ | ||
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl new file mode 100644 index 000000000000..6f41f2f5c6f6 --- /dev/null +++ b/Documentation/DocBook/deviceiobook.tmpl | |||
@@ -0,0 +1,341 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="DoingIO"> | ||
6 | <bookinfo> | ||
7 | <title>Bus-Independent Device Accesses</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Matthew</firstname> | ||
12 | <surname>Wilcox</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>matthew@wil.cx</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <authorgroup> | ||
22 | <author> | ||
23 | <firstname>Alan</firstname> | ||
24 | <surname>Cox</surname> | ||
25 | <affiliation> | ||
26 | <address> | ||
27 | <email>alan@redhat.com</email> | ||
28 | </address> | ||
29 | </affiliation> | ||
30 | </author> | ||
31 | </authorgroup> | ||
32 | |||
33 | <copyright> | ||
34 | <year>2001</year> | ||
35 | <holder>Matthew Wilcox</holder> | ||
36 | </copyright> | ||
37 | |||
38 | <legalnotice> | ||
39 | <para> | ||
40 | This documentation is free software; you can redistribute | ||
41 | it and/or modify it under the terms of the GNU General Public | ||
42 | License as published by the Free Software Foundation; either | ||
43 | version 2 of the License, or (at your option) any later | ||
44 | version. | ||
45 | </para> | ||
46 | |||
47 | <para> | ||
48 | This program is distributed in the hope that it will be | ||
49 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
50 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
51 | See the GNU General Public License for more details. | ||
52 | </para> | ||
53 | |||
54 | <para> | ||
55 | You should have received a copy of the GNU General Public | ||
56 | License along with this program; if not, write to the Free | ||
57 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
58 | MA 02111-1307 USA | ||
59 | </para> | ||
60 | |||
61 | <para> | ||
62 | For more details see the file COPYING in the source | ||
63 | distribution of Linux. | ||
64 | </para> | ||
65 | </legalnotice> | ||
66 | </bookinfo> | ||
67 | |||
68 | <toc></toc> | ||
69 | |||
70 | <chapter id="intro"> | ||
71 | <title>Introduction</title> | ||
72 | <para> | ||
73 | Linux provides an API which abstracts performing IO across all busses | ||
74 | and devices, allowing device drivers to be written independently of | ||
75 | bus type. | ||
76 | </para> | ||
77 | </chapter> | ||
78 | |||
79 | <chapter id="bugs"> | ||
80 | <title>Known Bugs And Assumptions</title> | ||
81 | <para> | ||
82 | None. | ||
83 | </para> | ||
84 | </chapter> | ||
85 | |||
86 | <chapter id="mmio"> | ||
87 | <title>Memory Mapped IO</title> | ||
88 | <sect1> | ||
89 | <title>Getting Access to the Device</title> | ||
90 | <para> | ||
91 | The most widely supported form of IO is memory mapped IO. | ||
92 | That is, a part of the CPU's address space is interpreted | ||
93 | not as accesses to memory, but as accesses to a device. Some | ||
94 | architectures define devices to be at a fixed address, but most | ||
95 | have some method of discovering devices. The PCI bus walk is a | ||
96 | good example of such a scheme. This document does not cover how | ||
97 | to receive such an address, but assumes you are starting with one. | ||
98 | Physical addresses are of type unsigned long. | ||
99 | </para> | ||
100 | |||
101 | <para> | ||
102 | This address should not be used directly. Instead, to get an | ||
103 | address suitable for passing to the accessor functions described | ||
104 | below, you should call <function>ioremap</function>. | ||
105 | An address suitable for accessing the device will be returned to you. | ||
106 | </para> | ||
107 | |||
108 | <para> | ||
109 | After you've finished using the device (say, in your module's | ||
110 | exit routine), call <function>iounmap</function> in order to return | ||
111 | the address space to the kernel. Most architectures allocate new | ||
112 | address space each time you call <function>ioremap</function>, and | ||
113 | they can run out unless you call <function>iounmap</function>. | ||
114 | </para> | ||
115 | </sect1> | ||
116 | |||
117 | <sect1> | ||
118 | <title>Accessing the device</title> | ||
119 | <para> | ||
120 | The part of the interface most used by drivers is reading and | ||
121 | writing memory-mapped registers on the device. Linux provides | ||
122 | interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit | ||
123 | quantities. Due to a historical accident, these are named byte, | ||
124 | word, long and quad accesses. Both read and write accesses are | ||
125 | supported; there is no prefetch support at this time. | ||
126 | </para> | ||
127 | |||
128 | <para> | ||
129 | The functions are named <function>readb</function>, | ||
130 | <function>readw</function>, <function>readl</function>, | ||
131 | <function>readq</function>, <function>readb_relaxed</function>, | ||
132 | <function>readw_relaxed</function>, <function>readl_relaxed</function>, | ||
133 | <function>readq_relaxed</function>, <function>writeb</function>, | ||
134 | <function>writew</function>, <function>writel</function> and | ||
135 | <function>writeq</function>. | ||
136 | </para> | ||
137 | |||
138 | <para> | ||
139 | Some devices (such as framebuffers) would like to use larger | ||
140 | transfers than 8 bytes at a time. For these devices, the | ||
141 | <function>memcpy_toio</function>, <function>memcpy_fromio</function> | ||
142 | and <function>memset_io</function> functions are provided. | ||
143 | Do not use memset or memcpy on IO addresses; they | ||
144 | are not guaranteed to copy data in order. | ||
145 | </para> | ||
146 | |||
147 | <para> | ||
148 | The read and write functions are defined to be ordered. That is the | ||
149 | compiler is not permitted to reorder the I/O sequence. When the | ||
150 | ordering can be compiler optimised, you can use <function> | ||
151 | __readb</function> and friends to indicate the relaxed ordering. Use | ||
152 | this with care. | ||
153 | </para> | ||
154 | |||
155 | <para> | ||
156 | While the basic functions are defined to be synchronous with respect | ||
157 | to each other and ordered with respect to each other the busses the | ||
158 | devices sit on may themselves have asynchronicity. In particular many | ||
159 | authors are burned by the fact that PCI bus writes are posted | ||
160 | asynchronously. A driver author must issue a read from the same | ||
161 | device to ensure that writes have occurred in the specific cases the | ||
162 | author cares about. This kind of property cannot be hidden from driver | ||
163 | writers in the API. In some cases, the read used to flush the device | ||
164 | may be expected to fail (if the card is resetting, for example). In | ||
165 | that case, the read should be done from config space, which is | ||
166 | guaranteed to soft-fail if the card doesn't respond. | ||
167 | </para> | ||
168 | |||
169 | <para> | ||
170 | The following is an example of flushing a write to a device when | ||
171 | the driver would like to ensure the write's effects are visible prior | ||
172 | to continuing execution. | ||
173 | </para> | ||
174 | |||
175 | <programlisting> | ||
176 | static inline void | ||
177 | qla1280_disable_intrs(struct scsi_qla_host *ha) | ||
178 | { | ||
179 | struct device_reg *reg; | ||
180 | |||
181 | reg = ha->iobase; | ||
182 | /* disable risc and host interrupts */ | ||
183 | WRT_REG_WORD(&amp;reg->ictrl, 0); | ||
184 | /* | ||
185 | * The following read will ensure that the above write | ||
186 | * has been received by the device before we return from this | ||
187 | * function. | ||
188 | */ | ||
189 | RD_REG_WORD(&amp;reg->ictrl); | ||
190 | ha->flags.ints_enabled = 0; | ||
191 | } | ||
192 | </programlisting> | ||
193 | |||
194 | <para> | ||
195 | In addition to write posting, on some large multiprocessing systems | ||
196 | (e.g. SGI Challenge, Origin and Altix machines) posted writes won't | ||
197 | be strongly ordered coming from different CPUs. Thus it's important | ||
198 | to properly protect parts of your driver that do memory-mapped writes | ||
199 | with locks and use the <function>mmiowb</function> to make sure they | ||
200 | arrive in the order intended. Issuing a regular <function>readX | ||
201 | </function> will also ensure write ordering, but should only be used | ||
202 | when the driver has to be sure that the write has actually arrived | ||
203 | at the device (not that it's simply ordered with respect to other | ||
204 | writes), since a full <function>readX</function> is a relatively | ||
205 | expensive operation. | ||
206 | </para> | ||
207 | |||
208 | <para> | ||
209 | Generally, one should use <function>mmiowb</function> prior to | ||
210 | releasing a spinlock that protects regions using <function>writeb | ||
211 | </function> or similar functions that aren't surrounded by <function> | ||
212 | readb</function> calls, which will ensure ordering and flushing. The | ||
213 | following pseudocode illustrates what might occur if write ordering | ||
214 | isn't guaranteed via <function>mmiowb</function> or one of the | ||
215 | <function>readX</function> functions. | ||
216 | </para> | ||
217 | |||
218 | <programlisting> | ||
219 | CPU A: spin_lock_irqsave(&amp;dev_lock, flags) | ||
220 | CPU A: ... | ||
221 | CPU A: writel(newval, ring_ptr); | ||
222 | CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags) | ||
223 | ... | ||
224 | CPU B: spin_lock_irqsave(&amp;dev_lock, flags) | ||
225 | CPU B: writel(newval2, ring_ptr); | ||
226 | CPU B: ... | ||
227 | CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags) | ||
228 | </programlisting> | ||
229 | |||
230 | <para> | ||
231 | In the case above, newval2 could be written to ring_ptr before | ||
232 | newval. Fixing it is easy though: | ||
233 | </para> | ||
234 | |||
235 | <programlisting> | ||
236 | CPU A: spin_lock_irqsave(&amp;dev_lock, flags) | ||
237 | CPU A: ... | ||
238 | CPU A: writel(newval, ring_ptr); | ||
239 | CPU A: mmiowb(); /* ensure no other writes beat us to the device */ | ||
240 | CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags) | ||
241 | ... | ||
242 | CPU B: spin_lock_irqsave(&amp;dev_lock, flags) | ||
243 | CPU B: writel(newval2, ring_ptr); | ||
244 | CPU B: ... | ||
245 | CPU B: mmiowb(); | ||
246 | CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags) | ||
247 | </programlisting> | ||
248 | |||
249 | <para> | ||
250 | See tg3.c for a real world example of how to use <function>mmiowb | ||
251 | </function> | ||
252 | </para> | ||
253 | |||
254 | <para> | ||
255 | PCI ordering rules also guarantee that PIO read responses arrive | ||
256 | after any outstanding DMA writes from that bus, since for some devices | ||
257 | the result of a <function>readb</function> call may signal to the | ||
258 | driver that a DMA transaction is complete. In many cases, however, | ||
259 | the driver may want to indicate that the next | ||
260 | <function>readb</function> call has no relation to any previous DMA | ||
261 | writes performed by the device. The driver can use | ||
262 | <function>readb_relaxed</function> for these cases, although only | ||
263 | some platforms will honor the relaxed semantics. Using the relaxed | ||
264 | read functions will provide significant performance benefits on | ||
265 | platforms that support it. The qla2xxx driver provides examples | ||
266 | of how to use <function>readX_relaxed</function>. In many cases, | ||
267 | a majority of the driver's <function>readX</function> calls can | ||
268 | safely be converted to <function>readX_relaxed</function> calls, since | ||
269 | only a few will indicate or depend on DMA completion. | ||
270 | </para> | ||
271 | </sect1> | ||
272 | |||
273 | <sect1> | ||
274 | <title>ISA legacy functions</title> | ||
275 | <para> | ||
276 | On older kernels (2.2 and earlier) the ISA bus could be read or | ||
277 | written with these functions and without ioremap being used. This is | ||
278 | no longer true in Linux 2.4. A set of equivalent functions exist for | ||
279 | easy legacy driver porting. The functions available are prefixed | ||
280 | with 'isa_' and are <function>isa_readb</function>, | ||
281 | <function>isa_writeb</function>, <function>isa_readw</function>, | ||
282 | <function>isa_writew</function>, <function>isa_readl</function>, | ||
283 | <function>isa_writel</function>, <function>isa_memcpy_fromio</function> | ||
284 | and <function>isa_memcpy_toio</function> | ||
285 | </para> | ||
286 | <para> | ||
287 | These functions should not be used in new drivers, and will | ||
288 | eventually be going away. | ||
289 | </para> | ||
290 | </sect1> | ||
291 | |||
292 | </chapter> | ||
293 | |||
294 | <chapter> | ||
295 | <title>Port Space Accesses</title> | ||
296 | <sect1> | ||
297 | <title>Port Space Explained</title> | ||
298 | |||
299 | <para> | ||
300 | Another form of IO commonly supported is Port Space. This is a | ||
301 | range of addresses separate to the normal memory address space. | ||
302 | Access to these addresses is generally not as fast as accesses | ||
303 | to the memory mapped addresses, and it also has a potentially | ||
304 | smaller address space. | ||
305 | </para> | ||
306 | |||
307 | <para> | ||
308 | Unlike memory mapped IO, no preparation is required | ||
309 | to access port space. | ||
310 | </para> | ||
311 | |||
312 | </sect1> | ||
313 | <sect1> | ||
314 | <title>Accessing Port Space</title> | ||
315 | <para> | ||
316 | Accesses to this space are provided through a set of functions | ||
317 | which allow 8-bit, 16-bit and 32-bit accesses; also | ||
318 | known as byte, word and long. These functions are | ||
319 | <function>inb</function>, <function>inw</function>, | ||
320 | <function>inl</function>, <function>outb</function>, | ||
321 | <function>outw</function> and <function>outl</function>. | ||
322 | </para> | ||
323 | |||
324 | <para> | ||
325 | Some variants are provided for these functions. Some devices | ||
326 | require that accesses to their ports are slowed down. This | ||
327 | functionality is provided by appending a <function>_p</function> | ||
328 | to the end of the function. There are also equivalents to memcpy. | ||
329 | The <function>ins</function> and <function>outs</function> | ||
330 | functions copy bytes, words or longs to the given port. | ||
331 | </para> | ||
332 | </sect1> | ||
333 | |||
334 | </chapter> | ||
335 | |||
336 | <chapter id="pubfunctions"> | ||
337 | <title>Public Functions Provided</title> | ||
338 | !Einclude/asm-i386/io.h | ||
339 | </chapter> | ||
340 | |||
341 | </book> | ||
diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl new file mode 100644 index 000000000000..a34442436128 --- /dev/null +++ b/Documentation/DocBook/gadget.tmpl | |||
@@ -0,0 +1,752 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="USB-Gadget-API"> | ||
6 | <bookinfo> | ||
7 | <title>USB Gadget API for Linux</title> | ||
8 | <date>20 August 2004</date> | ||
9 | <edition>20 August 2004</edition> | ||
10 | |||
11 | <legalnotice> | ||
12 | <para> | ||
13 | This documentation is free software; you can redistribute | ||
14 | it and/or modify it under the terms of the GNU General Public | ||
15 | License as published by the Free Software Foundation; either | ||
16 | version 2 of the License, or (at your option) any later | ||
17 | version. | ||
18 | </para> | ||
19 | |||
20 | <para> | ||
21 | This program is distributed in the hope that it will be | ||
22 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
23 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
24 | See the GNU General Public License for more details. | ||
25 | </para> | ||
26 | |||
27 | <para> | ||
28 | You should have received a copy of the GNU General Public | ||
29 | License along with this program; if not, write to the Free | ||
30 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
31 | MA 02111-1307 USA | ||
32 | </para> | ||
33 | |||
34 | <para> | ||
35 | For more details see the file COPYING in the source | ||
36 | distribution of Linux. | ||
37 | </para> | ||
38 | </legalnotice> | ||
39 | <copyright> | ||
40 | <year>2003-2004</year> | ||
41 | <holder>David Brownell</holder> | ||
42 | </copyright> | ||
43 | |||
44 | <author> | ||
45 | <firstname>David</firstname> | ||
46 | <surname>Brownell</surname> | ||
47 | <affiliation> | ||
48 | <address><email>dbrownell@users.sourceforge.net</email></address> | ||
49 | </affiliation> | ||
50 | </author> | ||
51 | </bookinfo> | ||
52 | |||
53 | <toc></toc> | ||
54 | |||
55 | <chapter><title>Introduction</title> | ||
56 | |||
57 | <para>This document presents a Linux-USB "Gadget" | ||
58 | kernel mode | ||
59 | API, for use within peripherals and other USB devices | ||
60 | that embed Linux. | ||
61 | It provides an overview of the API structure, | ||
62 | and shows how that fits into a system development project. | ||
63 | This is the first such API released on Linux to address | ||
64 | a number of important problems, including: </para> | ||
65 | |||
66 | <itemizedlist> | ||
67 | <listitem><para>Supports USB 2.0, for high speed devices which | ||
68 | can stream data at several dozen megabytes per second. | ||
69 | </para></listitem> | ||
70 | <listitem><para>Handles devices with dozens of endpoints just as | ||
71 | well as ones with just two fixed-function ones. Gadget drivers | ||
72 | can be written so they're easy to port to new hardware. | ||
73 | </para></listitem> | ||
74 | <listitem><para>Flexible enough to expose more complex USB device | ||
75 | capabilities such as multiple configurations, multiple interfaces, | ||
76 | composite devices, | ||
77 | and alternate interface settings. | ||
78 | </para></listitem> | ||
79 | <listitem><para>USB "On-The-Go" (OTG) support, in conjunction | ||
80 | with updates to the Linux-USB host side. | ||
81 | </para></listitem> | ||
82 | <listitem><para>Sharing data structures and API models with the | ||
83 | Linux-USB host side API. This helps the OTG support, and | ||
84 | looks forward to more-symmetric frameworks (where the same | ||
85 | I/O model is used by both host and device side drivers). | ||
86 | </para></listitem> | ||
87 | <listitem><para>Minimalist, so it's easier to support new device | ||
88 | controller hardware. I/O processing doesn't imply large | ||
89 | demands for memory or CPU resources. | ||
90 | </para></listitem> | ||
91 | </itemizedlist> | ||
92 | |||
93 | |||
94 | <para>Most Linux developers will not be able to use this API, since they | ||
95 | have USB "host" hardware in a PC, workstation, or server. | ||
96 | Linux users with embedded systems are more likely to | ||
97 | have USB peripheral hardware. | ||
98 | To distinguish drivers running inside such hardware from the | ||
99 | more familiar Linux "USB device drivers", | ||
100 | which are host side proxies for the real USB devices, | ||
101 | a different term is used: | ||
102 | the drivers inside the peripherals are "USB gadget drivers". | ||
103 | In USB protocol interactions, the device driver is the master | ||
104 | (or "client driver") | ||
105 | and the gadget driver is the slave (or "function driver"). | ||
106 | </para> | ||
107 | |||
108 | <para>The gadget API resembles the host side Linux-USB API in that both | ||
109 | use queues of request objects to package I/O buffers, and those requests | ||
110 | may be submitted or canceled. | ||
111 | They share common definitions for the standard USB | ||
112 | <emphasis>Chapter 9</emphasis> messages, structures, and constants. | ||
113 | Also, both APIs bind and unbind drivers to devices. | ||
114 | The APIs differ in detail, since the host side's current | ||
115 | URB framework exposes a number of implementation details | ||
116 | and assumptions that are inappropriate for a gadget API. | ||
117 | While the model for control transfers and configuration | ||
118 | management is necessarily different (one side is a hardware-neutral master, | ||
119 | the other is a hardware-aware slave), the endpoint I/O API used here | ||
120 | should also be usable for an overhead-reduced host side API. | ||
121 | </para> | ||
122 | |||
123 | </chapter> | ||
124 | |||
125 | <chapter id="structure"><title>Structure of Gadget Drivers</title> | ||
126 | |||
127 | <para>A system running inside a USB peripheral | ||
128 | normally has at least three layers inside the kernel to handle | ||
129 | USB protocol processing, and may have additional layers in | ||
130 | user space code. | ||
131 | The "gadget" API is used by the middle layer to interact | ||
132 | with the lowest level (which directly handles hardware). | ||
133 | </para> | ||
134 | |||
135 | <para>In Linux, from the bottom up, these layers are: | ||
136 | </para> | ||
137 | |||
138 | <variablelist> | ||
139 | |||
140 | <varlistentry> | ||
141 | <term><emphasis>USB Controller Driver</emphasis></term> | ||
142 | |||
143 | <listitem> | ||
144 | <para>This is the lowest software level. | ||
145 | It is the only layer that talks to hardware, | ||
146 | through registers, fifos, dma, irqs, and the like. | ||
147 | The <filename>&lt;linux/usb_gadget.h&gt;</filename> API abstracts | ||
148 | the peripheral controller endpoint hardware. | ||
149 | That hardware is exposed through endpoint objects, which accept | ||
150 | streams of IN/OUT buffers, and through callbacks that interact | ||
151 | with gadget drivers. | ||
152 | Since normal USB devices only have one upstream | ||
153 | port, they only have one of these drivers. | ||
154 | The controller driver can support any number of different | ||
155 | gadget drivers, but only one of them can be used at a time. | ||
156 | </para> | ||
157 | |||
158 | <para>Examples of such controller hardware include | ||
159 | the PCI-based NetChip 2280 USB 2.0 high speed controller, | ||
160 | the SA-11x0 or PXA-25x UDC (found within many PDAs), | ||
161 | and a variety of other products. | ||
162 | </para> | ||
163 | |||
164 | </listitem></varlistentry> | ||
165 | |||
166 | <varlistentry> | ||
167 | <term><emphasis>Gadget Driver</emphasis></term> | ||
168 | |||
169 | <listitem> | ||
170 | <para>The lower boundary of this driver implements hardware-neutral | ||
171 | USB functions, using calls to the controller driver. | ||
172 | Because such hardware varies widely in capabilities and restrictions, | ||
173 | and is used in embedded environments where space is at a premium, | ||
174 | the gadget driver is often configured at compile time | ||
175 | to work with endpoints supported by one particular controller. | ||
176 | Gadget drivers may be portable to several different controllers, | ||
177 | using conditional compilation. | ||
178 | (Recent kernels substantially simplify the work involved in | ||
179 | supporting new hardware, by <emphasis>autoconfiguring</emphasis> | ||
180 | endpoints automatically for many bulk-oriented drivers.) | ||
181 | Gadget driver responsibilities include: | ||
182 | </para> | ||
183 | <itemizedlist> | ||
184 | <listitem><para>handling setup requests (ep0 protocol responses) | ||
185 | possibly including class-specific functionality | ||
186 | </para></listitem> | ||
187 | <listitem><para>returning configuration and string descriptors | ||
188 | </para></listitem> | ||
189 | <listitem><para>(re)setting configurations and interface | ||
190 | altsettings, including enabling and configuring endpoints | ||
191 | </para></listitem> | ||
192 | <listitem><para>handling life cycle events, such as managing | ||
193 | bindings to hardware, | ||
194 | USB suspend/resume, remote wakeup, | ||
195 | and disconnection from the USB host. | ||
196 | </para></listitem> | ||
197 | <listitem><para>managing IN and OUT transfers on all currently | ||
198 | enabled endpoints | ||
199 | </para></listitem> | ||
200 | </itemizedlist> | ||
201 | |||
202 | <para> | ||
203 | Such drivers may be modules of proprietary code, although | ||
204 | that approach is discouraged in the Linux community. | ||
205 | </para> | ||
206 | </listitem></varlistentry> | ||
207 | |||
208 | <varlistentry> | ||
209 | <term><emphasis>Upper Level</emphasis></term> | ||
210 | |||
211 | <listitem> | ||
212 | <para>Most gadget drivers have an upper boundary that connects | ||
213 | to some Linux driver or framework. | ||
214 | Through that boundary flows the data which the gadget driver | ||
215 | produces and/or consumes through protocol transfers over USB. | ||
216 | Examples include: | ||
217 | </para> | ||
218 | <itemizedlist> | ||
219 | <listitem><para>user mode code, using generic (gadgetfs) | ||
220 | or application specific files in | ||
221 | <filename>/dev</filename> | ||
222 | </para></listitem> | ||
223 | <listitem><para>networking subsystem (for network gadgets, | ||
224 | like the CDC Ethernet Model gadget driver) | ||
225 | </para></listitem> | ||
226 | <listitem><para>data capture drivers, perhaps video4Linux or | ||
227 | a scanner driver; or test and measurement hardware. | ||
228 | </para></listitem> | ||
229 | <listitem><para>input subsystem (for HID gadgets) | ||
230 | </para></listitem> | ||
231 | <listitem><para>sound subsystem (for audio gadgets) | ||
232 | </para></listitem> | ||
233 | <listitem><para>file system (for PTP gadgets) | ||
234 | </para></listitem> | ||
235 | <listitem><para>block i/o subsystem (for usb-storage gadgets) | ||
236 | </para></listitem> | ||
237 | <listitem><para>... and more </para></listitem> | ||
238 | </itemizedlist> | ||
239 | </listitem></varlistentry> | ||
240 | |||
241 | <varlistentry> | ||
242 | <term><emphasis>Additional Layers</emphasis></term> | ||
243 | |||
244 | <listitem> | ||
245 | <para>Other layers may exist. | ||
246 | These could include kernel layers, such as network protocol stacks, | ||
247 | as well as user mode applications building on standard POSIX | ||
248 | system call APIs such as | ||
249 | <emphasis>open()</emphasis>, <emphasis>close()</emphasis>, | ||
250 | <emphasis>read()</emphasis> and <emphasis>write()</emphasis>. | ||
251 | On newer systems, POSIX Async I/O calls may be an option. | ||
252 | Such user mode code will not necessarily be subject to | ||
253 | the GNU General Public License (GPL). | ||
254 | </para> | ||
255 | </listitem></varlistentry> | ||
256 | |||
257 | |||
258 | </variablelist> | ||
259 | |||
260 | <para>OTG-capable systems will also need to include a standard Linux-USB | ||
261 | host side stack, | ||
262 | with <emphasis>usbcore</emphasis>, | ||
263 | one or more <emphasis>Host Controller Drivers</emphasis> (HCDs), | ||
264 | <emphasis>USB Device Drivers</emphasis> to support | ||
265 | the OTG "Targeted Peripheral List", | ||
266 | and so forth. | ||
267 | There will also be an <emphasis>OTG Controller Driver</emphasis>, | ||
268 | which is visible to gadget and device driver developers only indirectly. | ||
269 | That helps the host and device side USB controllers implement the | ||
270 | two new OTG protocols (HNP and SRP). | ||
271 | Roles switch (host to peripheral, or vice versa) using HNP | ||
272 | during USB suspend processing, and SRP can be viewed as a | ||
273 | more battery-friendly kind of device wakeup protocol. | ||
274 | </para> | ||
275 | |||
276 | <para>Over time, reusable utilities are evolving to help make some | ||
277 | gadget driver tasks simpler. | ||
278 | For example, building configuration descriptors from vectors of | ||
279 | descriptors for the configuration's interfaces and endpoints is | ||
280 | now automated, and many drivers now use autoconfiguration to | ||
281 | choose hardware endpoints and initialize their descriptors. | ||
282 | |||
283 | A potential example of particular interest | ||
284 | is code implementing standard USB-IF protocols for | ||
285 | HID, networking, storage, or audio classes. | ||
286 | Some developers are interested in KDB or KGDB hooks, to let | ||
287 | target hardware be remotely debugged. | ||
288 | Most such USB protocol code doesn't need to be hardware-specific, | ||
289 | any more than network protocols like X11, HTTP, or NFS are. | ||
290 | Such gadget-side interface drivers should eventually be combined, | ||
291 | to implement composite devices. | ||
292 | </para> | ||
293 | |||
294 | </chapter> | ||
295 | |||
296 | |||
297 | <chapter id="api"><title>Kernel Mode Gadget API</title> | ||
298 | |||
299 | <para>Gadget drivers declare themselves through a | ||
300 | <emphasis>struct usb_gadget_driver</emphasis>, which is responsible for | ||
301 | most parts of enumeration for a <emphasis>struct usb_gadget</emphasis>. | ||
302 | The response to a set_configuration usually involves | ||
303 | enabling one or more of the <emphasis>struct usb_ep</emphasis> objects | ||
304 | exposed by the gadget, and submitting one or more | ||
305 | <emphasis>struct usb_request</emphasis> buffers to transfer data. | ||
306 | Understand those four data types, and their operations, and | ||
307 | you will understand how this API works. | ||
308 | </para> | ||
309 | |||
310 | <note><title>Incomplete Data Type Descriptions</title> | ||
311 | |||
312 | <para>This documentation was prepared using the standard Linux | ||
313 | kernel <filename>docproc</filename> tool, which turns text | ||
314 | and in-code comments into SGML DocBook and then into usable | ||
315 | formats such as HTML or PDF. | ||
316 | Other than the "Chapter 9" data types, most of the significant | ||
317 | data types and functions are described here. | ||
318 | </para> | ||
319 | |||
320 | <para>However, docproc does not understand all the C constructs | ||
321 | that are used, so some relevant information is likely omitted from | ||
322 | what you are reading. | ||
323 | One example of such information is endpoint autoconfiguration. | ||
324 | You'll have to read the header file, and use example source | ||
325 | code (such as that for "Gadget Zero"), to fully understand the API. | ||
326 | </para> | ||
327 | |||
328 | <para>The part of the API implementing some basic | ||
329 | driver capabilities is specific to the version of the | ||
330 | Linux kernel that's in use. | ||
331 | The 2.6 kernel includes a <emphasis>driver model</emphasis> | ||
332 | framework that has no analogue on earlier kernels; | ||
333 | so those parts of the gadget API are not fully portable. | ||
334 | (They are implemented on 2.4 kernels, but in a different way.) | ||
335 | The driver model state is another part of this API that is | ||
336 | ignored by the kerneldoc tools. | ||
337 | </para> | ||
338 | </note> | ||
339 | |||
340 | <para>The core API does not expose | ||
341 | every possible hardware feature, only the most widely available ones. | ||
342 | There are significant hardware features, such as device-to-device DMA | ||
343 | (without temporary storage in a memory buffer) | ||
344 | that would be added using hardware-specific APIs. | ||
345 | </para> | ||
346 | |||
347 | <para>This API allows drivers to use conditional compilation to handle | ||
348 | endpoint capabilities of different hardware, but doesn't require that. | ||
349 | Hardware tends to have arbitrary restrictions, relating to | ||
350 | transfer types, addressing, packet sizes, buffering, and availability. | ||
351 | As a rule, such differences only matter for "endpoint zero" logic | ||
352 | that handles device configuration and management. | ||
353 | The API supports limited run-time | ||
354 | detection of capabilities, through naming conventions for endpoints. | ||
355 | Many drivers will be able to at least partially autoconfigure | ||
356 | themselves. | ||
357 | In particular, driver init sections will often have endpoint | ||
358 | autoconfiguration logic that scans the hardware's list of endpoints | ||
359 | to find ones matching the driver requirements | ||
360 | (relying on those conventions), to eliminate some of the most | ||
361 | common reasons for conditional compilation. | ||
362 | </para> | ||
363 | |||
364 | <para>Like the Linux-USB host side API, this API exposes | ||
365 | the "chunky" nature of USB messages: I/O requests are in terms | ||
366 | of one or more "packets", and packet boundaries are visible to drivers. | ||
367 | Compared to RS-232 serial protocols, USB resembles | ||
368 | synchronous protocols like HDLC | ||
369 | (N bytes per frame, multipoint addressing, host as the primary | ||
370 | station and devices as secondary stations) | ||
371 | more than asynchronous ones | ||
372 | (tty style: 8 data bits per frame, no parity, one stop bit). | ||
373 | So for example the controller drivers won't buffer | ||
374 | two single byte writes into a single two-byte USB IN packet, | ||
375 | although gadget drivers may do so when they implement | ||
376 | protocols where packet boundaries (and "short packets") | ||
377 | are not significant. | ||
378 | </para> | ||
379 | |||
380 | <sect1 id="lifecycle"><title>Driver Life Cycle</title> | ||
381 | |||
382 | <para>Gadget drivers make endpoint I/O requests to hardware without | ||
383 | needing to know many details of the hardware, but driver | ||
384 | setup/configuration code needs to handle some differences. | ||
385 | Use the API like this: | ||
386 | </para> | ||
387 | |||
388 | <orderedlist numeration='arabic'> | ||
389 | |||
390 | <listitem><para>Register a driver for the particular device side | ||
391 | usb controller hardware, | ||
392 | such as the net2280 on PCI (USB 2.0), | ||
393 | sa11x0 or pxa25x as found in Linux PDAs, | ||
394 | and so on. | ||
395 | At this point the device is logically in the USB ch9 initial state | ||
396 | ("attached"), drawing no power and not usable | ||
397 | (since it does not yet support enumeration). | ||
398 | No host should see the device yet, since it has not | ||
399 | activated the data line pullup used by the host to | ||
400 | detect a device, even if VBUS power is available. | ||
401 | </para></listitem> | ||
402 | |||
403 | <listitem><para>Register a gadget driver that implements some higher level | ||
404 | device function. That will then bind() to a usb_gadget, which | ||
405 | activates the data line pullup sometime after detecting VBUS. | ||
406 | </para></listitem> | ||
407 | |||
408 | <listitem><para>The hardware driver can now start enumerating. | ||
409 | The steps it handles are to accept USB power and set_address requests. | ||
410 | Other steps are handled by the gadget driver. | ||
411 | If the gadget driver module is unloaded before the host starts to | ||
412 | enumerate, steps before step 7 are skipped. | ||
413 | </para></listitem> | ||
414 | |||
415 | <listitem><para>The gadget driver's setup() call returns usb descriptors, | ||
416 | based both on what the bus interface hardware provides and on the | ||
417 | functionality being implemented. | ||
418 | That can involve alternate settings or configurations, | ||
419 | unless the hardware prevents such operation. | ||
420 | For OTG devices, each configuration descriptor includes | ||
421 | an OTG descriptor. | ||
422 | </para></listitem> | ||
423 | |||
424 | <listitem><para>The gadget driver handles the last step of enumeration, | ||
425 | when the USB host issues a set_configuration call. | ||
426 | It enables all endpoints used in that configuration, | ||
427 | with all interfaces in their default settings. | ||
428 | That involves using a list of the hardware's endpoints, enabling each | ||
429 | endpoint according to its descriptor. | ||
430 | It may also involve using <function>usb_gadget_vbus_draw</function> | ||
431 | to let more power be drawn from VBUS, as allowed by that configuration. | ||
432 | For OTG devices, setting a configuration may also involve reporting | ||
433 | HNP capabilities through a user interface. | ||
434 | </para></listitem> | ||
435 | |||
436 | <listitem><para>Do real work and perform data transfers, possibly involving | ||
437 | changes to interface settings or switching to new configurations, until the | ||
438 | device is disconnect()ed from the host. | ||
439 | Queue any number of transfer requests to each endpoint. | ||
440 | It may be suspended and resumed several times before being disconnected. | ||
441 | On disconnect, the drivers go back to step 3 (above). | ||
442 | </para></listitem> | ||
443 | |||
444 | <listitem><para>When the gadget driver module is being unloaded, | ||
445 | the driver unbind() callback is issued. That lets the controller | ||
446 | driver be unloaded. | ||
447 | </para></listitem> | ||
448 | |||
449 | </orderedlist> | ||
450 | |||
451 | <para>Drivers will normally be arranged so that just loading the | ||
452 | gadget driver module (or statically linking it into a Linux kernel) | ||
453 | allows the peripheral device to be enumerated, but some drivers | ||
454 | will defer enumeration until some higher level component (like | ||
455 | a user mode daemon) enables it. | ||
456 | Note that at this lowest level there are no policies about how | ||
457 | ep0 configuration logic is implemented, | ||
458 | except that it should obey USB specifications. | ||
459 | Such issues are in the domain of gadget drivers, | ||
460 | including knowing about implementation constraints | ||
461 | imposed by some USB controllers | ||
462 | or understanding that composite devices might happen to | ||
463 | be built by integrating reusable components. | ||
464 | </para> | ||
465 | |||
466 | <para>Note that the lifecycle above can be slightly different | ||
467 | for OTG devices. | ||
468 | Other than providing an additional OTG descriptor in each | ||
469 | configuration, only the HNP-related differences are particularly | ||
470 | visible to driver code. | ||
471 | They involve reporting requirements during the SET_CONFIGURATION | ||
472 | request, and the option to invoke HNP during some suspend callbacks. | ||
473 | Also, SRP changes the semantics of | ||
474 | <function>usb_gadget_wakeup</function> | ||
475 | slightly. | ||
476 | </para> | ||
477 | |||
478 | </sect1> | ||
479 | |||
480 | <sect1 id="ch9"><title>USB 2.0 Chapter 9 Types and Constants</title> | ||
481 | |||
482 | <para>Gadget drivers | ||
483 | rely on common USB structures and constants | ||
484 | defined in the | ||
485 | <filename>&lt;linux/usb_ch9.h&gt;</filename> | ||
486 | header file, which is standard in Linux 2.6 kernels. | ||
487 | These are the same types and constants used by host | ||
488 | side drivers (and usbcore). | ||
489 | </para> | ||
490 | |||
491 | !Iinclude/linux/usb_ch9.h | ||
492 | </sect1> | ||
493 | |||
494 | <sect1 id="core"><title>Core Objects and Methods</title> | ||
495 | |||
496 | <para>These are declared in | ||
497 | <filename>&lt;linux/usb_gadget.h&gt;</filename>, | ||
498 | and are used by gadget drivers to interact with | ||
499 | USB peripheral controller drivers. | ||
500 | </para> | ||
501 | |||
502 | <!-- yeech, this is ugly in nsgmls PDF output. | ||
503 | |||
504 | the PDF bookmark and refentry output nesting is wrong, | ||
505 | and the member/argument documentation indents ugly. | ||
506 | |||
507 | plus something (docproc?) adds whitespace before the | ||
508 | descriptive paragraph text, so it can't line up right | ||
509 | unless the explanations are trivial. | ||
510 | --> | ||
511 | |||
512 | !Iinclude/linux/usb_gadget.h | ||
513 | </sect1> | ||
514 | |||
515 | <sect1 id="utils"><title>Optional Utilities</title> | ||
516 | |||
517 | <para>The core API is sufficient for writing a USB Gadget Driver, | ||
518 | but some optional utilities are provided to simplify common tasks. | ||
519 | These utilities include endpoint autoconfiguration. | ||
520 | </para> | ||
521 | |||
522 | !Edrivers/usb/gadget/usbstring.c | ||
523 | !Edrivers/usb/gadget/config.c | ||
524 | <!-- !Edrivers/usb/gadget/epautoconf.c --> | ||
525 | </sect1> | ||
526 | |||
527 | </chapter> | ||
528 | |||
529 | <chapter id="controllers"><title>Peripheral Controller Drivers</title> | ||
530 | |||
531 | <para>The first hardware supporting this API was the NetChip 2280 | ||
532 | controller, which supports USB 2.0 high speed and is based on PCI. | ||
533 | This is the <filename>net2280</filename> driver module. | ||
534 | The driver supports Linux kernel versions 2.4 and 2.6; | ||
535 | contact NetChip Technologies for development boards and product | ||
536 | information. | ||
537 | </para> | ||
538 | |||
539 | <para>Other hardware working in the "gadget" framework includes: | ||
540 | Intel's PXA 25x and IXP42x series processors | ||
541 | (<filename>pxa2xx_udc</filename>), | ||
542 | Toshiba TC86c001 "Goku-S" (<filename>goku_udc</filename>), | ||
543 | Renesas SH7705/7727 (<filename>sh_udc</filename>), | ||
544 | MediaQ 11xx (<filename>mq11xx_udc</filename>), | ||
545 | Hynix HMS30C7202 (<filename>h7202_udc</filename>), | ||
546 | National 9303/4 (<filename>n9604_udc</filename>), | ||
547 | Texas Instruments OMAP (<filename>omap_udc</filename>), | ||
548 | Sharp LH7A40x (<filename>lh7a40x_udc</filename>), | ||
549 | and more. | ||
550 | Most of those are full speed controllers. | ||
551 | </para> | ||
552 | |||
553 | <para>At this writing, there are people at work on drivers in | ||
554 | this framework for several other USB device controllers, | ||
555 | with plans to make many of them be widely available. | ||
556 | </para> | ||
557 | |||
558 | <!-- !Edrivers/usb/gadget/net2280.c --> | ||
559 | |||
560 | <para>A partial USB simulator, | ||
561 | the <filename>dummy_hcd</filename> driver, is available. | ||
562 | It can act like a net2280, a pxa25x, or an sa11x0 in terms | ||
563 | of available endpoints and device speeds; and it simulates | ||
564 | control, bulk, and to some extent interrupt transfers. | ||
565 | That lets you develop some parts of a gadget driver on a normal PC, | ||
566 | without any special hardware, and perhaps with the assistance | ||
567 | of tools such as GDB running with User Mode Linux. | ||
568 | At least one person has expressed interest in adapting that | ||
569 | approach, hooking it up to a simulator for a microcontroller. | ||
570 | Such simulators can help debug subsystems where the runtime hardware | ||
571 | is unfriendly to software development, or is not yet available. | ||
572 | </para> | ||
573 | |||
574 | <para>Support for other controllers is expected to be developed | ||
575 | and contributed | ||
576 | over time, as this driver framework evolves. | ||
577 | </para> | ||
578 | |||
579 | </chapter> | ||
580 | |||
581 | <chapter id="gadget"><title>Gadget Drivers</title> | ||
582 | |||
583 | <para>In addition to <emphasis>Gadget Zero</emphasis> | ||
584 | (used primarily for testing and development with drivers | ||
585 | for usb controller hardware), other gadget drivers exist. | ||
586 | </para> | ||
587 | |||
588 | <para>There's an <emphasis>ethernet</emphasis> gadget | ||
589 | driver, which implements one of the most useful | ||
590 | <emphasis>Communications Device Class</emphasis> (CDC) models. | ||
591 | One of the standards for cable modem interoperability even | ||
592 | specifies the use of this ethernet model as one of two | ||
593 | mandatory options. | ||
594 | Gadgets using this code look to a USB host as if they're | ||
595 | an Ethernet adapter. | ||
596 | It provides access to a network where the gadget's CPU is one host, | ||
597 | which could easily be bridging, routing, or firewalling | ||
598 | access to other networks. | ||
599 | Since some hardware can't fully implement the CDC Ethernet | ||
600 | requirements, this driver also implements a "good parts only" | ||
601 | subset of CDC Ethernet. | ||
602 | (That subset doesn't advertise itself as CDC Ethernet, | ||
603 | to avoid creating problems.) | ||
604 | </para> | ||
605 | |||
606 | <para>Support for Microsoft's <emphasis>RNDIS</emphasis> | ||
607 | protocol has been contributed by Pengutronix and Auerswald GmbH. | ||
608 | This is like CDC Ethernet, but it runs on slightly more USB hardware | ||
609 | (but less than the CDC subset). | ||
610 | However, its main claim to fame is being able to connect directly to | ||
611 | recent versions of Windows, using drivers that Microsoft bundles | ||
612 | and supports, making it much simpler to network with Windows. | ||
613 | </para> | ||
614 | |||
615 | <para>There is also support for user mode gadget drivers, | ||
616 | using <emphasis>gadgetfs</emphasis>. | ||
617 | This provides a <emphasis>User Mode API</emphasis> that presents | ||
618 | each endpoint as a single file descriptor. I/O is done using | ||
619 | normal <emphasis>read()</emphasis> and <emphasis>write()</emphasis> calls. | ||
620 | Familiar tools like GDB and pthreads can be used to | ||
621 | develop and debug user mode drivers, so that once a robust | ||
622 | controller driver is available many applications for it | ||
623 | won't require new kernel mode software. | ||
624 | Linux 2.6 <emphasis>Async I/O (AIO)</emphasis> | ||
625 | support is available, so that user mode software | ||
626 | can stream data with only slightly more overhead | ||
627 | than a kernel driver. | ||
628 | </para> | ||
629 | |||
630 | <para>There's a USB Mass Storage class driver, which provides | ||
631 | a different solution for interoperability with systems such | ||
632 | as MS-Windows and MacOS. | ||
633 | That <emphasis>File-backed Storage</emphasis> driver uses a | ||
634 | file or block device as backing store for a drive, | ||
635 | like the <filename>loop</filename> driver. | ||
636 | The USB host uses the BBB, CB, or CBI versions of the mass | ||
637 | storage class specification, using transparent SCSI commands | ||
638 | to access the data from the backing store. | ||
639 | </para> | ||
640 | |||
641 | <para>There's a "serial line" driver, useful for TTY style | ||
642 | operation over USB. | ||
643 | The latest version of that driver supports CDC ACM style | ||
644 | operation, like a USB modem, and so on most hardware it can | ||
645 | interoperate easily with MS-Windows. | ||
646 | One interesting use of that driver is in boot firmware (like a BIOS), | ||
647 | which can sometimes use that model with very small systems without | ||
648 | real serial lines. | ||
649 | </para> | ||
650 | |||
651 | <para>Support for other kinds of gadget is expected to | ||
652 | be developed and contributed | ||
653 | over time, as this driver framework evolves. | ||
654 | </para> | ||
655 | |||
656 | </chapter> | ||
657 | |||
658 | <chapter id="otg"><title>USB On-The-Go (OTG)</title> | ||
659 | |||
660 | <para>USB OTG support on Linux 2.6 was initially developed | ||
661 | by Texas Instruments for | ||
662 | <ulink url="http://www.omap.com">OMAP</ulink> 16xx and 17xx | ||
663 | series processors. | ||
664 | Other OTG systems should work in similar ways, but the | ||
665 | hardware level details could be very different. | ||
666 | </para> | ||
667 | |||
668 | <para>Systems need specialized hardware support to implement OTG, | ||
669 | notably including a special <emphasis>Mini-AB</emphasis> jack | ||
670 | and associated transceiver to support <emphasis>Dual-Role</emphasis> | ||
671 | operation: | ||
672 | they can act either as a host, using the standard | ||
673 | Linux-USB host side driver stack, | ||
674 | or as a peripheral, using this "gadget" framework. | ||
675 | To do that, the system software relies on small additions | ||
676 | to those programming interfaces, | ||
677 | and on a new internal component (here called an "OTG Controller") | ||
678 | affecting which driver stack connects to the OTG port. | ||
679 | In each role, the system can re-use the existing pool of | ||
680 | hardware-neutral drivers, layered on top of the controller | ||
681 | driver interfaces (<emphasis>usb_bus</emphasis> or | ||
682 | <emphasis>usb_gadget</emphasis>). | ||
683 | Such drivers need at most minor changes, and most of the calls | ||
684 | added to support OTG can also benefit non-OTG products. | ||
685 | </para> | ||
686 | |||
687 | <itemizedlist> | ||
688 | <listitem><para>Gadget drivers test the <emphasis>is_otg</emphasis> | ||
689 | flag, and use it to determine whether or not to include | ||
690 | an OTG descriptor in each of their configurations. | ||
691 | </para></listitem> | ||
692 | <listitem><para>Gadget drivers may need changes to support the | ||
693 | two new OTG protocols, exposed in new gadget attributes | ||
694 | such as the <emphasis>b_hnp_enable</emphasis> flag. | ||
695 | HNP support should be reported through a user interface | ||
696 | (two LEDs could suffice), and is triggered in some cases | ||
697 | when the host suspends the peripheral. | ||
698 | SRP support can be user-initiated just like remote wakeup, | ||
699 | probably by pressing the same button. | ||
700 | </para></listitem> | ||
701 | <listitem><para>On the host side, USB device drivers need | ||
702 | to be taught to trigger HNP at appropriate moments, using | ||
703 | <function>usb_suspend_device()</function>. | ||
704 | That also conserves battery power, which is useful even | ||
705 | for non-OTG configurations. | ||
706 | </para></listitem> | ||
707 | <listitem><para>Also on the host side, a driver must support the | ||
708 | OTG "Targeted Peripheral List". That's just a whitelist, | ||
709 | used to reject peripherals not supported with a given | ||
710 | Linux OTG host. | ||
711 | <emphasis>This whitelist is product-specific; | ||
712 | each product must modify <filename>otg_whitelist.h</filename> | ||
713 | to match its interoperability specification. | ||
714 | </emphasis> | ||
715 | </para> | ||
716 | <para>Non-OTG Linux hosts, like PCs and workstations, | ||
717 | normally have some solution for adding drivers, so that | ||
718 | peripherals that aren't recognized can eventually be supported. | ||
719 | That approach is unreasonable for consumer products that may | ||
720 | never have their firmware upgraded, and where it's usually | ||
721 | unrealistic to expect traditional PC/workstation/server kinds | ||
722 | of support model to work. | ||
723 | For example, it's often impractical to change device firmware | ||
724 | once the product has been distributed, so driver bugs can't | ||
725 | normally be fixed if they're found after shipment. | ||
726 | </para></listitem> | ||
727 | </itemizedlist> | ||
728 | |||
729 | <para> | ||
730 | Additional changes are needed below those hardware-neutral | ||
731 | <emphasis>usb_bus</emphasis> and <emphasis>usb_gadget</emphasis> | ||
732 | driver interfaces; those aren't discussed here in any detail. | ||
733 | Those affect the hardware-specific code for each USB Host or Peripheral | ||
734 | controller, and how the HCD initializes (since OTG can be active only | ||
735 | on a single port). | ||
736 | They also involve what may be called an <emphasis>OTG Controller | ||
737 | Driver</emphasis>, managing the OTG transceiver and the OTG state | ||
738 | machine logic as well as much of the root hub behavior for the | ||
739 | OTG port. | ||
740 | The OTG controller driver needs to activate and deactivate USB | ||
741 | controllers depending on the relevant device role. | ||
742 | Some related changes were needed inside usbcore, so that it | ||
743 | can identify OTG-capable devices and respond appropriately | ||
744 | to HNP or SRP protocols. | ||
745 | </para> | ||
746 | |||
747 | </chapter> | ||
748 | |||
749 | </book> | ||
750 | <!-- | ||
751 | vim:syntax=sgml:sw=4 | ||
752 | --> | ||
diff --git a/Documentation/DocBook/journal-api.tmpl b/Documentation/DocBook/journal-api.tmpl new file mode 100644 index 000000000000..1ef6f43c6d8f --- /dev/null +++ b/Documentation/DocBook/journal-api.tmpl | |||
@@ -0,0 +1,333 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="LinuxJBDAPI"> | ||
6 | <bookinfo> | ||
7 | <title>The Linux Journalling API</title> | ||
8 | <authorgroup> | ||
9 | <author> | ||
10 | <firstname>Roger</firstname> | ||
11 | <surname>Gammans</surname> | ||
12 | <affiliation> | ||
13 | <address> | ||
14 | <email>rgammans@computer-surgery.co.uk</email> | ||
15 | </address> | ||
16 | </affiliation> | ||
17 | </author> | ||
18 | </authorgroup> | ||
19 | |||
20 | <authorgroup> | ||
21 | <author> | ||
22 | <firstname>Stephen</firstname> | ||
23 | <surname>Tweedie</surname> | ||
24 | <affiliation> | ||
25 | <address> | ||
26 | <email>sct@redhat.com</email> | ||
27 | </address> | ||
28 | </affiliation> | ||
29 | </author> | ||
30 | </authorgroup> | ||
31 | |||
32 | <copyright> | ||
33 | <year>2002</year> | ||
34 | <holder>Roger Gammans</holder> | ||
35 | </copyright> | ||
36 | |||
37 | <legalnotice> | ||
38 | <para> | ||
39 | This documentation is free software; you can redistribute | ||
40 | it and/or modify it under the terms of the GNU General Public | ||
41 | License as published by the Free Software Foundation; either | ||
42 | version 2 of the License, or (at your option) any later | ||
43 | version. | ||
44 | </para> | ||
45 | |||
46 | <para> | ||
47 | This program is distributed in the hope that it will be | ||
48 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
49 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
50 | See the GNU General Public License for more details. | ||
51 | </para> | ||
52 | |||
53 | <para> | ||
54 | You should have received a copy of the GNU General Public | ||
55 | License along with this program; if not, write to the Free | ||
56 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
57 | MA 02111-1307 USA | ||
58 | </para> | ||
59 | |||
60 | <para> | ||
61 | For more details see the file COPYING in the source | ||
62 | distribution of Linux. | ||
63 | </para> | ||
64 | </legalnotice> | ||
65 | </bookinfo> | ||
66 | |||
67 | <toc></toc> | ||
68 | |||
69 | <chapter id="Overview"> | ||
70 | <title>Overview</title> | ||
71 | <sect1> | ||
72 | <title>Details</title> | ||
73 | <para> | ||
74 | The journalling layer is easy to use. You need to | ||
75 | first of all create a journal_t data structure. There are | ||
76 | two calls to do this dependent on how you decide to allocate the physical | ||
77 | media on which the journal resides. The journal_init_inode() call | ||
78 | is for journals stored in filesystem inodes, or the journal_init_dev() | ||
79 | call can be used for journals stored on a raw device (in a continuous range | ||
80 | of blocks). A journal_t is a typedef for a struct pointer, so when | ||
81 | you are finally finished make sure you call journal_destroy() on it | ||
82 | to free up any used kernel memory. | ||
83 | </para> | ||
84 | |||
85 | <para> | ||
86 | Once you have got your journal_t object you need to 'mount' or load the journal | ||
87 | file, unless of course you haven't initialised it yet - in which case you | ||
88 | need to call journal_create(). | ||
89 | </para> | ||
90 | |||
91 | <para> | ||
92 | Most of the time however your journal file will already have been created, but | ||
93 | before you load it you must call journal_wipe() to empty the journal file. | ||
94 | Hang on, you say, what if the filesystem wasn't cleanly umount()'d? Well, it is the | ||
95 | job of the client file system to detect this and skip the call to journal_wipe(). | ||
96 | </para> | ||
97 | |||
98 | <para> | ||
99 | In either case the next call should be to journal_load() which prepares the | ||
100 | journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery() | ||
101 | for you if it detects any outstanding transactions in the journal and similarly | ||
102 | journal_load() will call journal_recover() if necessary. | ||
103 | I would advise reading fs/ext3/super.c for examples on this stage. | ||
104 | [RGG: Why is the journal_wipe() call necessary - doesn't this needlessly | ||
105 | complicate the API? Or isn't it a good idea for the journal layer to hide | ||
106 | dirty mounts from the client fs?] | ||
107 | </para> | ||
108 | |||
109 | <para> | ||
110 | Now you can go ahead and start modifying the underlying | ||
111 | filesystem. Almost. | ||
112 | </para> | ||
113 | |||
114 | |||
115 | <para> | ||
116 | |||
117 | You still need to actually journal your filesystem changes, this | ||
118 | is done by wrapping them into transactions. Additionally you | ||
119 | also need to wrap the modification of each of the buffers | ||
120 | with calls to the journal layer, so it knows what the modifications | ||
121 | you are actually making are. To do this use journal_start() which | ||
122 | returns a transaction handle. | ||
123 | </para> | ||
124 | |||
125 | <para> | ||
126 | journal_start() | ||
127 | and its counterpart journal_stop(), which indicates the end of a transaction | ||
128 | are nestable calls, so you can reenter a transaction if necessary, | ||
129 | but remember you must call journal_stop() the same number of times as | ||
130 | journal_start() before the transaction is completed (or more accurately | ||
131 | leaves the update phase). Ext3/VFS makes use of this feature to simplify | ||
132 | quota support. | ||
133 | </para> | ||
134 | |||
135 | <para> | ||
136 | Inside each transaction you need to wrap the modifications to the | ||
137 | individual buffers (blocks). Before you start to modify a buffer you | ||
138 | need to call journal_get_{create,write,undo}_access() as appropriate, | ||
139 | this allows the journalling layer to copy the unmodified data if it | ||
140 | needs to. After all, the buffer may be part of a previously uncommitted | ||
141 | transaction. | ||
142 | At this point you are at last ready to modify a buffer, and once | ||
143 | you have done so you need to call journal_dirty_{meta,}data(). | ||
144 | Or if you've asked for access to a buffer you now know is no longer | ||
145 | required to be pushed back on the device you can call journal_forget() | ||
146 | in much the same way as you might have used bforget() in the past. | ||
147 | </para> | ||
148 | |||
149 | <para> | ||
150 | A journal_flush() may be called at any time to commit and checkpoint | ||
151 | all your transactions. | ||
152 | </para> | ||
153 | |||
154 | <para> | ||
155 | Then at umount time, in your put_super() (2.4) or write_super() (2.5) | ||
156 | you can then call journal_destroy() to clean up your in-core journal object. | ||
157 | </para> | ||
158 | |||
159 | |||
160 | <para> | ||
161 | Unfortunately there are a couple of ways the journal layer can cause a deadlock. | ||
162 | The first thing to note is that each task can only have | ||
163 | a single outstanding transaction at any one time, remember nothing | ||
164 | commits until the outermost journal_stop(). This means | ||
165 | you must complete the transaction at the end of each file/inode/address | ||
166 | etc. operation you perform, so that the journalling system isn't re-entered | ||
167 | on another journal. This matters because transactions can't be nested/batched | ||
168 | across differing journals, and another filesystem other than | ||
169 | yours (say ext3) may be modified in a later syscall. | ||
170 | </para> | ||
171 | |||
172 | <para> | ||
173 | The second case to bear in mind is that journal_start() can | ||
174 | block if there isn't enough space in the journal for your transaction | ||
175 | (based on the passed nblocks param) - when it blocks it merely(!) needs to | ||
176 | wait for transactions to complete and be committed from other tasks, | ||
177 | so essentially we are waiting for journal_stop(). So to avoid | ||
178 | deadlocks you must treat journal_start/stop() as if they | ||
179 | were semaphores and include them in your semaphore ordering rules to prevent | ||
180 | deadlocks. Note that journal_extend() has similar blocking behaviour to | ||
181 | journal_start() so you can deadlock here just as easily as on journal_start(). | ||
182 | </para> | ||
183 | |||
184 | <para> | ||
185 | Try to reserve the right number of blocks the first time. ;-). This will | ||
186 | be the maximum number of blocks you are going to touch in this transaction. | ||
187 | I advise having a look at ext3_jbd.h, at least, to see the basis on which | ||
188 | ext3 makes these decisions. | ||
189 | </para> | ||
190 | |||
191 | <para> | ||
192 | Another wriggle to watch out for is your on-disk block allocation strategy. | ||
193 | Why? Because, if you undo a delete, you need to ensure you haven't reused any | ||
194 | of the freed blocks in a later transaction. One simple way of doing this | ||
195 | is make sure any blocks you allocate only have checkpointed transactions | ||
196 | listed against them. Ext3 does this in ext3_test_allocatable(). | ||
197 | </para> | ||
198 | |||
199 | <para> | ||
200 | Locking is also provided through journal_{un,}lock_updates(); | ||
201 | ext3 uses this when it wants a window with a clean and stable fs for a moment, | ||
202 | e.g. | ||
203 | </para> | ||
204 | |||
205 | <programlisting> | ||
206 | |||
207 | journal_lock_updates() //stop new stuff happening.. | ||
208 | journal_flush() // checkpoint everything. | ||
209 | ..do stuff on stable fs | ||
210 | journal_unlock_updates() // carry on with filesystem use. | ||
211 | </programlisting> | ||
212 | |||
213 | <para> | ||
214 | The opportunities for abuse and DOS attacks with this should be obvious, | ||
215 | if you allow unprivileged userspace to trigger codepaths containing these | ||
216 | calls. | ||
217 | </para> | ||
218 | |||
219 | <para> | ||
220 | A new feature of jbd since 2.5.25 is commit callbacks: with the new | ||
221 | journal_callback_set() function you can now ask the journalling layer | ||
222 | to call you back when the transaction is finally committed to disk, so that | ||
223 | you can do some of your own management. The key to this is the journal_callback | ||
224 | struct, this maintains the internal callback information but you can | ||
225 | extend it like this:- | ||
226 | </para> | ||
227 | <programlisting> | ||
228 | struct myfs_callback_s { | ||
229 | //Data structure element required by jbd.. | ||
230 | struct journal_callback for_jbd; | ||
231 | // Stuff for myfs allocated together. | ||
232 | myfs_inode* i_commited; | ||
233 | |||
234 | } | ||
235 | </programlisting> | ||
236 | |||
237 | <para> | ||
238 | This would be useful if you needed to know when data was committed to a | ||
239 | particular inode. | ||
240 | </para> | ||
241 | |||
242 | </sect1> | ||
243 | |||
244 | <sect1> | ||
245 | <title>Summary</title> | ||
246 | <para> | ||
247 | Using the journal is a matter of wrapping the different context changes, | ||
248 | being each mount, each modification (transaction) and each changed buffer | ||
249 | to tell the journalling layer about them. | ||
250 | </para> | ||
251 | |||
252 | <para> | ||
253 | Here is some pseudo code to give you an idea of how it works, as | ||
254 | an example. | ||
255 | </para> | ||
256 | |||
257 | <programlisting> | ||
258 | journal_t* my_jnrl = journal_create(); | ||
259 | journal_init_{dev,inode}(my_jnrl,...) | ||
260 | if (clean) journal_wipe(); | ||
261 | journal_load(); | ||
262 | |||
263 | foreach(transaction) { /*transactions must be | ||
264 | completed before | ||
265 | a syscall returns to | ||
266 | userspace*/ | ||
267 | |||
268 | handle_t * xct=journal_start(my_jnrl); | ||
269 | foreach(bh) { | ||
270 | journal_get_{create,write,undo}_access(xct,bh); | ||
271 | if ( myfs_modify(bh) ) { /* returns true | ||
272 | if makes changes */ | ||
273 | journal_dirty_{meta,}data(xct,bh); | ||
274 | } else { | ||
275 | journal_forget(bh); | ||
276 | } | ||
277 | } | ||
278 | journal_stop(xct); | ||
279 | } | ||
280 | journal_destroy(my_jnrl); | ||
281 | </programlisting> | ||
282 | </sect1> | ||
283 | |||
284 | </chapter> | ||
285 | |||
286 | <chapter id="adt"> | ||
287 | <title>Data Types</title> | ||
288 | <para> | ||
289 | The journalling layer uses typedefs to 'hide' the concrete definitions | ||
290 | of the structures used. As a client of the JBD layer you can | ||
291 | just rely on using the pointer as a magic cookie of some sort. | ||
292 | |||
293 | Obviously the hiding is not enforced as this is 'C'. | ||
294 | </para> | ||
295 | <sect1><title>Structures</title> | ||
296 | !Iinclude/linux/jbd.h | ||
297 | </sect1> | ||
298 | </chapter> | ||
299 | |||
300 | <chapter id="calls"> | ||
301 | <title>Functions</title> | ||
302 | <para> | ||
303 | The functions here are split into two groups: those that | ||
304 | affect a journal as a whole, and those which are used to | ||
305 | manage transactions. | ||
306 | </para> | ||
307 | <sect1><title>Journal Level</title> | ||
308 | !Efs/jbd/journal.c | ||
309 | !Efs/jbd/recovery.c | ||
310 | </sect1> | ||
311 | <sect1><title>Transaction Level</title> | ||
312 | !Efs/jbd/transaction.c | ||
313 | </sect1> | ||
314 | </chapter> | ||
315 | <chapter> | ||
316 | <title>See also</title> | ||
317 | <para> | ||
318 | <citation> | ||
319 | <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz"> | ||
320 | Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie | ||
321 | </ulink> | ||
322 | </citation> | ||
323 | </para> | ||
324 | <para> | ||
325 | <citation> | ||
326 | <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html"> | ||
327 | Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie | ||
328 | </ulink> | ||
329 | </citation> | ||
330 | </para> | ||
331 | </chapter> | ||
332 | |||
333 | </book> | ||
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl new file mode 100644 index 000000000000..1bd20c860285 --- /dev/null +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -0,0 +1,342 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="LinuxKernelAPI"> | ||
6 | <bookinfo> | ||
7 | <title>The Linux Kernel API</title> | ||
8 | |||
9 | <legalnotice> | ||
10 | <para> | ||
11 | This documentation is free software; you can redistribute | ||
12 | it and/or modify it under the terms of the GNU General Public | ||
13 | License as published by the Free Software Foundation; either | ||
14 | version 2 of the License, or (at your option) any later | ||
15 | version. | ||
16 | </para> | ||
17 | |||
18 | <para> | ||
19 | This program is distributed in the hope that it will be | ||
20 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
21 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
22 | See the GNU General Public License for more details. | ||
23 | </para> | ||
24 | |||
25 | <para> | ||
26 | You should have received a copy of the GNU General Public | ||
27 | License along with this program; if not, write to the Free | ||
28 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
29 | MA 02111-1307 USA | ||
30 | </para> | ||
31 | |||
32 | <para> | ||
33 | For more details see the file COPYING in the source | ||
34 | distribution of Linux. | ||
35 | </para> | ||
36 | </legalnotice> | ||
37 | </bookinfo> | ||
38 | |||
39 | <toc></toc> | ||
40 | |||
41 | <chapter id="Basics"> | ||
42 | <title>Driver Basics</title> | ||
43 | <sect1><title>Driver Entry and Exit points</title> | ||
44 | !Iinclude/linux/init.h | ||
45 | </sect1> | ||
46 | |||
47 | <sect1><title>Atomic and pointer manipulation</title> | ||
48 | !Iinclude/asm-i386/atomic.h | ||
49 | !Iinclude/asm-i386/unaligned.h | ||
50 | </sect1> | ||
51 | |||
52 | <!-- FIXME: | ||
53 | kernel/sched.c has no docs, which stuffs up the sgml. Comment | ||
54 | out until somebody adds docs. KAO | ||
55 | <sect1><title>Delaying, scheduling, and timer routines</title> | ||
56 | X!Ekernel/sched.c | ||
57 | </sect1> | ||
58 | KAO --> | ||
59 | </chapter> | ||
60 | |||
61 | <chapter id="adt"> | ||
62 | <title>Data Types</title> | ||
63 | <sect1><title>Doubly Linked Lists</title> | ||
64 | !Iinclude/linux/list.h | ||
65 | </sect1> | ||
66 | </chapter> | ||
67 | |||
68 | <chapter id="libc"> | ||
69 | <title>Basic C Library Functions</title> | ||
70 | |||
71 | <para> | ||
72 | When writing drivers, you cannot in general use routines which are | ||
73 | from the C Library. Some of the functions have been found generally | ||
74 | useful and they are listed below. The behaviour of these functions | ||
75 | may vary slightly from those defined by ANSI, and these deviations | ||
76 | are noted in the text. | ||
77 | </para> | ||
78 | |||
79 | <sect1><title>String Conversions</title> | ||
80 | !Ilib/vsprintf.c | ||
81 | !Elib/vsprintf.c | ||
82 | </sect1> | ||
83 | <sect1><title>String Manipulation</title> | ||
84 | !Ilib/string.c | ||
85 | !Elib/string.c | ||
86 | </sect1> | ||
87 | <sect1><title>Bit Operations</title> | ||
88 | !Iinclude/asm-i386/bitops.h | ||
89 | </sect1> | ||
90 | </chapter> | ||
91 | |||
92 | <chapter id="mm"> | ||
93 | <title>Memory Management in Linux</title> | ||
94 | <sect1><title>The Slab Cache</title> | ||
95 | !Emm/slab.c | ||
96 | </sect1> | ||
97 | <sect1><title>User Space Memory Access</title> | ||
98 | !Iinclude/asm-i386/uaccess.h | ||
99 | !Iarch/i386/lib/usercopy.c | ||
100 | </sect1> | ||
101 | </chapter> | ||
102 | |||
103 | <chapter id="kfifo"> | ||
104 | <title>FIFO Buffer</title> | ||
105 | <sect1><title>kfifo interface</title> | ||
106 | !Iinclude/linux/kfifo.h | ||
107 | !Ekernel/kfifo.c | ||
108 | </sect1> | ||
109 | </chapter> | ||
110 | |||
111 | <chapter id="proc"> | ||
112 | <title>The proc filesystem</title> | ||
113 | |||
114 | <sect1><title>sysctl interface</title> | ||
115 | !Ekernel/sysctl.c | ||
116 | </sect1> | ||
117 | </chapter> | ||
118 | |||
119 | <chapter id="debugfs"> | ||
120 | <title>The debugfs filesystem</title> | ||
121 | |||
122 | <sect1><title>debugfs interface</title> | ||
123 | !Efs/debugfs/inode.c | ||
124 | !Efs/debugfs/file.c | ||
125 | </sect1> | ||
126 | </chapter> | ||
127 | |||
128 | <chapter id="vfs"> | ||
129 | <title>The Linux VFS</title> | ||
130 | <sect1><title>The Directory Cache</title> | ||
131 | !Efs/dcache.c | ||
132 | !Iinclude/linux/dcache.h | ||
133 | </sect1> | ||
134 | <sect1><title>Inode Handling</title> | ||
135 | !Efs/inode.c | ||
136 | !Efs/bad_inode.c | ||
137 | </sect1> | ||
138 | <sect1><title>Registration and Superblocks</title> | ||
139 | !Efs/super.c | ||
140 | </sect1> | ||
141 | <sect1><title>File Locks</title> | ||
142 | !Efs/locks.c | ||
143 | !Ifs/locks.c | ||
144 | </sect1> | ||
145 | </chapter> | ||
146 | |||
147 | <chapter id="netcore"> | ||
148 | <title>Linux Networking</title> | ||
149 | <sect1><title>Socket Buffer Functions</title> | ||
150 | !Iinclude/linux/skbuff.h | ||
151 | !Enet/core/skbuff.c | ||
152 | </sect1> | ||
153 | <sect1><title>Socket Filter</title> | ||
154 | !Enet/core/filter.c | ||
155 | </sect1> | ||
156 | <sect1><title>Generic Network Statistics</title> | ||
157 | !Iinclude/linux/gen_stats.h | ||
158 | !Enet/core/gen_stats.c | ||
159 | !Enet/core/gen_estimator.c | ||
160 | </sect1> | ||
161 | </chapter> | ||
162 | |||
163 | <chapter id="netdev"> | ||
164 | <title>Network device support</title> | ||
165 | <sect1><title>Driver Support</title> | ||
166 | !Enet/core/dev.c | ||
167 | </sect1> | ||
168 | <sect1><title>8390 Based Network Cards</title> | ||
169 | !Edrivers/net/8390.c | ||
170 | </sect1> | ||
171 | <sect1><title>Synchronous PPP</title> | ||
172 | !Edrivers/net/wan/syncppp.c | ||
173 | </sect1> | ||
174 | </chapter> | ||
175 | |||
176 | <chapter id="modload"> | ||
177 | <title>Module Support</title> | ||
178 | <sect1><title>Module Loading</title> | ||
179 | !Ekernel/kmod.c | ||
180 | </sect1> | ||
181 | <sect1><title>Inter Module support</title> | ||
182 | <para> | ||
183 | Refer to the file kernel/module.c for more information. | ||
184 | </para> | ||
185 | <!-- FIXME: Removed for now since no structured comments in source | ||
186 | X!Ekernel/module.c | ||
187 | --> | ||
188 | </sect1> | ||
189 | </chapter> | ||
190 | |||
191 | <chapter id="hardware"> | ||
192 | <title>Hardware Interfaces</title> | ||
193 | <sect1><title>Interrupt Handling</title> | ||
194 | !Iarch/i386/kernel/irq.c | ||
195 | </sect1> | ||
196 | |||
197 | <sect1><title>MTRR Handling</title> | ||
198 | !Earch/i386/kernel/cpu/mtrr/main.c | ||
199 | </sect1> | ||
200 | <sect1><title>PCI Support Library</title> | ||
201 | !Edrivers/pci/pci.c | ||
202 | </sect1> | ||
203 | <sect1><title>PCI Hotplug Support Library</title> | ||
204 | !Edrivers/pci/hotplug/pci_hotplug_core.c | ||
205 | </sect1> | ||
206 | <sect1><title>MCA Architecture</title> | ||
207 | <sect2><title>MCA Device Functions</title> | ||
208 | <para> | ||
209 | Refer to the file arch/i386/kernel/mca.c for more information. | ||
210 | </para> | ||
211 | <!-- FIXME: Removed for now since no structured comments in source | ||
212 | X!Earch/i386/kernel/mca.c | ||
213 | --> | ||
214 | </sect2> | ||
215 | <sect2><title>MCA Bus DMA</title> | ||
216 | !Iinclude/asm-i386/mca_dma.h | ||
217 | </sect2> | ||
218 | </sect1> | ||
219 | </chapter> | ||
220 | |||
221 | <chapter id="devfs"> | ||
222 | <title>The Device File System</title> | ||
223 | !Efs/devfs/base.c | ||
224 | </chapter> | ||
225 | |||
226 | <chapter id="security"> | ||
227 | <title>Security Framework</title> | ||
228 | !Esecurity/security.c | ||
229 | </chapter> | ||
230 | |||
231 | <chapter id="pmfuncs"> | ||
232 | <title>Power Management</title> | ||
233 | !Ekernel/power/pm.c | ||
234 | </chapter> | ||
235 | |||
236 | <chapter id="blkdev"> | ||
237 | <title>Block Devices</title> | ||
238 | !Edrivers/block/ll_rw_blk.c | ||
239 | </chapter> | ||
240 | |||
241 | <chapter id="miscdev"> | ||
242 | <title>Miscellaneous Devices</title> | ||
243 | !Edrivers/char/misc.c | ||
244 | </chapter> | ||
245 | |||
246 | <chapter id="viddev"> | ||
247 | <title>Video4Linux</title> | ||
248 | !Edrivers/media/video/videodev.c | ||
249 | </chapter> | ||
250 | |||
251 | <chapter id="snddev"> | ||
252 | <title>Sound Devices</title> | ||
253 | !Esound/sound_core.c | ||
254 | <!-- FIXME: Removed for now since no structured comments in source | ||
255 | X!Isound/sound_firmware.c | ||
256 | --> | ||
257 | </chapter> | ||
258 | |||
259 | <chapter id="uart16x50"> | ||
260 | <title>16x50 UART Driver</title> | ||
261 | !Edrivers/serial/serial_core.c | ||
262 | !Edrivers/serial/8250.c | ||
263 | </chapter> | ||
264 | |||
265 | <chapter id="z85230"> | ||
266 | <title>Z85230 Support Library</title> | ||
267 | !Edrivers/net/wan/z85230.c | ||
268 | </chapter> | ||
269 | |||
270 | <chapter id="fbdev"> | ||
271 | <title>Frame Buffer Library</title> | ||
272 | |||
273 | <para> | ||
274 | The frame buffer drivers depend heavily on four data structures. | ||
275 | These structures are declared in include/linux/fb.h. They are | ||
276 | fb_info, fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs. | ||
277 | The last three can be made available to and from userland. | ||
278 | </para> | ||
279 | |||
280 | <para> | ||
281 | fb_info defines the current state of a particular video card. | ||
282 | Inside fb_info, there exists a fb_ops structure which is a | ||
283 | collection of needed functions to make fbdev and fbcon work. | ||
284 | fb_info is only visible to the kernel. | ||
285 | </para> | ||
286 | |||
287 | <para> | ||
288 | fb_var_screeninfo is used to describe the features of a video card | ||
289 | that are user defined. With fb_var_screeninfo, things such as | ||
290 | depth and the resolution may be defined. | ||
291 | </para> | ||
292 | |||
293 | <para> | ||
294 | The next structure is fb_fix_screeninfo. This defines the | ||
295 | properties of a card that are created when a mode is set and can't | ||
296 | be changed otherwise. A good example of this is the start of the | ||
297 | frame buffer memory. This "locks" the address of the frame buffer | ||
298 | memory, so that it cannot be changed or moved. | ||
299 | </para> | ||
300 | |||
301 | <para> | ||
302 | The last structure is fb_monospecs. In the old API, there was | ||
303 | little importance for fb_monospecs. This allowed for forbidden things | ||
304 | such as setting a mode of 800x600 on a fixed frequency monitor. With | ||
305 | the new API, fb_monospecs prevents such things, and if used | ||
306 | correctly, can prevent a monitor from being cooked. fb_monospecs | ||
307 | will not be useful until kernels 2.5.x. | ||
308 | </para> | ||
309 | |||
310 | <sect1><title>Frame Buffer Memory</title> | ||
311 | !Edrivers/video/fbmem.c | ||
312 | </sect1> | ||
313 | <sect1><title>Frame Buffer Console</title> | ||
314 | !Edrivers/video/console/fbcon.c | ||
315 | </sect1> | ||
316 | <sect1><title>Frame Buffer Colormap</title> | ||
317 | !Edrivers/video/fbcmap.c | ||
318 | </sect1> | ||
319 | <!-- FIXME: | ||
320 | drivers/video/fbgen.c has no docs, which stuffs up the sgml. Comment | ||
321 | out until somebody adds docs. KAO | ||
322 | <sect1><title>Frame Buffer Generic Functions</title> | ||
323 | X!Idrivers/video/fbgen.c | ||
324 | </sect1> | ||
325 | KAO --> | ||
326 | <sect1><title>Frame Buffer Video Mode Database</title> | ||
327 | !Idrivers/video/modedb.c | ||
328 | !Edrivers/video/modedb.c | ||
329 | </sect1> | ||
330 | <sect1><title>Frame Buffer Macintosh Video Mode Database</title> | ||
331 | !Idrivers/video/macmodes.c | ||
332 | </sect1> | ||
333 | <sect1><title>Frame Buffer Fonts</title> | ||
334 | <para> | ||
335 | Refer to the file drivers/video/console/fonts.c for more information. | ||
336 | </para> | ||
337 | <!-- FIXME: Removed for now since no structured comments in source | ||
338 | X!Idrivers/video/console/fonts.c | ||
339 | --> | ||
340 | </sect1> | ||
341 | </chapter> | ||
342 | </book> | ||
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl new file mode 100644 index 000000000000..49a9ef82d575 --- /dev/null +++ b/Documentation/DocBook/kernel-hacking.tmpl | |||
@@ -0,0 +1,1349 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="lk-hacking-guide"> | ||
6 | <bookinfo> | ||
7 | <title>Unreliable Guide To Hacking The Linux Kernel</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Paul</firstname> | ||
12 | <othername>Rusty</othername> | ||
13 | <surname>Russell</surname> | ||
14 | <affiliation> | ||
15 | <address> | ||
16 | <email>rusty@rustcorp.com.au</email> | ||
17 | </address> | ||
18 | </affiliation> | ||
19 | </author> | ||
20 | </authorgroup> | ||
21 | |||
22 | <copyright> | ||
23 | <year>2001</year> | ||
24 | <holder>Rusty Russell</holder> | ||
25 | </copyright> | ||
26 | |||
27 | <legalnotice> | ||
28 | <para> | ||
29 | This documentation is free software; you can redistribute | ||
30 | it and/or modify it under the terms of the GNU General Public | ||
31 | License as published by the Free Software Foundation; either | ||
32 | version 2 of the License, or (at your option) any later | ||
33 | version. | ||
34 | </para> | ||
35 | |||
36 | <para> | ||
37 | This program is distributed in the hope that it will be | ||
38 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
39 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
40 | See the GNU General Public License for more details. | ||
41 | </para> | ||
42 | |||
43 | <para> | ||
44 | You should have received a copy of the GNU General Public | ||
45 | License along with this program; if not, write to the Free | ||
46 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
47 | MA 02111-1307 USA | ||
48 | </para> | ||
49 | |||
50 | <para> | ||
51 | For more details see the file COPYING in the source | ||
52 | distribution of Linux. | ||
53 | </para> | ||
54 | </legalnotice> | ||
55 | |||
56 | <releaseinfo> | ||
57 | This is the first release of this document as part of the kernel tarball. | ||
58 | </releaseinfo> | ||
59 | |||
60 | </bookinfo> | ||
61 | |||
62 | <toc></toc> | ||
63 | |||
64 | <chapter id="introduction"> | ||
65 | <title>Introduction</title> | ||
66 | <para> | ||
67 | Welcome, gentle reader, to Rusty's Unreliable Guide to Linux | ||
68 | Kernel Hacking. This document describes the common routines and | ||
69 | general requirements for kernel code: its goal is to serve as a | ||
70 | primer for Linux kernel development for experienced C | ||
71 | programmers. I avoid implementation details: that's what the | ||
72 | code is for, and I ignore whole tracts of useful routines. | ||
73 | </para> | ||
74 | <para> | ||
75 | Before you read this, please understand that I never wanted to | ||
76 | write this document, being grossly under-qualified, but I always | ||
77 | wanted to read it, and this was the only way. I hope it will | ||
78 | grow into a compendium of best practice, common starting points | ||
79 | and random information. | ||
80 | </para> | ||
81 | </chapter> | ||
82 | |||
83 | <chapter id="basic-players"> | ||
84 | <title>The Players</title> | ||
85 | |||
86 | <para> | ||
87 | At any time each of the CPUs in a system can be: | ||
88 | </para> | ||
89 | |||
90 | <itemizedlist> | ||
91 | <listitem> | ||
92 | <para> | ||
93 | not associated with any process, serving a hardware interrupt; | ||
94 | </para> | ||
95 | </listitem> | ||
96 | |||
97 | <listitem> | ||
98 | <para> | ||
99 | not associated with any process, serving a softirq, tasklet or bh; | ||
100 | </para> | ||
101 | </listitem> | ||
102 | |||
103 | <listitem> | ||
104 | <para> | ||
105 | running in kernel space, associated with a process; | ||
106 | </para> | ||
107 | </listitem> | ||
108 | |||
109 | <listitem> | ||
110 | <para> | ||
111 | running a process in user space. | ||
112 | </para> | ||
113 | </listitem> | ||
114 | </itemizedlist> | ||
115 | |||
116 | <para> | ||
117 | There is a strict ordering between these: other than the last | ||
118 | category (userspace) each can only be pre-empted by those above. | ||
119 | For example, while a softirq is running on a CPU, no other | ||
120 | softirq will pre-empt it, but a hardware interrupt can. However, | ||
121 | any other CPUs in the system execute independently. | ||
122 | </para> | ||
123 | |||
124 | <para> | ||
125 | We'll see a number of ways that the user context can block | ||
126 | interrupts, to become truly non-preemptable. | ||
127 | </para> | ||
128 | |||
129 | <sect1 id="basics-usercontext"> | ||
130 | <title>User Context</title> | ||
131 | |||
132 | <para> | ||
133 | User context is when you are coming in from a system call or | ||
134 | other trap: you can sleep, and you own the CPU (except for | ||
135 | interrupts) until you call <function>schedule()</function>. | ||
136 | In other words, user context (unlike userspace) is not pre-emptable. | ||
137 | </para> | ||
138 | |||
139 | <note> | ||
140 | <para> | ||
141 | You are always in user context on module load and unload, | ||
142 | and on operations on the block device layer. | ||
143 | </para> | ||
144 | </note> | ||
145 | |||
146 | <para> | ||
147 | In user context, the <varname>current</varname> pointer (indicating | ||
148 | the task we are currently executing) is valid, and | ||
149 | <function>in_interrupt()</function> | ||
150 | (<filename>include/linux/interrupt.h</filename>) is <returnvalue>false | ||
151 | </returnvalue>. | ||
152 | </para> | ||
153 | |||
154 | <caution> | ||
155 | <para> | ||
156 | Beware that if you have interrupts or bottom halves disabled | ||
157 | (see below), <function>in_interrupt()</function> will return a | ||
158 | false positive. | ||
159 | </para> | ||
160 | </caution> | ||
161 | </sect1> | ||
162 | |||
163 | <sect1 id="basics-hardirqs"> | ||
164 | <title>Hardware Interrupts (Hard IRQs)</title> | ||
165 | |||
166 | <para> | ||
167 | Timer ticks, <hardware>network cards</hardware> and | ||
168 | <hardware>keyboard</hardware> are examples of real | ||
169 | hardware which produce interrupts at any time. The kernel runs | ||
170 | interrupt handlers, which service the hardware. The kernel | |
171 | guarantees that this handler is never re-entered: if another | ||
172 | interrupt arrives, it is queued (or dropped). Because it | ||
173 | disables interrupts, this handler has to be fast: frequently it | ||
174 | simply acknowledges the interrupt, marks a `software interrupt' | ||
175 | for execution and exits. | ||
176 | </para> | ||
177 | |||
178 | <para> | ||
179 | You can tell you are in a hardware interrupt, because | ||
180 | <function>in_irq()</function> returns <returnvalue>true</returnvalue>. | ||
181 | </para> | ||
182 | <caution> | ||
183 | <para> | ||
184 | Beware that this will return a false positive if interrupts are disabled | ||
185 | (see below). | ||
186 | </para> | ||
187 | </caution> | ||
188 | </sect1> | ||
189 | |||
190 | <sect1 id="basics-softirqs"> | ||
191 | <title>Software Interrupt Context: Bottom Halves, Tasklets, softirqs</title> | ||
192 | |||
193 | <para> | ||
194 | Whenever a system call is about to return to userspace, or a | ||
195 | hardware interrupt handler exits, any `software interrupts' | ||
196 | which are marked pending (usually by hardware interrupts) are | ||
197 | run (<filename>kernel/softirq.c</filename>). | ||
198 | </para> | ||
199 | |||
200 | <para> | ||
201 | Much of the real interrupt handling work is done here. Early in | ||
202 | the transition to <acronym>SMP</acronym>, there were only `bottom | ||
203 | halves' (BHs), which didn't take advantage of multiple CPUs. Shortly | ||
204 | after we switched from wind-up computers made of match-sticks and snot, | ||
205 | we abandoned this limitation. | ||
206 | </para> | ||
207 | |||
208 | <para> | ||
209 | <filename class="headerfile">include/linux/interrupt.h</filename> lists the | ||
210 | different BH's. No matter how many CPUs you have, no two BHs will run at | ||
211 | the same time. This made the transition to SMP simpler, but sucks hard for | ||
212 | scalable performance. A very important bottom half is the timer | ||
213 | BH (<filename class="headerfile">include/linux/timer.h</filename>): you | ||
214 | can register to have it call functions for you in a given length of time. | ||
215 | </para> | ||
216 | |||
217 | <para> | ||
218 | 2.3.43 introduced softirqs, and re-implemented the (now | ||
219 | deprecated) BHs underneath them. Softirqs are fully-SMP | ||
220 | versions of BHs: they can run on as many CPUs at once as | ||
221 | required. This means they need to deal with any races in shared | ||
222 | data using their own locks. A bitmask is used to keep track of | ||
223 | which are enabled, so the 32 available softirqs should not be | ||
224 | used up lightly. (<emphasis>Yes</emphasis>, people will | ||
225 | notice). | ||
226 | </para> | ||
227 | |||
228 | <para> | ||
229 | tasklets (<filename class="headerfile">include/linux/interrupt.h</filename>) | ||
230 | are like softirqs, except they are dynamically-registrable (meaning you | ||
231 | can have as many as you want), and they also guarantee that any tasklet | ||
232 | will only run on one CPU at any time, although different tasklets can | ||
233 | run simultaneously (unlike different BHs). | ||
234 | </para> | ||
235 | <caution> | ||
236 | <para> | ||
237 | The name `tasklet' is misleading: they have nothing to do with `tasks', | ||
238 | and probably more to do with some bad vodka Alexey Kuznetsov had at the | ||
239 | time. | ||
240 | </para> | ||
241 | </caution> | ||
242 | |||
243 | <para> | ||
244 | You can tell you are in a softirq (or bottom half, or tasklet) | ||
245 | using the <function>in_softirq()</function> macro | ||
246 | (<filename class="headerfile">include/linux/interrupt.h</filename>). | ||
247 | </para> | ||
248 | <caution> | ||
249 | <para> | ||
250 | Beware that this will return a false positive if a bh lock (see below) | ||
251 | is held. | ||
252 | </para> | ||
253 | </caution> | ||
254 | </sect1> | ||
255 | </chapter> | ||
256 | |||
257 | <chapter id="basic-rules"> | ||
258 | <title>Some Basic Rules</title> | ||
259 | |||
260 | <variablelist> | ||
261 | <varlistentry> | ||
262 | <term>No memory protection</term> | ||
263 | <listitem> | ||
264 | <para> | ||
265 | If you corrupt memory, whether in user context or | ||
266 | interrupt context, the whole machine will crash. Are you | ||
267 | sure you can't do what you want in userspace? | ||
268 | </para> | ||
269 | </listitem> | ||
270 | </varlistentry> | ||
271 | |||
272 | <varlistentry> | ||
273 | <term>No floating point or <acronym>MMX</acronym></term> | ||
274 | <listitem> | ||
275 | <para> | ||
276 | The <acronym>FPU</acronym> context is not saved; even in user | ||
277 | context the <acronym>FPU</acronym> state probably won't | ||
278 | correspond with the current process: you would mess with some | ||
279 | user process' <acronym>FPU</acronym> state. If you really want | ||
280 | to do this, you would have to explicitly save/restore the full | ||
281 | <acronym>FPU</acronym> state (and avoid context switches). It | ||
282 | is generally a bad idea; use fixed point arithmetic first. | ||
283 | </para> | ||
284 | </listitem> | ||
285 | </varlistentry> | ||
286 | |||
287 | <varlistentry> | ||
288 | <term>A rigid stack limit</term> | ||
289 | <listitem> | ||
290 | <para> | ||
291 | The kernel stack is about 6K in 2.2 (for most | ||
292 | architectures: it's about 14K on the Alpha), and shared | ||
293 | with interrupts so you can't use it all. Avoid deep | ||
294 | recursion and huge local arrays on the stack (allocate | ||
295 | them dynamically instead). | ||
296 | </para> | ||
297 | </listitem> | ||
298 | </varlistentry> | ||
299 | |||
300 | <varlistentry> | ||
301 | <term>The Linux kernel is portable</term> | ||
302 | <listitem> | ||
303 | <para> | ||
304 | Let's keep it that way. Your code should be 64-bit clean, | ||
305 | and endian-independent. You should also minimize CPU | ||
306 | specific stuff, e.g. inline assembly should be cleanly | ||
307 | encapsulated and minimized to ease porting. Generally it | ||
308 | should be restricted to the architecture-dependent part of | ||
309 | the kernel tree. | ||
310 | </para> | ||
311 | </listitem> | ||
312 | </varlistentry> | ||
313 | </variablelist> | ||
314 | </chapter> | ||
315 | |||
316 | <chapter id="ioctls"> | ||
317 | <title>ioctls: Not writing a new system call</title> | ||
318 | |||
319 | <para> | ||
320 | A system call generally looks like this | ||
321 | </para> | ||
322 | |||
323 | <programlisting> | ||
324 | asmlinkage long sys_mycall(int arg) | ||
325 | { | ||
326 | return 0; | ||
327 | } | ||
328 | </programlisting> | ||
329 | |||
330 | <para> | ||
331 | First, in most cases you don't want to create a new system call. | ||
332 | You create a character device and implement an appropriate ioctl | ||
333 | for it. This is much more flexible than system calls, doesn't have | ||
334 | to be entered in every architecture's | ||
335 | <filename class="headerfile">include/asm/unistd.h</filename> and | ||
336 | <filename>arch/kernel/entry.S</filename> file, and is much more | ||
337 | likely to be accepted by Linus. | ||
338 | </para> | ||
339 | |||
340 | <para> | ||
341 | If all your routine does is read or write some parameter, consider | ||
342 | implementing a <function>sysctl</function> interface instead. | ||
343 | </para> | ||
344 | |||
345 | <para> | ||
346 | Inside the ioctl you're in user context to a process. When an | |
347 | error occurs you return a negated errno (see | ||
348 | <filename class="headerfile">include/linux/errno.h</filename>), | ||
349 | otherwise you return <returnvalue>0</returnvalue>. | ||
350 | </para> | ||
351 | |||
352 | <para> | ||
353 | After you slept you should check if a signal occurred: the | ||
354 | Unix/Linux way of handling signals is to temporarily exit the | ||
355 | system call with the <constant>-ERESTARTSYS</constant> error. The | ||
356 | system call entry code will switch back to user context, process | ||
357 | the signal handler and then your system call will be restarted | ||
358 | (unless the user disabled that). So you should be prepared to | ||
359 | process the restart, e.g. if you're in the middle of manipulating | ||
360 | some data structure. | ||
361 | </para> | ||
362 | |||
363 | <programlisting> | ||
364 | if (signal_pending(current)) | |
365 | return -ERESTARTSYS; | ||
366 | </programlisting> | ||
367 | |||
368 | <para> | ||
369 | If you're doing longer computations: first think userspace. If you | ||
370 | <emphasis>really</emphasis> want to do it in kernel you should | ||
371 | regularly check if you need to give up the CPU (remember there is | ||
372 | cooperative multitasking per CPU). Idiom: | ||
373 | </para> | ||
374 | |||
375 | <programlisting> | ||
376 | cond_resched(); /* Will sleep */ | ||
377 | </programlisting> | ||
378 | |||
379 | <para> | ||
380 | A short note on interface design: the UNIX system call motto is | ||
381 | "Provide mechanism not policy". | ||
382 | </para> | ||
383 | </chapter> | ||
384 | |||
385 | <chapter id="deadlock-recipes"> | ||
386 | <title>Recipes for Deadlock</title> | ||
387 | |||
388 | <para> | ||
389 | You cannot call any routines which may sleep, unless: | ||
390 | </para> | ||
391 | <itemizedlist> | ||
392 | <listitem> | ||
393 | <para> | ||
394 | You are in user context. | ||
395 | </para> | ||
396 | </listitem> | ||
397 | |||
398 | <listitem> | ||
399 | <para> | ||
400 | You do not own any spinlocks. | ||
401 | </para> | ||
402 | </listitem> | ||
403 | |||
404 | <listitem> | ||
405 | <para> | ||
406 | You have interrupts enabled (actually, Andi Kleen says | ||
407 | that the scheduling code will enable them for you, but | ||
408 | that's probably not what you wanted). | ||
409 | </para> | ||
410 | </listitem> | ||
411 | </itemizedlist> | ||
412 | |||
413 | <para> | ||
414 | Note that some functions may sleep implicitly: common ones are | ||
415 | the user space access functions (*_user) and memory allocation | ||
416 | functions without <symbol>GFP_ATOMIC</symbol>. | ||
417 | </para> | ||
418 | |||
419 | <para> | ||
420 | You will eventually lock up your box if you break these rules. | ||
421 | </para> | ||
422 | |||
423 | <para> | ||
424 | Really. | ||
425 | </para> | ||
426 | </chapter> | ||
427 | |||
428 | <chapter id="common-routines"> | ||
429 | <title>Common Routines</title> | ||
430 | |||
431 | <sect1 id="routines-printk"> | ||
432 | <title> | ||
433 | <function>printk()</function> | ||
434 | <filename class="headerfile">include/linux/kernel.h</filename> | ||
435 | </title> | ||
436 | |||
437 | <para> | ||
438 | <function>printk()</function> feeds kernel messages to the | ||
439 | console, dmesg, and the syslog daemon. It is useful for debugging | ||
440 | and reporting errors, and can be used inside interrupt context, | ||
441 | but use with caution: a machine which has its console flooded with | ||
442 | printk messages is unusable. It uses a format string mostly | ||
443 | compatible with ANSI C printf, and C string concatenation to give | ||
444 | it a first "priority" argument: | ||
445 | </para> | ||
446 | |||
447 | <programlisting> | ||
448 | printk(KERN_INFO "i = %u\n", i); | ||
449 | </programlisting> | ||
450 | |||
451 | <para> | ||
452 | See <filename class="headerfile">include/linux/kernel.h</filename> | |
453 | for other KERN_ values; these are interpreted by syslog as the | ||
454 | level. Special case: for printing an IP address use | ||
455 | </para> | ||
456 | |||
457 | <programlisting> | ||
458 | __u32 ipaddress; | ||
459 | printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); | ||
460 | </programlisting> | ||
461 | |||
462 | <para> | ||
463 | <function>printk()</function> internally uses a 1K buffer and does | ||
464 | not catch overruns. Make sure that will be enough. | ||
465 | </para> | ||
466 | |||
467 | <note> | ||
468 | <para> | ||
469 | You will know when you are a real kernel hacker | ||
470 | when you start typoing printf as printk in your user programs :) | ||
471 | </para> | ||
472 | </note> | ||
473 | |||
474 | <!--- From the Lions book reader department --> | ||
475 | |||
476 | <note> | ||
477 | <para> | ||
478 | Another sidenote: the original Unix Version 6 sources had a | ||
479 | comment on top of its printf function: "Printf should not be | ||
480 | used for chit-chat". You should follow that advice. | ||
481 | </para> | ||
482 | </note> | ||
483 | </sect1> | ||
484 | |||
485 | <sect1 id="routines-copy"> | ||
486 | <title> | ||
487 | <function>copy_[to/from]_user()</function> | ||
488 | / | ||
489 | <function>get_user()</function> | ||
490 | / | ||
491 | <function>put_user()</function> | ||
492 | <filename class="headerfile">include/asm/uaccess.h</filename> | ||
493 | </title> | ||
494 | |||
495 | <para> | ||
496 | <emphasis>[SLEEPS]</emphasis> | ||
497 | </para> | ||
498 | |||
499 | <para> | ||
500 | <function>put_user()</function> and <function>get_user()</function> | ||
501 | are used to get and put single values (such as an int, char, or | ||
502 | long) from and to userspace. A pointer into userspace should | ||
503 | never be simply dereferenced: data should be copied using these | ||
504 | routines. Both return <constant>-EFAULT</constant> or 0. | ||
505 | </para> | ||
506 | <para> | ||
507 | <function>copy_to_user()</function> and | ||
508 | <function>copy_from_user()</function> are more general: they copy | ||
509 | an arbitrary amount of data to and from userspace. | ||
510 | <caution> | ||
511 | <para> | ||
512 | Unlike <function>put_user()</function> and | ||
513 | <function>get_user()</function>, they return the amount of | ||
514 | uncopied data (ie. <returnvalue>0</returnvalue> still means | ||
515 | success). | ||
516 | </para> | ||
517 | </caution> | ||
518 | [Yes, this moronic interface makes me cringe. Please submit a | ||
519 | patch and become my hero --RR.] | ||
520 | </para> | ||
521 | <para> | ||
522 | The functions may sleep implicitly. They should never be called | |
523 | outside user context (it makes no sense), with interrupts | ||
524 | disabled, or a spinlock held. | ||
525 | </para> | ||
526 | </sect1> | ||
527 | |||
528 | <sect1 id="routines-kmalloc"> | ||
529 | <title><function>kmalloc()</function>/<function>kfree()</function> | ||
530 | <filename class="headerfile">include/linux/slab.h</filename></title> | ||
531 | |||
532 | <para> | ||
533 | <emphasis>[MAY SLEEP: SEE BELOW]</emphasis> | ||
534 | </para> | ||
535 | |||
536 | <para> | ||
537 | These routines are used to dynamically request pointer-aligned | ||
538 | chunks of memory, like malloc and free do in userspace, but | ||
539 | <function>kmalloc()</function> takes an extra flag word. | ||
540 | Important values: | ||
541 | </para> | ||
542 | |||
543 | <variablelist> | ||
544 | <varlistentry> | ||
545 | <term> | ||
546 | <constant> | ||
547 | GFP_KERNEL | ||
548 | </constant> | ||
549 | </term> | ||
550 | <listitem> | ||
551 | <para> | ||
552 | May sleep and swap to free memory. Only allowed in user | ||
553 | context, but is the most reliable way to allocate memory. | ||
554 | </para> | ||
555 | </listitem> | ||
556 | </varlistentry> | ||
557 | |||
558 | <varlistentry> | ||
559 | <term> | ||
560 | <constant> | ||
561 | GFP_ATOMIC | ||
562 | </constant> | ||
563 | </term> | ||
564 | <listitem> | ||
565 | <para> | ||
566 | Don't sleep. Less reliable than <constant>GFP_KERNEL</constant>, | ||
567 | but may be called from interrupt context. You should | ||
568 | <emphasis>really</emphasis> have a good out-of-memory | ||
569 | error-handling strategy. | ||
570 | </para> | ||
571 | </listitem> | ||
572 | </varlistentry> | ||
573 | |||
574 | <varlistentry> | ||
575 | <term> | ||
576 | <constant> | ||
577 | GFP_DMA | ||
578 | </constant> | ||
579 | </term> | ||
580 | <listitem> | ||
581 | <para> | ||
582 | Allocate ISA DMA lower than 16MB. If you don't know what that | ||
583 | is you don't need it. Very unreliable. | ||
584 | </para> | ||
585 | </listitem> | ||
586 | </varlistentry> | ||
587 | </variablelist> | ||
588 | |||
589 | <para> | ||
590 | If you see a <errorname>kmem_grow: Called nonatomically from int | ||
591 | </errorname> warning message you called a memory allocation function | ||
592 | from interrupt context without <constant>GFP_ATOMIC</constant>. | ||
593 | You should really fix that. Run, don't walk. | ||
594 | </para> | ||
595 | |||
596 | <para> | ||
597 | If you are allocating at least <constant>PAGE_SIZE</constant> | ||
598 | (<filename class="headerfile">include/asm/page.h</filename>) bytes, | ||
599 | consider using <function>__get_free_pages()</function> | ||
600 | |||
601 | (<filename class="headerfile">include/linux/mm.h</filename>). It | ||
602 | takes an order argument (0 for page sized, 1 for double page, 2 | ||
603 | for four pages etc.) and the same memory priority flag word as | ||
604 | above. | ||
605 | </para> | ||
606 | |||
607 | <para> | ||
608 | If you are allocating more than a page worth of bytes you can use | ||
609 | <function>vmalloc()</function>. It'll allocate virtual memory in | ||
610 | the kernel map. This block is not contiguous in physical memory, | ||
611 | but the <acronym>MMU</acronym> makes it look like it is for you | ||
612 | (so it'll only look contiguous to the CPUs, not to external device | ||
613 | drivers). If you really need large physically contiguous memory | ||
614 | for some weird device, you have a problem: it is poorly supported | ||
615 | in Linux because after some time memory fragmentation in a running | ||
616 | kernel makes it hard. The best way is to allocate the block early | ||
617 | in the boot process via the <function>alloc_bootmem()</function> | ||
618 | routine. | ||
619 | </para> | ||
620 | |||
621 | <para> | ||
622 | Before inventing your own cache of often-used objects consider | ||
623 | using a slab cache in | ||
624 | <filename class="headerfile">include/linux/slab.h</filename> | ||
625 | </para> | ||
626 | </sect1> | ||
627 | |||
628 | <sect1 id="routines-current"> | ||
629 | <title><function>current</function> | ||
630 | <filename class="headerfile">include/asm/current.h</filename></title> | ||
631 | |||
632 | <para> | ||
633 | This global variable (really a macro) contains a pointer to | ||
634 | the current task structure, so is only valid in user context. | ||
635 | For example, when a process makes a system call, this will | ||
636 | point to the task structure of the calling process. It is | ||
637 | <emphasis>not NULL</emphasis> in interrupt context. | ||
638 | </para> | ||
639 | </sect1> | ||
640 | |||
641 | <sect1 id="routines-udelay"> | ||
642 | <title><function>udelay()</function>/<function>mdelay()</function> | ||
643 | <filename class="headerfile">include/asm/delay.h</filename> | ||
644 | <filename class="headerfile">include/linux/delay.h</filename> | ||
645 | </title> | ||
646 | |||
647 | <para> | ||
648 | The <function>udelay()</function> function can be used for small pauses. | ||
649 | Do not use large values with <function>udelay()</function> as you risk | ||
650 | overflow - the helper function <function>mdelay()</function> is useful | ||
651 | here, or even consider <function>schedule_timeout()</function>. | ||
652 | </para> | ||
653 | </sect1> | ||
654 | |||
655 | <sect1 id="routines-endian"> | ||
656 | <title><function>cpu_to_be32()</function>/<function>be32_to_cpu()</function>/<function>cpu_to_le32()</function>/<function>le32_to_cpu()</function> | ||
657 | <filename class="headerfile">include/asm/byteorder.h</filename> | ||
658 | </title> | ||
659 | |||
660 | <para> | ||
661 | The <function>cpu_to_be32()</function> family (where the "32" can | ||
662 | be replaced by 64 or 16, and the "be" can be replaced by "le") are | ||
663 | the general way to do endian conversions in the kernel: they | ||
664 | return the converted value. All variations supply the reverse as | ||
665 | well: <function>be32_to_cpu()</function>, etc. | ||
666 | </para> | ||
667 | |||
668 | <para> | ||
669 | There are two major variations of these functions: the pointer | ||
670 | variation, such as <function>cpu_to_be32p()</function>, which take | ||
671 | a pointer to the given type, and return the converted value. The | ||
672 | other variation is the "in-situ" family, such as | ||
673 | <function>cpu_to_be32s()</function>, which convert the value referred | |
674 | to by the pointer, and return void. | ||
675 | </para> | ||
676 | </sect1> | ||
677 | |||
678 | <sect1 id="routines-local-irqs"> | ||
679 | <title><function>local_irq_save()</function>/<function>local_irq_restore()</function> | ||
680 | <filename class="headerfile">include/asm/system.h</filename> | ||
681 | </title> | ||
682 | |||
683 | <para> | ||
684 | These routines disable hard interrupts on the local CPU, and | ||
685 | restore them. They are reentrant; saving the previous state in | ||
686 | their one <varname>unsigned long flags</varname> argument. If you | ||
687 | know that interrupts are enabled, you can simply use | ||
688 | <function>local_irq_disable()</function> and | ||
689 | <function>local_irq_enable()</function>. | ||
690 | </para> | ||
691 | </sect1> | ||
692 | |||
693 | <sect1 id="routines-softirqs"> | ||
694 | <title><function>local_bh_disable()</function>/<function>local_bh_enable()</function> | ||
695 | <filename class="headerfile">include/linux/interrupt.h</filename></title> | ||
696 | |||
697 | <para> | ||
698 | These routines disable soft interrupts on the local CPU, and | ||
699 | restore them. They are reentrant; if soft interrupts were | ||
700 | disabled before, they will still be disabled after this pair | ||
701 | of functions has been called. They prevent softirqs, tasklets | ||
702 | and bottom halves from running on the current CPU. | ||
703 | </para> | ||
704 | </sect1> | ||
705 | |||
706 | <sect1 id="routines-processorids"> | ||
707 | <title><function>smp_processor_id()</function> | |
708 | <filename class="headerfile">include/asm/smp.h</filename></title> | ||
709 | |||
710 | <para> | ||
711 | <function>smp_processor_id()</function> returns the current | ||
712 | processor number, between 0 and <symbol>NR_CPUS</symbol> (the | ||
713 | maximum number of CPUs supported by Linux, currently 32). These | ||
714 | values are not necessarily continuous. | ||
715 | </para> | ||
716 | </sect1> | ||
717 | |||
718 | <sect1 id="routines-init"> | ||
719 | <title><type>__init</type>/<type>__exit</type>/<type>__initdata</type> | ||
720 | <filename class="headerfile">include/linux/init.h</filename></title> | ||
721 | |||
722 | <para> | ||
723 | After boot, the kernel frees up a special section; functions | ||
724 | marked with <type>__init</type> and data structures marked with | ||
725 | <type>__initdata</type> are dropped after boot is complete (within | ||
726 | modules this directive is currently ignored). <type>__exit</type> | ||
727 | is used to declare a function which is only required on exit: the | ||
728 | function will be dropped if this file is not compiled as a module. | ||
729 | See the header file for use. Note that it makes no sense for a function | ||
730 | marked with <type>__init</type> to be exported to modules with | ||
731 | <function>EXPORT_SYMBOL()</function> - this will break. | ||
732 | </para> | ||
733 | <para> | ||
734 | Static data structures marked as <type>__initdata</type> must be initialised | ||
735 | (as opposed to ordinary static data which is zeroed BSS) and cannot be | ||
736 | <type>const</type>. | ||
737 | </para> | ||
738 | |||
739 | </sect1> | ||
740 | |||
741 | <sect1 id="routines-init-again"> | ||
742 | <title><function>__initcall()</function>/<function>module_init()</function> | ||
743 | <filename class="headerfile">include/linux/init.h</filename></title> | ||
744 | <para> | ||
745 | Many parts of the kernel are well served as a module | ||
746 | (dynamically-loadable parts of the kernel). Using the | ||
747 | <function>module_init()</function> and | ||
748 | <function>module_exit()</function> macros it is easy to write code | ||
749 | without #ifdefs which can operate either as a module or built into | |
750 | the kernel. | ||
751 | </para> | ||
752 | |||
753 | <para> | ||
754 | The <function>module_init()</function> macro defines which | ||
755 | function is to be called at module insertion time (if the file is | ||
756 | compiled as a module), or at boot time: if the file is not | ||
757 | compiled as a module the <function>module_init()</function> macro | ||
758 | becomes equivalent to <function>__initcall()</function>, which | ||
759 | through linker magic ensures that the function is called on boot. | ||
760 | </para> | ||
761 | |||
762 | <para> | ||
763 | The function can return a negative error number to cause | ||
764 | module loading to fail (unfortunately, this has no effect if | ||
765 | the module is compiled into the kernel). For modules, this is | ||
766 | called in user context, with interrupts enabled, and the | ||
767 | kernel lock held, so it can sleep. | ||
768 | </para> | ||
769 | </sect1> | ||
770 | |||
771 | <sect1 id="routines-moduleexit"> | ||
772 | <title> <function>module_exit()</function> | ||
773 | <filename class="headerfile">include/linux/init.h</filename> </title> | ||
774 | |||
775 | <para> | ||
776 | This macro defines the function to be called at module removal | ||
777 | time (or never, in the case of the file compiled into the | ||
778 | kernel). It will only be called if the module usage count has | ||
779 | reached zero. This function can also sleep, but cannot fail: | ||
780 | everything must be cleaned up by the time it returns. | ||
781 | </para> | ||
782 | </sect1> | ||
783 | |||
784 | <!-- add info on new-style module refcounting here --> | ||
785 | </chapter> | ||
786 | |||
787 | <chapter id="queues"> | ||
788 | <title>Wait Queues | ||
789 | <filename class="headerfile">include/linux/wait.h</filename> | ||
790 | </title> | ||
791 | <para> | ||
792 | <emphasis>[SLEEPS]</emphasis> | ||
793 | </para> | ||
794 | |||
795 | <para> | ||
796 | A wait queue is used to wait for someone to wake you up when a | ||
797 | certain condition is true. They must be used carefully to ensure | ||
798 | there is no race condition. You declare a | ||
799 | <type>wait_queue_head_t</type>, and then processes which want to | ||
800 | wait for that condition declare a <type>wait_queue_t</type> | ||
801 | referring to themselves, and place that in the queue. | ||
802 | </para> | ||
803 | |||
804 | <sect1 id="queue-declaring"> | ||
805 | <title>Declaring</title> | ||
806 | |||
807 | <para> | ||
808 | You declare a <type>wait_queue_head_t</type> using the | ||
809 | <function>DECLARE_WAIT_QUEUE_HEAD()</function> macro, or using the | ||
810 | <function>init_waitqueue_head()</function> routine in your | ||
811 | initialization code. | ||
812 | </para> | ||
813 | </sect1> | ||
814 | |||
815 | <sect1 id="queue-waitqueue"> | ||
816 | <title>Queuing</title> | ||
817 | |||
818 | <para> | ||
819 | Placing yourself in the waitqueue is fairly complex, because you | ||
820 | must put yourself in the queue before checking the condition. | ||
821 | There is a macro to do this: | ||
822 | <function>wait_event_interruptible()</function> | ||
823 | |||
824 | (<filename class="headerfile">include/linux/sched.h</filename>). The | |
825 | first argument is the wait queue head, and the second is an | ||
826 | expression which is evaluated; the macro returns | ||
827 | <returnvalue>0</returnvalue> when this expression is true, or | ||
828 | <returnvalue>-ERESTARTSYS</returnvalue> if a signal is received. | ||
829 | The <function>wait_event()</function> version ignores signals. | ||
830 | </para> | ||
831 | <para> | ||
832 | Do not use the <function>sleep_on()</function> function family - | ||
833 | it is very easy to accidentally introduce races; almost certainly | ||
834 | one of the <function>wait_event()</function> family will do, or a | ||
835 | loop around <function>schedule_timeout()</function>. If you choose | ||
836 | to loop around <function>schedule_timeout()</function> remember | ||
837 | you must set the task state (with | ||
838 | <function>set_current_state()</function>) on each iteration to avoid | ||
839 | busy-looping. | ||
840 | </para> | ||
841 | |||
842 | </sect1> | ||
843 | |||
844 | <sect1 id="queue-waking"> | ||
845 | <title>Waking Up Queued Tasks</title> | ||
846 | |||
847 | <para> | ||
848 | Call <function>wake_up()</function> | ||
849 | |||
850 | (<filename class="headerfile">include/linux/sched.h</filename>), | |
851 | which will wake up every process in the queue. The exception is | ||
852 | if one has <constant>TASK_EXCLUSIVE</constant> set, in which case | ||
853 | the remainder of the queue will not be woken. | ||
854 | </para> | ||
855 | </sect1> | ||
856 | </chapter> | ||
857 | |||
858 | <chapter id="atomic-ops"> | ||
859 | <title>Atomic Operations</title> | ||
860 | |||
861 | <para> | ||
862 | Certain operations are guaranteed atomic on all platforms. The | ||
863 | first class of operations work on <type>atomic_t</type> | ||
864 | |||
865 | <filename class="headerfile">include/asm/atomic.h</filename>; this | ||
866 | contains a signed integer (at least 24 bits long), and you must use | ||
867 | these functions to manipulate or read atomic_t variables. | ||
868 | <function>atomic_read()</function> and | ||
869 | <function>atomic_set()</function> get and set the counter, | ||
870 | <function>atomic_add()</function>, | ||
871 | <function>atomic_sub()</function>, | ||
872 | <function>atomic_inc()</function>, | ||
873 | <function>atomic_dec()</function>, and | ||
874 | <function>atomic_dec_and_test()</function> (returns | ||
875 | <returnvalue>true</returnvalue> if it was decremented to zero). | ||
876 | </para> | ||
877 | |||
878 | <para> | ||
879 | Yes. It returns <returnvalue>true</returnvalue> (i.e. != 0) if the | ||
880 | atomic variable is zero. | ||
881 | </para> | ||
882 | |||
883 | <para> | ||
884 | Note that these functions are slower than normal arithmetic, and | ||
885 | so should not be used unnecessarily. On some platforms they | ||
886 | are much slower, like 32-bit Sparc where they use a spinlock. | ||
887 | </para> | ||
888 | |||
889 | <para> | ||
890 | The second class of atomic operations is atomic bit operations on a | ||
891 | <type>long</type>, defined in | ||
892 | |||
893 | <filename class="headerfile">include/linux/bitops.h</filename>. These | ||
894 | operations generally take a pointer to the bit pattern, and a bit | ||
895 | number: 0 is the least significant bit. | ||
896 | <function>set_bit()</function>, <function>clear_bit()</function> | ||
897 | and <function>change_bit()</function> set, clear, and flip the | ||
898 | given bit. <function>test_and_set_bit()</function>, | ||
899 | <function>test_and_clear_bit()</function> and | ||
900 | <function>test_and_change_bit()</function> do the same thing, | ||
901 | except return true if the bit was previously set; these are | ||
902 | particularly useful for very simple locking. | ||
903 | </para> | ||
904 | |||
905 | <para> | ||
906 | It is possible to call these operations with bit indices greater | ||
907 | than BITS_PER_LONG. The resulting behavior is strange on big-endian | ||
908 | platforms though so it is a good idea not to do this. | ||
909 | </para> | ||
910 | |||
911 | <para> | ||
912 | Note that the order of bits depends on the architecture, and in | ||
913 | particular, the bitfield passed to these operations must be at | ||
914 | least as large as a <type>long</type>. | ||
915 | </para> | ||
916 | </chapter> | ||
917 | |||
918 | <chapter id="symbols"> | ||
919 | <title>Symbols</title> | ||
920 | |||
921 | <para> | ||
922 | Within the kernel proper, the normal linking rules apply | ||
923 | (ie. unless a symbol is declared to be file scope with the | ||
924 | <type>static</type> keyword, it can be used anywhere in the | ||
925 | kernel). However, for modules, a special exported symbol table is | ||
926 | kept which limits the entry points to the kernel proper. Modules | ||
927 | can also export symbols. | ||
928 | </para> | ||
929 | |||
930 | <sect1 id="sym-exportsymbols"> | ||
931 | <title><function>EXPORT_SYMBOL()</function> | ||
932 | <filename class="headerfile">include/linux/module.h</filename></title> | ||
933 | |||
934 | <para> | ||
935 | This is the classic method of exporting a symbol, and it works | ||
936 | for both modules and non-modules. In the kernel all these | ||
937 | declarations are often bundled into a single file to help | ||
938 | genksyms (which searches source files for these declarations). | ||
939 | See the comment on genksyms and Makefiles below. | ||
940 | </para> | ||
941 | </sect1> | ||
942 | |||
943 | <sect1 id="sym-exportsymbols-gpl"> | ||
944 | <title><function>EXPORT_SYMBOL_GPL()</function> | ||
945 | <filename class="headerfile">include/linux/module.h</filename></title> | ||
946 | |||
947 | <para> | ||
948 | Similar to <function>EXPORT_SYMBOL()</function> except that the | ||
949 | symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can | ||
950 | only be seen by modules with a | ||
951 | <function>MODULE_LICENSE()</function> that specifies a GPL | ||
952 | compatible license. | ||
953 | </para> | ||
954 | </sect1> | ||
955 | </chapter> | ||
956 | |||
957 | <chapter id="conventions"> | ||
958 | <title>Routines and Conventions</title> | ||
959 | |||
960 | <sect1 id="conventions-doublelinkedlist"> | ||
961 | <title>Double-linked lists | ||
962 | <filename class="headerfile">include/linux/list.h</filename></title> | ||
963 | |||
964 | <para> | ||
965 | There are three sets of linked-list routines in the kernel | ||
966 | headers, but this one seems to be winning out (and Linus has | ||
967 | used it). If you don't have some particular pressing need for | ||
968 | a single list, it's a good choice. In fact, I don't care | ||
969 | whether it's a good choice or not, just use it so we can get | ||
970 | rid of the others. | ||
971 | </para> | ||
972 | </sect1> | ||
973 | |||
974 | <sect1 id="convention-returns"> | ||
975 | <title>Return Conventions</title> | ||
976 | |||
977 | <para> | ||
978 | For code called in user context, it's very common to defy C | ||
979 | convention, and return <returnvalue>0</returnvalue> for success, | ||
980 | and a negative error number | ||
981 | (eg. <returnvalue>-EFAULT</returnvalue>) for failure. This can be | ||
982 | unintuitive at first, but it's fairly widespread in the networking | ||
983 | code, for example. | ||
984 | </para> | ||
985 | |||
986 | <para> | ||
987 | The filesystem code uses <function>ERR_PTR()</function> | ||
988 | |||
989 | (<filename class="headerfile">include/linux/fs.h</filename>) to | ||
990 | encode a negative error number into a pointer, and | ||
991 | <function>IS_ERR()</function> and <function>PTR_ERR()</function> | ||
992 | to get it back out again: avoids a separate pointer parameter for | ||
993 | the error number. Icky, but in a good way. | ||
994 | </para> | ||
995 | </sect1> | ||
996 | |||
997 | <sect1 id="conventions-borkedcompile"> | ||
998 | <title>Breaking Compilation</title> | ||
999 | |||
1000 | <para> | ||
1001 | Linus and the other developers sometimes change function or | ||
1002 | structure names in development kernels; this is not done just to | ||
1003 | keep everyone on their toes: it reflects a fundamental change | ||
1004 | (eg. can no longer be called with interrupts on, or does extra | ||
1005 | checks, or doesn't do checks which were caught before). Usually | ||
1006 | this is accompanied by a fairly complete note to the linux-kernel | ||
1007 | mailing list; search the archive. Simply doing a global replace | ||
1008 | on the file usually makes things <emphasis>worse</emphasis>. | ||
1009 | </para> | ||
1010 | </sect1> | ||
1011 | |||
1012 | <sect1 id="conventions-initialising"> | ||
1013 | <title>Initializing structure members</title> | ||
1014 | |||
1015 | <para> | ||
1016 | The preferred method of initializing structures is to use | ||
1017 | designated initializers, as defined by ISO C99, eg: | ||
1018 | </para> | ||
1019 | <programlisting> | ||
1020 | static struct block_device_operations opt_fops = { | ||
1021 | .open = opt_open, | ||
1022 | .release = opt_release, | ||
1023 | .ioctl = opt_ioctl, | ||
1024 | .check_media_change = opt_media_change, | ||
1025 | }; | ||
1026 | </programlisting> | ||
1027 | <para> | ||
1028 | This makes it easy to grep for, and makes it clear which | ||
1029 | structure fields are set. You should do this because it looks | ||
1030 | cool. | ||
1031 | </para> | ||
1032 | </sect1> | ||
1033 | |||
1034 | <sect1 id="conventions-gnu-extns"> | ||
1035 | <title>GNU Extensions</title> | ||
1036 | |||
1037 | <para> | ||
1038 | GNU Extensions are explicitly allowed in the Linux kernel. | ||
1039 | Note that some of the more complex ones are not very well | ||
1040 | supported, due to lack of general use, but the following are | ||
1041 | considered standard (see the GCC info page section "C | ||
1042 | Extensions" for more details - Yes, really the info page, the | ||
1043 | man page is only a short summary of the stuff in info): | ||
1044 | </para> | ||
1045 | <itemizedlist> | ||
1046 | <listitem> | ||
1047 | <para> | ||
1048 | Inline functions | ||
1049 | </para> | ||
1050 | </listitem> | ||
1051 | <listitem> | ||
1052 | <para> | ||
1053 | Statement expressions (ie. the ({ and }) constructs). | ||
1054 | </para> | ||
1055 | </listitem> | ||
1056 | <listitem> | ||
1057 | <para> | ||
1058 | Declaring attributes of a function / variable / type | ||
1059 | (__attribute__) | ||
1060 | </para> | ||
1061 | </listitem> | ||
1062 | <listitem> | ||
1063 | <para> | ||
1064 | typeof | ||
1065 | </para> | ||
1066 | </listitem> | ||
1067 | <listitem> | ||
1068 | <para> | ||
1069 | Zero length arrays | ||
1070 | </para> | ||
1071 | </listitem> | ||
1072 | <listitem> | ||
1073 | <para> | ||
1074 | Macro varargs | ||
1075 | </para> | ||
1076 | </listitem> | ||
1077 | <listitem> | ||
1078 | <para> | ||
1079 | Arithmetic on void pointers | ||
1080 | </para> | ||
1081 | </listitem> | ||
1082 | <listitem> | ||
1083 | <para> | ||
1084 | Non-Constant initializers | ||
1085 | </para> | ||
1086 | </listitem> | ||
1087 | <listitem> | ||
1088 | <para> | ||
1089 | Assembler Instructions (not outside arch/ and include/asm/) | ||
1090 | </para> | ||
1091 | </listitem> | ||
1092 | <listitem> | ||
1093 | <para> | ||
1094 | Function names as strings (__FUNCTION__) | ||
1095 | </para> | ||
1096 | </listitem> | ||
1097 | <listitem> | ||
1098 | <para> | ||
1099 | __builtin_constant_p() | ||
1100 | </para> | ||
1101 | </listitem> | ||
1102 | </itemizedlist> | ||
1103 | |||
1104 | <para> | ||
1105 | Be wary when using long long in the kernel, the code gcc generates for | ||
1106 | it is horrible and worse: division and multiplication do not work | ||
1107 | on i386 because the GCC runtime functions for it are missing from | ||
1108 | the kernel environment. | ||
1109 | </para> | ||
1110 | |||
1111 | <!-- FIXME: add a note about ANSI aliasing cleanness --> | ||
1112 | </sect1> | ||
1113 | |||
1114 | <sect1 id="conventions-cplusplus"> | ||
1115 | <title>C++</title> | ||
1116 | |||
1117 | <para> | ||
1118 | Using C++ in the kernel is usually a bad idea, because the | ||
1119 | kernel does not provide the necessary runtime environment | ||
1120 | and the include files are not tested for it. It is still | ||
1121 | possible, but not recommended. If you really want to do | ||
1122 | this, forget about exceptions at least. | ||
1123 | </para> | ||
1124 | </sect1> | ||
1125 | |||
1126 | <sect1 id="conventions-ifdef"> | ||
1127 | <title>#if</title> | ||
1128 | |||
1129 | <para> | ||
1130 | It is generally considered cleaner to use macros in header files | ||
1131 | (or at the top of .c files) to abstract away functions rather than | ||
1132 | using `#if' pre-processor statements throughout the source code. | ||
1133 | </para> | ||
1134 | </sect1> | ||
1135 | </chapter> | ||
1136 | |||
1137 | <chapter id="submitting"> | ||
1138 | <title>Putting Your Stuff in the Kernel</title> | ||
1139 | |||
1140 | <para> | ||
1141 | In order to get your stuff into shape for official inclusion, or | ||
1142 | even to make a neat patch, there's administrative work to be | ||
1143 | done: | ||
1144 | </para> | ||
1145 | <itemizedlist> | ||
1146 | <listitem> | ||
1147 | <para> | ||
1148 | Figure out whose pond you've been pissing in. Look at the top of | ||
1149 | the source files, inside the <filename>MAINTAINERS</filename> | ||
1150 | file, and last of all in the <filename>CREDITS</filename> file. | ||
1151 | You should coordinate with this person to make sure you're not | ||
1152 | duplicating effort, or trying something that's already been | ||
1153 | rejected. | ||
1154 | </para> | ||
1155 | |||
1156 | <para> | ||
1157 | Make sure you put your name and EMail address at the top of | ||
1158 | any files you create or mangle significantly. This is the | ||
1159 | first place people will look when they find a bug, or when | ||
1160 | <emphasis>they</emphasis> want to make a change. | ||
1161 | </para> | ||
1162 | </listitem> | ||
1163 | |||
1164 | <listitem> | ||
1165 | <para> | ||
1166 | Usually you want a configuration option for your kernel hack. | ||
1167 | Edit <filename>Config.in</filename> in the appropriate directory | ||
1168 | (but under <filename>arch/</filename> it's called | ||
1169 | <filename>config.in</filename>). The Config Language used is not | ||
1170 | bash, even though it looks like bash; the safe way is to use only | ||
1171 | the constructs that you already see in | ||
1172 | <filename>Config.in</filename> files (see | ||
1173 | <filename>Documentation/kbuild/kconfig-language.txt</filename>). | ||
1174 | It's good to run "make xconfig" at least once to test (because | ||
1175 | it's the only one with a static parser). | ||
1176 | </para> | ||
1177 | |||
1178 | <para> | ||
1179 | Variables which can be Y or N use <type>bool</type> followed by a | ||
1180 | tagline and the config define name (which must start with | ||
1181 | CONFIG_). The <type>tristate</type> function is the same, but | ||
1182 | allows the answer M (which defines | ||
1183 | <symbol>CONFIG_FOO_MODULE</symbol> in your source, instead of | ||
1184 | <symbol>CONFIG_FOO</symbol>) if <symbol>CONFIG_MODULES</symbol> | ||
1185 | is enabled. | ||
1186 | </para> | ||
1187 | |||
1188 | <para> | ||
1189 | You may well want to make your CONFIG option only visible if | ||
1190 | <symbol>CONFIG_EXPERIMENTAL</symbol> is enabled: this serves as a | ||
1191 | warning to users. There are many other fancy things you can do: see | ||
1192 | the various <filename>Config.in</filename> files for ideas. | ||
1193 | </para> | ||
1194 | </listitem> | ||
1195 | |||
1196 | <listitem> | ||
1197 | <para> | ||
1198 | Edit the <filename>Makefile</filename>: the CONFIG variables are | ||
1199 | exported here so you can conditionalize compilation with `ifeq'. | ||
1200 | If your file exports symbols then add the names to | ||
1201 | <varname>export-objs</varname> so that genksyms will find them. | ||
1202 | <caution> | ||
1203 | <para> | ||
1204 | There is a restriction on the kernel build system that objects | ||
1205 | which export symbols must have globally unique names. | ||
1206 | If your object does not have a globally unique name then the | ||
1207 | standard fix is to move the | ||
1208 | <function>EXPORT_SYMBOL()</function> statements to their own | ||
1209 | object with a unique name. | ||
1210 | This is why several systems have separate exporting objects, | ||
1211 | usually suffixed with ksyms. | ||
1212 | </para> | ||
1213 | </caution> | ||
1214 | </para> | ||
1215 | </listitem> | ||
1216 | |||
1217 | <listitem> | ||
1218 | <para> | ||
1219 | Document your option in Documentation/Configure.help. Mention | ||
1220 | incompatibilities and issues here. <emphasis> Definitely | ||
1221 | </emphasis> end your description with <quote> if in doubt, say N | ||
1222 | </quote> (or, occasionally, `Y'); this is for people who have no | ||
1223 | idea what you are talking about. | ||
1224 | </para> | ||
1225 | </listitem> | ||
1226 | |||
1227 | <listitem> | ||
1228 | <para> | ||
1229 | Put yourself in <filename>CREDITS</filename> if you've done | ||
1230 | something noteworthy, usually beyond a single file (your name | ||
1231 | should be at the top of the source files anyway). | ||
1232 | <filename>MAINTAINERS</filename> means you want to be consulted | ||
1233 | when changes are made to a subsystem, and hear about bugs; it | ||
1234 | implies a more-than-passing commitment to some part of the code. | ||
1235 | </para> | ||
1236 | </listitem> | ||
1237 | |||
1238 | <listitem> | ||
1239 | <para> | ||
1240 | Finally, don't forget to read <filename>Documentation/SubmittingPatches</filename> | ||
1241 | and possibly <filename>Documentation/SubmittingDrivers</filename>. | ||
1242 | </para> | ||
1243 | </listitem> | ||
1244 | </itemizedlist> | ||
1245 | </chapter> | ||
1246 | |||
1247 | <chapter id="cantrips"> | ||
1248 | <title>Kernel Cantrips</title> | ||
1249 | |||
1250 | <para> | ||
1251 | Some favorites from browsing the source. Feel free to add to this | ||
1252 | list. | ||
1253 | </para> | ||
1254 | |||
1255 | <para> | ||
1256 | <filename>include/linux/brlock.h:</filename> | ||
1257 | </para> | ||
1258 | <programlisting> | ||
1259 | extern inline void br_read_lock (enum brlock_indices idx) | ||
1260 | { | ||
1261 | /* | ||
1262 | * This causes a link-time bug message if an | ||
1263 | * invalid index is used: | ||
1264 | */ | ||
1265 | if (idx >= __BR_END) | ||
1266 | __br_lock_usage_bug(); | ||
1267 | |||
1268 | read_lock(&__brlock_array[smp_processor_id()][idx]); | ||
1269 | } | ||
1270 | </programlisting> | ||
1271 | |||
1272 | <para> | ||
1273 | <filename>include/linux/fs.h</filename>: | ||
1274 | </para> | ||
1275 | <programlisting> | ||
1276 | /* | ||
1277 | * Kernel pointers have redundant information, so we can use a | ||
1278 | * scheme where we can return either an error code or a dentry | ||
1279 | * pointer with the same return value. | ||
1280 | * | ||
1281 | * This should be a per-architecture thing, to allow different | ||
1282 | * error and pointer decisions. | ||
1283 | */ | ||
1284 | #define ERR_PTR(err) ((void *)((long)(err))) | ||
1285 | #define PTR_ERR(ptr) ((long)(ptr)) | ||
1286 | #define IS_ERR(ptr) ((unsigned long)(ptr) > (unsigned long)(-1000)) | ||
1287 | </programlisting> | ||
1288 | |||
1289 | <para> | ||
1290 | <filename>include/asm-i386/uaccess.h:</filename> | ||
1291 | </para> | ||
1292 | |||
1293 | <programlisting> | ||
1294 | #define copy_to_user(to,from,n) \ | ||
1295 | (__builtin_constant_p(n) ? \ | ||
1296 | __constant_copy_to_user((to),(from),(n)) : \ | ||
1297 | __generic_copy_to_user((to),(from),(n))) | ||
1298 | </programlisting> | ||
1299 | |||
1300 | <para> | ||
1301 | <filename>arch/sparc/kernel/head.S:</filename> | ||
1302 | </para> | ||
1303 | |||
1304 | <programlisting> | ||
1305 | /* | ||
1306 | * Sun people can't spell worth damn. "compatability" indeed. | ||
1307 | * At least we *know* we can't spell, and use a spell-checker. | ||
1308 | */ | ||
1309 | |||
1310 | /* Uh, actually Linus it is I who cannot spell. Too much murky | ||
1311 | * Sparc assembly will do this to ya. | ||
1312 | */ | ||
1313 | C_LABEL(cputypvar): | ||
1314 | .asciz "compatability" | ||
1315 | |||
1316 | /* Tested on SS-5, SS-10. Probably someone at Sun applied a spell-checker. */ | ||
1317 | .align 4 | ||
1318 | C_LABEL(cputypvar_sun4m): | ||
1319 | .asciz "compatible" | ||
1320 | </programlisting> | ||
1321 | |||
1322 | <para> | ||
1323 | <filename>arch/sparc/lib/checksum.S:</filename> | ||
1324 | </para> | ||
1325 | |||
1326 | <programlisting> | ||
1327 | /* Sun, you just can't beat me, you just can't. Stop trying, | ||
1328 | * give up. I'm serious, I am going to kick the living shit | ||
1329 | * out of you, game over, lights out. | ||
1330 | */ | ||
1331 | </programlisting> | ||
1332 | </chapter> | ||
1333 | |||
1334 | <chapter id="credits"> | ||
1335 | <title>Thanks</title> | ||
1336 | |||
1337 | <para> | ||
1338 | Thanks to Andi Kleen for the idea, answering my questions, fixing | ||
1339 | my mistakes, filling content, etc. Philipp Rumpf for more spelling | ||
1340 | and clarity fixes, and some excellent non-obvious points. Werner | ||
1341 | Almesberger for giving me a great summary of | ||
1342 | <function>disable_irq()</function>, and Jes Sorensen and Andrea | ||
1343 | Arcangeli added caveats. Michael Elizabeth Chastain for checking | ||
1344 | and adding to the Configure section. <!-- Rusty insisted on this | ||
1345 | bit; I didn't do it! --> Telsa Gwynne for teaching me DocBook. | ||
1346 | </para> | ||
1347 | </chapter> | ||
1348 | </book> | ||
1349 | |||
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl new file mode 100644 index 000000000000..90dc2de8e0af --- /dev/null +++ b/Documentation/DocBook/kernel-locking.tmpl | |||
@@ -0,0 +1,2088 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="LKLockingGuide"> | ||
6 | <bookinfo> | ||
7 | <title>Unreliable Guide To Locking</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Rusty</firstname> | ||
12 | <surname>Russell</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>rusty@rustcorp.com.au</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2003</year> | ||
23 | <holder>Rusty Russell</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | </legalnotice> | ||
54 | </bookinfo> | ||
55 | |||
56 | <toc></toc> | ||
57 | <chapter id="intro"> | ||
58 | <title>Introduction</title> | ||
59 | <para> | ||
60 | Welcome to Rusty's Remarkably Unreliable Guide to Kernel | ||
61 | Locking issues. This document describes the locking systems in | ||
62 | the Linux Kernel in 2.6. | ||
63 | </para> | ||
64 | <para> | ||
65 | With the wide availability of HyperThreading, and <firstterm | ||
66 | linkend="gloss-preemption">preemption </firstterm> in the Linux | ||
67 | Kernel, everyone hacking on the kernel needs to know the | ||
68 | fundamentals of concurrency and locking for | ||
69 | <firstterm linkend="gloss-smp"><acronym>SMP</acronym></firstterm>. | ||
70 | </para> | ||
71 | </chapter> | ||
72 | |||
73 | <chapter id="races"> | ||
74 | <title>The Problem With Concurrency</title> | ||
75 | <para> | ||
76 | (Skip this if you know what a Race Condition is). | ||
77 | </para> | ||
78 | <para> | ||
79 | In a normal program, you can increment a counter like so: | ||
80 | </para> | ||
81 | <programlisting> | ||
82 | very_important_count++; | ||
83 | </programlisting> | ||
84 | |||
85 | <para> | ||
86 | This is what you would expect to happen: | ||
87 | </para> | ||
88 | |||
89 | <table> | ||
90 | <title>Expected Results</title> | ||
91 | |||
92 | <tgroup cols="2" align="left"> | ||
93 | |||
94 | <thead> | ||
95 | <row> | ||
96 | <entry>Instance 1</entry> | ||
97 | <entry>Instance 2</entry> | ||
98 | </row> | ||
99 | </thead> | ||
100 | |||
101 | <tbody> | ||
102 | <row> | ||
103 | <entry>read very_important_count (5)</entry> | ||
104 | <entry></entry> | ||
105 | </row> | ||
106 | <row> | ||
107 | <entry>add 1 (6)</entry> | ||
108 | <entry></entry> | ||
109 | </row> | ||
110 | <row> | ||
111 | <entry>write very_important_count (6)</entry> | ||
112 | <entry></entry> | ||
113 | </row> | ||
114 | <row> | ||
115 | <entry></entry> | ||
116 | <entry>read very_important_count (6)</entry> | ||
117 | </row> | ||
118 | <row> | ||
119 | <entry></entry> | ||
120 | <entry>add 1 (7)</entry> | ||
121 | </row> | ||
122 | <row> | ||
123 | <entry></entry> | ||
124 | <entry>write very_important_count (7)</entry> | ||
125 | </row> | ||
126 | </tbody> | ||
127 | |||
128 | </tgroup> | ||
129 | </table> | ||
130 | |||
131 | <para> | ||
132 | This is what might happen: | ||
133 | </para> | ||
134 | |||
135 | <table> | ||
136 | <title>Possible Results</title> | ||
137 | |||
138 | <tgroup cols="2" align="left"> | ||
139 | <thead> | ||
140 | <row> | ||
141 | <entry>Instance 1</entry> | ||
142 | <entry>Instance 2</entry> | ||
143 | </row> | ||
144 | </thead> | ||
145 | |||
146 | <tbody> | ||
147 | <row> | ||
148 | <entry>read very_important_count (5)</entry> | ||
149 | <entry></entry> | ||
150 | </row> | ||
151 | <row> | ||
152 | <entry></entry> | ||
153 | <entry>read very_important_count (5)</entry> | ||
154 | </row> | ||
155 | <row> | ||
156 | <entry>add 1 (6)</entry> | ||
157 | <entry></entry> | ||
158 | </row> | ||
159 | <row> | ||
160 | <entry></entry> | ||
161 | <entry>add 1 (6)</entry> | ||
162 | </row> | ||
163 | <row> | ||
164 | <entry>write very_important_count (6)</entry> | ||
165 | <entry></entry> | ||
166 | </row> | ||
167 | <row> | ||
168 | <entry></entry> | ||
169 | <entry>write very_important_count (6)</entry> | ||
170 | </row> | ||
171 | </tbody> | ||
172 | </tgroup> | ||
173 | </table> | ||
174 | |||
175 | <sect1 id="race-condition"> | ||
176 | <title>Race Conditions and Critical Regions</title> | ||
177 | <para> | ||
178 | This overlap, where the result depends on the | ||
179 | relative timing of multiple tasks, is called a <firstterm>race condition</firstterm>. | ||
180 | The piece of code containing the concurrency issue is called a | ||
181 | <firstterm>critical region</firstterm>. And especially since Linux started running | ||
182 | on SMP machines, they became one of the major issues in kernel | ||
183 | design and implementation. | ||
184 | </para> | ||
185 | <para> | ||
186 | Preemption can have the same effect, even if there is only one | ||
187 | CPU: by preempting one task during the critical region, we have | ||
188 | exactly the same race condition. In this case the thread which | ||
189 | preempts might run the critical region itself. | ||
190 | </para> | ||
191 | <para> | ||
192 | The solution is to recognize when these simultaneous accesses | ||
193 | occur, and use locks to make sure that only one instance can | ||
194 | enter the critical region at any time. There are many | ||
195 | friendly primitives in the Linux kernel to help you do this. | ||
196 | And then there are the unfriendly primitives, but I'll pretend | ||
197 | they don't exist. | ||
198 | </para> | ||
199 | </sect1> | ||
200 | </chapter> | ||
201 | |||
202 | <chapter id="locks"> | ||
203 | <title>Locking in the Linux Kernel</title> | ||
204 | |||
205 | <para> | ||
206 | If I could give you one piece of advice: never sleep with anyone | ||
207 | crazier than yourself. But if I had to give you advice on | ||
208 | locking: <emphasis>keep it simple</emphasis>. | ||
209 | </para> | ||
210 | |||
211 | <para> | ||
212 | Be reluctant to introduce new locks. | ||
213 | </para> | ||
214 | |||
215 | <para> | ||
216 | Strangely enough, this last one is the exact reverse of my advice when | ||
217 | you <emphasis>have</emphasis> slept with someone crazier than yourself. | ||
218 | And you should think about getting a big dog. | ||
219 | </para> | ||
220 | |||
221 | <sect1 id="lock-intro"> | ||
222 | <title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title> | ||
223 | |||
224 | <para> | ||
225 | There are two main types of kernel locks. The fundamental type | ||
226 | is the spinlock | ||
227 | (<filename class="headerfile">include/asm/spinlock.h</filename>), | ||
228 | which is a very simple single-holder lock: if you can't get the | ||
229 | spinlock, you keep trying (spinning) until you can. Spinlocks are | ||
230 | very small and fast, and can be used anywhere. | ||
231 | </para> | ||
232 | <para> | ||
233 | The second type is a semaphore | ||
234 | (<filename class="headerfile">include/asm/semaphore.h</filename>): it | ||
235 | can have more than one holder at any time (the number decided at | ||
236 | initialization time), although it is most commonly used as a | ||
237 | single-holder lock (a mutex). If you can't get a semaphore, | ||
238 | your task will put itself on the queue, and be woken up when the | ||
239 | semaphore is released. This means the CPU will do something | ||
240 | else while you are waiting, but there are many cases when you | ||
241 | simply can't sleep (see <xref linkend="sleeping-things"/>), and so | ||
242 | have to use a spinlock instead. | ||
243 | </para> | ||
244 | <para> | ||
245 | Neither type of lock is recursive: see | ||
246 | <xref linkend="deadlock"/>. | ||
247 | </para> | ||
248 | </sect1> | ||
249 | |||
250 | <sect1 id="uniprocessor"> | ||
251 | <title>Locks and Uniprocessor Kernels</title> | ||
252 | |||
253 | <para> | ||
254 | For kernels compiled without <symbol>CONFIG_SMP</symbol>, and | ||
255 | without <symbol>CONFIG_PREEMPT</symbol>, spinlocks do not exist at | ||
256 | all. This is an excellent design decision: when no-one else can | ||
257 | run at the same time, there is no reason to have a lock. | ||
258 | </para> | ||
259 | |||
260 | <para> | ||
261 | If the kernel is compiled without <symbol>CONFIG_SMP</symbol>, | ||
262 | but <symbol>CONFIG_PREEMPT</symbol> is set, then spinlocks | ||
263 | simply disable preemption, which is sufficient to prevent any | ||
264 | races. For most purposes, we can think of preemption as | ||
265 | equivalent to SMP, and not worry about it separately. | ||
266 | </para> | ||
267 | |||
268 | <para> | ||
269 | You should always test your locking code with <symbol>CONFIG_SMP</symbol> | ||
270 | and <symbol>CONFIG_PREEMPT</symbol> enabled, even if you don't have an SMP test box, because it | ||
271 | will still catch some kinds of locking bugs. | ||
272 | </para> | ||
273 | |||
274 | <para> | ||
275 | Semaphores still exist, because they are required for | ||
276 | synchronization between <firstterm linkend="gloss-usercontext">user | ||
277 | contexts</firstterm>, as we will see below. | ||
278 | </para> | ||
279 | </sect1> | ||
280 | |||
281 | <sect1 id="usercontextlocking"> | ||
282 | <title>Locking Only In User Context</title> | ||
283 | |||
284 | <para> | ||
285 | If you have a data structure which is only ever accessed from | ||
286 | user context, then you can use a simple semaphore | ||
287 | (<filename>include/asm/semaphore.h</filename>) to protect it. This | ||
288 | is the most trivial case: you initialize the semaphore to the number | ||
289 | of resources available (usually 1), and call | ||
290 | <function>down_interruptible()</function> to grab the semaphore, and | ||
291 | <function>up()</function> to release it. There is also a | ||
292 | <function>down()</function>, which should be avoided, because it | ||
293 | will not return if a signal is received. | ||
294 | </para> | ||
295 | |||
296 | <para> | ||
297 | Example: <filename>linux/net/core/netfilter.c</filename> allows | ||
298 | registration of new <function>setsockopt()</function> and | ||
299 | <function>getsockopt()</function> calls, with | ||
300 | <function>nf_register_sockopt()</function>. Registration and | ||
301 | de-registration are only done on module load and unload (and boot | ||
302 | time, where there is no concurrency), and the list of registrations | ||
303 | is only consulted for an unknown <function>setsockopt()</function> | ||
304 | or <function>getsockopt()</function> system call. The | ||
305 | <varname>nf_sockopt_mutex</varname> is perfect to protect this, | ||
306 | especially since the setsockopt and getsockopt calls may well | ||
307 | sleep. | ||
308 | </para> | ||
309 | </sect1> | ||
310 | |||
311 | <sect1 id="lock-user-bh"> | ||
312 | <title>Locking Between User Context and Softirqs</title> | ||
313 | |||
314 | <para> | ||
315 | If a <firstterm linkend="gloss-softirq">softirq</firstterm> shares | ||
316 | data with user context, you have two problems. Firstly, the current | ||
317 | user context can be interrupted by a softirq, and secondly, the | ||
318 | critical region could be entered from another CPU. This is where | ||
319 | <function>spin_lock_bh()</function> | ||
320 | (<filename class="headerfile">include/linux/spinlock.h</filename>) is | ||
321 | used. It disables softirqs on that CPU, then grabs the lock. | ||
322 | <function>spin_unlock_bh()</function> does the reverse. (The | ||
323 | '_bh' suffix is a historical reference to "Bottom Halves", the | ||
324 | old name for software interrupts. It should really be | ||
325 | called 'spin_lock_softirq()' in a perfect world). | ||
326 | </para> | ||
327 | |||
328 | <para> | ||
329 | Note that you can also use <function>spin_lock_irq()</function> | ||
330 | or <function>spin_lock_irqsave()</function> here, which stop | ||
331 | hardware interrupts as well: see <xref linkend="hardirq-context"/>. | ||
332 | </para> | ||
333 | |||
334 | <para> | ||
335 | This works perfectly for <firstterm linkend="gloss-up"><acronym>UP | ||
336 | </acronym></firstterm> as well: the spin lock vanishes, and this macro | ||
337 | simply becomes <function>local_bh_disable()</function> | ||
338 | (<filename class="headerfile">include/linux/interrupt.h</filename>), which | ||
339 | protects you from the softirq being run. | ||
340 | </para> | ||
341 | </sect1> | ||
342 | |||
343 | <sect1 id="lock-user-tasklet"> | ||
344 | <title>Locking Between User Context and Tasklets</title> | ||
345 | |||
346 | <para> | ||
347 | This is exactly the same as above, because <firstterm | ||
348 | linkend="gloss-tasklet">tasklets</firstterm> are actually run | ||
349 | from a softirq. | ||
350 | </para> | ||
351 | </sect1> | ||
352 | |||
353 | <sect1 id="lock-user-timers"> | ||
354 | <title>Locking Between User Context and Timers</title> | ||
355 | |||
356 | <para> | ||
357 | This, too, is exactly the same as above, because <firstterm | ||
358 | linkend="gloss-timers">timers</firstterm> are actually run from | ||
359 | a softirq. From a locking point of view, tasklets and timers | ||
360 | are identical. | ||
361 | </para> | ||
362 | </sect1> | ||
363 | |||
364 | <sect1 id="lock-tasklets"> | ||
365 | <title>Locking Between Tasklets/Timers</title> | ||
366 | |||
367 | <para> | ||
368 | Sometimes a tasklet or timer might want to share data with | ||
369 | another tasklet or timer. | ||
370 | </para> | ||
371 | |||
372 | <sect2 id="lock-tasklets-same"> | ||
373 | <title>The Same Tasklet/Timer</title> | ||
374 | <para> | ||
375 | Since a tasklet is never run on two CPUs at once, you don't | ||
376 | need to worry about your tasklet being reentrant (running | ||
377 | twice at once), even on SMP. | ||
378 | </para> | ||
379 | </sect2> | ||
380 | |||
381 | <sect2 id="lock-tasklets-different"> | ||
382 | <title>Different Tasklets/Timers</title> | ||
383 | <para> | ||
384 | If another tasklet/timer wants | ||
385 | to share data with your tasklet or timer, you will both need to use | ||
386 | <function>spin_lock()</function> and | ||
387 | <function>spin_unlock()</function> calls. | ||
388 | <function>spin_lock_bh()</function> is | ||
389 | unnecessary here, as you are already in a tasklet, and | ||
390 | none will be run on the same CPU. | ||
391 | </para> | ||
392 | </sect2> | ||
393 | </sect1> | ||
394 | |||
395 | <sect1 id="lock-softirqs"> | ||
396 | <title>Locking Between Softirqs</title> | ||
397 | |||
398 | <para> | ||
399 | Often a softirq might | ||
400 | want to share data with itself or a tasklet/timer. | ||
401 | </para> | ||
402 | |||
403 | <sect2 id="lock-softirqs-same"> | ||
404 | <title>The Same Softirq</title> | ||
405 | |||
406 | <para> | ||
407 | The same softirq can run on the other CPUs: you can use a | ||
408 | per-CPU array (see <xref linkend="per-cpu"/>) for better | ||
409 | performance. If you're going so far as to use a softirq, | ||
410 | you probably care about scalable performance enough | ||
411 | to justify the extra complexity. | ||
412 | </para> | ||
413 | |||
414 | <para> | ||
415 | You'll need to use <function>spin_lock()</function> and | ||
416 | <function>spin_unlock()</function> for shared data. | ||
417 | </para> | ||
418 | </sect2> | ||
419 | |||
420 | <sect2 id="lock-softirqs-different"> | ||
421 | <title>Different Softirqs</title> | ||
422 | |||
423 | <para> | ||
424 | You'll need to use <function>spin_lock()</function> and | ||
425 | <function>spin_unlock()</function> for shared data, whether it | ||
426 | be a timer, tasklet, different softirq or the same or another | ||
427 | softirq: any of them could be running on a different CPU. | ||
428 | </para> | ||
429 | </sect2> | ||
430 | </sect1> | ||
431 | </chapter> | ||
432 | |||
433 | <chapter id="hardirq-context"> | ||
434 | <title>Hard IRQ Context</title> | ||
435 | |||
436 | <para> | ||
437 | Hardware interrupts usually communicate with a | ||
438 | tasklet or softirq. Frequently this involves putting work in a | ||
439 | queue, which the softirq will take out. | ||
440 | </para> | ||
441 | |||
442 | <sect1 id="hardirq-softirq"> | ||
443 | <title>Locking Between Hard IRQ and Softirqs/Tasklets</title> | ||
444 | |||
445 | <para> | ||
446 | If a hardware irq handler shares data with a softirq, you have | ||
447 | two concerns. Firstly, the softirq processing can be | ||
448 | interrupted by a hardware interrupt, and secondly, the | ||
449 | critical region could be entered by a hardware interrupt on | ||
450 | another CPU. This is where <function>spin_lock_irq()</function> is | ||
451 | used. It is defined to disable interrupts on that cpu, then grab | ||
452 | the lock. <function>spin_unlock_irq()</function> does the reverse. | ||
453 | </para> | ||
454 | |||
455 | <para> | ||
456 | The irq handler does not need to use | ||
457 | <function>spin_lock_irq()</function>, because the softirq cannot | ||
458 | run while the irq handler is running: it can use | ||
459 | <function>spin_lock()</function>, which is slightly faster. The | ||
460 | only exception would be if a different hardware irq handler uses | ||
461 | the same lock: <function>spin_lock_irq()</function> will stop | ||
462 | that from interrupting us. | ||
463 | </para> | ||
464 | |||
465 | <para> | ||
466 | This works perfectly for UP as well: the spin lock vanishes, | ||
467 | and this macro simply becomes <function>local_irq_disable()</function> | ||
468 | (<filename class="headerfile">include/asm/smp.h</filename>), which | ||
469 | protects you from the softirq/tasklet/BH being run. | ||
470 | </para> | ||
471 | |||
472 | <para> | ||
473 | <function>spin_lock_irqsave()</function> | ||
474 | (<filename>include/linux/spinlock.h</filename>) is a variant | ||
475 | which saves whether interrupts were on or off in a flags word, | ||
476 | which is passed to <function>spin_unlock_irqrestore()</function>. This | ||
477 | means that the same code can be used inside a hard irq handler (where | ||
478 | interrupts are already off) and in softirqs (where the irq | ||
479 | disabling is required). | ||
480 | </para> | ||
481 | |||
482 | <para> | ||
483 | Note that softirqs (and hence tasklets and timers) are run on | ||
484 | return from hardware interrupts, so | ||
485 | <function>spin_lock_irq()</function> also stops these. In that | ||
486 | sense, <function>spin_lock_irqsave()</function> is the most | ||
487 | general and powerful locking function. | ||
488 | </para> | ||
489 | |||
490 | </sect1> | ||
491 | <sect1 id="hardirq-hardirq"> | ||
492 | <title>Locking Between Two Hard IRQ Handlers</title> | ||
493 | <para> | ||
494 | It is rare to have to share data between two IRQ handlers, but | ||
495 | if you do, <function>spin_lock_irqsave()</function> should be | ||
496 | used: it is architecture-specific whether all interrupts are | ||
497 | disabled inside irq handlers themselves. | ||
498 | </para> | ||
499 | </sect1> | ||
500 | |||
501 | </chapter> | ||
502 | |||
503 | <chapter id="cheatsheet"> | ||
504 | <title>Cheat Sheet For Locking</title> | ||
505 | <para> | ||
506 | Pete Zaitcev gives the following summary: | ||
507 | </para> | ||
508 | <itemizedlist> | ||
509 | <listitem> | ||
510 | <para> | ||
511 | If you are in a process context (any syscall) and want to | ||
512 | lock other processes out, use a semaphore. You can take a semaphore | ||
513 | and sleep (<function>copy_from_user*()</function> or | ||
514 | <function>kmalloc(x,GFP_KERNEL)</function>). | ||
515 | </para> | ||
516 | </listitem> | ||
517 | <listitem> | ||
518 | <para> | ||
519 | Otherwise (== data can be touched in an interrupt), use | ||
520 | <function>spin_lock_irqsave()</function> and | ||
521 | <function>spin_unlock_irqrestore()</function>. | ||
522 | </para> | ||
523 | </listitem> | ||
524 | <listitem> | ||
525 | <para> | ||
526 | Avoid holding a spinlock for more than 5 lines of code and | ||
527 | across any function call (except accessors like | ||
528 | <function>readb</function>). | ||
529 | </para> | ||
530 | </listitem> | ||
531 | </itemizedlist> | ||
532 | |||
533 | <sect1 id="minimum-lock-reqirements"> | ||
534 | <title>Table of Minimum Requirements</title> | ||
535 | |||
536 | <para> The following table lists the <emphasis>minimum</emphasis> | ||
537 | locking requirements between various contexts. In some cases, | ||
538 | the same context can only be running on one CPU at a time, so | ||
539 | no locking is required for that context (eg. a particular | ||
540 | thread can only run on one CPU at a time, but if it needs | ||
541 | to share data with another thread, locking is required). | ||
542 | </para> | ||
543 | <para> | ||
544 | Remember the advice above: you can always use | ||
545 | <function>spin_lock_irqsave()</function>, which is a superset | ||
546 | of all other spinlock primitives. | ||
547 | </para> | ||
548 | <table> | ||
549 | <title>Table of Locking Requirements</title> | ||
550 | <tgroup cols="11"> | ||
551 | <tbody> | ||
552 | <row> | ||
553 | <entry></entry> | ||
554 | <entry>IRQ Handler A</entry> | ||
555 | <entry>IRQ Handler B</entry> | ||
556 | <entry>Softirq A</entry> | ||
557 | <entry>Softirq B</entry> | ||
558 | <entry>Tasklet A</entry> | ||
559 | <entry>Tasklet B</entry> | ||
560 | <entry>Timer A</entry> | ||
561 | <entry>Timer B</entry> | ||
562 | <entry>User Context A</entry> | ||
563 | <entry>User Context B</entry> | ||
564 | </row> | ||
565 | |||
566 | <row> | ||
567 | <entry>IRQ Handler A</entry> | ||
568 | <entry>None</entry> | ||
569 | </row> | ||
570 | |||
571 | <row> | ||
572 | <entry>IRQ Handler B</entry> | ||
573 | <entry>spin_lock_irqsave</entry> | ||
574 | <entry>None</entry> | ||
575 | </row> | ||
576 | |||
577 | <row> | ||
578 | <entry>Softirq A</entry> | ||
579 | <entry>spin_lock_irq</entry> | ||
580 | <entry>spin_lock_irq</entry> | ||
581 | <entry>spin_lock</entry> | ||
582 | </row> | ||
583 | |||
584 | <row> | ||
585 | <entry>Softirq B</entry> | ||
586 | <entry>spin_lock_irq</entry> | ||
587 | <entry>spin_lock_irq</entry> | ||
588 | <entry>spin_lock</entry> | ||
589 | <entry>spin_lock</entry> | ||
590 | </row> | ||
591 | |||
592 | <row> | ||
593 | <entry>Tasklet A</entry> | ||
594 | <entry>spin_lock_irq</entry> | ||
595 | <entry>spin_lock_irq</entry> | ||
596 | <entry>spin_lock</entry> | ||
597 | <entry>spin_lock</entry> | ||
598 | <entry>None</entry> | ||
599 | </row> | ||
600 | |||
601 | <row> | ||
602 | <entry>Tasklet B</entry> | ||
603 | <entry>spin_lock_irq</entry> | ||
604 | <entry>spin_lock_irq</entry> | ||
605 | <entry>spin_lock</entry> | ||
606 | <entry>spin_lock</entry> | ||
607 | <entry>spin_lock</entry> | ||
608 | <entry>None</entry> | ||
609 | </row> | ||
610 | |||
611 | <row> | ||
612 | <entry>Timer A</entry> | ||
613 | <entry>spin_lock_irq</entry> | ||
614 | <entry>spin_lock_irq</entry> | ||
615 | <entry>spin_lock</entry> | ||
616 | <entry>spin_lock</entry> | ||
617 | <entry>spin_lock</entry> | ||
618 | <entry>spin_lock</entry> | ||
619 | <entry>None</entry> | ||
620 | </row> | ||
621 | |||
622 | <row> | ||
623 | <entry>Timer B</entry> | ||
624 | <entry>spin_lock_irq</entry> | ||
625 | <entry>spin_lock_irq</entry> | ||
626 | <entry>spin_lock</entry> | ||
627 | <entry>spin_lock</entry> | ||
628 | <entry>spin_lock</entry> | ||
629 | <entry>spin_lock</entry> | ||
630 | <entry>spin_lock</entry> | ||
631 | <entry>None</entry> | ||
632 | </row> | ||
633 | |||
634 | <row> | ||
635 | <entry>User Context A</entry> | ||
636 | <entry>spin_lock_irq</entry> | ||
637 | <entry>spin_lock_irq</entry> | ||
638 | <entry>spin_lock_bh</entry> | ||
639 | <entry>spin_lock_bh</entry> | ||
640 | <entry>spin_lock_bh</entry> | ||
641 | <entry>spin_lock_bh</entry> | ||
642 | <entry>spin_lock_bh</entry> | ||
643 | <entry>spin_lock_bh</entry> | ||
644 | <entry>None</entry> | ||
645 | </row> | ||
646 | |||
647 | <row> | ||
648 | <entry>User Context B</entry> | ||
649 | <entry>spin_lock_irq</entry> | ||
650 | <entry>spin_lock_irq</entry> | ||
651 | <entry>spin_lock_bh</entry> | ||
652 | <entry>spin_lock_bh</entry> | ||
653 | <entry>spin_lock_bh</entry> | ||
654 | <entry>spin_lock_bh</entry> | ||
655 | <entry>spin_lock_bh</entry> | ||
656 | <entry>spin_lock_bh</entry> | ||
657 | <entry>down_interruptible</entry> | ||
658 | <entry>None</entry> | ||
659 | </row> | ||
660 | |||
661 | </tbody> | ||
662 | </tgroup> | ||
663 | </table> | ||
664 | </sect1> | ||
665 | </chapter> | ||
666 | |||
667 | <chapter id="Examples"> | ||
668 | <title>Common Examples</title> | ||
669 | <para> | ||
670 | Let's step through a simple example: a cache of number to name | ||
671 | mappings. The cache keeps a count of how often each of the objects is | ||
672 | used, and when it gets full, throws out the least used one. | ||
673 | |||
674 | </para> | ||
675 | |||
676 | <sect1 id="examples-usercontext"> | ||
677 | <title>All In User Context</title> | ||
678 | <para> | ||
679 | For our first example, we assume that all operations are in user | ||
680 | context (ie. from system calls), so we can sleep. This means we can | ||
681 | use a semaphore to protect the cache and all the objects within | ||
682 | it. Here's the code: | ||
683 | </para> | ||
684 | |||
685 | <programlisting> | ||
686 | #include <linux/list.h> | ||
687 | #include <linux/slab.h> | ||
688 | #include <linux/string.h> | ||
689 | #include <asm/semaphore.h> | ||
690 | #include <asm/errno.h> | ||
691 | |||
692 | struct object | ||
693 | { | ||
694 | struct list_head list; | ||
695 | int id; | ||
696 | char name[32]; | ||
697 | int popularity; | ||
698 | }; | ||
699 | |||
700 | /* Protects the cache, cache_num, and the objects within it */ | ||
701 | static DECLARE_MUTEX(cache_lock); | ||
702 | static LIST_HEAD(cache); | ||
703 | static unsigned int cache_num = 0; | ||
704 | #define MAX_CACHE_SIZE 10 | ||
705 | |||
706 | /* Must be holding cache_lock */ | ||
707 | static struct object *__cache_find(int id) | ||
708 | { | ||
709 | struct object *i; | ||
710 | |||
711 | list_for_each_entry(i, &cache, list) | ||
712 | if (i->id == id) { | ||
713 | i->popularity++; | ||
714 | return i; | ||
715 | } | ||
716 | return NULL; | ||
717 | } | ||
718 | |||
719 | /* Must be holding cache_lock */ | ||
720 | static void __cache_delete(struct object *obj) | ||
721 | { | ||
722 | BUG_ON(!obj); | ||
723 | list_del(&obj->list); | ||
724 | kfree(obj); | ||
725 | cache_num--; | ||
726 | } | ||
727 | |||
728 | /* Must be holding cache_lock */ | ||
729 | static void __cache_add(struct object *obj) | ||
730 | { | ||
731 | list_add(&obj->list, &cache); | ||
732 | if (++cache_num > MAX_CACHE_SIZE) { | ||
733 | struct object *i, *outcast = NULL; | ||
734 | list_for_each_entry(i, &cache, list) { | ||
735 | if (!outcast || i->popularity < outcast->popularity) | ||
736 | outcast = i; | ||
737 | } | ||
738 | __cache_delete(outcast); | ||
739 | } | ||
740 | } | ||
741 | |||
742 | int cache_add(int id, const char *name) | ||
743 | { | ||
744 | struct object *obj; | ||
745 | |||
746 | if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL) | ||
747 | return -ENOMEM; | ||
748 | |||
749 | strlcpy(obj->name, name, sizeof(obj->name)); | ||
750 | obj->id = id; | ||
751 | obj->popularity = 0; | ||
752 | |||
753 | down(&cache_lock); | ||
754 | __cache_add(obj); | ||
755 | up(&cache_lock); | ||
756 | return 0; | ||
757 | } | ||
758 | |||
759 | void cache_delete(int id) | ||
760 | { | ||
761 | down(&cache_lock); | ||
762 | __cache_delete(__cache_find(id)); | ||
763 | up(&cache_lock); | ||
764 | } | ||
765 | |||
766 | int cache_find(int id, char *name) | ||
767 | { | ||
768 | struct object *obj; | ||
769 | int ret = -ENOENT; | ||
770 | |||
771 | down(&cache_lock); | ||
772 | obj = __cache_find(id); | ||
773 | if (obj) { | ||
774 | ret = 0; | ||
775 | strcpy(name, obj->name); | ||
776 | } | ||
777 | up(&cache_lock); | ||
778 | return ret; | ||
779 | } | ||
780 | </programlisting> | ||
781 | |||
782 | <para> | ||
783 | Note that we always make sure we have the cache_lock when we add, | ||
784 | delete, or look up the cache: both the cache infrastructure itself and | ||
785 | the contents of the objects are protected by the lock. In this case | ||
786 | it's easy, since we copy the data for the user, and never let them | ||
787 | access the objects directly. | ||
788 | </para> | ||
789 | <para> | ||
790 | There is a slight (and common) optimization here: in | ||
791 | <function>cache_add</function> we set up the fields of the object | ||
792 | before grabbing the lock. This is safe, as no-one else can access it | ||
793 | until we put it in cache. | ||
794 | </para> | ||
795 | </sect1> | ||
796 | |||
797 | <sect1 id="examples-interrupt"> | ||
798 | <title>Accessing From Interrupt Context</title> | ||
799 | <para> | ||
800 | Now consider the case where <function>cache_find</function> can be | ||
801 | called from interrupt context: either a hardware interrupt or a | ||
802 | softirq. An example would be a timer which deletes objects from the | ||
803 | cache. | ||
804 | </para> | ||
805 | <para> | ||
806 | The change is shown below, in standard patch format: the | ||
807 | <symbol>-</symbol> are lines which are taken away, and the | ||
808 | <symbol>+</symbol> are lines which are added. | ||
809 | </para> | ||
810 | <programlisting> | ||
811 | --- cache.c.usercontext 2003-12-09 13:58:54.000000000 +1100 | ||
812 | +++ cache.c.interrupt 2003-12-09 14:07:49.000000000 +1100 | ||
813 | @@ -12,7 +12,7 @@ | ||
814 | int popularity; | ||
815 | }; | ||
816 | |||
817 | -static DECLARE_MUTEX(cache_lock); | ||
818 | +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; | ||
819 | static LIST_HEAD(cache); | ||
820 | static unsigned int cache_num = 0; | ||
821 | #define MAX_CACHE_SIZE 10 | ||
822 | @@ -55,6 +55,7 @@ | ||
823 | int cache_add(int id, const char *name) | ||
824 | { | ||
825 | struct object *obj; | ||
826 | + unsigned long flags; | ||
827 | |||
828 | if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL) | ||
829 | return -ENOMEM; | ||
830 | @@ -63,30 +64,33 @@ | ||
831 | obj->id = id; | ||
832 | obj->popularity = 0; | ||
833 | |||
834 | - down(&cache_lock); | ||
835 | + spin_lock_irqsave(&cache_lock, flags); | ||
836 | __cache_add(obj); | ||
837 | - up(&cache_lock); | ||
838 | + spin_unlock_irqrestore(&cache_lock, flags); | ||
839 | return 0; | ||
840 | } | ||
841 | |||
842 | void cache_delete(int id) | ||
843 | { | ||
844 | - down(&cache_lock); | ||
845 | + unsigned long flags; | ||
846 | + | ||
847 | + spin_lock_irqsave(&cache_lock, flags); | ||
848 | __cache_delete(__cache_find(id)); | ||
849 | - up(&cache_lock); | ||
850 | + spin_unlock_irqrestore(&cache_lock, flags); | ||
851 | } | ||
852 | |||
853 | int cache_find(int id, char *name) | ||
854 | { | ||
855 | struct object *obj; | ||
856 | int ret = -ENOENT; | ||
857 | + unsigned long flags; | ||
858 | |||
859 | - down(&cache_lock); | ||
860 | + spin_lock_irqsave(&cache_lock, flags); | ||
861 | obj = __cache_find(id); | ||
862 | if (obj) { | ||
863 | ret = 0; | ||
864 | strcpy(name, obj->name); | ||
865 | } | ||
866 | - up(&cache_lock); | ||
867 | + spin_unlock_irqrestore(&cache_lock, flags); | ||
868 | return ret; | ||
869 | } | ||
870 | </programlisting> | ||
871 | |||
872 | <para> | ||
873 | Note that the <function>spin_lock_irqsave</function> will turn off | ||
874 | interrupts if they are on, otherwise does nothing (if we are already | ||
875 | in an interrupt handler), hence these functions are safe to call from | ||
876 | any context. | ||
877 | </para> | ||
878 | <para> | ||
879 | Unfortunately, <function>cache_add</function> calls | ||
880 | <function>kmalloc</function> with the <symbol>GFP_KERNEL</symbol> | ||
881 | flag, which is only legal in user context. I have assumed that | ||
882 | <function>cache_add</function> is still only called in user context, | ||
883 | otherwise this should become a parameter to | ||
884 | <function>cache_add</function>. | ||
885 | </para> | ||
886 | </sect1> | ||
887 | <sect1 id="examples-refcnt"> | ||
888 | <title>Exposing Objects Outside This File</title> | ||
889 | <para> | ||
890 | If our objects contained more information, it might not be sufficient | ||
891 | to copy the information in and out: other parts of the code might want | ||
892 | to keep pointers to these objects, for example, rather than looking up | ||
893 | the id every time. This produces two problems. | ||
894 | </para> | ||
895 | <para> | ||
896 | The first problem is that we use the <symbol>cache_lock</symbol> to | ||
897 | protect objects: we'd need to make this non-static so the rest of the | ||
898 | code can use it. This makes locking trickier, as it is no longer all | ||
899 | in one place. | ||
900 | </para> | ||
901 | <para> | ||
902 | The second problem is the lifetime problem: if another structure keeps | ||
903 | a pointer to an object, it presumably expects that pointer to remain | ||
904 | valid. Unfortunately, this is only guaranteed while you hold the | ||
905 | lock, otherwise someone might call <function>cache_delete</function> | ||
906 | and even worse, add another object, re-using the same address. | ||
907 | </para> | ||
908 | <para> | ||
909 | As there is only one lock, you can't hold it forever: no-one else would | ||
910 | get any work done. | ||
911 | </para> | ||
912 | <para> | ||
913 | The solution to this problem is to use a reference count: everyone who | ||
914 | has a pointer to the object increases it when they first get the | ||
915 | object, and drops the reference count when they're finished with it. | ||
916 | Whoever drops it to zero knows it is unused, and can actually delete it. | ||
917 | </para> | ||
918 | <para> | ||
919 | Here is the code: | ||
920 | </para> | ||
921 | |||
922 | <programlisting> | ||
923 | --- cache.c.interrupt 2003-12-09 14:25:43.000000000 +1100 | ||
924 | +++ cache.c.refcnt 2003-12-09 14:33:05.000000000 +1100 | ||
925 | @@ -7,6 +7,7 @@ | ||
926 | struct object | ||
927 | { | ||
928 | struct list_head list; | ||
929 | + unsigned int refcnt; | ||
930 | int id; | ||
931 | char name[32]; | ||
932 | int popularity; | ||
933 | @@ -17,6 +18,35 @@ | ||
934 | static unsigned int cache_num = 0; | ||
935 | #define MAX_CACHE_SIZE 10 | ||
936 | |||
937 | +static void __object_put(struct object *obj) | ||
938 | +{ | ||
939 | + if (--obj->refcnt == 0) | ||
940 | + kfree(obj); | ||
941 | +} | ||
942 | + | ||
943 | +static void __object_get(struct object *obj) | ||
944 | +{ | ||
945 | + obj->refcnt++; | ||
946 | +} | ||
947 | + | ||
948 | +void object_put(struct object *obj) | ||
949 | +{ | ||
950 | + unsigned long flags; | ||
951 | + | ||
952 | + spin_lock_irqsave(&cache_lock, flags); | ||
953 | + __object_put(obj); | ||
954 | + spin_unlock_irqrestore(&cache_lock, flags); | ||
955 | +} | ||
956 | + | ||
957 | +void object_get(struct object *obj) | ||
958 | +{ | ||
959 | + unsigned long flags; | ||
960 | + | ||
961 | + spin_lock_irqsave(&cache_lock, flags); | ||
962 | + __object_get(obj); | ||
963 | + spin_unlock_irqrestore(&cache_lock, flags); | ||
964 | +} | ||
965 | + | ||
966 | /* Must be holding cache_lock */ | ||
967 | static struct object *__cache_find(int id) | ||
968 | { | ||
969 | @@ -35,6 +65,7 @@ | ||
970 | { | ||
971 | BUG_ON(!obj); | ||
972 | list_del(&obj->list); | ||
973 | + __object_put(obj); | ||
974 | cache_num--; | ||
975 | } | ||
976 | |||
977 | @@ -63,6 +94,7 @@ | ||
978 | strlcpy(obj->name, name, sizeof(obj->name)); | ||
979 | obj->id = id; | ||
980 | obj->popularity = 0; | ||
981 | + obj->refcnt = 1; /* The cache holds a reference */ | ||
982 | |||
983 | spin_lock_irqsave(&cache_lock, flags); | ||
984 | __cache_add(obj); | ||
985 | @@ -79,18 +111,15 @@ | ||
986 | spin_unlock_irqrestore(&cache_lock, flags); | ||
987 | } | ||
988 | |||
989 | -int cache_find(int id, char *name) | ||
990 | +struct object *cache_find(int id) | ||
991 | { | ||
992 | struct object *obj; | ||
993 | - int ret = -ENOENT; | ||
994 | unsigned long flags; | ||
995 | |||
996 | spin_lock_irqsave(&cache_lock, flags); | ||
997 | obj = __cache_find(id); | ||
998 | - if (obj) { | ||
999 | - ret = 0; | ||
1000 | - strcpy(name, obj->name); | ||
1001 | - } | ||
1002 | + if (obj) | ||
1003 | + __object_get(obj); | ||
1004 | spin_unlock_irqrestore(&cache_lock, flags); | ||
1005 | - return ret; | ||
1006 | + return obj; | ||
1007 | } | ||
1008 | </programlisting> | ||
1009 | |||
1010 | <para> | ||
1011 | We encapsulate the reference counting in the standard 'get' and 'put' | ||
1012 | functions. Now we can return the object itself from | ||
1013 | <function>cache_find</function> which has the advantage that the user | ||
1014 | can now sleep holding the object (eg. to | ||
1015 | <function>copy_to_user</function> the name to userspace). | ||
1016 | </para> | ||
1017 | <para> | ||
1018 | The other point to note is that I said a reference should be held for | ||
1019 | every pointer to the object: thus the reference count is 1 when first | ||
1020 | inserted into the cache. In some versions the framework does not hold | ||
1021 | a reference count, but they are more complicated. | ||
1022 | </para> | ||
1023 | |||
1024 | <sect2 id="examples-refcnt-atomic"> | ||
1025 | <title>Using Atomic Operations For The Reference Count</title> | ||
1026 | <para> | ||
1027 | In practice, <type>atomic_t</type> would usually be used for | ||
1028 | <structfield>refcnt</structfield>. There are a number of atomic | ||
1029 | operations defined in | ||
1030 | |||
1031 | <filename class="headerfile">include/asm/atomic.h</filename>: these are | ||
1032 | guaranteed to be seen atomically from all CPUs in the system, so no | ||
1033 | lock is required. In this case, it is simpler than using spinlocks, | ||
1034 | although for anything non-trivial using spinlocks is clearer. The | ||
1035 | <function>atomic_inc</function> and | ||
1036 | <function>atomic_dec_and_test</function> are used instead of the | ||
1037 | standard increment and decrement operators, and the lock is no longer | ||
1038 | used to protect the reference count itself. | ||
1039 | </para> | ||
1040 | |||
1041 | <programlisting> | ||
1042 | --- cache.c.refcnt 2003-12-09 15:00:35.000000000 +1100 | ||
1043 | +++ cache.c.refcnt-atomic 2003-12-11 15:49:42.000000000 +1100 | ||
1044 | @@ -7,7 +7,7 @@ | ||
1045 | struct object | ||
1046 | { | ||
1047 | struct list_head list; | ||
1048 | - unsigned int refcnt; | ||
1049 | + atomic_t refcnt; | ||
1050 | int id; | ||
1051 | char name[32]; | ||
1052 | int popularity; | ||
1053 | @@ -18,33 +18,15 @@ | ||
1054 | static unsigned int cache_num = 0; | ||
1055 | #define MAX_CACHE_SIZE 10 | ||
1056 | |||
1057 | -static void __object_put(struct object *obj) | ||
1058 | -{ | ||
1059 | - if (--obj->refcnt == 0) | ||
1060 | - kfree(obj); | ||
1061 | -} | ||
1062 | - | ||
1063 | -static void __object_get(struct object *obj) | ||
1064 | -{ | ||
1065 | - obj->refcnt++; | ||
1066 | -} | ||
1067 | - | ||
1068 | void object_put(struct object *obj) | ||
1069 | { | ||
1070 | - unsigned long flags; | ||
1071 | - | ||
1072 | - spin_lock_irqsave(&cache_lock, flags); | ||
1073 | - __object_put(obj); | ||
1074 | - spin_unlock_irqrestore(&cache_lock, flags); | ||
1075 | + if (atomic_dec_and_test(&obj->refcnt)) | ||
1076 | + kfree(obj); | ||
1077 | } | ||
1078 | |||
1079 | void object_get(struct object *obj) | ||
1080 | { | ||
1081 | - unsigned long flags; | ||
1082 | - | ||
1083 | - spin_lock_irqsave(&cache_lock, flags); | ||
1084 | - __object_get(obj); | ||
1085 | - spin_unlock_irqrestore(&cache_lock, flags); | ||
1086 | + atomic_inc(&obj->refcnt); | ||
1087 | } | ||
1088 | |||
1089 | /* Must be holding cache_lock */ | ||
1090 | @@ -65,7 +47,7 @@ | ||
1091 | { | ||
1092 | BUG_ON(!obj); | ||
1093 | list_del(&obj->list); | ||
1094 | - __object_put(obj); | ||
1095 | + object_put(obj); | ||
1096 | cache_num--; | ||
1097 | } | ||
1098 | |||
1099 | @@ -94,7 +76,7 @@ | ||
1100 | strlcpy(obj->name, name, sizeof(obj->name)); | ||
1101 | obj->id = id; | ||
1102 | obj->popularity = 0; | ||
1103 | - obj->refcnt = 1; /* The cache holds a reference */ | ||
1104 | + atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | ||
1105 | |||
1106 | spin_lock_irqsave(&cache_lock, flags); | ||
1107 | __cache_add(obj); | ||
1108 | @@ -119,7 +101,7 @@ | ||
1109 | spin_lock_irqsave(&cache_lock, flags); | ||
1110 | obj = __cache_find(id); | ||
1111 | if (obj) | ||
1112 | - __object_get(obj); | ||
1113 | + object_get(obj); | ||
1114 | spin_unlock_irqrestore(&cache_lock, flags); | ||
1115 | return obj; | ||
1116 | } | ||
1117 | </programlisting> | ||
1118 | </sect2> | ||
1119 | </sect1> | ||
1120 | |||
1121 | <sect1 id="examples-lock-per-obj"> | ||
1122 | <title>Protecting The Objects Themselves</title> | ||
1123 | <para> | ||
1124 | In these examples, we assumed that the objects (except the reference | ||
1125 | counts) never changed once they are created. If we wanted to allow | ||
1126 | the name to change, there are three possibilities: | ||
1127 | </para> | ||
1128 | <itemizedlist> | ||
1129 | <listitem> | ||
1130 | <para> | ||
1131 | You can make <symbol>cache_lock</symbol> non-static, and tell people | ||
1132 | to grab that lock before changing the name in any object. | ||
1133 | </para> | ||
1134 | </listitem> | ||
1135 | <listitem> | ||
1136 | <para> | ||
1137 | You can provide a <function>cache_obj_rename</function> which grabs | ||
1138 | this lock and changes the name for the caller, and tell everyone to | ||
1139 | use that function. | ||
1140 | </para> | ||
1141 | </listitem> | ||
1142 | <listitem> | ||
1143 | <para> | ||
1144 | You can make the <symbol>cache_lock</symbol> protect only the cache | ||
1145 | itself, and use another lock to protect the name. | ||
1146 | </para> | ||
1147 | </listitem> | ||
1148 | </itemizedlist> | ||
1149 | |||
1150 | <para> | ||
1151 | Theoretically, you can make the locks as fine-grained as one lock for | ||
1152 | every field, for every object. In practice, the most common variants | ||
1153 | are: | ||
1154 | </para> | ||
1155 | <itemizedlist> | ||
1156 | <listitem> | ||
1157 | <para> | ||
1158 | One lock which protects the infrastructure (the <symbol>cache</symbol> | ||
1159 | list in this example) and all the objects. This is what we have done | ||
1160 | so far. | ||
1161 | </para> | ||
1162 | </listitem> | ||
1163 | <listitem> | ||
1164 | <para> | ||
1165 | One lock which protects the infrastructure (including the list | ||
1166 | pointers inside the objects), and one lock inside the object which | ||
1167 | protects the rest of that object. | ||
1168 | </para> | ||
1169 | </listitem> | ||
1170 | <listitem> | ||
1171 | <para> | ||
1172 | Multiple locks to protect the infrastructure (eg. one lock per hash | ||
1173 | chain), possibly with a separate per-object lock. | ||
1174 | </para> | ||
1175 | </listitem> | ||
1176 | </itemizedlist> | ||
1177 | |||
1178 | <para> | ||
1179 | Here is the "lock-per-object" implementation: | ||
1180 | </para> | ||
1181 | <programlisting> | ||
1182 | --- cache.c.refcnt-atomic 2003-12-11 15:50:54.000000000 +1100 | ||
1183 | +++ cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100 | ||
1184 | @@ -6,11 +6,17 @@ | ||
1185 | |||
1186 | struct object | ||
1187 | { | ||
1188 | + /* These two protected by cache_lock. */ | ||
1189 | struct list_head list; | ||
1190 | + int popularity; | ||
1191 | + | ||
1192 | atomic_t refcnt; | ||
1193 | + | ||
1194 | + /* Doesn't change once created. */ | ||
1195 | int id; | ||
1196 | + | ||
1197 | + spinlock_t lock; /* Protects the name */ | ||
1198 | char name[32]; | ||
1199 | - int popularity; | ||
1200 | }; | ||
1201 | |||
1202 | static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; | ||
1203 | @@ -77,6 +84,7 @@ | ||
1204 | obj->id = id; | ||
1205 | obj->popularity = 0; | ||
1206 | atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | ||
1207 | + spin_lock_init(&obj->lock); | ||
1208 | |||
1209 | spin_lock_irqsave(&cache_lock, flags); | ||
1210 | __cache_add(obj); | ||
1211 | </programlisting> | ||
1212 | |||
1213 | <para> | ||
1214 | Note that I decided that the <structfield>popularity</structfield> | ||
1215 | count should be protected by the <symbol>cache_lock</symbol> rather | ||
1216 | than the per-object lock: this is because it (like the | ||
1217 | <structname>struct list_head</structname> inside the object) is | ||
1218 | logically part of the infrastructure. This way, I don't need to grab | ||
1219 | the lock of every object in <function>__cache_add</function> when | ||
1220 | seeking the least popular. | ||
1221 | </para> | ||
1222 | |||
1223 | <para> | ||
1224 | I also decided that the <structfield>id</structfield> member is | ||
1225 | unchangeable, so I don't need to grab each object lock in | ||
1226 | <function>__cache_find()</function> to examine the | ||
1227 | <structfield>id</structfield>: the object lock is only used by a | ||
1228 | caller who wants to read or write the <structfield>name</structfield> | ||
1229 | field. | ||
1230 | </para> | ||
1231 | |||
1232 | <para> | ||
1233 | Note also that I added a comment describing what data was protected by | ||
1234 | which locks. This is extremely important, as it describes the runtime | ||
1235 | behavior of the code, and can be hard to gain from just reading. And | ||
1236 | as Alan Cox says, <quote>Lock data, not code</quote>. | ||
1237 | </para> | ||
1238 | </sect1> | ||
1239 | </chapter> | ||
1240 | |||
1241 | <chapter id="common-problems"> | ||
1242 | <title>Common Problems</title> | ||
1243 | <sect1 id="deadlock"> | ||
1244 | <title>Deadlock: Simple and Advanced</title> | ||
1245 | |||
1246 | <para> | ||
1247 | There is a coding bug where a piece of code tries to grab a | ||
1248 | spinlock twice: it will spin forever, waiting for the lock to | ||
1249 | be released (spinlocks, rwlocks and semaphores are not | ||
1250 | recursive in Linux). This is trivial to diagnose: not a | ||
1251 | stay-up-five-nights-talk-to-fluffy-code-bunnies kind of | ||
1252 | problem. | ||
1253 | </para> | ||
1254 | |||
1255 | <para> | ||
1256 | For a slightly more complex case, imagine you have a region | ||
1257 | shared by a softirq and user context. If you use a | ||
1258 | <function>spin_lock()</function> call to protect it, it is | ||
1259 | possible that the user context will be interrupted by the softirq | ||
1260 | while it holds the lock, and the softirq will then spin | ||
1261 | forever trying to get the same lock. | ||
1262 | </para> | ||
1263 | |||
1264 | <para> | ||
1265 | Both of these are called deadlock, and as shown above, it can | ||
1266 | occur even with a single CPU (although not on UP compiles, | ||
1267 | since spinlocks vanish on kernel compiles with | ||
1268 | <symbol>CONFIG_SMP</symbol>=n. You'll still get data corruption | ||
1269 | in the second example). | ||
1270 | </para> | ||
1271 | |||
1272 | <para> | ||
1273 | This complete lockup is easy to diagnose: on SMP boxes the | ||
1274 | watchdog timer or compiling with <symbol>DEBUG_SPINLOCKS</symbol> set | ||
1275 | (<filename>include/linux/spinlock.h</filename>) will show this up | ||
1276 | immediately when it happens. | ||
1277 | </para> | ||
1278 | |||
1279 | <para> | ||
1280 | A more complex problem is the so-called 'deadly embrace', | ||
1281 | involving two or more locks. Say you have a hash table: each | ||
1282 | entry in the table is a spinlock, and a chain of hashed | ||
1283 | objects. Inside a softirq handler, you sometimes want to | ||
1284 | alter an object from one place in the hash to another: you | ||
1285 | grab the spinlock of the old hash chain and the spinlock of | ||
1286 | the new hash chain, and delete the object from the old one, | ||
1287 | and insert it in the new one. | ||
1288 | </para> | ||
1289 | |||
1290 | <para> | ||
1291 | There are two problems here. First, if your code ever | ||
1292 | tries to move the object to the same chain, it will deadlock | ||
1293 | with itself as it tries to lock it twice. Secondly, if the | ||
1294 | same softirq on another CPU is trying to move another object | ||
1295 | in the reverse direction, the following could happen: | ||
1296 | </para> | ||
1297 | |||
1298 | <table> | ||
1299 | <title>Consequences</title> | ||
1300 | |||
1301 | <tgroup cols="2" align="left"> | ||
1302 | |||
1303 | <thead> | ||
1304 | <row> | ||
1305 | <entry>CPU 1</entry> | ||
1306 | <entry>CPU 2</entry> | ||
1307 | </row> | ||
1308 | </thead> | ||
1309 | |||
1310 | <tbody> | ||
1311 | <row> | ||
1312 | <entry>Grab lock A -> OK</entry> | ||
1313 | <entry>Grab lock B -> OK</entry> | ||
1314 | </row> | ||
1315 | <row> | ||
1316 | <entry>Grab lock B -> spin</entry> | ||
1317 | <entry>Grab lock A -> spin</entry> | ||
1318 | </row> | ||
1319 | </tbody> | ||
1320 | </tgroup> | ||
1321 | </table> | ||
1322 | |||
1323 | <para> | ||
1324 | The two CPUs will spin forever, waiting for the other to give up | ||
1325 | their lock. It will look, smell, and feel like a crash. | ||
1326 | </para> | ||
1327 | </sect1> | ||
1328 | |||
1329 | <sect1 id="techs-deadlock-prevent"> | ||
1330 | <title>Preventing Deadlock</title> | ||
1331 | |||
1332 | <para> | ||
1333 | Textbooks will tell you that if you always lock in the same | ||
1334 | order, you will never get this kind of deadlock. Practice | ||
1335 | will tell you that this approach doesn't scale: when I | ||
1336 | create a new lock, I don't understand enough of the kernel | ||
1337 | to figure out where in the 5000 lock hierarchy it will fit. | ||
1338 | </para> | ||
1339 | |||
1340 | <para> | ||
1341 | The best locks are encapsulated: they never get exposed in | ||
1342 | headers, and are never held around calls to non-trivial | ||
1343 | functions outside the same file. You can read through this | ||
1344 | code and see that it will never deadlock, because it never | ||
1345 | tries to grab another lock while it has that one. People | ||
1346 | using your code don't even need to know you are using a | ||
1347 | lock. | ||
1348 | </para> | ||
1349 | |||
1350 | <para> | ||
1351 | A classic problem here is when you provide callbacks or | ||
1352 | hooks: if you call these with the lock held, you risk simple | ||
1353 | deadlock, or a deadly embrace (who knows what the callback | ||
1354 | will do?). Remember, the other programmers are out to get | ||
1355 | you, so don't do this. | ||
1356 | </para> | ||
1357 | |||
1358 | <sect2 id="techs-deadlock-overprevent"> | ||
1359 | <title>Overzealous Prevention Of Deadlocks</title> | ||
1360 | |||
1361 | <para> | ||
1362 | Deadlocks are problematic, but not as bad as data | ||
1363 | corruption. Code which grabs a read lock, searches a list, | ||
1364 | fails to find what it wants, drops the read lock, grabs a | ||
1365 | write lock and inserts the object has a race condition. | ||
1366 | </para> | ||
1367 | |||
1368 | <para> | ||
1369 | If you don't see why, please stay the fuck away from my code. | ||
1370 | </para> | ||
1371 | </sect2> | ||
1372 | </sect1> | ||
1373 | |||
1374 | <sect1 id="racing-timers"> | ||
1375 | <title>Racing Timers: A Kernel Pastime</title> | ||
1376 | |||
1377 | <para> | ||
1378 | Timers can produce their own special problems with races. | ||
1379 | Consider a collection of objects (list, hash, etc) where each | ||
1380 | object has a timer which is due to destroy it. | ||
1381 | </para> | ||
1382 | |||
1383 | <para> | ||
1384 | If you want to destroy the entire collection (say on module | ||
1385 | removal), you might do the following: | ||
1386 | </para> | ||
1387 | |||
1388 | <programlisting> | ||
1389 | /* THIS CODE BAD BAD BAD BAD: IF IT WAS ANY WORSE IT WOULD USE | ||
1390 | HUNGARIAN NOTATION */ | ||
1391 | spin_lock_bh(&list_lock); | ||
1392 | |||
1393 | while (list) { | ||
1394 | struct foo *next = list->next; | ||
1395 | del_timer(&list->timer); | ||
1396 | kfree(list); | ||
1397 | list = next; | ||
1398 | } | ||
1399 | |||
1400 | spin_unlock_bh(&list_lock); | ||
1401 | </programlisting> | ||
1402 | |||
1403 | <para> | ||
1404 | Sooner or later, this will crash on SMP, because a timer can | ||
1405 | have just gone off before the <function>spin_lock_bh()</function>, | ||
1406 | and it will only get the lock after we | ||
1407 | <function>spin_unlock_bh()</function>, and then try to free | ||
1408 | the element (which has already been freed!). | ||
1409 | </para> | ||
1410 | |||
1411 | <para> | ||
1412 | This can be avoided by checking the result of | ||
1413 | <function>del_timer()</function>: if it returns | ||
1414 | <returnvalue>1</returnvalue>, the timer has been deleted. | ||
1415 | If <returnvalue>0</returnvalue>, it means (in this | ||
1416 | case) that it is currently running, so we can do: | ||
1417 | </para> | ||
1418 | |||
1419 | <programlisting> | ||
1420 | retry: | ||
1421 | spin_lock_bh(&list_lock); | ||
1422 | |||
1423 | while (list) { | ||
1424 | struct foo *next = list->next; | ||
1425 | if (!del_timer(&list->timer)) { | ||
1426 | /* Give timer a chance to delete this */ | ||
1427 | spin_unlock_bh(&list_lock); | ||
1428 | goto retry; | ||
1429 | } | ||
1430 | kfree(list); | ||
1431 | list = next; | ||
1432 | } | ||
1433 | |||
1434 | spin_unlock_bh(&list_lock); | ||
1435 | </programlisting> | ||
1436 | |||
1437 | <para> | ||
1438 | Another common problem is deleting timers which restart | ||
1439 | themselves (by calling <function>add_timer()</function> at the end | ||
1440 | of their timer function). Because this is a fairly common case | ||
1441 | which is prone to races, you should use <function>del_timer_sync()</function> | ||
1442 | (<filename class="headerfile">include/linux/timer.h</filename>) | ||
1443 | to handle this case. It returns the number of times the timer | ||
1444 | had to be deleted before we finally stopped it from adding itself back | ||
1445 | in. | ||
1446 | </para> | ||
1447 | </sect1> | ||
1448 | |||
1449 | </chapter> | ||
1450 | |||
1451 | <chapter id="Efficiency"> | ||
1452 | <title>Locking Speed</title> | ||
1453 | |||
1454 | <para> | ||
1455 | There are three main things to worry about when considering speed of | ||
1456 | some code which does locking. First is concurrency: how many things | ||
1457 | are going to be waiting while someone else is holding a lock. Second | ||
1458 | is the time taken to actually acquire and release an uncontended lock. | ||
1459 | Third is using fewer, or smarter locks. I'm assuming that the lock is | ||
1460 | used fairly often: otherwise, you wouldn't be concerned about | ||
1461 | efficiency. | ||
1462 | </para> | ||
1463 | <para> | ||
1464 | Concurrency depends on how long the lock is usually held: you should | ||
1465 | hold the lock for as long as needed, but no longer. In the cache | ||
1466 | example, we always create the object without the lock held, and then | ||
1467 | grab the lock only when we are ready to insert it in the list. | ||
1468 | </para> | ||
1469 | <para> | ||
1470 | Acquisition times depend on how much damage the lock operations do to | ||
1471 | the pipeline (pipeline stalls) and how likely it is that this CPU was | ||
1472 | the last one to grab the lock (ie. is the lock cache-hot for this | ||
1473 | CPU): on a machine with more CPUs, this likelihood drops fast. | ||
1474 | Consider a 700MHz Intel Pentium III: an instruction takes about 0.7ns, | ||
1475 | an atomic increment takes about 58ns, a lock which is cache-hot on | ||
1476 | this CPU takes 160ns, and a cacheline transfer from another CPU takes | ||
1477 | an additional 170 to 360ns. (These figures are from Paul McKenney's | ||
1478 | <ulink url="http://www.linuxjournal.com/article.php?sid=6993"> Linux | ||
1479 | Journal RCU article</ulink>). | ||
1480 | </para> | ||
1481 | <para> | ||
1482 | These two aims conflict: holding a lock for a short time might be done | ||
1483 | by splitting locks into parts (such as in our final per-object-lock | ||
1484 | example), but this increases the number of lock acquisitions, and the | ||
1485 | results are often slower than having a single lock. This is another | ||
1486 | reason to advocate locking simplicity. | ||
1487 | </para> | ||
1488 | <para> | ||
1489 | The third concern is addressed below: there are some methods to reduce | ||
1490 | the amount of locking which needs to be done. | ||
1491 | </para> | ||
1492 | |||
1493 | <sect1 id="efficiency-rwlocks"> | ||
1494 | <title>Read/Write Lock Variants</title> | ||
1495 | |||
1496 | <para> | ||
1497 | Both spinlocks and semaphores have read/write variants: | ||
1498 | <type>rwlock_t</type> and <structname>struct rw_semaphore</structname>. | ||
1499 | These divide users into two classes: the readers and the writers. If | ||
1500 | you are only reading the data, you can get a read lock, but to write to | ||
1501 | the data you need the write lock. Many people can hold a read lock, | ||
1502 | but a writer must be sole holder. | ||
1503 | </para> | ||
1504 | |||
1505 | <para> | ||
1506 | If your code divides neatly along reader/writer lines (as our | ||
1507 | cache code does), and the lock is held by readers for | ||
1508 | significant lengths of time, using these locks can help. They | ||
1509 | are slightly slower than the normal locks though, so in practice | ||
1510 | <type>rwlock_t</type> is not usually worthwhile. | ||
1511 | </para> | ||
1512 | </sect1> | ||
1513 | |||
1514 | <sect1 id="efficiency-read-copy-update"> | ||
1515 | <title>Avoiding Locks: Read Copy Update</title> | ||
1516 | |||
1517 | <para> | ||
1518 | There is a special method of read/write locking called Read Copy | ||
1519 | Update. Using RCU, the readers can avoid taking a lock | ||
1520 | altogether: as we expect our cache to be read more often than | ||
1521 | updated (otherwise the cache is a waste of time), it is a | ||
1522 | candidate for this optimization. | ||
1523 | </para> | ||
1524 | |||
1525 | <para> | ||
1526 | How do we get rid of read locks? Getting rid of read locks | ||
1527 | means that writers may be changing the list underneath the | ||
1528 | readers. That is actually quite simple: we can read a linked | ||
1529 | list while an element is being added if the writer adds the | ||
1530 | element very carefully. For example, adding | ||
1531 | <symbol>new</symbol> to a single linked list called | ||
1532 | <symbol>list</symbol>: | ||
1533 | </para> | ||
1534 | |||
1535 | <programlisting> | ||
1536 | new->next = list->next; | ||
1537 | wmb(); | ||
1538 | list->next = new; | ||
1539 | </programlisting> | ||
1540 | |||
1541 | <para> | ||
1542 | The <function>wmb()</function> is a write memory barrier. It | ||
1543 | ensures that the first operation (setting the new element's | ||
1544 | <symbol>next</symbol> pointer) is complete and will be seen by | ||
1545 | all CPUs, before the second operation is (putting the new | ||
1546 | element into the list). This is important, since modern | ||
1547 | compilers and modern CPUs can both reorder instructions unless | ||
1548 | told otherwise: we want a reader to either not see the new | ||
1549 | element at all, or see the new element with the | ||
1550 | <symbol>next</symbol> pointer correctly pointing at the rest of | ||
1551 | the list. | ||
1552 | </para> | ||
1553 | <para> | ||
1554 | Fortunately, there is a function to do this for standard | ||
1555 | <structname>struct list_head</structname> lists: | ||
1556 | <function>list_add_rcu()</function> | ||
1557 | (<filename>include/linux/list.h</filename>). | ||
1558 | </para> | ||
1559 | <para> | ||
1560 | Removing an element from the list is even simpler: we replace | ||
1561 | the pointer to the old element with a pointer to its successor, | ||
1562 | and readers will either see it, or skip over it. | ||
1563 | </para> | ||
1564 | <programlisting> | ||
1565 | list->next = old->next; | ||
1566 | </programlisting> | ||
1567 | <para> | ||
1568 | There is <function>list_del_rcu()</function> | ||
1569 | (<filename>include/linux/list.h</filename>) which does this (the | ||
1570 | normal version poisons the old object, which we don't want). | ||
1571 | </para> | ||
1572 | <para> | ||
1573 | The reader must also be careful: some CPUs can look through the | ||
1574 | <symbol>next</symbol> pointer to start reading the contents of | ||
1575 | the next element early, but don't realize that the pre-fetched | ||
1576 | contents is wrong when the <symbol>next</symbol> pointer changes | ||
1577 | underneath them. Once again, there is a | ||
1578 | <function>list_for_each_entry_rcu()</function> | ||
1579 | (<filename>include/linux/list.h</filename>) to help you. Of | ||
1580 | course, writers can just use | ||
1581 | <function>list_for_each_entry()</function>, since there cannot | ||
1582 | be two simultaneous writers. | ||
1583 | </para> | ||
1584 | <para> | ||
1585 | Our final dilemma is this: when can we actually destroy the | ||
1586 | removed element? Remember, a reader might be stepping through | ||
1587 | this element in the list right now: if we free this element and | ||
1588 | the <symbol>next</symbol> pointer changes, the reader will jump | ||
1589 | off into garbage and crash. We need to wait until we know that | ||
1590 | all the readers who were traversing the list when we deleted the | ||
1591 | element are finished. We use <function>call_rcu()</function> to | ||
1592 | register a callback which will actually destroy the object once | ||
1593 | the readers are finished. | ||
1594 | </para> | ||
1595 | <para> | ||
1596 | But how does Read Copy Update know when the readers are | ||
1597 | finished? The method is this: firstly, the readers always | ||
1598 | traverse the list inside | ||
1599 | <function>rcu_read_lock()</function>/<function>rcu_read_unlock()</function> | ||
1600 | pairs: these simply disable preemption so the reader won't go to | ||
1601 | sleep while reading the list. | ||
1602 | </para> | ||
1603 | <para> | ||
1604 | RCU then waits until every other CPU has slept at least once: | ||
1605 | since readers cannot sleep, we know that any readers which were | ||
1606 | traversing the list during the deletion are finished, and the | ||
1607 | callback is triggered. The real Read Copy Update code is a | ||
1608 | little more optimized than this, but this is the fundamental | ||
1609 | idea. | ||
1610 | </para> | ||
1611 | |||
1612 | <programlisting> | ||
1613 | --- cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100 | ||
1614 | +++ cache.c.rcupdate 2003-12-11 17:55:14.000000000 +1100 | ||
1615 | @@ -1,15 +1,18 @@ | ||
1616 | #include <linux/list.h> | ||
1617 | #include <linux/slab.h> | ||
1618 | #include <linux/string.h> | ||
1619 | +#include <linux/rcupdate.h> | ||
1620 | #include <asm/semaphore.h> | ||
1621 | #include <asm/errno.h> | ||
1622 | |||
1623 | struct object | ||
1624 | { | ||
1625 | - /* These two protected by cache_lock. */ | ||
1626 | + /* This is protected by RCU */ | ||
1627 | struct list_head list; | ||
1628 | int popularity; | ||
1629 | |||
1630 | + struct rcu_head rcu; | ||
1631 | + | ||
1632 | atomic_t refcnt; | ||
1633 | |||
1634 | /* Doesn't change once created. */ | ||
1635 | @@ -40,7 +43,7 @@ | ||
1636 | { | ||
1637 | struct object *i; | ||
1638 | |||
1639 | - list_for_each_entry(i, &cache, list) { | ||
1640 | + list_for_each_entry_rcu(i, &cache, list) { | ||
1641 | if (i->id == id) { | ||
1642 | i->popularity++; | ||
1643 | return i; | ||
1644 | @@ -49,19 +52,25 @@ | ||
1645 | return NULL; | ||
1646 | } | ||
1647 | |||
1648 | +/* Final discard done once we know no readers are looking. */ | ||
1649 | +static void cache_delete_rcu(void *arg) | ||
1650 | +{ | ||
1651 | + object_put(arg); | ||
1652 | +} | ||
1653 | + | ||
1654 | /* Must be holding cache_lock */ | ||
1655 | static void __cache_delete(struct object *obj) | ||
1656 | { | ||
1657 | BUG_ON(!obj); | ||
1658 | - list_del(&obj->list); | ||
1659 | - object_put(obj); | ||
1660 | + list_del_rcu(&obj->list); | ||
1661 | cache_num--; | ||
1662 | + call_rcu(&obj->rcu, cache_delete_rcu, obj); | ||
1663 | } | ||
1664 | |||
1665 | /* Must be holding cache_lock */ | ||
1666 | static void __cache_add(struct object *obj) | ||
1667 | { | ||
1668 | - list_add(&obj->list, &cache); | ||
1669 | + list_add_rcu(&obj->list, &cache); | ||
1670 | if (++cache_num > MAX_CACHE_SIZE) { | ||
1671 | struct object *i, *outcast = NULL; | ||
1672 | list_for_each_entry(i, &cache, list) { | ||
1673 | @@ -85,6 +94,7 @@ | ||
1674 | obj->popularity = 0; | ||
1675 | atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ | ||
1676 | spin_lock_init(&obj->lock); | ||
1677 | + INIT_RCU_HEAD(&obj->rcu); | ||
1678 | |||
1679 | spin_lock_irqsave(&cache_lock, flags); | ||
1680 | __cache_add(obj); | ||
1681 | @@ -104,12 +114,11 @@ | ||
1682 | struct object *cache_find(int id) | ||
1683 | { | ||
1684 | struct object *obj; | ||
1685 | - unsigned long flags; | ||
1686 | |||
1687 | - spin_lock_irqsave(&cache_lock, flags); | ||
1688 | + rcu_read_lock(); | ||
1689 | obj = __cache_find(id); | ||
1690 | if (obj) | ||
1691 | object_get(obj); | ||
1692 | - spin_unlock_irqrestore(&cache_lock, flags); | ||
1693 | + rcu_read_unlock(); | ||
1694 | return obj; | ||
1695 | } | ||
1696 | </programlisting> | ||
1697 | |||
1698 | <para> | ||
1699 | Note that the reader will alter the | ||
1700 | <structfield>popularity</structfield> member in | ||
1701 | <function>__cache_find()</function>, and now it doesn't hold a lock. | ||
1702 | One solution would be to make it an <type>atomic_t</type>, but for | ||
1703 | this usage, we don't really care about races: an approximate result is | ||
1704 | good enough, so I didn't change it. | ||
1705 | </para> | ||
1706 | |||
1707 | <para> | ||
1708 | The result is that <function>cache_find()</function> requires no | ||
1709 | synchronization with any other functions, so is almost as fast on SMP | ||
1710 | as it would be on UP. | ||
1711 | </para> | ||
1712 | |||
1713 | <para> | ||
1714 | There is a further optimization possible here: remember our original | ||
1715 | cache code, where there were no reference counts and the caller simply | ||
1716 | held the lock whenever using the object? This is still possible: if | ||
1717 | you hold the lock, no one can delete the object, so you don't need to | ||
1718 | get and put the reference count. | ||
1719 | </para> | ||
1720 | |||
1721 | <para> | ||
1722 | Now, because the 'read lock' in RCU is simply disabling preemption, a | ||
1723 | caller which always has preemption disabled between calling | ||
1724 | <function>cache_find()</function> and | ||
1725 | <function>object_put()</function> does not need to actually get and | ||
1726 | put the reference count: we could expose | ||
1727 | <function>__cache_find()</function> by making it non-static, and | ||
1728 | such callers could simply call that. | ||
1729 | </para> | ||
1730 | <para> | ||
1731 | The benefit here is that the reference count is not written to: the | ||
1732 | object is not altered in any way, which is much faster on SMP | ||
1733 | machines due to caching. | ||
1734 | </para> | ||
1735 | </sect1> | ||
1736 | |||
1737 | <sect1 id="per-cpu"> | ||
1738 | <title>Per-CPU Data</title> | ||
1739 | |||
1740 | <para> | ||
1741 | Another technique for avoiding locking which is used fairly | ||
1742 | widely is to duplicate information for each CPU. For example, | ||
1743 | if you wanted to keep a count of a common condition, you could | ||
1744 | use a spin lock and a single counter. Nice and simple. | ||
1745 | </para> | ||
1746 | |||
1747 | <para> | ||
1748 | If that was too slow (it's usually not, but if you've got a | ||
1749 | really big machine to test on and can show that it is), you | ||
1750 | could instead use a counter for each CPU, then none of them need | ||
1751 | an exclusive lock. See <function>DEFINE_PER_CPU()</function>, | ||
1752 | <function>get_cpu_var()</function> and | ||
1753 | <function>put_cpu_var()</function> | ||
1754 | (<filename class="headerfile">include/linux/percpu.h</filename>). | ||
1755 | </para> | ||
1756 | |||
1757 | <para> | ||
1758 | Of particular use for simple per-cpu counters is the | ||
1759 | <type>local_t</type> type, and the | ||
1760 | <function>cpu_local_inc()</function> and related functions, | ||
1761 | which are more efficient than simple code on some architectures | ||
1762 | (<filename class="headerfile">include/asm/local.h</filename>). | ||
1763 | </para> | ||
1764 | |||
1765 | <para> | ||
1766 | Note that there is no simple, reliable way of getting an exact | ||
1767 | value of such a counter, without introducing more locks. This | ||
1768 | is not a problem for some uses. | ||
1769 | </para> | ||
1770 | </sect1> | ||
1771 | |||
1772 | <sect1 id="mostly-hardirq"> | ||
1773 | <title>Data Which Is Mostly Used By An IRQ Handler</title> | ||
1774 | |||
1775 | <para> | ||
1776 | If data is always accessed from within the same IRQ handler, you | ||
1777 | don't need a lock at all: the kernel already guarantees that the | ||
1778 | irq handler will not run simultaneously on multiple CPUs. | ||
1779 | </para> | ||
1780 | <para> | ||
1781 | Manfred Spraul points out that you can still do this, even if | ||
1782 | the data is very occasionally accessed in user context or | ||
1783 | softirqs/tasklets. The irq handler doesn't use a lock, and | ||
1784 | all other accesses are done as so: | ||
1785 | </para> | ||
1786 | |||
1787 | <programlisting> | ||
1788 | spin_lock(&lock); | ||
1789 | disable_irq(irq); | ||
1790 | ... | ||
1791 | enable_irq(irq); | ||
1792 | spin_unlock(&lock); | ||
1793 | </programlisting> | ||
1794 | <para> | ||
1795 | The <function>disable_irq()</function> prevents the irq handler | ||
1796 | from running (and waits for it to finish if it's currently | ||
1797 | running on other CPUs). The spinlock prevents any other | ||
1798 | accesses happening at the same time. Naturally, this is slower | ||
1799 | than just a <function>spin_lock_irq()</function> call, so it | ||
1800 | only makes sense if this type of access happens extremely | ||
1801 | rarely. | ||
1802 | </para> | ||
1803 | </sect1> | ||
1804 | </chapter> | ||
1805 | |||
1806 | <chapter id="sleeping-things"> | ||
1807 | <title>What Functions Are Safe To Call From Interrupts?</title> | ||
1808 | |||
1809 | <para> | ||
1810 | Many functions in the kernel sleep (ie. call schedule()) | ||
1811 | directly or indirectly: you can never call them while holding a | ||
1812 | spinlock, or with preemption disabled. This also means you need | ||
1813 | to be in user context: calling them from an interrupt is illegal. | ||
1814 | </para> | ||
1815 | |||
1816 | <sect1 id="sleeping"> | ||
1817 | <title>Some Functions Which Sleep</title> | ||
1818 | |||
1819 | <para> | ||
1820 | The most common ones are listed below, but you usually have to | ||
1821 | read the code to find out if other calls are safe. If everyone | ||
1822 | else who calls it can sleep, you probably need to be able to | ||
1823 | sleep, too. In particular, registration and deregistration | ||
1824 | functions usually expect to be called from user context, and can | ||
1825 | sleep. | ||
1826 | </para> | ||
1827 | |||
1828 | <itemizedlist> | ||
1829 | <listitem> | ||
1830 | <para> | ||
1831 | Accesses to | ||
1832 | <firstterm linkend="gloss-userspace">userspace</firstterm>: | ||
1833 | </para> | ||
1834 | <itemizedlist> | ||
1835 | <listitem> | ||
1836 | <para> | ||
1837 | <function>copy_from_user()</function> | ||
1838 | </para> | ||
1839 | </listitem> | ||
1840 | <listitem> | ||
1841 | <para> | ||
1842 | <function>copy_to_user()</function> | ||
1843 | </para> | ||
1844 | </listitem> | ||
1845 | <listitem> | ||
1846 | <para> | ||
1847 | <function>get_user()</function> | ||
1848 | </para> | ||
1849 | </listitem> | ||
1850 | <listitem> | ||
1851 | <para> | ||
1852 | <function>put_user()</function> | ||
1853 | </para> | ||
1854 | </listitem> | ||
1855 | </itemizedlist> | ||
1856 | </listitem> | ||
1857 | |||
1858 | <listitem> | ||
1859 | <para> | ||
1860 | <function>kmalloc(GFP_KERNEL)</function> | ||
1861 | </para> | ||
1862 | </listitem> | ||
1863 | |||
1864 | <listitem> | ||
1865 | <para> | ||
1866 | <function>down_interruptible()</function> and | ||
1867 | <function>down()</function> | ||
1868 | </para> | ||
1869 | <para> | ||
1870 | There is a <function>down_trylock()</function> which can be | ||
1871 | used inside interrupt context, as it will not sleep. | ||
1872 | <function>up()</function> will also never sleep. | ||
1873 | </para> | ||
1874 | </listitem> | ||
1875 | </itemizedlist> | ||
1876 | </sect1> | ||
1877 | |||
1878 | <sect1 id="dont-sleep"> | ||
1879 | <title>Some Functions Which Don't Sleep</title> | ||
1880 | |||
1881 | <para> | ||
1882 | Some functions are safe to call from any context, or holding | ||
1883 | almost any lock. | ||
1884 | </para> | ||
1885 | |||
1886 | <itemizedlist> | ||
1887 | <listitem> | ||
1888 | <para> | ||
1889 | <function>printk()</function> | ||
1890 | </para> | ||
1891 | </listitem> | ||
1892 | <listitem> | ||
1893 | <para> | ||
1894 | <function>kfree()</function> | ||
1895 | </para> | ||
1896 | </listitem> | ||
1897 | <listitem> | ||
1898 | <para> | ||
1899 | <function>add_timer()</function> and <function>del_timer()</function> | ||
1900 | </para> | ||
1901 | </listitem> | ||
1902 | </itemizedlist> | ||
1903 | </sect1> | ||
1904 | </chapter> | ||
1905 | |||
1906 | <chapter id="references"> | ||
1907 | <title>Further reading</title> | ||
1908 | |||
1909 | <itemizedlist> | ||
1910 | <listitem> | ||
1911 | <para> | ||
1912 | <filename>Documentation/spinlocks.txt</filename>: | ||
1913 | Linus Torvalds' spinlocking tutorial in the kernel sources. | ||
1914 | </para> | ||
1915 | </listitem> | ||
1916 | |||
1917 | <listitem> | ||
1918 | <para> | ||
1919 | Unix Systems for Modern Architectures: Symmetric | ||
1920 | Multiprocessing and Caching for Kernel Programmers: | ||
1921 | </para> | ||
1922 | |||
1923 | <para> | ||
1924 | Curt Schimmel's very good introduction to kernel level | ||
1925 | locking (not written for Linux, but nearly everything | ||
1926 | applies). The book is expensive, but really worth every | ||
1927 | penny to understand SMP locking. [ISBN: 0201633388] | ||
1928 | </para> | ||
1929 | </listitem> | ||
1930 | </itemizedlist> | ||
1931 | </chapter> | ||
1932 | |||
1933 | <chapter id="thanks"> | ||
1934 | <title>Thanks</title> | ||
1935 | |||
1936 | <para> | ||
1937 | Thanks to Telsa Gwynne for DocBooking, neatening and adding | ||
1938 | style. | ||
1939 | </para> | ||
1940 | |||
1941 | <para> | ||
1942 | Thanks to Martin Pool, Philipp Rumpf, Stephen Rothwell, Paul | ||
1943 | Mackerras, Ruedi Aschwanden, Alan Cox, Manfred Spraul, Tim | ||
1944 | Waugh, Pete Zaitcev, James Morris, Robert Love, Paul McKenney, | ||
1945 | John Ashby for proofreading, correcting, flaming, commenting. | ||
1946 | </para> | ||
1947 | |||
1948 | <para> | ||
1949 | Thanks to the cabal for having no influence on this document. | ||
1950 | </para> | ||
1951 | </chapter> | ||
1952 | |||
1953 | <glossary id="glossary"> | ||
1954 | <title>Glossary</title> | ||
1955 | |||
1956 | <glossentry id="gloss-preemption"> | ||
1957 | <glossterm>preemption</glossterm> | ||
1958 | <glossdef> | ||
1959 | <para> | ||
1960 | Prior to 2.5, or when <symbol>CONFIG_PREEMPT</symbol> is | ||
1961 | unset, processes in user context inside the kernel would not | ||
1962 | preempt each other (ie. you had that CPU until you gave it up, | ||
1963 | except for interrupts). With the addition of | ||
1964 | <symbol>CONFIG_PREEMPT</symbol> in 2.5.4, this changed: when | ||
1965 | in user context, higher priority tasks can "cut in": spinlocks | ||
1966 | were changed to disable preemption, even on UP. | ||
1967 | </para> | ||
1968 | </glossdef> | ||
1969 | </glossentry> | ||
1970 | |||
1971 | <glossentry id="gloss-bh"> | ||
1972 | <glossterm>bh</glossterm> | ||
1973 | <glossdef> | ||
1974 | <para> | ||
1975 | Bottom Half: for historical reasons, functions with | ||
1976 | '_bh' in them often now refer to any software interrupt, e.g. | ||
1977 | <function>spin_lock_bh()</function> blocks any software interrupt | ||
1978 | on the current CPU. Bottom halves are deprecated, and will | ||
1979 | eventually be replaced by tasklets. Only one bottom half will be | ||
1980 | running at any time. | ||
1981 | </para> | ||
1982 | </glossdef> | ||
1983 | </glossentry> | ||
1984 | |||
1985 | <glossentry id="gloss-hwinterrupt"> | ||
1986 | <glossterm>Hardware Interrupt / Hardware IRQ</glossterm> | ||
1987 | <glossdef> | ||
1988 | <para> | ||
1989 | Hardware interrupt request. <function>in_irq()</function> returns | ||
1990 | <returnvalue>true</returnvalue> in a hardware interrupt handler. | ||
1991 | </para> | ||
1992 | </glossdef> | ||
1993 | </glossentry> | ||
1994 | |||
1995 | <glossentry id="gloss-interruptcontext"> | ||
1996 | <glossterm>Interrupt Context</glossterm> | ||
1997 | <glossdef> | ||
1998 | <para> | ||
1999 | Not user context: processing a hardware irq or software irq. | ||
2000 | Indicated by the <function>in_interrupt()</function> macro | ||
2001 | returning <returnvalue>true</returnvalue>. | ||
2002 | </para> | ||
2003 | </glossdef> | ||
2004 | </glossentry> | ||
2005 | |||
2006 | <glossentry id="gloss-smp"> | ||
2007 | <glossterm><acronym>SMP</acronym></glossterm> | ||
2008 | <glossdef> | ||
2009 | <para> | ||
2010 | Symmetric Multi-Processor: kernels compiled for multiple-CPU | ||
2011 | machines. (CONFIG_SMP=y). | ||
2012 | </para> | ||
2013 | </glossdef> | ||
2014 | </glossentry> | ||
2015 | |||
2016 | <glossentry id="gloss-softirq"> | ||
2017 | <glossterm>Software Interrupt / softirq</glossterm> | ||
2018 | <glossdef> | ||
2019 | <para> | ||
2020 | Software interrupt handler. <function>in_irq()</function> returns | ||
2021 | <returnvalue>false</returnvalue>; <function>in_softirq()</function> | ||
2022 | returns <returnvalue>true</returnvalue>. Tasklets and softirqs | ||
2023 | both fall into the category of 'software interrupts'. | ||
2024 | </para> | ||
2025 | <para> | ||
2026 | Strictly speaking a softirq is one of up to 32 enumerated software | ||
2027 | interrupts which can run on multiple CPUs at once. | ||
2028 | Sometimes used to refer to tasklets as | ||
2029 | well (ie. all software interrupts). | ||
2030 | </para> | ||
2031 | </glossdef> | ||
2032 | </glossentry> | ||
2033 | |||
2034 | <glossentry id="gloss-tasklet"> | ||
2035 | <glossterm>tasklet</glossterm> | ||
2036 | <glossdef> | ||
2037 | <para> | ||
2038 | A dynamically-registrable software interrupt, | ||
2039 | which is guaranteed to only run on one CPU at a time. | ||
2040 | </para> | ||
2041 | </glossdef> | ||
2042 | </glossentry> | ||
2043 | |||
2044 | <glossentry id="gloss-timers"> | ||
2045 | <glossterm>timer</glossterm> | ||
2046 | <glossdef> | ||
2047 | <para> | ||
2048 | A dynamically-registrable software interrupt, which is run at | ||
2049 | (or close to) a given time. When running, it is just like a | ||
2050 | tasklet (in fact, they are called from the TIMER_SOFTIRQ). | ||
2051 | </para> | ||
2052 | </glossdef> | ||
2053 | </glossentry> | ||
2054 | |||
2055 | <glossentry id="gloss-up"> | ||
2056 | <glossterm><acronym>UP</acronym></glossterm> | ||
2057 | <glossdef> | ||
2058 | <para> | ||
2059 | Uni-Processor: Non-SMP. (CONFIG_SMP=n). | ||
2060 | </para> | ||
2061 | </glossdef> | ||
2062 | </glossentry> | ||
2063 | |||
2064 | <glossentry id="gloss-usercontext"> | ||
2065 | <glossterm>User Context</glossterm> | ||
2066 | <glossdef> | ||
2067 | <para> | ||
2068 | The kernel executing on behalf of a particular process (ie. a | ||
2069 | system call or trap) or kernel thread. You can tell which | ||
2070 | process with the <symbol>current</symbol> macro. Not to | ||
2071 | be confused with userspace. Can be interrupted by software or | ||
2072 | hardware interrupts. | ||
2073 | </para> | ||
2074 | </glossdef> | ||
2075 | </glossentry> | ||
2076 | |||
2077 | <glossentry id="gloss-userspace"> | ||
2078 | <glossterm>Userspace</glossterm> | ||
2079 | <glossdef> | ||
2080 | <para> | ||
2081 | A process executing its own code outside the kernel. | ||
2082 | </para> | ||
2083 | </glossdef> | ||
2084 | </glossentry> | ||
2085 | |||
2086 | </glossary> | ||
2087 | </book> | ||
2088 | |||
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl new file mode 100644 index 000000000000..cf2fce7707da --- /dev/null +++ b/Documentation/DocBook/libata.tmpl | |||
@@ -0,0 +1,282 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="libataDevGuide"> | ||
6 | <bookinfo> | ||
7 | <title>libATA Developer's Guide</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Jeff</firstname> | ||
12 | <surname>Garzik</surname> | ||
13 | </author> | ||
14 | </authorgroup> | ||
15 | |||
16 | <copyright> | ||
17 | <year>2003</year> | ||
18 | <holder>Jeff Garzik</holder> | ||
19 | </copyright> | ||
20 | |||
21 | <legalnotice> | ||
22 | <para> | ||
23 | The contents of this file are subject to the Open | ||
24 | Software License version 1.1 that can be found at | ||
25 | <ulink url="http://www.opensource.org/licenses/osl-1.1.txt">http://www.opensource.org/licenses/osl-1.1.txt</ulink> and is included herein | ||
26 | by reference. | ||
27 | </para> | ||
28 | |||
29 | <para> | ||
30 | Alternatively, the contents of this file may be used under the terms | ||
31 | of the GNU General Public License version 2 (the "GPL") as distributed | ||
32 | in the kernel source COPYING file, in which case the provisions of | ||
33 | the GPL are applicable instead of the above. If you wish to allow | ||
34 | the use of your version of this file only under the terms of the | ||
35 | GPL and not to allow others to use your version of this file under | ||
36 | the OSL, indicate your decision by deleting the provisions above and | ||
37 | replace them with the notice and other provisions required by the GPL. | ||
38 | If you do not delete the provisions above, a recipient may use your | ||
39 | version of this file under either the OSL or the GPL. | ||
40 | </para> | ||
41 | |||
42 | </legalnotice> | ||
43 | </bookinfo> | ||
44 | |||
45 | <toc></toc> | ||
46 | |||
47 | <chapter id="libataThanks"> | ||
48 | <title>Thanks</title> | ||
49 | <para> | ||
50 | The bulk of the ATA knowledge comes thanks to long conversations with | ||
51 | Andre Hedrick (www.linux-ide.org). | ||
52 | </para> | ||
53 | <para> | ||
54 | Thanks to Alan Cox for pointing out similarities | ||
55 | between SATA and SCSI, and in general for motivation to hack on | ||
56 | libata. | ||
57 | </para> | ||
58 | <para> | ||
59 | libata's device detection | ||
60 | method, ata_pio_devchk, and in general all the early probing was | ||
61 | based on extensive study of Hale Landis's probe/reset code in his | ||
62 | ATADRVR driver (www.ata-atapi.com). | ||
63 | </para> | ||
64 | </chapter> | ||
65 | |||
66 | <chapter id="libataDriverApi"> | ||
67 | <title>libata Driver API</title> | ||
68 | <sect1> | ||
69 | <title>struct ata_port_operations</title> | ||
70 | |||
71 | <programlisting> | ||
72 | void (*port_disable) (struct ata_port *); | ||
73 | </programlisting> | ||
74 | |||
75 | <para> | ||
76 | Called from ata_bus_probe() and ata_bus_reset() error paths, | ||
77 | as well as when unregistering from the SCSI module (rmmod, hot | ||
78 | unplug). | ||
79 | </para> | ||
80 | |||
81 | <programlisting> | ||
82 | void (*dev_config) (struct ata_port *, struct ata_device *); | ||
83 | </programlisting> | ||
84 | |||
85 | <para> | ||
86 | Called after IDENTIFY [PACKET] DEVICE is issued to each device | ||
87 | found. Typically used to apply device-specific fixups prior to | ||
88 | issue of SET FEATURES - XFER MODE, and prior to operation. | ||
89 | </para> | ||
90 | |||
91 | <programlisting> | ||
92 | void (*set_piomode) (struct ata_port *, struct ata_device *); | ||
93 | void (*set_dmamode) (struct ata_port *, struct ata_device *); | ||
94 | void (*post_set_mode) (struct ata_port *ap); | ||
95 | </programlisting> | ||
96 | |||
97 | <para> | ||
98 | Hooks called prior to the issue of SET FEATURES - XFER MODE | ||
99 | command. dev->pio_mode is guaranteed to be valid when | ||
100 | ->set_piomode() is called, and dev->dma_mode is guaranteed to be | ||
101 | valid when ->set_dmamode() is called. ->post_set_mode() is | ||
102 | called unconditionally, after the SET FEATURES - XFER MODE | ||
103 | command completes successfully. | ||
104 | </para> | ||
105 | |||
106 | <para> | ||
107 | ->set_piomode() is always called (if present), but | ||
108 | ->set_dmamode() is only called if DMA is possible. | ||
109 | </para> | ||
110 | |||
111 | <programlisting> | ||
112 | void (*tf_load) (struct ata_port *ap, struct ata_taskfile *tf); | ||
113 | void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf); | ||
114 | </programlisting> | ||
115 | |||
116 | <para> | ||
117 | ->tf_load() is called to load the given taskfile into hardware | ||
118 | registers / DMA buffers. ->tf_read() is called to read the | ||
119 | hardware registers / DMA buffers, to obtain the current set of | ||
120 | taskfile register values. | ||
121 | </para> | ||
122 | |||
123 | <programlisting> | ||
124 | void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf); | ||
125 | </programlisting> | ||
126 | |||
127 | <para> | ||
128 | Causes an ATA command, previously loaded with | ||
129 | ->tf_load(), to be initiated in hardware. | ||
130 | </para> | ||
131 | |||
132 | <programlisting> | ||
133 | u8 (*check_status)(struct ata_port *ap); | ||
134 | void (*dev_select)(struct ata_port *ap, unsigned int device); | ||
135 | </programlisting> | ||
136 | |||
137 | <para> | ||
138 | Reads the Status ATA shadow register from hardware. On some | ||
139 | hardware, this has the side effect of clearing the interrupt | ||
140 | condition. | ||
141 | </para> | ||
142 | |||
143 | <programlisting> | ||
144 | void (*dev_select)(struct ata_port *ap, unsigned int device); | ||
145 | </programlisting> | ||
146 | |||
147 | <para> | ||
148 | Issues the low-level hardware command(s) that causes one of N | ||
149 | hardware devices to be considered 'selected' (active and | ||
150 | available for use) on the ATA bus. | ||
151 | </para> | ||
152 | |||
153 | <programlisting> | ||
154 | void (*phy_reset) (struct ata_port *ap); | ||
155 | </programlisting> | ||
156 | |||
157 | <para> | ||
158 | The very first step in the probe phase. Actions vary depending | ||
159 | on the bus type, typically. After waking up the device and probing | ||
160 | for device presence (PATA and SATA), typically a soft reset | ||
161 | (SRST) will be performed. Drivers typically use the helper | ||
162 | functions ata_bus_reset() or sata_phy_reset() for this hook. | ||
163 | </para> | ||
164 | |||
165 | <programlisting> | ||
166 | void (*bmdma_setup) (struct ata_queued_cmd *qc); | ||
167 | void (*bmdma_start) (struct ata_queued_cmd *qc); | ||
168 | </programlisting> | ||
169 | |||
170 | <para> | ||
171 | When setting up an IDE BMDMA transaction, these hooks arm | ||
172 | (->bmdma_setup) and fire (->bmdma_start) the hardware's DMA | ||
173 | engine. | ||
174 | </para> | ||
175 | |||
176 | <programlisting> | ||
177 | void (*qc_prep) (struct ata_queued_cmd *qc); | ||
178 | int (*qc_issue) (struct ata_queued_cmd *qc); | ||
179 | </programlisting> | ||
180 | |||
181 | <para> | ||
182 | Higher-level hooks, these two hooks can potentially supersede | ||
183 | several of the above taskfile/DMA engine hooks. ->qc_prep is | ||
184 | called after the buffers have been DMA-mapped, and is typically | ||
185 | used to populate the hardware's DMA scatter-gather table. | ||
186 | Most drivers use the standard ata_qc_prep() helper function, but | ||
187 | more advanced drivers roll their own. | ||
188 | </para> | ||
189 | <para> | ||
190 | ->qc_issue is used to make a command active, once the hardware | ||
191 | and S/G tables have been prepared. IDE BMDMA drivers use the | ||
192 | helper function ata_qc_issue_prot() for taskfile protocol-based | ||
193 | dispatch. More advanced drivers roll their own ->qc_issue | ||
194 | implementation, using this as the "issue new ATA command to | ||
195 | hardware" hook. | ||
196 | </para> | ||
197 | |||
198 | <programlisting> | ||
199 | void (*eng_timeout) (struct ata_port *ap); | ||
200 | </programlisting> | ||
201 | |||
202 | <para> | ||
203 | This is a high level error handling function, called from the | ||
204 | error handling thread, when a command times out. | ||
205 | </para> | ||
206 | |||
207 | <programlisting> | ||
208 | irqreturn_t (*irq_handler)(int, void *, struct pt_regs *); | ||
209 | void (*irq_clear) (struct ata_port *); | ||
210 | </programlisting> | ||
211 | |||
212 | <para> | ||
213 | ->irq_handler is the interrupt handling routine registered with | ||
214 | the system, by libata. ->irq_clear is called during probe just | ||
215 | before the interrupt handler is registered, to be sure hardware | ||
216 | is quiet. | ||
217 | </para> | ||
218 | |||
219 | <programlisting> | ||
220 | u32 (*scr_read) (struct ata_port *ap, unsigned int sc_reg); | ||
221 | void (*scr_write) (struct ata_port *ap, unsigned int sc_reg, | ||
222 | u32 val); | ||
223 | </programlisting> | ||
224 | |||
225 | <para> | ||
226 | Read and write standard SATA phy registers. Currently only used | ||
227 | if ->phy_reset hook called the sata_phy_reset() helper function. | ||
228 | </para> | ||
229 | |||
230 | <programlisting> | ||
231 | int (*port_start) (struct ata_port *ap); | ||
232 | void (*port_stop) (struct ata_port *ap); | ||
233 | void (*host_stop) (struct ata_host_set *host_set); | ||
234 | </programlisting> | ||
235 | |||
236 | <para> | ||
237 | ->port_start() is called just after the data structures for each | ||
238 | port are initialized. Typically this is used to alloc per-port | ||
239 | DMA buffers / tables / rings, enable DMA engines, and similar | ||
240 | tasks. | ||
241 | </para> | ||
242 | <para> | ||
243 | ->host_stop() is called when the rmmod or hot unplug process | ||
244 | begins. The hook must stop all hardware interrupts, DMA | ||
245 | engines, etc. | ||
246 | </para> | ||
247 | <para> | ||
248 | ->port_stop() is called after ->host_stop(). Its sole function | ||
249 | is to release DMA/memory resources, now that they are no longer | ||
250 | actively being used. | ||
251 | </para> | ||
252 | |||
253 | </sect1> | ||
254 | </chapter> | ||
255 | |||
256 | <chapter id="libataExt"> | ||
257 | <title>libata Library</title> | ||
258 | !Edrivers/scsi/libata-core.c | ||
259 | </chapter> | ||
260 | |||
261 | <chapter id="libataInt"> | ||
262 | <title>libata Core Internals</title> | ||
263 | !Idrivers/scsi/libata-core.c | ||
264 | </chapter> | ||
265 | |||
266 | <chapter id="libataScsiInt"> | ||
267 | <title>libata SCSI translation/emulation</title> | ||
268 | !Edrivers/scsi/libata-scsi.c | ||
269 | !Idrivers/scsi/libata-scsi.c | ||
270 | </chapter> | ||
271 | |||
272 | <chapter id="PiixInt"> | ||
273 | <title>ata_piix Internals</title> | ||
274 | !Idrivers/scsi/ata_piix.c | ||
275 | </chapter> | ||
276 | |||
277 | <chapter id="SILInt"> | ||
278 | <title>sata_sil Internals</title> | ||
279 | !Idrivers/scsi/sata_sil.c | ||
280 | </chapter> | ||
281 | |||
282 | </book> | ||
diff --git a/Documentation/DocBook/librs.tmpl b/Documentation/DocBook/librs.tmpl new file mode 100644 index 000000000000..3ff39bafc00e --- /dev/null +++ b/Documentation/DocBook/librs.tmpl | |||
@@ -0,0 +1,289 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="Reed-Solomon-Library-Guide"> | ||
6 | <bookinfo> | ||
7 | <title>Reed-Solomon Library Programming Interface</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Thomas</firstname> | ||
12 | <surname>Gleixner</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>tglx@linutronix.de</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2004</year> | ||
23 | <holder>Thomas Gleixner</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License version 2 as published by the Free Software Foundation. | ||
31 | </para> | ||
32 | |||
33 | <para> | ||
34 | This program is distributed in the hope that it will be | ||
35 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
36 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
37 | See the GNU General Public License for more details. | ||
38 | </para> | ||
39 | |||
40 | <para> | ||
41 | You should have received a copy of the GNU General Public | ||
42 | License along with this program; if not, write to the Free | ||
43 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
44 | MA 02111-1307 USA | ||
45 | </para> | ||
46 | |||
47 | <para> | ||
48 | For more details see the file COPYING in the source | ||
49 | distribution of Linux. | ||
50 | </para> | ||
51 | </legalnotice> | ||
52 | </bookinfo> | ||
53 | |||
54 | <toc></toc> | ||
55 | |||
56 | <chapter id="intro"> | ||
57 | <title>Introduction</title> | ||
58 | <para> | ||
59 | The generic Reed-Solomon Library provides encoding, decoding | ||
60 | and error correction functions. | ||
61 | </para> | ||
62 | <para> | ||
63 | Reed-Solomon codes are used in communication and storage | ||
64 | applications to ensure data integrity. | ||
65 | </para> | ||
66 | <para> | ||
67 | This documentation is provided for developers who want to utilize | ||
68 | the functions provided by the library. | ||
69 | </para> | ||
70 | </chapter> | ||
71 | |||
72 | <chapter id="bugs"> | ||
73 | <title>Known Bugs And Assumptions</title> | ||
74 | <para> | ||
75 | None. | ||
76 | </para> | ||
77 | </chapter> | ||
78 | |||
79 | <chapter id="usage"> | ||
80 | <title>Usage</title> | ||
81 | <para> | ||
82 | This chapter provides examples of how to use the library. | ||
83 | </para> | ||
84 | <sect1> | ||
85 | <title>Initializing</title> | ||
86 | <para> | ||
87 | The init function init_rs returns a pointer to a | ||
88 | rs decoder structure, which holds the necessary | ||
89 | information for encoding, decoding and error correction | ||
90 | with the given polynomial. It either uses an existing | ||
91 | matching decoder or creates a new one. On creation all | ||
92 | the lookup tables for fast en/decoding are created. | ||
93 | The function may take a while, so make sure not to | ||
94 | call it in critical code paths. | ||
95 | </para> | ||
96 | <programlisting> | ||
97 | /* the Reed Solomon control structure */ | ||
98 | static struct rs_control *rs_decoder; | ||
99 | |||
100 | /* Symbolsize is 10 (bits) | ||
101 | * Primitive polynomial is x^10+x^3+1 | ||
102 | * first consecutive root is 0 | ||
103 | * primitive element to generate roots = 1 | ||
104 | * generator polynomial degree (number of roots) = 6 | ||
105 | */ | ||
106 | rs_decoder = init_rs (10, 0x409, 0, 1, 6); | ||
107 | </programlisting> | ||
108 | </sect1> | ||
109 | <sect1> | ||
110 | <title>Encoding</title> | ||
111 | <para> | ||
112 | The encoder calculates the Reed-Solomon code over | ||
113 | the given data length and stores the result in | ||
114 | the parity buffer. Note that the parity buffer must | ||
115 | be initialized before calling the encoder. | ||
116 | </para> | ||
117 | <para> | ||
118 | The expanded data can be inverted on the fly by | ||
119 | providing a non zero inversion mask. The expanded data is | ||
120 | XOR'ed with the mask. This is used e.g. for FLASH | ||
121 | ECC, where the all 0xFF is inverted to an all 0x00. | ||
122 | The Reed-Solomon code for all 0x00 is all 0x00. The | ||
123 | code is inverted before storing to FLASH so it is 0xFF | ||
124 | too. This prevents reading from an erased FLASH from | ||
125 | resulting in ECC errors. | ||
126 | </para> | ||
127 | <para> | ||
128 | The databytes are expanded to the given symbol size | ||
129 | on the fly. There is no support for encoding continuous | ||
130 | bitstreams with a symbol size != 8 at the moment. If | ||
131 | it is necessary it should not be a big deal to implement | ||
132 | such functionality. | ||
133 | </para> | ||
134 | <programlisting> | ||
135 | /* Parity buffer. Size = number of roots */ | ||
136 | uint16_t par[6]; | ||
137 | /* Initialize the parity buffer */ | ||
138 | memset(par, 0, sizeof(par)); | ||
139 | /* Encode 512 byte in data8. Store parity in buffer par */ | ||
140 | encode_rs8 (rs_decoder, data8, 512, par, 0); | ||
141 | </programlisting> | ||
142 | </sect1> | ||
143 | <sect1> | ||
144 | <title>Decoding</title> | ||
145 | <para> | ||
146 | The decoder calculates the syndrome over | ||
147 | the given data length and the received parity symbols | ||
148 | and corrects errors in the data. | ||
149 | </para> | ||
150 | <para> | ||
151 | If a syndrome is available from a hardware decoder | ||
152 | then the syndrome calculation is skipped. | ||
153 | </para> | ||
154 | <para> | ||
155 | The correction of the data buffer can be suppressed | ||
156 | by providing a correction pattern buffer and an error | ||
157 | location buffer to the decoder. The decoder stores the | ||
158 | calculated error location and the correction bitmask | ||
159 | in the given buffers. This is useful for hardware | ||
160 | decoders which use a weird bit ordering scheme. | ||
161 | </para> | ||
162 | <para> | ||
163 | The databytes are expanded to the given symbol size | ||
164 | on the fly. There is no support for decoding continuous | ||
165 | bitstreams with a symbolsize != 8 at the moment. If | ||
166 | it is necessary it should not be a big deal to implement | ||
167 | such functionality. | ||
168 | </para> | ||
169 | |||
170 | <sect2> | ||
171 | <title> | ||
172 | Decoding with syndrome calculation, direct data correction | ||
173 | </title> | ||
174 | <programlisting> | ||
175 | /* Parity buffer. Size = number of roots */ | ||
176 | uint16_t par[6]; | ||
177 | uint8_t data8[512]; | ||
178 | int numerr; | ||
179 | /* Receive data */ | ||
180 | ..... | ||
181 | /* Receive parity */ | ||
182 | ..... | ||
183 | /* Decode 512 byte in data8.*/ | ||
184 | numerr = decode_rs8 (rs_decoder, data8, par, 512, NULL, 0, NULL, 0, NULL); | ||
185 | </programlisting> | ||
186 | </sect2> | ||
187 | |||
188 | <sect2> | ||
189 | <title> | ||
190 | Decoding with syndrome given by hardware decoder, direct data correction | ||
191 | </title> | ||
192 | <programlisting> | ||
193 | /* Parity buffer. Size = number of roots */ | ||
194 | uint16_t par[6], syn[6]; | ||
195 | uint8_t data8[512]; | ||
196 | int numerr; | ||
197 | /* Receive data */ | ||
198 | ..... | ||
199 | /* Receive parity */ | ||
200 | ..... | ||
201 | /* Get syndrome from hardware decoder */ | ||
202 | ..... | ||
203 | /* Decode 512 byte in data8.*/ | ||
204 | numerr = decode_rs8 (rs_decoder, data8, par, 512, syn, 0, NULL, 0, NULL); | ||
205 | </programlisting> | ||
206 | </sect2> | ||
207 | |||
208 | <sect2> | ||
209 | <title> | ||
210 | Decoding with syndrome given by hardware decoder, no direct data correction. | ||
211 | </title> | ||
212 | <para> | ||
213 | Note: It's not necessary to give data and received parity to the decoder. | ||
214 | </para> | ||
215 | <programlisting> | ||
216 | /* Parity buffer. Size = number of roots */ | ||
217 | uint16_t par[6], syn[6], corr[8]; | ||
218 | uint8_t data8[512]; | ||
219 | int numerr, errpos[8]; | ||
220 | /* Receive data */ | ||
221 | ..... | ||
222 | /* Receive parity */ | ||
223 | ..... | ||
224 | /* Get syndrome from hardware decoder */ | ||
225 | ..... | ||
226 | /* Decode 512 byte in data8.*/ | ||
227 | numerr = decode_rs8 (rs_decoder, NULL, NULL, 512, syn, 0, errpos, 0, corr); | ||
228 | for (i = 0; i < numerr; i++) { | ||
229 | do_error_correction_in_your_buffer(errpos[i], corr[i]); | ||
230 | } | ||
231 | </programlisting> | ||
232 | </sect2> | ||
233 | </sect1> | ||
234 | <sect1> | ||
235 | <title>Cleanup</title> | ||
236 | <para> | ||
237 | The function free_rs frees the allocated resources, | ||
238 | if the caller is the last user of the decoder. | ||
239 | </para> | ||
240 | <programlisting> | ||
241 | /* Release resources */ | ||
242 | free_rs(rs_decoder); | ||
243 | </programlisting> | ||
244 | </sect1> | ||
245 | |||
246 | </chapter> | ||
247 | |||
248 | <chapter id="structs"> | ||
249 | <title>Structures</title> | ||
250 | <para> | ||
251 | This chapter contains the autogenerated documentation of the structures which are | ||
252 | used in the Reed-Solomon Library and are relevant for a developer. | ||
253 | </para> | ||
254 | !Iinclude/linux/rslib.h | ||
255 | </chapter> | ||
256 | |||
257 | <chapter id="pubfunctions"> | ||
258 | <title>Public Functions Provided</title> | ||
259 | <para> | ||
260 | This chapter contains the autogenerated documentation of the Reed-Solomon functions | ||
261 | which are exported. | ||
262 | </para> | ||
263 | !Elib/reed_solomon/reed_solomon.c | ||
264 | </chapter> | ||
265 | |||
266 | <chapter id="credits"> | ||
267 | <title>Credits</title> | ||
268 | <para> | ||
269 | The library code for encoding and decoding was written by Phil Karn. | ||
270 | </para> | ||
271 | <programlisting> | ||
272 | Copyright 2002, Phil Karn, KA9Q | ||
273 | May be used under the terms of the GNU General Public License (GPL) | ||
274 | </programlisting> | ||
275 | <para> | ||
276 | The wrapper functions and interfaces are written by Thomas Gleixner | ||
277 | </para> | ||
278 | <para> | ||
279 | Many users have provided bugfixes, improvements and helping hands for testing. | ||
280 | Thanks a lot. | ||
281 | </para> | ||
282 | <para> | ||
283 | The following people have contributed to this document: | ||
284 | </para> | ||
285 | <para> | ||
286 | Thomas Gleixner <email>tglx@linutronix.de</email> | ||
287 | </para> | ||
288 | </chapter> | ||
289 | </book> | ||
diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl new file mode 100644 index 000000000000..f63822195871 --- /dev/null +++ b/Documentation/DocBook/lsm.tmpl | |||
@@ -0,0 +1,265 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <article class="whitepaper" id="LinuxSecurityModule" lang="en"> | ||
6 | <articleinfo> | ||
7 | <title>Linux Security Modules: General Security Hooks for Linux</title> | ||
8 | <authorgroup> | ||
9 | <author> | ||
10 | <firstname>Stephen</firstname> | ||
11 | <surname>Smalley</surname> | ||
12 | <affiliation> | ||
13 | <orgname>NAI Labs</orgname> | ||
14 | <address><email>ssmalley@nai.com</email></address> | ||
15 | </affiliation> | ||
16 | </author> | ||
17 | <author> | ||
18 | <firstname>Timothy</firstname> | ||
19 | <surname>Fraser</surname> | ||
20 | <affiliation> | ||
21 | <orgname>NAI Labs</orgname> | ||
22 | <address><email>tfraser@nai.com</email></address> | ||
23 | </affiliation> | ||
24 | </author> | ||
25 | <author> | ||
26 | <firstname>Chris</firstname> | ||
27 | <surname>Vance</surname> | ||
28 | <affiliation> | ||
29 | <orgname>NAI Labs</orgname> | ||
30 | <address><email>cvance@nai.com</email></address> | ||
31 | </affiliation> | ||
32 | </author> | ||
33 | </authorgroup> | ||
34 | </articleinfo> | ||
35 | |||
36 | <sect1><title>Introduction</title> | ||
37 | |||
38 | <para> | ||
39 | In March 2001, the National Security Agency (NSA) gave a presentation | ||
40 | about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel | ||
41 | Summit. SELinux is an implementation of flexible and fine-grained | ||
42 | nondiscretionary access controls in the Linux kernel, originally | ||
43 | implemented as its own particular kernel patch. Several other | ||
44 | security projects (e.g. RSBAC, Medusa) have also developed flexible | ||
45 | access control architectures for the Linux kernel, and various | ||
46 | projects have developed particular access control models for Linux | ||
47 | (e.g. LIDS, DTE, SubDomain). Each project has developed and | ||
48 | maintained its own kernel patch to support its security needs. | ||
49 | </para> | ||
50 | |||
51 | <para> | ||
52 | In response to the NSA presentation, Linus Torvalds made a set of | ||
53 | remarks that described a security framework he would be willing to | ||
54 | consider for inclusion in the mainstream Linux kernel. He described a | ||
55 | general framework that would provide a set of security hooks to | ||
56 | control operations on kernel objects and a set of opaque security | ||
57 | fields in kernel data structures for maintaining security attributes. | ||
58 | This framework could then be used by loadable kernel modules to | ||
59 | implement any desired model of security. Linus also suggested the | ||
60 | possibility of migrating the Linux capabilities code into such a | ||
61 | module. | ||
62 | </para> | ||
63 | |||
64 | <para> | ||
65 | The Linux Security Modules (LSM) project was started by WireX to | ||
66 | develop such a framework. LSM is a joint development effort by | ||
67 | several security projects, including Immunix, SELinux, SGI and Janus, | ||
68 | and several individuals, including Greg Kroah-Hartman and James | ||
69 | Morris, to develop a Linux kernel patch that implements this | ||
70 | framework. The patch is currently tracking the 2.4 series and is | ||
71 | targeted for integration into the 2.5 development series. This | ||
72 | technical report provides an overview of the framework and the example | ||
73 | capabilities security module provided by the LSM kernel patch. | ||
74 | </para> | ||
75 | |||
76 | </sect1> | ||
77 | |||
78 | <sect1 id="framework"><title>LSM Framework</title> | ||
79 | |||
80 | <para> | ||
81 | The LSM kernel patch provides a general kernel framework to support | ||
82 | security modules. In particular, the LSM framework is primarily | ||
83 | focused on supporting access control modules, although future | ||
84 | development is likely to address other security needs such as | ||
85 | auditing. By itself, the framework does not provide any additional | ||
86 | security; it merely provides the infrastructure to support security | ||
87 | modules. The LSM kernel patch also moves most of the capabilities | ||
88 | logic into an optional security module, with the system defaulting | ||
89 | to the traditional superuser logic. This capabilities module | ||
90 | is discussed further in <xref linkend="cap"/>. | ||
91 | </para> | ||
92 | |||
93 | <para> | ||
94 | The LSM kernel patch adds security fields to kernel data structures | ||
95 | and inserts calls to hook functions at critical points in the kernel | ||
96 | code to manage the security fields and to perform access control. It | ||
97 | also adds functions for registering and unregistering security | ||
98 | modules, and adds a general <function>security</function> system call | ||
99 | to support new system calls for security-aware applications. | ||
100 | </para> | ||
101 | |||
102 | <para> | ||
103 | The LSM security fields are simply <type>void*</type> pointers. For | ||
104 | process and program execution security information, security fields | ||
105 | were added to <structname>struct task_struct</structname> and | ||
106 | <structname>struct linux_binprm</structname>. For filesystem security | ||
107 | information, a security field was added to | ||
108 | <structname>struct super_block</structname>. For pipe, file, and socket | ||
109 | security information, security fields were added to | ||
110 | <structname>struct inode</structname> and | ||
111 | <structname>struct file</structname>. For packet and network device security | ||
112 | information, security fields were added to | ||
113 | <structname>struct sk_buff</structname> and | ||
114 | <structname>struct net_device</structname>. For System V IPC security | ||
115 | information, security fields were added to | ||
116 | <structname>struct kern_ipc_perm</structname> and | ||
117 | <structname>struct msg_msg</structname>; additionally, the definitions | ||
118 | for <structname>struct msg_msg</structname>, <structname>struct | ||
119 | msg_queue</structname>, and <structname>struct | ||
120 | shmid_kernel</structname> were moved to header files | ||
121 | (<filename>include/linux/msg.h</filename> and | ||
122 | <filename>include/linux/shm.h</filename> as appropriate) to allow | ||
123 | the security modules to use these definitions. | ||
124 | </para> | ||
125 | |||
126 | <para> | ||
127 | Each LSM hook is a function pointer in a global table, | ||
128 | security_ops. This table is a | ||
129 | <structname>security_operations</structname> structure as defined by | ||
130 | <filename>include/linux/security.h</filename>. Detailed documentation | ||
131 | for each hook is included in this header file. At present, this | ||
132 | structure consists of a collection of substructures that group related | ||
133 | hooks based on the kernel object (e.g. task, inode, file, sk_buff, | ||
134 | etc) as well as some top-level hook function pointers for system | ||
135 | operations. This structure is likely to be flattened in the future | ||
136 | for performance. The placement of the hook calls in the kernel code | ||
137 | is described by the "called:" lines in the per-hook documentation in | ||
138 | the header file. The hook calls can also be easily found in the | ||
139 | kernel code by looking for the string "security_ops->". | ||
140 | |||
141 | </para> | ||
142 | |||
143 | <para> | ||
144 | Linus mentioned per-process security hooks in his original remarks as a | ||
145 | possible alternative to global security hooks. However, if LSM were | ||
146 | to start from the perspective of per-process hooks, then the base | ||
147 | framework would have to deal with how to handle operations that | ||
148 | involve multiple processes (e.g. kill), since each process might have | ||
149 | its own hook for controlling the operation. This would require a | ||
150 | general mechanism for composing hooks in the base framework. | ||
151 | Additionally, LSM would still need global hooks for operations that | ||
152 | have no process context (e.g. network input operations). | ||
153 | Consequently, LSM provides global security hooks, but a security | ||
154 | module is free to implement per-process hooks (where that makes sense) | ||
155 | by storing a security_ops table in each process' security field and | ||
156 | then invoking these per-process hooks from the global hooks. | ||
157 | The problem of composition is thus deferred to the module. | ||
158 | </para> | ||
159 | |||
160 | <para> | ||
161 | The global security_ops table is initialized to a set of hook | ||
162 | functions provided by a dummy security module that provides | ||
163 | traditional superuser logic. A <function>register_security</function> | ||
164 | function (in <filename>security/security.c</filename>) is provided to | ||
165 | allow a security module to set security_ops to refer to its own hook | ||
166 | functions, and an <function>unregister_security</function> function is | ||
167 | provided to revert security_ops to the dummy module hooks. This | ||
168 | mechanism is used to set the primary security module, which is | ||
169 | responsible for making the final decision for each hook. | ||
170 | </para> | ||
171 | |||
172 | <para> | ||
173 | LSM also provides a simple mechanism for stacking additional security | ||
174 | modules with the primary security module. It defines | ||
175 | <function>register_security</function> and | ||
176 | <function>unregister_security</function> hooks in the | ||
177 | <structname>security_operations</structname> structure and provides | ||
178 | <function>mod_reg_security</function> and | ||
179 | <function>mod_unreg_security</function> functions that invoke these | ||
180 | hooks after performing some sanity checking. A security module can | ||
181 | call these functions in order to stack with other modules. However, | ||
182 | the actual details of how this stacking is handled are deferred to the | ||
183 | module, which can implement these hooks in any way it wishes | ||
184 | (including always returning an error if it does not wish to support | ||
185 | stacking). In this manner, LSM again defers the problem of | ||
186 | composition to the module. | ||
187 | </para> | ||
188 | |||
189 | <para> | ||
190 | Although the LSM hooks are organized into substructures based on | ||
191 | kernel object, all of the hooks can be viewed as falling into two | ||
192 | major categories: hooks that are used to manage the security fields | ||
193 | and hooks that are used to perform access control. Examples of the | ||
194 | first category of hooks include the | ||
195 | <function>alloc_security</function> and | ||
196 | <function>free_security</function> hooks defined for each kernel data | ||
197 | structure that has a security field. These hooks are used to allocate | ||
198 | and free security structures for kernel objects. The first category | ||
199 | of hooks also includes hooks that set information in the security | ||
200 | field after allocation, such as the <function>post_lookup</function> | ||
201 | hook in <structname>struct inode_security_ops</structname>. This hook | ||
202 | is used to set security information for inodes after successful lookup | ||
203 | operations. An example of the second category of hooks is the | ||
204 | <function>permission</function> hook in | ||
205 | <structname>struct inode_security_ops</structname>. This hook checks | ||
206 | permission when accessing an inode. | ||
207 | </para> | ||
208 | |||
209 | </sect1> | ||
210 | |||
211 | <sect1 id="cap"><title>LSM Capabilities Module</title> | ||
212 | |||
213 | <para> | ||
214 | The LSM kernel patch moves most of the existing POSIX.1e capabilities | ||
215 | logic into an optional security module stored in the file | ||
216 | <filename>security/capability.c</filename>. This change allows | ||
217 | users who do not want to use capabilities to omit this code entirely | ||
218 | from their kernel, instead using the dummy module for traditional | ||
219 | superuser logic or any other module that they desire. This change | ||
220 | also allows the developers of the capabilities logic to maintain and | ||
221 | enhance their code more freely, without needing to integrate patches | ||
222 | back into the base kernel. | ||
223 | </para> | ||
224 | |||
225 | <para> | ||
226 | In addition to moving the capabilities logic, the LSM kernel patch | ||
227 | could move the capability-related fields from the kernel data | ||
228 | structures into the new security fields managed by the security | ||
229 | modules. However, at present, the LSM kernel patch leaves the | ||
230 | capability fields in the kernel data structures. In his original | ||
231 | remarks, Linus suggested that this might be preferable so that other | ||
232 | security modules can be easily stacked with the capabilities module | ||
233 | without needing to chain multiple security structures on the security field. | ||
234 | It also avoids imposing extra overhead on the capabilities module | ||
235 | to manage the security fields. However, the LSM framework could | ||
236 | certainly support such a move if it is determined to be desirable, | ||
237 | with only a few additional changes described below. | ||
238 | </para> | ||
239 | |||
240 | <para> | ||
241 | At present, the capabilities logic for computing process capabilities | ||
242 | on <function>execve</function> and <function>set*uid</function>, | ||
243 | checking capabilities for a particular process, saving and checking | ||
244 | capabilities for netlink messages, and handling the | ||
245 | <function>capget</function> and <function>capset</function> system | ||
246 | calls have been moved into the capabilities module. There are still a | ||
247 | few locations in the base kernel where capability-related fields are | ||
248 | directly examined or modified, but the current version of the LSM | ||
249 | patch does allow a security module to completely replace the | ||
250 | assignment and testing of capabilities. These few locations would | ||
251 | need to be changed if the capability-related fields were moved into | ||
252 | the security field. The following is a list of known locations that | ||
253 | still perform such direct examination or modification of | ||
254 | capability-related fields: | ||
255 | <itemizedlist> | ||
256 | <listitem><para><filename>fs/open.c</filename>:<function>sys_access</function></para></listitem> | ||
257 | <listitem><para><filename>fs/lockd/host.c</filename>:<function>nlm_bind_host</function></para></listitem> | ||
258 | <listitem><para><filename>fs/nfsd/auth.c</filename>:<function>nfsd_setuser</function></para></listitem> | ||
259 | <listitem><para><filename>fs/proc/array.c</filename>:<function>task_cap</function></para></listitem> | ||
260 | </itemizedlist> | ||
261 | </para> | ||
262 | |||
263 | </sect1> | ||
264 | |||
265 | </article> | ||
diff --git a/Documentation/DocBook/man/Makefile b/Documentation/DocBook/man/Makefile new file mode 100644 index 000000000000..4fb7ea0f7ac8 --- /dev/null +++ b/Documentation/DocBook/man/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | # Rules are put in Documentation/DocBook | ||
2 | |||
3 | clean-files := *.9.gz *.sgml manpage.links manpage.refs | ||
diff --git a/Documentation/DocBook/mcabook.tmpl b/Documentation/DocBook/mcabook.tmpl new file mode 100644 index 000000000000..4367f4642f3d --- /dev/null +++ b/Documentation/DocBook/mcabook.tmpl | |||
@@ -0,0 +1,107 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="MCAGuide"> | ||
6 | <bookinfo> | ||
7 | <title>MCA Driver Programming Interface</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Alan</firstname> | ||
12 | <surname>Cox</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>alan@redhat.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | <author> | ||
20 | <firstname>David</firstname> | ||
21 | <surname>Weinehall</surname> | ||
22 | </author> | ||
23 | <author> | ||
24 | <firstname>Chris</firstname> | ||
25 | <surname>Beauregard</surname> | ||
26 | </author> | ||
27 | </authorgroup> | ||
28 | |||
29 | <copyright> | ||
30 | <year>2000</year> | ||
31 | <holder>Alan Cox</holder> | ||
32 | <holder>David Weinehall</holder> | ||
33 | <holder>Chris Beauregard</holder> | ||
34 | </copyright> | ||
35 | |||
36 | <legalnotice> | ||
37 | <para> | ||
38 | This documentation is free software; you can redistribute | ||
39 | it and/or modify it under the terms of the GNU General Public | ||
40 | License as published by the Free Software Foundation; either | ||
41 | version 2 of the License, or (at your option) any later | ||
42 | version. | ||
43 | </para> | ||
44 | |||
45 | <para> | ||
46 | This program is distributed in the hope that it will be | ||
47 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
48 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
49 | See the GNU General Public License for more details. | ||
50 | </para> | ||
51 | |||
52 | <para> | ||
53 | You should have received a copy of the GNU General Public | ||
54 | License along with this program; if not, write to the Free | ||
55 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
56 | MA 02111-1307 USA | ||
57 | </para> | ||
58 | |||
59 | <para> | ||
60 | For more details see the file COPYING in the source | ||
61 | distribution of Linux. | ||
62 | </para> | ||
63 | </legalnotice> | ||
64 | </bookinfo> | ||
65 | |||
66 | <toc></toc> | ||
67 | |||
68 | <chapter id="intro"> | ||
69 | <title>Introduction</title> | ||
70 | <para> | ||
71 | The MCA bus functions provide a generalised interface to find MCA | ||
72 | bus cards, to claim them for a driver, and to read and manipulate POS | ||
73 | registers without being aware of the motherboard internals or | ||
74 | certain deep magic specific to onboard devices. | ||
75 | </para> | ||
76 | <para> | ||
77 | The basic interface to the MCA bus devices is the slot. Each slot | ||
78 | is numbered and virtual slot numbers are assigned to the internal | ||
79 | devices. Using a pci_dev as other busses do does not really make | ||
80 | sense in the MCA context as the MCA bus resources require card | ||
81 | specific interpretation. | ||
82 | </para> | ||
83 | <para> | ||
84 | Finally the MCA bus functions provide a parallel set of DMA | ||
85 | functions mimicking the ISA bus DMA functions as closely as possible, | ||
86 | although also supporting the additional DMA functionality on the | ||
87 | MCA bus controllers. | ||
88 | </para> | ||
89 | </chapter> | ||
90 | <chapter id="bugs"> | ||
91 | <title>Known Bugs And Assumptions</title> | ||
92 | <para> | ||
93 | None. | ||
94 | </para> | ||
95 | </chapter> | ||
96 | |||
97 | <chapter id="pubfunctions"> | ||
98 | <title>Public Functions Provided</title> | ||
99 | !Earch/i386/kernel/mca.c | ||
100 | </chapter> | ||
101 | |||
102 | <chapter id="dmafunctions"> | ||
103 | <title>DMA Functions Provided</title> | ||
104 | !Iinclude/asm-i386/mca_dma.h | ||
105 | </chapter> | ||
106 | |||
107 | </book> | ||
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl new file mode 100644 index 000000000000..6e463d0db266 --- /dev/null +++ b/Documentation/DocBook/mtdnand.tmpl | |||
@@ -0,0 +1,1320 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="MTD-NAND-Guide"> | ||
6 | <bookinfo> | ||
7 | <title>MTD NAND Driver Programming Interface</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Thomas</firstname> | ||
12 | <surname>Gleixner</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>tglx@linutronix.de</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2004</year> | ||
23 | <holder>Thomas Gleixner</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License version 2 as published by the Free Software Foundation. | ||
31 | </para> | ||
32 | |||
33 | <para> | ||
34 | This program is distributed in the hope that it will be | ||
35 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
36 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
37 | See the GNU General Public License for more details. | ||
38 | </para> | ||
39 | |||
40 | <para> | ||
41 | You should have received a copy of the GNU General Public | ||
42 | License along with this program; if not, write to the Free | ||
43 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
44 | MA 02111-1307 USA | ||
45 | </para> | ||
46 | |||
47 | <para> | ||
48 | For more details see the file COPYING in the source | ||
49 | distribution of Linux. | ||
50 | </para> | ||
51 | </legalnotice> | ||
52 | </bookinfo> | ||
53 | |||
54 | <toc></toc> | ||
55 | |||
56 | <chapter id="intro"> | ||
57 | <title>Introduction</title> | ||
58 | <para> | ||
59 | The generic NAND driver supports almost all NAND and AG-AND based | ||
60 | chips and connects them to the Memory Technology Devices (MTD) | ||
61 | subsystem of the Linux Kernel. | ||
62 | </para> | ||
63 | <para> | ||
64 | This documentation is provided for developers who want to implement | ||
65 | board drivers or filesystem drivers suitable for NAND devices. | ||
66 | </para> | ||
67 | </chapter> | ||
68 | |||
69 | <chapter id="bugs"> | ||
70 | <title>Known Bugs And Assumptions</title> | ||
71 | <para> | ||
72 | None. | ||
73 | </para> | ||
74 | </chapter> | ||
75 | |||
76 | <chapter id="dochints"> | ||
77 | <title>Documentation hints</title> | ||
78 | <para> | ||
79 | The function and structure docs are autogenerated. Each function and | ||
80 | struct member has a short description which is marked with an [XXX] identifier. | ||
81 | The following chapters explain the meaning of those identifiers. | ||
82 | </para> | ||
83 | <sect1> | ||
84 | <title>Function identifiers [XXX]</title> | ||
85 | <para> | ||
86 | The functions are marked with [XXX] identifiers in the short | ||
87 | comment. The identifiers explain the usage and scope of the | ||
88 | functions. Following identifiers are used: | ||
89 | </para> | ||
90 | <itemizedlist> | ||
91 | <listitem><para> | ||
92 | [MTD Interface]</para><para> | ||
93 | These functions provide the interface to the MTD kernel API. | ||
94 | They are not replaceable and provide functionality | ||
95 | which is completely hardware independent. | ||
96 | </para></listitem> | ||
97 | <listitem><para> | ||
98 | [NAND Interface]</para><para> | ||
99 | These functions are exported and provide the interface to the NAND kernel API. | ||
100 | </para></listitem> | ||
101 | <listitem><para> | ||
102 | [GENERIC]</para><para> | ||
103 | Generic functions are not replaceable and provide functionality | ||
104 | which is completely hardware independent. | ||
105 | </para></listitem> | ||
106 | <listitem><para> | ||
107 | [DEFAULT]</para><para> | ||
108 | Default functions provide hardware related functionality which is suitable | ||
109 | for most of the implementations. These functions can be replaced by the | ||
110 | board driver if necessary. Those functions are called via pointers in the | ||
111 | NAND chip description structure. The board driver can set the functions which | ||
112 | should be replaced by board dependent functions before calling nand_scan(). | ||
113 | If the function pointer is NULL on entry to nand_scan() then the pointer | ||
114 | is set to the default function which is suitable for the detected chip type. | ||
115 | </para></listitem> | ||
116 | </itemizedlist> | ||
117 | </sect1> | ||
118 | <sect1> | ||
119 | <title>Struct member identifiers [XXX]</title> | ||
120 | <para> | ||
121 | The struct members are marked with [XXX] identifiers in the | ||
122 | comment. The identifiers explain the usage and scope of the | ||
123 | members. Following identifiers are used: | ||
124 | </para> | ||
125 | <itemizedlist> | ||
126 | <listitem><para> | ||
127 | [INTERN]</para><para> | ||
128 | These members are for NAND driver internal use only and must not be | ||
129 | modified. Most of these values are calculated from the chip geometry | ||
130 | information which is evaluated during nand_scan(). | ||
131 | </para></listitem> | ||
132 | <listitem><para> | ||
133 | [REPLACEABLE]</para><para> | ||
134 | Replaceable members hold hardware related functions which can be | ||
135 | provided by the board driver. The board driver can set the functions which | ||
136 | should be replaced by board dependent functions before calling nand_scan(). | ||
137 | If the function pointer is NULL on entry to nand_scan() then the pointer | ||
138 | is set to the default function which is suitable for the detected chip type. | ||
139 | </para></listitem> | ||
140 | <listitem><para> | ||
141 | [BOARDSPECIFIC]</para><para> | ||
142 | Board specific members hold hardware related information which must | ||
143 | be provided by the board driver. The board driver must set the function | ||
144 | pointers and datafields before calling nand_scan(). | ||
145 | </para></listitem> | ||
146 | <listitem><para> | ||
147 | [OPTIONAL]</para><para> | ||
148 | Optional members can hold information relevant for the board driver. The | ||
149 | generic NAND driver code does not use this information. | ||
150 | </para></listitem> | ||
151 | </itemizedlist> | ||
152 | </sect1> | ||
153 | </chapter> | ||
154 | |||
155 | <chapter id="basicboarddriver"> | ||
156 | <title>Basic board driver</title> | ||
157 | <para> | ||
158 | For most boards it will be sufficient to provide just the | ||
159 | basic functions and fill out some really board dependent | ||
160 | members in the nand chip description structure. | ||
161 | See drivers/mtd/nand/skeleton for reference. | ||
162 | </para> | ||
163 | <sect1> | ||
164 | <title>Basic defines</title> | ||
165 | <para> | ||
166 | At least you have to provide a mtd structure and | ||
167 | a storage for the ioremap'ed chip address. | ||
168 | You can allocate the mtd structure using kmalloc | ||
169 | or you can allocate it statically. | ||
170 | In case of static allocation you have to allocate | ||
171 | a nand_chip structure too. | ||
172 | </para> | ||
173 | <para> | ||
174 | Kmalloc based example | ||
175 | </para> | ||
176 | <programlisting> | ||
177 | static struct mtd_info *board_mtd; | ||
178 | static unsigned long baseaddr; | ||
179 | </programlisting> | ||
180 | <para> | ||
181 | Static example | ||
182 | </para> | ||
183 | <programlisting> | ||
184 | static struct mtd_info board_mtd; | ||
185 | static struct nand_chip board_chip; | ||
186 | static unsigned long baseaddr; | ||
187 | </programlisting> | ||
188 | </sect1> | ||
189 | <sect1> | ||
190 | <title>Partition defines</title> | ||
191 | <para> | ||
192 | If you want to divide your device into partitions, then | ||
193 | enable the configuration switch CONFIG_MTD_PARTITIONS and define | ||
194 | a partitioning scheme suitable to your board. | ||
195 | </para> | ||
196 | <programlisting> | ||
197 | #define NUM_PARTITIONS 2 | ||
198 | static struct mtd_partition partition_info[] = { | ||
199 | { .name = "Flash partition 1", | ||
200 | .offset = 0, | ||
201 | .size = 8 * 1024 * 1024 }, | ||
202 | { .name = "Flash partition 2", | ||
203 | .offset = MTDPART_OFS_NEXT, | ||
204 | .size = MTDPART_SIZ_FULL }, | ||
205 | }; | ||
206 | </programlisting> | ||
207 | </sect1> | ||
208 | <sect1> | ||
209 | <title>Hardware control function</title> | ||
210 | <para> | ||
211 | The hardware control function provides access to the | ||
212 | control pins of the NAND chip(s). | ||
213 | The access can be done by GPIO pins or by address lines. | ||
214 | If you use address lines, make sure that the timing | ||
215 | requirements are met. | ||
216 | </para> | ||
217 | <para> | ||
218 | <emphasis>GPIO based example</emphasis> | ||
219 | </para> | ||
220 | <programlisting> | ||
221 | static void board_hwcontrol(struct mtd_info *mtd, int cmd) | ||
222 | { | ||
223 | switch(cmd){ | ||
224 | case NAND_CTL_SETCLE: /* Set CLE pin high */ break; | ||
225 | case NAND_CTL_CLRCLE: /* Set CLE pin low */ break; | ||
226 | case NAND_CTL_SETALE: /* Set ALE pin high */ break; | ||
227 | case NAND_CTL_CLRALE: /* Set ALE pin low */ break; | ||
228 | case NAND_CTL_SETNCE: /* Set nCE pin low */ break; | ||
229 | case NAND_CTL_CLRNCE: /* Set nCE pin high */ break; | ||
230 | } | ||
231 | } | ||
232 | </programlisting> | ||
233 | <para> | ||
234 | <emphasis>Address lines based example.</emphasis> It's assumed that the | ||
235 | nCE pin is driven by a chip select decoder. | ||
236 | </para> | ||
237 | <programlisting> | ||
238 | static void board_hwcontrol(struct mtd_info *mtd, int cmd) | ||
239 | { | ||
240 | struct nand_chip *this = (struct nand_chip *) mtd->priv; | ||
241 | switch(cmd){ | ||
242 | case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT; break; | ||
243 | case NAND_CTL_CLRCLE: this->IO_ADDR_W &= ~CLE_ADRR_BIT; break; | ||
244 | case NAND_CTL_SETALE: this->IO_ADDR_W |= ALE_ADRR_BIT; break; | ||
245 | case NAND_CTL_CLRALE: this->IO_ADDR_W &= ~ALE_ADRR_BIT; break; | ||
246 | } | ||
247 | } | ||
248 | </programlisting> | ||
249 | </sect1> | ||
250 | <sect1> | ||
251 | <title>Device ready function</title> | ||
252 | <para> | ||
253 | If the hardware interface has the ready busy pin of the NAND chip connected to a | ||
254 | GPIO or other accessible I/O pin, this function is used to read back the state of the | ||
255 | pin. The function has no arguments and should return 0, if the device is busy (R/B pin | ||
256 | is low) and 1, if the device is ready (R/B pin is high). | ||
257 | If the hardware interface does not give access to the ready busy pin, then | ||
258 | the function must not be defined and the function pointer this->dev_ready is set to NULL. | ||
259 | </para> | ||
260 | </sect1> | ||
261 | <sect1> | ||
262 | <title>Init function</title> | ||
263 | <para> | ||
264 | The init function allocates memory and sets up all the board | ||
265 | specific parameters and function pointers. When everything | ||
266 | is set up nand_scan() is called. This function tries to | ||
267 | detect and identify the chip. If a chip is found all the | ||
268 | internal data fields are initialized accordingly. | ||
269 | The structure(s) have to be zeroed out first and then filled with the necessary | ||
270 | information about the device. | ||
271 | </para> | ||
272 | <programlisting> | ||
273 | int __init board_init (void) | ||
274 | { | ||
275 | struct nand_chip *this; | ||
276 | int err = 0; | ||
277 | |||
278 | /* Allocate memory for MTD device structure and private data */ | ||
279 | board_mtd = kmalloc (sizeof(struct mtd_info) + sizeof (struct nand_chip), GFP_KERNEL); | ||
280 | if (!board_mtd) { | ||
281 | printk ("Unable to allocate NAND MTD device structure.\n"); | ||
282 | err = -ENOMEM; | ||
283 | goto out; | ||
284 | } | ||
285 | |||
286 | /* Initialize structures */ | ||
287 | memset ((char *) board_mtd, 0, sizeof(struct mtd_info) + sizeof(struct nand_chip)); | ||
288 | |||
289 | /* map physical address */ | ||
290 | baseaddr = (unsigned long)ioremap(CHIP_PHYSICAL_ADDRESS, 1024); | ||
291 | if(!baseaddr){ | ||
292 | printk("Ioremap to access NAND chip failed\n"); | ||
293 | err = -EIO; | ||
294 | goto out_mtd; | ||
295 | } | ||
296 | |||
297 | /* Get pointer to private data */ | ||
298 | this = (struct nand_chip *) (&board_mtd[1]); | ||
299 | /* Link the private data with the MTD structure */ | ||
300 | board_mtd->priv = this; | ||
301 | |||
302 | /* Set address of NAND IO lines */ | ||
303 | this->IO_ADDR_R = baseaddr; | ||
304 | this->IO_ADDR_W = baseaddr; | ||
305 | /* Reference hardware control function */ | ||
306 | this->hwcontrol = board_hwcontrol; | ||
307 | /* Set command delay time, see datasheet for correct value */ | ||
308 | this->chip_delay = CHIP_DEPENDEND_COMMAND_DELAY; | ||
309 | /* Assign the device ready function, if available */ | ||
310 | this->dev_ready = board_dev_ready; | ||
311 | this->eccmode = NAND_ECC_SOFT; | ||
312 | |||
313 | /* Scan to find existence of the device */ | ||
314 | if (nand_scan (board_mtd, 1)) { | ||
315 | err = -ENXIO; | ||
316 | goto out_ior; | ||
317 | } | ||
318 | |||
319 | add_mtd_partitions(board_mtd, partition_info, NUM_PARTITIONS); | ||
320 | goto out; | ||
321 | |||
322 | out_ior: | ||
323 | iounmap((void *)baseaddr); | ||
324 | out_mtd: | ||
325 | kfree (board_mtd); | ||
326 | out: | ||
327 | return err; | ||
328 | } | ||
329 | module_init(board_init); | ||
330 | </programlisting> | ||
331 | </sect1> | ||
332 | <sect1> | ||
333 | <title>Exit function</title> | ||
334 | <para> | ||
335 | The exit function is only necessary if the driver is | ||
336 | compiled as a module. It releases all resources which | ||
337 | are held by the chip driver and unregisters the partitions | ||
338 | in the MTD layer. | ||
339 | </para> | ||
340 | <programlisting> | ||
341 | #ifdef MODULE | ||
342 | static void __exit board_cleanup (void) | ||
343 | { | ||
344 | /* Release resources, unregister device */ | ||
345 | nand_release (board_mtd); | ||
346 | |||
347 | /* unmap physical address */ | ||
348 | iounmap((void *)baseaddr); | ||
349 | |||
350 | /* Free the MTD device structure */ | ||
351 | kfree (board_mtd); | ||
352 | } | ||
353 | module_exit(board_cleanup); | ||
354 | #endif | ||
355 | </programlisting> | ||
356 | </sect1> | ||
357 | </chapter> | ||
358 | |||
359 | <chapter id="boarddriversadvanced"> | ||
360 | <title>Advanced board driver functions</title> | ||
361 | <para> | ||
362 | This chapter describes the advanced functionality of the NAND | ||
363 | driver. For a list of functions which can be overridden by the board | ||
364 | driver see the documentation of the nand_chip structure. | ||
365 | </para> | ||
366 | <sect1> | ||
367 | <title>Multiple chip control</title> | ||
368 | <para> | ||
369 | The nand driver can control chip arrays. Therefore the | ||
370 | board driver must provide its own select_chip function. This | ||
371 | function must (de)select the requested chip. | ||
372 | The function pointer in the nand_chip structure must | ||
373 | be set before calling nand_scan(). The maxchip parameter | ||
374 | of nand_scan() defines the maximum number of chips to | ||
375 | scan for. Make sure that the select_chip function can | ||
376 | handle the requested number of chips. | ||
377 | </para> | ||
378 | <para> | ||
379 | The nand driver concatenates the chips to one virtual | ||
380 | chip and provides this virtual chip to the MTD layer. | ||
381 | </para> | ||
382 | <para> | ||
383 | <emphasis>Note: The driver can only handle linear chip arrays | ||
384 | of equally sized chips. There is no support for | ||
385 | parallel arrays which extend the buswidth.</emphasis> | ||
386 | </para> | ||
387 | <para> | ||
388 | <emphasis>GPIO based example</emphasis> | ||
389 | </para> | ||
390 | <programlisting> | ||
391 | static void board_select_chip (struct mtd_info *mtd, int chip) | ||
392 | { | ||
393 | /* Deselect all chips, set all nCE pins high */ | ||
394 | GPIO(BOARD_NAND_NCE) |= 0xff; | ||
395 | if (chip >= 0) | ||
396 | GPIO(BOARD_NAND_NCE) &= ~ (1 << chip); | ||
397 | } | ||
398 | </programlisting> | ||
399 | <para> | ||
400 | <emphasis>Address lines based example.</emphasis> | ||
401 | It's assumed that the nCE pins are connected to an | ||
402 | address decoder. | ||
403 | </para> | ||
404 | <programlisting> | ||
405 | static void board_select_chip (struct mtd_info *mtd, int chip) | ||
406 | { | ||
407 | struct nand_chip *this = (struct nand_chip *) mtd->priv; | ||
408 | |||
409 | /* Deselect all chips */ | ||
410 | this->IO_ADDR_R &= ~BOARD_NAND_ADDR_MASK; | ||
411 | this->IO_ADDR_W &= ~BOARD_NAND_ADDR_MASK; | ||
412 | switch (chip) { | ||
413 | case 0: | ||
414 | this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0; | ||
415 | this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0; | ||
416 | break; | ||
417 | .... | ||
418 | case n: | ||
419 | this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn; | ||
420 | this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn; | ||
421 | break; | ||
422 | } | ||
423 | } | ||
424 | </programlisting> | ||
425 | </sect1> | ||
426 | <sect1> | ||
427 | <title>Hardware ECC support</title> | ||
428 | <sect2> | ||
429 | <title>Functions and constants</title> | ||
430 | <para> | ||
431 | The nand driver supports four different types of | ||
432 | hardware ECC. | ||
433 | <itemizedlist> | ||
434 | <listitem><para>NAND_ECC_HW3_256</para><para> | ||
435 | Hardware ECC generator providing 3 bytes ECC per | ||
436 | 256 byte. | ||
437 | </para> </listitem> | ||
438 | <listitem><para>NAND_ECC_HW3_512</para><para> | ||
439 | Hardware ECC generator providing 3 bytes ECC per | ||
440 | 512 byte. | ||
441 | </para> </listitem> | ||
442 | <listitem><para>NAND_ECC_HW6_512</para><para> | ||
443 | Hardware ECC generator providing 6 bytes ECC per | ||
444 | 512 byte. | ||
445 | </para> </listitem> | ||
446 | <listitem><para>NAND_ECC_HW8_512</para><para> | ||
447 | Hardware ECC generator providing 8 bytes ECC per | ||
448 | 512 byte. | ||
449 | </para> </listitem> | ||
450 | </itemizedlist> | ||
451 | If your hardware generator has a different functionality | ||
452 | add it at the appropriate place in nand_base.c | ||
453 | </para> | ||
454 | <para> | ||
455 | The board driver must provide following functions: | ||
456 | <itemizedlist> | ||
457 | <listitem><para>enable_hwecc</para><para> | ||
458 | This function is called before reading / writing to | ||
459 | the chip. Reset or initialize the hardware generator | ||
460 | in this function. The function is called with an | ||
461 | argument which lets you distinguish between read | ||
462 | and write operations. | ||
463 | </para> </listitem> | ||
464 | <listitem><para>calculate_ecc</para><para> | ||
465 | This function is called after read / write from / to | ||
466 | the chip. Transfer the ECC from the hardware to | ||
467 | the buffer. If the option NAND_HWECC_SYNDROME is set | ||
468 | then the function is only called on write. See below. | ||
469 | </para> </listitem> | ||
470 | <listitem><para>correct_data</para><para> | ||
471 | In case of an ECC error this function is called for | ||
472 | error detection and correction. Return 1 respectively 2 | ||
473 | in case the error can be corrected. If the error is | ||
474 | not correctable return -1. If your hardware generator | ||
475 | matches the default algorithm of the nand_ecc software | ||
476 | generator then use the correction function provided | ||
477 | by nand_ecc instead of implementing duplicated code. | ||
478 | </para> </listitem> | ||
479 | </itemizedlist> | ||
480 | </para> | ||
481 | </sect2> | ||
482 | <sect2> | ||
483 | <title>Hardware ECC with syndrome calculation</title> | ||
484 | <para> | ||
485 | Many hardware ECC implementations provide Reed-Solomon | ||
486 | codes and calculate an error syndrome on read. The syndrome | ||
487 | must be converted to a standard Reed-Solomon syndrome | ||
488 | before calling the error correction code in the generic | ||
489 | Reed-Solomon library. | ||
490 | </para> | ||
491 | <para> | ||
492 | The ECC bytes must be placed immediately after the data | ||
493 | bytes in order to make the syndrome generator work. This | ||
494 | is contrary to the usual layout used by software ECC. The | ||
495 | separation of data and out of band area is no longer | ||
496 | possible. The nand driver code handles this layout and | ||
497 | the remaining free bytes in the oob area are managed by | ||
498 | the autoplacement code. Provide a matching oob-layout | ||
499 | in this case. See rts_from4.c and diskonchip.c for | ||
500 | implementation reference. In those cases we must also | ||
501 | use bad block tables on FLASH, because the ECC layout is | ||
502 | interfering with the bad block marker positions. | ||
503 | See bad block table support for details. | ||
504 | </para> | ||
505 | </sect2> | ||
506 | </sect1> | ||
507 | <sect1> | ||
508 | <title>Bad block table support</title> | ||
509 | <para> | ||
510 | Most NAND chips mark the bad blocks at a defined | ||
511 | position in the spare area. Those blocks must | ||
512 | not be erased under any circumstances as the bad | ||
513 | block information would be lost. | ||
514 | It is possible to check the bad block mark each | ||
515 | time when the blocks are accessed by reading the | ||
516 | spare area of the first page in the block. This | ||
517 | is time consuming so a bad block table is used. | ||
518 | </para> | ||
519 | <para> | ||
520 | The nand driver supports various types of bad block | ||
521 | tables. | ||
522 | <itemizedlist> | ||
523 | <listitem><para>Per device</para><para> | ||
524 | The bad block table contains all bad block information | ||
525 | of the device which can consist of multiple chips. | ||
526 | </para> </listitem> | ||
527 | <listitem><para>Per chip</para><para> | ||
528 | A bad block table is used per chip and contains the | ||
529 | bad block information for this particular chip. | ||
530 | </para> </listitem> | ||
531 | <listitem><para>Fixed offset</para><para> | ||
532 | The bad block table is located at a fixed offset | ||
533 | in the chip (device). This applies to various | ||
534 | DiskOnChip devices. | ||
535 | </para> </listitem> | ||
536 | <listitem><para>Automatic placed</para><para> | ||
537 | The bad block table is automatically placed and | ||
538 | detected either at the end or at the beginning | ||
539 | of a chip (device) | ||
540 | </para> </listitem> | ||
541 | <listitem><para>Mirrored tables</para><para> | ||
542 | The bad block table is mirrored on the chip (device) to | ||
543 | allow updates of the bad block table without data loss. | ||
544 | </para> </listitem> | ||
545 | </itemizedlist> | ||
546 | </para> | ||
547 | <para> | ||
548 | nand_scan() calls the function nand_default_bbt(). | ||
549 | nand_default_bbt() selects appropriate default | ||
550 | bad block table descriptors depending on the chip information | ||
551 | which was retrieved by nand_scan(). | ||
552 | </para> | ||
553 | <para> | ||
554 | The standard policy is scanning the device for bad | ||
555 | blocks and building a ram based bad block table which | ||
556 | allows faster access than always checking the | ||
557 | bad block information on the flash chip itself. | ||
558 | </para> | ||
559 | <sect2> | ||
560 | <title>Flash based tables</title> | ||
561 | <para> | ||
562 | It may be desired or necessary to keep a bad block table in FLASH. | ||
563 | For AG-AND chips this is mandatory, as they have no factory marked | ||
564 | bad blocks. They have factory marked good blocks. The marker pattern | ||
565 | is erased when the block is erased to be reused. So in case of | ||
566 | powerloss before writing the pattern back to the chip this block | ||
567 | would be lost and added to the bad blocks. Therefore we scan the | ||
568 | chip(s) when we detect them the first time for good blocks and | ||
569 | store this information in a bad block table before erasing any | ||
570 | of the blocks. | ||
571 | </para> | ||
572 | <para> | ||
573 | The blocks in which the tables are stored are protected against | ||
574 | accidental access by marking them bad in the memory bad block | ||
575 | table. The bad block table management functions are allowed | ||
576 | to circumvent this protection. | ||
577 | </para> | ||
578 | <para> | ||
579 | The simplest way to activate the FLASH based bad block table support | ||
580 | is to set the option NAND_USE_FLASH_BBT in the option field of | ||
581 | the nand chip structure before calling nand_scan(). For AG-AND | ||
582 | chips this is done by default. | ||
583 | This activates the default FLASH based bad block table functionality | ||
584 | of the NAND driver. The default bad block table options are | ||
585 | <itemizedlist> | ||
586 | <listitem><para>Store bad block table per chip</para></listitem> | ||
587 | <listitem><para>Use 2 bits per block</para></listitem> | ||
588 | <listitem><para>Automatic placement at the end of the chip</para></listitem> | ||
589 | <listitem><para>Use mirrored tables with version numbers</para></listitem> | ||
590 | <listitem><para>Reserve 4 blocks at the end of the chip</para></listitem> | ||
591 | </itemizedlist> | ||
592 | </para> | ||
593 | </sect2> | ||
594 | <sect2> | ||
595 | <title>User defined tables</title> | ||
596 | <para> | ||
597 | User defined tables are created by filling out a | ||
598 | nand_bbt_descr structure and storing the pointer in the | ||
599 | nand_chip structure member bbt_td before calling nand_scan(). | ||
600 | If a mirror table is necessary a second structure must be | ||
601 | created and a pointer to this structure must be stored | ||
602 | in bbt_md inside the nand_chip structure. If the bbt_md | ||
603 | member is set to NULL then only the main table is used | ||
604 | and no scan for the mirrored table is performed. | ||
605 | </para> | ||
606 | <para> | ||
607 | The most important field in the nand_bbt_descr structure | ||
608 | is the options field. The options define most of the | ||
609 | table properties. Use the predefined constants from | ||
610 | nand.h to define the options. | ||
611 | <itemizedlist> | ||
612 | <listitem><para>Number of bits per block</para> | ||
613 | <para>The supported number of bits is 1, 2, 4, 8.</para></listitem> | ||
614 | <listitem><para>Table per chip</para> | ||
615 | <para>Setting the constant NAND_BBT_PERCHIP selects that | ||
616 | a bad block table is managed for each chip in a chip array. | ||
617 | If this option is not set then a per device bad block table | ||
618 | is used.</para></listitem> | ||
619 | <listitem><para>Table location is absolute</para> | ||
620 | <para>Use the option constant NAND_BBT_ABSPAGE and | ||
621 | define the absolute page number where the bad block | ||
622 | table starts in the field pages. If you have selected bad block | ||
623 | tables per chip and you have a multi chip array then the start page | ||
624 | must be given for each chip in the chip array. Note: there is no scan | ||
625 | for a table ident pattern performed, so the fields | ||
626 | pattern, veroffs, offs, len can be left uninitialized</para></listitem> | ||
627 | <listitem><para>Table location is automatically detected</para> | ||
628 | <para>The table can either be located in the first or the last good | ||
629 | blocks of the chip (device). Set NAND_BBT_LASTBLOCK to place | ||
630 | the bad block table at the end of the chip (device). The | ||
631 | bad block tables are marked and identified by a pattern which | ||
632 | is stored in the spare area of the first page in the block which | ||
633 | holds the bad block table. Store a pointer to the pattern | ||
634 | in the pattern field. Further the length of the pattern has to be | ||
635 | stored in len and the offset in the spare area must be given | ||
636 | in the offs member of the nand_bbt_descr structure. For mirrored | ||
637 | bad block tables different patterns are mandatory.</para></listitem> | ||
638 | <listitem><para>Table creation</para> | ||
639 | <para>Set the option NAND_BBT_CREATE to enable the table creation | ||
640 | if no table can be found during the scan. Usually this is done only | ||
641 | once if a new chip is found. </para></listitem> | ||
642 | <listitem><para>Table write support</para> | ||
643 | <para>Set the option NAND_BBT_WRITE to enable the table write support. | ||
644 | This allows the update of the bad block table(s) in case a block has | ||
645 | to be marked bad due to wear. The MTD interface function block_markbad | ||
646 | is calling the update function of the bad block table. If the write | ||
647 | support is enabled then the table is updated on FLASH.</para> | ||
648 | <para> | ||
649 | Note: Write support should only be enabled for mirrored tables with | ||
650 | version control. | ||
651 | </para></listitem> | ||
652 | <listitem><para>Table version control</para> | ||
653 | <para>Set the option NAND_BBT_VERSION to enable the table version control. | ||
654 | It's highly recommended to enable this for mirrored tables with write | ||
655 | support. It makes sure that the risk of losing the bad block | ||
656 | table information is reduced to the loss of the information about the | ||
657 | one worn out block which should be marked bad. The version is stored in | ||
658 | 4 consecutive bytes in the spare area of the device. The position of | ||
659 | the version number is defined by the member veroffs in the bad block table | ||
660 | descriptor.</para></listitem> | ||
661 | <listitem><para>Save block contents on write</para> | ||
662 | <para> | ||
663 | In case that the block which holds the bad block table does contain | ||
664 | other useful information, set the option NAND_BBT_SAVECONTENT. When | ||
665 | the bad block table is written then the whole block is read, the bad | ||
666 | block table is updated, the block is erased and everything is | ||
667 | written back. If this option is not set only the bad block table | ||
668 | is written and everything else in the block is ignored and erased. | ||
669 | </para></listitem> | ||
670 | <listitem><para>Number of reserved blocks</para> | ||
671 | <para> | ||
672 | For automatic placement some blocks must be reserved for | ||
673 | bad block table storage. The number of reserved blocks is defined | ||
674 | in the maxblocks member of the bad block table description structure. | ||
675 | Reserving 4 blocks for mirrored tables should be a reasonable number. | ||
676 | This also limits the number of blocks which are scanned for the bad | ||
677 | block table ident pattern. | ||
678 | </para></listitem> | ||
679 | </itemizedlist> | ||
680 | </para> | ||
681 | </sect2> | ||
682 | </sect1> | ||
683 | <sect1> | ||
684 | <title>Spare area (auto)placement</title> | ||
685 | <para> | ||
686 | The nand driver implements different possibilities for | ||
687 | placement of filesystem data in the spare area, | ||
688 | <itemizedlist> | ||
689 | <listitem><para>Placement defined by fs driver</para></listitem> | ||
690 | <listitem><para>Automatic placement</para></listitem> | ||
691 | </itemizedlist> | ||
692 | The default placement function is automatic placement. The | ||
693 | nand driver has built in default placement schemes for the | ||
694 | various chiptypes. If due to hardware ECC functionality the | ||
695 | default placement does not fit then the board driver can | ||
696 | provide its own placement scheme. | ||
697 | </para> | ||
698 | <para> | ||
699 | File system drivers can provide their own placement scheme which | ||
700 | is used instead of the default placement scheme. | ||
701 | </para> | ||
702 | <para> | ||
703 | Placement schemes are defined by a nand_oobinfo structure | ||
704 | <programlisting> | ||
705 | struct nand_oobinfo { | ||
706 | int useecc; | ||
707 | int eccbytes; | ||
708 | int eccpos[24]; | ||
709 | int oobfree[8][2]; | ||
710 | }; | ||
711 | </programlisting> | ||
712 | <itemizedlist> | ||
713 | <listitem><para>useecc</para><para> | ||
714 | The useecc member controls the ecc and placement function. The header | ||
715 | file include/mtd/mtd-abi.h contains constants to select ecc and | ||
716 | placement. MTD_NANDECC_OFF switches off the ecc completely. This is | ||
717 | not recommended and available for testing and diagnosis only. | ||
718 | MTD_NANDECC_PLACE selects caller defined placement, MTD_NANDECC_AUTOPLACE | ||
719 | selects automatic placement. | ||
720 | </para></listitem> | ||
721 | <listitem><para>eccbytes</para><para> | ||
722 | The eccbytes member defines the number of ecc bytes per page. | ||
723 | </para></listitem> | ||
724 | <listitem><para>eccpos</para><para> | ||
725 | The eccpos array holds the byte offsets in the spare area where | ||
726 | the ecc codes are placed. | ||
727 | </para></listitem> | ||
728 | <listitem><para>oobfree</para><para> | ||
729 | The oobfree array defines the areas in the spare area which can be | ||
730 | used for automatic placement. The information is given in the format | ||
731 | {offset, size}. offset defines the start of the usable area, size the | ||
732 | length in bytes. More than one area can be defined. The list is terminated | ||
733 | by an {0, 0} entry. | ||
734 | </para></listitem> | ||
735 | </itemizedlist> | ||
736 | </para> | ||
737 | <sect2> | ||
738 | <title>Placement defined by fs driver</title> | ||
739 | <para> | ||
740 | The calling function provides a pointer to a nand_oobinfo | ||
741 | structure which defines the ecc placement. For writes the | ||
742 | caller must provide a spare area buffer along with the | ||
743 | data buffer. The spare area buffer size is (number of pages) * | ||
744 | (size of spare area). For reads the buffer size is | ||
745 | (number of pages) * ((size of spare area) + (number of ecc | ||
746 | steps per page) * sizeof (int)). The driver stores the | ||
747 | result of the ecc check for each tuple in the spare buffer. | ||
748 | The storage sequence is | ||
749 | </para> | ||
750 | <para> | ||
751 | &lt;spare data page 0&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt; | ||
752 | </para> | ||
753 | <para> | ||
754 | ... | ||
755 | </para> | ||
756 | <para> | ||
757 | &lt;spare data page n&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt; | ||
758 | </para> | ||
759 | <para> | ||
760 | This is a legacy mode used by YAFFS1. | ||
761 | </para> | ||
762 | <para> | ||
763 | If the spare area buffer is NULL then only the ECC placement is | ||
764 | done according to the given scheme in the nand_oobinfo structure. | ||
765 | </para> | ||
766 | </sect2> | ||
767 | <sect2> | ||
768 | <title>Automatic placement</title> | ||
769 | <para> | ||
770 | Automatic placement uses the built in defaults to place the | ||
771 | ecc bytes in the spare area. If filesystem data have to be stored / | ||
772 | read into the spare area then the calling function must provide a | ||
773 | buffer. The buffer size per page is determined by the oobfree array in | ||
774 | the nand_oobinfo structure. | ||
775 | </para> | ||
776 | <para> | ||
777 | If the spare area buffer is NULL then only the ECC placement is | ||
778 | done according to the default builtin scheme. | ||
779 | </para> | ||
780 | </sect2> | ||
781 | <sect2> | ||
782 | <title>User space placement selection</title> | ||
783 | <para> | ||
784 | All non ecc functions like mtd->read and mtd->write use an internal | ||
785 | structure, which can be set by an ioctl. This structure is preset | ||
786 | to the autoplacement default. | ||
787 | <programlisting> | ||
788 | ioctl (fd, MEMSETOOBSEL, oobsel); | ||
789 | </programlisting> | ||
790 | oobsel is a pointer to a user supplied structure of type | ||
791 | nand_oobinfo. The contents of this structure must match the | ||
792 | criteria of the filesystem, which will be used. See an example in utils/nandwrite.c. | ||
793 | </para> | ||
794 | </sect2> | ||
795 | </sect1> | ||
796 | <sect1> | ||
797 | <title>Spare area autoplacement default schemes</title> | ||
798 | <sect2> | ||
799 | <title>256 byte pagesize</title> | ||
800 | <informaltable><tgroup cols="3"><tbody> | ||
801 | <row> | ||
802 | <entry>Offset</entry> | ||
803 | <entry>Content</entry> | ||
804 | <entry>Comment</entry> | ||
805 | </row> | ||
806 | <row> | ||
807 | <entry>0x00</entry> | ||
808 | <entry>ECC byte 0</entry> | ||
809 | <entry>Error correction code byte 0</entry> | ||
810 | </row> | ||
811 | <row> | ||
812 | <entry>0x01</entry> | ||
813 | <entry>ECC byte 1</entry> | ||
814 | <entry>Error correction code byte 1</entry> | ||
815 | </row> | ||
816 | <row> | ||
817 | <entry>0x02</entry> | ||
818 | <entry>ECC byte 2</entry> | ||
819 | <entry>Error correction code byte 2</entry> | ||
820 | </row> | ||
821 | <row> | ||
822 | <entry>0x03</entry> | ||
823 | <entry>Autoplace 0</entry> | ||
824 | <entry></entry> | ||
825 | </row> | ||
826 | <row> | ||
827 | <entry>0x04</entry> | ||
828 | <entry>Autoplace 1</entry> | ||
829 | <entry></entry> | ||
830 | </row> | ||
831 | <row> | ||
832 | <entry>0x05</entry> | ||
833 | <entry>Bad block marker</entry> | ||
834 | <entry>If any bit in this byte is zero, then this block is bad. | ||
835 | This applies only to the first page in a block. In the remaining | ||
836 | pages this byte is reserved</entry> | ||
837 | </row> | ||
838 | <row> | ||
839 | <entry>0x06</entry> | ||
840 | <entry>Autoplace 2</entry> | ||
841 | <entry></entry> | ||
842 | </row> | ||
843 | <row> | ||
844 | <entry>0x07</entry> | ||
845 | <entry>Autoplace 3</entry> | ||
846 | <entry></entry> | ||
847 | </row> | ||
848 | </tbody></tgroup></informaltable> | ||
849 | </sect2> | ||
850 | <sect2> | ||
851 | <title>512 byte pagesize</title> | ||
852 | <informaltable><tgroup cols="3"><tbody> | ||
853 | <row> | ||
854 | <entry>Offset</entry> | ||
855 | <entry>Content</entry> | ||
856 | <entry>Comment</entry> | ||
857 | </row> | ||
858 | <row> | ||
859 | <entry>0x00</entry> | ||
860 | <entry>ECC byte 0</entry> | ||
861 | <entry>Error correction code byte 0 of the lower 256 Byte data in | ||
862 | this page</entry> | ||
863 | </row> | ||
864 | <row> | ||
865 | <entry>0x01</entry> | ||
866 | <entry>ECC byte 1</entry> | ||
867 | <entry>Error correction code byte 1 of the lower 256 Bytes of data | ||
868 | in this page</entry> | ||
869 | </row> | ||
870 | <row> | ||
871 | <entry>0x02</entry> | ||
872 | <entry>ECC byte 2</entry> | ||
873 | <entry>Error correction code byte 2 of the lower 256 Bytes of data | ||
874 | in this page</entry> | ||
875 | </row> | ||
876 | <row> | ||
877 | <entry>0x03</entry> | ||
878 | <entry>ECC byte 3</entry> | ||
879 | <entry>Error correction code byte 0 of the upper 256 Bytes of data | ||
880 | in this page</entry> | ||
881 | </row> | ||
882 | <row> | ||
883 | <entry>0x04</entry> | ||
884 | <entry>reserved</entry> | ||
885 | <entry>reserved</entry> | ||
886 | </row> | ||
887 | <row> | ||
888 | <entry>0x05</entry> | ||
889 | <entry>Bad block marker</entry> | ||
890 | <entry>If any bit in this byte is zero, then this block is bad. | ||
891 | This applies only to the first page in a block. In the remaining | ||
892 | pages this byte is reserved</entry> | ||
893 | </row> | ||
894 | <row> | ||
895 | <entry>0x06</entry> | ||
896 | <entry>ECC byte 4</entry> | ||
897 | <entry>Error correction code byte 1 of the upper 256 Bytes of data | ||
898 | in this page</entry> | ||
899 | </row> | ||
900 | <row> | ||
901 | <entry>0x07</entry> | ||
902 | <entry>ECC byte 5</entry> | ||
903 | <entry>Error correction code byte 2 of the upper 256 Bytes of data | ||
904 | in this page</entry> | ||
905 | </row> | ||
906 | <row> | ||
907 | <entry>0x08 - 0x0F</entry> | ||
908 | <entry>Autoplace 0 - 7</entry> | ||
909 | <entry></entry> | ||
910 | </row> | ||
911 | </tbody></tgroup></informaltable> | ||
912 | </sect2> | ||
913 | <sect2> | ||
914 | <title>2048 byte pagesize</title> | ||
915 | <informaltable><tgroup cols="3"><tbody> | ||
916 | <row> | ||
917 | <entry>Offset</entry> | ||
918 | <entry>Content</entry> | ||
919 | <entry>Comment</entry> | ||
920 | </row> | ||
921 | <row> | ||
922 | <entry>0x00</entry> | ||
923 | <entry>Bad block marker</entry> | ||
924 | <entry>If any bit in this byte is zero, then this block is bad. | ||
925 | This applies only to the first page in a block. In the remaining | ||
926 | pages this byte is reserved</entry> | ||
927 | </row> | ||
928 | <row> | ||
929 | <entry>0x01</entry> | ||
930 | <entry>Reserved</entry> | ||
931 | <entry>Reserved</entry> | ||
932 | </row> | ||
933 | <row> | ||
934 | <entry>0x02-0x27</entry> | ||
935 | <entry>Autoplace 0 - 37</entry> | ||
936 | <entry></entry> | ||
937 | </row> | ||
938 | <row> | ||
939 | <entry>0x28</entry> | ||
940 | <entry>ECC byte 0</entry> | ||
941 | <entry>Error correction code byte 0 of the first 256 Byte data in | ||
942 | this page</entry> | ||
943 | </row> | ||
944 | <row> | ||
945 | <entry>0x29</entry> | ||
946 | <entry>ECC byte 1</entry> | ||
947 | <entry>Error correction code byte 1 of the first 256 Bytes of data | ||
948 | in this page</entry> | ||
949 | </row> | ||
950 | <row> | ||
951 | <entry>0x2A</entry> | ||
952 | <entry>ECC byte 2</entry> | ||
953 | <entry>Error correction code byte 2 of the first 256 Bytes data in | ||
954 | this page</entry> | ||
955 | </row> | ||
956 | <row> | ||
957 | <entry>0x2B</entry> | ||
958 | <entry>ECC byte 3</entry> | ||
959 | <entry>Error correction code byte 0 of the second 256 Bytes of data | ||
960 | in this page</entry> | ||
961 | </row> | ||
962 | <row> | ||
963 | <entry>0x2C</entry> | ||
964 | <entry>ECC byte 4</entry> | ||
965 | <entry>Error correction code byte 1 of the second 256 Bytes of data | ||
966 | in this page</entry> | ||
967 | </row> | ||
968 | <row> | ||
969 | <entry>0x2D</entry> | ||
970 | <entry>ECC byte 5</entry> | ||
971 | <entry>Error correction code byte 2 of the second 256 Bytes of data | ||
972 | in this page</entry> | ||
973 | </row> | ||
974 | <row> | ||
975 | <entry>0x2E</entry> | ||
976 | <entry>ECC byte 6</entry> | ||
977 | <entry>Error correction code byte 0 of the third 256 Bytes of data | ||
978 | in this page</entry> | ||
979 | </row> | ||
980 | <row> | ||
981 | <entry>0x2F</entry> | ||
982 | <entry>ECC byte 7</entry> | ||
983 | <entry>Error correction code byte 1 of the third 256 Bytes of data | ||
984 | in this page</entry> | ||
985 | </row> | ||
986 | <row> | ||
987 | <entry>0x30</entry> | ||
988 | <entry>ECC byte 8</entry> | ||
989 | <entry>Error correction code byte 2 of the third 256 Bytes of data | ||
990 | in this page</entry> | ||
991 | </row> | ||
992 | <row> | ||
993 | <entry>0x31</entry> | ||
994 | <entry>ECC byte 9</entry> | ||
995 | <entry>Error correction code byte 0 of the fourth 256 Bytes of data | ||
996 | in this page</entry> | ||
997 | </row> | ||
998 | <row> | ||
999 | <entry>0x32</entry> | ||
1000 | <entry>ECC byte 10</entry> | ||
1001 | <entry>Error correction code byte 1 of the fourth 256 Bytes of data | ||
1002 | in this page</entry> | ||
1003 | </row> | ||
1004 | <row> | ||
1005 | <entry>0x33</entry> | ||
1006 | <entry>ECC byte 11</entry> | ||
1007 | <entry>Error correction code byte 2 of the fourth 256 Bytes of data | ||
1008 | in this page</entry> | ||
1009 | </row> | ||
1010 | <row> | ||
1011 | <entry>0x34</entry> | ||
1012 | <entry>ECC byte 12</entry> | ||
1013 | <entry>Error correction code byte 0 of the fifth 256 Bytes of data | ||
1014 | in this page</entry> | ||
1015 | </row> | ||
1016 | <row> | ||
1017 | <entry>0x35</entry> | ||
1018 | <entry>ECC byte 13</entry> | ||
1019 | <entry>Error correction code byte 1 of the fifth 256 Bytes of data | ||
1020 | in this page</entry> | ||
1021 | </row> | ||
1022 | <row> | ||
1023 | <entry>0x36</entry> | ||
1024 | <entry>ECC byte 14</entry> | ||
1025 | <entry>Error correction code byte 2 of the fifth 256 Bytes of data | ||
1026 | in this page</entry> | ||
1027 | </row> | ||
1028 | <row> | ||
1029 | <entry>0x37</entry> | ||
1030 | <entry>ECC byte 15</entry> | ||
1031 | <entry>Error correction code byte 0 of the sixth 256 Bytes of data | ||
1032 | in this page</entry> | ||
1033 | </row> | ||
1034 | <row> | ||
1035 | <entry>0x38</entry> | ||
1036 | <entry>ECC byte 16</entry> | ||
1037 | <entry>Error correction code byte 1 of the sixth 256 Bytes of data | ||
1038 | in this page</entry> | ||
1039 | </row> | ||
1040 | <row> | ||
1041 | <entry>0x39</entry> | ||
1042 | <entry>ECC byte 17</entry> | ||
1043 | <entry>Error correction code byte 2 of the sixth 256 Bytes of data | ||
1044 | in this page</entry> | ||
1045 | </row> | ||
1046 | <row> | ||
1047 | <entry>0x3A</entry> | ||
1048 | <entry>ECC byte 18</entry> | ||
1049 | <entry>Error correction code byte 0 of the seventh 256 Bytes of | ||
1050 | data in this page</entry> | ||
1051 | </row> | ||
1052 | <row> | ||
1053 | <entry>0x3B</entry> | ||
1054 | <entry>ECC byte 19</entry> | ||
1055 | <entry>Error correction code byte 1 of the seventh 256 Bytes of | ||
1056 | data in this page</entry> | ||
1057 | </row> | ||
1058 | <row> | ||
1059 | <entry>0x3C</entry> | ||
1060 | <entry>ECC byte 20</entry> | ||
1061 | <entry>Error correction code byte 2 of the seventh 256 Bytes of | ||
1062 | data in this page</entry> | ||
1063 | </row> | ||
1064 | <row> | ||
1065 | <entry>0x3D</entry> | ||
1066 | <entry>ECC byte 21</entry> | ||
1067 | <entry>Error correction code byte 0 of the eighth 256 Bytes of data | ||
1068 | in this page</entry> | ||
1069 | </row> | ||
1070 | <row> | ||
1071 | <entry>0x3E</entry> | ||
1072 | <entry>ECC byte 22</entry> | ||
1073 | <entry>Error correction code byte 1 of the eighth 256 Bytes of data | ||
1074 | in this page</entry> | ||
1075 | </row> | ||
1076 | <row> | ||
1077 | <entry>0x3F</entry> | ||
1078 | <entry>ECC byte 23</entry> | ||
1079 | <entry>Error correction code byte 2 of the eighth 256 Bytes of data | ||
1080 | in this page</entry> | ||
1081 | </row> | ||
1082 | </tbody></tgroup></informaltable> | ||
1083 | </sect2> | ||
1084 | </sect1> | ||
1085 | </chapter> | ||
1086 | |||
1087 | <chapter id="filesystems"> | ||
1088 | <title>Filesystem support</title> | ||
1089 | <para> | ||
1090 | The NAND driver provides all necessary functions for a | ||
1091 | filesystem via the MTD interface. | ||
1092 | </para> | ||
1093 | <para> | ||
1094 | Filesystems must be aware of the NAND peculiarities and | ||
1095 | restrictions. One major restriction of NAND Flash is that you cannot | ||
1096 | write as often as you want to a page. The consecutive writes to a page, | ||
1097 | before erasing it again, are restricted to 1-3 writes, depending on the | ||
1098 | manufacturer's specifications. This applies similarly to the spare area. | ||
1099 | </para> | ||
1100 | <para> | ||
1101 | Therefore NAND aware filesystems must either write in page size chunks | ||
1102 | or hold a writebuffer to collect smaller writes until they sum up to | ||
1103 | pagesize. Available NAND aware filesystems: JFFS2, YAFFS. | ||
1104 | </para> | ||
1105 | <para> | ||
1106 | The spare area usage to store filesystem data is controlled by | ||
1107 | the spare area placement functionality which is described in one | ||
1108 | of the earlier chapters. | ||
1109 | </para> | ||
1110 | </chapter> | ||
1111 | <chapter id="tools"> | ||
1112 | <title>Tools</title> | ||
1113 | <para> | ||
1114 | The MTD project provides a couple of helpful tools to handle NAND Flash. | ||
1115 | <itemizedlist> | ||
1116 | <listitem><para>flasherase, flasheraseall: Erase and format FLASH partitions</para></listitem> | ||
1117 | <listitem><para>nandwrite: write filesystem images to NAND FLASH</para></listitem> | ||
1118 | <listitem><para>nanddump: dump the contents of a NAND FLASH partition</para></listitem> | ||
1119 | </itemizedlist> | ||
1120 | </para> | ||
1121 | <para> | ||
1122 | These tools are aware of the NAND restrictions. Please use those tools | ||
1123 | instead of complaining about errors which are caused by non NAND aware | ||
1124 | access methods. | ||
1125 | </para> | ||
1126 | </chapter> | ||
1127 | |||
1128 | <chapter id="defines"> | ||
1129 | <title>Constants</title> | ||
1130 | <para> | ||
1131 | This chapter describes the constants which might be relevant for a driver developer. | ||
1132 | </para> | ||
1133 | <sect1> | ||
1134 | <title>Chip option constants</title> | ||
1135 | <sect2> | ||
1136 | <title>Constants for chip id table</title> | ||
1137 | <para> | ||
1138 | These constants are defined in nand.h. They are ored together to describe | ||
1139 | the chip functionality. | ||
1140 | <programlisting> | ||
1141 | /* Chip can not auto increment pages */ | ||
1142 | #define NAND_NO_AUTOINCR 0x00000001 | ||
1143 | /* Buswidth is 16 bit */ | ||
1144 | #define NAND_BUSWIDTH_16 0x00000002 | ||
1145 | /* Device supports partial programming without padding */ | ||
1146 | #define NAND_NO_PADDING 0x00000004 | ||
1147 | /* Chip has cache program function */ | ||
1148 | #define NAND_CACHEPRG 0x00000008 | ||
1149 | /* Chip has copy back function */ | ||
1150 | #define NAND_COPYBACK 0x00000010 | ||
1151 | /* AND Chip which has 4 banks and a confusing page / block | ||
1152 | * assignment. See Renesas datasheet for further information */ | ||
1153 | #define NAND_IS_AND 0x00000020 | ||
1154 | /* Chip has an array of 4 pages which can be read without | ||
1155 | * additional ready /busy waits */ | ||
1156 | #define NAND_4PAGE_ARRAY 0x00000040 | ||
1157 | </programlisting> | ||
1158 | </para> | ||
1159 | </sect2> | ||
1160 | <sect2> | ||
1161 | <title>Constants for runtime options</title> | ||
1162 | <para> | ||
1163 | These constants are defined in nand.h. They are ored together to describe | ||
1164 | the functionality. | ||
1165 | <programlisting> | ||
1166 | /* Use a flash based bad block table. This option is parsed by the | ||
1167 | * default bad block table function (nand_default_bbt). */ | ||
1168 | #define NAND_USE_FLASH_BBT 0x00010000 | ||
1169 | /* The hw ecc generator provides a syndrome instead of an ecc value on read | ||
1170 | * This can only work if we have the ecc bytes directly behind the | ||
1171 | * data bytes. Applies for DOC and AG-AND Renesas HW Reed Solomon generators */ | ||
1172 | #define NAND_HWECC_SYNDROME 0x00020000 | ||
1173 | </programlisting> | ||
1174 | </para> | ||
1175 | </sect2> | ||
1176 | </sect1> | ||
1177 | |||
1178 | <sect1> | ||
1179 | <title>ECC selection constants</title> | ||
1180 | <para> | ||
1181 | Use these constants to select the ECC algorithm. | ||
1182 | <programlisting> | ||
1183 | /* No ECC. Usage is not recommended ! */ | ||
1184 | #define NAND_ECC_NONE 0 | ||
1185 | /* Software ECC 3 byte ECC per 256 Byte data */ | ||
1186 | #define NAND_ECC_SOFT 1 | ||
1187 | /* Hardware ECC 3 byte ECC per 256 Byte data */ | ||
1188 | #define NAND_ECC_HW3_256 2 | ||
1189 | /* Hardware ECC 3 byte ECC per 512 Byte data */ | ||
1190 | #define NAND_ECC_HW3_512 3 | ||
1191 | /* Hardware ECC 6 byte ECC per 512 Byte data */ | ||
1192 | #define NAND_ECC_HW6_512 4 | ||
1193 | /* Hardware ECC 8 byte ECC per 512 Byte data */ | ||
1194 | #define NAND_ECC_HW8_512 6 | ||
1195 | </programlisting> | ||
1196 | </para> | ||
1197 | </sect1> | ||
1198 | |||
1199 | <sect1> | ||
1200 | <title>Hardware control related constants</title> | ||
1201 | <para> | ||
1202 | These constants describe the requested hardware access function when | ||
1203 | the boardspecific hardware control function is called | ||
1204 | <programlisting> | ||
1205 | /* Select the chip by setting nCE to low */ | ||
1206 | #define NAND_CTL_SETNCE 1 | ||
1207 | /* Deselect the chip by setting nCE to high */ | ||
1208 | #define NAND_CTL_CLRNCE 2 | ||
1209 | /* Select the command latch by setting CLE to high */ | ||
1210 | #define NAND_CTL_SETCLE 3 | ||
1211 | /* Deselect the command latch by setting CLE to low */ | ||
1212 | #define NAND_CTL_CLRCLE 4 | ||
1213 | /* Select the address latch by setting ALE to high */ | ||
1214 | #define NAND_CTL_SETALE 5 | ||
1215 | /* Deselect the address latch by setting ALE to low */ | ||
1216 | #define NAND_CTL_CLRALE 6 | ||
1217 | /* Set write protection by setting WP to high. Not used! */ | ||
1218 | #define NAND_CTL_SETWP 7 | ||
1219 | /* Clear write protection by setting WP to low. Not used! */ | ||
1220 | #define NAND_CTL_CLRWP 8 | ||
1221 | </programlisting> | ||
1222 | </para> | ||
1223 | </sect1> | ||
1224 | |||
1225 | <sect1> | ||
1226 | <title>Bad block table related constants</title> | ||
1227 | <para> | ||
1228 | These constants describe the options used for bad block | ||
1229 | table descriptors. | ||
1230 | <programlisting> | ||
1231 | /* Options for the bad block table descriptors */ | ||
1232 | |||
1233 | /* The number of bits used per block in the bbt on the device */ | ||
1234 | #define NAND_BBT_NRBITS_MSK 0x0000000F | ||
1235 | #define NAND_BBT_1BIT 0x00000001 | ||
1236 | #define NAND_BBT_2BIT 0x00000002 | ||
1237 | #define NAND_BBT_4BIT 0x00000004 | ||
1238 | #define NAND_BBT_8BIT 0x00000008 | ||
1239 | /* The bad block table is in the last good block of the device */ | ||
1240 | #define NAND_BBT_LASTBLOCK 0x00000010 | ||
1241 | /* The bbt is at the given page, else we must scan for the bbt */ | ||
1242 | #define NAND_BBT_ABSPAGE 0x00000020 | ||
1243 | /* The bbt is at the given page, else we must scan for the bbt */ | ||
1244 | #define NAND_BBT_SEARCH 0x00000040 | ||
1245 | /* bbt is stored per chip on multichip devices */ | ||
1246 | #define NAND_BBT_PERCHIP 0x00000080 | ||
1247 | /* bbt has a version counter at offset veroffs */ | ||
1248 | #define NAND_BBT_VERSION 0x00000100 | ||
1249 | /* Create a bbt if none exists */ | ||
1250 | #define NAND_BBT_CREATE 0x00000200 | ||
1251 | /* Search good / bad pattern through all pages of a block */ | ||
1252 | #define NAND_BBT_SCANALLPAGES 0x00000400 | ||
1253 | /* Scan block empty during good / bad block scan */ | ||
1254 | #define NAND_BBT_SCANEMPTY 0x00000800 | ||
1255 | /* Write bbt if necessary */ | ||
1256 | #define NAND_BBT_WRITE 0x00001000 | ||
1257 | /* Read and write back block contents when writing bbt */ | ||
1258 | #define NAND_BBT_SAVECONTENT 0x00002000 | ||
1259 | </programlisting> | ||
1260 | </para> | ||
1261 | </sect1> | ||
1262 | |||
1263 | </chapter> | ||
1264 | |||
1265 | <chapter id="structs"> | ||
1266 | <title>Structures</title> | ||
1267 | <para> | ||
1268 | This chapter contains the autogenerated documentation of the structures which are | ||
1269 | used in the NAND driver and might be relevant for a driver developer. Each | ||
1270 | struct member has a short description which is marked with an [XXX] identifier. | ||
1271 | See the chapter "Documentation hints" for an explanation. | ||
1272 | </para> | ||
1273 | !Iinclude/linux/mtd/nand.h | ||
1274 | </chapter> | ||
1275 | |||
1276 | <chapter id="pubfunctions"> | ||
1277 | <title>Public Functions Provided</title> | ||
1278 | <para> | ||
1279 | This chapter contains the autogenerated documentation of the NAND kernel API functions | ||
1280 | which are exported. Each function has a short description which is marked with an [XXX] identifier. | ||
1281 | See the chapter "Documentation hints" for an explanation. | ||
1282 | </para> | ||
1283 | !Edrivers/mtd/nand/nand_base.c | ||
1284 | !Edrivers/mtd/nand/nand_bbt.c | ||
1285 | !Edrivers/mtd/nand/nand_ecc.c | ||
1286 | </chapter> | ||
1287 | |||
1288 | <chapter id="intfunctions"> | ||
1289 | <title>Internal Functions Provided</title> | ||
1290 | <para> | ||
1291 | This chapter contains the autogenerated documentation of the NAND driver internal functions. | ||
1292 | Each function has a short description which is marked with an [XXX] identifier. | ||
1293 | See the chapter "Documentation hints" for an explanation. | ||
1294 | The functions marked with [DEFAULT] might be relevant for a board driver developer. | ||
1295 | </para> | ||
1296 | !Idrivers/mtd/nand/nand_base.c | ||
1297 | !Idrivers/mtd/nand/nand_bbt.c | ||
1298 | !Idrivers/mtd/nand/nand_ecc.c | ||
1299 | </chapter> | ||
1300 | |||
1301 | <chapter id="credits"> | ||
1302 | <title>Credits</title> | ||
1303 | <para> | ||
1304 | The following people have contributed to the NAND driver: | ||
1305 | <orderedlist> | ||
1306 | <listitem><para>Steven J. Hill<email>sjhill@realitydiluted.com</email></para></listitem> | ||
1307 | <listitem><para>David Woodhouse<email>dwmw2@infradead.org</email></para></listitem> | ||
1308 | <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem> | ||
1309 | </orderedlist> | ||
1310 | A lot of users have provided bugfixes, improvements and helping hands for testing. | ||
1311 | Thanks a lot. | ||
1312 | </para> | ||
1313 | <para> | ||
1314 | The following people have contributed to this document: | ||
1315 | <orderedlist> | ||
1316 | <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem> | ||
1317 | </orderedlist> | ||
1318 | </para> | ||
1319 | </chapter> | ||
1320 | </book> | ||
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl new file mode 100644 index 000000000000..45cad23efefa --- /dev/null +++ b/Documentation/DocBook/procfs-guide.tmpl | |||
@@ -0,0 +1,591 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [ | ||
4 | <!ENTITY procfsexample SYSTEM "procfs_example.xml"> | ||
5 | ]> | ||
6 | |||
7 | <book id="LKProcfsGuide"> | ||
8 | <bookinfo> | ||
9 | <title>Linux Kernel Procfs Guide</title> | ||
10 | |||
11 | <authorgroup> | ||
12 | <author> | ||
13 | <firstname>Erik</firstname> | ||
14 | <othername>(J.A.K.)</othername> | ||
15 | <surname>Mouw</surname> | ||
16 | <affiliation> | ||
17 | <orgname>Delft University of Technology</orgname> | ||
18 | <orgdiv>Faculty of Information Technology and Systems</orgdiv> | ||
19 | <address> | ||
20 | <email>J.A.K.Mouw@its.tudelft.nl</email> | ||
21 | <pob>PO BOX 5031</pob> | ||
22 | <postcode>2600 GA</postcode> | ||
23 | <city>Delft</city> | ||
24 | <country>The Netherlands</country> | ||
25 | </address> | ||
26 | </affiliation> | ||
27 | </author> | ||
28 | </authorgroup> | ||
29 | |||
30 | <revhistory> | ||
31 | <revision> | ||
32 | <revnumber>1.0 </revnumber> | ||
33 | <date>May 30, 2001</date> | ||
34 | <revremark>Initial revision posted to linux-kernel</revremark> | ||
35 | </revision> | ||
36 | <revision> | ||
37 | <revnumber>1.1 </revnumber> | ||
38 | <date>June 3, 2001</date> | ||
39 | <revremark>Revised after comments from linux-kernel</revremark> | ||
40 | </revision> | ||
41 | </revhistory> | ||
42 | |||
43 | <copyright> | ||
44 | <year>2001</year> | ||
45 | <holder>Erik Mouw</holder> | ||
46 | </copyright> | ||
47 | |||
48 | |||
49 | <legalnotice> | ||
50 | <para> | ||
51 | This documentation is free software; you can redistribute it | ||
52 | and/or modify it under the terms of the GNU General Public | ||
53 | License as published by the Free Software Foundation; either | ||
54 | version 2 of the License, or (at your option) any later | ||
55 | version. | ||
56 | </para> | ||
57 | |||
58 | <para> | ||
59 | This documentation is distributed in the hope that it will be | ||
60 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
61 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR | ||
62 | PURPOSE. See the GNU General Public License for more details. | ||
63 | </para> | ||
64 | |||
65 | <para> | ||
66 | You should have received a copy of the GNU General Public | ||
67 | License along with this program; if not, write to the Free | ||
68 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
69 | MA 02111-1307 USA | ||
70 | </para> | ||
71 | |||
72 | <para> | ||
73 | For more details see the file COPYING in the source | ||
74 | distribution of Linux. | ||
75 | </para> | ||
76 | </legalnotice> | ||
77 | </bookinfo> | ||
78 | |||
79 | |||
80 | |||
81 | |||
82 | <toc> | ||
83 | </toc> | ||
84 | |||
85 | |||
86 | |||
87 | |||
88 | <preface> | ||
89 | <title>Preface</title> | ||
90 | |||
91 | <para> | ||
92 | This guide describes the use of the procfs file system from | ||
93 | within the Linux kernel. The idea to write this guide came up on | ||
94 | the #kernelnewbies IRC channel (see <ulink | ||
95 | url="http://www.kernelnewbies.org/">http://www.kernelnewbies.org/</ulink>), | ||
96 | when Jeff Garzik explained the use of procfs and forwarded me a | ||
97 | message Alexander Viro wrote to the linux-kernel mailing list. I | ||
98 | agreed to write it up nicely, so here it is. | ||
99 | </para> | ||
100 | |||
101 | <para> | ||
102 | I'd like to thank Jeff Garzik | ||
103 | <email>jgarzik@pobox.com</email> and Alexander Viro | ||
104 | <email>viro@parcelfarce.linux.theplanet.co.uk</email> for their input, | ||
105 | Tim Waugh <email>twaugh@redhat.com</email> for his <ulink | ||
106 | url="http://people.redhat.com/twaugh/docbook/selfdocbook/">Selfdocbook</ulink>, | ||
107 | and Marc Joosen <email>marcj@historia.et.tudelft.nl</email> for | ||
108 | proofreading. | ||
109 | </para> | ||
110 | |||
111 | <para> | ||
112 | This documentation was written while working on the LART | ||
113 | computing board (<ulink | ||
114 | url="http://www.lart.tudelft.nl/">http://www.lart.tudelft.nl/</ulink>), | ||
115 | which is sponsored by the Mobile Multi-media Communications | ||
116 | (<ulink | ||
117 | url="http://www.mmc.tudelft.nl/">http://www.mmc.tudelft.nl/</ulink>) | ||
118 | and Ubiquitous Communications (<ulink | ||
119 | url="http://www.ubicom.tudelft.nl/">http://www.ubicom.tudelft.nl/</ulink>) | ||
120 | projects. | ||
121 | </para> | ||
122 | |||
123 | <para> | ||
124 | Erik | ||
125 | </para> | ||
126 | </preface> | ||
127 | |||
128 | |||
129 | |||
130 | |||
131 | <chapter id="intro"> | ||
132 | <title>Introduction</title> | ||
133 | |||
134 | <para> | ||
135 | The <filename class="directory">/proc</filename> file system | ||
136 | (procfs) is a special file system in the Linux kernel. It's a | ||
137 | virtual file system: it is not associated with a block device | ||
138 | but exists only in memory. The files in the procfs are there to | ||
139 | allow userland programs access to certain information from the | ||
140 | kernel (like process information in <filename | ||
141 | class="directory">/proc/[0-9]+/</filename>), but also for debug | ||
142 | purposes (like <filename>/proc/ksyms</filename>). | ||
143 | </para> | ||
144 | |||
145 | <para> | ||
146 | This guide describes the use of the procfs file system from | ||
147 | within the Linux kernel. It starts by introducing all relevant | ||
148 | functions to manage the files within the file system. After that | ||
149 | it shows how to communicate with userland, and some tips and | ||
150 | tricks will be pointed out. Finally a complete example will be | ||
151 | shown. | ||
152 | </para> | ||
153 | |||
154 | <para> | ||
155 | Note that the files in <filename | ||
156 | class="directory">/proc/sys</filename> are sysctl files: they | ||
157 | don't belong to procfs and are governed by a completely | ||
158 | different API described in the Kernel API book. | ||
159 | </para> | ||
160 | </chapter> | ||
161 | |||
162 | |||
163 | |||
164 | |||
165 | <chapter id="managing"> | ||
166 | <title>Managing procfs entries</title> | ||
167 | |||
168 | <para> | ||
169 | This chapter describes the functions that various kernel | ||
170 | components use to populate the procfs with files, symlinks, | ||
171 | device nodes, and directories. | ||
172 | </para> | ||
173 | |||
174 | <para> | ||
175 | A minor note before we start: if you want to use any of the | ||
176 | procfs functions, be sure to include the correct header file! | ||
177 | This should be one of the first lines in your code: | ||
178 | </para> | ||
179 | |||
180 | <programlisting> | ||
181 | #include &lt;linux/proc_fs.h&gt; | ||
182 | </programlisting> | ||
183 | |||
184 | |||
185 | |||
186 | |||
187 | <sect1 id="regularfile"> | ||
188 | <title>Creating a regular file</title> | ||
189 | |||
190 | <funcsynopsis> | ||
191 | <funcprototype> | ||
192 | <funcdef>struct proc_dir_entry* <function>create_proc_entry</function></funcdef> | ||
193 | <paramdef>const char* <parameter>name</parameter></paramdef> | ||
194 | <paramdef>mode_t <parameter>mode</parameter></paramdef> | ||
195 | <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef> | ||
196 | </funcprototype> | ||
197 | </funcsynopsis> | ||
198 | |||
199 | <para> | ||
200 | This function creates a regular file with the name | ||
201 | <parameter>name</parameter>, file mode | ||
202 | <parameter>mode</parameter> in the directory | ||
203 | <parameter>parent</parameter>. To create a file in the root of | ||
204 | the procfs, use <constant>NULL</constant> as | ||
205 | <parameter>parent</parameter> parameter. When successful, the | ||
206 | function will return a pointer to the freshly created | ||
207 | <structname>struct proc_dir_entry</structname>; otherwise it | ||
208 | will return <constant>NULL</constant>. <xref | ||
209 | linkend="userland"/> describes how to do something useful with | ||
210 | regular files. | ||
211 | </para> | ||
212 | |||
213 | <para> | ||
214 | Note that it is specifically supported that you can pass a | ||
215 | path that spans multiple directories. For example | ||
216 | <function>create_proc_entry</function>(<parameter>"drivers/via0/info"</parameter>) | ||
217 | will create the <filename class="directory">via0</filename> | ||
218 | directory if necessary, with standard | ||
219 | <constant>0755</constant> permissions. | ||
220 | </para> | ||
221 | |||
222 | <para> | ||
223 | If you only want to be able to read the file, the function | ||
224 | <function>create_proc_read_entry</function> described in <xref | ||
225 | linkend="convenience"/> may be used to create and initialise | ||
226 | the procfs entry in one single call. | ||
227 | </para> | ||
228 | </sect1> | ||
229 | |||
230 | |||
231 | |||
232 | |||
233 | <sect1> | ||
234 | <title>Creating a symlink</title> | ||
235 | |||
236 | <funcsynopsis> | ||
237 | <funcprototype> | ||
238 | <funcdef>struct proc_dir_entry* | ||
239 | <function>proc_symlink</function></funcdef> <paramdef>const | ||
240 | char* <parameter>name</parameter></paramdef> | ||
241 | <paramdef>struct proc_dir_entry* | ||
242 | <parameter>parent</parameter></paramdef> <paramdef>const | ||
243 | char* <parameter>dest</parameter></paramdef> | ||
244 | </funcprototype> | ||
245 | </funcsynopsis> | ||
246 | |||
247 | <para> | ||
248 | This creates a symlink in the procfs directory | ||
249 | <parameter>parent</parameter> that points from | ||
250 | <parameter>name</parameter> to | ||
251 | <parameter>dest</parameter>. This translates in userland to | ||
252 | <literal>ln -s</literal> <parameter>dest</parameter> | ||
253 | <parameter>name</parameter>. | ||
254 | </para> | ||
255 | </sect1> | ||
256 | |||
257 | <sect1> | ||
258 | <title>Creating a directory</title> | ||
259 | |||
260 | <funcsynopsis> | ||
261 | <funcprototype> | ||
262 | <funcdef>struct proc_dir_entry* <function>proc_mkdir</function></funcdef> | ||
263 | <paramdef>const char* <parameter>name</parameter></paramdef> | ||
264 | <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef> | ||
265 | </funcprototype> | ||
266 | </funcsynopsis> | ||
267 | |||
268 | <para> | ||
269 | Create a directory <parameter>name</parameter> in the procfs | ||
270 | directory <parameter>parent</parameter>. | ||
271 | </para> | ||
272 | </sect1> | ||
273 | |||
274 | |||
275 | |||
276 | |||
277 | <sect1> | ||
278 | <title>Removing an entry</title> | ||
279 | |||
280 | <funcsynopsis> | ||
281 | <funcprototype> | ||
282 | <funcdef>void <function>remove_proc_entry</function></funcdef> | ||
283 | <paramdef>const char* <parameter>name</parameter></paramdef> | ||
284 | <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef> | ||
285 | </funcprototype> | ||
286 | </funcsynopsis> | ||
287 | |||
288 | <para> | ||
289 | Removes the entry <parameter>name</parameter> in the directory | ||
290 | <parameter>parent</parameter> from the procfs. Entries are | ||
291 | removed by their <emphasis>name</emphasis>, not by the | ||
292 | <structname>struct proc_dir_entry</structname> returned by the | ||
293 | various create functions. Note that this function doesn't | ||
294 | recursively remove entries. | ||
295 | </para> | ||
296 | |||
297 | <para> | ||
298 | Be sure to free the <structfield>data</structfield> entry from | ||
299 | the <structname>struct proc_dir_entry</structname> before | ||
300 | <function>remove_proc_entry</function> is called (that is: if | ||
301 | there was some <structfield>data</structfield> allocated, of | ||
302 | course). See <xref linkend="usingdata"/> for more information | ||
303 | on using the <structfield>data</structfield> entry. | ||
304 | </para> | ||
305 | </sect1> | ||
306 | </chapter> | ||
307 | |||
308 | |||
309 | |||
310 | |||
311 | <chapter id="userland"> | ||
312 | <title>Communicating with userland</title> | ||
313 | |||
314 | <para> | ||
315 | Instead of reading (or writing) information directly from | ||
316 | kernel memory, procfs works with <emphasis>call back | ||
317 | functions</emphasis> for files: functions that are called when | ||
318 | a specific file is being read or written. Such functions have | ||
319 | to be initialised after the procfs file is created by setting | ||
320 | the <structfield>read_proc</structfield> and/or | ||
321 | <structfield>write_proc</structfield> fields in the | ||
322 | <structname>struct proc_dir_entry*</structname> that the | ||
323 | function <function>create_proc_entry</function> returned: | ||
324 | </para> | ||
325 | |||
326 | <programlisting> | ||
327 | struct proc_dir_entry* entry; | ||
328 | |||
329 | entry->read_proc = read_proc_foo; | ||
330 | entry->write_proc = write_proc_foo; | ||
331 | </programlisting> | ||
332 | |||
333 | <para> | ||
334 | If you only want to use the | ||
335 | <structfield>read_proc</structfield>, the function | ||
336 | <function>create_proc_read_entry</function> described in <xref | ||
337 | linkend="convenience"/> may be used to create and initialise the | ||
338 | procfs entry in one single call. | ||
339 | </para> | ||
340 | |||
341 | |||
342 | |||
343 | <sect1> | ||
344 | <title>Reading data</title> | ||
345 | |||
346 | <para> | ||
347 | The read function is a call back function that allows userland | ||
348 | processes to read data from the kernel. The read function | ||
349 | should have the following format: | ||
350 | </para> | ||
351 | |||
352 | <funcsynopsis> | ||
353 | <funcprototype> | ||
354 | <funcdef>int <function>read_func</function></funcdef> | ||
355 | <paramdef>char* <parameter>page</parameter></paramdef> | ||
356 | <paramdef>char** <parameter>start</parameter></paramdef> | ||
357 | <paramdef>off_t <parameter>off</parameter></paramdef> | ||
358 | <paramdef>int <parameter>count</parameter></paramdef> | ||
359 | <paramdef>int* <parameter>eof</parameter></paramdef> | ||
360 | <paramdef>void* <parameter>data</parameter></paramdef> | ||
361 | </funcprototype> | ||
362 | </funcsynopsis> | ||
363 | |||
364 | <para> | ||
365 | The read function should write its information into the | ||
366 | <parameter>page</parameter>. For proper use, the function | ||
367 | should start writing at an offset of | ||
368 | <parameter>off</parameter> in <parameter>page</parameter> and | ||
369 | write at most <parameter>count</parameter> bytes, but because | ||
370 | most read functions are quite simple and only return a small | ||
371 | amount of information, these two parameters are usually | ||
372 | ignored (it breaks pagers like <literal>more</literal> and | ||
373 | <literal>less</literal>, but <literal>cat</literal> still | ||
374 | works). | ||
375 | </para> | ||
376 | |||
377 | <para> | ||
378 | If the <parameter>off</parameter> and | ||
379 | <parameter>count</parameter> parameters are properly used, | ||
380 | <parameter>eof</parameter> should be used to signal that the | ||
381 | end of the file has been reached by writing | ||
382 | <literal>1</literal> to the memory location | ||
383 | <parameter>eof</parameter> points to. | ||
384 | </para> | ||
385 | |||
386 | <para> | ||
387 | The parameter <parameter>start</parameter> doesn't seem to be | ||
388 | used anywhere in the kernel. The <parameter>data</parameter> | ||
389 | parameter can be used to create a single call back function for | ||
390 | several files, see <xref linkend="usingdata"/>. | ||
391 | </para> | ||
392 | |||
393 | <para> | ||
394 | The <function>read_func</function> function must return the | ||
395 | number of bytes written into the <parameter>page</parameter>. | ||
396 | </para> | ||
397 | |||
398 | <para> | ||
399 | <xref linkend="example"/> shows how to use a read call back | ||
400 | function. | ||
401 | </para> | ||
402 | </sect1> | ||
403 | |||
404 | |||
405 | |||
406 | |||
407 | <sect1> | ||
408 | <title>Writing data</title> | ||
409 | |||
410 | <para> | ||
411 | The write call back function allows a userland process to write | ||
412 | data to the kernel, so it has some kind of control over the | ||
413 | kernel. The write function should have the following format: | ||
414 | </para> | ||
415 | |||
416 | <funcsynopsis> | ||
417 | <funcprototype> | ||
418 | <funcdef>int <function>write_func</function></funcdef> | ||
419 | <paramdef>struct file* <parameter>file</parameter></paramdef> | ||
420 | <paramdef>const char* <parameter>buffer</parameter></paramdef> | ||
421 | <paramdef>unsigned long <parameter>count</parameter></paramdef> | ||
422 | <paramdef>void* <parameter>data</parameter></paramdef> | ||
423 | </funcprototype> | ||
424 | </funcsynopsis> | ||
425 | |||
426 | <para> | ||
427 | The write function should read <parameter>count</parameter> | ||
428 | bytes at maximum from the <parameter>buffer</parameter>. Note | ||
429 | that the <parameter>buffer</parameter> doesn't live in the | ||
430 | kernel's memory space, so it should first be copied to kernel | ||
431 | space with <function>copy_from_user</function>. The | ||
432 | <parameter>file</parameter> parameter is usually | ||
433 | ignored. <xref linkend="usingdata"/> shows how to use the | ||
434 | <parameter>data</parameter> parameter. | ||
435 | </para> | ||
436 | |||
437 | <para> | ||
438 | Again, <xref linkend="example"/> shows how to use this call back | ||
439 | function. | ||
440 | </para> | ||
441 | </sect1> | ||
442 | |||
443 | |||
444 | |||
445 | |||
446 | <sect1 id="usingdata"> | ||
447 | <title>A single call back for many files</title> | ||
448 | |||
449 | <para> | ||
450 | When a large number of almost identical files is used, it's | ||
451 | quite inconvenient to use a separate call back function for | ||
452 | each file. A better approach is to have a single call back | ||
453 | function that distinguishes between the files by using the | ||
454 | <structfield>data</structfield> field in <structname>struct | ||
455 | proc_dir_entry</structname>. First of all, the | ||
456 | <structfield>data</structfield> field has to be initialised: | ||
457 | </para> | ||
458 | |||
459 | <programlisting> | ||
460 | struct proc_dir_entry* entry; | ||
461 | struct my_file_data *file_data; | ||
462 | |||
463 | file_data = kmalloc(sizeof(struct my_file_data), GFP_KERNEL); | ||
464 | entry->data = file_data; | ||
465 | </programlisting> | ||
466 | |||
467 | <para> | ||
468 | The <structfield>data</structfield> field is a <type>void | ||
469 | *</type>, so it can be initialised with anything. | ||
470 | </para> | ||
471 | |||
472 | <para> | ||
473 | Now that the <structfield>data</structfield> field is set, the | ||
474 | <function>read_proc</function> and | ||
475 | <function>write_proc</function> can use it to distinguish | ||
476 | between files because they get it passed into their | ||
477 | <parameter>data</parameter> parameter: | ||
478 | </para> | ||
479 | |||
480 | <programlisting> | ||
481 | int foo_read_func(char *page, char **start, off_t off, | ||
482 | int count, int *eof, void *data) | ||
483 | { | ||
484 | int len; | ||
485 | |||
486 | if(data == file_data) { | ||
487 | /* special case for this file */ | ||
488 | } else { | ||
489 | /* normal processing */ | ||
490 | } | ||
491 | |||
492 | return len; | ||
493 | } | ||
494 | </programlisting> | ||
495 | |||
496 | <para> | ||
497 | Be sure to free the <structfield>data</structfield> field | ||
498 | when removing the procfs entry. | ||
499 | </para> | ||
500 | </sect1> | ||
501 | </chapter> | ||
502 | |||
503 | |||
504 | |||
505 | |||
506 | <chapter id="tips"> | ||
507 | <title>Tips and tricks</title> | ||
508 | |||
509 | |||
510 | |||
511 | |||
512 | <sect1 id="convenience"> | ||
513 | <title>Convenience functions</title> | ||
514 | |||
515 | <funcsynopsis> | ||
516 | <funcprototype> | ||
517 | <funcdef>struct proc_dir_entry* <function>create_proc_read_entry</function></funcdef> | ||
518 | <paramdef>const char* <parameter>name</parameter></paramdef> | ||
519 | <paramdef>mode_t <parameter>mode</parameter></paramdef> | ||
520 | <paramdef>struct proc_dir_entry* <parameter>parent</parameter></paramdef> | ||
521 | <paramdef>read_proc_t* <parameter>read_proc</parameter></paramdef> | ||
522 | <paramdef>void* <parameter>data</parameter></paramdef> | ||
523 | </funcprototype> | ||
524 | </funcsynopsis> | ||
525 | |||
526 | <para> | ||
527 | This function creates a regular file in exactly the same way | ||
528 | as <function>create_proc_entry</function> from <xref | ||
529 | linkend="regularfile"/> does, but also sets the read | ||
530 | function <parameter>read_proc</parameter> in the same call. This | ||
531 | function can set the <parameter>data</parameter> as well, as | ||
532 | explained in <xref linkend="usingdata"/>. | ||
533 | </para> | ||
534 | </sect1> | ||
535 | |||
536 | |||
537 | |||
538 | <sect1> | ||
539 | <title>Modules</title> | ||
540 | |||
541 | <para> | ||
542 | If procfs is being used from within a module, be sure to set | ||
543 | the <structfield>owner</structfield> field in the | ||
544 | <structname>struct proc_dir_entry</structname> to | ||
545 | <constant>THIS_MODULE</constant>. | ||
546 | </para> | ||
547 | |||
548 | <programlisting> | ||
549 | struct proc_dir_entry* entry; | ||
550 | |||
551 | entry->owner = THIS_MODULE; | ||
552 | </programlisting> | ||
553 | </sect1> | ||
554 | |||
555 | |||
556 | |||
557 | |||
558 | <sect1> | ||
559 | <title>Mode and ownership</title> | ||
560 | |||
561 | <para> | ||
562 | Sometimes it is useful to change the mode and/or ownership of | ||
563 | a procfs entry. Here is an example that shows how to achieve | ||
564 | that: | ||
565 | </para> | ||
566 | |||
567 | <programlisting> | ||
568 | struct proc_dir_entry* entry; | ||
569 | |||
570 | entry->mode = S_IWUSR |S_IRUSR | S_IRGRP | S_IROTH; | ||
571 | entry->uid = 0; | ||
572 | entry->gid = 100; | ||
573 | </programlisting> | ||
574 | |||
575 | </sect1> | ||
576 | </chapter> | ||
577 | |||
578 | |||
579 | |||
580 | |||
581 | <chapter id="example"> | ||
582 | <title>Example</title> | ||
583 | |||
584 | <!-- be careful with the example code: it shouldn't be wider than | ||
585 | approx. 60 columns, or otherwise it won't fit properly on a page | ||
586 | --> | ||
587 | |||
588 | &procfsexample; | ||
589 | |||
590 | </chapter> | ||
591 | </book> | ||
diff --git a/Documentation/DocBook/procfs_example.c b/Documentation/DocBook/procfs_example.c new file mode 100644 index 000000000000..7064084c1c5e --- /dev/null +++ b/Documentation/DocBook/procfs_example.c | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * procfs_example.c: an example proc interface | ||
3 | * | ||
4 | * Copyright (C) 2001, Erik Mouw (J.A.K.Mouw@its.tudelft.nl) | ||
5 | * | ||
6 | * This file accompanies the procfs-guide in the Linux kernel | ||
7 | * source. Its main use is to demonstrate the concepts and | ||
8 | * functions described in the guide. | ||
9 | * | ||
10 | * This software has been developed while working on the LART | ||
11 | * computing board (http://www.lart.tudelft.nl/), which is | ||
12 | * sponsored by the Mobile Multi-media Communications | ||
13 | * (http://www.mmc.tudelft.nl/) and Ubiquitous Communications | ||
14 | * (http://www.ubicom.tudelft.nl/) projects. | ||
15 | * | ||
16 | * The author can be reached at: | ||
17 | * | ||
18 | * Erik Mouw | ||
19 | * Information and Communication Theory Group | ||
20 | * Faculty of Information Technology and Systems | ||
21 | * Delft University of Technology | ||
22 | * P.O. Box 5031 | ||
23 | * 2600 GA Delft | ||
24 | * The Netherlands | ||
25 | * | ||
26 | * | ||
27 | * This program is free software; you can redistribute | ||
28 | * it and/or modify it under the terms of the GNU General | ||
29 | * Public License as published by the Free Software | ||
30 | * Foundation; either version 2 of the License, or (at your | ||
31 | * option) any later version. | ||
32 | * | ||
33 | * This program is distributed in the hope that it will be | ||
34 | * useful, but WITHOUT ANY WARRANTY; without even the implied | ||
35 | * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR | ||
36 | * PURPOSE. See the GNU General Public License for more | ||
37 | * details. | ||
38 | * | ||
39 | * You should have received a copy of the GNU General Public | ||
40 | * License along with this program; if not, write to the | ||
41 | * Free Software Foundation, Inc., 59 Temple Place, | ||
42 | * Suite 330, Boston, MA 02111-1307 USA | ||
43 | * | ||
44 | */ | ||
45 | |||
46 | #include <linux/module.h> | ||
47 | #include <linux/kernel.h> | ||
48 | #include <linux/init.h> | ||
49 | #include <linux/proc_fs.h> | ||
50 | #include <linux/jiffies.h> | ||
51 | #include <asm/uaccess.h> | ||
52 | |||
53 | |||
/* Version and name printed in the module's log messages;
 * the name is also used for the directory made in /proc.
 */
#define MODULE_VERS "1.0"
#define MODULE_NAME "procfs_example"

/* Longest string (without the terminating NUL) that the
 * name and value fields below can hold.
 */
#define FOOBAR_LEN 8

/* Payload attached to the "foo" and "bar" proc files. */
struct fb_data_t {
	/* label printed in front of the value */
	char name[FOOBAR_LEN + 1];
	/* current contents, replaced on every write */
	char value[FOOBAR_LEN + 1];
};
63 | |||
64 | |||
65 | static struct proc_dir_entry *example_dir, *foo_file, | ||
66 | *bar_file, *jiffies_file, *symlink; | ||
67 | |||
68 | |||
69 | struct fb_data_t foo_data, bar_data; | ||
70 | |||
71 | |||
72 | static int proc_read_jiffies(char *page, char **start, | ||
73 | off_t off, int count, | ||
74 | int *eof, void *data) | ||
75 | { | ||
76 | int len; | ||
77 | |||
78 | len = sprintf(page, "jiffies = %ld\n", | ||
79 | jiffies); | ||
80 | |||
81 | return len; | ||
82 | } | ||
83 | |||
84 | |||
85 | static int proc_read_foobar(char *page, char **start, | ||
86 | off_t off, int count, | ||
87 | int *eof, void *data) | ||
88 | { | ||
89 | int len; | ||
90 | struct fb_data_t *fb_data = (struct fb_data_t *)data; | ||
91 | |||
92 | /* DON'T DO THAT - buffer overruns are bad */ | ||
93 | len = sprintf(page, "%s = '%s'\n", | ||
94 | fb_data->name, fb_data->value); | ||
95 | |||
96 | return len; | ||
97 | } | ||
98 | |||
99 | |||
100 | static int proc_write_foobar(struct file *file, | ||
101 | const char *buffer, | ||
102 | unsigned long count, | ||
103 | void *data) | ||
104 | { | ||
105 | int len; | ||
106 | struct fb_data_t *fb_data = (struct fb_data_t *)data; | ||
107 | |||
108 | if(count > FOOBAR_LEN) | ||
109 | len = FOOBAR_LEN; | ||
110 | else | ||
111 | len = count; | ||
112 | |||
113 | if(copy_from_user(fb_data->value, buffer, len)) | ||
114 | return -EFAULT; | ||
115 | |||
116 | fb_data->value[len] = '\0'; | ||
117 | |||
118 | return len; | ||
119 | } | ||
120 | |||
121 | |||
122 | static int __init init_procfs_example(void) | ||
123 | { | ||
124 | int rv = 0; | ||
125 | |||
126 | /* create directory */ | ||
127 | example_dir = proc_mkdir(MODULE_NAME, NULL); | ||
128 | if(example_dir == NULL) { | ||
129 | rv = -ENOMEM; | ||
130 | goto out; | ||
131 | } | ||
132 | |||
133 | example_dir->owner = THIS_MODULE; | ||
134 | |||
135 | /* create jiffies using convenience function */ | ||
136 | jiffies_file = create_proc_read_entry("jiffies", | ||
137 | 0444, example_dir, | ||
138 | proc_read_jiffies, | ||
139 | NULL); | ||
140 | if(jiffies_file == NULL) { | ||
141 | rv = -ENOMEM; | ||
142 | goto no_jiffies; | ||
143 | } | ||
144 | |||
145 | jiffies_file->owner = THIS_MODULE; | ||
146 | |||
147 | /* create foo and bar files using same callback | ||
148 | * functions | ||
149 | */ | ||
150 | foo_file = create_proc_entry("foo", 0644, example_dir); | ||
151 | if(foo_file == NULL) { | ||
152 | rv = -ENOMEM; | ||
153 | goto no_foo; | ||
154 | } | ||
155 | |||
156 | strcpy(foo_data.name, "foo"); | ||
157 | strcpy(foo_data.value, "foo"); | ||
158 | foo_file->data = &foo_data; | ||
159 | foo_file->read_proc = proc_read_foobar; | ||
160 | foo_file->write_proc = proc_write_foobar; | ||
161 | foo_file->owner = THIS_MODULE; | ||
162 | |||
163 | bar_file = create_proc_entry("bar", 0644, example_dir); | ||
164 | if(bar_file == NULL) { | ||
165 | rv = -ENOMEM; | ||
166 | goto no_bar; | ||
167 | } | ||
168 | |||
169 | strcpy(bar_data.name, "bar"); | ||
170 | strcpy(bar_data.value, "bar"); | ||
171 | bar_file->data = &bar_data; | ||
172 | bar_file->read_proc = proc_read_foobar; | ||
173 | bar_file->write_proc = proc_write_foobar; | ||
174 | bar_file->owner = THIS_MODULE; | ||
175 | |||
176 | /* create symlink */ | ||
177 | symlink = proc_symlink("jiffies_too", example_dir, | ||
178 | "jiffies"); | ||
179 | if(symlink == NULL) { | ||
180 | rv = -ENOMEM; | ||
181 | goto no_symlink; | ||
182 | } | ||
183 | |||
184 | symlink->owner = THIS_MODULE; | ||
185 | |||
186 | /* everything OK */ | ||
187 | printk(KERN_INFO "%s %s initialised\n", | ||
188 | MODULE_NAME, MODULE_VERS); | ||
189 | return 0; | ||
190 | |||
191 | no_symlink: | ||
192 | remove_proc_entry("tty", example_dir); | ||
193 | no_tty: | ||
194 | remove_proc_entry("bar", example_dir); | ||
195 | no_bar: | ||
196 | remove_proc_entry("foo", example_dir); | ||
197 | no_foo: | ||
198 | remove_proc_entry("jiffies", example_dir); | ||
199 | no_jiffies: | ||
200 | remove_proc_entry(MODULE_NAME, NULL); | ||
201 | out: | ||
202 | return rv; | ||
203 | } | ||
204 | |||
205 | |||
206 | static void __exit cleanup_procfs_example(void) | ||
207 | { | ||
208 | remove_proc_entry("jiffies_too", example_dir); | ||
209 | remove_proc_entry("tty", example_dir); | ||
210 | remove_proc_entry("bar", example_dir); | ||
211 | remove_proc_entry("foo", example_dir); | ||
212 | remove_proc_entry("jiffies", example_dir); | ||
213 | remove_proc_entry(MODULE_NAME, NULL); | ||
214 | |||
215 | printk(KERN_INFO "%s %s removed\n", | ||
216 | MODULE_NAME, MODULE_VERS); | ||
217 | } | ||
218 | |||
219 | |||
220 | module_init(init_procfs_example); | ||
221 | module_exit(cleanup_procfs_example); | ||
222 | |||
223 | MODULE_AUTHOR("Erik Mouw"); | ||
224 | MODULE_DESCRIPTION("procfs examples"); | ||
diff --git a/Documentation/DocBook/scsidrivers.tmpl b/Documentation/DocBook/scsidrivers.tmpl new file mode 100644 index 000000000000..d058e65daf19 --- /dev/null +++ b/Documentation/DocBook/scsidrivers.tmpl | |||
@@ -0,0 +1,193 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="scsidrivers"> | ||
6 | <bookinfo> | ||
7 | <title>SCSI Subsystem Interfaces</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Douglas</firstname> | ||
12 | <surname>Gilbert</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>dgilbert@interlog.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | <pubdate>2003-08-11</pubdate> | ||
21 | |||
22 | <copyright> | ||
23 | <year>2002</year> | ||
24 | <year>2003</year> | ||
25 | <holder>Douglas Gilbert</holder> | ||
26 | </copyright> | ||
27 | |||
28 | <legalnotice> | ||
29 | <para> | ||
30 | This documentation is free software; you can redistribute | ||
31 | it and/or modify it under the terms of the GNU General Public | ||
32 | License as published by the Free Software Foundation; either | ||
33 | version 2 of the License, or (at your option) any later | ||
34 | version. | ||
35 | </para> | ||
36 | |||
37 | <para> | ||
38 | This program is distributed in the hope that it will be | ||
39 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
40 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
41 | See the GNU General Public License for more details. | ||
42 | </para> | ||
43 | |||
44 | <para> | ||
45 | You should have received a copy of the GNU General Public | ||
46 | License along with this program; if not, write to the Free | ||
47 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
48 | MA 02111-1307 USA | ||
49 | </para> | ||
50 | |||
51 | <para> | ||
52 | For more details see the file COPYING in the source | ||
53 | distribution of Linux. | ||
54 | </para> | ||
55 | </legalnotice> | ||
56 | |||
57 | </bookinfo> | ||
58 | |||
59 | <toc></toc> | ||
60 | |||
61 | <chapter id="intro"> | ||
62 | <title>Introduction</title> | ||
63 | <para> | ||
64 | This document outlines the interface between the Linux scsi mid level | ||
65 | and lower level drivers. Lower level drivers are variously called HBA | ||
66 | (host bus adapter) drivers, host drivers (HD) or pseudo adapter drivers. | ||
67 | The latter alludes to the fact that a lower level driver may be a | ||
68 | bridge to another IO subsystem (and the "ide-scsi" driver is an example | ||
69 | of this). There can be many lower level drivers active in a running | ||
70 | system, but only one per hardware type. For example, the aic7xxx driver | ||
71 | controls adaptec controllers based on the 7xxx chip series. Most lower | ||
72 | level drivers can control one or more scsi hosts (a.k.a. scsi initiators). | ||
73 | </para> | ||
74 | <para> | ||
75 | This document can be found in an ASCII text file in the linux kernel | ||
76 | source: <filename>Documentation/scsi/scsi_mid_low_api.txt</filename> . | ||
77 | It currently holds a little more information than this document. The | ||
78 | <filename>drivers/scsi/hosts.h</filename> and <filename> | ||
79 | drivers/scsi/scsi.h</filename> headers contain descriptions of members | ||
80 | of important structures for the scsi subsystem. | ||
81 | </para> | ||
82 | </chapter> | ||
83 | |||
84 | <chapter id="driver-struct"> | ||
85 | <title>Driver structure</title> | ||
86 | <para> | ||
87 | Traditionally a lower level driver for the scsi subsystem has been | ||
88 | at least two files in the drivers/scsi directory. For example, a | ||
89 | driver called "xyz" has a header file "xyz.h" and a source file | ||
90 | "xyz.c". [Actually there is no good reason why this couldn't all | ||
91 | be in one file.] Some drivers that have been ported to several operating | ||
92 | systems (e.g. aic7xxx which has separate files for generic and | ||
93 | OS-specific code) have more than two files. Such drivers tend to have | ||
94 | their own directory under the drivers/scsi directory. | ||
95 | </para> | ||
96 | <para> | ||
97 | scsi_module.c is normally included at the end of a lower | ||
98 | level driver. For it to work a declaration like this is needed before | ||
99 | it is included: | ||
100 | <programlisting> | ||
101 | static Scsi_Host_Template driver_template = DRIVER_TEMPLATE; | ||
102 | /* DRIVER_TEMPLATE should contain pointers to supported interface | ||
103 | functions. Scsi_Host_Template is defined in hosts.h */ | ||
104 | #include "scsi_module.c" | ||
105 | </programlisting> | ||
106 | </para> | ||
107 | <para> | ||
108 | The scsi_module.c assumes the name "driver_template" is appropriately | ||
109 | defined. It contains 2 functions: | ||
110 | <orderedlist> | ||
111 | <listitem><para> | ||
112 | init_this_scsi_driver() called during builtin and module driver | ||
113 | initialization: invokes mid level's scsi_register_host() | ||
114 | </para></listitem> | ||
115 | <listitem><para> | ||
116 | exit_this_scsi_driver() called during closedown: invokes | ||
117 | mid level's scsi_unregister_host() | ||
118 | </para></listitem> | ||
119 | </orderedlist> | ||
120 | </para> | ||
121 | <para> | ||
122 | When a new, lower level driver is being added to Linux, the following | ||
123 | files (all found in the drivers/scsi directory) will need some attention: | ||
124 | Makefile, Config.help and Config.in . It is probably best to look at what | ||
125 | an existing lower level driver does in this regard. | ||
126 | </para> | ||
127 | </chapter> | ||
128 | |||
129 | <chapter id="intfunctions"> | ||
130 | <title>Interface Functions</title> | ||
131 | !EDocumentation/scsi/scsi_mid_low_api.txt | ||
132 | </chapter> | ||
133 | |||
134 | <chapter id="locks"> | ||
135 | <title>Locks</title> | ||
136 | <para> | ||
137 | Each Scsi_Host instance has a spin_lock called Scsi_Host::default_lock | ||
138 | which is initialized in scsi_register() [found in hosts.c]. Within the | ||
139 | same function the Scsi_Host::host_lock pointer is initialized to point | ||
140 | at default_lock with the scsi_assign_lock() function. Thereafter | ||
141 | lock and unlock operations performed by the mid level use the | ||
142 | Scsi_Host::host_lock pointer. | ||
143 | </para> | ||
144 | <para> | ||
145 | Lower level drivers can override the use of Scsi_Host::default_lock by | ||
146 | using scsi_assign_lock(). The earliest opportunity to do this would | ||
147 | be in the detect() function after it has invoked scsi_register(). It | ||
148 | could be replaced by a coarser grain lock (e.g. per driver) or a | ||
149 | lock of equal granularity (i.e. per host). Using finer grain locks | ||
150 | (e.g. per scsi device) may be possible by juggling locks in | ||
151 | queuecommand(). | ||
152 | </para> | ||
153 | </chapter> | ||
154 | |||
155 | <chapter id="changes"> | ||
156 | <title>Changes since lk 2.4 series</title> | ||
157 | <para> | ||
158 | io_request_lock has been replaced by several finer grained locks. The lock | ||
159 | relevant to lower level drivers is Scsi_Host::host_lock and there is one | ||
160 | per scsi host. | ||
161 | </para> | ||
162 | <para> | ||
163 | The older error handling mechanism has been removed. This means the | ||
164 | lower level interface functions abort() and reset() have been removed. | ||
165 | </para> | ||
166 | <para> | ||
167 | In the 2.4 series the scsi subsystem configuration descriptions were | ||
168 | aggregated with the configuration descriptions from all other Linux | ||
169 | subsystems in the Documentation/Configure.help file. In the 2.5 series, | ||
170 | the scsi subsystem now has its own (much smaller) drivers/scsi/Config.help | ||
171 | file. | ||
172 | </para> | ||
173 | </chapter> | ||
174 | |||
175 | <chapter id="credits"> | ||
176 | <title>Credits</title> | ||
177 | <para> | ||
178 | The following people have contributed to this document: | ||
179 | <orderedlist> | ||
180 | <listitem><para> | ||
181 | Mike Anderson <email>andmike@us.ibm.com</email> | ||
182 | </para></listitem> | ||
183 | <listitem><para> | ||
184 | James Bottomley <email>James.Bottomley@steeleye.com</email> | ||
185 | </para></listitem> | ||
186 | <listitem><para> | ||
187 | Patrick Mansfield <email>patmans@us.ibm.com</email> | ||
188 | </para></listitem> | ||
189 | </orderedlist> | ||
190 | </para> | ||
191 | </chapter> | ||
192 | |||
193 | </book> | ||
diff --git a/Documentation/DocBook/sis900.tmpl b/Documentation/DocBook/sis900.tmpl new file mode 100644 index 000000000000..6c2cbac93c3f --- /dev/null +++ b/Documentation/DocBook/sis900.tmpl | |||
@@ -0,0 +1,585 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="SiS900Guide"> | ||
6 | |||
7 | <bookinfo> | ||
8 | |||
9 | <title>SiS 900/7016 Fast Ethernet Device Driver</title> | ||
10 | |||
11 | <authorgroup> | ||
12 | <author> | ||
13 | <firstname>Ollie</firstname> | ||
14 | <surname>Lho</surname> | ||
15 | </author> | ||
16 | |||
17 | <author> | ||
18 | <firstname>Lei Chun</firstname> | ||
19 | <surname>Chang</surname> | ||
20 | </author> | ||
21 | </authorgroup> | ||
22 | |||
23 | <edition>Document Revision: 0.3 for SiS900 driver v1.06 &amp; v1.07</edition> | ||
24 | <pubdate>November 16, 2000</pubdate> | ||
25 | |||
26 | <copyright> | ||
27 | <year>1999</year> | ||
28 | <holder>Silicon Integrated System Corp.</holder> | ||
29 | </copyright> | ||
30 | |||
31 | <legalnotice> | ||
32 | <para> | ||
33 | This program is free software; you can redistribute it and/or modify | ||
34 | it under the terms of the GNU General Public License as published by | ||
35 | the Free Software Foundation; either version 2 of the License, or | ||
36 | (at your option) any later version. | ||
37 | </para> | ||
38 | |||
39 | <para> | ||
40 | This program is distributed in the hope that it will be useful, | ||
41 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
42 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
43 | GNU General Public License for more details. | ||
44 | </para> | ||
45 | |||
46 | <para> | ||
47 | You should have received a copy of the GNU General Public License | ||
48 | along with this program; if not, write to the Free Software | ||
49 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
50 | </para> | ||
51 | </legalnotice> | ||
52 | |||
53 | <abstract> | ||
54 | <para> | ||
55 | This document gives some information on installation and usage of SiS 900/7016 | ||
56 | device driver under Linux. | ||
57 | </para> | ||
58 | </abstract> | ||
59 | |||
60 | </bookinfo> | ||
61 | |||
62 | <toc></toc> | ||
63 | |||
64 | <chapter id="intro"> | ||
65 | <title>Introduction</title> | ||
66 | |||
67 | <para> | ||
68 | This document describes revisions 1.06 and 1.07 of the SiS 900/7016 Fast Ethernet | ||
69 | device driver under Linux. The driver is developed by Silicon Integrated | ||
70 | System Corp. and distributed freely under the GNU General Public License (GPL). | ||
71 | The driver can be compiled as a loadable module and used under Linux kernel | ||
72 | version 2.2.x. (rev. 1.06) | ||
73 | With minimal changes, the driver can also be used under 2.3.x and 2.4.x kernel | ||
74 | (rev. 1.07), please see | ||
75 | <xref linkend="install"/>. If you intend to | ||
76 | use the driver for earlier kernels, you are on your own. | ||
77 | </para> | ||
78 | |||
79 | <para> | ||
80 | The driver is tested with usual TCP/IP applications including | ||
81 | FTP, Telnet, Netscape etc. and is used constantly by the developers. | ||
82 | </para> | ||
83 | |||
84 | <para> | ||
85 | Please send all comments/fixes/questions to | ||
86 | <ulink url="mailto:lcchang@sis.com.tw">Lei-Chun Chang</ulink>. | ||
87 | </para> | ||
88 | </chapter> | ||
89 | |||
90 | <chapter id="changes"> | ||
91 | <title>Changes</title> | ||
92 | |||
93 | <para> | ||
94 | Changes made in Revision 1.07 | ||
95 | |||
96 | <orderedlist> | ||
97 | <listitem> | ||
98 | <para> | ||
99 | Separation of sis900.c and sis900.h in order to move most | ||
100 | constant definition to sis900.h (many of those constants were | ||
101 | corrected) | ||
102 | </para> | ||
103 | </listitem> | ||
104 | |||
105 | <listitem> | ||
106 | <para> | ||
107 | Clean up PCI detection, the pci-scan from Donald Becker was not used, | ||
108 | just simple pci_find_*. | ||
109 | </para> | ||
110 | </listitem> | ||
111 | |||
112 | <listitem> | ||
113 | <para> | ||
114 | MII detection is modified to support multiple MII transceivers. | ||
115 | </para> | ||
116 | </listitem> | ||
117 | |||
118 | <listitem> | ||
119 | <para> | ||
120 | Bugs in read_eeprom, mdio_* were removed. | ||
121 | </para> | ||
122 | </listitem> | ||
123 | |||
124 | <listitem> | ||
125 | <para> | ||
126 | Lots of comments irrelevant to sis900 were removed/changed and | ||
127 | more comments were added to reflect the real situation. | ||
128 | </para> | ||
129 | </listitem> | ||
130 | |||
131 | <listitem> | ||
132 | <para> | ||
133 | Clean up of physical/virtual address space mess in buffer | ||
134 | descriptors. | ||
135 | </para> | ||
136 | </listitem> | ||
137 | |||
138 | <listitem> | ||
139 | <para> | ||
140 | Better transmit/receive error handling. | ||
141 | </para> | ||
142 | </listitem> | ||
143 | |||
144 | <listitem> | ||
145 | <para> | ||
146 | The driver now uses zero-copy single buffer management | ||
147 | scheme to improve performance. | ||
148 | </para> | ||
149 | </listitem> | ||
150 | |||
151 | <listitem> | ||
152 | <para> | ||
153 | Names of variables were changed to be more consistent. | ||
154 | </para> | ||
155 | </listitem> | ||
156 | |||
157 | <listitem> | ||
158 | <para> | ||
159 | Clean up of auto-negotiation and timer code. | ||
160 | </para> | ||
161 | </listitem> | ||
162 | |||
163 | <listitem> | ||
164 | <para> | ||
165 | Automatic detection and change of PHY on the fly. | ||
166 | </para> | ||
167 | </listitem> | ||
168 | |||
169 | <listitem> | ||
170 | <para> | ||
171 | Bug in mac probing fixed. | ||
172 | </para> | ||
173 | </listitem> | ||
174 | |||
175 | <listitem> | ||
176 | <para> | ||
177 | Fix 630E equalizer problem by modifying the equalizer workaround rule. | ||
178 | </para> | ||
179 | </listitem> | ||
180 | |||
181 | <listitem> | ||
182 | <para> | ||
183 | Support for ICS1893 10/100 Integrated PHYceiver. | ||
184 | </para> | ||
185 | </listitem> | ||
186 | |||
187 | <listitem> | ||
188 | <para> | ||
189 | Support for media select by ifconfig. | ||
190 | </para> | ||
191 | </listitem> | ||
192 | |||
193 | <listitem> | ||
194 | <para> | ||
195 | Added kernel-doc extractable documentation. | ||
196 | </para> | ||
197 | </listitem> | ||
198 | |||
199 | </orderedlist> | ||
200 | </para> | ||
201 | </chapter> | ||
202 | |||
203 | <chapter id="tested"> | ||
204 | <title>Tested Environment</title> | ||
205 | |||
206 | <para> | ||
207 | This driver is developed on the following hardware | ||
208 | |||
209 | <itemizedlist> | ||
210 | <listitem> | ||
211 | |||
212 | <para> | ||
213 | Intel Celeron 500 with SiS 630 (rev 02) chipset | ||
214 | </para> | ||
215 | </listitem> | ||
216 | <listitem> | ||
217 | |||
218 | <para> | ||
219 | SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card | ||
220 | </para> | ||
221 | </listitem> | ||
222 | |||
223 | </itemizedlist> | ||
224 | |||
225 | and tested with these software environments | ||
226 | |||
227 | <itemizedlist> | ||
228 | <listitem> | ||
229 | |||
230 | <para> | ||
231 | Red Hat Linux version 6.2 | ||
232 | </para> | ||
233 | </listitem> | ||
234 | <listitem> | ||
235 | |||
236 | <para> | ||
237 | Linux kernel version 2.4.0 | ||
238 | </para> | ||
239 | </listitem> | ||
240 | <listitem> | ||
241 | |||
242 | <para> | ||
243 | Netscape version 4.6 | ||
244 | </para> | ||
245 | </listitem> | ||
246 | <listitem> | ||
247 | |||
248 | <para> | ||
249 | NcFTP 3.0.0 beta 18 | ||
250 | </para> | ||
251 | </listitem> | ||
252 | <listitem> | ||
253 | |||
254 | <para> | ||
255 | Samba version 2.0.3 | ||
256 | </para> | ||
257 | </listitem> | ||
258 | |||
259 | </itemizedlist> | ||
260 | |||
261 | </para> | ||
262 | |||
263 | </chapter> | ||
264 | |||
265 | <chapter id="files"> | ||
266 | <title>Files in This Package</title> | ||
267 | |||
268 | <para> | ||
269 | In the package you can find these files: | ||
270 | </para> | ||
271 | |||
272 | <para> | ||
273 | <variablelist> | ||
274 | |||
275 | <varlistentry> | ||
276 | <term>sis900.c</term> | ||
277 | <listitem> | ||
278 | <para> | ||
279 | Driver source file in C | ||
280 | </para> | ||
281 | </listitem> | ||
282 | </varlistentry> | ||
283 | |||
284 | <varlistentry> | ||
285 | <term>sis900.h</term> | ||
286 | <listitem> | ||
287 | <para> | ||
288 | Header file for sis900.c | ||
289 | </para> | ||
290 | </listitem> | ||
291 | </varlistentry> | ||
292 | |||
293 | <varlistentry> | ||
294 | <term>sis900.sgml</term> | ||
295 | <listitem> | ||
296 | <para> | ||
297 | DocBook SGML source of the document | ||
298 | </para> | ||
299 | </listitem> | ||
300 | </varlistentry> | ||
301 | |||
302 | <varlistentry> | ||
303 | <term>sis900.txt</term> | ||
304 | <listitem> | ||
305 | <para> | ||
306 | Driver document in plain text | ||
307 | </para> | ||
308 | </listitem> | ||
309 | </varlistentry> | ||
310 | |||
311 | </variablelist> | ||
312 | </para> | ||
313 | </chapter> | ||
314 | |||
315 | <chapter id="install"> | ||
316 | <title>Installation</title> | ||
317 | |||
318 | <para> | ||
319 | Silicon Integrated System Corp. is cooperating closely with core Linux Kernel | ||
320 | developers. The revisions of SiS 900 driver are distributed by the usual channels | ||
321 | for kernel tar files and patches. Those kernel tar files for official kernel and | ||
322 | patches for kernel pre-release can be downloaded at | ||
323 | <ulink url="http://ftp.kernel.org/pub/linux/kernel/">official kernel ftp site</ulink> | ||
324 | and its mirrors. | ||
325 | The 1.06 revision can be found in kernel version later than 2.3.15 and pre-2.2.14, | ||
326 | and 1.07 revision can be found in kernel version 2.4.0. | ||
327 | If you have no prior experience in networking under Linux, please read | ||
328 | <ulink url="http://www.tldp.org/">Ethernet HOWTO</ulink> and | ||
329 | <ulink url="http://www.tldp.org/">Networking HOWTO</ulink> available from | ||
330 | Linux Documentation Project (LDP). | ||
331 | </para> | ||
332 | |||
333 | <para> | ||
334 | The driver is bundled in release later than 2.2.11 and 2.3.15 so this | ||
335 | is the easiest case. | ||
336 | Be sure you have the appropriate packages for compiling kernel source. | ||
337 | Those packages are listed in Documentation/Changes in kernel source | ||
338 | distribution. If you have to install the driver other than those bundled | ||
339 | in kernel release, you should have your driver file | ||
340 | <filename>sis900.c</filename> and <filename>sis900.h</filename> | ||
341 | copied into <filename class="directory">/usr/src/linux/drivers/net/</filename> first. | ||
342 | There are two alternative ways to install the driver | ||
343 | </para> | ||
344 | |||
345 | <sect1> | ||
346 | <title>Building the driver as loadable module</title> | ||
347 | |||
348 | <para> | ||
349 | To build the driver as a loadable kernel module you have to reconfigure | ||
350 | the kernel to activate network support by | ||
351 | </para> | ||
352 | |||
353 | <para><screen> | ||
354 | make menuconfig | ||
355 | </screen></para> | ||
356 | |||
357 | <para> | ||
358 | Choose <quote>Loadable module support ---></quote>, | ||
359 | then select <quote>Enable loadable module support</quote>. | ||
360 | </para> | ||
361 | |||
362 | <para> | ||
363 | Choose <quote>Network Device Support ---></quote>, select | ||
364 | <quote>Ethernet (10 or 100Mbit)</quote>. | ||
365 | Then select <quote>EISA, VLB, PCI and on board controllers</quote>, | ||
366 | and choose <quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote> | ||
367 | to <quote>M</quote>. | ||
368 | </para> | ||
369 | |||
370 | <para> | ||
371 | After reconfiguring the kernel, you can make the driver module by | ||
372 | </para> | ||
373 | |||
374 | <para><screen> | ||
375 | make modules | ||
376 | </screen></para> | ||
377 | |||
378 | <para> | ||
379 | The driver should be compiled with no errors. After compiling the driver, | ||
380 | the driver can be installed to proper place by | ||
381 | </para> | ||
382 | |||
383 | <para><screen> | ||
384 | make modules_install | ||
385 | </screen></para> | ||
386 | |||
387 | <para> | ||
388 | Load the driver into kernel by | ||
389 | </para> | ||
390 | |||
391 | <para><screen> | ||
392 | insmod sis900 | ||
393 | </screen></para> | ||
394 | |||
395 | <para> | ||
396 | When loading the driver into memory, some informational messages can be viewed by | ||
397 | </para> | ||
398 | |||
399 | <para> | ||
400 | <screen> | ||
401 | dmesg | ||
402 | </screen> | ||
403 | |||
404 | or | ||
405 | |||
406 | <screen> | ||
407 | cat /var/log/messages | ||
408 | </screen> | ||
409 | </para> | ||
410 | |||
411 | <para> | ||
412 | If the driver is loaded properly you will have messages similar to this: | ||
413 | </para> | ||
414 | |||
415 | <para><screen> | ||
416 | sis900.c: v1.07.06 11/07/2000 | ||
417 | eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4. | ||
418 | eth0: SiS 900 Internal MII PHY transceiver found at address 1. | ||
419 | eth0: Using SiS 900 Internal MII PHY as default | ||
420 | </screen></para> | ||
421 | |||
422 | <para> | ||
423 | showing the version of the driver and the results of probing routine. | ||
424 | </para> | ||
425 | |||
426 | <para> | ||
427 | Once the driver is loaded, network can be brought up by | ||
428 | </para> | ||
429 | |||
430 | <para><screen> | ||
431 | /sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE | ||
432 | </screen></para> | ||
433 | |||
434 | <para> | ||
435 | where IPADDR, BROADCAST, NETMASK are your IP address, broadcast address and | ||
436 | netmask respectively. TYPE is used to set medium type used by the device. | ||
437 | Typical values are "10baseT"(twisted-pair 10Mbps Ethernet) or "100baseT" | ||
438 | (twisted-pair 100Mbps Ethernet). For more information on how to configure | ||
439 | network interface, please refer to | ||
440 | <ulink url="http://www.tldp.org/">Networking HOWTO</ulink>. | ||
441 | </para> | ||
442 | |||
443 | <para> | ||
444 | The link status is also shown by kernel messages. For example, after the | ||
445 | network interface is activated, you may have the message: | ||
446 | </para> | ||
447 | |||
448 | <para><screen> | ||
449 | eth0: Media Link On 100mbps full-duplex | ||
450 | </screen></para> | ||
451 | |||
452 | <para> | ||
453 | If you try to unplug the twisted pair (TP) cable you will get | ||
454 | </para> | ||
455 | |||
456 | <para><screen> | ||
457 | eth0: Media Link Off | ||
458 | </screen></para> | ||
459 | |||
460 | <para> | ||
461 | indicating that the link has failed. | ||
462 | </para> | ||
463 | </sect1> | ||
464 | |||
465 | <sect1> | ||
466 | <title>Building the driver into kernel</title> | ||
467 | |||
468 | <para> | ||
469 | If you want to make the driver into kernel, choose <quote>Y</quote> | ||
470 | rather than <quote>M</quote> on | ||
471 | <quote>SiS 900/7016 PCI Fast Ethernet Adapter support</quote> | ||
472 | when configuring the kernel. Build the kernel image in the usual way | ||
473 | </para> | ||
474 | |||
475 | <para><screen> | ||
476 | make clean | ||
477 | |||
478 | make bzlilo | ||
479 | </screen></para> | ||
480 | |||
481 | <para> | ||
482 | The next time the system reboots, the driver will be in memory. | ||
483 | </para> | ||
484 | |||
485 | </sect1> | ||
486 | </chapter> | ||
487 | |||
488 | <chapter id="problems"> | ||
489 | <title>Known Problems and Bugs</title> | ||
490 | |||
491 | <para> | ||
492 | There are some known problems and bugs. If you find any other bugs please | ||
493 | mail to <ulink url="mailto:lcchang@sis.com.tw">lcchang@sis.com.tw</ulink> | ||
494 | |||
495 | <orderedlist> | ||
496 | |||
497 | <listitem> | ||
498 | <para> | ||
499 | AM79C901 HomePNA PHY is not thoroughly tested, there may be some | ||
500 | bugs in the <quote>on the fly</quote> change of transceiver. | ||
501 | </para> | ||
502 | </listitem> | ||
503 | |||
504 | <listitem> | ||
505 | <para> | ||
506 | A bug is hidden somewhere in the receive buffer management code, | ||
507 | the bug causes NULL pointer reference in the kernel. This fault is | ||
508 | caught before bad things happen and reported with the message: | ||
509 | |||
510 | <computeroutput> | ||
511 | eth0: NULL pointer encountered in Rx ring, skipping | ||
512 | </computeroutput> | ||
513 | |||
514 | which can be viewed with <literal remap="tt">dmesg</literal> or | ||
515 | <literal remap="tt">cat /var/log/messages</literal>. | ||
516 | </para> | ||
517 | </listitem> | ||
518 | |||
519 | <listitem> | ||
520 | <para> | ||
521 | The media type change from 10Mbps to 100Mbps twisted-pair ethernet | ||
522 | by ifconfig causes the media link down. | ||
523 | </para> | ||
524 | </listitem> | ||
525 | |||
526 | </orderedlist> | ||
527 | </para> | ||
528 | </chapter> | ||
529 | |||
530 | <chapter id="RHistory"> | ||
531 | <title>Revision History</title> | ||
532 | |||
533 | <para> | ||
534 | <itemizedlist> | ||
535 | |||
536 | <listitem> | ||
537 | <para> | ||
538 | November 13, 2000, Revision 1.07, seventh release, 630E problem fixed | ||
539 | and further clean up. | ||
540 | </para> | ||
541 | </listitem> | ||
542 | |||
543 | <listitem> | ||
544 | <para> | ||
545 | November 4, 1999, Revision 1.06, Second release, lots of clean up | ||
546 | and optimization. | ||
547 | </para> | ||
548 | </listitem> | ||
549 | |||
550 | <listitem> | ||
551 | <para> | ||
552 | August 8, 1999, Revision 1.05, Initial Public Release | ||
553 | </para> | ||
554 | </listitem> | ||
555 | |||
556 | </itemizedlist> | ||
557 | </para> | ||
558 | </chapter> | ||
559 | |||
560 | <chapter id="acknowledgements"> | ||
561 | <title>Acknowledgements</title> | ||
562 | |||
563 | <para> | ||
564 | This driver was originally derived from | ||
565 | <ulink url="mailto:becker@cesdis1.gsfc.nasa.gov">Donald Becker</ulink>'s | ||
566 | <ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/pci-skeleton.c" | ||
567 | >pci-skeleton</ulink> and | ||
568 | <ulink url="ftp://cesdis.gsfc.nasa.gov/pub/linux/drivers/kern-2.3/rtl8139.c" | ||
569 | >rtl8139</ulink> drivers. Donald also provided various suggestions | ||
570 | regarding improvements made in revision 1.06. | ||
571 | </para> | ||
572 | |||
573 | <para> | ||
574 | The 1.05 revision was created by | ||
575 | <ulink url="mailto:cmhuang@sis.com.tw">Jim Huang</ulink>, AMD 79c901 | ||
576 | support was added by <ulink url="mailto:lcs@sis.com.tw">Chin-Shan Li</ulink>. | ||
577 | </para> | ||
578 | </chapter> | ||
579 | |||
580 | <chapter id="functions"> | ||
581 | <title>List of Functions</title> | ||
582 | !Idrivers/net/sis900.c | ||
583 | </chapter> | ||
584 | |||
585 | </book> | ||
diff --git a/Documentation/DocBook/tulip-user.tmpl b/Documentation/DocBook/tulip-user.tmpl new file mode 100644 index 000000000000..6520d7a1b132 --- /dev/null +++ b/Documentation/DocBook/tulip-user.tmpl | |||
@@ -0,0 +1,327 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="TulipUserGuide"> | ||
6 | <bookinfo> | ||
7 | <title>Tulip Driver User's Guide</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Jeff</firstname> | ||
12 | <surname>Garzik</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>jgarzik@pobox.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2001</year> | ||
23 | <holder>Jeff Garzik</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | </legalnotice> | ||
54 | </bookinfo> | ||
55 | |||
56 | <toc></toc> | ||
57 | |||
58 | <chapter id="intro"> | ||
59 | <title>Introduction</title> | ||
60 | <para> | ||
61 | The Tulip Ethernet Card Driver | ||
62 | is maintained by Jeff Garzik (<email>jgarzik@pobox.com</email>). | ||
63 | </para> | ||
64 | |||
65 | <para> | ||
66 | The Tulip driver was developed by Donald Becker and changed by | ||
67 | Jeff Garzik, Takashi Manabe and a cast of thousands. | ||
68 | </para> | ||
69 | |||
70 | <para> | ||
71 | For 2.4.x and later kernels, the Linux Tulip driver is available at | ||
72 | <ulink url="http://sourceforge.net/projects/tulip/">http://sourceforge.net/projects/tulip/</ulink> | ||
73 | </para> | ||
74 | |||
75 | <para> | ||
76 | This driver is for the Digital "Tulip" Ethernet adapter interface. | ||
77 | It should work with most DEC 21*4*-based chips/ethercards, as well as | ||
78 | with work-alike chips from Lite-On (PNIC) and Macronix (MXIC) and ASIX. | ||
79 | </para> | ||
80 | |||
81 | <para> | ||
82 | The original author may be reached as becker@scyld.com, or C/O | ||
83 | Scyld Computing Corporation, | ||
84 | 410 Severn Ave., Suite 210, | ||
85 | Annapolis MD 21403 | ||
86 | </para> | ||
87 | |||
88 | <para> | ||
89 | Additional information on Donald Becker's tulip.c | ||
90 | is available at <ulink url="http://www.scyld.com/network/tulip.html">http://www.scyld.com/network/tulip.html</ulink> | ||
91 | </para> | ||
92 | |||
93 | </chapter> | ||
94 | |||
95 | <chapter id="drvr-compat"> | ||
96 | <title>Driver Compatibility</title> | ||
97 | |||
98 | <para> | ||
99 | This device driver is designed for the DECchip "Tulip", Digital's | ||
100 | single-chip ethernet controllers for PCI (now owned by Intel). | ||
101 | Supported members of the family | ||
102 | are the 21040, 21041, 21140, 21140A, 21142, and 21143. Similar work-alike | ||
103 | chips from Lite-On, Macronix, ASIX, Compex and others listed below are also | ||
104 | supported. | ||
105 | </para> | ||
106 | |||
107 | <para> | ||
108 | These chips are used on at least 140 unique PCI board designs. The great | ||
109 | number of chips and board designs supported is the reason for the | ||
110 | driver size and complexity. Almost all of the increasing complexity is in the | ||
111 | board configuration and media selection code. There is very little | ||
112 | increase in the operational critical path length. | ||
113 | </para> | ||
114 | </chapter> | ||
115 | |||
116 | <chapter id="board-settings"> | ||
117 | <title>Board-specific Settings</title> | ||
118 | |||
119 | <para> | ||
120 | PCI bus devices are configured by the system at boot time, so no jumpers | ||
121 | need to be set on the board. The system BIOS preferably should assign the | ||
122 | PCI INTA signal to an otherwise unused system IRQ line. | ||
123 | </para> | ||
124 | |||
125 | <para> | ||
126 | Some boards have EEPROM tables with a default media entry. The factory default | ||
127 | is usually "autoselect". This should only be overridden when using | ||
128 | transceiver connections without link beat e.g. 10base2 or AUI, or (rarely!) | ||
129 | for forcing full-duplex when used with old link partners that do not do | ||
130 | autonegotiation. | ||
131 | </para> | ||
132 | </chapter> | ||
133 | |||
134 | <chapter id="driver-operation"> | ||
135 | <title>Driver Operation</title> | ||
136 | |||
137 | <sect1><title>Ring buffers</title> | ||
138 | |||
139 | <para> | ||
140 | The Tulip can use either ring buffers or lists of Tx and Rx descriptors. | ||
141 | This driver uses statically allocated rings of Rx and Tx descriptors, set at | ||
142 | compile time by RX/TX_RING_SIZE. This version of the driver allocates skbuffs | ||
143 | for the Rx ring buffers at open() time and passes the skb->data field to the | ||
144 | Tulip as receive data buffers. When an incoming frame is less than | ||
145 | RX_COPYBREAK bytes long, a fresh skbuff is allocated and the frame is | ||
146 | copied to the new skbuff. When the incoming frame is larger, the skbuff is | ||
147 | passed directly up the protocol stack and replaced by a newly allocated | ||
148 | skbuff. | ||
149 | </para> | ||
150 | |||
151 | <para> | ||
152 | The RX_COPYBREAK value is chosen to trade-off the memory wasted by | ||
153 | using a full-sized skbuff for small frames vs. the copying costs of larger | ||
154 | frames. For small frames the copying cost is negligible (esp. considering | ||
155 | that we are pre-loading the cache with immediately useful header | ||
156 | information). For large frames the copying cost is non-trivial, and the | ||
157 | larger copy might flush the cache of useful data. A subtle aspect of this | ||
158 | choice is that the Tulip only receives into longword aligned buffers, thus | ||
159 | the IP header at offset 14 isn't longword aligned for further processing. | ||
160 | Copied frames are put into the new skbuff at an offset of "+2", thus copying | ||
161 | has the beneficial effect of aligning the IP header and preloading the | ||
162 | cache. | ||
163 | </para> | ||
164 | |||
165 | </sect1> | ||
166 | |||
167 | <sect1><title>Synchronization</title> | ||
168 | <para> | ||
169 | The driver runs as two independent, single-threaded flows of control. One | ||
170 | is the send-packet routine, which enforces single-threaded use by the | ||
171 | dev->tbusy flag. The other thread is the interrupt handler, which is single | ||
172 | threaded by the hardware and other software. | ||
173 | </para> | ||
174 | |||
175 | <para> | ||
176 | The send packet thread has partial control over the Tx ring and 'dev->tbusy' | ||
177 | flag. It sets the tbusy flag whenever it's queuing a Tx packet. If the next | ||
178 | queue slot is empty, it clears the tbusy flag when finished; otherwise it sets | ||
179 | the 'tp->tx_full' flag. | ||
180 | </para> | ||
181 | |||
182 | <para> | ||
183 | The interrupt handler has exclusive control over the Rx ring and records stats | ||
184 | from the Tx ring. (The Tx-done interrupt can't be selectively turned off, so | ||
185 | we can't avoid the interrupt overhead by having the Tx routine reap the Tx | ||
186 | stats.) After reaping the stats, it marks the queue entry as empty by setting | ||
187 | the 'base' to zero. Iff the 'tp->tx_full' flag is set, it clears both the | ||
188 | tx_full and tbusy flags. | ||
189 | </para> | ||
190 | |||
191 | </sect1> | ||
192 | |||
193 | </chapter> | ||
194 | |||
195 | <chapter id="errata"> | ||
196 | <title>Errata</title> | ||
197 | |||
198 | <para> | ||
199 | The old DEC databooks were light on details. | ||
200 | The 21040 databook claims that CSR13, CSR14, and CSR15 should each be the last | ||
201 | register of the set CSR12-15 written. Hmmm, now how is that possible? | ||
202 | </para> | ||
203 | |||
204 | <para> | ||
205 | The DEC SROM format is very badly designed and not precisely defined, leading to | ||
206 | part of the media selection junkheap below. Some boards do not have EEPROM | ||
207 | media tables and need to be patched up. Worse, other boards use the DEC | ||
208 | design kit media table when it isn't correct for their board. | ||
209 | </para> | ||
210 | |||
211 | <para> | ||
212 | We cannot use MII interrupts because there is no defined GPIO pin to attach | ||
213 | them. The MII transceiver status is polled using a kernel timer. | ||
214 | </para> | ||
215 | </chapter> | ||
216 | |||
217 | <chapter id="changelog"> | ||
218 | <title>Driver Change History</title> | ||
219 | |||
220 | <sect1><title>Version 0.9.14 (February 20, 2001)</title> | ||
221 | <itemizedlist> | ||
222 | <listitem><para>Fix PNIC problems (Manfred Spraul)</para></listitem> | ||
223 | <listitem><para>Add new PCI id for Accton comet</para></listitem> | ||
224 | <listitem><para>Support Davicom tulips</para></listitem> | ||
225 | <listitem><para>Fix oops in eeprom parsing</para></listitem> | ||
226 | <listitem><para>Enable workarounds for early PCI chipsets</para></listitem> | ||
227 | <listitem><para>IA64, hppa csr0 support</para></listitem> | ||
228 | <listitem><para>Support media types 5, 6</para></listitem> | ||
229 | <listitem><para>Interpret a bit more of the 21142 SROM extended media type 3</para></listitem> | ||
230 | <listitem><para>Add missing delay in eeprom reading</para></listitem> | ||
231 | </itemizedlist> | ||
232 | </sect1> | ||
233 | |||
234 | <sect1><title>Version 0.9.11 (November 3, 2000)</title> | ||
235 | <itemizedlist> | ||
236 | <listitem><para>Eliminate extra bus accesses when sharing interrupts (prumpf)</para></listitem> | ||
237 | <listitem><para>Barrier following ownership descriptor bit flip (prumpf)</para></listitem> | ||
238 | <listitem><para>Endianness fixes for >14 addresses in setup frames (prumpf)</para></listitem> | ||
239 | <listitem><para>Report link beat to kernel/userspace via netif_carrier_*. (kuznet)</para></listitem> | ||
240 | <listitem><para>Better spinlocking in set_rx_mode.</para></listitem> | ||
241 | <listitem><para>Fix I/O resource request failure error messages (DaveM catch)</para></listitem> | ||
242 | <listitem><para>Handle DMA allocation failure.</para></listitem> | ||
243 | </itemizedlist> | ||
244 | </sect1> | ||
245 | |||
246 | <sect1><title>Version 0.9.10 (September 6, 2000)</title> | ||
247 | <itemizedlist> | ||
248 | <listitem><para>Simple interrupt mitigation (via jamal)</para></listitem> | ||
249 | <listitem><para>More PCI ids</para></listitem> | ||
250 | </itemizedlist> | ||
251 | </sect1> | ||
252 | |||
253 | <sect1><title>Version 0.9.9 (August 11, 2000)</title> | ||
254 | <itemizedlist> | ||
255 | <listitem><para>More PCI ids</para></listitem> | ||
256 | </itemizedlist> | ||
257 | </sect1> | ||
258 | |||
259 | <sect1><title>Version 0.9.8 (July 13, 2000)</title> | ||
260 | <itemizedlist> | ||
261 | <listitem><para>Correct signed/unsigned comparison for dummy frame index</para></listitem> | ||
262 | <listitem><para>Remove outdated references to struct enet_statistics</para></listitem> | ||
263 | </itemizedlist> | ||
264 | </sect1> | ||
265 | |||
266 | <sect1><title>Version 0.9.7 (June 17, 2000)</title> | ||
267 | <itemizedlist> | ||
268 | <listitem><para>Timer cleanups (Andrew Morton)</para></listitem> | ||
269 | <listitem><para>Alpha compile fix (somebody?)</para></listitem> | ||
270 | </itemizedlist> | ||
271 | </sect1> | ||
272 | |||
273 | <sect1><title>Version 0.9.6 (May 31, 2000)</title> | ||
274 | <itemizedlist> | ||
275 | <listitem><para>Revert 21143-related support flag patch</para></listitem> | ||
276 | <listitem><para>Add HPPA/media-table debugging printk</para></listitem> | ||
277 | </itemizedlist> | ||
278 | </sect1> | ||
279 | |||
280 | <sect1><title>Version 0.9.5 (May 30, 2000)</title> | ||
281 | <itemizedlist> | ||
282 | <listitem><para>HPPA support (willy@puffingroup)</para></listitem> | ||
283 | <listitem><para>CSR6 bits and tulip.h cleanup (Chris Smith)</para></listitem> | ||
284 | <listitem><para>Improve debugging messages a bit</para></listitem> | ||
285 | <listitem><para>Add delay after CSR13 write in t21142_start_nway</para></listitem> | ||
286 | <listitem><para>Remove unused ETHER_STATS code</para></listitem> | ||
287 | <listitem><para>Convert 'extern inline' to 'static inline' in tulip.h (Chris Smith)</para></listitem> | ||
288 | <listitem><para>Update DS21143 support flags in tulip_chip_info[]</para></listitem> | ||
289 | <listitem><para>Use spin_lock_irq, not _irqsave/restore, in tulip_start_xmit()</para></listitem> | ||
290 | <listitem><para>Add locking to set_rx_mode()</para></listitem> | ||
291 | <listitem><para>Fix race with chip setting DescOwned bit (Hal Murray)</para></listitem> | ||
292 | <listitem><para>Request 100% of PIO and MMIO resource space assigned to card</para></listitem> | ||
293 | <listitem><para>Remove error message from pci_enable_device failure</para></listitem> | ||
294 | </itemizedlist> | ||
295 | </sect1> | ||
296 | |||
297 | <sect1><title>Version 0.9.4.3 (April 14, 2000)</title> | ||
298 | <itemizedlist> | ||
299 | <listitem><para>mod_timer fix (Hal Murray)</para></listitem> | ||
300 | <listitem><para>PNIC2 resuscitation (Chris Smith)</para></listitem> | ||
301 | </itemizedlist> | ||
302 | </sect1> | ||
303 | |||
304 | <sect1><title>Version 0.9.4.2 (March 21, 2000)</title> | ||
305 | <itemizedlist> | ||
306 | <listitem><para>Fix 21041 CSR7, CSR13/14/15 handling</para></listitem> | ||
307 | <listitem><para>Merge some PCI ids from tulip 0.91x</para></listitem> | ||
308 | <listitem><para>Merge some HAS_xxx flags and flag settings from tulip 0.91x</para></listitem> | ||
309 | <listitem><para>asm/io.h fix (submitted by many) and cleanup</para></listitem> | ||
310 | <listitem><para>s/HAS_NWAY143/HAS_NWAY/</para></listitem> | ||
311 | <listitem><para>Cleanup 21041 mode reporting</para></listitem> | ||
312 | <listitem><para>Small code cleanups</para></listitem> | ||
313 | </itemizedlist> | ||
314 | </sect1> | ||
315 | |||
316 | <sect1><title>Version 0.9.4.1 (March 18, 2000)</title> | ||
317 | <itemizedlist> | ||
318 | <listitem><para>Finish PCI DMA conversion (davem)</para></listitem> | ||
319 | <listitem><para>Do not netif_start_queue() at end of tulip_tx_timeout() (kuznet)</para></listitem> | ||
320 | <listitem><para>PCI DMA fix (kuznet)</para></listitem> | ||
321 | <listitem><para>eeprom.c code cleanup</para></listitem> | ||
322 | <listitem><para>Remove Xircom Tulip crud</para></listitem> | ||
323 | </itemizedlist> | ||
324 | </sect1> | ||
325 | </chapter> | ||
326 | |||
327 | </book> | ||
diff --git a/Documentation/DocBook/usb.tmpl b/Documentation/DocBook/usb.tmpl new file mode 100644 index 000000000000..f3ef0bf435e9 --- /dev/null +++ b/Documentation/DocBook/usb.tmpl | |||
@@ -0,0 +1,979 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="Linux-USB-API"> | ||
6 | <bookinfo> | ||
7 | <title>The Linux-USB Host Side API</title> | ||
8 | |||
9 | <legalnotice> | ||
10 | <para> | ||
11 | This documentation is free software; you can redistribute | ||
12 | it and/or modify it under the terms of the GNU General Public | ||
13 | License as published by the Free Software Foundation; either | ||
14 | version 2 of the License, or (at your option) any later | ||
15 | version. | ||
16 | </para> | ||
17 | |||
18 | <para> | ||
19 | This program is distributed in the hope that it will be | ||
20 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
21 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
22 | See the GNU General Public License for more details. | ||
23 | </para> | ||
24 | |||
25 | <para> | ||
26 | You should have received a copy of the GNU General Public | ||
27 | License along with this program; if not, write to the Free | ||
28 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
29 | MA 02111-1307 USA | ||
30 | </para> | ||
31 | |||
32 | <para> | ||
33 | For more details see the file COPYING in the source | ||
34 | distribution of Linux. | ||
35 | </para> | ||
36 | </legalnotice> | ||
37 | </bookinfo> | ||
38 | |||
39 | <toc></toc> | ||
40 | |||
41 | <chapter id="intro"> | ||
42 | <title>Introduction to USB on Linux</title> | ||
43 | |||
44 | <para>A Universal Serial Bus (USB) is used to connect a host, | ||
45 | such as a PC or workstation, to a number of peripheral | ||
46 | devices. USB uses a tree structure, with the host at the | ||
47 | root (the system's master), hubs as interior nodes, and | ||
48 | peripheral devices as leaves (and slaves). | ||
49 | Modern PCs support several such trees of USB devices, usually | ||
50 | one USB 2.0 tree (480 Mbit/sec each) with | ||
51 | a few USB 1.1 trees (12 Mbit/sec each) that are used when you | ||
52 | connect a USB 1.1 device directly to the machine's "root hub". | ||
53 | </para> | ||
54 | |||
55 | <para>That master/slave asymmetry was designed in part for | ||
56 | ease of use. It is not physically possible to assemble | ||
57 | (legal) USB cables incorrectly: all upstream "to-the-host" | ||
58 | connectors are the rectangular type, matching the sockets on | ||
59 | root hubs, and the downstream type are the squarish type | ||
60 | (or they are built in to the peripheral). | ||
61 | Software doesn't need to deal with distributed autoconfiguration | ||
62 | since the pre-designated master node manages all that. | ||
63 | At the electrical level, bus protocol overhead is reduced by | ||
64 | eliminating arbitration and moving scheduling into host software. | ||
65 | </para> | ||
66 | |||
67 | <para>USB 1.0 was announced in January 1996, and was revised | ||
68 | as USB 1.1 (with improvements in hub specification and | ||
69 | support for interrupt-out transfers) in September 1998. | ||
70 | USB 2.0 was released in April 2000, including high speed | ||
71 | transfers and transaction translating hubs (used for USB 1.1 | ||
72 | and 1.0 backward compatibility). | ||
73 | </para> | ||
74 | |||
75 | <para>USB support was added to Linux early in the 2.2 kernel series | ||
76 | shortly before the 2.3 development forked off. Updates | ||
77 | from 2.3 were regularly folded back into 2.2 releases, bringing | ||
78 | new features such as <filename>/sbin/hotplug</filename> support, | ||
79 | more drivers, and more robustness. | ||
80 | The 2.5 kernel series continued such improvements, and also | ||
81 | worked on USB 2.0 support, | ||
82 | higher performance, | ||
83 | better consistency between host controller drivers, | ||
84 | API simplification (to make bugs less likely), | ||
85 | and providing internal "kerneldoc" documentation. | ||
86 | </para> | ||
87 | |||
88 | <para>Linux can run inside USB devices as well as on | ||
89 | the hosts that control the devices. | ||
90 | Because the Linux 2.x USB support evolved to support mass market | ||
91 | platforms such as Apple Macintosh or PC-compatible systems, | ||
92 | it didn't address design concerns for those types of USB systems. | ||
93 | So it can't be used inside mass-market PDAs, or other peripherals. | ||
94 | USB device drivers running inside those Linux peripherals | ||
95 | don't do the same things as the ones running inside hosts, | ||
96 | and so they've been given a different name: | ||
97 | they're called <emphasis>gadget drivers</emphasis>. | ||
98 | This document does not present gadget drivers. | ||
99 | </para> | ||
100 | |||
101 | </chapter> | ||
102 | |||
103 | <chapter id="host"> | ||
104 | <title>USB Host-Side API Model</title> | ||
105 | |||
106 | <para>Within the kernel, | ||
107 | host-side drivers for USB devices talk to the "usbcore" APIs. | ||
108 | There are two types of public "usbcore" APIs, targeted at two different | ||
109 | layers of USB driver. Those are | ||
110 | <emphasis>general purpose</emphasis> drivers, exposed through | ||
111 | driver frameworks such as block, character, or network devices; | ||
112 | and drivers that are <emphasis>part of the core</emphasis>, | ||
113 | which are involved in managing a USB bus. | ||
114 | Such core drivers include the <emphasis>hub</emphasis> driver, | ||
115 | which manages trees of USB devices, and several different kinds | ||
116 | of <emphasis>host controller driver (HCD)</emphasis>, | ||
117 | which control individual busses. | ||
118 | </para> | ||
119 | |||
120 | <para>The device model seen by USB drivers is relatively complex. | ||
121 | </para> | ||
122 | |||
123 | <itemizedlist> | ||
124 | |||
125 | <listitem><para>USB supports four kinds of data transfer | ||
126 | (control, bulk, interrupt, and isochronous). Two transfer | ||
127 | types use bandwidth as it's available (control and bulk), | ||
128 | while the other two types of transfer (interrupt and isochronous) | ||
129 | are scheduled to provide guaranteed bandwidth. | ||
130 | </para></listitem> | ||
131 | |||
132 | <listitem><para>The device description model includes one or more | ||
133 | "configurations" per device, only one of which is active at a time. | ||
134 | Devices that are capable of high speed operation must also support | ||
135 | full speed configurations, along with a way to ask about the | ||
136 | "other speed" configurations that might be used. | ||
137 | </para></listitem> | ||
138 | |||
139 | <listitem><para>Configurations have one or more "interfaces", each | ||
140 | of which may have "alternate settings". Interfaces may be | ||
141 | standardized by USB "Class" specifications, or may be specific to | ||
142 | a vendor or device.</para> | ||
143 | |||
144 | <para>USB device drivers actually bind to interfaces, not devices. | ||
145 | Think of them as "interface drivers", though you | ||
146 | may not see many devices where the distinction is important. | ||
147 | <emphasis>Most USB devices are simple, with only one configuration, | ||
148 | one interface, and one alternate setting.</emphasis> | ||
149 | </para></listitem> | ||
150 | |||
151 | <listitem><para>Interfaces have one or more "endpoints", each of | ||
152 | which supports one type and direction of data transfer such as | ||
153 | "bulk out" or "interrupt in". The entire configuration may have | ||
154 | up to sixteen endpoints in each direction, allocated as needed | ||
155 | among all the interfaces. | ||
156 | </para></listitem> | ||
157 | |||
158 | <listitem><para>Data transfer on USB is packetized; each endpoint | ||
159 | has a maximum packet size. | ||
160 | Drivers must often be aware of conventions such as flagging the end | ||
161 | of bulk transfers using "short" (including zero length) packets. | ||
162 | </para></listitem> | ||
163 | |||
164 | <listitem><para>The Linux USB API supports synchronous calls for | ||
165 | control and bulk messaging. | ||
166 | It also supports asynchronous calls for all kinds of data transfer, | ||
167 | using request structures called "URBs" (USB Request Blocks). | ||
168 | </para></listitem> | ||
169 | |||
170 | </itemizedlist> | ||
171 | |||
172 | <para>Accordingly, the USB Core API exposed to device drivers | ||
173 | covers quite a lot of territory. You'll probably need to consult | ||
174 | the USB 2.0 specification, available online from www.usb.org at | ||
175 | no cost, as well as class or device specifications. | ||
176 | </para> | ||
177 | |||
178 | <para>The only host-side drivers that actually touch hardware | ||
179 | (reading/writing registers, handling IRQs, and so on) are the HCDs. | ||
180 | In theory, all HCDs provide the same functionality through the same | ||
181 | API. In practice, that's becoming more true on the 2.5 kernels, | ||
182 | but there are still differences that crop up especially with | ||
183 | fault handling. Different controllers don't necessarily report | ||
184 | the same aspects of failures, and recovery from faults (including | ||
185 | software-induced ones like unlinking an URB) isn't yet fully | ||
186 | consistent. | ||
187 | Device driver authors should make a point of doing disconnect | ||
188 | testing (while the device is active) with each different host | ||
189 | controller driver, to make sure drivers don't have bugs of | ||
190 | their own as well as to make sure they aren't relying on some | ||
191 | HCD-specific behavior. | ||
192 | (You will need external USB 1.1 and/or | ||
193 | USB 2.0 hubs to perform all those tests.) | ||
194 | </para> | ||
195 | |||
196 | </chapter> | ||
197 | |||
198 | <chapter><title>USB-Standard Types</title> | ||
199 | |||
200 | <para>In <filename>&lt;linux/usb_ch9.h&gt;</filename> you will find | ||
201 | the USB data types defined in chapter 9 of the USB specification. | ||
202 | These data types are used throughout USB, and in APIs including | ||
203 | this host side API, gadget APIs, and usbfs. | ||
204 | </para> | ||
205 | |||
206 | !Iinclude/linux/usb_ch9.h | ||
207 | |||
208 | </chapter> | ||
209 | |||
210 | <chapter><title>Host-Side Data Types and Macros</title> | ||
211 | |||
212 | <para>The host side API exposes several layers to drivers, some of | ||
213 | which are more necessary than others. | ||
214 | These support lifecycle models for host side drivers | ||
215 | and devices, and support passing buffers through usbcore to | ||
216 | some HCD that performs the I/O for the device driver. | ||
217 | </para> | ||
218 | |||
219 | |||
220 | !Iinclude/linux/usb.h | ||
221 | |||
222 | </chapter> | ||
223 | |||
224 | <chapter><title>USB Core APIs</title> | ||
225 | |||
226 | <para>There are two basic I/O models in the USB API. | ||
227 | The most elemental one is asynchronous: drivers submit requests | ||
228 | in the form of an URB, and the URB's completion callback | ||
229 | handles the next step. | ||
230 | All USB transfer types support that model, although there | ||
231 | are special cases for control URBs (which always have setup | ||
232 | and status stages, but may not have a data stage) and | ||
233 | isochronous URBs (which allow large packets and include | ||
234 | per-packet fault reports). | ||
235 | Built on top of that is synchronous API support, where a | ||
236 | driver calls a routine that allocates one or more URBs, | ||
237 | submits them, and waits until they complete. | ||
238 | There are synchronous wrappers for single-buffer control | ||
239 | and bulk transfers (which are awkward to use in some | ||
240 | driver disconnect scenarios), and for scatterlist based | ||
241 | streaming i/o (bulk or interrupt). | ||
242 | </para> | ||
243 | |||
244 | <para>USB drivers need to provide buffers that can be | ||
245 | used for DMA, although they don't necessarily need to | ||
246 | provide the DMA mapping themselves. | ||
247 | There are APIs to use when allocating DMA buffers, | ||
248 | which can prevent use of bounce buffers on some systems. | ||
249 | In some cases, drivers may be able to rely on 64bit DMA | ||
250 | to eliminate another kind of bounce buffer. | ||
251 | </para> | ||
252 | |||
253 | !Edrivers/usb/core/urb.c | ||
254 | !Edrivers/usb/core/message.c | ||
255 | !Edrivers/usb/core/file.c | ||
256 | !Edrivers/usb/core/usb.c | ||
257 | !Edrivers/usb/core/hub.c | ||
258 | </chapter> | ||
259 | |||
260 | <chapter><title>Host Controller APIs</title> | ||
261 | |||
262 | <para>These APIs are only for use by host controller drivers, | ||
263 | most of which implement standard register interfaces such as | ||
264 | EHCI, OHCI, or UHCI. | ||
265 | UHCI was one of the first interfaces, designed by Intel and | ||
266 | also used by VIA; it doesn't do much in hardware. | ||
267 | OHCI was designed later, to have the hardware do more work | ||
268 | (bigger transfers, tracking protocol state, and so on). | ||
269 | EHCI was designed with USB 2.0; its design has features that | ||
270 | resemble OHCI (hardware does much more work) as well as | ||
271 | UHCI (some parts of ISO support, TD list processing). | ||
272 | </para> | ||
273 | |||
274 | <para>There are host controllers other than the "big three", | ||
275 | although most PCI based controllers (and a few non-PCI based | ||
276 | ones) use one of those interfaces. | ||
277 | Not all host controllers use DMA; some use PIO, and there | ||
278 | is also a simulator. | ||
279 | </para> | ||
280 | |||
281 | <para>The same basic APIs are available to drivers for all | ||
282 | those controllers. | ||
283 | For historical reasons they are in two layers: | ||
284 | <structname>struct usb_bus</structname> is a rather thin | ||
285 | layer that became available in the 2.2 kernels, while | ||
286 | <structname>struct usb_hcd</structname> is a more featureful | ||
287 | layer (available in later 2.4 kernels and in 2.5) that | ||
288 | lets HCDs share common code, to shrink driver size | ||
289 | and significantly reduce hcd-specific behaviors. | ||
290 | </para> | ||
291 | |||
292 | !Edrivers/usb/core/hcd.c | ||
293 | !Edrivers/usb/core/hcd-pci.c | ||
294 | !Edrivers/usb/core/buffer.c | ||
295 | </chapter> | ||
296 | |||
297 | <chapter> | ||
298 | <title>The USB Filesystem (usbfs)</title> | ||
299 | |||
300 | <para>This chapter presents the Linux <emphasis>usbfs</emphasis>. | ||
301 | You may prefer to avoid writing new kernel code for your | ||
302 | USB driver; that's the problem that usbfs set out to solve. | ||
303 | User mode device drivers are usually packaged as applications | ||
304 | or libraries, and may use usbfs through some programming library | ||
305 | that wraps it. Such libraries include | ||
306 | <ulink url="http://libusb.sourceforge.net">libusb</ulink> | ||
307 | for C/C++, and | ||
308 | <ulink url="http://jUSB.sourceforge.net">jUSB</ulink> for Java. | ||
309 | </para> | ||
310 | |||
311 | <note><title>Unfinished</title> | ||
312 | <para>This particular documentation is incomplete, | ||
313 | especially with respect to the asynchronous mode. | ||
314 | As of kernel 2.5.66 the code and this (new) documentation | ||
315 | need to be cross-reviewed. | ||
316 | </para> | ||
317 | </note> | ||
318 | |||
319 | <para>Configure usbfs into Linux kernels by enabling the | ||
320 | <emphasis>USB filesystem</emphasis> option (CONFIG_USB_DEVICEFS), | ||
321 | and you get basic support for user mode USB device drivers. | ||
322 | Until relatively recently it was often (confusingly) called | ||
323 | <emphasis>usbdevfs</emphasis> although it wasn't solving what | ||
324 | <emphasis>devfs</emphasis> was. | ||
325 | Every USB device will appear in usbfs, regardless of whether or | ||
326 | not it has a kernel driver; but only devices with kernel drivers | ||
327 | show up in devfs. | ||
328 | </para> | ||
329 | |||
330 | <sect1> | ||
331 | <title>What files are in "usbfs"?</title> | ||
332 | |||
333 | <para>Conventionally mounted at | ||
334 | <filename>/proc/bus/usb</filename>, usbfs | ||
335 | features include: | ||
336 | <itemizedlist> | ||
337 | <listitem><para><filename>/proc/bus/usb/devices</filename> | ||
338 | ... a text file | ||
339 | showing each of the USB devices known to the kernel, | ||
340 | and their configuration descriptors. | ||
341 | You can also poll() this to learn about new devices. | ||
342 | </para></listitem> | ||
343 | <listitem><para><filename>/proc/bus/usb/BBB/DDD</filename> | ||
344 | ... magic files | ||
345 | exposing each device's configuration descriptors, and | ||
346 | supporting a series of ioctls for making device requests, | ||
347 | including I/O to devices. (Purely for access by programs.) | ||
348 | </para></listitem> | ||
349 | </itemizedlist> | ||
350 | </para> | ||
351 | |||
352 | <para> Each bus is given a number (BBB) based on when it was | ||
353 | enumerated; within each bus, each device is given a similar | ||
354 | number (DDD). | ||
355 | Those BBB/DDD paths are not "stable" identifiers; | ||
356 | expect them to change even if you always leave the devices | ||
357 | plugged in to the same hub port. | ||
358 | <emphasis>Don't even think of saving these in application | ||
359 | configuration files.</emphasis> | ||
360 | Stable identifiers are available, for user mode applications | ||
361 | that want to use them. HID and networking devices expose | ||
362 | these stable IDs, so that for example you can be sure that | ||
363 | you told the right UPS to power down its second server. | ||
364 | "usbfs" doesn't (yet) expose those IDs. | ||
365 | </para> | ||
366 | |||
367 | </sect1> | ||
368 | |||
369 | <sect1> | ||
370 | <title>Mounting and Access Control</title> | ||
371 | |||
372 | <para>There are a number of mount options for usbfs, which will | ||
373 | be of most interest to you if you need to override the default | ||
374 | access control policy. | ||
375 | That policy is that only root may read or write device files | ||
376 | (<filename>/proc/bus/usb/BBB/DDD</filename>) although anyone may read | ||
377 | the <filename>devices</filename> | ||
378 | or <filename>drivers</filename> files. | ||
379 | I/O requests to the device also need the CAP_SYS_RAWIO capability. | ||
380 | </para> | ||
381 | |||
382 | <para>The significance of that is that by default, all user mode | ||
383 | device drivers need super-user privileges. | ||
384 | You can change modes or ownership in a driver setup | ||
385 | when the device hotplugs, or maybe just start the | ||
386 | driver right then, as a privileged server (or some activity | ||
387 | within one). | ||
388 | That's the most secure approach for multi-user systems, | ||
389 | but for single user systems ("trusted" by that user) | ||
390 | it's more convenient just to grant everyone all access | ||
391 | (using the <emphasis>devmode=0666</emphasis> option) | ||
392 | so the driver can start whenever it's needed. | ||
393 | </para> | ||
394 | |||
395 | <para>The mount options for usbfs, usable in /etc/fstab or | ||
396 | in command line invocations of <emphasis>mount</emphasis>, are: | ||
397 | |||
398 | <variablelist> | ||
399 | <varlistentry> | ||
400 | <term><emphasis>busgid</emphasis>=NNNNN</term> | ||
401 | <listitem><para>Controls the GID used for the | ||
402 | /proc/bus/usb/BBB | ||
403 | directories. (Default: 0)</para></listitem></varlistentry> | ||
404 | <varlistentry><term><emphasis>busmode</emphasis>=MMM</term> | ||
405 | <listitem><para>Controls the file mode used for the | ||
406 | /proc/bus/usb/BBB | ||
407 | directories. (Default: 0555) | ||
408 | </para></listitem></varlistentry> | ||
409 | <varlistentry><term><emphasis>busuid</emphasis>=NNNNN</term> | ||
410 | <listitem><para>Controls the UID used for the | ||
411 | /proc/bus/usb/BBB | ||
412 | directories. (Default: 0)</para></listitem></varlistentry> | ||
413 | |||
414 | <varlistentry><term><emphasis>devgid</emphasis>=NNNNN</term> | ||
415 | <listitem><para>Controls the GID used for the | ||
416 | /proc/bus/usb/BBB/DDD | ||
417 | files. (Default: 0)</para></listitem></varlistentry> | ||
418 | <varlistentry><term><emphasis>devmode</emphasis>=MMM</term> | ||
419 | <listitem><para>Controls the file mode used for the | ||
420 | /proc/bus/usb/BBB/DDD | ||
421 | files. (Default: 0644)</para></listitem></varlistentry> | ||
422 | <varlistentry><term><emphasis>devuid</emphasis>=NNNNN</term> | ||
423 | <listitem><para>Controls the UID used for the | ||
424 | /proc/bus/usb/BBB/DDD | ||
425 | files. (Default: 0)</para></listitem></varlistentry> | ||
426 | |||
427 | <varlistentry><term><emphasis>listgid</emphasis>=NNNNN</term> | ||
428 | <listitem><para>Controls the GID used for the | ||
429 | /proc/bus/usb/devices and drivers files. | ||
430 | (Default: 0)</para></listitem></varlistentry> | ||
431 | <varlistentry><term><emphasis>listmode</emphasis>=MMM</term> | ||
432 | <listitem><para>Controls the file mode used for the | ||
433 | /proc/bus/usb/devices and drivers files. | ||
434 | (Default: 0444)</para></listitem></varlistentry> | ||
435 | <varlistentry><term><emphasis>listuid</emphasis>=NNNNN</term> | ||
436 | <listitem><para>Controls the UID used for the | ||
437 | /proc/bus/usb/devices and drivers files. | ||
438 | (Default: 0)</para></listitem></varlistentry> | ||
439 | </variablelist> | ||
440 | |||
441 | </para> | ||
442 | |||
443 | <para>Note that many Linux distributions hard-wire the mount options | ||
444 | for usbfs in their init scripts, such as | ||
445 | <filename>/etc/rc.d/rc.sysinit</filename>, | ||
446 | rather than making it easy to set this per-system | ||
447 | policy in <filename>/etc/fstab</filename>. | ||
448 | </para> | ||
449 | |||
450 | </sect1> | ||
451 | |||
452 | <sect1> | ||
453 | <title>/proc/bus/usb/devices</title> | ||
454 | |||
455 | <para>This file is handy for status viewing tools in user | ||
456 | mode, which can scan the text format and ignore most of it. | ||
457 | More detailed device status (including class and vendor | ||
458 | status) is available from device-specific files. | ||
459 | For information about the current format of this file, | ||
460 | see the | ||
461 | <filename>Documentation/usb/proc_usb_info.txt</filename> | ||
462 | file in your Linux kernel sources. | ||
463 | </para> | ||
464 | |||
465 | <para>Otherwise the main use for this file from programs | ||
466 | is to poll() it to get notifications of usb devices | ||
467 | as they're plugged or unplugged. | ||
468 | To see what changed, you'd need to read the file and | ||
469 | compare "before" and "after" contents, scan the filesystem, | ||
470 | or see its hotplug event. | ||
471 | </para> | ||
472 | |||
473 | </sect1> | ||
474 | |||
475 | <sect1> | ||
476 | <title>/proc/bus/usb/BBB/DDD</title> | ||
477 | |||
478 | <para>Use these files in one of these basic ways: | ||
479 | </para> | ||
480 | |||
481 | <para><emphasis>They can be read,</emphasis> | ||
482 | producing first the device descriptor | ||
483 | (18 bytes) and then the descriptors for the current configuration. | ||
484 | See the USB 2.0 spec for details about those binary data formats. | ||
485 | You'll need to convert most multibyte values from little endian | ||
486 | format to your native host byte order, although a few of the | ||
487 | fields in the device descriptor (both of the BCD-encoded fields, | ||
488 | and the vendor and product IDs) will be byteswapped for you. | ||
489 | Note that configuration descriptors include descriptors for | ||
490 | interfaces, altsettings, endpoints, and maybe additional | ||
491 | class descriptors. | ||
492 | </para> | ||
493 | |||
494 | <para><emphasis>Perform USB operations</emphasis> using | ||
495 | <emphasis>ioctl()</emphasis> requests to make endpoint I/O | ||
496 | requests (synchronously or asynchronously) or manage | ||
497 | the device. | ||
498 | These requests need the CAP_SYS_RAWIO capability, | ||
499 | as well as filesystem access permissions. | ||
500 | Only one ioctl request can be made on one of these | ||
501 | device files at a time. | ||
502 | This means that if you are synchronously reading an endpoint | ||
503 | from one thread, you won't be able to write to a different | ||
504 | endpoint from another thread until the read completes. | ||
505 | This works for <emphasis>half duplex</emphasis> protocols, | ||
506 | but otherwise you'd use asynchronous i/o requests. | ||
507 | </para> | ||
508 | |||
509 | </sect1> | ||
510 | |||
511 | |||
512 | <sect1> | ||
513 | <title>Life Cycle of User Mode Drivers</title> | ||
514 | |||
515 | <para>Such a driver first needs to find a device file | ||
516 | for a device it knows how to handle. | ||
517 | Maybe it was told about it because a | ||
518 | <filename>/sbin/hotplug</filename> event handling agent | ||
519 | chose that driver to handle the new device. | ||
520 | Or maybe it's an application that scans all the | ||
521 | /proc/bus/usb device files, and ignores most devices. | ||
522 | In either case, it should <function>read()</function> all | ||
523 | the descriptors from the device file, | ||
524 | and check them against what it knows how to handle. | ||
525 | It might just reject everything except a particular | ||
526 | vendor and product ID, or need a more complex policy. | ||
527 | </para> | ||
528 | |||
529 | <para>Never assume there will only be one such device | ||
530 | on the system at a time! | ||
531 | If your code can't handle more than one device at | ||
532 | a time, at least detect when there's more than one, and | ||
533 | have your users choose which device to use. | ||
534 | </para> | ||
535 | |||
536 | <para>Once your user mode driver knows what device to use, | ||
537 | it interacts with it in either of two styles. | ||
538 | The simple style is to make only control requests; some | ||
539 | devices don't need more complex interactions than those. | ||
540 | (An example might be software using vendor-specific control | ||
541 | requests for some initialization or configuration tasks, | ||
542 | with a kernel driver for the rest.) | ||
543 | </para> | ||
544 | |||
545 | <para>More likely, you need a more complex style driver: | ||
546 | one using non-control endpoints, reading or writing data | ||
547 | and claiming exclusive use of an interface. | ||
548 | <emphasis>Bulk</emphasis> transfers are easiest to use, | ||
549 | but only their sibling <emphasis>interrupt</emphasis> transfers | ||
550 | work with low speed devices. | ||
551 | Both interrupt and <emphasis>isochronous</emphasis> transfers | ||
552 | offer service guarantees because their bandwidth is reserved. | ||
553 | Such "periodic" transfers are awkward to use through usbfs, | ||
554 | unless you're using the asynchronous calls. However, interrupt | ||
555 | transfers can also be used in a synchronous "one shot" style. | ||
556 | </para> | ||
557 | |||
558 | <para>Your user-mode driver should never need to worry | ||
559 | about cleaning up request state when the device is | ||
560 | disconnected, although it should close its open file | ||
561 | descriptors as soon as it starts seeing the ENODEV | ||
562 | errors. | ||
563 | </para> | ||
564 | |||
565 | </sect1> | ||
566 | |||
567 | <sect1><title>The ioctl() Requests</title> | ||
568 | |||
569 | <para>To use these ioctls, you need to include the following | ||
570 | headers in your userspace program: | ||
571 | <programlisting>#include <linux/usb.h> | ||
572 | #include <linux/usbdevice_fs.h> | ||
573 | #include <asm/byteorder.h></programlisting> | ||
574 | The standard USB device model requests, from "Chapter 9" of | ||
575 | the USB 2.0 specification, are automatically included from | ||
576 | the <filename><linux/usb_ch9.h></filename> header. | ||
577 | </para> | ||
578 | |||
579 | <para>Unless noted otherwise, the ioctl requests | ||
580 | described here will | ||
581 | update the modification time on the usbfs file to which | ||
582 | they are applied (unless they fail). | ||
583 | A return of zero indicates success; otherwise, a | ||
584 | standard USB error code is returned. (These are | ||
585 | documented in | ||
586 | <filename>Documentation/usb/error-codes.txt</filename> | ||
587 | in your kernel sources.) | ||
588 | </para> | ||
589 | |||
590 | <para>Each of these files multiplexes access to several | ||
591 | I/O streams, one per endpoint. | ||
592 | Each device has one control endpoint (endpoint zero) | ||
593 | which supports a limited RPC style access. | ||
594 | Devices are configured | ||
595 | by khubd (in the kernel) setting a device-wide | ||
596 | <emphasis>configuration</emphasis> that affects things | ||
597 | like power consumption and basic functionality. | ||
598 | The endpoints are part of USB <emphasis>interfaces</emphasis>, | ||
599 | which may have <emphasis>altsettings</emphasis> | ||
600 | affecting things like which endpoints are available. | ||
601 | Many devices only have a single configuration and interface, | ||
602 | so drivers for them will ignore configurations and altsettings. | ||
603 | </para> | ||
604 | |||
605 | |||
606 | <sect2> | ||
607 | <title>Management/Status Requests</title> | ||
608 | |||
609 | <para>A number of usbfs requests don't deal very directly | ||
610 | with device I/O. | ||
611 | They mostly relate to device management and status. | ||
612 | These are all synchronous requests. | ||
613 | </para> | ||
614 | |||
615 | <variablelist> | ||
616 | |||
617 | <varlistentry><term>USBDEVFS_CLAIMINTERFACE</term> | ||
618 | <listitem><para>This is used to force usbfs to | ||
619 | claim a specific interface, | ||
620 | which has not previously been claimed by usbfs or any other | ||
621 | kernel driver. | ||
622 | The ioctl parameter is an integer holding the number of | ||
623 | the interface (bInterfaceNumber from descriptor). | ||
624 | </para><para> | ||
625 | Note that if your driver doesn't claim an interface | ||
626 | before trying to use one of its endpoints, and no | ||
627 | other driver has bound to it, then the interface is | ||
628 | automatically claimed by usbfs. | ||
629 | </para><para> | ||
630 | This claim will be released by a RELEASEINTERFACE ioctl, | ||
631 | or by closing the file descriptor. | ||
632 | File modification time is not updated by this request. | ||
633 | </para></listitem></varlistentry> | ||
634 | |||
635 | <varlistentry><term>USBDEVFS_CONNECTINFO</term> | ||
636 | <listitem><para>Says whether the device is lowspeed. | ||
637 | The ioctl parameter points to a structure like this: | ||
638 | <programlisting>struct usbdevfs_connectinfo { | ||
639 | unsigned int devnum; | ||
640 | unsigned char slow; | ||
641 | }; </programlisting> | ||
642 | File modification time is not updated by this request. | ||
643 | </para><para> | ||
644 | <emphasis>You can't tell whether a "not slow" | ||
645 | device is connected at high speed (480 MBit/sec) | ||
646 | or just full speed (12 MBit/sec).</emphasis> | ||
647 | You should know the devnum value already; | ||
648 | it's the DDD value of the device file name. | ||
649 | </para></listitem></varlistentry> | ||
650 | |||
651 | <varlistentry><term>USBDEVFS_GETDRIVER</term> | ||
652 | <listitem><para>Returns the name of the kernel driver | ||
653 | bound to a given interface (a string). Parameter | ||
654 | is a pointer to this structure, which is modified: | ||
655 | <programlisting>struct usbdevfs_getdriver { | ||
656 | unsigned int interface; | ||
657 | char driver[USBDEVFS_MAXDRIVERNAME + 1]; | ||
658 | };</programlisting> | ||
659 | File modification time is not updated by this request. | ||
660 | </para></listitem></varlistentry> | ||
661 | |||
662 | <varlistentry><term>USBDEVFS_IOCTL</term> | ||
663 | <listitem><para>Passes a request from userspace through | ||
664 | to a kernel driver that has an ioctl entry in the | ||
665 | <emphasis>struct usb_driver</emphasis> it registered. | ||
666 | <programlisting>struct usbdevfs_ioctl { | ||
667 | int ifno; | ||
668 | int ioctl_code; | ||
669 | void *data; | ||
670 | }; | ||
671 | |||
672 | /* user mode call looks like this. | ||
673 | * 'request' becomes the driver->ioctl() 'code' parameter. | ||
674 | * the size of 'param' is encoded in 'request', and that data | ||
675 | * is copied to or from the driver->ioctl() 'buf' parameter. | ||
676 | */ | ||
677 | static int | ||
678 | usbdev_ioctl (int fd, int ifno, unsigned request, void *param) | ||
679 | { | ||
680 | struct usbdevfs_ioctl wrapper; | ||
681 | |||
682 | wrapper.ifno = ifno; | ||
683 | wrapper.ioctl_code = request; | ||
684 | wrapper.data = param; | ||
685 | |||
686 | return ioctl (fd, USBDEVFS_IOCTL, &wrapper); | ||
687 | } </programlisting> | ||
688 | File modification time is not updated by this request. | ||
689 | </para><para> | ||
690 | This request lets kernel drivers talk to user mode code | ||
691 | through filesystem operations even when they don't create | ||
692 | a character or block special device. | ||
693 | It's also been used to do things like ask devices what | ||
694 | device special file should be used. | ||
695 | Two pre-defined ioctls are used | ||
696 | to disconnect and reconnect kernel drivers, so | ||
697 | that user mode code can completely manage binding | ||
698 | and configuration of devices. | ||
699 | </para></listitem></varlistentry> | ||
700 | |||
701 | <varlistentry><term>USBDEVFS_RELEASEINTERFACE</term> | ||
702 | <listitem><para>This is used to release the claim usbfs | ||
703 | made on interface, either implicitly or because of a | ||
704 | USBDEVFS_CLAIMINTERFACE call, before the file | ||
705 | descriptor is closed. | ||
706 | The ioctl parameter is an integer holding the number of | ||
707 | the interface (bInterfaceNumber from descriptor). | ||
708 | File modification time is not updated by this request. | ||
709 | </para><warning><para> | ||
710 | <emphasis>No security check is made to ensure | ||
711 | that the task which made the claim is the one | ||
712 | which is releasing it. | ||
713 | This means that a user mode driver may interfere | ||
714 | with other ones. </emphasis> | ||
715 | </para></warning></listitem></varlistentry> | ||
716 | |||
717 | <varlistentry><term>USBDEVFS_RESETEP</term> | ||
718 | <listitem><para>Resets the data toggle value for an endpoint | ||
719 | (bulk or interrupt) to DATA0. | ||
720 | The ioctl parameter is an integer endpoint number | ||
721 | (1 to 15, as identified in the endpoint descriptor), | ||
722 | with USB_DIR_IN added if the device's endpoint sends | ||
723 | data to the host. | ||
724 | </para><warning><para> | ||
725 | <emphasis>Avoid using this request. | ||
726 | It should probably be removed.</emphasis> | ||
727 | Using it typically means the device and driver will lose | ||
728 | toggle synchronization. If you really lost synchronization, | ||
729 | you likely need to completely handshake with the device, | ||
730 | using a request like CLEAR_HALT | ||
731 | or SET_INTERFACE. | ||
732 | </para></warning></listitem></varlistentry> | ||
733 | |||
734 | </variablelist> | ||
735 | |||
736 | </sect2> | ||
737 | |||
738 | <sect2> | ||
739 | <title>Synchronous I/O Support</title> | ||
740 | |||
741 | <para>Synchronous requests involve the kernel blocking | ||
742 | until the user mode request completes, either by | ||
743 | finishing successfully or by reporting an error. | ||
744 | In most cases this is the simplest way to use usbfs, | ||
745 | although as noted above it does prevent performing I/O | ||
746 | to more than one endpoint at a time. | ||
747 | </para> | ||
748 | |||
749 | <variablelist> | ||
750 | |||
751 | <varlistentry><term>USBDEVFS_BULK</term> | ||
752 | <listitem><para>Issues a bulk read or write request to the | ||
753 | device. | ||
754 | The ioctl parameter is a pointer to this structure: | ||
755 | <programlisting>struct usbdevfs_bulktransfer { | ||
756 | unsigned int ep; | ||
757 | unsigned int len; | ||
758 | unsigned int timeout; /* in milliseconds */ | ||
759 | void *data; | ||
760 | };</programlisting> | ||
761 | </para><para>The "ep" value identifies a | ||
762 | bulk endpoint number (1 to 15, as identified in an endpoint | ||
763 | descriptor), | ||
764 | masked with USB_DIR_IN when referring to an endpoint which | ||
765 | sends data to the host from the device. | ||
766 | The length of the data buffer is identified by "len". | ||
767 | Recent kernels support requests up to about 128KBytes. | ||
768 | <emphasis>FIXME say how read length is returned, | ||
769 | and how short reads are handled.</emphasis> | ||
770 | </para></listitem></varlistentry> | ||
771 | |||
772 | <varlistentry><term>USBDEVFS_CLEAR_HALT</term> | ||
773 | <listitem><para>Clears endpoint halt (stall) and | ||
774 | resets the endpoint toggle. This is only | ||
775 | meaningful for bulk or interrupt endpoints. | ||
776 | The ioctl parameter is an integer endpoint number | ||
777 | (1 to 15, as identified in an endpoint descriptor), | ||
778 | masked with USB_DIR_IN when referring to an endpoint which | ||
779 | sends data to the host from the device. | ||
780 | </para><para> | ||
781 | Use this on bulk or interrupt endpoints which have | ||
782 | stalled, returning <emphasis>-EPIPE</emphasis> status | ||
783 | to a data transfer request. | ||
784 | Do not issue the control request directly, since | ||
785 | that could invalidate the host's record of the | ||
786 | data toggle. | ||
787 | </para></listitem></varlistentry> | ||
788 | |||
789 | <varlistentry><term>USBDEVFS_CONTROL</term> | ||
790 | <listitem><para>Issues a control request to the device. | ||
791 | The ioctl parameter points to a structure like this: | ||
792 | <programlisting>struct usbdevfs_ctrltransfer { | ||
793 | __u8 bRequestType; | ||
794 | __u8 bRequest; | ||
795 | __u16 wValue; | ||
796 | __u16 wIndex; | ||
797 | __u16 wLength; | ||
798 | __u32 timeout; /* in milliseconds */ | ||
799 | void *data; | ||
800 | };</programlisting> | ||
801 | </para><para> | ||
802 | The first eight bytes of this structure are the contents | ||
803 | of the SETUP packet to be sent to the device; see the | ||
804 | USB 2.0 specification for details. | ||
805 | The bRequestType value is composed by combining a | ||
806 | USB_TYPE_* value, a USB_DIR_* value, and a | ||
807 | USB_RECIP_* value (from | ||
808 | <emphasis><linux/usb.h></emphasis>). | ||
809 | If wLength is nonzero, it describes the length of the data | ||
810 | buffer, which is either written to the device | ||
811 | (USB_DIR_OUT) or read from the device (USB_DIR_IN). | ||
812 | </para><para> | ||
813 | At this writing, you can't transfer more than 4 KBytes | ||
814 | of data to or from a device; usbfs has a limit, and | ||
815 | some host controller drivers have a limit. | ||
816 | (That's not usually a problem.) | ||
817 | <emphasis>Also</emphasis> there's no way to say it's | ||
818 | not OK to get a short read back from the device. | ||
819 | </para></listitem></varlistentry> | ||
820 | |||
821 | <varlistentry><term>USBDEVFS_RESET</term> | ||
822 | <listitem><para>Does a USB level device reset. | ||
823 | The ioctl parameter is ignored. | ||
824 | After the reset, this rebinds all device interfaces. | ||
825 | File modification time is not updated by this request. | ||
826 | </para><warning><para> | ||
827 | <emphasis>Avoid using this call</emphasis> | ||
828 | until some usbcore bugs get fixed, | ||
829 | since it does not fully synchronize device, interface, | ||
830 | and driver (not just usbfs) state. | ||
831 | </para></warning></listitem></varlistentry> | ||
832 | |||
833 | <varlistentry><term>USBDEVFS_SETINTERFACE</term> | ||
834 | <listitem><para>Sets the alternate setting for an | ||
835 | interface. The ioctl parameter is a pointer to a | ||
836 | structure like this: | ||
837 | <programlisting>struct usbdevfs_setinterface { | ||
838 | unsigned int interface; | ||
839 | unsigned int altsetting; | ||
840 | }; </programlisting> | ||
841 | File modification time is not updated by this request. | ||
842 | </para><para> | ||
843 | Those struct members are from some interface descriptor | ||
844 | applying to the current configuration. | ||
845 | The interface number is the bInterfaceNumber value, and | ||
846 | the altsetting number is the bAlternateSetting value. | ||
847 | (This resets each endpoint in the interface.) | ||
848 | </para></listitem></varlistentry> | ||
849 | |||
850 | <varlistentry><term>USBDEVFS_SETCONFIGURATION</term> | ||
851 | <listitem><para>Issues the | ||
852 | <function>usb_set_configuration</function> call | ||
853 | for the device. | ||
854 | The parameter is an integer holding the number of | ||
855 | a configuration (bConfigurationValue from descriptor). | ||
856 | File modification time is not updated by this request. | ||
857 | </para><warning><para> | ||
858 | <emphasis>Avoid using this call</emphasis> | ||
859 | until some usbcore bugs get fixed, | ||
860 | since it does not fully synchronize device, interface, | ||
861 | and driver (not just usbfs) state. | ||
862 | </para></warning></listitem></varlistentry> | ||
863 | |||
864 | </variablelist> | ||
865 | </sect2> | ||
866 | |||
867 | <sect2> | ||
868 | <title>Asynchronous I/O Support</title> | ||
869 | |||
870 | <para>As mentioned above, there are situations where it may be | ||
871 | important to initiate concurrent operations from user mode code. | ||
872 | This is particularly important for periodic transfers | ||
873 | (interrupt and isochronous), but it can be used for other | ||
874 | kinds of USB requests too. | ||
875 | In such cases, the asynchronous requests described here | ||
876 | are essential. Rather than submitting one request and having | ||
877 | the kernel block until it completes, the blocking is separate. | ||
878 | </para> | ||
879 | |||
880 | <para>These requests are packaged into a structure that | ||
881 | resembles the URB used by kernel device drivers. | ||
882 | (No POSIX Async I/O support here, sorry.) | ||
883 | It identifies the endpoint type (USBDEVFS_URB_TYPE_*), | ||
884 | endpoint (number, masked with USB_DIR_IN as appropriate), | ||
885 | buffer and length, and a user "context" value serving to | ||
886 | uniquely identify each request. | ||
887 | (It's usually a pointer to per-request data.) | ||
888 | Flags can modify requests (not as many as supported for | ||
889 | kernel drivers). | ||
890 | </para> | ||
891 | |||
892 | <para>Each request can specify a realtime signal number | ||
893 | (between SIGRTMIN and SIGRTMAX, inclusive) to request a | ||
894 | signal be sent when the request completes. | ||
895 | </para> | ||
896 | |||
897 | <para>When usbfs returns these urbs, the status value | ||
898 | is updated, and the buffer may have been modified. | ||
899 | Except for isochronous transfers, the actual_length is | ||
900 | updated to say how many bytes were transferred; if the | ||
901 | USBDEVFS_URB_DISABLE_SPD flag is set | ||
902 | ("short packets are not OK") and fewer bytes were read | ||
903 | than were requested, then you get an error report. | ||
904 | </para> | ||
905 | |||
906 | <programlisting>struct usbdevfs_iso_packet_desc { | ||
907 | unsigned int length; | ||
908 | unsigned int actual_length; | ||
909 | unsigned int status; | ||
910 | }; | ||
911 | |||
912 | struct usbdevfs_urb { | ||
913 | unsigned char type; | ||
914 | unsigned char endpoint; | ||
915 | int status; | ||
916 | unsigned int flags; | ||
917 | void *buffer; | ||
918 | int buffer_length; | ||
919 | int actual_length; | ||
920 | int start_frame; | ||
921 | int number_of_packets; | ||
922 | int error_count; | ||
923 | unsigned int signr; | ||
924 | void *usercontext; | ||
925 | struct usbdevfs_iso_packet_desc iso_frame_desc[]; | ||
926 | };</programlisting> | ||
927 | |||
928 | <para> For these asynchronous requests, the file modification | ||
929 | time reflects when the request was initiated. | ||
930 | This contrasts with their use with the synchronous requests, | ||
931 | where it reflects when requests complete. | ||
932 | </para> | ||
933 | |||
934 | <variablelist> | ||
935 | |||
936 | <varlistentry><term>USBDEVFS_DISCARDURB</term> | ||
937 | <listitem><para> | ||
938 | <emphasis>TBS</emphasis> | ||
939 | File modification time is not updated by this request. | ||
940 | </para><para> | ||
941 | </para></listitem></varlistentry> | ||
942 | |||
943 | <varlistentry><term>USBDEVFS_DISCSIGNAL</term> | ||
944 | <listitem><para> | ||
945 | <emphasis>TBS</emphasis> | ||
946 | File modification time is not updated by this request. | ||
947 | </para><para> | ||
948 | </para></listitem></varlistentry> | ||
949 | |||
950 | <varlistentry><term>USBDEVFS_REAPURB</term> | ||
951 | <listitem><para> | ||
952 | <emphasis>TBS</emphasis> | ||
953 | File modification time is not updated by this request. | ||
954 | </para><para> | ||
955 | </para></listitem></varlistentry> | ||
956 | |||
957 | <varlistentry><term>USBDEVFS_REAPURBNDELAY</term> | ||
958 | <listitem><para> | ||
959 | <emphasis>TBS</emphasis> | ||
960 | File modification time is not updated by this request. | ||
961 | </para><para> | ||
962 | </para></listitem></varlistentry> | ||
963 | |||
964 | <varlistentry><term>USBDEVFS_SUBMITURB</term> | ||
965 | <listitem><para> | ||
966 | <emphasis>TBS</emphasis> | ||
967 | </para><para> | ||
968 | </para></listitem></varlistentry> | ||
969 | |||
970 | </variablelist> | ||
971 | </sect2> | ||
972 | |||
973 | </sect1> | ||
974 | |||
975 | </chapter> | ||
976 | |||
977 | </book> | ||
978 | <!-- vim:syntax=sgml:sw=4 | ||
979 | --> | ||
diff --git a/Documentation/DocBook/via-audio.tmpl b/Documentation/DocBook/via-audio.tmpl new file mode 100644 index 000000000000..36e642147d6b --- /dev/null +++ b/Documentation/DocBook/via-audio.tmpl | |||
@@ -0,0 +1,597 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="ViaAudioGuide"> | ||
6 | <bookinfo> | ||
7 | <title>Via 686 Audio Driver for Linux</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Jeff</firstname> | ||
12 | <surname>Garzik</surname> | ||
13 | </author> | ||
14 | </authorgroup> | ||
15 | |||
16 | <copyright> | ||
17 | <year>1999-2001</year> | ||
18 | <holder>Jeff Garzik</holder> | ||
19 | </copyright> | ||
20 | |||
21 | <legalnotice> | ||
22 | <para> | ||
23 | This documentation is free software; you can redistribute | ||
24 | it and/or modify it under the terms of the GNU General Public | ||
25 | License as published by the Free Software Foundation; either | ||
26 | version 2 of the License, or (at your option) any later | ||
27 | version. | ||
28 | </para> | ||
29 | |||
30 | <para> | ||
31 | This program is distributed in the hope that it will be | ||
32 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
33 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
34 | See the GNU General Public License for more details. | ||
35 | </para> | ||
36 | |||
37 | <para> | ||
38 | You should have received a copy of the GNU General Public | ||
39 | License along with this program; if not, write to the Free | ||
40 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
41 | MA 02111-1307 USA | ||
42 | </para> | ||
43 | |||
44 | <para> | ||
45 | For more details see the file COPYING in the source | ||
46 | distribution of Linux. | ||
47 | </para> | ||
48 | </legalnotice> | ||
49 | </bookinfo> | ||
50 | |||
51 | <toc></toc> | ||
52 | |||
53 | <chapter id="intro"> | ||
54 | <title>Introduction</title> | ||
55 | <para> | ||
56 | The Via VT82C686A "super southbridge" chips contain | ||
57 | AC97-compatible audio logic which features dual 16-bit stereo | ||
58 | PCM sound channels (full duplex), plus a third PCM channel intended for use | ||
59 | in hardware-assisted FM synthesis. | ||
60 | </para> | ||
61 | <para> | ||
62 | The current Linux kernel audio driver for this family of chips | ||
63 | supports audio playback and recording, but hardware-assisted | ||
64 | FM features, and hardware buffer direct-access (mmap) | ||
65 | support are not yet available. | ||
66 | </para> | ||
67 | <para> | ||
68 | This driver supports any Linux kernel version after 2.4.10. | ||
69 | </para> | ||
70 | <para> | ||
71 | Please send bug reports to the mailing list <email>linux-via@gtf.org</email>. | ||
72 | To subscribe, e-mail <email>majordomo@gtf.org</email> with | ||
73 | </para> | ||
74 | <programlisting> | ||
75 | subscribe linux-via | ||
76 | </programlisting> | ||
77 | <para> | ||
78 | in the body of the message. | ||
79 | </para> | ||
80 | </chapter> | ||
81 | |||
82 | <chapter id="install"> | ||
83 | <title>Driver Installation</title> | ||
84 | <para> | ||
85 | To use this audio driver, select the | ||
86 | CONFIG_SOUND_VIA82CXXX option in the section Sound during kernel configuration. | ||
87 | Follow the usual kernel procedures for rebuilding the kernel, | ||
88 | or building and installing driver modules. | ||
89 | </para> | ||
90 | <para> | ||
91 | To make this driver the default audio driver, you can add the | ||
92 | following to your /etc/conf.modules file: | ||
93 | </para> | ||
94 | <programlisting> | ||
95 | alias sound via82cxxx_audio | ||
96 | </programlisting> | ||
97 | <para> | ||
98 | Note that soundcore and ac97_codec support modules | ||
99 | are also required for working audio, in addition to | ||
100 | the via82cxxx_audio module itself. | ||
101 | </para> | ||
102 | </chapter> | ||
103 | |||
104 | <chapter id="reportbug"> | ||
105 | <title>Submitting a bug report</title> | ||
106 | <sect1 id="bugrepdesc"><title>Description of problem</title> | ||
107 | <para> | ||
108 | Describe the application you were using to play/record sound, and how | ||
109 | to reproduce the problem. | ||
110 | </para> | ||
111 | </sect1> | ||
112 | <sect1 id="bugrepdiag"><title>Diagnostic output</title> | ||
113 | <para> | ||
114 | Obtain the via-audio-diag diagnostics program from | ||
115 | http://sf.net/projects/gkernel/ and provide a dump of the | ||
116 | audio chip's registers while the problem is occurring. Sample command line: | ||
117 | </para> | ||
118 | <programlisting> | ||
119 | ./via-audio-diag -aps > diag-output.txt | ||
120 | </programlisting> | ||
121 | </sect1> | ||
122 | <sect1 id="bugrepdebug"><title>Driver debug output</title> | ||
123 | <para> | ||
124 | Define <constant>VIA_DEBUG</constant> at the beginning of the driver, then capture and email | ||
125 | the kernel log output. This can be viewed in the system kernel log (if | ||
126 | enabled), or via the dmesg program. Sample command line: | ||
127 | </para> | ||
128 | <programlisting> | ||
129 | dmesg > /tmp/dmesg-output.txt | ||
130 | </programlisting> | ||
131 | </sect1> | ||
132 | <sect1 id="bugrepprintk"><title>Bigger kernel message buffer</title> | ||
133 | <para> | ||
134 | If you wish to increase the size of the buffer displayed by dmesg, then | ||
135 | change the <constant>LOG_BUF_LEN</constant> macro at the top of linux/kernel/printk.c, recompile | ||
136 | your kernel, and pass the <constant>LOG_BUF_LEN</constant> value to dmesg. Sample command line with | ||
137 | <constant>LOG_BUF_LEN</constant> == 32768: | ||
138 | </para> | ||
139 | <programlisting> | ||
140 | dmesg -s 32768 > /tmp/dmesg-output.txt | ||
141 | </programlisting> | ||
142 | </sect1> | ||
143 | </chapter> | ||
144 | |||
145 | <chapter id="bugs"> | ||
146 | <title>Known Bugs And Assumptions</title> | ||
147 | <para> | ||
148 | <variablelist> | ||
149 | <varlistentry><term>Low volume</term> | ||
150 | <listitem> | ||
151 | <para> | ||
152 | Volume too low on many systems. Workaround: use a mixer program | ||
153 | such as xmixer to increase volume. | ||
154 | </para> | ||
155 | </listitem></varlistentry> | ||
156 | |||
157 | </variablelist> | ||
158 | |||
159 | </para> | ||
160 | </chapter> | ||
161 | |||
162 | <chapter id="thanks"> | ||
163 | <title>Thanks</title> | ||
164 | <para> | ||
165 | Via for providing e-mail support, specs, and NDA'd source code. | ||
166 | </para> | ||
167 | <para> | ||
168 | MandrakeSoft for providing hacking time. | ||
169 | </para> | ||
170 | <para> | ||
171 | AC97 mixer interface fixes and debugging by Ron Cemer <email>roncemer@gte.net</email>. | ||
172 | </para> | ||
173 | <para> | ||
174 | Rui Sousa <email>rui.sousa@conexant.com</email>, for bugfixing | ||
175 | MMAP support, and several other notable fixes that resulted from | ||
176 | his hard work and testing. | ||
177 | </para> | ||
178 | <para> | ||
179 | Adrian Cox <email>adrian@humboldt.co.uk</email>, for bugfixing | ||
180 | MMAP support, and several other notable fixes that resulted from | ||
181 | his hard work and testing. | ||
182 | </para> | ||
183 | <para> | ||
184 | Thomas Sailer for further bugfixes. | ||
185 | </para> | ||
186 | </chapter> | ||
187 | |||
188 | <chapter id="notes"> | ||
189 | <title>Random Notes</title> | ||
190 | <para> | ||
191 | Two /proc pseudo-files provide diagnostic information. This is generally | ||
192 | not useful to most users. Power users can disable CONFIG_SOUND_VIA82CXXX_PROCFS, | ||
193 | and remove the /proc support code. Once | ||
194 | version 2.0.0 is released, the /proc support code will be disabled by | ||
195 | default. Available /proc pseudo-files: | ||
196 | </para> | ||
197 | <programlisting> | ||
198 | /proc/driver/via/0/info | ||
199 | /proc/driver/via/0/ac97 | ||
200 | </programlisting> | ||
201 | <para> | ||
202 | This driver by default supports all PCI audio devices which report | ||
203 | a vendor id of 0x1106, and a device id of 0x3058. Subsystem vendor | ||
204 | and device ids are not examined. | ||
205 | </para> | ||
206 | <para> | ||
207 | GNU indent formatting options: | ||
208 | <programlisting> | ||
209 | -kr -i8 -ts8 -br -ce -bap -sob -l80 -pcs -cs -ss -bs -di1 -nbc -lp -psl | ||
210 | </programlisting> | ||
211 | </para> | ||
212 | <para> | ||
213 | Via has graciously donated e-mail support and source code to help further | ||
214 | the development of this driver. Their assistance has been invaluable | ||
215 | in the design and coding of the next major version of this driver. | ||
216 | </para> | ||
217 | <para> | ||
218 | The Via audio chip apparently provides a second PCM scatter-gather | ||
219 | DMA channel just for FM data, but does not have a full hardware MIDI | ||
220 | processor. I haven't put much thought towards a solution here, but it | ||
221 | might involve using SoftOSS midi wave table, or simply disabling MIDI | ||
222 | support altogether and using the FM PCM channel as a second (input? output?) channel. | ||
223 | </para> | ||
224 | </chapter> | ||
225 | |||
226 | <chapter id="changelog"> | ||
227 | <title>Driver ChangeLog</title> | ||
228 | |||
229 | <sect1 id="version191"><title> | ||
230 | Version 1.9.1 | ||
231 | </title> | ||
232 | <itemizedlist spacing="compact"> | ||
233 | <listitem> | ||
234 | <para> | ||
235 | DSP read/write bugfixes from Thomas Sailer. | ||
236 | </para> | ||
237 | </listitem> | ||
238 | |||
239 | <listitem> | ||
240 | <para> | ||
241 | Add new PCI id for single-channel use of Via 8233. | ||
242 | </para> | ||
243 | </listitem> | ||
244 | |||
245 | <listitem> | ||
246 | <para> | ||
247 | Other bug fixes, tweaks, new ioctls. | ||
248 | </para> | ||
249 | </listitem> | ||
250 | |||
251 | </itemizedlist> | ||
252 | </sect1> | ||
253 | |||
254 | <sect1 id="version1115"><title> | ||
255 | Version 1.1.15 | ||
256 | </title> | ||
257 | <itemizedlist spacing="compact"> | ||
258 | <listitem> | ||
259 | <para> | ||
260 | Support for variable fragment size and variable fragment number (Rui | ||
261 | Sousa) | ||
262 | </para> | ||
263 | </listitem> | ||
264 | |||
265 | <listitem> | ||
266 | <para> | ||
267 | Fixes for the SPEED, STEREO, CHANNELS, FMT ioctls when in read & | ||
268 | write mode (Rui Sousa) | ||
269 | </para> | ||
270 | </listitem> | ||
271 | |||
272 | <listitem> | ||
273 | <para> | ||
274 | Mmaped sound is now fully functional. (Rui Sousa) | ||
275 | </para> | ||
276 | </listitem> | ||
277 | |||
278 | <listitem> | ||
279 | <para> | ||
280 | Make sure to enable PCI device before reading any of its PCI | ||
281 | config information. (fixes potential hotplug problems) | ||
282 | </para> | ||
283 | </listitem> | ||
284 | |||
285 | <listitem> | ||
286 | <para> | ||
287 | Clean up code a bit and add more internal function documentation. | ||
288 | </para> | ||
289 | </listitem> | ||
290 | |||
291 | <listitem> | ||
292 | <para> | ||
293 | AC97 codec access fixes (Adrian Cox) | ||
294 | </para> | ||
295 | </listitem> | ||
296 | |||
297 | <listitem> | ||
298 | <para> | ||
299 | Big endian fixes (Adrian Cox) | ||
300 | </para> | ||
301 | </listitem> | ||
302 | |||
303 | <listitem> | ||
304 | <para> | ||
305 | MIDI support (Adrian Cox) | ||
306 | </para> | ||
307 | </listitem> | ||
308 | |||
309 | <listitem> | ||
310 | <para> | ||
311 | Detect and report locked-rate AC97 codecs. If your hardware only | ||
312 | supports 48kHz (locked rate), then your recording/playback software | ||
313 | must upsample or downsample accordingly. The hardware cannot do it. | ||
314 | </para> | ||
315 | </listitem> | ||
316 | |||
317 | <listitem> | ||
318 | <para> | ||
319 | Use new pci_request_regions and pci_disable_device functions in | ||
320 | kernel 2.4.6. | ||
321 | </para> | ||
322 | </listitem> | ||
323 | |||
324 | </itemizedlist> | ||
325 | </sect1> | ||
326 | |||
327 | <sect1 id="version1114"><title> | ||
328 | Version 1.1.14 | ||
329 | </title> | ||
330 | <itemizedlist spacing="compact"> | ||
331 | <listitem> | ||
332 | <para> | ||
333 | Use VM_RESERVE when available, to eliminate unnecessary page faults. | ||
334 | </para> | ||
335 | </listitem> | ||
336 | </itemizedlist> | ||
337 | </sect1> | ||
338 | |||
339 | <sect1 id="version1112"><title> | ||
340 | Version 1.1.12 | ||
341 | </title> | ||
342 | <itemizedlist spacing="compact"> | ||
343 | <listitem> | ||
344 | <para> | ||
345 | mmap bug fixes from Linus. | ||
346 | </para> | ||
347 | </listitem> | ||
348 | </itemizedlist> | ||
349 | </sect1> | ||
350 | |||
351 | <sect1 id="version1111"><title> | ||
352 | Version 1.1.11 | ||
353 | </title> | ||
354 | <itemizedlist spacing="compact"> | ||
355 | <listitem> | ||
356 | <para> | ||
357 | Many more bug fixes. mmap enabled by default, but may still be buggy. | ||
358 | </para> | ||
359 | </listitem> | ||
360 | |||
361 | <listitem> | ||
362 | <para> | ||
363 | Uses new and spiffy method of mmap'ing the DMA buffer, based | ||
364 | on a suggestion from Linus. | ||
365 | </para> | ||
366 | </listitem> | ||
367 | </itemizedlist> | ||
368 | </sect1> | ||
369 | |||
370 | <sect1 id="version1110"><title> | ||
371 | Version 1.1.10 | ||
372 | </title> | ||
373 | <itemizedlist spacing="compact"> | ||
374 | <listitem> | ||
375 | <para> | ||
376 | Many bug fixes. mmap enabled by default, but may still be buggy. | ||
377 | </para> | ||
378 | </listitem> | ||
379 | </itemizedlist> | ||
380 | </sect1> | ||
381 | |||
382 | <sect1 id="version119"><title> | ||
383 | Version 1.1.9 | ||
384 | </title> | ||
385 | <itemizedlist spacing="compact"> | ||
386 | <listitem> | ||
387 | <para> | ||
388 | Redesign and rewrite audio playback implementation. (faster and smaller, hopefully) | ||
389 | </para> | ||
390 | </listitem> | ||
391 | |||
392 | <listitem> | ||
393 | <para> | ||
394 | Implement recording and full duplex (DSP_CAP_DUPLEX) support. | ||
395 | </para> | ||
396 | </listitem> | ||
397 | |||
398 | <listitem> | ||
399 | <para> | ||
400 | Make procfs support optional. | ||
401 | </para> | ||
402 | </listitem> | ||
403 | |||
404 | <listitem> | ||
405 | <para> | ||
406 | Quick interrupt status check, to lessen overhead in interrupt | ||
407 | sharing situations. | ||
408 | </para> | ||
409 | </listitem> | ||
410 | |||
411 | <listitem> | ||
412 | <para> | ||
413 | Add mmap(2) support. Disabled for now, it is still buggy and experimental. | ||
414 | </para> | ||
415 | </listitem> | ||
416 | |||
417 | <listitem> | ||
418 | <para> | ||
419 | Surround all syscalls with a semaphore for cheap and easy SMP protection. | ||
420 | </para> | ||
421 | </listitem> | ||
422 | |||
423 | <listitem> | ||
424 | <para> | ||
425 | Fix bug in channel shutdown (hardware channel reset) code. | ||
426 | </para> | ||
427 | </listitem> | ||
428 | |||
429 | <listitem> | ||
430 | <para> | ||
431 | Remove unnecessary spinlocks (better performance). | ||
432 | </para> | ||
433 | </listitem> | ||
434 | |||
435 | <listitem> | ||
436 | <para> | ||
437 | Eliminate "unknown AFMT" message by using a different method | ||
438 | of selecting the best AFMT_xxx sound sample format for use. | ||
439 | </para> | ||
440 | </listitem> | ||
441 | |||
442 | <listitem> | ||
443 | <para> | ||
444 | Support for realtime hardware pointer position reporting | ||
445 | (DSP_CAP_REALTIME, SNDCTL_DSP_GETxPTR ioctls) | ||
446 | </para> | ||
447 | </listitem> | ||
448 | |||
449 | <listitem> | ||
450 | <para> | ||
451 | Support for capture/playback triggering | ||
452 | (DSP_CAP_TRIGGER, SNDCTL_DSP_SETTRIGGER ioctls) | ||
453 | </para> | ||
454 | </listitem> | ||
455 | |||
456 | <listitem> | ||
457 | <para> | ||
458 | SNDCTL_DSP_SETDUPLEX and SNDCTL_DSP_POST ioctls now handled. | ||
459 | </para> | ||
460 | </listitem> | ||
461 | |||
462 | <listitem> | ||
463 | <para> | ||
464 | Rewrite open(2) and close(2) logic to allow only one user at | ||
465 | a time. All other open(2) attempts will sleep until they succeed. | ||
466 | FIXME: open(O_RDONLY) and open(O_WRONLY) should be allowed to succeed. | ||
467 | </para> | ||
468 | </listitem> | ||
469 | |||
470 | <listitem> | ||
471 | <para> | ||
472 | Reviewed code to ensure that SMP and multiple audio devices | ||
473 | are fully supported. | ||
474 | </para> | ||
475 | </listitem> | ||
476 | |||
477 | </itemizedlist> | ||
478 | </sect1> | ||
479 | |||
480 | <sect1 id="version118"><title> | ||
481 | Version 1.1.8 | ||
482 | </title> | ||
483 | <itemizedlist spacing="compact"> | ||
484 | <listitem> | ||
485 | <para> | ||
486 | Clean up interrupt handler output. Fixes the following kernel error message: | ||
487 | </para> | ||
488 | <programlisting> | ||
489 | unhandled interrupt ... | ||
490 | </programlisting> | ||
491 | </listitem> | ||
492 | |||
493 | <listitem> | ||
494 | <para> | ||
495 | Convert documentation to DocBook, so that PDF, HTML and PostScript (.ps) output is readily | ||
496 | available. | ||
497 | </para> | ||
498 | </listitem> | ||
499 | |||
500 | </itemizedlist> | ||
501 | </sect1> | ||
502 | |||
503 | <sect1 id="version117"><title> | ||
504 | Version 1.1.7 | ||
505 | </title> | ||
506 | <itemizedlist spacing="compact"> | ||
507 | <listitem> | ||
508 | <para> | ||
509 | Fix module unload bug where mixer device left registered | ||
510 | after driver exit | ||
511 | </para> | ||
512 | </listitem> | ||
513 | </itemizedlist> | ||
514 | </sect1> | ||
515 | |||
516 | <sect1 id="version116"><title> | ||
517 | Version 1.1.6 | ||
518 | </title> | ||
519 | <itemizedlist spacing="compact"> | ||
520 | <listitem> | ||
521 | <para> | ||
522 | Rewrite via_set_rate to mimic ALSA basic AC97 rate setting | ||
523 | </para> | ||
524 | </listitem> | ||
525 | <listitem> | ||
526 | <para> | ||
527 | Remove much dead code | ||
528 | </para> | ||
529 | </listitem> | ||
530 | <listitem> | ||
531 | <para> | ||
532 | Complete spin_lock_irqsave -> spin_lock_irq conversion in via_dsp_ioctl | ||
533 | </para> | ||
534 | </listitem> | ||
535 | <listitem> | ||
536 | <para> | ||
537 | Fix build problem in via_dsp_ioctl | ||
538 | </para> | ||
539 | </listitem> | ||
540 | <listitem> | ||
541 | <para> | ||
542 | Optimize included headers to eliminate headers found in linux/sound | ||
543 | </para> | ||
544 | </listitem> | ||
545 | </itemizedlist> | ||
546 | </sect1> | ||
547 | |||
548 | <sect1 id="version115"><title> | ||
549 | Version 1.1.5 | ||
550 | </title> | ||
551 | <itemizedlist spacing="compact"> | ||
552 | <listitem> | ||
553 | <para> | ||
554 | Disable some overly-verbose debugging code | ||
555 | </para> | ||
556 | </listitem> | ||
557 | <listitem> | ||
558 | <para> | ||
559 | Remove unnecessary sound locks | ||
560 | </para> | ||
561 | </listitem> | ||
562 | <listitem> | ||
563 | <para> | ||
564 | Fix some ioctls for better time resolution | ||
565 | </para> | ||
566 | </listitem> | ||
567 | <listitem> | ||
568 | <para> | ||
569 | Begin spin_lock_irqsave -> spin_lock_irq conversion in via_dsp_ioctl | ||
570 | </para> | ||
571 | </listitem> | ||
572 | </itemizedlist> | ||
573 | </sect1> | ||
574 | |||
575 | <sect1 id="version114"><title> | ||
576 | Version 1.1.4 | ||
577 | </title> | ||
578 | <itemizedlist spacing="compact"> | ||
579 | <listitem> | ||
580 | <para> | ||
581 | Completed rewrite of driver. Eliminated SoundBlaster compatibility | ||
582 | completely, and now uses the much-faster scatter-gather DMA engine. | ||
583 | </para> | ||
584 | </listitem> | ||
585 | </itemizedlist> | ||
586 | </sect1> | ||
587 | |||
588 | </chapter> | ||
589 | |||
590 | <chapter id="intfunctions"> | ||
591 | <title>Internal Functions</title> | ||
592 | !Isound/oss/via82cxxx_audio.c | ||
593 | </chapter> | ||
594 | |||
595 | </book> | ||
596 | |||
597 | |||
diff --git a/Documentation/DocBook/videobook.tmpl b/Documentation/DocBook/videobook.tmpl new file mode 100644 index 000000000000..3ec6c875588a --- /dev/null +++ b/Documentation/DocBook/videobook.tmpl | |||
@@ -0,0 +1,1663 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="V4LGuide"> | ||
6 | <bookinfo> | ||
7 | <title>Video4Linux Programming</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Alan</firstname> | ||
12 | <surname>Cox</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>alan@redhat.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2000</year> | ||
23 | <holder>Alan Cox</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | </legalnotice> | ||
54 | </bookinfo> | ||
55 | |||
56 | <toc></toc> | ||
57 | |||
58 | <chapter id="intro"> | ||
59 | <title>Introduction</title> | ||
60 | <para> | ||
61 | Parts of this document first appeared in Linux Magazine under a | ||
62 | ninety day exclusivity. | ||
63 | </para> | ||
64 | <para> | ||
65 | Video4Linux is intended to provide a common programming interface | ||
66 | for the many TV and capture cards now on the market, as well as | ||
67 | parallel port and USB video cameras. Radio, teletext decoders and | ||
68 | vertical blanking data interfaces are also provided. | ||
69 | </para> | ||
70 | </chapter> | ||
71 | <chapter id="radio"> | ||
72 | <title>Radio Devices</title> | ||
73 | <para> | ||
74 | There is a wide variety of radio interfaces available for PCs, and these | ||
75 | are generally very simple to program. The biggest problem with supporting | ||
76 | such devices is normally extracting documentation from the vendor. | ||
77 | </para> | ||
78 | <para> | ||
79 | The radio interface supports a simple set of control ioctls standardised | ||
80 | across all radio and tv interfaces. It does not support read or write, which | ||
81 | are used for video streams. The reason radio cards do not allow you to read | ||
82 | the audio stream into an application is that without exception they provide | ||
83 | a connection on to a soundcard. Soundcards can be used to read the radio | ||
84 | data just fine. | ||
85 | </para> | ||
86 | <sect1 id="registerradio"> | ||
87 | <title>Registering Radio Devices</title> | ||
88 | <para> | ||
89 | The Video4linux core provides an interface for registering devices. The | ||
90 | first step in writing our radio card driver is to register it. | ||
91 | </para> | ||
92 | <programlisting> | ||
93 | |||
94 | |||
95 | static struct video_device my_radio = | ||
96 | { | ||
97 | "My radio", | ||
98 | VID_TYPE_TUNER, | ||
99 | VID_HARDWARE_MYRADIO, | ||
100 | radio_open, | ||
101 | radio_close, | ||
102 | NULL, /* no read */ | ||
103 | NULL, /* no write */ | ||
104 | NULL, /* no poll */ | ||
105 | radio_ioctl, | ||
106 | NULL, /* no special init function */ | ||
107 | NULL /* no private data */ | ||
108 | }; | ||
109 | |||
110 | |||
111 | </programlisting> | ||
112 | <para> | ||
113 | This declares our video4linux device driver interface. The VID_TYPE_ value | ||
114 | defines what kind of an interface we are, and defines basic capabilities. | ||
115 | </para> | ||
116 | <para> | ||
117 | The only defined value relevant for a radio card is VID_TYPE_TUNER which | ||
118 | indicates that the device can be tuned. Clearly our radio is going to have some | ||
119 | way to change channel so it is tuneable. | ||
120 | </para> | ||
121 | <para> | ||
122 | The VID_HARDWARE_ types are unique to each device. Numbers are assigned by | ||
123 | <email>alan@redhat.com</email> when device drivers are going to be released. Until then you | ||
124 | can pull a suitably large number out of your hat and use it. 10000 should be | ||
125 | safe for a very long time even allowing for the huge number of vendors | ||
126 | making new and different radio cards at the moment. | ||
127 | </para> | ||
128 | <para> | ||
129 | We declare an open and close routine, but we do not need read or write, | ||
130 | which are used to read and write video data to or from the card itself. As | ||
131 | we have no read or write there is no poll function. | ||
132 | </para> | ||
133 | <para> | ||
134 | The private initialise function is run when the device is registered. In | ||
135 | this driver we've already done all the work needed. The final pointer is a | ||
136 | private data pointer that can be used by the device driver to attach and | ||
137 | retrieve private data structures. We set this field "priv" to NULL for | ||
138 | the moment. | ||
139 | </para> | ||
140 | <para> | ||
141 | Having the structure defined is all very well but we now need to register it | ||
142 | with the kernel. | ||
143 | </para> | ||
144 | <programlisting> | ||
145 | |||
146 | |||
147 | static int io = 0x320; | ||
148 | |||
149 | int __init myradio_init(struct video_init *v) | ||
150 | { | ||
151 | if(!request_region(io, MY_IO_SIZE, "myradio")) | ||
152 | { | ||
153 | printk(KERN_ERR | ||
154 | "myradio: port 0x%03X is in use.\n", io); | ||
155 | return -EBUSY; | ||
156 | } | ||
157 | |||
158 | if(video_device_register(&my_radio, VFL_TYPE_RADIO)==-1) { | ||
159 | release_region(io, MY_IO_SIZE); | ||
160 | return -EINVAL; | ||
161 | } | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | </programlisting> | ||
166 | <para> | ||
167 | The first stage of the initialisation, as is normally the case, is to check | ||
168 | that the I/O space we are about to fiddle with doesn't belong to some other | ||
169 | driver. If it does, we leave well alone. If the user gives the address of the | ||
170 | wrong device then we will spot this. These policies will generally avoid | ||
171 | crashing the machine. | ||
172 | </para> | ||
173 | <para> | ||
174 | Now we ask the Video4Linux layer to register the device for us. We hand it | ||
175 | our carefully designed video_device structure and also tell it which group | ||
176 | of devices we want it registered with. In this case VFL_TYPE_RADIO. | ||
177 | </para> | ||
178 | <para> | ||
179 | The types available are | ||
180 | </para> | ||
181 | <table frame="all"><title>Device Types</title> | ||
182 | <tgroup cols="3" align="left"> | ||
183 | <tbody> | ||
184 | <row> | ||
185 | <entry>VFL_TYPE_RADIO</entry><entry>/dev/radio{n}</entry><entry> | ||
186 | |||
187 | Radio devices are assigned in this block. As with all of these | ||
188 | selections the actual number assignment is done by the video layer | ||
189 | according to what is free.</entry> | ||
190 | </row><row> | ||
191 | <entry>VFL_TYPE_GRABBER</entry><entry>/dev/video{n}</entry><entry> | ||
192 | Video capture devices and also -- counter-intuitively for the name -- | ||
193 | hardware video playback devices such as MPEG2 cards.</entry> | ||
194 | </row><row> | ||
195 | <entry>VFL_TYPE_VBI</entry><entry>/dev/vbi{n}</entry><entry> | ||
196 | The VBI devices capture the hidden lines on a television picture | ||
197 | that carry further information like closed caption data, teletext | ||
198 | (primarily in Europe) and now Intercast and the ATVEC internet | ||
199 | television encodings.</entry> | ||
200 | </row><row> | ||
201 | <entry>VFL_TYPE_VTX</entry><entry>/dev/vtx{n}</entry><entry> | ||
202 | VTX is 'Videotext' also known as 'Teletext'. This is a system for | ||
203 | sending numbered, 40x25, mostly textual page images over the hidden | ||
204 | lines. Unlike the /dev/vbi interfaces, this is for 'smart' decoder | ||
205 | chips. (The use of the word smart here has to be taken in context, | ||
206 | the smartest teletext chips are fairly dumb pieces of technology). | ||
207 | </entry> | ||
208 | </row> | ||
209 | </tbody> | ||
210 | </tgroup> | ||
211 | </table> | ||
212 | <para> | ||
213 | We are most definitely a radio. | ||
214 | </para> | ||
215 | <para> | ||
216 | Finally we allocate our I/O space so that nobody treads on us and return 0 | ||
217 | to signify general happiness with the state of the universe. | ||
218 | </para> | ||
219 | </sect1> | ||
220 | <sect1 id="openradio"> | ||
221 | <title>Opening And Closing The Radio</title> | ||
222 | |||
223 | <para> | ||
224 | The functions we declared in our video_device are mostly very simple. | ||
225 | Firstly we can drop in what is basically standard code for open and close. | ||
226 | </para> | ||
227 | <programlisting> | ||
228 | |||
229 | |||
230 | static int users = 0; | ||
231 | |||
232 | static int radio_open(struct video_device *dev, int flags) | ||
233 | { | ||
234 | if(users) | ||
235 | return -EBUSY; | ||
236 | users++; | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | </programlisting> | ||
241 | <para> | ||
242 | At open time we need to do nothing but check if someone else is also using | ||
243 | the radio card. If nobody is using it we make a note that we are using it, | ||
244 | then we ensure that nobody unloads our driver on us. | ||
245 | </para> | ||
246 | <programlisting> | ||
247 | |||
248 | |||
249 | static int radio_close(struct video_device *dev) | ||
250 | { | ||
251 | users--; | ||
252 | } | ||
253 | |||
254 | </programlisting> | ||
255 | <para> | ||
256 | At close time we simply need to reduce the user count and allow the module | ||
257 | to become unloadable. | ||
258 | </para> | ||
259 | <para> | ||
260 | If you are sharp you will have noticed neither the open nor the close | ||
261 | routines attempt to reset or change the radio settings. This is intentional. | ||
262 | It allows an application to set up the radio and exit. It avoids a user | ||
263 | having to leave an application running all the time just to listen to the | ||
264 | radio. | ||
265 | </para> | ||
266 | </sect1> | ||
267 | <sect1 id="ioctlradio"> | ||
268 | <title>The Ioctl Interface</title> | ||
269 | <para> | ||
270 | This leaves the ioctl routine, without which the driver will not be | ||
271 | terribly useful to anyone. | ||
272 | </para> | ||
273 | <programlisting> | ||
274 | |||
275 | |||
276 | static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | ||
277 | { | ||
278 | switch(cmd) | ||
279 | { | ||
280 | case VIDIOCGCAP: | ||
281 | { | ||
282 | struct video_capability v; | ||
283 | v.type = VID_TYPE_TUNER; | ||
284 | v.channels = 1; | ||
285 | v.audios = 1; | ||
286 | v.maxwidth = 0; | ||
287 | v.minwidth = 0; | ||
288 | v.maxheight = 0; | ||
289 | v.minheight = 0; | ||
290 | strcpy(v.name, "My Radio"); | ||
291 | if(copy_to_user(arg, &v, sizeof(v))) | ||
292 | return -EFAULT; | ||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | </programlisting> | ||
297 | <para> | ||
298 | VIDIOCGCAP is the first ioctl all video4linux devices must support. It | ||
299 | allows the applications to find out what sort of a card they have found and | ||
300 | to figure out what they want to do about it. The fields in the structure are | ||
301 | </para> | ||
302 | <table frame="all"><title>struct video_capability fields</title> | ||
303 | <tgroup cols="2" align="left"> | ||
304 | <tbody> | ||
305 | <row> | ||
306 | <entry>name</entry><entry>The device text name. This is intended for the user.</entry> | ||
307 | </row><row> | ||
308 | <entry>channels</entry><entry>The number of different channels you can tune on | ||
309 | this card. It could even be zero for a card that has | ||
310 | no tuning capability. For our simple FM radio it is 1. | ||
311 | An AM/FM radio would report 2.</entry> | ||
312 | </row><row> | ||
313 | <entry>audios</entry><entry>The number of audio inputs on this device. For our | ||
314 | radio there is only one audio input.</entry> | ||
315 | </row><row> | ||
316 | <entry>minwidth,minheight</entry><entry>The smallest size the card is capable of capturing | ||
317 | images in. We set these to zero. Radios do not | ||
318 | capture pictures</entry> | ||
319 | </row><row> | ||
320 | <entry>maxwidth,maxheight</entry><entry>The largest image size the card is capable of | ||
321 | capturing. For our radio we report 0. | ||
322 | </entry> | ||
323 | </row><row> | ||
324 | <entry>type</entry><entry>This reports the capabilities of the device, and | ||
325 | matches the field we filled in in the struct | ||
326 | video_device when registering.</entry> | ||
327 | </row> | ||
328 | </tbody> | ||
329 | </tgroup> | ||
330 | </table> | ||
331 | <para> | ||
332 | Having filled in the fields, we use copy_to_user to copy the structure into | ||
333 | the user's buffer. If the copy fails we return an EFAULT to the application | ||
334 | so that it knows it tried to feed us garbage. | ||
335 | </para> | ||
336 | <para> | ||
337 | The next pair of ioctl operations select which tuner is to be used and let | ||
338 | the application find the tuner properties. We have only a single FM band | ||
339 | tuner in our example device. | ||
340 | </para> | ||
341 | <programlisting> | ||
342 | |||
343 | |||
344 | case VIDIOCGTUNER: | ||
345 | { | ||
346 | struct video_tuner v; | ||
347 | if(copy_from_user(&v, arg, sizeof(v))!=0) | ||
348 | return -EFAULT; | ||
349 | if(v.tuner) | ||
350 | return -EINVAL; | ||
351 | v.rangelow=(87*16000); | ||
352 | v.rangehigh=(108*16000); | ||
353 | v.flags = VIDEO_TUNER_LOW; | ||
354 | v.mode = VIDEO_MODE_AUTO; | ||
355 | v.signal = 0xFFFF; | ||
356 | strcpy(v.name, "FM"); | ||
357 | if(copy_to_user(arg, &v, sizeof(v))!=0) | ||
358 | return -EFAULT; | ||
359 | return 0; | ||
360 | } | ||
361 | |||
362 | </programlisting> | ||
363 | <para> | ||
364 | The VIDIOCGTUNER ioctl allows applications to query a tuner. The application | ||
365 | sets the tuner field to the tuner number it wishes to query. The query does | ||
366 | not change the tuner that is being used, it merely enquires about the tuner | ||
367 | in question. | ||
368 | </para> | ||
369 | <para> | ||
370 | We have exactly one tuner so after copying the user buffer to our temporary | ||
371 | structure we complain if they asked for a tuner other than tuner 0. | ||
372 | </para> | ||
373 | <para> | ||
374 | The video_tuner structure has the following fields | ||
375 | </para> | ||
376 | <table frame="all"><title>struct video_tuner fields</title> | ||
377 | <tgroup cols="2" align="left"> | ||
378 | <tbody> | ||
379 | <row> | ||
380 | <entry>int tuner</entry><entry>The number of the tuner in question</entry> | ||
381 | </row><row> | ||
382 | <entry>char name[32]</entry><entry>A text description of this tuner. "FM" will do fine. | ||
383 | This is intended for the application.</entry> | ||
384 | </row><row> | ||
385 | <entry>u32 flags</entry> | ||
386 | <entry>Tuner capability flags</entry> | ||
387 | </row> | ||
388 | <row> | ||
389 | <entry>u16 mode</entry><entry>The current reception mode</entry> | ||
390 | |||
391 | </row><row> | ||
392 | <entry>u16 signal</entry><entry>The signal strength scaled between 0 and 65535. If | ||
393 | a device cannot tell the signal strength it should | ||
394 | report 65535. Many simple cards contain only a | ||
395 | signal/no signal bit. Such cards will report either | ||
396 | 0 or 65535.</entry> | ||
397 | |||
398 | </row><row> | ||
399 | <entry>u32 rangelow, rangehigh</entry><entry> | ||
400 | The range of frequencies supported by the radio | ||
401 | or TV. It is scaled according to the VIDEO_TUNER_LOW | ||
402 | flag.</entry> | ||
403 | |||
404 | </row> | ||
405 | </tbody> | ||
406 | </tgroup> | ||
407 | </table> | ||
408 | |||
409 | <table frame="all"><title>struct video_tuner flags</title> | ||
410 | <tgroup cols="2" align="left"> | ||
411 | <tbody> | ||
412 | <row> | ||
413 | <entry>VIDEO_TUNER_PAL</entry><entry>A PAL TV tuner</entry> | ||
414 | </row><row> | ||
415 | <entry>VIDEO_TUNER_NTSC</entry><entry>An NTSC (US) TV tuner</entry> | ||
416 | </row><row> | ||
417 | <entry>VIDEO_TUNER_SECAM</entry><entry>A SECAM (French) TV tuner</entry> | ||
418 | </row><row> | ||
419 | <entry>VIDEO_TUNER_LOW</entry><entry> | ||
420 | The tuner frequency is scaled in 1/16th of a KHz | ||
421 | steps. If not it is in 1/16th of a MHz steps | ||
422 | </entry> | ||
423 | </row><row> | ||
424 | <entry>VIDEO_TUNER_NORM</entry><entry>The tuner can set its format</entry> | ||
425 | </row><row> | ||
426 | <entry>VIDEO_TUNER_STEREO_ON</entry><entry>The tuner is currently receiving a stereo signal</entry> | ||
427 | </row> | ||
428 | </tbody> | ||
429 | </tgroup> | ||
430 | </table> | ||
431 | |||
432 | <table frame="all"><title>struct video_tuner modes</title> | ||
433 | <tgroup cols="2" align="left"> | ||
434 | <tbody> | ||
435 | <row> | ||
436 | <entry>VIDEO_MODE_PAL</entry><entry>PAL Format</entry> | ||
437 | </row><row> | ||
438 | <entry>VIDEO_MODE_NTSC</entry><entry>NTSC Format (USA)</entry> | ||
439 | </row><row> | ||
440 | <entry>VIDEO_MODE_SECAM</entry><entry>French Format</entry> | ||
441 | </row><row> | ||
442 | <entry>VIDEO_MODE_AUTO</entry><entry>A device that does not need to do | ||
443 | TV format switching</entry> | ||
444 | </row> | ||
445 | </tbody> | ||
446 | </tgroup> | ||
447 | </table> | ||
448 | <para> | ||
449 | The settings for the radio card are thus fairly simple. We report that we | ||
450 | are a tuner called "FM" for FM radio. In order to get the best tuning | ||
451 | resolution we report VIDEO_TUNER_LOW and select tuning to 1/16th of a KHz. It's | ||
452 | unlikely our card can do that resolution but it is a fair bet the card can | ||
453 | do better than 1/16th of a MHz. VIDEO_TUNER_LOW is appropriate to almost all | ||
454 | radio usage. | ||
455 | </para> | ||
456 | <para> | ||
457 | We report that the tuner automatically handles deciding what format it is | ||
458 | receiving - true enough as it only handles FM radio. Our example card is | ||
459 | also incapable of detecting stereo or signal strengths so it reports a | ||
460 | strength of 0xFFFF (maximum) and no stereo detected. | ||
461 | </para> | ||
462 | <para> | ||
463 | To finish off we set the range that can be tuned to be 87-108MHz, the normal | ||
464 | FM broadcast radio range. It is important to find out what the card is | ||
465 | actually capable of tuning. It is easy enough to simply use the FM broadcast | ||
466 | range. Unfortunately if you do this you will discover the FM broadcast | ||
467 | ranges in the USA, Europe and Japan are all subtly different and some users | ||
468 | cannot receive all the stations they wish. | ||
469 | </para> | ||
470 | <para> | ||
471 | The application also needs to be able to set the tuner it wishes to use. In | ||
472 | our case, with a single tuner this is rather simple to arrange. | ||
473 | </para> | ||
474 | <programlisting> | ||
475 | |||
476 | case VIDIOCSTUNER: | ||
477 | { | ||
478 | struct video_tuner v; | ||
479 | if(copy_from_user(&v, arg, sizeof(v))) | ||
480 | return -EFAULT; | ||
481 | if(v.tuner != 0) | ||
482 | return -EINVAL; | ||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | </programlisting> | ||
487 | <para> | ||
488 | We copy the user supplied structure into kernel memory so we can examine it. | ||
489 | If the user has selected a tuner other than zero we reject the request. If | ||
490 | they wanted tuner 0 then, surprisingly enough, that is the current tuner already. | ||
491 | </para> | ||
492 | <para> | ||
493 | The next two ioctls we need to provide are to get and set the frequency of | ||
494 | the radio. These both use an unsigned long argument which is the frequency. | ||
495 | The scale of the frequency depends on the VIDEO_TUNER_LOW flag as I | ||
496 | mentioned earlier on. Since we have VIDEO_TUNER_LOW set this will be in | ||
497 | 1/16ths of a KHz. | ||
498 | </para> | ||
499 | <programlisting> | ||
500 | |||
501 | static unsigned long current_freq; | ||
502 | |||
503 | |||
504 | |||
505 | case VIDIOCGFREQ: | ||
506 | if(copy_to_user(arg, &current_freq, | ||
507 | sizeof(unsigned long))) | ||
508 | return -EFAULT; | ||
509 | return 0; | ||
510 | |||
511 | </programlisting> | ||
512 | <para> | ||
513 | Querying the frequency in our case is relatively simple. Our radio card is | ||
514 | too dumb to let us query the signal strength so we remember our setting if | ||
515 | we know it. All we have to do is copy it to the user. | ||
516 | </para> | ||
517 | <programlisting> | ||
518 | |||
519 | |||
520 | case VIDIOCSFREQ: | ||
521 | { | ||
522 | unsigned long freq; | ||
523 | if(copy_from_user(&freq, arg, | ||
524 | sizeof(unsigned long))!=0) | ||
525 | return -EFAULT; | ||
526 | if(hardware_set_freq(freq)<0) | ||
527 | return -EINVAL; | ||
528 | current_freq = freq; | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | </programlisting> | ||
533 | <para> | ||
534 | Setting the frequency is a little more complex. We begin by copying the | ||
535 | desired frequency into kernel space. Next we call a hardware specific routine | ||
536 | to set the radio up. This might be as simple as some scaling and a few | ||
537 | writes to an I/O port. For most radio cards it turns out a good deal more | ||
538 | complicated and may involve programming things like a phase locked loop on | ||
539 | the card. This is what documentation is for. | ||
540 | </para> | ||
541 | <para> | ||
542 | The final set of operations we need to provide for our radio are the | ||
543 | volume controls. Not all radio cards can even do volume control. After all | ||
544 | there is a perfectly good volume control on the sound card. We will assume | ||
545 | our radio card has a simple 4 step volume control. | ||
546 | </para> | ||
547 | <para> | ||
548 | There are two ioctls with audio we need to support | ||
549 | </para> | ||
550 | <programlisting> | ||
551 | |||
552 | static int current_volume=0; | ||
553 | |||
554 | case VIDIOCGAUDIO: | ||
555 | { | ||
556 | struct video_audio v; | ||
557 | if(copy_from_user(&v, arg, sizeof(v))) | ||
558 | return -EFAULT; | ||
559 | if(v.audio != 0) | ||
560 | return -EINVAL; | ||
561 | v.volume = 16384*current_volume; | ||
562 | v.step = 16384; | ||
563 | strcpy(v.name, "Radio"); | ||
564 | v.mode = VIDEO_SOUND_MONO; | ||
565 | v.balance = 0; | ||
566 | v.bass = 0; | ||
567 | v.treble = 0; | ||
568 | |||
569 | if(copy_to_user(arg, &v, sizeof(v))) | ||
570 | return -EFAULT; | ||
571 | return 0; | ||
572 | } | ||
573 | |||
574 | </programlisting> | ||
575 | <para> | ||
576 | Much like the tuner we start by copying the user structure into kernel | ||
577 | space. Again we check if the user has asked for a valid audio input. We have | ||
578 | only input 0 and we punt if they ask for another input. | ||
579 | </para> | ||
580 | <para> | ||
581 | Then we fill in the video_audio structure. This has the following format | ||
582 | </para> | ||
583 | <table frame="all"><title>struct video_audio fields</title> | ||
584 | <tgroup cols="2" align="left"> | ||
585 | <tbody> | ||
586 | <row> | ||
587 | <entry>audio</entry><entry>The input the user wishes to query</entry> | ||
588 | </row><row> | ||
589 | <entry>volume</entry><entry>The volume setting on a scale of 0-65535</entry> | ||
590 | </row><row> | ||
591 | <entry>bass</entry><entry>The bass level on a scale of 0-65535</entry> | ||
592 | </row><row> | ||
593 | <entry>treble</entry><entry>The treble level on a scale of 0-65535</entry> | ||
594 | </row><row> | ||
595 | <entry>flags</entry><entry>The features this audio device supports | ||
596 | </entry> | ||
597 | </row><row> | ||
598 | <entry>name</entry><entry>A text name to display to the user. We picked | ||
599 | "Radio" as it explains things quite nicely.</entry> | ||
600 | </row><row> | ||
601 | <entry>mode</entry><entry>The current reception mode for the audio | ||
602 | |||
603 | We report MONO because our card is too stupid to know if it is in | ||
604 | mono or stereo. | ||
605 | </entry> | ||
606 | </row><row> | ||
607 | <entry>balance</entry><entry>The stereo balance on a scale of 0-65535, 32768 is | ||
608 | middle.</entry> | ||
609 | </row><row> | ||
610 | <entry>step</entry><entry>The step by which the volume control jumps. This is | ||
611 | used to help make it easy for applications to set | ||
612 | slider behaviour.</entry> | ||
613 | </row> | ||
614 | </tbody> | ||
615 | </tgroup> | ||
616 | </table> | ||
617 | |||
618 | <table frame="all"><title>struct video_audio flags</title> | ||
619 | <tgroup cols="2" align="left"> | ||
620 | <tbody> | ||
621 | <row> | ||
622 | <entry>VIDEO_AUDIO_MUTE</entry><entry>The audio is currently muted. We | ||
623 | could fake this in our driver but we | ||
624 | choose not to bother.</entry> | ||
625 | </row><row> | ||
626 | <entry>VIDEO_AUDIO_MUTABLE</entry><entry>The input has a mute option</entry> | ||
627 | </row><row> | ||
628 | <entry>VIDEO_AUDIO_TREBLE</entry><entry>The input has a treble control</entry> | ||
629 | </row><row> | ||
630 | <entry>VIDEO_AUDIO_BASS</entry><entry>The input has a bass control</entry> | ||
631 | </row> | ||
632 | </tbody> | ||
633 | </tgroup> | ||
634 | </table> | ||
635 | |||
636 | <table frame="all"><title>struct video_audio modes</title> | ||
637 | <tgroup cols="2" align="left"> | ||
638 | <tbody> | ||
639 | <row> | ||
640 | <entry>VIDEO_SOUND_MONO</entry><entry>Mono sound</entry> | ||
641 | </row><row> | ||
642 | <entry>VIDEO_SOUND_STEREO</entry><entry>Stereo sound</entry> | ||
643 | </row><row> | ||
644 | <entry>VIDEO_SOUND_LANG1</entry><entry>Alternative language 1 (TV specific)</entry> | ||
645 | </row><row> | ||
646 | <entry>VIDEO_SOUND_LANG2</entry><entry>Alternative language 2 (TV specific)</entry> | ||
647 | </row> | ||
648 | </tbody> | ||
649 | </tgroup> | ||
650 | </table> | ||
651 | <para> | ||
652 | Having filled in the structure we copy it back to user space. | ||
653 | </para> | ||
654 | <para> | ||
655 | The VIDIOCSAUDIO ioctl allows the user to set the audio parameters in the | ||
656 | video_audio structure. The driver does its best to honour the request. | ||
657 | </para> | ||
658 | <programlisting> | ||
659 | |||
660 | case VIDIOCSAUDIO: | ||
661 | { | ||
662 | struct video_audio v; | ||
663 | if(copy_from_user(&v, arg, sizeof(v))) | ||
664 | return -EFAULT; | ||
665 | if(v.audio) | ||
666 | return -EINVAL; | ||
667 | current_volume = v.volume/16384; | ||
668 | hardware_set_volume(current_volume); | ||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | </programlisting> | ||
673 | <para> | ||
674 | In our case there is very little that the user can set. The volume is | ||
675 | basically the limit. Note that we could pretend to have a mute feature | ||
676 | by rewriting this to | ||
677 | </para> | ||
678 | <programlisting> | ||
679 | |||
680 | case VIDIOCSAUDIO: | ||
681 | { | ||
682 | struct video_audio v; | ||
683 | if(copy_from_user(&v, arg, sizeof(v))) | ||
684 | return -EFAULT; | ||
685 | if(v.audio) | ||
686 | return -EINVAL; | ||
687 | current_volume = v.volume/16384; | ||
688 | if(v.flags&VIDEO_AUDIO_MUTE) | ||
689 | hardware_set_volume(0); | ||
690 | else | ||
691 | hardware_set_volume(current_volume); | ||
692 | current_muted = v.flags & | ||
693 | VIDEO_AUDIO_MUTE; | ||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | </programlisting> | ||
698 | <para> | ||
699 | This with the corresponding changes to the VIDIOCGAUDIO code to report the | ||
700 | state of the mute flag we save and to report the card has a mute function, | ||
701 | will allow applications to use a mute facility with this card. It is | ||
702 | questionable whether this is a good idea however. User applications can already | ||
703 | fake this themselves and kernel space is precious. | ||
704 | </para> | ||
705 | <para> | ||
706 | We now have a working radio ioctl handler. So we just wrap up the function | ||
707 | </para> | ||
708 | <programlisting> | ||
709 | |||
710 | |||
711 | } | ||
712 | return -ENOIOCTLCMD; | ||
713 | } | ||
714 | |||
715 | </programlisting> | ||
716 | <para> | ||
717 | and pass the Video4Linux layer back an error so that it knows we did not | ||
718 | understand the request we got passed. | ||
719 | </para> | ||
720 | </sect1> | ||
721 | <sect1 id="modradio"> | ||
722 | <title>Module Wrapper</title> | ||
723 | <para> | ||
724 | Finally we add in the usual module wrapping and the driver is done. | ||
725 | </para> | ||
726 | <programlisting> | ||
727 | |||
728 | #ifndef MODULE | ||
729 | |||
730 | static int io = 0x300; | ||
731 | |||
732 | #else | ||
733 | |||
734 | static int io = -1; | ||
735 | |||
736 | #endif | ||
737 | |||
738 | MODULE_AUTHOR("Alan Cox"); | ||
739 | MODULE_DESCRIPTION("A driver for an imaginary radio card."); | ||
740 | module_param(io, int, 0444); | ||
741 | MODULE_PARM_DESC(io, "I/O address of the card."); | ||
742 | |||
743 | static int __init init(void) | ||
744 | { | ||
745 | if(io==-1) | ||
746 | { | ||
747 | printk(KERN_ERR | ||
748 | "You must set an I/O address with io=0x???\n"); | ||
749 | return -EINVAL; | ||
750 | } | ||
751 | return myradio_init(NULL); | ||
752 | } | ||
753 | |||
754 | static void __exit cleanup(void) | ||
755 | { | ||
756 | video_unregister_device(&my_radio); | ||
757 | release_region(io, MY_IO_SIZE); | ||
758 | } | ||
759 | |||
760 | module_init(init); | ||
761 | module_exit(cleanup); | ||
762 | |||
763 | </programlisting> | ||
764 | <para> | ||
765 | In this example we set the IO base by default if the driver is compiled into | ||
766 | the kernel: you can still set it using "my_radio.io" if this file is called <filename>my_radio.c</filename>. For the module we require the | ||
767 | user sets the parameter. We set io to a nonsense port (-1) so that we can | ||
768 | tell if the user supplied an io parameter or not. | ||
769 | </para> | ||
770 | <para> | ||
771 | We use MODULE_ defines to give an author for the card driver and a | ||
772 | description. We also use them to declare that io is an integer and it is the | ||
773 | address of the card, and can be read by anyone from sysfs. | ||
774 | </para> | ||
775 | <para> | ||
776 | The clean-up routine unregisters the video_device we registered, and frees | ||
777 | up the I/O space. Note that the unregister takes the actual video_device | ||
778 | structure as its argument. Unlike the file operations structure which can be | ||
779 | shared by all instances of a device, a video_device structure is an actual | ||
780 | instance of the device. If you are registering multiple radio devices you | ||
781 | need to fill in one structure per device (most likely by setting up a | ||
782 | template and copying it to each of the actual device structures). | ||
783 | </para> | ||
784 | </sect1> | ||
785 | </chapter> | ||
786 | <chapter> | ||
787 | <title>Video Capture Devices</title> | ||
788 | <sect1 id="introvid"> | ||
789 | <title>Video Capture Device Types</title> | ||
790 | <para> | ||
791 | The video capture devices share the same interfaces as radio devices. In | ||
792 | order to explain the video capture interface I will use the example of a | ||
793 | camera that has no tuners or audio input. This keeps the example relatively | ||
794 | clean. To get both combine the two driver examples. | ||
795 | </para> | ||
796 | <para> | ||
797 | Video capture devices divide into four categories. A little technology | ||
798 | backgrounder. Full motion video even at television resolution (which is | ||
799 | actually fairly low) is pretty resource-intensive. You are continually | ||
800 | passing megabytes of data every second from the capture card to the display. | ||
801 | Several alternative approaches have emerged because copying this through the | ||
802 | processor and the user program is a particularly bad idea. | ||
803 | </para> | ||
804 | <para> | ||
805 | The first is to add the television image onto the video output directly. | ||
806 | This is also how some 3D cards work. These basic cards can generally drop the | ||
807 | video into any chosen rectangle of the display. Cards like this, which | ||
808 | include most mpeg1 cards that used the feature connector, aren't very | ||
809 | friendly in a windowing environment. They don't understand windows or | ||
810 | clipping. The video window is always on the top of the display. | ||
811 | </para> | ||
812 | <para> | ||
813 | Chroma keying is a technique used by cards to get around this. It is an old | ||
814 | television mixing trick where you mark all the areas you wish to replace | ||
815 | with a single clear colour that isn't used in the image - TV people use an | ||
816 | incredibly bright blue while computing people often use a particularly | ||
817 | virulent purple. Bright blue occurs on the desktop. Anyone with virulent | ||
818 | purple windows has another problem besides their TV overlay. | ||
819 | </para> | ||
820 | <para> | ||
821 | The third approach is to copy the data from the capture card to the video | ||
822 | card, but to do it directly across the PCI bus. This relieves the processor | ||
823 | from doing the work but does require some smartness on the part of the video | ||
824 | capture chip, as well as a suitable video card. Programming this kind of | ||
825 | card and more so debugging it can be extremely tricky. There are some quite | ||
826 | complicated interactions with the display and you may also have to cope with | ||
827 | various chipset bugs that show up when PCI cards start talking to each | ||
828 | other. | ||
829 | </para> | ||
830 | <para> | ||
831 | To keep our example fairly simple we will assume a card that supports | ||
832 | overlaying a flat rectangular image onto the frame buffer output, and which | ||
833 | can also capture stuff into processor memory. | ||
834 | </para> | ||
835 | </sect1> | ||
836 | <sect1 id="regvid"> | ||
837 | <title>Registering Video Capture Devices</title> | ||
838 | <para> | ||
839 | This time we need to add more functions for our camera device. | ||
840 | </para> | ||
841 | <programlisting> | ||
842 | static struct video_device my_camera = | ||
843 | { | ||
844 | "My Camera", | ||
845 | VID_TYPE_OVERLAY|VID_TYPE_SCALES|\ | ||
846 | VID_TYPE_CAPTURE|VID_TYPE_CHROMAKEY, | ||
847 | VID_HARDWARE_MYCAMERA, | ||
848 | camera_open, | ||
849 | camera_close, | ||
850 | camera_read, /* read: capture a frame */ | ||
851 | NULL, /* no write */ | ||
852 | camera_poll, /* poll: wait for a frame */ | ||
853 | camera_ioctl, | ||
854 | NULL, /* no special init function */ | ||
855 | NULL /* no private data */ | ||
856 | }; | ||
857 | </programlisting> | ||
858 | <para> | ||
859 | We need a read() function which is used for capturing data from | ||
860 | the card, and we need a poll function so that a driver can wait for the next | ||
861 | frame to be captured. | ||
862 | </para> | ||
863 | <para> | ||
864 | We use the extra video capability flags that did not apply to the | ||
865 | radio interface. The video related flags are | ||
866 | </para> | ||
867 | <table frame="all"><title>Capture Capabilities</title> | ||
868 | <tgroup cols="2" align="left"> | ||
869 | <tbody> | ||
870 | <row> | ||
871 | <entry>VID_TYPE_CAPTURE</entry><entry>We support image capture</entry> | ||
872 | </row><row> | ||
873 | <entry>VID_TYPE_TELETEXT</entry><entry>A teletext capture device (vbi{n})</entry> | ||
874 | </row><row> | ||
875 | <entry>VID_TYPE_OVERLAY</entry><entry>The image can be directly overlaid onto the | ||
876 | frame buffer</entry> | ||
877 | </row><row> | ||
878 | <entry>VID_TYPE_CHROMAKEY</entry><entry>Chromakey can be used to select which parts | ||
879 | of the image to display</entry> | ||
880 | </row><row> | ||
881 | <entry>VID_TYPE_CLIPPING</entry><entry>It is possible to give the board a list of | ||
882 | rectangles to draw around. </entry> | ||
883 | </row><row> | ||
884 | <entry>VID_TYPE_FRAMERAM</entry><entry>The video capture goes into the video memory | ||
885 | and actually changes it. Applications need | ||
886 | to know this so they can clean up after the | ||
887 | card</entry> | ||
888 | </row><row> | ||
889 | <entry>VID_TYPE_SCALES</entry><entry>The image can be scaled to various sizes, | ||
890 | rather than being a single fixed size.</entry> | ||
891 | </row><row> | ||
892 | <entry>VID_TYPE_MONOCHROME</entry><entry>The capture will be monochrome. This isn't a | ||
893 | complete answer to the question since a mono | ||
894 | camera on a colour capture card will still | ||
895 | produce mono output.</entry> | ||
896 | </row><row> | ||
897 | <entry>VID_TYPE_SUBCAPTURE</entry><entry>The card allows only part of its field of | ||
898 | view to be captured. This enables | ||
899 | applications to avoid copying all of a large | ||
900 | image into memory when only some section is | ||
901 | relevant.</entry> | ||
902 | </row> | ||
903 | </tbody> | ||
904 | </tgroup> | ||
905 | </table> | ||
906 | <para> | ||
907 | We set VID_TYPE_CAPTURE so that we are seen as a capture card, | ||
908 | VID_TYPE_CHROMAKEY so the application knows it is time to draw in virulent | ||
909 | purple, and VID_TYPE_SCALES because we can be resized. | ||
910 | </para> | ||
911 | <para> | ||
912 | Our setup is fairly similar. This time we also want an interrupt line | ||
913 | for the 'frame captured' signal. Not all cards have this so some of them | ||
914 | cannot handle poll(). | ||
915 | </para> | ||
916 | <programlisting> | ||
917 | |||
918 | |||
919 | static int io = 0x320; | ||
920 | static int irq = 11; | ||
921 | |||
922 | int __init mycamera_init(struct video_init *v) | ||
923 | { | ||
924 | if(!request_region(io, MY_IO_SIZE, "mycamera")) | ||
925 | { | ||
926 | printk(KERN_ERR | ||
927 | "mycamera: port 0x%03X is in use.\n", io); | ||
928 | return -EBUSY; | ||
929 | } | ||
930 | |||
931 | if(video_register_device(&my_camera, | ||
932 | VFL_TYPE_GRABBER)==-1) { | ||
933 | release_region(io, MY_IO_SIZE); | ||
934 | return -EINVAL; | ||
935 | } | ||
936 | return 0; | ||
937 | } | ||
938 | |||
939 | </programlisting> | ||
940 | <para> | ||
941 | This is little changed from the needs of the radio card. We specify | ||
942 | VFL_TYPE_GRABBER this time as we want to be allocated a /dev/video name. | ||
943 | </para> | ||
944 | </sect1> | ||
945 | <sect1 id="opvid"> | ||
946 | <title>Opening And Closing The Capture Device</title> | ||
947 | <programlisting> | ||
948 | |||
949 | |||
950 | static int users = 0; | ||
951 | |||
952 | static int camera_open(struct video_device *dev, int flags) | ||
953 | { | ||
954 | if(users) | ||
955 | return -EBUSY; | ||
956 | if(request_irq(irq, camera_irq, 0, "camera", dev)<0) | ||
957 | return -EBUSY; | ||
958 | users++; | ||
959 | return 0; | ||
960 | } | ||
961 | |||
962 | |||
963 | static int camera_close(struct video_device *dev) | ||
964 | { | ||
965 | users--; | ||
966 | free_irq(irq, dev); | ||
967 | } | ||
968 | </programlisting> | ||
969 | <para> | ||
970 | The open and close routines are also quite similar. The only real change is | ||
971 | that we now request an interrupt for the camera device interrupt line. If we | ||
972 | cannot get the interrupt we report EBUSY to the application and give up. | ||
973 | </para> | ||
974 | </sect1> | ||
975 | <sect1 id="irqvid"> | ||
976 | <title>Interrupt Handling</title> | ||
977 | <para> | ||
978 | Our example handler is for an ISA bus device. If it was PCI you would be | ||
979 | able to share the interrupt and would have set SA_SHIRQ to indicate a | ||
980 | shared IRQ. We pass the device pointer as the interrupt routine argument. We | ||
981 | don't need to since we only support one card but doing this will make it | ||
982 | easier to upgrade the driver for multiple devices in the future. | ||
983 | </para> | ||
984 | <para> | ||
985 | Our interrupt routine needs to do little if we assume the card can simply | ||
986 | queue one frame to be read after it captures it. | ||
987 | </para> | ||
988 | <programlisting> | ||
989 | |||
990 | |||
991 | static struct wait_queue *capture_wait; | ||
992 | static int capture_ready = 0; | ||
993 | |||
994 | static void camera_irq(int irq, void *dev_id, | ||
995 | struct pt_regs *regs) | ||
996 | { | ||
997 | capture_ready=1; | ||
998 | wake_up_interruptible(&capture_wait); | ||
999 | } | ||
1000 | </programlisting> | ||
1001 | <para> | ||
1002 | The interrupt handler is nice and simple for this card as we are assuming | ||
1003 | the card is buffering the frame for us. This means we have little to do but | ||
1004 | wake up anybody interested. We also set a capture_ready flag, as we may | ||
1005 | capture a frame before an application needs it. In this case we need to know | ||
1006 | that a frame is ready. If we had to collect the frame on the interrupt life | ||
1007 | would be more complex. | ||
1008 | </para> | ||
1009 | <para> | ||
1010 | The two new routines we need to supply are camera_read which returns a | ||
1011 | frame, and camera_poll which waits for a frame to become ready. | ||
1012 | </para> | ||
1013 | <programlisting> | ||
1014 | |||
1015 | |||
1016 | static int camera_poll(struct video_device *dev, | ||
1017 | struct file *file, struct poll_table *wait) | ||
1018 | { | ||
1019 | poll_wait(file, &capture_wait, wait); | ||
1020 | if(capture_ready) | ||
1021 | return POLLIN|POLLRDNORM; | ||
1022 | return 0; | ||
1023 | } | ||
1024 | |||
1025 | </programlisting> | ||
1026 | <para> | ||
1027 | Our wait queue for polling is the capture_wait queue. This will cause the | ||
1028 | task to be woken up by our camera_irq routine. We check capture_ready to see | ||
1029 | if there is an image present and if so report that it is readable. | ||
1030 | </para> | ||
1031 | </sect1> | ||
1032 | <sect1 id="rdvid"> | ||
1033 | <title>Reading The Video Image</title> | ||
1034 | <programlisting> | ||
1035 | |||
1036 | |||
1037 | static long camera_read(struct video_device *dev, char *buf, | ||
1038 | unsigned long count) | ||
1039 | { | ||
1040 | struct wait_queue wait = { current, NULL }; | ||
1041 | u8 *ptr; | ||
1042 | int len; | ||
1043 | int i; | ||
1044 | |||
1045 | add_wait_queue(&capture_wait, &wait); | ||
1046 | |||
1047 | while(!capture_ready) | ||
1048 | { | ||
1049 | if(file->flags&O_NDELAY) | ||
1050 | { | ||
1051 | remove_wait_queue(&capture_wait, &wait); | ||
1052 | current->state = TASK_RUNNING; | ||
1053 | return -EWOULDBLOCK; | ||
1054 | } | ||
1055 | if(signal_pending(current)) | ||
1056 | { | ||
1057 | remove_wait_queue(&capture_wait, &wait); | ||
1058 | current->state = TASK_RUNNING; | ||
1059 | return -ERESTARTSYS; | ||
1060 | } | ||
1061 | schedule(); | ||
1062 | current->state = TASK_INTERRUPTIBLE; | ||
1063 | } | ||
1064 | remove_wait_queue(&capture_wait, &wait); | ||
1065 | current->state = TASK_RUNNING; | ||
1066 | |||
1067 | </programlisting> | ||
1068 | <para> | ||
1069 | The first thing we have to do is to ensure that the application waits until | ||
1070 | the next frame is ready. The code here is almost identical to the mouse code | ||
1071 | we used earlier in this chapter. It is one of the common building blocks of | ||
1072 | Linux device driver code and probably one which you will find occurs in any | ||
1073 | drivers you write. | ||
1074 | </para> | ||
1075 | <para> | ||
1076 | We wait for a frame to be ready, or for a signal to interrupt our waiting. If a | ||
1077 | signal occurs we need to return from the system call so that the signal can | ||
1078 | be sent to the application itself. We also check to see if the user actually | ||
1079 | wanted to avoid waiting - i.e. if they are using non-blocking I/O and have other things | ||
1080 | to get on with. | ||
1081 | </para> | ||
1082 | <para> | ||
1083 | Next we copy the data from the card to the user application. This is rarely | ||
1084 | as easy as our example makes out. We will add capture_w, and capture_h here | ||
1085 | to hold the width and height of the captured image. We assume the card only | ||
1086 | supports 24bit RGB for now. | ||
1087 | </para> | ||
1088 | <programlisting> | ||
1089 | |||
1090 | |||
1091 | |||
1092 | capture_ready = 0; | ||
1093 | |||
1094 | ptr=(u8 *)buf; | ||
1095 | len = capture_w * 3 * capture_h; /* 24bit RGB */ | ||
1096 | |||
1097 | if(len>count) | ||
1098 | len=count; /* Doesn't all fit */ | ||
1099 | |||
1100 | for(i=0; i<len; i++) | ||
1101 | { | ||
1102 | put_user(inb(io+IMAGE_DATA), ptr); | ||
1103 | ptr++; | ||
1104 | } | ||
1105 | |||
1106 | hardware_restart_capture(); | ||
1107 | |||
1108 | return i; | ||
1109 | } | ||
1110 | |||
1111 | </programlisting> | ||
1112 | <para> | ||
1113 | For a real hardware device you would try to avoid the loop with put_user(). | ||
1114 | Each call to put_user() has a time overhead checking whether the accesses to user | ||
1115 | space are allowed. It would be better to read a line into a temporary buffer | ||
1116 | then copy this to user space in one go. | ||
1117 | </para> | ||
1118 | <para> | ||
1119 | Having captured the image and put it into user space we can kick the card to | ||
1120 | get the next frame acquired. | ||
1121 | </para> | ||
1122 | </sect1> | ||
1123 | <sect1 id="iocvid"> | ||
1124 | <title>Video Ioctl Handling</title> | ||
1125 | <para> | ||
1126 | As with the radio driver the major control interface is via the ioctl() | ||
1127 | function. Video capture devices support the same tuner calls as a radio | ||
1128 | device and also support additional calls to control how the video functions | ||
1129 | are handled. In this simple example the card has no tuners to avoid making | ||
1130 | the code complex. | ||
1131 | </para> | ||
1132 | <programlisting> | ||
1133 | |||
1134 | |||
1135 | |||
1136 | static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg) | ||
1137 | { | ||
1138 | switch(cmd) | ||
1139 | { | ||
1140 | case VIDIOCGCAP: | ||
1141 | { | ||
1142 | struct video_capability v; | ||
1143 | v.type = VID_TYPE_CAPTURE|\ | ||
1144 | VID_TYPE_CHROMAKEY|\ | ||
1145 | VID_TYPE_SCALES|\ | ||
1146 | VID_TYPE_OVERLAY; | ||
1147 | v.channels = 1; | ||
1148 | v.audios = 0; | ||
1149 | v.maxwidth = 640; | ||
1150 | v.minwidth = 16; | ||
1151 | v.maxheight = 480; | ||
1152 | v.minheight = 16; | ||
1153 | strcpy(v.name, "My Camera"); | ||
1154 | if(copy_to_user(arg, &v, sizeof(v))) | ||
1155 | return -EFAULT; | ||
1156 | return 0; | ||
1157 | } | ||
1158 | |||
1159 | |||
1160 | </programlisting> | ||
1161 | <para> | ||
1162 | The first ioctl we must support and which all video capture and radio | ||
1163 | devices are required to support is VIDIOCGCAP. This behaves exactly the same | ||
1164 | as with a radio device. This time, however, we report the extra capabilities | ||
1165 | we outlined earlier on when defining our video_dev structure. | ||
1166 | </para> | ||
1167 | <para> | ||
1168 | We now set the video flags saying that we support overlay, capture, | ||
1169 | scaling and chromakey. We also report size limits - our smallest image is | ||
1170 | 16x16 pixels, our largest is 640x480. | ||
1171 | </para> | ||
1172 | <para> | ||
1173 | To keep things simple we report no audio and no tuning capabilities at all. | ||
1174 | </para> | ||
1175 | <programlisting> | ||
1176 | |||
1177 | case VIDIOCGCHAN: | ||
1178 | { | ||
1179 | struct video_channel v; | ||
1180 | if(copy_from_user(&v, arg, sizeof(v))) | ||
1181 | return -EFAULT; | ||
1182 | if(v.channel != 0) | ||
1183 | return -EINVAL; | ||
1184 | v.flags = 0; | ||
1185 | v.tuners = 0; | ||
1186 | v.type = VIDEO_TYPE_CAMERA; | ||
1187 | v.norm = VIDEO_MODE_AUTO; | ||
1188 | strcpy(v.name, "Camera Input"); | ||
1189 | if(copy_to_user(arg, &v, sizeof(v))) | ||
1190 | return -EFAULT; | ||
1191 | return 0; | ||
1192 | } | ||
1193 | |||
1194 | |||
1195 | </programlisting> | ||
1196 | <para> | ||
1197 | This follows what is very much the standard way an ioctl handler looks | ||
1198 | in Linux. We copy the data into a kernel space variable and we check that the | ||
1199 | request is valid (in this case that the input is 0). Finally we copy the | ||
1200 | camera info back to the user. | ||
1201 | </para> | ||
1202 | <para> | ||
1203 | The VIDIOCGCHAN ioctl allows a user to ask about video channels (that is | ||
1204 | inputs to the video card). Our example card has a single camera input. The | ||
1205 | fields in the structure are | ||
1206 | </para> | ||
1207 | <table frame="all"><title>struct video_channel fields</title> | ||
1208 | <tgroup cols="2" align="left"> | ||
1209 | <tbody> | ||
1210 | <row> | ||
1211 | |||
1212 | <entry>channel</entry><entry>The channel number we are selecting</entry> | ||
1213 | </row><row> | ||
1214 | <entry>name</entry><entry>The name for this channel. This is intended | ||
1215 | to describe the port to the user. | ||
1216 | Appropriate names are therefore things like | ||
1217 | "Camera" "SCART input"</entry> | ||
1218 | </row><row> | ||
1219 | <entry>flags</entry><entry>Channel properties</entry> | ||
1220 | </row><row> | ||
1221 | <entry>type</entry><entry>Input type</entry> | ||
1222 | </row><row> | ||
1223 | <entry>norm</entry><entry>The current television encoding being used | ||
1224 | if relevant for this channel. | ||
1225 | </entry> | ||
1226 | </row> | ||
1227 | </tbody> | ||
1228 | </tgroup> | ||
1229 | </table> | ||
1230 | <table frame="all"><title>struct video_channel flags</title> | ||
1231 | <tgroup cols="2" align="left"> | ||
1232 | <tbody> | ||
1233 | <row> | ||
1234 | <entry>VIDEO_VC_TUNER</entry><entry>Channel has a tuner.</entry> | ||
1235 | </row><row> | ||
1236 | <entry>VIDEO_VC_AUDIO</entry><entry>Channel has audio.</entry> | ||
1237 | </row> | ||
1238 | </tbody> | ||
1239 | </tgroup> | ||
1240 | </table> | ||
1241 | <table frame="all"><title>struct video_channel types</title> | ||
1242 | <tgroup cols="2" align="left"> | ||
1243 | <tbody> | ||
1244 | <row> | ||
1245 | <entry>VIDEO_TYPE_TV</entry><entry>Television input.</entry> | ||
1246 | </row><row> | ||
1247 | <entry>VIDEO_TYPE_CAMERA</entry><entry>Fixed camera input.</entry> | ||
1248 | </row><row> | ||
1249 | <entry>0</entry><entry>Type is unknown.</entry> | ||
1250 | </row> | ||
1251 | </tbody> | ||
1252 | </tgroup> | ||
1253 | </table> | ||
1254 | <table frame="all"><title>struct video_channel norms</title> | ||
1255 | <tgroup cols="2" align="left"> | ||
1256 | <tbody> | ||
1257 | <row> | ||
1258 | <entry>VIDEO_MODE_PAL</entry><entry>PAL encoded Television</entry> | ||
1259 | </row><row> | ||
1260 | <entry>VIDEO_MODE_NTSC</entry><entry>NTSC (US) encoded Television</entry> | ||
1261 | </row><row> | ||
1262 | <entry>VIDEO_MODE_SECAM</entry><entry>SECAM (French) Television </entry> | ||
1263 | </row><row> | ||
1264 | <entry>VIDEO_MODE_AUTO</entry><entry>Automatic switching, or format does not | ||
1265 | matter</entry> | ||
1266 | </row> | ||
1267 | </tbody> | ||
1268 | </tgroup> | ||
1269 | </table> | ||
1270 | <para> | ||
1271 | The corresponding VIDIOCSCHAN ioctl allows a user to change channel and to | ||
1272 | request the norm is changed - for example to switch between a PAL or an NTSC | ||
1273 | format camera. | ||
1274 | </para> | ||
1275 | <programlisting> | ||
1276 | |||
1277 | |||
1278 | case VIDIOCSCHAN: | ||
1279 | { | ||
1280 | struct video_channel v; | ||
1281 | if(copy_from_user(&v, arg, sizeof(v))) | ||
1282 | return -EFAULT; | ||
1283 | if(v.channel != 0) | ||
1284 | return -EINVAL; | ||
1285 | if(v.norm != VIDEO_MODE_AUTO) | ||
1286 | return -EINVAL; | ||
1287 | return 0; | ||
1288 | } | ||
1289 | |||
1290 | |||
1291 | </programlisting> | ||
1292 | <para> | ||
1293 | The implementation of this call in our driver is remarkably easy. Because we | ||
1294 | are assuming fixed format hardware we need only check that the user has not | ||
1295 | tried to change anything. | ||
1296 | </para> | ||
1297 | <para> | ||
1298 | The user also needs to be able to configure and adjust the picture they are | ||
1299 | seeing. This is much like adjusting a television set. A user application | ||
1300 | also needs to know the palette being used so that it knows how to display | ||
1301 | the image that has been captured. The VIDIOCGPICT and VIDIOCSPICT ioctl | ||
1302 | calls provide this information. | ||
1303 | </para> | ||
1304 | <programlisting> | ||
1305 | |||
1306 | |||
1307 | case VIDIOCGPICT: | ||
1308 | { | ||
1309 | struct video_picture v; | ||
1310 | v.brightness = hardware_brightness(); | ||
1311 | v.hue = hardware_hue(); | ||
1312 | v.colour = hardware_saturation(); | ||
1313 | v.contrast = hardware_contrast(); | ||
1314 | /* Not settable */ | ||
1315 | v.whiteness = 32768; | ||
1316 | v.depth = 24; /* 24bit */ | ||
1317 | v.palette = VIDEO_PALETTE_RGB24; | ||
1318 | if(copy_to_user(arg, &v, | ||
1319 | sizeof(v))) | ||
1320 | return -EFAULT; | ||
1321 | return 0; | ||
1322 | } | ||
1323 | |||
1324 | |||
1325 | </programlisting> | ||
1326 | <para> | ||
1327 | The brightness, hue, color, and contrast provide the picture controls that | ||
1328 | are akin to a conventional television. Whiteness provides additional | ||
1329 | control for greyscale images. All of these values are scaled between 0-65535 | ||
1330 | and have 32768 as the mid point setting. The scaling means that applications | ||
1331 | do not have to worry about the capability range of the hardware but can let | ||
1332 | it make a best effort attempt. | ||
1333 | </para> | ||
1334 | <para> | ||
1335 | Our depth is 24, as this is in bits. We will be returning RGB24 format. This | ||
1336 | has one byte of red, then one of green, then one of blue. This then repeats | ||
1337 | for every other pixel in the image. The other common formats the interface | ||
1338 | defines are | ||
1339 | </para> | ||
1340 | <table frame="all"><title>Framebuffer Encodings</title> | ||
1341 | <tgroup cols="2" align="left"> | ||
1342 | <tbody> | ||
1343 | <row> | ||
1344 | <entry>GREY</entry><entry>Linear greyscale. This is for simple cameras and the | ||
1345 | like</entry> | ||
1346 | </row><row> | ||
1347 | <entry>RGB565</entry><entry>The top 5 bits hold 32 red levels, the next six bits | ||
1348 | hold green and the low 5 bits hold blue. </entry> | ||
1349 | </row><row> | ||
1350 | <entry>RGB555</entry><entry>The top bit is clear. The red green and blue levels | ||
1351 | each occupy five bits.</entry> | ||
1352 | </row> | ||
1353 | </tbody> | ||
1354 | </tgroup> | ||
1355 | </table> | ||
1356 | <para> | ||
1357 | Additional modes are supported for YUV capture formats. These are common for | ||
1358 | TV and video conferencing applications. | ||
1359 | </para> | ||
1360 | <para> | ||
1361 | The VIDIOCSPICT ioctl allows a user to set some of the picture parameters. | ||
1362 | Exactly which ones are supported depends heavily on the card itself. It is | ||
1363 | possible to support many modes and effects in software. In general doing | ||
1364 | this in the kernel is a bad idea. Video capture is a performance-sensitive | ||
1365 | application and the programs can often do better if they aren't being | ||
1366 | 'helped' by an overkeen driver writer. Thus for our device we will report | ||
1367 | RGB24 only and refuse to allow a change. | ||
1368 | </para> | ||
1369 | <programlisting> | ||
1370 | |||
1371 | |||
1372 | case VIDIOCSPICT: | ||
1373 | { | ||
1374 | struct video_picture v; | ||
1375 | if(copy_from_user(&v, arg, sizeof(v))) | ||
1376 | return -EFAULT; | ||
1377 | if(v.depth!=24 || | ||
1378 | v.palette != VIDEO_PALETTE_RGB24) | ||
1379 | return -EINVAL; | ||
1380 | set_hardware_brightness(v.brightness); | ||
1381 | set_hardware_hue(v.hue); | ||
1382 | set_hardware_saturation(v.colour); | ||
1383 | set_hardware_contrast(v.contrast); | ||
1384 | return 0; | ||
1385 | } | ||
1386 | |||
1387 | |||
1388 | </programlisting> | ||
1389 | <para> | ||
1390 | We check the user has not tried to change the palette or the depth. We do | ||
1391 | not want to carry out some of the changes and then return an error. This may | ||
1392 | confuse the application which will be assuming no change occurred. | ||
1393 | </para> | ||
1394 | <para> | ||
1395 | In much the same way as you need to be able to set the picture controls to | ||
1396 | get the right capture images, many cards need to know what they are | ||
1397 | displaying onto when generating overlay output. In some cases getting this | ||
1398 | wrong even makes a nasty mess or may crash the computer. For that reason | ||
1399 | the VIDIOCSFBUF ioctl used to set up the frame buffer information may well | ||
1400 | only be usable by root. | ||
1401 | </para> | ||
1402 | <para> | ||
1403 | We will assume our card is one of the old ISA devices with feature connector | ||
1404 | and only supports a couple of standard video modes. This is very common for older | ||
1405 | cards, although the PCI devices are way smarter than this. | ||
1406 | </para> | ||
1407 | <programlisting> | ||
1408 | |||
1409 | |||
1410 | static struct video_buffer capture_fb; | ||
1411 | |||
1412 | case VIDIOCGFBUF: | ||
1413 | { | ||
1414 | if(copy_to_user(arg, &capture_fb, | ||
1415 | sizeof(capture_fb))) | ||
1416 | return -EFAULT; | ||
1417 | return 0; | ||
1418 | |||
1419 | } | ||
1420 | |||
1421 | |||
1422 | </programlisting> | ||
1423 | <para> | ||
1424 | We keep the frame buffer information in the format the ioctl uses. This | ||
1425 | makes it nice and easy to work with in the ioctl calls. | ||
1426 | </para> | ||
1427 | <programlisting> | ||
1428 | |||
1429 | case VIDIOCSFBUF: | ||
1430 | { | ||
1431 | struct video_buffer v; | ||
1432 | |||
1433 | if(!capable(CAP_SYS_ADMIN)) | ||
1434 | return -EPERM; | ||
1435 | |||
1436 | if(copy_from_user(&v, arg, sizeof(v))) | ||
1437 | return -EFAULT; | ||
1438 | if(v.width!=320 && v.width!=640) | ||
1439 | return -EINVAL; | ||
1440 | if(v.height!=200 && v.height!=240 | ||
1441 | && v.height!=400 | ||
1442 | && v.height !=480) | ||
1443 | return -EINVAL; | ||
1444 | memcpy(&capture_fb, &v, sizeof(v)); | ||
1445 | hardware_set_fb(&v); | ||
1446 | return 0; | ||
1447 | } | ||
1448 | |||
1449 | |||
1450 | |||
1451 | </programlisting> | ||
1452 | <para> | ||
1453 | The capable() function checks a user has the required capability. The Linux | ||
1454 | operating system has a set of about 30 capabilities indicating privileged | ||
1455 | access to services. The default set up gives the superuser (uid 0) all of | ||
1456 | them and nobody else has any. | ||
1457 | </para> | ||
1458 | <para> | ||
1459 | We check that the user has the SYS_ADMIN capability, that is they are | ||
1460 | allowed to operate as the machine administrator. We don't want anyone but | ||
1461 | the administrator making a mess of the display. | ||
1462 | </para> | ||
1463 | <para> | ||
1464 | Next we check for standard PC video modes (320 or 640 wide with either | ||
1465 | EGA or VGA depths). If the mode is not a standard video mode we reject it as | ||
1466 | not supported by our card. If the mode is acceptable we save it so that | ||
1467 | VIDIOCGFBUF will give the right answer next time it is called. The | ||
1468 | hardware_set_fb() function is some undescribed card specific function to | ||
1469 | program the card for the desired mode. | ||
1470 | </para> | ||
1471 | <para> | ||
1472 | Before the driver can display an overlay window it needs to know where the | ||
1473 | window should be placed, and also how large it should be. If the card | ||
1474 | supports clipping it needs to know which rectangles to omit from the | ||
1475 | display. The video_window structure is used to describe the way the image | ||
1476 | should be displayed. | ||
1477 | </para> | ||
1478 | <table frame="all"><title>struct video_window fields</title> | ||
1479 | <tgroup cols="2" align="left"> | ||
1480 | <tbody> | ||
1481 | <row> | ||
1482 | <entry>width</entry><entry>The width in pixels of the desired image. The card | ||
1483 | may use a smaller size if this size is not available</entry> | ||
1484 | </row><row> | ||
1485 | <entry>height</entry><entry>The height of the image. The card may use a smaller | ||
1486 | size if this size is not available.</entry> | ||
1487 | </row><row> | ||
1488 | <entry>x</entry><entry> The X position of the top left of the window. This | ||
1489 | is in pixels relative to the left hand edge of the | ||
1490 | picture. Not all cards can display images aligned on | ||
1491 | any pixel boundary. If the position is unsuitable | ||
1492 | the card adjusts the image right and reduces the | ||
1493 | width.</entry> | ||
1494 | </row><row> | ||
1495 | <entry>y</entry><entry> The Y position of the top left of the window. This | ||
1496 | is counted in pixels relative to the top edge of the | ||
1497 | picture. As with the width if the card cannot | ||
1498 | display starting on this line it will adjust the | ||
1499 | values.</entry> | ||
1500 | </row><row> | ||
1501 | <entry>chromakey</entry><entry>The colour (expressed in RGB32 format) for the | ||
1502 | chromakey colour if chroma keying is being used. </entry> | ||
1503 | </row><row> | ||
1504 | <entry>clips</entry><entry>An array of rectangles that must not be drawn | ||
1505 | over.</entry> | ||
1506 | </row><row> | ||
1507 | <entry>clipcount</entry><entry>The number of clips in this array.</entry> | ||
1508 | </row> | ||
1509 | </tbody> | ||
1510 | </tgroup> | ||
1511 | </table> | ||
1512 | <para> | ||
1513 | Each clip is a struct video_clip which has the following fields | ||
1514 | </para> | ||
1515 | <table frame="all"><title>video_clip fields</title> | ||
1516 | <tgroup cols="2" align="left"> | ||
1517 | <tbody> | ||
1518 | <row> | ||
1519 | <entry>x, y</entry><entry>Co-ordinates relative to the display</entry> | ||
1520 | </row><row> | ||
1521 | <entry>width, height</entry><entry>Width and height in pixels</entry> | ||
1522 | </row><row> | ||
1523 | <entry>next</entry><entry>A spare field for the application to use</entry> | ||
1524 | </row> | ||
1525 | </tbody> | ||
1526 | </tgroup> | ||
1527 | </table> | ||
1528 | <para> | ||
1529 | The driver is required to ensure it always draws in the area requested or a smaller area, and that it never draws in any of the areas that are clipped. | ||
1530 | This may well mean it has to leave alone small areas the application wished to be | ||
1531 | drawn. | ||
1532 | </para> | ||
1533 | <para> | ||
1534 | Our example card uses chromakey so does not have to address most of the | ||
1535 | clipping. We will add a video_window structure to our global variables to | ||
1536 | remember our parameters, as we did with the frame buffer. | ||
1537 | </para> | ||
1538 | <programlisting> | ||
1539 | |||
1540 | |||
1541 | case VIDIOCGWIN: | ||
1542 | { | ||
1543 | if(copy_to_user(arg, &capture_win, | ||
1544 | sizeof(capture_win))) | ||
1545 | return -EFAULT; | ||
1546 | return 0; | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | case VIDIOCSWIN: | ||
1551 | { | ||
1552 | struct video_window v; | ||
1553 | if(copy_from_user(&v, arg, sizeof(v))) | ||
1554 | return -EFAULT; | ||
1555 | if(v.width > 640 || v.height > 480) | ||
1556 | return -EINVAL; | ||
1557 | if(v.width < 16 || v.height < 16) | ||
1558 | return -EINVAL; | ||
1559 | hardware_set_key(v.chromakey); | ||
1560 | hardware_set_window(v); | ||
1561 | memcpy(&capture_win, &v, sizeof(v)); | ||
1562 | capture_w = v.width; | ||
1563 | capture_h = v.height; | ||
1564 | return 0; | ||
1565 | } | ||
1566 | |||
1567 | |||
1568 | </programlisting> | ||
1569 | <para> | ||
1570 | Because we are using Chromakey our setup is fairly simple. Mostly we have to | ||
1571 | check the values are sane and load them into the capture card. | ||
1572 | </para> | ||
1573 | <para> | ||
1574 | With all the setup done we can now turn on the actual capture/overlay. This | ||
1575 | is done with the VIDIOCCAPTURE ioctl. This takes a single integer argument | ||
1576 | where 0 is off and 1 is on. | ||
1577 | </para> | ||
1578 | <programlisting> | ||
1579 | |||
1580 | |||
1581 | case VIDIOCCAPTURE: | ||
1582 | { | ||
1583 | int v; | ||
1584 | if(get_user(v, (int *)arg)) | ||
1585 | return -EFAULT; | ||
1586 | if(v==0) | ||
1587 | hardware_capture_off(); | ||
1588 | else | ||
1589 | { | ||
1590 | if(capture_fb.width == 0 | ||
1591 | || capture_w == 0) | ||
1592 | return -EINVAL; | ||
1593 | hardware_capture_on(); | ||
1594 | } | ||
1595 | return 0; | ||
1596 | } | ||
1597 | |||
1598 | |||
1599 | </programlisting> | ||
1600 | <para> | ||
1601 | We grab the flag from user space and either enable or disable according to | ||
1602 | its value. There is one small corner case we have to consider here. Suppose | ||
1603 | that the capture was requested before the video window or the frame buffer | ||
1604 | had been set up. In those cases there will be unconfigured fields in our | ||
1605 | card data, as well as unconfigured hardware settings. We check for this case and | ||
1606 | return an error if the frame buffer or the capture window width is zero. | ||
1607 | </para> | ||
1608 | <programlisting> | ||
1609 | |||
1610 | |||
1611 | default: | ||
1612 | return -ENOIOCTLCMD; | ||
1613 | } | ||
1614 | } | ||
1615 | </programlisting> | ||
1616 | <para> | ||
1617 | |||
1618 | We don't need to support any other ioctls, so if we get this far, it is time | ||
1619 | to tell the video layer that we don't know what the user is talking about. | ||
1620 | </para> | ||
1621 | </sect1> | ||
1622 | <sect1 id="endvid"> | ||
1623 | <title>Other Functionality</title> | ||
1624 | <para> | ||
1625 | The Video4Linux layer supports additional features, including a high | ||
1626 | performance mmap() based capture mode and capturing part of the image. | ||
1627 | These features are out of the scope of the book. You should however have enough | ||
1628 | example code to implement most simple video4linux devices for radio and TV | ||
1629 | cards. | ||
1630 | </para> | ||
1631 | </sect1> | ||
1632 | </chapter> | ||
1633 | <chapter id="bugs"> | ||
1634 | <title>Known Bugs And Assumptions</title> | ||
1635 | <para> | ||
1636 | <variablelist> | ||
1637 | <varlistentry><term>Multiple Opens</term> | ||
1638 | <listitem> | ||
1639 | <para> | ||
1640 | The driver assumes multiple opens should not be allowed. A driver | ||
1641 | can work around this but not cleanly. | ||
1642 | </para> | ||
1643 | </listitem></varlistentry> | ||
1644 | |||
1645 | <varlistentry><term>API Deficiencies</term> | ||
1646 | <listitem> | ||
1647 | <para> | ||
1648 | The existing API poorly reflects compression capable devices. There | ||
1649 | are plans afoot to merge V4L, V4L2 and some other ideas into a | ||
1650 | better interface. | ||
1651 | </para> | ||
1652 | </listitem></varlistentry> | ||
1653 | </variablelist> | ||
1654 | |||
1655 | </para> | ||
1656 | </chapter> | ||
1657 | |||
1658 | <chapter id="pubfunctions"> | ||
1659 | <title>Public Functions Provided</title> | ||
1660 | !Edrivers/media/video/videodev.c | ||
1661 | </chapter> | ||
1662 | |||
1663 | </book> | ||
diff --git a/Documentation/DocBook/wanbook.tmpl b/Documentation/DocBook/wanbook.tmpl new file mode 100644 index 000000000000..9eebcc304de4 --- /dev/null +++ b/Documentation/DocBook/wanbook.tmpl | |||
@@ -0,0 +1,99 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="WANGuide"> | ||
6 | <bookinfo> | ||
7 | <title>Synchronous PPP and Cisco HDLC Programming Guide</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Alan</firstname> | ||
12 | <surname>Cox</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>alan@redhat.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2000</year> | ||
23 | <holder>Alan Cox</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | </legalnotice> | ||
54 | </bookinfo> | ||
55 | |||
56 | <toc></toc> | ||
57 | |||
58 | <chapter id="intro"> | ||
59 | <title>Introduction</title> | ||
60 | <para> | ||
61 | The syncppp drivers in Linux provide a fairly complete | ||
62 | implementation of Cisco HDLC and a minimal implementation of | ||
63 | PPP. The longer term goal is to switch the PPP layer to the | ||
64 | generic PPP interface that is new in Linux 2.3.x. The API should | ||
65 | remain unchanged when this is done, but support will then be | ||
66 | available for IPX, compression and other PPP features. | ||
67 | </para> | ||
68 | </chapter> | ||
69 | <chapter id="bugs"> | ||
70 | <title>Known Bugs And Assumptions</title> | ||
71 | <para> | ||
72 | <variablelist> | ||
73 | <varlistentry><term>PPP is minimal</term> | ||
74 | <listitem> | ||
75 | <para> | ||
76 | The current PPP implementation is very basic, although sufficient | ||
77 | for most WAN usages. | ||
78 | </para> | ||
79 | </listitem></varlistentry> | ||
80 | |||
81 | <varlistentry><term>Cisco HDLC Quirks</term> | ||
82 | <listitem> | ||
83 | <para> | ||
84 | Currently we do not send all packets with the correct Cisco multicast | ||
85 | or unicast flags. Nothing appears to mind too much but this should | ||
86 | be corrected. | ||
87 | </para> | ||
88 | </listitem></varlistentry> | ||
89 | </variablelist> | ||
90 | |||
91 | </para> | ||
92 | </chapter> | ||
93 | |||
94 | <chapter id="pubfunctions"> | ||
95 | <title>Public Functions Provided</title> | ||
96 | !Edrivers/net/wan/syncppp.c | ||
97 | </chapter> | ||
98 | |||
99 | </book> | ||
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl new file mode 100644 index 000000000000..51f3bfb6fb6e --- /dev/null +++ b/Documentation/DocBook/writing_usb_driver.tmpl | |||
@@ -0,0 +1,419 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="USBDeviceDriver"> | ||
6 | <bookinfo> | ||
7 | <title>Writing USB Device Drivers</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Greg</firstname> | ||
12 | <surname>Kroah-Hartman</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>greg@kroah.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2001-2002</year> | ||
23 | <holder>Greg Kroah-Hartman</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | |||
54 | <para> | ||
55 | This documentation is based on an article published in | ||
56 | Linux Journal Magazine, October 2001, Issue 90. | ||
57 | </para> | ||
58 | </legalnotice> | ||
59 | </bookinfo> | ||
60 | |||
61 | <toc></toc> | ||
62 | |||
63 | <chapter id="intro"> | ||
64 | <title>Introduction</title> | ||
65 | <para> | ||
66 | The Linux USB subsystem has grown from supporting only two different | ||
67 | types of devices in the 2.2.7 kernel (mice and keyboards), to over 20 | ||
68 | different types of devices in the 2.4 kernel. Linux currently supports | ||
69 | almost all USB class devices (standard types of devices like keyboards, | ||
70 | mice, modems, printers and speakers) and an ever-growing number of | ||
71 | vendor-specific devices (such as USB to serial converters, digital | ||
72 | cameras, Ethernet devices and MP3 players). For a full list of the | ||
73 | different USB devices currently supported, see Resources. | ||
74 | </para> | ||
75 | <para> | ||
76 | The remaining kinds of USB devices that do not have support on Linux are | ||
77 | almost all vendor-specific devices. Each vendor decides to implement a | ||
78 | custom protocol to talk to their device, so a custom driver usually needs | ||
79 | to be created. Some vendors are open with their USB protocols and help | ||
80 | with the creation of Linux drivers, while others do not publish them, and | ||
81 | developers are forced to reverse-engineer. See Resources for some links | ||
82 | to handy reverse-engineering tools. | ||
83 | </para> | ||
84 | <para> | ||
85 | Because each different protocol causes a new driver to be created, I have | ||
86 | written a generic USB driver skeleton, modeled after the pci-skeleton.c | ||
87 | file in the kernel source tree upon which many PCI network drivers have | ||
88 | been based. This USB skeleton can be found at drivers/usb/usb-skeleton.c | ||
89 | in the kernel source tree. In this article I will walk through the basics | ||
90 | of the skeleton driver, explaining the different pieces and what needs to | ||
91 | be done to customize it to your specific device. | ||
92 | </para> | ||
93 | </chapter> | ||
94 | |||
95 | <chapter id="basics"> | ||
96 | <title>Linux USB Basics</title> | ||
97 | <para> | ||
98 | If you are going to write a Linux USB driver, please become familiar with | ||
99 | the USB protocol specification. It can be found, along with many other | ||
100 | useful documents, at the USB home page (see Resources). An excellent | ||
101 | introduction to the Linux USB subsystem can be found at the USB Working | ||
102 | Devices List (see Resources). It explains how the Linux USB subsystem is | ||
103 | structured and introduces the reader to the concept of USB urbs, which | ||
104 | are essential to USB drivers. | ||
105 | </para> | ||
106 | <para> | ||
107 | The first thing a Linux USB driver needs to do is register itself with | ||
108 | the Linux USB subsystem, giving it some information about which devices | ||
109 | the driver supports and which functions to call when a device supported | ||
110 | by the driver is inserted or removed from the system. All of this | ||
111 | information is passed to the USB subsystem in the usb_driver structure. | ||
112 | The skeleton driver declares a usb_driver as: | ||
113 | </para> | ||
114 | <programlisting> | ||
115 | static struct usb_driver skel_driver = { | ||
116 | .name = "skeleton", | ||
117 | .probe = skel_probe, | ||
118 | .disconnect = skel_disconnect, | ||
119 | .fops = &skel_fops, | ||
120 | .minor = USB_SKEL_MINOR_BASE, | ||
121 | .id_table = skel_table, | ||
122 | }; | ||
123 | </programlisting> | ||
124 | <para> | ||
125 | The variable name is a string that describes the driver. It is used in | ||
126 | informational messages printed to the system log. The probe and | ||
127 | disconnect function pointers are called when a device that matches the | ||
128 | information provided in the id_table variable is either seen or removed. | ||
129 | </para> | ||
130 | <para> | ||
131 | The fops and minor variables are optional. Most USB drivers hook into | ||
132 | another kernel subsystem, such as the SCSI, network or TTY subsystem. | ||
133 | These types of drivers register themselves with the other kernel | ||
134 | subsystem, and any user-space interactions are provided through that | ||
135 | interface. But for drivers that do not have a matching kernel subsystem, | ||
136 | such as MP3 players or scanners, a method of interacting with user space | ||
137 | is needed. The USB subsystem provides a way to register a minor device | ||
138 | number and a set of file_operations function pointers that enable this | ||
139 | user-space interaction. The skeleton driver needs this kind of interface, | ||
140 | so it provides a minor starting number and a pointer to its | ||
141 | file_operations functions. | ||
142 | </para> | ||
143 | <para> | ||
144 | The USB driver is then registered with a call to usb_register, usually in | ||
145 | the driver's init function, as shown here: | ||
146 | </para> | ||
147 | <programlisting> | ||
148 | static int __init usb_skel_init(void) | ||
149 | { | ||
150 | int result; | ||
151 | |||
152 | /* register this driver with the USB subsystem */ | ||
153 | result = usb_register(&skel_driver); | ||
154 | if (result < 0) { | ||
155 | err("usb_register failed for the "__FILE__ "driver." | ||
156 | "Error number %d", result); | ||
157 | return -1; | ||
158 | } | ||
159 | |||
160 | return 0; | ||
161 | } | ||
162 | module_init(usb_skel_init); | ||
163 | </programlisting> | ||
164 | <para> | ||
165 | When the driver is unloaded from the system, it needs to unregister | ||
166 | itself with the USB subsystem. This is done with the usb_deregister | ||
167 | function: | ||
168 | </para> | ||
169 | <programlisting> | ||
170 | static void __exit usb_skel_exit(void) | ||
171 | { | ||
172 | /* deregister this driver with the USB subsystem */ | ||
173 | usb_deregister(&skel_driver); | ||
174 | } | ||
175 | module_exit(usb_skel_exit); | ||
176 | </programlisting> | ||
177 | <para> | ||
178 | To enable the linux-hotplug system to load the driver automatically when | ||
179 | the device is plugged in, you need to create a MODULE_DEVICE_TABLE. The | ||
180 | following code tells the hotplug scripts that this module supports a | ||
181 | single device with a specific vendor and product ID: | ||
182 | </para> | ||
183 | <programlisting> | ||
184 | /* table of devices that work with this driver */ | ||
185 | static struct usb_device_id skel_table [] = { | ||
186 | { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) }, | ||
187 | { } /* Terminating entry */ | ||
188 | }; | ||
189 | MODULE_DEVICE_TABLE (usb, skel_table); | ||
190 | </programlisting> | ||
191 | <para> | ||
192 | There are other macros that can be used in describing a usb_device_id for | ||
193 | drivers that support a whole class of USB devices. See usb.h for more | ||
194 | information on this. | ||
195 | </para> | ||
196 | </chapter> | ||
197 | |||
198 | <chapter id="device"> | ||
199 | <title>Device operation</title> | ||
200 | <para> | ||
201 | When a device is plugged into the USB bus that matches the device ID | ||
202 | pattern that your driver registered with the USB core, the probe function | ||
203 | is called. A pointer to the usb_interface structure and the matching usb_device_id entry | ||
204 | are passed to the function: | ||
205 | </para> | ||
206 | <programlisting> | ||
207 | static int skel_probe(struct usb_interface *interface, | ||
208 | const struct usb_device_id *id) | ||
209 | </programlisting> | ||
210 | <para> | ||
211 | The driver now needs to verify that this device is actually one that it | ||
212 | can accept. If so, it returns 0. | ||
213 | If not, or if any error occurs during initialization, an error code | ||
214 | (such as <literal>-ENOMEM</literal> or <literal>-ENODEV</literal>) | ||
215 | is returned from the probe function. | ||
216 | </para> | ||
217 | <para> | ||
218 | In the skeleton driver, we determine what end points are marked as bulk-in | ||
219 | and bulk-out. We create buffers to hold the data that will be sent and | ||
220 | received from the device, and a USB urb to write data to the device is | ||
221 | initialized. | ||
222 | </para> | ||
223 | <para> | ||
224 | Conversely, when the device is removed from the USB bus, the disconnect | ||
225 | function is called with the device pointer. The driver needs to clean any | ||
226 | private data that has been allocated at this time and to shut down any | ||
227 | pending urbs that are in the USB system. The driver also unregisters | ||
228 | itself from the devfs subsystem with the call: | ||
229 | </para> | ||
230 | <programlisting> | ||
231 | /* remove our devfs node */ | ||
232 | devfs_unregister(skel->devfs); | ||
233 | </programlisting> | ||
234 | <para> | ||
235 | Now that the device is plugged into the system and the driver is bound to | ||
236 | the device, any of the functions in the file_operations structure that | ||
237 | were passed to the USB subsystem will be called from a user program trying | ||
238 | to talk to the device. The first function called will be open, as the | ||
239 | program tries to open the device for I/O. We increment our private usage | ||
240 | count and save off a pointer to our internal structure in the file | ||
241 | structure. This is done so that future calls to file operations will | ||
242 | enable the driver to determine which device the user is addressing. All | ||
243 | of this is done with the following code: | ||
244 | </para> | ||
245 | <programlisting> | ||
246 | /* increment our usage count for the module */ | ||
247 | ++skel->open_count; | ||
248 | |||
249 | /* save our object in the file's private structure */ | ||
250 | file->private_data = dev; | ||
251 | </programlisting> | ||
252 | <para> | ||
253 | After the open function is called, the read and write functions are called | ||
254 | to receive and send data to the device. In the skel_write function, we | ||
255 | receive a pointer to some data that the user wants to send to the device | ||
256 | and the size of the data. The function determines how much data it can | ||
257 | send to the device based on the size of the write urb it has created (this | ||
258 | size depends on the size of the bulk out end point that the device has). | ||
259 | Then it copies the data from user space to kernel space, points the urb to | ||
260 | the data and submits the urb to the USB subsystem. This can be shown in | ||
261 | the following code: | ||
262 | </para> | ||
263 | <programlisting> | ||
264 | /* we can only write as much as 1 urb will hold */ | ||
265 | bytes_written = (count > skel->bulk_out_size) ? skel->bulk_out_size : count; | ||
266 | |||
267 | /* copy the data from user space into our urb */ | ||
268 | copy_from_user(skel->write_urb->transfer_buffer, buffer, bytes_written); | ||
269 | |||
270 | /* set up our urb */ | ||
271 | usb_fill_bulk_urb(skel->write_urb, | ||
272 | skel->dev, | ||
273 | usb_sndbulkpipe(skel->dev, skel->bulk_out_endpointAddr), | ||
274 | skel->write_urb->transfer_buffer, | ||
275 | bytes_written, | ||
276 | skel_write_bulk_callback, | ||
277 | skel); | ||
278 | |||
279 | /* send the data out the bulk port */ | ||
280 | result = usb_submit_urb(skel->write_urb); | ||
281 | if (result) { | ||
282 | err("Failed submitting write urb, error %d", result); | ||
283 | } | ||
284 | </programlisting> | ||
285 | <para> | ||
286 | When the write urb is filled up with the proper information using the | ||
287 | usb_fill_bulk_urb function, we point the urb's completion callback to call our | ||
288 | own skel_write_bulk_callback function. This function is called when the | ||
289 | urb is finished by the USB subsystem. The callback function is called in | ||
290 | interrupt context, so caution must be taken not to do very much processing | ||
291 | at that time. Our implementation of skel_write_bulk_callback merely | ||
292 | reports if the urb was completed successfully or not and then returns. | ||
293 | </para> | ||
294 | <para> | ||
295 | The read function works a bit differently from the write function in that | ||
296 | we do not use an urb to transfer data from the device to the driver. | ||
297 | Instead we call the usb_bulk_msg function, which can be used to send or | ||
298 | receive data from a device without having to create urbs and handle | ||
299 | urb completion callback functions. We call the usb_bulk_msg function, | ||
300 | giving it a buffer into which to place any data received from the device | ||
301 | and a timeout value. If the timeout period expires without receiving any | ||
302 | data from the device, the function will fail and return an error code. | ||
303 | This can be shown with the following code: | ||
304 | </para> | ||
305 | <programlisting> | ||
306 | /* do an immediate bulk read to get data from the device */ | ||
307 | retval = usb_bulk_msg (skel->dev, | ||
308 | usb_rcvbulkpipe (skel->dev, | ||
309 | skel->bulk_in_endpointAddr), | ||
310 | skel->bulk_in_buffer, | ||
311 | skel->bulk_in_size, | ||
312 | &count, HZ*10); | ||
313 | /* if the read was successful, copy the data to user space */ | ||
314 | if (!retval) { | ||
315 | if (copy_to_user (buffer, skel->bulk_in_buffer, count)) | ||
316 | retval = -EFAULT; | ||
317 | else | ||
318 | retval = count; | ||
319 | } | ||
320 | </programlisting> | ||
321 | <para> | ||
322 | The usb_bulk_msg function can be very useful for doing single reads or | ||
323 | writes to a device; however, if you need to read or write constantly to a | ||
324 | device, it is recommended to set up your own urbs and submit them to the | ||
325 | USB subsystem. | ||
326 | </para> | ||
327 | <para> | ||
328 | When the user program releases the file handle that it has been using to | ||
329 | talk to the device, the release function in the driver is called. In this | ||
330 | function we decrement our private usage count and wait for possible | ||
331 | pending writes: | ||
332 | </para> | ||
333 | <programlisting> | ||
334 | /* decrement our usage count for the device */ | ||
335 | --skel->open_count; | ||
336 | </programlisting> | ||
337 | <para> | ||
338 | One of the more difficult problems that USB drivers must be able to handle | ||
339 | smoothly is the fact that the USB device may be removed from the system at | ||
340 | any point in time, even if a program is currently talking to it. It needs | ||
341 | to be able to shut down any current reads and writes and notify the | ||
342 | user-space programs that the device is no longer there. The following | ||
343 | code (function <function>skel_delete</function>) | ||
344 | is an example of how to do this: </para> | ||
345 | <programlisting> | ||
346 | static inline void skel_delete (struct usb_skel *dev) | ||
347 | { | ||
348 | if (dev->bulk_in_buffer != NULL) | ||
349 | kfree (dev->bulk_in_buffer); | ||
350 | if (dev->bulk_out_buffer != NULL) | ||
351 | usb_buffer_free (dev->udev, dev->bulk_out_size, | ||
352 | dev->bulk_out_buffer, | ||
353 | dev->write_urb->transfer_dma); | ||
354 | if (dev->write_urb != NULL) | ||
355 | usb_free_urb (dev->write_urb); | ||
356 | kfree (dev); | ||
357 | } | ||
358 | </programlisting> | ||
359 | <para> | ||
360 | If a program currently has an open handle to the device, we reset the flag | ||
361 | <literal>device_present</literal>. For | ||
362 | every read, write, release and other functions that expect a device to be | ||
363 | present, the driver first checks this flag to see if the device is | ||
364 | still present. If not, it realizes that the device has disappeared, and a | ||
365 | -ENODEV error is returned to the user-space program. When the release | ||
366 | function is eventually called, it determines if there is no device | ||
367 | and if not, it does the cleanup that the skel_disconnect | ||
368 | function normally does if there are no open files on the device (see | ||
369 | Listing 5). | ||
370 | </para> | ||
371 | </chapter> | ||
372 | |||
373 | <chapter id="iso"> | ||
374 | <title>Isochronous Data</title> | ||
375 | <para> | ||
376 | This usb-skeleton driver does not have any examples of interrupt or | ||
377 | isochronous data being sent to or from the device. Interrupt data is sent | ||
378 | almost exactly as bulk data is, with a few minor exceptions. Isochronous | ||
379 | data works differently with continuous streams of data being sent to or | ||
380 | from the device. The audio and video camera drivers are very good examples | ||
381 | of drivers that handle isochronous data and will be useful if you also | ||
382 | need to do this. | ||
383 | </para> | ||
384 | </chapter> | ||
385 | |||
386 | <chapter id="Conclusion"> | ||
387 | <title>Conclusion</title> | ||
388 | <para> | ||
389 | Writing Linux USB device drivers is not a difficult task as the | ||
390 | usb-skeleton driver shows. This driver, combined with the other current | ||
391 | USB drivers, should provide enough examples to help a beginning author | ||
392 | create a working driver in a minimal amount of time. The linux-usb-devel | ||
393 | mailing list archives also contain a lot of helpful information. | ||
394 | </para> | ||
395 | </chapter> | ||
396 | |||
397 | <chapter id="resources"> | ||
398 | <title>Resources</title> | ||
399 | <para> | ||
400 | The Linux USB Project: <ulink url="http://www.linux-usb.org">http://www.linux-usb.org/</ulink> | ||
401 | </para> | ||
402 | <para> | ||
403 | Linux Hotplug Project: <ulink url="http://linux-hotplug.sourceforge.net">http://linux-hotplug.sourceforge.net/</ulink> | ||
404 | </para> | ||
405 | <para> | ||
406 | Linux USB Working Devices List: <ulink url="http://www.qbik.ch/usb/devices">http://www.qbik.ch/usb/devices/</ulink> | ||
407 | </para> | ||
408 | <para> | ||
409 | linux-usb-devel Mailing List Archives: <ulink url="http://marc.theaimsgroup.com/?l=linux-usb-devel">http://marc.theaimsgroup.com/?l=linux-usb-devel</ulink> | ||
410 | </para> | ||
411 | <para> | ||
412 | Programming Guide for Linux USB Device Drivers: <ulink url="http://usb.cs.tum.edu/usbdoc">http://usb.cs.tum.edu/usbdoc</ulink> | ||
413 | </para> | ||
414 | <para> | ||
415 | USB Home Page: <ulink url="http://www.usb.org">http://www.usb.org</ulink> | ||
416 | </para> | ||
417 | </chapter> | ||
418 | |||
419 | </book> | ||
diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl new file mode 100644 index 000000000000..a507876447aa --- /dev/null +++ b/Documentation/DocBook/z8530book.tmpl | |||
@@ -0,0 +1,385 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="Z85230Guide"> | ||
6 | <bookinfo> | ||
7 | <title>Z8530 Programming Guide</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Alan</firstname> | ||
12 | <surname>Cox</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>alan@redhat.com</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2000</year> | ||
23 | <holder>Alan Cox</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License as published by the Free Software Foundation; either | ||
31 | version 2 of the License, or (at your option) any later | ||
32 | version. | ||
33 | </para> | ||
34 | |||
35 | <para> | ||
36 | This program is distributed in the hope that it will be | ||
37 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
38 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
39 | See the GNU General Public License for more details. | ||
40 | </para> | ||
41 | |||
42 | <para> | ||
43 | You should have received a copy of the GNU General Public | ||
44 | License along with this program; if not, write to the Free | ||
45 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
46 | MA 02111-1307 USA | ||
47 | </para> | ||
48 | |||
49 | <para> | ||
50 | For more details see the file COPYING in the source | ||
51 | distribution of Linux. | ||
52 | </para> | ||
53 | </legalnotice> | ||
54 | </bookinfo> | ||
55 | |||
56 | <toc></toc> | ||
57 | |||
58 | <chapter id="intro"> | ||
59 | <title>Introduction</title> | ||
60 | <para> | ||
61 | The Z85x30 family synchronous/asynchronous controller chips are | ||
62 | used on a large number of cheap network interface cards. The | ||
63 | kernel provides a core interface layer that is designed to make | ||
64 | it easy to provide WAN services using this chip. | ||
65 | </para> | ||
66 | <para> | ||
67 | The current driver only supports synchronous operation. Merging the | ||
68 | asynchronous driver support into this code to allow any Z85x30 | ||
69 | device to be used as both a tty interface and as a synchronous | ||
70 | controller is a project for Linux post the 2.4 release. | ||
71 | </para> | ||
72 | <para> | ||
73 | The support code handles most common card configurations and | ||
74 | supports running both Cisco HDLC and Synchronous PPP. With extra | ||
75 | glue the frame relay and X.25 protocols can also be used with this | ||
76 | driver. | ||
77 | </para> | ||
78 | </chapter> | ||
79 | |||
80 | <chapter> | ||
81 | <title>Driver Modes</title> | ||
82 | <para> | ||
83 | The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices | ||
84 | in three different modes. Each mode can be applied to an individual | ||
85 | channel on the chip (each chip has two channels). | ||
86 | </para> | ||
87 | <para> | ||
88 | The PIO synchronous mode supports the most common Z8530 wiring. Here | ||
89 | the chip is interfaced to the I/O and interrupt facilities of the | ||
90 | host machine but not to the DMA subsystem. When running PIO the | ||
91 | Z8530 has extremely tight timing requirements. Doing high speeds, | ||
92 | even with a Z85230 will be tricky. Typically you should expect to | ||
93 | achieve at best 9600 baud with a Z8C530 and 64Kbits with a Z85230. | ||
94 | </para> | ||
95 | <para> | ||
96 | The DMA mode supports the chip when it is configured to use dual DMA | ||
97 | channels on an ISA bus. The better cards tend to support this mode | ||
98 | of operation for a single channel. With DMA running the Z85230 tops | ||
99 | out when it starts to hit ISA DMA constraints at about 512Kbits. It | ||
100 | is worth noting here that many PC machines hang or crash when the | ||
101 | chip is driven fast enough to hold the ISA bus solid. | ||
102 | </para> | ||
103 | <para> | ||
104 | Transmit DMA mode uses a single DMA channel. The DMA channel is used | ||
105 | for transmission as the transmit FIFO is smaller than the receive | ||
106 | FIFO. It gives better performance than pure PIO mode but is nowhere | ||
107 | near as ideal as pure DMA mode. | ||
108 | </para> | ||
109 | </chapter> | ||
110 | |||
111 | <chapter> | ||
112 | <title>Using the Z85230 driver</title> | ||
113 | <para> | ||
114 | The Z85230 driver provides the back end interface to your board. To | ||
115 | configure a Z8530 interface you need to detect the board and to | ||
116 | identify its ports and interrupt resources. It is also your problem | ||
117 | to verify the resources are available. | ||
118 | </para> | ||
119 | <para> | ||
120 | Having identified the chip you need to fill in a struct z8530_dev, | ||
121 | which describes each chip. This object must exist until you finally | ||
122 | shut down the board. Firstly zero the active field. This ensures | ||
123 | nothing goes off without you intending it. The irq field should | ||
124 | be set to the interrupt number of the chip. (Each chip has a single | ||
125 | interrupt source rather than each channel). You are responsible | ||
126 | for allocating the interrupt line. The interrupt handler should be | ||
127 | set to <function>z8530_interrupt</function>. The device id should | ||
128 | be set to the z8530_dev structure pointer. Whether the interrupt can | ||
129 | be shared or not is board dependent, and up to you to initialise. | ||
130 | </para> | ||
131 | <para> | ||
132 | The structure holds two channel structures. | ||
133 | Initialise chanA.ctrlio and chanA.dataio with the address of the | ||
134 | control and data ports. You can OR this with Z8530_PORT_SLEEP to | ||
135 | indicate your interface needs the 5uS delay for chip settling done | ||
136 | in software. The PORT_SLEEP option is architecture specific. Other | ||
137 | flags may become available on future platforms, e.g. for MMIO. | ||
138 | Initialise the chanA.irqs to &z8530_nop to start the chip up | ||
139 | as disabled and discarding interrupt events. This ensures that | ||
140 | stray interrupts will be mopped up and not hang the bus. Set | ||
141 | chanA.dev to point to the device structure itself. The | ||
142 | private and name field you may use as you wish. The private field | ||
143 | is unused by the Z85230 layer. The name is used for error reporting | ||
144 | and it may thus make sense to make it match the network name. | ||
145 | </para> | ||
146 | <para> | ||
147 | Repeat the same operation with the B channel if your chip has | ||
148 | both channels wired to something useful. This isn't always the | ||
149 | case. If it is not wired then the I/O values do not matter, but | ||
150 | you must initialise chanB.dev. | ||
151 | </para> | ||
152 | <para> | ||
153 | If your board has DMA facilities then initialise the txdma and | ||
154 | rxdma fields for the relevant channels. You must also allocate the | ||
155 | ISA DMA channels and do any necessary board level initialisation | ||
156 | to configure them. The low level driver will do the Z8530 and | ||
157 | DMA controller programming but not board specific magic. | ||
158 | </para> | ||
159 | <para> | ||
160 | Having initialised the device you can then call | ||
161 | <function>z8530_init</function>. This will probe the chip and | ||
162 | reset it into a known state. An identification sequence is then | ||
163 | run to identify the chip type. If the checks fail to pass the | ||
164 | function returns a non zero error code. Typically this indicates | ||
165 | that the port given is not valid. After this call the | ||
166 | type field of the z8530_dev structure is initialised to either | ||
167 | Z8530, Z85C30 or Z85230 according to the chip found. | ||
168 | </para> | ||
169 | <para> | ||
170 | Once you have called z8530_init you can also make use of the utility | ||
171 | function <function>z8530_describe</function>. This provides a | ||
172 | consistent reporting format for the Z8530 devices, and allows all | ||
173 | the drivers to provide consistent reporting. | ||
174 | </para> | ||
175 | </chapter> | ||
176 | |||
177 | <chapter> | ||
178 | <title>Attaching Network Interfaces</title> | ||
179 | <para> | ||
180 | If you wish to use the network interface facilities of the driver, | ||
181 | then you need to attach a network device to each channel that is | ||
182 | present and in use. In addition, to use the SyncPPP and Cisco HDLC | ||
183 | you need to follow some additional plumbing rules. They may seem | ||
184 | complex but a look at the example hostess_sv11 driver should | ||
185 | reassure you. | ||
186 | </para> | ||
187 | <para> | ||
188 | The network device used for each channel should be pointed to by | ||
189 | the netdevice field of each channel. The dev->priv field of the | ||
190 | network device points to your private data - you will need to be | ||
191 | able to find your ppp device from this. In addition, to use the | ||
192 | sync ppp layer the private data must start with a void * pointer | ||
193 | to the syncppp structures. | ||
194 | </para> | ||
195 | <para> | ||
196 | The way most drivers approach this particular problem is to | ||
197 | create a structure holding the Z8530 device definition and | ||
198 | put that and the syncppp pointer into the private field of | ||
199 | the network device. The network device fields of the channels | ||
200 | then point back to the network devices. The ppp_device can also | ||
201 | be put in the private structure conveniently. | ||
202 | </para> | ||
203 | <para> | ||
204 | If you wish to use the synchronous ppp then you need to attach | ||
205 | the syncppp layer to the network device. You should do this before | ||
206 | you register the network device. The | ||
207 | <function>sppp_attach</function> requires that the first void * | ||
208 | pointer in your private data is pointing to an empty struct | ||
209 | ppp_device. The function fills in the initial data for the | ||
210 | ppp/hdlc layer. | ||
211 | </para> | ||
212 | <para> | ||
213 | Before you register your network device you will also need to | ||
214 | provide suitable handlers for most of the network device callbacks. | ||
215 | See the network device documentation for more details on this. | ||
216 | </para> | ||
217 | </chapter> | ||
218 | |||
219 | <chapter> | ||
220 | <title>Configuring And Activating The Port</title> | ||
221 | <para> | ||
222 | The Z85230 driver provides helper functions and tables to load the | ||
223 | port registers on the Z8530 chips. When programming the register | ||
224 | settings for a channel be aware that the documentation recommends | ||
225 | initialisation orders. Strange things happen when these are not | ||
226 | followed. | ||
227 | </para> | ||
228 | <para> | ||
229 | <function>z8530_channel_load</function> takes an array of | ||
230 | pairs of initialisation values in an array of u8 type. The first | ||
231 | value is the Z8530 register number. Add 16 to indicate the alternate | ||
232 | register bank on the later chips. The array is terminated by a 255. | ||
233 | </para> | ||
234 | <para> | ||
235 | The driver provides a pair of public tables. The | ||
236 | z8530_hdlc_kilostream table is for the UK 'Kilostream' service and | ||
237 | also happens to cover most other end host configurations. The | ||
238 | z8530_hdlc_kilostream_85230 table is the same configuration using | ||
239 | the enhancements of the 85230 chip. The configuration loaded is | ||
240 | standard NRZ encoded synchronous data with HDLC bitstuffing. All | ||
241 | of the timing is taken from the other end of the link. | ||
242 | </para> | ||
243 | <para> | ||
244 | When writing your own tables be aware that the driver internally | ||
245 | tracks register values. It may need to reload values. You should | ||
246 | therefore be sure to set registers 1-7, 9-11, 14 and 15 in all | ||
247 | configurations. Where the register settings depend on DMA selection | ||
248 | the driver will update the bits itself when you open or close. | ||
249 | Loading a new table with the interface open is not recommended. | ||
250 | </para> | ||
251 | <para> | ||
252 | There are three standard configurations supported by the core | ||
253 | code. In PIO mode the interface is programmed up to use | ||
254 | interrupt driven PIO. This places high demands on the host processor | ||
255 | to avoid latency. The driver is written to take account of latency | ||
256 | issues but it cannot avoid latencies caused by other drivers, | ||
257 | notably IDE in PIO mode. Because the drivers allocate buffers you | ||
258 | must also prevent MTU changes while the port is open. | ||
259 | </para> | ||
260 | <para> | ||
261 | Once the port is open it will call the rx_function of each channel | ||
262 | whenever a completed packet arrives. This is invoked from | ||
263 | interrupt context and passes you the channel and a network | ||
264 | buffer (struct sk_buff) holding the data. The data includes | ||
265 | the CRC bytes so most users will want to trim the last two | ||
266 | bytes before processing the data. This function is very timing | ||
267 | critical. When you wish to simply discard data the support | ||
268 | code provides the function <function>z8530_null_rx</function> | ||
269 | to discard the data. | ||
270 | </para> | ||
271 | <para> | ||
272 | To activate PIO mode sending and receiving the <function> | ||
273 | z8530_sync_open</function> is called. This expects to be passed | ||
274 | the network device and the channel. Typically this is called from | ||
275 | your network device open callback. On a failure a non zero error | ||
276 | status is returned. The <function>z8530_sync_close</function> | ||
277 | function shuts down a PIO channel. This must be done before the | ||
278 | channel is opened again and before the driver shuts down | ||
279 | and unloads. | ||
280 | </para> | ||
281 | <para> | ||
282 | The ideal mode of operation is dual channel DMA mode. Here the | ||
283 | kernel driver will configure the board for DMA in both directions. | ||
284 | The driver also handles ISA DMA issues such as controller | ||
285 | programming and the memory range limit for you. This mode is | ||
286 | activated by calling the <function>z8530_sync_dma_open</function> | ||
287 | function. On failure a non zero error value is returned. | ||
288 | Once this mode is activated it can be shut down by calling the | ||
289 | <function>z8530_sync_dma_close</function>. You must call the close | ||
290 | function matching the open mode you used. | ||
291 | </para> | ||
292 | <para> | ||
293 | The final supported mode uses a single DMA channel to drive the | ||
294 | transmit side. As the Z85C30 has a larger FIFO on the receive | ||
295 | channel this tends to increase the maximum speed a little. | ||
296 | This is activated by calling the <function>z8530_sync_txdma_open | ||
297 | </function>. This returns a non zero error code on failure. The | ||
298 | <function>z8530_sync_txdma_close</function> function closes down | ||
299 | the Z8530 interface from this mode. | ||
300 | </para> | ||
301 | </chapter> | ||
302 | |||
303 | <chapter> | ||
304 | <title>Network Layer Functions</title> | ||
305 | <para> | ||
306 | The Z8530 layer provides functions to queue packets for | ||
307 | transmission. The driver internally buffers the frame currently | ||
308 | being transmitted and one further frame (in order to keep back | ||
309 | to back transmission running). Any further buffering is up to | ||
310 | the caller. | ||
311 | </para> | ||
312 | <para> | ||
313 | The function <function>z8530_queue_xmit</function> takes a network | ||
314 | buffer in sk_buff format and queues it for transmission. The | ||
315 | caller must provide the entire packet with the exception of the | ||
316 | bitstuffing and CRC. This is normally done by the caller via | ||
317 | the syncppp interface layer. It returns 0 if the buffer has been | ||
318 | queued and non zero values for queue full. If the function accepts | ||
319 | the buffer it becomes the property of the Z8530 layer and the caller | ||
320 | should not free it. | ||
321 | </para> | ||
322 | <para> | ||
323 | The function <function>z8530_get_stats</function> returns a pointer | ||
324 | to an internally maintained per interface statistics block. This | ||
325 | provides most of the interface code needed to implement the network | ||
326 | layer get_stats callback. | ||
327 | </para> | ||
328 | </chapter> | ||
329 | |||
330 | <chapter> | ||
331 | <title>Porting The Z8530 Driver</title> | ||
332 | <para> | ||
333 | The Z8530 driver is written to be portable. In DMA mode it makes | ||
334 | assumptions about the use of ISA DMA. These are probably warranted | ||
335 | in most cases as the Z85230 in particular was designed to glue to PC | ||
336 | type machines. The PIO mode makes no real assumptions. | ||
337 | </para> | ||
338 | <para> | ||
339 | Should you need to retarget the Z8530 driver to another architecture | ||
340 | the only code that should need changing is the port I/O functions. | ||
341 | At the moment these assume PC I/O port accesses. This may not be | ||
342 | appropriate for all platforms. Replacing | ||
343 | <function>z8530_read_port</function> and <function>z8530_write_port | ||
344 | </function> is intended to be all that is required to port this | ||
345 | driver layer. | ||
346 | </para> | ||
347 | </chapter> | ||
348 | |||
349 | <chapter id="bugs"> | ||
350 | <title>Known Bugs And Assumptions</title> | ||
351 | <para> | ||
352 | <variablelist> | ||
353 | <varlistentry><term>Interrupt Locking</term> | ||
354 | <listitem> | ||
355 | <para> | ||
356 | The locking in the driver is done via the global cli/sti lock. This | ||
357 | makes for relatively poor SMP performance. Switching this to use a | ||
358 | per device spin lock would probably materially improve performance. | ||
359 | </para> | ||
360 | </listitem></varlistentry> | ||
361 | |||
362 | <varlistentry><term>Occasional Failures</term> | ||
363 | <listitem> | ||
364 | <para> | ||
365 | We have reports of occasional failures when the driver is run for very | ||
366 | long periods of time, after which it starts to receive junk frames. At | ||
367 | the moment the cause of this is not clear. | ||
368 | </para> | ||
369 | </listitem></varlistentry> | ||
370 | </variablelist> | ||
371 | |||
372 | </para> | ||
373 | </chapter> | ||
374 | |||
375 | <chapter id="pubfunctions"> | ||
376 | <title>Public Functions Provided</title> | ||
377 | !Edrivers/net/wan/z85230.c | ||
378 | </chapter> | ||
379 | |||
380 | <chapter id="intfunctions"> | ||
381 | <title>Internal Functions</title> | ||
382 | !Idrivers/net/wan/z85230.c | ||
383 | </chapter> | ||
384 | |||
385 | </book> | ||