diff options
Diffstat (limited to 'Documentation/driver-model/devres.txt')
-rw-r--r-- | Documentation/driver-model/devres.txt | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt new file mode 100644 index 000000000000..5163b85308f5 --- /dev/null +++ b/Documentation/driver-model/devres.txt | |||
@@ -0,0 +1,268 @@ | |||
1 | Devres - Managed Device Resource | ||
2 | ================================ | ||
3 | |||
4 | Tejun Heo <teheo@suse.de> | ||
5 | |||
6 | First draft 10 January 2007 | ||
7 | |||
8 | |||
9 | 1. Intro : Huh? Devres? | ||
10 | 2. Devres : Devres in a nutshell | ||
11 | 3. Devres Group : Group devres'es and release them together | ||
12 | 4. Details : Life time rules, calling context, ... | ||
13 | 5. Overhead : How much do we have to pay for this? | ||
14 | 6. List of managed interfaces : Currently implemented managed interfaces | ||
15 | |||
16 | |||
17 | 1. Intro | ||
18 | -------- | ||
19 | |||
20 | devres came up while trying to convert libata to use iomap. Each | ||
21 | iomapped address should be kept and unmapped on driver detach. For | ||
22 | example, a plain SFF ATA controller (that is, good old PCI IDE) in | ||
23 | native mode makes use of 5 PCI BARs and all of them should be | ||
24 | maintained. | ||
25 | |||
26 | As with many other device drivers, libata low level drivers have | ||
27 | sufficient bugs in ->remove and ->probe failure path. Well, yes, | ||
28 | that's probably because libata low level driver developers are a lazy | ||
29 | bunch, but aren't all low level driver developers? After spending a | ||
30 | day fiddling with braindamaged hardware with no document or | ||
31 | braindamaged document, if it's finally working, well, it's working. | ||
32 | |||
33 | For one reason or another, low level drivers don't receive as much | ||
34 | attention or testing as core code, and bugs on driver detach or | ||
35 | initialization failure don't happen often enough to be noticeable. | ||
36 | Init failure path is worse because it's much less travelled while | ||
37 | it needs to handle multiple entry points. | ||
38 | |||
39 | So, many low level drivers end up leaking resources on driver detach | ||
40 | and having half broken failure path implementation in ->probe() which | ||
41 | would leak resources or even cause oops when failure occurs. iomap | ||
42 | adds more to this mix. So do msi and msix. | ||
43 | |||
44 | |||
45 | 2. Devres | ||
46 | --------- | ||
47 | |||
48 | devres is basically a linked list of arbitrarily sized memory areas | ||
49 | associated with a struct device. Each devres entry is associated with | ||
50 | a release function. A devres can be released in several ways. No | ||
51 | matter what, all devres entries are released on driver detach. On | ||
52 | release, the associated release function is invoked and then the | ||
53 | devres entry is freed. | ||
54 | |||
55 | Managed interface is created for resources commonly used by device | ||
56 | drivers using devres. For example, coherent DMA memory is acquired | ||
57 | using dma_alloc_coherent(). The managed version is called | ||
58 | dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except | ||
59 | that the DMA memory allocated using it is managed and will be | ||
60 | automatically released on driver detach. Implementation looks like | ||
61 | the following. | ||
62 | |||
63 | struct dma_devres { | ||
64 | size_t size; | ||
65 | void *vaddr; | ||
66 | dma_addr_t dma_handle; | ||
67 | }; | ||
68 | |||
69 | static void dmam_coherent_release(struct device *dev, void *res) | ||
70 | { | ||
71 | struct dma_devres *this = res; | ||
72 | |||
73 | dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle); | ||
74 | } | ||
75 | |||
76 | dmam_alloc_coherent(dev, size, dma_handle, gfp) | ||
77 | { | ||
78 | struct dma_devres *dr; | ||
79 | void *vaddr; | ||
80 | |||
81 | dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp); | ||
82 | ... | ||
83 | |||
84 | /* alloc DMA memory as usual */ | ||
85 | vaddr = dma_alloc_coherent(...); | ||
86 | ... | ||
87 | |||
88 | /* record size, vaddr, dma_handle in dr */ | ||
89 | dr->vaddr = vaddr; | ||
90 | ... | ||
91 | |||
92 | devres_add(dev, dr); | ||
93 | |||
94 | return vaddr; | ||
95 | } | ||
96 | |||
97 | If a driver uses dmam_alloc_coherent(), the area is guaranteed to be | ||
98 | freed whether initialization fails half-way or the device gets | ||
99 | detached. If most resources are acquired using managed interface, a | ||
100 | driver can have much simpler init and exit code. Init path basically | ||
101 | looks like the following. | ||
102 | |||
103 | my_init_one() | ||
104 | { | ||
105 | struct mydev *d; | ||
106 | |||
107 | d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); | ||
108 | if (!d) | ||
109 | return -ENOMEM; | ||
110 | |||
111 | d->ring = dmam_alloc_coherent(...); | ||
112 | if (!d->ring) | ||
113 | return -ENOMEM; | ||
114 | |||
115 | if (check something) | ||
116 | return -EINVAL; | ||
117 | ... | ||
118 | |||
119 | return register_to_upper_layer(d); | ||
120 | } | ||
121 | |||
122 | And exit path, | ||
123 | |||
124 | my_remove_one() | ||
125 | { | ||
126 | unregister_from_upper_layer(d); | ||
127 | shutdown_my_hardware(); | ||
128 | } | ||
129 | |||
130 | As shown above, low level drivers can be simplified a lot by using | ||
131 | devres. Complexity is shifted from less maintained low level drivers | ||
132 | to better maintained higher layer. Also, as init failure path is | ||
133 | shared with exit path, both can get more testing. | ||
134 | |||
135 | |||
136 | 3. Devres group | ||
137 | --------------- | ||
138 | |||
139 | Devres entries can be grouped using devres group. When a group is | ||
140 | released, all contained normal devres entries and properly nested | ||
141 | groups are released. One usage is to roll back a series of acquired | ||
142 | resources on failure. For example, | ||
143 | |||
144 | if (!devres_open_group(dev, NULL, GFP_KERNEL)) | ||
145 | return -ENOMEM; | ||
146 | |||
147 | acquire A; | ||
148 | if (failed) | ||
149 | goto err; | ||
150 | |||
151 | acquire B; | ||
152 | if (failed) | ||
153 | goto err; | ||
154 | ... | ||
155 | |||
156 | devres_remove_group(dev, NULL); | ||
157 | return 0; | ||
158 | |||
159 | err: | ||
160 | devres_release_group(dev, NULL); | ||
161 | return err_code; | ||
162 | |||
163 | As resource acquisition failure usually means probe failure, constructs | ||
164 | like above are usually useful in midlayer driver (e.g. libata core | ||
165 | layer) where interface function shouldn't have side effect on failure. | ||
166 | For LLDs, just returning error code suffices in most cases. | ||
167 | |||
168 | Each group is identified by void *id. It can either be explicitly | ||
169 | specified by @id argument to devres_open_group() or automatically | ||
170 | created by passing NULL as @id as in the above example. In both | ||
171 | cases, devres_open_group() returns the group's id. The returned id | ||
172 | can be passed to other devres functions to select the target group. | ||
173 | If NULL is given to those functions, the latest open group is | ||
174 | selected. | ||
175 | |||
176 | For example, you can do something like the following. | ||
177 | |||
178 | int my_midlayer_create_something() | ||
179 | { | ||
180 | if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL)) | ||
181 | return -ENOMEM; | ||
182 | |||
183 | ... | ||
184 | |||
185 | devres_close_group(dev, my_midlayer_create_something); | ||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | void my_midlayer_destroy_something() | ||
190 | { | ||
191 | devres_release_group(dev, my_midlayer_create_something); | ||
192 | } | ||
193 | |||
194 | |||
195 | 4. Details | ||
196 | ---------- | ||
197 | |||
198 | Lifetime of a devres entry begins on devres allocation and finishes | ||
199 | when it is released or destroyed (removed and freed) - no reference | ||
200 | counting. | ||
201 | |||
202 | devres core guarantees atomicity to all basic devres operations and | ||
203 | has support for single-instance devres types (atomic | ||
204 | lookup-and-add-if-not-found). Other than that, synchronizing | ||
205 | concurrent accesses to allocated devres data is caller's | ||
206 | responsibility. This is usually a non-issue because bus ops and | ||
207 | resource allocations already do the job. | ||
208 | |||
209 | For an example of single-instance devres type, read pcim_iomap_table() | ||
210 | in lib/iomap.c. | ||
211 | |||
212 | All devres interface functions can be called without context if the | ||
213 | right gfp mask is given. | ||
214 | |||
215 | |||
216 | 5. Overhead | ||
217 | ----------- | ||
218 | |||
219 | Each devres bookkeeping info is allocated together with requested data | ||
220 | area. With debug option turned off, bookkeeping info occupies 16 | ||
221 | bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded | ||
222 | up to ull alignment). If singly linked list is used, it can be | ||
223 | reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit). | ||
224 | |||
225 | Each devres group occupies 8 pointers. It can be reduced to 6 if | ||
226 | singly linked list is used. | ||
227 | |||
228 | Memory space overhead on ahci controller with two ports is between 300 | ||
229 | and 400 bytes on 32bit machine after naive conversion (we can | ||
230 | certainly invest a bit more effort into libata core layer). | ||
231 | |||
232 | |||
233 | 6. List of managed interfaces | ||
234 | ----------------------------- | ||
235 | |||
236 | IO region | ||
237 | devm_request_region() | ||
238 | devm_request_mem_region() | ||
239 | devm_release_region() | ||
240 | devm_release_mem_region() | ||
241 | |||
242 | IRQ | ||
243 | devm_request_irq() | ||
244 | devm_free_irq() | ||
245 | |||
246 | DMA | ||
247 | dmam_alloc_coherent() | ||
248 | dmam_free_coherent() | ||
249 | dmam_alloc_noncoherent() | ||
250 | dmam_free_noncoherent() | ||
251 | dmam_declare_coherent_memory() | ||
252 | dmam_pool_create() | ||
253 | dmam_pool_destroy() | ||
254 | |||
255 | PCI | ||
256 | pcim_enable_device() : after success, all PCI ops become managed | ||
257 | pcim_pin_device() : keep PCI device enabled after release | ||
258 | |||
259 | IOMAP | ||
260 | devm_ioport_map() | ||
261 | devm_ioport_unmap() | ||
262 | devm_ioremap() | ||
263 | devm_ioremap_nocache() | ||
264 | devm_iounmap() | ||
265 | pcim_iomap() | ||
266 | pcim_iounmap() | ||
267 | pcim_iomap_table() : array of mapped addresses indexed by BAR | ||
268 | pcim_iomap_regions() : do request_region() and iomap() on multiple BARs | ||