diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2010-03-19 03:05:10 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-03-19 03:05:10 -0400 |
commit | b4b7a4ef097f288f724420b473dbf92a89c0ab7e (patch) | |
tree | 23ad8101e3e77c32a8d1e1b95a9c1cd7f7a475b7 /Documentation | |
parent | e9ce335df51ff782035a15c261a3c0c9892a1767 (diff) | |
parent | a3d3203e4bb40f253b1541e310dc0f9305be7c84 (diff) |
Merge branch 'master' into for-linus
Conflicts:
block/Kconfig
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'Documentation')
56 files changed, 1625 insertions, 1217 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 5aceb88b3f8b..05e2ae236865 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt | |||
@@ -4,20 +4,18 @@ | |||
4 | James E.J. Bottomley <James.Bottomley@HansenPartnership.com> | 4 | James E.J. Bottomley <James.Bottomley@HansenPartnership.com> |
5 | 5 | ||
6 | This document describes the DMA API. For a more gentle introduction | 6 | This document describes the DMA API. For a more gentle introduction |
7 | phrased in terms of the pci_ equivalents (and actual examples) see | 7 | of the API (and actual examples) see |
8 | Documentation/PCI/PCI-DMA-mapping.txt. | 8 | Documentation/DMA-API-HOWTO.txt. |
9 | 9 | ||
10 | This API is split into two pieces. Part I describes the API and the | 10 | This API is split into two pieces. Part I describes the API. Part II |
11 | corresponding pci_ API. Part II describes the extensions to the API | 11 | describes the extensions to the API for supporting non-consistent |
12 | for supporting non-consistent memory machines. Unless you know that | 12 | memory machines. Unless you know that your driver absolutely has to |
13 | your driver absolutely has to support non-consistent platforms (this | 13 | support non-consistent platforms (this is usually only legacy |
14 | is usually only legacy platforms) you should only use the API | 14 | platforms) you should only use the API described in part I. |
15 | described in part I. | ||
16 | 15 | ||
17 | Part I - pci_ and dma_ Equivalent API | 16 | Part I - dma_ API |
18 | ------------------------------------- | 17 | ------------------------------------- |
19 | 18 | ||
20 | To get the pci_ API, you must #include <linux/pci.h> | ||
21 | To get the dma_ API, you must #include <linux/dma-mapping.h> | 19 | To get the dma_ API, you must #include <linux/dma-mapping.h> |
22 | 20 | ||
23 | 21 | ||
@@ -27,9 +25,6 @@ Part Ia - Using large dma-coherent buffers | |||
27 | void * | 25 | void * |
28 | dma_alloc_coherent(struct device *dev, size_t size, | 26 | dma_alloc_coherent(struct device *dev, size_t size, |
29 | dma_addr_t *dma_handle, gfp_t flag) | 27 | dma_addr_t *dma_handle, gfp_t flag) |
30 | void * | ||
31 | pci_alloc_consistent(struct pci_dev *dev, size_t size, | ||
32 | dma_addr_t *dma_handle) | ||
33 | 28 | ||
34 | Consistent memory is memory for which a write by either the device or | 29 | Consistent memory is memory for which a write by either the device or |
35 | the processor can immediately be read by the processor or device | 30 | the processor can immediately be read by the processor or device |
@@ -53,15 +48,11 @@ The simplest way to do that is to use the dma_pool calls (see below). | |||
53 | The flag parameter (dma_alloc_coherent only) allows the caller to | 48 | The flag parameter (dma_alloc_coherent only) allows the caller to |
54 | specify the GFP_ flags (see kmalloc) for the allocation (the | 49 | specify the GFP_ flags (see kmalloc) for the allocation (the |
55 | implementation may choose to ignore flags that affect the location of | 50 | implementation may choose to ignore flags that affect the location of |
56 | the returned memory, like GFP_DMA). For pci_alloc_consistent, you | 51 | the returned memory, like GFP_DMA). |
57 | must assume GFP_ATOMIC behaviour. | ||
58 | 52 | ||
59 | void | 53 | void |
60 | dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, | 54 | dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, |
61 | dma_addr_t dma_handle) | 55 | dma_addr_t dma_handle) |
62 | void | ||
63 | pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr, | ||
64 | dma_addr_t dma_handle) | ||
65 | 56 | ||
66 | Free the region of consistent memory you previously allocated. dev, | 57 | Free the region of consistent memory you previously allocated. dev, |
67 | size and dma_handle must all be the same as those passed into the | 58 | size and dma_handle must all be the same as those passed into the |
@@ -89,10 +80,6 @@ for alignment, like queue heads needing to be aligned on N-byte boundaries. | |||
89 | dma_pool_create(const char *name, struct device *dev, | 80 | dma_pool_create(const char *name, struct device *dev, |
90 | size_t size, size_t align, size_t alloc); | 81 | size_t size, size_t align, size_t alloc); |
91 | 82 | ||
92 | struct pci_pool * | ||
93 | pci_pool_create(const char *name, struct pci_device *dev, | ||
94 | size_t size, size_t align, size_t alloc); | ||
95 | |||
96 | The pool create() routines initialize a pool of dma-coherent buffers | 83 | The pool create() routines initialize a pool of dma-coherent buffers |
97 | for use with a given device. It must be called in a context which | 84 | for use with a given device. It must be called in a context which |
98 | can sleep. | 85 | can sleep. |
@@ -108,9 +95,6 @@ from this pool must not cross 4KByte boundaries. | |||
108 | void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags, | 95 | void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags, |
109 | dma_addr_t *dma_handle); | 96 | dma_addr_t *dma_handle); |
110 | 97 | ||
111 | void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags, | ||
112 | dma_addr_t *dma_handle); | ||
113 | |||
114 | This allocates memory from the pool; the returned memory will meet the size | 98 | This allocates memory from the pool; the returned memory will meet the size |
115 | and alignment requirements specified at creation time. Pass GFP_ATOMIC to | 99 | and alignment requirements specified at creation time. Pass GFP_ATOMIC to |
116 | prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks), | 100 | prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks), |
@@ -122,9 +106,6 @@ pool's device. | |||
122 | void dma_pool_free(struct dma_pool *pool, void *vaddr, | 106 | void dma_pool_free(struct dma_pool *pool, void *vaddr, |
123 | dma_addr_t addr); | 107 | dma_addr_t addr); |
124 | 108 | ||
125 | void pci_pool_free(struct pci_pool *pool, void *vaddr, | ||
126 | dma_addr_t addr); | ||
127 | |||
128 | This puts memory back into the pool. The pool is what was passed to | 109 | This puts memory back into the pool. The pool is what was passed to |
129 | the pool allocation routine; the cpu (vaddr) and dma addresses are what | 110 | the pool allocation routine; the cpu (vaddr) and dma addresses are what |
130 | were returned when that routine allocated the memory being freed. | 111 | were returned when that routine allocated the memory being freed. |
@@ -132,8 +113,6 @@ were returned when that routine allocated the memory being freed. | |||
132 | 113 | ||
133 | void dma_pool_destroy(struct dma_pool *pool); | 114 | void dma_pool_destroy(struct dma_pool *pool); |
134 | 115 | ||
135 | void pci_pool_destroy(struct pci_pool *pool); | ||
136 | |||
137 | The pool destroy() routines free the resources of the pool. They must be | 116 | The pool destroy() routines free the resources of the pool. They must be |
138 | called in a context which can sleep. Make sure you've freed all allocated | 117 | called in a context which can sleep. Make sure you've freed all allocated |
139 | memory back to the pool before you destroy it. | 118 | memory back to the pool before you destroy it. |
@@ -144,8 +123,6 @@ Part Ic - DMA addressing limitations | |||
144 | 123 | ||
145 | int | 124 | int |
146 | dma_supported(struct device *dev, u64 mask) | 125 | dma_supported(struct device *dev, u64 mask) |
147 | int | ||
148 | pci_dma_supported(struct pci_dev *hwdev, u64 mask) | ||
149 | 126 | ||
150 | Checks to see if the device can support DMA to the memory described by | 127 | Checks to see if the device can support DMA to the memory described by |
151 | mask. | 128 | mask. |
@@ -159,8 +136,14 @@ driver writers. | |||
159 | 136 | ||
160 | int | 137 | int |
161 | dma_set_mask(struct device *dev, u64 mask) | 138 | dma_set_mask(struct device *dev, u64 mask) |
139 | |||
140 | Checks to see if the mask is possible and updates the device | ||
141 | parameters if it is. | ||
142 | |||
143 | Returns: 0 if successful and a negative error if not. | ||
144 | |||
162 | int | 145 | int |
163 | pci_set_dma_mask(struct pci_device *dev, u64 mask) | 146 | dma_set_coherent_mask(struct device *dev, u64 mask) |
164 | 147 | ||
165 | Checks to see if the mask is possible and updates the device | 148 | Checks to see if the mask is possible and updates the device |
166 | parameters if it is. | 149 | parameters if it is. |
@@ -187,9 +170,6 @@ Part Id - Streaming DMA mappings | |||
187 | dma_addr_t | 170 | dma_addr_t |
188 | dma_map_single(struct device *dev, void *cpu_addr, size_t size, | 171 | dma_map_single(struct device *dev, void *cpu_addr, size_t size, |
189 | enum dma_data_direction direction) | 172 | enum dma_data_direction direction) |
190 | dma_addr_t | ||
191 | pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size, | ||
192 | int direction) | ||
193 | 173 | ||
194 | Maps a piece of processor virtual memory so it can be accessed by the | 174 | Maps a piece of processor virtual memory so it can be accessed by the |
195 | device and returns the physical handle of the memory. | 175 | device and returns the physical handle of the memory. |
@@ -198,14 +178,10 @@ The direction for both api's may be converted freely by casting. | |||
198 | However the dma_ API uses a strongly typed enumerator for its | 178 | However the dma_ API uses a strongly typed enumerator for its |
199 | direction: | 179 | direction: |
200 | 180 | ||
201 | DMA_NONE = PCI_DMA_NONE no direction (used for | 181 | DMA_NONE no direction (used for debugging) |
202 | debugging) | 182 | DMA_TO_DEVICE data is going from the memory to the device |
203 | DMA_TO_DEVICE = PCI_DMA_TODEVICE data is going from the | 183 | DMA_FROM_DEVICE data is coming from the device to the memory |
204 | memory to the device | 184 | DMA_BIDIRECTIONAL direction isn't known |
205 | DMA_FROM_DEVICE = PCI_DMA_FROMDEVICE data is coming from | ||
206 | the device to the | ||
207 | memory | ||
208 | DMA_BIDIRECTIONAL = PCI_DMA_BIDIRECTIONAL direction isn't known | ||
209 | 185 | ||
210 | Notes: Not all memory regions in a machine can be mapped by this | 186 | Notes: Not all memory regions in a machine can be mapped by this |
211 | API. Further, regions that appear to be physically contiguous in | 187 | API. Further, regions that appear to be physically contiguous in |
@@ -268,9 +244,6 @@ cache lines are updated with data that the device may have changed). | |||
268 | void | 244 | void |
269 | dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, | 245 | dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, |
270 | enum dma_data_direction direction) | 246 | enum dma_data_direction direction) |
271 | void | ||
272 | pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, | ||
273 | size_t size, int direction) | ||
274 | 247 | ||
275 | Unmaps the region previously mapped. All the parameters passed in | 248 | Unmaps the region previously mapped. All the parameters passed in |
276 | must be identical to those passed in (and returned) by the mapping | 249 | must be identical to those passed in (and returned) by the mapping |
@@ -280,15 +253,9 @@ dma_addr_t | |||
280 | dma_map_page(struct device *dev, struct page *page, | 253 | dma_map_page(struct device *dev, struct page *page, |
281 | unsigned long offset, size_t size, | 254 | unsigned long offset, size_t size, |
282 | enum dma_data_direction direction) | 255 | enum dma_data_direction direction) |
283 | dma_addr_t | ||
284 | pci_map_page(struct pci_dev *hwdev, struct page *page, | ||
285 | unsigned long offset, size_t size, int direction) | ||
286 | void | 256 | void |
287 | dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, | 257 | dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, |
288 | enum dma_data_direction direction) | 258 | enum dma_data_direction direction) |
289 | void | ||
290 | pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, | ||
291 | size_t size, int direction) | ||
292 | 259 | ||
293 | API for mapping and unmapping for pages. All the notes and warnings | 260 | API for mapping and unmapping for pages. All the notes and warnings |
294 | for the other mapping APIs apply here. Also, although the <offset> | 261 | for the other mapping APIs apply here. Also, although the <offset> |
@@ -299,9 +266,6 @@ cache width is. | |||
299 | int | 266 | int |
300 | dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 267 | dma_mapping_error(struct device *dev, dma_addr_t dma_addr) |
301 | 268 | ||
302 | int | ||
303 | pci_dma_mapping_error(struct pci_dev *hwdev, dma_addr_t dma_addr) | ||
304 | |||
305 | In some circumstances dma_map_single and dma_map_page will fail to create | 269 | In some circumstances dma_map_single and dma_map_page will fail to create |
306 | a mapping. A driver can check for these errors by testing the returned | 270 | a mapping. A driver can check for these errors by testing the returned |
307 | dma address with dma_mapping_error(). A non-zero return value means the mapping | 271 | dma address with dma_mapping_error(). A non-zero return value means the mapping |
@@ -311,9 +275,6 @@ reduce current DMA mapping usage or delay and try again later). | |||
311 | int | 275 | int |
312 | dma_map_sg(struct device *dev, struct scatterlist *sg, | 276 | dma_map_sg(struct device *dev, struct scatterlist *sg, |
313 | int nents, enum dma_data_direction direction) | 277 | int nents, enum dma_data_direction direction) |
314 | int | ||
315 | pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, | ||
316 | int nents, int direction) | ||
317 | 278 | ||
318 | Returns: the number of physical segments mapped (this may be shorter | 279 | Returns: the number of physical segments mapped (this may be shorter |
319 | than <nents> passed in if some elements of the scatter/gather list are | 280 | than <nents> passed in if some elements of the scatter/gather list are |
@@ -353,9 +314,6 @@ accessed sg->address and sg->length as shown above. | |||
353 | void | 314 | void |
354 | dma_unmap_sg(struct device *dev, struct scatterlist *sg, | 315 | dma_unmap_sg(struct device *dev, struct scatterlist *sg, |
355 | int nhwentries, enum dma_data_direction direction) | 316 | int nhwentries, enum dma_data_direction direction) |
356 | void | ||
357 | pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, | ||
358 | int nents, int direction) | ||
359 | 317 | ||
360 | Unmap the previously mapped scatter/gather list. All the parameters | 318 | Unmap the previously mapped scatter/gather list. All the parameters |
361 | must be the same as those and passed in to the scatter/gather mapping | 319 | must be the same as those and passed in to the scatter/gather mapping |
@@ -365,21 +323,23 @@ Note: <nents> must be the number you passed in, *not* the number of | |||
365 | physical entries returned. | 323 | physical entries returned. |
366 | 324 | ||
367 | void | 325 | void |
368 | dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size, | 326 | dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, |
369 | enum dma_data_direction direction) | 327 | enum dma_data_direction direction) |
370 | void | 328 | void |
371 | pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, | 329 | dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, |
372 | size_t size, int direction) | 330 | enum dma_data_direction direction) |
373 | void | 331 | void |
374 | dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, | 332 | dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, |
375 | enum dma_data_direction direction) | 333 | enum dma_data_direction direction) |
376 | void | 334 | void |
377 | pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, | 335 | dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, |
378 | int nelems, int direction) | 336 | enum dma_data_direction direction) |
379 | 337 | ||
380 | Synchronise a single contiguous or scatter/gather mapping. All the | 338 | Synchronise a single contiguous or scatter/gather mapping for the cpu |
381 | parameters must be the same as those passed into the single mapping | 339 | and device. With the sync_sg API, all the parameters must be the same |
382 | API. | 340 | as those passed into the single mapping API. With the sync_single API, |
341 | you can use dma_handle and size parameters that aren't identical to | ||
342 | those passed into the single mapping API to do a partial sync. | ||
383 | 343 | ||
384 | Notes: You must do this: | 344 | Notes: You must do this: |
385 | 345 | ||
@@ -461,9 +421,9 @@ void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr, | |||
461 | Part II - Advanced dma_ usage | 421 | Part II - Advanced dma_ usage |
462 | ----------------------------- | 422 | ----------------------------- |
463 | 423 | ||
464 | Warning: These pieces of the DMA API have no PCI equivalent. They | 424 | Warning: These pieces of the DMA API should not be used in the |
465 | should also not be used in the majority of cases, since they cater for | 425 | majority of cases, since they cater for unlikely corner cases that |
466 | unlikely corner cases that don't belong in usual drivers. | 426 | don't belong in usual drivers. |
467 | 427 | ||
468 | If you don't understand how cache line coherency works between a | 428 | If you don't understand how cache line coherency works between a |
469 | processor and an I/O device, you should not be using this part of the | 429 | processor and an I/O device, you should not be using this part of the |
@@ -514,16 +474,6 @@ into the width returned by this call. It will also always be a power | |||
514 | of two for easy alignment. | 474 | of two for easy alignment. |
515 | 475 | ||
516 | void | 476 | void |
517 | dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, | ||
518 | unsigned long offset, size_t size, | ||
519 | enum dma_data_direction direction) | ||
520 | |||
521 | Does a partial sync, starting at offset and continuing for size. You | ||
522 | must be careful to observe the cache alignment and width when doing | ||
523 | anything like this. You must also be extra careful about accessing | ||
524 | memory you intend to sync partially. | ||
525 | |||
526 | void | ||
527 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 477 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
528 | enum dma_data_direction direction) | 478 | enum dma_data_direction direction) |
529 | 479 | ||
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 5e7d84b48505..133cd6c3f3c1 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl | |||
@@ -488,7 +488,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
488 | The ECC bytes must be placed immidiately after the data | 488 | The ECC bytes must be placed immidiately after the data |
489 | bytes in order to make the syndrome generator work. This | 489 | bytes in order to make the syndrome generator work. This |
490 | is contrary to the usual layout used by software ECC. The | 490 | is contrary to the usual layout used by software ECC. The |
491 | seperation of data and out of band area is not longer | 491 | separation of data and out of band area is not longer |
492 | possible. The nand driver code handles this layout and | 492 | possible. The nand driver code handles this layout and |
493 | the remaining free bytes in the oob area are managed by | 493 | the remaining free bytes in the oob area are managed by |
494 | the autoplacement code. Provide a matching oob-layout | 494 | the autoplacement code. Provide a matching oob-layout |
@@ -560,7 +560,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) | |||
560 | bad blocks. They have factory marked good blocks. The marker pattern | 560 | bad blocks. They have factory marked good blocks. The marker pattern |
561 | is erased when the block is erased to be reused. So in case of | 561 | is erased when the block is erased to be reused. So in case of |
562 | powerloss before writing the pattern back to the chip this block | 562 | powerloss before writing the pattern back to the chip this block |
563 | would be lost and added to the bad blocks. Therefor we scan the | 563 | would be lost and added to the bad blocks. Therefore we scan the |
564 | chip(s) when we detect them the first time for good blocks and | 564 | chip(s) when we detect them the first time for good blocks and |
565 | store this information in a bad block table before erasing any | 565 | store this information in a bad block table before erasing any |
566 | of the blocks. | 566 | of the blocks. |
@@ -1094,7 +1094,7 @@ in this page</entry> | |||
1094 | manufacturers specifications. This applies similar to the spare area. | 1094 | manufacturers specifications. This applies similar to the spare area. |
1095 | </para> | 1095 | </para> |
1096 | <para> | 1096 | <para> |
1097 | Therefor NAND aware filesystems must either write in page size chunks | 1097 | Therefore NAND aware filesystems must either write in page size chunks |
1098 | or hold a writebuffer to collect smaller writes until they sum up to | 1098 | or hold a writebuffer to collect smaller writes until they sum up to |
1099 | pagesize. Available NAND aware filesystems: JFFS2, YAFFS. | 1099 | pagesize. Available NAND aware filesystems: JFFS2, YAFFS. |
1100 | </para> | 1100 | </para> |
diff --git a/Documentation/DocBook/v4l/common.xml b/Documentation/DocBook/v4l/common.xml index c65f0ac9b6ee..cea23e1c4fc6 100644 --- a/Documentation/DocBook/v4l/common.xml +++ b/Documentation/DocBook/v4l/common.xml | |||
@@ -1170,7 +1170,7 @@ frames per second. If less than this number of frames is to be | |||
1170 | captured or output, applications can request frame skipping or | 1170 | captured or output, applications can request frame skipping or |
1171 | duplicating on the driver side. This is especially useful when using | 1171 | duplicating on the driver side. This is especially useful when using |
1172 | the &func-read; or &func-write;, which are not augmented by timestamps | 1172 | the &func-read; or &func-write;, which are not augmented by timestamps |
1173 | or sequence counters, and to avoid unneccessary data copying.</para> | 1173 | or sequence counters, and to avoid unnecessary data copying.</para> |
1174 | 1174 | ||
1175 | <para>Finally these ioctls can be used to determine the number of | 1175 | <para>Finally these ioctls can be used to determine the number of |
1176 | buffers used internally by a driver in read/write mode. For | 1176 | buffers used internally by a driver in read/write mode. For |
diff --git a/Documentation/DocBook/v4l/vidioc-g-parm.xml b/Documentation/DocBook/v4l/vidioc-g-parm.xml index 78332d365ce9..392aa9e5571e 100644 --- a/Documentation/DocBook/v4l/vidioc-g-parm.xml +++ b/Documentation/DocBook/v4l/vidioc-g-parm.xml | |||
@@ -55,7 +55,7 @@ captured or output, applications can request frame skipping or | |||
55 | duplicating on the driver side. This is especially useful when using | 55 | duplicating on the driver side. This is especially useful when using |
56 | the <function>read()</function> or <function>write()</function>, which | 56 | the <function>read()</function> or <function>write()</function>, which |
57 | are not augmented by timestamps or sequence counters, and to avoid | 57 | are not augmented by timestamps or sequence counters, and to avoid |
58 | unneccessary data copying.</para> | 58 | unnecessary data copying.</para> |
59 | 59 | ||
60 | <para>Further these ioctls can be used to determine the number of | 60 | <para>Further these ioctls can be used to determine the number of |
61 | buffers used internally by a driver in read/write mode. For | 61 | buffers used internally by a driver in read/write mode. For |
diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 8495fc970391..f5395af88a41 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO | |||
@@ -221,8 +221,8 @@ branches. These different branches are: | |||
221 | - main 2.6.x kernel tree | 221 | - main 2.6.x kernel tree |
222 | - 2.6.x.y -stable kernel tree | 222 | - 2.6.x.y -stable kernel tree |
223 | - 2.6.x -git kernel patches | 223 | - 2.6.x -git kernel patches |
224 | - 2.6.x -mm kernel patches | ||
225 | - subsystem specific kernel trees and patches | 224 | - subsystem specific kernel trees and patches |
225 | - the 2.6.x -next kernel tree for integration tests | ||
226 | 226 | ||
227 | 2.6.x kernel tree | 227 | 2.6.x kernel tree |
228 | ----------------- | 228 | ----------------- |
@@ -232,7 +232,7 @@ process is as follows: | |||
232 | - As soon as a new kernel is released a two weeks window is open, | 232 | - As soon as a new kernel is released a two weeks window is open, |
233 | during this period of time maintainers can submit big diffs to | 233 | during this period of time maintainers can submit big diffs to |
234 | Linus, usually the patches that have already been included in the | 234 | Linus, usually the patches that have already been included in the |
235 | -mm kernel for a few weeks. The preferred way to submit big changes | 235 | -next kernel for a few weeks. The preferred way to submit big changes |
236 | is using git (the kernel's source management tool, more information | 236 | is using git (the kernel's source management tool, more information |
237 | can be found at http://git.or.cz/) but plain patches are also just | 237 | can be found at http://git.or.cz/) but plain patches are also just |
238 | fine. | 238 | fine. |
@@ -293,84 +293,43 @@ daily and represent the current state of Linus' tree. They are more | |||
293 | experimental than -rc kernels since they are generated automatically | 293 | experimental than -rc kernels since they are generated automatically |
294 | without even a cursory glance to see if they are sane. | 294 | without even a cursory glance to see if they are sane. |
295 | 295 | ||
296 | 2.6.x -mm kernel patches | ||
297 | ------------------------ | ||
298 | These are experimental kernel patches released by Andrew Morton. Andrew | ||
299 | takes all of the different subsystem kernel trees and patches and mushes | ||
300 | them together, along with a lot of patches that have been plucked from | ||
301 | the linux-kernel mailing list. This tree serves as a proving ground for | ||
302 | new features and patches. Once a patch has proved its worth in -mm for | ||
303 | a while Andrew or the subsystem maintainer pushes it on to Linus for | ||
304 | inclusion in mainline. | ||
305 | |||
306 | It is heavily encouraged that all new patches get tested in the -mm tree | ||
307 | before they are sent to Linus for inclusion in the main kernel tree. Code | ||
308 | which does not make an appearance in -mm before the opening of the merge | ||
309 | window will prove hard to merge into the mainline. | ||
310 | |||
311 | These kernels are not appropriate for use on systems that are supposed | ||
312 | to be stable and they are more risky to run than any of the other | ||
313 | branches. | ||
314 | |||
315 | If you wish to help out with the kernel development process, please test | ||
316 | and use these kernel releases and provide feedback to the linux-kernel | ||
317 | mailing list if you have any problems, and if everything works properly. | ||
318 | |||
319 | In addition to all the other experimental patches, these kernels usually | ||
320 | also contain any changes in the mainline -git kernels available at the | ||
321 | time of release. | ||
322 | |||
323 | The -mm kernels are not released on a fixed schedule, but usually a few | ||
324 | -mm kernels are released in between each -rc kernel (1 to 3 is common). | ||
325 | |||
326 | Subsystem Specific kernel trees and patches | 296 | Subsystem Specific kernel trees and patches |
327 | ------------------------------------------- | 297 | ------------------------------------------- |
328 | A number of the different kernel subsystem developers expose their | 298 | The maintainers of the various kernel subsystems --- and also many |
329 | development trees so that others can see what is happening in the | 299 | kernel subsystem developers --- expose their current state of |
330 | different areas of the kernel. These trees are pulled into the -mm | 300 | development in source repositories. That way, others can see what is |
331 | kernel releases as described above. | 301 | happening in the different areas of the kernel. In areas where |
332 | 302 | development is rapid, a developer may be asked to base his submissions | |
333 | Here is a list of some of the different kernel trees available: | 303 | onto such a subsystem kernel tree so that conflicts between the |
334 | git trees: | 304 | submission and other already ongoing work are avoided. |
335 | - Kbuild development tree, Sam Ravnborg <sam@ravnborg.org> | 305 | |
336 | git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git | 306 | Most of these repositories are git trees, but there are also other SCMs |
337 | 307 | in use, or patch queues being published as quilt series. Addresses of | |
338 | - ACPI development tree, Len Brown <len.brown@intel.com> | 308 | these subsystem repositories are listed in the MAINTAINERS file. Many |
339 | git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git | 309 | of them can be browsed at http://git.kernel.org/. |
340 | 310 | ||
341 | - Block development tree, Jens Axboe <jens.axboe@oracle.com> | 311 | Before a proposed patch is committed to such a subsystem tree, it is |
342 | git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git | 312 | subject to review which primarily happens on mailing lists (see the |
343 | 313 | respective section below). For several kernel subsystems, this review | |
344 | - DRM development tree, Dave Airlie <airlied@linux.ie> | 314 | process is tracked with the tool patchwork. Patchwork offers a web |
345 | git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git | 315 | interface which shows patch postings, any comments on a patch or |
346 | 316 | revisions to it, and maintainers can mark patches as under review, | |
347 | - ia64 development tree, Tony Luck <tony.luck@intel.com> | 317 | accepted, or rejected. Most of these patchwork sites are listed at |
348 | git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git | 318 | http://patchwork.kernel.org/ or http://patchwork.ozlabs.org/. |
349 | 319 | ||
350 | - infiniband, Roland Dreier <rolandd@cisco.com> | 320 | 2.6.x -next kernel tree for integration tests |
351 | git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git | 321 | --------------------------------------------- |
352 | 322 | Before updates from subsystem trees are merged into the mainline 2.6.x | |
353 | - libata, Jeff Garzik <jgarzik@pobox.com> | 323 | tree, they need to be integration-tested. For this purpose, a special |
354 | git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git | 324 | testing repository exists into which virtually all subsystem trees are |
355 | 325 | pulled on an almost daily basis: | |
356 | - network drivers, Jeff Garzik <jgarzik@pobox.com> | 326 | http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git |
357 | git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git | 327 | http://linux.f-seidel.de/linux-next/pmwiki/ |
358 | 328 | ||
359 | - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net> | 329 | This way, the -next kernel gives a summary outlook onto what will be |
360 | git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git | 330 | expected to go into the mainline kernel at the next merge period. |
361 | 331 | Adventurous testers are very welcome to runtime-test the -next kernel. | |
362 | - SCSI, James Bottomley <James.Bottomley@hansenpartnership.com> | ||
363 | git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git | ||
364 | |||
365 | - x86, Ingo Molnar <mingo@elte.hu> | ||
366 | git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git | ||
367 | |||
368 | quilt trees: | ||
369 | - USB, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de> | ||
370 | kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ | ||
371 | 332 | ||
372 | Other kernel trees can be found listed at http://git.kernel.org/ and in | ||
373 | the MAINTAINERS file. | ||
374 | 333 | ||
375 | Bug Reporting | 334 | Bug Reporting |
376 | ------------- | 335 | ------------- |
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index bc38283379f0..69dd29ed824e 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt | |||
@@ -365,6 +365,7 @@ You can change this at module load time (for a module) with: | |||
365 | regshifts=<shift1>,<shift2>,... | 365 | regshifts=<shift1>,<shift2>,... |
366 | slave_addrs=<addr1>,<addr2>,... | 366 | slave_addrs=<addr1>,<addr2>,... |
367 | force_kipmid=<enable1>,<enable2>,... | 367 | force_kipmid=<enable1>,<enable2>,... |
368 | kipmid_max_busy_us=<ustime1>,<ustime2>,... | ||
368 | unload_when_empty=[0|1] | 369 | unload_when_empty=[0|1] |
369 | 370 | ||
370 | Each of these except si_trydefaults is a list, the first item for the | 371 | Each of these except si_trydefaults is a list, the first item for the |
@@ -433,6 +434,7 @@ kernel command line as: | |||
433 | ipmi_si.regshifts=<shift1>,<shift2>,... | 434 | ipmi_si.regshifts=<shift1>,<shift2>,... |
434 | ipmi_si.slave_addrs=<addr1>,<addr2>,... | 435 | ipmi_si.slave_addrs=<addr1>,<addr2>,... |
435 | ipmi_si.force_kipmid=<enable1>,<enable2>,... | 436 | ipmi_si.force_kipmid=<enable1>,<enable2>,... |
437 | ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,... | ||
436 | 438 | ||
437 | It works the same as the module parameters of the same names. | 439 | It works the same as the module parameters of the same names. |
438 | 440 | ||
@@ -450,6 +452,16 @@ force this thread on or off. If you force it off and don't have | |||
450 | interrupts, the driver will run VERY slowly. Don't blame me, | 452 | interrupts, the driver will run VERY slowly. Don't blame me, |
451 | these interfaces suck. | 453 | these interfaces suck. |
452 | 454 | ||
455 | Unfortunately, this thread can use a lot of CPU depending on the | ||
456 | interface's performance. This can waste a lot of CPU and cause | ||
457 | various issues with detecting idle CPU and using extra power. To | ||
458 | avoid this, the kipmid_max_busy_us sets the maximum amount of time, in | ||
459 | microseconds, that kipmid will spin before sleeping for a tick. This | ||
460 | value sets a balance between performance and CPU waste and needs to be | ||
461 | tuned to your needs. Maybe, someday, auto-tuning will be added, but | ||
462 | that's not a simple thing and even the auto-tuning would need to be | ||
463 | tuned to the user's desired performance. | ||
464 | |||
453 | The driver supports a hot add and remove of interfaces. This way, | 465 | The driver supports a hot add and remove of interfaces. This way, |
454 | interfaces can be added or removed after the kernel is up and running. | 466 | interfaces can be added or removed after the kernel is up and running. |
455 | This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a | 467 | This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a |
diff --git a/Documentation/Makefile b/Documentation/Makefile index 94b945733534..6fc7ea1d1f9d 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-m := DocBook/ accounting/ auxdisplay/ connector/ \ | 1 | obj-m := DocBook/ accounting/ auxdisplay/ connector/ \ |
2 | filesystems/configfs/ ia64/ networking/ \ | 2 | filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \ |
3 | pcmcia/ spi/ video4linux/ vm/ watchdog/src/ | 3 | pcmcia/ spi/ timers/ video4linux/ vm/ watchdog/src/ |
diff --git a/Documentation/PCI/PCI-DMA-mapping.txt b/Documentation/PCI/PCI-DMA-mapping.txt index ecad88d9fe59..52618ab069ad 100644 --- a/Documentation/PCI/PCI-DMA-mapping.txt +++ b/Documentation/PCI/PCI-DMA-mapping.txt | |||
@@ -1,12 +1,12 @@ | |||
1 | Dynamic DMA mapping | 1 | Dynamic DMA mapping Guide |
2 | =================== | 2 | ========================= |
3 | 3 | ||
4 | David S. Miller <davem@redhat.com> | 4 | David S. Miller <davem@redhat.com> |
5 | Richard Henderson <rth@cygnus.com> | 5 | Richard Henderson <rth@cygnus.com> |
6 | Jakub Jelinek <jakub@redhat.com> | 6 | Jakub Jelinek <jakub@redhat.com> |
7 | 7 | ||
8 | This document describes the DMA mapping system in terms of the pci_ | 8 | This is a guide to device driver writers on how to use the DMA API |
9 | API. For a similar API that works for generic devices, see | 9 | with example pseudo-code. For a concise description of the API, see |
10 | DMA-API.txt. | 10 | DMA-API.txt. |
11 | 11 | ||
12 | Most of the 64bit platforms have special hardware that translates bus | 12 | Most of the 64bit platforms have special hardware that translates bus |
@@ -26,12 +26,15 @@ mapped only for the time they are actually used and unmapped after the DMA | |||
26 | transfer. | 26 | transfer. |
27 | 27 | ||
28 | The following API will work of course even on platforms where no such | 28 | The following API will work of course even on platforms where no such |
29 | hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on | 29 | hardware exists. |
30 | top of the virt_to_bus interface. | 30 | |
31 | Note that the DMA API works with any bus independent of the underlying | ||
32 | microprocessor architecture. You should use the DMA API rather than | ||
33 | the bus specific DMA API (e.g. pci_dma_*). | ||
31 | 34 | ||
32 | First of all, you should make sure | 35 | First of all, you should make sure |
33 | 36 | ||
34 | #include <linux/pci.h> | 37 | #include <linux/dma-mapping.h> |
35 | 38 | ||
36 | is in your driver. This file will obtain for you the definition of the | 39 | is in your driver. This file will obtain for you the definition of the |
37 | dma_addr_t (which can hold any valid DMA address for the platform) | 40 | dma_addr_t (which can hold any valid DMA address for the platform) |
@@ -78,44 +81,43 @@ for you to DMA from/to. | |||
78 | DMA addressing limitations | 81 | DMA addressing limitations |
79 | 82 | ||
80 | Does your device have any DMA addressing limitations? For example, is | 83 | Does your device have any DMA addressing limitations? For example, is |
81 | your device only capable of driving the low order 24-bits of address | 84 | your device only capable of driving the low order 24-bits of address? |
82 | on the PCI bus for SAC DMA transfers? If so, you need to inform the | 85 | If so, you need to inform the kernel of this fact. |
83 | PCI layer of this fact. | ||
84 | 86 | ||
85 | By default, the kernel assumes that your device can address the full | 87 | By default, the kernel assumes that your device can address the full |
86 | 32-bits in a SAC cycle. For a 64-bit DAC capable device, this needs | 88 | 32-bits. For a 64-bit capable device, this needs to be increased. |
87 | to be increased. And for a device with limitations, as discussed in | 89 | And for a device with limitations, as discussed in the previous |
88 | the previous paragraph, it needs to be decreased. | 90 | paragraph, it needs to be decreased. |
89 | 91 | ||
90 | pci_alloc_consistent() by default will return 32-bit DMA addresses. | 92 | Special note about PCI: PCI-X specification requires PCI-X devices to |
91 | PCI-X specification requires PCI-X devices to support 64-bit | 93 | support 64-bit addressing (DAC) for all transactions. And at least |
92 | addressing (DAC) for all transactions. And at least one platform (SGI | 94 | one platform (SGI SN2) requires 64-bit consistent allocations to |
93 | SN2) requires 64-bit consistent allocations to operate correctly when | 95 | operate correctly when the IO bus is in PCI-X mode. |
94 | the IO bus is in PCI-X mode. Therefore, like with pci_set_dma_mask(), | 96 | |
95 | it's good practice to call pci_set_consistent_dma_mask() to set the | 97 | For correct operation, you must interrogate the kernel in your device |
96 | appropriate mask even if your device only supports 32-bit DMA | 98 | probe routine to see if the DMA controller on the machine can properly |
97 | (default) and especially if it's a PCI-X device. | 99 | support the DMA addressing limitation your device has. It is good |
98 | 100 | style to do this even if your device holds the default setting, | |
99 | For correct operation, you must interrogate the PCI layer in your | ||
100 | device probe routine to see if the PCI controller on the machine can | ||
101 | properly support the DMA addressing limitation your device has. It is | ||
102 | good style to do this even if your device holds the default setting, | ||
103 | because this shows that you did think about these issues wrt. your | 101 | because this shows that you did think about these issues wrt. your |
104 | device. | 102 | device. |
105 | 103 | ||
106 | The query is performed via a call to pci_set_dma_mask(): | 104 | The query is performed via a call to dma_set_mask(): |
107 | 105 | ||
108 | int pci_set_dma_mask(struct pci_dev *pdev, u64 device_mask); | 106 | int dma_set_mask(struct device *dev, u64 mask); |
109 | 107 | ||
110 | The query for consistent allocations is performed via a call to | 108 | The query for consistent allocations is performed via a call to |
111 | pci_set_consistent_dma_mask(): | 109 | dma_set_coherent_mask(): |
112 | 110 | ||
113 | int pci_set_consistent_dma_mask(struct pci_dev *pdev, u64 device_mask); | 111 | int dma_set_coherent_mask(struct device *dev, u64 mask); |
114 | 112 | ||
115 | Here, pdev is a pointer to the PCI device struct of your device, and | 113 | Here, dev is a pointer to the device struct of your device, and mask |
116 | device_mask is a bit mask describing which bits of a PCI address your | 114 | is a bit mask describing which bits of an address your device |
117 | device supports. It returns zero if your card can perform DMA | 115 | supports. It returns zero if your card can perform DMA properly on |
118 | properly on the machine given the address mask you provided. | 116 | the machine given the address mask you provided. In general, the |
117 | device struct of your device is embedded in the bus specific device | ||
118 | struct of your device. For example, a pointer to the device struct of | ||
119 | your PCI device is pdev->dev (pdev is a pointer to the PCI device | ||
120 | struct of your device). | ||
119 | 121 | ||
120 | If it returns non-zero, your device cannot perform DMA properly on | 122 | If it returns non-zero, your device cannot perform DMA properly on |
121 | this platform, and attempting to do so will result in undefined | 123 | this platform, and attempting to do so will result in undefined |
@@ -133,31 +135,30 @@ of your driver reports that performance is bad or that the device is not | |||
133 | even detected, you can ask them for the kernel messages to find out | 135 | even detected, you can ask them for the kernel messages to find out |
134 | exactly why. | 136 | exactly why. |
135 | 137 | ||
136 | The standard 32-bit addressing PCI device would do something like | 138 | The standard 32-bit addressing device would do something like this: |
137 | this: | ||
138 | 139 | ||
139 | if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { | 140 | if (dma_set_mask(dev, DMA_BIT_MASK(32))) { |
140 | printk(KERN_WARNING | 141 | printk(KERN_WARNING |
141 | "mydev: No suitable DMA available.\n"); | 142 | "mydev: No suitable DMA available.\n"); |
142 | goto ignore_this_device; | 143 | goto ignore_this_device; |
143 | } | 144 | } |
144 | 145 | ||
145 | Another common scenario is a 64-bit capable device. The approach | 146 | Another common scenario is a 64-bit capable device. The approach here |
146 | here is to try for 64-bit DAC addressing, but back down to a | 147 | is to try for 64-bit addressing, but back down to a 32-bit mask that |
147 | 32-bit mask should that fail. The PCI platform code may fail the | 148 | should not fail. The kernel may fail the 64-bit mask not because the |
148 | 64-bit mask not because the platform is not capable of 64-bit | 149 | platform is not capable of 64-bit addressing. Rather, it may fail in |
149 | addressing. Rather, it may fail in this case simply because | 150 | this case simply because 32-bit addressing is done more efficiently |
150 | 32-bit SAC addressing is done more efficiently than DAC addressing. | 151 | than 64-bit addressing. For example, Sparc64 PCI SAC addressing is |
151 | Sparc64 is one platform which behaves in this way. | 152 | more efficient than DAC addressing. |
152 | 153 | ||
153 | Here is how you would handle a 64-bit capable device which can drive | 154 | Here is how you would handle a 64-bit capable device which can drive |
154 | all 64-bits when accessing streaming DMA: | 155 | all 64-bits when accessing streaming DMA: |
155 | 156 | ||
156 | int using_dac; | 157 | int using_dac; |
157 | 158 | ||
158 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { | 159 | if (!dma_set_mask(dev, DMA_BIT_MASK(64))) { |
159 | using_dac = 1; | 160 | using_dac = 1; |
160 | } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { | 161 | } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) { |
161 | using_dac = 0; | 162 | using_dac = 0; |
162 | } else { | 163 | } else { |
163 | printk(KERN_WARNING | 164 | printk(KERN_WARNING |
@@ -170,36 +171,36 @@ the case would look like this: | |||
170 | 171 | ||
171 | int using_dac, consistent_using_dac; | 172 | int using_dac, consistent_using_dac; |
172 | 173 | ||
173 | if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { | 174 | if (!dma_set_mask(dev, DMA_BIT_MASK(64))) { |
174 | using_dac = 1; | 175 | using_dac = 1; |
175 | consistent_using_dac = 1; | 176 | consistent_using_dac = 1; |
176 | pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); | 177 | dma_set_coherent_mask(dev, DMA_BIT_MASK(64)); |
177 | } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) { | 178 | } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) { |
178 | using_dac = 0; | 179 | using_dac = 0; |
179 | consistent_using_dac = 0; | 180 | consistent_using_dac = 0; |
180 | pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); | 181 | dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); |
181 | } else { | 182 | } else { |
182 | printk(KERN_WARNING | 183 | printk(KERN_WARNING |
183 | "mydev: No suitable DMA available.\n"); | 184 | "mydev: No suitable DMA available.\n"); |
184 | goto ignore_this_device; | 185 | goto ignore_this_device; |
185 | } | 186 | } |
186 | 187 | ||
187 | pci_set_consistent_dma_mask() will always be able to set the same or a | 188 | dma_set_coherent_mask() will always be able to set the same or a |
188 | smaller mask as pci_set_dma_mask(). However for the rare case that a | 189 | smaller mask as dma_set_mask(). However for the rare case that a |
189 | device driver only uses consistent allocations, one would have to | 190 | device driver only uses consistent allocations, one would have to |
190 | check the return value from pci_set_consistent_dma_mask(). | 191 | check the return value from dma_set_coherent_mask(). |
191 | 192 | ||
192 | Finally, if your device can only drive the low 24-bits of | 193 | Finally, if your device can only drive the low 24-bits of |
193 | address during PCI bus mastering you might do something like: | 194 | address you might do something like: |
194 | 195 | ||
195 | if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) { | 196 | if (dma_set_mask(dev, DMA_BIT_MASK(24))) { |
196 | printk(KERN_WARNING | 197 | printk(KERN_WARNING |
197 | "mydev: 24-bit DMA addressing not available.\n"); | 198 | "mydev: 24-bit DMA addressing not available.\n"); |
198 | goto ignore_this_device; | 199 | goto ignore_this_device; |
199 | } | 200 | } |
200 | 201 | ||
201 | When pci_set_dma_mask() is successful, and returns zero, the PCI layer | 202 | When dma_set_mask() is successful, and returns zero, the kernel saves |
202 | saves away this mask you have provided. The PCI layer will use this | 203 | away this mask you have provided. The kernel will use this |
203 | information later when you make DMA mappings. | 204 | information later when you make DMA mappings. |
204 | 205 | ||
205 | There is a case which we are aware of at this time, which is worth | 206 | There is a case which we are aware of at this time, which is worth |
@@ -208,7 +209,7 @@ functions (for example a sound card provides playback and record | |||
208 | functions) and the various different functions have _different_ | 209 | functions) and the various different functions have _different_ |
209 | DMA addressing limitations, you may wish to probe each mask and | 210 | DMA addressing limitations, you may wish to probe each mask and |
210 | only provide the functionality which the machine can handle. It | 211 | only provide the functionality which the machine can handle. It |
211 | is important that the last call to pci_set_dma_mask() be for the | 212 | is important that the last call to dma_set_mask() be for the |
212 | most specific mask. | 213 | most specific mask. |
213 | 214 | ||
214 | Here is pseudo-code showing how this might be done: | 215 | Here is pseudo-code showing how this might be done: |
@@ -217,17 +218,17 @@ Here is pseudo-code showing how this might be done: | |||
217 | #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24) | 218 | #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24) |
218 | 219 | ||
219 | struct my_sound_card *card; | 220 | struct my_sound_card *card; |
220 | struct pci_dev *pdev; | 221 | struct device *dev; |
221 | 222 | ||
222 | ... | 223 | ... |
223 | if (!pci_set_dma_mask(pdev, PLAYBACK_ADDRESS_BITS)) { | 224 | if (!dma_set_mask(dev, PLAYBACK_ADDRESS_BITS)) { |
224 | card->playback_enabled = 1; | 225 | card->playback_enabled = 1; |
225 | } else { | 226 | } else { |
226 | card->playback_enabled = 0; | 227 | card->playback_enabled = 0; |
227 | printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n", | 228 | printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n", |
228 | card->name); | 229 | card->name); |
229 | } | 230 | } |
230 | if (!pci_set_dma_mask(pdev, RECORD_ADDRESS_BITS)) { | 231 | if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) { |
231 | card->record_enabled = 1; | 232 | card->record_enabled = 1; |
232 | } else { | 233 | } else { |
233 | card->record_enabled = 0; | 234 | card->record_enabled = 0; |
@@ -252,8 +253,8 @@ There are two types of DMA mappings: | |||
252 | Think of "consistent" as "synchronous" or "coherent". | 253 | Think of "consistent" as "synchronous" or "coherent". |
253 | 254 | ||
254 | The current default is to return consistent memory in the low 32 | 255 | The current default is to return consistent memory in the low 32 |
255 | bits of the PCI bus space. However, for future compatibility you | 256 | bits of the bus space. However, for future compatibility you should |
256 | should set the consistent mask even if this default is fine for your | 257 | set the consistent mask even if this default is fine for your |
257 | driver. | 258 | driver. |
258 | 259 | ||
259 | Good examples of what to use consistent mappings for are: | 260 | Good examples of what to use consistent mappings for are: |
@@ -285,9 +286,9 @@ There are two types of DMA mappings: | |||
285 | found in PCI bridges (such as by reading a register's value | 286 | found in PCI bridges (such as by reading a register's value |
286 | after writing it). | 287 | after writing it). |
287 | 288 | ||
288 | - Streaming DMA mappings which are usually mapped for one DMA transfer, | 289 | - Streaming DMA mappings which are usually mapped for one DMA |
289 | unmapped right after it (unless you use pci_dma_sync_* below) and for which | 290 | transfer, unmapped right after it (unless you use dma_sync_* below) |
290 | hardware can optimize for sequential accesses. | 291 | and for which hardware can optimize for sequential accesses. |
291 | 292 | ||
292 | This of "streaming" as "asynchronous" or "outside the coherency | 293 | This of "streaming" as "asynchronous" or "outside the coherency |
293 | domain". | 294 | domain". |
@@ -302,8 +303,8 @@ There are two types of DMA mappings: | |||
302 | optimizations the hardware allows. To this end, when using | 303 | optimizations the hardware allows. To this end, when using |
303 | such mappings you must be explicit about what you want to happen. | 304 | such mappings you must be explicit about what you want to happen. |
304 | 305 | ||
305 | Neither type of DMA mapping has alignment restrictions that come | 306 | Neither type of DMA mapping has alignment restrictions that come from |
306 | from PCI, although some devices may have such restrictions. | 307 | the underlying bus, although some devices may have such restrictions. |
307 | Also, systems with caches that aren't DMA-coherent will work better | 308 | Also, systems with caches that aren't DMA-coherent will work better |
308 | when the underlying buffers don't share cache lines with other data. | 309 | when the underlying buffers don't share cache lines with other data. |
309 | 310 | ||
@@ -315,33 +316,27 @@ you should do: | |||
315 | 316 | ||
316 | dma_addr_t dma_handle; | 317 | dma_addr_t dma_handle; |
317 | 318 | ||
318 | cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); | 319 | cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp); |
319 | |||
320 | where pdev is a struct pci_dev *. This may be called in interrupt context. | ||
321 | You should use dma_alloc_coherent (see DMA-API.txt) for buses | ||
322 | where devices don't have struct pci_dev (like ISA, EISA). | ||
323 | 320 | ||
324 | This argument is needed because the DMA translations may be bus | 321 | where device is a struct device *. This may be called in interrupt |
325 | specific (and often is private to the bus which the device is attached | 322 | context with the GFP_ATOMIC flag. |
326 | to). | ||
327 | 323 | ||
328 | Size is the length of the region you want to allocate, in bytes. | 324 | Size is the length of the region you want to allocate, in bytes. |
329 | 325 | ||
330 | This routine will allocate RAM for that region, so it acts similarly to | 326 | This routine will allocate RAM for that region, so it acts similarly to |
331 | __get_free_pages (but takes size instead of a page order). If your | 327 | __get_free_pages (but takes size instead of a page order). If your |
332 | driver needs regions sized smaller than a page, you may prefer using | 328 | driver needs regions sized smaller than a page, you may prefer using |
333 | the pci_pool interface, described below. | 329 | the dma_pool interface, described below. |
334 | 330 | ||
335 | The consistent DMA mapping interfaces, for non-NULL pdev, will by | 331 | The consistent DMA mapping interfaces, for non-NULL dev, will by |
336 | default return a DMA address which is SAC (Single Address Cycle) | 332 | default return a DMA address which is 32-bit addressable. Even if the |
337 | addressable. Even if the device indicates (via PCI dma mask) that it | 333 | device indicates (via DMA mask) that it may address the upper 32-bits, |
338 | may address the upper 32-bits and thus perform DAC cycles, consistent | 334 | consistent allocation will only return > 32-bit addresses for DMA if |
339 | allocation will only return > 32-bit PCI addresses for DMA if the | 335 | the consistent DMA mask has been explicitly changed via |
340 | consistent dma mask has been explicitly changed via | 336 | dma_set_coherent_mask(). This is true of the dma_pool interface as |
341 | pci_set_consistent_dma_mask(). This is true of the pci_pool interface | 337 | well. |
342 | as well. | 338 | |
343 | 339 | dma_alloc_coherent returns two values: the virtual address which you | |
344 | pci_alloc_consistent returns two values: the virtual address which you | ||
345 | can use to access it from the CPU and dma_handle which you pass to the | 340 | can use to access it from the CPU and dma_handle which you pass to the |
346 | card. | 341 | card. |
347 | 342 | ||
@@ -354,54 +349,54 @@ buffer you receive will not cross a 64K boundary. | |||
354 | 349 | ||
355 | To unmap and free such a DMA region, you call: | 350 | To unmap and free such a DMA region, you call: |
356 | 351 | ||
357 | pci_free_consistent(pdev, size, cpu_addr, dma_handle); | 352 | dma_free_coherent(dev, size, cpu_addr, dma_handle); |
358 | 353 | ||
359 | where pdev, size are the same as in the above call and cpu_addr and | 354 | where dev, size are the same as in the above call and cpu_addr and |
360 | dma_handle are the values pci_alloc_consistent returned to you. | 355 | dma_handle are the values dma_alloc_coherent returned to you. |
361 | This function may not be called in interrupt context. | 356 | This function may not be called in interrupt context. |
362 | 357 | ||
363 | If your driver needs lots of smaller memory regions, you can write | 358 | If your driver needs lots of smaller memory regions, you can write |
364 | custom code to subdivide pages returned by pci_alloc_consistent, | 359 | custom code to subdivide pages returned by dma_alloc_coherent, |
365 | or you can use the pci_pool API to do that. A pci_pool is like | 360 | or you can use the dma_pool API to do that. A dma_pool is like |
366 | a kmem_cache, but it uses pci_alloc_consistent not __get_free_pages. | 361 | a kmem_cache, but it uses dma_alloc_coherent not __get_free_pages. |
367 | Also, it understands common hardware constraints for alignment, | 362 | Also, it understands common hardware constraints for alignment, |
368 | like queue heads needing to be aligned on N byte boundaries. | 363 | like queue heads needing to be aligned on N byte boundaries. |
369 | 364 | ||
370 | Create a pci_pool like this: | 365 | Create a dma_pool like this: |
371 | 366 | ||
372 | struct pci_pool *pool; | 367 | struct dma_pool *pool; |
373 | 368 | ||
374 | pool = pci_pool_create(name, pdev, size, align, alloc); | 369 | pool = dma_pool_create(name, dev, size, align, alloc); |
375 | 370 | ||
376 | The "name" is for diagnostics (like a kmem_cache name); pdev and size | 371 | The "name" is for diagnostics (like a kmem_cache name); dev and size |
377 | are as above. The device's hardware alignment requirement for this | 372 | are as above. The device's hardware alignment requirement for this |
378 | type of data is "align" (which is expressed in bytes, and must be a | 373 | type of data is "align" (which is expressed in bytes, and must be a |
379 | power of two). If your device has no boundary crossing restrictions, | 374 | power of two). If your device has no boundary crossing restrictions, |
380 | pass 0 for alloc; passing 4096 says memory allocated from this pool | 375 | pass 0 for alloc; passing 4096 says memory allocated from this pool |
381 | must not cross 4KByte boundaries (but at that time it may be better to | 376 | must not cross 4KByte boundaries (but at that time it may be better to |
382 | go for pci_alloc_consistent directly instead). | 377 | go for dma_alloc_coherent directly instead). |
383 | 378 | ||
384 | Allocate memory from a pci pool like this: | 379 | Allocate memory from a dma pool like this: |
385 | 380 | ||
386 | cpu_addr = pci_pool_alloc(pool, flags, &dma_handle); | 381 | cpu_addr = dma_pool_alloc(pool, flags, &dma_handle); |
387 | 382 | ||
388 | flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor | 383 | flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor |
389 | holding SMP locks), SLAB_ATOMIC otherwise. Like pci_alloc_consistent, | 384 | holding SMP locks), SLAB_ATOMIC otherwise. Like dma_alloc_coherent, |
390 | this returns two values, cpu_addr and dma_handle. | 385 | this returns two values, cpu_addr and dma_handle. |
391 | 386 | ||
392 | Free memory that was allocated from a pci_pool like this: | 387 | Free memory that was allocated from a dma_pool like this: |
393 | 388 | ||
394 | pci_pool_free(pool, cpu_addr, dma_handle); | 389 | dma_pool_free(pool, cpu_addr, dma_handle); |
395 | 390 | ||
396 | where pool is what you passed to pci_pool_alloc, and cpu_addr and | 391 | where pool is what you passed to dma_pool_alloc, and cpu_addr and |
397 | dma_handle are the values pci_pool_alloc returned. This function | 392 | dma_handle are the values dma_pool_alloc returned. This function |
398 | may be called in interrupt context. | 393 | may be called in interrupt context. |
399 | 394 | ||
400 | Destroy a pci_pool by calling: | 395 | Destroy a dma_pool by calling: |
401 | 396 | ||
402 | pci_pool_destroy(pool); | 397 | dma_pool_destroy(pool); |
403 | 398 | ||
404 | Make sure you've called pci_pool_free for all memory allocated | 399 | Make sure you've called dma_pool_free for all memory allocated |
405 | from a pool before you destroy the pool. This function may not | 400 | from a pool before you destroy the pool. This function may not |
406 | be called in interrupt context. | 401 | be called in interrupt context. |
407 | 402 | ||
@@ -411,15 +406,15 @@ The interfaces described in subsequent portions of this document | |||
411 | take a DMA direction argument, which is an integer and takes on | 406 | take a DMA direction argument, which is an integer and takes on |
412 | one of the following values: | 407 | one of the following values: |
413 | 408 | ||
414 | PCI_DMA_BIDIRECTIONAL | 409 | DMA_BIDIRECTIONAL |
415 | PCI_DMA_TODEVICE | 410 | DMA_TO_DEVICE |
416 | PCI_DMA_FROMDEVICE | 411 | DMA_FROM_DEVICE |
417 | PCI_DMA_NONE | 412 | DMA_NONE |
418 | 413 | ||
419 | One should provide the exact DMA direction if you know it. | 414 | One should provide the exact DMA direction if you know it. |
420 | 415 | ||
421 | PCI_DMA_TODEVICE means "from main memory to the PCI device" | 416 | DMA_TO_DEVICE means "from main memory to the device" |
422 | PCI_DMA_FROMDEVICE means "from the PCI device to main memory" | 417 | DMA_FROM_DEVICE means "from the device to main memory" |
423 | It is the direction in which the data moves during the DMA | 418 | It is the direction in which the data moves during the DMA |
424 | transfer. | 419 | transfer. |
425 | 420 | ||
@@ -427,12 +422,12 @@ You are _strongly_ encouraged to specify this as precisely | |||
427 | as you possibly can. | 422 | as you possibly can. |
428 | 423 | ||
429 | If you absolutely cannot know the direction of the DMA transfer, | 424 | If you absolutely cannot know the direction of the DMA transfer, |
430 | specify PCI_DMA_BIDIRECTIONAL. It means that the DMA can go in | 425 | specify DMA_BIDIRECTIONAL. It means that the DMA can go in |
431 | either direction. The platform guarantees that you may legally | 426 | either direction. The platform guarantees that you may legally |
432 | specify this, and that it will work, but this may be at the | 427 | specify this, and that it will work, but this may be at the |
433 | cost of performance for example. | 428 | cost of performance for example. |
434 | 429 | ||
435 | The value PCI_DMA_NONE is to be used for debugging. One can | 430 | The value DMA_NONE is to be used for debugging. One can |
436 | hold this in a data structure before you come to know the | 431 | hold this in a data structure before you come to know the |
437 | precise direction, and this will help catch cases where your | 432 | precise direction, and this will help catch cases where your |
438 | direction tracking logic has failed to set things up properly. | 433 | direction tracking logic has failed to set things up properly. |
@@ -442,21 +437,21 @@ potential platform-specific optimizations of such) is for debugging. | |||
442 | Some platforms actually have a write permission boolean which DMA | 437 | Some platforms actually have a write permission boolean which DMA |
443 | mappings can be marked with, much like page protections in the user | 438 | mappings can be marked with, much like page protections in the user |
444 | program address space. Such platforms can and do report errors in the | 439 | program address space. Such platforms can and do report errors in the |
445 | kernel logs when the PCI controller hardware detects violation of the | 440 | kernel logs when the DMA controller hardware detects violation of the |
446 | permission setting. | 441 | permission setting. |
447 | 442 | ||
448 | Only streaming mappings specify a direction, consistent mappings | 443 | Only streaming mappings specify a direction, consistent mappings |
449 | implicitly have a direction attribute setting of | 444 | implicitly have a direction attribute setting of |
450 | PCI_DMA_BIDIRECTIONAL. | 445 | DMA_BIDIRECTIONAL. |
451 | 446 | ||
452 | The SCSI subsystem tells you the direction to use in the | 447 | The SCSI subsystem tells you the direction to use in the |
453 | 'sc_data_direction' member of the SCSI command your driver is | 448 | 'sc_data_direction' member of the SCSI command your driver is |
454 | working on. | 449 | working on. |
455 | 450 | ||
456 | For Networking drivers, it's a rather simple affair. For transmit | 451 | For Networking drivers, it's a rather simple affair. For transmit |
457 | packets, map/unmap them with the PCI_DMA_TODEVICE direction | 452 | packets, map/unmap them with the DMA_TO_DEVICE direction |
458 | specifier. For receive packets, just the opposite, map/unmap them | 453 | specifier. For receive packets, just the opposite, map/unmap them |
459 | with the PCI_DMA_FROMDEVICE direction specifier. | 454 | with the DMA_FROM_DEVICE direction specifier. |
460 | 455 | ||
461 | Using Streaming DMA mappings | 456 | Using Streaming DMA mappings |
462 | 457 | ||
@@ -467,43 +462,43 @@ scatterlist. | |||
467 | 462 | ||
468 | To map a single region, you do: | 463 | To map a single region, you do: |
469 | 464 | ||
470 | struct pci_dev *pdev = mydev->pdev; | 465 | struct device *dev = &my_dev->dev; |
471 | dma_addr_t dma_handle; | 466 | dma_addr_t dma_handle; |
472 | void *addr = buffer->ptr; | 467 | void *addr = buffer->ptr; |
473 | size_t size = buffer->len; | 468 | size_t size = buffer->len; |
474 | 469 | ||
475 | dma_handle = pci_map_single(pdev, addr, size, direction); | 470 | dma_handle = dma_map_single(dev, addr, size, direction); |
476 | 471 | ||
477 | and to unmap it: | 472 | and to unmap it: |
478 | 473 | ||
479 | pci_unmap_single(pdev, dma_handle, size, direction); | 474 | dma_unmap_single(dev, dma_handle, size, direction); |
480 | 475 | ||
481 | You should call pci_unmap_single when the DMA activity is finished, e.g. | 476 | You should call dma_unmap_single when the DMA activity is finished, e.g. |
482 | from the interrupt which told you that the DMA transfer is done. | 477 | from the interrupt which told you that the DMA transfer is done. |
483 | 478 | ||
484 | Using cpu pointers like this for single mappings has a disadvantage, | 479 | Using cpu pointers like this for single mappings has a disadvantage, |
485 | you cannot reference HIGHMEM memory in this way. Thus, there is a | 480 | you cannot reference HIGHMEM memory in this way. Thus, there is a |
486 | map/unmap interface pair akin to pci_{map,unmap}_single. These | 481 | map/unmap interface pair akin to dma_{map,unmap}_single. These |
487 | interfaces deal with page/offset pairs instead of cpu pointers. | 482 | interfaces deal with page/offset pairs instead of cpu pointers. |
488 | Specifically: | 483 | Specifically: |
489 | 484 | ||
490 | struct pci_dev *pdev = mydev->pdev; | 485 | struct device *dev = &my_dev->dev; |
491 | dma_addr_t dma_handle; | 486 | dma_addr_t dma_handle; |
492 | struct page *page = buffer->page; | 487 | struct page *page = buffer->page; |
493 | unsigned long offset = buffer->offset; | 488 | unsigned long offset = buffer->offset; |
494 | size_t size = buffer->len; | 489 | size_t size = buffer->len; |
495 | 490 | ||
496 | dma_handle = pci_map_page(pdev, page, offset, size, direction); | 491 | dma_handle = dma_map_page(dev, page, offset, size, direction); |
497 | 492 | ||
498 | ... | 493 | ... |
499 | 494 | ||
500 | pci_unmap_page(pdev, dma_handle, size, direction); | 495 | dma_unmap_page(dev, dma_handle, size, direction); |
501 | 496 | ||
502 | Here, "offset" means byte offset within the given page. | 497 | Here, "offset" means byte offset within the given page. |
503 | 498 | ||
504 | With scatterlists, you map a region gathered from several regions by: | 499 | With scatterlists, you map a region gathered from several regions by: |
505 | 500 | ||
506 | int i, count = pci_map_sg(pdev, sglist, nents, direction); | 501 | int i, count = dma_map_sg(dev, sglist, nents, direction); |
507 | struct scatterlist *sg; | 502 | struct scatterlist *sg; |
508 | 503 | ||
509 | for_each_sg(sglist, sg, count, i) { | 504 | for_each_sg(sglist, sg, count, i) { |
@@ -527,16 +522,16 @@ accessed sg->address and sg->length as shown above. | |||
527 | 522 | ||
528 | To unmap a scatterlist, just call: | 523 | To unmap a scatterlist, just call: |
529 | 524 | ||
530 | pci_unmap_sg(pdev, sglist, nents, direction); | 525 | dma_unmap_sg(dev, sglist, nents, direction); |
531 | 526 | ||
532 | Again, make sure DMA activity has already finished. | 527 | Again, make sure DMA activity has already finished. |
533 | 528 | ||
534 | PLEASE NOTE: The 'nents' argument to the pci_unmap_sg call must be | 529 | PLEASE NOTE: The 'nents' argument to the dma_unmap_sg call must be |
535 | the _same_ one you passed into the pci_map_sg call, | 530 | the _same_ one you passed into the dma_map_sg call, |
536 | it should _NOT_ be the 'count' value _returned_ from the | 531 | it should _NOT_ be the 'count' value _returned_ from the |
537 | pci_map_sg call. | 532 | dma_map_sg call. |
538 | 533 | ||
539 | Every pci_map_{single,sg} call should have its pci_unmap_{single,sg} | 534 | Every dma_map_{single,sg} call should have its dma_unmap_{single,sg} |
540 | counterpart, because the bus address space is a shared resource (although | 535 | counterpart, because the bus address space is a shared resource (although |
541 | in some ports the mapping is per each BUS so less devices contend for the | 536 | in some ports the mapping is per each BUS so less devices contend for the |
542 | same bus address space) and you could render the machine unusable by eating | 537 | same bus address space) and you could render the machine unusable by eating |
@@ -547,14 +542,14 @@ the data in between the DMA transfers, the buffer needs to be synced | |||
547 | properly in order for the cpu and device to see the most uptodate and | 542 | properly in order for the cpu and device to see the most uptodate and |
548 | correct copy of the DMA buffer. | 543 | correct copy of the DMA buffer. |
549 | 544 | ||
550 | So, firstly, just map it with pci_map_{single,sg}, and after each DMA | 545 | So, firstly, just map it with dma_map_{single,sg}, and after each DMA |
551 | transfer call either: | 546 | transfer call either: |
552 | 547 | ||
553 | pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); | 548 | dma_sync_single_for_cpu(dev, dma_handle, size, direction); |
554 | 549 | ||
555 | or: | 550 | or: |
556 | 551 | ||
557 | pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); | 552 | dma_sync_sg_for_cpu(dev, sglist, nents, direction); |
558 | 553 | ||
559 | as appropriate. | 554 | as appropriate. |
560 | 555 | ||
@@ -562,27 +557,27 @@ Then, if you wish to let the device get at the DMA area again, | |||
562 | finish accessing the data with the cpu, and then before actually | 557 | finish accessing the data with the cpu, and then before actually |
563 | giving the buffer to the hardware call either: | 558 | giving the buffer to the hardware call either: |
564 | 559 | ||
565 | pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); | 560 | dma_sync_single_for_device(dev, dma_handle, size, direction); |
566 | 561 | ||
567 | or: | 562 | or: |
568 | 563 | ||
569 | pci_dma_sync_sg_for_device(dev, sglist, nents, direction); | 564 | dma_sync_sg_for_device(dev, sglist, nents, direction); |
570 | 565 | ||
571 | as appropriate. | 566 | as appropriate. |
572 | 567 | ||
573 | After the last DMA transfer call one of the DMA unmap routines | 568 | After the last DMA transfer call one of the DMA unmap routines |
574 | pci_unmap_{single,sg}. If you don't touch the data from the first pci_map_* | 569 | dma_unmap_{single,sg}. If you don't touch the data from the first dma_map_* |
575 | call till pci_unmap_*, then you don't have to call the pci_dma_sync_* | 570 | call till dma_unmap_*, then you don't have to call the dma_sync_* |
576 | routines at all. | 571 | routines at all. |
577 | 572 | ||
578 | Here is pseudo code which shows a situation in which you would need | 573 | Here is pseudo code which shows a situation in which you would need |
579 | to use the pci_dma_sync_*() interfaces. | 574 | to use the dma_sync_*() interfaces. |
580 | 575 | ||
581 | my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len) | 576 | my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len) |
582 | { | 577 | { |
583 | dma_addr_t mapping; | 578 | dma_addr_t mapping; |
584 | 579 | ||
585 | mapping = pci_map_single(cp->pdev, buffer, len, PCI_DMA_FROMDEVICE); | 580 | mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE); |
586 | 581 | ||
587 | cp->rx_buf = buffer; | 582 | cp->rx_buf = buffer; |
588 | cp->rx_len = len; | 583 | cp->rx_len = len; |
@@ -606,25 +601,25 @@ to use the pci_dma_sync_*() interfaces. | |||
606 | * the DMA transfer with the CPU first | 601 | * the DMA transfer with the CPU first |
607 | * so that we see updated contents. | 602 | * so that we see updated contents. |
608 | */ | 603 | */ |
609 | pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma, | 604 | dma_sync_single_for_cpu(&cp->dev, cp->rx_dma, |
610 | cp->rx_len, | 605 | cp->rx_len, |
611 | PCI_DMA_FROMDEVICE); | 606 | DMA_FROM_DEVICE); |
612 | 607 | ||
613 | /* Now it is safe to examine the buffer. */ | 608 | /* Now it is safe to examine the buffer. */ |
614 | hp = (struct my_card_header *) cp->rx_buf; | 609 | hp = (struct my_card_header *) cp->rx_buf; |
615 | if (header_is_ok(hp)) { | 610 | if (header_is_ok(hp)) { |
616 | pci_unmap_single(cp->pdev, cp->rx_dma, cp->rx_len, | 611 | dma_unmap_single(&cp->dev, cp->rx_dma, cp->rx_len, |
617 | PCI_DMA_FROMDEVICE); | 612 | DMA_FROM_DEVICE); |
618 | pass_to_upper_layers(cp->rx_buf); | 613 | pass_to_upper_layers(cp->rx_buf); |
619 | make_and_setup_new_rx_buf(cp); | 614 | make_and_setup_new_rx_buf(cp); |
620 | } else { | 615 | } else { |
621 | /* Just sync the buffer and give it back | 616 | /* Just sync the buffer and give it back |
622 | * to the card. | 617 | * to the card. |
623 | */ | 618 | */ |
624 | pci_dma_sync_single_for_device(cp->pdev, | 619 | dma_sync_single_for_device(&cp->dev, |
625 | cp->rx_dma, | 620 | cp->rx_dma, |
626 | cp->rx_len, | 621 | cp->rx_len, |
627 | PCI_DMA_FROMDEVICE); | 622 | DMA_FROM_DEVICE); |
628 | give_rx_buf_to_card(cp); | 623 | give_rx_buf_to_card(cp); |
629 | } | 624 | } |
630 | } | 625 | } |
@@ -634,19 +629,19 @@ Drivers converted fully to this interface should not use virt_to_bus any | |||
634 | longer, nor should they use bus_to_virt. Some drivers have to be changed a | 629 | longer, nor should they use bus_to_virt. Some drivers have to be changed a |
635 | little bit, because there is no longer an equivalent to bus_to_virt in the | 630 | little bit, because there is no longer an equivalent to bus_to_virt in the |
636 | dynamic DMA mapping scheme - you have to always store the DMA addresses | 631 | dynamic DMA mapping scheme - you have to always store the DMA addresses |
637 | returned by the pci_alloc_consistent, pci_pool_alloc, and pci_map_single | 632 | returned by the dma_alloc_coherent, dma_pool_alloc, and dma_map_single |
638 | calls (pci_map_sg stores them in the scatterlist itself if the platform | 633 | calls (dma_map_sg stores them in the scatterlist itself if the platform |
639 | supports dynamic DMA mapping in hardware) in your driver structures and/or | 634 | supports dynamic DMA mapping in hardware) in your driver structures and/or |
640 | in the card registers. | 635 | in the card registers. |
641 | 636 | ||
642 | All PCI drivers should be using these interfaces with no exceptions. | 637 | All drivers should be using these interfaces with no exceptions. It |
643 | It is planned to completely remove virt_to_bus() and bus_to_virt() as | 638 | is planned to completely remove virt_to_bus() and bus_to_virt() as |
644 | they are entirely deprecated. Some ports already do not provide these | 639 | they are entirely deprecated. Some ports already do not provide these |
645 | as it is impossible to correctly support them. | 640 | as it is impossible to correctly support them. |
646 | 641 | ||
647 | Optimizing Unmap State Space Consumption | 642 | Optimizing Unmap State Space Consumption |
648 | 643 | ||
649 | On many platforms, pci_unmap_{single,page}() is simply a nop. | 644 | On many platforms, dma_unmap_{single,page}() is simply a nop. |
650 | Therefore, keeping track of the mapping address and length is a waste | 645 | Therefore, keeping track of the mapping address and length is a waste |
651 | of space. Instead of filling your drivers up with ifdefs and the like | 646 | of space. Instead of filling your drivers up with ifdefs and the like |
652 | to "work around" this (which would defeat the whole purpose of a | 647 | to "work around" this (which would defeat the whole purpose of a |
@@ -655,7 +650,7 @@ portable API) the following facilities are provided. | |||
655 | Actually, instead of describing the macros one by one, we'll | 650 | Actually, instead of describing the macros one by one, we'll |
656 | transform some example code. | 651 | transform some example code. |
657 | 652 | ||
658 | 1) Use DECLARE_PCI_UNMAP_{ADDR,LEN} in state saving structures. | 653 | 1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures. |
659 | Example, before: | 654 | Example, before: |
660 | 655 | ||
661 | struct ring_state { | 656 | struct ring_state { |
@@ -668,14 +663,11 @@ transform some example code. | |||
668 | 663 | ||
669 | struct ring_state { | 664 | struct ring_state { |
670 | struct sk_buff *skb; | 665 | struct sk_buff *skb; |
671 | DECLARE_PCI_UNMAP_ADDR(mapping) | 666 | DEFINE_DMA_UNMAP_ADDR(mapping); |
672 | DECLARE_PCI_UNMAP_LEN(len) | 667 | DEFINE_DMA_UNMAP_LEN(len); |
673 | }; | 668 | }; |
674 | 669 | ||
675 | NOTE: DO NOT put a semicolon at the end of the DECLARE_*() | 670 | 2) Use dma_unmap_{addr,len}_set to set these values. |
676 | macro. | ||
677 | |||
678 | 2) Use pci_unmap_{addr,len}_set to set these values. | ||
679 | Example, before: | 671 | Example, before: |
680 | 672 | ||
681 | ringp->mapping = FOO; | 673 | ringp->mapping = FOO; |
@@ -683,21 +675,21 @@ transform some example code. | |||
683 | 675 | ||
684 | after: | 676 | after: |
685 | 677 | ||
686 | pci_unmap_addr_set(ringp, mapping, FOO); | 678 | dma_unmap_addr_set(ringp, mapping, FOO); |
687 | pci_unmap_len_set(ringp, len, BAR); | 679 | dma_unmap_len_set(ringp, len, BAR); |
688 | 680 | ||
689 | 3) Use pci_unmap_{addr,len} to access these values. | 681 | 3) Use dma_unmap_{addr,len} to access these values. |
690 | Example, before: | 682 | Example, before: |
691 | 683 | ||
692 | pci_unmap_single(pdev, ringp->mapping, ringp->len, | 684 | dma_unmap_single(dev, ringp->mapping, ringp->len, |
693 | PCI_DMA_FROMDEVICE); | 685 | DMA_FROM_DEVICE); |
694 | 686 | ||
695 | after: | 687 | after: |
696 | 688 | ||
697 | pci_unmap_single(pdev, | 689 | dma_unmap_single(dev, |
698 | pci_unmap_addr(ringp, mapping), | 690 | dma_unmap_addr(ringp, mapping), |
699 | pci_unmap_len(ringp, len), | 691 | dma_unmap_len(ringp, len), |
700 | PCI_DMA_FROMDEVICE); | 692 | DMA_FROM_DEVICE); |
701 | 693 | ||
702 | It really should be self-explanatory. We treat the ADDR and LEN | 694 | It really should be self-explanatory. We treat the ADDR and LEN |
703 | separately, because it is possible for an implementation to only | 695 | separately, because it is possible for an implementation to only |
@@ -732,15 +724,15 @@ to "Closing". | |||
732 | DMA address space is limited on some architectures and an allocation | 724 | DMA address space is limited on some architectures and an allocation |
733 | failure can be determined by: | 725 | failure can be determined by: |
734 | 726 | ||
735 | - checking if pci_alloc_consistent returns NULL or pci_map_sg returns 0 | 727 | - checking if dma_alloc_coherent returns NULL or dma_map_sg returns 0 |
736 | 728 | ||
737 | - checking the returned dma_addr_t of pci_map_single and pci_map_page | 729 | - checking the returned dma_addr_t of dma_map_single and dma_map_page |
738 | by using pci_dma_mapping_error(): | 730 | by using dma_mapping_error(): |
739 | 731 | ||
740 | dma_addr_t dma_handle; | 732 | dma_addr_t dma_handle; |
741 | 733 | ||
742 | dma_handle = pci_map_single(pdev, addr, size, direction); | 734 | dma_handle = dma_map_single(dev, addr, size, direction); |
743 | if (pci_dma_mapping_error(pdev, dma_handle)) { | 735 | if (dma_mapping_error(dev, dma_handle)) { |
744 | /* | 736 | /* |
745 | * reduce current DMA mapping usage, | 737 | * reduce current DMA mapping usage, |
746 | * delay and try again later or | 738 | * delay and try again later or |
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 1053a56be3b1..8916ca48bc95 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist | |||
@@ -9,10 +9,14 @@ Documentation/SubmittingPatches and elsewhere regarding submitting Linux | |||
9 | kernel patches. | 9 | kernel patches. |
10 | 10 | ||
11 | 11 | ||
12 | 1: Builds cleanly with applicable or modified CONFIG options =y, =m, and | 12 | 1: If you use a facility then #include the file that defines/declares |
13 | that facility. Don't depend on other header files pulling in ones | ||
14 | that you use. | ||
15 | |||
16 | 2: Builds cleanly with applicable or modified CONFIG options =y, =m, and | ||
13 | =n. No gcc warnings/errors, no linker warnings/errors. | 17 | =n. No gcc warnings/errors, no linker warnings/errors. |
14 | 18 | ||
15 | 2: Passes allnoconfig, allmodconfig | 19 | 2b: Passes allnoconfig, allmodconfig |
16 | 20 | ||
17 | 3: Builds on multiple CPU architectures by using local cross-compile tools | 21 | 3: Builds on multiple CPU architectures by using local cross-compile tools |
18 | or some other build farm. | 22 | or some other build farm. |
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt index 76b3a11e90be..fa968aa99d67 100644 --- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt +++ b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt | |||
@@ -14,8 +14,8 @@ Introduction | |||
14 | how the clocks are arranged. The first implementation used as single | 14 | how the clocks are arranged. The first implementation used as single |
15 | PLL to feed the ARM, memory and peripherals via a series of dividers | 15 | PLL to feed the ARM, memory and peripherals via a series of dividers |
16 | and muxes and this is the implementation that is documented here. A | 16 | and muxes and this is the implementation that is documented here. A |
17 | newer version where there is a seperate PLL and clock divider for the | 17 | newer version where there is a separate PLL and clock divider for the |
18 | ARM core is available as a seperate driver. | 18 | ARM core is available as a separate driver. |
19 | 19 | ||
20 | 20 | ||
21 | Layout | 21 | Layout |
diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/Samsung/Overview.txt new file mode 100644 index 000000000000..7cced1fea9c3 --- /dev/null +++ b/Documentation/arm/Samsung/Overview.txt | |||
@@ -0,0 +1,86 @@ | |||
1 | Samsung ARM Linux Overview | ||
2 | ========================== | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | The Samsung range of ARM SoCs spans many similar devices, from the initial | ||
8 | ARM9 through to the newest ARM cores. This document shows an overview of | ||
9 | the current kernel support, how to use it and where to find the code | ||
10 | that supports this. | ||
11 | |||
12 | The currently supported SoCs are: | ||
13 | |||
14 | - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list | ||
15 | - S3C64XX: S3C6400 and S3C6410 | ||
16 | - S5PC6440 | ||
17 | |||
18 | S5PC100 and S5PC110 support is currently being merged | ||
19 | |||
20 | |||
21 | S3C24XX Systems | ||
22 | --------------- | ||
23 | |||
24 | There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which | ||
25 | deals with the architecture and drivers specific to these devices. | ||
26 | |||
27 | See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information | ||
28 | on the implementation details and specific support. | ||
29 | |||
30 | |||
31 | Configuration | ||
32 | ------------- | ||
33 | |||
34 | A number of configurations are supplied, as there is no current way of | ||
35 | unifying all the SoCs into one kernel. | ||
36 | |||
37 | s5p6440_defconfig - S5P6440 specific default configuration | ||
38 | s5pc100_defconfig - S5PC100 specific default configuration | ||
39 | |||
40 | |||
41 | Layout | ||
42 | ------ | ||
43 | |||
44 | The directory layout is currently being restructured, and consists of | ||
45 | several platform directories and then the machine specific directories | ||
46 | of the CPUs being built for. | ||
47 | |||
48 | plat-samsung provides the base for all the implementations, and is the | ||
49 | last in the line of include directories that are processed for the build | ||
50 | specific information. It contains the base clock, GPIO and device definitions | ||
51 | to get the system running. | ||
52 | |||
53 | plat-s3c is the s3c24xx/s3c64xx platform directory, although it is currently | ||
54 | involved in other builds this will be phased out once the relevant code is | ||
55 | moved elsewhere. | ||
56 | |||
57 | plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs. | ||
58 | |||
59 | plat-s3c64xx is for the s3c64xx specific bits, see the S3C24XX docs. | ||
60 | |||
61 | plat-s5p is for s5p specific builds, more to be added. | ||
62 | |||
63 | |||
64 | [ to finish ] | ||
65 | |||
66 | |||
67 | Port Contributors | ||
68 | ----------------- | ||
69 | |||
70 | Ben Dooks (BJD) | ||
71 | Vincent Sanders | ||
72 | Herbert Potzl | ||
73 | Arnaud Patard (RTP) | ||
74 | Roc Wu | ||
75 | Klaus Fetscher | ||
76 | Dimitry Andric | ||
77 | Shannon Holland | ||
78 | Guillaume Gourat (NexVision) | ||
79 | Christer Weinigel (wingel) (Acer N30) | ||
80 | Lucas Correia Villa Real (S3C2400 port) | ||
81 | |||
82 | |||
83 | Document Author | ||
84 | --------------- | ||
85 | |||
86 | Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org> | ||
diff --git a/Documentation/arm/Samsung/clksrc-change-registers.awk b/Documentation/arm/Samsung/clksrc-change-registers.awk new file mode 100755 index 000000000000..0c50220851fb --- /dev/null +++ b/Documentation/arm/Samsung/clksrc-change-registers.awk | |||
@@ -0,0 +1,167 @@ | |||
1 | #!/usr/bin/awk -f | ||
2 | # | ||
3 | # Copyright 2010 Ben Dooks <ben-linux@fluff.org> | ||
4 | # | ||
5 | # Released under GPLv2 | ||
6 | |||
7 | # example usage | ||
8 | # ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst | ||
9 | |||
10 | function extract_value(s) | ||
11 | { | ||
12 | eqat = index(s, "=") | ||
13 | comat = index(s, ",") | ||
14 | return substr(s, eqat+2, (comat-eqat)-2) | ||
15 | } | ||
16 | |||
17 | function remove_brackets(b) | ||
18 | { | ||
19 | return substr(b, 2, length(b)-2) | ||
20 | } | ||
21 | |||
22 | function splitdefine(l, p) | ||
23 | { | ||
24 | r = split(l, tp) | ||
25 | |||
26 | p[0] = tp[2] | ||
27 | p[1] = remove_brackets(tp[3]) | ||
28 | } | ||
29 | |||
30 | function find_length(f) | ||
31 | { | ||
32 | if (0) | ||
33 | printf "find_length " f "\n" > "/dev/stderr" | ||
34 | |||
35 | if (f ~ /0x1/) | ||
36 | return 1 | ||
37 | else if (f ~ /0x3/) | ||
38 | return 2 | ||
39 | else if (f ~ /0x7/) | ||
40 | return 3 | ||
41 | else if (f ~ /0xf/) | ||
42 | return 4 | ||
43 | |||
44 | printf "unknown legnth " f "\n" > "/dev/stderr" | ||
45 | exit | ||
46 | } | ||
47 | |||
48 | function find_shift(s) | ||
49 | { | ||
50 | id = index(s, "<") | ||
51 | if (id <= 0) { | ||
52 | printf "cannot find shift " s "\n" > "/dev/stderr" | ||
53 | exit | ||
54 | } | ||
55 | |||
56 | return substr(s, id+2) | ||
57 | } | ||
58 | |||
59 | |||
60 | BEGIN { | ||
61 | if (ARGC < 2) { | ||
62 | print "too few arguments" > "/dev/stderr" | ||
63 | exit | ||
64 | } | ||
65 | |||
66 | # read the header file and find the mask values that we will need | ||
67 | # to replace and create an associative array of values | ||
68 | |||
69 | while (getline line < ARGV[1] > 0) { | ||
70 | if (line ~ /\#define.*_MASK/ && | ||
71 | !(line ~ /S5PC100_EPLL_MASK/) && | ||
72 | !(line ~ /USB_SIG_MASK/)) { | ||
73 | splitdefine(line, fields) | ||
74 | name = fields[0] | ||
75 | if (0) | ||
76 | printf "MASK " line "\n" > "/dev/stderr" | ||
77 | dmask[name,0] = find_length(fields[1]) | ||
78 | dmask[name,1] = find_shift(fields[1]) | ||
79 | if (0) | ||
80 | printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr" | ||
81 | } else { | ||
82 | } | ||
83 | } | ||
84 | |||
85 | delete ARGV[1] | ||
86 | } | ||
87 | |||
88 | /clksrc_clk.*=.*{/ { | ||
89 | shift="" | ||
90 | mask="" | ||
91 | divshift="" | ||
92 | reg_div="" | ||
93 | reg_src="" | ||
94 | indent=1 | ||
95 | |||
96 | print $0 | ||
97 | |||
98 | for(; indent >= 1;) { | ||
99 | if ((getline line) <= 0) { | ||
100 | printf "unexpected end of file" > "/dev/stderr" | ||
101 | exit 1; | ||
102 | } | ||
103 | |||
104 | if (line ~ /\.shift/) { | ||
105 | shift = extract_value(line) | ||
106 | } else if (line ~ /\.mask/) { | ||
107 | mask = extract_value(line) | ||
108 | } else if (line ~ /\.reg_divider/) { | ||
109 | reg_div = extract_value(line) | ||
110 | } else if (line ~ /\.reg_source/) { | ||
111 | reg_src = extract_value(line) | ||
112 | } else if (line ~ /\.divider_shift/) { | ||
113 | divshift = extract_value(line) | ||
114 | } else if (line ~ /{/) { | ||
115 | indent++ | ||
116 | print line | ||
117 | } else if (line ~ /}/) { | ||
118 | indent-- | ||
119 | |||
120 | if (indent == 0) { | ||
121 | if (0) { | ||
122 | printf "shift '" shift "' ='" dmask[shift,0] "'\n" > "/dev/stderr" | ||
123 | printf "mask '" mask "'\n" > "/dev/stderr" | ||
124 | printf "dshft '" divshift "'\n" > "/dev/stderr" | ||
125 | printf "rdiv '" reg_div "'\n" > "/dev/stderr" | ||
126 | printf "rsrc '" reg_src "'\n" > "/dev/stderr" | ||
127 | } | ||
128 | |||
129 | generated = mask | ||
130 | sub(reg_src, reg_div, generated) | ||
131 | |||
132 | if (0) { | ||
133 | printf "/* rsrc " reg_src " */\n" | ||
134 | printf "/* rdiv " reg_div " */\n" | ||
135 | printf "/* shift " shift " */\n" | ||
136 | printf "/* mask " mask " */\n" | ||
137 | printf "/* generated " generated " */\n" | ||
138 | } | ||
139 | |||
140 | if (reg_div != "") { | ||
141 | printf "\t.reg_div = { " | ||
142 | printf ".reg = " reg_div ", " | ||
143 | printf ".shift = " dmask[generated,1] ", " | ||
144 | printf ".size = " dmask[generated,0] ", " | ||
145 | printf "},\n" | ||
146 | } | ||
147 | |||
148 | printf "\t.reg_src = { " | ||
149 | printf ".reg = " reg_src ", " | ||
150 | printf ".shift = " dmask[mask,1] ", " | ||
151 | printf ".size = " dmask[mask,0] ", " | ||
152 | |||
153 | printf "},\n" | ||
154 | |||
155 | } | ||
156 | |||
157 | print line | ||
158 | } else { | ||
159 | print line | ||
160 | } | ||
161 | |||
162 | if (0) | ||
163 | printf indent ":" line "\n" > "/dev/stderr" | ||
164 | } | ||
165 | } | ||
166 | |||
167 | // && ! /clksrc_clk.*=.*{/ { print $0 } | ||
diff --git a/Documentation/cgroups/cgroup_event_listener.c b/Documentation/cgroups/cgroup_event_listener.c new file mode 100644 index 000000000000..8c2bfc4a6358 --- /dev/null +++ b/Documentation/cgroups/cgroup_event_listener.c | |||
@@ -0,0 +1,110 @@ | |||
1 | /* | ||
2 | * cgroup_event_listener.c - Simple listener of cgroup events | ||
3 | * | ||
4 | * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name> | ||
5 | */ | ||
6 | |||
7 | #include <assert.h> | ||
8 | #include <errno.h> | ||
9 | #include <fcntl.h> | ||
10 | #include <libgen.h> | ||
11 | #include <limits.h> | ||
12 | #include <stdio.h> | ||
13 | #include <string.h> | ||
14 | #include <unistd.h> | ||
15 | |||
16 | #include <sys/eventfd.h> | ||
17 | |||
18 | #define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n" | ||
19 | |||
20 | int main(int argc, char **argv) | ||
21 | { | ||
22 | int efd = -1; | ||
23 | int cfd = -1; | ||
24 | int event_control = -1; | ||
25 | char event_control_path[PATH_MAX]; | ||
26 | char line[LINE_MAX]; | ||
27 | int ret; | ||
28 | |||
29 | if (argc != 3) { | ||
30 | fputs(USAGE_STR, stderr); | ||
31 | return 1; | ||
32 | } | ||
33 | |||
34 | cfd = open(argv[1], O_RDONLY); | ||
35 | if (cfd == -1) { | ||
36 | fprintf(stderr, "Cannot open %s: %s\n", argv[1], | ||
37 | strerror(errno)); | ||
38 | goto out; | ||
39 | } | ||
40 | |||
41 | ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", | ||
42 | dirname(argv[1])); | ||
43 | if (ret >= PATH_MAX) { | ||
44 | fputs("Path to cgroup.event_control is too long\n", stderr); | ||
45 | goto out; | ||
46 | } | ||
47 | |||
48 | event_control = open(event_control_path, O_WRONLY); | ||
49 | if (event_control == -1) { | ||
50 | fprintf(stderr, "Cannot open %s: %s\n", event_control_path, | ||
51 | strerror(errno)); | ||
52 | goto out; | ||
53 | } | ||
54 | |||
55 | efd = eventfd(0, 0); | ||
56 | if (efd == -1) { | ||
57 | perror("eventfd() failed"); | ||
58 | goto out; | ||
59 | } | ||
60 | |||
61 | ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); | ||
62 | if (ret >= LINE_MAX) { | ||
63 | fputs("Arguments string is too long\n", stderr); | ||
64 | goto out; | ||
65 | } | ||
66 | |||
67 | ret = write(event_control, line, strlen(line) + 1); | ||
68 | if (ret == -1) { | ||
69 | perror("Cannot write to cgroup.event_control"); | ||
70 | goto out; | ||
71 | } | ||
72 | |||
73 | while (1) { | ||
74 | uint64_t result; | ||
75 | |||
76 | ret = read(efd, &result, sizeof(result)); | ||
77 | if (ret == -1) { | ||
78 | if (errno == EINTR) | ||
79 | continue; | ||
80 | perror("Cannot read from eventfd"); | ||
81 | break; | ||
82 | } | ||
83 | assert(ret == sizeof(result)); | ||
84 | |||
85 | ret = access(event_control_path, W_OK); | ||
86 | if ((ret == -1) && (errno == ENOENT)) { | ||
87 | puts("The cgroup seems to have removed."); | ||
88 | ret = 0; | ||
89 | break; | ||
90 | } | ||
91 | |||
92 | if (ret == -1) { | ||
93 | perror("cgroup.event_control " | ||
94 | "is not accessable any more"); | ||
95 | break; | ||
96 | } | ||
97 | |||
98 | printf("%s %s: crossed\n", argv[1], argv[2]); | ||
99 | } | ||
100 | |||
101 | out: | ||
102 | if (efd >= 0) | ||
103 | close(efd); | ||
104 | if (event_control >= 0) | ||
105 | close(event_control); | ||
106 | if (cfd >= 0) | ||
107 | close(cfd); | ||
108 | |||
109 | return (ret != 0); | ||
110 | } | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 0b33bfe7dde9..fd588ff0e296 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -22,6 +22,8 @@ CONTENTS: | |||
22 | 2. Usage Examples and Syntax | 22 | 2. Usage Examples and Syntax |
23 | 2.1 Basic Usage | 23 | 2.1 Basic Usage |
24 | 2.2 Attaching processes | 24 | 2.2 Attaching processes |
25 | 2.3 Mounting hierarchies by name | ||
26 | 2.4 Notification API | ||
25 | 3. Kernel API | 27 | 3. Kernel API |
26 | 3.1 Overview | 28 | 3.1 Overview |
27 | 3.2 Synchronization | 29 | 3.2 Synchronization |
@@ -434,6 +436,25 @@ you give a subsystem a name. | |||
434 | The name of the subsystem appears as part of the hierarchy description | 436 | The name of the subsystem appears as part of the hierarchy description |
435 | in /proc/mounts and /proc/<pid>/cgroups. | 437 | in /proc/mounts and /proc/<pid>/cgroups. |
436 | 438 | ||
439 | 2.4 Notification API | ||
440 | -------------------- | ||
441 | |||
442 | There is mechanism which allows to get notifications about changing | ||
443 | status of a cgroup. | ||
444 | |||
445 | To register new notification handler you need: | ||
446 | - create a file descriptor for event notification using eventfd(2); | ||
447 | - open a control file to be monitored (e.g. memory.usage_in_bytes); | ||
448 | - write "<event_fd> <control_fd> <args>" to cgroup.event_control. | ||
449 | Interpretation of args is defined by control file implementation; | ||
450 | |||
451 | eventfd will be woken up by control file implementation or when the | ||
452 | cgroup is removed. | ||
453 | |||
454 | To unregister notification handler just close eventfd. | ||
455 | |||
456 | NOTE: Support of notifications should be implemented for the control | ||
457 | file. See documentation for the subsystem. | ||
437 | 458 | ||
438 | 3. Kernel API | 459 | 3. Kernel API |
439 | ============= | 460 | ============= |
@@ -488,6 +509,11 @@ Each subsystem should: | |||
488 | - add an entry in linux/cgroup_subsys.h | 509 | - add an entry in linux/cgroup_subsys.h |
489 | - define a cgroup_subsys object called <name>_subsys | 510 | - define a cgroup_subsys object called <name>_subsys |
490 | 511 | ||
512 | If a subsystem can be compiled as a module, it should also have in its | ||
513 | module initcall a call to cgroup_load_subsys(), and in its exitcall a | ||
514 | call to cgroup_unload_subsys(). It should also set its_subsys.module = | ||
515 | THIS_MODULE in its .c file. | ||
516 | |||
491 | Each subsystem may export the following methods. The only mandatory | 517 | Each subsystem may export the following methods. The only mandatory |
492 | methods are create/destroy. Any others that are null are presumed to | 518 | methods are create/destroy. Any others that are null are presumed to |
493 | be successful no-ops. | 519 | be successful no-ops. |
@@ -536,10 +562,21 @@ returns an error, this will abort the attach operation. If a NULL | |||
536 | task is passed, then a successful result indicates that *any* | 562 | task is passed, then a successful result indicates that *any* |
537 | unspecified task can be moved into the cgroup. Note that this isn't | 563 | unspecified task can be moved into the cgroup. Note that this isn't |
538 | called on a fork. If this method returns 0 (success) then this should | 564 | called on a fork. If this method returns 0 (success) then this should |
539 | remain valid while the caller holds cgroup_mutex. If threadgroup is | 565 | remain valid while the caller holds cgroup_mutex and it is ensured that either |
566 | attach() or cancel_attach() will be called in future. If threadgroup is | ||
540 | true, then a successful result indicates that all threads in the given | 567 | true, then a successful result indicates that all threads in the given |
541 | thread's threadgroup can be moved together. | 568 | thread's threadgroup can be moved together. |
542 | 569 | ||
570 | void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
571 | struct task_struct *task, bool threadgroup) | ||
572 | (cgroup_mutex held by caller) | ||
573 | |||
574 | Called when a task attach operation has failed after can_attach() has succeeded. | ||
575 | A subsystem whose can_attach() has some side-effects should provide this | ||
576 | function, so that the subsytem can implement a rollback. If not, not necessary. | ||
577 | This will be called only about subsystems whose can_attach() operation have | ||
578 | succeeded. | ||
579 | |||
543 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 580 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
544 | struct cgroup *old_cgrp, struct task_struct *task, | 581 | struct cgroup *old_cgrp, struct task_struct *task, |
545 | bool threadgroup) | 582 | bool threadgroup) |
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 1d7e9784439a..4160df82b3f5 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt | |||
@@ -168,20 +168,20 @@ Each cpuset is represented by a directory in the cgroup file system | |||
168 | containing (on top of the standard cgroup files) the following | 168 | containing (on top of the standard cgroup files) the following |
169 | files describing that cpuset: | 169 | files describing that cpuset: |
170 | 170 | ||
171 | - cpus: list of CPUs in that cpuset | 171 | - cpuset.cpus: list of CPUs in that cpuset |
172 | - mems: list of Memory Nodes in that cpuset | 172 | - cpuset.mems: list of Memory Nodes in that cpuset |
173 | - memory_migrate flag: if set, move pages to cpusets nodes | 173 | - cpuset.memory_migrate flag: if set, move pages to cpusets nodes |
174 | - cpu_exclusive flag: is cpu placement exclusive? | 174 | - cpuset.cpu_exclusive flag: is cpu placement exclusive? |
175 | - mem_exclusive flag: is memory placement exclusive? | 175 | - cpuset.mem_exclusive flag: is memory placement exclusive? |
176 | - mem_hardwall flag: is memory allocation hardwalled | 176 | - cpuset.mem_hardwall flag: is memory allocation hardwalled |
177 | - memory_pressure: measure of how much paging pressure in cpuset | 177 | - cpuset.memory_pressure: measure of how much paging pressure in cpuset |
178 | - memory_spread_page flag: if set, spread page cache evenly on allowed nodes | 178 | - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes |
179 | - memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes | 179 | - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes |
180 | - sched_load_balance flag: if set, load balance within CPUs on that cpuset | 180 | - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset |
181 | - sched_relax_domain_level: the searching range when migrating tasks | 181 | - cpuset.sched_relax_domain_level: the searching range when migrating tasks |
182 | 182 | ||
183 | In addition, the root cpuset only has the following file: | 183 | In addition, the root cpuset only has the following file: |
184 | - memory_pressure_enabled flag: compute memory_pressure? | 184 | - cpuset.memory_pressure_enabled flag: compute memory_pressure? |
185 | 185 | ||
186 | New cpusets are created using the mkdir system call or shell | 186 | New cpusets are created using the mkdir system call or shell |
187 | command. The properties of a cpuset, such as its flags, allowed | 187 | command. The properties of a cpuset, such as its flags, allowed |
@@ -229,7 +229,7 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than | |||
229 | a direct ancestor or descendant, may share any of the same CPUs or | 229 | a direct ancestor or descendant, may share any of the same CPUs or |
230 | Memory Nodes. | 230 | Memory Nodes. |
231 | 231 | ||
232 | A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", | 232 | A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled", |
233 | i.e. it restricts kernel allocations for page, buffer and other data | 233 | i.e. it restricts kernel allocations for page, buffer and other data |
234 | commonly shared by the kernel across multiple users. All cpusets, | 234 | commonly shared by the kernel across multiple users. All cpusets, |
235 | whether hardwalled or not, restrict allocations of memory for user | 235 | whether hardwalled or not, restrict allocations of memory for user |
@@ -304,15 +304,15 @@ times 1000. | |||
304 | --------------------------- | 304 | --------------------------- |
305 | There are two boolean flag files per cpuset that control where the | 305 | There are two boolean flag files per cpuset that control where the |
306 | kernel allocates pages for the file system buffers and related in | 306 | kernel allocates pages for the file system buffers and related in |
307 | kernel data structures. They are called 'memory_spread_page' and | 307 | kernel data structures. They are called 'cpuset.memory_spread_page' and |
308 | 'memory_spread_slab'. | 308 | 'cpuset.memory_spread_slab'. |
309 | 309 | ||
310 | If the per-cpuset boolean flag file 'memory_spread_page' is set, then | 310 | If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then |
311 | the kernel will spread the file system buffers (page cache) evenly | 311 | the kernel will spread the file system buffers (page cache) evenly |
312 | over all the nodes that the faulting task is allowed to use, instead | 312 | over all the nodes that the faulting task is allowed to use, instead |
313 | of preferring to put those pages on the node where the task is running. | 313 | of preferring to put those pages on the node where the task is running. |
314 | 314 | ||
315 | If the per-cpuset boolean flag file 'memory_spread_slab' is set, | 315 | If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set, |
316 | then the kernel will spread some file system related slab caches, | 316 | then the kernel will spread some file system related slab caches, |
317 | such as for inodes and dentries evenly over all the nodes that the | 317 | such as for inodes and dentries evenly over all the nodes that the |
318 | faulting task is allowed to use, instead of preferring to put those | 318 | faulting task is allowed to use, instead of preferring to put those |
@@ -337,21 +337,21 @@ their containing tasks memory spread settings. If memory spreading | |||
337 | is turned off, then the currently specified NUMA mempolicy once again | 337 | is turned off, then the currently specified NUMA mempolicy once again |
338 | applies to memory page allocations. | 338 | applies to memory page allocations. |
339 | 339 | ||
340 | Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag | 340 | Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag |
341 | files. By default they contain "0", meaning that the feature is off | 341 | files. By default they contain "0", meaning that the feature is off |
342 | for that cpuset. If a "1" is written to that file, then that turns | 342 | for that cpuset. If a "1" is written to that file, then that turns |
343 | the named feature on. | 343 | the named feature on. |
344 | 344 | ||
345 | The implementation is simple. | 345 | The implementation is simple. |
346 | 346 | ||
347 | Setting the flag 'memory_spread_page' turns on a per-process flag | 347 | Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag |
348 | PF_SPREAD_PAGE for each task that is in that cpuset or subsequently | 348 | PF_SPREAD_PAGE for each task that is in that cpuset or subsequently |
349 | joins that cpuset. The page allocation calls for the page cache | 349 | joins that cpuset. The page allocation calls for the page cache |
350 | is modified to perform an inline check for this PF_SPREAD_PAGE task | 350 | is modified to perform an inline check for this PF_SPREAD_PAGE task |
351 | flag, and if set, a call to a new routine cpuset_mem_spread_node() | 351 | flag, and if set, a call to a new routine cpuset_mem_spread_node() |
352 | returns the node to prefer for the allocation. | 352 | returns the node to prefer for the allocation. |
353 | 353 | ||
354 | Similarly, setting 'memory_spread_slab' turns on the flag | 354 | Similarly, setting 'cpuset.memory_spread_slab' turns on the flag |
355 | PF_SPREAD_SLAB, and appropriately marked slab caches will allocate | 355 | PF_SPREAD_SLAB, and appropriately marked slab caches will allocate |
356 | pages from the node returned by cpuset_mem_spread_node(). | 356 | pages from the node returned by cpuset_mem_spread_node(). |
357 | 357 | ||
@@ -404,24 +404,24 @@ the following two situations: | |||
404 | system overhead on those CPUs, including avoiding task load | 404 | system overhead on those CPUs, including avoiding task load |
405 | balancing if that is not needed. | 405 | balancing if that is not needed. |
406 | 406 | ||
407 | When the per-cpuset flag "sched_load_balance" is enabled (the default | 407 | When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default |
408 | setting), it requests that all the CPUs in that cpusets allowed 'cpus' | 408 | setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus' |
409 | be contained in a single sched domain, ensuring that load balancing | 409 | be contained in a single sched domain, ensuring that load balancing |
410 | can move a task (not otherwised pinned, as by sched_setaffinity) | 410 | can move a task (not otherwised pinned, as by sched_setaffinity) |
411 | from any CPU in that cpuset to any other. | 411 | from any CPU in that cpuset to any other. |
412 | 412 | ||
413 | When the per-cpuset flag "sched_load_balance" is disabled, then the | 413 | When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the |
414 | scheduler will avoid load balancing across the CPUs in that cpuset, | 414 | scheduler will avoid load balancing across the CPUs in that cpuset, |
415 | --except-- in so far as is necessary because some overlapping cpuset | 415 | --except-- in so far as is necessary because some overlapping cpuset |
416 | has "sched_load_balance" enabled. | 416 | has "sched_load_balance" enabled. |
417 | 417 | ||
418 | So, for example, if the top cpuset has the flag "sched_load_balance" | 418 | So, for example, if the top cpuset has the flag "cpuset.sched_load_balance" |
419 | enabled, then the scheduler will have one sched domain covering all | 419 | enabled, then the scheduler will have one sched domain covering all |
420 | CPUs, and the setting of the "sched_load_balance" flag in any other | 420 | CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other |
421 | cpusets won't matter, as we're already fully load balancing. | 421 | cpusets won't matter, as we're already fully load balancing. |
422 | 422 | ||
423 | Therefore in the above two situations, the top cpuset flag | 423 | Therefore in the above two situations, the top cpuset flag |
424 | "sched_load_balance" should be disabled, and only some of the smaller, | 424 | "cpuset.sched_load_balance" should be disabled, and only some of the smaller, |
425 | child cpusets have this flag enabled. | 425 | child cpusets have this flag enabled. |
426 | 426 | ||
427 | When doing this, you don't usually want to leave any unpinned tasks in | 427 | When doing this, you don't usually want to leave any unpinned tasks in |
@@ -433,7 +433,7 @@ scheduler might not consider the possibility of load balancing that | |||
433 | task to that underused CPU. | 433 | task to that underused CPU. |
434 | 434 | ||
435 | Of course, tasks pinned to a particular CPU can be left in a cpuset | 435 | Of course, tasks pinned to a particular CPU can be left in a cpuset |
436 | that disables "sched_load_balance" as those tasks aren't going anywhere | 436 | that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere |
437 | else anyway. | 437 | else anyway. |
438 | 438 | ||
439 | There is an impedance mismatch here, between cpusets and sched domains. | 439 | There is an impedance mismatch here, between cpusets and sched domains. |
@@ -443,19 +443,19 @@ overlap and each CPU is in at most one sched domain. | |||
443 | It is necessary for sched domains to be flat because load balancing | 443 | It is necessary for sched domains to be flat because load balancing |
444 | across partially overlapping sets of CPUs would risk unstable dynamics | 444 | across partially overlapping sets of CPUs would risk unstable dynamics |
445 | that would be beyond our understanding. So if each of two partially | 445 | that would be beyond our understanding. So if each of two partially |
446 | overlapping cpusets enables the flag 'sched_load_balance', then we | 446 | overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we |
447 | form a single sched domain that is a superset of both. We won't move | 447 | form a single sched domain that is a superset of both. We won't move |
448 | a task to a CPU outside it cpuset, but the scheduler load balancing | 448 | a task to a CPU outside it cpuset, but the scheduler load balancing |
449 | code might waste some compute cycles considering that possibility. | 449 | code might waste some compute cycles considering that possibility. |
450 | 450 | ||
451 | This mismatch is why there is not a simple one-to-one relation | 451 | This mismatch is why there is not a simple one-to-one relation |
452 | between which cpusets have the flag "sched_load_balance" enabled, | 452 | between which cpusets have the flag "cpuset.sched_load_balance" enabled, |
453 | and the sched domain configuration. If a cpuset enables the flag, it | 453 | and the sched domain configuration. If a cpuset enables the flag, it |
454 | will get balancing across all its CPUs, but if it disables the flag, | 454 | will get balancing across all its CPUs, but if it disables the flag, |
455 | it will only be assured of no load balancing if no other overlapping | 455 | it will only be assured of no load balancing if no other overlapping |
456 | cpuset enables the flag. | 456 | cpuset enables the flag. |
457 | 457 | ||
458 | If two cpusets have partially overlapping 'cpus' allowed, and only | 458 | If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only |
459 | one of them has this flag enabled, then the other may find its | 459 | one of them has this flag enabled, then the other may find its |
460 | tasks only partially load balanced, just on the overlapping CPUs. | 460 | tasks only partially load balanced, just on the overlapping CPUs. |
461 | This is just the general case of the top_cpuset example given a few | 461 | This is just the general case of the top_cpuset example given a few |
@@ -468,23 +468,23 @@ load balancing to the other CPUs. | |||
468 | 1.7.1 sched_load_balance implementation details. | 468 | 1.7.1 sched_load_balance implementation details. |
469 | ------------------------------------------------ | 469 | ------------------------------------------------ |
470 | 470 | ||
471 | The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary | 471 | The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary |
472 | to most cpuset flags.) When enabled for a cpuset, the kernel will | 472 | to most cpuset flags.) When enabled for a cpuset, the kernel will |
473 | ensure that it can load balance across all the CPUs in that cpuset | 473 | ensure that it can load balance across all the CPUs in that cpuset |
474 | (makes sure that all the CPUs in the cpus_allowed of that cpuset are | 474 | (makes sure that all the CPUs in the cpus_allowed of that cpuset are |
475 | in the same sched domain.) | 475 | in the same sched domain.) |
476 | 476 | ||
477 | If two overlapping cpusets both have 'sched_load_balance' enabled, | 477 | If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled, |
478 | then they will be (must be) both in the same sched domain. | 478 | then they will be (must be) both in the same sched domain. |
479 | 479 | ||
480 | If, as is the default, the top cpuset has 'sched_load_balance' enabled, | 480 | If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled, |
481 | then by the above that means there is a single sched domain covering | 481 | then by the above that means there is a single sched domain covering |
482 | the whole system, regardless of any other cpuset settings. | 482 | the whole system, regardless of any other cpuset settings. |
483 | 483 | ||
484 | The kernel commits to user space that it will avoid load balancing | 484 | The kernel commits to user space that it will avoid load balancing |
485 | where it can. It will pick as fine a granularity partition of sched | 485 | where it can. It will pick as fine a granularity partition of sched |
486 | domains as it can while still providing load balancing for any set | 486 | domains as it can while still providing load balancing for any set |
487 | of CPUs allowed to a cpuset having 'sched_load_balance' enabled. | 487 | of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled. |
488 | 488 | ||
489 | The internal kernel cpuset to scheduler interface passes from the | 489 | The internal kernel cpuset to scheduler interface passes from the |
490 | cpuset code to the scheduler code a partition of the load balanced | 490 | cpuset code to the scheduler code a partition of the load balanced |
@@ -495,9 +495,9 @@ all the CPUs that must be load balanced. | |||
495 | The cpuset code builds a new such partition and passes it to the | 495 | The cpuset code builds a new such partition and passes it to the |
496 | scheduler sched domain setup code, to have the sched domains rebuilt | 496 | scheduler sched domain setup code, to have the sched domains rebuilt |
497 | as necessary, whenever: | 497 | as necessary, whenever: |
498 | - the 'sched_load_balance' flag of a cpuset with non-empty CPUs changes, | 498 | - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes, |
499 | - or CPUs come or go from a cpuset with this flag enabled, | 499 | - or CPUs come or go from a cpuset with this flag enabled, |
500 | - or 'sched_relax_domain_level' value of a cpuset with non-empty CPUs | 500 | - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs |
501 | and with this flag enabled changes, | 501 | and with this flag enabled changes, |
502 | - or a cpuset with non-empty CPUs and with this flag enabled is removed, | 502 | - or a cpuset with non-empty CPUs and with this flag enabled is removed, |
503 | - or a cpu is offlined/onlined. | 503 | - or a cpu is offlined/onlined. |
@@ -542,7 +542,7 @@ As the result, task B on CPU X need to wait task A or wait load balance | |||
542 | on the next tick. For some applications in special situation, waiting | 542 | on the next tick. For some applications in special situation, waiting |
543 | 1 tick may be too long. | 543 | 1 tick may be too long. |
544 | 544 | ||
545 | The 'sched_relax_domain_level' file allows you to request changing | 545 | The 'cpuset.sched_relax_domain_level' file allows you to request changing |
546 | this searching range as you like. This file takes int value which | 546 | this searching range as you like. This file takes int value which |
547 | indicates size of searching range in levels ideally as follows, | 547 | indicates size of searching range in levels ideally as follows, |
548 | otherwise initial value -1 that indicates the cpuset has no request. | 548 | otherwise initial value -1 that indicates the cpuset has no request. |
@@ -559,8 +559,8 @@ The system default is architecture dependent. The system default | |||
559 | can be changed using the relax_domain_level= boot parameter. | 559 | can be changed using the relax_domain_level= boot parameter. |
560 | 560 | ||
561 | This file is per-cpuset and affect the sched domain where the cpuset | 561 | This file is per-cpuset and affect the sched domain where the cpuset |
562 | belongs to. Therefore if the flag 'sched_load_balance' of a cpuset | 562 | belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset |
563 | is disabled, then 'sched_relax_domain_level' have no effect since | 563 | is disabled, then 'cpuset.sched_relax_domain_level' have no effect since |
564 | there is no sched domain belonging the cpuset. | 564 | there is no sched domain belonging the cpuset. |
565 | 565 | ||
566 | If multiple cpusets are overlapping and hence they form a single sched | 566 | If multiple cpusets are overlapping and hence they form a single sched |
@@ -607,9 +607,9 @@ from one cpuset to another, then the kernel will adjust the tasks | |||
607 | memory placement, as above, the next time that the kernel attempts | 607 | memory placement, as above, the next time that the kernel attempts |
608 | to allocate a page of memory for that task. | 608 | to allocate a page of memory for that task. |
609 | 609 | ||
610 | If a cpuset has its 'cpus' modified, then each task in that cpuset | 610 | If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset |
611 | will have its allowed CPU placement changed immediately. Similarly, | 611 | will have its allowed CPU placement changed immediately. Similarly, |
612 | if a tasks pid is written to another cpusets 'tasks' file, then its | 612 | if a tasks pid is written to another cpusets 'cpuset.tasks' file, then its |
613 | allowed CPU placement is changed immediately. If such a task had been | 613 | allowed CPU placement is changed immediately. If such a task had been |
614 | bound to some subset of its cpuset using the sched_setaffinity() call, | 614 | bound to some subset of its cpuset using the sched_setaffinity() call, |
615 | the task will be allowed to run on any CPU allowed in its new cpuset, | 615 | the task will be allowed to run on any CPU allowed in its new cpuset, |
@@ -622,8 +622,8 @@ and the processor placement is updated immediately. | |||
622 | Normally, once a page is allocated (given a physical page | 622 | Normally, once a page is allocated (given a physical page |
623 | of main memory) then that page stays on whatever node it | 623 | of main memory) then that page stays on whatever node it |
624 | was allocated, so long as it remains allocated, even if the | 624 | was allocated, so long as it remains allocated, even if the |
625 | cpusets memory placement policy 'mems' subsequently changes. | 625 | cpusets memory placement policy 'cpuset.mems' subsequently changes. |
626 | If the cpuset flag file 'memory_migrate' is set true, then when | 626 | If the cpuset flag file 'cpuset.memory_migrate' is set true, then when |
627 | tasks are attached to that cpuset, any pages that task had | 627 | tasks are attached to that cpuset, any pages that task had |
628 | allocated to it on nodes in its previous cpuset are migrated | 628 | allocated to it on nodes in its previous cpuset are migrated |
629 | to the tasks new cpuset. The relative placement of the page within | 629 | to the tasks new cpuset. The relative placement of the page within |
@@ -631,12 +631,12 @@ the cpuset is preserved during these migration operations if possible. | |||
631 | For example if the page was on the second valid node of the prior cpuset | 631 | For example if the page was on the second valid node of the prior cpuset |
632 | then the page will be placed on the second valid node of the new cpuset. | 632 | then the page will be placed on the second valid node of the new cpuset. |
633 | 633 | ||
634 | Also if 'memory_migrate' is set true, then if that cpusets | 634 | Also if 'cpuset.memory_migrate' is set true, then if that cpusets |
635 | 'mems' file is modified, pages allocated to tasks in that | 635 | 'cpuset.mems' file is modified, pages allocated to tasks in that |
636 | cpuset, that were on nodes in the previous setting of 'mems', | 636 | cpuset, that were on nodes in the previous setting of 'cpuset.mems', |
637 | will be moved to nodes in the new setting of 'mems.' | 637 | will be moved to nodes in the new setting of 'mems.' |
638 | Pages that were not in the tasks prior cpuset, or in the cpusets | 638 | Pages that were not in the tasks prior cpuset, or in the cpusets |
639 | prior 'mems' setting, will not be moved. | 639 | prior 'cpuset.mems' setting, will not be moved. |
640 | 640 | ||
641 | There is an exception to the above. If hotplug functionality is used | 641 | There is an exception to the above. If hotplug functionality is used |
642 | to remove all the CPUs that are currently assigned to a cpuset, | 642 | to remove all the CPUs that are currently assigned to a cpuset, |
@@ -678,8 +678,8 @@ and then start a subshell 'sh' in that cpuset: | |||
678 | cd /dev/cpuset | 678 | cd /dev/cpuset |
679 | mkdir Charlie | 679 | mkdir Charlie |
680 | cd Charlie | 680 | cd Charlie |
681 | /bin/echo 2-3 > cpus | 681 | /bin/echo 2-3 > cpuset.cpus |
682 | /bin/echo 1 > mems | 682 | /bin/echo 1 > cpuset.mems |
683 | /bin/echo $$ > tasks | 683 | /bin/echo $$ > tasks |
684 | sh | 684 | sh |
685 | # The subshell 'sh' is now running in cpuset Charlie | 685 | # The subshell 'sh' is now running in cpuset Charlie |
@@ -725,10 +725,13 @@ Now you want to do something with this cpuset. | |||
725 | 725 | ||
726 | In this directory you can find several files: | 726 | In this directory you can find several files: |
727 | # ls | 727 | # ls |
728 | cpu_exclusive memory_migrate mems tasks | 728 | cpuset.cpu_exclusive cpuset.memory_spread_slab |
729 | cpus memory_pressure notify_on_release | 729 | cpuset.cpus cpuset.mems |
730 | mem_exclusive memory_spread_page sched_load_balance | 730 | cpuset.mem_exclusive cpuset.sched_load_balance |
731 | mem_hardwall memory_spread_slab sched_relax_domain_level | 731 | cpuset.mem_hardwall cpuset.sched_relax_domain_level |
732 | cpuset.memory_migrate notify_on_release | ||
733 | cpuset.memory_pressure tasks | ||
734 | cpuset.memory_spread_page | ||
732 | 735 | ||
733 | Reading them will give you information about the state of this cpuset: | 736 | Reading them will give you information about the state of this cpuset: |
734 | the CPUs and Memory Nodes it can use, the processes that are using | 737 | the CPUs and Memory Nodes it can use, the processes that are using |
@@ -736,13 +739,13 @@ it, its properties. By writing to these files you can manipulate | |||
736 | the cpuset. | 739 | the cpuset. |
737 | 740 | ||
738 | Set some flags: | 741 | Set some flags: |
739 | # /bin/echo 1 > cpu_exclusive | 742 | # /bin/echo 1 > cpuset.cpu_exclusive |
740 | 743 | ||
741 | Add some cpus: | 744 | Add some cpus: |
742 | # /bin/echo 0-7 > cpus | 745 | # /bin/echo 0-7 > cpuset.cpus |
743 | 746 | ||
744 | Add some mems: | 747 | Add some mems: |
745 | # /bin/echo 0-7 > mems | 748 | # /bin/echo 0-7 > cpuset.mems |
746 | 749 | ||
747 | Now attach your shell to this cpuset: | 750 | Now attach your shell to this cpuset: |
748 | # /bin/echo $$ > tasks | 751 | # /bin/echo $$ > tasks |
@@ -774,28 +777,28 @@ echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent | |||
774 | This is the syntax to use when writing in the cpus or mems files | 777 | This is the syntax to use when writing in the cpus or mems files |
775 | in cpuset directories: | 778 | in cpuset directories: |
776 | 779 | ||
777 | # /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 | 780 | # /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 |
778 | # /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 | 781 | # /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4 |
779 | 782 | ||
780 | To add a CPU to a cpuset, write the new list of CPUs including the | 783 | To add a CPU to a cpuset, write the new list of CPUs including the |
781 | CPU to be added. To add 6 to the above cpuset: | 784 | CPU to be added. To add 6 to the above cpuset: |
782 | 785 | ||
783 | # /bin/echo 1-4,6 > cpus -> set cpus list to cpus 1,2,3,4,6 | 786 | # /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6 |
784 | 787 | ||
785 | Similarly to remove a CPU from a cpuset, write the new list of CPUs | 788 | Similarly to remove a CPU from a cpuset, write the new list of CPUs |
786 | without the CPU to be removed. | 789 | without the CPU to be removed. |
787 | 790 | ||
788 | To remove all the CPUs: | 791 | To remove all the CPUs: |
789 | 792 | ||
790 | # /bin/echo "" > cpus -> clear cpus list | 793 | # /bin/echo "" > cpuset.cpus -> clear cpus list |
791 | 794 | ||
792 | 2.3 Setting flags | 795 | 2.3 Setting flags |
793 | ----------------- | 796 | ----------------- |
794 | 797 | ||
795 | The syntax is very simple: | 798 | The syntax is very simple: |
796 | 799 | ||
797 | # /bin/echo 1 > cpu_exclusive -> set flag 'cpu_exclusive' | 800 | # /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive' |
798 | # /bin/echo 0 > cpu_exclusive -> unset flag 'cpu_exclusive' | 801 | # /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive' |
799 | 802 | ||
800 | 2.4 Attaching processes | 803 | 2.4 Attaching processes |
801 | ----------------------- | 804 | ----------------------- |
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index 72db89ed0609..f7f68b2ac199 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt | |||
@@ -1,6 +1,6 @@ | |||
1 | Memory Resource Controller(Memcg) Implementation Memo. | 1 | Memory Resource Controller(Memcg) Implementation Memo. |
2 | Last Updated: 2009/1/20 | 2 | Last Updated: 2010/2 |
3 | Base Kernel Version: based on 2.6.29-rc2. | 3 | Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34). |
4 | 4 | ||
5 | Because VM is getting complex (one of reasons is memcg...), memcg's behavior | 5 | Because VM is getting complex (one of reasons is memcg...), memcg's behavior |
6 | is complex. This is a document for memcg's internal behavior. | 6 | is complex. This is a document for memcg's internal behavior. |
@@ -337,7 +337,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. | |||
337 | race and lock dependency with other cgroup subsystems. | 337 | race and lock dependency with other cgroup subsystems. |
338 | 338 | ||
339 | example) | 339 | example) |
340 | # mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices | 340 | # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices |
341 | 341 | ||
342 | and do task move, mkdir, rmdir etc...under this. | 342 | and do task move, mkdir, rmdir etc...under this. |
343 | 343 | ||
@@ -348,7 +348,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. | |||
348 | 348 | ||
349 | For example, test like following is good. | 349 | For example, test like following is good. |
350 | (Shell-A) | 350 | (Shell-A) |
351 | # mount -t cgroup none /cgroup -t memory | 351 | # mount -t cgroup none /cgroup -o memory |
352 | # mkdir /cgroup/test | 352 | # mkdir /cgroup/test |
353 | # echo 40M > /cgroup/test/memory.limit_in_bytes | 353 | # echo 40M > /cgroup/test/memory.limit_in_bytes |
354 | # echo 0 > /cgroup/test/tasks | 354 | # echo 0 > /cgroup/test/tasks |
@@ -378,3 +378,42 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. | |||
378 | #echo 50M > memory.limit_in_bytes | 378 | #echo 50M > memory.limit_in_bytes |
379 | #echo 50M > memory.memsw.limit_in_bytes | 379 | #echo 50M > memory.memsw.limit_in_bytes |
380 | run 51M of malloc | 380 | run 51M of malloc |
381 | |||
382 | 9.9 Move charges at task migration | ||
383 | Charges associated with a task can be moved along with task migration. | ||
384 | |||
385 | (Shell-A) | ||
386 | #mkdir /cgroup/A | ||
387 | #echo $$ >/cgroup/A/tasks | ||
388 | run some programs which uses some amount of memory in /cgroup/A. | ||
389 | |||
390 | (Shell-B) | ||
391 | #mkdir /cgroup/B | ||
392 | #echo 1 >/cgroup/B/memory.move_charge_at_immigrate | ||
393 | #echo "pid of the program running in group A" >/cgroup/B/tasks | ||
394 | |||
395 | You can see charges have been moved by reading *.usage_in_bytes or | ||
396 | memory.stat of both A and B. | ||
397 | See 8.2 of Documentation/cgroups/memory.txt to see what value should be | ||
398 | written to move_charge_at_immigrate. | ||
399 | |||
400 | 9.10 Memory thresholds | ||
401 | Memory controler implements memory thresholds using cgroups notification | ||
402 | API. You can use Documentation/cgroups/cgroup_event_listener.c to test | ||
403 | it. | ||
404 | |||
405 | (Shell-A) Create cgroup and run event listener | ||
406 | # mkdir /cgroup/A | ||
407 | # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M | ||
408 | |||
409 | (Shell-B) Add task to cgroup and try to allocate and free memory | ||
410 | # echo $$ >/cgroup/A/tasks | ||
411 | # a="$(dd if=/dev/zero bs=1M count=10)" | ||
412 | # a= | ||
413 | |||
414 | You will see message from cgroup_event_listener every time you cross | ||
415 | the thresholds. | ||
416 | |||
417 | Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds. | ||
418 | |||
419 | It's good idea to test root cgroup as well. | ||
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index b871f2552b45..f8bc802d70b9 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -182,6 +182,8 @@ list. | |||
182 | NOTE: Reclaim does not work for the root cgroup, since we cannot set any | 182 | NOTE: Reclaim does not work for the root cgroup, since we cannot set any |
183 | limits on the root cgroup. | 183 | limits on the root cgroup. |
184 | 184 | ||
185 | Note2: When panic_on_oom is set to "2", the whole system will panic. | ||
186 | |||
185 | 2. Locking | 187 | 2. Locking |
186 | 188 | ||
187 | The memory controller uses the following hierarchy | 189 | The memory controller uses the following hierarchy |
@@ -262,10 +264,12 @@ some of the pages cached in the cgroup (page cache pages). | |||
262 | 4.2 Task migration | 264 | 4.2 Task migration |
263 | 265 | ||
264 | When a task migrates from one cgroup to another, it's charge is not | 266 | When a task migrates from one cgroup to another, it's charge is not |
265 | carried forward. The pages allocated from the original cgroup still | 267 | carried forward by default. The pages allocated from the original cgroup still |
266 | remain charged to it, the charge is dropped when the page is freed or | 268 | remain charged to it, the charge is dropped when the page is freed or |
267 | reclaimed. | 269 | reclaimed. |
268 | 270 | ||
271 | Note: You can move charges of a task along with task migration. See 8. | ||
272 | |||
269 | 4.3 Removing a cgroup | 273 | 4.3 Removing a cgroup |
270 | 274 | ||
271 | A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a | 275 | A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a |
@@ -377,7 +381,8 @@ The feature can be disabled by | |||
377 | NOTE1: Enabling/disabling will fail if the cgroup already has other | 381 | NOTE1: Enabling/disabling will fail if the cgroup already has other |
378 | cgroups created below it. | 382 | cgroups created below it. |
379 | 383 | ||
380 | NOTE2: This feature can be enabled/disabled per subtree. | 384 | NOTE2: When panic_on_oom is set to "2", the whole system will panic in |
385 | case of an oom event in any cgroup. | ||
381 | 386 | ||
382 | 7. Soft limits | 387 | 7. Soft limits |
383 | 388 | ||
@@ -414,7 +419,76 @@ NOTE1: Soft limits take effect over a long period of time, since they involve | |||
414 | NOTE2: It is recommended to set the soft limit always below the hard limit, | 419 | NOTE2: It is recommended to set the soft limit always below the hard limit, |
415 | otherwise the hard limit will take precedence. | 420 | otherwise the hard limit will take precedence. |
416 | 421 | ||
417 | 8. TODO | 422 | 8. Move charges at task migration |
423 | |||
424 | Users can move charges associated with a task along with task migration, that | ||
425 | is, uncharge task's pages from the old cgroup and charge them to the new cgroup. | ||
426 | This feature is not supported in !CONFIG_MMU environments because of lack of | ||
427 | page tables. | ||
428 | |||
429 | 8.1 Interface | ||
430 | |||
431 | This feature is disabled by default. It can be enabled(and disabled again) by | ||
432 | writing to memory.move_charge_at_immigrate of the destination cgroup. | ||
433 | |||
434 | If you want to enable it: | ||
435 | |||
436 | # echo (some positive value) > memory.move_charge_at_immigrate | ||
437 | |||
438 | Note: Each bits of move_charge_at_immigrate has its own meaning about what type | ||
439 | of charges should be moved. See 8.2 for details. | ||
440 | Note: Charges are moved only when you move mm->owner, IOW, a leader of a thread | ||
441 | group. | ||
442 | Note: If we cannot find enough space for the task in the destination cgroup, we | ||
443 | try to make space by reclaiming memory. Task migration may fail if we | ||
444 | cannot make enough space. | ||
445 | Note: It can take several seconds if you move charges in giga bytes order. | ||
446 | |||
447 | And if you want disable it again: | ||
448 | |||
449 | # echo 0 > memory.move_charge_at_immigrate | ||
450 | |||
451 | 8.2 Type of charges which can be move | ||
452 | |||
453 | Each bits of move_charge_at_immigrate has its own meaning about what type of | ||
454 | charges should be moved. | ||
455 | |||
456 | bit | what type of charges would be moved ? | ||
457 | -----+------------------------------------------------------------------------ | ||
458 | 0 | A charge of an anonymous page(or swap of it) used by the target task. | ||
459 | | Those pages and swaps must be used only by the target task. You must | ||
460 | | enable Swap Extension(see 2.4) to enable move of swap charges. | ||
461 | |||
462 | Note: Those pages and swaps must be charged to the old cgroup. | ||
463 | Note: More type of pages(e.g. file cache, shmem,) will be supported by other | ||
464 | bits in future. | ||
465 | |||
466 | 8.3 TODO | ||
467 | |||
468 | - Add support for other types of pages(e.g. file cache, shmem, etc.). | ||
469 | - Implement madvise(2) to let users decide the vma to be moved or not to be | ||
470 | moved. | ||
471 | - All of moving charge operations are done under cgroup_mutex. It's not good | ||
472 | behavior to hold the mutex too long, so we may need some trick. | ||
473 | |||
474 | 9. Memory thresholds | ||
475 | |||
476 | Memory controler implements memory thresholds using cgroups notification | ||
477 | API (see cgroups.txt). It allows to register multiple memory and memsw | ||
478 | thresholds and gets notifications when it crosses. | ||
479 | |||
480 | To register a threshold application need: | ||
481 | - create an eventfd using eventfd(2); | ||
482 | - open memory.usage_in_bytes or memory.memsw.usage_in_bytes; | ||
483 | - write string like "<event_fd> <memory.usage_in_bytes> <threshold>" to | ||
484 | cgroup.event_control. | ||
485 | |||
486 | Application will be notified through eventfd when memory usage crosses | ||
487 | threshold in any direction. | ||
488 | |||
489 | It's applicable for root and non-root cgroup. | ||
490 | |||
491 | 10. TODO | ||
418 | 492 | ||
419 | 1. Add support for accounting huge pages (as a separate controller) | 493 | 1. Add support for accounting huge pages (as a separate controller) |
420 | 2. Make per-cgroup scanner reclaim not-shared pages first | 494 | 2. Make per-cgroup scanner reclaim not-shared pages first |
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt index 877a1b26cc3d..926cf1b5e63e 100644 --- a/Documentation/console/console.txt +++ b/Documentation/console/console.txt | |||
@@ -74,7 +74,7 @@ driver takes over the consoles vacated by the driver. Binding, on the other | |||
74 | hand, will bind the driver to the consoles that are currently occupied by a | 74 | hand, will bind the driver to the consoles that are currently occupied by a |
75 | system driver. | 75 | system driver. |
76 | 76 | ||
77 | NOTE1: Binding and binding must be selected in Kconfig. It's under: | 77 | NOTE1: Binding and unbinding must be selected in Kconfig. It's under: |
78 | 78 | ||
79 | Device Drivers -> Character devices -> Support for binding and unbinding | 79 | Device Drivers -> Character devices -> Support for binding and unbinding |
80 | console drivers | 80 | console drivers |
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index 2e2c2ea90ceb..41f41632ee55 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt | |||
@@ -192,7 +192,7 @@ command line. This will execute all matching early_param() callbacks. | |||
192 | User specified early platform devices will be registered at this point. | 192 | User specified early platform devices will be registered at this point. |
193 | For the early serial console case the user can specify port on the | 193 | For the early serial console case the user can specify port on the |
194 | kernel command line as "earlyprintk=serial.0" where "earlyprintk" is | 194 | kernel command line as "earlyprintk=serial.0" where "earlyprintk" is |
195 | the class string, "serial" is the name of the platfrom driver and | 195 | the class string, "serial" is the name of the platform driver and |
196 | 0 is the platform device id. If the id is -1 then the dot and the | 196 | 0 is the platform device id. If the id is -1 then the dot and the |
197 | id can be omitted. | 197 | id can be omitted. |
198 | 198 | ||
diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt index 60e361ba08c0..f297fc1202ae 100644 --- a/Documentation/eisa.txt +++ b/Documentation/eisa.txt | |||
@@ -171,7 +171,7 @@ device. | |||
171 | virtual_root.force_probe : | 171 | virtual_root.force_probe : |
172 | 172 | ||
173 | Force the probing code to probe EISA slots even when it cannot find an | 173 | Force the probing code to probe EISA slots even when it cannot find an |
174 | EISA compliant mainboard (nothing appears on slot 0). Defaultd to 0 | 174 | EISA compliant mainboard (nothing appears on slot 0). Defaults to 0 |
175 | (don't force), and set to 1 (force probing) when either | 175 | (don't force), and set to 1 (force probing) when either |
176 | CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. | 176 | CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set. |
177 | 177 | ||
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index a618efab7b15..945ff3fda433 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt | |||
@@ -216,26 +216,14 @@ Works. Use "Insert file..." or external editor. | |||
216 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 216 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
217 | Gmail (Web GUI) | 217 | Gmail (Web GUI) |
218 | 218 | ||
219 | If you just have to use Gmail to send patches, it CAN be made to work. It | 219 | Does not work for sending patches. |
220 | requires a bit of external help, though. | 220 | |
221 | 221 | Gmail web client converts tabs to spaces automatically. | |
222 | The first problem is that Gmail converts tabs to spaces. This will | 222 | |
223 | totally break your patches. To prevent this, you have to use a different | 223 | At the same time it wraps lines every 78 chars with CRLF style line breaks |
224 | editor. There is a firefox extension called "ViewSourceWith" | 224 | although tab2space problem can be solved with external editor. |
225 | (https://addons.mozilla.org/en-US/firefox/addon/394) which allows you to | 225 | |
226 | edit any text box in the editor of your choice. Configure it to launch | 226 | Another problem is that Gmail will base64-encode any message that has a |
227 | your favorite editor. When you want to send a patch, use this technique. | 227 | non-ASCII character. That includes things like European names. |
228 | Once you have crafted your messsage + patch, save and exit the editor, | ||
229 | which should reload the Gmail edit box. GMAIL WILL PRESERVE THE TABS. | ||
230 | Hoorah. Apparently you can cut-n-paste literal tabs, but Gmail will | ||
231 | convert those to spaces upon sending! | ||
232 | |||
233 | The second problem is that Gmail converts tabs to spaces on replies. If | ||
234 | you reply to a patch, don't expect to be able to apply it as a patch. | ||
235 | |||
236 | The last problem is that Gmail will base64-encode any message that has a | ||
237 | non-ASCII character. That includes things like European names. Be aware. | ||
238 | |||
239 | Gmail is not convenient for lkml patches, but CAN be made to work. | ||
240 | 228 | ||
241 | ### | 229 | ### |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index a5cc0db63d7a..ed511af0f79a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -582,3 +582,10 @@ Why: The paravirt mmu host support is slower than non-paravirt mmu, both | |||
582 | Who: Avi Kivity <avi@redhat.com> | 582 | Who: Avi Kivity <avi@redhat.com> |
583 | 583 | ||
584 | ---------------------------- | 584 | ---------------------------- |
585 | |||
586 | What: "acpi=ht" boot option | ||
587 | When: 2.6.35 | ||
588 | Why: Useful in 2003, implementation is a hack. | ||
589 | Generally invoked by accident today. | ||
590 | Seen as doing more harm than good. | ||
591 | Who: Len Brown <len.brown@intel.com> | ||
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 5139b8c9d5af..3bae418c6ad3 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX | |||
@@ -32,6 +32,8 @@ dlmfs.txt | |||
32 | - info on the userspace interface to the OCFS2 DLM. | 32 | - info on the userspace interface to the OCFS2 DLM. |
33 | dnotify.txt | 33 | dnotify.txt |
34 | - info about directory notification in Linux. | 34 | - info about directory notification in Linux. |
35 | dnotify_test.c | ||
36 | - example program for dnotify | ||
35 | ecryptfs.txt | 37 | ecryptfs.txt |
36 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. | 38 | - docs on eCryptfs: stacked cryptographic filesystem for Linux. |
37 | exofs.txt | 39 | exofs.txt |
diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile new file mode 100644 index 000000000000..a5dd114da14f --- /dev/null +++ b/Documentation/filesystems/Makefile | |||
@@ -0,0 +1,8 @@ | |||
1 | # kbuild trick to avoid linker error. Can be omitted if a module is built. | ||
2 | obj- := dummy.o | ||
3 | |||
4 | # List of programs to build | ||
5 | hostprogs-y := dnotify_test | ||
6 | |||
7 | # Tell kbuild to always build the programs | ||
8 | always := $(hostprogs-y) | ||
diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.txt index 9f5d338ddbb8..6baf88f46859 100644 --- a/Documentation/filesystems/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt | |||
@@ -62,38 +62,9 @@ disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL. | |||
62 | 62 | ||
63 | Example | 63 | Example |
64 | ------- | 64 | ------- |
65 | See Documentation/filesystems/dnotify_test.c for an example. | ||
65 | 66 | ||
66 | #define _GNU_SOURCE /* needed to get the defines */ | 67 | NOTE |
67 | #include <fcntl.h> /* in glibc 2.2 this has the needed | 68 | ---- |
68 | values defined */ | 69 | Beginning with Linux 2.6.13, dnotify has been replaced by inotify. |
69 | #include <signal.h> | 70 | See Documentation/filesystems/inotify.txt for more information on it. |
70 | #include <stdio.h> | ||
71 | #include <unistd.h> | ||
72 | |||
73 | static volatile int event_fd; | ||
74 | |||
75 | static void handler(int sig, siginfo_t *si, void *data) | ||
76 | { | ||
77 | event_fd = si->si_fd; | ||
78 | } | ||
79 | |||
80 | int main(void) | ||
81 | { | ||
82 | struct sigaction act; | ||
83 | int fd; | ||
84 | |||
85 | act.sa_sigaction = handler; | ||
86 | sigemptyset(&act.sa_mask); | ||
87 | act.sa_flags = SA_SIGINFO; | ||
88 | sigaction(SIGRTMIN + 1, &act, NULL); | ||
89 | |||
90 | fd = open(".", O_RDONLY); | ||
91 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); | ||
92 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); | ||
93 | /* we will now be notified if any of the files | ||
94 | in "." is modified or new files are created */ | ||
95 | while (1) { | ||
96 | pause(); | ||
97 | printf("Got event on fd=%d\n", event_fd); | ||
98 | } | ||
99 | } | ||
diff --git a/Documentation/filesystems/dnotify_test.c b/Documentation/filesystems/dnotify_test.c new file mode 100644 index 000000000000..8b37b4a1e18d --- /dev/null +++ b/Documentation/filesystems/dnotify_test.c | |||
@@ -0,0 +1,34 @@ | |||
1 | #define _GNU_SOURCE /* needed to get the defines */ | ||
2 | #include <fcntl.h> /* in glibc 2.2 this has the needed | ||
3 | values defined */ | ||
4 | #include <signal.h> | ||
5 | #include <stdio.h> | ||
6 | #include <unistd.h> | ||
7 | |||
8 | static volatile int event_fd; | ||
9 | |||
10 | static void handler(int sig, siginfo_t *si, void *data) | ||
11 | { | ||
12 | event_fd = si->si_fd; | ||
13 | } | ||
14 | |||
15 | int main(void) | ||
16 | { | ||
17 | struct sigaction act; | ||
18 | int fd; | ||
19 | |||
20 | act.sa_sigaction = handler; | ||
21 | sigemptyset(&act.sa_mask); | ||
22 | act.sa_flags = SA_SIGINFO; | ||
23 | sigaction(SIGRTMIN + 1, &act, NULL); | ||
24 | |||
25 | fd = open(".", O_RDONLY); | ||
26 | fcntl(fd, F_SETSIG, SIGRTMIN + 1); | ||
27 | fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT); | ||
28 | /* we will now be notified if any of the files | ||
29 | in "." is modified or new files are created */ | ||
30 | while (1) { | ||
31 | pause(); | ||
32 | printf("Got event on fd=%d\n", event_fd); | ||
33 | } | ||
34 | } | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 96a44dd95e03..a4f30faa4f1f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -195,7 +195,7 @@ asynchronous manner and the vaule may not be very precise. To see a precise | |||
195 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. | 195 | snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table. |
196 | It's slow but very precise. | 196 | It's slow but very precise. |
197 | 197 | ||
198 | Table 1-2: Contents of the statm files (as of 2.6.30-rc7) | 198 | Table 1-2: Contents of the status files (as of 2.6.30-rc7) |
199 | .............................................................................. | 199 | .............................................................................. |
200 | Field Content | 200 | Field Content |
201 | Name filename of the executable | 201 | Name filename of the executable |
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru index 87ffa0f5ec70..5eb3b9d5f0d5 100644 --- a/Documentation/hwmon/abituguru +++ b/Documentation/hwmon/abituguru | |||
@@ -30,7 +30,7 @@ Supported chips: | |||
30 | bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1 | 30 | bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1 |
31 | You may also need to specify the fan_sensors option for these boards | 31 | You may also need to specify the fan_sensors option for these boards |
32 | fan_sensors=5 | 32 | fan_sensors=5 |
33 | 2) There is a seperate abituguru3 driver for these motherboards, | 33 | 2) There is a separate abituguru3 driver for these motherboards, |
34 | the abituguru (without the 3 !) driver will not work on these | 34 | the abituguru (without the 3 !) driver will not work on these |
35 | motherboards (and visa versa)! | 35 | motherboards (and visa versa)! |
36 | 36 | ||
diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt index 3a6aec40c0b0..8b4129de1d2d 100644 --- a/Documentation/input/rotary-encoder.txt +++ b/Documentation/input/rotary-encoder.txt | |||
@@ -75,7 +75,7 @@ and the number of steps or will clamp at the maximum and zero depending on | |||
75 | the configuration. | 75 | the configuration. |
76 | 76 | ||
77 | Because GPIO to IRQ mapping is platform specific, this information must | 77 | Because GPIO to IRQ mapping is platform specific, this information must |
78 | be given in seperately to the driver. See the example below. | 78 | be given in separately to the driver. See the example below. |
79 | 79 | ||
80 | ---------<snip>--------- | 80 | ---------<snip>--------- |
81 | 81 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3bc48b0bd3a9..e4cbca58536c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -200,10 +200,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
200 | acpi_display_output=video | 200 | acpi_display_output=video |
201 | See above. | 201 | See above. |
202 | 202 | ||
203 | acpi_early_pdc_eval [HW,ACPI] Evaluate processor _PDC methods | ||
204 | early. Needed on some platforms to properly | ||
205 | initialize the EC. | ||
206 | |||
207 | acpi_irq_balance [HW,ACPI] | 203 | acpi_irq_balance [HW,ACPI] |
208 | ACPI will balance active IRQs | 204 | ACPI will balance active IRQs |
209 | default in APIC mode | 205 | default in APIC mode |
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index c79ab996dada..bdb13817e1e9 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt | |||
@@ -266,7 +266,7 @@ kobj_type: | |||
266 | 266 | ||
267 | struct kobj_type { | 267 | struct kobj_type { |
268 | void (*release)(struct kobject *); | 268 | void (*release)(struct kobject *); |
269 | struct sysfs_ops *sysfs_ops; | 269 | const struct sysfs_ops *sysfs_ops; |
270 | struct attribute **default_attrs; | 270 | struct attribute **default_attrs; |
271 | }; | 271 | }; |
272 | 272 | ||
diff --git a/Documentation/laptops/00-INDEX b/Documentation/laptops/00-INDEX index ee5692b26dd4..fa688538e757 100644 --- a/Documentation/laptops/00-INDEX +++ b/Documentation/laptops/00-INDEX | |||
@@ -2,6 +2,12 @@ | |||
2 | - This file | 2 | - This file |
3 | acer-wmi.txt | 3 | acer-wmi.txt |
4 | - information on the Acer Laptop WMI Extras driver. | 4 | - information on the Acer Laptop WMI Extras driver. |
5 | asus-laptop.txt | ||
6 | - information on the Asus Laptop Extras driver. | ||
7 | disk-shock-protection.txt | ||
8 | - information on hard disk shock protection. | ||
9 | dslm.c | ||
10 | - Simple Disk Sleep Monitor program | ||
5 | laptop-mode.txt | 11 | laptop-mode.txt |
6 | - how to conserve battery power using laptop-mode. | 12 | - how to conserve battery power using laptop-mode. |
7 | sony-laptop.txt | 13 | sony-laptop.txt |
diff --git a/Documentation/laptops/Makefile b/Documentation/laptops/Makefile new file mode 100644 index 000000000000..5cb144af3c09 --- /dev/null +++ b/Documentation/laptops/Makefile | |||
@@ -0,0 +1,8 @@ | |||
1 | # kbuild trick to avoid linker error. Can be omitted if a module is built. | ||
2 | obj- := dummy.o | ||
3 | |||
4 | # List of programs to build | ||
5 | hostprogs-y := dslm | ||
6 | |||
7 | # Tell kbuild to always build the programs | ||
8 | always := $(hostprogs-y) | ||
diff --git a/Documentation/laptops/dslm.c b/Documentation/laptops/dslm.c new file mode 100644 index 000000000000..72ff290c5fc6 --- /dev/null +++ b/Documentation/laptops/dslm.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * dslm.c | ||
3 | * Simple Disk Sleep Monitor | ||
4 | * by Bartek Kania | ||
5 | * Licenced under the GPL | ||
6 | */ | ||
7 | #include <unistd.h> | ||
8 | #include <stdlib.h> | ||
9 | #include <stdio.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <errno.h> | ||
12 | #include <time.h> | ||
13 | #include <string.h> | ||
14 | #include <signal.h> | ||
15 | #include <sys/ioctl.h> | ||
16 | #include <linux/hdreg.h> | ||
17 | |||
18 | #ifdef DEBUG | ||
19 | #define D(x) x | ||
20 | #else | ||
21 | #define D(x) | ||
22 | #endif | ||
23 | |||
24 | int endit = 0; | ||
25 | |||
26 | /* Check if the disk is in powersave-mode | ||
27 | * Most of the code is stolen from hdparm. | ||
28 | * 1 = active, 0 = standby/sleep, -1 = unknown */ | ||
29 | static int check_powermode(int fd) | ||
30 | { | ||
31 | unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; | ||
32 | int state; | ||
33 | |||
34 | if (ioctl(fd, HDIO_DRIVE_CMD, &args) | ||
35 | && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ | ||
36 | && ioctl(fd, HDIO_DRIVE_CMD, &args)) { | ||
37 | if (errno != EIO || args[0] != 0 || args[1] != 0) { | ||
38 | state = -1; /* "unknown"; */ | ||
39 | } else | ||
40 | state = 0; /* "sleeping"; */ | ||
41 | } else { | ||
42 | state = (args[2] == 255) ? 1 : 0; | ||
43 | } | ||
44 | D(printf(" drive state is: %d\n", state)); | ||
45 | |||
46 | return state; | ||
47 | } | ||
48 | |||
49 | static char *state_name(int i) | ||
50 | { | ||
51 | if (i == -1) return "unknown"; | ||
52 | if (i == 0) return "sleeping"; | ||
53 | if (i == 1) return "active"; | ||
54 | |||
55 | return "internal error"; | ||
56 | } | ||
57 | |||
58 | static char *myctime(time_t time) | ||
59 | { | ||
60 | char *ts = ctime(&time); | ||
61 | ts[strlen(ts) - 1] = 0; | ||
62 | |||
63 | return ts; | ||
64 | } | ||
65 | |||
66 | static void measure(int fd) | ||
67 | { | ||
68 | time_t start_time; | ||
69 | int last_state; | ||
70 | time_t last_time; | ||
71 | int curr_state; | ||
72 | time_t curr_time = 0; | ||
73 | time_t time_diff; | ||
74 | time_t active_time = 0; | ||
75 | time_t sleep_time = 0; | ||
76 | time_t unknown_time = 0; | ||
77 | time_t total_time = 0; | ||
78 | int changes = 0; | ||
79 | float tmp; | ||
80 | |||
81 | printf("Starting measurements\n"); | ||
82 | |||
83 | last_state = check_powermode(fd); | ||
84 | start_time = last_time = time(0); | ||
85 | printf(" System is in state %s\n\n", state_name(last_state)); | ||
86 | |||
87 | while(!endit) { | ||
88 | sleep(1); | ||
89 | curr_state = check_powermode(fd); | ||
90 | |||
91 | if (curr_state != last_state || endit) { | ||
92 | changes++; | ||
93 | curr_time = time(0); | ||
94 | time_diff = curr_time - last_time; | ||
95 | |||
96 | if (last_state == 1) active_time += time_diff; | ||
97 | else if (last_state == 0) sleep_time += time_diff; | ||
98 | else unknown_time += time_diff; | ||
99 | |||
100 | last_state = curr_state; | ||
101 | last_time = curr_time; | ||
102 | |||
103 | printf("%s: State-change to %s\n", myctime(curr_time), | ||
104 | state_name(curr_state)); | ||
105 | } | ||
106 | } | ||
107 | changes--; /* Compensate for SIGINT */ | ||
108 | |||
109 | total_time = time(0) - start_time; | ||
110 | printf("\nTotal running time: %lus\n", curr_time - start_time); | ||
111 | printf(" State changed %d times\n", changes); | ||
112 | |||
113 | tmp = (float)sleep_time / (float)total_time * 100; | ||
114 | printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); | ||
115 | tmp = (float)active_time / (float)total_time * 100; | ||
116 | printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); | ||
117 | tmp = (float)unknown_time / (float)total_time * 100; | ||
118 | printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); | ||
119 | } | ||
120 | |||
121 | static void ender(int s) | ||
122 | { | ||
123 | endit = 1; | ||
124 | } | ||
125 | |||
126 | static void usage(void) | ||
127 | { | ||
128 | puts("usage: dslm [-w <time>] <disk>"); | ||
129 | exit(0); | ||
130 | } | ||
131 | |||
132 | int main(int argc, char **argv) | ||
133 | { | ||
134 | int fd; | ||
135 | char *disk = 0; | ||
136 | int settle_time = 60; | ||
137 | |||
138 | /* Parse the simple command-line */ | ||
139 | if (argc == 2) | ||
140 | disk = argv[1]; | ||
141 | else if (argc == 4) { | ||
142 | settle_time = atoi(argv[2]); | ||
143 | disk = argv[3]; | ||
144 | } else | ||
145 | usage(); | ||
146 | |||
147 | if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) { | ||
148 | printf("Can't open %s, because: %s\n", disk, strerror(errno)); | ||
149 | exit(-1); | ||
150 | } | ||
151 | |||
152 | if (settle_time) { | ||
153 | printf("Waiting %d seconds for the system to settle down to " | ||
154 | "'normal'\n", settle_time); | ||
155 | sleep(settle_time); | ||
156 | } else | ||
157 | puts("Not waiting for system to settle down"); | ||
158 | |||
159 | signal(SIGINT, ender); | ||
160 | |||
161 | measure(fd); | ||
162 | |||
163 | close(fd); | ||
164 | |||
165 | return 0; | ||
166 | } | ||
diff --git a/Documentation/laptops/laptop-mode.txt b/Documentation/laptops/laptop-mode.txt index eeedee11c8c2..2c3c35093023 100644 --- a/Documentation/laptops/laptop-mode.txt +++ b/Documentation/laptops/laptop-mode.txt | |||
@@ -779,172 +779,4 @@ Monitoring tool | |||
779 | --------------- | 779 | --------------- |
780 | 780 | ||
781 | Bartek Kania submitted this, it can be used to measure how much time your disk | 781 | Bartek Kania submitted this, it can be used to measure how much time your disk |
782 | spends spun up/down. | 782 | spends spun up/down. See Documentation/laptops/dslm.c |
783 | |||
784 | ---------------------------dslm.c BEGIN----------------------------------------- | ||
785 | /* | ||
786 | * Simple Disk Sleep Monitor | ||
787 | * by Bartek Kania | ||
788 | * Licenced under the GPL | ||
789 | */ | ||
790 | #include <unistd.h> | ||
791 | #include <stdlib.h> | ||
792 | #include <stdio.h> | ||
793 | #include <fcntl.h> | ||
794 | #include <errno.h> | ||
795 | #include <time.h> | ||
796 | #include <string.h> | ||
797 | #include <signal.h> | ||
798 | #include <sys/ioctl.h> | ||
799 | #include <linux/hdreg.h> | ||
800 | |||
801 | #ifdef DEBUG | ||
802 | #define D(x) x | ||
803 | #else | ||
804 | #define D(x) | ||
805 | #endif | ||
806 | |||
807 | int endit = 0; | ||
808 | |||
809 | /* Check if the disk is in powersave-mode | ||
810 | * Most of the code is stolen from hdparm. | ||
811 | * 1 = active, 0 = standby/sleep, -1 = unknown */ | ||
812 | int check_powermode(int fd) | ||
813 | { | ||
814 | unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0}; | ||
815 | int state; | ||
816 | |||
817 | if (ioctl(fd, HDIO_DRIVE_CMD, &args) | ||
818 | && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */ | ||
819 | && ioctl(fd, HDIO_DRIVE_CMD, &args)) { | ||
820 | if (errno != EIO || args[0] != 0 || args[1] != 0) { | ||
821 | state = -1; /* "unknown"; */ | ||
822 | } else | ||
823 | state = 0; /* "sleeping"; */ | ||
824 | } else { | ||
825 | state = (args[2] == 255) ? 1 : 0; | ||
826 | } | ||
827 | D(printf(" drive state is: %d\n", state)); | ||
828 | |||
829 | return state; | ||
830 | } | ||
831 | |||
832 | char *state_name(int i) | ||
833 | { | ||
834 | if (i == -1) return "unknown"; | ||
835 | if (i == 0) return "sleeping"; | ||
836 | if (i == 1) return "active"; | ||
837 | |||
838 | return "internal error"; | ||
839 | } | ||
840 | |||
841 | char *myctime(time_t time) | ||
842 | { | ||
843 | char *ts = ctime(&time); | ||
844 | ts[strlen(ts) - 1] = 0; | ||
845 | |||
846 | return ts; | ||
847 | } | ||
848 | |||
849 | void measure(int fd) | ||
850 | { | ||
851 | time_t start_time; | ||
852 | int last_state; | ||
853 | time_t last_time; | ||
854 | int curr_state; | ||
855 | time_t curr_time = 0; | ||
856 | time_t time_diff; | ||
857 | time_t active_time = 0; | ||
858 | time_t sleep_time = 0; | ||
859 | time_t unknown_time = 0; | ||
860 | time_t total_time = 0; | ||
861 | int changes = 0; | ||
862 | float tmp; | ||
863 | |||
864 | printf("Starting measurements\n"); | ||
865 | |||
866 | last_state = check_powermode(fd); | ||
867 | start_time = last_time = time(0); | ||
868 | printf(" System is in state %s\n\n", state_name(last_state)); | ||
869 | |||
870 | while(!endit) { | ||
871 | sleep(1); | ||
872 | curr_state = check_powermode(fd); | ||
873 | |||
874 | if (curr_state != last_state || endit) { | ||
875 | changes++; | ||
876 | curr_time = time(0); | ||
877 | time_diff = curr_time - last_time; | ||
878 | |||
879 | if (last_state == 1) active_time += time_diff; | ||
880 | else if (last_state == 0) sleep_time += time_diff; | ||
881 | else unknown_time += time_diff; | ||
882 | |||
883 | last_state = curr_state; | ||
884 | last_time = curr_time; | ||
885 | |||
886 | printf("%s: State-change to %s\n", myctime(curr_time), | ||
887 | state_name(curr_state)); | ||
888 | } | ||
889 | } | ||
890 | changes--; /* Compensate for SIGINT */ | ||
891 | |||
892 | total_time = time(0) - start_time; | ||
893 | printf("\nTotal running time: %lus\n", curr_time - start_time); | ||
894 | printf(" State changed %d times\n", changes); | ||
895 | |||
896 | tmp = (float)sleep_time / (float)total_time * 100; | ||
897 | printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp); | ||
898 | tmp = (float)active_time / (float)total_time * 100; | ||
899 | printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp); | ||
900 | tmp = (float)unknown_time / (float)total_time * 100; | ||
901 | printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp); | ||
902 | } | ||
903 | |||
904 | void ender(int s) | ||
905 | { | ||
906 | endit = 1; | ||
907 | } | ||
908 | |||
909 | void usage() | ||
910 | { | ||
911 | puts("usage: dslm [-w <time>] <disk>"); | ||
912 | exit(0); | ||
913 | } | ||
914 | |||
915 | int main(int argc, char **argv) | ||
916 | { | ||
917 | int fd; | ||
918 | char *disk = 0; | ||
919 | int settle_time = 60; | ||
920 | |||
921 | /* Parse the simple command-line */ | ||
922 | if (argc == 2) | ||
923 | disk = argv[1]; | ||
924 | else if (argc == 4) { | ||
925 | settle_time = atoi(argv[2]); | ||
926 | disk = argv[3]; | ||
927 | } else | ||
928 | usage(); | ||
929 | |||
930 | if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) { | ||
931 | printf("Can't open %s, because: %s\n", disk, strerror(errno)); | ||
932 | exit(-1); | ||
933 | } | ||
934 | |||
935 | if (settle_time) { | ||
936 | printf("Waiting %d seconds for the system to settle down to " | ||
937 | "'normal'\n", settle_time); | ||
938 | sleep(settle_time); | ||
939 | } else | ||
940 | puts("Not waiting for system to settle down"); | ||
941 | |||
942 | signal(SIGINT, ender); | ||
943 | |||
944 | measure(fd); | ||
945 | |||
946 | close(fd); | ||
947 | |||
948 | return 0; | ||
949 | } | ||
950 | ---------------------------dslm.c END------------------------------------------- | ||
diff --git a/Documentation/networking/skfp.txt b/Documentation/networking/skfp.txt index abfddf81e34a..203ec66c9fb4 100644 --- a/Documentation/networking/skfp.txt +++ b/Documentation/networking/skfp.txt | |||
@@ -68,7 +68,7 @@ Compaq adapters (not tested): | |||
68 | ======================= | 68 | ======================= |
69 | 69 | ||
70 | From v2.01 on, the driver is integrated in the linux kernel sources. | 70 | From v2.01 on, the driver is integrated in the linux kernel sources. |
71 | Therefor, the installation is the same as for any other adapter | 71 | Therefore, the installation is the same as for any other adapter |
72 | supported by the kernel. | 72 | supported by the kernel. |
73 | Refer to the manual of your distribution about the installation | 73 | Refer to the manual of your distribution about the installation |
74 | of network adapters. | 74 | of network adapters. |
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c index a7936fe8444a..bab619a48214 100644 --- a/Documentation/networking/timestamping/timestamping.c +++ b/Documentation/networking/timestamping/timestamping.c | |||
@@ -370,7 +370,7 @@ int main(int argc, char **argv) | |||
370 | } | 370 | } |
371 | 371 | ||
372 | sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); | 372 | sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); |
373 | if (socket < 0) | 373 | if (sock < 0) |
374 | bail("socket"); | 374 | bail("socket"); |
375 | 375 | ||
376 | memset(&device, 0, sizeof(device)); | 376 | memset(&device, 0, sizeof(device)); |
diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt index a327db67782a..763e4659bf18 100644 --- a/Documentation/pnp.txt +++ b/Documentation/pnp.txt | |||
@@ -57,7 +57,7 @@ PC standard floppy disk controller | |||
57 | # cat resources | 57 | # cat resources |
58 | DISABLED | 58 | DISABLED |
59 | 59 | ||
60 | - Notice the string "DISABLED". THis means the device is not active. | 60 | - Notice the string "DISABLED". This means the device is not active. |
61 | 61 | ||
62 | 3.) check the device's possible configurations (optional) | 62 | 3.) check the device's possible configurations (optional) |
63 | # cat options | 63 | # cat options |
@@ -139,7 +139,7 @@ Plug and Play but it is planned to be in the near future. | |||
139 | 139 | ||
140 | Requirements for a Linux PnP protocol: | 140 | Requirements for a Linux PnP protocol: |
141 | 1.) the protocol must use EISA IDs | 141 | 1.) the protocol must use EISA IDs |
142 | 2.) the protocol must inform the PnP Layer of a devices current configuration | 142 | 2.) the protocol must inform the PnP Layer of a device's current configuration |
143 | - the ability to set resources is optional but preferred. | 143 | - the ability to set resources is optional but preferred. |
144 | 144 | ||
145 | The following are PnP protocol related functions: | 145 | The following are PnP protocol related functions: |
@@ -158,7 +158,7 @@ pnp_remove_device | |||
158 | - automatically will free mem used by the device and related structures | 158 | - automatically will free mem used by the device and related structures |
159 | 159 | ||
160 | pnp_add_id | 160 | pnp_add_id |
161 | - adds a EISA ID to the list of supported IDs for the specified device | 161 | - adds an EISA ID to the list of supported IDs for the specified device |
162 | 162 | ||
163 | For more information consult the source of a protocol such as | 163 | For more information consult the source of a protocol such as |
164 | /drivers/pnp/pnpbios/core.c. | 164 | /drivers/pnp/pnpbios/core.c. |
@@ -167,7 +167,7 @@ For more information consult the source of a protocol such as | |||
167 | 167 | ||
168 | Linux Plug and Play Drivers | 168 | Linux Plug and Play Drivers |
169 | --------------------------- | 169 | --------------------------- |
170 | This section contains information for linux PnP driver developers. | 170 | This section contains information for Linux PnP driver developers. |
171 | 171 | ||
172 | The New Way | 172 | The New Way |
173 | ........... | 173 | ........... |
@@ -235,11 +235,10 @@ static int __init serial8250_pnp_init(void) | |||
235 | The Old Way | 235 | The Old Way |
236 | ........... | 236 | ........... |
237 | 237 | ||
238 | a series of compatibility functions have been created to make it easy to convert | 238 | A series of compatibility functions have been created to make it easy to convert |
239 | |||
240 | ISAPNP drivers. They should serve as a temporary solution only. | 239 | ISAPNP drivers. They should serve as a temporary solution only. |
241 | 240 | ||
242 | they are as follows: | 241 | They are as follows: |
243 | 242 | ||
244 | struct pnp_card *pnp_find_card(unsigned short vendor, | 243 | struct pnp_card *pnp_find_card(unsigned short vendor, |
245 | unsigned short device, | 244 | unsigned short device, |
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index ab00eeddecaf..55b859b3bc72 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt | |||
@@ -256,7 +256,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: | |||
256 | to suspend the device again in future | 256 | to suspend the device again in future |
257 | 257 | ||
258 | int pm_runtime_resume(struct device *dev); | 258 | int pm_runtime_resume(struct device *dev); |
259 | - execute the subsystem-leve resume callback for the device; returns 0 on | 259 | - execute the subsystem-level resume callback for the device; returns 0 on |
260 | success, 1 if the device's run-time PM status was already 'active' or | 260 | success, 1 if the device's run-time PM status was already 'active' or |
261 | error code on failure, where -EAGAIN means it may be safe to attempt to | 261 | error code on failure, where -EAGAIN means it may be safe to attempt to |
262 | resume the device again in future, but 'power.runtime_error' should be | 262 | resume the device again in future, but 'power.runtime_error' should be |
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt index 6f5ceb0f09fc..85f3280d7ef6 100644 --- a/Documentation/s390/kvm.txt +++ b/Documentation/s390/kvm.txt | |||
@@ -102,7 +102,7 @@ args: unsigned long | |||
102 | see also: include/linux/kvm.h | 102 | see also: include/linux/kvm.h |
103 | This ioctl stores the state of the cpu at the guest real address given as | 103 | This ioctl stores the state of the cpu at the guest real address given as |
104 | argument, unless one of the following values defined in include/linux/kvm.h | 104 | argument, unless one of the following values defined in include/linux/kvm.h |
105 | is given as arguement: | 105 | is given as argument: |
106 | KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in | 106 | KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in |
107 | absolute lowcore as defined by the principles of operation | 107 | absolute lowcore as defined by the principles of operation |
108 | KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in | 108 | KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in |
diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc index ff19a52fe004..2ffc1148eb95 100644 --- a/Documentation/scsi/ChangeLog.lpfc +++ b/Documentation/scsi/ChangeLog.lpfc | |||
@@ -989,8 +989,8 @@ Changes from 20040709 to 20040716 | |||
989 | * Remove redundant port_cmp != 2 check in if | 989 | * Remove redundant port_cmp != 2 check in if |
990 | (!port_cmp) { .... if (port_cmp != 2).... } | 990 | (!port_cmp) { .... if (port_cmp != 2).... } |
991 | * Clock changes: removed struct clk_data and timerList. | 991 | * Clock changes: removed struct clk_data and timerList. |
992 | * Clock changes: seperate nodev_tmo and els_retry_delay into 2 | 992 | * Clock changes: separate nodev_tmo and els_retry_delay into 2 |
993 | seperate timers and convert to 1 argument changed | 993 | separate timers and convert to 1 argument changed |
994 | LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert | 994 | LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert |
995 | ipfarp_tmo to 1 argument convert target struct tmofunc and | 995 | ipfarp_tmo to 1 argument convert target struct tmofunc and |
996 | rtplunfunc to 1 argument * cr_count, cr_delay and | 996 | rtplunfunc to 1 argument * cr_count, cr_delay and |
@@ -1514,7 +1514,7 @@ Changes from 20040402 to 20040409 | |||
1514 | * Remove unused elxclock declaration in elx_sli.h. | 1514 | * Remove unused elxclock declaration in elx_sli.h. |
1515 | * Since everywhere IOCB_ENTRY is used, the return value is cast, | 1515 | * Since everywhere IOCB_ENTRY is used, the return value is cast, |
1516 | move the cast into the macro. | 1516 | move the cast into the macro. |
1517 | * Split ioctls out into seperate files | 1517 | * Split ioctls out into separate files |
1518 | 1518 | ||
1519 | Changes from 20040326 to 20040402 | 1519 | Changes from 20040326 to 20040402 |
1520 | 1520 | ||
@@ -1534,7 +1534,7 @@ Changes from 20040326 to 20040402 | |||
1534 | * Unused variable cleanup | 1534 | * Unused variable cleanup |
1535 | * Use Linux list macros for DMABUF_t | 1535 | * Use Linux list macros for DMABUF_t |
1536 | * Break up ioctls into 3 sections, dfc, util, hbaapi | 1536 | * Break up ioctls into 3 sections, dfc, util, hbaapi |
1537 | rearranged code so this could be easily seperated into a | 1537 | rearranged code so this could be easily separated into a |
1538 | differnet module later All 3 are currently turned on by | 1538 | differnet module later All 3 are currently turned on by |
1539 | defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL, | 1539 | defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL, |
1540 | LPFC_HBAAPI_IOCTL | 1540 | LPFC_HBAAPI_IOCTL |
@@ -1551,7 +1551,7 @@ Changes from 20040326 to 20040402 | |||
1551 | started by lpfc_online(). lpfc_offline() only stopped | 1551 | started by lpfc_online(). lpfc_offline() only stopped |
1552 | els_timeout routine. It now stops all timeout routines | 1552 | els_timeout routine. It now stops all timeout routines |
1553 | associated with that hba. | 1553 | associated with that hba. |
1554 | * Replace seperate next and prev pointers in struct | 1554 | * Replace separate next and prev pointers in struct |
1555 | lpfc_bindlist with list_head type. In elxHBA_t, replace | 1555 | lpfc_bindlist with list_head type. In elxHBA_t, replace |
1556 | fc_nlpbind_start and _end with fc_nlpbind_list and use | 1556 | fc_nlpbind_start and _end with fc_nlpbind_list and use |
1557 | list_head macros to access it. | 1557 | list_head macros to access it. |
diff --git a/Documentation/serial/tty.txt b/Documentation/serial/tty.txt index 5e5349a4fcd2..7c900507279f 100644 --- a/Documentation/serial/tty.txt +++ b/Documentation/serial/tty.txt | |||
@@ -105,6 +105,10 @@ write_wakeup() - May be called at any point between open and close. | |||
105 | is permitted to call the driver write method from | 105 | is permitted to call the driver write method from |
106 | this function. In such a situation defer it. | 106 | this function. In such a situation defer it. |
107 | 107 | ||
108 | dcd_change() - Report to the tty line the current DCD pin status | ||
109 | changes and the relative timestamp. The timestamp | ||
110 | can be NULL. | ||
111 | |||
108 | 112 | ||
109 | Driver Access | 113 | Driver Access |
110 | 114 | ||
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 33df82e3a398..bfcbbf88c44d 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -1812,7 +1812,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
1812 | Module snd-ua101 | 1812 | Module snd-ua101 |
1813 | ---------------- | 1813 | ---------------- |
1814 | 1814 | ||
1815 | Module for the Edirol UA-101 audio/MIDI interface. | 1815 | Module for the Edirol UA-101/UA-1000 audio/MIDI interfaces. |
1816 | 1816 | ||
1817 | This module supports multiple devices, autoprobe and hotplugging. | 1817 | This module supports multiple devices, autoprobe and hotplugging. |
1818 | 1818 | ||
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index fc5790d36cd9..6c7d18c53f84 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -573,11 +573,14 @@ Because other nodes' memory may be free. This means system total status | |||
573 | may be not fatal yet. | 573 | may be not fatal yet. |
574 | 574 | ||
575 | If this is set to 2, the kernel panics compulsorily even on the | 575 | If this is set to 2, the kernel panics compulsorily even on the |
576 | above-mentioned. | 576 | above-mentioned. Even oom happens under memory cgroup, the whole |
577 | system panics. | ||
577 | 578 | ||
578 | The default value is 0. | 579 | The default value is 0. |
579 | 1 and 2 are for failover of clustering. Please select either | 580 | 1 and 2 are for failover of clustering. Please select either |
580 | according to your policy of failover. | 581 | according to your policy of failover. |
582 | panic_on_oom=2+kdump gives you very strong tool to investigate | ||
583 | why oom happens. You can get snapshot. | ||
581 | 584 | ||
582 | ============================================================= | 585 | ============================================================= |
583 | 586 | ||
diff --git a/Documentation/timers/00-INDEX b/Documentation/timers/00-INDEX index 397dc35e1323..a9248da5cdbc 100644 --- a/Documentation/timers/00-INDEX +++ b/Documentation/timers/00-INDEX | |||
@@ -4,6 +4,8 @@ highres.txt | |||
4 | - High resolution timers and dynamic ticks design notes | 4 | - High resolution timers and dynamic ticks design notes |
5 | hpet.txt | 5 | hpet.txt |
6 | - High Precision Event Timer Driver for Linux | 6 | - High Precision Event Timer Driver for Linux |
7 | hpet_example.c | ||
8 | - sample hpet timer test program | ||
7 | hrtimers.txt | 9 | hrtimers.txt |
8 | - subsystem for high-resolution kernel timers | 10 | - subsystem for high-resolution kernel timers |
9 | timer_stats.txt | 11 | timer_stats.txt |
diff --git a/Documentation/timers/Makefile b/Documentation/timers/Makefile new file mode 100644 index 000000000000..c85625f4ab25 --- /dev/null +++ b/Documentation/timers/Makefile | |||
@@ -0,0 +1,8 @@ | |||
1 | # kbuild trick to avoid linker error. Can be omitted if a module is built. | ||
2 | obj- := dummy.o | ||
3 | |||
4 | # List of programs to build | ||
5 | hostprogs-y := hpet_example | ||
6 | |||
7 | # Tell kbuild to always build the programs | ||
8 | always := $(hostprogs-y) | ||
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt index 16d25e6b5a00..767392ffd31e 100644 --- a/Documentation/timers/hpet.txt +++ b/Documentation/timers/hpet.txt | |||
@@ -26,274 +26,5 @@ initialization. An example of this initialization can be found in | |||
26 | arch/x86/kernel/hpet.c. | 26 | arch/x86/kernel/hpet.c. |
27 | 27 | ||
28 | The driver provides a userspace API which resembles the API found in the | 28 | The driver provides a userspace API which resembles the API found in the |
29 | RTC driver framework. An example user space program is provided below. | 29 | RTC driver framework. An example user space program is provided in |
30 | 30 | file:Documentation/timers/hpet_example.c | |
31 | #include <stdio.h> | ||
32 | #include <stdlib.h> | ||
33 | #include <unistd.h> | ||
34 | #include <fcntl.h> | ||
35 | #include <string.h> | ||
36 | #include <memory.h> | ||
37 | #include <malloc.h> | ||
38 | #include <time.h> | ||
39 | #include <ctype.h> | ||
40 | #include <sys/types.h> | ||
41 | #include <sys/wait.h> | ||
42 | #include <signal.h> | ||
43 | #include <fcntl.h> | ||
44 | #include <errno.h> | ||
45 | #include <sys/time.h> | ||
46 | #include <linux/hpet.h> | ||
47 | |||
48 | |||
49 | extern void hpet_open_close(int, const char **); | ||
50 | extern void hpet_info(int, const char **); | ||
51 | extern void hpet_poll(int, const char **); | ||
52 | extern void hpet_fasync(int, const char **); | ||
53 | extern void hpet_read(int, const char **); | ||
54 | |||
55 | #include <sys/poll.h> | ||
56 | #include <sys/ioctl.h> | ||
57 | #include <signal.h> | ||
58 | |||
59 | struct hpet_command { | ||
60 | char *command; | ||
61 | void (*func)(int argc, const char ** argv); | ||
62 | } hpet_command[] = { | ||
63 | { | ||
64 | "open-close", | ||
65 | hpet_open_close | ||
66 | }, | ||
67 | { | ||
68 | "info", | ||
69 | hpet_info | ||
70 | }, | ||
71 | { | ||
72 | "poll", | ||
73 | hpet_poll | ||
74 | }, | ||
75 | { | ||
76 | "fasync", | ||
77 | hpet_fasync | ||
78 | }, | ||
79 | }; | ||
80 | |||
81 | int | ||
82 | main(int argc, const char ** argv) | ||
83 | { | ||
84 | int i; | ||
85 | |||
86 | argc--; | ||
87 | argv++; | ||
88 | |||
89 | if (!argc) { | ||
90 | fprintf(stderr, "-hpet: requires command\n"); | ||
91 | return -1; | ||
92 | } | ||
93 | |||
94 | |||
95 | for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++) | ||
96 | if (!strcmp(argv[0], hpet_command[i].command)) { | ||
97 | argc--; | ||
98 | argv++; | ||
99 | fprintf(stderr, "-hpet: executing %s\n", | ||
100 | hpet_command[i].command); | ||
101 | hpet_command[i].func(argc, argv); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]); | ||
106 | |||
107 | return -1; | ||
108 | } | ||
109 | |||
110 | void | ||
111 | hpet_open_close(int argc, const char **argv) | ||
112 | { | ||
113 | int fd; | ||
114 | |||
115 | if (argc != 1) { | ||
116 | fprintf(stderr, "hpet_open_close: device-name\n"); | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | fd = open(argv[0], O_RDONLY); | ||
121 | if (fd < 0) | ||
122 | fprintf(stderr, "hpet_open_close: open failed\n"); | ||
123 | else | ||
124 | close(fd); | ||
125 | |||
126 | return; | ||
127 | } | ||
128 | |||
129 | void | ||
130 | hpet_info(int argc, const char **argv) | ||
131 | { | ||
132 | } | ||
133 | |||
134 | void | ||
135 | hpet_poll(int argc, const char **argv) | ||
136 | { | ||
137 | unsigned long freq; | ||
138 | int iterations, i, fd; | ||
139 | struct pollfd pfd; | ||
140 | struct hpet_info info; | ||
141 | struct timeval stv, etv; | ||
142 | struct timezone tz; | ||
143 | long usec; | ||
144 | |||
145 | if (argc != 3) { | ||
146 | fprintf(stderr, "hpet_poll: device-name freq iterations\n"); | ||
147 | return; | ||
148 | } | ||
149 | |||
150 | freq = atoi(argv[1]); | ||
151 | iterations = atoi(argv[2]); | ||
152 | |||
153 | fd = open(argv[0], O_RDONLY); | ||
154 | |||
155 | if (fd < 0) { | ||
156 | fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]); | ||
157 | return; | ||
158 | } | ||
159 | |||
160 | if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { | ||
161 | fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n"); | ||
162 | goto out; | ||
163 | } | ||
164 | |||
165 | if (ioctl(fd, HPET_INFO, &info) < 0) { | ||
166 | fprintf(stderr, "hpet_poll: failed to get info\n"); | ||
167 | goto out; | ||
168 | } | ||
169 | |||
170 | fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags); | ||
171 | |||
172 | if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { | ||
173 | fprintf(stderr, "hpet_poll: HPET_EPI failed\n"); | ||
174 | goto out; | ||
175 | } | ||
176 | |||
177 | if (ioctl(fd, HPET_IE_ON, 0) < 0) { | ||
178 | fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n"); | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | pfd.fd = fd; | ||
183 | pfd.events = POLLIN; | ||
184 | |||
185 | for (i = 0; i < iterations; i++) { | ||
186 | pfd.revents = 0; | ||
187 | gettimeofday(&stv, &tz); | ||
188 | if (poll(&pfd, 1, -1) < 0) | ||
189 | fprintf(stderr, "hpet_poll: poll failed\n"); | ||
190 | else { | ||
191 | long data; | ||
192 | |||
193 | gettimeofday(&etv, &tz); | ||
194 | usec = stv.tv_sec * 1000000 + stv.tv_usec; | ||
195 | usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec; | ||
196 | |||
197 | fprintf(stderr, | ||
198 | "hpet_poll: expired time = 0x%lx\n", usec); | ||
199 | |||
200 | fprintf(stderr, "hpet_poll: revents = 0x%x\n", | ||
201 | pfd.revents); | ||
202 | |||
203 | if (read(fd, &data, sizeof(data)) != sizeof(data)) { | ||
204 | fprintf(stderr, "hpet_poll: read failed\n"); | ||
205 | } | ||
206 | else | ||
207 | fprintf(stderr, "hpet_poll: data 0x%lx\n", | ||
208 | data); | ||
209 | } | ||
210 | } | ||
211 | |||
212 | out: | ||
213 | close(fd); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | static int hpet_sigio_count; | ||
218 | |||
219 | static void | ||
220 | hpet_sigio(int val) | ||
221 | { | ||
222 | fprintf(stderr, "hpet_sigio: called\n"); | ||
223 | hpet_sigio_count++; | ||
224 | } | ||
225 | |||
226 | void | ||
227 | hpet_fasync(int argc, const char **argv) | ||
228 | { | ||
229 | unsigned long freq; | ||
230 | int iterations, i, fd, value; | ||
231 | sig_t oldsig; | ||
232 | struct hpet_info info; | ||
233 | |||
234 | hpet_sigio_count = 0; | ||
235 | fd = -1; | ||
236 | |||
237 | if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) { | ||
238 | fprintf(stderr, "hpet_fasync: failed to set signal handler\n"); | ||
239 | return; | ||
240 | } | ||
241 | |||
242 | if (argc != 3) { | ||
243 | fprintf(stderr, "hpet_fasync: device-name freq iterations\n"); | ||
244 | goto out; | ||
245 | } | ||
246 | |||
247 | fd = open(argv[0], O_RDONLY); | ||
248 | |||
249 | if (fd < 0) { | ||
250 | fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]); | ||
251 | return; | ||
252 | } | ||
253 | |||
254 | |||
255 | if ((fcntl(fd, F_SETOWN, getpid()) == 1) || | ||
256 | ((value = fcntl(fd, F_GETFL)) == 1) || | ||
257 | (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) { | ||
258 | fprintf(stderr, "hpet_fasync: fcntl failed\n"); | ||
259 | goto out; | ||
260 | } | ||
261 | |||
262 | freq = atoi(argv[1]); | ||
263 | iterations = atoi(argv[2]); | ||
264 | |||
265 | if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { | ||
266 | fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n"); | ||
267 | goto out; | ||
268 | } | ||
269 | |||
270 | if (ioctl(fd, HPET_INFO, &info) < 0) { | ||
271 | fprintf(stderr, "hpet_fasync: failed to get info\n"); | ||
272 | goto out; | ||
273 | } | ||
274 | |||
275 | fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags); | ||
276 | |||
277 | if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { | ||
278 | fprintf(stderr, "hpet_fasync: HPET_EPI failed\n"); | ||
279 | goto out; | ||
280 | } | ||
281 | |||
282 | if (ioctl(fd, HPET_IE_ON, 0) < 0) { | ||
283 | fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n"); | ||
284 | goto out; | ||
285 | } | ||
286 | |||
287 | for (i = 0; i < iterations; i++) { | ||
288 | (void) pause(); | ||
289 | fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count); | ||
290 | } | ||
291 | |||
292 | out: | ||
293 | signal(SIGIO, oldsig); | ||
294 | |||
295 | if (fd >= 0) | ||
296 | close(fd); | ||
297 | |||
298 | return; | ||
299 | } | ||
diff --git a/Documentation/timers/hpet_example.c b/Documentation/timers/hpet_example.c new file mode 100644 index 000000000000..f9ce2d9fdfd5 --- /dev/null +++ b/Documentation/timers/hpet_example.c | |||
@@ -0,0 +1,269 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <unistd.h> | ||
4 | #include <fcntl.h> | ||
5 | #include <string.h> | ||
6 | #include <memory.h> | ||
7 | #include <malloc.h> | ||
8 | #include <time.h> | ||
9 | #include <ctype.h> | ||
10 | #include <sys/types.h> | ||
11 | #include <sys/wait.h> | ||
12 | #include <signal.h> | ||
13 | #include <fcntl.h> | ||
14 | #include <errno.h> | ||
15 | #include <sys/time.h> | ||
16 | #include <linux/hpet.h> | ||
17 | |||
18 | |||
19 | extern void hpet_open_close(int, const char **); | ||
20 | extern void hpet_info(int, const char **); | ||
21 | extern void hpet_poll(int, const char **); | ||
22 | extern void hpet_fasync(int, const char **); | ||
23 | extern void hpet_read(int, const char **); | ||
24 | |||
25 | #include <sys/poll.h> | ||
26 | #include <sys/ioctl.h> | ||
27 | #include <signal.h> | ||
28 | |||
29 | struct hpet_command { | ||
30 | char *command; | ||
31 | void (*func)(int argc, const char ** argv); | ||
32 | } hpet_command[] = { | ||
33 | { | ||
34 | "open-close", | ||
35 | hpet_open_close | ||
36 | }, | ||
37 | { | ||
38 | "info", | ||
39 | hpet_info | ||
40 | }, | ||
41 | { | ||
42 | "poll", | ||
43 | hpet_poll | ||
44 | }, | ||
45 | { | ||
46 | "fasync", | ||
47 | hpet_fasync | ||
48 | }, | ||
49 | }; | ||
50 | |||
51 | int | ||
52 | main(int argc, const char ** argv) | ||
53 | { | ||
54 | int i; | ||
55 | |||
56 | argc--; | ||
57 | argv++; | ||
58 | |||
59 | if (!argc) { | ||
60 | fprintf(stderr, "-hpet: requires command\n"); | ||
61 | return -1; | ||
62 | } | ||
63 | |||
64 | |||
65 | for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++) | ||
66 | if (!strcmp(argv[0], hpet_command[i].command)) { | ||
67 | argc--; | ||
68 | argv++; | ||
69 | fprintf(stderr, "-hpet: executing %s\n", | ||
70 | hpet_command[i].command); | ||
71 | hpet_command[i].func(argc, argv); | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]); | ||
76 | |||
77 | return -1; | ||
78 | } | ||
79 | |||
80 | void | ||
81 | hpet_open_close(int argc, const char **argv) | ||
82 | { | ||
83 | int fd; | ||
84 | |||
85 | if (argc != 1) { | ||
86 | fprintf(stderr, "hpet_open_close: device-name\n"); | ||
87 | return; | ||
88 | } | ||
89 | |||
90 | fd = open(argv[0], O_RDONLY); | ||
91 | if (fd < 0) | ||
92 | fprintf(stderr, "hpet_open_close: open failed\n"); | ||
93 | else | ||
94 | close(fd); | ||
95 | |||
96 | return; | ||
97 | } | ||
98 | |||
99 | void | ||
100 | hpet_info(int argc, const char **argv) | ||
101 | { | ||
102 | } | ||
103 | |||
104 | void | ||
105 | hpet_poll(int argc, const char **argv) | ||
106 | { | ||
107 | unsigned long freq; | ||
108 | int iterations, i, fd; | ||
109 | struct pollfd pfd; | ||
110 | struct hpet_info info; | ||
111 | struct timeval stv, etv; | ||
112 | struct timezone tz; | ||
113 | long usec; | ||
114 | |||
115 | if (argc != 3) { | ||
116 | fprintf(stderr, "hpet_poll: device-name freq iterations\n"); | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | freq = atoi(argv[1]); | ||
121 | iterations = atoi(argv[2]); | ||
122 | |||
123 | fd = open(argv[0], O_RDONLY); | ||
124 | |||
125 | if (fd < 0) { | ||
126 | fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]); | ||
127 | return; | ||
128 | } | ||
129 | |||
130 | if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { | ||
131 | fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n"); | ||
132 | goto out; | ||
133 | } | ||
134 | |||
135 | if (ioctl(fd, HPET_INFO, &info) < 0) { | ||
136 | fprintf(stderr, "hpet_poll: failed to get info\n"); | ||
137 | goto out; | ||
138 | } | ||
139 | |||
140 | fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags); | ||
141 | |||
142 | if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { | ||
143 | fprintf(stderr, "hpet_poll: HPET_EPI failed\n"); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | if (ioctl(fd, HPET_IE_ON, 0) < 0) { | ||
148 | fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n"); | ||
149 | goto out; | ||
150 | } | ||
151 | |||
152 | pfd.fd = fd; | ||
153 | pfd.events = POLLIN; | ||
154 | |||
155 | for (i = 0; i < iterations; i++) { | ||
156 | pfd.revents = 0; | ||
157 | gettimeofday(&stv, &tz); | ||
158 | if (poll(&pfd, 1, -1) < 0) | ||
159 | fprintf(stderr, "hpet_poll: poll failed\n"); | ||
160 | else { | ||
161 | long data; | ||
162 | |||
163 | gettimeofday(&etv, &tz); | ||
164 | usec = stv.tv_sec * 1000000 + stv.tv_usec; | ||
165 | usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec; | ||
166 | |||
167 | fprintf(stderr, | ||
168 | "hpet_poll: expired time = 0x%lx\n", usec); | ||
169 | |||
170 | fprintf(stderr, "hpet_poll: revents = 0x%x\n", | ||
171 | pfd.revents); | ||
172 | |||
173 | if (read(fd, &data, sizeof(data)) != sizeof(data)) { | ||
174 | fprintf(stderr, "hpet_poll: read failed\n"); | ||
175 | } | ||
176 | else | ||
177 | fprintf(stderr, "hpet_poll: data 0x%lx\n", | ||
178 | data); | ||
179 | } | ||
180 | } | ||
181 | |||
182 | out: | ||
183 | close(fd); | ||
184 | return; | ||
185 | } | ||
186 | |||
187 | static int hpet_sigio_count; | ||
188 | |||
189 | static void | ||
190 | hpet_sigio(int val) | ||
191 | { | ||
192 | fprintf(stderr, "hpet_sigio: called\n"); | ||
193 | hpet_sigio_count++; | ||
194 | } | ||
195 | |||
196 | void | ||
197 | hpet_fasync(int argc, const char **argv) | ||
198 | { | ||
199 | unsigned long freq; | ||
200 | int iterations, i, fd, value; | ||
201 | sig_t oldsig; | ||
202 | struct hpet_info info; | ||
203 | |||
204 | hpet_sigio_count = 0; | ||
205 | fd = -1; | ||
206 | |||
207 | if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) { | ||
208 | fprintf(stderr, "hpet_fasync: failed to set signal handler\n"); | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | if (argc != 3) { | ||
213 | fprintf(stderr, "hpet_fasync: device-name freq iterations\n"); | ||
214 | goto out; | ||
215 | } | ||
216 | |||
217 | fd = open(argv[0], O_RDONLY); | ||
218 | |||
219 | if (fd < 0) { | ||
220 | fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]); | ||
221 | return; | ||
222 | } | ||
223 | |||
224 | |||
225 | if ((fcntl(fd, F_SETOWN, getpid()) == 1) || | ||
226 | ((value = fcntl(fd, F_GETFL)) == 1) || | ||
227 | (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) { | ||
228 | fprintf(stderr, "hpet_fasync: fcntl failed\n"); | ||
229 | goto out; | ||
230 | } | ||
231 | |||
232 | freq = atoi(argv[1]); | ||
233 | iterations = atoi(argv[2]); | ||
234 | |||
235 | if (ioctl(fd, HPET_IRQFREQ, freq) < 0) { | ||
236 | fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n"); | ||
237 | goto out; | ||
238 | } | ||
239 | |||
240 | if (ioctl(fd, HPET_INFO, &info) < 0) { | ||
241 | fprintf(stderr, "hpet_fasync: failed to get info\n"); | ||
242 | goto out; | ||
243 | } | ||
244 | |||
245 | fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags); | ||
246 | |||
247 | if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) { | ||
248 | fprintf(stderr, "hpet_fasync: HPET_EPI failed\n"); | ||
249 | goto out; | ||
250 | } | ||
251 | |||
252 | if (ioctl(fd, HPET_IE_ON, 0) < 0) { | ||
253 | fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n"); | ||
254 | goto out; | ||
255 | } | ||
256 | |||
257 | for (i = 0; i < iterations; i++) { | ||
258 | (void) pause(); | ||
259 | fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count); | ||
260 | } | ||
261 | |||
262 | out: | ||
263 | signal(SIGIO, oldsig); | ||
264 | |||
265 | if (fd >= 0) | ||
266 | close(fd); | ||
267 | |||
268 | return; | ||
269 | } | ||
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bab3040da548..03485bfbd797 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt | |||
@@ -1588,7 +1588,7 @@ module author does not need to worry about it. | |||
1588 | 1588 | ||
1589 | When tracing is enabled, kstop_machine is called to prevent | 1589 | When tracing is enabled, kstop_machine is called to prevent |
1590 | races with the CPUS executing code being modified (which can | 1590 | races with the CPUS executing code being modified (which can |
1591 | cause the CPU to do undesireable things), and the nops are | 1591 | cause the CPU to do undesirable things), and the nops are |
1592 | patched back to calls. But this time, they do not call mcount | 1592 | patched back to calls. But this time, they do not call mcount |
1593 | (which is just a function stub). They now call into the ftrace | 1593 | (which is just a function stub). They now call into the ftrace |
1594 | infrastructure. | 1594 | infrastructure. |
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX index e57d6a9dd32b..dca82d7c83d8 100644 --- a/Documentation/vm/00-INDEX +++ b/Documentation/vm/00-INDEX | |||
@@ -4,23 +4,35 @@ active_mm.txt | |||
4 | - An explanation from Linus about tsk->active_mm vs tsk->mm. | 4 | - An explanation from Linus about tsk->active_mm vs tsk->mm. |
5 | balance | 5 | balance |
6 | - various information on memory balancing. | 6 | - various information on memory balancing. |
7 | hugepage-mmap.c | ||
8 | - Example app using huge page memory with the mmap system call. | ||
9 | hugepage-shm.c | ||
10 | - Example app using huge page memory with Sys V shared memory system calls. | ||
7 | hugetlbpage.txt | 11 | hugetlbpage.txt |
8 | - a brief summary of hugetlbpage support in the Linux kernel. | 12 | - a brief summary of hugetlbpage support in the Linux kernel. |
13 | hwpoison.txt | ||
14 | - explains what hwpoison is | ||
9 | ksm.txt | 15 | ksm.txt |
10 | - how to use the Kernel Samepage Merging feature. | 16 | - how to use the Kernel Samepage Merging feature. |
11 | locking | 17 | locking |
12 | - info on how locking and synchronization is done in the Linux vm code. | 18 | - info on how locking and synchronization is done in the Linux vm code. |
19 | map_hugetlb.c | ||
20 | - an example program that uses the MAP_HUGETLB mmap flag. | ||
13 | numa | 21 | numa |
14 | - information about NUMA specific code in the Linux vm. | 22 | - information about NUMA specific code in the Linux vm. |
15 | numa_memory_policy.txt | 23 | numa_memory_policy.txt |
16 | - documentation of concepts and APIs of the 2.6 memory policy support. | 24 | - documentation of concepts and APIs of the 2.6 memory policy support. |
17 | overcommit-accounting | 25 | overcommit-accounting |
18 | - description of the Linux kernels overcommit handling modes. | 26 | - description of the Linux kernels overcommit handling modes. |
27 | page-types.c | ||
28 | - Tool for querying page flags | ||
19 | page_migration | 29 | page_migration |
20 | - description of page migration in NUMA systems. | 30 | - description of page migration in NUMA systems. |
31 | pagemap.txt | ||
32 | - pagemap, from the userspace perspective | ||
21 | slabinfo.c | 33 | slabinfo.c |
22 | - source code for a tool to get reports about slabs. | 34 | - source code for a tool to get reports about slabs. |
23 | slub.txt | 35 | slub.txt |
24 | - a short users guide for SLUB. | 36 | - a short users guide for SLUB. |
25 | map_hugetlb.c | 37 | unevictable-lru.txt |
26 | - an example program that uses the MAP_HUGETLB mmap flag. | 38 | - Unevictable LRU infrastructure |
diff --git a/Documentation/vm/Makefile b/Documentation/vm/Makefile index 5bd269b3731a..9dcff328b964 100644 --- a/Documentation/vm/Makefile +++ b/Documentation/vm/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | obj- := dummy.o | 2 | obj- := dummy.o |
3 | 3 | ||
4 | # List of programs to build | 4 | # List of programs to build |
5 | hostprogs-y := slabinfo page-types | 5 | hostprogs-y := slabinfo page-types hugepage-mmap hugepage-shm map_hugetlb |
6 | 6 | ||
7 | # Tell kbuild to always build the programs | 7 | # Tell kbuild to always build the programs |
8 | always := $(hostprogs-y) | 8 | always := $(hostprogs-y) |
diff --git a/Documentation/vm/hugepage-mmap.c b/Documentation/vm/hugepage-mmap.c new file mode 100644 index 000000000000..db0dd9a33d54 --- /dev/null +++ b/Documentation/vm/hugepage-mmap.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * hugepage-mmap: | ||
3 | * | ||
4 | * Example of using huge page memory in a user application using the mmap | ||
5 | * system call. Before running this application, make sure that the | ||
6 | * administrator has mounted the hugetlbfs filesystem (on some directory | ||
7 | * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this | ||
8 | * example, the app is requesting memory of size 256MB that is backed by | ||
9 | * huge pages. | ||
10 | * | ||
11 | * For the ia64 architecture, the Linux kernel reserves Region number 4 for | ||
12 | * huge pages. That means that if one requires a fixed address, a huge page | ||
13 | * aligned address starting with 0x800000... will be required. If a fixed | ||
14 | * address is not required, the kernel will select an address in the proper | ||
15 | * range. | ||
16 | * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. | ||
17 | */ | ||
18 | |||
19 | #include <stdlib.h> | ||
20 | #include <stdio.h> | ||
21 | #include <unistd.h> | ||
22 | #include <sys/mman.h> | ||
23 | #include <fcntl.h> | ||
24 | |||
25 | #define FILE_NAME "/mnt/hugepagefile" | ||
26 | #define LENGTH (256UL*1024*1024) | ||
27 | #define PROTECTION (PROT_READ | PROT_WRITE) | ||
28 | |||
29 | /* Only ia64 requires this */ | ||
30 | #ifdef __ia64__ | ||
31 | #define ADDR (void *)(0x8000000000000000UL) | ||
32 | #define FLAGS (MAP_SHARED | MAP_FIXED) | ||
33 | #else | ||
34 | #define ADDR (void *)(0x0UL) | ||
35 | #define FLAGS (MAP_SHARED) | ||
36 | #endif | ||
37 | |||
38 | static void check_bytes(char *addr) | ||
39 | { | ||
40 | printf("First hex is %x\n", *((unsigned int *)addr)); | ||
41 | } | ||
42 | |||
43 | static void write_bytes(char *addr) | ||
44 | { | ||
45 | unsigned long i; | ||
46 | |||
47 | for (i = 0; i < LENGTH; i++) | ||
48 | *(addr + i) = (char)i; | ||
49 | } | ||
50 | |||
51 | static void read_bytes(char *addr) | ||
52 | { | ||
53 | unsigned long i; | ||
54 | |||
55 | check_bytes(addr); | ||
56 | for (i = 0; i < LENGTH; i++) | ||
57 | if (*(addr + i) != (char)i) { | ||
58 | printf("Mismatch at %lu\n", i); | ||
59 | break; | ||
60 | } | ||
61 | } | ||
62 | |||
63 | int main(void) | ||
64 | { | ||
65 | void *addr; | ||
66 | int fd; | ||
67 | |||
68 | fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); | ||
69 | if (fd < 0) { | ||
70 | perror("Open failed"); | ||
71 | exit(1); | ||
72 | } | ||
73 | |||
74 | addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); | ||
75 | if (addr == MAP_FAILED) { | ||
76 | perror("mmap"); | ||
77 | unlink(FILE_NAME); | ||
78 | exit(1); | ||
79 | } | ||
80 | |||
81 | printf("Returned address is %p\n", addr); | ||
82 | check_bytes(addr); | ||
83 | write_bytes(addr); | ||
84 | read_bytes(addr); | ||
85 | |||
86 | munmap(addr, LENGTH); | ||
87 | close(fd); | ||
88 | unlink(FILE_NAME); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
diff --git a/Documentation/vm/hugepage-shm.c b/Documentation/vm/hugepage-shm.c new file mode 100644 index 000000000000..07956d8592c9 --- /dev/null +++ b/Documentation/vm/hugepage-shm.c | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * hugepage-shm: | ||
3 | * | ||
4 | * Example of using huge page memory in a user application using Sys V shared | ||
5 | * memory system calls. In this example the app is requesting 256MB of | ||
6 | * memory that is backed by huge pages. The application uses the flag | ||
7 | * SHM_HUGETLB in the shmget system call to inform the kernel that it is | ||
8 | * requesting huge pages. | ||
9 | * | ||
10 | * For the ia64 architecture, the Linux kernel reserves Region number 4 for | ||
11 | * huge pages. That means that if one requires a fixed address, a huge page | ||
12 | * aligned address starting with 0x800000... will be required. If a fixed | ||
13 | * address is not required, the kernel will select an address in the proper | ||
14 | * range. | ||
15 | * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. | ||
16 | * | ||
17 | * Note: The default shared memory limit is quite low on many kernels, | ||
18 | * you may need to increase it via: | ||
19 | * | ||
20 | * echo 268435456 > /proc/sys/kernel/shmmax | ||
21 | * | ||
22 | * This will increase the maximum size per shared memory segment to 256MB. | ||
23 | * The other limit that you will hit eventually is shmall which is the | ||
24 | * total amount of shared memory in pages. To set it to 16GB on a system | ||
25 | * with a 4kB pagesize do: | ||
26 | * | ||
27 | * echo 4194304 > /proc/sys/kernel/shmall | ||
28 | */ | ||
29 | |||
30 | #include <stdlib.h> | ||
31 | #include <stdio.h> | ||
32 | #include <sys/types.h> | ||
33 | #include <sys/ipc.h> | ||
34 | #include <sys/shm.h> | ||
35 | #include <sys/mman.h> | ||
36 | |||
37 | #ifndef SHM_HUGETLB | ||
38 | #define SHM_HUGETLB 04000 | ||
39 | #endif | ||
40 | |||
41 | #define LENGTH (256UL*1024*1024) | ||
42 | |||
43 | #define dprintf(x) printf(x) | ||
44 | |||
45 | /* Only ia64 requires this */ | ||
46 | #ifdef __ia64__ | ||
47 | #define ADDR (void *)(0x8000000000000000UL) | ||
48 | #define SHMAT_FLAGS (SHM_RND) | ||
49 | #else | ||
50 | #define ADDR (void *)(0x0UL) | ||
51 | #define SHMAT_FLAGS (0) | ||
52 | #endif | ||
53 | |||
54 | int main(void) | ||
55 | { | ||
56 | int shmid; | ||
57 | unsigned long i; | ||
58 | char *shmaddr; | ||
59 | |||
60 | if ((shmid = shmget(2, LENGTH, | ||
61 | SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { | ||
62 | perror("shmget"); | ||
63 | exit(1); | ||
64 | } | ||
65 | printf("shmid: 0x%x\n", shmid); | ||
66 | |||
67 | shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); | ||
68 | if (shmaddr == (char *)-1) { | ||
69 | perror("Shared memory attach failure"); | ||
70 | shmctl(shmid, IPC_RMID, NULL); | ||
71 | exit(2); | ||
72 | } | ||
73 | printf("shmaddr: %p\n", shmaddr); | ||
74 | |||
75 | dprintf("Starting the writes:\n"); | ||
76 | for (i = 0; i < LENGTH; i++) { | ||
77 | shmaddr[i] = (char)(i); | ||
78 | if (!(i % (1024 * 1024))) | ||
79 | dprintf("."); | ||
80 | } | ||
81 | dprintf("\n"); | ||
82 | |||
83 | dprintf("Starting the Check..."); | ||
84 | for (i = 0; i < LENGTH; i++) | ||
85 | if (shmaddr[i] != (char)i) | ||
86 | printf("\nIndex %lu mismatched\n", i); | ||
87 | dprintf("Done.\n"); | ||
88 | |||
89 | if (shmdt((const void *)shmaddr) != 0) { | ||
90 | perror("Detach failure"); | ||
91 | shmctl(shmid, IPC_RMID, NULL); | ||
92 | exit(3); | ||
93 | } | ||
94 | |||
95 | shmctl(shmid, IPC_RMID, NULL); | ||
96 | |||
97 | return 0; | ||
98 | } | ||
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt index bc31636973e3..457634c1e03e 100644 --- a/Documentation/vm/hugetlbpage.txt +++ b/Documentation/vm/hugetlbpage.txt | |||
@@ -299,176 +299,11 @@ map_hugetlb.c. | |||
299 | ******************************************************************* | 299 | ******************************************************************* |
300 | 300 | ||
301 | /* | 301 | /* |
302 | * Example of using huge page memory in a user application using Sys V shared | 302 | * hugepage-shm: see Documentation/vm/hugepage-shm.c |
303 | * memory system calls. In this example the app is requesting 256MB of | ||
304 | * memory that is backed by huge pages. The application uses the flag | ||
305 | * SHM_HUGETLB in the shmget system call to inform the kernel that it is | ||
306 | * requesting huge pages. | ||
307 | * | ||
308 | * For the ia64 architecture, the Linux kernel reserves Region number 4 for | ||
309 | * huge pages. That means that if one requires a fixed address, a huge page | ||
310 | * aligned address starting with 0x800000... will be required. If a fixed | ||
311 | * address is not required, the kernel will select an address in the proper | ||
312 | * range. | ||
313 | * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. | ||
314 | * | ||
315 | * Note: The default shared memory limit is quite low on many kernels, | ||
316 | * you may need to increase it via: | ||
317 | * | ||
318 | * echo 268435456 > /proc/sys/kernel/shmmax | ||
319 | * | ||
320 | * This will increase the maximum size per shared memory segment to 256MB. | ||
321 | * The other limit that you will hit eventually is shmall which is the | ||
322 | * total amount of shared memory in pages. To set it to 16GB on a system | ||
323 | * with a 4kB pagesize do: | ||
324 | * | ||
325 | * echo 4194304 > /proc/sys/kernel/shmall | ||
326 | */ | 303 | */ |
327 | #include <stdlib.h> | ||
328 | #include <stdio.h> | ||
329 | #include <sys/types.h> | ||
330 | #include <sys/ipc.h> | ||
331 | #include <sys/shm.h> | ||
332 | #include <sys/mman.h> | ||
333 | |||
334 | #ifndef SHM_HUGETLB | ||
335 | #define SHM_HUGETLB 04000 | ||
336 | #endif | ||
337 | |||
338 | #define LENGTH (256UL*1024*1024) | ||
339 | |||
340 | #define dprintf(x) printf(x) | ||
341 | |||
342 | #define ADDR (void *)(0x0UL) /* let kernel choose address */ | ||
343 | #define SHMAT_FLAGS (0) | ||
344 | |||
345 | int main(void) | ||
346 | { | ||
347 | int shmid; | ||
348 | unsigned long i; | ||
349 | char *shmaddr; | ||
350 | |||
351 | if ((shmid = shmget(2, LENGTH, | ||
352 | SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { | ||
353 | perror("shmget"); | ||
354 | exit(1); | ||
355 | } | ||
356 | printf("shmid: 0x%x\n", shmid); | ||
357 | |||
358 | shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); | ||
359 | if (shmaddr == (char *)-1) { | ||
360 | perror("Shared memory attach failure"); | ||
361 | shmctl(shmid, IPC_RMID, NULL); | ||
362 | exit(2); | ||
363 | } | ||
364 | printf("shmaddr: %p\n", shmaddr); | ||
365 | |||
366 | dprintf("Starting the writes:\n"); | ||
367 | for (i = 0; i < LENGTH; i++) { | ||
368 | shmaddr[i] = (char)(i); | ||
369 | if (!(i % (1024 * 1024))) | ||
370 | dprintf("."); | ||
371 | } | ||
372 | dprintf("\n"); | ||
373 | |||
374 | dprintf("Starting the Check..."); | ||
375 | for (i = 0; i < LENGTH; i++) | ||
376 | if (shmaddr[i] != (char)i) | ||
377 | printf("\nIndex %lu mismatched\n", i); | ||
378 | dprintf("Done.\n"); | ||
379 | |||
380 | if (shmdt((const void *)shmaddr) != 0) { | ||
381 | perror("Detach failure"); | ||
382 | shmctl(shmid, IPC_RMID, NULL); | ||
383 | exit(3); | ||
384 | } | ||
385 | |||
386 | shmctl(shmid, IPC_RMID, NULL); | ||
387 | |||
388 | return 0; | ||
389 | } | ||
390 | 304 | ||
391 | ******************************************************************* | 305 | ******************************************************************* |
392 | 306 | ||
393 | /* | 307 | /* |
394 | * Example of using huge page memory in a user application using the mmap | 308 | * hugepage-mmap: see Documentation/vm/hugepage-mmap.c |
395 | * system call. Before running this application, make sure that the | ||
396 | * administrator has mounted the hugetlbfs filesystem (on some directory | ||
397 | * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this | ||
398 | * example, the app is requesting memory of size 256MB that is backed by | ||
399 | * huge pages. | ||
400 | * | ||
401 | * For the ia64 architecture, the Linux kernel reserves Region number 4 for | ||
402 | * huge pages. That means that if one requires a fixed address, a huge page | ||
403 | * aligned address starting with 0x800000... will be required. If a fixed | ||
404 | * address is not required, the kernel will select an address in the proper | ||
405 | * range. | ||
406 | * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. | ||
407 | */ | 309 | */ |
408 | #include <stdlib.h> | ||
409 | #include <stdio.h> | ||
410 | #include <unistd.h> | ||
411 | #include <sys/mman.h> | ||
412 | #include <fcntl.h> | ||
413 | |||
414 | #define FILE_NAME "/mnt/hugepagefile" | ||
415 | #define LENGTH (256UL*1024*1024) | ||
416 | #define PROTECTION (PROT_READ | PROT_WRITE) | ||
417 | |||
418 | #define ADDR (void *)(0x0UL) /* let kernel choose address */ | ||
419 | #define FLAGS (MAP_SHARED) | ||
420 | |||
421 | void check_bytes(char *addr) | ||
422 | { | ||
423 | printf("First hex is %x\n", *((unsigned int *)addr)); | ||
424 | } | ||
425 | |||
426 | void write_bytes(char *addr) | ||
427 | { | ||
428 | unsigned long i; | ||
429 | |||
430 | for (i = 0; i < LENGTH; i++) | ||
431 | *(addr + i) = (char)i; | ||
432 | } | ||
433 | |||
434 | void read_bytes(char *addr) | ||
435 | { | ||
436 | unsigned long i; | ||
437 | |||
438 | check_bytes(addr); | ||
439 | for (i = 0; i < LENGTH; i++) | ||
440 | if (*(addr + i) != (char)i) { | ||
441 | printf("Mismatch at %lu\n", i); | ||
442 | break; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | int main(void) | ||
447 | { | ||
448 | void *addr; | ||
449 | int fd; | ||
450 | |||
451 | fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); | ||
452 | if (fd < 0) { | ||
453 | perror("Open failed"); | ||
454 | exit(1); | ||
455 | } | ||
456 | |||
457 | addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); | ||
458 | if (addr == MAP_FAILED) { | ||
459 | perror("mmap"); | ||
460 | unlink(FILE_NAME); | ||
461 | exit(1); | ||
462 | } | ||
463 | |||
464 | printf("Returned address is %p\n", addr); | ||
465 | check_bytes(addr); | ||
466 | write_bytes(addr); | ||
467 | read_bytes(addr); | ||
468 | |||
469 | munmap(addr, LENGTH); | ||
470 | close(fd); | ||
471 | unlink(FILE_NAME); | ||
472 | |||
473 | return 0; | ||
474 | } | ||
diff --git a/Documentation/vm/map_hugetlb.c b/Documentation/vm/map_hugetlb.c index e2bdae37f499..9969c7d9f985 100644 --- a/Documentation/vm/map_hugetlb.c +++ b/Documentation/vm/map_hugetlb.c | |||
@@ -31,12 +31,12 @@ | |||
31 | #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) | 31 | #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | void check_bytes(char *addr) | 34 | static void check_bytes(char *addr) |
35 | { | 35 | { |
36 | printf("First hex is %x\n", *((unsigned int *)addr)); | 36 | printf("First hex is %x\n", *((unsigned int *)addr)); |
37 | } | 37 | } |
38 | 38 | ||
39 | void write_bytes(char *addr) | 39 | static void write_bytes(char *addr) |
40 | { | 40 | { |
41 | unsigned long i; | 41 | unsigned long i; |
42 | 42 | ||
@@ -44,7 +44,7 @@ void write_bytes(char *addr) | |||
44 | *(addr + i) = (char)i; | 44 | *(addr + i) = (char)i; |
45 | } | 45 | } |
46 | 46 | ||
47 | void read_bytes(char *addr) | 47 | static void read_bytes(char *addr) |
48 | { | 48 | { |
49 | unsigned long i; | 49 | unsigned long i; |
50 | 50 | ||
diff --git a/Documentation/voyager.txt b/Documentation/voyager.txt deleted file mode 100644 index 2749af552cdf..000000000000 --- a/Documentation/voyager.txt +++ /dev/null | |||
@@ -1,95 +0,0 @@ | |||
1 | Running Linux on the Voyager Architecture | ||
2 | ========================================= | ||
3 | |||
4 | For full details and current project status, see | ||
5 | |||
6 | http://www.hansenpartnership.com/voyager | ||
7 | |||
8 | The voyager architecture was designed by NCR in the mid 80s to be a | ||
9 | fully SMP capable RAS computing architecture built around intel's 486 | ||
10 | chip set. The voyager came in three levels of architectural | ||
11 | sophistication: 3,4 and 5 --- 1 and 2 never made it out of prototype. | ||
12 | The linux patches support only the Level 5 voyager architecture (any | ||
13 | machine class 3435 and above). | ||
14 | |||
15 | The Voyager Architecture | ||
16 | ------------------------ | ||
17 | |||
18 | Voyager machines consist of a Baseboard with a 386 diagnostic | ||
19 | processor, a Power Supply Interface (PSI) a Primary and possibly | ||
20 | Secondary Microchannel bus and between 2 and 20 voyager slots. The | ||
21 | voyager slots can be populated with memory and cpu cards (up to 4GB | ||
22 | memory and from 1 486 to 32 Pentium Pro processors). Internally, the | ||
23 | voyager has a dual arbitrated system bus and a configuration and test | ||
24 | bus (CAT). The voyager bus speed is 40MHz. Therefore (since all | ||
25 | voyager cards are dual ported for each system bus) the maximum | ||
26 | transfer rate is 320Mb/s but only if you have your slot configuration | ||
27 | tuned (only memory cards can communicate with both busses at once, CPU | ||
28 | cards utilise them one at a time). | ||
29 | |||
30 | Voyager SMP | ||
31 | ----------- | ||
32 | |||
33 | Since voyager was the first intel based SMP system, it is slightly | ||
34 | more primitive than the Intel IO-APIC approach to SMP. Voyager allows | ||
35 | arbitrary interrupt routing (including processor affinity routing) of | ||
36 | all 16 PC type interrupts. However it does this by using a modified | ||
37 | 5259 master/slave chip set instead of an APIC bus. Additionally, | ||
38 | voyager supports Cross Processor Interrupts (CPI) equivalent to the | ||
39 | APIC IPIs. There are two routed voyager interrupt lines provided to | ||
40 | each slot. | ||
41 | |||
42 | Processor Cards | ||
43 | --------------- | ||
44 | |||
45 | These come in single, dyadic and quad configurations (the quads are | ||
46 | problematic--see later). The maximum configuration is 8 quad cards | ||
47 | for 32 way SMP. | ||
48 | |||
49 | Quad Processors | ||
50 | --------------- | ||
51 | |||
52 | Because voyager only supplies two interrupt lines to each Processor | ||
53 | card, the Quad processors have to be configured (and Bootstrapped) in | ||
54 | as a pair of Master/Slave processors. | ||
55 | |||
56 | In fact, most Quad cards only accept one VIC interrupt line, so they | ||
57 | have one interrupt handling processor (called the VIC extended | ||
58 | processor) and three non-interrupt handling processors. | ||
59 | |||
60 | Current Status | ||
61 | -------------- | ||
62 | |||
63 | The System will boot on Mono, Dyad and Quad cards. There was | ||
64 | originally a Quad boot problem which has been fixed by proper gdt | ||
65 | alignment in the initial boot loader. If you still cannot get your | ||
66 | voyager system to boot, email me at: | ||
67 | |||
68 | <J.E.J.Bottomley@HansenPartnership.com> | ||
69 | |||
70 | |||
71 | The Quad cards now support using the separate Quad CPI vectors instead | ||
72 | of going through the VIC mailbox system. | ||
73 | |||
74 | The Level 4 architecture (3430 and 3360 Machines) should also work | ||
75 | fine. | ||
76 | |||
77 | Dump Switch | ||
78 | ----------- | ||
79 | |||
80 | The voyager dump switch sends out a broadcast NMI which the voyager | ||
81 | code intercepts and does a task dump. | ||
82 | |||
83 | Power Switch | ||
84 | ------------ | ||
85 | |||
86 | The front panel power switch is intercepted by the kernel and should | ||
87 | cause a system shutdown and power off. | ||
88 | |||
89 | A Note About Mixed CPU Systems | ||
90 | ------------------------------ | ||
91 | |||
92 | Linux isn't designed to handle mixed CPU systems very well. In order | ||
93 | to get everything going you *must* make sure that your lowest | ||
94 | capability CPU is used for booting. Also, mixing CPU classes | ||
95 | (e.g. 486 and 586) is really not going to work very well at all. | ||