diff options
Diffstat (limited to 'Documentation')
25 files changed, 1382 insertions, 546 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 805db4b2cba6..cc7a8c39fb6f 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt | |||
@@ -26,7 +26,7 @@ Part Ia - Using large dma-coherent buffers | |||
26 | 26 | ||
27 | void * | 27 | void * |
28 | dma_alloc_coherent(struct device *dev, size_t size, | 28 | dma_alloc_coherent(struct device *dev, size_t size, |
29 | dma_addr_t *dma_handle, int flag) | 29 | dma_addr_t *dma_handle, gfp_t flag) |
30 | void * | 30 | void * |
31 | pci_alloc_consistent(struct pci_dev *dev, size_t size, | 31 | pci_alloc_consistent(struct pci_dev *dev, size_t size, |
32 | dma_addr_t *dma_handle) | 32 | dma_addr_t *dma_handle) |
@@ -38,7 +38,7 @@ to make sure to flush the processor's write buffers before telling | |||
38 | devices to read that memory.) | 38 | devices to read that memory.) |
39 | 39 | ||
40 | This routine allocates a region of <size> bytes of consistent memory. | 40 | This routine allocates a region of <size> bytes of consistent memory. |
41 | it also returns a <dma_handle> which may be cast to an unsigned | 41 | It also returns a <dma_handle> which may be cast to an unsigned |
42 | integer the same width as the bus and used as the physical address | 42 | integer the same width as the bus and used as the physical address |
43 | base of the region. | 43 | base of the region. |
44 | 44 | ||
@@ -52,21 +52,21 @@ The simplest way to do that is to use the dma_pool calls (see below). | |||
52 | 52 | ||
53 | The flag parameter (dma_alloc_coherent only) allows the caller to | 53 | The flag parameter (dma_alloc_coherent only) allows the caller to |
54 | specify the GFP_ flags (see kmalloc) for the allocation (the | 54 | specify the GFP_ flags (see kmalloc) for the allocation (the |
55 | implementation may chose to ignore flags that affect the location of | 55 | implementation may choose to ignore flags that affect the location of |
56 | the returned memory, like GFP_DMA). For pci_alloc_consistent, you | 56 | the returned memory, like GFP_DMA). For pci_alloc_consistent, you |
57 | must assume GFP_ATOMIC behaviour. | 57 | must assume GFP_ATOMIC behaviour. |
58 | 58 | ||
59 | void | 59 | void |
60 | dma_free_coherent(struct device *dev, size_t size, void *cpu_addr | 60 | dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, |
61 | dma_addr_t dma_handle) | 61 | dma_addr_t dma_handle) |
62 | void | 62 | void |
63 | pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr | 63 | pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr, |
64 | dma_addr_t dma_handle) | 64 | dma_addr_t dma_handle) |
65 | 65 | ||
66 | Free the region of consistent memory you previously allocated. dev, | 66 | Free the region of consistent memory you previously allocated. dev, |
67 | size and dma_handle must all be the same as those passed into the | 67 | size and dma_handle must all be the same as those passed into the |
68 | consistent allocate. cpu_addr must be the virtual address returned by | 68 | consistent allocate. cpu_addr must be the virtual address returned by |
69 | the consistent allocate | 69 | the consistent allocate. |
70 | 70 | ||
71 | 71 | ||
72 | Part Ib - Using small dma-coherent buffers | 72 | Part Ib - Using small dma-coherent buffers |
@@ -77,9 +77,9 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h> | |||
77 | Many drivers need lots of small dma-coherent memory regions for DMA | 77 | Many drivers need lots of small dma-coherent memory regions for DMA |
78 | descriptors or I/O buffers. Rather than allocating in units of a page | 78 | descriptors or I/O buffers. Rather than allocating in units of a page |
79 | or more using dma_alloc_coherent(), you can use DMA pools. These work | 79 | or more using dma_alloc_coherent(), you can use DMA pools. These work |
80 | much like a struct kmem_cache, except that they use the dma-coherent allocator | 80 | much like a struct kmem_cache, except that they use the dma-coherent allocator, |
81 | not __get_free_pages(). Also, they understand common hardware constraints | 81 | not __get_free_pages(). Also, they understand common hardware constraints |
82 | for alignment, like queue heads needing to be aligned on N byte boundaries. | 82 | for alignment, like queue heads needing to be aligned on N-byte boundaries. |
83 | 83 | ||
84 | 84 | ||
85 | struct dma_pool * | 85 | struct dma_pool * |
@@ -102,15 +102,15 @@ crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated | |||
102 | from this pool must not cross 4KByte boundaries. | 102 | from this pool must not cross 4KByte boundaries. |
103 | 103 | ||
104 | 104 | ||
105 | void *dma_pool_alloc(struct dma_pool *pool, int gfp_flags, | 105 | void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags, |
106 | dma_addr_t *dma_handle); | 106 | dma_addr_t *dma_handle); |
107 | 107 | ||
108 | void *pci_pool_alloc(struct pci_pool *pool, int gfp_flags, | 108 | void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags, |
109 | dma_addr_t *dma_handle); | 109 | dma_addr_t *dma_handle); |
110 | 110 | ||
111 | This allocates memory from the pool; the returned memory will meet the size | 111 | This allocates memory from the pool; the returned memory will meet the size |
112 | and alignment requirements specified at creation time. Pass GFP_ATOMIC to | 112 | and alignment requirements specified at creation time. Pass GFP_ATOMIC to |
113 | prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks) | 113 | prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks), |
114 | pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns | 114 | pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns |
115 | two values: an address usable by the cpu, and the dma address usable by the | 115 | two values: an address usable by the cpu, and the dma address usable by the |
116 | pool's device. | 116 | pool's device. |
@@ -123,7 +123,7 @@ pool's device. | |||
123 | dma_addr_t addr); | 123 | dma_addr_t addr); |
124 | 124 | ||
125 | This puts memory back into the pool. The pool is what was passed to | 125 | This puts memory back into the pool. The pool is what was passed to |
126 | the pool allocation routine; the cpu and dma addresses are what | 126 | the pool allocation routine; the cpu (vaddr) and dma addresses are what |
127 | were returned when that routine allocated the memory being freed. | 127 | were returned when that routine allocated the memory being freed. |
128 | 128 | ||
129 | 129 | ||
@@ -209,18 +209,18 @@ Notes: Not all memory regions in a machine can be mapped by this | |||
209 | API. Further, regions that appear to be physically contiguous in | 209 | API. Further, regions that appear to be physically contiguous in |
210 | kernel virtual space may not be contiguous as physical memory. Since | 210 | kernel virtual space may not be contiguous as physical memory. Since |
211 | this API does not provide any scatter/gather capability, it will fail | 211 | this API does not provide any scatter/gather capability, it will fail |
212 | if the user tries to map a non physically contiguous piece of memory. | 212 | if the user tries to map a non-physically contiguous piece of memory. |
213 | For this reason, it is recommended that memory mapped by this API be | 213 | For this reason, it is recommended that memory mapped by this API be |
214 | obtained only from sources which guarantee to be physically contiguous | 214 | obtained only from sources which guarantee it to be physically contiguous |
215 | (like kmalloc). | 215 | (like kmalloc). |
216 | 216 | ||
217 | Further, the physical address of the memory must be within the | 217 | Further, the physical address of the memory must be within the |
218 | dma_mask of the device (the dma_mask represents a bit mask of the | 218 | dma_mask of the device (the dma_mask represents a bit mask of the |
219 | addressable region for the device. i.e. if the physical address of | 219 | addressable region for the device. I.e., if the physical address of |
220 | the memory anded with the dma_mask is still equal to the physical | 220 | the memory anded with the dma_mask is still equal to the physical |
221 | address, then the device can perform DMA to the memory). In order to | 221 | address, then the device can perform DMA to the memory). In order to |
222 | ensure that the memory allocated by kmalloc is within the dma_mask, | 222 | ensure that the memory allocated by kmalloc is within the dma_mask, |
223 | the driver may specify various platform dependent flags to restrict | 223 | the driver may specify various platform-dependent flags to restrict |
224 | the physical memory range of the allocation (e.g. on x86, GFP_DMA | 224 | the physical memory range of the allocation (e.g. on x86, GFP_DMA |
225 | guarantees to be within the first 16MB of available physical memory, | 225 | guarantees to be within the first 16MB of available physical memory, |
226 | as required by ISA devices). | 226 | as required by ISA devices). |
@@ -244,14 +244,14 @@ are guaranteed also to be cache line boundaries). | |||
244 | 244 | ||
245 | DMA_TO_DEVICE synchronisation must be done after the last modification | 245 | DMA_TO_DEVICE synchronisation must be done after the last modification |
246 | of the memory region by the software and before it is handed off to | 246 | of the memory region by the software and before it is handed off to |
247 | the driver. Once this primitive is used. Memory covered by this | 247 | the driver. Once this primitive is used, memory covered by this |
248 | primitive should be treated as read only by the device. If the device | 248 | primitive should be treated as read-only by the device. If the device |
249 | may write to it at any point, it should be DMA_BIDIRECTIONAL (see | 249 | may write to it at any point, it should be DMA_BIDIRECTIONAL (see |
250 | below). | 250 | below). |
251 | 251 | ||
252 | DMA_FROM_DEVICE synchronisation must be done before the driver | 252 | DMA_FROM_DEVICE synchronisation must be done before the driver |
253 | accesses data that may be changed by the device. This memory should | 253 | accesses data that may be changed by the device. This memory should |
254 | be treated as read only by the driver. If the driver needs to write | 254 | be treated as read-only by the driver. If the driver needs to write |
255 | to it at any point, it should be DMA_BIDIRECTIONAL (see below). | 255 | to it at any point, it should be DMA_BIDIRECTIONAL (see below). |
256 | 256 | ||
257 | DMA_BIDIRECTIONAL requires special handling: it means that the driver | 257 | DMA_BIDIRECTIONAL requires special handling: it means that the driver |
@@ -261,7 +261,7 @@ you must always sync bidirectional memory twice: once before the | |||
261 | memory is handed off to the device (to make sure all memory changes | 261 | memory is handed off to the device (to make sure all memory changes |
262 | are flushed from the processor) and once before the data may be | 262 | are flushed from the processor) and once before the data may be |
263 | accessed after being used by the device (to make sure any processor | 263 | accessed after being used by the device (to make sure any processor |
264 | cache lines are updated with data that the device may have changed. | 264 | cache lines are updated with data that the device may have changed). |
265 | 265 | ||
266 | void | 266 | void |
267 | dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, | 267 | dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, |
@@ -302,8 +302,8 @@ pci_dma_mapping_error(dma_addr_t dma_addr) | |||
302 | 302 | ||
303 | In some circumstances dma_map_single and dma_map_page will fail to create | 303 | In some circumstances dma_map_single and dma_map_page will fail to create |
304 | a mapping. A driver can check for these errors by testing the returned | 304 | a mapping. A driver can check for these errors by testing the returned |
305 | dma address with dma_mapping_error(). A non zero return value means the mapping | 305 | dma address with dma_mapping_error(). A non-zero return value means the mapping |
306 | could not be created and the driver should take appropriate action (eg | 306 | could not be created and the driver should take appropriate action (e.g. |
307 | reduce current DMA mapping usage or delay and try again later). | 307 | reduce current DMA mapping usage or delay and try again later). |
308 | 308 | ||
309 | int | 309 | int |
@@ -315,7 +315,7 @@ reduce current DMA mapping usage or delay and try again later). | |||
315 | 315 | ||
316 | Maps a scatter/gather list from the block layer. | 316 | Maps a scatter/gather list from the block layer. |
317 | 317 | ||
318 | Returns: the number of physical segments mapped (this may be shorted | 318 | Returns: the number of physical segments mapped (this may be shorter |
319 | than <nents> passed in if the block layer determines that some | 319 | than <nents> passed in if the block layer determines that some |
320 | elements of the scatter/gather list are physically adjacent and thus | 320 | elements of the scatter/gather list are physically adjacent and thus |
321 | may be mapped with a single entry). | 321 | may be mapped with a single entry). |
@@ -357,7 +357,7 @@ accessed sg->address and sg->length as shown above. | |||
357 | pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, | 357 | pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, |
358 | int nents, int direction) | 358 | int nents, int direction) |
359 | 359 | ||
360 | unmap the previously mapped scatter/gather list. All the parameters | 360 | Unmap the previously mapped scatter/gather list. All the parameters |
361 | must be the same as those passed in to the scatter/gather mapping | 361 | must be the same as those passed in to the scatter/gather mapping |
362 | API. | 362 | API. |
363 | 363 | ||
@@ -377,7 +377,7 @@ void | |||
377 | pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, | 377 | pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, |
378 | int nelems, int direction) | 378 | int nelems, int direction) |
379 | 379 | ||
380 | synchronise a single contiguous or scatter/gather mapping. All the | 380 | Synchronise a single contiguous or scatter/gather mapping. All the |
381 | parameters must be the same as those passed into the single mapping | 381 | parameters must be the same as those passed into the single mapping |
382 | API. | 382 | API. |
383 | 383 | ||
@@ -406,7 +406,7 @@ API at all. | |||
406 | 406 | ||
407 | void * | 407 | void * |
408 | dma_alloc_noncoherent(struct device *dev, size_t size, | 408 | dma_alloc_noncoherent(struct device *dev, size_t size, |
409 | dma_addr_t *dma_handle, int flag) | 409 | dma_addr_t *dma_handle, gfp_t flag) |
410 | 410 | ||
411 | Identical to dma_alloc_coherent() except that the platform will | 411 | Identical to dma_alloc_coherent() except that the platform will |
412 | choose to return either consistent or non-consistent memory as it sees | 412 | choose to return either consistent or non-consistent memory as it sees |
@@ -426,34 +426,34 @@ void | |||
426 | dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, | 426 | dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, |
427 | dma_addr_t dma_handle) | 427 | dma_addr_t dma_handle) |
428 | 428 | ||
429 | free memory allocated by the nonconsistent API. All parameters must | 429 | Free memory allocated by the nonconsistent API. All parameters must |
430 | be identical to those passed in (and returned by | 430 | be identical to those passed in (and returned by |
431 | dma_alloc_noncoherent()). | 431 | dma_alloc_noncoherent()). |
432 | 432 | ||
433 | int | 433 | int |
434 | dma_is_consistent(struct device *dev, dma_addr_t dma_handle) | 434 | dma_is_consistent(struct device *dev, dma_addr_t dma_handle) |
435 | 435 | ||
436 | returns true if the device dev is performing consistent DMA on the memory | 436 | Returns true if the device dev is performing consistent DMA on the memory |
437 | area pointed to by the dma_handle. | 437 | area pointed to by the dma_handle. |
438 | 438 | ||
439 | int | 439 | int |
440 | dma_get_cache_alignment(void) | 440 | dma_get_cache_alignment(void) |
441 | 441 | ||
442 | returns the processor cache alignment. This is the absolute minimum | 442 | Returns the processor cache alignment. This is the absolute minimum |
443 | alignment *and* width that you must observe when either mapping | 443 | alignment *and* width that you must observe when either mapping |
444 | memory or doing partial flushes. | 444 | memory or doing partial flushes. |
445 | 445 | ||
446 | Notes: This API may return a number *larger* than the actual cache | 446 | Notes: This API may return a number *larger* than the actual cache |
447 | line, but it will guarantee that one or more cache lines fit exactly | 447 | line, but it will guarantee that one or more cache lines fit exactly |
448 | into the width returned by this call. It will also always be a power | 448 | into the width returned by this call. It will also always be a power |
449 | of two for easy alignment | 449 | of two for easy alignment. |
450 | 450 | ||
451 | void | 451 | void |
452 | dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, | 452 | dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, |
453 | unsigned long offset, size_t size, | 453 | unsigned long offset, size_t size, |
454 | enum dma_data_direction direction) | 454 | enum dma_data_direction direction) |
455 | 455 | ||
456 | does a partial sync. starting at offset and continuing for size. You | 456 | Does a partial sync, starting at offset and continuing for size. You |
457 | must be careful to observe the cache alignment and width when doing | 457 | must be careful to observe the cache alignment and width when doing |
458 | anything like this. You must also be extra careful about accessing | 458 | anything like this. You must also be extra careful about accessing |
459 | memory you intend to sync partially. | 459 | memory you intend to sync partially. |
@@ -472,21 +472,20 @@ dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | |||
472 | dma_addr_t device_addr, size_t size, int | 472 | dma_addr_t device_addr, size_t size, int |
473 | flags) | 473 | flags) |
474 | 474 | ||
475 | |||
476 | Declare region of memory to be handed out by dma_alloc_coherent when | 475 | Declare region of memory to be handed out by dma_alloc_coherent when |
477 | it's asked for coherent memory for this device. | 476 | it's asked for coherent memory for this device. |
478 | 477 | ||
479 | bus_addr is the physical address to which the memory is currently | 478 | bus_addr is the physical address to which the memory is currently |
480 | assigned in the bus responding region (this will be used by the | 479 | assigned in the bus responding region (this will be used by the |
481 | platform to perform the mapping) | 480 | platform to perform the mapping). |
482 | 481 | ||
483 | device_addr is the physical address the device needs to be programmed | 482 | device_addr is the physical address the device needs to be programmed |
484 | with to actually address this memory (this will be handed out as the | 483 | with to actually address this memory (this will be handed out as the |
485 | dma_addr_t in dma_alloc_coherent()) | 484 | dma_addr_t in dma_alloc_coherent()). |
486 | 485 | ||
487 | size is the size of the area (must be a multiple of PAGE_SIZE). | 486 | size is the size of the area (must be a multiple of PAGE_SIZE). |
488 | 487 | ||
489 | flags can be or'd together and are | 488 | flags can be or'd together and are: |
490 | 489 | ||
491 | DMA_MEMORY_MAP - request that the memory returned from | 490 | DMA_MEMORY_MAP - request that the memory returned from |
492 | dma_alloc_coherent() be directly writable. | 491 | dma_alloc_coherent() be directly writable. |
@@ -494,7 +493,7 @@ dma_alloc_coherent() be directly writable. | |||
494 | DMA_MEMORY_IO - request that the memory returned from | 493 | DMA_MEMORY_IO - request that the memory returned from |
495 | dma_alloc_coherent() be addressable using read/write/memcpy_toio etc. | 494 | dma_alloc_coherent() be addressable using read/write/memcpy_toio etc. |
496 | 495 | ||
497 | One or both of these flags must be present | 496 | One or both of these flags must be present. |
498 | 497 | ||
499 | DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by | 498 | DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by |
500 | dma_alloc_coherent of any child devices of this one (for memory residing | 499 | dma_alloc_coherent of any child devices of this one (for memory residing |
@@ -528,7 +527,7 @@ dma_release_declared_memory(struct device *dev) | |||
528 | Remove the memory region previously declared from the system. This | 527 | Remove the memory region previously declared from the system. This |
529 | API performs *no* in-use checking for this region and will return | 528 | API performs *no* in-use checking for this region and will return |
530 | unconditionally having removed all the required structures. It is the | 529 | unconditionally having removed all the required structures. It is the |
531 | drivers job to ensure that no parts of this memory region are | 530 | driver's job to ensure that no parts of this memory region are |
532 | currently in use. | 531 | currently in use. |
533 | 532 | ||
534 | void * | 533 | void * |
@@ -538,12 +537,10 @@ dma_mark_declared_memory_occupied(struct device *dev, | |||
538 | This is used to occupy specific regions of the declared space | 537 | This is used to occupy specific regions of the declared space |
539 | (dma_alloc_coherent() will hand out the first free region it finds). | 538 | (dma_alloc_coherent() will hand out the first free region it finds). |
540 | 539 | ||
541 | device_addr is the *device* address of the region requested | 540 | device_addr is the *device* address of the region requested. |
542 | 541 | ||
543 | size is the size (and should be a page sized multiple). | 542 | size is the size (and should be a page-sized multiple). |
544 | 543 | ||
545 | The return value will be either a pointer to the processor virtual | 544 | The return value will be either a pointer to the processor virtual |
546 | address of the memory, or an error (via PTR_ERR()) if any part of the | 545 | address of the memory, or an error (via PTR_ERR()) if any part of the |
547 | region is occupied. | 546 | region is occupied. |
548 | |||
549 | |||
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index eb42bf9847cb..b886f52a9aac 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -380,7 +380,6 @@ X!Edrivers/base/interface.c | |||
380 | !Edrivers/base/bus.c | 380 | !Edrivers/base/bus.c |
381 | </sect1> | 381 | </sect1> |
382 | <sect1><title>Device Drivers Power Management</title> | 382 | <sect1><title>Device Drivers Power Management</title> |
383 | !Edrivers/base/power/main.c | ||
384 | !Edrivers/base/power/resume.c | 383 | !Edrivers/base/power/resume.c |
385 | !Edrivers/base/power/suspend.c | 384 | !Edrivers/base/power/suspend.c |
386 | </sect1> | 385 | </sect1> |
@@ -398,12 +397,12 @@ X!Edrivers/acpi/pci_bind.c | |||
398 | --> | 397 | --> |
399 | </sect1> | 398 | </sect1> |
400 | <sect1><title>Device drivers PnP support</title> | 399 | <sect1><title>Device drivers PnP support</title> |
401 | !Edrivers/pnp/core.c | 400 | !Idrivers/pnp/core.c |
402 | <!-- No correct structured comments | 401 | <!-- No correct structured comments |
403 | X!Edrivers/pnp/system.c | 402 | X!Edrivers/pnp/system.c |
404 | --> | 403 | --> |
405 | !Edrivers/pnp/card.c | 404 | !Edrivers/pnp/card.c |
406 | !Edrivers/pnp/driver.c | 405 | !Idrivers/pnp/driver.c |
407 | !Edrivers/pnp/manager.c | 406 | !Edrivers/pnp/manager.c |
408 | !Edrivers/pnp/support.c | 407 | !Edrivers/pnp/support.c |
409 | </sect1> | 408 | </sect1> |
@@ -704,14 +703,22 @@ X!Idrivers/video/console/fonts.c | |||
704 | 703 | ||
705 | <chapter id="splice"> | 704 | <chapter id="splice"> |
706 | <title>splice API</title> | 705 | <title>splice API</title> |
707 | <para>) | 706 | <para> |
708 | splice is a method for moving blocks of data around inside the | 707 | splice is a method for moving blocks of data around inside the |
709 | kernel, without continually transferring it between the kernel | 708 | kernel, without continually transferring them between the kernel |
710 | and user space. | 709 | and user space. |
711 | </para> | 710 | </para> |
712 | !Iinclude/linux/splice.h | ||
713 | !Ffs/splice.c | 711 | !Ffs/splice.c |
714 | </chapter> | 712 | </chapter> |
715 | 713 | ||
714 | <chapter id="pipes"> | ||
715 | <title>pipes API</title> | ||
716 | <para> | ||
717 | Pipe interfaces are all for in-kernel (builtin image) use. | ||
718 | They are not exported for use by modules. | ||
719 | </para> | ||
720 | !Iinclude/linux/pipe_fs_i.h | ||
721 | !Ffs/pipe.c | ||
722 | </chapter> | ||
716 | 723 | ||
717 | </book> | 724 | </book> |
diff --git a/Documentation/block/barrier.txt b/Documentation/block/barrier.txt index 7d279f2f5bb2..2c2f24f634e4 100644 --- a/Documentation/block/barrier.txt +++ b/Documentation/block/barrier.txt | |||
@@ -79,9 +79,9 @@ and how to prepare flush requests. Note that the term 'ordered' is | |||
79 | used to indicate the whole sequence of performing barrier requests | 79 | used to indicate the whole sequence of performing barrier requests |
80 | including draining and flushing. | 80 | including draining and flushing. |
81 | 81 | ||
82 | typedef void (prepare_flush_fn)(request_queue_t *q, struct request *rq); | 82 | typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq); |
83 | 83 | ||
84 | int blk_queue_ordered(request_queue_t *q, unsigned ordered, | 84 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, |
85 | prepare_flush_fn *prepare_flush_fn); | 85 | prepare_flush_fn *prepare_flush_fn); |
86 | 86 | ||
87 | @q : the queue in question | 87 | @q : the queue in question |
@@ -92,7 +92,7 @@ int blk_queue_ordered(request_queue_t *q, unsigned ordered, | |||
92 | For example, SCSI disk driver's prepare_flush_fn looks like the | 92 | For example, SCSI disk driver's prepare_flush_fn looks like the |
93 | following. | 93 | following. |
94 | 94 | ||
95 | static void sd_prepare_flush(request_queue_t *q, struct request *rq) | 95 | static void sd_prepare_flush(struct request_queue *q, struct request *rq) |
96 | { | 96 | { |
97 | memset(rq->cmd, 0, sizeof(rq->cmd)); | 97 | memset(rq->cmd, 0, sizeof(rq->cmd)); |
98 | rq->cmd_type = REQ_TYPE_BLOCK_PC; | 98 | rq->cmd_type = REQ_TYPE_BLOCK_PC; |
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 3adaace328a6..8af392fc6ef0 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt | |||
@@ -740,12 +740,12 @@ Block now offers some simple generic functionality to help support command | |||
740 | queueing (typically known as tagged command queueing), i.e. manage more than | 740 | queueing (typically known as tagged command queueing), i.e. manage more than |
741 | one outstanding command on a queue at any given time. | 741 | one outstanding command on a queue at any given time. |
742 | 742 | ||
743 | blk_queue_init_tags(request_queue_t *q, int depth) | 743 | blk_queue_init_tags(struct request_queue *q, int depth) |
744 | 744 | ||
745 | Initialize internal command tagging structures for a maximum | 745 | Initialize internal command tagging structures for a maximum |
746 | depth of 'depth'. | 746 | depth of 'depth'. |
747 | 747 | ||
748 | blk_queue_free_tags(request_queue_t *q) | 748 | blk_queue_free_tags(struct request_queue *q) |
749 | 749 | ||
750 | Teardown tag info associated with the queue. This will be done | 750 | Teardown tag info associated with the queue. This will be done |
751 | automatically by block if blk_queue_cleanup() is called on a queue | 751 | automatically by block if blk_queue_cleanup() is called on a queue |
@@ -754,7 +754,7 @@ one outstanding command on a queue at any given time. | |||
754 | The above are initialization and exit management, the main helpers during | 754 | The above are initialization and exit management, the main helpers during |
755 | normal operations are: | 755 | normal operations are: |
756 | 756 | ||
757 | blk_queue_start_tag(request_queue_t *q, struct request *rq) | 757 | blk_queue_start_tag(struct request_queue *q, struct request *rq) |
758 | 758 | ||
759 | Start tagged operation for this request. A free tag number between | 759 | Start tagged operation for this request. A free tag number between |
760 | 0 and 'depth' is assigned to the request (rq->tag holds this number), | 760 | 0 and 'depth' is assigned to the request (rq->tag holds this number), |
@@ -762,7 +762,7 @@ normal operations are: | |||
762 | for this queue is already achieved (or if the tag wasn't started for | 762 | for this queue is already achieved (or if the tag wasn't started for |
763 | some other reason), 1 is returned. Otherwise 0 is returned. | 763 | some other reason), 1 is returned. Otherwise 0 is returned. |
764 | 764 | ||
765 | blk_queue_end_tag(request_queue_t *q, struct request *rq) | 765 | blk_queue_end_tag(struct request_queue *q, struct request *rq) |
766 | 766 | ||
767 | End tagged operation on this request. 'rq' is removed from the internal | 767 | End tagged operation on this request. 'rq' is removed from the internal |
768 | bookkeeping structures. | 768 | bookkeeping structures. |
@@ -781,7 +781,7 @@ queue. For instance, on IDE any tagged request error needs to clear both | |||
781 | the hardware and software block queue and enable the driver to sanely restart | 781 | the hardware and software block queue and enable the driver to sanely restart |
782 | all the outstanding requests. There's a third helper to do that: | 782 | all the outstanding requests. There's a third helper to do that: |
783 | 783 | ||
784 | blk_queue_invalidate_tags(request_queue_t *q) | 784 | blk_queue_invalidate_tags(struct request_queue *q) |
785 | 785 | ||
786 | Clear the internal block tag queue and re-add all the pending requests | 786 | Clear the internal block tag queue and re-add all the pending requests |
787 | to the request queue. The driver will receive them again on the | 787 | to the request queue. The driver will receive them again on the |
diff --git a/Documentation/block/request.txt b/Documentation/block/request.txt index 75924e2a6975..fff58acb40a3 100644 --- a/Documentation/block/request.txt +++ b/Documentation/block/request.txt | |||
@@ -83,6 +83,6 @@ struct bio *bio DBI First bio in request | |||
83 | 83 | ||
84 | struct bio *biotail DBI Last bio in request | 84 | struct bio *biotail DBI Last bio in request |
85 | 85 | ||
86 | request_queue_t *q DB Request queue this request belongs to | 86 | struct request_queue *q DB Request queue this request belongs to |
87 | 87 | ||
88 | struct request_list *rl B Request list this request came from | 88 | struct request_list *rl B Request list this request came from |
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 595a5ea4c690..7b9551fc6fe3 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -18,6 +18,7 @@ | |||
18 | *.moc | 18 | *.moc |
19 | *.mod.c | 19 | *.mod.c |
20 | *.o | 20 | *.o |
21 | *.o.* | ||
21 | *.orig | 22 | *.orig |
22 | *.out | 23 | *.out |
23 | 24 | ||
@@ -163,6 +164,8 @@ raid6tables.c | |||
163 | relocs | 164 | relocs |
164 | series | 165 | series |
165 | setup | 166 | setup |
167 | setup.bin | ||
168 | setup.elf | ||
166 | sim710_d.h* | 169 | sim710_d.h* |
167 | sImage | 170 | sImage |
168 | sm_tbl* | 171 | sm_tbl* |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c175eedadb5f..a43d2878a4ef 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -211,22 +211,6 @@ Who: Richard Purdie <rpurdie@rpsys.net> | |||
211 | 211 | ||
212 | --------------------------- | 212 | --------------------------- |
213 | 213 | ||
214 | What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) | ||
215 | When: December 2007 | ||
216 | Why: These functions are a leftover from 2.4 times. They have several | ||
217 | problems: | ||
218 | - Duplication of checks that are done in the device driver's | ||
219 | interrupt handler | ||
220 | - common I/O layer can't do device specific error recovery | ||
221 | - device driver can't be notified for conditions happening during | ||
222 | execution of the function | ||
223 | Device drivers should issue the read device characteristics and read | ||
224 | configuration data ccws and do the appropriate error handling | ||
225 | themselves. | ||
226 | Who: Cornelia Huck <cornelia.huck@de.ibm.com> | ||
227 | |||
228 | --------------------------- | ||
229 | |||
230 | What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers | 214 | What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers |
231 | When: September 2007 | 215 | When: September 2007 |
232 | Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific | 216 | Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific |
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.txt new file mode 100644 index 000000000000..af1628a1061c --- /dev/null +++ b/Documentation/filesystems/hfsplus.txt | |||
@@ -0,0 +1,59 @@ | |||
1 | |||
2 | Macintosh HFSPlus Filesystem for Linux | ||
3 | ====================================== | ||
4 | |||
5 | HFSPlus is a filesystem first introduced in MacOS 8.1. | ||
6 | HFSPlus has several extensions to HFS, including 32-bit allocation | ||
7 | blocks, 255-character unicode filenames, and file sizes of 2^63 bytes. | ||
8 | |||
9 | |||
10 | Mount options | ||
11 | ============= | ||
12 | |||
13 | When mounting an HFSPlus filesystem, the following options are accepted: | ||
14 | |||
15 | creator=cccc, type=cccc | ||
16 | Specifies the creator/type values as shown by the MacOS finder | ||
17 | used for creating new files. Default values: '????'. | ||
18 | |||
19 | uid=n, gid=n | ||
20 | Specifies the user/group that owns all files on the filesystem | ||
21 | that have uninitialized permissions structures. | ||
22 | Default: user/group id of the mounting process. | ||
23 | |||
24 | umask=n | ||
25 | Specifies the umask (in octal) used for files and directories | ||
26 | that have uninitialized permissions structures. | ||
27 | Default: umask of the mounting process. | ||
28 | |||
29 | session=n | ||
30 | Select the CDROM session to mount as HFSPlus filesystem. Defaults to | ||
31 | leaving that decision to the CDROM driver. This option will fail | ||
32 | with anything but a CDROM as the underlying device. | ||
33 | |||
34 | part=n | ||
35 | Select partition number n from the device. This option only makes | ||
36 | sense for CDROMs because they can't be partitioned under Linux. | ||
37 | For disk devices the generic partition parsing code does this | ||
38 | for us. Defaults to not parsing the partition table at all. | ||
39 | |||
40 | decompose | ||
41 | Decompose file name characters. | ||
42 | |||
43 | nodecompose | ||
44 | Do not decompose file name characters. | ||
45 | |||
46 | force | ||
47 | Used to force write access to volumes that are marked as journalled | ||
48 | or locked. Use at your own risk. | ||
49 | |||
50 | nls=cccc | ||
51 | Encoding to use when presenting file names. | ||
52 | |||
53 | |||
54 | References | ||
55 | ========== | ||
56 | |||
57 | kernel source: <file:fs/hfsplus> | ||
58 | |||
59 | Apple Technote 1150 http://developer.apple.com/technotes/tn/tn1150.html | ||
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 218a8650f48d..6bc2ba215df9 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt | |||
@@ -148,7 +148,7 @@ pin ... that won't always match the specified output value, because of | |||
148 | issues including wire-OR and output latencies. | 148 | issues including wire-OR and output latencies. |
149 | 149 | ||
150 | The get/set calls have no error returns because "invalid GPIO" should have | 150 | The get/set calls have no error returns because "invalid GPIO" should have |
151 | been reported earlier in gpio_set_direction(). However, note that not all | 151 | been reported earlier from gpio_direction_*(). However, note that not all |
152 | platforms can read the value of output pins; those that can't should always | 152 | platforms can read the value of output pins; those that can't should always |
153 | return zero. Also, using these calls for GPIOs that can't safely be accessed | 153 | return zero. Also, using these calls for GPIOs that can't safely be accessed |
154 | without sleeping (see below) is an error. | 154 | without sleeping (see below) is an error. |
@@ -239,7 +239,7 @@ map between them using calls like: | |||
239 | Those return either the corresponding number in the other namespace, or | 239 | Those return either the corresponding number in the other namespace, or |
240 | else a negative errno code if the mapping can't be done. (For example, | 240 | else a negative errno code if the mapping can't be done. (For example, |
241 | some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO | 241 | some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO |
242 | number that hasn't been marked as an input using gpio_set_direction(), or | 242 | number that wasn't set up as an input using gpio_direction_input(), or |
243 | to use an IRQ number that didn't originally come from gpio_to_irq(). | 243 | to use an IRQ number that didn't originally come from gpio_to_irq(). |
244 | 244 | ||
245 | These two mapping calls are expected to cost on the order of a single | 245 | These two mapping calls are expected to cost on the order of a single |
diff --git a/Documentation/hpet.txt b/Documentation/hpet.txt index b7a3dc38dd52..6ad52d9dad6c 100644 --- a/Documentation/hpet.txt +++ b/Documentation/hpet.txt | |||
@@ -5,7 +5,7 @@ for the 8254 and Real Time Clock (RTC) periodic timer functionality. | |||
5 | Each HPET can have up to 32 timers. It is possible to configure the | 5 | Each HPET can have up to 32 timers. It is possible to configure the |
6 | first two timers as legacy replacements for 8254 and RTC periodic timers. | 6 | first two timers as legacy replacements for 8254 and RTC periodic timers. |
7 | A specification done by Intel and Microsoft can be found at | 7 | A specification done by Intel and Microsoft can be found at |
8 | <http://www.intel.com/hardwaredesign/hpetspec.htm>. | 8 | <http://www.intel.com/technology/architecture/hpetspec.htm>. |
9 | 9 | ||
10 | The driver supports detection of HPET driver allocation and initialization | 10 | The driver supports detection of HPET driver allocation and initialization |
11 | of the HPET before the driver module_init routine is called. This enables | 11 | of the HPET before the driver module_init routine is called. This enables |
diff --git a/Documentation/hwmon/adm1031 b/Documentation/hwmon/adm1031 index 130a38382b98..be92a77da1d5 100644 --- a/Documentation/hwmon/adm1031 +++ b/Documentation/hwmon/adm1031 | |||
@@ -6,13 +6,13 @@ Supported chips: | |||
6 | Prefix: 'adm1030' | 6 | Prefix: 'adm1030' |
7 | Addresses scanned: I2C 0x2c to 0x2e | 7 | Addresses scanned: I2C 0x2c to 0x2e |
8 | Datasheet: Publicly available at the Analog Devices website | 8 | Datasheet: Publicly available at the Analog Devices website |
9 | http://products.analog.com/products/info.asp?product=ADM1030 | 9 | http://www.analog.com/en/prod/0%2C2877%2CADM1030%2C00.html |
10 | 10 | ||
11 | * Analog Devices ADM1031 | 11 | * Analog Devices ADM1031 |
12 | Prefix: 'adm1031' | 12 | Prefix: 'adm1031' |
13 | Addresses scanned: I2C 0x2c to 0x2e | 13 | Addresses scanned: I2C 0x2c to 0x2e |
14 | Datasheet: Publicly available at the Analog Devices website | 14 | Datasheet: Publicly available at the Analog Devices website |
15 | http://products.analog.com/products/info.asp?product=ADM1031 | 15 | http://www.analog.com/en/prod/0%2C2877%2CADM1031%2C00.html |
16 | 16 | ||
17 | Authors: | 17 | Authors: |
18 | Alexandre d'Alton <alex@alexdalton.org> | 18 | Alexandre d'Alton <alex@alexdalton.org> |
diff --git a/Documentation/hwmon/thmc50 b/Documentation/hwmon/thmc50 new file mode 100644 index 000000000000..9639ca93d559 --- /dev/null +++ b/Documentation/hwmon/thmc50 | |||
@@ -0,0 +1,74 @@ | |||
1 | Kernel driver thmc50 | ||
2 | ===================== | ||
3 | |||
4 | Supported chips: | ||
5 | * Analog Devices ADM1022 | ||
6 | Prefix: 'adm1022' | ||
7 | Addresses scanned: I2C 0x2c - 0x2e | ||
8 | Datasheet: http://www.analog.com/en/prod/0,2877,ADM1022,00.html | ||
9 | * Texas Instruments THMC50 | ||
10 | Prefix: 'thmc50' | ||
11 | Addresses scanned: I2C 0x2c - 0x2e | ||
12 | Datasheet: http://focus.ti.com/docs/prod/folders/print/thmc50.html | ||
13 | |||
14 | Author: Krzysztof Helt <krzysztof.h1@wp.pl> | ||
15 | |||
16 | This driver was derived from the 2.4 kernel thmc50.c source file. | ||
17 | |||
18 | Credits: | ||
19 | thmc50.c (2.4 kernel): | ||
20 | Frodo Looijaard <frodol@dds.nl> | ||
21 | Philip Edelbrock <phil@netroedge.com> | ||
22 | |||
23 | Module Parameters | ||
24 | ----------------- | ||
25 | |||
26 | * adm1022_temp3: short array | ||
27 | List of adapter,address pairs to force chips into ADM1022 mode with | ||
28 | second remote temperature. This does not work for original THMC50 chips. | ||
29 | |||
30 | Description | ||
31 | ----------- | ||
32 | |||
33 | The THMC50 implements: an internal temperature sensor, support for an | ||
34 | external diode-type temperature sensor (compatible w/ the diode sensor inside | ||
35 | many processors), and a controllable fan/analog_out DAC. For the temperature | ||
36 | sensors, limits can be set through the appropriate Overtemperature Shutdown | ||
37 | register and Hysteresis register. Each value can be set and read to half-degree | ||
38 | accuracy. An alarm is issued (usually to a connected LM78) when the | ||
39 | temperature gets higher than the Overtemperature Shutdown value; it stays on | ||
40 | until the temperature falls below the Hysteresis value. All temperatures are in | ||
41 | degrees Celsius, and are guaranteed within a range of -55 to +125 degrees. | ||
42 | |||
43 | The THMC50 only updates its values each 1.5 seconds; reading it more often | ||
44 | will do no harm, but will return 'old' values. | ||
45 | |||
46 | The THMC50 is usually used in combination with LM78-like chips, to measure | ||
47 | the temperature of the processor(s). | ||
48 | |||
49 | The ADM1022 works the same as THMC50 but it is faster (5 Hz instead of | ||
50 | 1 Hz for THMC50). It can also be put in a new mode to handle an additional | ||
51 | remote temperature sensor. The driver uses the mode set by the BIOS by default. | ||
52 | |||
53 | In case the BIOS is broken and the mode is set incorrectly, you can force | ||
54 | the mode with additional remote temperature with adm1022_temp3 parameter. | ||
55 | A typical symptom of wrong setting is a fan forced to full speed. | ||
56 | |||
57 | Driver Features | ||
58 | --------------- | ||
59 | |||
60 | The driver provides up to three temperatures: | ||
61 | |||
62 | temp1 -- internal | ||
63 | temp2 -- remote | ||
64 | temp3 -- 2nd remote only for ADM1022 | ||
65 | |||
66 | pwm1 -- fan speed (0 = stop, 255 = full) | ||
67 | pwm1_mode -- always 0 (DC mode) | ||
68 | |||
69 | The value of 0 for pwm1 also forces FAN_OFF signal from the chip, | ||
70 | so it stops fans even if the value 0 into the ANALOG_OUT register does not. | ||
71 | |||
72 | The driver was tested on Compaq AP550 with two ADM1022 chips (one works | ||
73 | in the temp3 mode), five temperature readings and two fans. | ||
74 | |||
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt index 09a1bafe2528..b963c3b4afa5 100644 --- a/Documentation/iostats.txt +++ b/Documentation/iostats.txt | |||
@@ -79,7 +79,7 @@ Field 8 -- # of milliseconds spent writing | |||
79 | measured from __make_request() to end_that_request_last()). | 79 | measured from __make_request() to end_that_request_last()). |
80 | Field 9 -- # of I/Os currently in progress | 80 | Field 9 -- # of I/Os currently in progress |
81 | The only field that should go to zero. Incremented as requests are | 81 | The only field that should go to zero. Incremented as requests are |
82 | given to appropriate request_queue_t and decremented as they finish. | 82 | given to appropriate struct request_queue and decremented as they finish. |
83 | Field 10 -- # of milliseconds spent doing I/Os | 83 | Field 10 -- # of milliseconds spent doing I/Os |
84 | This field increases so long as field 9 is nonzero. | 84 | This field increases so long as field 9 is nonzero. |
85 | Field 11 -- weighted # of milliseconds spent doing I/Os | 85 | Field 11 -- weighted # of milliseconds spent doing I/Os |
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO index b2446a090870..9f08dab1e75b 100644 --- a/Documentation/ja_JP/HOWTO +++ b/Documentation/ja_JP/HOWTO | |||
@@ -1,23 +1,24 @@ | |||
1 | NOTE: | 1 | NOTE: |
2 | This is Japanese translated version of "Documentation/HOWTO". | 2 | This is a version of Documentation/HOWTO translated into Japanese. |
3 | This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com> | 3 | This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com> |
4 | and JF Project team <www.linux.or.jp/JF>. | 4 | and the JF Project team <www.linux.or.jp/JF>. |
5 | If you find difference with original file or problem in translation, | 5 | If you find any difference between this document and the original file |
6 | please contact maintainer of this file or JF project. | 6 | or a problem with the translation, |
7 | 7 | please contact the maintainer of this file or JF project. | |
8 | Please also note that purpose of this file is easier to read for non | 8 | |
9 | English natives and not to be intended to fork. So, if you have any | 9 | Please also note that the purpose of this file is to be easier to read |
10 | comments or updates of this file, please try to update Original(English) | 10 | for non English (read: Japanese) speakers and is not intended as a |
11 | file at first. | 11 | fork. So if you have any comments or updates for this file, please try |
12 | 12 | to update the original English file first. | |
13 | Last Updated: 2007/06/04 | 13 | |
14 | Last Updated: 2007/07/18 | ||
14 | ================================== | 15 | ================================== |
15 | これは、 | 16 | これは、 |
16 | linux-2.6.21/Documentation/HOWTO | 17 | linux-2.6.22/Documentation/HOWTO |
17 | の和訳です。 | 18 | の和訳です。 |
18 | 19 | ||
19 | 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > | 20 | 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > |
20 | 翻訳日: 2007/06/04 | 21 | 翻訳日: 2007/07/16 |
21 | 翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> | 22 | 翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> |
22 | 校正者: 松倉さん <nbh--mats at nifty dot com> | 23 | 校正者: 松倉さん <nbh--mats at nifty dot com> |
23 | 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> | 24 | 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> |
@@ -52,6 +53,7 @@ Linux カーãƒãƒ«é–‹ç™ºã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¨å…±ã«æ´»å‹•ã™ã‚‹ã‚„り方をå¦ã | |||
52 | ã¾ãŸã€ã“ã®ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ãŒãªãœä»Šã†ã¾ãã¾ã‚ã£ã¦ã„ã‚‹ã®ã‹ã¨ã„ã†ç†ç”±ã®ä¸€éƒ¨ã‚‚ | 53 | ã¾ãŸã€ã“ã®ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ãŒãªãœä»Šã†ã¾ãã¾ã‚ã£ã¦ã„ã‚‹ã®ã‹ã¨ã„ã†ç†ç”±ã®ä¸€éƒ¨ã‚‚ |
53 | 説明ã—よã†ã¨è©¦ã¿ã¦ã„ã¾ã™ã€‚ | 54 | 説明ã—よã†ã¨è©¦ã¿ã¦ã„ã¾ã™ã€‚ |
54 | 55 | ||
56 | |||
55 | カーãƒãƒ«ã¯ å°‘é‡ã®ã‚¢ãƒ¼ã‚テクãƒãƒ£ä¾å˜éƒ¨åˆ†ãŒã‚¢ã‚»ãƒ³ãƒ–リ言語ã§æ›¸ã‹ã‚Œã¦ã„ã‚‹ | 57 | カーãƒãƒ«ã¯ å°‘é‡ã®ã‚¢ãƒ¼ã‚テクãƒãƒ£ä¾å˜éƒ¨åˆ†ãŒã‚¢ã‚»ãƒ³ãƒ–リ言語ã§æ›¸ã‹ã‚Œã¦ã„ã‚‹ |
56 | 以外ã¯å¤§éƒ¨åˆ†ã¯ C 言語ã§æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚C言語をよãç†è§£ã—ã¦ã„ã‚‹ã“ã¨ã¯ã‚«ãƒ¼ | 58 | 以外ã¯å¤§éƒ¨åˆ†ã¯ C 言語ã§æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚C言語をよãç†è§£ã—ã¦ã„ã‚‹ã“ã¨ã¯ã‚«ãƒ¼ |
57 | ãƒãƒ«é–‹ç™ºè€…ã«ã¯å¿…è¦ã§ã™ã€‚アーã‚テクãƒãƒ£å‘ã‘ã®ä½Žãƒ¬ãƒ™ãƒ«éƒ¨åˆ†ã®é–‹ç™ºã‚’ã™ã‚‹ã® | 59 | ãƒãƒ«é–‹ç™ºè€…ã«ã¯å¿…è¦ã§ã™ã€‚アーã‚テクãƒãƒ£å‘ã‘ã®ä½Žãƒ¬ãƒ™ãƒ«éƒ¨åˆ†ã®é–‹ç™ºã‚’ã™ã‚‹ã® |
@@ -141,6 +143,7 @@ Linux カーãƒãƒ«ã‚½ãƒ¼ã‚¹ãƒ„リーã¯å¹…広ã„範囲ã®ãƒ‰ã‚ュメントをå | |||
141 | ã“れらã®ãƒ«ãƒ¼ãƒ«ã«å¾“ãˆã°ã†ã¾ãã„ãã“ã¨ã‚’ä¿è¨¼ã™ã‚‹ã“ã¨ã§ã¯ã‚ã‚Šã¾ã›ã‚“ | 143 | ã“れらã®ãƒ«ãƒ¼ãƒ«ã«å¾“ãˆã°ã†ã¾ãã„ãã“ã¨ã‚’ä¿è¨¼ã™ã‚‹ã“ã¨ã§ã¯ã‚ã‚Šã¾ã›ã‚“ |
142 | ㌠(ã™ã¹ã¦ã®ãƒ‘ッãƒã¯å†…容ã¨ã‚¹ã‚¿ã‚¤ãƒ«ã«ã¤ã„ã¦ç²¾æŸ»ã‚’å—ã‘ã‚‹ã®ã§)〠| 144 | ㌠(ã™ã¹ã¦ã®ãƒ‘ッãƒã¯å†…容ã¨ã‚¹ã‚¿ã‚¤ãƒ«ã«ã¤ã„ã¦ç²¾æŸ»ã‚’å—ã‘ã‚‹ã®ã§)〠|
143 | ルールã«å¾“ã‚ãªã‘ã‚Œã°é–“é•ã„ãªãã†ã¾ãã„ã‹ãªã„ã§ã—ょã†ã€‚ | 145 | ルールã«å¾“ã‚ãªã‘ã‚Œã°é–“é•ã„ãªãã†ã¾ãã„ã‹ãªã„ã§ã—ょã†ã€‚ |
146 | |||
144 | ã“ã®ä»–ã«ãƒ‘ッãƒã‚’作る方法ã«ã¤ã„ã¦ã®ã‚ˆãã§ããŸè¨˜è¿°ã¯- | 147 | ã“ã®ä»–ã«ãƒ‘ッãƒã‚’作る方法ã«ã¤ã„ã¦ã®ã‚ˆãã§ããŸè¨˜è¿°ã¯- |
145 | 148 | ||
146 | "The Perfect Patch" | 149 | "The Perfect Patch" |
@@ -360,44 +363,42 @@ linux-kernel メーリングリストã§åŽé›†ã•ã‚ŒãŸå¤šæ•°ã®ãƒ‘ッãƒã¨åŒæ | |||
360 | 363 | ||
361 | git ツリー- | 364 | git ツリー- |
362 | - Kbuild ã®é–‹ç™ºãƒ„リーã€Sam Ravnborg <sam@ravnborg.org> | 365 | - Kbuild ã®é–‹ç™ºãƒ„リーã€Sam Ravnborg <sam@ravnborg.org> |
363 | kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git | 366 | git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git |
364 | 367 | ||
365 | - ACPI ã®é–‹ç™ºãƒ„リー〠Len Brown <len.brown@intel.com> | 368 | - ACPI ã®é–‹ç™ºãƒ„リー〠Len Brown <len.brown@intel.com> |
366 | kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git | 369 | git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git |
367 | 370 | ||
368 | - Block ã®é–‹ç™ºãƒ„リーã€Jens Axboe <axboe@suse.de> | 371 | - Block ã®é–‹ç™ºãƒ„リーã€Jens Axboe <axboe@suse.de> |
369 | kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git | 372 | git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git |
370 | 373 | ||
371 | - DRM ã®é–‹ç™ºãƒ„リーã€Dave Airlie <airlied@linux.ie> | 374 | - DRM ã®é–‹ç™ºãƒ„リーã€Dave Airlie <airlied@linux.ie> |
372 | kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git | 375 | git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git |
373 | 376 | ||
374 | - ia64 ã®é–‹ç™ºãƒ„リーã€Tony Luck <tony.luck@intel.com> | 377 | - ia64 ã®é–‹ç™ºãƒ„リーã€Tony Luck <tony.luck@intel.com> |
375 | kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git | 378 | git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git |
376 | |||
377 | - ieee1394 ã®é–‹ç™ºãƒ„リーã€Jody McIntyre <scjody@modernduck.com> | ||
378 | kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git | ||
379 | 379 | ||
380 | - infiniband, Roland Dreier <rolandd@cisco.com> | 380 | - infiniband, Roland Dreier <rolandd@cisco.com> |
381 | kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git | 381 | git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git |
382 | 382 | ||
383 | - libata, Jeff Garzik <jgarzik@pobox.com> | 383 | - libata, Jeff Garzik <jgarzik@pobox.com> |
384 | kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git | 384 | git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git |
385 | 385 | ||
386 | - ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ãƒ‰ãƒ©ã‚¤ãƒ, Jeff Garzik <jgarzik@pobox.com> | 386 | - ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ãƒ‰ãƒ©ã‚¤ãƒ, Jeff Garzik <jgarzik@pobox.com> |
387 | kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git | 387 | git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git |
388 | 388 | ||
389 | - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net> | 389 | - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net> |
390 | kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git | 390 | git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git |
391 | 391 | ||
392 | - SCSI, James Bottomley <James.Bottomley@SteelEye.com> | 392 | - SCSI, James Bottomley <James.Bottomley@SteelEye.com> |
393 | kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git | 393 | git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git |
394 | |||
395 | ãã®ä»–ã® git カーãƒãƒ«ãƒ„リー㯠http://kernel.org/git ã«ä¸€è¦§è¡¨ãŒã‚ã‚Šã¾ | ||
396 | ã™ã€‚ | ||
397 | 394 | ||
398 | quilt ツリー- | 395 | quilt ツリー- |
399 | - USB, PCI ドライãƒã‚³ã‚¢ã¨ I2C, Greg Kroah-Hartman <gregkh@suse.de> | 396 | - USB, PCI ドライãƒã‚³ã‚¢ã¨ I2C, Greg Kroah-Hartman <gregkh@suse.de> |
400 | kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ | 397 | kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ |
398 | - x86-64 㨠i386 ã®ä»²é–“ Andi Kleen <ak@suse.de> | ||
399 | |||
400 | ãã®ä»–ã®ã‚«ãƒ¼ãƒãƒ«ãƒ„リー㯠http://git.kernel.org/ 㨠MAINTAINERS ファ | ||
401 | イルã«ä¸€è¦§è¡¨ãŒã‚ã‚Šã¾ã™ã€‚ | ||
401 | 402 | ||
402 | バグレポート | 403 | バグレポート |
403 | ------------- | 404 | ------------- |
@@ -508,6 +509,7 @@ MAINTAINERS ファイルã«ãƒªã‚¹ãƒˆãŒã‚ã‚Šã¾ã™ã®ã§å‚ç…§ã—ã¦ãã ã•ã | |||
508 | ã›ã‚“*。å˜ã«è‡ªåˆ†ã®ãƒ‘ッãƒã«å¯¾ã—ã¦æŒ‡æ‘˜ã•ã‚ŒãŸå•é¡Œã‚’å…¨ã¦ä¿®æ£ã—ã¦å†é€ã™ã‚Œã° | 509 | ã›ã‚“*。å˜ã«è‡ªåˆ†ã®ãƒ‘ッãƒã«å¯¾ã—ã¦æŒ‡æ‘˜ã•ã‚ŒãŸå•é¡Œã‚’å…¨ã¦ä¿®æ£ã—ã¦å†é€ã™ã‚Œã° |
509 | ã„ã„ã®ã§ã™ã€‚ | 510 | ã„ã„ã®ã§ã™ã€‚ |
510 | 511 | ||
512 | |||
511 | カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¨ä¼æ¥çµ„ç¹”ã®ã¡ãŒã„ | 513 | カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¨ä¼æ¥çµ„ç¹”ã®ã¡ãŒã„ |
512 | ----------------------------------------------------------------- | 514 | ----------------------------------------------------------------- |
513 | 515 | ||
@@ -577,6 +579,7 @@ Linux カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¯ã€ä¸€åº¦ã«å¤§é‡ã®ã‚³ãƒ¼ãƒ‰ã®å¡Šã‚’å– | |||
577 | ã‹ã—ã€500è¡Œã®ãƒ‘ッãƒã¯ã€æ£ã—ã„ã“ã¨ã‚’レビューã™ã‚‹ã®ã«æ•°æ™‚é–“ã‹ã‹ã‚‹ã‹ã‚‚ | 579 | ã‹ã—ã€500è¡Œã®ãƒ‘ッãƒã¯ã€æ£ã—ã„ã“ã¨ã‚’レビューã™ã‚‹ã®ã«æ•°æ™‚é–“ã‹ã‹ã‚‹ã‹ã‚‚ |
578 | ã—ã‚Œã¾ã›ã‚“(時間ã¯ãƒ‘ッãƒã®ã‚µã‚¤ã‚ºãªã©ã«ã‚ˆã‚ŠæŒ‡æ•°é–¢æ•°ã«æ¯”例ã—ã¦ã‹ã‹ã‚Šã¾ | 580 | ã—ã‚Œã¾ã›ã‚“(時間ã¯ãƒ‘ッãƒã®ã‚µã‚¤ã‚ºãªã©ã«ã‚ˆã‚ŠæŒ‡æ•°é–¢æ•°ã«æ¯”例ã—ã¦ã‹ã‹ã‚Šã¾ |
579 | ã™) | 581 | ã™) |
582 | |||
580 | å°ã•ã„パッãƒã¯ä½•ã‹ã‚ã£ãŸã¨ãã«ãƒ‡ãƒãƒƒã‚°ã‚‚ã¨ã¦ã‚‚ç°¡å˜ã«ãªã‚Šã¾ã™ã€‚パッ | 583 | å°ã•ã„パッãƒã¯ä½•ã‹ã‚ã£ãŸã¨ãã«ãƒ‡ãƒãƒƒã‚°ã‚‚ã¨ã¦ã‚‚ç°¡å˜ã«ãªã‚Šã¾ã™ã€‚パッ |
581 | ãƒã‚’1個1個å–り除ãã®ã¯ã€ã¨ã¦ã‚‚大ããªãƒ‘ッãƒã‚’当ã¦ãŸå¾Œã«(ã‹ã¤ã€ä½•ã‹ãŠ | 584 | ãƒã‚’1個1個å–り除ãã®ã¯ã€ã¨ã¦ã‚‚大ããªãƒ‘ッãƒã‚’当ã¦ãŸå¾Œã«(ã‹ã¤ã€ä½•ã‹ãŠ |
582 | ã‹ã—ããªã£ãŸå¾Œã§)解剖ã™ã‚‹ã®ã«æ¯”ã¹ã‚Œã°ã¨ã¦ã‚‚ç°¡å˜ã§ã™ã€‚ | 585 | ã‹ã—ããªã£ãŸå¾Œã§)解剖ã™ã‚‹ã®ã«æ¯”ã¹ã‚Œã°ã¨ã¦ã‚‚ç°¡å˜ã§ã™ã€‚ |
@@ -591,6 +594,7 @@ Linux カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¯ã€ä¸€åº¦ã«å¤§é‡ã®ã‚³ãƒ¼ãƒ‰ã®å¡Šã‚’å– | |||
591 | ã†ã€‚先生ã¯ç°¡æ½”ãªæœ€é«˜ã®è§£ã‚’ã¿ãŸã„ã®ã§ã™ã€‚良ã„生徒ã¯ã“れを知ã£ã¦ | 594 | ã†ã€‚先生ã¯ç°¡æ½”ãªæœ€é«˜ã®è§£ã‚’ã¿ãŸã„ã®ã§ã™ã€‚良ã„生徒ã¯ã“れを知ã£ã¦ |
592 | ãŠã‚Šã€ãã—ã¦æœ€çµ‚解ã®å‰ã®ä¸é–“作æ¥ã‚’æ出ã™ã‚‹ã“ã¨ã¯æ±ºã—ã¦ãªã„ã®ã§ | 595 | ãŠã‚Šã€ãã—ã¦æœ€çµ‚解ã®å‰ã®ä¸é–“作æ¥ã‚’æ出ã™ã‚‹ã“ã¨ã¯æ±ºã—ã¦ãªã„ã®ã§ |
593 | ã™" | 596 | ã™" |
597 | |||
594 | カーãƒãƒ«é–‹ç™ºã§ã‚‚ã“ã‚Œã¯åŒã˜ã§ã™ã€‚メンテナーé”ã¨ãƒ¬ãƒ“ューアé”ã¯ã€ | 598 | カーãƒãƒ«é–‹ç™ºã§ã‚‚ã“ã‚Œã¯åŒã˜ã§ã™ã€‚メンテナーé”ã¨ãƒ¬ãƒ“ューアé”ã¯ã€ |
595 | å•é¡Œã‚’解決ã™ã‚‹è§£ã®èƒŒå¾Œã«ãªã‚‹æ€è€ƒãƒ—ãƒã‚»ã‚¹ã‚’ã¿ãŸã„ã¨ã¯æ€ã„ã¾ã›ã‚“。 | 599 | å•é¡Œã‚’解決ã™ã‚‹è§£ã®èƒŒå¾Œã«ãªã‚‹æ€è€ƒãƒ—ãƒã‚»ã‚¹ã‚’ã¿ãŸã„ã¨ã¯æ€ã„ã¾ã›ã‚“。 |
596 | 彼らã¯å˜ç´”ã§ã‚ã–ã‚„ã‹ãªè§£æ±ºæ–¹æ³•ã‚’ã¿ãŸã„ã®ã§ã™ã€‚ | 600 | 彼らã¯å˜ç´”ã§ã‚ã–ã‚„ã‹ãªè§£æ±ºæ–¹æ³•ã‚’ã¿ãŸã„ã®ã§ã™ã€‚ |
diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt index b3f2b27f0881..7653b5cbfed2 100644 --- a/Documentation/ja_JP/stable_api_nonsense.txt +++ b/Documentation/ja_JP/stable_api_nonsense.txt | |||
@@ -1,17 +1,17 @@ | |||
1 | NOTE: | 1 | NOTE: |
2 | This is a Japanese translated version of | 2 | This is a version of Documentation/stable_api_nonsense.txt translated into Japanese. |
3 | "Documentation/stable_api_nonsense.txt". | 3 | This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com> |
4 | This one is maintained by | 4 | and the JF Project team <http://www.linux.or.jp/JF/>. |
5 | IKEDA, Munehiro <m-ikeda@ds.jp.nec.com> | 5 | If you find any difference between this document and the original file |
6 | and JF Project team <http://www.linux.or.jp/JF/>. | 6 | or a problem with the translation, |
7 | If you find difference with original file or problem in translation, | ||
8 | please contact the maintainer of this file or JF project. | 7 | please contact the maintainer of this file or JF project. |
9 | 8 | ||
10 | Please also note that purpose of this file is easier to read for non | 9 | Please also note that the purpose of this file is to be easier to read |
11 | English natives and not to be intended to fork. So, if you have any | 10 | for non English (read: Japanese) speakers and is not intended as a |
12 | comments or updates of this file, please try to update | 11 | fork. So if you have any comments or updates of this file, please try |
13 | Original(English) file at first. | 12 | to update the original English file first. |
14 | 13 | ||
14 | Last Updated: 2007/07/18 | ||
15 | ================================== | 15 | ================================== |
16 | これは、 | 16 | これは、 |
17 | linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳 | 17 | linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳 |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fb80e9ffea68..efdb42fd3fb8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -30,6 +30,7 @@ the beginning of each description states the restrictions within which a | |||
30 | parameter is applicable: | 30 | parameter is applicable: |
31 | 31 | ||
32 | ACPI ACPI support is enabled. | 32 | ACPI ACPI support is enabled. |
33 | AGP AGP (Accelerated Graphics Port) is enabled. | ||
33 | ALSA ALSA sound support is enabled. | 34 | ALSA ALSA sound support is enabled. |
34 | APIC APIC support is enabled. | 35 | APIC APIC support is enabled. |
35 | APM Advanced Power Management support is enabled. | 36 | APM Advanced Power Management support is enabled. |
@@ -40,7 +41,6 @@ parameter is applicable: | |||
40 | EIDE EIDE/ATAPI support is enabled. | 41 | EIDE EIDE/ATAPI support is enabled. |
41 | FB The frame buffer device is enabled. | 42 | FB The frame buffer device is enabled. |
42 | HW Appropriate hardware is enabled. | 43 | HW Appropriate hardware is enabled. |
43 | IA-32 IA-32 aka i386 architecture is enabled. | ||
44 | IA-64 IA-64 architecture is enabled. | 44 | IA-64 IA-64 architecture is enabled. |
45 | IOSCHED More than one I/O scheduler is enabled. | 45 | IOSCHED More than one I/O scheduler is enabled. |
46 | IP_PNP IP DHCP, BOOTP, or RARP is enabled. | 46 | IP_PNP IP DHCP, BOOTP, or RARP is enabled. |
@@ -57,14 +57,14 @@ parameter is applicable: | |||
57 | MDA MDA console support is enabled. | 57 | MDA MDA console support is enabled. |
58 | MOUSE Appropriate mouse support is enabled. | 58 | MOUSE Appropriate mouse support is enabled. |
59 | MSI Message Signaled Interrupts (PCI). | 59 | MSI Message Signaled Interrupts (PCI). |
60 | MTD MTD support is enabled. | 60 | MTD MTD (Memory Technology Device) support is enabled. |
61 | NET Appropriate network support is enabled. | 61 | NET Appropriate network support is enabled. |
62 | NUMA NUMA support is enabled. | 62 | NUMA NUMA support is enabled. |
63 | GENERIC_TIME The generic timeofday code is enabled. | 63 | GENERIC_TIME The generic timeofday code is enabled. |
64 | NFS Appropriate NFS support is enabled. | 64 | NFS Appropriate NFS support is enabled. |
65 | OSS OSS sound support is enabled. | 65 | OSS OSS sound support is enabled. |
66 | PV_OPS A paravirtualized kernel | 66 | PV_OPS A paravirtualized kernel is enabled. |
67 | PARIDE The ParIDE subsystem is enabled. | 67 | PARIDE The ParIDE (parallel port IDE) subsystem is enabled. |
68 | PARISC The PA-RISC architecture is enabled. | 68 | PARISC The PA-RISC architecture is enabled. |
69 | PCI PCI bus support is enabled. | 69 | PCI PCI bus support is enabled. |
70 | PCMCIA The PCMCIA subsystem is enabled. | 70 | PCMCIA The PCMCIA subsystem is enabled. |
@@ -91,6 +91,7 @@ parameter is applicable: | |||
91 | VT Virtual terminal support is enabled. | 91 | VT Virtual terminal support is enabled. |
92 | WDT Watchdog support is enabled. | 92 | WDT Watchdog support is enabled. |
93 | XT IBM PC/XT MFM hard disk support is enabled. | 93 | XT IBM PC/XT MFM hard disk support is enabled. |
94 | X86-32 X86-32, aka i386 architecture is enabled. | ||
94 | X86-64 X86-64 architecture is enabled. | 95 | X86-64 X86-64 architecture is enabled. |
95 | More X86-64 boot options can be found in | 96 | More X86-64 boot options can be found in |
96 | Documentation/x86_64/boot-options.txt . | 97 | Documentation/x86_64/boot-options.txt . |
@@ -122,10 +123,6 @@ and is between 256 and 4096 characters. It is defined in the file | |||
122 | ./include/asm/setup.h as COMMAND_LINE_SIZE. | 123 | ./include/asm/setup.h as COMMAND_LINE_SIZE. |
123 | 124 | ||
124 | 125 | ||
125 | 53c7xx= [HW,SCSI] Amiga SCSI controllers | ||
126 | See header of drivers/scsi/53c7xx.c. | ||
127 | See also Documentation/scsi/ncr53c7xx.txt. | ||
128 | |||
129 | acpi= [HW,ACPI,X86-64,i386] | 126 | acpi= [HW,ACPI,X86-64,i386] |
130 | Advanced Configuration and Power Interface | 127 | Advanced Configuration and Power Interface |
131 | Format: { force | off | ht | strict | noirq } | 128 | Format: { force | off | ht | strict | noirq } |
@@ -222,11 +219,17 @@ and is between 256 and 4096 characters. It is defined in the file | |||
222 | 219 | ||
223 | acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT | 220 | acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT |
224 | 221 | ||
225 | acpi_pm_good [IA-32,X86-64] | 222 | acpi_pm_good [X86-32,X86-64] |
226 | Override the pmtimer bug detection: force the kernel | 223 | Override the pmtimer bug detection: force the kernel |
227 | to assume that this machine's pmtimer latches its value | 224 | to assume that this machine's pmtimer latches its value |
228 | and always returns good values. | 225 | and always returns good values. |
229 | 226 | ||
227 | agp= [AGP] | ||
228 | { off | try_unsupported } | ||
229 | off: disable AGP support | ||
230 | try_unsupported: try to drive unsupported chipsets | ||
231 | (may crash computer or cause data corruption) | ||
232 | |||
230 | enable_timer_pin_1 [i386,x86-64] | 233 | enable_timer_pin_1 [i386,x86-64] |
231 | Enable PIN 1 of APIC timer | 234 | Enable PIN 1 of APIC timer |
232 | Can be useful to work around chipset bugs | 235 | Can be useful to work around chipset bugs |
@@ -279,7 +282,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
279 | not play well with APC CPU idle - disable it if you have | 282 | not play well with APC CPU idle - disable it if you have |
280 | APC and your system crashes randomly. | 283 | APC and your system crashes randomly. |
281 | 284 | ||
282 | apic= [APIC,i386] Change the output verbosity whilst booting | 285 | apic= [APIC,i386] Advanced Programmable Interrupt Controller |
286 | Change the output verbosity whilst booting | ||
283 | Format: { quiet (default) | verbose | debug } | 287 | Format: { quiet (default) | verbose | debug } |
284 | Change the amount of debugging information output | 288 | Change the amount of debugging information output |
285 | when initialising the APIC and IO-APIC components. | 289 | when initialising the APIC and IO-APIC components. |
@@ -353,7 +357,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
353 | 357 | ||
354 | c101= [NET] Moxa C101 synchronous serial card | 358 | c101= [NET] Moxa C101 synchronous serial card |
355 | 359 | ||
356 | cachesize= [BUGS=IA-32] Override level 2 CPU cache size detection. | 360 | cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection. |
357 | Sometimes CPU hardware bugs make them report the cache | 361 | Sometimes CPU hardware bugs make them report the cache |
358 | size incorrectly. The kernel will attempt work arounds | 362 | size incorrectly. The kernel will attempt work arounds |
359 | to fix known problems, but for some CPUs it is not | 363 | to fix known problems, but for some CPUs it is not |
@@ -372,7 +376,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
372 | Value can be changed at runtime via | 376 | Value can be changed at runtime via |
373 | /selinux/checkreqprot. | 377 | /selinux/checkreqprot. |
374 | 378 | ||
375 | clock= [BUGS=IA-32, HW] gettimeofday clocksource override. | 379 | clock= [BUGS=X86-32, HW] gettimeofday clocksource override. |
376 | [Deprecated] | 380 | [Deprecated] |
377 | Forces specified clocksource (if available) to be used | 381 | Forces specified clocksource (if available) to be used |
378 | when calculating gettimeofday(). If specified | 382 | when calculating gettimeofday(). If specified |
@@ -390,7 +394,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
390 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, | 394 | [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, |
391 | pxa_timer,timer3,32k_counter,timer0_1 | 395 | pxa_timer,timer3,32k_counter,timer0_1 |
392 | [AVR32] avr32 | 396 | [AVR32] avr32 |
393 | [IA-32] pit,hpet,tsc,vmi-timer; | 397 | [X86-32] pit,hpet,tsc,vmi-timer; |
394 | scx200_hrt on Geode; cyclone on IBM x440 | 398 | scx200_hrt on Geode; cyclone on IBM x440 |
395 | [MIPS] MIPS | 399 | [MIPS] MIPS |
396 | [PARISC] cr16 | 400 | [PARISC] cr16 |
@@ -410,7 +414,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
410 | over the 8254 in addition to over the IO-APIC. The | 414 | over the 8254 in addition to over the IO-APIC. The |
411 | kernel tries to set a sensible default. | 415 | kernel tries to set a sensible default. |
412 | 416 | ||
413 | hpet= [IA-32,HPET] option to disable HPET and use PIT. | 417 | hpet= [X86-32,HPET] option to disable HPET and use PIT. |
414 | Format: disable | 418 | Format: disable |
415 | 419 | ||
416 | com20020= [HW,NET] ARCnet - COM20020 chipset | 420 | com20020= [HW,NET] ARCnet - COM20020 chipset |
@@ -547,7 +551,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
547 | 551 | ||
548 | dtc3181e= [HW,SCSI] | 552 | dtc3181e= [HW,SCSI] |
549 | 553 | ||
550 | earlyprintk= [IA-32,X86-64,SH] | 554 | earlyprintk= [X86-32,X86-64,SH] |
551 | earlyprintk=vga | 555 | earlyprintk=vga |
552 | earlyprintk=serial[,ttySn[,baudrate]] | 556 | earlyprintk=serial[,ttySn[,baudrate]] |
553 | 557 | ||
@@ -585,7 +589,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
585 | eisa_irq_edge= [PARISC,HW] | 589 | eisa_irq_edge= [PARISC,HW] |
586 | See header of drivers/parisc/eisa.c. | 590 | See header of drivers/parisc/eisa.c. |
587 | 591 | ||
588 | elanfreq= [IA-32] | 592 | elanfreq= [X86-32] |
589 | See comment before function elanfreq_setup() in | 593 | See comment before function elanfreq_setup() in |
590 | arch/i386/kernel/cpu/cpufreq/elanfreq.c. | 594 | arch/i386/kernel/cpu/cpufreq/elanfreq.c. |
591 | 595 | ||
@@ -594,7 +598,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
594 | See Documentation/block/as-iosched.txt and | 598 | See Documentation/block/as-iosched.txt and |
595 | Documentation/block/deadline-iosched.txt for details. | 599 | Documentation/block/deadline-iosched.txt for details. |
596 | 600 | ||
597 | elfcorehdr= [IA-32, X86_64] | 601 | elfcorehdr= [X86-32, X86_64] |
598 | Specifies physical address of start of kernel core | 602 | Specifies physical address of start of kernel core |
599 | image elf header. Generally kexec loader will | 603 | image elf header. Generally kexec loader will |
600 | pass this option to capture kernel. | 604 | pass this option to capture kernel. |
@@ -676,7 +680,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
676 | hisax= [HW,ISDN] | 680 | hisax= [HW,ISDN] |
677 | See Documentation/isdn/README.HiSax. | 681 | See Documentation/isdn/README.HiSax. |
678 | 682 | ||
679 | hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. | 683 | hugepages= [HW,X86-32,IA-64] Maximal number of HugeTLB pages. |
680 | 684 | ||
681 | i8042.direct [HW] Put keyboard port into non-translated mode | 685 | i8042.direct [HW] Put keyboard port into non-translated mode |
682 | i8042.dumbkbd [HW] Pretend that controller can only read data from | 686 | i8042.dumbkbd [HW] Pretend that controller can only read data from |
@@ -768,7 +772,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
768 | See Documentation/nfsroot.txt. | 772 | See Documentation/nfsroot.txt. |
769 | 773 | ||
770 | ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards | 774 | ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards |
771 | See comment before ip2_setup() in drivers/char/ip2.c. | 775 | See comment before ip2_setup() in |
776 | drivers/char/ip2/ip2base.c. | ||
772 | 777 | ||
773 | ips= [HW,SCSI] Adaptec / IBM ServeRAID controller | 778 | ips= [HW,SCSI] Adaptec / IBM ServeRAID controller |
774 | See header of drivers/scsi/ips.c. | 779 | See header of drivers/scsi/ips.c. |
@@ -817,7 +822,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
817 | js= [HW,JOY] Analog joystick | 822 | js= [HW,JOY] Analog joystick |
818 | See Documentation/input/joystick.txt. | 823 | See Documentation/input/joystick.txt. |
819 | 824 | ||
820 | kernelcore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter | 825 | kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter |
821 | specifies the amount of memory usable by the kernel | 826 | specifies the amount of memory usable by the kernel |
822 | for non-movable allocations. The requested amount is | 827 | for non-movable allocations. The requested amount is |
823 | spread evenly throughout all nodes in the system. The | 828 | spread evenly throughout all nodes in the system. The |
@@ -833,7 +838,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
833 | use the HighMem zone if it exists, and the Normal | 838 | use the HighMem zone if it exists, and the Normal |
834 | zone if it does not. | 839 | zone if it does not. |
835 | 840 | ||
836 | movablecore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter | 841 | movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter |
837 | is similar to kernelcore except it specifies the | 842 | is similar to kernelcore except it specifies the |
838 | amount of memory used for migratable allocations. | 843 | amount of memory used for migratable allocations. |
839 | If both kernelcore and movablecore are specified, | 844 | If both kernelcore and movablecore are specified, |
@@ -845,28 +850,20 @@ and is between 256 and 4096 characters. It is defined in the file | |||
845 | 850 | ||
846 | keepinitrd [HW,ARM] | 851 | keepinitrd [HW,ARM] |
847 | 852 | ||
848 | kstack=N [IA-32,X86-64] Print N words from the kernel stack | 853 | kstack=N [X86-32,X86-64] Print N words from the kernel stack |
849 | in oops dumps. | 854 | in oops dumps. |
850 | 855 | ||
851 | l2cr= [PPC] | 856 | l2cr= [PPC] |
852 | 857 | ||
853 | lapic [IA-32,APIC] Enable the local APIC even if BIOS | 858 | lapic [X86-32,APIC] Enable the local APIC even if BIOS |
854 | disabled it. | 859 | disabled it. |
855 | 860 | ||
856 | lapic_timer_c2_ok [IA-32,x86-64,APIC] trust the local apic timer in | 861 | lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in |
857 | C2 power state. | 862 | C2 power state. |
858 | 863 | ||
859 | lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip | 864 | lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip |
860 | Format: addr:<io>,irq:<irq> | 865 | Format: addr:<io>,irq:<irq> |
861 | 866 | ||
862 | legacy_serial.force [HW,IA-32,X86-64] | ||
863 | Probe for COM ports at legacy addresses even | ||
864 | if PNPBIOS or ACPI should describe them. This | ||
865 | is for working around firmware defects. | ||
866 | |||
867 | llsc*= [IA64] See function print_params() in | ||
868 | arch/ia64/sn/kernel/llsc4.c. | ||
869 | |||
870 | load_ramdisk= [RAM] List of ramdisks to load from floppy | 867 | load_ramdisk= [RAM] List of ramdisks to load from floppy |
871 | See Documentation/ramdisk.txt. | 868 | See Documentation/ramdisk.txt. |
872 | 869 | ||
@@ -972,11 +969,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
972 | [SCSI] Maximum number of LUNs received. | 969 | [SCSI] Maximum number of LUNs received. |
973 | Should be between 1 and 16384. | 970 | Should be between 1 and 16384. |
974 | 971 | ||
975 | mca-pentium [BUGS=IA-32] | 972 | mca-pentium [BUGS=X86-32] |
976 | 973 | ||
977 | mcatest= [IA-64] | 974 | mcatest= [IA-64] |
978 | 975 | ||
979 | mce [IA-32] Machine Check Exception | 976 | mce [X86-32] Machine Check Exception |
980 | 977 | ||
981 | md= [HW] RAID subsystems devices and level | 978 | md= [HW] RAID subsystems devices and level |
982 | See Documentation/md.txt. | 979 | See Documentation/md.txt. |
@@ -988,14 +985,14 @@ and is between 256 and 4096 characters. It is defined in the file | |||
988 | mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory | 985 | mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory |
989 | Amount of memory to be used when the kernel is not able | 986 | Amount of memory to be used when the kernel is not able |
990 | to see the whole system memory or for test. | 987 | to see the whole system memory or for test. |
991 | [IA-32] Use together with memmap= to avoid physical | 988 | [X86-32] Use together with memmap= to avoid physical |
992 | address space collisions. Without memmap= PCI devices | 989 | address space collisions. Without memmap= PCI devices |
993 | could be placed at addresses belonging to unused RAM. | 990 | could be placed at addresses belonging to unused RAM. |
994 | 991 | ||
995 | mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel | 992 | mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel |
996 | memory. | 993 | memory. |
997 | 994 | ||
998 | memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact | 995 | memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact |
999 | E820 memory map, as specified by the user. | 996 | E820 memory map, as specified by the user. |
1000 | Such memmap=exactmap lines can be constructed based on | 997 | Such memmap=exactmap lines can be constructed based on |
1001 | BIOS output or other requirements. See the memmap=nn@ss | 998 | BIOS output or other requirements. See the memmap=nn@ss |
@@ -1039,7 +1036,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1039 | <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>] | 1036 | <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>] |
1040 | 1037 | ||
1041 | mtdparts= [MTD] | 1038 | mtdparts= [MTD] |
1042 | See drivers/mtd/cmdline.c. | 1039 | See drivers/mtd/cmdlinepart.c. |
1043 | 1040 | ||
1044 | mtouchusb.raw_coordinates= | 1041 | mtouchusb.raw_coordinates= |
1045 | [HW] Make the MicroTouch USB driver use raw coordinates | 1042 | [HW] Make the MicroTouch USB driver use raw coordinates |
@@ -1081,9 +1078,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1081 | [NFS] set the maximum lifetime for idmapper cache | 1078 | [NFS] set the maximum lifetime for idmapper cache |
1082 | entries. | 1079 | entries. |
1083 | 1080 | ||
1084 | nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels | 1081 | nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels |
1085 | 1082 | ||
1086 | no387 [BUGS=IA-32] Tells the kernel to use the 387 maths | 1083 | no387 [BUGS=X86-32] Tells the kernel to use the 387 maths |
1087 | emulation library even if a 387 maths coprocessor | 1084 | emulation library even if a 387 maths coprocessor |
1088 | is present. | 1085 | is present. |
1089 | 1086 | ||
@@ -1114,17 +1111,17 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1114 | 1111 | ||
1115 | noexec [IA-64] | 1112 | noexec [IA-64] |
1116 | 1113 | ||
1117 | noexec [IA-32,X86-64] | 1114 | noexec [X86-32,X86-64] |
1118 | noexec=on: enable non-executable mappings (default) | 1115 | noexec=on: enable non-executable mappings (default) |
1119 | noexec=off: disable non-executable mappings | 1116 | noexec=off: disable non-executable mappings |
1120 | 1117 | ||
1121 | nofxsr [BUGS=IA-32] Disables x86 floating point extended | 1118 | nofxsr [BUGS=X86-32] Disables x86 floating point extended |
1122 | register save and restore. The kernel will only save | 1119 | register save and restore. The kernel will only save |
1123 | legacy floating-point registers on task switch. | 1120 | legacy floating-point registers on task switch. |
1124 | 1121 | ||
1125 | nohlt [BUGS=ARM] | 1122 | nohlt [BUGS=ARM] |
1126 | 1123 | ||
1127 | no-hlt [BUGS=IA-32] Tells the kernel that the hlt | 1124 | no-hlt [BUGS=X86-32] Tells the kernel that the hlt |
1128 | instruction doesn't work correctly and not to | 1125 | instruction doesn't work correctly and not to |
1129 | use it. | 1126 | use it. |
1130 | 1127 | ||
@@ -1139,12 +1136,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1139 | Valid arguments: on, off | 1136 | Valid arguments: on, off |
1140 | Default: on | 1137 | Default: on |
1141 | 1138 | ||
1142 | noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing | 1139 | noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing |
1143 | 1140 | ||
1144 | noirqdebug [IA-32] Disables the code which attempts to detect and | 1141 | noirqdebug [X86-32] Disables the code which attempts to detect and |
1145 | disable unhandled interrupt sources. | 1142 | disable unhandled interrupt sources. |
1146 | 1143 | ||
1147 | no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for | 1144 | no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for |
1148 | broken timer IRQ sources. | 1145 | broken timer IRQ sources. |
1149 | 1146 | ||
1150 | noisapnp [ISAPNP] Disables ISA PnP code. | 1147 | noisapnp [ISAPNP] Disables ISA PnP code. |
@@ -1156,20 +1153,20 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1156 | 1153 | ||
1157 | nojitter [IA64] Disables jitter checking for ITC timers. | 1154 | nojitter [IA64] Disables jitter checking for ITC timers. |
1158 | 1155 | ||
1159 | nolapic [IA-32,APIC] Do not enable or use the local APIC. | 1156 | nolapic [X86-32,APIC] Do not enable or use the local APIC. |
1160 | 1157 | ||
1161 | nolapic_timer [IA-32,APIC] Do not use the local APIC timer. | 1158 | nolapic_timer [X86-32,APIC] Do not use the local APIC timer. |
1162 | 1159 | ||
1163 | noltlbs [PPC] Do not use large page/tlb entries for kernel | 1160 | noltlbs [PPC] Do not use large page/tlb entries for kernel |
1164 | lowmem mapping on PPC40x. | 1161 | lowmem mapping on PPC40x. |
1165 | 1162 | ||
1166 | nomca [IA-64] Disable machine check abort handling | 1163 | nomca [IA-64] Disable machine check abort handling |
1167 | 1164 | ||
1168 | nomce [IA-32] Machine Check Exception | 1165 | nomce [X86-32] Machine Check Exception |
1169 | 1166 | ||
1170 | noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops | 1167 | noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops |
1171 | 1168 | ||
1172 | noreplace-smp [IA-32,SMP] Don't replace SMP instructions | 1169 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions |
1173 | with UP alternatives | 1170 | with UP alternatives |
1174 | 1171 | ||
1175 | noresidual [PPC] Don't use residual data on PReP machines. | 1172 | noresidual [PPC] Don't use residual data on PReP machines. |
@@ -1183,7 +1180,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1183 | 1180 | ||
1184 | nosbagart [IA-64] | 1181 | nosbagart [IA-64] |
1185 | 1182 | ||
1186 | nosep [BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support. | 1183 | nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support. |
1187 | 1184 | ||
1188 | nosmp [SMP] Tells an SMP kernel to act as a UP kernel. | 1185 | nosmp [SMP] Tells an SMP kernel to act as a UP kernel. |
1189 | 1186 | ||
@@ -1191,7 +1188,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1191 | 1188 | ||
1192 | nosync [HW,M68K] Disables sync negotiation for all devices. | 1189 | nosync [HW,M68K] Disables sync negotiation for all devices. |
1193 | 1190 | ||
1194 | notsc [BUGS=IA-32] Disable Time Stamp Counter | 1191 | notsc [BUGS=X86-32] Disable Time Stamp Counter |
1195 | 1192 | ||
1196 | nousb [USB] Disable the USB subsystem | 1193 | nousb [USB] Disable the USB subsystem |
1197 | 1194 | ||
@@ -1264,28 +1261,28 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1264 | See also Documentation/paride.txt. | 1261 | See also Documentation/paride.txt. |
1265 | 1262 | ||
1266 | pci=option[,option...] [PCI] various PCI subsystem options: | 1263 | pci=option[,option...] [PCI] various PCI subsystem options: |
1267 | off [IA-32] don't probe for the PCI bus | 1264 | off [X86-32] don't probe for the PCI bus |
1268 | bios [IA-32] force use of PCI BIOS, don't access | 1265 | bios [X86-32] force use of PCI BIOS, don't access |
1269 | the hardware directly. Use this if your machine | 1266 | the hardware directly. Use this if your machine |
1270 | has a non-standard PCI host bridge. | 1267 | has a non-standard PCI host bridge. |
1271 | nobios [IA-32] disallow use of PCI BIOS, only direct | 1268 | nobios [X86-32] disallow use of PCI BIOS, only direct |
1272 | hardware access methods are allowed. Use this | 1269 | hardware access methods are allowed. Use this |
1273 | if you experience crashes upon bootup and you | 1270 | if you experience crashes upon bootup and you |
1274 | suspect they are caused by the BIOS. | 1271 | suspect they are caused by the BIOS. |
1275 | conf1 [IA-32] Force use of PCI Configuration | 1272 | conf1 [X86-32] Force use of PCI Configuration |
1276 | Mechanism 1. | 1273 | Mechanism 1. |
1277 | conf2 [IA-32] Force use of PCI Configuration | 1274 | conf2 [X86-32] Force use of PCI Configuration |
1278 | Mechanism 2. | 1275 | Mechanism 2. |
1279 | nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI | 1276 | nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI |
1280 | Configuration | 1277 | Configuration |
1281 | nomsi [MSI] If the PCI_MSI kernel config parameter is | 1278 | nomsi [MSI] If the PCI_MSI kernel config parameter is |
1282 | enabled, this kernel boot option can be used to | 1279 | enabled, this kernel boot option can be used to |
1283 | disable the use of MSI interrupts system-wide. | 1280 | disable the use of MSI interrupts system-wide. |
1284 | nosort [IA-32] Don't sort PCI devices according to | 1281 | nosort [X86-32] Don't sort PCI devices according to |
1285 | order given by the PCI BIOS. This sorting is | 1282 | order given by the PCI BIOS. This sorting is |
1286 | done to get a device order compatible with | 1283 | done to get a device order compatible with |
1287 | older kernels. | 1284 | older kernels. |
1288 | biosirq [IA-32] Use PCI BIOS calls to get the interrupt | 1285 | biosirq [X86-32] Use PCI BIOS calls to get the interrupt |
1289 | routing table. These calls are known to be buggy | 1286 | routing table. These calls are known to be buggy |
1290 | on several machines and they hang the machine | 1287 | on several machines and they hang the machine |
1291 | when used, but on other computers it's the only | 1288 | when used, but on other computers it's the only |
@@ -1293,32 +1290,32 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1293 | this option if the kernel is unable to allocate | 1290 | this option if the kernel is unable to allocate |
1294 | IRQs or discover secondary PCI buses on your | 1291 | IRQs or discover secondary PCI buses on your |
1295 | motherboard. | 1292 | motherboard. |
1296 | rom [IA-32] Assign address space to expansion ROMs. | 1293 | rom [X86-32] Assign address space to expansion ROMs. |
1297 | Use with caution as certain devices share | 1294 | Use with caution as certain devices share |
1298 | address decoders between ROMs and other | 1295 | address decoders between ROMs and other |
1299 | resources. | 1296 | resources. |
1300 | irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be | 1297 | irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be |
1301 | assigned automatically to PCI devices. You can | 1298 | assigned automatically to PCI devices. You can |
1302 | make the kernel exclude IRQs of your ISA cards | 1299 | make the kernel exclude IRQs of your ISA cards |
1303 | this way. | 1300 | this way. |
1304 | pirqaddr=0xAAAAA [IA-32] Specify the physical address | 1301 | pirqaddr=0xAAAAA [X86-32] Specify the physical address |
1305 | of the PIRQ table (normally generated | 1302 | of the PIRQ table (normally generated |
1306 | by the BIOS) if it is outside the | 1303 | by the BIOS) if it is outside the |
1307 | F0000h-100000h range. | 1304 | F0000h-100000h range. |
1308 | lastbus=N [IA-32] Scan all buses thru bus #N. Can be | 1305 | lastbus=N [X86-32] Scan all buses thru bus #N. Can be |
1309 | useful if the kernel is unable to find your | 1306 | useful if the kernel is unable to find your |
1310 | secondary buses and you want to tell it | 1307 | secondary buses and you want to tell it |
1311 | explicitly which ones they are. | 1308 | explicitly which ones they are. |
1312 | assign-busses [IA-32] Always assign all PCI bus | 1309 | assign-busses [X86-32] Always assign all PCI bus |
1313 | numbers ourselves, overriding | 1310 | numbers ourselves, overriding |
1314 | whatever the firmware may have done. | 1311 | whatever the firmware may have done. |
1315 | usepirqmask [IA-32] Honor the possible IRQ mask stored | 1312 | usepirqmask [X86-32] Honor the possible IRQ mask stored |
1316 | in the BIOS $PIR table. This is needed on | 1313 | in the BIOS $PIR table. This is needed on |
1317 | some systems with broken BIOSes, notably | 1314 | some systems with broken BIOSes, notably |
1318 | some HP Pavilion N5400 and Omnibook XE3 | 1315 | some HP Pavilion N5400 and Omnibook XE3 |
1319 | notebooks. This will have no effect if ACPI | 1316 | notebooks. This will have no effect if ACPI |
1320 | IRQ routing is enabled. | 1317 | IRQ routing is enabled. |
1321 | noacpi [IA-32] Do not use ACPI for IRQ routing | 1318 | noacpi [X86-32] Do not use ACPI for IRQ routing |
1322 | or for PCI scanning. | 1319 | or for PCI scanning. |
1323 | routeirq Do IRQ routing for all PCI devices. | 1320 | routeirq Do IRQ routing for all PCI devices. |
1324 | This is normally done in pci_enable_device(), | 1321 | This is normally done in pci_enable_device(), |
@@ -1467,13 +1464,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1467 | Run specified binary instead of /init from the ramdisk, | 1464 | Run specified binary instead of /init from the ramdisk, |
1468 | used for early userspace startup. See initrd. | 1465 | used for early userspace startup. See initrd. |
1469 | 1466 | ||
1470 | reboot= [BUGS=IA-32,BUGS=ARM,BUGS=IA-64] Rebooting mode | 1467 | reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode |
1471 | Format: <reboot_mode>[,<reboot_mode2>[,...]] | 1468 | Format: <reboot_mode>[,<reboot_mode2>[,...]] |
1472 | See arch/*/kernel/reboot.c or arch/*/kernel/process.c | 1469 | See arch/*/kernel/reboot.c or arch/*/kernel/process.c |
1473 | 1470 | ||
1474 | reserve= [KNL,BUGS] Force the kernel to ignore some iomem area | 1471 | reserve= [KNL,BUGS] Force the kernel to ignore some iomem area |
1475 | 1472 | ||
1476 | reservetop= [IA-32] | 1473 | reservetop= [X86-32] |
1477 | Format: nn[KMG] | 1474 | Format: nn[KMG] |
1478 | Reserves a hole at the top of the kernel virtual | 1475 | Reserves a hole at the top of the kernel virtual |
1479 | address space. | 1476 | address space. |
@@ -1564,7 +1561,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1564 | Value can be changed at runtime via | 1561 | Value can be changed at runtime via |
1565 | /selinux/compat_net. | 1562 | /selinux/compat_net. |
1566 | 1563 | ||
1567 | serialnumber [BUGS=IA-32] | 1564 | serialnumber [BUGS=X86-32] |
1568 | 1565 | ||
1569 | sg_def_reserved_size= [SCSI] | 1566 | sg_def_reserved_size= [SCSI] |
1570 | 1567 | ||
@@ -1617,7 +1614,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1617 | smart2= [HW] | 1614 | smart2= [HW] |
1618 | Format: <io1>[,<io2>[,...,<io8>]] | 1615 | Format: <io1>[,<io2>[,...,<io8>]] |
1619 | 1616 | ||
1620 | smp-alt-once [IA-32,SMP] On a hotplug CPU system, only | 1617 | smp-alt-once [X86-32,SMP] On a hotplug CPU system, only |
1621 | attempt to substitute SMP alternatives once at boot. | 1618 | attempt to substitute SMP alternatives once at boot. |
1622 | 1619 | ||
1623 | smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices | 1620 | smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices |
@@ -1882,7 +1879,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1882 | usbhid.mousepoll= | 1879 | usbhid.mousepoll= |
1883 | [USBHID] The interval which mice are to be polled at. | 1880 | [USBHID] The interval which mice are to be polled at. |
1884 | 1881 | ||
1885 | vdso= [IA-32,SH,x86-64] | 1882 | vdso= [X86-32,SH,x86-64] |
1886 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) | 1883 | vdso=2: enable compat VDSO (default with COMPAT_VDSO) |
1887 | vdso=1: enable VDSO (default) | 1884 | vdso=1: enable VDSO (default) |
1888 | vdso=0: disable VDSO mapping | 1885 | vdso=0: disable VDSO mapping |
@@ -1893,7 +1890,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1893 | video= [FB] Frame buffer configuration | 1890 | video= [FB] Frame buffer configuration |
1894 | See Documentation/fb/modedb.txt. | 1891 | See Documentation/fb/modedb.txt. |
1895 | 1892 | ||
1896 | vga= [BOOT,IA-32] Select a particular video mode | 1893 | vga= [BOOT,X86-32] Select a particular video mode |
1897 | See Documentation/i386/boot.txt and | 1894 | See Documentation/i386/boot.txt and |
1898 | Documentation/svga.txt. | 1895 | Documentation/svga.txt. |
1899 | Use vga=ask for menu. | 1896 | Use vga=ask for menu. |
diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 81d9aa097298..947d57d53453 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt | |||
@@ -859,9 +859,8 @@ payload contents" for more information. | |||
859 | void unregister_key_type(struct key_type *type); | 859 | void unregister_key_type(struct key_type *type); |
860 | 860 | ||
861 | 861 | ||
862 | Under some circumstances, it may be desirable to desirable to deal with a | 862 | Under some circumstances, it may be desirable to deal with a bundle of keys. |
863 | bundle of keys. The facility provides access to the keyring type for managing | 863 | The facility provides access to the keyring type for managing such a bundle: |
864 | such a bundle: | ||
865 | 864 | ||
866 | struct key_type key_type_keyring; | 865 | struct key_type key_type_keyring; |
867 | 866 | ||
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index e44855513b3d..8ee49ee7c963 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt | |||
@@ -27,7 +27,6 @@ in detail, and briefly here: | |||
27 | - kobjects a simple object. | 27 | - kobjects a simple object. |
28 | - kset a set of objects of a certain type. | 28 | - kset a set of objects of a certain type. |
29 | - ktype a set of helpers for objects of a common type. | 29 | - ktype a set of helpers for objects of a common type. |
30 | - subsystem a controlling object for a number of ksets. | ||
31 | 30 | ||
32 | 31 | ||
33 | The kobject infrastructure maintains a close relationship with the | 32 | The kobject infrastructure maintains a close relationship with the |
@@ -54,13 +53,15 @@ embedded in larger data structures and replace fields they duplicate. | |||
54 | 1.2 Definition | 53 | 1.2 Definition |
55 | 54 | ||
56 | struct kobject { | 55 | struct kobject { |
56 | const char * k_name; | ||
57 | char name[KOBJ_NAME_LEN]; | 57 | char name[KOBJ_NAME_LEN]; |
58 | atomic_t refcount; | 58 | struct kref kref; |
59 | struct list_head entry; | 59 | struct list_head entry; |
60 | struct kobject * parent; | 60 | struct kobject * parent; |
61 | struct kset * kset; | 61 | struct kset * kset; |
62 | struct kobj_type * ktype; | 62 | struct kobj_type * ktype; |
63 | struct dentry * dentry; | 63 | struct sysfs_dirent * sd; |
64 | wait_queue_head_t poll; | ||
64 | }; | 65 | }; |
65 | 66 | ||
66 | void kobject_init(struct kobject *); | 67 | void kobject_init(struct kobject *); |
@@ -137,8 +138,7 @@ If a kobject does not have a parent when it is registered, its parent | |||
137 | becomes its dominant kset. | 138 | becomes its dominant kset. |
138 | 139 | ||
139 | If a kobject does not have a parent nor a dominant kset, its directory | 140 | If a kobject does not have a parent nor a dominant kset, its directory |
140 | is created at the top-level of the sysfs partition. This should only | 141 | is created at the top-level of the sysfs partition. |
141 | happen for kobjects that are embedded in a struct subsystem. | ||
142 | 142 | ||
143 | 143 | ||
144 | 144 | ||
@@ -150,10 +150,10 @@ A kset is a set of kobjects that are embedded in the same type. | |||
150 | 150 | ||
151 | 151 | ||
152 | struct kset { | 152 | struct kset { |
153 | struct subsystem * subsys; | ||
154 | struct kobj_type * ktype; | 153 | struct kobj_type * ktype; |
155 | struct list_head list; | 154 | struct list_head list; |
156 | struct kobject kobj; | 155 | struct kobject kobj; |
156 | struct kset_uevent_ops * uevent_ops; | ||
157 | }; | 157 | }; |
158 | 158 | ||
159 | 159 | ||
@@ -169,8 +169,7 @@ struct kobject * kset_find_obj(struct kset *, char *); | |||
169 | 169 | ||
170 | 170 | ||
171 | The type that the kobjects are embedded in is described by the ktype | 171 | The type that the kobjects are embedded in is described by the ktype |
172 | pointer. The subsystem that the kobject belongs to is pointed to by the | 172 | pointer. |
173 | subsys pointer. | ||
174 | 173 | ||
175 | A kset contains a kobject itself, meaning that it may be registered in | 174 | A kset contains a kobject itself, meaning that it may be registered in |
176 | the kobject hierarchy and exported via sysfs. More importantly, the | 175 | the kobject hierarchy and exported via sysfs. More importantly, the |
@@ -209,6 +208,58 @@ the hierarchy. | |||
209 | kset_find_obj() may be used to locate a kobject with a particular | 208 | kset_find_obj() may be used to locate a kobject with a particular |
210 | name. The kobject, if found, is returned. | 209 | name. The kobject, if found, is returned. |
211 | 210 | ||
211 | There are also some helper functions whose names refer to the formerly | ||
212 | existing "struct subsystem", whose functions have been taken over by | ||
213 | ksets. | ||
214 | |||
215 | |||
216 | decl_subsys(name,type,uevent_ops) | ||
217 | |||
218 | Declares a kset named '<name>_subsys' of type <type> with | ||
219 | uevent_ops <uevent_ops>. For example, | ||
220 | |||
221 | decl_subsys(devices, &ktype_device, &device_uevent_ops); | ||
222 | |||
223 | is equivalent to doing: | ||
224 | |||
225 | struct kset devices_subsys = { | ||
226 | .kobj = { | ||
227 | .name = "devices", | ||
228 | }, | ||
229 | .ktype = &ktype_device, | ||
230 | .uevent_ops = &device_uevent_ops, | ||
231 | }; | ||
232 | |||
233 | |||
234 | The objects that are registered with a subsystem that use the | ||
235 | subsystem's default list must have their kset ptr set properly. These | ||
236 | objects may have embedded kobjects or ksets. The | ||
237 | following helpers make setting the kset easier: | ||
238 | |||
239 | |||
240 | kobj_set_kset_s(obj,subsys) | ||
241 | |||
242 | - Assumes that obj->kobj exists, and is a struct kobject. | ||
243 | - Sets the kset of that kobject to the kset <subsys>. | ||
244 | |||
245 | |||
246 | kset_set_kset_s(obj,subsys) | ||
247 | |||
248 | - Assumes that obj->kset exists, and is a struct kset. | ||
249 | - Sets the kset of the embedded kobject to the kset <subsys>. | ||
250 | |||
251 | subsys_set_kset(obj,subsys) | ||
252 | |||
253 | - Assumes obj->subsys exists, and is a struct subsystem. | ||
254 | - Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset. | ||
255 | |||
256 | void subsystem_init(struct kset *s); | ||
257 | int subsystem_register(struct kset *s); | ||
258 | void subsystem_unregister(struct kset *s); | ||
259 | struct kset *subsys_get(struct kset *s); | ||
260 | void kset_put(struct kset *s); | ||
261 | |||
262 | These are just wrappers around the respective kset_* functions. | ||
212 | 263 | ||
213 | 2.3 sysfs | 264 | 2.3 sysfs |
214 | 265 | ||
@@ -254,114 +305,3 @@ Instances of struct kobj_type are not registered; only referenced by | |||
254 | the kset. A kobj_type may be referenced by an arbitrary number of | 305 | the kset. A kobj_type may be referenced by an arbitrary number of |
255 | ksets, as there may be disparate sets of identical objects. | 306 | ksets, as there may be disparate sets of identical objects. |
256 | 307 | ||
257 | |||
258 | |||
259 | 4. subsystems | ||
260 | |||
261 | 4.1 Description | ||
262 | |||
263 | A subsystem represents a significant entity of code that maintains an | ||
264 | arbitrary number of sets of objects of various types. Since the number | ||
265 | of ksets and the type of objects they contain are variable, a | ||
266 | generic representation of a subsystem is minimal. | ||
267 | |||
268 | |||
269 | struct subsystem { | ||
270 | struct kset kset; | ||
271 | struct rw_semaphore rwsem; | ||
272 | }; | ||
273 | |||
274 | int subsystem_register(struct subsystem *); | ||
275 | void subsystem_unregister(struct subsystem *); | ||
276 | |||
277 | struct subsystem * subsys_get(struct subsystem * s); | ||
278 | void subsys_put(struct subsystem * s); | ||
279 | |||
280 | |||
281 | A subsystem contains an embedded kset so: | ||
282 | |||
283 | - It can be represented in the object hierarchy via the kset's | ||
284 | embedded kobject. | ||
285 | |||
286 | - It can maintain a default list of objects of one type. | ||
287 | |||
288 | Additional ksets may attach to the subsystem simply by referencing the | ||
289 | subsystem before they are registered. (This one-way reference means | ||
290 | that there is no way to determine the ksets that are attached to the | ||
291 | subsystem.) | ||
292 | |||
293 | All ksets that are attached to a subsystem share the subsystem's R/W | ||
294 | semaphore. | ||
295 | |||
296 | |||
297 | 4.2 subsystem Programming Interface. | ||
298 | |||
299 | The subsystem programming interface is simple and does not offer the | ||
300 | flexibility that the kset and kobject programming interfaces do. They | ||
301 | may be registered and unregistered, as well as reference counted. Each | ||
302 | call forwards the calls to their embedded ksets (which forward the | ||
303 | calls to their embedded kobjects). | ||
304 | |||
305 | |||
306 | 4.3 Helpers | ||
307 | |||
308 | A number of macros are available to make dealing with subsystems and | ||
309 | their embedded objects easier. | ||
310 | |||
311 | |||
312 | decl_subsys(name,type) | ||
313 | |||
314 | Declares a subsystem named '<name>_subsys', with an embedded kset of | ||
315 | type <type>. For example, | ||
316 | |||
317 | decl_subsys(devices,&ktype_devices); | ||
318 | |||
319 | is equivalent to doing: | ||
320 | |||
321 | struct subsystem device_subsys = { | ||
322 | .kset = { | ||
323 | .kobj = { | ||
324 | .name = "devices", | ||
325 | }, | ||
326 | .ktype = &ktype_devices, | ||
327 | } | ||
328 | }; | ||
329 | |||
330 | |||
331 | The objects that are registered with a subsystem that use the | ||
332 | subsystem's default list must have their kset ptr set properly. These | ||
333 | objects may have embedded kobjects, ksets, or other subsystems. The | ||
334 | following helpers make setting the kset easier: | ||
335 | |||
336 | |||
337 | kobj_set_kset_s(obj,subsys) | ||
338 | |||
339 | - Assumes that obj->kobj exists, and is a struct kobject. | ||
340 | - Sets the kset of that kobject to the subsystem's embedded kset. | ||
341 | |||
342 | |||
343 | kset_set_kset_s(obj,subsys) | ||
344 | |||
345 | - Assumes that obj->kset exists, and is a struct kset. | ||
346 | - Sets the kset of the embedded kobject to the subsystem's | ||
347 | embedded kset. | ||
348 | |||
349 | subsys_set_kset(obj,subsys) | ||
350 | |||
351 | - Assumes obj->subsys exists, and is a struct subsystem. | ||
352 | - Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset. | ||
353 | |||
354 | |||
355 | 4.4 sysfs | ||
356 | |||
357 | subsystems are represented in sysfs via their embedded kobjects. They | ||
358 | follow the same rules as previously mentioned with no exceptions. They | ||
359 | typically receive a top-level directory in sysfs, except when their | ||
360 | embedded kobject is part of another kset, or the parent of the | ||
361 | embedded kobject is explicitly set. | ||
362 | |||
363 | Note that the subsystem's embedded kset must be 'attached' to the | ||
364 | subsystem itself in order to use its rwsem. This is done after | ||
365 | kset_add() has been called. (Not before, because kset_add() uses its | ||
366 | subsystem for a default parent if it doesn't already have one). | ||
367 | |||
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile index b9b9427376e9..31e794ef5f98 100644 --- a/Documentation/lguest/Makefile +++ b/Documentation/lguest/Makefile | |||
@@ -11,8 +11,7 @@ endif | |||
11 | include $(KBUILD_OUTPUT)/.config | 11 | include $(KBUILD_OUTPUT)/.config |
12 | LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000) | 12 | LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000) |
13 | 13 | ||
14 | CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 \ | 14 | CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds |
15 | -static -DLGUEST_GUEST_TOP="$(LGUEST_GUEST_TOP)" -Wl,-T,lguest.lds | ||
16 | LDLIBS:=-lz | 15 | LDLIBS:=-lz |
17 | 16 | ||
18 | all: lguest.lds lguest | 17 | all: lguest.lds lguest |
diff --git a/Documentation/lguest/extract b/Documentation/lguest/extract new file mode 100644 index 000000000000..7730bb6e4b94 --- /dev/null +++ b/Documentation/lguest/extract | |||
@@ -0,0 +1,58 @@ | |||
1 | #! /bin/sh | ||
2 | |||
3 | set -e | ||
4 | |||
5 | PREFIX=$1 | ||
6 | shift | ||
7 | |||
8 | trap 'rm -r $TMPDIR' 0 | ||
9 | TMPDIR=`mktemp -d` | ||
10 | |||
11 | exec 3>/dev/null | ||
12 | for f; do | ||
13 | while IFS=" | ||
14 | " read -r LINE; do | ||
15 | case "$LINE" in | ||
16 | *$PREFIX:[0-9]*:\**) | ||
17 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
18 | if [ -f $TMPDIR/$NUM ]; then | ||
19 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
20 | exit 1 | ||
21 | fi | ||
22 | exec 3>>$TMPDIR/$NUM | ||
23 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
24 | /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3 | ||
25 | ;; | ||
26 | *$PREFIX:[0-9]*) | ||
27 | NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"` | ||
28 | if [ -f $TMPDIR/$NUM ]; then | ||
29 | echo "$TMPDIR/$NUM already exits prior to $f" | ||
30 | exit 1 | ||
31 | fi | ||
32 | exec 3>>$TMPDIR/$NUM | ||
33 | echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM | ||
34 | /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3 | ||
35 | ;; | ||
36 | *:\**) | ||
37 | /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3 | ||
38 | echo >&3 | ||
39 | exec 3>/dev/null | ||
40 | ;; | ||
41 | *) | ||
42 | /bin/echo "$LINE" >&3 | ||
43 | ;; | ||
44 | esac | ||
45 | done < $f | ||
46 | echo >&3 | ||
47 | exec 3>/dev/null | ||
48 | done | ||
49 | |||
50 | LASTFILE="" | ||
51 | for f in $TMPDIR/*; do | ||
52 | if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then | ||
53 | LASTFILE=$(cat $TMPDIR/.$(basename $f) ) | ||
54 | echo "[ $LASTFILE ]" | ||
55 | fi | ||
56 | cat $f | ||
57 | done | ||
58 | |||
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 1432b502a2d9..f7918401a007 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -1,5 +1,10 @@ | |||
1 | /* Simple program to layout "physical" memory for new lguest guest. | 1 | /*P:100 This is the Launcher code, a simple program which lays out the |
2 | * Linked high to avoid likely physical memory. */ | 2 | * "physical" memory for the new Guest by mapping the kernel image and the |
3 | * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. | ||
4 | * | ||
5 | * The only trick: the Makefile links it at a high address so it will be clear | ||
6 | * of the guest memory region. It means that each Guest cannot have more than | ||
7 | * about 2.5G of memory on a normally configured Host. :*/ | ||
3 | #define _LARGEFILE64_SOURCE | 8 | #define _LARGEFILE64_SOURCE |
4 | #define _GNU_SOURCE | 9 | #define _GNU_SOURCE |
5 | #include <stdio.h> | 10 | #include <stdio.h> |
@@ -29,12 +34,20 @@ | |||
29 | #include <termios.h> | 34 | #include <termios.h> |
30 | #include <getopt.h> | 35 | #include <getopt.h> |
31 | #include <zlib.h> | 36 | #include <zlib.h> |
37 | /*L:110 We can ignore the 28 include files we need for this program, but I do | ||
38 | * want to draw attention to the use of kernel-style types. | ||
39 | * | ||
40 | * As Linus said, "C is a Spartan language, and so should your naming be." I | ||
41 | * like these abbreviations and the header we need uses them, so we define them | ||
42 | * here. | ||
43 | */ | ||
32 | typedef unsigned long long u64; | 44 | typedef unsigned long long u64; |
33 | typedef uint32_t u32; | 45 | typedef uint32_t u32; |
34 | typedef uint16_t u16; | 46 | typedef uint16_t u16; |
35 | typedef uint8_t u8; | 47 | typedef uint8_t u8; |
36 | #include "../../include/linux/lguest_launcher.h" | 48 | #include "../../include/linux/lguest_launcher.h" |
37 | #include "../../include/asm-i386/e820.h" | 49 | #include "../../include/asm-i386/e820.h" |
50 | /*:*/ | ||
38 | 51 | ||
39 | #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ | 52 | #define PAGE_PRESENT 0x7 /* Present, RW, Execute */ |
40 | #define NET_PEERNUM 1 | 53 | #define NET_PEERNUM 1 |
@@ -43,31 +56,52 @@ typedef uint8_t u8; | |||
43 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ | 56 | #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ |
44 | #endif | 57 | #endif |
45 | 58 | ||
59 | /*L:120 verbose is both a global flag and a macro. The C preprocessor allows | ||
60 | * this, and although I wouldn't recommend it, it works quite nicely here. */ | ||
46 | static bool verbose; | 61 | static bool verbose; |
47 | #define verbose(args...) \ | 62 | #define verbose(args...) \ |
48 | do { if (verbose) printf(args); } while(0) | 63 | do { if (verbose) printf(args); } while(0) |
64 | /*:*/ | ||
65 | |||
66 | /* The pipe to send commands to the waker process */ | ||
49 | static int waker_fd; | 67 | static int waker_fd; |
68 | /* The top of guest physical memory. */ | ||
69 | static u32 top; | ||
50 | 70 | ||
71 | /* This is our list of devices. */ | ||
51 | struct device_list | 72 | struct device_list |
52 | { | 73 | { |
74 | /* Summary information about the devices in our list: ready to pass to | ||
75 | * select() to ask which need servicing.*/ | ||
53 | fd_set infds; | 76 | fd_set infds; |
54 | int max_infd; | 77 | int max_infd; |
55 | 78 | ||
79 | /* The descriptor page for the devices. */ | ||
80 | struct lguest_device_desc *descs; | ||
81 | |||
82 | /* A single linked list of devices. */ | ||
56 | struct device *dev; | 83 | struct device *dev; |
84 | /* ... And an end pointer so we can easily append new devices */ | ||
57 | struct device **lastdev; | 85 | struct device **lastdev; |
58 | }; | 86 | }; |
59 | 87 | ||
88 | /* The device structure describes a single device. */ | ||
60 | struct device | 89 | struct device |
61 | { | 90 | { |
91 | /* The linked-list pointer. */ | ||
62 | struct device *next; | 92 | struct device *next; |
93 | /* The descriptor for this device, as mapped into the Guest. */ | ||
63 | struct lguest_device_desc *desc; | 94 | struct lguest_device_desc *desc; |
95 | /* The memory page(s) of this device, if any. Also mapped in Guest. */ | ||
64 | void *mem; | 96 | void *mem; |
65 | 97 | ||
66 | /* Watch this fd if handle_input non-NULL. */ | 98 | /* If handle_input is set, it wants to be called when this file |
99 | * descriptor is ready. */ | ||
67 | int fd; | 100 | int fd; |
68 | bool (*handle_input)(int fd, struct device *me); | 101 | bool (*handle_input)(int fd, struct device *me); |
69 | 102 | ||
70 | /* Watch DMA to this key if handle_input non-NULL. */ | 103 | /* If handle_output is set, it wants to be called when the Guest sends |
104 | * DMA to this key. */ | ||
71 | unsigned long watch_key; | 105 | unsigned long watch_key; |
72 | u32 (*handle_output)(int fd, const struct iovec *iov, | 106 | u32 (*handle_output)(int fd, const struct iovec *iov, |
73 | unsigned int num, struct device *me); | 107 | unsigned int num, struct device *me); |
@@ -76,6 +110,11 @@ struct device | |||
76 | void *priv; | 110 | void *priv; |
77 | }; | 111 | }; |
78 | 112 | ||
113 | /*L:130 | ||
114 | * Loading the Kernel. | ||
115 | * | ||
116 | * We start with a couple of simple helper routines. open_or_die() avoids | ||
117 | * error-checking code cluttering the callers: */ | ||
79 | static int open_or_die(const char *name, int flags) | 118 | static int open_or_die(const char *name, int flags) |
80 | { | 119 | { |
81 | int fd = open(name, flags); | 120 | int fd = open(name, flags); |
@@ -84,26 +123,38 @@ static int open_or_die(const char *name, int flags) | |||
84 | return fd; | 123 | return fd; |
85 | } | 124 | } |
86 | 125 | ||
126 | /* map_zeroed_pages() takes a (page-aligned) address and a number of pages. */ | ||
87 | static void *map_zeroed_pages(unsigned long addr, unsigned int num) | 127 | static void *map_zeroed_pages(unsigned long addr, unsigned int num) |
88 | { | 128 | { |
129 | /* We cache the /dev/zero file-descriptor so we only open it once. */ | ||
89 | static int fd = -1; | 130 | static int fd = -1; |
90 | 131 | ||
91 | if (fd == -1) | 132 | if (fd == -1) |
92 | fd = open_or_die("/dev/zero", O_RDONLY); | 133 | fd = open_or_die("/dev/zero", O_RDONLY); |
93 | 134 | ||
135 | /* We use a private mapping (ie. if we write to the page, it will be | ||
136 | * copied), and obviously we insist that it be mapped where we ask. */ | ||
94 | if (mmap((void *)addr, getpagesize() * num, | 137 | if (mmap((void *)addr, getpagesize() * num, |
95 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0) | 138 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0) |
96 | != (void *)addr) | 139 | != (void *)addr) |
97 | err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr); | 140 | err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr); |
141 | |||
142 | /* Returning the address is just a courtesy: can simplify callers. */ | ||
98 | return (void *)addr; | 143 | return (void *)addr; |
99 | } | 144 | } |
100 | 145 | ||
101 | /* Find magic string marking entry point, return entry point. */ | 146 | /* To find out where to start we look for the magic Guest string, which marks |
147 | * the code we see in lguest_asm.S. This is a hack which we are currently | ||
148 | * plotting to replace with the normal Linux entry point. */ | ||
102 | static unsigned long entry_point(void *start, void *end, | 149 | static unsigned long entry_point(void *start, void *end, |
103 | unsigned long page_offset) | 150 | unsigned long page_offset) |
104 | { | 151 | { |
105 | void *p; | 152 | void *p; |
106 | 153 | ||
154 | /* The scan gives us the physical starting address. We want the | ||
155 | * virtual address in this case, and fortunately, we already figured | ||
156 | * out the physical-virtual difference and passed it here in | ||
157 | * "page_offset". */ | ||
107 | for (p = start; p < end; p++) | 158 | for (p = start; p < end; p++) |
108 | if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) | 159 | if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) |
109 | return (long)p + strlen("GenuineLguest") + page_offset; | 160 | return (long)p + strlen("GenuineLguest") + page_offset; |
@@ -111,7 +162,17 @@ static unsigned long entry_point(void *start, void *end, | |||
111 | err(1, "Is this image a genuine lguest?"); | 162 | err(1, "Is this image a genuine lguest?"); |
112 | } | 163 | } |
113 | 164 | ||
114 | /* Returns the entry point */ | 165 | /* This routine takes an open vmlinux image, which is in ELF, and maps it into |
166 | * the Guest memory. ELF = Executable and Linkable Format, which is the format used | ||
167 | * by all modern binaries on Linux including the kernel. | ||
168 | * | ||
169 | * The ELF headers give *two* addresses: a physical address, and a virtual | ||
170 | * address. The Guest kernel expects to be placed in memory at the physical | ||
171 | * address, and the page tables set up so it will correspond to that virtual | ||
172 | * address. We return the difference between the virtual and physical | ||
173 | * addresses in the "page_offset" pointer. | ||
174 | * | ||
175 | * We return the starting address. */ | ||
115 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | 176 | static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, |
116 | unsigned long *page_offset) | 177 | unsigned long *page_offset) |
117 | { | 178 | { |
@@ -120,40 +181,61 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
120 | unsigned int i; | 181 | unsigned int i; |
121 | unsigned long start = -1UL, end = 0; | 182 | unsigned long start = -1UL, end = 0; |
122 | 183 | ||
123 | /* Sanity checks. */ | 184 | /* Sanity checks on the main ELF header: an x86 executable with a |
185 | * reasonable number of correctly-sized program headers. */ | ||
124 | if (ehdr->e_type != ET_EXEC | 186 | if (ehdr->e_type != ET_EXEC |
125 | || ehdr->e_machine != EM_386 | 187 | || ehdr->e_machine != EM_386 |
126 | || ehdr->e_phentsize != sizeof(Elf32_Phdr) | 188 | || ehdr->e_phentsize != sizeof(Elf32_Phdr) |
127 | || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) | 189 | || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) |
128 | errx(1, "Malformed elf header"); | 190 | errx(1, "Malformed elf header"); |
129 | 191 | ||
192 | /* An ELF executable contains an ELF header and a number of "program" | ||
193 | * headers which indicate which parts ("segments") of the program to | ||
194 | * load where. */ | ||
195 | |||
196 | /* We read in all the program headers at once: */ | ||
130 | if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) | 197 | if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) |
131 | err(1, "Seeking to program headers"); | 198 | err(1, "Seeking to program headers"); |
132 | if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) | 199 | if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) |
133 | err(1, "Reading program headers"); | 200 | err(1, "Reading program headers"); |
134 | 201 | ||
202 | /* We don't know page_offset yet. */ | ||
135 | *page_offset = 0; | 203 | *page_offset = 0; |
136 | /* We map the loadable segments at virtual addresses corresponding | 204 | |
137 | * to their physical addresses (our virtual == guest physical). */ | 205 | /* Try all the headers: there are usually only three. A read-only one, |
206 | * a read-write one, and a "note" section which isn't loadable. */ | ||
138 | for (i = 0; i < ehdr->e_phnum; i++) { | 207 | for (i = 0; i < ehdr->e_phnum; i++) { |
208 | /* If this isn't a loadable segment, we ignore it */ | ||
139 | if (phdr[i].p_type != PT_LOAD) | 209 | if (phdr[i].p_type != PT_LOAD) |
140 | continue; | 210 | continue; |
141 | 211 | ||
142 | verbose("Section %i: size %i addr %p\n", | 212 | verbose("Section %i: size %i addr %p\n", |
143 | i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); | 213 | i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); |
144 | 214 | ||
145 | /* We expect linear address space. */ | 215 | /* We expect a simple linear address space: every segment must |
216 | * have the same difference between virtual (p_vaddr) and | ||
217 | * physical (p_paddr) address. */ | ||
146 | if (!*page_offset) | 218 | if (!*page_offset) |
147 | *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr; | 219 | *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr; |
148 | else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr) | 220 | else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr) |
149 | errx(1, "Page offset of section %i different", i); | 221 | errx(1, "Page offset of section %i different", i); |
150 | 222 | ||
223 | /* We track the first and last address we mapped, so we can | ||
224 | * tell entry_point() where to scan. */ | ||
151 | if (phdr[i].p_paddr < start) | 225 | if (phdr[i].p_paddr < start) |
152 | start = phdr[i].p_paddr; | 226 | start = phdr[i].p_paddr; |
153 | if (phdr[i].p_paddr + phdr[i].p_filesz > end) | 227 | if (phdr[i].p_paddr + phdr[i].p_filesz > end) |
154 | end = phdr[i].p_paddr + phdr[i].p_filesz; | 228 | end = phdr[i].p_paddr + phdr[i].p_filesz; |
155 | 229 | ||
156 | /* We map everything private, writable. */ | 230 | /* We map this section of the file at its physical address. We |
231 | * map it read & write even if the header says this segment is | ||
232 | * read-only. The kernel really wants to be writable: it | ||
233 | * patches its own instructions which would normally be | ||
234 | * read-only. | ||
235 | * | ||
236 | * MAP_PRIVATE means that the page won't be copied until a | ||
237 | * write is done to it. This allows us to share much of the | ||
238 | * kernel memory between Guests. */ | ||
157 | addr = mmap((void *)phdr[i].p_paddr, | 239 | addr = mmap((void *)phdr[i].p_paddr, |
158 | phdr[i].p_filesz, | 240 | phdr[i].p_filesz, |
159 | PROT_READ|PROT_WRITE|PROT_EXEC, | 241 | PROT_READ|PROT_WRITE|PROT_EXEC, |
@@ -167,7 +249,31 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, | |||
167 | return entry_point((void *)start, (void *)end, *page_offset); | 249 | return entry_point((void *)start, (void *)end, *page_offset); |
168 | } | 250 | } |
169 | 251 | ||
170 | /* This is amazingly reliable. */ | 252 | /*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated. |
253 | * | ||
254 | * We know that CONFIG_PAGE_OFFSET sets what virtual address the kernel expects | ||
255 | * to be. We don't know what that option was, but we can figure it out | ||
256 | * approximately by looking at the addresses in the code. I chose the common | ||
257 | * case of reading a memory location into the %eax register: | ||
258 | * | ||
259 | * movl <some-address>, %eax | ||
260 | * | ||
261 | * This gets encoded as five bytes: "0xA1 <4-byte-address>". For example, | ||
262 | * "0xA1 0x18 0x60 0x47 0xC0" reads the address 0xC0476018 into %eax. | ||
263 | * | ||
264 | * In this example we can guess that the kernel was compiled with | ||
265 | * CONFIG_PAGE_OFFSET set to 0xC0000000 (it's always a round number). If the | ||
266 | * kernel were larger than 16MB, we might see 0xC1 addresses show up, but our | ||
267 | * kernel isn't that bloated yet. | ||
268 | * | ||
269 | * Unfortunately, x86 has variable-length instructions, so finding this | ||
270 | * particular instruction properly involves writing a disassembler. Instead, | ||
271 | * we rely on statistics. We look for "0xA1" and tally the different bytes | ||
272 | * which occur 4 bytes later (the "0xC0" in our example above). When one of | ||
273 | * those bytes appears three times, we can be reasonably confident that it | ||
274 | * forms the start of CONFIG_PAGE_OFFSET. | ||
275 | * | ||
276 | * This is amazingly reliable. */ | ||
171 | static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) | 277 | static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) |
172 | { | 278 | { |
173 | unsigned int i, possibilities[256] = { 0 }; | 279 | unsigned int i, possibilities[256] = { 0 }; |
@@ -180,30 +286,52 @@ static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) | |||
180 | errx(1, "could not determine page offset"); | 286 | errx(1, "could not determine page offset"); |
181 | } | 287 | } |
182 | 288 | ||
289 | /*L:160 Unfortunately the entire ELF image isn't compressed: the segments | ||
290 | * which need loading are extracted and compressed raw. This denies us the | ||
291 | * information we need to make a fully-general loader. */ | ||
183 | static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) | 292 | static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) |
184 | { | 293 | { |
185 | gzFile f; | 294 | gzFile f; |
186 | int ret, len = 0; | 295 | int ret, len = 0; |
296 | /* A bzImage always gets loaded at physical address 1M. This is | ||
297 | * actually configurable as CONFIG_PHYSICAL_START, but as the comment | ||
298 | * there says, "Don't change this unless you know what you are doing". | ||
299 | * Indeed. */ | ||
187 | void *img = (void *)0x100000; | 300 | void *img = (void *)0x100000; |
188 | 301 | ||
302 | /* gzdopen takes our file descriptor (carefully placed at the start of | ||
303 | * the GZIP header we found) and returns a gzFile. */ | ||
189 | f = gzdopen(fd, "rb"); | 304 | f = gzdopen(fd, "rb"); |
305 | /* We read it into memory in 64k chunks until we hit the end. */ | ||
190 | while ((ret = gzread(f, img + len, 65536)) > 0) | 306 | while ((ret = gzread(f, img + len, 65536)) > 0) |
191 | len += ret; | 307 | len += ret; |
192 | if (ret < 0) | 308 | if (ret < 0) |
193 | err(1, "reading image from bzImage"); | 309 | err(1, "reading image from bzImage"); |
194 | 310 | ||
195 | verbose("Unpacked size %i addr %p\n", len, img); | 311 | verbose("Unpacked size %i addr %p\n", len, img); |
312 | |||
313 | /* Without the ELF header, we can't tell the virtual-physical gap. This is | ||
314 | * CONFIG_PAGE_OFFSET, and people do actually change it. Fortunately, | ||
315 | * I have a clever way of figuring it out from the code itself. */ | ||
196 | *page_offset = intuit_page_offset(img, len); | 316 | *page_offset = intuit_page_offset(img, len); |
197 | 317 | ||
198 | return entry_point(img, img + len, *page_offset); | 318 | return entry_point(img, img + len, *page_offset); |
199 | } | 319 | } |
200 | 320 | ||
321 | /*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're | ||
322 | * supposed to jump into it and it will unpack itself. We can't do that | ||
323 | * because the Guest can't run the unpacking code, and adding features to | ||
324 | * lguest kills puppies, so we don't want to. | ||
325 | * | ||
326 | * The bzImage is formed by putting the decompressing code in front of the | ||
327 | * compressed kernel code. So we can simply scan through it looking for the | ||
328 | * first "gzip" header, and start decompressing from there. */ | ||
201 | static unsigned long load_bzimage(int fd, unsigned long *page_offset) | 329 | static unsigned long load_bzimage(int fd, unsigned long *page_offset) |
202 | { | 330 | { |
203 | unsigned char c; | 331 | unsigned char c; |
204 | int state = 0; | 332 | int state = 0; |
205 | 333 | ||
206 | /* Ugly brute force search for gzip header. */ | 334 | /* GZIP header is 0x1F 0x8B <method> <flags>... <compressed-by>. */ |
207 | while (read(fd, &c, 1) == 1) { | 335 | while (read(fd, &c, 1) == 1) { |
208 | switch (state) { | 336 | switch (state) { |
209 | case 0: | 337 | case 0: |
@@ -220,8 +348,10 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset) | |||
220 | state++; | 348 | state++; |
221 | break; | 349 | break; |
222 | case 9: | 350 | case 9: |
351 | /* Seek back to the start of the gzip header. */ | ||
223 | lseek(fd, -10, SEEK_CUR); | 352 | lseek(fd, -10, SEEK_CUR); |
224 | if (c != 0x03) /* Compressed under UNIX. */ | 353 | /* One final check: "compressed under UNIX". */ |
354 | if (c != 0x03) | ||
225 | state = -1; | 355 | state = -1; |
226 | else | 356 | else |
227 | return unpack_bzimage(fd, page_offset); | 357 | return unpack_bzimage(fd, page_offset); |
@@ -230,25 +360,43 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset) | |||
230 | errx(1, "Could not find kernel in bzImage"); | 360 | errx(1, "Could not find kernel in bzImage"); |
231 | } | 361 | } |
232 | 362 | ||
363 | /*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels | ||
364 | * come wrapped up in the self-decompressing "bzImage" format. With some funky | ||
365 | * coding, we can load those, too. */ | ||
233 | static unsigned long load_kernel(int fd, unsigned long *page_offset) | 366 | static unsigned long load_kernel(int fd, unsigned long *page_offset) |
234 | { | 367 | { |
235 | Elf32_Ehdr hdr; | 368 | Elf32_Ehdr hdr; |
236 | 369 | ||
370 | /* Read in the first few bytes. */ | ||
237 | if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) | 371 | if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) |
238 | err(1, "Reading kernel"); | 372 | err(1, "Reading kernel"); |
239 | 373 | ||
374 | /* If it's an ELF file, it starts with "\177ELF" */ | ||
240 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) | 375 | if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) |
241 | return map_elf(fd, &hdr, page_offset); | 376 | return map_elf(fd, &hdr, page_offset); |
242 | 377 | ||
378 | /* Otherwise we assume it's a bzImage, and try to unpack it */ | ||
243 | return load_bzimage(fd, page_offset); | 379 | return load_bzimage(fd, page_offset); |
244 | } | 380 | } |
245 | 381 | ||
382 | /* This is a trivial little helper to align pages. Andi Kleen hated it because | ||
383 | * it calls getpagesize() twice: "it's dumb code." | ||
384 | * | ||
385 | * Kernel guys get really het up about optimization, even when it's not | ||
386 | * necessary. I leave this code as a reaction against that. */ | ||
246 | static inline unsigned long page_align(unsigned long addr) | 387 | static inline unsigned long page_align(unsigned long addr) |
247 | { | 388 | { |
389 | /* Add upwards and truncate downwards. */ | ||
248 | return ((addr + getpagesize()-1) & ~(getpagesize()-1)); | 390 | return ((addr + getpagesize()-1) & ~(getpagesize()-1)); |
249 | } | 391 | } |
250 | 392 | ||
251 | /* initrd gets loaded at top of memory: return length. */ | 393 | /*L:180 An "initial ram disk" is a disk image loaded into memory along with |
394 | * the kernel which the kernel can use to boot from without needing any | ||
395 | * drivers. Most distributions now use this as standard: the initrd contains | ||
396 | * the code to load the appropriate driver modules for the current machine. | ||
397 | * | ||
398 | * Importantly, James Morris works for RedHat, and Fedora uses initrds for its | ||
399 | * kernels. He sent me this (and tells me when I break it). */ | ||
252 | static unsigned long load_initrd(const char *name, unsigned long mem) | 400 | static unsigned long load_initrd(const char *name, unsigned long mem) |
253 | { | 401 | { |
254 | int ifd; | 402 | int ifd; |
@@ -257,21 +405,35 @@ static unsigned long load_initrd(const char *name, unsigned long mem) | |||
257 | void *iaddr; | 405 | void *iaddr; |
258 | 406 | ||
259 | ifd = open_or_die(name, O_RDONLY); | 407 | ifd = open_or_die(name, O_RDONLY); |
408 | /* fstat() is needed to get the file size. */ | ||
260 | if (fstat(ifd, &st) < 0) | 409 | if (fstat(ifd, &st) < 0) |
261 | err(1, "fstat() on initrd '%s'", name); | 410 | err(1, "fstat() on initrd '%s'", name); |
262 | 411 | ||
412 | /* The length needs to be rounded up to a page size: mmap needs the | ||
413 | * address to be page aligned. */ | ||
263 | len = page_align(st.st_size); | 414 | len = page_align(st.st_size); |
415 | /* We map the initrd at the top of memory. */ | ||
264 | iaddr = mmap((void *)mem - len, st.st_size, | 416 | iaddr = mmap((void *)mem - len, st.st_size, |
265 | PROT_READ|PROT_EXEC|PROT_WRITE, | 417 | PROT_READ|PROT_EXEC|PROT_WRITE, |
266 | MAP_FIXED|MAP_PRIVATE, ifd, 0); | 418 | MAP_FIXED|MAP_PRIVATE, ifd, 0); |
267 | if (iaddr != (void *)mem - len) | 419 | if (iaddr != (void *)mem - len) |
268 | err(1, "Mmaping initrd '%s' returned %p not %p", | 420 | err(1, "Mmaping initrd '%s' returned %p not %p", |
269 | name, iaddr, (void *)mem - len); | 421 | name, iaddr, (void *)mem - len); |
422 | /* Once a file is mapped, you can close the file descriptor. It's a | ||
423 | * little odd, but quite useful. */ | ||
270 | close(ifd); | 424 | close(ifd); |
271 | verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); | 425 | verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); |
426 | |||
427 | /* We return the initrd size. */ | ||
272 | return len; | 428 | return len; |
273 | } | 429 | } |
274 | 430 | ||
431 | /* Once we know how much memory we have, and the address the Guest kernel | ||
432 | * expects, we can construct simple linear page tables which will get the Guest | ||
433 | * far enough into the boot to create its own. | ||
434 | * | ||
435 | * We lay them out of the way, just below the initrd (which is why we need to | ||
436 | * know its size). */ | ||
275 | static unsigned long setup_pagetables(unsigned long mem, | 437 | static unsigned long setup_pagetables(unsigned long mem, |
276 | unsigned long initrd_size, | 438 | unsigned long initrd_size, |
277 | unsigned long page_offset) | 439 | unsigned long page_offset) |
@@ -280,23 +442,32 @@ static unsigned long setup_pagetables(unsigned long mem, | |||
280 | unsigned int mapped_pages, i, linear_pages; | 442 | unsigned int mapped_pages, i, linear_pages; |
281 | unsigned int ptes_per_page = getpagesize()/sizeof(u32); | 443 | unsigned int ptes_per_page = getpagesize()/sizeof(u32); |
282 | 444 | ||
283 | /* If we can map all of memory above page_offset, we do so. */ | 445 | /* Ideally we map all physical memory starting at page_offset. |
446 | * However, if page_offset is 0xC0000000 we can only map 1G of physical | ||
447 | * (0xC0000000 + 1G overflows). */ | ||
284 | if (mem <= -page_offset) | 448 | if (mem <= -page_offset) |
285 | mapped_pages = mem/getpagesize(); | 449 | mapped_pages = mem/getpagesize(); |
286 | else | 450 | else |
287 | mapped_pages = -page_offset/getpagesize(); | 451 | mapped_pages = -page_offset/getpagesize(); |
288 | 452 | ||
289 | /* Each linear PTE page can map ptes_per_page pages. */ | 453 | /* Each PTE page can map ptes_per_page pages: how many do we need? */ |
290 | linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; | 454 | linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; |
291 | 455 | ||
292 | /* We lay out top-level then linear mapping immediately below initrd */ | 456 | /* We put the toplevel page directory page just below the initrd. */ |
293 | pgdir = (void *)mem - initrd_size - getpagesize(); | 457 | pgdir = (void *)mem - initrd_size - getpagesize(); |
458 | |||
459 | /* Now we use the next linear_pages pages as pte pages */ | ||
294 | linear = (void *)pgdir - linear_pages*getpagesize(); | 460 | linear = (void *)pgdir - linear_pages*getpagesize(); |
295 | 461 | ||
462 | /* Linear mapping is easy: put every page's address into the mapping in | ||
463 | * order. PAGE_PRESENT contains the flags Present, Writable and | ||
464 | * Executable. */ | ||
296 | for (i = 0; i < mapped_pages; i++) | 465 | for (i = 0; i < mapped_pages; i++) |
297 | linear[i] = ((i * getpagesize()) | PAGE_PRESENT); | 466 | linear[i] = ((i * getpagesize()) | PAGE_PRESENT); |
298 | 467 | ||
299 | /* Now set up pgd so that this memory is at page_offset */ | 468 | /* The top level points to the linear page table pages above. The |
469 | * entry representing page_offset points to the first one, and they | ||
470 | * continue from there. */ | ||
300 | for (i = 0; i < mapped_pages; i += ptes_per_page) { | 471 | for (i = 0; i < mapped_pages; i += ptes_per_page) { |
301 | pgdir[(i + page_offset/getpagesize())/ptes_per_page] | 472 | pgdir[(i + page_offset/getpagesize())/ptes_per_page] |
302 | = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT); | 473 | = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT); |
@@ -305,9 +476,13 @@ static unsigned long setup_pagetables(unsigned long mem, | |||
305 | verbose("Linear mapping of %u pages in %u pte pages at %p\n", | 476 | verbose("Linear mapping of %u pages in %u pte pages at %p\n", |
306 | mapped_pages, linear_pages, linear); | 477 | mapped_pages, linear_pages, linear); |
307 | 478 | ||
479 | /* We return the top level (guest-physical) address: the kernel needs | ||
480 | * to know where it is. */ | ||
308 | return (unsigned long)pgdir; | 481 | return (unsigned long)pgdir; |
309 | } | 482 | } |
310 | 483 | ||
484 | /* Simple routine to roll all the commandline arguments together with spaces | ||
485 | * between them. */ | ||
311 | static void concat(char *dst, char *args[]) | 486 | static void concat(char *dst, char *args[]) |
312 | { | 487 | { |
313 | unsigned int i, len = 0; | 488 | unsigned int i, len = 0; |
@@ -321,18 +496,24 @@ static void concat(char *dst, char *args[]) | |||
321 | dst[len] = '\0'; | 496 | dst[len] = '\0'; |
322 | } | 497 | } |
323 | 498 | ||
499 | /* This is where we actually tell the kernel to initialize the Guest. We saw | ||
500 | * the arguments it expects when we looked at initialize() in lguest_user.c: | ||
501 | * the top physical page to allow, the top level pagetable, the entry point and | ||
502 | * the page_offset constant for the Guest. */ | ||
324 | static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) | 503 | static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) |
325 | { | 504 | { |
326 | u32 args[] = { LHREQ_INITIALIZE, | 505 | u32 args[] = { LHREQ_INITIALIZE, |
327 | LGUEST_GUEST_TOP/getpagesize(), /* Just below us */ | 506 | top/getpagesize(), pgdir, start, page_offset }; |
328 | pgdir, start, page_offset }; | ||
329 | int fd; | 507 | int fd; |
330 | 508 | ||
331 | fd = open_or_die("/dev/lguest", O_RDWR); | 509 | fd = open_or_die("/dev/lguest", O_RDWR); |
332 | if (write(fd, args, sizeof(args)) < 0) | 510 | if (write(fd, args, sizeof(args)) < 0) |
333 | err(1, "Writing to /dev/lguest"); | 511 | err(1, "Writing to /dev/lguest"); |
512 | |||
513 | /* We return the /dev/lguest file descriptor to control this Guest */ | ||
334 | return fd; | 514 | return fd; |
335 | } | 515 | } |
516 | /*:*/ | ||
336 | 517 | ||
337 | static void set_fd(int fd, struct device_list *devices) | 518 | static void set_fd(int fd, struct device_list *devices) |
338 | { | 519 | { |
@@ -341,61 +522,108 @@ static void set_fd(int fd, struct device_list *devices) | |||
341 | devices->max_infd = fd; | 522 | devices->max_infd = fd; |
342 | } | 523 | } |
343 | 524 | ||
344 | /* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */ | 525 | /*L:200 |
526 | * The Waker. | ||
527 | * | ||
528 | * With a console and network devices, we can have lots of input which we need | ||
529 | * to process. We could try to tell the kernel what file descriptors to watch, | ||
530 | * but handing a file descriptor mask through to the kernel is fairly icky. | ||
531 | * | ||
532 | * Instead, we fork off a process which watches the file descriptors and writes | ||
533 | * the LHREQ_BREAK command to the /dev/lguest file descriptor to tell the Host | |
534 | * loop to stop running the Guest. This causes it to return from the | ||
535 | * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset | ||
536 | * the LHREQ_BREAK and wake us up again. | ||
537 | * | ||
538 | * This, of course, is merely a different *kind* of icky. | ||
539 | */ | ||
345 | static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices) | 540 | static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices) |
346 | { | 541 | { |
542 | /* Add the pipe from the Launcher to the fdset in the device_list, so | ||
543 | * we watch it, too. */ | ||
347 | set_fd(pipefd, devices); | 544 | set_fd(pipefd, devices); |
348 | 545 | ||
349 | for (;;) { | 546 | for (;;) { |
350 | fd_set rfds = devices->infds; | 547 | fd_set rfds = devices->infds; |
351 | u32 args[] = { LHREQ_BREAK, 1 }; | 548 | u32 args[] = { LHREQ_BREAK, 1 }; |
352 | 549 | ||
550 | /* Wait until input is ready from one of the devices. */ | ||
353 | select(devices->max_infd+1, &rfds, NULL, NULL, NULL); | 551 | select(devices->max_infd+1, &rfds, NULL, NULL, NULL); |
552 | /* Is it a message from the Launcher? */ | ||
354 | if (FD_ISSET(pipefd, &rfds)) { | 553 | if (FD_ISSET(pipefd, &rfds)) { |
355 | int ignorefd; | 554 | int ignorefd; |
555 | /* If read() returns 0, it means the Launcher has | ||
556 | * exited. We silently follow. */ | ||
356 | if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0) | 557 | if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0) |
357 | exit(0); | 558 | exit(0); |
559 | /* Otherwise it's telling us there's a problem with one | ||
560 | * of the devices, and we should ignore that file | ||
561 | * descriptor from now on. */ | ||
358 | FD_CLR(ignorefd, &devices->infds); | 562 | FD_CLR(ignorefd, &devices->infds); |
359 | } else | 563 | } else /* Send LHREQ_BREAK command. */ |
360 | write(lguest_fd, args, sizeof(args)); | 564 | write(lguest_fd, args, sizeof(args)); |
361 | } | 565 | } |
362 | } | 566 | } |
363 | 567 | ||
568 | /* This routine just sets up a pipe to the Waker process. */ | ||
364 | static int setup_waker(int lguest_fd, struct device_list *device_list) | 569 | static int setup_waker(int lguest_fd, struct device_list *device_list) |
365 | { | 570 | { |
366 | int pipefd[2], child; | 571 | int pipefd[2], child; |
367 | 572 | ||
573 | /* We create a pipe to talk to the waker, and also so it knows when the | ||
574 | * Launcher dies (and closes pipe). */ | ||
368 | pipe(pipefd); | 575 | pipe(pipefd); |
369 | child = fork(); | 576 | child = fork(); |
370 | if (child == -1) | 577 | if (child == -1) |
371 | err(1, "forking"); | 578 | err(1, "forking"); |
372 | 579 | ||
373 | if (child == 0) { | 580 | if (child == 0) { |
581 | /* Close the "writing" end of our copy of the pipe */ | ||
374 | close(pipefd[1]); | 582 | close(pipefd[1]); |
375 | wake_parent(pipefd[0], lguest_fd, device_list); | 583 | wake_parent(pipefd[0], lguest_fd, device_list); |
376 | } | 584 | } |
585 | /* Close the reading end of our copy of the pipe. */ | ||
377 | close(pipefd[0]); | 586 | close(pipefd[0]); |
378 | 587 | ||
588 | /* Here is the fd used to talk to the waker. */ | ||
379 | return pipefd[1]; | 589 | return pipefd[1]; |
380 | } | 590 | } |
381 | 591 | ||
592 | /*L:210 | ||
593 | * Device Handling. | ||
594 | * | ||
595 | * When the Guest sends DMA to us, it sends us an array of addresses and sizes. | ||
596 | * We need to make sure it's not trying to reach into the Launcher itself, so | ||
597 | * we have a convenient routine which checks it and exits with an error message | |
598 | * if something funny is going on: | ||
599 | */ | ||
382 | static void *_check_pointer(unsigned long addr, unsigned int size, | 600 | static void *_check_pointer(unsigned long addr, unsigned int size, |
383 | unsigned int line) | 601 | unsigned int line) |
384 | { | 602 | { |
385 | if (addr >= LGUEST_GUEST_TOP || addr + size >= LGUEST_GUEST_TOP) | 603 | /* We have to separately check addr and addr+size, because size could |
604 | * be huge and addr + size might wrap around. */ | ||
605 | if (addr >= top || addr + size >= top) | ||
386 | errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); | 606 | errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); |
607 | /* We return a pointer for the caller's convenience, now we know it's | ||
608 | * safe to use. */ | ||
387 | return (void *)addr; | 609 | return (void *)addr; |
388 | } | 610 | } |
611 | /* A macro which transparently hands the line number to the real function. */ | ||
389 | #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) | 612 | #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) |
390 | 613 | ||
391 | /* Returns pointer to dma->used_len */ | 614 | /* The Guest has given us the address of a "struct lguest_dma". We check it's |
615 | * OK and convert it to an iovec (which is a simple array of ptr/size | ||
616 | * pairs). */ | ||
392 | static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num) | 617 | static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num) |
393 | { | 618 | { |
394 | unsigned int i; | 619 | unsigned int i; |
395 | struct lguest_dma *udma; | 620 | struct lguest_dma *udma; |
396 | 621 | ||
622 | /* First we make sure that the array memory itself is valid. */ | ||
397 | udma = check_pointer(dma, sizeof(*udma)); | 623 | udma = check_pointer(dma, sizeof(*udma)); |
624 | /* Now we check each element */ | ||
398 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { | 625 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
626 | /* A zero length ends the array. */ | ||
399 | if (!udma->len[i]) | 627 | if (!udma->len[i]) |
400 | break; | 628 | break; |
401 | 629 | ||
@@ -403,9 +631,15 @@ static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num) | |||
403 | iov[i].iov_len = udma->len[i]; | 631 | iov[i].iov_len = udma->len[i]; |
404 | } | 632 | } |
405 | *num = i; | 633 | *num = i; |
634 | |||
635 | /* We return the pointer to where the caller should write the amount of | ||
636 | * the buffer used. */ | ||
406 | return &udma->used_len; | 637 | return &udma->used_len; |
407 | } | 638 | } |
408 | 639 | ||
640 | /* This routine gets a DMA buffer from the Guest for a given key, and converts | ||
641 | * it to an iovec array. It returns the interrupt the Guest wants when we're | ||
642 | * finished, and a pointer to the "used_len" field to fill in. */ | ||
409 | static u32 *get_dma_buffer(int fd, void *key, | 643 | static u32 *get_dma_buffer(int fd, void *key, |
410 | struct iovec iov[], unsigned int *num, u32 *irq) | 644 | struct iovec iov[], unsigned int *num, u32 *irq) |
411 | { | 645 | { |
@@ -413,16 +647,21 @@ static u32 *get_dma_buffer(int fd, void *key, | |||
413 | unsigned long udma; | 647 | unsigned long udma; |
414 | u32 *res; | 648 | u32 *res; |
415 | 649 | ||
650 | /* Ask the kernel for a DMA buffer corresponding to this key. */ | ||
416 | udma = write(fd, buf, sizeof(buf)); | 651 | udma = write(fd, buf, sizeof(buf)); |
652 | /* They haven't registered any, or they're all used? */ | ||
417 | if (udma == (unsigned long)-1) | 653 | if (udma == (unsigned long)-1) |
418 | return NULL; | 654 | return NULL; |
419 | 655 | ||
420 | /* Kernel stashes irq in ->used_len. */ | 656 | /* Convert it into our iovec array */ |
421 | res = dma2iov(udma, iov, num); | 657 | res = dma2iov(udma, iov, num); |
658 | /* The kernel stashes irq in ->used_len to get it out to us. */ | ||
422 | *irq = *res; | 659 | *irq = *res; |
660 | /* Return a pointer to ((struct lguest_dma *)udma)->used_len. */ | ||
423 | return res; | 661 | return res; |
424 | } | 662 | } |
425 | 663 | ||
664 | /* This is a convenient routine to send the Guest an interrupt. */ | ||
426 | static void trigger_irq(int fd, u32 irq) | 665 | static void trigger_irq(int fd, u32 irq) |
427 | { | 666 | { |
428 | u32 buf[] = { LHREQ_IRQ, irq }; | 667 | u32 buf[] = { LHREQ_IRQ, irq }; |
@@ -430,6 +669,10 @@ static void trigger_irq(int fd, u32 irq) | |||
430 | err(1, "Triggering irq %i", irq); | 669 | err(1, "Triggering irq %i", irq); |
431 | } | 670 | } |
432 | 671 | ||
672 | /* This simply sets up an iovec array where we can put data to be discarded. | ||
673 | * This happens when the Guest doesn't want or can't handle the input: we have | ||
674 | * to get rid of it somewhere, and if we bury it in the ceiling space it will | ||
675 | * start to smell after a week. */ | ||
433 | static void discard_iovec(struct iovec *iov, unsigned int *num) | 676 | static void discard_iovec(struct iovec *iov, unsigned int *num) |
434 | { | 677 | { |
435 | static char discard_buf[1024]; | 678 | static char discard_buf[1024]; |
@@ -438,19 +681,24 @@ static void discard_iovec(struct iovec *iov, unsigned int *num) | |||
438 | iov->iov_len = sizeof(discard_buf); | 681 | iov->iov_len = sizeof(discard_buf); |
439 | } | 682 | } |
440 | 683 | ||
684 | /* Here are the input terminal settings we save, and the routine to restore them | |
685 | * on exit so the user can see what they type next. */ | ||
441 | static struct termios orig_term; | 686 | static struct termios orig_term; |
442 | static void restore_term(void) | 687 | static void restore_term(void) |
443 | { | 688 | { |
444 | tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); | 689 | tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); |
445 | } | 690 | } |
446 | 691 | ||
692 | /* We associate some data with the console for our exit hack. */ | ||
447 | struct console_abort | 693 | struct console_abort |
448 | { | 694 | { |
695 | /* How many times have they hit ^C? */ | ||
449 | int count; | 696 | int count; |
697 | /* When did they start? */ | ||
450 | struct timeval start; | 698 | struct timeval start; |
451 | }; | 699 | }; |
452 | 700 | ||
453 | /* We DMA input to buffer bound at start of console page. */ | 701 | /* This is the routine which handles console input (ie. stdin). */ |
454 | static bool handle_console_input(int fd, struct device *dev) | 702 | static bool handle_console_input(int fd, struct device *dev) |
455 | { | 703 | { |
456 | u32 irq = 0, *lenp; | 704 | u32 irq = 0, *lenp; |
@@ -459,24 +707,38 @@ static bool handle_console_input(int fd, struct device *dev) | |||
459 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; | 707 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; |
460 | struct console_abort *abort = dev->priv; | 708 | struct console_abort *abort = dev->priv; |
461 | 709 | ||
710 | /* First we get the console buffer from the Guest. The key is dev->mem | ||
711 | * which was set to 0 in setup_console(). */ | ||
462 | lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq); | 712 | lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq); |
463 | if (!lenp) { | 713 | if (!lenp) { |
714 | /* If it's not ready for input, warn and set up to discard. */ | ||
464 | warn("console: no dma buffer!"); | 715 | warn("console: no dma buffer!"); |
465 | discard_iovec(iov, &num); | 716 | discard_iovec(iov, &num); |
466 | } | 717 | } |
467 | 718 | ||
719 | /* This is why we convert to iovecs: the readv() call uses them, and so | ||
720 | * it reads straight into the Guest's buffer. */ | ||
468 | len = readv(dev->fd, iov, num); | 721 | len = readv(dev->fd, iov, num); |
469 | if (len <= 0) { | 722 | if (len <= 0) { |
723 | /* This implies that the console is closed, is /dev/null, or | ||
724 | * something went terribly wrong. We still go through the rest | ||
725 | * of the logic, though, especially the exit handling below. */ | ||
470 | warnx("Failed to get console input, ignoring console."); | 726 | warnx("Failed to get console input, ignoring console."); |
471 | len = 0; | 727 | len = 0; |
472 | } | 728 | } |
473 | 729 | ||
730 | /* If we read the data into the Guest, fill in the length and send the | ||
731 | * interrupt. */ | ||
474 | if (lenp) { | 732 | if (lenp) { |
475 | *lenp = len; | 733 | *lenp = len; |
476 | trigger_irq(fd, irq); | 734 | trigger_irq(fd, irq); |
477 | } | 735 | } |
478 | 736 | ||
479 | /* Three ^C within one second? Exit. */ | 737 | /* Three ^C within one second? Exit. |
738 | * | ||
739 | * This is such a hack, but works surprisingly well. Each ^C has to be | ||
740 | * in a buffer by itself, so they can't be too fast. But we check that | ||
741 | * we get three within about a second, so they can't be too slow. */ | ||
480 | if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { | 742 | if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { |
481 | if (!abort->count++) | 743 | if (!abort->count++) |
482 | gettimeofday(&abort->start, NULL); | 744 | gettimeofday(&abort->start, NULL); |
@@ -484,43 +746,60 @@ static bool handle_console_input(int fd, struct device *dev) | |||
484 | struct timeval now; | 746 | struct timeval now; |
485 | gettimeofday(&now, NULL); | 747 | gettimeofday(&now, NULL); |
486 | if (now.tv_sec <= abort->start.tv_sec+1) { | 748 | if (now.tv_sec <= abort->start.tv_sec+1) { |
487 | /* Make sure waker is not blocked in BREAK */ | ||
488 | u32 args[] = { LHREQ_BREAK, 0 }; | 749 | u32 args[] = { LHREQ_BREAK, 0 }; |
750 | /* Close the fd so Waker will know it has to | ||
751 | * exit. */ | ||
489 | close(waker_fd); | 752 | close(waker_fd); |
753 | /* Just in case waker is blocked in BREAK, send | ||
754 | * unbreak now. */ | ||
490 | write(fd, args, sizeof(args)); | 755 | write(fd, args, sizeof(args)); |
491 | exit(2); | 756 | exit(2); |
492 | } | 757 | } |
493 | abort->count = 0; | 758 | abort->count = 0; |
494 | } | 759 | } |
495 | } else | 760 | } else |
761 | /* Any other key resets the abort counter. */ | ||
496 | abort->count = 0; | 762 | abort->count = 0; |
497 | 763 | ||
764 | /* Now, if we didn't read anything, put the input terminal back and | ||
765 | * return failure (meaning, don't call us again). */ | ||
498 | if (!len) { | 766 | if (!len) { |
499 | restore_term(); | 767 | restore_term(); |
500 | return false; | 768 | return false; |
501 | } | 769 | } |
770 | /* Everything went OK! */ | ||
502 | return true; | 771 | return true; |
503 | } | 772 | } |
504 | 773 | ||
774 | /* Handling console output is much simpler than input. */ | ||
505 | static u32 handle_console_output(int fd, const struct iovec *iov, | 775 | static u32 handle_console_output(int fd, const struct iovec *iov, |
506 | unsigned num, struct device*dev) | 776 | unsigned num, struct device*dev) |
507 | { | 777 | { |
778 | /* Whatever the Guest sends, write it to standard output. Return the | ||
779 | * number of bytes written. */ | ||
508 | return writev(STDOUT_FILENO, iov, num); | 780 | return writev(STDOUT_FILENO, iov, num); |
509 | } | 781 | } |
510 | 782 | ||
783 | /* Guest->Host network output is also pretty easy. */ | ||
511 | static u32 handle_tun_output(int fd, const struct iovec *iov, | 784 | static u32 handle_tun_output(int fd, const struct iovec *iov, |
512 | unsigned num, struct device *dev) | 785 | unsigned num, struct device *dev) |
513 | { | 786 | { |
514 | /* Now we've seen output, we should warn if we can't get buffers. */ | 787 | /* We put a flag in the "priv" pointer of the network device, and set |
788 | * it as soon as we see output. We'll see why in handle_tun_input() */ | ||
515 | *(bool *)dev->priv = true; | 789 | *(bool *)dev->priv = true; |
790 | /* Whatever packet the Guest sent us, write it out to the tun | ||
791 | * device. */ | ||
516 | return writev(dev->fd, iov, num); | 792 | return writev(dev->fd, iov, num); |
517 | } | 793 | } |
518 | 794 | ||
795 | /* This matches the peer_key() in lguest_net.c. The key for any given slot | ||
796 | * is the address of the network device's page plus 4 * the slot number. */ | ||
519 | static unsigned long peer_offset(unsigned int peernum) | 797 | static unsigned long peer_offset(unsigned int peernum) |
520 | { | 798 | { |
521 | return 4 * peernum; | 799 | return 4 * peernum; |
522 | } | 800 | } |
523 | 801 | ||
802 | /* This is where we handle a packet coming in from the tun device */ | ||
524 | static bool handle_tun_input(int fd, struct device *dev) | 803 | static bool handle_tun_input(int fd, struct device *dev) |
525 | { | 804 | { |
526 | u32 irq = 0, *lenp; | 805 | u32 irq = 0, *lenp; |
@@ -528,17 +807,28 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
528 | unsigned num; | 807 | unsigned num; |
529 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; | 808 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; |
530 | 809 | ||
810 | /* First we get a buffer the Guest has bound to its key. */ | ||
531 | lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num, | 811 | lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num, |
532 | &irq); | 812 | &irq); |
533 | if (!lenp) { | 813 | if (!lenp) { |
814 | /* Now, it's expected that if we try to send a packet too | ||
815 | * early, the Guest won't be ready yet. This is why we set a | ||
816 | * flag when the Guest sends its first packet. If it's sent a | ||
817 | * packet we assume it should be ready to receive them. | ||
818 | * | ||
819 | * Actually, this is what the status bits in the descriptor are | ||
820 | * for: we should *use* them. FIXME! */ | ||
534 | if (*(bool *)dev->priv) | 821 | if (*(bool *)dev->priv) |
535 | warn("network: no dma buffer!"); | 822 | warn("network: no dma buffer!"); |
536 | discard_iovec(iov, &num); | 823 | discard_iovec(iov, &num); |
537 | } | 824 | } |
538 | 825 | ||
826 | /* Read the packet from the device directly into the Guest's buffer. */ | ||
539 | len = readv(dev->fd, iov, num); | 827 | len = readv(dev->fd, iov, num); |
540 | if (len <= 0) | 828 | if (len <= 0) |
541 | err(1, "reading network"); | 829 | err(1, "reading network"); |
830 | |||
831 | /* Write the used_len, and trigger the interrupt for the Guest */ | ||
542 | if (lenp) { | 832 | if (lenp) { |
543 | *lenp = len; | 833 | *lenp = len; |
544 | trigger_irq(fd, irq); | 834 | trigger_irq(fd, irq); |
@@ -546,9 +836,13 @@ static bool handle_tun_input(int fd, struct device *dev) | |||
546 | verbose("tun input packet len %i [%02x %02x] (%s)\n", len, | 836 | verbose("tun input packet len %i [%02x %02x] (%s)\n", len, |
547 | ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1], | 837 | ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1], |
548 | lenp ? "sent" : "discarded"); | 838 | lenp ? "sent" : "discarded"); |
839 | /* All good. */ | ||
549 | return true; | 840 | return true; |
550 | } | 841 | } |
551 | 842 | ||
843 | /* The last device handling routine is block output: the Guest has sent a DMA | ||
844 | * to the block device. It will have placed the command it wants in the | ||
845 | * "struct lguest_block_page". */ | ||
552 | static u32 handle_block_output(int fd, const struct iovec *iov, | 846 | static u32 handle_block_output(int fd, const struct iovec *iov, |
553 | unsigned num, struct device *dev) | 847 | unsigned num, struct device *dev) |
554 | { | 848 | { |
@@ -558,36 +852,64 @@ static u32 handle_block_output(int fd, const struct iovec *iov, | |||
558 | struct iovec reply[LGUEST_MAX_DMA_SECTIONS]; | 852 | struct iovec reply[LGUEST_MAX_DMA_SECTIONS]; |
559 | off64_t device_len, off = (off64_t)p->sector * 512; | 853 | off64_t device_len, off = (off64_t)p->sector * 512; |
560 | 854 | ||
855 | /* First we extract the device length from the dev->priv pointer. */ | ||
561 | device_len = *(off64_t *)dev->priv; | 856 | device_len = *(off64_t *)dev->priv; |
562 | 857 | ||
858 | /* We first check that the read or write is within the length of the | ||
859 | * block file. */ | ||
563 | if (off >= device_len) | 860 | if (off >= device_len) |
564 | err(1, "Bad offset %llu vs %llu", off, device_len); | 861 | err(1, "Bad offset %llu vs %llu", off, device_len); |
862 | /* Move to the right location in the block file. This shouldn't fail, | ||
863 | * but best to check. */ | ||
565 | if (lseek64(dev->fd, off, SEEK_SET) != off) | 864 | if (lseek64(dev->fd, off, SEEK_SET) != off) |
566 | err(1, "Bad seek to sector %i", p->sector); | 865 | err(1, "Bad seek to sector %i", p->sector); |
567 | 866 | ||
568 | verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off); | 867 | verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off); |
569 | 868 | ||
869 | /* They were supposed to bind a reply buffer at key equal to the start | ||
870 | * of the block device memory. We need this to tell them when the | ||
871 | * request is finished. */ | ||
570 | lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq); | 872 | lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq); |
571 | if (!lenp) | 873 | if (!lenp) |
572 | err(1, "Block request didn't give us a dma buffer"); | 874 | err(1, "Block request didn't give us a dma buffer"); |
573 | 875 | ||
574 | if (p->type) { | 876 | if (p->type) { |
877 | /* A write request. The DMA they sent contained the data, so | ||
878 | * write it out. */ | ||
575 | len = writev(dev->fd, iov, num); | 879 | len = writev(dev->fd, iov, num); |
880 | /* Grr... Now we know how long the "struct lguest_dma" they | ||
881 | * sent was, we make sure they didn't try to write over the end | ||
882 | * of the block file (possibly extending it). */ | ||
576 | if (off + len > device_len) { | 883 | if (off + len > device_len) { |
884 | /* Trim it back to the correct length */ | ||
577 | ftruncate(dev->fd, device_len); | 885 | ftruncate(dev->fd, device_len); |
886 | /* Die, bad Guest, die. */ | ||
578 | errx(1, "Write past end %llu+%u", off, len); | 887 | errx(1, "Write past end %llu+%u", off, len); |
579 | } | 888 | } |
889 | /* The reply length is 0: we just send back an empty DMA to | ||
890 | * interrupt them and tell them the write is finished. */ | ||
580 | *lenp = 0; | 891 | *lenp = 0; |
581 | } else { | 892 | } else { |
893 | /* A read request. They sent an empty DMA to start the | ||
894 | * request, and we put the read contents into the reply | ||
895 | * buffer. */ | ||
582 | len = readv(dev->fd, reply, reply_num); | 896 | len = readv(dev->fd, reply, reply_num); |
583 | *lenp = len; | 897 | *lenp = len; |
584 | } | 898 | } |
585 | 899 | ||
900 | /* The result is 1 (done), 2 if there was an error (short read or | ||
901 | * write). */ | ||
586 | p->result = 1 + (p->bytes != len); | 902 | p->result = 1 + (p->bytes != len); |
903 | /* Now tell them we've used their reply buffer. */ | ||
587 | trigger_irq(fd, irq); | 904 | trigger_irq(fd, irq); |
905 | |||
906 | /* We're supposed to return the number of bytes of the output buffer we | ||
907 | * used. But the block device uses the "result" field instead, so we | ||
908 | * don't bother. */ | ||
588 | return 0; | 909 | return 0; |
589 | } | 910 | } |
590 | 911 | ||
912 | /* This is the generic routine we call when the Guest sends some DMA out. */ | ||
591 | static void handle_output(int fd, unsigned long dma, unsigned long key, | 913 | static void handle_output(int fd, unsigned long dma, unsigned long key, |
592 | struct device_list *devices) | 914 | struct device_list *devices) |
593 | { | 915 | { |
@@ -596,30 +918,53 @@ static void handle_output(int fd, unsigned long dma, unsigned long key, | |||
596 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; | 918 | struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; |
597 | unsigned num = 0; | 919 | unsigned num = 0; |
598 | 920 | ||
921 | /* Convert the "struct lguest_dma" they're sending to a "struct | ||
922 | * iovec". */ | ||
599 | lenp = dma2iov(dma, iov, &num); | 923 | lenp = dma2iov(dma, iov, &num); |
924 | |||
925 | /* Check each device: if they expect output to this key, tell them to | ||
926 | * handle it. */ | ||
600 | for (i = devices->dev; i; i = i->next) { | 927 | for (i = devices->dev; i; i = i->next) { |
601 | if (i->handle_output && key == i->watch_key) { | 928 | if (i->handle_output && key == i->watch_key) { |
929 | /* We write the result straight into the used_len field | ||
930 | * for them. */ | ||
602 | *lenp = i->handle_output(fd, iov, num, i); | 931 | *lenp = i->handle_output(fd, iov, num, i); |
603 | return; | 932 | return; |
604 | } | 933 | } |
605 | } | 934 | } |
935 | |||
936 | /* This can happen: the kernel sends any SEND_DMA which doesn't match | ||
937 | * another Guest to us. It could be that another Guest just left a | ||
938 | * network, for example. But it's unusual. */ | ||
606 | warnx("Pending dma %p, key %p", (void *)dma, (void *)key); | 939 | warnx("Pending dma %p, key %p", (void *)dma, (void *)key); |
607 | } | 940 | } |
608 | 941 | ||
942 | /* This is called when the waker wakes us up: check for incoming file | ||
943 | * descriptors. */ | ||
609 | static void handle_input(int fd, struct device_list *devices) | 944 | static void handle_input(int fd, struct device_list *devices) |
610 | { | 945 | { |
946 | /* select() wants a zeroed timeval to mean "don't wait". */ | ||
611 | struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; | 947 | struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; |
612 | 948 | ||
613 | for (;;) { | 949 | for (;;) { |
614 | struct device *i; | 950 | struct device *i; |
615 | fd_set fds = devices->infds; | 951 | fd_set fds = devices->infds; |
616 | 952 | ||
953 | /* If nothing is ready, we're done. */ | ||
617 | if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0) | 954 | if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0) |
618 | break; | 955 | break; |
619 | 956 | ||
957 | /* Otherwise, call the device(s) which have readable | ||
958 | * file descriptors and a method of handling them. */ | ||
620 | for (i = devices->dev; i; i = i->next) { | 959 | for (i = devices->dev; i; i = i->next) { |
621 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { | 960 | if (i->handle_input && FD_ISSET(i->fd, &fds)) { |
961 | /* If handle_input() returns false, it means we | ||
962 | * should no longer service it. | ||
963 | * handle_console_input() does this. */ | ||
622 | if (!i->handle_input(fd, i)) { | 964 | if (!i->handle_input(fd, i)) { |
965 | /* Clear it from the set of input file | ||
966 | * descriptors kept at the head of the | ||
967 | * device list. */ | ||
623 | FD_CLR(i->fd, &devices->infds); | 968 | FD_CLR(i->fd, &devices->infds); |
624 | /* Tell waker to ignore it too... */ | 969 | /* Tell waker to ignore it too... */ |
625 | write(waker_fd, &i->fd, sizeof(i->fd)); | 970 | write(waker_fd, &i->fd, sizeof(i->fd)); |
@@ -629,26 +974,42 @@ static void handle_input(int fd, struct device_list *devices) | |||
629 | } | 974 | } |
630 | } | 975 | } |
631 | 976 | ||
632 | static struct lguest_device_desc *new_dev_desc(u16 type, u16 features, | 977 | /*L:190 |
633 | u16 num_pages) | 978 | * Device Setup |
979 | * | ||
980 | * All devices need a descriptor so the Guest knows it exists, and a "struct | ||
981 | * device" so the Launcher can keep track of it. We have common helper | ||
982 | * routines to allocate them. | ||
983 | * | ||
984 | * This routine allocates a new "struct lguest_device_desc" from the descriptor | ||
985 | * table in the devices array just above the Guest's normal memory. */ | ||
986 | static struct lguest_device_desc * | ||
987 | new_dev_desc(struct lguest_device_desc *descs, | ||
988 | u16 type, u16 features, u16 num_pages) | ||
634 | { | 989 | { |
635 | static unsigned long top = LGUEST_GUEST_TOP; | 990 | unsigned int i; |
636 | struct lguest_device_desc *desc; | ||
637 | 991 | ||
638 | desc = malloc(sizeof(*desc)); | 992 | for (i = 0; i < LGUEST_MAX_DEVICES; i++) { |
639 | desc->type = type; | 993 | if (!descs[i].type) { |
640 | desc->num_pages = num_pages; | 994 | descs[i].type = type; |
641 | desc->features = features; | 995 | descs[i].features = features; |
642 | desc->status = 0; | 996 | descs[i].num_pages = num_pages; |
643 | if (num_pages) { | 997 | /* If they said the device needs memory, we allocate |
644 | top -= num_pages*getpagesize(); | 998 | * that now, bumping up the top of Guest memory. */ |
645 | map_zeroed_pages(top, num_pages); | 999 | if (num_pages) { |
646 | desc->pfn = top / getpagesize(); | 1000 | map_zeroed_pages(top, num_pages); |
647 | } else | 1001 | descs[i].pfn = top/getpagesize(); |
648 | desc->pfn = 0; | 1002 | top += num_pages*getpagesize(); |
649 | return desc; | 1003 | } |
1004 | return &descs[i]; | ||
1005 | } | ||
1006 | } | ||
1007 | errx(1, "too many devices"); | ||
650 | } | 1008 | } |
651 | 1009 | ||
1010 | /* This monster routine does all the creation and setup of a new device, | ||
1011 | * including calling new_dev_desc() to allocate the descriptor and device | ||
1012 | * memory. */ | ||
652 | static struct device *new_device(struct device_list *devices, | 1013 | static struct device *new_device(struct device_list *devices, |
653 | u16 type, u16 num_pages, u16 features, | 1014 | u16 type, u16 num_pages, u16 features, |
654 | int fd, | 1015 | int fd, |
@@ -661,15 +1022,21 @@ static struct device *new_device(struct device_list *devices, | |||
661 | { | 1022 | { |
662 | struct device *dev = malloc(sizeof(*dev)); | 1023 | struct device *dev = malloc(sizeof(*dev)); |
663 | 1024 | ||
664 | /* Append to device list. */ | 1025 | /* Append to device list. Prepending to a single-linked list is |
1026 | * easier, but the user expects the devices to be arranged on the bus | ||
1027 | * in command-line order. The first network device on the command line | ||
1028 | * is eth0, the first block device /dev/lgba, etc. */ | ||
665 | *devices->lastdev = dev; | 1029 | *devices->lastdev = dev; |
666 | dev->next = NULL; | 1030 | dev->next = NULL; |
667 | devices->lastdev = &dev->next; | 1031 | devices->lastdev = &dev->next; |
668 | 1032 | ||
1033 | /* Now we populate the fields one at a time. */ | ||
669 | dev->fd = fd; | 1034 | dev->fd = fd; |
1035 | /* If we have an input handler for this file descriptor, then we add it | ||
1036 | * to the device_list's fdset and maxfd. */ | ||
670 | if (handle_input) | 1037 | if (handle_input) |
671 | set_fd(dev->fd, devices); | 1038 | set_fd(dev->fd, devices); |
672 | dev->desc = new_dev_desc(type, features, num_pages); | 1039 | dev->desc = new_dev_desc(devices->descs, type, features, num_pages); |
673 | dev->mem = (void *)(dev->desc->pfn * getpagesize()); | 1040 | dev->mem = (void *)(dev->desc->pfn * getpagesize()); |
674 | dev->handle_input = handle_input; | 1041 | dev->handle_input = handle_input; |
675 | dev->watch_key = (unsigned long)dev->mem + watch_off; | 1042 | dev->watch_key = (unsigned long)dev->mem + watch_off; |
@@ -677,27 +1044,37 @@ static struct device *new_device(struct device_list *devices, | |||
677 | return dev; | 1044 | return dev; |
678 | } | 1045 | } |
679 | 1046 | ||
1047 | /* Our first setup routine is the console. It's a fairly simple device, but | ||
1048 | * UNIX tty handling makes it uglier than it could be. */ | ||
680 | static void setup_console(struct device_list *devices) | 1049 | static void setup_console(struct device_list *devices) |
681 | { | 1050 | { |
682 | struct device *dev; | 1051 | struct device *dev; |
683 | 1052 | ||
1053 | /* If we can save the initial standard input settings... */ | ||
684 | if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { | 1054 | if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { |
685 | struct termios term = orig_term; | 1055 | struct termios term = orig_term; |
1056 | /* Then we turn off echo, line buffering and ^C etc. We want a | ||
1057 | * raw input stream to the Guest. */ | ||
686 | term.c_lflag &= ~(ISIG|ICANON|ECHO); | 1058 | term.c_lflag &= ~(ISIG|ICANON|ECHO); |
687 | tcsetattr(STDIN_FILENO, TCSANOW, &term); | 1059 | tcsetattr(STDIN_FILENO, TCSANOW, &term); |
1060 | /* If we exit gracefully, the original settings will be | ||
1061 | * restored so the user can see what they're typing. */ | ||
688 | atexit(restore_term); | 1062 | atexit(restore_term); |
689 | } | 1063 | } |
690 | 1064 | ||
691 | /* We don't currently require a page for the console. */ | 1065 | /* We don't currently require any memory for the console, so we ask for |
1066 | * 0 pages. */ | ||
692 | dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, | 1067 | dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, |
693 | STDIN_FILENO, handle_console_input, | 1068 | STDIN_FILENO, handle_console_input, |
694 | LGUEST_CONSOLE_DMA_KEY, handle_console_output); | 1069 | LGUEST_CONSOLE_DMA_KEY, handle_console_output); |
1070 | /* We store the console state in dev->priv, and initialize it. */ | ||
695 | dev->priv = malloc(sizeof(struct console_abort)); | 1071 | dev->priv = malloc(sizeof(struct console_abort)); |
696 | ((struct console_abort *)dev->priv)->count = 0; | 1072 | ((struct console_abort *)dev->priv)->count = 0; |
697 | verbose("device %p: console\n", | 1073 | verbose("device %p: console\n", |
698 | (void *)(dev->desc->pfn * getpagesize())); | 1074 | (void *)(dev->desc->pfn * getpagesize())); |
699 | } | 1075 | } |
700 | 1076 | ||
1077 | /* Setting up a block file is also fairly straightforward. */ | ||
701 | static void setup_block_file(const char *filename, struct device_list *devices) | 1078 | static void setup_block_file(const char *filename, struct device_list *devices) |
702 | { | 1079 | { |
703 | int fd; | 1080 | int fd; |
@@ -705,20 +1082,47 @@ static void setup_block_file(const char *filename, struct device_list *devices) | |||
705 | off64_t *device_len; | 1082 | off64_t *device_len; |
706 | struct lguest_block_page *p; | 1083 | struct lguest_block_page *p; |
707 | 1084 | ||
1085 | /* We open with O_LARGEFILE because otherwise we get stuck at 2G. We | ||
1086 | * open with O_DIRECT because otherwise our benchmarks go much too | ||
1087 | * fast. */ | ||
708 | fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); | 1088 | fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); |
1089 | |||
1090 | /* We want one page, and have no input handler (the block file never | ||
1091 | * has anything interesting to say to us). Our timing will be quite | ||
1092 | * random, so it should be a reasonable randomness source. */ | ||
709 | dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, | 1093 | dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, |
710 | LGUEST_DEVICE_F_RANDOMNESS, | 1094 | LGUEST_DEVICE_F_RANDOMNESS, |
711 | fd, NULL, 0, handle_block_output); | 1095 | fd, NULL, 0, handle_block_output); |
1096 | |||
1097 | /* We store the device size in the private area */ | ||
712 | device_len = dev->priv = malloc(sizeof(*device_len)); | 1098 | device_len = dev->priv = malloc(sizeof(*device_len)); |
1099 | /* This is the safe way of establishing the size of our device: it | ||
1100 | * might be a normal file or an actual block device like /dev/hdb. */ | ||
713 | *device_len = lseek64(fd, 0, SEEK_END); | 1101 | *device_len = lseek64(fd, 0, SEEK_END); |
714 | p = dev->mem; | ||
715 | 1102 | ||
1103 | /* The device memory is a "struct lguest_block_page". It's zeroed | ||
1104 | * already, we just need to put in the device size. Block devices | ||
1105 | * think in sectors (ie. 512 byte chunks), so we translate here. */ | ||
1106 | p = dev->mem; | ||
716 | p->num_sectors = *device_len/512; | 1107 | p->num_sectors = *device_len/512; |
717 | verbose("device %p: block %i sectors\n", | 1108 | verbose("device %p: block %i sectors\n", |
718 | (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); | 1109 | (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); |
719 | } | 1110 | } |
720 | 1111 | ||
721 | /* We use fnctl locks to reserve network slots (autocleanup!) */ | 1112 | /* |
1113 | * Network Devices. | ||
1114 | * | ||
1115 | * Setting up network devices is quite a pain, because we have three types. | ||
1116 | * First, we have the inter-Guest network. This is a file which is mapped into | ||
1117 | * the address space of the Guests who are on the network. Because it is a | ||
1118 | * shared mapping, the same page underlies all the devices, and they can send | ||
1119 | * DMA to each other. | ||
1120 | * | ||
1121 | * Remember from our network driver, the Guest is told what slot in the page it | ||
1122 | * is to use. We use exclusive fcntl locks to reserve a slot. If another | ||
1123 | * Guest is using a slot, the lock will fail and we try another. Because fcntl | ||
1124 | * locks are cleaned up automatically when we die, this cleverly means that our | ||
1125 | * reservation on the slot will vanish if we crash. */ | ||
722 | static unsigned int find_slot(int netfd, const char *filename) | 1126 | static unsigned int find_slot(int netfd, const char *filename) |
723 | { | 1127 | { |
724 | struct flock fl; | 1128 | struct flock fl; |
@@ -726,26 +1130,33 @@ static unsigned int find_slot(int netfd, const char *filename) | |||
726 | fl.l_type = F_WRLCK; | 1130 | fl.l_type = F_WRLCK; |
727 | fl.l_whence = SEEK_SET; | 1131 | fl.l_whence = SEEK_SET; |
728 | fl.l_len = 1; | 1132 | fl.l_len = 1; |
1133 | /* Try a 1 byte lock in each possible position number */ | ||
729 | for (fl.l_start = 0; | 1134 | for (fl.l_start = 0; |
730 | fl.l_start < getpagesize()/sizeof(struct lguest_net); | 1135 | fl.l_start < getpagesize()/sizeof(struct lguest_net); |
731 | fl.l_start++) { | 1136 | fl.l_start++) { |
1137 | /* If we succeed, return the slot number. */ | ||
732 | if (fcntl(netfd, F_SETLK, &fl) == 0) | 1138 | if (fcntl(netfd, F_SETLK, &fl) == 0) |
733 | return fl.l_start; | 1139 | return fl.l_start; |
734 | } | 1140 | } |
735 | errx(1, "No free slots in network file %s", filename); | 1141 | errx(1, "No free slots in network file %s", filename); |
736 | } | 1142 | } |
737 | 1143 | ||
1144 | /* This function sets up the network file */ | ||
738 | static void setup_net_file(const char *filename, | 1145 | static void setup_net_file(const char *filename, |
739 | struct device_list *devices) | 1146 | struct device_list *devices) |
740 | { | 1147 | { |
741 | int netfd; | 1148 | int netfd; |
742 | struct device *dev; | 1149 | struct device *dev; |
743 | 1150 | ||
1151 | /* We don't use open_or_die() here: for friendliness we create the file | ||
1152 | * if it doesn't already exist. */ | ||
744 | netfd = open(filename, O_RDWR, 0); | 1153 | netfd = open(filename, O_RDWR, 0); |
745 | if (netfd < 0) { | 1154 | if (netfd < 0) { |
746 | if (errno == ENOENT) { | 1155 | if (errno == ENOENT) { |
747 | netfd = open(filename, O_RDWR|O_CREAT, 0600); | 1156 | netfd = open(filename, O_RDWR|O_CREAT, 0600); |
748 | if (netfd >= 0) { | 1157 | if (netfd >= 0) { |
1158 | /* If we succeeded, initialize the file with a | ||
1159 | * blank page. */ | ||
749 | char page[getpagesize()]; | 1160 | char page[getpagesize()]; |
750 | memset(page, 0, sizeof(page)); | 1161 | memset(page, 0, sizeof(page)); |
751 | write(netfd, page, sizeof(page)); | 1162 | write(netfd, page, sizeof(page)); |
@@ -755,11 +1166,15 @@ static void setup_net_file(const char *filename, | |||
755 | err(1, "cannot open net file '%s'", filename); | 1166 | err(1, "cannot open net file '%s'", filename); |
756 | } | 1167 | } |
757 | 1168 | ||
1169 | /* We need 1 page, and the features indicate the slot to use and that | ||
1170 | * no checksum is needed. We never touch this device again; it's | ||
1171 | * between the Guests on the network, so we don't register input or | ||
1172 | * output handlers. */ | ||
758 | dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, | 1173 | dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, |
759 | find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, | 1174 | find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, |
760 | -1, NULL, 0, NULL); | 1175 | -1, NULL, 0, NULL); |
761 | 1176 | ||
762 | /* We overwrite the /dev/zero mapping with the actual file. */ | 1177 | /* Map the shared file. */ |
763 | if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, | 1178 | if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, |
764 | MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) | 1179 | MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) |
765 | err(1, "could not mmap '%s'", filename); | 1180 | err(1, "could not mmap '%s'", filename); |
@@ -767,6 +1182,7 @@ static void setup_net_file(const char *filename, | |||
767 | (void *)(dev->desc->pfn * getpagesize()), filename, | 1182 | (void *)(dev->desc->pfn * getpagesize()), filename, |
768 | dev->desc->features & ~LGUEST_NET_F_NOCSUM); | 1183 | dev->desc->features & ~LGUEST_NET_F_NOCSUM); |
769 | } | 1184 | } |
1185 | /*:*/ | ||
770 | 1186 | ||
771 | static u32 str2ip(const char *ipaddr) | 1187 | static u32 str2ip(const char *ipaddr) |
772 | { | 1188 | { |
@@ -776,7 +1192,11 @@ static u32 str2ip(const char *ipaddr) | |||
776 | return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; | 1192 | return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; |
777 | } | 1193 | } |
778 | 1194 | ||
779 | /* adapted from libbridge */ | 1195 | /* This code is "adapted" from libbridge: it attaches the Host end of the |
1196 | * network device to the bridge device specified by the command line. | ||
1197 | * | ||
1198 | * This is yet another James Morris contribution (I'm an IP-level guy, so I | ||
1199 | * dislike bridging), and I just try not to break it. */ | ||
780 | static void add_to_bridge(int fd, const char *if_name, const char *br_name) | 1200 | static void add_to_bridge(int fd, const char *if_name, const char *br_name) |
781 | { | 1201 | { |
782 | int ifidx; | 1202 | int ifidx; |
@@ -795,12 +1215,16 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) | |||
795 | err(1, "can't add %s to bridge %s", if_name, br_name); | 1215 | err(1, "can't add %s to bridge %s", if_name, br_name); |
796 | } | 1216 | } |
797 | 1217 | ||
1218 | /* This sets up the Host end of the network device with an IP address, brings | ||
1219 | * it up so packets will flow, then copies the MAC address into the hwaddr | ||
1220 | * pointer (in practice, the Host's slot in the network device's memory). */ | ||
798 | static void configure_device(int fd, const char *devname, u32 ipaddr, | 1221 | static void configure_device(int fd, const char *devname, u32 ipaddr, |
799 | unsigned char hwaddr[6]) | 1222 | unsigned char hwaddr[6]) |
800 | { | 1223 | { |
801 | struct ifreq ifr; | 1224 | struct ifreq ifr; |
802 | struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; | 1225 | struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; |
803 | 1226 | ||
1227 | /* Don't read these incantations. Just cut & paste them like I did! */ | ||
804 | memset(&ifr, 0, sizeof(ifr)); | 1228 | memset(&ifr, 0, sizeof(ifr)); |
805 | strcpy(ifr.ifr_name, devname); | 1229 | strcpy(ifr.ifr_name, devname); |
806 | sin->sin_family = AF_INET; | 1230 | sin->sin_family = AF_INET; |
@@ -811,12 +1235,19 @@ static void configure_device(int fd, const char *devname, u32 ipaddr, | |||
811 | if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) | 1235 | if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) |
812 | err(1, "Bringing interface %s up", devname); | 1236 | err(1, "Bringing interface %s up", devname); |
813 | 1237 | ||
1238 | /* SIOC stands for Socket I/O Control. G means Get (vs S for Set | ||
1239 | * above). IF means Interface, and HWADDR is hardware address. | ||
1240 | * Simple! */ | ||
814 | if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) | 1241 | if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) |
815 | err(1, "getting hw address for %s", devname); | 1242 | err(1, "getting hw address for %s", devname); |
816 | |||
817 | memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); | 1243 | memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); |
818 | } | 1244 | } |
819 | 1245 | ||
1246 | /*L:195 The other kind of network is a Host<->Guest network. This can either | ||
1247 | * use bridging or routing, but the principle is the same: it uses the "tun" | ||
1248 | * device to inject packets into the Host as if they came in from a normal | ||
1249 | * network card. We just shunt packets between the Guest and the tun | ||
1250 | * device. */ | ||
820 | static void setup_tun_net(const char *arg, struct device_list *devices) | 1251 | static void setup_tun_net(const char *arg, struct device_list *devices) |
821 | { | 1252 | { |
822 | struct device *dev; | 1253 | struct device *dev; |
@@ -825,36 +1256,56 @@ static void setup_tun_net(const char *arg, struct device_list *devices) | |||
825 | u32 ip; | 1256 | u32 ip; |
826 | const char *br_name = NULL; | 1257 | const char *br_name = NULL; |
827 | 1258 | ||
1259 | /* We open the /dev/net/tun device and tell it we want a tap device. A | ||
1260 | * tap device is like a tun device, only somehow different. To tell | ||
1261 | * the truth, I completely blundered my way through this code, but it | ||
1262 | * works now! */ | ||
828 | netfd = open_or_die("/dev/net/tun", O_RDWR); | 1263 | netfd = open_or_die("/dev/net/tun", O_RDWR); |
829 | memset(&ifr, 0, sizeof(ifr)); | 1264 | memset(&ifr, 0, sizeof(ifr)); |
830 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; | 1265 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; |
831 | strcpy(ifr.ifr_name, "tap%d"); | 1266 | strcpy(ifr.ifr_name, "tap%d"); |
832 | if (ioctl(netfd, TUNSETIFF, &ifr) != 0) | 1267 | if (ioctl(netfd, TUNSETIFF, &ifr) != 0) |
833 | err(1, "configuring /dev/net/tun"); | 1268 | err(1, "configuring /dev/net/tun"); |
1269 | /* We don't need checksums calculated for packets coming in this | ||
1270 | * device: trust us! */ | ||
834 | ioctl(netfd, TUNSETNOCSUM, 1); | 1271 | ioctl(netfd, TUNSETNOCSUM, 1); |
835 | 1272 | ||
836 | /* You will be peer 1: we should create enough jitter to randomize */ | 1273 | /* We create the net device with 1 page, using the features field of |
1274 | * the descriptor to tell the Guest it is in slot 1 (NET_PEERNUM), and | ||
1275 | * that the device has fairly random timing. We do *not* specify | ||
1276 | * LGUEST_NET_F_NOCSUM: these packets can reach the real world. | ||
1277 | * | ||
1278 | * We will put our MAC address in slot 0 for the Guest to see, so | ||
1279 | * it will send packets to us using the key "peer_offset(0)": */ | ||
837 | dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, | 1280 | dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, |
838 | NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, | 1281 | NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, |
839 | handle_tun_input, peer_offset(0), handle_tun_output); | 1282 | handle_tun_input, peer_offset(0), handle_tun_output); |
1283 | |||
1284 | /* We keep a flag which says whether we've seen packets come out from | ||
1285 | * this network device. */ | ||
840 | dev->priv = malloc(sizeof(bool)); | 1286 | dev->priv = malloc(sizeof(bool)); |
841 | *(bool *)dev->priv = false; | 1287 | *(bool *)dev->priv = false; |
842 | 1288 | ||
1289 | /* We need a socket to perform the magic network ioctls to bring up the | ||
1290 | * tap interface, connect to the bridge etc. Any socket will do! */ | ||
843 | ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); | 1291 | ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); |
844 | if (ipfd < 0) | 1292 | if (ipfd < 0) |
845 | err(1, "opening IP socket"); | 1293 | err(1, "opening IP socket"); |
846 | 1294 | ||
1295 | /* If the command line was --tunnet=bridge:<name> do bridging. */ | ||
847 | if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { | 1296 | if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { |
848 | ip = INADDR_ANY; | 1297 | ip = INADDR_ANY; |
849 | br_name = arg + strlen(BRIDGE_PFX); | 1298 | br_name = arg + strlen(BRIDGE_PFX); |
850 | add_to_bridge(ipfd, ifr.ifr_name, br_name); | 1299 | add_to_bridge(ipfd, ifr.ifr_name, br_name); |
851 | } else | 1300 | } else /* It is an IP address to set up the device with */ |
852 | ip = str2ip(arg); | 1301 | ip = str2ip(arg); |
853 | 1302 | ||
854 | /* We are peer 0, ie. first slot. */ | 1303 | /* We are peer 0, ie. first slot, so we hand dev->mem to this routine |
1304 | * to write the MAC address at the start of the device memory. */ | ||
855 | configure_device(ipfd, ifr.ifr_name, ip, dev->mem); | 1305 | configure_device(ipfd, ifr.ifr_name, ip, dev->mem); |
856 | 1306 | ||
857 | /* Set "promisc" bit: we want every single packet. */ | 1307 | /* Set "promisc" bit: we want every single packet if we're going to |
1308 | * bridge to other machines (and otherwise it doesn't matter). */ | ||
858 | *((u8 *)dev->mem) |= 0x1; | 1309 | *((u8 *)dev->mem) |= 0x1; |
859 | 1310 | ||
860 | close(ipfd); | 1311 | close(ipfd); |
@@ -865,31 +1316,10 @@ static void setup_tun_net(const char *arg, struct device_list *devices) | |||
865 | if (br_name) | 1316 | if (br_name) |
866 | verbose("attached to bridge: %s\n", br_name); | 1317 | verbose("attached to bridge: %s\n", br_name); |
867 | } | 1318 | } |
1319 | /* That's the end of device setup. */ | ||
868 | 1320 | ||
869 | /* Now we know how much memory we have, we copy in device descriptors */ | 1321 | /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves |
870 | static void map_device_descriptors(struct device_list *devs, unsigned long mem) | 1322 | * its input and output, and finally, lays it to rest. */ |
871 | { | ||
872 | struct device *i; | ||
873 | unsigned int num; | ||
874 | struct lguest_device_desc *descs; | ||
875 | |||
876 | /* Device descriptor array sits just above top of normal memory */ | ||
877 | descs = map_zeroed_pages(mem, 1); | ||
878 | |||
879 | for (i = devs->dev, num = 0; i; i = i->next, num++) { | ||
880 | if (num == LGUEST_MAX_DEVICES) | ||
881 | errx(1, "too many devices"); | ||
882 | verbose("Device %i: %s\n", num, | ||
883 | i->desc->type == LGUEST_DEVICE_T_NET ? "net" | ||
884 | : i->desc->type == LGUEST_DEVICE_T_CONSOLE ? "console" | ||
885 | : i->desc->type == LGUEST_DEVICE_T_BLOCK ? "block" | ||
886 | : "unknown"); | ||
887 | descs[num] = *i->desc; | ||
888 | free(i->desc); | ||
889 | i->desc = &descs[num]; | ||
890 | } | ||
891 | } | ||
892 | |||
893 | static void __attribute__((noreturn)) | 1323 | static void __attribute__((noreturn)) |
894 | run_guest(int lguest_fd, struct device_list *device_list) | 1324 | run_guest(int lguest_fd, struct device_list *device_list) |
895 | { | 1325 | { |
@@ -901,20 +1331,37 @@ run_guest(int lguest_fd, struct device_list *device_list) | |||
901 | /* We read from the /dev/lguest device to run the Guest. */ | 1331 | /* We read from the /dev/lguest device to run the Guest. */ |
902 | readval = read(lguest_fd, arr, sizeof(arr)); | 1332 | readval = read(lguest_fd, arr, sizeof(arr)); |
903 | 1333 | ||
1334 | /* The read can only really return sizeof(arr) (the Guest did a | ||
1335 | * SEND_DMA to us), or an error. */ | ||
1336 | |||
1337 | /* For a successful read, arr[0] is the address of the "struct | ||
1338 | * lguest_dma", and arr[1] is the key the Guest sent to. */ | ||
904 | if (readval == sizeof(arr)) { | 1339 | if (readval == sizeof(arr)) { |
905 | handle_output(lguest_fd, arr[0], arr[1], device_list); | 1340 | handle_output(lguest_fd, arr[0], arr[1], device_list); |
906 | continue; | 1341 | continue; |
1342 | /* ENOENT means the Guest died. Reading tells us why. */ | ||
907 | } else if (errno == ENOENT) { | 1343 | } else if (errno == ENOENT) { |
908 | char reason[1024] = { 0 }; | 1344 | char reason[1024] = { 0 }; |
909 | read(lguest_fd, reason, sizeof(reason)-1); | 1345 | read(lguest_fd, reason, sizeof(reason)-1); |
910 | errx(1, "%s", reason); | 1346 | errx(1, "%s", reason); |
1347 | /* EAGAIN means the waker wanted us to look at some input. | ||
1348 | * Anything else means a bug or incompatible change. */ | ||
911 | } else if (errno != EAGAIN) | 1349 | } else if (errno != EAGAIN) |
912 | err(1, "Running guest failed"); | 1350 | err(1, "Running guest failed"); |
1351 | |||
1352 | /* Service input, then unset the BREAK which releases | ||
1353 | * the Waker. */ | ||
913 | handle_input(lguest_fd, device_list); | 1354 | handle_input(lguest_fd, device_list); |
914 | if (write(lguest_fd, args, sizeof(args)) < 0) | 1355 | if (write(lguest_fd, args, sizeof(args)) < 0) |
915 | err(1, "Resetting break"); | 1356 | err(1, "Resetting break"); |
916 | } | 1357 | } |
917 | } | 1358 | } |
1359 | /* | ||
1360 | * This is the end of the Launcher. | ||
1361 | * | ||
1362 | * But wait! We've seen I/O from the Launcher, and we've seen I/O from the | ||
1363 | * Drivers. If we were to see the Host kernel I/O code, our understanding | ||
1364 | * would be complete... :*/ | ||
918 | 1365 | ||
919 | static struct option opts[] = { | 1366 | static struct option opts[] = { |
920 | { "verbose", 0, NULL, 'v' }, | 1367 | { "verbose", 0, NULL, 'v' }, |
@@ -932,19 +1379,59 @@ static void usage(void) | |||
932 | "<mem-in-mb> vmlinux [args...]"); | 1379 | "<mem-in-mb> vmlinux [args...]"); |
933 | } | 1380 | } |
934 | 1381 | ||
1382 | /*L:100 The Launcher code itself takes us out into userspace, that scary place | ||
1383 | * where pointers run wild and free! Unfortunately, like most userspace | ||
1384 | * programs, it's quite boring (which is why everyone likes to hack on the | ||
1385 | * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it | ||
1386 | * will get you through this section. Or, maybe not. | ||
1387 | * | ||
1388 | * The Launcher binary sits up high, usually starting at address 0xB8000000. | ||
1389 | * Everything below this is the "physical" memory for the Guest. For example, | ||
1390 | * if the Guest were to write a "1" at physical address 0, we would see a "1" | ||
1391 | * in the Launcher at "(int *)0". Guest physical == Launcher virtual. | ||
1392 | * | ||
1393 | * This can be tough to get your head around, but usually it just means that we | ||
1394 | * don't need to do any conversion when the Guest gives us its "physical" | ||
1395 | * addresses. | ||
1396 | */ | ||
935 | int main(int argc, char *argv[]) | 1397 | int main(int argc, char *argv[]) |
936 | { | 1398 | { |
937 | unsigned long mem, pgdir, start, page_offset, initrd_size = 0; | 1399 | /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size |
938 | int c, lguest_fd; | 1400 | * of the (optional) initrd. */ |
1401 | unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; | ||
1402 | /* A temporary and the /dev/lguest file descriptor. */ | ||
1403 | int i, c, lguest_fd; | ||
1404 | /* The list of Guest devices, based on command line arguments. */ | ||
939 | struct device_list device_list; | 1405 | struct device_list device_list; |
1406 | /* The boot information for the Guest: at guest-physical address 0. */ | ||
940 | void *boot = (void *)0; | 1407 | void *boot = (void *)0; |
1408 | /* If they specify an initrd file to load. */ | ||
941 | const char *initrd_name = NULL; | 1409 | const char *initrd_name = NULL; |
942 | 1410 | ||
1411 | /* First we initialize the device list. Since console and network | ||
1412 | * device receive input from a file descriptor, we keep an fdset | ||
1413 | * (infds) and the maximum fd number (max_infd) with the head of the | ||
1414 | * list. We also keep a pointer to the last device, for easy appending | ||
1415 | * to the list. */ | ||
943 | device_list.max_infd = -1; | 1416 | device_list.max_infd = -1; |
944 | device_list.dev = NULL; | 1417 | device_list.dev = NULL; |
945 | device_list.lastdev = &device_list.dev; | 1418 | device_list.lastdev = &device_list.dev; |
946 | FD_ZERO(&device_list.infds); | 1419 | FD_ZERO(&device_list.infds); |
947 | 1420 | ||
1421 | /* We need to know how much memory so we can set up the device | ||
1422 | * descriptor and memory pages for the devices as we parse the command | ||
1423 | * line. So we quickly look through the arguments to find the amount | ||
1424 | * of memory now. */ | ||
1425 | for (i = 1; i < argc; i++) { | ||
1426 | if (argv[i][0] != '-') { | ||
1427 | mem = top = atoi(argv[i]) * 1024 * 1024; | ||
1428 | device_list.descs = map_zeroed_pages(top, 1); | ||
1429 | top += getpagesize(); | ||
1430 | break; | ||
1431 | } | ||
1432 | } | ||
1433 | |||
1434 | /* The options are fairly straight-forward */ | ||
948 | while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { | 1435 | while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { |
949 | switch (c) { | 1436 | switch (c) { |
950 | case 'v': | 1437 | case 'v': |
@@ -967,46 +1454,71 @@ int main(int argc, char *argv[]) | |||
967 | usage(); | 1454 | usage(); |
968 | } | 1455 | } |
969 | } | 1456 | } |
1457 | /* After the other arguments we expect memory and kernel image name, | ||
1458 | * followed by command line arguments for the kernel. */ | ||
970 | if (optind + 2 > argc) | 1459 | if (optind + 2 > argc) |
971 | usage(); | 1460 | usage(); |
972 | 1461 | ||
973 | /* We need a console device */ | 1462 | /* We always have a console device */ |
974 | setup_console(&device_list); | 1463 | setup_console(&device_list); |
975 | 1464 | ||
976 | /* First we map /dev/zero over all of guest-physical memory. */ | 1465 | /* We start by mapping anonymous pages over all of guest-physical |
977 | mem = atoi(argv[optind]) * 1024 * 1024; | 1466 | * memory range. This fills it with 0, and ensures that the Guest |
1467 | * won't be killed when it tries to access it. */ | ||
978 | map_zeroed_pages(0, mem / getpagesize()); | 1468 | map_zeroed_pages(0, mem / getpagesize()); |
979 | 1469 | ||
980 | /* Now we load the kernel */ | 1470 | /* Now we load the kernel */ |
981 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), | 1471 | start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), |
982 | &page_offset); | 1472 | &page_offset); |
983 | 1473 | ||
984 | /* Write the device descriptors into memory. */ | 1474 | /* Map the initrd image if requested (at top of physical memory) */ |
985 | map_device_descriptors(&device_list, mem); | ||
986 | |||
987 | /* Map the initrd image if requested */ | ||
988 | if (initrd_name) { | 1475 | if (initrd_name) { |
989 | initrd_size = load_initrd(initrd_name, mem); | 1476 | initrd_size = load_initrd(initrd_name, mem); |
1477 | /* These are the locations in the Linux boot header where the | |
1478 | * start and size of the initrd are expected to be found. */ | ||
990 | *(unsigned long *)(boot+0x218) = mem - initrd_size; | 1479 | *(unsigned long *)(boot+0x218) = mem - initrd_size; |
991 | *(unsigned long *)(boot+0x21c) = initrd_size; | 1480 | *(unsigned long *)(boot+0x21c) = initrd_size; |
1481 | /* The bootloader type 0xFF means "unknown"; that's OK. */ | ||
992 | *(unsigned char *)(boot+0x210) = 0xFF; | 1482 | *(unsigned char *)(boot+0x210) = 0xFF; |
993 | } | 1483 | } |
994 | 1484 | ||
995 | /* Set up the initial linar pagetables. */ | 1485 | /* Set up the initial linear pagetables, starting below the initrd. */ |
996 | pgdir = setup_pagetables(mem, initrd_size, page_offset); | 1486 | pgdir = setup_pagetables(mem, initrd_size, page_offset); |
997 | 1487 | ||
998 | /* E820 memory map: ours is a simple, single region. */ | 1488 | /* The Linux boot header contains an "E820" memory map: ours is a |
1489 | * simple, single region. */ | ||
999 | *(char*)(boot+E820NR) = 1; | 1490 | *(char*)(boot+E820NR) = 1; |
1000 | *((struct e820entry *)(boot+E820MAP)) | 1491 | *((struct e820entry *)(boot+E820MAP)) |
1001 | = ((struct e820entry) { 0, mem, E820_RAM }); | 1492 | = ((struct e820entry) { 0, mem, E820_RAM }); |
1002 | /* Command line pointer and command line (at 4096) */ | 1493 | /* The boot header contains a command line pointer: we put the command |
1494 | * line after the boot header (at address 4096) */ | ||
1003 | *(void **)(boot + 0x228) = boot + 4096; | 1495 | *(void **)(boot + 0x228) = boot + 4096; |
1004 | concat(boot + 4096, argv+optind+2); | 1496 | concat(boot + 4096, argv+optind+2); |
1005 | /* Paravirt type: 1 == lguest */ | 1497 | |
1498 | /* The guest type value of "1" tells the Guest it's under lguest. */ | ||
1006 | *(int *)(boot + 0x23c) = 1; | 1499 | *(int *)(boot + 0x23c) = 1; |
1007 | 1500 | ||
1501 | /* We tell the kernel to initialize the Guest: this returns the open | ||
1502 | * /dev/lguest file descriptor. */ | ||
1008 | lguest_fd = tell_kernel(pgdir, start, page_offset); | 1503 | lguest_fd = tell_kernel(pgdir, start, page_offset); |
1504 | |||
1505 | /* We fork off a child process, which wakes the Launcher whenever one | ||
1506 | * of the input file descriptors needs attention. Otherwise we would | ||
1507 | * run the Guest until it tries to output something. */ | ||
1009 | waker_fd = setup_waker(lguest_fd, &device_list); | 1508 | waker_fd = setup_waker(lguest_fd, &device_list); |
1010 | 1509 | ||
1510 | /* Finally, run the Guest. This doesn't return. */ | ||
1011 | run_guest(lguest_fd, &device_list); | 1511 | run_guest(lguest_fd, &device_list); |
1012 | } | 1512 | } |
1513 | /*:*/ | ||
1514 | |||
1515 | /*M:999 | ||
1516 | * Mastery is done: you now know everything I do. | ||
1517 | * | ||
1518 | * But surely you have seen code, features and bugs in your wanderings which | ||
1519 | * you now yearn to attack? That is the real game, and I look forward to you | ||
1520 | * patching and forking lguest into the Your-Name-Here-visor. | ||
1521 | * | ||
1522 | * Farewell, and good coding! | ||
1523 | * Rusty Russell. | ||
1524 | */ | ||
diff --git a/Documentation/sched-stats.txt b/Documentation/sched-stats.txt index 6f72021aae51..442e14d35dea 100644 --- a/Documentation/sched-stats.txt +++ b/Documentation/sched-stats.txt | |||
@@ -1,10 +1,11 @@ | |||
1 | Version 10 of schedstats includes support for sched_domains, which | 1 | Version 14 of schedstats includes support for sched_domains, which hit the |
2 | hit the mainline kernel in 2.6.7. Some counters make more sense to be | 2 | mainline kernel in 2.6.20 although it is identical to the stats from version |
3 | per-runqueue; other to be per-domain. Note that domains (and their associated | 3 | 12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel |
4 | information) will only be pertinent and available on machines utilizing | 4 | release). Some counters make more sense to be per-runqueue; others to be |
5 | CONFIG_SMP. | 5 | per-domain. Note that domains (and their associated information) will only |
6 | 6 | be pertinent and available on machines utilizing CONFIG_SMP. | |
7 | In version 10 of schedstat, there is at least one level of domain | 7 | |
8 | In version 14 of schedstat, there is at least one level of domain | ||
8 | statistics for each cpu listed, and there may well be more than one | 9 | statistics for each cpu listed, and there may well be more than one |
9 | domain. Domains have no particular names in this implementation, but | 10 | domain. Domains have no particular names in this implementation, but |
10 | the highest numbered one typically arbitrates balancing across all the | 11 | the highest numbered one typically arbitrates balancing across all the |
@@ -27,7 +28,7 @@ to write their own scripts, the fields are described here. | |||
27 | 28 | ||
28 | CPU statistics | 29 | CPU statistics |
29 | -------------- | 30 | -------------- |
30 | cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 | 31 | cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 |
31 | 32 | ||
32 | NOTE: In the sched_yield() statistics, the active queue is considered empty | 33 | NOTE: In the sched_yield() statistics, the active queue is considered empty |
33 | if it has only one process in it, since obviously the process calling | 34 | if it has only one process in it, since obviously the process calling |
@@ -39,48 +40,20 @@ First four fields are sched_yield() statistics: | |||
39 | 3) # of times just the expired queue was empty | 40 | 3) # of times just the expired queue was empty |
40 | 4) # of times sched_yield() was called | 41 | 4) # of times sched_yield() was called |
41 | 42 | ||
42 | Next four are schedule() statistics: | 43 | Next three are schedule() statistics: |
43 | 5) # of times the active queue had at least one other process on it | 44 | 5) # of times we switched to the expired queue and reused it |
44 | 6) # of times we switched to the expired queue and reused it | 45 | 6) # of times schedule() was called |
45 | 7) # of times schedule() was called | 46 | 7) # of times schedule() left the processor idle |
46 | 8) # of times schedule() left the processor idle | ||
47 | |||
48 | Next four are active_load_balance() statistics: | ||
49 | 9) # of times active_load_balance() was called | ||
50 | 10) # of times active_load_balance() caused this cpu to gain a task | ||
51 | 11) # of times active_load_balance() caused this cpu to lose a task | ||
52 | 12) # of times active_load_balance() tried to move a task and failed | ||
53 | |||
54 | Next three are try_to_wake_up() statistics: | ||
55 | 13) # of times try_to_wake_up() was called | ||
56 | 14) # of times try_to_wake_up() successfully moved the awakening task | ||
57 | 15) # of times try_to_wake_up() attempted to move the awakening task | ||
58 | |||
59 | Next two are wake_up_new_task() statistics: | ||
60 | 16) # of times wake_up_new_task() was called | ||
61 | 17) # of times wake_up_new_task() successfully moved the new task | ||
62 | |||
63 | Next one is a sched_migrate_task() statistic: | ||
64 | 18) # of times sched_migrate_task() was called | ||
65 | 47 | ||
66 | Next one is a sched_balance_exec() statistic: | 48 | Next two are try_to_wake_up() statistics: |
67 | 19) # of times sched_balance_exec() was called | 49 | 8) # of times try_to_wake_up() was called |
50 | 9) # of times try_to_wake_up() was called to wake up the local cpu | ||
68 | 51 | ||
69 | Next three are statistics describing scheduling latency: | 52 | Next three are statistics describing scheduling latency: |
70 | 20) sum of all time spent running by tasks on this processor (in ms) | 53 | 10) sum of all time spent running by tasks on this processor (in jiffies) |
71 | 21) sum of all time spent waiting to run by tasks on this processor (in ms) | 54 | 11) sum of all time spent waiting to run by tasks on this processor (in |
72 | 22) # of tasks (not necessarily unique) given to the processor | 55 | jiffies) |
73 | 56 | 12) # of timeslices run on this cpu | |
74 | The last six are statistics dealing with pull_task(): | ||
75 | 23) # of times pull_task() moved a task to this cpu when newly idle | ||
76 | 24) # of times pull_task() stole a task from this cpu when another cpu | ||
77 | was newly idle | ||
78 | 25) # of times pull_task() moved a task to this cpu when idle | ||
79 | 26) # of times pull_task() stole a task from this cpu when another cpu | ||
80 | was idle | ||
81 | 27) # of times pull_task() moved a task to this cpu when busy | ||
82 | 28) # of times pull_task() stole a task from this cpu when another cpu | ||
83 | was busy | ||
84 | 57 | ||
85 | 58 | ||
86 | Domain statistics | 59 | Domain statistics |
@@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if | |||
89 | CONFIG_SMP is not defined, *no* domains are utilized and these lines | 62 | CONFIG_SMP is not defined, *no* domains are utilized and these lines |
90 | will not appear in the output.) | 63 | will not appear in the output.) |
91 | 64 | ||
92 | domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | 65 | domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
93 | 66 | ||
94 | The first field is a bit mask indicating what cpus this domain operates over. | 67 | The first field is a bit mask indicating what cpus this domain operates over. |
95 | 68 | ||
96 | The next fifteen are a variety of load_balance() statistics: | 69 | The next 24 are a variety of load_balance() statistics grouped into types |
97 | 70 | of idleness (idle, busy, and newly idle): | |
98 | 1) # of times in this domain load_balance() was called when the cpu | 71 | |
99 | was idle | 72 | 1) # of times in this domain load_balance() was called when the |
100 | 2) # of times in this domain load_balance() was called when the cpu | 73 | cpu was idle |
101 | was busy | 74 | 2) # of times in this domain load_balance() checked but found |
102 | 3) # of times in this domain load_balance() was called when the cpu | 75 | the load did not require balancing when the cpu was idle |
103 | was just becoming idle | 76 | 3) # of times in this domain load_balance() tried to move one or |
104 | 4) # of times in this domain load_balance() tried to move one or more | 77 | more tasks and failed, when the cpu was idle |
105 | tasks and failed, when the cpu was idle | 78 | 4) sum of imbalances discovered (if any) with each call to |
106 | 5) # of times in this domain load_balance() tried to move one or more | 79 | load_balance() in this domain when the cpu was idle |
107 | tasks and failed, when the cpu was busy | 80 | 5) # of times in this domain pull_task() was called when the cpu |
108 | 6) # of times in this domain load_balance() tried to move one or more | 81 | was idle |
109 | tasks and failed, when the cpu was just becoming idle | 82 | 6) # of times in this domain pull_task() was called even though |
110 | 7) sum of imbalances discovered (if any) with each call to | 83 | the target task was cache-hot when idle |
111 | load_balance() in this domain when the cpu was idle | 84 | 7) # of times in this domain load_balance() was called but did |
112 | 8) sum of imbalances discovered (if any) with each call to | 85 | not find a busier queue while the cpu was idle |
113 | load_balance() in this domain when the cpu was busy | 86 | 8) # of times in this domain a busier queue was found while the |
114 | 9) sum of imbalances discovered (if any) with each call to | 87 | cpu was idle but no busier group was found |
115 | load_balance() in this domain when the cpu was just becoming idle | 88 | |
116 | 10) # of times in this domain load_balance() was called but did not find | 89 | 9) # of times in this domain load_balance() was called when the |
117 | a busier queue while the cpu was idle | 90 | cpu was busy |
118 | 11) # of times in this domain load_balance() was called but did not find | 91 | 10) # of times in this domain load_balance() checked but found the |
119 | a busier queue while the cpu was busy | 92 | load did not require balancing when busy |
120 | 12) # of times in this domain load_balance() was called but did not find | 93 | 11) # of times in this domain load_balance() tried to move one or |
121 | a busier queue while the cpu was just becoming idle | 94 | more tasks and failed, when the cpu was busy |
122 | 13) # of times in this domain a busier queue was found while the cpu was | 95 | 12) sum of imbalances discovered (if any) with each call to |
123 | idle but no busier group was found | 96 | load_balance() in this domain when the cpu was busy |
124 | 14) # of times in this domain a busier queue was found while the cpu was | 97 | 13) # of times in this domain pull_task() was called when busy |
125 | busy but no busier group was found | 98 | 14) # of times in this domain pull_task() was called even though the |
126 | 15) # of times in this domain a busier queue was found while the cpu was | 99 | target task was cache-hot when busy |
127 | just becoming idle but no busier group was found | 100 | 15) # of times in this domain load_balance() was called but did not |
128 | 101 | find a busier queue while the cpu was busy | |
129 | Next two are sched_balance_exec() statistics: | 102 | 16) # of times in this domain a busier queue was found while the cpu |
130 | 17) # of times in this domain sched_balance_exec() successfully pushed | 103 | was busy but no busier group was found |
131 | a task to a new cpu | 104 | |
132 | 18) # of times in this domain sched_balance_exec() tried but failed to | 105 | 17) # of times in this domain load_balance() was called when the |
133 | push a task to a new cpu | 106 | cpu was just becoming idle |
134 | 107 | 18) # of times in this domain load_balance() checked but found the | |
135 | Next two are try_to_wake_up() statistics: | 108 | load did not require balancing when the cpu was just becoming idle |
136 | 19) # of times in this domain try_to_wake_up() tried to move a task based | 109 | 19) # of times in this domain load_balance() tried to move one or more |
137 | on affinity and cache warmth | 110 | tasks and failed, when the cpu was just becoming idle |
138 | 20) # of times in this domain try_to_wake_up() tried to move a task based | 111 | 20) sum of imbalances discovered (if any) with each call to |
139 | on load balancing | 112 | load_balance() in this domain when the cpu was just becoming idle |
140 | 113 | 21) # of times in this domain pull_task() was called when newly idle | |
114 | 22) # of times in this domain pull_task() was called even though the | ||
115 | target task was cache-hot when just becoming idle | ||
116 | 23) # of times in this domain load_balance() was called but did not | ||
117 | find a busier queue while the cpu was just becoming idle | ||
118 | 24) # of times in this domain a busier queue was found while the cpu | ||
119 | was just becoming idle but no busier group was found | ||
120 | |||
121 | Next three are active_load_balance() statistics: | ||
122 | 25) # of times active_load_balance() was called | ||
123 | 26) # of times active_load_balance() tried to move a task and failed | ||
124 | 27) # of times active_load_balance() successfully moved a task | ||
125 | |||
126 | Next three are sched_balance_exec() statistics: | ||
127 | 28) sbe_cnt is not used | ||
128 | 29) sbe_balanced is not used | ||
129 | 30) sbe_pushed is not used | ||
130 | |||
131 | Next three are sched_balance_fork() statistics: | ||
132 | 31) sbf_cnt is not used | ||
133 | 32) sbf_balanced is not used | ||
134 | 33) sbf_pushed is not used | ||
135 | |||
136 | Next three are try_to_wake_up() statistics: | ||
137 | 34) # of times in this domain try_to_wake_up() awoke a task that | ||
138 | last ran on a different cpu in this domain | ||
139 | 35) # of times in this domain try_to_wake_up() moved a task to the | ||
140 | waking cpu because it was cache-cold on its own cpu anyway | ||
141 | 36) # of times in this domain try_to_wake_up() started passive balancing | ||
141 | 142 | ||
142 | /proc/<pid>/schedstat | 143 | /proc/<pid>/schedstat |
143 | ---------------- | 144 | ---------------- |
144 | schedstats also adds a new /proc/<pid>/schedstat file to include some of | 145 | schedstats also adds a new /proc/<pid>/schedstat file to include some of |
145 | the same information on a per-process level. There are three fields in | 146 | the same information on a per-process level. There are three fields in |
146 | this file correlating to fields 20, 21, and 22 in the CPU fields, but | 147 | this file correlating for that process to: |
147 | they only apply for that process. | 148 | 1) time spent on the cpu |
149 | 2) time spent waiting on a runqueue | ||
150 | 3) # of timeslices run on this cpu | ||
148 | 151 | ||
149 | A program could be easily written to make use of these extra fields to | 152 | A program could be easily written to make use of these extra fields to |
150 | report on how well a particular process or set of processes is faring | 153 | report on how well a particular process or set of processes is faring |
151 | under the scheduler's policies. A simple version of such a program is | 154 | under the scheduler's policies. A simple version of such a program is |
152 | available at | 155 | available at |
153 | http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c | 156 | http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c |
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c new file mode 100644 index 000000000000..218e86215297 --- /dev/null +++ b/Documentation/spi/spidev_test.c | |||
@@ -0,0 +1,202 @@ | |||
1 | /* | ||
2 | * SPI testing utility (using spidev driver) | ||
3 | * | ||
4 | * Copyright (c) 2007 MontaVista Software, Inc. | ||
5 | * Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License. | ||
10 | * | ||
11 | * Cross-compile with cross-gcc -I/path/to/cross-kernel/include | ||
12 | */ | ||
13 | |||
14 | #include <stdint.h> | ||
15 | #include <unistd.h> | ||
16 | #include <stdio.h> | ||
17 | #include <stdlib.h> | ||
18 | #include <getopt.h> | ||
19 | #include <fcntl.h> | ||
20 | #include <sys/ioctl.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/spi/spidev.h> | ||
23 | |||
24 | #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) | ||
25 | |||
26 | static void pabort(const char *s) | ||
27 | { | ||
28 | perror(s); | ||
29 | abort(); | ||
30 | } | ||
31 | |||
32 | static char *device = "/dev/spidev1.1"; | ||
33 | static uint8_t mode; | ||
34 | static uint8_t bits = 8; | ||
35 | static uint32_t speed = 500000; | ||
36 | static uint16_t delay; | ||
37 | |||
38 | static void transfer(int fd) | ||
39 | { | ||
40 | int ret; | ||
41 | uint8_t tx[] = { | ||
42 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
43 | 0x40, 0x00, 0x00, 0x00, 0x00, 0x95, | ||
44 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
45 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
46 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | ||
47 | 0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD, | ||
48 | 0xF0, 0x0D, | ||
49 | }; | ||
50 | uint8_t rx[ARRAY_SIZE(tx)] = {0, }; | ||
51 | struct spi_ioc_transfer tr = { | ||
52 | .tx_buf = (unsigned long)tx, | ||
53 | .rx_buf = (unsigned long)rx, | ||
54 | .len = ARRAY_SIZE(tx), | ||
55 | .delay_usecs = delay, | ||
56 | .speed_hz = speed, | ||
57 | .bits_per_word = bits, | ||
58 | }; | ||
59 | |||
60 | ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr); | ||
61 | if (ret == 1) | ||
62 | pabort("can't send spi message"); | ||
63 | |||
64 | for (ret = 0; ret < ARRAY_SIZE(tx); ret++) { | ||
65 | if (!(ret % 6)) | ||
66 | puts(""); | ||
67 | printf("%.2X ", rx[ret]); | ||
68 | } | ||
69 | puts(""); | ||
70 | } | ||
71 | |||
72 | void print_usage(char *prog) | ||
73 | { | ||
74 | printf("Usage: %s [-DsbdlHOLC3]\n", prog); | ||
75 | puts(" -D --device device to use (default /dev/spidev1.1)\n" | ||
76 | " -s --speed max speed (Hz)\n" | ||
77 | " -d --delay delay (usec)\n" | ||
78 | " -b --bpw bits per word \n" | ||
79 | " -l --loop loopback\n" | ||
80 | " -H --cpha clock phase\n" | ||
81 | " -O --cpol clock polarity\n" | ||
82 | " -L --lsb least significant bit first\n" | ||
83 | " -C --cs-high chip select active high\n" | ||
84 | " -3 --3wire SI/SO signals shared\n"); | ||
85 | exit(1); | ||
86 | } | ||
87 | |||
88 | void parse_opts(int argc, char *argv[]) | ||
89 | { | ||
90 | while (1) { | ||
91 | static struct option lopts[] = { | ||
92 | { "device", 1, 0, 'D' }, | ||
93 | { "speed", 1, 0, 's' }, | ||
94 | { "delay", 1, 0, 'd' }, | ||
95 | { "bpw", 1, 0, 'b' }, | ||
96 | { "loop", 0, 0, 'l' }, | ||
97 | { "cpha", 0, 0, 'H' }, | ||
98 | { "cpol", 0, 0, 'O' }, | ||
99 | { "lsb", 0, 0, 'L' }, | ||
100 | { "cs-high", 0, 0, 'C' }, | ||
101 | { "3wire", 0, 0, '3' }, | ||
102 | { NULL, 0, 0, 0 }, | ||
103 | }; | ||
104 | int c; | ||
105 | |||
106 | c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL); | ||
107 | |||
108 | if (c == -1) | ||
109 | break; | ||
110 | |||
111 | switch (c) { | ||
112 | case 'D': | ||
113 | device = optarg; | ||
114 | break; | ||
115 | case 's': | ||
116 | speed = atoi(optarg); | ||
117 | break; | ||
118 | case 'd': | ||
119 | delay = atoi(optarg); | ||
120 | break; | ||
121 | case 'b': | ||
122 | bits = atoi(optarg); | ||
123 | break; | ||
124 | case 'l': | ||
125 | mode |= SPI_LOOP; | ||
126 | break; | ||
127 | case 'H': | ||
128 | mode |= SPI_CPHA; | ||
129 | break; | ||
130 | case 'O': | ||
131 | mode |= SPI_CPOL; | ||
132 | break; | ||
133 | case 'L': | ||
134 | mode |= SPI_LSB_FIRST; | ||
135 | break; | ||
136 | case 'C': | ||
137 | mode |= SPI_CS_HIGH; | ||
138 | break; | ||
139 | case '3': | ||
140 | mode |= SPI_3WIRE; | ||
141 | break; | ||
142 | default: | ||
143 | print_usage(argv[0]); | ||
144 | break; | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | |||
149 | int main(int argc, char *argv[]) | ||
150 | { | ||
151 | int ret = 0; | ||
152 | int fd; | ||
153 | |||
154 | parse_opts(argc, argv); | ||
155 | |||
156 | fd = open(device, O_RDWR); | ||
157 | if (fd < 0) | ||
158 | pabort("can't open device"); | ||
159 | |||
160 | /* | ||
161 | * spi mode | ||
162 | */ | ||
163 | ret = ioctl(fd, SPI_IOC_WR_MODE, &mode); | ||
164 | if (ret == -1) | ||
165 | pabort("can't set spi mode"); | ||
166 | |||
167 | ret = ioctl(fd, SPI_IOC_RD_MODE, &mode); | ||
168 | if (ret == -1) | ||
169 | pabort("can't get spi mode"); | ||
170 | |||
171 | /* | ||
172 | * bits per word | ||
173 | */ | ||
174 | ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits); | ||
175 | if (ret == -1) | ||
176 | pabort("can't set bits per word"); | ||
177 | |||
178 | ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits); | ||
179 | if (ret == -1) | ||
180 | pabort("can't get bits per word"); | ||
181 | |||
182 | /* | ||
183 | * max speed hz | ||
184 | */ | ||
185 | ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed); | ||
186 | if (ret == -1) | ||
187 | pabort("can't set max speed hz"); | ||
188 | |||
189 | ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed); | ||
190 | if (ret == -1) | ||
191 | pabort("can't get max speed hz"); | ||
192 | |||
193 | printf("spi mode: %d\n", mode); | ||
194 | printf("bits per word: %d\n", bits); | ||
195 | printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000); | ||
196 | |||
197 | transfer(fd); | ||
198 | |||
199 | close(fd); | ||
200 | |||
201 | return ret; | ||
202 | } | ||
diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt index a2afca3b2bab..847b342b7b20 100644 --- a/Documentation/stable_api_nonsense.txt +++ b/Documentation/stable_api_nonsense.txt | |||
@@ -10,7 +10,7 @@ kernel to userspace interfaces. The kernel to userspace interface is | |||
10 | the one that application programs use, the syscall interface. That | 10 | the one that application programs use, the syscall interface. That |
11 | interface is _very_ stable over time, and will not break. I have old | 11 | interface is _very_ stable over time, and will not break. I have old |
12 | programs that were built on a pre 0.9something kernel that still work | 12 | programs that were built on a pre 0.9something kernel that still work |
13 | just fine on the latest 2.6 kernel release. This interface is the one | 13 | just fine on the latest 2.6 kernel release. That interface is the one |
14 | that users and application programmers can count on being stable. | 14 | that users and application programmers can count on being stable. |
15 | 15 | ||
16 | 16 | ||
diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt index 42861bb0bc9b..80ef562160bb 100644 --- a/Documentation/sysfs-rules.txt +++ b/Documentation/sysfs-rules.txt | |||
@@ -1,19 +1,18 @@ | |||
1 | Rules on how to access information in the Linux kernel sysfs | 1 | Rules on how to access information in the Linux kernel sysfs |
2 | 2 | ||
3 | The kernel exported sysfs exports internal kernel implementation-details | 3 | The kernel-exported sysfs exports internal kernel implementation details |
4 | and depends on internal kernel structures and layout. It is agreed upon | 4 | and depends on internal kernel structures and layout. It is agreed upon |
5 | by the kernel developers that the Linux kernel does not provide a stable | 5 | by the kernel developers that the Linux kernel does not provide a stable |
6 | internal API. As sysfs is a direct export of kernel internal | 6 | internal API. As sysfs is a direct export of kernel internal |
7 | structures, the sysfs interface can not provide a stable interface eighter, | 7 | structures, the sysfs interface cannot provide a stable interface either; |
8 | it may always change along with internal kernel changes. | 8 | it may always change along with internal kernel changes. |
9 | 9 | ||
10 | To minimize the risk of breaking users of sysfs, which are in most cases | 10 | To minimize the risk of breaking users of sysfs, which are in most cases |
11 | low-level userspace applications, with a new kernel release, the users | 11 | low-level userspace applications, with a new kernel release, the users |
12 | of sysfs must follow some rules to use an as abstract-as-possible way to | 12 | of sysfs must follow some rules to use an as-abstract-as-possible way to |
13 | access this filesystem. The current udev and HAL programs already | 13 | access this filesystem. The current udev and HAL programs already |
14 | implement this and users are encouraged to plug, if possible, into the | 14 | implement this and users are encouraged to plug, if possible, into the |
15 | abstractions these programs provide instead of accessing sysfs | 15 | abstractions these programs provide instead of accessing sysfs directly. |
16 | directly. | ||
17 | 16 | ||
18 | But if you really do want or need to access sysfs directly, please follow | 17 | But if you really do want or need to access sysfs directly, please follow |
19 | the following rules and then your programs should work with future | 18 | the following rules and then your programs should work with future |
@@ -25,22 +24,22 @@ versions of the sysfs interface. | |||
25 | implementation details in its own API. Therefore it is not better than | 24 | implementation details in its own API. Therefore it is not better than |
26 | reading directories and opening the files yourself. | 25 | reading directories and opening the files yourself. |
27 | Also, it is not actively maintained, in the sense of reflecting the | 26 | Also, it is not actively maintained, in the sense of reflecting the |
28 | current kernel-development. The goal of providing a stable interface | 27 | current kernel development. The goal of providing a stable interface |
29 | to sysfs has failed, it causes more problems, than it solves. It | 28 | to sysfs has failed; it causes more problems than it solves. It |
30 | violates many of the rules in this document. | 29 | violates many of the rules in this document. |
31 | 30 | ||
32 | - sysfs is always at /sys | 31 | - sysfs is always at /sys |
33 | Parsing /proc/mounts is a waste of time. Other mount points are a | 32 | Parsing /proc/mounts is a waste of time. Other mount points are a |
34 | system configuration bug you should not try to solve. For test cases, | 33 | system configuration bug you should not try to solve. For test cases, |
35 | possibly support a SYSFS_PATH environment variable to overwrite the | 34 | possibly support a SYSFS_PATH environment variable to overwrite the |
36 | applications behavior, but never try to search for sysfs. Never try | 35 | application's behavior, but never try to search for sysfs. Never try |
37 | to mount it, if you are not an early boot script. | 36 | to mount it, if you are not an early boot script. |
38 | 37 | ||
39 | - devices are only "devices" | 38 | - devices are only "devices" |
40 | There is no such thing as class-, bus-, physical devices, | 39 | There is no such thing as class-, bus-, physical devices, |
41 | interfaces, and such that you can rely on in userspace. Everything is | 40 | interfaces, and such that you can rely on in userspace. Everything is |
42 | just simply a "device". Class-, bus-, physical, ... types are just | 41 | just simply a "device". Class-, bus-, physical, ... types are just |
43 | kernel implementation details, which should not be expected by | 42 | kernel implementation details which should not be expected by |
44 | applications that look for devices in sysfs. | 43 | applications that look for devices in sysfs. |
45 | 44 | ||
46 | The properties of a device are: | 45 | The properties of a device are: |
@@ -48,11 +47,11 @@ versions of the sysfs interface. | |||
48 | - identical to the DEVPATH value in the event sent from the kernel | 47 | - identical to the DEVPATH value in the event sent from the kernel |
49 | at device creation and removal | 48 | at device creation and removal |
50 | - the unique key to the device at that point in time | 49 | - the unique key to the device at that point in time |
51 | - the kernels path to the device-directory without the leading | 50 | - the kernel's path to the device directory without the leading |
52 | /sys, and always starting with a slash | 51 | /sys, and always starting with a slash |
53 | - all elements of a devpath must be real directories. Symlinks | 52 | - all elements of a devpath must be real directories. Symlinks |
54 | pointing to /sys/devices must always be resolved to their real | 53 | pointing to /sys/devices must always be resolved to their real |
55 | target, and the target path must be used to access the device. | 54 | target and the target path must be used to access the device. |
56 | That way the devpath to the device matches the devpath of the | 55 | That way the devpath to the device matches the devpath of the |
57 | kernel used at event time. | 56 | kernel used at event time. |
58 | - using or exposing symlink values as elements in a devpath string | 57 | - using or exposing symlink values as elements in a devpath string |
@@ -73,17 +72,17 @@ versions of the sysfs interface. | |||
73 | link | 72 | link |
74 | - it is retrieved by reading the "driver"-link and using only the | 73 | - it is retrieved by reading the "driver"-link and using only the |
75 | last element of the target path | 74 | last element of the target path |
76 | - devices which do not have "driver"-link, just do not have a | 75 | - devices which do not have "driver"-link just do not have a |
77 | driver; copying the driver value in a child device context, is a | 76 | driver; copying the driver value in a child device context is a |
78 | bug in the application | 77 | bug in the application |
79 | 78 | ||
80 | o attributes | 79 | o attributes |
81 | - the files in the device directory or files below a subdirectories | 80 | - the files in the device directory or files below subdirectories |
82 | of the same device directory | 81 | of the same device directory |
83 | - accessing attributes reached by a symlink pointing to another device, | 82 | - accessing attributes reached by a symlink pointing to another device, |
84 | like the "device"-link, is a bug in the application | 83 | like the "device"-link, is a bug in the application |
85 | 84 | ||
86 | Everything else is just a kernel driver-core implementation detail, | 85 | Everything else is just a kernel driver-core implementation detail |
87 | that should not be assumed to be stable across kernel releases. | 86 | that should not be assumed to be stable across kernel releases. |
88 | 87 | ||
89 | - Properties of parent devices never belong into a child device. | 88 | - Properties of parent devices never belong into a child device. |
@@ -91,25 +90,25 @@ versions of the sysfs interface. | |||
91 | context properties. If the device 'eth0' or 'sda' does not have a | 90 | context properties. If the device 'eth0' or 'sda' does not have a |
92 | "driver"-link, then this device does not have a driver. Its value is empty. | 91 | "driver"-link, then this device does not have a driver. Its value is empty. |
93 | Never copy any property of the parent-device into a child-device. Parent | 92 | Never copy any property of the parent-device into a child-device. Parent |
94 | device-properties may change dynamically without any notice to the | 93 | device properties may change dynamically without any notice to the |
95 | child device. | 94 | child device. |
96 | 95 | ||
97 | - Hierarchy in a single device-tree | 96 | - Hierarchy in a single device tree |
98 | There is only one valid place in sysfs where hierarchy can be examined | 97 | There is only one valid place in sysfs where hierarchy can be examined |
99 | and this is below: /sys/devices. | 98 | and this is below: /sys/devices. |
100 | It is planned, that all device directories will end up in the tree | 99 | It is planned that all device directories will end up in the tree |
101 | below this directory. | 100 | below this directory. |
102 | 101 | ||
103 | - Classification by subsystem | 102 | - Classification by subsystem |
104 | There are currently three places for classification of devices: | 103 | There are currently three places for classification of devices: |
105 | /sys/block, /sys/class and /sys/bus. It is planned that these will | 104 | /sys/block, /sys/class and /sys/bus. It is planned that these will |
106 | not contain any device-directories themselves, but only flat lists of | 105 | not contain any device directories themselves, but only flat lists of |
107 | symlinks pointing to the unified /sys/devices tree. | 106 | symlinks pointing to the unified /sys/devices tree. |
108 | All three places have completely different rules on how to access | 107 | All three places have completely different rules on how to access |
109 | device information. It is planned to merge all three | 108 | device information. It is planned to merge all three |
110 | classification-directories into one place at /sys/subsystem, | 109 | classification directories into one place at /sys/subsystem, |
111 | following the layout of the bus-directories. All buses and | 110 | following the layout of the bus directories. All buses and |
112 | classes, including the converted block-subsystem, will show up | 111 | classes, including the converted block subsystem, will show up |
113 | there. | 112 | there. |
114 | The devices belonging to a subsystem will create a symlink in the | 113 | The devices belonging to a subsystem will create a symlink in the |
115 | "devices" directory at /sys/subsystem/<name>/devices. | 114 | "devices" directory at /sys/subsystem/<name>/devices. |
@@ -121,38 +120,38 @@ versions of the sysfs interface. | |||
121 | subsystem name. | 120 | subsystem name. |
122 | 121 | ||
123 | Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or | 122 | Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or |
124 | /sys/block and /sys/class/block are not interchangeable, is a bug in | 123 | /sys/block and /sys/class/block are not interchangeable is a bug in |
125 | the application. | 124 | the application. |
126 | 125 | ||
127 | - Block | 126 | - Block |
128 | The converted block-subsystem at /sys/class/block, or | 127 | The converted block subsystem at /sys/class/block or |
129 | /sys/subsystem/block will contain the links for disks and partitions | 128 | /sys/subsystem/block will contain the links for disks and partitions |
130 | at the same level, never in a hierarchy. Assuming the block-subsystem to | 129 | at the same level, never in a hierarchy. Assuming the block subsystem to |
131 | contain only disks and not partition-devices in the same flat list is | 130 | contain only disks and not partition devices in the same flat list is |
132 | a bug in the application. | 131 | a bug in the application. |
133 | 132 | ||
134 | - "device"-link and <subsystem>:<kernel name>-links | 133 | - "device"-link and <subsystem>:<kernel name>-links |
135 | Never depend on the "device"-link. The "device"-link is a workaround | 134 | Never depend on the "device"-link. The "device"-link is a workaround |
136 | for the old layout, where class-devices are not created in | 135 | for the old layout, where class devices are not created in |
137 | /sys/devices/ like the bus-devices. If the link-resolving of a | 136 | /sys/devices/ like the bus devices. If the link-resolving of a |
138 | device-directory does not end in /sys/devices/, you can use the | 137 | device directory does not end in /sys/devices/, you can use the |
139 | "device"-link to find the parent devices in /sys/devices/. That is the | 138 | "device"-link to find the parent devices in /sys/devices/. That is the |
140 | single valid use of the "device"-link, it must never appear in any | 139 | single valid use of the "device"-link; it must never appear in any |
141 | path as an element. Assuming the existence of the "device"-link for | 140 | path as an element. Assuming the existence of the "device"-link for |
142 | a device in /sys/devices/ is a bug in the application. | 141 | a device in /sys/devices/ is a bug in the application. |
143 | Accessing /sys/class/net/eth0/device is a bug in the application. | 142 | Accessing /sys/class/net/eth0/device is a bug in the application. |
144 | 143 | ||
145 | Never depend on the class-specific links back to the /sys/class | 144 | Never depend on the class-specific links back to the /sys/class |
146 | directory. These links are also a workaround for the design mistake | 145 | directory. These links are also a workaround for the design mistake |
147 | that class-devices are not created in /sys/devices. If a device | 146 | that class devices are not created in /sys/devices. If a device |
148 | directory does not contain directories for child devices, these links | 147 | directory does not contain directories for child devices, these links |
149 | may be used to find the child devices in /sys/class. That is the single | 148 | may be used to find the child devices in /sys/class. That is the single |
150 | valid use of these links, they must never appear in any path as an | 149 | valid use of these links; they must never appear in any path as an |
151 | element. Assuming the existence of these links for devices which are | 150 | element. Assuming the existence of these links for devices which are |
152 | real child device directories in the /sys/devices tree, is a bug in | 151 | real child device directories in the /sys/devices tree is a bug in |
153 | the application. | 152 | the application. |
154 | 153 | ||
155 | It is planned to remove all these links when when all class-device | 154 | It is planned to remove all these links when all class device |
156 | directories live in /sys/devices. | 155 | directories live in /sys/devices. |
157 | 156 | ||
158 | - Position of devices along device chain can change. | 157 | - Position of devices along device chain can change. |
@@ -161,6 +160,5 @@ versions of the sysfs interface. | |||
161 | the chain. You must always request the parent device you are looking for | 160 | the chain. You must always request the parent device you are looking for |
162 | by its subsystem value. You need to walk up the chain until you find | 161 | by its subsystem value. You need to walk up the chain until you find |
163 | the device that matches the expected subsystem. Depending on a specific | 162 | the device that matches the expected subsystem. Depending on a specific |
164 | position of a parent device, or exposing relative paths, using "../" to | 163 | position of a parent device or exposing relative paths using "../" to |
165 | access the chain of parents, is a bug in the application. | 164 | access the chain of parents is a bug in the application. |
166 | |||