aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/Changes1
-rw-r--r--Documentation/DMA-API.txt79
-rw-r--r--Documentation/DocBook/kernel-api.tmpl19
-rw-r--r--Documentation/DocBook/uio-howto.tmpl4
-rw-r--r--Documentation/dontdiff3
-rw-r--r--Documentation/fb/pvr2fb.txt22
-rw-r--r--Documentation/feature-removal-schedule.txt16
-rw-r--r--Documentation/filesystems/hfsplus.txt59
-rw-r--r--Documentation/hpet.txt2
-rw-r--r--Documentation/hwmon/adm10314
-rw-r--r--Documentation/hwmon/thmc5074
-rw-r--r--Documentation/i386/zero-page.txt10
-rw-r--r--Documentation/ja_JP/HOWTO66
-rw-r--r--Documentation/ja_JP/stable_api_nonsense.txt20
-rw-r--r--Documentation/kbuild/kconfig-language.txt9
-rw-r--r--Documentation/kernel-parameters.txt147
-rw-r--r--Documentation/keys.txt5
-rw-r--r--Documentation/kobject.txt178
-rw-r--r--Documentation/lguest/Makefile4
-rw-r--r--Documentation/lguest/extract58
-rw-r--r--Documentation/lguest/lguest.c620
-rw-r--r--Documentation/memory-hotplug.txt322
-rw-r--r--Documentation/sched-design-CFS.txt2
-rw-r--r--Documentation/sched-nice-design.txt108
-rw-r--r--Documentation/sched-stats.txt195
-rw-r--r--Documentation/spi/spidev_test.c202
-rw-r--r--Documentation/stable_api_nonsense.txt2
-rw-r--r--Documentation/sysfs-rules.txt72
-rw-r--r--Documentation/sysrq.txt4
-rw-r--r--Documentation/thinkpad-acpi.txt4
-rw-r--r--Documentation/vm/slabinfo.c2
31 files changed, 1809 insertions, 504 deletions
diff --git a/Documentation/Changes b/Documentation/Changes
index 73a8617f1861..cb2b141b1c3e 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -45,6 +45,7 @@ o nfs-utils 1.0.5 # showmount --version
45o procps 3.2.0 # ps --version 45o procps 3.2.0 # ps --version
46o oprofile 0.9 # oprofiled --version 46o oprofile 0.9 # oprofiled --version
47o udev 081 # udevinfo -V 47o udev 081 # udevinfo -V
48o grub 0.93 # grub --version
48 49
49Kernel compilation 50Kernel compilation
50================== 51==================
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 805db4b2cba6..cc7a8c39fb6f 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -26,7 +26,7 @@ Part Ia - Using large dma-coherent buffers
26 26
27void * 27void *
28dma_alloc_coherent(struct device *dev, size_t size, 28dma_alloc_coherent(struct device *dev, size_t size,
29 dma_addr_t *dma_handle, int flag) 29 dma_addr_t *dma_handle, gfp_t flag)
30void * 30void *
31pci_alloc_consistent(struct pci_dev *dev, size_t size, 31pci_alloc_consistent(struct pci_dev *dev, size_t size,
32 dma_addr_t *dma_handle) 32 dma_addr_t *dma_handle)
@@ -38,7 +38,7 @@ to make sure to flush the processor's write buffers before telling
38devices to read that memory.) 38devices to read that memory.)
39 39
40This routine allocates a region of <size> bytes of consistent memory. 40This routine allocates a region of <size> bytes of consistent memory.
41it also returns a <dma_handle> which may be cast to an unsigned 41It also returns a <dma_handle> which may be cast to an unsigned
42integer the same width as the bus and used as the physical address 42integer the same width as the bus and used as the physical address
43base of the region. 43base of the region.
44 44
@@ -52,21 +52,21 @@ The simplest way to do that is to use the dma_pool calls (see below).
52 52
53The flag parameter (dma_alloc_coherent only) allows the caller to 53The flag parameter (dma_alloc_coherent only) allows the caller to
54specify the GFP_ flags (see kmalloc) for the allocation (the 54specify the GFP_ flags (see kmalloc) for the allocation (the
55implementation may chose to ignore flags that affect the location of 55implementation may choose to ignore flags that affect the location of
56the returned memory, like GFP_DMA). For pci_alloc_consistent, you 56the returned memory, like GFP_DMA). For pci_alloc_consistent, you
57must assume GFP_ATOMIC behaviour. 57must assume GFP_ATOMIC behaviour.
58 58
59void 59void
60dma_free_coherent(struct device *dev, size_t size, void *cpu_addr 60dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
61 dma_addr_t dma_handle) 61 dma_addr_t dma_handle)
62void 62void
63pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr 63pci_free_consistent(struct pci_dev *dev, size_t size, void *cpu_addr,
64 dma_addr_t dma_handle) 64 dma_addr_t dma_handle)
65 65
66Free the region of consistent memory you previously allocated. dev, 66Free the region of consistent memory you previously allocated. dev,
67size and dma_handle must all be the same as those passed into the 67size and dma_handle must all be the same as those passed into the
68consistent allocate. cpu_addr must be the virtual address returned by 68consistent allocate. cpu_addr must be the virtual address returned by
69the consistent allocate 69the consistent allocate.
70 70
71 71
72Part Ib - Using small dma-coherent buffers 72Part Ib - Using small dma-coherent buffers
@@ -77,9 +77,9 @@ To get this part of the dma_ API, you must #include <linux/dmapool.h>
77Many drivers need lots of small dma-coherent memory regions for DMA 77Many drivers need lots of small dma-coherent memory regions for DMA
78descriptors or I/O buffers. Rather than allocating in units of a page 78descriptors or I/O buffers. Rather than allocating in units of a page
79or more using dma_alloc_coherent(), you can use DMA pools. These work 79or more using dma_alloc_coherent(), you can use DMA pools. These work
80much like a struct kmem_cache, except that they use the dma-coherent allocator 80much like a struct kmem_cache, except that they use the dma-coherent allocator,
81not __get_free_pages(). Also, they understand common hardware constraints 81not __get_free_pages(). Also, they understand common hardware constraints
82for alignment, like queue heads needing to be aligned on N byte boundaries. 82for alignment, like queue heads needing to be aligned on N-byte boundaries.
83 83
84 84
85 struct dma_pool * 85 struct dma_pool *
@@ -102,15 +102,15 @@ crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
102from this pool must not cross 4KByte boundaries. 102from this pool must not cross 4KByte boundaries.
103 103
104 104
105 void *dma_pool_alloc(struct dma_pool *pool, int gfp_flags, 105 void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
106 dma_addr_t *dma_handle); 106 dma_addr_t *dma_handle);
107 107
108 void *pci_pool_alloc(struct pci_pool *pool, int gfp_flags, 108 void *pci_pool_alloc(struct pci_pool *pool, gfp_t gfp_flags,
109 dma_addr_t *dma_handle); 109 dma_addr_t *dma_handle);
110 110
111This allocates memory from the pool; the returned memory will meet the size 111This allocates memory from the pool; the returned memory will meet the size
112and alignment requirements specified at creation time. Pass GFP_ATOMIC to 112and alignment requirements specified at creation time. Pass GFP_ATOMIC to
113prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks) 113prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
114pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns 114pass GFP_KERNEL to allow blocking. Like dma_alloc_coherent(), this returns
115two values: an address usable by the cpu, and the dma address usable by the 115two values: an address usable by the cpu, and the dma address usable by the
116pool's device. 116pool's device.
@@ -123,7 +123,7 @@ pool's device.
123 dma_addr_t addr); 123 dma_addr_t addr);
124 124
125This puts memory back into the pool. The pool is what was passed to 125This puts memory back into the pool. The pool is what was passed to
126the pool allocation routine; the cpu and dma addresses are what 126the pool allocation routine; the cpu (vaddr) and dma addresses are what
127were returned when that routine allocated the memory being freed. 127were returned when that routine allocated the memory being freed.
128 128
129 129
@@ -209,18 +209,18 @@ Notes: Not all memory regions in a machine can be mapped by this
209API. Further, regions that appear to be physically contiguous in 209API. Further, regions that appear to be physically contiguous in
210kernel virtual space may not be contiguous as physical memory. Since 210kernel virtual space may not be contiguous as physical memory. Since
211this API does not provide any scatter/gather capability, it will fail 211this API does not provide any scatter/gather capability, it will fail
212if the user tries to map a non physically contiguous piece of memory. 212if the user tries to map a non-physically contiguous piece of memory.
213For this reason, it is recommended that memory mapped by this API be 213For this reason, it is recommended that memory mapped by this API be
214obtained only from sources which guarantee to be physically contiguous 214obtained only from sources which guarantee it to be physically contiguous
215(like kmalloc). 215(like kmalloc).
216 216
217Further, the physical address of the memory must be within the 217Further, the physical address of the memory must be within the
218dma_mask of the device (the dma_mask represents a bit mask of the 218dma_mask of the device (the dma_mask represents a bit mask of the
219addressable region for the device. i.e. if the physical address of 219addressable region for the device. I.e., if the physical address of
220the memory anded with the dma_mask is still equal to the physical 220the memory anded with the dma_mask is still equal to the physical
221address, then the device can perform DMA to the memory). In order to 221address, then the device can perform DMA to the memory). In order to
222ensure that the memory allocated by kmalloc is within the dma_mask, 222ensure that the memory allocated by kmalloc is within the dma_mask,
223the driver may specify various platform dependent flags to restrict 223the driver may specify various platform-dependent flags to restrict
224the physical memory range of the allocation (e.g. on x86, GFP_DMA 224the physical memory range of the allocation (e.g. on x86, GFP_DMA
225guarantees to be within the first 16MB of available physical memory, 225guarantees to be within the first 16MB of available physical memory,
226as required by ISA devices). 226as required by ISA devices).
@@ -244,14 +244,14 @@ are guaranteed also to be cache line boundaries).
244 244
245DMA_TO_DEVICE synchronisation must be done after the last modification 245DMA_TO_DEVICE synchronisation must be done after the last modification
246of the memory region by the software and before it is handed off to 246of the memory region by the software and before it is handed off to
247the driver. Once this primitive is used. Memory covered by this 247the driver. Once this primitive is used, memory covered by this
248primitive should be treated as read only by the device. If the device 248primitive should be treated as read-only by the device. If the device
249may write to it at any point, it should be DMA_BIDIRECTIONAL (see 249may write to it at any point, it should be DMA_BIDIRECTIONAL (see
250below). 250below).
251 251
252DMA_FROM_DEVICE synchronisation must be done before the driver 252DMA_FROM_DEVICE synchronisation must be done before the driver
253accesses data that may be changed by the device. This memory should 253accesses data that may be changed by the device. This memory should
254be treated as read only by the driver. If the driver needs to write 254be treated as read-only by the driver. If the driver needs to write
255to it at any point, it should be DMA_BIDIRECTIONAL (see below). 255to it at any point, it should be DMA_BIDIRECTIONAL (see below).
256 256
257DMA_BIDIRECTIONAL requires special handling: it means that the driver 257DMA_BIDIRECTIONAL requires special handling: it means that the driver
@@ -261,7 +261,7 @@ you must always sync bidirectional memory twice: once before the
261memory is handed off to the device (to make sure all memory changes 261memory is handed off to the device (to make sure all memory changes
262are flushed from the processor) and once before the data may be 262are flushed from the processor) and once before the data may be
263accessed after being used by the device (to make sure any processor 263accessed after being used by the device (to make sure any processor
264cache lines are updated with data that the device may have changed. 264cache lines are updated with data that the device may have changed).
265 265
266void 266void
267dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 267dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
@@ -302,8 +302,8 @@ pci_dma_mapping_error(dma_addr_t dma_addr)
302 302
303In some circumstances dma_map_single and dma_map_page will fail to create 303In some circumstances dma_map_single and dma_map_page will fail to create
304a mapping. A driver can check for these errors by testing the returned 304a mapping. A driver can check for these errors by testing the returned
305dma address with dma_mapping_error(). A non zero return value means the mapping 305dma address with dma_mapping_error(). A non-zero return value means the mapping
306could not be created and the driver should take appropriate action (eg 306could not be created and the driver should take appropriate action (e.g.
307reduce current DMA mapping usage or delay and try again later). 307reduce current DMA mapping usage or delay and try again later).
308 308
309 int 309 int
@@ -315,7 +315,7 @@ reduce current DMA mapping usage or delay and try again later).
315 315
316Maps a scatter gather list from the block layer. 316Maps a scatter gather list from the block layer.
317 317
318Returns: the number of physical segments mapped (this may be shorted 318Returns: the number of physical segments mapped (this may be shorter
319than <nents> passed in if the block layer determines that some 319than <nents> passed in if the block layer determines that some
320elements of the scatter/gather list are physically adjacent and thus 320elements of the scatter/gather list are physically adjacent and thus
321may be mapped with a single entry). 321may be mapped with a single entry).
@@ -357,7 +357,7 @@ accessed sg->address and sg->length as shown above.
357 pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, 357 pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
358 int nents, int direction) 358 int nents, int direction)
359 359
360unmap the previously mapped scatter/gather list. All the parameters 360Unmap the previously mapped scatter/gather list. All the parameters
361must be the same as those passed in to the scatter/gather mapping 361must be the same as those passed in to the scatter/gather mapping
362API. 362API.
363 363
@@ -377,7 +377,7 @@ void
377pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, 377pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
378 int nelems, int direction) 378 int nelems, int direction)
379 379
380synchronise a single contiguous or scatter/gather mapping. All the 380Synchronise a single contiguous or scatter/gather mapping. All the
381parameters must be the same as those passed into the single mapping 381parameters must be the same as those passed into the single mapping
382API. 382API.
383 383
@@ -406,7 +406,7 @@ API at all.
406 406
407void * 407void *
408dma_alloc_noncoherent(struct device *dev, size_t size, 408dma_alloc_noncoherent(struct device *dev, size_t size,
409 dma_addr_t *dma_handle, int flag) 409 dma_addr_t *dma_handle, gfp_t flag)
410 410
411Identical to dma_alloc_coherent() except that the platform will 411Identical to dma_alloc_coherent() except that the platform will
412choose to return either consistent or non-consistent memory as it sees 412choose to return either consistent or non-consistent memory as it sees
@@ -426,34 +426,34 @@ void
426dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, 426dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
427 dma_addr_t dma_handle) 427 dma_addr_t dma_handle)
428 428
429free memory allocated by the nonconsistent API. All parameters must 429Free memory allocated by the nonconsistent API. All parameters must
430be identical to those passed in (and returned by 430be identical to those passed in (and returned by
431dma_alloc_noncoherent()). 431dma_alloc_noncoherent()).
432 432
433int 433int
434dma_is_consistent(struct device *dev, dma_addr_t dma_handle) 434dma_is_consistent(struct device *dev, dma_addr_t dma_handle)
435 435
436returns true if the device dev is performing consistent DMA on the memory 436Returns true if the device dev is performing consistent DMA on the memory
437area pointed to by the dma_handle. 437area pointed to by the dma_handle.
438 438
439int 439int
440dma_get_cache_alignment(void) 440dma_get_cache_alignment(void)
441 441
442returns the processor cache alignment. This is the absolute minimum 442Returns the processor cache alignment. This is the absolute minimum
443alignment *and* width that you must observe when either mapping 443alignment *and* width that you must observe when either mapping
444memory or doing partial flushes. 444memory or doing partial flushes.
445 445
446Notes: This API may return a number *larger* than the actual cache 446Notes: This API may return a number *larger* than the actual cache
447line, but it will guarantee that one or more cache lines fit exactly 447line, but it will guarantee that one or more cache lines fit exactly
448into the width returned by this call. It will also always be a power 448into the width returned by this call. It will also always be a power
449of two for easy alignment 449of two for easy alignment.
450 450
451void 451void
452dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, 452dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
453 unsigned long offset, size_t size, 453 unsigned long offset, size_t size,
454 enum dma_data_direction direction) 454 enum dma_data_direction direction)
455 455
456does a partial sync. starting at offset and continuing for size. You 456Does a partial sync, starting at offset and continuing for size. You
457must be careful to observe the cache alignment and width when doing 457must be careful to observe the cache alignment and width when doing
458anything like this. You must also be extra careful about accessing 458anything like this. You must also be extra careful about accessing
459memory you intend to sync partially. 459memory you intend to sync partially.
@@ -472,21 +472,20 @@ dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
472 dma_addr_t device_addr, size_t size, int 472 dma_addr_t device_addr, size_t size, int
473 flags) 473 flags)
474 474
475
476Declare region of memory to be handed out by dma_alloc_coherent when 475Declare region of memory to be handed out by dma_alloc_coherent when
477it's asked for coherent memory for this device. 476it's asked for coherent memory for this device.
478 477
479bus_addr is the physical address to which the memory is currently 478bus_addr is the physical address to which the memory is currently
480assigned in the bus responding region (this will be used by the 479assigned in the bus responding region (this will be used by the
481platform to perform the mapping) 480platform to perform the mapping).
482 481
483device_addr is the physical address the device needs to be programmed 482device_addr is the physical address the device needs to be programmed
484with actually to address this memory (this will be handed out as the 483with actually to address this memory (this will be handed out as the
485dma_addr_t in dma_alloc_coherent()) 484dma_addr_t in dma_alloc_coherent()).
486 485
487size is the size of the area (must be multiples of PAGE_SIZE). 486size is the size of the area (must be multiples of PAGE_SIZE).
488 487
489flags can be or'd together and are 488flags can be or'd together and are:
490 489
491DMA_MEMORY_MAP - request that the memory returned from 490DMA_MEMORY_MAP - request that the memory returned from
492dma_alloc_coherent() be directly writable. 491dma_alloc_coherent() be directly writable.
@@ -494,7 +493,7 @@ dma_alloc_coherent() be directly writable.
494DMA_MEMORY_IO - request that the memory returned from 493DMA_MEMORY_IO - request that the memory returned from
495dma_alloc_coherent() be addressable using read/write/memcpy_toio etc. 494dma_alloc_coherent() be addressable using read/write/memcpy_toio etc.
496 495
497One or both of these flags must be present 496One or both of these flags must be present.
498 497
499DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by 498DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
500dma_alloc_coherent of any child devices of this one (for memory residing 499dma_alloc_coherent of any child devices of this one (for memory residing
@@ -528,7 +527,7 @@ dma_release_declared_memory(struct device *dev)
528Remove the memory region previously declared from the system. This 527Remove the memory region previously declared from the system. This
529API performs *no* in-use checking for this region and will return 528API performs *no* in-use checking for this region and will return
530unconditionally having removed all the required structures. It is the 529unconditionally having removed all the required structures. It is the
531drivers job to ensure that no parts of this memory region are 530driver's job to ensure that no parts of this memory region are
532currently in use. 531currently in use.
533 532
534void * 533void *
@@ -538,12 +537,10 @@ dma_mark_declared_memory_occupied(struct device *dev,
538This is used to occupy specific regions of the declared space 537This is used to occupy specific regions of the declared space
539(dma_alloc_coherent() will hand out the first free region it finds). 538(dma_alloc_coherent() will hand out the first free region it finds).
540 539
541device_addr is the *device* address of the region requested 540device_addr is the *device* address of the region requested.
542 541
543size is the size (and should be a page sized multiple). 542size is the size (and should be a page-sized multiple).
544 543
545The return value will be either a pointer to the processor virtual 544The return value will be either a pointer to the processor virtual
546address of the memory, or an error (via PTR_ERR()) if any part of the 545address of the memory, or an error (via PTR_ERR()) if any part of the
547region is occupied. 546region is occupied.
548
549
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index eb42bf9847cb..b886f52a9aac 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -380,7 +380,6 @@ X!Edrivers/base/interface.c
380!Edrivers/base/bus.c 380!Edrivers/base/bus.c
381 </sect1> 381 </sect1>
382 <sect1><title>Device Drivers Power Management</title> 382 <sect1><title>Device Drivers Power Management</title>
383!Edrivers/base/power/main.c
384!Edrivers/base/power/resume.c 383!Edrivers/base/power/resume.c
385!Edrivers/base/power/suspend.c 384!Edrivers/base/power/suspend.c
386 </sect1> 385 </sect1>
@@ -398,12 +397,12 @@ X!Edrivers/acpi/pci_bind.c
398--> 397-->
399 </sect1> 398 </sect1>
400 <sect1><title>Device drivers PnP support</title> 399 <sect1><title>Device drivers PnP support</title>
401!Edrivers/pnp/core.c 400!Idrivers/pnp/core.c
402<!-- No correct structured comments 401<!-- No correct structured comments
403X!Edrivers/pnp/system.c 402X!Edrivers/pnp/system.c
404 --> 403 -->
405!Edrivers/pnp/card.c 404!Edrivers/pnp/card.c
406!Edrivers/pnp/driver.c 405!Idrivers/pnp/driver.c
407!Edrivers/pnp/manager.c 406!Edrivers/pnp/manager.c
408!Edrivers/pnp/support.c 407!Edrivers/pnp/support.c
409 </sect1> 408 </sect1>
@@ -704,14 +703,22 @@ X!Idrivers/video/console/fonts.c
704 703
705 <chapter id="splice"> 704 <chapter id="splice">
706 <title>splice API</title> 705 <title>splice API</title>
707 <para>) 706 <para>
708 splice is a method for moving blocks of data around inside the 707 splice is a method for moving blocks of data around inside the
709 kernel, without continually transferring it between the kernel 708 kernel, without continually transferring them between the kernel
710 and user space. 709 and user space.
711 </para> 710 </para>
712!Iinclude/linux/splice.h
713!Ffs/splice.c 711!Ffs/splice.c
714 </chapter> 712 </chapter>
715 713
714 <chapter id="pipes">
715 <title>pipes API</title>
716 <para>
717 Pipe interfaces are all for in-kernel (builtin image) use.
718 They are not exported for use by modules.
719 </para>
720!Iinclude/linux/pipe_fs_i.h
721!Ffs/pipe.c
722 </chapter>
716 723
717</book> 724</book>
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index e3bb29a8d8dd..c119484258b8 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -133,10 +133,6 @@ interested in translating it, please email me
133 <para>updates of your driver can take place without recompiling 133 <para>updates of your driver can take place without recompiling
134 the kernel.</para> 134 the kernel.</para>
135</listitem> 135</listitem>
136<listitem>
137 <para>if you need to keep some parts of your driver closed source,
138 you can do so without violating the GPL license on the kernel.</para>
139</listitem>
140</itemizedlist> 136</itemizedlist>
141 137
142<sect1 id="how_uio_works"> 138<sect1 id="how_uio_works">
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 595a5ea4c690..7b9551fc6fe3 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -18,6 +18,7 @@
18*.moc 18*.moc
19*.mod.c 19*.mod.c
20*.o 20*.o
21*.o.*
21*.orig 22*.orig
22*.out 23*.out
23*.pdf 24*.pdf
@@ -163,6 +164,8 @@ raid6tables.c
163relocs 164relocs
164series 165series
165setup 166setup
167setup.bin
168setup.elf
166sim710_d.h* 169sim710_d.h*
167sImage 170sImage
168sm_tbl* 171sm_tbl*
diff --git a/Documentation/fb/pvr2fb.txt b/Documentation/fb/pvr2fb.txt
index 2bf6c2321c2d..36bdeff585e2 100644
--- a/Documentation/fb/pvr2fb.txt
+++ b/Documentation/fb/pvr2fb.txt
@@ -9,14 +9,13 @@ one found in the Dreamcast.
9Advantages: 9Advantages:
10 10
11 * It provides a nice large console (128 cols + 48 lines with 1024x768) 11 * It provides a nice large console (128 cols + 48 lines with 1024x768)
12 without using tiny, unreadable fonts. 12 without using tiny, unreadable fonts (NOT on the Dreamcast)
13 * You can run XF86_FBDev on top of /dev/fb0 13 * You can run XF86_FBDev on top of /dev/fb0
14 * Most important: boot logo :-) 14 * Most important: boot logo :-)
15 15
16Disadvantages: 16Disadvantages:
17 17
18 * Driver is currently limited to the Dreamcast PowerVR 2 implementation 18 * Driver is largely untested on non-Dreamcast systems.
19 at the time of this writing.
20 19
21Configuration 20Configuration
22============= 21=============
@@ -29,11 +28,16 @@ Accepted options:
29font:X - default font to use. All fonts are supported, including the 28font:X - default font to use. All fonts are supported, including the
30 SUN12x22 font which is very nice at high resolutions. 29 SUN12x22 font which is very nice at high resolutions.
31 30
32mode:X - default video mode. The following video modes are supported:
33 640x240-60, 640x480-60.
34 31
32mode:X - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
33 The following video modes are supported:
34 640x480-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
35 defaults to 640x480-16@60. At the time of writing the
36 24bpp and 32bpp modes function poorly. Work to fix that is
37 ongoing.
38
35 Note: the 640x240 mode is currently broken, and should not be 39 Note: the 640x240 mode is currently broken, and should not be
36 used for any reason. It is only mentioned as a reference. 40 used for any reason. It is only mentioned here as a reference.
37 41
38inverse - invert colors on screen (for LCD displays) 42inverse - invert colors on screen (for LCD displays)
39 43
@@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
52X11 56X11
53=== 57===
54 58
55XF86_FBDev should work, in theory. At the time of this writing it is 59XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
56totally untested and may or may not even portray the beginnings of 60on any 2.6 series kernel.
57working. If you end up testing this, please let me know!
58 61
59-- 62--
60Paul Mundt <lethal@linuxdc.org> 63Paul Mundt <lethal@linuxdc.org>
64Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
61 65
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index c175eedadb5f..a43d2878a4ef 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -211,22 +211,6 @@ Who: Richard Purdie <rpurdie@rpsys.net>
211 211
212--------------------------- 212---------------------------
213 213
214What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
215When: December 2007
216Why: These functions are a leftover from 2.4 times. They have several
217 problems:
218 - Duplication of checks that are done in the device driver's
219 interrupt handler
220 - common I/O layer can't do device specific error recovery
221 - device driver can't be notified for conditions happening during
222 execution of the function
223 Device drivers should issue the read device characteristics and read
224 configuration data ccws and do the appropriate error handling
225 themselves.
226Who: Cornelia Huck <cornelia.huck@de.ibm.com>
227
228---------------------------
229
230What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers 214What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
231When: September 2007 215When: September 2007
232Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific 216Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.txt
new file mode 100644
index 000000000000..af1628a1061c
--- /dev/null
+++ b/Documentation/filesystems/hfsplus.txt
@@ -0,0 +1,59 @@
1
2Macintosh HFSPlus Filesystem for Linux
3======================================
4
5HFSPlus is a filesystem first introduced in MacOS 8.1.
6HFSPlus has several extensions to HFS, including 32-bit allocation
7blocks, 255-character unicode filenames, and file sizes of 2^63 bytes.
8
9
10Mount options
11=============
12
13When mounting an HFSPlus filesystem, the following options are accepted:
14
15 creator=cccc, type=cccc
16 Specifies the creator/type values as shown by the MacOS finder
17 used for creating new files. Default values: '????'.
18
19 uid=n, gid=n
20 Specifies the user/group that owns all files on the filesystem
21 that have uninitialized permissions structures.
22 Default: user/group id of the mounting process.
23
24 umask=n
25 Specifies the umask (in octal) used for files and directories
26 that have uninitialized permissions structures.
27 Default: umask of the mounting process.
28
29 session=n
30 Select the CDROM session to mount as HFSPlus filesystem. Defaults to
31 leaving that decision to the CDROM driver. This option will fail
32 with anything but a CDROM as the underlying device.
33
34 part=n
35 Select partition number n from the device. This option only makes
36 sense for CDROMs because they can't be partitioned under Linux.
37 For disk devices the generic partition parsing code does this
38 for us. Defaults to not parsing the partition table at all.
39
40 decompose
41 Decompose file name characters.
42
43 nodecompose
44 Do not decompose file name characters.
45
46 force
47 Used to force write access to volumes that are marked as journalled
48 or locked. Use at your own risk.
49
50 nls=cccc
51 Encoding to use when presenting file names.
52
53
54References
55==========
56
57kernel source: <file:fs/hfsplus>
58
59Apple Technote 1150 http://developer.apple.com/technotes/tn/tn1150.html
diff --git a/Documentation/hpet.txt b/Documentation/hpet.txt
index b7a3dc38dd52..6ad52d9dad6c 100644
--- a/Documentation/hpet.txt
+++ b/Documentation/hpet.txt
@@ -5,7 +5,7 @@ for the 8254 and Real Time Clock (RTC) periodic timer functionality.
5Each HPET can have up to 32 timers. It is possible to configure the 5Each HPET can have up to 32 timers. It is possible to configure the
6first two timers as legacy replacements for 8254 and RTC periodic timers. 6first two timers as legacy replacements for 8254 and RTC periodic timers.
7A specification done by Intel and Microsoft can be found at 7A specification done by Intel and Microsoft can be found at
8<http://www.intel.com/hardwaredesign/hpetspec.htm>. 8<http://www.intel.com/technology/architecture/hpetspec.htm>.
9 9
10The driver supports detection of HPET driver allocation and initialization 10The driver supports detection of HPET driver allocation and initialization
11of the HPET before the driver module_init routine is called. This enables 11of the HPET before the driver module_init routine is called. This enables
diff --git a/Documentation/hwmon/adm1031 b/Documentation/hwmon/adm1031
index 130a38382b98..be92a77da1d5 100644
--- a/Documentation/hwmon/adm1031
+++ b/Documentation/hwmon/adm1031
@@ -6,13 +6,13 @@ Supported chips:
6 Prefix: 'adm1030' 6 Prefix: 'adm1030'
7 Addresses scanned: I2C 0x2c to 0x2e 7 Addresses scanned: I2C 0x2c to 0x2e
8 Datasheet: Publicly available at the Analog Devices website 8 Datasheet: Publicly available at the Analog Devices website
9 http://products.analog.com/products/info.asp?product=ADM1030 9 http://www.analog.com/en/prod/0%2C2877%2CADM1030%2C00.html
10 10
11 * Analog Devices ADM1031 11 * Analog Devices ADM1031
12 Prefix: 'adm1031' 12 Prefix: 'adm1031'
13 Addresses scanned: I2C 0x2c to 0x2e 13 Addresses scanned: I2C 0x2c to 0x2e
14 Datasheet: Publicly available at the Analog Devices website 14 Datasheet: Publicly available at the Analog Devices website
15 http://products.analog.com/products/info.asp?product=ADM1031 15 http://www.analog.com/en/prod/0%2C2877%2CADM1031%2C00.html
16 16
17Authors: 17Authors:
18 Alexandre d'Alton <alex@alexdalton.org> 18 Alexandre d'Alton <alex@alexdalton.org>
diff --git a/Documentation/hwmon/thmc50 b/Documentation/hwmon/thmc50
new file mode 100644
index 000000000000..9639ca93d559
--- /dev/null
+++ b/Documentation/hwmon/thmc50
@@ -0,0 +1,74 @@
1Kernel driver thmc50
2=====================
3
4Supported chips:
5 * Analog Devices ADM1022
6 Prefix: 'adm1022'
7 Addresses scanned: I2C 0x2c - 0x2e
8 Datasheet: http://www.analog.com/en/prod/0,2877,ADM1022,00.html
9 * Texas Instruments THMC50
10 Prefix: 'thmc50'
11 Addresses scanned: I2C 0x2c - 0x2e
12 Datasheet: http://focus.ti.com/docs/prod/folders/print/thmc50.html
13
14Author: Krzysztof Helt <krzysztof.h1@wp.pl>
15
16This driver was derived from the 2.4 kernel thmc50.c source file.
17
18Credits:
19 thmc50.c (2.4 kernel):
20 Frodo Looijaard <frodol@dds.nl>
21 Philip Edelbrock <phil@netroedge.com>
22
23Module Parameters
24-----------------
25
26* adm1022_temp3: short array
27 List of adapter,address pairs to force chips into ADM1022 mode with
28 second remote temperature. This does not work for original THMC50 chips.
29
30Description
31-----------
32
33The THMC50 implements: an internal temperature sensor, support for an
34external diode-type temperature sensor (compatible w/ the diode sensor inside
35many processors), and a controllable fan/analog_out DAC. For the temperature
36sensors, limits can be set through the appropriate Overtemperature Shutdown
37register and Hysteresis register. Each value can be set and read to half-degree
38accuracy. An alarm is issued (usually to a connected LM78) when the
39temperature gets higher than the Overtemperature Shutdown value; it stays on
40until the temperature falls below the Hysteresis value. All temperatures are in
41degrees Celsius, and are guaranteed within a range of -55 to +125 degrees.
42
43The THMC50 only updates its values each 1.5 seconds; reading it more often
44will do no harm, but will return 'old' values.
45
46The THMC50 is usually used in combination with LM78-like chips, to measure
47the temperature of the processor(s).
48
49The ADM1022 works the same as THMC50 but it is faster (5 Hz instead of
501 Hz for THMC50). It can also be put in a new mode to handle an additional
51remote temperature sensor. The driver uses the mode set by the BIOS by default.
52
53In case the BIOS is broken and the mode is set incorrectly, you can force
54the mode with an additional remote temperature with the adm1022_temp3 parameter.
55A typical symptom of wrong setting is a fan forced to full speed.
56
57Driver Features
58---------------
59
60The driver provides up to three temperatures:
61
62temp1 -- internal
63temp2 -- remote
64temp3 -- 2nd remote only for ADM1022
65
66pwm1 -- fan speed (0 = stop, 255 = full)
67pwm1_mode -- always 0 (DC mode)
68
69The value of 0 for pwm1 also forces FAN_OFF signal from the chip,
70so it stops fans even if the value 0 into the ANALOG_OUT register does not.
71
72The driver was tested on Compaq AP550 with two ADM1022 chips (one works
73in the temp3 mode), five temperature readings and two fans.
74
diff --git a/Documentation/i386/zero-page.txt b/Documentation/i386/zero-page.txt
index 75b3680c41eb..6c0817c45683 100644
--- a/Documentation/i386/zero-page.txt
+++ b/Documentation/i386/zero-page.txt
@@ -1,3 +1,13 @@
1---------------------------------------------------------------------------
2!!!!!!!!!!!!!!!WARNING!!!!!!!!
3The zero page is a kernel internal data structure, not a stable ABI. It might change
4without warning and the kernel has no way to detect old versions of it.
5If you're writing some external code like a boot loader you should only use
6the stable versioned real mode boot protocol described in boot.txt. Otherwise the kernel
7might break you at any time.
8!!!!!!!!!!!!!WARNING!!!!!!!!!!!
9----------------------------------------------------------------------------
10
1Summary of boot_params layout (kernel point of view) 11Summary of boot_params layout (kernel point of view)
2 ( collected by Hans Lermen and Martin Mares ) 12 ( collected by Hans Lermen and Martin Mares )
3 13
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index b2446a090870..9f08dab1e75b 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -1,23 +1,24 @@
1NOTE: 1NOTE:
2This is Japanese translated version of "Documentation/HOWTO". 2This is a version of Documentation/HOWTO translated into Japanese.
3This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com> 3This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
4and JF Project team <www.linux.or.jp/JF>. 4and the JF Project team <www.linux.or.jp/JF>.
5If you find difference with original file or problem in translation, 5If you find any difference between this document and the original file
6please contact maintainer of this file or JF project. 6or a problem with the translation,
7 7please contact the maintainer of this file or JF project.
8Please also note that purpose of this file is easier to read for non 8
9English natives and not to be intended to fork. So, if you have any 9Please also note that the purpose of this file is to be easier to read
10comments or updates of this file, please try to update Original(English) 10for non English (read: Japanese) speakers and is not intended as a
11file at first. 11fork. So if you have any comments or updates for this file, please try
12 12to update the original English file first.
13Last Updated: 2007/06/04 13
14Last Updated: 2007/07/18
14================================== 15==================================
15これは、 16これは、
16linux-2.6.21/Documentation/HOWTO 17linux-2.6.22/Documentation/HOWTO
17の和訳です。 18の和訳です。
18 19
19翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > 20翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
20翻訳日: 2007/06/04 21翻訳日: 2007/07/16
21翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com> 22翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
22校正者: 松倉さん <nbh--mats at nifty dot com> 23校正者: 松倉さん <nbh--mats at nifty dot com>
23 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp> 24 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
@@ -52,6 +53,7 @@ Linux カーãƒãƒ«é–‹ç™ºã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¨å…±ã«æ´»å‹•ã™ã‚‹ã‚„り方を学ã
52ã¾ãŸã€ã“ã®ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ãŒãªãœä»Šã†ã¾ãã¾ã‚ã£ã¦ã„ã‚‹ã®ã‹ã¨ã„ã†ç†ç”±ã®ä¸€éƒ¨ã‚‚ 53ã¾ãŸã€ã“ã®ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ãŒãªãœä»Šã†ã¾ãã¾ã‚ã£ã¦ã„ã‚‹ã®ã‹ã¨ã„ã†ç†ç”±ã®ä¸€éƒ¨ã‚‚
53説明ã—よã†ã¨è©¦ã¿ã¦ã„ã¾ã™ã€‚ 54説明ã—よã†ã¨è©¦ã¿ã¦ã„ã¾ã™ã€‚
54 55
56
55カーãƒãƒ«ã¯ å°‘é‡ã®ã‚¢ãƒ¼ã‚­ãƒ†ã‚¯ãƒãƒ£ä¾å­˜éƒ¨åˆ†ãŒã‚¢ã‚»ãƒ³ãƒ–リ言語ã§æ›¸ã‹ã‚Œã¦ã„ã‚‹ 57カーãƒãƒ«ã¯ å°‘é‡ã®ã‚¢ãƒ¼ã‚­ãƒ†ã‚¯ãƒãƒ£ä¾å­˜éƒ¨åˆ†ãŒã‚¢ã‚»ãƒ³ãƒ–リ言語ã§æ›¸ã‹ã‚Œã¦ã„ã‚‹
56以外ã¯å¤§éƒ¨åˆ†ã¯ C 言語ã§æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚C言語をよãç†è§£ã—ã¦ã„ã‚‹ã“ã¨ã¯ã‚«ãƒ¼ 58以外ã¯å¤§éƒ¨åˆ†ã¯ C 言語ã§æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚C言語をよãç†è§£ã—ã¦ã„ã‚‹ã“ã¨ã¯ã‚«ãƒ¼
57ãƒãƒ«é–‹ç™ºè€…ã«ã¯å¿…è¦ã§ã™ã€‚アーキテクãƒãƒ£å‘ã‘ã®ä½Žãƒ¬ãƒ™ãƒ«éƒ¨åˆ†ã®é–‹ç™ºã‚’ã™ã‚‹ã® 59ãƒãƒ«é–‹ç™ºè€…ã«ã¯å¿…è¦ã§ã™ã€‚アーキテクãƒãƒ£å‘ã‘ã®ä½Žãƒ¬ãƒ™ãƒ«éƒ¨åˆ†ã®é–‹ç™ºã‚’ã™ã‚‹ã®
@@ -141,6 +143,7 @@ Linux カーãƒãƒ«ã‚½ãƒ¼ã‚¹ãƒ„リーã¯å¹…広ã„範囲ã®ãƒ‰ã‚­ãƒ¥ãƒ¡ãƒ³ãƒˆã‚’å
141 ã“れらã®ãƒ«ãƒ¼ãƒ«ã«å¾“ãˆã°ã†ã¾ãã„ãã“ã¨ã‚’ä¿è¨¼ã™ã‚‹ã“ã¨ã§ã¯ã‚ã‚Šã¾ã›ã‚“ 143 ã“れらã®ãƒ«ãƒ¼ãƒ«ã«å¾“ãˆã°ã†ã¾ãã„ãã“ã¨ã‚’ä¿è¨¼ã™ã‚‹ã“ã¨ã§ã¯ã‚ã‚Šã¾ã›ã‚“
142 ㌠(ã™ã¹ã¦ã®ãƒ‘ッãƒã¯å†…容ã¨ã‚¹ã‚¿ã‚¤ãƒ«ã«ã¤ã„ã¦ç²¾æŸ»ã‚’å—ã‘ã‚‹ã®ã§)〠144 ㌠(ã™ã¹ã¦ã®ãƒ‘ッãƒã¯å†…容ã¨ã‚¹ã‚¿ã‚¤ãƒ«ã«ã¤ã„ã¦ç²¾æŸ»ã‚’å—ã‘ã‚‹ã®ã§)ã€
143 ルールã«å¾“ã‚ãªã‘ã‚Œã°é–“é•ã„ãªãã†ã¾ãã„ã‹ãªã„ã§ã—ょã†ã€‚ 145 ルールã«å¾“ã‚ãªã‘ã‚Œã°é–“é•ã„ãªãã†ã¾ãã„ã‹ãªã„ã§ã—ょã†ã€‚
146
144 ã“ã®ä»–ã«ãƒ‘ッãƒã‚’作る方法ã«ã¤ã„ã¦ã®ã‚ˆãã§ããŸè¨˜è¿°ã¯- 147 ã“ã®ä»–ã«ãƒ‘ッãƒã‚’作る方法ã«ã¤ã„ã¦ã®ã‚ˆãã§ããŸè¨˜è¿°ã¯-
145 148
146 "The Perfect Patch" 149 "The Perfect Patch"
@@ -360,44 +363,42 @@ linux-kernel メーリングリストã§åŽé›†ã•ã‚ŒãŸå¤šæ•°ã®ãƒ‘ッãƒã¨åŒæ
360 363
361 git ツリー- 364 git ツリー-
362 - Kbuild ã®é–‹ç™ºãƒ„リーã€Sam Ravnborg <sam@ravnborg.org> 365 - Kbuild ã®é–‹ç™ºãƒ„リーã€Sam Ravnborg <sam@ravnborg.org>
363 kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git 366 git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
364 367
365 - ACPI ã®é–‹ç™ºãƒ„リー〠Len Brown <len.brown@intel.com> 368 - ACPI ã®é–‹ç™ºãƒ„リー〠Len Brown <len.brown@intel.com>
366 kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git 369 git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
367 370
368 - Block ã®é–‹ç™ºãƒ„リーã€Jens Axboe <axboe@suse.de> 371 - Block ã®é–‹ç™ºãƒ„リーã€Jens Axboe <axboe@suse.de>
369 kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git 372 git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
370 373
371 - DRM ã®é–‹ç™ºãƒ„リーã€Dave Airlie <airlied@linux.ie> 374 - DRM ã®é–‹ç™ºãƒ„リーã€Dave Airlie <airlied@linux.ie>
372 kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git 375 git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
373 376
374 - ia64 ã®é–‹ç™ºãƒ„リーã€Tony Luck <tony.luck@intel.com> 377 - ia64 ã®é–‹ç™ºãƒ„リーã€Tony Luck <tony.luck@intel.com>
375 kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git 378 git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
376
377 - ieee1394 ã®é–‹ç™ºãƒ„リーã€Jody McIntyre <scjody@modernduck.com>
378 kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git
379 379
380 - infiniband, Roland Dreier <rolandd@cisco.com> 380 - infiniband, Roland Dreier <rolandd@cisco.com>
381 kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git 381 git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
382 382
383 - libata, Jeff Garzik <jgarzik@pobox.com> 383 - libata, Jeff Garzik <jgarzik@pobox.com>
384 kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git 384 git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
385 385
386 - ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ãƒ‰ãƒ©ã‚¤ãƒ, Jeff Garzik <jgarzik@pobox.com> 386 - ãƒãƒƒãƒˆãƒ¯ãƒ¼ã‚¯ãƒ‰ãƒ©ã‚¤ãƒ, Jeff Garzik <jgarzik@pobox.com>
387 kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git 387 git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
388 388
389 - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net> 389 - pcmcia, Dominik Brodowski <linux@dominikbrodowski.net>
390 kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git 390 git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
391 391
392 - SCSI, James Bottomley <James.Bottomley@SteelEye.com> 392 - SCSI, James Bottomley <James.Bottomley@SteelEye.com>
393 kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git 393 git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
394
395 ãã®ä»–ã® git カーãƒãƒ«ãƒ„リー㯠http://kernel.org/git ã«ä¸€è¦§è¡¨ãŒã‚ã‚Šã¾
396 ã™ã€‚
397 394
398 quilt ツリー- 395 quilt ツリー-
399 - USB, PCI ドライãƒã‚³ã‚¢ã¨ I2C, Greg Kroah-Hartman <gregkh@suse.de> 396 - USB, PCI ドライãƒã‚³ã‚¢ã¨ I2C, Greg Kroah-Hartman <gregkh@suse.de>
400 kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ 397 kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
398 - x86-64 㨠i386 ã®ä»²é–“ Andi Kleen <ak@suse.de>
399
400 ãã®ä»–ã®ã‚«ãƒ¼ãƒãƒ«ãƒ„リー㯠http://git.kernel.org/ 㨠MAINTAINERS ファ
401 イルã«ä¸€è¦§è¡¨ãŒã‚ã‚Šã¾ã™ã€‚
401 402
402バグレポート 403バグレポート
403------------- 404-------------
@@ -508,6 +509,7 @@ MAINTAINERS ファイルã«ãƒªã‚¹ãƒˆãŒã‚ã‚Šã¾ã™ã®ã§å‚ç…§ã—ã¦ãã ã•ã
508ã›ã‚“*。å˜ã«è‡ªåˆ†ã®ãƒ‘ッãƒã«å¯¾ã—ã¦æŒ‡æ‘˜ã•ã‚ŒãŸå•é¡Œã‚’å…¨ã¦ä¿®æ­£ã—ã¦å†é€ã™ã‚Œã° 509ã›ã‚“*。å˜ã«è‡ªåˆ†ã®ãƒ‘ッãƒã«å¯¾ã—ã¦æŒ‡æ‘˜ã•ã‚ŒãŸå•é¡Œã‚’å…¨ã¦ä¿®æ­£ã—ã¦å†é€ã™ã‚Œã°
509ã„ã„ã®ã§ã™ã€‚ 510ã„ã„ã®ã§ã™ã€‚
510 511
512
511カーネルコミュニティと企業組織のちがい 513カーネルコミュニティと企業組織のちがい
512----------------------------------------------------------------- 514-----------------------------------------------------------------
513 515
@@ -577,6 +579,7 @@ Linux カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¯ã€ä¸€åº¦ã«å¤§é‡ã®ã‚³ãƒ¼ãƒ‰ã®å¡Šã‚’å–
577 ã‹ã—ã€500è¡Œã®ãƒ‘ッãƒã¯ã€æ­£ã—ã„ã“ã¨ã‚’レビューã™ã‚‹ã®ã«æ•°æ™‚é–“ã‹ã‹ã‚‹ã‹ã‚‚ 579 ã‹ã—ã€500è¡Œã®ãƒ‘ッãƒã¯ã€æ­£ã—ã„ã“ã¨ã‚’レビューã™ã‚‹ã®ã«æ•°æ™‚é–“ã‹ã‹ã‚‹ã‹ã‚‚
578 ã—ã‚Œã¾ã›ã‚“(時間ã¯ãƒ‘ッãƒã®ã‚µã‚¤ã‚ºãªã©ã«ã‚ˆã‚ŠæŒ‡æ•°é–¢æ•°ã«æ¯”例ã—ã¦ã‹ã‹ã‚Šã¾ 580 ã—ã‚Œã¾ã›ã‚“(時間ã¯ãƒ‘ッãƒã®ã‚µã‚¤ã‚ºãªã©ã«ã‚ˆã‚ŠæŒ‡æ•°é–¢æ•°ã«æ¯”例ã—ã¦ã‹ã‹ã‚Šã¾
579 ã™) 581 ã™)
582
580 å°ã•ã„パッãƒã¯ä½•ã‹ã‚ã£ãŸã¨ãã«ãƒ‡ãƒãƒƒã‚°ã‚‚ã¨ã¦ã‚‚ç°¡å˜ã«ãªã‚Šã¾ã™ã€‚パッ 583 å°ã•ã„パッãƒã¯ä½•ã‹ã‚ã£ãŸã¨ãã«ãƒ‡ãƒãƒƒã‚°ã‚‚ã¨ã¦ã‚‚ç°¡å˜ã«ãªã‚Šã¾ã™ã€‚パッ
581 ãƒã‚’1個1個å–り除ãã®ã¯ã€ã¨ã¦ã‚‚大ããªãƒ‘ッãƒã‚’当ã¦ãŸå¾Œã«(ã‹ã¤ã€ä½•ã‹ãŠ 584 ãƒã‚’1個1個å–り除ãã®ã¯ã€ã¨ã¦ã‚‚大ããªãƒ‘ッãƒã‚’当ã¦ãŸå¾Œã«(ã‹ã¤ã€ä½•ã‹ãŠ
582 ã‹ã—ããªã£ãŸå¾Œã§)解剖ã™ã‚‹ã®ã«æ¯”ã¹ã‚Œã°ã¨ã¦ã‚‚ç°¡å˜ã§ã™ã€‚ 585 ã‹ã—ããªã£ãŸå¾Œã§)解剖ã™ã‚‹ã®ã«æ¯”ã¹ã‚Œã°ã¨ã¦ã‚‚ç°¡å˜ã§ã™ã€‚
@@ -591,6 +594,7 @@ Linux カーãƒãƒ«ã‚³ãƒŸãƒ¥ãƒ‹ãƒ†ã‚£ã¯ã€ä¸€åº¦ã«å¤§é‡ã®ã‚³ãƒ¼ãƒ‰ã®å¡Šã‚’å–
591 ã†ã€‚先生ã¯ç°¡æ½”ãªæœ€é«˜ã®è§£ã‚’ã¿ãŸã„ã®ã§ã™ã€‚良ã„生徒ã¯ã“れを知ã£ã¦ 594 ã†ã€‚先生ã¯ç°¡æ½”ãªæœ€é«˜ã®è§£ã‚’ã¿ãŸã„ã®ã§ã™ã€‚良ã„生徒ã¯ã“れを知ã£ã¦
592 ãŠã‚Šã€ãã—ã¦æœ€çµ‚解ã®å‰ã®ä¸­é–“作業をæ出ã™ã‚‹ã“ã¨ã¯æ±ºã—ã¦ãªã„ã®ã§ 595 ãŠã‚Šã€ãã—ã¦æœ€çµ‚解ã®å‰ã®ä¸­é–“作業をæ出ã™ã‚‹ã“ã¨ã¯æ±ºã—ã¦ãªã„ã®ã§
593 ã™" 596 ã™"
597
594 カーãƒãƒ«é–‹ç™ºã§ã‚‚ã“ã‚Œã¯åŒã˜ã§ã™ã€‚メンテナーé”ã¨ãƒ¬ãƒ“ューアé”ã¯ã€ 598 カーãƒãƒ«é–‹ç™ºã§ã‚‚ã“ã‚Œã¯åŒã˜ã§ã™ã€‚メンテナーé”ã¨ãƒ¬ãƒ“ューアé”ã¯ã€
595 å•é¡Œã‚’解決ã™ã‚‹è§£ã®èƒŒå¾Œã«ãªã‚‹æ€è€ƒãƒ—ロセスをã¿ãŸã„ã¨ã¯æ€ã„ã¾ã›ã‚“。 599 å•é¡Œã‚’解決ã™ã‚‹è§£ã®èƒŒå¾Œã«ãªã‚‹æ€è€ƒãƒ—ロセスをã¿ãŸã„ã¨ã¯æ€ã„ã¾ã›ã‚“。
596 彼らã¯å˜ç´”ã§ã‚ã–ã‚„ã‹ãªè§£æ±ºæ–¹æ³•ã‚’ã¿ãŸã„ã®ã§ã™ã€‚ 600 彼らã¯å˜ç´”ã§ã‚ã–ã‚„ã‹ãªè§£æ±ºæ–¹æ³•ã‚’ã¿ãŸã„ã®ã§ã™ã€‚
diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt
index b3f2b27f0881..7653b5cbfed2 100644
--- a/Documentation/ja_JP/stable_api_nonsense.txt
+++ b/Documentation/ja_JP/stable_api_nonsense.txt
@@ -1,17 +1,17 @@
1NOTE: 1NOTE:
2This is a Japanese translated version of 2This is a version of Documentation/stable_api_nonsense.txt translated into Japanese.
3"Documentation/stable_api_nonsense.txt". 3This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
4This one is maintained by 4and the JF Project team <http://www.linux.or.jp/JF/>.
5IKEDA, Munehiro <m-ikeda@ds.jp.nec.com> 5If you find any difference between this document and the original file
6and JF Project team <http://www.linux.or.jp/JF/>. 6or a problem with the translation,
7If you find difference with original file or problem in translation,
8please contact the maintainer of this file or JF project. 7please contact the maintainer of this file or JF project.
9 8
10Please also note that purpose of this file is easier to read for non 9Please also note that the purpose of this file is to be easier to read
11English natives and not to be intended to fork. So, if you have any 10for non English (read: Japanese) speakers and is not intended as a
12comments or updates of this file, please try to update 11fork. So if you have any comments or updates of this file, please try
13Original(English) file at first. 12to update the original English file first.
14 13
14Last Updated: 2007/07/18
15================================== 15==================================
16これは、 16これは、
17linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳 17linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 536d5bfbdb8d..fe8b0c4892cf 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -98,6 +98,15 @@ applicable everywhere (see syntax).
98 times, the limit is set to the largest selection. 98 times, the limit is set to the largest selection.
99 Reverse dependencies can only be used with boolean or tristate 99 Reverse dependencies can only be used with boolean or tristate
100 symbols. 100 symbols.
101 Note:
102 select is evil.... select will by brute force set a symbol
103 equal to 'y' without visiting the dependencies. So abusing
104 select you are able to select a symbol FOO even if FOO depends
105 on BAR that is not set. In general use select only for
106 non-visible symbols (no prompts anywhere) and for symbols with
107 no dependencies. That will limit the usefulness but on the
108 other hand avoid the illegal configurations all over. kconfig
109 should one day warn about such things.
101 110
102- numerical ranges: "range" <symbol> <symbol> ["if" <expr>] 111- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
103 This allows to limit the range of possible input values for int 112 This allows to limit the range of possible input values for int
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 379931e74334..09c0ec100f61 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -30,6 +30,7 @@ the beginning of each description states the restrictions within which a
30parameter is applicable: 30parameter is applicable:
31 31
32 ACPI ACPI support is enabled. 32 ACPI ACPI support is enabled.
33 AGP AGP (Accelerated Graphics Port) is enabled.
33 ALSA ALSA sound support is enabled. 34 ALSA ALSA sound support is enabled.
34 APIC APIC support is enabled. 35 APIC APIC support is enabled.
35 APM Advanced Power Management support is enabled. 36 APM Advanced Power Management support is enabled.
@@ -40,7 +41,6 @@ parameter is applicable:
40 EIDE EIDE/ATAPI support is enabled. 41 EIDE EIDE/ATAPI support is enabled.
41 FB The frame buffer device is enabled. 42 FB The frame buffer device is enabled.
42 HW Appropriate hardware is enabled. 43 HW Appropriate hardware is enabled.
43 IA-32 IA-32 aka i386 architecture is enabled.
44 IA-64 IA-64 architecture is enabled. 44 IA-64 IA-64 architecture is enabled.
45 IOSCHED More than one I/O scheduler is enabled. 45 IOSCHED More than one I/O scheduler is enabled.
46 IP_PNP IP DHCP, BOOTP, or RARP is enabled. 46 IP_PNP IP DHCP, BOOTP, or RARP is enabled.
@@ -57,14 +57,14 @@ parameter is applicable:
57 MDA MDA console support is enabled. 57 MDA MDA console support is enabled.
58 MOUSE Appropriate mouse support is enabled. 58 MOUSE Appropriate mouse support is enabled.
59 MSI Message Signaled Interrupts (PCI). 59 MSI Message Signaled Interrupts (PCI).
60 MTD MTD support is enabled. 60 MTD MTD (Memory Technology Device) support is enabled.
61 NET Appropriate network support is enabled. 61 NET Appropriate network support is enabled.
62 NUMA NUMA support is enabled. 62 NUMA NUMA support is enabled.
63 GENERIC_TIME The generic timeofday code is enabled. 63 GENERIC_TIME The generic timeofday code is enabled.
64 NFS Appropriate NFS support is enabled. 64 NFS Appropriate NFS support is enabled.
65 OSS OSS sound support is enabled. 65 OSS OSS sound support is enabled.
66 PV_OPS A paravirtualized kernel 66 PV_OPS A paravirtualized kernel is enabled.
67 PARIDE The ParIDE subsystem is enabled. 67 PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
68 PARISC The PA-RISC architecture is enabled. 68 PARISC The PA-RISC architecture is enabled.
69 PCI PCI bus support is enabled. 69 PCI PCI bus support is enabled.
70 PCMCIA The PCMCIA subsystem is enabled. 70 PCMCIA The PCMCIA subsystem is enabled.
@@ -91,6 +91,7 @@ parameter is applicable:
91 VT Virtual terminal support is enabled. 91 VT Virtual terminal support is enabled.
92 WDT Watchdog support is enabled. 92 WDT Watchdog support is enabled.
93 XT IBM PC/XT MFM hard disk support is enabled. 93 XT IBM PC/XT MFM hard disk support is enabled.
94 X86-32 X86-32, aka i386 architecture is enabled.
94 X86-64 X86-64 architecture is enabled. 95 X86-64 X86-64 architecture is enabled.
95 More X86-64 boot options can be found in 96 More X86-64 boot options can be found in
96 Documentation/x86_64/boot-options.txt . 97 Documentation/x86_64/boot-options.txt .
@@ -122,10 +123,6 @@ and is between 256 and 4096 characters. It is defined in the file
122./include/asm/setup.h as COMMAND_LINE_SIZE. 123./include/asm/setup.h as COMMAND_LINE_SIZE.
123 124
124 125
125 53c7xx= [HW,SCSI] Amiga SCSI controllers
126 See header of drivers/scsi/53c7xx.c.
127 See also Documentation/scsi/ncr53c7xx.txt.
128
129 acpi= [HW,ACPI,X86-64,i386] 126 acpi= [HW,ACPI,X86-64,i386]
130 Advanced Configuration and Power Interface 127 Advanced Configuration and Power Interface
131 Format: { force | off | ht | strict | noirq } 128 Format: { force | off | ht | strict | noirq }
@@ -224,11 +221,17 @@ and is between 256 and 4096 characters. It is defined in the file
224 221
225 acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT 222 acpi_fake_ecdt [HW,ACPI] Workaround failure due to BIOS lacking ECDT
226 223
227 acpi_pm_good [IA-32,X86-64] 224 acpi_pm_good [X86-32,X86-64]
228 Override the pmtimer bug detection: force the kernel 225 Override the pmtimer bug detection: force the kernel
229 to assume that this machine's pmtimer latches its value 226 to assume that this machine's pmtimer latches its value
230 and always returns good values. 227 and always returns good values.
231 228
229 agp= [AGP]
230 { off | try_unsupported }
231 off: disable AGP support
232 try_unsupported: try to drive unsupported chipsets
233 (may crash computer or cause data corruption)
234
232 enable_timer_pin_1 [i386,x86-64] 235 enable_timer_pin_1 [i386,x86-64]
233 Enable PIN 1 of APIC timer 236 Enable PIN 1 of APIC timer
234 Can be useful to work around chipset bugs 237 Can be useful to work around chipset bugs
@@ -281,7 +284,8 @@ and is between 256 and 4096 characters. It is defined in the file
281 not play well with APC CPU idle - disable it if you have 284 not play well with APC CPU idle - disable it if you have
282 APC and your system crashes randomly. 285 APC and your system crashes randomly.
283 286
284 apic= [APIC,i386] Change the output verbosity whilst booting 287 apic= [APIC,i386] Advanced Programmable Interrupt Controller
288 Change the output verbosity whilst booting
285 Format: { quiet (default) | verbose | debug } 289 Format: { quiet (default) | verbose | debug }
286 Change the amount of debugging information output 290 Change the amount of debugging information output
287 when initialising the APIC and IO-APIC components. 291 when initialising the APIC and IO-APIC components.
@@ -355,7 +359,7 @@ and is between 256 and 4096 characters. It is defined in the file
355 359
356 c101= [NET] Moxa C101 synchronous serial card 360 c101= [NET] Moxa C101 synchronous serial card
357 361
358 cachesize= [BUGS=IA-32] Override level 2 CPU cache size detection. 362 cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection.
359 Sometimes CPU hardware bugs make them report the cache 363 Sometimes CPU hardware bugs make them report the cache
360 size incorrectly. The kernel will attempt work arounds 364 size incorrectly. The kernel will attempt work arounds
361 to fix known problems, but for some CPUs it is not 365 to fix known problems, but for some CPUs it is not
@@ -374,7 +378,7 @@ and is between 256 and 4096 characters. It is defined in the file
374 Value can be changed at runtime via 378 Value can be changed at runtime via
375 /selinux/checkreqprot. 379 /selinux/checkreqprot.
376 380
377 clock= [BUGS=IA-32, HW] gettimeofday clocksource override. 381 clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
378 [Deprecated] 382 [Deprecated]
379 Forces specified clocksource (if available) to be used 383 Forces specified clocksource (if available) to be used
380 when calculating gettimeofday(). If specified 384 when calculating gettimeofday(). If specified
@@ -392,7 +396,7 @@ and is between 256 and 4096 characters. It is defined in the file
392 [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2, 396 [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
393 pxa_timer,timer3,32k_counter,timer0_1 397 pxa_timer,timer3,32k_counter,timer0_1
394 [AVR32] avr32 398 [AVR32] avr32
395 [IA-32] pit,hpet,tsc,vmi-timer; 399 [X86-32] pit,hpet,tsc,vmi-timer;
396 scx200_hrt on Geode; cyclone on IBM x440 400 scx200_hrt on Geode; cyclone on IBM x440
397 [MIPS] MIPS 401 [MIPS] MIPS
398 [PARISC] cr16 402 [PARISC] cr16
@@ -412,7 +416,7 @@ and is between 256 and 4096 characters. It is defined in the file
412 over the 8254 in addition to over the IO-APIC. The 416 over the 8254 in addition to over the IO-APIC. The
413 kernel tries to set a sensible default. 417 kernel tries to set a sensible default.
414 418
415 hpet= [IA-32,HPET] option to disable HPET and use PIT. 419 hpet= [X86-32,HPET] option to disable HPET and use PIT.
416 Format: disable 420 Format: disable
417 421
418 com20020= [HW,NET] ARCnet - COM20020 chipset 422 com20020= [HW,NET] ARCnet - COM20020 chipset
@@ -549,7 +553,7 @@ and is between 256 and 4096 characters. It is defined in the file
549 553
550 dtc3181e= [HW,SCSI] 554 dtc3181e= [HW,SCSI]
551 555
552 earlyprintk= [IA-32,X86-64,SH] 556 earlyprintk= [X86-32,X86-64,SH]
553 earlyprintk=vga 557 earlyprintk=vga
554 earlyprintk=serial[,ttySn[,baudrate]] 558 earlyprintk=serial[,ttySn[,baudrate]]
555 559
@@ -587,7 +591,7 @@ and is between 256 and 4096 characters. It is defined in the file
587 eisa_irq_edge= [PARISC,HW] 591 eisa_irq_edge= [PARISC,HW]
588 See header of drivers/parisc/eisa.c. 592 See header of drivers/parisc/eisa.c.
589 593
590 elanfreq= [IA-32] 594 elanfreq= [X86-32]
591 See comment before function elanfreq_setup() in 595 See comment before function elanfreq_setup() in
592 arch/i386/kernel/cpu/cpufreq/elanfreq.c. 596 arch/i386/kernel/cpu/cpufreq/elanfreq.c.
593 597
@@ -596,7 +600,7 @@ and is between 256 and 4096 characters. It is defined in the file
596 See Documentation/block/as-iosched.txt and 600 See Documentation/block/as-iosched.txt and
597 Documentation/block/deadline-iosched.txt for details. 601 Documentation/block/deadline-iosched.txt for details.
598 602
599 elfcorehdr= [IA-32, X86_64] 603 elfcorehdr= [X86-32, X86_64]
600 Specifies physical address of start of kernel core 604 Specifies physical address of start of kernel core
601 image elf header. Generally kexec loader will 605 image elf header. Generally kexec loader will
602 pass this option to capture kernel. 606 pass this option to capture kernel.
@@ -678,7 +682,7 @@ and is between 256 and 4096 characters. It is defined in the file
678 hisax= [HW,ISDN] 682 hisax= [HW,ISDN]
679 See Documentation/isdn/README.HiSax. 683 See Documentation/isdn/README.HiSax.
680 684
681 hugepages= [HW,IA-32,IA-64] Maximal number of HugeTLB pages. 685 hugepages= [HW,X86-32,IA-64] Maximal number of HugeTLB pages.
682 686
683 i8042.direct [HW] Put keyboard port into non-translated mode 687 i8042.direct [HW] Put keyboard port into non-translated mode
684 i8042.dumbkbd [HW] Pretend that controller can only read data from 688 i8042.dumbkbd [HW] Pretend that controller can only read data from
@@ -770,7 +774,8 @@ and is between 256 and 4096 characters. It is defined in the file
770 See Documentation/nfsroot.txt. 774 See Documentation/nfsroot.txt.
771 775
772 ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards 776 ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
773 See comment before ip2_setup() in drivers/char/ip2.c. 777 See comment before ip2_setup() in
778 drivers/char/ip2/ip2base.c.
774 779
775 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller 780 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
776 See header of drivers/scsi/ips.c. 781 See header of drivers/scsi/ips.c.
@@ -819,7 +824,7 @@ and is between 256 and 4096 characters. It is defined in the file
819 js= [HW,JOY] Analog joystick 824 js= [HW,JOY] Analog joystick
820 See Documentation/input/joystick.txt. 825 See Documentation/input/joystick.txt.
821 826
822 kernelcore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter 827 kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
823 specifies the amount of memory usable by the kernel 828 specifies the amount of memory usable by the kernel
824 for non-movable allocations. The requested amount is 829 for non-movable allocations. The requested amount is
825 spread evenly throughout all nodes in the system. The 830 spread evenly throughout all nodes in the system. The
@@ -835,7 +840,7 @@ and is between 256 and 4096 characters. It is defined in the file
835 use the HighMem zone if it exists, and the Normal 840 use the HighMem zone if it exists, and the Normal
836 zone if it does not. 841 zone if it does not.
837 842
838 movablecore=nn[KMG] [KNL,IA-32,IA-64,PPC,X86-64] This parameter 843 movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
839 is similar to kernelcore except it specifies the 844 is similar to kernelcore except it specifies the
840 amount of memory used for migratable allocations. 845 amount of memory used for migratable allocations.
841 If both kernelcore and movablecore is specified, 846 If both kernelcore and movablecore is specified,
@@ -847,28 +852,20 @@ and is between 256 and 4096 characters. It is defined in the file
847 852
848 keepinitrd [HW,ARM] 853 keepinitrd [HW,ARM]
849 854
850 kstack=N [IA-32,X86-64] Print N words from the kernel stack 855 kstack=N [X86-32,X86-64] Print N words from the kernel stack
851 in oops dumps. 856 in oops dumps.
852 857
853 l2cr= [PPC] 858 l2cr= [PPC]
854 859
855 lapic [IA-32,APIC] Enable the local APIC even if BIOS 860 lapic [X86-32,APIC] Enable the local APIC even if BIOS
856 disabled it. 861 disabled it.
857 862
858 lapic_timer_c2_ok [IA-32,x86-64,APIC] trust the local apic timer in 863 lapic_timer_c2_ok [X86-32,x86-64,APIC] trust the local apic timer in
859 C2 power state. 864 C2 power state.
860 865
861 lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip 866 lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip
862 Format: addr:<io>,irq:<irq> 867 Format: addr:<io>,irq:<irq>
863 868
864 legacy_serial.force [HW,IA-32,X86-64]
865 Probe for COM ports at legacy addresses even
866 if PNPBIOS or ACPI should describe them. This
867 is for working around firmware defects.
868
869 llsc*= [IA64] See function print_params() in
870 arch/ia64/sn/kernel/llsc4.c.
871
872 load_ramdisk= [RAM] List of ramdisks to load from floppy 869 load_ramdisk= [RAM] List of ramdisks to load from floppy
873 See Documentation/ramdisk.txt. 870 See Documentation/ramdisk.txt.
874 871
@@ -974,11 +971,11 @@ and is between 256 and 4096 characters. It is defined in the file
974 [SCSI] Maximum number of LUNs received. 971 [SCSI] Maximum number of LUNs received.
975 Should be between 1 and 16384. 972 Should be between 1 and 16384.
976 973
977 mca-pentium [BUGS=IA-32] 974 mca-pentium [BUGS=X86-32]
978 975
979 mcatest= [IA-64] 976 mcatest= [IA-64]
980 977
981 mce [IA-32] Machine Check Exception 978 mce [X86-32] Machine Check Exception
982 979
983 md= [HW] RAID subsystems devices and level 980 md= [HW] RAID subsystems devices and level
984 See Documentation/md.txt. 981 See Documentation/md.txt.
@@ -990,14 +987,14 @@ and is between 256 and 4096 characters. It is defined in the file
990 mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory 987 mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
991 Amount of memory to be used when the kernel is not able 988 Amount of memory to be used when the kernel is not able
992 to see the whole system memory or for test. 989 to see the whole system memory or for test.
993 [IA-32] Use together with memmap= to avoid physical 990 [X86-32] Use together with memmap= to avoid physical
994 address space collisions. Without memmap= PCI devices 991 address space collisions. Without memmap= PCI devices
995 could be placed at addresses belonging to unused RAM. 992 could be placed at addresses belonging to unused RAM.
996 993
997 mem=nopentium [BUGS=IA-32] Disable usage of 4MB pages for kernel 994 mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
998 memory. 995 memory.
999 996
1000 memmap=exactmap [KNL,IA-32,X86_64] Enable setting of an exact 997 memmap=exactmap [KNL,X86-32,X86_64] Enable setting of an exact
1001 E820 memory map, as specified by the user. 998 E820 memory map, as specified by the user.
1002 Such memmap=exactmap lines can be constructed based on 999 Such memmap=exactmap lines can be constructed based on
1003 BIOS output or other requirements. See the memmap=nn@ss 1000 BIOS output or other requirements. See the memmap=nn@ss
@@ -1041,7 +1038,7 @@ and is between 256 and 4096 characters. It is defined in the file
1041 <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>] 1038 <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
1042 1039
1043 mtdparts= [MTD] 1040 mtdparts= [MTD]
1044 See drivers/mtd/cmdline.c. 1041 See drivers/mtd/cmdlinepart.c.
1045 1042
1046 mtouchusb.raw_coordinates= 1043 mtouchusb.raw_coordinates=
1047 [HW] Make the MicroTouch USB driver use raw coordinates 1044 [HW] Make the MicroTouch USB driver use raw coordinates
@@ -1083,9 +1080,9 @@ and is between 256 and 4096 characters. It is defined in the file
1083 [NFS] set the maximum lifetime for idmapper cache 1080 [NFS] set the maximum lifetime for idmapper cache
1084 entries. 1081 entries.
1085 1082
1086 nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels 1083 nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels
1087 1084
1088 no387 [BUGS=IA-32] Tells the kernel to use the 387 maths 1085 no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
1089 emulation library even if a 387 maths coprocessor 1086 emulation library even if a 387 maths coprocessor
1090 is present. 1087 is present.
1091 1088
@@ -1116,17 +1113,17 @@ and is between 256 and 4096 characters. It is defined in the file
1116 1113
1117 noexec [IA-64] 1114 noexec [IA-64]
1118 1115
1119 noexec [IA-32,X86-64] 1116 noexec [X86-32,X86-64]
1120 noexec=on: enable non-executable mappings (default) 1117 noexec=on: enable non-executable mappings (default)
1121 noexec=off: disable non-executable mappings 1118 noexec=off: disable non-executable mappings
1122 1119
1123 nofxsr [BUGS=IA-32] Disables x86 floating point extended 1120 nofxsr [BUGS=X86-32] Disables x86 floating point extended
1124 register save and restore. The kernel will only save 1121 register save and restore. The kernel will only save
1125 legacy floating-point registers on task switch. 1122 legacy floating-point registers on task switch.
1126 1123
1127 nohlt [BUGS=ARM] 1124 nohlt [BUGS=ARM]
1128 1125
1129 no-hlt [BUGS=IA-32] Tells the kernel that the hlt 1126 no-hlt [BUGS=X86-32] Tells the kernel that the hlt
1130 instruction doesn't work correctly and not to 1127 instruction doesn't work correctly and not to
1131 use it. 1128 use it.
1132 1129
@@ -1141,12 +1138,12 @@ and is between 256 and 4096 characters. It is defined in the file
1141 Valid arguments: on, off 1138 Valid arguments: on, off
1142 Default: on 1139 Default: on
1143 1140
1144 noirqbalance [IA-32,SMP,KNL] Disable kernel irq balancing 1141 noirqbalance [X86-32,SMP,KNL] Disable kernel irq balancing
1145 1142
1146 noirqdebug [IA-32] Disables the code which attempts to detect and 1143 noirqdebug [X86-32] Disables the code which attempts to detect and
1147 disable unhandled interrupt sources. 1144 disable unhandled interrupt sources.
1148 1145
1149 no_timer_check [IA-32,X86_64,APIC] Disables the code which tests for 1146 no_timer_check [X86-32,X86_64,APIC] Disables the code which tests for
1150 broken timer IRQ sources. 1147 broken timer IRQ sources.
1151 1148
1152 noisapnp [ISAPNP] Disables ISA PnP code. 1149 noisapnp [ISAPNP] Disables ISA PnP code.
@@ -1158,20 +1155,20 @@ and is between 256 and 4096 characters. It is defined in the file
1158 1155
1159 nojitter [IA64] Disables jitter checking for ITC timers. 1156 nojitter [IA64] Disables jitter checking for ITC timers.
1160 1157
1161 nolapic [IA-32,APIC] Do not enable or use the local APIC. 1158 nolapic [X86-32,APIC] Do not enable or use the local APIC.
1162 1159
1163 nolapic_timer [IA-32,APIC] Do not use the local APIC timer. 1160 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1164 1161
1165 noltlbs [PPC] Do not use large page/tlb entries for kernel 1162 noltlbs [PPC] Do not use large page/tlb entries for kernel
1166 lowmem mapping on PPC40x. 1163 lowmem mapping on PPC40x.
1167 1164
1168 nomca [IA-64] Disable machine check abort handling 1165 nomca [IA-64] Disable machine check abort handling
1169 1166
1170 nomce [IA-32] Machine Check Exception 1167 nomce [X86-32] Machine Check Exception
1171 1168
1172 noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops 1169 noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
1173 1170
1174 noreplace-smp [IA-32,SMP] Don't replace SMP instructions 1171 noreplace-smp [X86-32,SMP] Don't replace SMP instructions
1175 with UP alternatives 1172 with UP alternatives
1176 1173
1177 noresidual [PPC] Don't use residual data on PReP machines. 1174 noresidual [PPC] Don't use residual data on PReP machines.
@@ -1185,7 +1182,7 @@ and is between 256 and 4096 characters. It is defined in the file
1185 1182
1186 nosbagart [IA-64] 1183 nosbagart [IA-64]
1187 1184
1188 nosep [BUGS=IA-32] Disables x86 SYSENTER/SYSEXIT support. 1185 nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
1189 1186
1190 nosmp [SMP] Tells an SMP kernel to act as a UP kernel. 1187 nosmp [SMP] Tells an SMP kernel to act as a UP kernel.
1191 1188
@@ -1193,7 +1190,7 @@ and is between 256 and 4096 characters. It is defined in the file
1193 1190
1194 nosync [HW,M68K] Disables sync negotiation for all devices. 1191 nosync [HW,M68K] Disables sync negotiation for all devices.
1195 1192
1196 notsc [BUGS=IA-32] Disable Time Stamp Counter 1193 notsc [BUGS=X86-32] Disable Time Stamp Counter
1197 1194
1198 nousb [USB] Disable the USB subsystem 1195 nousb [USB] Disable the USB subsystem
1199 1196
@@ -1266,28 +1263,28 @@ and is between 256 and 4096 characters. It is defined in the file
1266 See also Documentation/paride.txt. 1263 See also Documentation/paride.txt.
1267 1264
1268 pci=option[,option...] [PCI] various PCI subsystem options: 1265 pci=option[,option...] [PCI] various PCI subsystem options:
1269 off [IA-32] don't probe for the PCI bus 1266 off [X86-32] don't probe for the PCI bus
1270 bios [IA-32] force use of PCI BIOS, don't access 1267 bios [X86-32] force use of PCI BIOS, don't access
1271 the hardware directly. Use this if your machine 1268 the hardware directly. Use this if your machine
1272 has a non-standard PCI host bridge. 1269 has a non-standard PCI host bridge.
1273 nobios [IA-32] disallow use of PCI BIOS, only direct 1270 nobios [X86-32] disallow use of PCI BIOS, only direct
1274 hardware access methods are allowed. Use this 1271 hardware access methods are allowed. Use this
1275 if you experience crashes upon bootup and you 1272 if you experience crashes upon bootup and you
1276 suspect they are caused by the BIOS. 1273 suspect they are caused by the BIOS.
1277 conf1 [IA-32] Force use of PCI Configuration 1274 conf1 [X86-32] Force use of PCI Configuration
1278 Mechanism 1. 1275 Mechanism 1.
1279 conf2 [IA-32] Force use of PCI Configuration 1276 conf2 [X86-32] Force use of PCI Configuration
1280 Mechanism 2. 1277 Mechanism 2.
1281 nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI 1278 nommconf [X86-32,X86_64] Disable use of MMCONFIG for PCI
1282 Configuration 1279 Configuration
1283 nomsi [MSI] If the PCI_MSI kernel config parameter is 1280 nomsi [MSI] If the PCI_MSI kernel config parameter is
1284 enabled, this kernel boot option can be used to 1281 enabled, this kernel boot option can be used to
1285 disable the use of MSI interrupts system-wide. 1282 disable the use of MSI interrupts system-wide.
1286 nosort [IA-32] Don't sort PCI devices according to 1283 nosort [X86-32] Don't sort PCI devices according to
1287 order given by the PCI BIOS. This sorting is 1284 order given by the PCI BIOS. This sorting is
1288 done to get a device order compatible with 1285 done to get a device order compatible with
1289 older kernels. 1286 older kernels.
1290 biosirq [IA-32] Use PCI BIOS calls to get the interrupt 1287 biosirq [X86-32] Use PCI BIOS calls to get the interrupt
1291 routing table. These calls are known to be buggy 1288 routing table. These calls are known to be buggy
1292 on several machines and they hang the machine 1289 on several machines and they hang the machine
1293 when used, but on other computers it's the only 1290 when used, but on other computers it's the only
@@ -1295,32 +1292,32 @@ and is between 256 and 4096 characters. It is defined in the file
1295 this option if the kernel is unable to allocate 1292 this option if the kernel is unable to allocate
1296 IRQs or discover secondary PCI buses on your 1293 IRQs or discover secondary PCI buses on your
1297 motherboard. 1294 motherboard.
1298 rom [IA-32] Assign address space to expansion ROMs. 1295 rom [X86-32] Assign address space to expansion ROMs.
1299 Use with caution as certain devices share 1296 Use with caution as certain devices share
1300 address decoders between ROMs and other 1297 address decoders between ROMs and other
1301 resources. 1298 resources.
1302 irqmask=0xMMMM [IA-32] Set a bit mask of IRQs allowed to be 1299 irqmask=0xMMMM [X86-32] Set a bit mask of IRQs allowed to be
1303 assigned automatically to PCI devices. You can 1300 assigned automatically to PCI devices. You can
1304 make the kernel exclude IRQs of your ISA cards 1301 make the kernel exclude IRQs of your ISA cards
1305 this way. 1302 this way.
1306 pirqaddr=0xAAAAA [IA-32] Specify the physical address 1303 pirqaddr=0xAAAAA [X86-32] Specify the physical address
1307 of the PIRQ table (normally generated 1304 of the PIRQ table (normally generated
1308 by the BIOS) if it is outside the 1305 by the BIOS) if it is outside the
1309 F0000h-100000h range. 1306 F0000h-100000h range.
1310 lastbus=N [IA-32] Scan all buses thru bus #N. Can be 1307 lastbus=N [X86-32] Scan all buses thru bus #N. Can be
1311 useful if the kernel is unable to find your 1308 useful if the kernel is unable to find your
1312 secondary buses and you want to tell it 1309 secondary buses and you want to tell it
1313 explicitly which ones they are. 1310 explicitly which ones they are.
1314 assign-busses [IA-32] Always assign all PCI bus 1311 assign-busses [X86-32] Always assign all PCI bus
1315 numbers ourselves, overriding 1312 numbers ourselves, overriding
1316 whatever the firmware may have done. 1313 whatever the firmware may have done.
1317 usepirqmask [IA-32] Honor the possible IRQ mask stored 1314 usepirqmask [X86-32] Honor the possible IRQ mask stored
1318 in the BIOS $PIR table. This is needed on 1315 in the BIOS $PIR table. This is needed on
1319 some systems with broken BIOSes, notably 1316 some systems with broken BIOSes, notably
1320 some HP Pavilion N5400 and Omnibook XE3 1317 some HP Pavilion N5400 and Omnibook XE3
1321 notebooks. This will have no effect if ACPI 1318 notebooks. This will have no effect if ACPI
1322 IRQ routing is enabled. 1319 IRQ routing is enabled.
1323 noacpi [IA-32] Do not use ACPI for IRQ routing 1320 noacpi [X86-32] Do not use ACPI for IRQ routing
1324 or for PCI scanning. 1321 or for PCI scanning.
1325 routeirq Do IRQ routing for all PCI devices. 1322 routeirq Do IRQ routing for all PCI devices.
1326 This is normally done in pci_enable_device(), 1323 This is normally done in pci_enable_device(),
@@ -1469,13 +1466,13 @@ and is between 256 and 4096 characters. It is defined in the file
1469 Run specified binary instead of /init from the ramdisk, 1466 Run specified binary instead of /init from the ramdisk,
1470 used for early userspace startup. See initrd. 1467 used for early userspace startup. See initrd.
1471 1468
1472 reboot= [BUGS=IA-32,BUGS=ARM,BUGS=IA-64] Rebooting mode 1469 reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode
1473 Format: <reboot_mode>[,<reboot_mode2>[,...]] 1470 Format: <reboot_mode>[,<reboot_mode2>[,...]]
1474 See arch/*/kernel/reboot.c or arch/*/kernel/process.c 1471 See arch/*/kernel/reboot.c or arch/*/kernel/process.c
1475 1472
1476 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area 1473 reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
1477 1474
1478 reservetop= [IA-32] 1475 reservetop= [X86-32]
1479 Format: nn[KMG] 1476 Format: nn[KMG]
1480 Reserves a hole at the top of the kernel virtual 1477 Reserves a hole at the top of the kernel virtual
1481 address space. 1478 address space.
@@ -1566,7 +1563,7 @@ and is between 256 and 4096 characters. It is defined in the file
1566 Value can be changed at runtime via 1563 Value can be changed at runtime via
1567 /selinux/compat_net. 1564 /selinux/compat_net.
1568 1565
1569 serialnumber [BUGS=IA-32] 1566 serialnumber [BUGS=X86-32]
1570 1567
1571 sg_def_reserved_size= [SCSI] 1568 sg_def_reserved_size= [SCSI]
1572 1569
@@ -1619,7 +1616,7 @@ and is between 256 and 4096 characters. It is defined in the file
1619 smart2= [HW] 1616 smart2= [HW]
1620 Format: <io1>[,<io2>[,...,<io8>]] 1617 Format: <io1>[,<io2>[,...,<io8>]]
1621 1618
1622 smp-alt-once [IA-32,SMP] On a hotplug CPU system, only 1619 smp-alt-once [X86-32,SMP] On a hotplug CPU system, only
1623 attempt to substitute SMP alternatives once at boot. 1620 attempt to substitute SMP alternatives once at boot.
1624 1621
1625 smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices 1622 smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
@@ -1884,7 +1881,7 @@ and is between 256 and 4096 characters. It is defined in the file
1884 usbhid.mousepoll= 1881 usbhid.mousepoll=
1885 [USBHID] The interval which mice are to be polled at. 1882 [USBHID] The interval which mice are to be polled at.
1886 1883
1887 vdso= [IA-32,SH,x86-64] 1884 vdso= [X86-32,SH,x86-64]
1888 vdso=2: enable compat VDSO (default with COMPAT_VDSO) 1885 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
1889 vdso=1: enable VDSO (default) 1886 vdso=1: enable VDSO (default)
1890 vdso=0: disable VDSO mapping 1887 vdso=0: disable VDSO mapping
@@ -1895,7 +1892,7 @@ and is between 256 and 4096 characters. It is defined in the file
1895 video= [FB] Frame buffer configuration 1892 video= [FB] Frame buffer configuration
1896 See Documentation/fb/modedb.txt. 1893 See Documentation/fb/modedb.txt.
1897 1894
1898 vga= [BOOT,IA-32] Select a particular video mode 1895 vga= [BOOT,X86-32] Select a particular video mode
1899 See Documentation/i386/boot.txt and 1896 See Documentation/i386/boot.txt and
1900 Documentation/svga.txt. 1897 Documentation/svga.txt.
1901 Use vga=ask for menu. 1898 Use vga=ask for menu.
@@ -1927,7 +1924,7 @@ and is between 256 and 4096 characters. It is defined in the file
1927 See header of drivers/scsi/wd7000.c. 1924 See header of drivers/scsi/wd7000.c.
1928 1925
1929 wdt= [WDT] Watchdog 1926 wdt= [WDT] Watchdog
1930 See Documentation/watchdog/watchdog.txt. 1927 See Documentation/watchdog/wdt.txt.
1931 1928
1932 xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks. 1929 xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
1933 xd_geo= See header of drivers/block/xd.c. 1930 xd_geo= See header of drivers/block/xd.c.
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 81d9aa097298..947d57d53453 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -859,9 +859,8 @@ payload contents" for more information.
859 void unregister_key_type(struct key_type *type); 859 void unregister_key_type(struct key_type *type);
860 860
861 861
862Under some circumstances, it may be desirable to desirable to deal with a 862Under some circumstances, it may be desirable to deal with a bundle of keys.
863bundle of keys. The facility provides access to the keyring type for managing 863The facility provides access to the keyring type for managing such a bundle:
864such a bundle:
865 864
866 struct key_type key_type_keyring; 865 struct key_type key_type_keyring;
867 866
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index e44855513b3d..8ee49ee7c963 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -27,7 +27,6 @@ in detail, and briefly here:
27- kobjects a simple object. 27- kobjects a simple object.
28- kset a set of objects of a certain type. 28- kset a set of objects of a certain type.
29- ktype a set of helpers for objects of a common type. 29- ktype a set of helpers for objects of a common type.
30- subsystem a controlling object for a number of ksets.
31 30
32 31
33The kobject infrastructure maintains a close relationship with the 32The kobject infrastructure maintains a close relationship with the
@@ -54,13 +53,15 @@ embedded in larger data structures and replace fields they duplicate.
541.2 Definition 531.2 Definition
55 54
56struct kobject { 55struct kobject {
56 const char * k_name;
57 char name[KOBJ_NAME_LEN]; 57 char name[KOBJ_NAME_LEN];
58 atomic_t refcount; 58 struct kref kref;
59 struct list_head entry; 59 struct list_head entry;
60 struct kobject * parent; 60 struct kobject * parent;
61 struct kset * kset; 61 struct kset * kset;
62 struct kobj_type * ktype; 62 struct kobj_type * ktype;
63 struct dentry * dentry; 63 struct sysfs_dirent * sd;
64 wait_queue_head_t poll;
64}; 65};
65 66
66void kobject_init(struct kobject *); 67void kobject_init(struct kobject *);
@@ -137,8 +138,7 @@ If a kobject does not have a parent when it is registered, its parent
137becomes its dominant kset. 138becomes its dominant kset.
138 139
139If a kobject does not have a parent nor a dominant kset, its directory 140If a kobject does not have a parent nor a dominant kset, its directory
140is created at the top-level of the sysfs partition. This should only 141is created at the top-level of the sysfs partition.
141happen for kobjects that are embedded in a struct subsystem.
142 142
143 143
144 144
@@ -150,10 +150,10 @@ A kset is a set of kobjects that are embedded in the same type.
150 150
151 151
152struct kset { 152struct kset {
153 struct subsystem * subsys;
154 struct kobj_type * ktype; 153 struct kobj_type * ktype;
155 struct list_head list; 154 struct list_head list;
156 struct kobject kobj; 155 struct kobject kobj;
156 struct kset_uevent_ops * uevent_ops;
157}; 157};
158 158
159 159
@@ -169,8 +169,7 @@ struct kobject * kset_find_obj(struct kset *, char *);
169 169
170 170
171The type that the kobjects are embedded in is described by the ktype 171The type that the kobjects are embedded in is described by the ktype
172pointer. The subsystem that the kobject belongs to is pointed to by the 172pointer.
173subsys pointer.
174 173
175A kset contains a kobject itself, meaning that it may be registered in 174A kset contains a kobject itself, meaning that it may be registered in
176the kobject hierarchy and exported via sysfs. More importantly, the 175the kobject hierarchy and exported via sysfs. More importantly, the
@@ -209,6 +208,58 @@ the hierarchy.
209kset_find_obj() may be used to locate a kobject with a particular 208kset_find_obj() may be used to locate a kobject with a particular
210name. The kobject, if found, is returned. 209name. The kobject, if found, is returned.
211 210
211There are also some helper functions whose names point to the formerly
212existing "struct subsystem", whose functions have been taken over by
213ksets.
214
215
216decl_subsys(name,type,uevent_ops)
217
218Declares a kset named '<name>_subsys' of type <type> with
219uevent_ops <uevent_ops>. For example,
220
221decl_subsys(devices, &ktype_device, &device_uevent_ops);
222
223is equivalent to doing:
224
225struct kset devices_subsys = {
226 .kobj = {
227 .name = "devices",
228 },
229 .ktype = &ktype_device,
230 .uevent_ops = &device_uevent_ops,
231};
232
233
234The objects that are registered with a subsystem that use the
235subsystem's default list must have their kset ptr set properly. These
236objects may have embedded kobjects or ksets. The
237following helpers make setting the kset easier:
238
239
240kobj_set_kset_s(obj,subsys)
241
242- Assumes that obj->kobj exists, and is a struct kobject.
243- Sets the kset of that kobject to the kset <subsys>.
244
245
246kset_set_kset_s(obj,subsys)
247
248- Assumes that obj->kset exists, and is a struct kset.
249- Sets the kset of the embedded kobject to the kset <subsys>.
250
251subsys_set_kset(obj,subsys)
252
253- Assumes obj->subsys exists, and is a struct subsystem.
254- Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset.
255
256void subsystem_init(struct kset *s);
257int subsystem_register(struct kset *s);
258void subsystem_unregister(struct kset *s);
259struct kset *subsys_get(struct kset *s);
260void kset_put(struct kset *s);
261
262These are just wrappers around the respective kset_* functions.
212 263
2132.3 sysfs 2642.3 sysfs
214 265
@@ -254,114 +305,3 @@ Instances of struct kobj_type are not registered; only referenced by
254the kset. A kobj_type may be referenced by an arbitrary number of 305the kset. A kobj_type may be referenced by an arbitrary number of
255ksets, as there may be disparate sets of identical objects. 306ksets, as there may be disparate sets of identical objects.
256 307
257
258
2594. subsystems
260
2614.1 Description
262
263A subsystem represents a significant entity of code that maintains an
264arbitrary number of sets of objects of various types. Since the number
265of ksets and the type of objects they contain are variable, a
266generic representation of a subsystem is minimal.
267
268
269struct subsystem {
270 struct kset kset;
271 struct rw_semaphore rwsem;
272};
273
274int subsystem_register(struct subsystem *);
275void subsystem_unregister(struct subsystem *);
276
277struct subsystem * subsys_get(struct subsystem * s);
278void subsys_put(struct subsystem * s);
279
280
281A subsystem contains an embedded kset so:
282
283- It can be represented in the object hierarchy via the kset's
284 embedded kobject.
285
286- It can maintain a default list of objects of one type.
287
288Additional ksets may attach to the subsystem simply by referencing the
289subsystem before they are registered. (This one-way reference means
290that there is no way to determine the ksets that are attached to the
291subsystem.)
292
293All ksets that are attached to a subsystem share the subsystem's R/W
294semaphore.
295
296
2974.2 subsystem Programming Interface.
298
299The subsystem programming interface is simple and does not offer the
300flexibility that the kset and kobject programming interfaces do. They
301may be registered and unregistered, as well as reference counted. Each
302call forwards the calls to their embedded ksets (which forward the
303calls to their embedded kobjects).
304
305
3064.3 Helpers
307
308A number of macros are available to make dealing with subsystems and
309their embedded objects easier.
310
311
312decl_subsys(name,type)
313
314Declares a subsystem named '<name>_subsys', with an embedded kset of
315type <type>. For example,
316
317decl_subsys(devices,&ktype_devices);
318
319is equivalent to doing:
320
321struct subsystem device_subsys = {
322 .kset = {
323 .kobj = {
324 .name = "devices",
325 },
326 .ktype = &ktype_devices,
327 }
328};
329
330
331The objects that are registered with a subsystem that use the
332subsystem's default list must have their kset ptr set properly. These
333objects may have embedded kobjects, ksets, or other subsystems. The
334following helpers make setting the kset easier:
335
336
337kobj_set_kset_s(obj,subsys)
338
339- Assumes that obj->kobj exists, and is a struct kobject.
340- Sets the kset of that kobject to the subsystem's embedded kset.
341
342
343kset_set_kset_s(obj,subsys)
344
345- Assumes that obj->kset exists, and is a struct kset.
346- Sets the kset of the embedded kobject to the subsystem's
347 embedded kset.
348
349subsys_set_kset(obj,subsys)
350
351- Assumes obj->subsys exists, and is a struct subsystem.
352- Sets obj->subsys.kset.kobj.kset to the subsystem's embedded kset.
353
354
3554.4 sysfs
356
357subsystems are represented in sysfs via their embedded kobjects. They
358follow the same rules as previously mentioned with no exceptions. They
359typically receive a top-level directory in sysfs, except when their
360embedded kobject is part of another kset, or the parent of the
361embedded kobject is explicitly set.
362
363Note that the subsystem's embedded kset must be 'attached' to the
364subsystem itself in order to use its rwsem. This is done after
365kset_add() has been called. (Not before, because kset_add() uses its
366subsystem for a default parent if it doesn't already have one).
367
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 31e794ef5f98..c0b7a4556390 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -13,7 +13,9 @@ LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)
13 13
14CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds 14CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
15LDLIBS:=-lz 15LDLIBS:=-lz
16 16# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
17# not others (eg. FC7).
18LDFLAGS+=-static
17all: lguest.lds lguest 19all: lguest.lds lguest
18 20
19# The linker script on x86 is so complex the only way of creating one 21# The linker script on x86 is so complex the only way of creating one
diff --git a/Documentation/lguest/extract b/Documentation/lguest/extract
new file mode 100644
index 000000000000..7730bb6e4b94
--- /dev/null
+++ b/Documentation/lguest/extract
@@ -0,0 +1,58 @@
1#! /bin/sh
2
3set -e
4
5PREFIX=$1
6shift
7
8trap 'rm -r $TMPDIR' 0
9TMPDIR=`mktemp -d`
10
11exec 3>/dev/null
12for f; do
13 while IFS="
14" read -r LINE; do
15 case "$LINE" in
16 *$PREFIX:[0-9]*:\**)
17 NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
18 if [ -f $TMPDIR/$NUM ]; then
19 echo "$TMPDIR/$NUM already exits prior to $f"
20 exit 1
21 fi
22 exec 3>>$TMPDIR/$NUM
23 echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
24 /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
25 ;;
26 *$PREFIX:[0-9]*)
27 NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
28 if [ -f $TMPDIR/$NUM ]; then
29 echo "$TMPDIR/$NUM already exits prior to $f"
30 exit 1
31 fi
32 exec 3>>$TMPDIR/$NUM
33 echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
34 /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
35 ;;
36 *:\**)
37 /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
38 echo >&3
39 exec 3>/dev/null
40 ;;
41 *)
42 /bin/echo "$LINE" >&3
43 ;;
44 esac
45 done < $f
46 echo >&3
47 exec 3>/dev/null
48done
49
50LASTFILE=""
51for f in $TMPDIR/*; do
52 if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then
53 LASTFILE=$(cat $TMPDIR/.$(basename $f) )
54 echo "[ $LASTFILE ]"
55 fi
56 cat $f
57done
58
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 62a8133393e1..f7918401a007 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,5 +1,10 @@
1/* Simple program to layout "physical" memory for new lguest guest. 1/*P:100 This is the Launcher code, a simple program which lays out the
2 * Linked high to avoid likely physical memory. */ 2 * "physical" memory for the new Guest by mapping the kernel image and the
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest.
4 *
5 * The only trick: the Makefile links it at a high address so it will be clear
6 * of the guest memory region. It means that each Guest cannot have more than
7 * about 2.5G of memory on a normally configured Host. :*/
3#define _LARGEFILE64_SOURCE 8#define _LARGEFILE64_SOURCE
4#define _GNU_SOURCE 9#define _GNU_SOURCE
5#include <stdio.h> 10#include <stdio.h>
@@ -29,12 +34,20 @@
29#include <termios.h> 34#include <termios.h>
30#include <getopt.h> 35#include <getopt.h>
31#include <zlib.h> 36#include <zlib.h>
37/*L:110 We can ignore the 28 include files we need for this program, but I do
38 * want to draw attention to the use of kernel-style types.
39 *
40 * As Linus said, "C is a Spartan language, and so should your naming be." I
41 * like these abbreviations and the header we need uses them, so we define them
42 * here.
43 */
32typedef unsigned long long u64; 44typedef unsigned long long u64;
33typedef uint32_t u32; 45typedef uint32_t u32;
34typedef uint16_t u16; 46typedef uint16_t u16;
35typedef uint8_t u8; 47typedef uint8_t u8;
36#include "../../include/linux/lguest_launcher.h" 48#include "../../include/linux/lguest_launcher.h"
37#include "../../include/asm-i386/e820.h" 49#include "../../include/asm-i386/e820.h"
50/*:*/
38 51
39#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ 52#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
40#define NET_PEERNUM 1 53#define NET_PEERNUM 1
@@ -43,33 +56,52 @@ typedef uint8_t u8;
43#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ 56#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
44#endif 57#endif
45 58
59/*L:120 verbose is both a global flag and a macro. The C preprocessor allows
60 * this, and although I wouldn't recommend it, it works quite nicely here. */
46static bool verbose; 61static bool verbose;
47#define verbose(args...) \ 62#define verbose(args...) \
48 do { if (verbose) printf(args); } while(0) 63 do { if (verbose) printf(args); } while(0)
64/*:*/
65
66/* The pipe to send commands to the waker process */
49static int waker_fd; 67static int waker_fd;
68/* The top of guest physical memory. */
50static u32 top; 69static u32 top;
51 70
71/* This is our list of devices. */
52struct device_list 72struct device_list
53{ 73{
74 /* Summary information about the devices in our list: ready to pass to
75 * select() to ask which need servicing.*/
54 fd_set infds; 76 fd_set infds;
55 int max_infd; 77 int max_infd;
56 78
79 /* The descriptor page for the devices. */
57 struct lguest_device_desc *descs; 80 struct lguest_device_desc *descs;
81
82 /* A single linked list of devices. */
58 struct device *dev; 83 struct device *dev;
84 /* ... And an end pointer so we can easily append new devices */
59 struct device **lastdev; 85 struct device **lastdev;
60}; 86};
61 87
88/* The device structure describes a single device. */
62struct device 89struct device
63{ 90{
91 /* The linked-list pointer. */
64 struct device *next; 92 struct device *next;
93 /* The descriptor for this device, as mapped into the Guest. */
65 struct lguest_device_desc *desc; 94 struct lguest_device_desc *desc;
95 /* The memory page(s) of this device, if any. Also mapped in Guest. */
66 void *mem; 96 void *mem;
67 97
68 /* Watch this fd if handle_input non-NULL. */ 98 /* If handle_input is set, it wants to be called when this file
99 * descriptor is ready. */
69 int fd; 100 int fd;
70 bool (*handle_input)(int fd, struct device *me); 101 bool (*handle_input)(int fd, struct device *me);
71 102
72 /* Watch DMA to this key if handle_input non-NULL. */ 103 /* If handle_output is set, it wants to be called when the Guest sends
104 * DMA to this key. */
73 unsigned long watch_key; 105 unsigned long watch_key;
74 u32 (*handle_output)(int fd, const struct iovec *iov, 106 u32 (*handle_output)(int fd, const struct iovec *iov,
75 unsigned int num, struct device *me); 107 unsigned int num, struct device *me);
@@ -78,6 +110,11 @@ struct device
78 void *priv; 110 void *priv;
79}; 111};
80 112
113/*L:130
114 * Loading the Kernel.
115 *
116 * We start with a couple of simple helper routines. open_or_die() avoids
117 * error-checking code cluttering the callers: */
81static int open_or_die(const char *name, int flags) 118static int open_or_die(const char *name, int flags)
82{ 119{
83 int fd = open(name, flags); 120 int fd = open(name, flags);
@@ -86,26 +123,38 @@ static int open_or_die(const char *name, int flags)
86 return fd; 123 return fd;
87} 124}
88 125
126/* map_zeroed_pages() takes a (page-aligned) address and a number of pages. */
89static void *map_zeroed_pages(unsigned long addr, unsigned int num) 127static void *map_zeroed_pages(unsigned long addr, unsigned int num)
90{ 128{
129 /* We cache the /dev/zero file-descriptor so we only open it once. */
91 static int fd = -1; 130 static int fd = -1;
92 131
93 if (fd == -1) 132 if (fd == -1)
94 fd = open_or_die("/dev/zero", O_RDONLY); 133 fd = open_or_die("/dev/zero", O_RDONLY);
95 134
135 /* We use a private mapping (ie. if we write to the page, it will be
136 * copied), and obviously we insist that it be mapped where we ask. */
96 if (mmap((void *)addr, getpagesize() * num, 137 if (mmap((void *)addr, getpagesize() * num,
97 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0) 138 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, 0)
98 != (void *)addr) 139 != (void *)addr)
99 err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr); 140 err(1, "Mmaping %u pages of /dev/zero @%p", num, (void *)addr);
141
142 /* Returning the address is just a courtesy: can simplify callers. */
100 return (void *)addr; 143 return (void *)addr;
101} 144}
102 145
103/* Find magic string marking entry point, return entry point. */ 146/* To find out where to start we look for the magic Guest string, which marks
147 * the code we see in lguest_asm.S. This is a hack which we are currently
148 * plotting to replace with the normal Linux entry point. */
104static unsigned long entry_point(void *start, void *end, 149static unsigned long entry_point(void *start, void *end,
105 unsigned long page_offset) 150 unsigned long page_offset)
106{ 151{
107 void *p; 152 void *p;
108 153
154 /* The scan gives us the physical starting address. We want the
155 * virtual address in this case, and fortunately, we already figured
156 * out the physical-virtual difference and passed it here in
157 * "page_offset". */
109 for (p = start; p < end; p++) 158 for (p = start; p < end; p++)
110 if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0) 159 if (memcmp(p, "GenuineLguest", strlen("GenuineLguest")) == 0)
111 return (long)p + strlen("GenuineLguest") + page_offset; 160 return (long)p + strlen("GenuineLguest") + page_offset;
@@ -113,7 +162,17 @@ static unsigned long entry_point(void *start, void *end,
113 err(1, "Is this image a genuine lguest?"); 162 err(1, "Is this image a genuine lguest?");
114} 163}
115 164
116/* Returns the entry point */ 165/* This routine takes an open vmlinux image, which is in ELF, and maps it into
166 * the Guest memory. ELF = Executable and Linkable Format, which is the format used
167 * by all modern binaries on Linux including the kernel.
168 *
169 * The ELF headers give *two* addresses: a physical address, and a virtual
170 * address. The Guest kernel expects to be placed in memory at the physical
171 * address, and the page tables set up so it will correspond to that virtual
172 * address. We return the difference between the virtual and physical
173 * addresses in the "page_offset" pointer.
174 *
175 * We return the starting address. */
117static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr, 176static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
118 unsigned long *page_offset) 177 unsigned long *page_offset)
119{ 178{
@@ -122,40 +181,61 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
122 unsigned int i; 181 unsigned int i;
123 unsigned long start = -1UL, end = 0; 182 unsigned long start = -1UL, end = 0;
124 183
125 /* Sanity checks. */ 184 /* Sanity checks on the main ELF header: an x86 executable with a
185 * reasonable number of correctly-sized program headers. */
126 if (ehdr->e_type != ET_EXEC 186 if (ehdr->e_type != ET_EXEC
127 || ehdr->e_machine != EM_386 187 || ehdr->e_machine != EM_386
128 || ehdr->e_phentsize != sizeof(Elf32_Phdr) 188 || ehdr->e_phentsize != sizeof(Elf32_Phdr)
129 || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) 189 || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
130 errx(1, "Malformed elf header"); 190 errx(1, "Malformed elf header");
131 191
192 /* An ELF executable contains an ELF header and a number of "program"
193 * headers which indicate which parts ("segments") of the program to
194 * load where. */
195
196 /* We read in all the program headers at once: */
132 if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) 197 if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
133 err(1, "Seeking to program headers"); 198 err(1, "Seeking to program headers");
134 if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) 199 if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
135 err(1, "Reading program headers"); 200 err(1, "Reading program headers");
136 201
202 /* We don't know page_offset yet. */
137 *page_offset = 0; 203 *page_offset = 0;
138 /* We map the loadable segments at virtual addresses corresponding 204
139 * to their physical addresses (our virtual == guest physical). */ 205 /* Try all the headers: there are usually only three. A read-only one,
206 * a read-write one, and a "note" section which isn't loadable. */
140 for (i = 0; i < ehdr->e_phnum; i++) { 207 for (i = 0; i < ehdr->e_phnum; i++) {
208 /* If this isn't a loadable segment, we ignore it */
141 if (phdr[i].p_type != PT_LOAD) 209 if (phdr[i].p_type != PT_LOAD)
142 continue; 210 continue;
143 211
144 verbose("Section %i: size %i addr %p\n", 212 verbose("Section %i: size %i addr %p\n",
145 i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); 213 i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);
146 214
147 /* We expect linear address space. */ 215 /* We expect a simple linear address space: every segment must
216 * have the same difference between virtual (p_vaddr) and
217 * physical (p_paddr) address. */
148 if (!*page_offset) 218 if (!*page_offset)
149 *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr; 219 *page_offset = phdr[i].p_vaddr - phdr[i].p_paddr;
150 else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr) 220 else if (*page_offset != phdr[i].p_vaddr - phdr[i].p_paddr)
151 errx(1, "Page offset of section %i different", i); 221 errx(1, "Page offset of section %i different", i);
152 222
223 /* We track the first and last address we mapped, so we can
224 * tell entry_point() where to scan. */
153 if (phdr[i].p_paddr < start) 225 if (phdr[i].p_paddr < start)
154 start = phdr[i].p_paddr; 226 start = phdr[i].p_paddr;
155 if (phdr[i].p_paddr + phdr[i].p_filesz > end) 227 if (phdr[i].p_paddr + phdr[i].p_filesz > end)
156 end = phdr[i].p_paddr + phdr[i].p_filesz; 228 end = phdr[i].p_paddr + phdr[i].p_filesz;
157 229
158 /* We map everything private, writable. */ 230 /* We map this section of the file at its physical address. We
231 * map it read & write even if the header says this segment is
232 * read-only. The kernel really wants to be writable: it
233 * patches its own instructions which would normally be
234 * read-only.
235 *
236 * MAP_PRIVATE means that the page won't be copied until a
237 * write is done to it. This allows us to share much of the
238 * kernel memory between Guests. */
159 addr = mmap((void *)phdr[i].p_paddr, 239 addr = mmap((void *)phdr[i].p_paddr,
160 phdr[i].p_filesz, 240 phdr[i].p_filesz,
161 PROT_READ|PROT_WRITE|PROT_EXEC, 241 PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -169,7 +249,31 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr,
169 return entry_point((void *)start, (void *)end, *page_offset); 249 return entry_point((void *)start, (void *)end, *page_offset);
170} 250}
171 251
172/* This is amazingly reliable. */ 252/*L:170 Prepare to be SHOCKED and AMAZED. And possibly a trifle nauseated.
253 *
254 * We know that CONFIG_PAGE_OFFSET sets what virtual address the kernel expects
255 * to be. We don't know what that option was, but we can figure it out
256 * approximately by looking at the addresses in the code. I chose the common
257 * case of reading a memory location into the %eax register:
258 *
259 * movl <some-address>, %eax
260 *
261 * This gets encoded as five bytes: "0xA1 <4-byte-address>". For example,
262 * "0xA1 0x18 0x60 0x47 0xC0" reads the address 0xC0476018 into %eax.
263 *
264 * In this example we can guess that the kernel was compiled with
265 * CONFIG_PAGE_OFFSET set to 0xC0000000 (it's always a round number). If the
266 * kernel were larger than 16MB, we might see 0xC1 addresses show up, but our
267 * kernel isn't that bloated yet.
268 *
269 * Unfortunately, x86 has variable-length instructions, so finding this
270 * particular instruction properly involves writing a disassembler. Instead,
271 * we rely on statistics. We look for "0xA1" and tally the different bytes
272 * which occur 4 bytes later (the "0xC0" in our example above). When one of
273 * those bytes appears three times, we can be reasonably confident that it
274 * forms the start of CONFIG_PAGE_OFFSET.
275 *
276 * This is amazingly reliable. */
173static unsigned long intuit_page_offset(unsigned char *img, unsigned long len) 277static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
174{ 278{
175 unsigned int i, possibilities[256] = { 0 }; 279 unsigned int i, possibilities[256] = { 0 };
@@ -182,30 +286,52 @@ static unsigned long intuit_page_offset(unsigned char *img, unsigned long len)
182 errx(1, "could not determine page offset"); 286 errx(1, "could not determine page offset");
183} 287}
184 288
289/*L:160 Unfortunately the entire ELF image isn't compressed: the segments
290 * which need loading are extracted and compressed raw. This denies us the
291 * information we need to make a fully-general loader. */
185static unsigned long unpack_bzimage(int fd, unsigned long *page_offset) 292static unsigned long unpack_bzimage(int fd, unsigned long *page_offset)
186{ 293{
187 gzFile f; 294 gzFile f;
188 int ret, len = 0; 295 int ret, len = 0;
296 /* A bzImage always gets loaded at physical address 1M. This is
297 * actually configurable as CONFIG_PHYSICAL_START, but as the comment
298 * there says, "Don't change this unless you know what you are doing".
299 * Indeed. */
189 void *img = (void *)0x100000; 300 void *img = (void *)0x100000;
190 301
302 /* gzdopen takes our file descriptor (carefully placed at the start of
303 * the GZIP header we found) and returns a gzFile. */
191 f = gzdopen(fd, "rb"); 304 f = gzdopen(fd, "rb");
305 /* We read it into memory in 64k chunks until we hit the end. */
192 while ((ret = gzread(f, img + len, 65536)) > 0) 306 while ((ret = gzread(f, img + len, 65536)) > 0)
193 len += ret; 307 len += ret;
194 if (ret < 0) 308 if (ret < 0)
195 err(1, "reading image from bzImage"); 309 err(1, "reading image from bzImage");
196 310
197 verbose("Unpacked size %i addr %p\n", len, img); 311 verbose("Unpacked size %i addr %p\n", len, img);
312
313 /* Without the ELF header, we can't tell the virtual-physical gap. This is
314 * CONFIG_PAGE_OFFSET, and people do actually change it. Fortunately,
315 * I have a clever way of figuring it out from the code itself. */
198 *page_offset = intuit_page_offset(img, len); 316 *page_offset = intuit_page_offset(img, len);
199 317
200 return entry_point(img, img + len, *page_offset); 318 return entry_point(img, img + len, *page_offset);
201} 319}
202 320
321/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're
322 * supposed to jump into it and it will unpack itself. We can't do that
323 * because the Guest can't run the unpacking code, and adding features to
324 * lguest kills puppies, so we don't want to.
325 *
326 * The bzImage is formed by putting the decompressing code in front of the
327 * compressed kernel code. So we can simply scan through it looking for the
328 * first "gzip" header, and start decompressing from there. */
203static unsigned long load_bzimage(int fd, unsigned long *page_offset) 329static unsigned long load_bzimage(int fd, unsigned long *page_offset)
204{ 330{
205 unsigned char c; 331 unsigned char c;
206 int state = 0; 332 int state = 0;
207 333
208 /* Ugly brute force search for gzip header. */ 334 /* GZIP header is 0x1F 0x8B <method> <flags>... <compressed-by>. */
209 while (read(fd, &c, 1) == 1) { 335 while (read(fd, &c, 1) == 1) {
210 switch (state) { 336 switch (state) {
211 case 0: 337 case 0:
@@ -222,8 +348,10 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
222 state++; 348 state++;
223 break; 349 break;
224 case 9: 350 case 9:
351 /* Seek back to the start of the gzip header. */
225 lseek(fd, -10, SEEK_CUR); 352 lseek(fd, -10, SEEK_CUR);
226 if (c != 0x03) /* Compressed under UNIX. */ 353 /* One final check: "compressed under UNIX". */
354 if (c != 0x03)
227 state = -1; 355 state = -1;
228 else 356 else
229 return unpack_bzimage(fd, page_offset); 357 return unpack_bzimage(fd, page_offset);
@@ -232,25 +360,43 @@ static unsigned long load_bzimage(int fd, unsigned long *page_offset)
232 errx(1, "Could not find kernel in bzImage"); 360 errx(1, "Could not find kernel in bzImage");
233} 361}
234 362
363/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
364 * come wrapped up in the self-decompressing "bzImage" format. With some funky
365 * coding, we can load those, too. */
235static unsigned long load_kernel(int fd, unsigned long *page_offset) 366static unsigned long load_kernel(int fd, unsigned long *page_offset)
236{ 367{
237 Elf32_Ehdr hdr; 368 Elf32_Ehdr hdr;
238 369
370 /* Read in the first few bytes. */
239 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) 371 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
240 err(1, "Reading kernel"); 372 err(1, "Reading kernel");
241 373
374 /* If it's an ELF file, it starts with "\177ELF" */
242 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) 375 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
243 return map_elf(fd, &hdr, page_offset); 376 return map_elf(fd, &hdr, page_offset);
244 377
378 /* Otherwise we assume it's a bzImage, and try to unpack it */
245 return load_bzimage(fd, page_offset); 379 return load_bzimage(fd, page_offset);
246} 380}
247 381
382/* This is a trivial little helper to align pages. Andi Kleen hated it because
383 * it calls getpagesize() twice: "it's dumb code."
384 *
385 * Kernel guys get really het up about optimization, even when it's not
386 * necessary. I leave this code as a reaction against that. */
248static inline unsigned long page_align(unsigned long addr) 387static inline unsigned long page_align(unsigned long addr)
249{ 388{
389 /* Add upwards and truncate downwards. */
250 return ((addr + getpagesize()-1) & ~(getpagesize()-1)); 390 return ((addr + getpagesize()-1) & ~(getpagesize()-1));
251} 391}
252 392
253/* initrd gets loaded at top of memory: return length. */ 393/*L:180 An "initial ram disk" is a disk image loaded into memory along with
394 * the kernel which the kernel can use to boot from without needing any
395 * drivers. Most distributions now use this as standard: the initrd contains
396 * the code to load the appropriate driver modules for the current machine.
397 *
398 * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
399 * kernels. He sent me this (and tells me when I break it). */
254static unsigned long load_initrd(const char *name, unsigned long mem) 400static unsigned long load_initrd(const char *name, unsigned long mem)
255{ 401{
256 int ifd; 402 int ifd;
@@ -259,21 +405,35 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
259 void *iaddr; 405 void *iaddr;
260 406
261 ifd = open_or_die(name, O_RDONLY); 407 ifd = open_or_die(name, O_RDONLY);
408 /* fstat() is needed to get the file size. */
262 if (fstat(ifd, &st) < 0) 409 if (fstat(ifd, &st) < 0)
263 err(1, "fstat() on initrd '%s'", name); 410 err(1, "fstat() on initrd '%s'", name);
264 411
412 /* The length needs to be rounded up to a page size: mmap needs the
413 * address to be page aligned. */
265 len = page_align(st.st_size); 414 len = page_align(st.st_size);
415 /* We map the initrd at the top of memory. */
266 iaddr = mmap((void *)mem - len, st.st_size, 416 iaddr = mmap((void *)mem - len, st.st_size,
267 PROT_READ|PROT_EXEC|PROT_WRITE, 417 PROT_READ|PROT_EXEC|PROT_WRITE,
268 MAP_FIXED|MAP_PRIVATE, ifd, 0); 418 MAP_FIXED|MAP_PRIVATE, ifd, 0);
269 if (iaddr != (void *)mem - len) 419 if (iaddr != (void *)mem - len)
270 err(1, "Mmaping initrd '%s' returned %p not %p", 420 err(1, "Mmaping initrd '%s' returned %p not %p",
271 name, iaddr, (void *)mem - len); 421 name, iaddr, (void *)mem - len);
422 /* Once a file is mapped, you can close the file descriptor. It's a
423 * little odd, but quite useful. */
272 close(ifd); 424 close(ifd);
273 verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr); 425 verbose("mapped initrd %s size=%lu @ %p\n", name, st.st_size, iaddr);
426
427 /* We return the initrd size. */
274 return len; 428 return len;
275} 429}
276 430
431/* Once we know how much memory we have, and the address the Guest kernel
432 * expects, we can construct simple linear page tables which will get the Guest
433 * far enough into the boot to create its own.
434 *
435 * We lay them out of the way, just below the initrd (which is why we need to
436 * know its size). */
277static unsigned long setup_pagetables(unsigned long mem, 437static unsigned long setup_pagetables(unsigned long mem,
278 unsigned long initrd_size, 438 unsigned long initrd_size,
279 unsigned long page_offset) 439 unsigned long page_offset)
@@ -282,23 +442,32 @@ static unsigned long setup_pagetables(unsigned long mem,
282 unsigned int mapped_pages, i, linear_pages; 442 unsigned int mapped_pages, i, linear_pages;
283 unsigned int ptes_per_page = getpagesize()/sizeof(u32); 443 unsigned int ptes_per_page = getpagesize()/sizeof(u32);
284 444
285 /* If we can map all of memory above page_offset, we do so. */ 445 /* Ideally we map all physical memory starting at page_offset.
446 * However, if page_offset is 0xC0000000 we can only map 1G of physical
447 * (0xC0000000 + 1G overflows). */
286 if (mem <= -page_offset) 448 if (mem <= -page_offset)
287 mapped_pages = mem/getpagesize(); 449 mapped_pages = mem/getpagesize();
288 else 450 else
289 mapped_pages = -page_offset/getpagesize(); 451 mapped_pages = -page_offset/getpagesize();
290 452
291 /* Each linear PTE page can map ptes_per_page pages. */ 453 /* Each PTE page can map ptes_per_page pages: how many do we need? */
292 linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; 454 linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
293 455
294 /* We lay out top-level then linear mapping immediately below initrd */ 456 /* We put the toplevel page directory page at the top of memory. */
295 pgdir = (void *)mem - initrd_size - getpagesize(); 457 pgdir = (void *)mem - initrd_size - getpagesize();
458
459 /* Now we use the next linear_pages pages as pte pages */
296 linear = (void *)pgdir - linear_pages*getpagesize(); 460 linear = (void *)pgdir - linear_pages*getpagesize();
297 461
462 /* Linear mapping is easy: put every page's address into the mapping in
463 * order. PAGE_PRESENT contains the flags Present, Writable and
464 * Executable. */
298 for (i = 0; i < mapped_pages; i++) 465 for (i = 0; i < mapped_pages; i++)
299 linear[i] = ((i * getpagesize()) | PAGE_PRESENT); 466 linear[i] = ((i * getpagesize()) | PAGE_PRESENT);
300 467
301 /* Now set up pgd so that this memory is at page_offset */ 468 /* The top level points to the linear page table pages above. The
469 * entry representing page_offset points to the first one, and they
470 * continue from there. */
302 for (i = 0; i < mapped_pages; i += ptes_per_page) { 471 for (i = 0; i < mapped_pages; i += ptes_per_page) {
303 pgdir[(i + page_offset/getpagesize())/ptes_per_page] 472 pgdir[(i + page_offset/getpagesize())/ptes_per_page]
304 = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT); 473 = (((u32)linear + i*sizeof(u32)) | PAGE_PRESENT);
@@ -307,9 +476,13 @@ static unsigned long setup_pagetables(unsigned long mem,
307 verbose("Linear mapping of %u pages in %u pte pages at %p\n", 476 verbose("Linear mapping of %u pages in %u pte pages at %p\n",
308 mapped_pages, linear_pages, linear); 477 mapped_pages, linear_pages, linear);
309 478
479 /* We return the top level (guest-physical) address: the kernel needs
480 * to know where it is. */
310 return (unsigned long)pgdir; 481 return (unsigned long)pgdir;
311} 482}
312 483
484/* Simple routine to roll all the commandline arguments together with spaces
485 * between them. */
313static void concat(char *dst, char *args[]) 486static void concat(char *dst, char *args[])
314{ 487{
315 unsigned int i, len = 0; 488 unsigned int i, len = 0;
@@ -323,6 +496,10 @@ static void concat(char *dst, char *args[])
323 dst[len] = '\0'; 496 dst[len] = '\0';
324} 497}
325 498
499/* This is where we actually tell the kernel to initialize the Guest. We saw
500 * the arguments it expects when we looked at initialize() in lguest_user.c:
501 * the top physical page to allow, the top level pagetable, the entry point and
502 * the page_offset constant for the Guest. */
326static int tell_kernel(u32 pgdir, u32 start, u32 page_offset) 503static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
327{ 504{
328 u32 args[] = { LHREQ_INITIALIZE, 505 u32 args[] = { LHREQ_INITIALIZE,
@@ -332,8 +509,11 @@ static int tell_kernel(u32 pgdir, u32 start, u32 page_offset)
332 fd = open_or_die("/dev/lguest", O_RDWR); 509 fd = open_or_die("/dev/lguest", O_RDWR);
333 if (write(fd, args, sizeof(args)) < 0) 510 if (write(fd, args, sizeof(args)) < 0)
334 err(1, "Writing to /dev/lguest"); 511 err(1, "Writing to /dev/lguest");
512
513 /* We return the /dev/lguest file descriptor to control this Guest */
335 return fd; 514 return fd;
336} 515}
516/*:*/
337 517
338static void set_fd(int fd, struct device_list *devices) 518static void set_fd(int fd, struct device_list *devices)
339{ 519{
@@ -342,61 +522,108 @@ static void set_fd(int fd, struct device_list *devices)
342 devices->max_infd = fd; 522 devices->max_infd = fd;
343} 523}
344 524
345/* When input arrives, we tell the kernel to kick lguest out with -EAGAIN. */ 525/*L:200
526 * The Waker.
527 *
528 * With a console and network devices, we can have lots of input which we need
529 * to process. We could try to tell the kernel what file descriptors to watch,
530 * but handing a file descriptor mask through to the kernel is fairly icky.
531 *
532 * Instead, we fork off a process which watches the file descriptors and writes
533 * the LHREQ_BREAK command to the /dev/lguest filedescriptor to tell the Host
534 * loop to stop running the Guest. This causes it to return from the
535 * /dev/lguest read with -EAGAIN, where it will write to /dev/lguest to reset
536 * the LHREQ_BREAK and wake us up again.
537 *
538 * This, of course, is merely a different *kind* of icky.
539 */
346static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices) 540static void wake_parent(int pipefd, int lguest_fd, struct device_list *devices)
347{ 541{
542 /* Add the pipe from the Launcher to the fdset in the device_list, so
543 * we watch it, too. */
348 set_fd(pipefd, devices); 544 set_fd(pipefd, devices);
349 545
350 for (;;) { 546 for (;;) {
351 fd_set rfds = devices->infds; 547 fd_set rfds = devices->infds;
352 u32 args[] = { LHREQ_BREAK, 1 }; 548 u32 args[] = { LHREQ_BREAK, 1 };
353 549
550 /* Wait until input is ready from one of the devices. */
354 select(devices->max_infd+1, &rfds, NULL, NULL, NULL); 551 select(devices->max_infd+1, &rfds, NULL, NULL, NULL);
552 /* Is it a message from the Launcher? */
355 if (FD_ISSET(pipefd, &rfds)) { 553 if (FD_ISSET(pipefd, &rfds)) {
356 int ignorefd; 554 int ignorefd;
555 /* If read() returns 0, it means the Launcher has
556 * exited. We silently follow. */
357 if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0) 557 if (read(pipefd, &ignorefd, sizeof(ignorefd)) == 0)
358 exit(0); 558 exit(0);
559 /* Otherwise it's telling us there's a problem with one
560 * of the devices, and we should ignore that file
561 * descriptor from now on. */
359 FD_CLR(ignorefd, &devices->infds); 562 FD_CLR(ignorefd, &devices->infds);
360 } else 563 } else /* Send LHREQ_BREAK command. */
361 write(lguest_fd, args, sizeof(args)); 564 write(lguest_fd, args, sizeof(args));
362 } 565 }
363} 566}
364 567
568/* This routine just sets up a pipe to the Waker process. */
365static int setup_waker(int lguest_fd, struct device_list *device_list) 569static int setup_waker(int lguest_fd, struct device_list *device_list)
366{ 570{
367 int pipefd[2], child; 571 int pipefd[2], child;
368 572
573 /* We create a pipe to talk to the waker, and also so it knows when the
574 * Launcher dies (and closes pipe). */
369 pipe(pipefd); 575 pipe(pipefd);
370 child = fork(); 576 child = fork();
371 if (child == -1) 577 if (child == -1)
372 err(1, "forking"); 578 err(1, "forking");
373 579
374 if (child == 0) { 580 if (child == 0) {
581 /* Close the "writing" end of our copy of the pipe */
375 close(pipefd[1]); 582 close(pipefd[1]);
376 wake_parent(pipefd[0], lguest_fd, device_list); 583 wake_parent(pipefd[0], lguest_fd, device_list);
377 } 584 }
585 /* Close the reading end of our copy of the pipe. */
378 close(pipefd[0]); 586 close(pipefd[0]);
379 587
588 /* Here is the fd used to talk to the waker. */
380 return pipefd[1]; 589 return pipefd[1];
381} 590}
382 591
592/*L:210
593 * Device Handling.
594 *
595 * When the Guest sends DMA to us, it sends us an array of addresses and sizes.
596 * We need to make sure it's not trying to reach into the Launcher itself, so
597 * we have a convenient routine which checks it and exits with an error message
598 * if something funny is going on:
599 */
383static void *_check_pointer(unsigned long addr, unsigned int size, 600static void *_check_pointer(unsigned long addr, unsigned int size,
384 unsigned int line) 601 unsigned int line)
385{ 602{
603 /* We have to separately check addr and addr+size, because size could
604 * be huge and addr + size might wrap around. */
386 if (addr >= top || addr + size >= top) 605 if (addr >= top || addr + size >= top)
387 errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr); 606 errx(1, "%s:%i: Invalid address %li", __FILE__, line, addr);
607 /* We return a pointer for the caller's convenience, now we know it's
608 * safe to use. */
388 return (void *)addr; 609 return (void *)addr;
389} 610}
611/* A macro which transparently hands the line number to the real function. */
390#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) 612#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
391 613
392/* Returns pointer to dma->used_len */ 614/* The Guest has given us the address of a "struct lguest_dma". We check it's
615 * OK and convert it to an iovec (which is a simple array of ptr/size
616 * pairs). */
393static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num) 617static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
394{ 618{
395 unsigned int i; 619 unsigned int i;
396 struct lguest_dma *udma; 620 struct lguest_dma *udma;
397 621
622 /* First we make sure that the array memory itself is valid. */
398 udma = check_pointer(dma, sizeof(*udma)); 623 udma = check_pointer(dma, sizeof(*udma));
624 /* Now we check each element */
399 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { 625 for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) {
626 /* A zero length ends the array. */
400 if (!udma->len[i]) 627 if (!udma->len[i])
401 break; 628 break;
402 629
@@ -404,9 +631,15 @@ static u32 *dma2iov(unsigned long dma, struct iovec iov[], unsigned *num)
404 iov[i].iov_len = udma->len[i]; 631 iov[i].iov_len = udma->len[i];
405 } 632 }
406 *num = i; 633 *num = i;
634
635 /* We return the pointer to where the caller should write the amount of
636 * the buffer used. */
407 return &udma->used_len; 637 return &udma->used_len;
408} 638}
409 639
640/* This routine gets a DMA buffer from the Guest for a given key, and converts
641 * it to an iovec array. It returns the interrupt the Guest wants when we're
642 * finished, and a pointer to the "used_len" field to fill in. */
410static u32 *get_dma_buffer(int fd, void *key, 643static u32 *get_dma_buffer(int fd, void *key,
411 struct iovec iov[], unsigned int *num, u32 *irq) 644 struct iovec iov[], unsigned int *num, u32 *irq)
412{ 645{
@@ -414,16 +647,21 @@ static u32 *get_dma_buffer(int fd, void *key,
414 unsigned long udma; 647 unsigned long udma;
415 u32 *res; 648 u32 *res;
416 649
650 /* Ask the kernel for a DMA buffer corresponding to this key. */
417 udma = write(fd, buf, sizeof(buf)); 651 udma = write(fd, buf, sizeof(buf));
652 /* They haven't registered any, or they're all used? */
418 if (udma == (unsigned long)-1) 653 if (udma == (unsigned long)-1)
419 return NULL; 654 return NULL;
420 655
421 /* Kernel stashes irq in ->used_len. */ 656 /* Convert it into our iovec array */
422 res = dma2iov(udma, iov, num); 657 res = dma2iov(udma, iov, num);
658 /* The kernel stashes irq in ->used_len to get it out to us. */
423 *irq = *res; 659 *irq = *res;
660 /* Return a pointer to ((struct lguest_dma *)udma)->used_len. */
424 return res; 661 return res;
425} 662}
426 663
664/* This is a convenient routine to send the Guest an interrupt. */
427static void trigger_irq(int fd, u32 irq) 665static void trigger_irq(int fd, u32 irq)
428{ 666{
429 u32 buf[] = { LHREQ_IRQ, irq }; 667 u32 buf[] = { LHREQ_IRQ, irq };
@@ -431,6 +669,10 @@ static void trigger_irq(int fd, u32 irq)
431 err(1, "Triggering irq %i", irq); 669 err(1, "Triggering irq %i", irq);
432} 670}
433 671
672/* This simply sets up an iovec array where we can put data to be discarded.
673 * This happens when the Guest doesn't want or can't handle the input: we have
674 * to get rid of it somewhere, and if we bury it in the ceiling space it will
675 * start to smell after a week. */
434static void discard_iovec(struct iovec *iov, unsigned int *num) 676static void discard_iovec(struct iovec *iov, unsigned int *num)
435{ 677{
436 static char discard_buf[1024]; 678 static char discard_buf[1024];
@@ -439,19 +681,24 @@ static void discard_iovec(struct iovec *iov, unsigned int *num)
439 iov->iov_len = sizeof(discard_buf); 681 iov->iov_len = sizeof(discard_buf);
440} 682}
441 683
684/* Here are the input terminal settings we save, and the routine to restore them
685 * on exit so the user can see what they type next. */
442static struct termios orig_term; 686static struct termios orig_term;
443static void restore_term(void) 687static void restore_term(void)
444{ 688{
445 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); 689 tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
446} 690}
447 691
692/* We associate some data with the console for our exit hack. */
448struct console_abort 693struct console_abort
449{ 694{
695 /* How many times have they hit ^C? */
450 int count; 696 int count;
697 /* When did they start? */
451 struct timeval start; 698 struct timeval start;
452}; 699};
453 700
454/* We DMA input to buffer bound at start of console page. */ 701/* This is the routine which handles console input (ie. stdin). */
455static bool handle_console_input(int fd, struct device *dev) 702static bool handle_console_input(int fd, struct device *dev)
456{ 703{
457 u32 irq = 0, *lenp; 704 u32 irq = 0, *lenp;
@@ -460,24 +707,38 @@ static bool handle_console_input(int fd, struct device *dev)
460 struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; 707 struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
461 struct console_abort *abort = dev->priv; 708 struct console_abort *abort = dev->priv;
462 709
710 /* First we get the console buffer from the Guest. The key is dev->mem
711 * which was set to 0 in setup_console(). */
463 lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq); 712 lenp = get_dma_buffer(fd, dev->mem, iov, &num, &irq);
464 if (!lenp) { 713 if (!lenp) {
714 /* If it's not ready for input, warn and set up to discard. */
465 warn("console: no dma buffer!"); 715 warn("console: no dma buffer!");
466 discard_iovec(iov, &num); 716 discard_iovec(iov, &num);
467 } 717 }
468 718
719 /* This is why we convert to iovecs: the readv() call uses them, and so
720 * it reads straight into the Guest's buffer. */
469 len = readv(dev->fd, iov, num); 721 len = readv(dev->fd, iov, num);
470 if (len <= 0) { 722 if (len <= 0) {
723 /* This implies that the console is closed, is /dev/null, or
724 * something went terribly wrong. We still go through the rest
725 * of the logic, though, especially the exit handling below. */
471 warnx("Failed to get console input, ignoring console."); 726 warnx("Failed to get console input, ignoring console.");
472 len = 0; 727 len = 0;
473 } 728 }
474 729
730 /* If we read the data into the Guest, fill in the length and send the
731 * interrupt. */
475 if (lenp) { 732 if (lenp) {
476 *lenp = len; 733 *lenp = len;
477 trigger_irq(fd, irq); 734 trigger_irq(fd, irq);
478 } 735 }
479 736
480 /* Three ^C within one second? Exit. */ 737 /* Three ^C within one second? Exit.
738 *
739 * This is such a hack, but works surprisingly well. Each ^C has to be
740 * in a buffer by itself, so they can't be too fast. But we check that
741 * we get three within about a second, so they can't be too slow. */
481 if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) { 742 if (len == 1 && ((char *)iov[0].iov_base)[0] == 3) {
482 if (!abort->count++) 743 if (!abort->count++)
483 gettimeofday(&abort->start, NULL); 744 gettimeofday(&abort->start, NULL);
@@ -485,43 +746,60 @@ static bool handle_console_input(int fd, struct device *dev)
485 struct timeval now; 746 struct timeval now;
486 gettimeofday(&now, NULL); 747 gettimeofday(&now, NULL);
487 if (now.tv_sec <= abort->start.tv_sec+1) { 748 if (now.tv_sec <= abort->start.tv_sec+1) {
488 /* Make sure waker is not blocked in BREAK */
489 u32 args[] = { LHREQ_BREAK, 0 }; 749 u32 args[] = { LHREQ_BREAK, 0 };
750 /* Close the fd so Waker will know it has to
751 * exit. */
490 close(waker_fd); 752 close(waker_fd);
753 /* Just in case waker is blocked in BREAK, send
754 * unbreak now. */
491 write(fd, args, sizeof(args)); 755 write(fd, args, sizeof(args));
492 exit(2); 756 exit(2);
493 } 757 }
494 abort->count = 0; 758 abort->count = 0;
495 } 759 }
496 } else 760 } else
761 /* Any other key resets the abort counter. */
497 abort->count = 0; 762 abort->count = 0;
498 763
764 /* Now, if we didn't read anything, put the input terminal back and
765 * return failure (meaning, don't call us again). */
499 if (!len) { 766 if (!len) {
500 restore_term(); 767 restore_term();
501 return false; 768 return false;
502 } 769 }
770 /* Everything went OK! */
503 return true; 771 return true;
504} 772}
505 773
774/* Handling console output is much simpler than input. */
506static u32 handle_console_output(int fd, const struct iovec *iov, 775static u32 handle_console_output(int fd, const struct iovec *iov,
507 unsigned num, struct device*dev) 776 unsigned num, struct device*dev)
508{ 777{
778 /* Whatever the Guest sends, write it to standard output. Return the
779 * number of bytes written. */
509 return writev(STDOUT_FILENO, iov, num); 780 return writev(STDOUT_FILENO, iov, num);
510} 781}
511 782
783/* Guest->Host network output is also pretty easy. */
512static u32 handle_tun_output(int fd, const struct iovec *iov, 784static u32 handle_tun_output(int fd, const struct iovec *iov,
513 unsigned num, struct device *dev) 785 unsigned num, struct device *dev)
514{ 786{
515 /* Now we've seen output, we should warn if we can't get buffers. */ 787 /* We put a flag in the "priv" pointer of the network device, and set
788 * it as soon as we see output. We'll see why in handle_tun_input() */
516 *(bool *)dev->priv = true; 789 *(bool *)dev->priv = true;
790 /* Whatever packet the Guest sent us, write it out to the tun
791 * device. */
517 return writev(dev->fd, iov, num); 792 return writev(dev->fd, iov, num);
518} 793}
519 794
795/* This matches the peer_key() in lguest_net.c. The key for any given slot
796 * is the address of the network device's page plus 4 * the slot number. */
520static unsigned long peer_offset(unsigned int peernum) 797static unsigned long peer_offset(unsigned int peernum)
521{ 798{
522 return 4 * peernum; 799 return 4 * peernum;
523} 800}
524 801
802/* This is where we handle a packet coming in from the tun device */
525static bool handle_tun_input(int fd, struct device *dev) 803static bool handle_tun_input(int fd, struct device *dev)
526{ 804{
527 u32 irq = 0, *lenp; 805 u32 irq = 0, *lenp;
@@ -529,17 +807,28 @@ static bool handle_tun_input(int fd, struct device *dev)
529 unsigned num; 807 unsigned num;
530 struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; 808 struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
531 809
810 /* First we get a buffer the Guest has bound to its key. */
532 lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num, 811 lenp = get_dma_buffer(fd, dev->mem+peer_offset(NET_PEERNUM), iov, &num,
533 &irq); 812 &irq);
534 if (!lenp) { 813 if (!lenp) {
814 /* Now, it's expected that if we try to send a packet too
815 * early, the Guest won't be ready yet. This is why we set a
816 * flag when the Guest sends its first packet. If it's sent a
817 * packet we assume it should be ready to receive them.
818 *
819 * Actually, this is what the status bits in the descriptor are
820 * for: we should *use* them. FIXME! */
535 if (*(bool *)dev->priv) 821 if (*(bool *)dev->priv)
536 warn("network: no dma buffer!"); 822 warn("network: no dma buffer!");
537 discard_iovec(iov, &num); 823 discard_iovec(iov, &num);
538 } 824 }
539 825
826 /* Read the packet from the device directly into the Guest's buffer. */
540 len = readv(dev->fd, iov, num); 827 len = readv(dev->fd, iov, num);
541 if (len <= 0) 828 if (len <= 0)
542 err(1, "reading network"); 829 err(1, "reading network");
830
831 /* Write the used_len, and trigger the interrupt for the Guest */
543 if (lenp) { 832 if (lenp) {
544 *lenp = len; 833 *lenp = len;
545 trigger_irq(fd, irq); 834 trigger_irq(fd, irq);
@@ -547,9 +836,13 @@ static bool handle_tun_input(int fd, struct device *dev)
547 verbose("tun input packet len %i [%02x %02x] (%s)\n", len, 836 verbose("tun input packet len %i [%02x %02x] (%s)\n", len,
548 ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1], 837 ((u8 *)iov[0].iov_base)[0], ((u8 *)iov[0].iov_base)[1],
549 lenp ? "sent" : "discarded"); 838 lenp ? "sent" : "discarded");
839 /* All good. */
550 return true; 840 return true;
551} 841}
552 842
843/* The last device handling routine is block output: the Guest has sent a DMA
844 * to the block device. It will have placed the command it wants in the
845 * "struct lguest_block_page". */
553static u32 handle_block_output(int fd, const struct iovec *iov, 846static u32 handle_block_output(int fd, const struct iovec *iov,
554 unsigned num, struct device *dev) 847 unsigned num, struct device *dev)
555{ 848{
@@ -559,36 +852,64 @@ static u32 handle_block_output(int fd, const struct iovec *iov,
559 struct iovec reply[LGUEST_MAX_DMA_SECTIONS]; 852 struct iovec reply[LGUEST_MAX_DMA_SECTIONS];
560 off64_t device_len, off = (off64_t)p->sector * 512; 853 off64_t device_len, off = (off64_t)p->sector * 512;
561 854
855 /* First we extract the device length from the dev->priv pointer. */
562 device_len = *(off64_t *)dev->priv; 856 device_len = *(off64_t *)dev->priv;
563 857
858 /* We first check that the read or write is within the length of the
859 * block file. */
564 if (off >= device_len) 860 if (off >= device_len)
565 err(1, "Bad offset %llu vs %llu", off, device_len); 861 err(1, "Bad offset %llu vs %llu", off, device_len);
862 /* Move to the right location in the block file. This shouldn't fail,
863 * but best to check. */
566 if (lseek64(dev->fd, off, SEEK_SET) != off) 864 if (lseek64(dev->fd, off, SEEK_SET) != off)
567 err(1, "Bad seek to sector %i", p->sector); 865 err(1, "Bad seek to sector %i", p->sector);
568 866
569 verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off); 867 verbose("Block: %s at offset %llu\n", p->type ? "WRITE" : "READ", off);
570 868
869 /* They were supposed to bind a reply buffer at key equal to the start
870 * of the block device memory. We need this to tell them when the
871 * request is finished. */
571 lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq); 872 lenp = get_dma_buffer(fd, dev->mem, reply, &reply_num, &irq);
572 if (!lenp) 873 if (!lenp)
573 err(1, "Block request didn't give us a dma buffer"); 874 err(1, "Block request didn't give us a dma buffer");
574 875
575 if (p->type) { 876 if (p->type) {
877 /* A write request. The DMA they sent contained the data, so
878 * write it out. */
576 len = writev(dev->fd, iov, num); 879 len = writev(dev->fd, iov, num);
880 /* Grr... Now we know how long the "struct lguest_dma" they
881 * sent was, we make sure they didn't try to write over the end
882 * of the block file (possibly extending it). */
577 if (off + len > device_len) { 883 if (off + len > device_len) {
884 /* Trim it back to the correct length */
578 ftruncate(dev->fd, device_len); 885 ftruncate(dev->fd, device_len);
886 /* Die, bad Guest, die. */
579 errx(1, "Write past end %llu+%u", off, len); 887 errx(1, "Write past end %llu+%u", off, len);
580 } 888 }
889 /* The reply length is 0: we just send back an empty DMA to
890 * interrupt them and tell them the write is finished. */
581 *lenp = 0; 891 *lenp = 0;
582 } else { 892 } else {
893 /* A read request. They sent an empty DMA to start the
894 * request, and we put the read contents into the reply
895 * buffer. */
583 len = readv(dev->fd, reply, reply_num); 896 len = readv(dev->fd, reply, reply_num);
584 *lenp = len; 897 *lenp = len;
585 } 898 }
586 899
900 /* The result is 1 (done), 2 if there was an error (short read or
901 * write). */
587 p->result = 1 + (p->bytes != len); 902 p->result = 1 + (p->bytes != len);
903 /* Now tell them we've used their reply buffer. */
588 trigger_irq(fd, irq); 904 trigger_irq(fd, irq);
905
906 /* We're supposed to return the number of bytes of the output buffer we
907 * used. But the block device uses the "result" field instead, so we
908 * don't bother. */
589 return 0; 909 return 0;
590} 910}
591 911
912/* This is the generic routine we call when the Guest sends some DMA out. */
592static void handle_output(int fd, unsigned long dma, unsigned long key, 913static void handle_output(int fd, unsigned long dma, unsigned long key,
593 struct device_list *devices) 914 struct device_list *devices)
594{ 915{
@@ -597,30 +918,53 @@ static void handle_output(int fd, unsigned long dma, unsigned long key,
597 struct iovec iov[LGUEST_MAX_DMA_SECTIONS]; 918 struct iovec iov[LGUEST_MAX_DMA_SECTIONS];
598 unsigned num = 0; 919 unsigned num = 0;
599 920
921 /* Convert the "struct lguest_dma" they're sending to a "struct
922 * iovec". */
600 lenp = dma2iov(dma, iov, &num); 923 lenp = dma2iov(dma, iov, &num);
924
925 /* Check each device: if they expect output to this key, tell them to
926 * handle it. */
601 for (i = devices->dev; i; i = i->next) { 927 for (i = devices->dev; i; i = i->next) {
602 if (i->handle_output && key == i->watch_key) { 928 if (i->handle_output && key == i->watch_key) {
929 /* We write the result straight into the used_len field
930 * for them. */
603 *lenp = i->handle_output(fd, iov, num, i); 931 *lenp = i->handle_output(fd, iov, num, i);
604 return; 932 return;
605 } 933 }
606 } 934 }
935
936 /* This can happen: the kernel sends any SEND_DMA which doesn't match
937 * another Guest to us. It could be that another Guest just left a
938 * network, for example. But it's unusual. */
607 warnx("Pending dma %p, key %p", (void *)dma, (void *)key); 939 warnx("Pending dma %p, key %p", (void *)dma, (void *)key);
608} 940}
609 941
942/* This is called when the waker wakes us up: check for incoming file
943 * descriptors. */
610static void handle_input(int fd, struct device_list *devices) 944static void handle_input(int fd, struct device_list *devices)
611{ 945{
946 /* select() wants a zeroed timeval to mean "don't wait". */
612 struct timeval poll = { .tv_sec = 0, .tv_usec = 0 }; 947 struct timeval poll = { .tv_sec = 0, .tv_usec = 0 };
613 948
614 for (;;) { 949 for (;;) {
615 struct device *i; 950 struct device *i;
616 fd_set fds = devices->infds; 951 fd_set fds = devices->infds;
617 952
953 /* If nothing is ready, we're done. */
618 if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0) 954 if (select(devices->max_infd+1, &fds, NULL, NULL, &poll) == 0)
619 break; 955 break;
620 956
957 /* Otherwise, call the device(s) which have readable
958 * file descriptors and a method of handling them. */
621 for (i = devices->dev; i; i = i->next) { 959 for (i = devices->dev; i; i = i->next) {
622 if (i->handle_input && FD_ISSET(i->fd, &fds)) { 960 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
961 /* If handle_input() returns false, it means we
962 * should no longer service it.
963 * handle_console_input() does this. */
623 if (!i->handle_input(fd, i)) { 964 if (!i->handle_input(fd, i)) {
965 /* Clear it from the set of input file
966 * descriptors kept at the head of the
967 * device list. */
624 FD_CLR(i->fd, &devices->infds); 968 FD_CLR(i->fd, &devices->infds);
625 /* Tell waker to ignore it too... */ 969 /* Tell waker to ignore it too... */
626 write(waker_fd, &i->fd, sizeof(i->fd)); 970 write(waker_fd, &i->fd, sizeof(i->fd));
@@ -630,6 +974,15 @@ static void handle_input(int fd, struct device_list *devices)
630 } 974 }
631} 975}
632 976
977/*L:190
978 * Device Setup
979 *
980 * All devices need a descriptor so the Guest knows it exists, and a "struct
981 * device" so the Launcher can keep track of it. We have common helper
982 * routines to allocate them.
983 *
984 * This routine allocates a new "struct lguest_device_desc" from the descriptor
985 * table in the devices array just above the Guest's normal memory. */
633static struct lguest_device_desc * 986static struct lguest_device_desc *
634new_dev_desc(struct lguest_device_desc *descs, 987new_dev_desc(struct lguest_device_desc *descs,
635 u16 type, u16 features, u16 num_pages) 988 u16 type, u16 features, u16 num_pages)
@@ -641,6 +994,8 @@ new_dev_desc(struct lguest_device_desc *descs,
641 descs[i].type = type; 994 descs[i].type = type;
642 descs[i].features = features; 995 descs[i].features = features;
643 descs[i].num_pages = num_pages; 996 descs[i].num_pages = num_pages;
997 /* If they said the device needs memory, we allocate
998 * that now, bumping up the top of Guest memory. */
644 if (num_pages) { 999 if (num_pages) {
645 map_zeroed_pages(top, num_pages); 1000 map_zeroed_pages(top, num_pages);
646 descs[i].pfn = top/getpagesize(); 1001 descs[i].pfn = top/getpagesize();
@@ -652,6 +1007,9 @@ new_dev_desc(struct lguest_device_desc *descs,
652 errx(1, "too many devices"); 1007 errx(1, "too many devices");
653} 1008}
654 1009
1010/* This monster routine does all the creation and setup of a new device,
1011 * including calling new_dev_desc() to allocate the descriptor and device
1012 * memory. */
655static struct device *new_device(struct device_list *devices, 1013static struct device *new_device(struct device_list *devices,
656 u16 type, u16 num_pages, u16 features, 1014 u16 type, u16 num_pages, u16 features,
657 int fd, 1015 int fd,
@@ -664,12 +1022,18 @@ static struct device *new_device(struct device_list *devices,
664{ 1022{
665 struct device *dev = malloc(sizeof(*dev)); 1023 struct device *dev = malloc(sizeof(*dev));
666 1024
667 /* Append to device list. */ 1025 /* Append to device list. Prepending to a single-linked list is
1026 * easier, but the user expects the devices to be arranged on the bus
1027 * in command-line order. The first network device on the command line
1028 * is eth0, the first block device /dev/lgba, etc. */
668 *devices->lastdev = dev; 1029 *devices->lastdev = dev;
669 dev->next = NULL; 1030 dev->next = NULL;
670 devices->lastdev = &dev->next; 1031 devices->lastdev = &dev->next;
671 1032
1033 /* Now we populate the fields one at a time. */
672 dev->fd = fd; 1034 dev->fd = fd;
1035 /* If we have an input handler for this file descriptor, then we add it
1036 * to the device_list's fdset and maxfd. */
673 if (handle_input) 1037 if (handle_input)
674 set_fd(dev->fd, devices); 1038 set_fd(dev->fd, devices);
675 dev->desc = new_dev_desc(devices->descs, type, features, num_pages); 1039 dev->desc = new_dev_desc(devices->descs, type, features, num_pages);
@@ -680,27 +1044,37 @@ static struct device *new_device(struct device_list *devices,
680 return dev; 1044 return dev;
681} 1045}
682 1046
1047/* Our first setup routine is the console. It's a fairly simple device, but
1048 * UNIX tty handling makes it uglier than it could be. */
683static void setup_console(struct device_list *devices) 1049static void setup_console(struct device_list *devices)
684{ 1050{
685 struct device *dev; 1051 struct device *dev;
686 1052
1053 /* If we can save the initial standard input settings... */
687 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { 1054 if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
688 struct termios term = orig_term; 1055 struct termios term = orig_term;
1056 /* Then we turn off echo, line buffering and ^C etc. We want a
1057 * raw input stream to the Guest. */
689 term.c_lflag &= ~(ISIG|ICANON|ECHO); 1058 term.c_lflag &= ~(ISIG|ICANON|ECHO);
690 tcsetattr(STDIN_FILENO, TCSANOW, &term); 1059 tcsetattr(STDIN_FILENO, TCSANOW, &term);
1060 /* If we exit gracefully, the original settings will be
1061 * restored so the user can see what they're typing. */
691 atexit(restore_term); 1062 atexit(restore_term);
692 } 1063 }
693 1064
694 /* We don't currently require a page for the console. */ 1065 /* We don't currently require any memory for the console, so we ask for
1066 * 0 pages. */
695 dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0, 1067 dev = new_device(devices, LGUEST_DEVICE_T_CONSOLE, 0, 0,
696 STDIN_FILENO, handle_console_input, 1068 STDIN_FILENO, handle_console_input,
697 LGUEST_CONSOLE_DMA_KEY, handle_console_output); 1069 LGUEST_CONSOLE_DMA_KEY, handle_console_output);
1070 /* We store the console state in dev->priv, and initialize it. */
698 dev->priv = malloc(sizeof(struct console_abort)); 1071 dev->priv = malloc(sizeof(struct console_abort));
699 ((struct console_abort *)dev->priv)->count = 0; 1072 ((struct console_abort *)dev->priv)->count = 0;
700 verbose("device %p: console\n", 1073 verbose("device %p: console\n",
701 (void *)(dev->desc->pfn * getpagesize())); 1074 (void *)(dev->desc->pfn * getpagesize()));
702} 1075}
703 1076
1077/* Setting up a block file is also fairly straightforward. */
704static void setup_block_file(const char *filename, struct device_list *devices) 1078static void setup_block_file(const char *filename, struct device_list *devices)
705{ 1079{
706 int fd; 1080 int fd;
@@ -708,20 +1082,47 @@ static void setup_block_file(const char *filename, struct device_list *devices)
708 off64_t *device_len; 1082 off64_t *device_len;
709 struct lguest_block_page *p; 1083 struct lguest_block_page *p;
710 1084
1085 /* We open with O_LARGEFILE because otherwise we get stuck at 2G. We
1086 * open with O_DIRECT because otherwise our benchmarks go much too
1087 * fast. */
711 fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT); 1088 fd = open_or_die(filename, O_RDWR|O_LARGEFILE|O_DIRECT);
1089
1090 /* We want one page, and have no input handler (the block file never
1091 * has anything interesting to say to us). Our timing will be quite
1092 * random, so it should be a reasonable randomness source. */
712 dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1, 1093 dev = new_device(devices, LGUEST_DEVICE_T_BLOCK, 1,
713 LGUEST_DEVICE_F_RANDOMNESS, 1094 LGUEST_DEVICE_F_RANDOMNESS,
714 fd, NULL, 0, handle_block_output); 1095 fd, NULL, 0, handle_block_output);
1096
1097 /* We store the device size in the private area */
715 device_len = dev->priv = malloc(sizeof(*device_len)); 1098 device_len = dev->priv = malloc(sizeof(*device_len));
1099 /* This is the safe way of establishing the size of our device: it
1100 * might be a normal file or an actual block device like /dev/hdb. */
716 *device_len = lseek64(fd, 0, SEEK_END); 1101 *device_len = lseek64(fd, 0, SEEK_END);
717 p = dev->mem;
718 1102
1103 /* The device memory is a "struct lguest_block_page". It's zeroed
1104 * already, we just need to put in the device size. Block devices
1105 * think in sectors (ie. 512 byte chunks), so we translate here. */
1106 p = dev->mem;
719 p->num_sectors = *device_len/512; 1107 p->num_sectors = *device_len/512;
720 verbose("device %p: block %i sectors\n", 1108 verbose("device %p: block %i sectors\n",
721 (void *)(dev->desc->pfn * getpagesize()), p->num_sectors); 1109 (void *)(dev->desc->pfn * getpagesize()), p->num_sectors);
722} 1110}
723 1111
724/* We use fnctl locks to reserve network slots (autocleanup!) */ 1112/*
1113 * Network Devices.
1114 *
1115 * Setting up network devices is quite a pain, because we have three types.
1116 * First, we have the inter-Guest network. This is a file which is mapped into
1117 * the address space of the Guests who are on the network. Because it is a
1118 * shared mapping, the same page underlies all the devices, and they can send
1119 * DMA to each other.
1120 *
1121 * Remember from our network driver, the Guest is told what slot in the page it
1122 * is to use. We use exclusive fcntl locks to reserve a slot. If another
1123 * Guest is using a slot, the lock will fail and we try another. Because fcntl
1124 * locks are cleaned up automatically when we die, this cleverly means that our
1125 * reservation on the slot will vanish if we crash. */
725static unsigned int find_slot(int netfd, const char *filename) 1126static unsigned int find_slot(int netfd, const char *filename)
726{ 1127{
727 struct flock fl; 1128 struct flock fl;
@@ -729,26 +1130,33 @@ static unsigned int find_slot(int netfd, const char *filename)
729 fl.l_type = F_WRLCK; 1130 fl.l_type = F_WRLCK;
730 fl.l_whence = SEEK_SET; 1131 fl.l_whence = SEEK_SET;
731 fl.l_len = 1; 1132 fl.l_len = 1;
1133 /* Try a 1 byte lock in each possible position number */
732 for (fl.l_start = 0; 1134 for (fl.l_start = 0;
733 fl.l_start < getpagesize()/sizeof(struct lguest_net); 1135 fl.l_start < getpagesize()/sizeof(struct lguest_net);
734 fl.l_start++) { 1136 fl.l_start++) {
1137 /* If we succeed, return the slot number. */
735 if (fcntl(netfd, F_SETLK, &fl) == 0) 1138 if (fcntl(netfd, F_SETLK, &fl) == 0)
736 return fl.l_start; 1139 return fl.l_start;
737 } 1140 }
738 errx(1, "No free slots in network file %s", filename); 1141 errx(1, "No free slots in network file %s", filename);
739} 1142}
740 1143
1144/* This function sets up the network file */
741static void setup_net_file(const char *filename, 1145static void setup_net_file(const char *filename,
742 struct device_list *devices) 1146 struct device_list *devices)
743{ 1147{
744 int netfd; 1148 int netfd;
745 struct device *dev; 1149 struct device *dev;
746 1150
1151 /* We don't use open_or_die() here: for friendliness we create the file
1152 * if it doesn't already exist. */
747 netfd = open(filename, O_RDWR, 0); 1153 netfd = open(filename, O_RDWR, 0);
748 if (netfd < 0) { 1154 if (netfd < 0) {
749 if (errno == ENOENT) { 1155 if (errno == ENOENT) {
750 netfd = open(filename, O_RDWR|O_CREAT, 0600); 1156 netfd = open(filename, O_RDWR|O_CREAT, 0600);
751 if (netfd >= 0) { 1157 if (netfd >= 0) {
1158 /* If we succeeded, initialize the file with a
1159 * blank page. */
752 char page[getpagesize()]; 1160 char page[getpagesize()];
753 memset(page, 0, sizeof(page)); 1161 memset(page, 0, sizeof(page));
754 write(netfd, page, sizeof(page)); 1162 write(netfd, page, sizeof(page));
@@ -758,11 +1166,15 @@ static void setup_net_file(const char *filename,
758 err(1, "cannot open net file '%s'", filename); 1166 err(1, "cannot open net file '%s'", filename);
759 } 1167 }
760 1168
1169 /* We need 1 page, and the features indicate the slot to use and that
1170 * no checksum is needed. We never touch this device again; it's
1171 * between the Guests on the network, so we don't register input or
1172 * output handlers. */
761 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, 1173 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
762 find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM, 1174 find_slot(netfd, filename)|LGUEST_NET_F_NOCSUM,
763 -1, NULL, 0, NULL); 1175 -1, NULL, 0, NULL);
764 1176
765 /* We overwrite the /dev/zero mapping with the actual file. */ 1177 /* Map the shared file. */
766 if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE, 1178 if (mmap(dev->mem, getpagesize(), PROT_READ|PROT_WRITE,
767 MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem) 1179 MAP_FIXED|MAP_SHARED, netfd, 0) != dev->mem)
768 err(1, "could not mmap '%s'", filename); 1180 err(1, "could not mmap '%s'", filename);
@@ -770,6 +1182,7 @@ static void setup_net_file(const char *filename,
770 (void *)(dev->desc->pfn * getpagesize()), filename, 1182 (void *)(dev->desc->pfn * getpagesize()), filename,
771 dev->desc->features & ~LGUEST_NET_F_NOCSUM); 1183 dev->desc->features & ~LGUEST_NET_F_NOCSUM);
772} 1184}
1185/*:*/
773 1186
774static u32 str2ip(const char *ipaddr) 1187static u32 str2ip(const char *ipaddr)
775{ 1188{
@@ -779,7 +1192,11 @@ static u32 str2ip(const char *ipaddr)
779 return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3]; 1192 return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3];
780} 1193}
781 1194
782/* adapted from libbridge */ 1195/* This code is "adapted" from libbridge: it attaches the Host end of the
1196 * network device to the bridge device specified by the command line.
1197 *
1198 * This is yet another James Morris contribution (I'm an IP-level guy, so I
1199 * dislike bridging), and I just try not to break it. */
783static void add_to_bridge(int fd, const char *if_name, const char *br_name) 1200static void add_to_bridge(int fd, const char *if_name, const char *br_name)
784{ 1201{
785 int ifidx; 1202 int ifidx;
@@ -798,12 +1215,16 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name)
798 err(1, "can't add %s to bridge %s", if_name, br_name); 1215 err(1, "can't add %s to bridge %s", if_name, br_name);
799} 1216}
800 1217
1218/* This sets up the Host end of the network device with an IP address, brings
1219 * it up so packets will flow, then copies the MAC address into the hwaddr
1220 * pointer (in practice, the Host's slot in the network device's memory). */
801static void configure_device(int fd, const char *devname, u32 ipaddr, 1221static void configure_device(int fd, const char *devname, u32 ipaddr,
802 unsigned char hwaddr[6]) 1222 unsigned char hwaddr[6])
803{ 1223{
804 struct ifreq ifr; 1224 struct ifreq ifr;
805 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; 1225 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
806 1226
1227 /* Don't read these incantations. Just cut & paste them like I did! */
807 memset(&ifr, 0, sizeof(ifr)); 1228 memset(&ifr, 0, sizeof(ifr));
808 strcpy(ifr.ifr_name, devname); 1229 strcpy(ifr.ifr_name, devname);
809 sin->sin_family = AF_INET; 1230 sin->sin_family = AF_INET;
@@ -814,12 +1235,19 @@ static void configure_device(int fd, const char *devname, u32 ipaddr,
814 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) 1235 if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
815 err(1, "Bringing interface %s up", devname); 1236 err(1, "Bringing interface %s up", devname);
816 1237
1238 /* SIOC stands for Socket I/O Control. G means Get (vs S for Set
1239 * above). IF means Interface, and HWADDR is hardware address.
1240 * Simple! */
817 if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) 1241 if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
818 err(1, "getting hw address for %s", devname); 1242 err(1, "getting hw address for %s", devname);
819
820 memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6); 1243 memcpy(hwaddr, ifr.ifr_hwaddr.sa_data, 6);
821} 1244}
822 1245
1246/*L:195 The other kind of network is a Host<->Guest network. This can either
1247 * use bridging or routing, but the principle is the same: it uses the "tun"
1248 * device to inject packets into the Host as if they came in from a normal
1249 * network card. We just shunt packets between the Guest and the tun
1250 * device. */
823static void setup_tun_net(const char *arg, struct device_list *devices) 1251static void setup_tun_net(const char *arg, struct device_list *devices)
824{ 1252{
825 struct device *dev; 1253 struct device *dev;
@@ -828,36 +1256,56 @@ static void setup_tun_net(const char *arg, struct device_list *devices)
828 u32 ip; 1256 u32 ip;
829 const char *br_name = NULL; 1257 const char *br_name = NULL;
830 1258
1259 /* We open the /dev/net/tun device and tell it we want a tap device. A
1260 * tap device is like a tun device, only somehow different. To tell
1261 * the truth, I completely blundered my way through this code, but it
1262 * works now! */
831 netfd = open_or_die("/dev/net/tun", O_RDWR); 1263 netfd = open_or_die("/dev/net/tun", O_RDWR);
832 memset(&ifr, 0, sizeof(ifr)); 1264 memset(&ifr, 0, sizeof(ifr));
833 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1265 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
834 strcpy(ifr.ifr_name, "tap%d"); 1266 strcpy(ifr.ifr_name, "tap%d");
835 if (ioctl(netfd, TUNSETIFF, &ifr) != 0) 1267 if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
836 err(1, "configuring /dev/net/tun"); 1268 err(1, "configuring /dev/net/tun");
1269 /* We don't need checksums calculated for packets coming in this
1270 * device: trust us! */
837 ioctl(netfd, TUNSETNOCSUM, 1); 1271 ioctl(netfd, TUNSETNOCSUM, 1);
838 1272
839 /* You will be peer 1: we should create enough jitter to randomize */ 1273 /* We create the net device with 1 page, using the features field of
1274 * the descriptor to tell the Guest it is in slot 1 (NET_PEERNUM), and
1275 * that the device has fairly random timing. We do *not* specify
1276 * LGUEST_NET_F_NOCSUM: these packets can reach the real world.
1277 *
1278 * We will put our MAC address in slot 0 for the Guest to see, so
1279 * it will send packets to us using the key "peer_offset(0)": */
840 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1, 1280 dev = new_device(devices, LGUEST_DEVICE_T_NET, 1,
841 NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd, 1281 NET_PEERNUM|LGUEST_DEVICE_F_RANDOMNESS, netfd,
842 handle_tun_input, peer_offset(0), handle_tun_output); 1282 handle_tun_input, peer_offset(0), handle_tun_output);
1283
1284 /* We keep a flag which says whether we've seen packets come out from
1285 * this network device. */
843 dev->priv = malloc(sizeof(bool)); 1286 dev->priv = malloc(sizeof(bool));
844 *(bool *)dev->priv = false; 1287 *(bool *)dev->priv = false;
845 1288
1289 /* We need a socket to perform the magic network ioctls to bring up the
1290 * tap interface, connect to the bridge etc. Any socket will do! */
846 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 1291 ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
847 if (ipfd < 0) 1292 if (ipfd < 0)
848 err(1, "opening IP socket"); 1293 err(1, "opening IP socket");
849 1294
1295 /* If the command line was --tunnet=bridge:<name> do bridging. */
850 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { 1296 if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
851 ip = INADDR_ANY; 1297 ip = INADDR_ANY;
852 br_name = arg + strlen(BRIDGE_PFX); 1298 br_name = arg + strlen(BRIDGE_PFX);
853 add_to_bridge(ipfd, ifr.ifr_name, br_name); 1299 add_to_bridge(ipfd, ifr.ifr_name, br_name);
854 } else 1300 } else /* It is an IP address to set up the device with */
855 ip = str2ip(arg); 1301 ip = str2ip(arg);
856 1302
857 /* We are peer 0, ie. first slot. */ 1303 /* We are peer 0, ie. first slot, so we hand dev->mem to this routine
1304 * to write the MAC address at the start of the device memory. */
858 configure_device(ipfd, ifr.ifr_name, ip, dev->mem); 1305 configure_device(ipfd, ifr.ifr_name, ip, dev->mem);
859 1306
860 /* Set "promisc" bit: we want every single packet. */ 1307 /* Set "promisc" bit: we want every single packet if we're going to
1308 * bridge to other machines (and otherwise it doesn't matter). */
861 *((u8 *)dev->mem) |= 0x1; 1309 *((u8 *)dev->mem) |= 0x1;
862 1310
863 close(ipfd); 1311 close(ipfd);
@@ -868,7 +1316,10 @@ static void setup_tun_net(const char *arg, struct device_list *devices)
868 if (br_name) 1316 if (br_name)
869 verbose("attached to bridge: %s\n", br_name); 1317 verbose("attached to bridge: %s\n", br_name);
870} 1318}
1319/* That's the end of device setup. */
871 1320
1321/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves
1322 * its input and output, and finally, lays it to rest. */
872static void __attribute__((noreturn)) 1323static void __attribute__((noreturn))
873run_guest(int lguest_fd, struct device_list *device_list) 1324run_guest(int lguest_fd, struct device_list *device_list)
874{ 1325{
@@ -880,20 +1331,37 @@ run_guest(int lguest_fd, struct device_list *device_list)
880 /* We read from the /dev/lguest device to run the Guest. */ 1331 /* We read from the /dev/lguest device to run the Guest. */
881 readval = read(lguest_fd, arr, sizeof(arr)); 1332 readval = read(lguest_fd, arr, sizeof(arr));
882 1333
1334 /* The read can only really return sizeof(arr) (the Guest did a
1335 * SEND_DMA to us), or an error. */
1336
1337 /* For a successful read, arr[0] is the address of the "struct
1338 * lguest_dma", and arr[1] is the key the Guest sent to. */
883 if (readval == sizeof(arr)) { 1339 if (readval == sizeof(arr)) {
884 handle_output(lguest_fd, arr[0], arr[1], device_list); 1340 handle_output(lguest_fd, arr[0], arr[1], device_list);
885 continue; 1341 continue;
1342 /* ENOENT means the Guest died. Reading tells us why. */
886 } else if (errno == ENOENT) { 1343 } else if (errno == ENOENT) {
887 char reason[1024] = { 0 }; 1344 char reason[1024] = { 0 };
888 read(lguest_fd, reason, sizeof(reason)-1); 1345 read(lguest_fd, reason, sizeof(reason)-1);
889 errx(1, "%s", reason); 1346 errx(1, "%s", reason);
1347 /* EAGAIN means the waker wanted us to look at some input.
1348 * Anything else means a bug or incompatible change. */
890 } else if (errno != EAGAIN) 1349 } else if (errno != EAGAIN)
891 err(1, "Running guest failed"); 1350 err(1, "Running guest failed");
1351
1352 /* Service input, then unset the BREAK which releases
1353 * the Waker. */
892 handle_input(lguest_fd, device_list); 1354 handle_input(lguest_fd, device_list);
893 if (write(lguest_fd, args, sizeof(args)) < 0) 1355 if (write(lguest_fd, args, sizeof(args)) < 0)
894 err(1, "Resetting break"); 1356 err(1, "Resetting break");
895 } 1357 }
896} 1358}
1359/*
1360 * This is the end of the Launcher.
1361 *
1362 * But wait! We've seen I/O from the Launcher, and we've seen I/O from the
1363 * Drivers. If we were to see the Host kernel I/O code, our understanding
1364 * would be complete... :*/
897 1365
898static struct option opts[] = { 1366static struct option opts[] = {
899 { "verbose", 0, NULL, 'v' }, 1367 { "verbose", 0, NULL, 'v' },
@@ -911,20 +1379,49 @@ static void usage(void)
911 "<mem-in-mb> vmlinux [args...]"); 1379 "<mem-in-mb> vmlinux [args...]");
912} 1380}
913 1381
1382/*L:100 The Launcher code itself takes us out into userspace, that scary place
1383 * where pointers run wild and free! Unfortunately, like most userspace
1384 * programs, it's quite boring (which is why everyone likes to hack on the
1385 * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it
1386 * will get you through this section. Or, maybe not.
1387 *
1388 * The Launcher binary sits up high, usually starting at address 0xB8000000.
1389 * Everything below this is the "physical" memory for the Guest. For example,
1390 * if the Guest were to write a "1" at physical address 0, we would see a "1"
1391 * in the Launcher at "(int *)0". Guest physical == Launcher virtual.
1392 *
1393 * This can be tough to get your head around, but usually it just means that we
1394 * don't need to do any conversion when the Guest gives us its "physical"
1395 * addresses.
1396 */
914int main(int argc, char *argv[]) 1397int main(int argc, char *argv[])
915{ 1398{
1399 /* Memory, top-level pagetable, code startpoint, PAGE_OFFSET and size
1400 * of the (optional) initrd. */
916 unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0; 1401 unsigned long mem = 0, pgdir, start, page_offset, initrd_size = 0;
1402 /* A temporary and the /dev/lguest file descriptor. */
917 int i, c, lguest_fd; 1403 int i, c, lguest_fd;
1404 /* The list of Guest devices, based on command line arguments. */
918 struct device_list device_list; 1405 struct device_list device_list;
1406 /* The boot information for the Guest: at guest-physical address 0. */
919 void *boot = (void *)0; 1407 void *boot = (void *)0;
1408 /* If they specify an initrd file to load. */
920 const char *initrd_name = NULL; 1409 const char *initrd_name = NULL;
921 1410
1411 /* First we initialize the device list. Since console and network
1412 * devices receive input from a file descriptor, we keep an fdset
1413 * (infds) and the maximum fd number (max_infd) with the head of the
1414 * list. We also keep a pointer to the last device, for easy appending
1415 * to the list. */
922 device_list.max_infd = -1; 1416 device_list.max_infd = -1;
923 device_list.dev = NULL; 1417 device_list.dev = NULL;
924 device_list.lastdev = &device_list.dev; 1418 device_list.lastdev = &device_list.dev;
925 FD_ZERO(&device_list.infds); 1419 FD_ZERO(&device_list.infds);
926 1420
927 /* We need to know how much memory so we can allocate devices. */ 1421 /* We need to know how much memory so we can set up the device
1422 * descriptor and memory pages for the devices as we parse the command
1423 * line. So we quickly look through the arguments to find the amount
1424 * of memory now. */
928 for (i = 1; i < argc; i++) { 1425 for (i = 1; i < argc; i++) {
929 if (argv[i][0] != '-') { 1426 if (argv[i][0] != '-') {
930 mem = top = atoi(argv[i]) * 1024 * 1024; 1427 mem = top = atoi(argv[i]) * 1024 * 1024;
@@ -933,6 +1430,8 @@ int main(int argc, char *argv[])
933 break; 1430 break;
934 } 1431 }
935 } 1432 }
1433
1434 /* The options are fairly straight-forward */
936 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { 1435 while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
937 switch (c) { 1436 switch (c) {
938 case 'v': 1437 case 'v':
@@ -955,42 +1454,71 @@ int main(int argc, char *argv[])
955 usage(); 1454 usage();
956 } 1455 }
957 } 1456 }
1457 /* After the other arguments we expect memory and kernel image name,
1458 * followed by command line arguments for the kernel. */
958 if (optind + 2 > argc) 1459 if (optind + 2 > argc)
959 usage(); 1460 usage();
960 1461
961 /* We need a console device */ 1462 /* We always have a console device */
962 setup_console(&device_list); 1463 setup_console(&device_list);
963 1464
964 /* First we map /dev/zero over all of guest-physical memory. */ 1465 /* We start by mapping anonymous pages over all of guest-physical
1466 * memory range. This fills it with 0, and ensures that the Guest
1467 * won't be killed when it tries to access it. */
965 map_zeroed_pages(0, mem / getpagesize()); 1468 map_zeroed_pages(0, mem / getpagesize());
966 1469
967 /* Now we load the kernel */ 1470 /* Now we load the kernel */
968 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY), 1471 start = load_kernel(open_or_die(argv[optind+1], O_RDONLY),
969 &page_offset); 1472 &page_offset);
970 1473
971 /* Map the initrd image if requested */ 1474 /* Map the initrd image if requested (at top of physical memory) */
972 if (initrd_name) { 1475 if (initrd_name) {
973 initrd_size = load_initrd(initrd_name, mem); 1476 initrd_size = load_initrd(initrd_name, mem);
1477 /* These are the locations in the Linux boot header where the
1478 * start and size of the initrd are expected to be found. */
974 *(unsigned long *)(boot+0x218) = mem - initrd_size; 1479 *(unsigned long *)(boot+0x218) = mem - initrd_size;
975 *(unsigned long *)(boot+0x21c) = initrd_size; 1480 *(unsigned long *)(boot+0x21c) = initrd_size;
1481 /* The bootloader type 0xFF means "unknown"; that's OK. */
976 *(unsigned char *)(boot+0x210) = 0xFF; 1482 *(unsigned char *)(boot+0x210) = 0xFF;
977 } 1483 }
978 1484
979 /* Set up the initial linar pagetables. */ 1485 /* Set up the initial linear pagetables, starting below the initrd. */
980 pgdir = setup_pagetables(mem, initrd_size, page_offset); 1486 pgdir = setup_pagetables(mem, initrd_size, page_offset);
981 1487
982 /* E820 memory map: ours is a simple, single region. */ 1488 /* The Linux boot header contains an "E820" memory map: ours is a
1489 * simple, single region. */
983 *(char*)(boot+E820NR) = 1; 1490 *(char*)(boot+E820NR) = 1;
984 *((struct e820entry *)(boot+E820MAP)) 1491 *((struct e820entry *)(boot+E820MAP))
985 = ((struct e820entry) { 0, mem, E820_RAM }); 1492 = ((struct e820entry) { 0, mem, E820_RAM });
986 /* Command line pointer and command line (at 4096) */ 1493 /* The boot header contains a command line pointer: we put the command
1494 * line after the boot header (at address 4096) */
987 *(void **)(boot + 0x228) = boot + 4096; 1495 *(void **)(boot + 0x228) = boot + 4096;
988 concat(boot + 4096, argv+optind+2); 1496 concat(boot + 4096, argv+optind+2);
989 /* Paravirt type: 1 == lguest */ 1497
1498 /* The guest type value of "1" tells the Guest it's under lguest. */
990 *(int *)(boot + 0x23c) = 1; 1499 *(int *)(boot + 0x23c) = 1;
991 1500
1501 /* We tell the kernel to initialize the Guest: this returns the open
1502 * /dev/lguest file descriptor. */
992 lguest_fd = tell_kernel(pgdir, start, page_offset); 1503 lguest_fd = tell_kernel(pgdir, start, page_offset);
1504
1505 /* We fork off a child process, which wakes the Launcher whenever one
1506 * of the input file descriptors needs attention. Otherwise we would
1507 * run the Guest until it tries to output something. */
993 waker_fd = setup_waker(lguest_fd, &device_list); 1508 waker_fd = setup_waker(lguest_fd, &device_list);
994 1509
1510 /* Finally, run the Guest. This doesn't return. */
995 run_guest(lguest_fd, &device_list); 1511 run_guest(lguest_fd, &device_list);
996} 1512}
1513/*:*/
1514
1515/*M:999
1516 * Mastery is done: you now know everything I do.
1517 *
1518 * But surely you have seen code, features and bugs in your wanderings which
1519 * you now yearn to attack? That is the real game, and I look forward to you
1520 * patching and forking lguest into the Your-Name-Here-visor.
1521 *
1522 * Farewell, and good coding!
1523 * Rusty Russell.
1524 */
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
new file mode 100644
index 000000000000..5fbcc22c98e9
--- /dev/null
+++ b/Documentation/memory-hotplug.txt
@@ -0,0 +1,322 @@
1==============
2Memory Hotplug
3==============
4
5Last Updated: Jul 28 2007
6
7This document is about memory hotplug including how-to-use and current status.
8Because Memory Hotplug is still under development, contents of this text will
9be changed often.
10
111. Introduction
12 1.1 purpose of memory hotplug
13 1.2. Phases of memory hotplug
14 1.3. Unit of Memory online/offline operation
152. Kernel Configuration
163. sysfs files for memory hotplug
174. Physical memory hot-add phase
18 4.1 Hardware(Firmware) Support
19 4.2 Notify memory hot-add event by hand
205. Logical Memory hot-add phase
21 5.1. State of memory
22 5.2. How to online memory
236. Logical memory remove
24 6.1 Memory offline and ZONE_MOVABLE
25 6.2. How to offline memory
267. Physical memory remove
278. Future Work List
28
29Note(1): x86_64 has a special implementation for memory hotplug.
30 This text does not describe it.
31Note(2): This text assumes that sysfs is mounted at /sys.
32
33
34---------------
351. Introduction
36---------------
37
381.1 purpose of memory hotplug
39------------
40Memory Hotplug allows users to increase/decrease the amount of memory.
41Generally, there are two purposes.
42
43(A) For changing the amount of memory.
44 This is to allow a feature like capacity on demand.
45(B) For installing/removing DIMMs or NUMA-nodes physically.
46 This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
47
48(A) is required by highly virtualized environments and (B) is required by
49hardware which supports memory power management.
50
51Linux memory hotplug is designed for both purposes.
52
53
541.2. Phases of memory hotplug
55---------------
56There are 2 phases in Memory Hotplug.
57 1) Physical Memory Hotplug phase
58 2) Logical Memory Hotplug phase.
59
60The First phase is to communicate with hardware/firmware and make/erase
61environment for hotplugged memory. Basically, this phase is necessary
62for the purpose (B), but this is good phase for communication between
63highly virtualized environments too.
64
65When memory is hotplugged, the kernel recognizes new memory, makes new memory
66management tables, and makes sysfs files for new memory's operation.
67
68If firmware supports notification of connection of new memory to OS,
69this phase is triggered automatically. ACPI can notify this event. If not,
70"probe" operation by system administration is used instead.
71(see Section 4.).
72
73Logical Memory Hotplug phase is to change memory state into
74available/unavailable for users. Amount of memory from user's view is
75changed by this phase. The kernel makes all memory in it as free pages
76when a memory range is available.
77
78In this document, this phase is described as online/offline.
79
80Logical Memory Hotplug phase is triggered by write of sysfs file by system
81administrator. For the hot-add case, it must be executed after Physical Hotplug
82phase by hand.
83(However, if you write udev's hotplug scripts for memory hotplug, these
84 phases can be executed in a seamless way.)
85
86
871.3. Unit of Memory online/offline operation
88------------
89Memory hotplug uses SPARSEMEM memory model. SPARSEMEM divides the whole memory
90into chunks of the same size. The chunk is called a "section". The size of
91a section is architecture dependent. For example, power uses 16MiB, ia64 uses
921GiB. The unit of online/offline operation is "one section". (see Section 3.)
93
94To determine the size of sections, please read this file:
95
96/sys/devices/system/memory/block_size_bytes
97
98This file shows the size of sections in bytes.
99
100-----------------------
1012. Kernel Configuration
102-----------------------
103To use memory hotplug feature, kernel must be compiled with following
104config options.
105
106- For all memory hotplug
107 Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
108 Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
109
110- To enable memory removal, the followings are also necessary
111 Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
112 Page Migration (CONFIG_MIGRATION)
113
114- For ACPI memory hotplug, the followings are also necessary
115 Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
116 This option can be kernel module.
117
118- As a related configuration, if your box has a feature of NUMA-node hotplug
119 via ACPI, then this option is necessary too.
120 ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
121 (CONFIG_ACPI_CONTAINER).
122 This option can be kernel module too.
123
124--------------------------------
1253 sysfs files for memory hotplug
126--------------------------------
127All sections have their device information under /sys/devices/system/memory as
128
129/sys/devices/system/memory/memoryXXX
130(XXX is section id.)
131
132Now, XXX is defined as start_address_of_section / section_size.
133
134For example, assume 1GiB section size. A device for a memory starting at
1350x100000000 is /sys/devices/system/memory/memory4
136(0x100000000 / 1GiB = 4)
137This device covers address range [0x100000000 ... 0x140000000)
138
139Under each section, you can see 3 files.
140
141/sys/devices/system/memory/memoryXXX/phys_index
142/sys/devices/system/memory/memoryXXX/phys_device
143/sys/devices/system/memory/memoryXXX/state
144
145'phys_index' : read-only and contains section id, same as XXX.
146'state' : read-write
147 at read: contains online/offline state of memory.
148 at write: user can specify "online", "offline" command
149'phys_device': read-only: designed to show the name of physical memory device.
150 This is not well implemented now.
151
152NOTE:
153 These directories/files appear after physical memory hotplug phase.
154
155
156--------------------------------
1574. Physical memory hot-add phase
158--------------------------------
159
1604.1 Hardware(Firmware) Support
161------------
162On x86_64/ia64 platform, memory hotplug by ACPI is supported.
163
164In general, the firmware (ACPI) which supports memory hotplug defines
165memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
166Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
167script. This will be done automatically.
168
169But scripts for memory hotplug are not contained in generic udev package(now).
170You may have to write it by yourself or online/offline memory by hand.
171Please see "How to online memory", "How to offline memory" in this text.
172
173If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
174"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
175calls hotplug code for all of objects which are defined in it.
176If memory device is found, memory hotplug code will be called.
177
178
1794.2 Notify memory hot-add event by hand
180------------
181In some environments, especially virtualized environment, firmware will not
182notify memory hotplug event to the kernel. For such environment, "probe"
183interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
184
185Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not
186contain highly architecture-specific code. Please add config if you need "probe"
187interface.
188
189Probe interface is located at
190/sys/devices/system/memory/probe
191
192You can tell the physical address of new memory to the kernel by
193
194% echo start_address_of_new_memory > /sys/devices/system/memory/probe
195
196Then, [start_address_of_new_memory, start_address_of_new_memory + section_size)
197memory range is hot-added. In this case, hotplug script is not called (in
198current implementation). You'll have to online memory by yourself.
199Please see "How to online memory" in this text.
200
201
202
203------------------------------
2045. Logical Memory hot-add phase
205------------------------------
206
2075.1. State of memory
208------------
209To see (online/offline) state of memory section, read 'state' file.
210
211% cat /sys/devices/system/memory/memoryXXX/state
212
213
214If the memory section is online, you'll read "online".
215If the memory section is offline, you'll read "offline".
216
217
2185.2. How to online memory
219------------
220Even if the memory is hot-added, it is not at ready-to-use state.
221For using newly added memory, you have to "online" the memory section.
222
223For onlining, you have to write "online" to the section's state file as:
224
225% echo online > /sys/devices/system/memory/memoryXXX/state
226
227After this, section memoryXXX's state will be 'online' and the amount of
228available memory will be increased.
229
230Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
231This may be changed in future.
232
233
234
235------------------------
2366. Logical memory remove
237------------------------
238
2396.1 Memory offline and ZONE_MOVABLE
240------------
241Memory offlining is more complicated than memory online. Because memory offline
242has to make the whole memory section be unused, memory offline can fail if
243the section includes memory which cannot be freed.
244
245In general, memory offline can use 2 techniques.
246
247(1) reclaim and free all memory in the section.
248(2) migrate all pages in the section.
249
250In the current implementation, Linux's memory offline uses method (2), freeing
251all pages in the section by page migration. But not all pages are
252migratable. Under current Linux, migratable pages are anonymous pages and
253page caches. For offlining a section by migration, the kernel has to guarantee
254that the section contains only migratable pages.
255
256Now, a boot option for making a section which consists of migratable pages is
257supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
258create ZONE_MOVABLE...a zone which is just used for movable pages.
259(See also Documentation/kernel-parameters.txt)
260
261Assume the system has "TOTAL" amount of memory at boot time, this boot option
262creates ZONE_MOVABLE as following.
263
2641) When kernelcore=YYYY boot option is used,
265 Size of memory not for movable pages (not for offline) is YYYY.
266 Size of memory for movable pages (for offline) is TOTAL-YYYY.
267
2682) When movablecore=ZZZZ boot option is used,
269 Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
270 Size of memory for movable pages (for offline) is ZZZZ.
271
272
273Note) Unfortunately, there is no information to show which section belongs
274to ZONE_MOVABLE. This is TBD.
275
276
2776.2. How to offline memory
278------------
279You can offline a section by using the same sysfs interface that was used in
280memory onlining.
281
282% echo offline > /sys/devices/system/memory/memoryXXX/state
283
284If offline succeeds, the state of the memory section is changed to be "offline".
285If it fails, some error code (like -EBUSY) will be returned by the kernel.
286Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
287If it doesn't contain 'unmovable' memory, you'll get success.
288
289A section under ZONE_MOVABLE is considered to be able to be offlined easily.
290But under some busy state, it may return -EBUSY. Even if a memory section
291cannot be offlined due to -EBUSY, you can retry offlining it and may be able to
292offline it (or not).
293(For example, a page is referred to by some kernel internal call and released
294 soon.)
295
296Consideration:
297Memory hotplug's design direction is to make the possibility of memory offlining
298higher and to guarantee unplugging memory under any situation. But it needs
299more work. Returning -EBUSY under some situation may be good because the user
300can decide to retry more or not by himself. Currently, memory offlining code
301does some amount of retry with 120 seconds timeout.
302
303-------------------------
3047. Physical memory remove
305-------------------------
306Need more implementation yet....
307 - Notification completion of remove works by OS to firmware.
308 - Guard from remove if not yet.
309
310--------------
3118. Future Work
312--------------
313 - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
314 sysctl or new control file.
315 - showing memory section and physical device relationship.
316 - showing memory section and node relationship (maybe good for NUMA)
317 - showing memory section is under ZONE_MOVABLE or not
318 - test and make it better memory offlining.
319 - support HugeTLB page migration and offlining.
320 - memmap removing at memory offline.
321 - physical remove memory.
322
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
index 16feebb7bdc0..84901e7c0508 100644
--- a/Documentation/sched-design-CFS.txt
+++ b/Documentation/sched-design-CFS.txt
@@ -83,7 +83,7 @@ Some implementation details:
83 CFS uses nanosecond granularity accounting and does not rely on any 83 CFS uses nanosecond granularity accounting and does not rely on any
84 jiffies or other HZ detail. Thus the CFS scheduler has no notion of 84 jiffies or other HZ detail. Thus the CFS scheduler has no notion of
85 'timeslices' and has no heuristics whatsoever. There is only one 85 'timeslices' and has no heuristics whatsoever. There is only one
86 central tunable: 86 central tunable (you have to switch on CONFIG_SCHED_DEBUG):
87 87
88 /proc/sys/kernel/sched_granularity_ns 88 /proc/sys/kernel/sched_granularity_ns
89 89
diff --git a/Documentation/sched-nice-design.txt b/Documentation/sched-nice-design.txt
new file mode 100644
index 000000000000..e2bae5a577e3
--- /dev/null
+++ b/Documentation/sched-nice-design.txt
@@ -0,0 +1,108 @@
1This document explains the thinking about the revamped and streamlined
2nice-levels implementation in the new Linux scheduler.
3
4Nice levels were always pretty weak under Linux and people continuously
5pestered us to make nice +19 tasks use up much less CPU time.
6
7Unfortunately that was not that easy to implement under the old
8scheduler, (otherwise we'd have done it long ago) because nice level
9support was historically coupled to timeslice length, and timeslice
10units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
11
12In the O(1) scheduler (in 2003) we changed negative nice levels to be
13much stronger than they were before in 2.4 (and people were happy about
14that change), and we also intentionally calibrated the linear timeslice
15rule so that nice +19 level would be _exactly_ 1 jiffy. To better
16understand it, the timeslice graph went like this (cheesy ASCII art
17alert!):
18
19
20 A
21 \ | [timeslice length]
22 \ |
23 \ |
24 \ |
25 \ |
26 \|___100msecs
27 |^ . _
28 | ^ . _
29 | ^ . _
30 -*----------------------------------*-----> [nice level]
31 -20 | +19
32 |
33 |
34
35So that if someone wanted to really renice tasks, +19 would give a much
36bigger hit than the normal linear rule would do. (The solution of
37changing the ABI to extend priorities was discarded early on.)
38
39This approach worked to some degree for some time, but later on with
40HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
41we felt to be a bit excessive. Excessive _not_ because it's too small of
42a CPU utilization, but because it causes too frequent (once per
43millisec) rescheduling. (and would thus trash the cache, etc. Remember,
44this was long ago when hardware was weaker and caches were smaller, and
45people were running number crunching apps at nice +19.)
46
47So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
48right minimal granularity - and this translates to 5% CPU utilization.
49But the fundamental HZ-sensitive property for nice+19 still remained,
50and we never got a single complaint about nice +19 being too _weak_ in
51terms of CPU utilization, we only got complaints about it (still) being
52too _strong_ :-)
53
54To sum it up: we always wanted to make nice levels more consistent, but
55within the constraints of HZ and jiffies and their nasty design level
56coupling to timeslices and granularity it was not really viable.
57
58The second (less frequent but still periodically occurring) complaint
59about Linux's nice level support was its asymmetry around the origin
60(which you can see demonstrated in the picture above), or more
61accurately: the fact that nice level behavior depended on the _absolute_
62nice level as well, while the nice API itself is fundamentally
63"relative":
64
65 int nice(int inc);
66
67 asmlinkage long sys_nice(int increment)
68
69(the first one is the glibc API, the second one is the syscall API.)
70Note that the 'inc' is relative to the current nice level. Tools like
71bash's "nice" command mirror this relative API.
72
73With the old scheduler, if you for example started a niced task with +1
74and another task with +2, the CPU split between the two tasks would
75depend on the nice level of the parent shell - if it was at nice -10 the
76CPU split was different than if it was at +5 or +10.
77
78A third complaint against Linux's nice level support was that negative
79nice levels were not 'punchy enough', so lots of people had to resort to
80run audio (and other multimedia) apps under RT priorities such as
81SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
82proof, and a buggy SCHED_FIFO app can also lock up the system for good.
83
84The new scheduler in v2.6.23 addresses all three types of complaints:
85
86To address the first complaint (of nice levels being not "punchy"
87enough), the scheduler was decoupled from 'time slice' and HZ concepts
88(and granularity was made a separate concept from nice levels) and thus
89it was possible to implement better and more consistent nice +19
90support: with the new scheduler nice +19 tasks get a HZ-independent
911.5%, instead of the variable 3%-5%-9% range they got in the old
92scheduler.
93
94To address the second complaint (of nice levels not being consistent),
95the new scheduler makes nice(1) have the same CPU utilization effect on
96tasks, regardless of their absolute nice levels. So on the new
97scheduler, running a nice +10 and a nice +11 task has the same CPU
98utilization "split" between them as running a nice -5 and a nice -4
99task. (one will get 55% of the CPU, the other 45%.) That is why nice
100levels were changed to be "multiplicative" (or exponential) - that way
101it does not matter which nice level you start out from, the 'relative
102result' will always be the same.
103
104The third complaint (of negative nice levels not being "punchy" enough
105and forcing audio apps to run under the more dangerous SCHED_FIFO
106scheduling policy) is addressed by the new scheduler almost
107automatically: stronger negative nice levels are an automatic
108side-effect of the recalibrated dynamic range of nice levels.
diff --git a/Documentation/sched-stats.txt b/Documentation/sched-stats.txt
index 6f72021aae51..442e14d35dea 100644
--- a/Documentation/sched-stats.txt
+++ b/Documentation/sched-stats.txt
@@ -1,10 +1,11 @@
1Version 10 of schedstats includes support for sched_domains, which 1Version 14 of schedstats includes support for sched_domains, which hit the
2hit the mainline kernel in 2.6.7. Some counters make more sense to be 2mainline kernel in 2.6.20 although it is identical to the stats from version
3per-runqueue; other to be per-domain. Note that domains (and their associated 312 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
4information) will only be pertinent and available on machines utilizing 4release). Some counters make more sense to be per-runqueue; other to be
5CONFIG_SMP. 5per-domain. Note that domains (and their associated information) will only
6 6be pertinent and available on machines utilizing CONFIG_SMP.
7In version 10 of schedstat, there is at least one level of domain 7
8In version 14 of schedstat, there is at least one level of domain
8statistics for each cpu listed, and there may well be more than one 9statistics for each cpu listed, and there may well be more than one
9domain. Domains have no particular names in this implementation, but 10domain. Domains have no particular names in this implementation, but
10the highest numbered one typically arbitrates balancing across all the 11the highest numbered one typically arbitrates balancing across all the
@@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
27 28
28CPU statistics 29CPU statistics
29-------------- 30--------------
30cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 31cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
31 32
32NOTE: In the sched_yield() statistics, the active queue is considered empty 33NOTE: In the sched_yield() statistics, the active queue is considered empty
33 if it has only one process in it, since obviously the process calling 34 if it has only one process in it, since obviously the process calling
@@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
39 3) # of times just the expired queue was empty 40 3) # of times just the expired queue was empty
40 4) # of times sched_yield() was called 41 4) # of times sched_yield() was called
41 42
42Next four are schedule() statistics: 43Next three are schedule() statistics:
43 5) # of times the active queue had at least one other process on it 44 5) # of times we switched to the expired queue and reused it
44 6) # of times we switched to the expired queue and reused it 45 6) # of times schedule() was called
45 7) # of times schedule() was called 46 7) # of times schedule() left the processor idle
46 8) # of times schedule() left the processor idle
47
48Next four are active_load_balance() statistics:
49 9) # of times active_load_balance() was called
50 10) # of times active_load_balance() caused this cpu to gain a task
51 11) # of times active_load_balance() caused this cpu to lose a task
52 12) # of times active_load_balance() tried to move a task and failed
53
54Next three are try_to_wake_up() statistics:
55 13) # of times try_to_wake_up() was called
56 14) # of times try_to_wake_up() successfully moved the awakening task
57 15) # of times try_to_wake_up() attempted to move the awakening task
58
59Next two are wake_up_new_task() statistics:
60 16) # of times wake_up_new_task() was called
61 17) # of times wake_up_new_task() successfully moved the new task
62
63Next one is a sched_migrate_task() statistic:
64 18) # of times sched_migrate_task() was called
65 47
66Next one is a sched_balance_exec() statistic: 48Next two are try_to_wake_up() statistics:
67 19) # of times sched_balance_exec() was called 49 8) # of times try_to_wake_up() was called
50 9) # of times try_to_wake_up() was called to wake up the local cpu
68 51
69Next three are statistics describing scheduling latency: 52Next three are statistics describing scheduling latency:
70 20) sum of all time spent running by tasks on this processor (in ms) 53 10) sum of all time spent running by tasks on this processor (in jiffies)
71 21) sum of all time spent waiting to run by tasks on this processor (in ms) 54 11) sum of all time spent waiting to run by tasks on this processor (in
72 22) # of tasks (not necessarily unique) given to the processor 55 jiffies)
73 56 12) # of timeslices run on this cpu
74The last six are statistics dealing with pull_task():
75 23) # of times pull_task() moved a task to this cpu when newly idle
76 24) # of times pull_task() stole a task from this cpu when another cpu
77 was newly idle
78 25) # of times pull_task() moved a task to this cpu when idle
79 26) # of times pull_task() stole a task from this cpu when another cpu
80 was idle
81 27) # of times pull_task() moved a task to this cpu when busy
82 28) # of times pull_task() stole a task from this cpu when another cpu
83 was busy
84 57
85 58
86Domain statistics 59Domain statistics
@@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
89CONFIG_SMP is not defined, *no* domains are utilized and these lines 62CONFIG_SMP is not defined, *no* domains are utilized and these lines
90will not appear in the output.) 63will not appear in the output.)
91 64
92domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 65domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
93 66
94The first field is a bit mask indicating what cpus this domain operates over. 67The first field is a bit mask indicating what cpus this domain operates over.
95 68
96The next fifteen are a variety of load_balance() statistics: 69The next 24 are a variety of load_balance() statistics grouped into types
97 70of idleness (idle, busy, and newly idle):
98 1) # of times in this domain load_balance() was called when the cpu 71
99 was idle 72 1) # of times in this domain load_balance() was called when the
100 2) # of times in this domain load_balance() was called when the cpu 73 cpu was idle
101 was busy 74 2) # of times in this domain load_balance() checked but found
102 3) # of times in this domain load_balance() was called when the cpu 75 the load did not require balancing when the cpu was idle
103 was just becoming idle 76 3) # of times in this domain load_balance() tried to move one or
104 4) # of times in this domain load_balance() tried to move one or more 77 more tasks and failed, when the cpu was idle
105 tasks and failed, when the cpu was idle 78 4) sum of imbalances discovered (if any) with each call to
106 5) # of times in this domain load_balance() tried to move one or more 79 load_balance() in this domain when the cpu was idle
107 tasks and failed, when the cpu was busy 80 5) # of times in this domain pull_task() was called when the cpu
108 6) # of times in this domain load_balance() tried to move one or more 81 was idle
109 tasks and failed, when the cpu was just becoming idle 82 6) # of times in this domain pull_task() was called even though
110 7) sum of imbalances discovered (if any) with each call to 83 the target task was cache-hot when idle
111 load_balance() in this domain when the cpu was idle 84 7) # of times in this domain load_balance() was called but did
112 8) sum of imbalances discovered (if any) with each call to 85 not find a busier queue while the cpu was idle
113 load_balance() in this domain when the cpu was busy 86 8) # of times in this domain a busier queue was found while the
114 9) sum of imbalances discovered (if any) with each call to 87 cpu was idle but no busier group was found
115 load_balance() in this domain when the cpu was just becoming idle 88
116 10) # of times in this domain load_balance() was called but did not find 89 9) # of times in this domain load_balance() was called when the
117 a busier queue while the cpu was idle 90 cpu was busy
118 11) # of times in this domain load_balance() was called but did not find 91 10) # of times in this domain load_balance() checked but found the
119 a busier queue while the cpu was busy 92 load did not require balancing when busy
120 12) # of times in this domain load_balance() was called but did not find 93 11) # of times in this domain load_balance() tried to move one or
121 a busier queue while the cpu was just becoming idle 94 more tasks and failed, when the cpu was busy
122 13) # of times in this domain a busier queue was found while the cpu was 95 12) sum of imbalances discovered (if any) with each call to
123 idle but no busier group was found 96 load_balance() in this domain when the cpu was busy
124 14) # of times in this domain a busier queue was found while the cpu was 97 13) # of times in this domain pull_task() was called when busy
125 busy but no busier group was found 98 14) # of times in this domain pull_task() was called even though the
126 15) # of times in this domain a busier queue was found while the cpu was 99 target task was cache-hot when busy
127 just becoming idle but no busier group was found 100 15) # of times in this domain load_balance() was called but did not
128 101 find a busier queue while the cpu was busy
129Next two are sched_balance_exec() statistics: 102 16) # of times in this domain a busier queue was found while the cpu
130 17) # of times in this domain sched_balance_exec() successfully pushed 103 was busy but no busier group was found
131 a task to a new cpu 104
132 18) # of times in this domain sched_balance_exec() tried but failed to 105 17) # of times in this domain load_balance() was called when the
133 push a task to a new cpu 106 cpu was just becoming idle
134 107 18) # of times in this domain load_balance() checked but found the
135Next two are try_to_wake_up() statistics: 108 load did not require balancing when the cpu was just becoming idle
136 19) # of times in this domain try_to_wake_up() tried to move a task based 109 19) # of times in this domain load_balance() tried to move one or more
137 on affinity and cache warmth 110 tasks and failed, when the cpu was just becoming idle
138 20) # of times in this domain try_to_wake_up() tried to move a task based 111 20) sum of imbalances discovered (if any) with each call to
139 on load balancing 112 load_balance() in this domain when the cpu was just becoming idle
140 113 21) # of times in this domain pull_task() was called when newly idle
114 22) # of times in this domain pull_task() was called even though the
115 target task was cache-hot when just becoming idle
116 23) # of times in this domain load_balance() was called but did not
117 find a busier queue while the cpu was just becoming idle
118 24) # of times in this domain a busier queue was found while the cpu
119 was just becoming idle but no busier group was found
120
121 Next three are active_load_balance() statistics:
122 25) # of times active_load_balance() was called
123 26) # of times active_load_balance() tried to move a task and failed
124 27) # of times active_load_balance() successfully moved a task
125
126 Next three are sched_balance_exec() statistics:
127 28) sbe_cnt is not used
128 29) sbe_balanced is not used
129 30) sbe_pushed is not used
130
131 Next three are sched_balance_fork() statistics:
132 31) sbf_cnt is not used
133 32) sbf_balanced is not used
134 33) sbf_pushed is not used
135
136 Next three are try_to_wake_up() statistics:
137 34) # of times in this domain try_to_wake_up() awoke a task that
138 last ran on a different cpu in this domain
139 35) # of times in this domain try_to_wake_up() moved a task to the
140 waking cpu because it was cache-cold on its own cpu anyway
141 36) # of times in this domain try_to_wake_up() started passive balancing
141 142
142/proc/<pid>/schedstat 143/proc/<pid>/schedstat
143---------------- 144----------------
144schedstats also adds a new /proc/<pid>/schedstat file to include some of 145schedstats also adds a new /proc/<pid>/schedstat file to include some of
145the same information on a per-process level. There are three fields in 146the same information on a per-process level. There are three fields in
146this file correlating to fields 20, 21, and 22 in the CPU fields, but 147this file correlating for that process to:
147they only apply for that process. 148 1) time spent on the cpu
149 2) time spent waiting on a runqueue
150 3) # of timeslices run on this cpu
148 151
149A program could be easily written to make use of these extra fields to 152A program could be easily written to make use of these extra fields to
150report on how well a particular process or set of processes is faring 153report on how well a particular process or set of processes is faring
151under the scheduler's policies. A simple version of such a program is 154under the scheduler's policies. A simple version of such a program is
152available at 155available at
153 http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c 156 http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
new file mode 100644
index 000000000000..218e86215297
--- /dev/null
+++ b/Documentation/spi/spidev_test.c
@@ -0,0 +1,202 @@
1/*
2 * SPI testing utility (using spidev driver)
3 *
4 * Copyright (c) 2007 MontaVista Software, Inc.
5 * Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License.
10 *
11 * Cross-compile with cross-gcc -I/path/to/cross-kernel/include
12 */
13
14#include <stdint.h>
15#include <unistd.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <getopt.h>
19#include <fcntl.h>
20#include <sys/ioctl.h>
21#include <linux/types.h>
22#include <linux/spi/spidev.h>
23
/* Number of elements in a true array (not a pointer). */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
25
/*
 * Report the failure described by @msg through perror() and then
 * terminate the program with abort().
 */
static void pabort(const char *msg)
{
	perror(msg);
	abort();
}
31
/*
 * Run-time settings.  The initializers are the defaults; parse_opts()
 * overrides them from the command line.
 */
static const char *device = "/dev/spidev1.1";	/* spidev node to open */
static uint8_t mode;				/* SPI_* mode flags OR-ed together */
static uint8_t bits = 8;			/* bits per word */
static uint32_t speed = 500000;			/* max speed, in Hz */
static uint16_t delay;				/* transfer delay, in usec */
37
38static void transfer(int fd)
39{
40 int ret;
41 uint8_t tx[] = {
42 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
43 0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
44 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
45 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
46 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
47 0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
48 0xF0, 0x0D,
49 };
50 uint8_t rx[ARRAY_SIZE(tx)] = {0, };
51 struct spi_ioc_transfer tr = {
52 .tx_buf = (unsigned long)tx,
53 .rx_buf = (unsigned long)rx,
54 .len = ARRAY_SIZE(tx),
55 .delay_usecs = delay,
56 .speed_hz = speed,
57 .bits_per_word = bits,
58 };
59
60 ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
61 if (ret == 1)
62 pabort("can't send spi message");
63
64 for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
65 if (!(ret % 6))
66 puts("");
67 printf("%.2X ", rx[ret]);
68 }
69 puts("");
70}
71
/*
 * Print the usage line for @prog followed by the option summary on
 * stdout, then exit with status 1.  Does not return.
 */
void print_usage(char *prog)
{
	static const char help[] =
		" -D --device device to use (default /dev/spidev1.1)\n"
		" -s --speed max speed (Hz)\n"
		" -d --delay delay (usec)\n"
		" -b --bpw bits per word \n"
		" -l --loop loopback\n"
		" -H --cpha clock phase\n"
		" -O --cpol clock polarity\n"
		" -L --lsb least significant bit first\n"
		" -C --cs-high chip select active high\n"
		" -3 --3wire SI/SO signals shared\n";

	printf("Usage: %s [-DsbdlHOLC3]\n", prog);
	puts(help);
	exit(1);
}
87
88void parse_opts(int argc, char *argv[])
89{
90 while (1) {
91 static struct option lopts[] = {
92 { "device", 1, 0, 'D' },
93 { "speed", 1, 0, 's' },
94 { "delay", 1, 0, 'd' },
95 { "bpw", 1, 0, 'b' },
96 { "loop", 0, 0, 'l' },
97 { "cpha", 0, 0, 'H' },
98 { "cpol", 0, 0, 'O' },
99 { "lsb", 0, 0, 'L' },
100 { "cs-high", 0, 0, 'C' },
101 { "3wire", 0, 0, '3' },
102 { NULL, 0, 0, 0 },
103 };
104 int c;
105
106 c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL);
107
108 if (c == -1)
109 break;
110
111 switch (c) {
112 case 'D':
113 device = optarg;
114 break;
115 case 's':
116 speed = atoi(optarg);
117 break;
118 case 'd':
119 delay = atoi(optarg);
120 break;
121 case 'b':
122 bits = atoi(optarg);
123 break;
124 case 'l':
125 mode |= SPI_LOOP;
126 break;
127 case 'H':
128 mode |= SPI_CPHA;
129 break;
130 case 'O':
131 mode |= SPI_CPOL;
132 break;
133 case 'L':
134 mode |= SPI_LSB_FIRST;
135 break;
136 case 'C':
137 mode |= SPI_CS_HIGH;
138 break;
139 case '3':
140 mode |= SPI_3WIRE;
141 break;
142 default:
143 print_usage(argv[0]);
144 break;
145 }
146 }
147}
148
149int main(int argc, char *argv[])
150{
151 int ret = 0;
152 int fd;
153
154 parse_opts(argc, argv);
155
156 fd = open(device, O_RDWR);
157 if (fd < 0)
158 pabort("can't open device");
159
160 /*
161 * spi mode
162 */
163 ret = ioctl(fd, SPI_IOC_WR_MODE, &mode);
164 if (ret == -1)
165 pabort("can't set spi mode");
166
167 ret = ioctl(fd, SPI_IOC_RD_MODE, &mode);
168 if (ret == -1)
169 pabort("can't get spi mode");
170
171 /*
172 * bits per word
173 */
174 ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
175 if (ret == -1)
176 pabort("can't set bits per word");
177
178 ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
179 if (ret == -1)
180 pabort("can't get bits per word");
181
182 /*
183 * max speed hz
184 */
185 ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
186 if (ret == -1)
187 pabort("can't set max speed hz");
188
189 ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
190 if (ret == -1)
191 pabort("can't get max speed hz");
192
193 printf("spi mode: %d\n", mode);
194 printf("bits per word: %d\n", bits);
195 printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
196
197 transfer(fd);
198
199 close(fd);
200
201 return ret;
202}
diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt
index a2afca3b2bab..847b342b7b20 100644
--- a/Documentation/stable_api_nonsense.txt
+++ b/Documentation/stable_api_nonsense.txt
@@ -10,7 +10,7 @@ kernel to userspace interfaces. The kernel to userspace interface is
10the one that application programs use, the syscall interface. That 10the one that application programs use, the syscall interface. That
11interface is _very_ stable over time, and will not break. I have old 11interface is _very_ stable over time, and will not break. I have old
12programs that were built on a pre 0.9something kernel that still work 12programs that were built on a pre 0.9something kernel that still work
13just fine on the latest 2.6 kernel release. This interface is the one 13just fine on the latest 2.6 kernel release. That interface is the one
14that users and application programmers can count on being stable. 14that users and application programmers can count on being stable.
15 15
16 16
diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt
index 42861bb0bc9b..80ef562160bb 100644
--- a/Documentation/sysfs-rules.txt
+++ b/Documentation/sysfs-rules.txt
@@ -1,19 +1,18 @@
1Rules on how to access information in the Linux kernel sysfs 1Rules on how to access information in the Linux kernel sysfs
2 2
3The kernel exported sysfs exports internal kernel implementation-details 3The kernel-exported sysfs exports internal kernel implementation details
4and depends on internal kernel structures and layout. It is agreed upon 4and depends on internal kernel structures and layout. It is agreed upon
5by the kernel developers that the Linux kernel does not provide a stable 5by the kernel developers that the Linux kernel does not provide a stable
6internal API. As sysfs is a direct export of kernel internal 6internal API. As sysfs is a direct export of kernel internal
7structures, the sysfs interface can not provide a stable interface eighter, 7structures, the sysfs interface cannot provide a stable interface either;
8it may always change along with internal kernel changes. 8it may always change along with internal kernel changes.
9 9
10To minimize the risk of breaking users of sysfs, which are in most cases 10To minimize the risk of breaking users of sysfs, which are in most cases
11low-level userspace applications, with a new kernel release, the users 11low-level userspace applications, with a new kernel release, the users
12of sysfs must follow some rules to use an as abstract-as-possible way to 12of sysfs must follow some rules to use an as-abstract-as-possible way to
13access this filesystem. The current udev and HAL programs already 13access this filesystem. The current udev and HAL programs already
14implement this and users are encouraged to plug, if possible, into the 14implement this and users are encouraged to plug, if possible, into the
15abstractions these programs provide instead of accessing sysfs 15abstractions these programs provide instead of accessing sysfs directly.
16directly.
17 16
18But if you really do want or need to access sysfs directly, please follow 17But if you really do want or need to access sysfs directly, please follow
19the following rules and then your programs should work with future 18the following rules and then your programs should work with future
@@ -25,22 +24,22 @@ versions of the sysfs interface.
25 implementation details in its own API. Therefore it is not better than 24 implementation details in its own API. Therefore it is not better than
26 reading directories and opening the files yourself. 25 reading directories and opening the files yourself.
27 Also, it is not actively maintained, in the sense of reflecting the 26 Also, it is not actively maintained, in the sense of reflecting the
28 current kernel-development. The goal of providing a stable interface 27 current kernel development. The goal of providing a stable interface
29 to sysfs has failed, it causes more problems, than it solves. It 28 to sysfs has failed; it causes more problems than it solves. It
30 violates many of the rules in this document. 29 violates many of the rules in this document.
31 30
32- sysfs is always at /sys 31- sysfs is always at /sys
33 Parsing /proc/mounts is a waste of time. Other mount points are a 32 Parsing /proc/mounts is a waste of time. Other mount points are a
34 system configuration bug you should not try to solve. For test cases, 33 system configuration bug you should not try to solve. For test cases,
35 possibly support a SYSFS_PATH environment variable to overwrite the 34 possibly support a SYSFS_PATH environment variable to overwrite the
36 applications behavior, but never try to search for sysfs. Never try 35 application's behavior, but never try to search for sysfs. Never try
37 to mount it, if you are not an early boot script. 36 to mount it, if you are not an early boot script.
38 37
39- devices are only "devices" 38- devices are only "devices"
40 There is no such thing like class-, bus-, physical devices, 39 There is no such thing like class-, bus-, physical devices,
41 interfaces, and such that you can rely on in userspace. Everything is 40 interfaces, and such that you can rely on in userspace. Everything is
42 just simply a "device". Class-, bus-, physical, ... types are just 41 just simply a "device". Class-, bus-, physical, ... types are just
43 kernel implementation details, which should not be expected by 42 kernel implementation details which should not be expected by
44 applications that look for devices in sysfs. 43 applications that look for devices in sysfs.
45 44
46 The properties of a device are: 45 The properties of a device are:
@@ -48,11 +47,11 @@ versions of the sysfs interface.
48 - identical to the DEVPATH value in the event sent from the kernel 47 - identical to the DEVPATH value in the event sent from the kernel
49 at device creation and removal 48 at device creation and removal
50 - the unique key to the device at that point in time 49 - the unique key to the device at that point in time
51 - the kernels path to the device-directory without the leading 50 - the kernel's path to the device directory without the leading
52 /sys, and always starting with a slash 51 /sys, and always starting with a slash
53 - all elements of a devpath must be real directories. Symlinks 52 - all elements of a devpath must be real directories. Symlinks
54 pointing to /sys/devices must always be resolved to their real 53 pointing to /sys/devices must always be resolved to their real
55 target, and the target path must be used to access the device. 54 target and the target path must be used to access the device.
56 That way the devpath to the device matches the devpath of the 55 That way the devpath to the device matches the devpath of the
57 kernel used at event time. 56 kernel used at event time.
58 - using or exposing symlink values as elements in a devpath string 57 - using or exposing symlink values as elements in a devpath string
@@ -73,17 +72,17 @@ versions of the sysfs interface.
73 link 72 link
74 - it is retrieved by reading the "driver"-link and using only the 73 - it is retrieved by reading the "driver"-link and using only the
75 last element of the target path 74 last element of the target path
76 - devices which do not have "driver"-link, just do not have a 75 - devices which do not have "driver"-link just do not have a
77 driver; copying the driver value in a child device context, is a 76 driver; copying the driver value in a child device context is a
78 bug in the application 77 bug in the application
79 78
80 o attributes 79 o attributes
81 - the files in the device directory or files below a subdirectories 80 - the files in the device directory or files below subdirectories
82 of the same device directory 81 of the same device directory
83 - accessing attributes reached by a symlink pointing to another device, 82 - accessing attributes reached by a symlink pointing to another device,
84 like the "device"-link, is a bug in the application 83 like the "device"-link, is a bug in the application
85 84
86 Everything else is just a kernel driver-core implementation detail, 85 Everything else is just a kernel driver-core implementation detail
87 that should not be assumed to be stable across kernel releases. 86 that should not be assumed to be stable across kernel releases.
88 87
89- Properties of parent devices never belong into a child device. 88- Properties of parent devices never belong into a child device.
@@ -91,25 +90,25 @@ versions of the sysfs interface.
91 context properties. If the device 'eth0' or 'sda' does not have a 90 context properties. If the device 'eth0' or 'sda' does not have a
92 "driver"-link, then this device does not have a driver. Its value is empty. 91 "driver"-link, then this device does not have a driver. Its value is empty.
93 Never copy any property of the parent-device into a child-device. Parent 92 Never copy any property of the parent-device into a child-device. Parent
94 device-properties may change dynamically without any notice to the 93 device properties may change dynamically without any notice to the
95 child device. 94 child device.
96 95
97- Hierarchy in a single device-tree 96- Hierarchy in a single device tree
98 There is only one valid place in sysfs where hierarchy can be examined 97 There is only one valid place in sysfs where hierarchy can be examined
99 and this is below: /sys/devices. 98 and this is below: /sys/devices.
100 It is planned, that all device directories will end up in the tree 99 It is planned that all device directories will end up in the tree
101 below this directory. 100 below this directory.
102 101
103- Classification by subsystem 102- Classification by subsystem
104 There are currently three places for classification of devices: 103 There are currently three places for classification of devices:
105 /sys/block, /sys/class and /sys/bus. It is planned that these will 104 /sys/block, /sys/class and /sys/bus. It is planned that these will
106 not contain any device-directories themselves, but only flat lists of 105 not contain any device directories themselves, but only flat lists of
107 symlinks pointing to the unified /sys/devices tree. 106 symlinks pointing to the unified /sys/devices tree.
108 All three places have completely different rules on how to access 107 All three places have completely different rules on how to access
109 device information. It is planned to merge all three 108 device information. It is planned to merge all three
110 classification-directories into one place at /sys/subsystem, 109 classification directories into one place at /sys/subsystem,
111 following the layout of the bus-directories. All buses and 110 following the layout of the bus directories. All buses and
112 classes, including the converted block-subsystem, will show up 111 classes, including the converted block subsystem, will show up
113 there. 112 there.
114 The devices belonging to a subsystem will create a symlink in the 113 The devices belonging to a subsystem will create a symlink in the
115 "devices" directory at /sys/subsystem/<name>/devices. 114 "devices" directory at /sys/subsystem/<name>/devices.
@@ -121,38 +120,38 @@ versions of the sysfs interface.
121 subsystem name. 120 subsystem name.
122 121
123 Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or 122 Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or
124 /sys/block and /sys/class/block are not interchangeable, is a bug in 123 /sys/block and /sys/class/block are not interchangeable is a bug in
125 the application. 124 the application.
126 125
127- Block 126- Block
128 The converted block-subsystem at /sys/class/block, or 127 The converted block subsystem at /sys/class/block or
129 /sys/subsystem/block will contain the links for disks and partitions 128 /sys/subsystem/block will contain the links for disks and partitions
130 at the same level, never in a hierarchy. Assuming the block-subsytem to 129 at the same level, never in a hierarchy. Assuming the block subsystem to
131 contain only disks and not partition-devices in the same flat list is 130 contain only disks and not partition devices in the same flat list is
132 a bug in the application. 131 a bug in the application.
133 132
134- "device"-link and <subsystem>:<kernel name>-links 133- "device"-link and <subsystem>:<kernel name>-links
135 Never depend on the "device"-link. The "device"-link is a workaround 134 Never depend on the "device"-link. The "device"-link is a workaround
136 for the old layout, where class-devices are not created in 135 for the old layout, where class devices are not created in
137 /sys/devices/ like the bus-devices. If the link-resolving of a 136 /sys/devices/ like the bus devices. If the link-resolving of a
138 device-directory does not end in /sys/devices/, you can use the 137 device directory does not end in /sys/devices/, you can use the
139 "device"-link to find the parent devices in /sys/devices/. That is the 138 "device"-link to find the parent devices in /sys/devices/. That is the
140 single valid use of the "device"-link, it must never appear in any 139 single valid use of the "device"-link; it must never appear in any
141 path as an element. Assuming the existence of the "device"-link for 140 path as an element. Assuming the existence of the "device"-link for
142 a device in /sys/devices/ is a bug in the application. 141 a device in /sys/devices/ is a bug in the application.
143 Accessing /sys/class/net/eth0/device is a bug in the application. 142 Accessing /sys/class/net/eth0/device is a bug in the application.
144 143
145 Never depend on the class-specific links back to the /sys/class 144 Never depend on the class-specific links back to the /sys/class
146 directory. These links are also a workaround for the design mistake 145 directory. These links are also a workaround for the design mistake
147 that class-devices are not created in /sys/devices. If a device 146 that class devices are not created in /sys/devices. If a device
148 directory does not contain directories for child devices, these links 147 directory does not contain directories for child devices, these links
149 may be used to find the child devices in /sys/class. That is the single 148 may be used to find the child devices in /sys/class. That is the single
150 valid use of these links, they must never appear in any path as an 149 valid use of these links; they must never appear in any path as an
151 element. Assuming the existence of these links for devices which are 150 element. Assuming the existence of these links for devices which are
152 real child device directories in the /sys/devices tree, is a bug in 151 real child device directories in the /sys/devices tree is a bug in
153 the application. 152 the application.
154 153
155 It is planned to remove all these links when when all class-device 154 It is planned to remove all these links when all class device
156 directories live in /sys/devices. 155 directories live in /sys/devices.
157 156
158- Position of devices along device chain can change. 157- Position of devices along device chain can change.
@@ -161,6 +160,5 @@ versions of the sysfs interface.
161 the chain. You must always request the parent device you are looking for 160 the chain. You must always request the parent device you are looking for
162 by its subsystem value. You need to walk up the chain until you find 161 by its subsystem value. You need to walk up the chain until you find
163 the device that matches the expected subsystem. Depending on a specific 162 the device that matches the expected subsystem. Depending on a specific
164 position of a parent device, or exposing relative paths, using "../" to 163 position of a parent device or exposing relative paths using "../" to
165 access the chain of parents, is a bug in the application. 164 access the chain of parents is a bug in the application.
166
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index ba328f255417..ef19142896ca 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -1,6 +1,6 @@
1Linux Magic System Request Key Hacks 1Linux Magic System Request Key Hacks
2Documentation for sysrq.c 2Documentation for sysrq.c
3Last update: 2007-MAR-14 3Last update: 2007-AUG-04
4 4
5* What is the magic SysRq key? 5* What is the magic SysRq key?
6~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -78,7 +78,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
78'g' - Used by kgdb on ppc and sh platforms. 78'g' - Used by kgdb on ppc and sh platforms.
79 79
80'h' - Will display help (actually any other key than those listed 80'h' - Will display help (actually any other key than those listed
81 above will display help. but 'h' is easy to remember :-) 81 here will display help, but 'h' is easy to remember :-)
82 82
83'i' - Send a SIGKILL to all processes, except for init. 83'i' - Send a SIGKILL to all processes, except for init.
84 84
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt
index 6711fbcf4080..eb2f5986e1eb 100644
--- a/Documentation/thinkpad-acpi.txt
+++ b/Documentation/thinkpad-acpi.txt
@@ -105,10 +105,10 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver
105as a driver attribute (see below). 105as a driver attribute (see below).
106 106
107Sysfs driver attributes are on the driver's sysfs attribute space, 107Sysfs driver attributes are on the driver's sysfs attribute space,
108for 2.6.20 this is /sys/bus/platform/drivers/thinkpad-acpi/. 108for 2.6.20 this is /sys/bus/platform/drivers/thinkpad_acpi/.
109 109
110Sysfs device attributes are on the driver's sysfs attribute space, 110Sysfs device attributes are on the driver's sysfs attribute space,
111for 2.6.20 this is /sys/devices/platform/thinkpad-acpi/. 111for 2.6.20 this is /sys/devices/platform/thinkpad_acpi/.
112 112
113Driver version 113Driver version
114-------------- 114--------------
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index d4f21ffd1404..1af7bd5a2183 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -396,7 +396,7 @@ void report(struct slabinfo *s)
396 if (strcmp(s->name, "*") == 0) 396 if (strcmp(s->name, "*") == 0)
397 return; 397 return;
398 398
399 printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %d\n", 399 printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n",
400 s->name, s->aliases, s->order, s->objects); 400 s->name, s->aliases, s->order, s->objects);
401 if (s->hwcache_align) 401 if (s->hwcache_align)
402 printf("** Hardware cacheline aligned\n"); 402 printf("** Hardware cacheline aligned\n");