aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi46
-rw-r--r--Documentation/DMA-API.txt69
-rw-r--r--Documentation/DMA-attributes.txt24
-rw-r--r--Documentation/DMA-mapping.txt38
-rw-r--r--Documentation/DocBook/Makefile2
-rw-r--r--Documentation/DocBook/debugobjects.tmpl391
-rw-r--r--Documentation/DocBook/kernel-api.tmpl56
-rw-r--r--Documentation/DocBook/rapidio.tmpl1
-rw-r--r--Documentation/braille-console.txt34
-rw-r--r--Documentation/cgroups.txt3
-rw-r--r--Documentation/controllers/devices.txt48
-rw-r--r--Documentation/controllers/resource_counter.txt181
-rw-r--r--Documentation/cpu-freq/user-guide.txt14
-rw-r--r--Documentation/cpusets.txt26
-rw-r--r--Documentation/dontdiff2
-rw-r--r--Documentation/fb/gxfb.txt52
-rw-r--r--Documentation/fb/intelfb.txt2
-rw-r--r--Documentation/fb/lxfb.txt52
-rw-r--r--Documentation/fb/metronomefb.txt16
-rw-r--r--Documentation/fb/modedb.txt4
-rw-r--r--Documentation/feature-removal-schedule.txt27
-rw-r--r--Documentation/filesystems/Locking3
-rw-r--r--Documentation/filesystems/proc.txt21
-rw-r--r--Documentation/filesystems/tmpfs.txt12
-rw-r--r--Documentation/filesystems/vfat.txt15
-rw-r--r--Documentation/gpio.txt10
-rw-r--r--Documentation/hwmon/w83l785ts3
-rw-r--r--Documentation/i2c/writing-clients3
-rw-r--r--Documentation/i386/boot.txt12
-rw-r--r--Documentation/kbuild/kconfig-language.txt17
-rw-r--r--Documentation/kdump/kdump.txt5
-rw-r--r--Documentation/kernel-parameters.txt17
-rw-r--r--Documentation/keys-request-key.txt11
-rw-r--r--Documentation/keys.txt59
-rw-r--r--Documentation/kprobes.txt51
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt139
-rw-r--r--Documentation/lguest/lguest.c62
-rw-r--r--Documentation/md.txt6
-rw-r--r--Documentation/oops-tracing.txt4
-rw-r--r--Documentation/powerpc/booting-without-of.txt33
-rw-r--r--Documentation/powerpc/mpc52xx-device-tree-bindings.txt12
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt1
-rw-r--r--Documentation/spi/spidev168
-rw-r--r--Documentation/spi/spidev_fdx.c158
-rw-r--r--Documentation/sysrq.txt2
-rw-r--r--Documentation/thermal/sysfs-api.txt33
-rw-r--r--Documentation/video4linux/CARDLIST.saa71343
-rw-r--r--Documentation/video4linux/cx18.txt34
-rw-r--r--Documentation/vm/numa_memory_policy.txt281
-rw-r--r--Documentation/vm/slabinfo.c27
50 files changed, 1891 insertions, 399 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
new file mode 100644
index 000000000000..5ac1e01bbd48
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -0,0 +1,46 @@
1What: /sys/class/bdi/<bdi>/
2Date: January 2008
3Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
4Description:
5
6Provide a place in sysfs for the backing_dev_info object. This allows
7setting and retrieving various BDI specific variables.
8
9The <bdi> identifier can be either of the following:
10
11MAJOR:MINOR
12
13 Device number for block devices, or value of st_dev on
14 non-block filesystems which provide their own BDI, such as NFS
15 and FUSE.
16
17default
18
19 The default backing dev, used for non-block device backed
20 filesystems which do not provide their own BDI.
21
22Files under /sys/class/bdi/<bdi>/
23---------------------------------
24
25read_ahead_kb (read-write)
26
27 Size of the read-ahead window in kilobytes
28
29min_ratio (read-write)
30
31 Under normal circumstances each device is given a part of the
32 total write-back cache that relates to its current average
33 writeout speed in relation to the other devices.
34
35 The 'min_ratio' parameter allows assigning a minimum
36 percentage of the write-back cache to a particular device.
37 For example, this is useful for providing a minimum QoS.
38
39max_ratio (read-write)
40
41 Allows limiting a particular device to use not more than the
42 given percentage of the write-back cache. This is useful in
43 situations where we want to avoid one device taking all or
44 most of the write-back cache. For example in case of an NFS
45 mount that is prone to get stuck, or a FUSE mount which cannot
46 be trusted to play fair.
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index b939ebb62871..80d150458c80 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations
145int 145int
146dma_supported(struct device *dev, u64 mask) 146dma_supported(struct device *dev, u64 mask)
147int 147int
148pci_dma_supported(struct device *dev, u64 mask) 148pci_dma_supported(struct pci_dev *hwdev, u64 mask)
149 149
150Checks to see if the device can support DMA to the memory described by 150Checks to see if the device can support DMA to the memory described by
151mask. 151mask.
@@ -189,7 +189,7 @@ dma_addr_t
189dma_map_single(struct device *dev, void *cpu_addr, size_t size, 189dma_map_single(struct device *dev, void *cpu_addr, size_t size,
190 enum dma_data_direction direction) 190 enum dma_data_direction direction)
191dma_addr_t 191dma_addr_t
192pci_map_single(struct device *dev, void *cpu_addr, size_t size, 192pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size,
193 int direction) 193 int direction)
194 194
195Maps a piece of processor virtual memory so it can be accessed by the 195Maps a piece of processor virtual memory so it can be accessed by the
@@ -395,6 +395,71 @@ Notes: You must do this:
395 395
396See also dma_map_single(). 396See also dma_map_single().
397 397
398dma_addr_t
399dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
400 enum dma_data_direction dir,
401 struct dma_attrs *attrs)
402
403void
404dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
405 size_t size, enum dma_data_direction dir,
406 struct dma_attrs *attrs)
407
408int
409dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
410 int nents, enum dma_data_direction dir,
411 struct dma_attrs *attrs)
412
413void
414dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
415 int nents, enum dma_data_direction dir,
416 struct dma_attrs *attrs)
417
418The four functions above are just like the counterpart functions
419without the _attrs suffixes, except that they pass an optional
420struct dma_attrs*.
421
422struct dma_attrs encapsulates a set of "dma attributes". For the
423definition of struct dma_attrs see linux/dma-attrs.h.
424
425The interpretation of dma attributes is architecture-specific, and
426each attribute should be documented in Documentation/DMA-attributes.txt.
427
428If struct dma_attrs* is NULL, the semantics of each of these
429functions is identical to those of the corresponding function
430without the _attrs suffix. As a result dma_map_single_attrs()
431can generally replace dma_map_single(), etc.
432
433As an example of the use of the *_attrs functions, here's how
434you could pass an attribute DMA_ATTR_FOO when mapping memory
435for DMA:
436
437#include <linux/dma-attrs.h>
438/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and
439 * documented in Documentation/DMA-attributes.txt */
440...
441
442 DEFINE_DMA_ATTRS(attrs);
443 dma_set_attr(DMA_ATTR_FOO, &attrs);
444 ....
445 n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attr);
446 ....
447
448Architectures that care about DMA_ATTR_FOO would check for its
449presence in their implementations of the mapping and unmapping
450routines, e.g.:
451
452void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
453 size_t size, enum dma_data_direction dir,
454 struct dma_attrs *attrs)
455{
456 ....
457 int foo = dma_get_attr(DMA_ATTR_FOO, attrs);
458 ....
459 if (foo)
460 /* twizzle the frobnozzle */
461 ....
462
398 463
399Part II - Advanced dma_ usage 464Part II - Advanced dma_ usage
400----------------------------- 465-----------------------------
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
new file mode 100644
index 000000000000..6d772f84b477
--- /dev/null
+++ b/Documentation/DMA-attributes.txt
@@ -0,0 +1,24 @@
1 DMA attributes
2 ==============
3
4This document describes the semantics of the DMA attributes that are
5defined in linux/dma-attrs.h.
6
7DMA_ATTR_WRITE_BARRIER
8----------------------
9
10DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA
11to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces
12all pending DMA writes to complete, and thus provides a mechanism to
13strictly order DMA from a device across all intervening busses and
14bridges. This barrier is not specific to a particular type of
15interconnect, it applies to the system as a whole, and so its
16implementation must account for the idiosyncracies of the system all
17the way from the DMA device to memory.
18
19As an example of a situation where DMA_ATTR_WRITE_BARRIER would be
20useful, suppose that a device does a DMA write to indicate that data is
21ready and available in memory. The DMA of the "completion indication"
22could race with data DMA. Mapping the memory used for completion
23indications with DMA_ATTR_WRITE_BARRIER would prevent the race.
24
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index d84f89dbf921..b463ecd0c7ce 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -315,11 +315,11 @@ you should do:
315 315
316 dma_addr_t dma_handle; 316 dma_addr_t dma_handle;
317 317
318 cpu_addr = pci_alloc_consistent(dev, size, &dma_handle); 318 cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle);
319 319
320where dev is a struct pci_dev *. You should pass NULL for PCI like buses 320where pdev is a struct pci_dev *. This may be called in interrupt context.
321where devices don't have struct pci_dev (like ISA, EISA). This may be 321You should use dma_alloc_coherent (see DMA-API.txt) for buses
322called in interrupt context. 322where devices don't have struct pci_dev (like ISA, EISA).
323 323
324This argument is needed because the DMA translations may be bus 324This argument is needed because the DMA translations may be bus
325specific (and often is private to the bus which the device is attached 325specific (and often is private to the bus which the device is attached
@@ -332,7 +332,7 @@ __get_free_pages (but takes size instead of a page order). If your
332driver needs regions sized smaller than a page, you may prefer using 332driver needs regions sized smaller than a page, you may prefer using
333the pci_pool interface, described below. 333the pci_pool interface, described below.
334 334
335The consistent DMA mapping interfaces, for non-NULL dev, will by 335The consistent DMA mapping interfaces, for non-NULL pdev, will by
336default return a DMA address which is SAC (Single Address Cycle) 336default return a DMA address which is SAC (Single Address Cycle)
337addressable. Even if the device indicates (via PCI dma mask) that it 337addressable. Even if the device indicates (via PCI dma mask) that it
338may address the upper 32-bits and thus perform DAC cycles, consistent 338may address the upper 32-bits and thus perform DAC cycles, consistent
@@ -354,9 +354,9 @@ buffer you receive will not cross a 64K boundary.
354 354
355To unmap and free such a DMA region, you call: 355To unmap and free such a DMA region, you call:
356 356
357 pci_free_consistent(dev, size, cpu_addr, dma_handle); 357 pci_free_consistent(pdev, size, cpu_addr, dma_handle);
358 358
359where dev, size are the same as in the above call and cpu_addr and 359where pdev, size are the same as in the above call and cpu_addr and
360dma_handle are the values pci_alloc_consistent returned to you. 360dma_handle are the values pci_alloc_consistent returned to you.
361This function may not be called in interrupt context. 361This function may not be called in interrupt context.
362 362
@@ -371,9 +371,9 @@ Create a pci_pool like this:
371 371
372 struct pci_pool *pool; 372 struct pci_pool *pool;
373 373
374 pool = pci_pool_create(name, dev, size, align, alloc); 374 pool = pci_pool_create(name, pdev, size, align, alloc);
375 375
376The "name" is for diagnostics (like a kmem_cache name); dev and size 376The "name" is for diagnostics (like a kmem_cache name); pdev and size
377are as above. The device's hardware alignment requirement for this 377are as above. The device's hardware alignment requirement for this
378type of data is "align" (which is expressed in bytes, and must be a 378type of data is "align" (which is expressed in bytes, and must be a
379power of two). If your device has no boundary crossing restrictions, 379power of two). If your device has no boundary crossing restrictions,
@@ -472,11 +472,11 @@ To map a single region, you do:
472 void *addr = buffer->ptr; 472 void *addr = buffer->ptr;
473 size_t size = buffer->len; 473 size_t size = buffer->len;
474 474
475 dma_handle = pci_map_single(dev, addr, size, direction); 475 dma_handle = pci_map_single(pdev, addr, size, direction);
476 476
477and to unmap it: 477and to unmap it:
478 478
479 pci_unmap_single(dev, dma_handle, size, direction); 479 pci_unmap_single(pdev, dma_handle, size, direction);
480 480
481You should call pci_unmap_single when the DMA activity is finished, e.g. 481You should call pci_unmap_single when the DMA activity is finished, e.g.
482from the interrupt which told you that the DMA transfer is done. 482from the interrupt which told you that the DMA transfer is done.
@@ -493,17 +493,17 @@ Specifically:
493 unsigned long offset = buffer->offset; 493 unsigned long offset = buffer->offset;
494 size_t size = buffer->len; 494 size_t size = buffer->len;
495 495
496 dma_handle = pci_map_page(dev, page, offset, size, direction); 496 dma_handle = pci_map_page(pdev, page, offset, size, direction);
497 497
498 ... 498 ...
499 499
500 pci_unmap_page(dev, dma_handle, size, direction); 500 pci_unmap_page(pdev, dma_handle, size, direction);
501 501
502Here, "offset" means byte offset within the given page. 502Here, "offset" means byte offset within the given page.
503 503
504With scatterlists, you map a region gathered from several regions by: 504With scatterlists, you map a region gathered from several regions by:
505 505
506 int i, count = pci_map_sg(dev, sglist, nents, direction); 506 int i, count = pci_map_sg(pdev, sglist, nents, direction);
507 struct scatterlist *sg; 507 struct scatterlist *sg;
508 508
509 for_each_sg(sglist, sg, count, i) { 509 for_each_sg(sglist, sg, count, i) {
@@ -527,7 +527,7 @@ accessed sg->address and sg->length as shown above.
527 527
528To unmap a scatterlist, just call: 528To unmap a scatterlist, just call:
529 529
530 pci_unmap_sg(dev, sglist, nents, direction); 530 pci_unmap_sg(pdev, sglist, nents, direction);
531 531
532Again, make sure DMA activity has already finished. 532Again, make sure DMA activity has already finished.
533 533
@@ -550,11 +550,11 @@ correct copy of the DMA buffer.
550So, firstly, just map it with pci_map_{single,sg}, and after each DMA 550So, firstly, just map it with pci_map_{single,sg}, and after each DMA
551transfer call either: 551transfer call either:
552 552
553 pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction); 553 pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction);
554 554
555or: 555or:
556 556
557 pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction); 557 pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction);
558 558
559as appropriate. 559as appropriate.
560 560
@@ -562,7 +562,7 @@ Then, if you wish to let the device get at the DMA area again,
562finish accessing the data with the cpu, and then before actually 562finish accessing the data with the cpu, and then before actually
563giving the buffer to the hardware call either: 563giving the buffer to the hardware call either:
564 564
565 pci_dma_sync_single_for_device(dev, dma_handle, size, direction); 565 pci_dma_sync_single_for_device(pdev, dma_handle, size, direction);
566 566
567or: 567or:
568 568
@@ -739,7 +739,7 @@ failure can be determined by:
739 739
740 dma_addr_t dma_handle; 740 dma_addr_t dma_handle;
741 741
742 dma_handle = pci_map_single(dev, addr, size, direction); 742 dma_handle = pci_map_single(pdev, addr, size, direction);
743 if (pci_dma_mapping_error(dma_handle)) { 743 if (pci_dma_mapping_error(dma_handle)) {
744 /* 744 /*
745 * reduce current DMA mapping usage, 745 * reduce current DMA mapping usage,
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 83966e94cc32..0eb0d027eb32 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -12,7 +12,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
12 kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ 12 kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ 14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
15 mac80211.xml 15 mac80211.xml debugobjects.xml
16 16
17### 17###
18# The build process is as follows (targets): 18# The build process is as follows (targets):
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
new file mode 100644
index 000000000000..7f5f218015fe
--- /dev/null
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -0,0 +1,391 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="debug-objects-guide">
6 <bookinfo>
7 <title>Debug objects life time</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Thomas</firstname>
12 <surname>Gleixner</surname>
13 <affiliation>
14 <address>
15 <email>tglx@linutronix.de</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <copyright>
22 <year>2008</year>
23 <holder>Thomas Gleixner</holder>
24 </copyright>
25
26 <legalnotice>
27 <para>
28 This documentation is free software; you can redistribute
29 it and/or modify it under the terms of the GNU General Public
30 License version 2 as published by the Free Software Foundation.
31 </para>
32
33 <para>
34 This program is distributed in the hope that it will be
35 useful, but WITHOUT ANY WARRANTY; without even the implied
36 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
37 See the GNU General Public License for more details.
38 </para>
39
40 <para>
41 You should have received a copy of the GNU General Public
42 License along with this program; if not, write to the Free
43 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
44 MA 02111-1307 USA
45 </para>
46
47 <para>
48 For more details see the file COPYING in the source
49 distribution of Linux.
50 </para>
51 </legalnotice>
52 </bookinfo>
53
54<toc></toc>
55
56 <chapter id="intro">
57 <title>Introduction</title>
58 <para>
59 debugobjects is a generic infrastructure to track the life time
60 of kernel objects and validate the operations on those.
61 </para>
62 <para>
63 debugobjects is useful to check for the following error patterns:
64 <itemizedlist>
65 <listitem><para>Activation of uninitialized objects</para></listitem>
66 <listitem><para>Initialization of active objects</para></listitem>
67 <listitem><para>Usage of freed/destroyed objects</para></listitem>
68 </itemizedlist>
69 </para>
70 <para>
71 debugobjects is not changing the data structure of the real
72 object so it can be compiled in with a minimal runtime impact
73 and enabled on demand with a kernel command line option.
74 </para>
75 </chapter>
76
77 <chapter id="howto">
78 <title>Howto use debugobjects</title>
79 <para>
80 A kernel subsystem needs to provide a data structure which
81 describes the object type and add calls into the debug code at
82 appropriate places. The data structure to describe the object
83 type needs at minimum the name of the object type. Optional
84 functions can and should be provided to fixup detected problems
85 so the kernel can continue to work and the debug information can
86 be retrieved from a live system instead of hard core debugging
87 with serial consoles and stack trace transcripts from the
88 monitor.
89 </para>
90 <para>
91 The debug calls provided by debugobjects are:
92 <itemizedlist>
93 <listitem><para>debug_object_init</para></listitem>
94 <listitem><para>debug_object_init_on_stack</para></listitem>
95 <listitem><para>debug_object_activate</para></listitem>
96 <listitem><para>debug_object_deactivate</para></listitem>
97 <listitem><para>debug_object_destroy</para></listitem>
98 <listitem><para>debug_object_free</para></listitem>
99 </itemizedlist>
100 Each of these functions takes the address of the real object and
101 a pointer to the object type specific debug description
102 structure.
103 </para>
104 <para>
105 Each detected error is reported in the statistics and a limited
106 number of errors are printk'ed including a full stack trace.
107 </para>
108 <para>
109 The statistics are available via debugfs/debug_objects/stats.
110 They provide information about the number of warnings and the
111 number of successful fixups along with information about the
112 usage of the internal tracking objects and the state of the
113 internal tracking objects pool.
114 </para>
115 </chapter>
116 <chapter id="debugfunctions">
117 <title>Debug functions</title>
118 <sect1 id="prototypes">
119 <title>Debug object function reference</title>
120!Elib/debugobjects.c
121 </sect1>
122 <sect1 id="debug_object_init">
123 <title>debug_object_init</title>
124 <para>
125 This function is called whenever the initialization function
126 of a real object is called.
127 </para>
128 <para>
129 When the real object is already tracked by debugobjects it is
130 checked, whether the object can be initialized. Initializing
131 is not allowed for active and destroyed objects. When
132 debugobjects detects an error, then it calls the fixup_init
133 function of the object type description structure if provided
134 by the caller. The fixup function can correct the problem
135 before the real initialization of the object happens. E.g. it
136 can deactivate an active object in order to prevent damage to
137 the subsystem.
138 </para>
139 <para>
140 When the real object is not yet tracked by debugobjects,
141 debugobjects allocates a tracker object for the real object
142 and sets the tracker object state to ODEBUG_STATE_INIT. It
143 verifies that the object is not on the callers stack. If it is
144 on the callers stack then a limited number of warnings
145 including a full stack trace is printk'ed. The calling code
146 must use debug_object_init_on_stack() and remove the object
147 before leaving the function which allocated it. See next
148 section.
149 </para>
150 </sect1>
151
152 <sect1 id="debug_object_init_on_stack">
153 <title>debug_object_init_on_stack</title>
154 <para>
155 This function is called whenever the initialization function
156 of a real object which resides on the stack is called.
157 </para>
158 <para>
159 When the real object is already tracked by debugobjects it is
160 checked, whether the object can be initialized. Initializing
161 is not allowed for active and destroyed objects. When
162 debugobjects detects an error, then it calls the fixup_init
163 function of the object type description structure if provided
164 by the caller. The fixup function can correct the problem
165 before the real initialization of the object happens. E.g. it
166 can deactivate an active object in order to prevent damage to
167 the subsystem.
168 </para>
169 <para>
170 When the real object is not yet tracked by debugobjects
171 debugobjects allocates a tracker object for the real object
172 and sets the tracker object state to ODEBUG_STATE_INIT. It
173 verifies that the object is on the callers stack.
174 </para>
175 <para>
176 An object which is on the stack must be removed from the
177 tracker by calling debug_object_free() before the function
178 which allocates the object returns. Otherwise we keep track of
179 stale objects.
180 </para>
181 </sect1>
182
183 <sect1 id="debug_object_activate">
184 <title>debug_object_activate</title>
185 <para>
186 This function is called whenever the activation function of a
187 real object is called.
188 </para>
189 <para>
190 When the real object is already tracked by debugobjects it is
191 checked, whether the object can be activated. Activating is
192 not allowed for active and destroyed objects. When
193 debugobjects detects an error, then it calls the
194 fixup_activate function of the object type description
195 structure if provided by the caller. The fixup function can
196 correct the problem before the real activation of the object
197 happens. E.g. it can deactivate an active object in order to
198 prevent damage to the subsystem.
199 </para>
200 <para>
201 When the real object is not yet tracked by debugobjects then
202 the fixup_activate function is called if available. This is
203 necessary to allow the legitimate activation of statically
204 allocated and initialized objects. The fixup function checks
205 whether the object is valid and calls the debug_objects_init()
206 function to initialize the tracking of this object.
207 </para>
208 <para>
209 When the activation is legitimate, then the state of the
210 associated tracker object is set to ODEBUG_STATE_ACTIVE.
211 </para>
212 </sect1>
213
214 <sect1 id="debug_object_deactivate">
215 <title>debug_object_deactivate</title>
216 <para>
217 This function is called whenever the deactivation function of
218 a real object is called.
219 </para>
220 <para>
221 When the real object is tracked by debugobjects it is checked,
222 whether the object can be deactivated. Deactivating is not
223 allowed for untracked or destroyed objects.
224 </para>
225 <para>
226 When the deactivation is legitimate, then the state of the
227 associated tracker object is set to ODEBUG_STATE_INACTIVE.
228 </para>
229 </sect1>
230
231 <sect1 id="debug_object_destroy">
232 <title>debug_object_destroy</title>
233 <para>
234 This function is called to mark an object destroyed. This is
235 useful to prevent the usage of invalid objects, which are
236 still available in memory: either statically allocated objects
237 or objects which are freed later.
238 </para>
239 <para>
240 When the real object is tracked by debugobjects it is checked,
241 whether the object can be destroyed. Destruction is not
242 allowed for active and destroyed objects. When debugobjects
243 detects an error, then it calls the fixup_destroy function of
244 the object type description structure if provided by the
245 caller. The fixup function can correct the problem before the
246 real destruction of the object happens. E.g. it can deactivate
247 an active object in order to prevent damage to the subsystem.
248 </para>
249 <para>
250 When the destruction is legitimate, then the state of the
251 associated tracker object is set to ODEBUG_STATE_DESTROYED.
252 </para>
253 </sect1>
254
255 <sect1 id="debug_object_free">
256 <title>debug_object_free</title>
257 <para>
258 This function is called before an object is freed.
259 </para>
260 <para>
261 When the real object is tracked by debugobjects it is checked,
262 whether the object can be freed. Free is not allowed for
263 active objects. When debugobjects detects an error, then it
264 calls the fixup_free function of the object type description
265 structure if provided by the caller. The fixup function can
266 correct the problem before the real free of the object
267 happens. E.g. it can deactivate an active object in order to
268 prevent damage to the subsystem.
269 </para>
270 <para>
271 Note that debug_object_free removes the object from the
272 tracker. Later usage of the object is detected by the other
273 debug checks.
274 </para>
275 </sect1>
276 </chapter>
277 <chapter id="fixupfunctions">
278 <title>Fixup functions</title>
279 <sect1 id="debug_obj_descr">
280 <title>Debug object type description structure</title>
281!Iinclude/linux/debugobjects.h
282 </sect1>
283 <sect1 id="fixup_init">
284 <title>fixup_init</title>
285 <para>
286 This function is called from the debug code whenever a problem
287 in debug_object_init is detected. The function takes the
288 address of the object and the state which is currently
289 recorded in the tracker.
290 </para>
291 <para>
292 Called from debug_object_init when the object state is:
293 <itemizedlist>
294 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
295 </itemizedlist>
296 </para>
297 <para>
298 The function returns 1 when the fixup was successful,
299 otherwise 0. The return value is used to update the
300 statistics.
301 </para>
302 <para>
303 Note, that the function needs to call the debug_object_init()
304 function again, after the damage has been repaired in order to
305 keep the state consistent.
306 </para>
307 </sect1>
308
309 <sect1 id="fixup_activate">
310 <title>fixup_activate</title>
311 <para>
312 This function is called from the debug code whenever a problem
313 in debug_object_activate is detected.
314 </para>
315 <para>
316 Called from debug_object_activate when the object state is:
317 <itemizedlist>
318 <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem>
319 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
320 </itemizedlist>
321 </para>
322 <para>
323 The function returns 1 when the fixup was successful,
324 otherwise 0. The return value is used to update the
325 statistics.
326 </para>
327 <para>
328 Note that the function needs to call the debug_object_activate()
329 function again after the damage has been repaired in order to
330 keep the state consistent.
331 </para>
332 <para>
333 The activation of statically initialized objects is a special
334 case. When debug_object_activate() has no tracked object for
335 this object address then fixup_activate() is called with
336 object state ODEBUG_STATE_NOTAVAILABLE. The fixup function
337 needs to check whether this is a legitimate case of a
338 statically initialized object or not. In case it is it calls
339 debug_object_init() and debug_object_activate() to make the
340 object known to the tracker and marked active. In this case
341 the function should return 0 because this is not a real fixup.
342 </para>
343 </sect1>
344
345 <sect1 id="fixup_destroy">
346 <title>fixup_destroy</title>
347 <para>
348 This function is called from the debug code whenever a problem
349 in debug_object_destroy is detected.
350 </para>
351 <para>
352 Called from debug_object_destroy when the object state is:
353 <itemizedlist>
354 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
355 </itemizedlist>
356 </para>
357 <para>
358 The function returns 1 when the fixup was successful,
359 otherwise 0. The return value is used to update the
360 statistics.
361 </para>
362 </sect1>
363 <sect1 id="fixup_free">
364 <title>fixup_free</title>
365 <para>
366 This function is called from the debug code whenever a problem
367 in debug_object_free is detected. Further it can be called
368 from the debug checks in kfree/vfree, when an active object is
369 detected from the debug_check_no_obj_freed() sanity checks.
370 </para>
371 <para>
372 Called from debug_object_free() or debug_check_no_obj_freed()
373 when the object state is:
374 <itemizedlist>
375 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
376 </itemizedlist>
377 </para>
378 <para>
379 The function returns 1 when the fixup was successful,
380 otherwise 0. The return value is used to update the
381 statistics.
382 </para>
383 </sect1>
384 </chapter>
385 <chapter id="bugs">
386 <title>Known Bugs And Assumptions</title>
387 <para>
388 None (knock on wood).
389 </para>
390 </chapter>
391</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 488dd4a4945b..b7b1482f6e04 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -119,7 +119,7 @@ X!Ilib/string.c
119!Elib/string.c 119!Elib/string.c
120 </sect1> 120 </sect1>
121 <sect1><title>Bit Operations</title> 121 <sect1><title>Bit Operations</title>
122!Iinclude/asm-x86/bitops_32.h 122!Iinclude/asm-x86/bitops.h
123 </sect1> 123 </sect1>
124 </chapter> 124 </chapter>
125 125
@@ -645,4 +645,58 @@ X!Idrivers/video/console/fonts.c
645!Edrivers/i2c/i2c-core.c 645!Edrivers/i2c/i2c-core.c
646 </chapter> 646 </chapter>
647 647
648 <chapter id="clk">
649 <title>Clock Framework</title>
650
651 <para>
652 The clock framework defines programming interfaces to support
653 software management of the system clock tree.
654 This framework is widely used with System-On-Chip (SOC) platforms
655 to support power management and various devices which may need
656 custom clock rates.
657 Note that these "clocks" don't relate to timekeeping or real
658 time clocks (RTCs), each of which have separate frameworks.
659 These <structname>struct clk</structname> instances may be used
660 to manage for example a 96 MHz signal that is used to shift bits
661 into and out of peripherals or busses, or otherwise trigger
662 synchronous state machine transitions in system hardware.
663 </para>
664
665 <para>
666 Power management is supported by explicit software clock gating:
667 unused clocks are disabled, so the system doesn't waste power
668 changing the state of transistors that aren't in active use.
669 On some systems this may be backed by hardware clock gating,
670 where clocks are gated without being disabled in software.
671 Sections of chips that are powered but not clocked may be able
672 to retain their last state.
673 This low power state is often called a <emphasis>retention
674 mode</emphasis>.
675 This mode still incurs leakage currents, especially with finer
676 circuit geometries, but for CMOS circuits power is mostly used
677 by clocked state changes.
678 </para>
679
680 <para>
681 Power-aware drivers only enable their clocks when the device
682 they manage is in active use. Also, system sleep states often
683 differ according to which clock domains are active: while a
684 "standby" state may allow wakeup from several active domains, a
685 "mem" (suspend-to-RAM) state may require a more wholesale shutdown
686 of clocks derived from higher speed PLLs and oscillators, limiting
687 the number of possible wakeup event sources. A driver's suspend
688 method may need to be aware of system-specific clock constraints
689 on the target sleep state.
690 </para>
691
692 <para>
693 Some platforms support programmable clock generators. These
694 can be used by external chips of various kinds, such as other
695 CPUs, multimedia codecs, and devices with strict requirements
696 for interface clocking.
697 </para>
698
699!Iinclude/linux/clk.h
700 </chapter>
701
648</book> 702</book>
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
index b9e143e28c64..54eb26b57372 100644
--- a/Documentation/DocBook/rapidio.tmpl
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -133,7 +133,6 @@
133!Idrivers/rapidio/rio-sysfs.c 133!Idrivers/rapidio/rio-sysfs.c
134 </sect1> 134 </sect1>
135 <sect1 id="PPC32_support"><title>PPC32 support</title> 135 <sect1 id="PPC32_support"><title>PPC32 support</title>
136!Iarch/powerpc/kernel/rio.c
137!Earch/powerpc/sysdev/fsl_rio.c 136!Earch/powerpc/sysdev/fsl_rio.c
138!Iarch/powerpc/sysdev/fsl_rio.c 137!Iarch/powerpc/sysdev/fsl_rio.c
139 </sect1> 138 </sect1>
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
new file mode 100644
index 000000000000..000b0fbdc105
--- /dev/null
+++ b/Documentation/braille-console.txt
@@ -0,0 +1,34 @@
1 Linux Braille Console
2
3To get early boot messages on a braille device (before userspace screen
4readers can start), you first need to compile the support for the usual serial
5console (see serial-console.txt), and for braille device (in Device Drivers -
6Accessibility).
7
8Then you need to specify a console=brl, option on the kernel command line, the
9format is:
10
11 console=brl,serial_options...
12
13where serial_options... are the same as described in serial-console.txt
14
15So for instance you can use console=brl,ttyS0 if the braille device is connected
16to the first serial port, and console=brl,ttyS0,115200 to override the baud rate
17to 115200, etc.
18
19By default, the braille device will just show the last kernel message (console
20mode). To review previous messages, press the Insert key to switch to the VT
21review mode. In review mode, the arrow keys permit to browse in the VT content,
22page up/down keys go at the top/bottom of the screen, and the home key goes back
23to the cursor, hence providing very basic screen reviewing facility.
24
25Sound feedback can be obtained by adding the braille_console.sound=1 kernel
26parameter.
27
28For simplicity, only one braille console can be enabled, other uses of
29console=brl,... will be discarded. Also note that it does not interfere with
30the console selection mecanism described in serial-console.txt
31
32For now, only the VisioBraille device is supported.
33
34Samuel Thibault <samuel.thibault@ens-lyon.org>
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
index 31d12e21ff8a..c298a6690e0d 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups.txt
@@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking.
500 500
501void fork(struct cgroup_subsy *ss, struct task_struct *task) 501void fork(struct cgroup_subsy *ss, struct task_struct *task)
502 502
503Called when a task is forked into a cgroup. Also called during 503Called when a task is forked into a cgroup.
504registration for all existing tasks.
505 504
506void exit(struct cgroup_subsys *ss, struct task_struct *task) 505void exit(struct cgroup_subsys *ss, struct task_struct *task)
507 506
diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt
new file mode 100644
index 000000000000..4dcea42432c2
--- /dev/null
+++ b/Documentation/controllers/devices.txt
@@ -0,0 +1,48 @@
1Device Whitelist Controller
2
31. Description:
4
5Implement a cgroup to track and enforce open and mknod restrictions
6on device files. A device cgroup associates a device access
7whitelist with each cgroup. A whitelist entry has 4 fields.
8'type' is a (all), c (char), or b (block). 'all' means it applies
9to all types and all major and minor numbers. Major and minor are
10either an integer or * for all. Access is a composition of r
11(read), w (write), and m (mknod).
12
13The root device cgroup starts with rwm to 'all'. A child device
14cgroup gets a copy of the parent. Administrators can then remove
15devices from the whitelist or add new entries. A child cgroup can
16never receive a device access which is denied its parent. However
17when a device access is removed from a parent it will not also be
18removed from the child(ren).
19
202. User Interface
21
22An entry is added using devices.allow, and removed using
23devices.deny. For instance
24
25 echo 'c 1:3 mr' > /cgroups/1/devices.allow
26
27allows cgroup 1 to read and mknod the device usually known as
28/dev/null. Doing
29
30 echo a > /cgroups/1/devices.deny
31
32will remove the default 'a *:* mrw' entry.
33
343. Security
35
36Any task can move itself between cgroups. This clearly won't
37suffice, but we can decide the best way to adequately restrict
38movement as people get some experience with this. We may just want
39to require CAP_SYS_ADMIN, which at least is a separate bit from
40CAP_MKNOD. We may want to just refuse moving to a cgroup which
41isn't a descendent of the current one. Or we may want to use
42CAP_MAC_ADMIN, since we really are trying to lock down root.
43
44CAP_SYS_ADMIN is needed to modify the whitelist or move another
45task to a new cgroup. (Again we'll probably want to change that).
46
47A cgroup may not be granted more permissions than the cgroup's
48parent has.
diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt
new file mode 100644
index 000000000000..f196ac1d7d25
--- /dev/null
+++ b/Documentation/controllers/resource_counter.txt
@@ -0,0 +1,181 @@
1
2 The Resource Counter
3
4The resource counter, declared at include/linux/res_counter.h,
5is supposed to facilitate the resource management by controllers
6by providing common stuff for accounting.
7
8This "stuff" includes the res_counter structure and routines
9to work with it.
10
11
12
131. Crucial parts of the res_counter structure
14
15 a. unsigned long long usage
16
17 The usage value shows the amount of a resource that is consumed
18 by a group at a given time. The units of measurement should be
19 determined by the controller that uses this counter. E.g. it can
20 be bytes, items or any other unit the controller operates on.
21
22 b. unsigned long long max_usage
23
24 The maximal value of the usage over time.
25
26 This value is useful when gathering statistical information about
27 the particular group, as it shows the actual resource requirements
28 for a particular group, not just some usage snapshot.
29
30 c. unsigned long long limit
31
32 The maximal allowed amount of resource to consume by the group. In
33 case the group requests for more resources, so that the usage value
34 would exceed the limit, the resource allocation is rejected (see
35 the next section).
36
37 d. unsigned long long failcnt
38
39 The failcnt stands for "failures counter". This is the number of
40 resource allocation attempts that failed.
41
42 c. spinlock_t lock
43
44 Protects changes of the above values.
45
46
47
482. Basic accounting routines
49
50 a. void res_counter_init(struct res_counter *rc)
51
52 Initializes the resource counter. As usual, should be the first
53 routine called for a new counter.
54
55 b. int res_counter_charge[_locked]
56 (struct res_counter *rc, unsigned long val)
57
58 When a resource is about to be allocated it has to be accounted
59 with the appropriate resource counter (controller should determine
60 which one to use on its own). This operation is called "charging".
61
62 This is not very important which operation - resource allocation
63 or charging - is performed first, but
64 * if the allocation is performed first, this may create a
65 temporary resource over-usage by the time resource counter is
66 charged;
67 * if the charging is performed first, then it should be uncharged
68 on error path (if the one is called).
69
70 c. void res_counter_uncharge[_locked]
71 (struct res_counter *rc, unsigned long val)
72
73 When a resource is released (freed) it should be de-accounted
74 from the resource counter it was accounted to. This is called
75 "uncharging".
76
77 The _locked routines imply that the res_counter->lock is taken.
78
79
80 2.1 Other accounting routines
81
82 There are more routines that may help you with common needs, like
83 checking whether the limit is reached or resetting the max_usage
84 value. They are all declared in include/linux/res_counter.h.
85
86
87
883. Analyzing the resource counter registrations
89
90 a. If the failcnt value constantly grows, this means that the counter's
91 limit is too tight. Either the group is misbehaving and consumes too
92 many resources, or the configuration is not suitable for the group
93 and the limit should be increased.
94
95 b. The max_usage value can be used to quickly tune the group. One may
96 set the limits to maximal values and either load the container with
97 a common pattern or leave one for a while. After this the max_usage
98 value shows the amount of memory the container would require during
99 its common activity.
100
101 Setting the limit a bit above this value gives a pretty good
102 configuration that works in most of the cases.
103
104 c. If the max_usage is much less than the limit, but the failcnt value
105 is growing, then the group tries to allocate a big chunk of resource
106 at once.
107
108 d. If the max_usage is much less than the limit, but the failcnt value
109 is 0, then this group is given too high limit, that it does not
110 require. It is better to lower the limit a bit leaving more resource
111 for other groups.
112
113
114
1154. Communication with the control groups subsystem (cgroups)
116
117All the resource controllers that are using cgroups and resource counters
118should provide files (in the cgroup filesystem) to work with the resource
119counter fields. They are recommended to adhere to the following rules:
120
121 a. File names
122
123 Field name File name
124 ---------------------------------------------------
125 usage usage_in_<unit_of_measurement>
126 max_usage max_usage_in_<unit_of_measurement>
127 limit limit_in_<unit_of_measurement>
128 failcnt failcnt
129 lock no file :)
130
131 b. Reading from file should show the corresponding field value in the
132 appropriate format.
133
134 c. Writing to file
135
136 Field Expected behavior
137 ----------------------------------
138 usage prohibited
139 max_usage reset to usage
140 limit set the limit
141 failcnt reset to zero
142
143
144
1455. Usage example
146
147 a. Declare a task group (take a look at cgroups subsystem for this) and
148 fold a res_counter into it
149
150 struct my_group {
151 struct res_counter res;
152
153 <other fields>
154 }
155
156 b. Put hooks in resource allocation/release paths
157
158 int alloc_something(...)
159 {
160 if (res_counter_charge(res_counter_ptr, amount) < 0)
161 return -ENOMEM;
162
163 <allocate the resource and return to the caller>
164 }
165
166 void release_something(...)
167 {
168 res_counter_uncharge(res_counter_ptr, amount);
169
170 <release the resource>
171 }
172
173 In order to keep the usage value self-consistent, both the
174 "res_counter_ptr" and the "amount" in release_something() should be
175 the same as they were in the alloc_something() when the releasing
176 resource was allocated.
177
178 c. Provide the way to read res_counter values and set them (the cgroups
179 still can help with it).
180
181 c. Compile and run :)
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index af3b925ece08..6c442d8426b5 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another
154 that some governors won't load - they only 154 that some governors won't load - they only
155 work on some specific architectures or 155 work on some specific architectures or
156 processors. 156 processors.
157
158cpuinfo_cur_freq : Current speed of the CPU, in KHz.
159
160scaling_available_frequencies : List of available frequencies, in KHz.
161
157scaling_min_freq and 162scaling_min_freq and
158scaling_max_freq show the current "policy limits" (in 163scaling_max_freq show the current "policy limits" (in
159 kHz). By echoing new values into these 164 kHz). By echoing new values into these
@@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in
162 first set scaling_max_freq, then 167 first set scaling_max_freq, then
163 scaling_min_freq. 168 scaling_min_freq.
164 169
170affected_cpus : List of CPUs that require software coordination
171 of frequency.
172
173related_cpus : List of CPUs that need some sort of frequency
174 coordination, whether software or hardware.
175
176scaling_driver : Hardware driver for cpufreq.
177
178scaling_cur_freq : Current frequency of the CPU, in KHz.
165 179
166If you have selected the "userspace" governor which allows you to 180If you have selected the "userspace" governor which allows you to
167set the CPU operating frequency to a specific value, you can read out 181set the CPU operating frequency to a specific value, you can read out
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index aa854b9b18cd..fb7b361e6eea 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -171,6 +171,7 @@ files describing that cpuset:
171 - memory_migrate flag: if set, move pages to cpusets nodes 171 - memory_migrate flag: if set, move pages to cpusets nodes
172 - cpu_exclusive flag: is cpu placement exclusive? 172 - cpu_exclusive flag: is cpu placement exclusive?
173 - mem_exclusive flag: is memory placement exclusive? 173 - mem_exclusive flag: is memory placement exclusive?
174 - mem_hardwall flag: is memory allocation hardwalled
174 - memory_pressure: measure of how much paging pressure in cpuset 175 - memory_pressure: measure of how much paging pressure in cpuset
175 176
176In addition, the root cpuset only has the following file: 177In addition, the root cpuset only has the following file:
@@ -222,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
222a direct ancestor or descendent, may share any of the same CPUs or 223a direct ancestor or descendent, may share any of the same CPUs or
223Memory Nodes. 224Memory Nodes.
224 225
225A cpuset that is mem_exclusive restricts kernel allocations for 226A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
226page, buffer and other data commonly shared by the kernel across 227i.e. it restricts kernel allocations for page, buffer and other data
227multiple users. All cpusets, whether mem_exclusive or not, restrict 228commonly shared by the kernel across multiple users. All cpusets,
228allocations of memory for user space. This enables configuring a 229whether hardwalled or not, restrict allocations of memory for user
229system so that several independent jobs can share common kernel data, 230space. This enables configuring a system so that several independent
230such as file system pages, while isolating each jobs user allocation in 231jobs can share common kernel data, such as file system pages, while
231its own cpuset. To do this, construct a large mem_exclusive cpuset to 232isolating each job's user allocation in its own cpuset. To do this,
232hold all the jobs, and construct child, non-mem_exclusive cpusets for 233construct a large mem_exclusive cpuset to hold all the jobs, and
233each individual job. Only a small amount of typical kernel memory, 234construct child, non-mem_exclusive cpusets for each individual job.
234such as requests from interrupt handlers, is allowed to be taken 235Only a small amount of typical kernel memory, such as requests from
235outside even a mem_exclusive cpuset. 236interrupt handlers, is allowed to be taken outside even a
237mem_exclusive cpuset.
236 238
237 239
2381.5 What is memory_pressure ? 2401.5 What is memory_pressure ?
@@ -707,7 +709,7 @@ Now you want to do something with this cpuset.
707 709
708In this directory you can find several files: 710In this directory you can find several files:
709# ls 711# ls
710cpus cpu_exclusive mems mem_exclusive tasks 712cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks
711 713
712Reading them will give you information about the state of this cpuset: 714Reading them will give you information about the state of this cpuset:
713the CPUs and Memory Nodes it can use, the processes that are using 715the CPUs and Memory Nodes it can use, the processes that are using
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 354aec047c0e..881e6dd03aea 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -141,6 +141,7 @@ mkprep
141mktables 141mktables
142mktree 142mktree
143modpost 143modpost
144modules.order
144modversions.h* 145modversions.h*
145offset.h 146offset.h
146offsets.h 147offsets.h
@@ -171,6 +172,7 @@ sm_tbl*
171split-include 172split-include
172tags 173tags
173tftpboot.img 174tftpboot.img
175timeconst.h
174times.h* 176times.h*
175tkparse 177tkparse
176trix_boot.h 178trix_boot.h
diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt
new file mode 100644
index 000000000000..2f640903bbb2
--- /dev/null
+++ b/Documentation/fb/gxfb.txt
@@ -0,0 +1,52 @@
1[This file is cloned from VesaFB/aty128fb]
2
3What is gxfb?
4=================
5
6This is a graphics framebuffer driver for AMD Geode GX2 based processors.
7
8Advantages:
9
10 * No need to use AMD's VSA code (or other VESA emulation layer) in the
11 BIOS.
12 * It provides a nice large console (128 cols + 48 lines with 1024x768)
13 without using tiny, unreadable fonts.
14 * You can run XF68_FBDev on top of /dev/fb0
15 * Most important: boot logo :-)
16
17Disadvantages:
18
19 * graphic mode is slower than text mode...
20
21
22How to use it?
23==============
24
25Switching modes is done using gxfb.mode_option=<resolution>... boot
26parameter or using `fbset' program.
27
28See Documentation/fb/modedb.txt for more information on modedb
29resolutions.
30
31
32X11
33===
34
35XF68_FBDev should generally work fine, but it is non-accelerated.
36
37
38Configuration
39=============
40
41You can pass kernel command line options to gxfb with gxfb.<option>.
42For example, gxfb.mode_option=800x600@75.
43Accepted options:
44
45mode_option - specify the video mode. Of the form
46 <x>x<y>[-<bpp>][@<refresh>]
47vram - size of video ram (normally auto-detected)
48vt_switch - enable vt switching during suspend/resume. The vt
49 switch is slow, but harmless.
50
51--
52Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt
index da5ee74219e8..27a3160650a4 100644
--- a/Documentation/fb/intelfb.txt
+++ b/Documentation/fb/intelfb.txt
@@ -14,6 +14,8 @@ graphics devices. These would include:
14 Intel 915GM 14 Intel 915GM
15 Intel 945G 15 Intel 945G
16 Intel 945GM 16 Intel 945GM
17 Intel 965G
18 Intel 965GM
17 19
18B. List of available options 20B. List of available options
19 21
diff --git a/Documentation/fb/lxfb.txt b/Documentation/fb/lxfb.txt
new file mode 100644
index 000000000000..38b3ca6f6ca7
--- /dev/null
+++ b/Documentation/fb/lxfb.txt
@@ -0,0 +1,52 @@
1[This file is cloned from VesaFB/aty128fb]
2
3What is lxfb?
4=================
5
6This is a graphics framebuffer driver for AMD Geode LX based processors.
7
8Advantages:
9
10 * No need to use AMD's VSA code (or other VESA emulation layer) in the
11 BIOS.
12 * It provides a nice large console (128 cols + 48 lines with 1024x768)
13 without using tiny, unreadable fonts.
14 * You can run XF68_FBDev on top of /dev/fb0
15 * Most important: boot logo :-)
16
17Disadvantages:
18
19 * graphic mode is slower than text mode...
20
21
22How to use it?
23==============
24
25Switching modes is done using lxfb.mode_option=<resolution>... boot
26parameter or using `fbset' program.
27
28See Documentation/fb/modedb.txt for more information on modedb
29resolutions.
30
31
32X11
33===
34
35XF68_FBDev should generally work fine, but it is non-accelerated.
36
37
38Configuration
39=============
40
41You can pass kernel command line options to lxfb with lxfb.<option>.
42For example, lxfb.mode_option=800x600@75.
43Accepted options:
44
45mode_option - specify the video mode. Of the form
46 <x>x<y>[-<bpp>][@<refresh>]
47vram - size of video ram (normally auto-detected)
48vt_switch - enable vt switching during suspend/resume. The vt
49 switch is slow, but harmless.
50
51--
52Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/metronomefb.txt b/Documentation/fb/metronomefb.txt
index b9a2e7b7e838..237ca412582d 100644
--- a/Documentation/fb/metronomefb.txt
+++ b/Documentation/fb/metronomefb.txt
@@ -1,7 +1,7 @@
1 Metronomefb 1 Metronomefb
2 ----------- 2 -----------
3Maintained by Jaya Kumar <jayakumar.lkml.gmail.com> 3Maintained by Jaya Kumar <jayakumar.lkml.gmail.com>
4Last revised: Nov 20, 2007 4Last revised: Mar 10, 2008
5 5
6Metronomefb is a driver for the Metronome display controller. The controller 6Metronomefb is a driver for the Metronome display controller. The controller
7is from E-Ink Corporation. It is intended to be used to drive the E-Ink 7is from E-Ink Corporation. It is intended to be used to drive the E-Ink
@@ -11,20 +11,18 @@ display media here http://www.e-ink.com/products/matrix/metronome.html .
11Metronome is interfaced to the host CPU through the AMLCD interface. The 11Metronome is interfaced to the host CPU through the AMLCD interface. The
12host CPU generates the control information and the image in a framebuffer 12host CPU generates the control information and the image in a framebuffer
13which is then delivered to the AMLCD interface by a host specific method. 13which is then delivered to the AMLCD interface by a host specific method.
14Currently, that's implemented for the PXA's LCDC controller. The display and 14The display and error status are each pulled through individual GPIOs.
15error status are each pulled through individual GPIOs.
16 15
17Metronomefb was written for the PXA255/gumstix/lyre combination and 16Metronomefb is platform independent and depends on a board specific driver
18therefore currently has board set specific code in it. If other boards based on 17to do all physical IO work. Currently, an example is implemented for the
19other architectures are available, then the host specific code can be separated 18PXA board used in the AM-200 EPD devkit. This example is am200epd.c
20and abstracted out.
21 19
22Metronomefb requires waveform information which is delivered via the AMLCD 20Metronomefb requires waveform information which is delivered via the AMLCD
23interface to the metronome controller. The waveform information is expected to 21interface to the metronome controller. The waveform information is expected to
24be delivered from userspace via the firmware class interface. The waveform file 22be delivered from userspace via the firmware class interface. The waveform file
25can be compressed as long as your udev or hotplug script is aware of the need 23can be compressed as long as your udev or hotplug script is aware of the need
26to uncompress it before delivering it. metronomefb will ask for waveform.wbf 24to uncompress it before delivering it. metronomefb will ask for metronome.wbf
27which would typically go into /lib/firmware/waveform.wbf depending on your 25which would typically go into /lib/firmware/metronome.wbf depending on your
28udev/hotplug setup. I have only tested with a single waveform file which was 26udev/hotplug setup. I have only tested with a single waveform file which was
29originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for. 27originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for.
30Caution should be exercised when manipulating the waveform as there may be 28Caution should be exercised when manipulating the waveform as there may be
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt
index 4fcdb4cf4cca..ec4dee75a354 100644
--- a/Documentation/fb/modedb.txt
+++ b/Documentation/fb/modedb.txt
@@ -125,8 +125,12 @@ There may be more modes.
125 amifb - Amiga chipset frame buffer 125 amifb - Amiga chipset frame buffer
126 aty128fb - ATI Rage128 / Pro frame buffer 126 aty128fb - ATI Rage128 / Pro frame buffer
127 atyfb - ATI Mach64 frame buffer 127 atyfb - ATI Mach64 frame buffer
128 pm2fb - Permedia 2/2V frame buffer
129 pm3fb - Permedia 3 frame buffer
130 sstfb - Voodoo 1/2 (SST1) chipset frame buffer
128 tdfxfb - 3D Fx frame buffer 131 tdfxfb - 3D Fx frame buffer
129 tridentfb - Trident (Cyber)blade chipset frame buffer 132 tridentfb - Trident (Cyber)blade chipset frame buffer
133 vt8623fb - VIA 8623 frame buffer
130 134
131BTW, only a few drivers use this at the moment. Others are to follow 135BTW, only a few drivers use this at the moment. Others are to follow
132(feel free to send patches). 136(feel free to send patches).
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 448729fcaeb1..3c35d452b1a9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -128,15 +128,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com>
128 128
129--------------------------- 129---------------------------
130 130
131What: vm_ops.nopage
132When: Soon, provided in-kernel callers have been converted
133Why: This interface is replaced by vm_ops.fault, but it has been around
134 forever, is used by a lot of drivers, and doesn't cost much to
135 maintain.
136Who: Nick Piggin <npiggin@suse.de>
137
138---------------------------
139
140What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment 131What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
141When: October 2008 132When: October 2008
142Why: The stacking of class devices makes these values misleading and 133Why: The stacking of class devices makes these values misleading and
@@ -147,6 +138,24 @@ Who: Kay Sievers <kay.sievers@suse.de>
147 138
148--------------------------- 139---------------------------
149 140
141What: find_task_by_pid
142When: 2.6.26
143Why: With pid namespaces, calling this funciton will return the
144 wrong task when called from inside a namespace.
145
146 The best way to save a task pid and find a task by this
147 pid later, is to find this task's struct pid pointer (or get
148 it directly from the task) and call pid_task() later.
149
150 If someone really needs to get a task by its pid_t, then
151 he most likely needs the find_task_by_vpid() to get the
152 task from the same namespace as the current task is in, but
153 this may be not so in general.
154
155Who: Pavel Emelyanov <xemul@openvz.org>
156
157---------------------------
158
150What: ACPI procfs interface 159What: ACPI procfs interface
151When: July 2008 160When: July 2008
152Why: ACPI sysfs conversion should be finished by January 2008. 161Why: ACPI sysfs conversion should be finished by January 2008.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 42d4b30b1045..c2992bc54f2f 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -511,7 +511,6 @@ prototypes:
511 void (*open)(struct vm_area_struct*); 511 void (*open)(struct vm_area_struct*);
512 void (*close)(struct vm_area_struct*); 512 void (*close)(struct vm_area_struct*);
513 int (*fault)(struct vm_area_struct*, struct vm_fault *); 513 int (*fault)(struct vm_area_struct*, struct vm_fault *);
514 struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
515 int (*page_mkwrite)(struct vm_area_struct *, struct page *); 514 int (*page_mkwrite)(struct vm_area_struct *, struct page *);
516 515
517locking rules: 516locking rules:
@@ -519,7 +518,6 @@ locking rules:
519open: no yes 518open: no yes
520close: no yes 519close: no yes
521fault: no yes 520fault: no yes
522nopage: no yes
523page_mkwrite: no yes no 521page_mkwrite: no yes no
524 522
525 ->page_mkwrite() is called when a previously read-only page is 523 ->page_mkwrite() is called when a previously read-only page is
@@ -537,4 +535,3 @@ NULL.
537 535
538ipc/shm.c::shm_delete() - may need BKL. 536ipc/shm.c::shm_delete() - may need BKL.
539->read() and ->write() in many drivers are (probably) missing BKL. 537->read() and ->write() in many drivers are (probably) missing BKL.
540drivers/sgi/char/graphics.c::sgi_graphics_nopage() - may need BKL.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2a99116edc47..dbc3c6a3650f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -463,11 +463,17 @@ SwapTotal: 0 kB
463SwapFree: 0 kB 463SwapFree: 0 kB
464Dirty: 968 kB 464Dirty: 968 kB
465Writeback: 0 kB 465Writeback: 0 kB
466AnonPages: 861800 kB
466Mapped: 280372 kB 467Mapped: 280372 kB
467Slab: 684068 kB 468Slab: 284364 kB
469SReclaimable: 159856 kB
470SUnreclaim: 124508 kB
471PageTables: 24448 kB
472NFS_Unstable: 0 kB
473Bounce: 0 kB
474WritebackTmp: 0 kB
468CommitLimit: 7669796 kB 475CommitLimit: 7669796 kB
469Committed_AS: 100056 kB 476Committed_AS: 100056 kB
470PageTables: 24448 kB
471VmallocTotal: 112216 kB 477VmallocTotal: 112216 kB
472VmallocUsed: 428 kB 478VmallocUsed: 428 kB
473VmallocChunk: 111088 kB 479VmallocChunk: 111088 kB
@@ -503,8 +509,17 @@ VmallocChunk: 111088 kB
503 on the disk 509 on the disk
504 Dirty: Memory which is waiting to get written back to the disk 510 Dirty: Memory which is waiting to get written back to the disk
505 Writeback: Memory which is actively being written back to the disk 511 Writeback: Memory which is actively being written back to the disk
512 AnonPages: Non-file backed pages mapped into userspace page tables
506 Mapped: files which have been mmaped, such as libraries 513 Mapped: files which have been mmaped, such as libraries
507 Slab: in-kernel data structures cache 514 Slab: in-kernel data structures cache
515SReclaimable: Part of Slab, that might be reclaimed, such as caches
516 SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
517 PageTables: amount of memory dedicated to the lowest level of page
518 tables.
519NFS_Unstable: NFS pages sent to the server, but not yet committed to stable
520 storage
521 Bounce: Memory used for block device "bounce buffers"
522WritebackTmp: Memory used by FUSE for temporary writeback buffers
508 CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'), 523 CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
509 this is the total amount of memory currently available to 524 this is the total amount of memory currently available to
510 be allocated on the system. This limit is only adhered to 525 be allocated on the system. This limit is only adhered to
@@ -531,8 +546,6 @@ Committed_AS: The amount of memory presently allocated on the system.
531 above) will not be permitted. This is useful if one needs 546 above) will not be permitted. This is useful if one needs
532 to guarantee that processes will not fail due to lack of 547 to guarantee that processes will not fail due to lack of
533 memory once that memory has been successfully allocated. 548 memory once that memory has been successfully allocated.
534 PageTables: amount of memory dedicated to the lowest level of page
535 tables.
536VmallocTotal: total size of vmalloc memory area 549VmallocTotal: total size of vmalloc memory area
537 VmallocUsed: amount of vmalloc area which is used 550 VmallocUsed: amount of vmalloc area which is used
538VmallocChunk: largest contigious block of vmalloc area which is free 551VmallocChunk: largest contigious block of vmalloc area which is free
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 145e44086358..222437efd75a 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -92,6 +92,18 @@ NodeList format is a comma-separated list of decimal numbers and ranges,
92a range being two hyphen-separated decimal numbers, the smallest and 92a range being two hyphen-separated decimal numbers, the smallest and
93largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 93largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15
94 94
95NUMA memory allocation policies have optional flags that can be used in
96conjunction with their modes. These optional flags can be specified
97when tmpfs is mounted by appending them to the mode before the NodeList.
98See Documentation/vm/numa_memory_policy.txt for a list of all available
99memory allocation policy mode flags.
100
101 =static is equivalent to MPOL_F_STATIC_NODES
102 =relative is equivalent to MPOL_F_RELATIVE_NODES
103
104For example, mpol=bind=static:NodeList, is the equivalent of an
105allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES.
106
95Note that trying to mount a tmpfs with an mpol option will fail if the 107Note that trying to mount a tmpfs with an mpol option will fail if the
96running kernel does not support NUMA; and will fail if its nodelist 108running kernel does not support NUMA; and will fail if its nodelist
97specifies a node which is not online. If your system relies on that 109specifies a node which is not online. If your system relies on that
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index fcc123ffa252..2d5e1e582e13 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -17,6 +17,21 @@ dmask=### -- The permission mask for the directory.
17fmask=### -- The permission mask for files. 17fmask=### -- The permission mask for files.
18 The default is the umask of current process. 18 The default is the umask of current process.
19 19
20allow_utime=### -- This option controls the permission check of mtime/atime.
21
22 20 - If current process is in group of file's group ID,
23 you can change timestamp.
24 2 - Other users can change timestamp.
25
26 The default is set from `dmask' option. (If the directory is
27 writable, utime(2) is also allowed. I.e. ~dmask & 022)
28
29 Normally utime(2) checks current process is owner of
30 the file, or it has CAP_FOWNER capability. But FAT
31 filesystem doesn't have uid/gid on disk, so normal
32 check is too unflexible. With this option you can
33 relax it.
34
20codepage=### -- Sets the codepage number for converting to shortname 35codepage=### -- Sets the codepage number for converting to shortname
21 characters on FAT filesystem. 36 characters on FAT filesystem.
22 By default, FAT_DEFAULT_CODEPAGE setting is used. 37 By default, FAT_DEFAULT_CODEPAGE setting is used.
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 54630095aa3c..c35ca9e40d4c 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -107,6 +107,16 @@ type of GPIO controller, and on one particular board 80-95 with an FPGA.
107The numbers need not be contiguous; either of those platforms could also 107The numbers need not be contiguous; either of those platforms could also
108use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. 108use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders.
109 109
110If you want to initialize a structure with an invalid GPIO number, use
111some negative number (perhaps "-EINVAL"); that will never be valid. To
112test if a number could reference a GPIO, you may use this predicate:
113
114 int gpio_is_valid(int number);
115
116A number that's not valid will be rejected by calls which may request
117or free GPIOs (see below). Other numbers may also be rejected; for
118example, a number might be valid but unused on a given board.
119
110Whether a platform supports multiple GPIO controllers is currently a 120Whether a platform supports multiple GPIO controllers is currently a
111platform-specific implementation issue. 121platform-specific implementation issue.
112 122
diff --git a/Documentation/hwmon/w83l785ts b/Documentation/hwmon/w83l785ts
index 1841cedc25b2..bd1fa9d4468d 100644
--- a/Documentation/hwmon/w83l785ts
+++ b/Documentation/hwmon/w83l785ts
@@ -33,7 +33,8 @@ Known Issues
33------------ 33------------
34 34
35On some systems (Asus), the BIOS is known to interfere with the driver 35On some systems (Asus), the BIOS is known to interfere with the driver
36and cause read errors. The driver will retry a given number of times 36and cause read errors. Or maybe the W83L785TS-S chip is simply unreliable,
37we don't really know. The driver will retry a given number of times
37(5 by default) and then give up, returning the old value (or 0 if 38(5 by default) and then give up, returning the old value (or 0 if
38there is no old value). It seems to work well enough so that you should 39there is no old value). It seems to work well enough so that you should
39not notice anything. Thanks to James Bolt for helping test this feature. 40not notice anything. Thanks to James Bolt for helping test this feature.
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index bfb0a5520817..ee75cbace28d 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -164,7 +164,8 @@ I2C device drivers using this binding model work just like any other
164kind of driver in Linux: they provide a probe() method to bind to 164kind of driver in Linux: they provide a probe() method to bind to
165those devices, and a remove() method to unbind. 165those devices, and a remove() method to unbind.
166 166
167 static int foo_probe(struct i2c_client *client); 167 static int foo_probe(struct i2c_client *client,
168 const struct i2c_device_id *id);
168 static int foo_remove(struct i2c_client *client); 169 static int foo_remove(struct i2c_client *client);
169 170
170Remember that the i2c_driver does not create those client handles. The 171Remember that the i2c_driver does not create those client handles. The
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 0fac3465f2e3..95ad15c3b01f 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -40,9 +40,17 @@ Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
40 Introduce relocatable_kernel and kernel_alignment fields. 40 Introduce relocatable_kernel and kernel_alignment fields.
41 41
42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of 42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
43 the boot command line 43 the boot command line.
44 44
45Protocol 2.09: (kernel 2.6.26) Added a field of 64-bit physical 45Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol.
46 Introduced hardware_subarch and hardware_subarch_data
47 and KEEP_SEGMENTS flag in load_flags.
48
49Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
50 payload. Introduced payload_offset and payload length
51 fields to aid in locating the payload.
52
53Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
46 pointer to single linked list of struct setup_data. 54 pointer to single linked list of struct setup_data.
47 55
48**** MEMORY LAYOUT 56**** MEMORY LAYOUT
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 649cb8799890..00b950d1c193 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -104,14 +104,15 @@ applicable everywhere (see syntax).
104 Reverse dependencies can only be used with boolean or tristate 104 Reverse dependencies can only be used with boolean or tristate
105 symbols. 105 symbols.
106 Note: 106 Note:
107 select is evil.... select will by brute force set a symbol 107 select should be used with care. select will force
108 equal to 'y' without visiting the dependencies. So abusing 108 a symbol to a value without visiting the dependencies.
109 select you are able to select a symbol FOO even if FOO depends 109 By abusing select you are able to select a symbol FOO even
110 on BAR that is not set. In general use select only for 110 if FOO depends on BAR that is not set.
111 non-visible symbols (no prompts anywhere) and for symbols with 111 In general use select only for non-visible symbols
112 no dependencies. That will limit the usefulness but on the 112 (no prompts anywhere) and for symbols with no dependencies.
113 other hand avoid the illegal configurations all over. kconfig 113 That will limit the usefulness but on the other hand avoid
114 should one day warn about such things. 114 the illegal configurations all over.
115 kconfig should one day warn about such things.
115 116
116- numerical ranges: "range" <symbol> <symbol> ["if" <expr>] 117- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
117 This allows to limit the range of possible input values for int 118 This allows to limit the range of possible input values for int
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index d0ac72cc19ff..b8e52c0355d3 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -245,6 +245,8 @@ The syntax is:
245 crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset] 245 crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
246 range=start-[end] 246 range=start-[end]
247 247
248 'start' is inclusive and 'end' is exclusive.
249
248For example: 250For example:
249 251
250 crashkernel=512M-2G:64M,2G-:128M 252 crashkernel=512M-2G:64M,2G-:128M
@@ -253,10 +255,11 @@ This would mean:
253 255
254 1) if the RAM is smaller than 512M, then don't reserve anything 256 1) if the RAM is smaller than 512M, then don't reserve anything
255 (this is the "rescue" case) 257 (this is the "rescue" case)
256 2) if the RAM size is between 512M and 2G, then reserve 64M 258 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
257 3) if the RAM size is larger than 2G, then reserve 128M 259 3) if the RAM size is larger than 2G, then reserve 128M
258 260
259 261
262
260Boot into System Kernel 263Boot into System Kernel
261======================= 264=======================
262 265
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e5f3d918316f..a3c35446e755 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -496,6 +496,11 @@ and is between 256 and 4096 characters. It is defined in the file
496 switching to the matching ttyS device later. The 496 switching to the matching ttyS device later. The
497 options are the same as for ttyS, above. 497 options are the same as for ttyS, above.
498 498
499 If the device connected to the port is not a TTY but a braille
500 device, prepend "brl," before the device type, for instance
501 console=brl,ttyS0
502 For now, only VisioBraille is supported.
503
499 earlycon= [KNL] Output early console device and options. 504 earlycon= [KNL] Output early console device and options.
500 uart[8250],io,<addr>[,options] 505 uart[8250],io,<addr>[,options]
501 uart[8250],mmio,<addr>[,options] 506 uart[8250],mmio,<addr>[,options]
@@ -556,6 +561,8 @@ and is between 256 and 4096 characters. It is defined in the file
556 1 will print _a lot_ more information - normally 561 1 will print _a lot_ more information - normally
557 only useful to kernel developers. 562 only useful to kernel developers.
558 563
564 debug_objects [KNL] Enable object debugging
565
559 decnet.addr= [HW,NET] 566 decnet.addr= [HW,NET]
560 Format: <area>[,<node>] 567 Format: <area>[,<node>]
561 See also Documentation/networking/decnet.txt. 568 See also Documentation/networking/decnet.txt.
@@ -627,8 +634,7 @@ and is between 256 and 4096 characters. It is defined in the file
627 eata= [HW,SCSI] 634 eata= [HW,SCSI]
628 635
629 edd= [EDD] 636 edd= [EDD]
630 Format: {"of[f]" | "sk[ipmbr]"} 637 Format: {"off" | "on" | "skip[mbr]"}
631 See comment in arch/i386/boot/edd.S
632 638
633 eisa_irq_edge= [PARISC,HW] 639 eisa_irq_edge= [PARISC,HW]
634 See header of drivers/parisc/eisa.c. 640 See header of drivers/parisc/eisa.c.
@@ -1389,6 +1395,13 @@ and is between 256 and 4096 characters. It is defined in the file
1389 1395
1390 nr_uarts= [SERIAL] maximum number of UARTs to be registered. 1396 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1391 1397
1398 olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
1399 Rather than timing out after 20 ms if an EC
1400 command is not properly ACKed, override the length
1401 of the timeout. We have interrupts disabled while
1402 waiting for the ACK, so if this is set too high
1403 interrupts *may* be lost!
1404
1392 opl3= [HW,OSS] 1405 opl3= [HW,OSS]
1393 Format: <io> 1406 Format: <io>
1394 1407
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 266955d23ee6..09b55e461740 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -11,26 +11,29 @@ request_key*():
11 11
12 struct key *request_key(const struct key_type *type, 12 struct key *request_key(const struct key_type *type,
13 const char *description, 13 const char *description,
14 const char *callout_string); 14 const char *callout_info);
15 15
16or: 16or:
17 17
18 struct key *request_key_with_auxdata(const struct key_type *type, 18 struct key *request_key_with_auxdata(const struct key_type *type,
19 const char *description, 19 const char *description,
20 const char *callout_string, 20 const char *callout_info,
21 size_t callout_len,
21 void *aux); 22 void *aux);
22 23
23or: 24or:
24 25
25 struct key *request_key_async(const struct key_type *type, 26 struct key *request_key_async(const struct key_type *type,
26 const char *description, 27 const char *description,
27 const char *callout_string); 28 const char *callout_info,
29 size_t callout_len);
28 30
29or: 31or:
30 32
31 struct key *request_key_async_with_auxdata(const struct key_type *type, 33 struct key *request_key_async_with_auxdata(const struct key_type *type,
32 const char *description, 34 const char *description,
33 const char *callout_string, 35 const char *callout_info,
36 size_t callout_len,
34 void *aux); 37 void *aux);
35 38
36Or by userspace invoking the request_key system call: 39Or by userspace invoking the request_key system call:
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 51652d39e61c..d5c7a57d1700 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -170,7 +170,8 @@ The key service provides a number of features besides keys:
170 amount of description and payload space that can be consumed. 170 amount of description and payload space that can be consumed.
171 171
172 The user can view information on this and other statistics through procfs 172 The user can view information on this and other statistics through procfs
173 files. 173 files. The root user may also alter the quota limits through sysctl files
174 (see the section "New procfs files").
174 175
175 Process-specific and thread-specific keyrings are not counted towards a 176 Process-specific and thread-specific keyrings are not counted towards a
176 user's quota. 177 user's quota.
@@ -329,6 +330,27 @@ about the status of the key service:
329 <bytes>/<max> Key size quota 330 <bytes>/<max> Key size quota
330 331
331 332
333Four new sysctl files have been added also for the purpose of controlling the
334quota limits on keys:
335
336 (*) /proc/sys/kernel/keys/root_maxkeys
337 /proc/sys/kernel/keys/root_maxbytes
338
339 These files hold the maximum number of keys that root may have and the
340 maximum total number of bytes of data that root may have stored in those
341 keys.
342
343 (*) /proc/sys/kernel/keys/maxkeys
344 /proc/sys/kernel/keys/maxbytes
345
346 These files hold the maximum number of keys that each non-root user may
347 have and the maximum total number of bytes of data that each of those
348 users may have stored in their keys.
349
350Root may alter these by writing each new limit as a decimal number string to
351the appropriate file.
352
353
332=============================== 354===============================
333USERSPACE SYSTEM CALL INTERFACE 355USERSPACE SYSTEM CALL INTERFACE
334=============================== 356===============================
@@ -711,6 +733,27 @@ The keyctl syscall functions are:
711 The assumed authoritative key is inherited across fork and exec. 733 The assumed authoritative key is inherited across fork and exec.
712 734
713 735
736 (*) Get the LSM security context attached to a key.
737
738 long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer,
739 size_t buflen)
740
741 This function returns a string that represents the LSM security context
742 attached to a key in the buffer provided.
743
744 Unless there's an error, it always returns the amount of data it could
745 produce, even if that's too big for the buffer, but it won't copy more
746 than requested to userspace. If the buffer pointer is NULL then no copy
747 will take place.
748
749 A NUL character is included at the end of the string if the buffer is
750 sufficiently big. This is included in the returned count. If no LSM is
751 in force then an empty string will be returned.
752
753 A process must have view permission on the key for this function to be
754 successful.
755
756
714=============== 757===============
715KERNEL SERVICES 758KERNEL SERVICES
716=============== 759===============
@@ -771,7 +814,7 @@ payload contents" for more information.
771 814
772 struct key *request_key(const struct key_type *type, 815 struct key *request_key(const struct key_type *type,
773 const char *description, 816 const char *description,
774 const char *callout_string); 817 const char *callout_info);
775 818
776 This is used to request a key or keyring with a description that matches 819 This is used to request a key or keyring with a description that matches
777 the description specified according to the key type's match function. This 820 the description specified according to the key type's match function. This
@@ -793,24 +836,28 @@ payload contents" for more information.
793 836
794 struct key *request_key_with_auxdata(const struct key_type *type, 837 struct key *request_key_with_auxdata(const struct key_type *type,
795 const char *description, 838 const char *description,
796 const char *callout_string, 839 const void *callout_info,
840 size_t callout_len,
797 void *aux); 841 void *aux);
798 842
799 This is identical to request_key(), except that the auxiliary data is 843 This is identical to request_key(), except that the auxiliary data is
800 passed to the key_type->request_key() op if it exists. 844 passed to the key_type->request_key() op if it exists, and the callout_info
845 is a blob of length callout_len, if given (the length may be 0).
801 846
802 847
803(*) A key can be requested asynchronously by calling one of: 848(*) A key can be requested asynchronously by calling one of:
804 849
805 struct key *request_key_async(const struct key_type *type, 850 struct key *request_key_async(const struct key_type *type,
806 const char *description, 851 const char *description,
807 const char *callout_string); 852 const void *callout_info,
853 size_t callout_len);
808 854
809 or: 855 or:
810 856
811 struct key *request_key_async_with_auxdata(const struct key_type *type, 857 struct key *request_key_async_with_auxdata(const struct key_type *type,
812 const char *description, 858 const char *description,
813 const char *callout_string, 859 const char *callout_info,
860 size_t callout_len,
814 void *aux); 861 void *aux);
815 862
816 which are asynchronous equivalents of request_key() and 863 which are asynchronous equivalents of request_key() and
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index be89f393274f..6877e7187113 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -37,6 +37,11 @@ registration function such as register_kprobe() specifies where
37the probe is to be inserted and what handler is to be called when 37the probe is to be inserted and what handler is to be called when
38the probe is hit. 38the probe is hit.
39 39
40There are also register_/unregister_*probes() functions for batch
41registration/unregistration of a group of *probes. These functions
42can speed up unregistration process when you have to unregister
43a lot of probes at once.
44
40The next three subsections explain how the different types of 45The next three subsections explain how the different types of
41probes work. They explain certain things that you'll need to 46probes work. They explain certain things that you'll need to
42know in order to make the best use of Kprobes -- e.g., the 47know in order to make the best use of Kprobes -- e.g., the
@@ -190,10 +195,11 @@ code mapping.
1904. API Reference 1954. API Reference
191 196
192The Kprobes API includes a "register" function and an "unregister" 197The Kprobes API includes a "register" function and an "unregister"
193function for each type of probe. Here are terse, mini-man-page 198function for each type of probe. The API also includes "register_*probes"
194specifications for these functions and the associated probe handlers 199and "unregister_*probes" functions for (un)registering arrays of probes.
195that you'll write. See the files in the samples/kprobes/ sub-directory 200Here are terse, mini-man-page specifications for these functions and
196for examples. 201the associated probe handlers that you'll write. See the files in the
202samples/kprobes/ sub-directory for examples.
197 203
1984.1 register_kprobe 2044.1 register_kprobe
199 205
@@ -319,6 +325,43 @@ void unregister_kretprobe(struct kretprobe *rp);
319Removes the specified probe. The unregister function can be called 325Removes the specified probe. The unregister function can be called
320at any time after the probe has been registered. 326at any time after the probe has been registered.
321 327
328NOTE:
329If the functions find an incorrect probe (ex. an unregistered probe),
330they clear the addr field of the probe.
331
3324.5 register_*probes
333
334#include <linux/kprobes.h>
335int register_kprobes(struct kprobe **kps, int num);
336int register_kretprobes(struct kretprobe **rps, int num);
337int register_jprobes(struct jprobe **jps, int num);
338
339Registers each of the num probes in the specified array. If any
340error occurs during registration, all probes in the array, up to
341the bad probe, are safely unregistered before the register_*probes
342function returns.
343- kps/rps/jps: an array of pointers to *probe data structures
344- num: the number of the array entries.
345
346NOTE:
347You have to allocate(or define) an array of pointers and set all
348of the array entries before using these functions.
349
3504.6 unregister_*probes
351
352#include <linux/kprobes.h>
353void unregister_kprobes(struct kprobe **kps, int num);
354void unregister_kretprobes(struct kretprobe **rps, int num);
355void unregister_jprobes(struct jprobe **jps, int num);
356
357Removes each of the num probes in the specified array at once.
358
359NOTE:
360If the functions find some incorrect probes (ex. unregistered
361probes) in the specified array, they clear the addr field of those
362incorrect probes. However, other probes in the array are
363unregistered correctly.
364
3225. Kprobes Features and Limitations 3655. Kprobes Features and Limitations
323 366
324Kprobes allows multiple probes at the same address. Currently, 367Kprobes allows multiple probes at the same address. Currently,
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 76cb428435da..01c6c3d8a7e3 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
1 ThinkPad ACPI Extras Driver 1 ThinkPad ACPI Extras Driver
2 2
3 Version 0.19 3 Version 0.20
4 January 06th, 2008 4 April 09th, 2008
5 5
6 Borislav Deianov <borislav@users.sf.net> 6 Borislav Deianov <borislav@users.sf.net>
7 Henrique de Moraes Holschuh <hmh@hmh.eng.br> 7 Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -18,6 +18,11 @@ This driver used to be named ibm-acpi until kernel 2.6.21 and release
18moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel 18moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
192.6.22, and release 0.14. 192.6.22, and release 0.14.
20 20
21The driver is named "thinkpad-acpi". In some places, like module
22names, "thinkpad_acpi" is used because of userspace issues.
23
24"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
25long due to length limitations on some Linux kernel versions.
21 26
22Status 27Status
23------ 28------
@@ -571,6 +576,47 @@ netlink interface and the input layer interface, and don't bother at all
571with hotkey_report_mode. 576with hotkey_report_mode.
572 577
573 578
579Brightness hotkey notes:
580
581These are the current sane choices for brightness key mapping in
582thinkpad-acpi:
583
584For IBM and Lenovo models *without* ACPI backlight control (the ones on
585which thinkpad-acpi will autoload its backlight interface by default,
586and on which ACPI video does not export a backlight interface):
587
5881. Don't enable or map the brightness hotkeys in thinkpad-acpi, as
589 these older firmware versions unfortunately won't respect the hotkey
590 mask for brightness keys anyway, and always reacts to them. This
591 usually work fine, unless X.org drivers are doing something to block
592 the BIOS. In that case, use (3) below. This is the default mode of
593 operation.
594
5952. Enable the hotkeys, but map them to something else that is NOT
596 KEY_BRIGHTNESS_UP/DOWN or any other keycode that would cause
597 userspace to try to change the backlight level, and use that as an
598 on-screen-display hint.
599
6003. IF AND ONLY IF X.org drivers find a way to block the firmware from
601 automatically changing the brightness, enable the hotkeys and map
602 them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN, and feed that to
603 something that calls xbacklight. thinkpad-acpi will not be able to
604 change brightness in that case either, so you should disable its
605 backlight interface.
606
607For Lenovo models *with* ACPI backlight control:
608
6091. Load up ACPI video and use that. ACPI video will report ACPI
610 events for brightness change keys. Do not mess with thinkpad-acpi
611 defaults in this case. thinkpad-acpi should not have anything to do
612 with backlight events in a scenario where ACPI video is loaded:
613 brightness hotkeys must be disabled, and the backlight interface is
614 to be kept disabled as well. This is the default mode of operation.
615
6162. Do *NOT* load up ACPI video, enable the hotkeys in thinkpad-acpi,
617 and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process
618 these keys on userspace somehow (e.g. by calling xbacklight).
619
574Bluetooth 620Bluetooth
575--------- 621---------
576 622
@@ -647,16 +693,31 @@ while others are still having problems. For more information:
647 693
648https://bugs.freedesktop.org/show_bug.cgi?id=2000 694https://bugs.freedesktop.org/show_bug.cgi?id=2000
649 695
650ThinkLight control -- /proc/acpi/ibm/light 696ThinkLight control
651------------------------------------------ 697------------------
698
699procfs: /proc/acpi/ibm/light
700sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
652 701
653The current status of the ThinkLight can be found in this file. A few 702procfs notes:
654models which do not make the status available will show it as 703
655"unknown". The available commands are: 704The ThinkLight status can be read and set through the procfs interface. A
705few models which do not make the status available will show the ThinkLight
706status as "unknown". The available commands are:
656 707
657 echo on > /proc/acpi/ibm/light 708 echo on > /proc/acpi/ibm/light
658 echo off > /proc/acpi/ibm/light 709 echo off > /proc/acpi/ibm/light
659 710
711sysfs notes:
712
713The ThinkLight sysfs interface is documented by the LED class
714documentation, in Documentation/leds-class.txt. The ThinkLight LED name
715is "tpacpi::thinklight".
716
717Due to limitations in the sysfs LED class, if the status of the thinklight
718cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
719It is impossible to know if the status returned through sysfs is valid.
720
660Docking / undocking -- /proc/acpi/ibm/dock 721Docking / undocking -- /proc/acpi/ibm/dock
661------------------------------------------ 722------------------------------------------
662 723
@@ -815,28 +876,63 @@ The cmos command interface is prone to firmware split-brain problems, as
815in newer ThinkPads it is just a compatibility layer. Do not use it, it is 876in newer ThinkPads it is just a compatibility layer. Do not use it, it is
816exported just as a debug tool. 877exported just as a debug tool.
817 878
818LED control -- /proc/acpi/ibm/led 879LED control
819--------------------------------- 880-----------
881
882procfs: /proc/acpi/ibm/led
883sysfs attributes: as per LED class, see below for names
884
885Some of the LED indicators can be controlled through this feature. On
886some older ThinkPad models, it is possible to query the status of the
887LED indicators as well. Newer ThinkPads cannot query the real status
888of the LED indicators.
820 889
821Some of the LED indicators can be controlled through this feature. The 890procfs notes:
822available commands are: 891
892The available commands are:
823 893
824 echo '<led number> on' >/proc/acpi/ibm/led 894 echo '<LED number> on' >/proc/acpi/ibm/led
825 echo '<led number> off' >/proc/acpi/ibm/led 895 echo '<LED number> off' >/proc/acpi/ibm/led
826 echo '<led number> blink' >/proc/acpi/ibm/led 896 echo '<LED number> blink' >/proc/acpi/ibm/led
827 897
828The <led number> range is 0 to 7. The set of LEDs that can be 898The <LED number> range is 0 to 7. The set of LEDs that can be
829controlled varies from model to model. Here is the mapping on the X40: 899controlled varies from model to model. Here is the common ThinkPad
900mapping:
830 901
831 0 - power 902 0 - power
832 1 - battery (orange) 903 1 - battery (orange)
833 2 - battery (green) 904 2 - battery (green)
834 3 - UltraBase 905 3 - UltraBase/dock
835 4 - UltraBay 906 4 - UltraBay
907 5 - UltraBase battery slot
908 6 - (unknown)
836 7 - standby 909 7 - standby
837 910
838All of the above can be turned on and off and can be made to blink. 911All of the above can be turned on and off and can be made to blink.
839 912
913sysfs notes:
914
915The ThinkPad LED sysfs interface is described in detail by the LED class
916documentation, in Documentation/leds-class.txt.
917
918The leds are named (in LED ID order, from 0 to 7):
919"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
920"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
921"tpacpi::unknown_led", "tpacpi::standby".
922
923Due to limitations in the sysfs LED class, if the status of the LED
924indicators cannot be read due to an error, thinkpad-acpi will report it as
925a brightness of zero (same as LED off).
926
927If the thinkpad firmware doesn't support reading the current status,
928trying to read the current LED brightness will just return whatever
929brightness was last written to that attribute.
930
931These LEDs can blink using hardware acceleration. To request that a
932ThinkPad indicator LED should blink in hardware accelerated mode, use the
933"timer" trigger, and leave the delay_on and delay_off parameters set to
934zero (to request hardware acceleration autodetection).
935
840ACPI sounds -- /proc/acpi/ibm/beep 936ACPI sounds -- /proc/acpi/ibm/beep
841---------------------------------- 937----------------------------------
842 938
@@ -1090,6 +1186,15 @@ it there will be the following attributes:
1090 dim the display. 1186 dim the display.
1091 1187
1092 1188
1189WARNING:
1190
1191 Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
1192 interface and the ACPI-based backlight level change interface
1193 (available on newer BIOSes, and driven by the Linux ACPI video driver)
1194 at the same time. The two will interact in bad ways, do funny things,
1195 and maybe reduce the life of the backlight lamps by needlessly kicking
1196 its level up and down at every change.
1197
1093Volume control -- /proc/acpi/ibm/volume 1198Volume control -- /proc/acpi/ibm/volume
1094--------------------------------------- 1199---------------------------------------
1095 1200
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 4c1fc65a8b3d..3be8ab2a886a 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -131,6 +131,9 @@ struct device
131 /* Any queues attached to this device */ 131 /* Any queues attached to this device */
132 struct virtqueue *vq; 132 struct virtqueue *vq;
133 133
134 /* Handle status being finalized (ie. feature bits stable). */
135 void (*ready)(struct device *me);
136
134 /* Device-specific data. */ 137 /* Device-specific data. */
135 void *priv; 138 void *priv;
136}; 139};
@@ -925,24 +928,40 @@ static void enable_fd(int fd, struct virtqueue *vq)
925 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 928 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
926} 929}
927 930
928/* When the Guest asks us to reset a device, it's is fairly easy. */ 931/* When the Guest tells us they updated the status field, we handle it. */
929static void reset_device(struct device *dev) 932static void update_device_status(struct device *dev)
930{ 933{
931 struct virtqueue *vq; 934 struct virtqueue *vq;
932 935
933 verbose("Resetting device %s\n", dev->name); 936 /* This is a reset. */
934 /* Clear the status. */ 937 if (dev->desc->status == 0) {
935 dev->desc->status = 0; 938 verbose("Resetting device %s\n", dev->name);
936 939
937 /* Clear any features they've acked. */ 940 /* Clear any features they've acked. */
938 memset(get_feature_bits(dev) + dev->desc->feature_len, 0, 941 memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
939 dev->desc->feature_len); 942 dev->desc->feature_len);
940 943
941 /* Zero out the virtqueues. */ 944 /* Zero out the virtqueues. */
942 for (vq = dev->vq; vq; vq = vq->next) { 945 for (vq = dev->vq; vq; vq = vq->next) {
943 memset(vq->vring.desc, 0, 946 memset(vq->vring.desc, 0,
944 vring_size(vq->config.num, getpagesize())); 947 vring_size(vq->config.num, getpagesize()));
945 vq->last_avail_idx = 0; 948 vq->last_avail_idx = 0;
949 }
950 } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
951 warnx("Device %s configuration FAILED", dev->name);
952 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
953 unsigned int i;
954
955 verbose("Device %s OK: offered", dev->name);
956 for (i = 0; i < dev->desc->feature_len; i++)
957 verbose(" %08x", get_feature_bits(dev)[i]);
958 verbose(", accepted");
959 for (i = 0; i < dev->desc->feature_len; i++)
960 verbose(" %08x", get_feature_bits(dev)
961 [dev->desc->feature_len+i]);
962
963 if (dev->ready)
964 dev->ready(dev);
946 } 965 }
947} 966}
948 967
@@ -954,9 +973,9 @@ static void handle_output(int fd, unsigned long addr)
954 973
955 /* Check each device and virtqueue. */ 974 /* Check each device and virtqueue. */
956 for (i = devices.dev; i; i = i->next) { 975 for (i = devices.dev; i; i = i->next) {
957 /* Notifications to device descriptors reset the device. */ 976 /* Notifications to device descriptors update device status. */
958 if (from_guest_phys(addr) == i->desc) { 977 if (from_guest_phys(addr) == i->desc) {
959 reset_device(i); 978 update_device_status(i);
960 return; 979 return;
961 } 980 }
962 981
@@ -1170,6 +1189,7 @@ static struct device *new_device(const char *name, u16 type, int fd,
1170 dev->handle_input = handle_input; 1189 dev->handle_input = handle_input;
1171 dev->name = name; 1190 dev->name = name;
1172 dev->vq = NULL; 1191 dev->vq = NULL;
1192 dev->ready = NULL;
1173 1193
1174 /* Append to device list. Prepending to a single-linked list is 1194 /* Append to device list. Prepending to a single-linked list is
1175 * easier, but the user expects the devices to be arranged on the bus 1195 * easier, but the user expects the devices to be arranged on the bus
@@ -1398,7 +1418,7 @@ static bool service_io(struct device *dev)
1398 struct vblk_info *vblk = dev->priv; 1418 struct vblk_info *vblk = dev->priv;
1399 unsigned int head, out_num, in_num, wlen; 1419 unsigned int head, out_num, in_num, wlen;
1400 int ret; 1420 int ret;
1401 struct virtio_blk_inhdr *in; 1421 u8 *in;
1402 struct virtio_blk_outhdr *out; 1422 struct virtio_blk_outhdr *out;
1403 struct iovec iov[dev->vq->vring.num]; 1423 struct iovec iov[dev->vq->vring.num];
1404 off64_t off; 1424 off64_t off;
@@ -1416,7 +1436,7 @@ static bool service_io(struct device *dev)
1416 head, out_num, in_num); 1436 head, out_num, in_num);
1417 1437
1418 out = convert(&iov[0], struct virtio_blk_outhdr); 1438 out = convert(&iov[0], struct virtio_blk_outhdr);
1419 in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); 1439 in = convert(&iov[out_num+in_num-1], u8);
1420 off = out->sector * 512; 1440 off = out->sector * 512;
1421 1441
1422 /* The block device implements "barriers", where the Guest indicates 1442 /* The block device implements "barriers", where the Guest indicates
@@ -1430,7 +1450,7 @@ static bool service_io(struct device *dev)
1430 * It'd be nice if we supported eject, for example, but we don't. */ 1450 * It'd be nice if we supported eject, for example, but we don't. */
1431 if (out->type & VIRTIO_BLK_T_SCSI_CMD) { 1451 if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
1432 fprintf(stderr, "Scsi commands unsupported\n"); 1452 fprintf(stderr, "Scsi commands unsupported\n");
1433 in->status = VIRTIO_BLK_S_UNSUPP; 1453 *in = VIRTIO_BLK_S_UNSUPP;
1434 wlen = sizeof(*in); 1454 wlen = sizeof(*in);
1435 } else if (out->type & VIRTIO_BLK_T_OUT) { 1455 } else if (out->type & VIRTIO_BLK_T_OUT) {
1436 /* Write */ 1456 /* Write */
@@ -1453,7 +1473,7 @@ static bool service_io(struct device *dev)
1453 errx(1, "Write past end %llu+%u", off, ret); 1473 errx(1, "Write past end %llu+%u", off, ret);
1454 } 1474 }
1455 wlen = sizeof(*in); 1475 wlen = sizeof(*in);
1456 in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); 1476 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1457 } else { 1477 } else {
1458 /* Read */ 1478 /* Read */
1459 1479
@@ -1466,10 +1486,10 @@ static bool service_io(struct device *dev)
1466 verbose("READ from sector %llu: %i\n", out->sector, ret); 1486 verbose("READ from sector %llu: %i\n", out->sector, ret);
1467 if (ret >= 0) { 1487 if (ret >= 0) {
1468 wlen = sizeof(*in) + ret; 1488 wlen = sizeof(*in) + ret;
1469 in->status = VIRTIO_BLK_S_OK; 1489 *in = VIRTIO_BLK_S_OK;
1470 } else { 1490 } else {
1471 wlen = sizeof(*in); 1491 wlen = sizeof(*in);
1472 in->status = VIRTIO_BLK_S_IOERR; 1492 *in = VIRTIO_BLK_S_IOERR;
1473 } 1493 }
1474 } 1494 }
1475 1495
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 396cdd982c26..a8b430627473 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -450,3 +450,9 @@ These currently include
450 there are upper and lower limits (32768, 16). Default is 128. 450 there are upper and lower limits (32768, 16). Default is 128.
451 strip_cache_active (currently raid5 only) 451 strip_cache_active (currently raid5 only)
452 number of active entries in the stripe cache 452 number of active entries in the stripe cache
453 preread_bypass_threshold (currently raid5 only)
454 number of times a stripe requiring preread will be bypassed by
455 a stripe that does not require preread. For fairness defaults
456 to 1. Setting this to 0 disables bypass accounting and
457 requires preread stripes to wait until all full-width stripe-
458 writes are complete. Valid values are 0 to stripe_cache_size.
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index 7f60dfe642ca..b152e81da592 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -253,6 +253,10 @@ characters, each representing a particular tainted value.
253 253
254 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. 254 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG.
255 255
256 9: 'A' if the ACPI table has been overridden.
257
258 10: 'W' if a warning has previously been issued by the kernel.
259
256The primary reason for the 'Tainted: ' string is to tell kernel 260The primary reason for the 'Tainted: ' string is to tell kernel
257debuggers if this is a clean kernel or if anything unusual has 261debuggers if this is a clean kernel or if anything unusual has
258occurred. Tainting is permanent: even if an offending module is 262occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index cf89e8cfd5bf..1d2a772506cf 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -2836,6 +2836,39 @@ platforms are moved over to use the flattened-device-tree model.
2836 big-endian; 2836 big-endian;
2837 }; 2837 };
2838 2838
2839 r) Freescale Display Interface Unit
2840
2841 The Freescale DIU is a LCD controller, with proper hardware, it can also
2842 drive DVI monitors.
2843
2844 Required properties:
2845 - compatible : should be "fsl-diu".
2846 - reg : should contain at least address and length of the DIU register
2847 set.
2848 - Interrupts : one DIU interrupt should be describe here.
2849
2850 Example (MPC8610HPCD)
2851 display@2c000 {
2852 compatible = "fsl,diu";
2853 reg = <0x2c000 100>;
2854 interrupts = <72 2>;
2855 interrupt-parent = <&mpic>;
2856 };
2857
2858 s) Freescale on board FPGA
2859
2860 This is the memory-mapped registers for on board FPGA.
2861
2862 Required properities:
2863 - compatible : should be "fsl,fpga-pixis".
2864 - reg : should contain the address and the lenght of the FPPGA register
2865 set.
2866
2867 Example (MPC8610HPCD)
2868 board-control@e8000000 {
2869 compatible = "fsl,fpga-pixis";
2870 reg = <0xe8000000 32>;
2871 };
2839 2872
2840VII - Marvell Discovery mv64[345]6x System Controller chips 2873VII - Marvell Discovery mv64[345]6x System Controller chips
2841=========================================================== 2874===========================================================
diff --git a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
index 5e03610e186f..cda7a7dffa6d 100644
--- a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
+++ b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
@@ -186,6 +186,12 @@ Recommended soc5200 child nodes; populate as needed for your board
186name device_type compatible Description 186name device_type compatible Description
187---- ----------- ---------- ----------- 187---- ----------- ---------- -----------
188gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers 188gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers
189gpt@<addr> gpt fsl,mpc5200-gpt-gpio General purpose
190 timers in GPIO mode
191gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio
192 controller
193gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio
194 controller
189rtc@<addr> rtc mpc5200-rtc Real time clock 195rtc@<addr> rtc mpc5200-rtc Real time clock
190mscan@<addr> mscan mpc5200-mscan CAN bus controller 196mscan@<addr> mscan mpc5200-mscan CAN bus controller
191pci@<addr> pci mpc5200-pci PCI bridge 197pci@<addr> pci mpc5200-pci PCI bridge
@@ -225,6 +231,12 @@ PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in
225i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the 231i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the
226compatible field. 232compatible field.
227 233
2347) GPIO controller nodes
235Each GPIO controller node should have the empty property gpio-controller and
236#gpio-cells set to 2. First cell is the GPIO number which is interpreted
237according to the bit numbers in the GPIO control registers. The second cell
238is for flags which is currently unsused.
239
228IV - Extra Notes 240IV - Extra Notes
229================ 241================
230 242
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index fd4c32a031c9..0bbee38acd26 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -795,6 +795,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
795 lg-lw LG LW20/LW25 laptop 795 lg-lw LG LW20/LW25 laptop
796 tcl TCL S700 796 tcl TCL S700
797 clevo Clevo laptops (m520G, m665n) 797 clevo Clevo laptops (m520G, m665n)
798 medion Medion Rim 2150
798 test for testing/debugging purpose, almost all controls can be 799 test for testing/debugging purpose, almost all controls can be
799 adjusted. Appearing only when compiled with 800 adjusted. Appearing only when compiled with
800 $CONFIG_SND_DEBUG=y 801 $CONFIG_SND_DEBUG=y
diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev
index 5c8e1b988a08..ed2da5e5b28a 100644
--- a/Documentation/spi/spidev
+++ b/Documentation/spi/spidev
@@ -126,8 +126,8 @@ NOTES:
126FULL DUPLEX CHARACTER DEVICE API 126FULL DUPLEX CHARACTER DEVICE API
127================================ 127================================
128 128
129See the sample program below for one example showing the use of the full 129See the spidev_fdx.c sample program for one example showing the use of the
130duplex programming interface. (Although it doesn't perform a full duplex 130full duplex programming interface. (Although it doesn't perform a full duplex
131transfer.) The model is the same as that used in the kernel spi_sync() 131transfer.) The model is the same as that used in the kernel spi_sync()
132request; the individual transfers offer the same capabilities as are 132request; the individual transfers offer the same capabilities as are
133available to kernel drivers (except that it's not asynchronous). 133available to kernel drivers (except that it's not asynchronous).
@@ -141,167 +141,3 @@ and bitrate for each transfer segment.)
141 141
142To make a full duplex request, provide both rx_buf and tx_buf for the 142To make a full duplex request, provide both rx_buf and tx_buf for the
143same transfer. It's even OK if those are the same buffer. 143same transfer. It's even OK if those are the same buffer.
144
145
146SAMPLE PROGRAM
147==============
148
149-------------------------------- CUT HERE
150#include <stdio.h>
151#include <unistd.h>
152#include <stdlib.h>
153#include <fcntl.h>
154#include <string.h>
155
156#include <sys/ioctl.h>
157#include <sys/types.h>
158#include <sys/stat.h>
159
160#include <linux/types.h>
161#include <linux/spi/spidev.h>
162
163
164static int verbose;
165
166static void do_read(int fd, int len)
167{
168 unsigned char buf[32], *bp;
169 int status;
170
171 /* read at least 2 bytes, no more than 32 */
172 if (len < 2)
173 len = 2;
174 else if (len > sizeof(buf))
175 len = sizeof(buf);
176 memset(buf, 0, sizeof buf);
177
178 status = read(fd, buf, len);
179 if (status < 0) {
180 perror("read");
181 return;
182 }
183 if (status != len) {
184 fprintf(stderr, "short read\n");
185 return;
186 }
187
188 printf("read(%2d, %2d): %02x %02x,", len, status,
189 buf[0], buf[1]);
190 status -= 2;
191 bp = buf + 2;
192 while (status-- > 0)
193 printf(" %02x", *bp++);
194 printf("\n");
195}
196
197static void do_msg(int fd, int len)
198{
199 struct spi_ioc_transfer xfer[2];
200 unsigned char buf[32], *bp;
201 int status;
202
203 memset(xfer, 0, sizeof xfer);
204 memset(buf, 0, sizeof buf);
205
206 if (len > sizeof buf)
207 len = sizeof buf;
208
209 buf[0] = 0xaa;
210 xfer[0].tx_buf = (__u64) buf;
211 xfer[0].len = 1;
212
213 xfer[1].rx_buf = (__u64) buf;
214 xfer[1].len = len;
215
216 status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
217 if (status < 0) {
218 perror("SPI_IOC_MESSAGE");
219 return;
220 }
221
222 printf("response(%2d, %2d): ", len, status);
223 for (bp = buf; len; len--)
224 printf(" %02x", *bp++);
225 printf("\n");
226}
227
228static void dumpstat(const char *name, int fd)
229{
230 __u8 mode, lsb, bits;
231 __u32 speed;
232
233 if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) {
234 perror("SPI rd_mode");
235 return;
236 }
237 if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
238 perror("SPI rd_lsb_fist");
239 return;
240 }
241 if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
242 perror("SPI bits_per_word");
243 return;
244 }
245 if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
246 perror("SPI max_speed_hz");
247 return;
248 }
249
250 printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n",
251 name, mode, bits, lsb ? "(lsb first) " : "", speed);
252}
253
254int main(int argc, char **argv)
255{
256 int c;
257 int readcount = 0;
258 int msglen = 0;
259 int fd;
260 const char *name;
261
262 while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
263 switch (c) {
264 case 'm':
265 msglen = atoi(optarg);
266 if (msglen < 0)
267 goto usage;
268 continue;
269 case 'r':
270 readcount = atoi(optarg);
271 if (readcount < 0)
272 goto usage;
273 continue;
274 case 'v':
275 verbose++;
276 continue;
277 case 'h':
278 case '?':
279usage:
280 fprintf(stderr,
281 "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
282 argv[0]);
283 return 1;
284 }
285 }
286
287 if ((optind + 1) != argc)
288 goto usage;
289 name = argv[optind];
290
291 fd = open(name, O_RDWR);
292 if (fd < 0) {
293 perror("open");
294 return 1;
295 }
296
297 dumpstat(name, fd);
298
299 if (msglen)
300 do_msg(fd, msglen);
301
302 if (readcount)
303 do_read(fd, readcount);
304
305 close(fd);
306 return 0;
307}
diff --git a/Documentation/spi/spidev_fdx.c b/Documentation/spi/spidev_fdx.c
new file mode 100644
index 000000000000..fc354f760384
--- /dev/null
+++ b/Documentation/spi/spidev_fdx.c
@@ -0,0 +1,158 @@
1#include <stdio.h>
2#include <unistd.h>
3#include <stdlib.h>
4#include <fcntl.h>
5#include <string.h>
6
7#include <sys/ioctl.h>
8#include <sys/types.h>
9#include <sys/stat.h>
10
11#include <linux/types.h>
12#include <linux/spi/spidev.h>
13
14
15static int verbose;
16
17static void do_read(int fd, int len)
18{
19 unsigned char buf[32], *bp;
20 int status;
21
22 /* read at least 2 bytes, no more than 32 */
23 if (len < 2)
24 len = 2;
25 else if (len > sizeof(buf))
26 len = sizeof(buf);
27 memset(buf, 0, sizeof buf);
28
29 status = read(fd, buf, len);
30 if (status < 0) {
31 perror("read");
32 return;
33 }
34 if (status != len) {
35 fprintf(stderr, "short read\n");
36 return;
37 }
38
39 printf("read(%2d, %2d): %02x %02x,", len, status,
40 buf[0], buf[1]);
41 status -= 2;
42 bp = buf + 2;
43 while (status-- > 0)
44 printf(" %02x", *bp++);
45 printf("\n");
46}
47
48static void do_msg(int fd, int len)
49{
50 struct spi_ioc_transfer xfer[2];
51 unsigned char buf[32], *bp;
52 int status;
53
54 memset(xfer, 0, sizeof xfer);
55 memset(buf, 0, sizeof buf);
56
57 if (len > sizeof buf)
58 len = sizeof buf;
59
60 buf[0] = 0xaa;
61 xfer[0].tx_buf = (__u64) buf;
62 xfer[0].len = 1;
63
64 xfer[1].rx_buf = (__u64) buf;
65 xfer[1].len = len;
66
67 status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
68 if (status < 0) {
69 perror("SPI_IOC_MESSAGE");
70 return;
71 }
72
73 printf("response(%2d, %2d): ", len, status);
74 for (bp = buf; len; len--)
75 printf(" %02x", *bp++);
76 printf("\n");
77}
78
79static void dumpstat(const char *name, int fd)
80{
81 __u8 mode, lsb, bits;
82 __u32 speed;
83
84 if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) {
85 perror("SPI rd_mode");
86 return;
87 }
88 if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
89 perror("SPI rd_lsb_fist");
90 return;
91 }
92 if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
93 perror("SPI bits_per_word");
94 return;
95 }
96 if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
97 perror("SPI max_speed_hz");
98 return;
99 }
100
101 printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n",
102 name, mode, bits, lsb ? "(lsb first) " : "", speed);
103}
104
105int main(int argc, char **argv)
106{
107 int c;
108 int readcount = 0;
109 int msglen = 0;
110 int fd;
111 const char *name;
112
113 while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
114 switch (c) {
115 case 'm':
116 msglen = atoi(optarg);
117 if (msglen < 0)
118 goto usage;
119 continue;
120 case 'r':
121 readcount = atoi(optarg);
122 if (readcount < 0)
123 goto usage;
124 continue;
125 case 'v':
126 verbose++;
127 continue;
128 case 'h':
129 case '?':
130usage:
131 fprintf(stderr,
132 "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
133 argv[0]);
134 return 1;
135 }
136 }
137
138 if ((optind + 1) != argc)
139 goto usage;
140 name = argv[optind];
141
142 fd = open(name, O_RDWR);
143 if (fd < 0) {
144 perror("open");
145 return 1;
146 }
147
148 dumpstat(name, fd);
149
150 if (msglen)
151 do_msg(fd, msglen);
152
153 if (readcount)
154 do_read(fd, readcount);
155
156 close(fd);
157 return 0;
158}
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 10c8f6922ef4..5ce0952aa065 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -85,6 +85,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
85'k' - Secure Access Key (SAK) Kills all programs on the current virtual 85'k' - Secure Access Key (SAK) Kills all programs on the current virtual
86 console. NOTE: See important comments below in SAK section. 86 console. NOTE: See important comments below in SAK section.
87 87
88'l' - Shows a stack backtrace for all active CPUs.
89
88'm' - Will dump current memory info to your console. 90'm' - Will dump current memory info to your console.
89 91
90'n' - Used to make RT tasks nice-able 92'n' - Used to make RT tasks nice-able
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index d9f28be75403..70d68ce8640a 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -108,10 +108,12 @@ and throttle appropriate devices.
108RO read only value 108RO read only value
109RW read/write value 109RW read/write value
110 110
111All thermal sysfs attributes will be represented under /sys/class/thermal 111Thermal sysfs attributes will be represented under /sys/class/thermal.
112Hwmon sysfs I/F extension is also available under /sys/class/hwmon
113if hwmon is compiled in or built as a module.
112 114
113Thermal zone device sys I/F, created once it's registered: 115Thermal zone device sys I/F, created once it's registered:
114|thermal_zone[0-*]: 116/sys/class/thermal/thermal_zone[0-*]:
115 |-----type: Type of the thermal zone 117 |-----type: Type of the thermal zone
116 |-----temp: Current temperature 118 |-----temp: Current temperature
117 |-----mode: Working mode of the thermal zone 119 |-----mode: Working mode of the thermal zone
@@ -119,7 +121,7 @@ Thermal zone device sys I/F, created once it's registered:
119 |-----trip_point_[0-*]_type: Trip point type 121 |-----trip_point_[0-*]_type: Trip point type
120 122
121Thermal cooling device sys I/F, created once it's registered: 123Thermal cooling device sys I/F, created once it's registered:
122|cooling_device[0-*]: 124/sys/class/thermal/cooling_device[0-*]:
123 |-----type : Type of the cooling device(processor/fan/...) 125 |-----type : Type of the cooling device(processor/fan/...)
124 |-----max_state: Maximum cooling state of the cooling device 126 |-----max_state: Maximum cooling state of the cooling device
125 |-----cur_state: Current cooling state of the cooling device 127 |-----cur_state: Current cooling state of the cooling device
@@ -130,10 +132,19 @@ They represent the relationship between a thermal zone and its associated coolin
130They are created/removed for each 132They are created/removed for each
131thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. 133thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution.
132 134
133|thermal_zone[0-*] 135/sys/class/thermal/thermal_zone[0-*]
134 |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone 136 |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone
135 |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with 137 |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with
136 138
139Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
140the generic thermal driver also creates a hwmon sysfs I/F for each _type_ of
141thermal zone device. E.g. the generic thermal driver registers one hwmon class device
142and build the associated hwmon sysfs I/F for all the registered ACPI thermal zones.
143/sys/class/hwmon/hwmon[0-*]:
144 |-----name: The type of the thermal zone devices.
145 |-----temp[1-*]_input: The current temperature of thermal zone [1-*].
146 |-----temp[1-*]_critical: The critical trip point of thermal zone [1-*].
147Please read Documentation/hwmon/sysfs-interface for additional information.
137 148
138*************************** 149***************************
139* Thermal zone attributes * 150* Thermal zone attributes *
@@ -141,7 +152,10 @@ thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful e
141 152
142type Strings which represent the thermal zone type. 153type Strings which represent the thermal zone type.
143 This is given by thermal zone driver as part of registration. 154 This is given by thermal zone driver as part of registration.
144 Eg: "ACPI thermal zone" indicates it's a ACPI thermal device 155 Eg: "acpitz" indicates it's an ACPI thermal device.
156 In order to keep it consistent with hwmon sys attribute,
157 this should be a short, lowercase string,
158 not containing spaces nor dashes.
145 RO 159 RO
146 Required 160 Required
147 161
@@ -218,7 +232,7 @@ the sys I/F structure will be built like this:
218/sys/class/thermal: 232/sys/class/thermal:
219 233
220|thermal_zone1: 234|thermal_zone1:
221 |-----type: ACPI thermal zone 235 |-----type: acpitz
222 |-----temp: 37000 236 |-----temp: 37000
223 |-----mode: kernel 237 |-----mode: kernel
224 |-----trip_point_0_temp: 100000 238 |-----trip_point_0_temp: 100000
@@ -243,3 +257,10 @@ the sys I/F structure will be built like this:
243 |-----type: Fan 257 |-----type: Fan
244 |-----max_state: 2 258 |-----max_state: 2
245 |-----cur_state: 0 259 |-----cur_state: 0
260
261/sys/class/hwmon:
262
263|hwmon0:
264 |-----name: acpitz
265 |-----temp1_input: 37000
266 |-----temp1_crit: 100000
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index 44d84dd15ad6..67937df1e974 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -128,7 +128,7 @@
128127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] 128127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090]
129128 -> Beholder BeholdTV Columbus TVFM [0000:5201] 129128 -> Beholder BeholdTV Columbus TVFM [0000:5201]
130129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] 130129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093]
131130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193] 131130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193,5ace:6191]
132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] 132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022]
133132 -> Genius TVGO AM11MCE 133132 -> Genius TVGO AM11MCE
134133 -> NXP Snake DVB-S reference design 134133 -> NXP Snake DVB-S reference design
@@ -140,3 +140,4 @@
140139 -> Compro VideoMate T750 [185b:c900] 140139 -> Compro VideoMate T750 [185b:c900]
141140 -> Avermedia DVB-S Pro A700 [1461:a7a1] 141140 -> Avermedia DVB-S Pro A700 [1461:a7a1]
142141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2] 142141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2]
143142 -> Beholder BeholdTV H6 [5ace:6290]
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
new file mode 100644
index 000000000000..077d56ec3f3d
--- /dev/null
+++ b/Documentation/video4linux/cx18.txt
@@ -0,0 +1,34 @@
1Some notes regarding the cx18 driver for the Conexant CX23418 MPEG
2encoder chip:
3
41) The only hardware currently supported is the Hauppauge HVR-1600.
5
62) Some people have problems getting the i2c bus to work. Cause unknown.
7 The symptom is that the eeprom cannot be read and the card is
8 unusable.
9
103) The audio from the analog tuner is mono only. Probably caused by
11 incorrect audio register information in the datasheet. We are
12 waiting for updated information from Conexant.
13
144) VBI (raw or sliced) has not yet been implemented.
15
165) MPEG indexing is not yet implemented.
17
186) The driver is still a bit rough around the edges, this should
19 improve over time.
20
21
22Firmware:
23
24The firmware needs to be extracted from the Windows Hauppauge HVR-1600
25driver, available here:
26
27http://hauppauge.lightpath.net/software/install_cd/hauppauge_cd_3.4d1.zip
28
29Unzip, then copy the following files to the firmware directory
30and rename them as follows:
31
32Drivers/Driver18/hcw18apu.rom -> v4l-cx23418-apu.fw
33Drivers/Driver18/hcw18enc.rom -> v4l-cx23418-cpu.fw
34Drivers/Driver18/hcw18mlC.rom -> v4l-cx23418-dig.fw
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index dd4986497996..bad16d3f6a47 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -135,77 +135,58 @@ most general to most specific:
135 135
136Components of Memory Policies 136Components of Memory Policies
137 137
138 A Linux memory policy is a tuple consisting of a "mode" and an optional set 138 A Linux memory policy consists of a "mode", optional mode flags, and an
139 of nodes. The mode determine the behavior of the policy, while the 139 optional set of nodes. The mode determines the behavior of the policy,
140 optional set of nodes can be viewed as the arguments to the behavior. 140 the optional mode flags determine the behavior of the mode, and the
141 optional set of nodes can be viewed as the arguments to the policy
142 behavior.
141 143
142 Internally, memory policies are implemented by a reference counted 144 Internally, memory policies are implemented by a reference counted
143 structure, struct mempolicy. Details of this structure will be discussed 145 structure, struct mempolicy. Details of this structure will be discussed
144 in context, below, as required to explain the behavior. 146 in context, below, as required to explain the behavior.
145 147
146 Note: in some functions AND in the struct mempolicy itself, the mode
147 is called "policy". However, to avoid confusion with the policy tuple,
148 this document will continue to use the term "mode".
149
150 Linux memory policy supports the following 4 behavioral modes: 148 Linux memory policy supports the following 4 behavioral modes:
151 149
152 Default Mode--MPOL_DEFAULT: The behavior specified by this mode is 150 Default Mode--MPOL_DEFAULT: This mode is only used in the memory
153 context or scope dependent. 151 policy APIs. Internally, MPOL_DEFAULT is converted to the NULL
154 152 memory policy in all policy scopes. Any existing non-default policy
155 As mentioned in the Policy Scope section above, during normal 153 will simply be removed when MPOL_DEFAULT is specified. As a result,
156 system operation, the System Default Policy is hard coded to 154 MPOL_DEFAULT means "fall back to the next most specific policy scope."
157 contain the Default mode.
158
159 In this context, default mode means "local" allocation--that is
160 attempt to allocate the page from the node associated with the cpu
161 where the fault occurs. If the "local" node has no memory, or the
162 node's memory can be exhausted [no free pages available], local
163 allocation will "fallback to"--attempt to allocate pages from--
164 "nearby" nodes, in order of increasing "distance".
165 155
166 Implementation detail -- subject to change: "Fallback" uses 156 For example, a NULL or default task policy will fall back to the
167 a per node list of sibling nodes--called zonelists--built at 157 system default policy. A NULL or default vma policy will fall
168 boot time, or when nodes or memory are added or removed from 158 back to the task policy.
169 the system [memory hotplug]. These per node zonelist are
170 constructed with nodes in order of increasing distance based
171 on information provided by the platform firmware.
172 159
173 When a task/process policy or a shared policy contains the Default 160 When specified in one of the memory policy APIs, the Default mode
174 mode, this also means "local allocation", as described above. 161 does not use the optional set of nodes.
175 162
176 In the context of a VMA, Default mode means "fall back to task 163 It is an error for the set of nodes specified for this policy to
177 policy"--which may or may not specify Default mode. Thus, Default 164 be non-empty.
178 mode can not be counted on to mean local allocation when used
179 on a non-shared region of the address space. However, see
180 MPOL_PREFERRED below.
181
182 The Default mode does not use the optional set of nodes.
183 165
184 MPOL_BIND: This mode specifies that memory must come from the 166 MPOL_BIND: This mode specifies that memory must come from the
185 set of nodes specified by the policy. 167 set of nodes specified by the policy. Memory will be allocated from
186 168 the node in the set with sufficient free memory that is closest to
187 The memory policy APIs do not specify an order in which the nodes 169 the node where the allocation takes place.
188 will be searched. However, unlike "local allocation", the Bind
189 policy does not consider the distance between the nodes. Rather,
190 allocations will fallback to the nodes specified by the policy in
191 order of numeric node id. Like everything in Linux, this is subject
192 to change.
193 170
194 MPOL_PREFERRED: This mode specifies that the allocation should be 171 MPOL_PREFERRED: This mode specifies that the allocation should be
195 attempted from the single node specified in the policy. If that 172 attempted from the single node specified in the policy. If that
196 allocation fails, the kernel will search other nodes, exactly as 173 allocation fails, the kernel will search other nodes, in order of
197 it would for a local allocation that started at the preferred node 174 increasing distance from the preferred node based on information
198 in increasing distance from the preferred node. "Local" allocation 175 provided by the platform firmware.
199 policy can be viewed as a Preferred policy that starts at the node
200 containing the cpu where the allocation takes place. 176 containing the cpu where the allocation takes place.
201 177
202 Internally, the Preferred policy uses a single node--the 178 Internally, the Preferred policy uses a single node--the
203 preferred_node member of struct mempolicy. A "distinguished 179 preferred_node member of struct mempolicy. When the internal
204 value of this preferred_node, currently '-1', is interpreted 180 mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
205 as "the node containing the cpu where the allocation takes 181 the policy is interpreted as local allocation. "Local" allocation
206 place"--local allocation. This is the way to specify 182 policy can be viewed as a Preferred policy that starts at the node
207 local allocation for a specific range of addresses--i.e. for 183 containing the cpu where the allocation takes place.
208 VMA policies. 184
185 It is possible for the user to specify that local allocation is
186 always preferred by passing an empty nodemask with this mode.
187 If an empty nodemask is passed, the policy cannot use the
188 MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
189 below.
209 190
210 MPOL_INTERLEAVED: This mode specifies that page allocations be 191 MPOL_INTERLEAVED: This mode specifies that page allocations be
211 interleaved, on a page granularity, across the nodes specified in 192 interleaved, on a page granularity, across the nodes specified in
@@ -231,6 +212,154 @@ Components of Memory Policies
231 the temporary interleaved system default policy works in this 212 the temporary interleaved system default policy works in this
232 mode. 213 mode.
233 214
215 Linux memory policy supports the following optional mode flags:
216
217 MPOL_F_STATIC_NODES: This flag specifies that the nodemask passed by
218 the user should not be remapped if the task or VMA's set of allowed
219 nodes changes after the memory policy has been defined.
220
221 Without this flag, anytime a mempolicy is rebound because of a
222 change in the set of allowed nodes, the node (Preferred) or
223 nodemask (Bind, Interleave) is remapped to the new set of
224 allowed nodes. This may result in nodes being used that were
225 previously undesired.
226
227 With this flag, if the user-specified nodes overlap with the
228 nodes allowed by the task's cpuset, then the memory policy is
229 applied to their intersection. If the two sets of nodes do not
230 overlap, the Default policy is used.
231
232 For example, consider a task that is attached to a cpuset with
233 mems 1-3 that sets an Interleave policy over the same set. If
234 the cpuset's mems change to 3-5, the Interleave will now occur
235 over nodes 3, 4, and 5. With this flag, however, since only node
236 3 is allowed from the user's nodemask, the "interleave" only
237 occurs over that node. If no nodes from the user's nodemask are
238 now allowed, the Default behavior is used.
239
240 MPOL_F_STATIC_NODES cannot be combined with the
241 MPOL_F_RELATIVE_NODES flag. It also cannot be used for
242 MPOL_PREFERRED policies that were created with an empty nodemask
243 (local allocation).
244
245 MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
246 by the user will be mapped relative to the set of the task or VMA's
247 set of allowed nodes. The kernel stores the user-passed nodemask,
248 and if the allowed nodes changes, then that original nodemask will
249 be remapped relative to the new set of allowed nodes.
250
251 Without this flag (and without MPOL_F_STATIC_NODES), anytime a
252 mempolicy is rebound because of a change in the set of allowed
253 nodes, the node (Preferred) or nodemask (Bind, Interleave) is
254 remapped to the new set of allowed nodes. That remap may not
255 preserve the relative nature of the user's passed nodemask to its
256 set of allowed nodes upon successive rebinds: a nodemask of
257 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
258 allowed nodes is restored to its original state.
259
260 With this flag, the remap is done so that the node numbers from
261 the user's passed nodemask are relative to the set of allowed
262 nodes. In other words, if nodes 0, 2, and 4 are set in the user's
263 nodemask, the policy will be effected over the first (and in the
264 Bind or Interleave case, the third and fifth) nodes in the set of
265 allowed nodes. The nodemask passed by the user represents nodes
266 relative to task or VMA's set of allowed nodes.
267
268 If the user's nodemask includes nodes that are outside the range
269 of the new set of allowed nodes (for example, node 5 is set in
270 the user's nodemask when the set of allowed nodes is only 0-3),
271 then the remap wraps around to the beginning of the nodemask and,
272 if not already set, sets the node in the mempolicy nodemask.
273
274 For example, consider a task that is attached to a cpuset with
275 mems 2-5 that sets an Interleave policy over the same set with
276 MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the
277 interleave now occurs over nodes 3,5-6. If the cpuset's mems
278 then change to 0,2-3,5, then the interleave occurs over nodes
279 0,3,5.
280
281 Thanks to the consistent remapping, applications preparing
282 nodemasks to specify memory policies using this flag should
283 disregard their current, actual cpuset imposed memory placement
284 and prepare the nodemask as if they were always located on
285 memory nodes 0 to N-1, where N is the number of memory nodes the
286 policy is intended to manage. Let the kernel then remap to the
287 set of memory nodes allowed by the task's cpuset, as that may
288 change over time.
289
290 MPOL_F_RELATIVE_NODES cannot be combined with the
291 MPOL_F_STATIC_NODES flag. It also cannot be used for
292 MPOL_PREFERRED policies that were created with an empty nodemask
293 (local allocation).
294
295MEMORY POLICY REFERENCE COUNTING
296
297To resolve use/free races, struct mempolicy contains an atomic reference
298count field. Internal interfaces, mpol_get()/mpol_put() increment and
299decrement this reference count, respectively. mpol_put() will only free
300the structure back to the mempolicy kmem cache when the reference count
301goes to zero.
302
303When a new memory policy is allocated, it's reference count is initialized
304to '1', representing the reference held by the task that is installing the
305new policy. When a pointer to a memory policy structure is stored in another
306structure, another reference is added, as the task's reference will be dropped
307on completion of the policy installation.
308
309During run-time "usage" of the policy, we attempt to minimize atomic operations
310on the reference count, as this can lead to cache lines bouncing between cpus
311and NUMA nodes. "Usage" here means one of the following:
312
3131) querying of the policy, either by the task itself [using the get_mempolicy()
314 API discussed below] or by another task using the /proc/<pid>/numa_maps
315 interface.
316
3172) examination of the policy to determine the policy mode and associated node
318 or node lists, if any, for page allocation. This is considered a "hot
319 path". Note that for MPOL_BIND, the "usage" extends across the entire
320 allocation process, which may sleep during page reclaimation, because the
321 BIND policy nodemask is used, by reference, to filter ineligible nodes.
322
323We can avoid taking an extra reference during the usages listed above as
324follows:
325
3261) we never need to get/free the system default policy as this is never
327 changed nor freed, once the system is up and running.
328
3292) for querying the policy, we do not need to take an extra reference on the
330 target task's task policy nor vma policies because we always acquire the
331 task's mm's mmap_sem for read during the query. The set_mempolicy() and
332 mbind() APIs [see below] always acquire the mmap_sem for write when
333 installing or replacing task or vma policies. Thus, there is no possibility
334 of a task or thread freeing a policy while another task or thread is
335 querying it.
336
3373) Page allocation usage of task or vma policy occurs in the fault path where
338 we hold them mmap_sem for read. Again, because replacing the task or vma
339 policy requires that the mmap_sem be held for write, the policy can't be
340 freed out from under us while we're using it for page allocation.
341
3424) Shared policies require special consideration. One task can replace a
343 shared memory policy while another task, with a distinct mmap_sem, is
344 querying or allocating a page based on the policy. To resolve this
345 potential race, the shared policy infrastructure adds an extra reference
346 to the shared policy during lookup while holding a spin lock on the shared
347 policy management structure. This requires that we drop this extra
348 reference when we're finished "using" the policy. We must drop the
349 extra reference on shared policies in the same query/allocation paths
350 used for non-shared policies. For this reason, shared policies are marked
351 as such, and the extra reference is dropped "conditionally"--i.e., only
352 for shared policies.
353
354 Because of this extra reference counting, and because we must lookup
355 shared policies in a tree structure under spinlock, shared policies are
356 more expensive to use in the page allocation path. This is expecially
357 true for shared policies on shared memory regions shared by tasks running
358 on different NUMA nodes. This extra overhead can be avoided by always
359 falling back to task or system default policy for shared memory regions,
360 or by prefaulting the entire shared memory region into memory and locking
361 it down. However, this might not be appropriate for all applications.
362
234MEMORY POLICY APIs 363MEMORY POLICY APIs
235 364
236Linux supports 3 system calls for controlling memory policy. These APIS 365Linux supports 3 system calls for controlling memory policy. These APIS
@@ -251,7 +380,9 @@ Set [Task] Memory Policy:
251 Set's the calling task's "task/process memory policy" to mode 380 Set's the calling task's "task/process memory policy" to mode
252 specified by the 'mode' argument and the set of nodes defined 381 specified by the 'mode' argument and the set of nodes defined
253 by 'nmask'. 'nmask' points to a bit mask of node ids containing 382 by 'nmask'. 'nmask' points to a bit mask of node ids containing
254 at least 'maxnode' ids. 383 at least 'maxnode' ids. Optional mode flags may be passed by
384 combining the 'mode' argument with the flag (for example:
385 MPOL_INTERLEAVE | MPOL_F_STATIC_NODES).
255 386
256 See the set_mempolicy(2) man page for more details 387 See the set_mempolicy(2) man page for more details
257 388
@@ -303,29 +434,19 @@ MEMORY POLICIES AND CPUSETS
303Memory policies work within cpusets as described above. For memory policies 434Memory policies work within cpusets as described above. For memory policies
304that require a node or set of nodes, the nodes are restricted to the set of 435that require a node or set of nodes, the nodes are restricted to the set of
305nodes whose memories are allowed by the cpuset constraints. If the nodemask 436nodes whose memories are allowed by the cpuset constraints. If the nodemask
306specified for the policy contains nodes that are not allowed by the cpuset, or 437specified for the policy contains nodes that are not allowed by the cpuset and
307the intersection of the set of nodes specified for the policy and the set of 438MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
308nodes with memory is the empty set, the policy is considered invalid 439specified for the policy and the set of nodes with memory is used. If the
309and cannot be installed. 440result is the empty set, the policy is considered invalid and cannot be
310 441installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
311The interaction of memory policies and cpusets can be problematic for a 442onto and folded into the task's set of allowed nodes as previously described.
312couple of reasons: 443
313 444The interaction of memory policies and cpusets can be problematic when tasks
3141) the memory policy APIs take physical node id's as arguments. As mentioned 445in two cpusets share access to a memory region, such as shared memory segments
315 above, it is illegal to specify nodes that are not allowed in the cpuset. 446created by shmget() of mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and
316 The application must query the allowed nodes using the get_mempolicy() 447any of the tasks install shared policy on the region, only nodes whose
317 API with the MPOL_F_MEMS_ALLOWED flag to determine the allowed nodes and 448memories are allowed in both cpusets may be used in the policies. Obtaining
318 restrict itself to those nodes. However, the resources available to a 449this information requires "stepping outside" the memory policy APIs to use the
319 cpuset can be changed by the system administrator, or a workload manager 450cpuset information and requires that one know in what cpusets other task might
320 application, at any time. So, a task may still get errors attempting to 451be attaching to the shared region. Furthermore, if the cpusets' allowed
321 specify policy nodes, and must query the allowed memories again. 452memory sets are disjoint, "local" allocation is the only valid policy.
322
3232) when tasks in two cpusets share access to a memory region, such as shared
324 memory segments created by shmget() of mmap() with the MAP_ANONYMOUS and
325 MAP_SHARED flags, and any of the tasks install shared policy on the region,
326 only nodes whose memories are allowed in both cpusets may be used in the
327 policies. Obtaining this information requires "stepping outside" the
328 memory policy APIs to use the cpuset information and requires that one
329 know in what cpusets other task might be attaching to the shared region.
330 Furthermore, if the cpusets' allowed memory sets are disjoint, "local"
331 allocation is the only valid policy.
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 22d7e3e4d60c..d3ce295bffac 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -31,7 +31,7 @@ struct slabinfo {
31 int hwcache_align, object_size, objs_per_slab; 31 int hwcache_align, object_size, objs_per_slab;
32 int sanity_checks, slab_size, store_user, trace; 32 int sanity_checks, slab_size, store_user, trace;
33 int order, poison, reclaim_account, red_zone; 33 int order, poison, reclaim_account, red_zone;
34 unsigned long partial, objects, slabs; 34 unsigned long partial, objects, slabs, objects_partial, objects_total;
35 unsigned long alloc_fastpath, alloc_slowpath; 35 unsigned long alloc_fastpath, alloc_slowpath;
36 unsigned long free_fastpath, free_slowpath; 36 unsigned long free_fastpath, free_slowpath;
37 unsigned long free_frozen, free_add_partial, free_remove_partial; 37 unsigned long free_frozen, free_add_partial, free_remove_partial;
@@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s)
540 return; 540 return;
541 541
542 store_size(size_str, slab_size(s)); 542 store_size(size_str, slab_size(s));
543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); 543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
544 s->partial, s->cpu_slabs);
544 545
545 if (!line++) 546 if (!line++)
546 first_line(); 547 first_line();
@@ -776,7 +777,6 @@ void totals(void)
776 unsigned long used; 777 unsigned long used;
777 unsigned long long wasted; 778 unsigned long long wasted;
778 unsigned long long objwaste; 779 unsigned long long objwaste;
779 long long objects_in_partial_slabs;
780 unsigned long percentage_partial_slabs; 780 unsigned long percentage_partial_slabs;
781 unsigned long percentage_partial_objs; 781 unsigned long percentage_partial_objs;
782 782
@@ -790,18 +790,11 @@ void totals(void)
790 wasted = size - used; 790 wasted = size - used;
791 objwaste = s->slab_size - s->object_size; 791 objwaste = s->slab_size - s->object_size;
792 792
793 objects_in_partial_slabs = s->objects -
794 (s->slabs - s->partial - s ->cpu_slabs) *
795 s->objs_per_slab;
796
797 if (objects_in_partial_slabs < 0)
798 objects_in_partial_slabs = 0;
799
800 percentage_partial_slabs = s->partial * 100 / s->slabs; 793 percentage_partial_slabs = s->partial * 100 / s->slabs;
801 if (percentage_partial_slabs > 100) 794 if (percentage_partial_slabs > 100)
802 percentage_partial_slabs = 100; 795 percentage_partial_slabs = 100;
803 796
804 percentage_partial_objs = objects_in_partial_slabs * 100 797 percentage_partial_objs = s->objects_partial * 100
805 / s->objects; 798 / s->objects;
806 799
807 if (percentage_partial_objs > 100) 800 if (percentage_partial_objs > 100)
@@ -823,8 +816,8 @@ void totals(void)
823 min_objects = s->objects; 816 min_objects = s->objects;
824 if (used < min_used) 817 if (used < min_used)
825 min_used = used; 818 min_used = used;
826 if (objects_in_partial_slabs < min_partobj) 819 if (s->objects_partial < min_partobj)
827 min_partobj = objects_in_partial_slabs; 820 min_partobj = s->objects_partial;
828 if (percentage_partial_slabs < min_ppart) 821 if (percentage_partial_slabs < min_ppart)
829 min_ppart = percentage_partial_slabs; 822 min_ppart = percentage_partial_slabs;
830 if (percentage_partial_objs < min_ppartobj) 823 if (percentage_partial_objs < min_ppartobj)
@@ -848,8 +841,8 @@ void totals(void)
848 max_objects = s->objects; 841 max_objects = s->objects;
849 if (used > max_used) 842 if (used > max_used)
850 max_used = used; 843 max_used = used;
851 if (objects_in_partial_slabs > max_partobj) 844 if (s->objects_partial > max_partobj)
852 max_partobj = objects_in_partial_slabs; 845 max_partobj = s->objects_partial;
853 if (percentage_partial_slabs > max_ppart) 846 if (percentage_partial_slabs > max_ppart)
854 max_ppart = percentage_partial_slabs; 847 max_ppart = percentage_partial_slabs;
855 if (percentage_partial_objs > max_ppartobj) 848 if (percentage_partial_objs > max_ppartobj)
@@ -864,7 +857,7 @@ void totals(void)
864 857
865 total_objects += s->objects; 858 total_objects += s->objects;
866 total_used += used; 859 total_used += used;
867 total_partobj += objects_in_partial_slabs; 860 total_partobj += s->objects_partial;
868 total_ppart += percentage_partial_slabs; 861 total_ppart += percentage_partial_slabs;
869 total_ppartobj += percentage_partial_objs; 862 total_ppartobj += percentage_partial_objs;
870 863
@@ -1160,6 +1153,8 @@ void read_slab_dir(void)
1160 slab->hwcache_align = get_obj("hwcache_align"); 1153 slab->hwcache_align = get_obj("hwcache_align");
1161 slab->object_size = get_obj("object_size"); 1154 slab->object_size = get_obj("object_size");
1162 slab->objects = get_obj("objects"); 1155 slab->objects = get_obj("objects");
1156 slab->objects_partial = get_obj("objects_partial");
1157 slab->objects_total = get_obj("objects_total");
1163 slab->objs_per_slab = get_obj("objs_per_slab"); 1158 slab->objs_per_slab = get_obj("objs_per_slab");
1164 slab->order = get_obj("order"); 1159 slab->order = get_obj("order");
1165 slab->partial = get_obj("partial"); 1160 slab->partial = get_obj("partial");