diff options
Diffstat (limited to 'Documentation')
50 files changed, 1891 insertions, 399 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi new file mode 100644 index 000000000000..5ac1e01bbd48 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-bdi | |||
@@ -0,0 +1,46 @@ | |||
1 | What: /sys/class/bdi/<bdi>/ | ||
2 | Date: January 2008 | ||
3 | Contact: Peter Zijlstra <a.p.zijlstra@chello.nl> | ||
4 | Description: | ||
5 | |||
6 | Provide a place in sysfs for the backing_dev_info object. This allows | ||
7 | setting and retrieving various BDI specific variables. | ||
8 | |||
9 | The <bdi> identifier can be either of the following: | ||
10 | |||
11 | MAJOR:MINOR | ||
12 | |||
13 | Device number for block devices, or value of st_dev on | ||
14 | non-block filesystems which provide their own BDI, such as NFS | ||
15 | and FUSE. | ||
16 | |||
17 | default | ||
18 | |||
19 | The default backing dev, used for non-block device backed | ||
20 | filesystems which do not provide their own BDI. | ||
21 | |||
22 | Files under /sys/class/bdi/<bdi>/ | ||
23 | --------------------------------- | ||
24 | |||
25 | read_ahead_kb (read-write) | ||
26 | |||
27 | Size of the read-ahead window in kilobytes | ||
28 | |||
29 | min_ratio (read-write) | ||
30 | |||
31 | Under normal circumstances each device is given a part of the | ||
32 | total write-back cache that relates to its current average | ||
33 | writeout speed in relation to the other devices. | ||
34 | |||
35 | The 'min_ratio' parameter allows assigning a minimum | ||
36 | percentage of the write-back cache to a particular device. | ||
37 | For example, this is useful for providing a minimum QoS. | ||
38 | |||
39 | max_ratio (read-write) | ||
40 | |||
41 | Allows limiting a particular device to use not more than the | ||
42 | given percentage of the write-back cache. This is useful in | ||
43 | situations where we want to avoid one device taking all or | ||
44 | most of the write-back cache. For example in case of an NFS | ||
45 | mount that is prone to get stuck, or a FUSE mount which cannot | ||
46 | be trusted to play fair. | ||
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index b939ebb62871..80d150458c80 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt | |||
@@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations | |||
145 | int | 145 | int |
146 | dma_supported(struct device *dev, u64 mask) | 146 | dma_supported(struct device *dev, u64 mask) |
147 | int | 147 | int |
148 | pci_dma_supported(struct device *dev, u64 mask) | 148 | pci_dma_supported(struct pci_dev *hwdev, u64 mask) |
149 | 149 | ||
150 | Checks to see if the device can support DMA to the memory described by | 150 | Checks to see if the device can support DMA to the memory described by |
151 | mask. | 151 | mask. |
@@ -189,7 +189,7 @@ dma_addr_t | |||
189 | dma_map_single(struct device *dev, void *cpu_addr, size_t size, | 189 | dma_map_single(struct device *dev, void *cpu_addr, size_t size, |
190 | enum dma_data_direction direction) | 190 | enum dma_data_direction direction) |
191 | dma_addr_t | 191 | dma_addr_t |
192 | pci_map_single(struct device *dev, void *cpu_addr, size_t size, | 192 | pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size, |
193 | int direction) | 193 | int direction) |
194 | 194 | ||
195 | Maps a piece of processor virtual memory so it can be accessed by the | 195 | Maps a piece of processor virtual memory so it can be accessed by the |
@@ -395,6 +395,71 @@ Notes: You must do this: | |||
395 | 395 | ||
396 | See also dma_map_single(). | 396 | See also dma_map_single(). |
397 | 397 | ||
398 | dma_addr_t | ||
399 | dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size, | ||
400 | enum dma_data_direction dir, | ||
401 | struct dma_attrs *attrs) | ||
402 | |||
403 | void | ||
404 | dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, | ||
405 | size_t size, enum dma_data_direction dir, | ||
406 | struct dma_attrs *attrs) | ||
407 | |||
408 | int | ||
409 | dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, | ||
410 | int nents, enum dma_data_direction dir, | ||
411 | struct dma_attrs *attrs) | ||
412 | |||
413 | void | ||
414 | dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, | ||
415 | int nents, enum dma_data_direction dir, | ||
416 | struct dma_attrs *attrs) | ||
417 | |||
418 | The four functions above are just like the counterpart functions | ||
419 | without the _attrs suffixes, except that they pass an optional | ||
420 | struct dma_attrs*. | ||
421 | |||
422 | struct dma_attrs encapsulates a set of "dma attributes". For the | ||
423 | definition of struct dma_attrs see linux/dma-attrs.h. | ||
424 | |||
425 | The interpretation of dma attributes is architecture-specific, and | ||
426 | each attribute should be documented in Documentation/DMA-attributes.txt. | ||
427 | |||
428 | If struct dma_attrs* is NULL, the semantics of each of these | ||
429 | functions is identical to those of the corresponding function | ||
430 | without the _attrs suffix. As a result dma_map_single_attrs() | ||
431 | can generally replace dma_map_single(), etc. | ||
432 | |||
433 | As an example of the use of the *_attrs functions, here's how | ||
434 | you could pass an attribute DMA_ATTR_FOO when mapping memory | ||
435 | for DMA: | ||
436 | |||
437 | #include <linux/dma-attrs.h> | ||
438 | /* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and | ||
439 | * documented in Documentation/DMA-attributes.txt */ | ||
440 | ... | ||
441 | |||
442 | DEFINE_DMA_ATTRS(attrs); | ||
443 | dma_set_attr(DMA_ATTR_FOO, &attrs); | ||
444 | .... | ||
445 | n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attr); | ||
446 | .... | ||
447 | |||
448 | Architectures that care about DMA_ATTR_FOO would check for its | ||
449 | presence in their implementations of the mapping and unmapping | ||
450 | routines, e.g.: | ||
451 | |||
452 | void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr, | ||
453 | size_t size, enum dma_data_direction dir, | ||
454 | struct dma_attrs *attrs) | ||
455 | { | ||
456 | .... | ||
457 | int foo = dma_get_attr(DMA_ATTR_FOO, attrs); | ||
458 | .... | ||
459 | if (foo) | ||
460 | /* twizzle the frobnozzle */ | ||
461 | .... | ||
462 | |||
398 | 463 | ||
399 | Part II - Advanced dma_ usage | 464 | Part II - Advanced dma_ usage |
400 | ----------------------------- | 465 | ----------------------------- |
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt new file mode 100644 index 000000000000..6d772f84b477 --- /dev/null +++ b/Documentation/DMA-attributes.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | DMA attributes | ||
2 | ============== | ||
3 | |||
4 | This document describes the semantics of the DMA attributes that are | ||
5 | defined in linux/dma-attrs.h. | ||
6 | |||
7 | DMA_ATTR_WRITE_BARRIER | ||
8 | ---------------------- | ||
9 | |||
10 | DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA | ||
11 | to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces | ||
12 | all pending DMA writes to complete, and thus provides a mechanism to | ||
13 | strictly order DMA from a device across all intervening busses and | ||
14 | bridges. This barrier is not specific to a particular type of | ||
15 | interconnect, it applies to the system as a whole, and so its | ||
16 | implementation must account for the idiosyncracies of the system all | ||
17 | the way from the DMA device to memory. | ||
18 | |||
19 | As an example of a situation where DMA_ATTR_WRITE_BARRIER would be | ||
20 | useful, suppose that a device does a DMA write to indicate that data is | ||
21 | ready and available in memory. The DMA of the "completion indication" | ||
22 | could race with data DMA. Mapping the memory used for completion | ||
23 | indications with DMA_ATTR_WRITE_BARRIER would prevent the race. | ||
24 | |||
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index d84f89dbf921..b463ecd0c7ce 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt | |||
@@ -315,11 +315,11 @@ you should do: | |||
315 | 315 | ||
316 | dma_addr_t dma_handle; | 316 | dma_addr_t dma_handle; |
317 | 317 | ||
318 | cpu_addr = pci_alloc_consistent(dev, size, &dma_handle); | 318 | cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle); |
319 | 319 | ||
320 | where dev is a struct pci_dev *. You should pass NULL for PCI like buses | 320 | where pdev is a struct pci_dev *. This may be called in interrupt context. |
321 | where devices don't have struct pci_dev (like ISA, EISA). This may be | 321 | You should use dma_alloc_coherent (see DMA-API.txt) for buses |
322 | called in interrupt context. | 322 | where devices don't have struct pci_dev (like ISA, EISA). |
323 | 323 | ||
324 | This argument is needed because the DMA translations may be bus | 324 | This argument is needed because the DMA translations may be bus |
325 | specific (and often is private to the bus which the device is attached | 325 | specific (and often is private to the bus which the device is attached |
@@ -332,7 +332,7 @@ __get_free_pages (but takes size instead of a page order). If your | |||
332 | driver needs regions sized smaller than a page, you may prefer using | 332 | driver needs regions sized smaller than a page, you may prefer using |
333 | the pci_pool interface, described below. | 333 | the pci_pool interface, described below. |
334 | 334 | ||
335 | The consistent DMA mapping interfaces, for non-NULL dev, will by | 335 | The consistent DMA mapping interfaces, for non-NULL pdev, will by |
336 | default return a DMA address which is SAC (Single Address Cycle) | 336 | default return a DMA address which is SAC (Single Address Cycle) |
337 | addressable. Even if the device indicates (via PCI dma mask) that it | 337 | addressable. Even if the device indicates (via PCI dma mask) that it |
338 | may address the upper 32-bits and thus perform DAC cycles, consistent | 338 | may address the upper 32-bits and thus perform DAC cycles, consistent |
@@ -354,9 +354,9 @@ buffer you receive will not cross a 64K boundary. | |||
354 | 354 | ||
355 | To unmap and free such a DMA region, you call: | 355 | To unmap and free such a DMA region, you call: |
356 | 356 | ||
357 | pci_free_consistent(dev, size, cpu_addr, dma_handle); | 357 | pci_free_consistent(pdev, size, cpu_addr, dma_handle); |
358 | 358 | ||
359 | where dev, size are the same as in the above call and cpu_addr and | 359 | where pdev, size are the same as in the above call and cpu_addr and |
360 | dma_handle are the values pci_alloc_consistent returned to you. | 360 | dma_handle are the values pci_alloc_consistent returned to you. |
361 | This function may not be called in interrupt context. | 361 | This function may not be called in interrupt context. |
362 | 362 | ||
@@ -371,9 +371,9 @@ Create a pci_pool like this: | |||
371 | 371 | ||
372 | struct pci_pool *pool; | 372 | struct pci_pool *pool; |
373 | 373 | ||
374 | pool = pci_pool_create(name, dev, size, align, alloc); | 374 | pool = pci_pool_create(name, pdev, size, align, alloc); |
375 | 375 | ||
376 | The "name" is for diagnostics (like a kmem_cache name); dev and size | 376 | The "name" is for diagnostics (like a kmem_cache name); pdev and size |
377 | are as above. The device's hardware alignment requirement for this | 377 | are as above. The device's hardware alignment requirement for this |
378 | type of data is "align" (which is expressed in bytes, and must be a | 378 | type of data is "align" (which is expressed in bytes, and must be a |
379 | power of two). If your device has no boundary crossing restrictions, | 379 | power of two). If your device has no boundary crossing restrictions, |
@@ -472,11 +472,11 @@ To map a single region, you do: | |||
472 | void *addr = buffer->ptr; | 472 | void *addr = buffer->ptr; |
473 | size_t size = buffer->len; | 473 | size_t size = buffer->len; |
474 | 474 | ||
475 | dma_handle = pci_map_single(dev, addr, size, direction); | 475 | dma_handle = pci_map_single(pdev, addr, size, direction); |
476 | 476 | ||
477 | and to unmap it: | 477 | and to unmap it: |
478 | 478 | ||
479 | pci_unmap_single(dev, dma_handle, size, direction); | 479 | pci_unmap_single(pdev, dma_handle, size, direction); |
480 | 480 | ||
481 | You should call pci_unmap_single when the DMA activity is finished, e.g. | 481 | You should call pci_unmap_single when the DMA activity is finished, e.g. |
482 | from the interrupt which told you that the DMA transfer is done. | 482 | from the interrupt which told you that the DMA transfer is done. |
@@ -493,17 +493,17 @@ Specifically: | |||
493 | unsigned long offset = buffer->offset; | 493 | unsigned long offset = buffer->offset; |
494 | size_t size = buffer->len; | 494 | size_t size = buffer->len; |
495 | 495 | ||
496 | dma_handle = pci_map_page(dev, page, offset, size, direction); | 496 | dma_handle = pci_map_page(pdev, page, offset, size, direction); |
497 | 497 | ||
498 | ... | 498 | ... |
499 | 499 | ||
500 | pci_unmap_page(dev, dma_handle, size, direction); | 500 | pci_unmap_page(pdev, dma_handle, size, direction); |
501 | 501 | ||
502 | Here, "offset" means byte offset within the given page. | 502 | Here, "offset" means byte offset within the given page. |
503 | 503 | ||
504 | With scatterlists, you map a region gathered from several regions by: | 504 | With scatterlists, you map a region gathered from several regions by: |
505 | 505 | ||
506 | int i, count = pci_map_sg(dev, sglist, nents, direction); | 506 | int i, count = pci_map_sg(pdev, sglist, nents, direction); |
507 | struct scatterlist *sg; | 507 | struct scatterlist *sg; |
508 | 508 | ||
509 | for_each_sg(sglist, sg, count, i) { | 509 | for_each_sg(sglist, sg, count, i) { |
@@ -527,7 +527,7 @@ accessed sg->address and sg->length as shown above. | |||
527 | 527 | ||
528 | To unmap a scatterlist, just call: | 528 | To unmap a scatterlist, just call: |
529 | 529 | ||
530 | pci_unmap_sg(dev, sglist, nents, direction); | 530 | pci_unmap_sg(pdev, sglist, nents, direction); |
531 | 531 | ||
532 | Again, make sure DMA activity has already finished. | 532 | Again, make sure DMA activity has already finished. |
533 | 533 | ||
@@ -550,11 +550,11 @@ correct copy of the DMA buffer. | |||
550 | So, firstly, just map it with pci_map_{single,sg}, and after each DMA | 550 | So, firstly, just map it with pci_map_{single,sg}, and after each DMA |
551 | transfer call either: | 551 | transfer call either: |
552 | 552 | ||
553 | pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction); | 553 | pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction); |
554 | 554 | ||
555 | or: | 555 | or: |
556 | 556 | ||
557 | pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction); | 557 | pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction); |
558 | 558 | ||
559 | as appropriate. | 559 | as appropriate. |
560 | 560 | ||
@@ -562,7 +562,7 @@ Then, if you wish to let the device get at the DMA area again, | |||
562 | finish accessing the data with the cpu, and then before actually | 562 | finish accessing the data with the cpu, and then before actually |
563 | giving the buffer to the hardware call either: | 563 | giving the buffer to the hardware call either: |
564 | 564 | ||
565 | pci_dma_sync_single_for_device(dev, dma_handle, size, direction); | 565 | pci_dma_sync_single_for_device(pdev, dma_handle, size, direction); |
566 | 566 | ||
567 | or: | 567 | or: |
568 | 568 | ||
@@ -739,7 +739,7 @@ failure can be determined by: | |||
739 | 739 | ||
740 | dma_addr_t dma_handle; | 740 | dma_addr_t dma_handle; |
741 | 741 | ||
742 | dma_handle = pci_map_single(dev, addr, size, direction); | 742 | dma_handle = pci_map_single(pdev, addr, size, direction); |
743 | if (pci_dma_mapping_error(dma_handle)) { | 743 | if (pci_dma_mapping_error(dma_handle)) { |
744 | /* | 744 | /* |
745 | * reduce current DMA mapping usage, | 745 | * reduce current DMA mapping usage, |
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 83966e94cc32..0eb0d027eb32 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile | |||
@@ -12,7 +12,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ | |||
12 | kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ | 12 | kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ |
13 | gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ | 13 | gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ |
14 | genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ | 14 | genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ |
15 | mac80211.xml | 15 | mac80211.xml debugobjects.xml |
16 | 16 | ||
17 | ### | 17 | ### |
18 | # The build process is as follows (targets): | 18 | # The build process is as follows (targets): |
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl new file mode 100644 index 000000000000..7f5f218015fe --- /dev/null +++ b/Documentation/DocBook/debugobjects.tmpl | |||
@@ -0,0 +1,391 @@ | |||
1 | <?xml version="1.0" encoding="UTF-8"?> | ||
2 | <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" | ||
3 | "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> | ||
4 | |||
5 | <book id="debug-objects-guide"> | ||
6 | <bookinfo> | ||
7 | <title>Debug objects life time</title> | ||
8 | |||
9 | <authorgroup> | ||
10 | <author> | ||
11 | <firstname>Thomas</firstname> | ||
12 | <surname>Gleixner</surname> | ||
13 | <affiliation> | ||
14 | <address> | ||
15 | <email>tglx@linutronix.de</email> | ||
16 | </address> | ||
17 | </affiliation> | ||
18 | </author> | ||
19 | </authorgroup> | ||
20 | |||
21 | <copyright> | ||
22 | <year>2008</year> | ||
23 | <holder>Thomas Gleixner</holder> | ||
24 | </copyright> | ||
25 | |||
26 | <legalnotice> | ||
27 | <para> | ||
28 | This documentation is free software; you can redistribute | ||
29 | it and/or modify it under the terms of the GNU General Public | ||
30 | License version 2 as published by the Free Software Foundation. | ||
31 | </para> | ||
32 | |||
33 | <para> | ||
34 | This program is distributed in the hope that it will be | ||
35 | useful, but WITHOUT ANY WARRANTY; without even the implied | ||
36 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
37 | See the GNU General Public License for more details. | ||
38 | </para> | ||
39 | |||
40 | <para> | ||
41 | You should have received a copy of the GNU General Public | ||
42 | License along with this program; if not, write to the Free | ||
43 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, | ||
44 | MA 02111-1307 USA | ||
45 | </para> | ||
46 | |||
47 | <para> | ||
48 | For more details see the file COPYING in the source | ||
49 | distribution of Linux. | ||
50 | </para> | ||
51 | </legalnotice> | ||
52 | </bookinfo> | ||
53 | |||
54 | <toc></toc> | ||
55 | |||
56 | <chapter id="intro"> | ||
57 | <title>Introduction</title> | ||
58 | <para> | ||
59 | debugobjects is a generic infrastructure to track the life time | ||
60 | of kernel objects and validate the operations on those. | ||
61 | </para> | ||
62 | <para> | ||
63 | debugobjects is useful to check for the following error patterns: | ||
64 | <itemizedlist> | ||
65 | <listitem><para>Activation of uninitialized objects</para></listitem> | ||
66 | <listitem><para>Initialization of active objects</para></listitem> | ||
67 | <listitem><para>Usage of freed/destroyed objects</para></listitem> | ||
68 | </itemizedlist> | ||
69 | </para> | ||
70 | <para> | ||
71 | debugobjects is not changing the data structure of the real | ||
72 | object so it can be compiled in with a minimal runtime impact | ||
73 | and enabled on demand with a kernel command line option. | ||
74 | </para> | ||
75 | </chapter> | ||
76 | |||
77 | <chapter id="howto"> | ||
78 | <title>Howto use debugobjects</title> | ||
79 | <para> | ||
80 | A kernel subsystem needs to provide a data structure which | ||
81 | describes the object type and add calls into the debug code at | ||
82 | appropriate places. The data structure to describe the object | ||
83 | type needs at minimum the name of the object type. Optional | ||
84 | functions can and should be provided to fixup detected problems | ||
85 | so the kernel can continue to work and the debug information can | ||
86 | be retrieved from a live system instead of hard core debugging | ||
87 | with serial consoles and stack trace transcripts from the | ||
88 | monitor. | ||
89 | </para> | ||
90 | <para> | ||
91 | The debug calls provided by debugobjects are: | ||
92 | <itemizedlist> | ||
93 | <listitem><para>debug_object_init</para></listitem> | ||
94 | <listitem><para>debug_object_init_on_stack</para></listitem> | ||
95 | <listitem><para>debug_object_activate</para></listitem> | ||
96 | <listitem><para>debug_object_deactivate</para></listitem> | ||
97 | <listitem><para>debug_object_destroy</para></listitem> | ||
98 | <listitem><para>debug_object_free</para></listitem> | ||
99 | </itemizedlist> | ||
100 | Each of these functions takes the address of the real object and | ||
101 | a pointer to the object type specific debug description | ||
102 | structure. | ||
103 | </para> | ||
104 | <para> | ||
105 | Each detected error is reported in the statistics and a limited | ||
106 | number of errors are printk'ed including a full stack trace. | ||
107 | </para> | ||
108 | <para> | ||
109 | The statistics are available via debugfs/debug_objects/stats. | ||
110 | They provide information about the number of warnings and the | ||
111 | number of successful fixups along with information about the | ||
112 | usage of the internal tracking objects and the state of the | ||
113 | internal tracking objects pool. | ||
114 | </para> | ||
115 | </chapter> | ||
116 | <chapter id="debugfunctions"> | ||
117 | <title>Debug functions</title> | ||
118 | <sect1 id="prototypes"> | ||
119 | <title>Debug object function reference</title> | ||
120 | !Elib/debugobjects.c | ||
121 | </sect1> | ||
122 | <sect1 id="debug_object_init"> | ||
123 | <title>debug_object_init</title> | ||
124 | <para> | ||
125 | This function is called whenever the initialization function | ||
126 | of a real object is called. | ||
127 | </para> | ||
128 | <para> | ||
129 | When the real object is already tracked by debugobjects it is | ||
130 | checked, whether the object can be initialized. Initializing | ||
131 | is not allowed for active and destroyed objects. When | ||
132 | debugobjects detects an error, then it calls the fixup_init | ||
133 | function of the object type description structure if provided | ||
134 | by the caller. The fixup function can correct the problem | ||
135 | before the real initialization of the object happens. E.g. it | ||
136 | can deactivate an active object in order to prevent damage to | ||
137 | the subsystem. | ||
138 | </para> | ||
139 | <para> | ||
140 | When the real object is not yet tracked by debugobjects, | ||
141 | debugobjects allocates a tracker object for the real object | ||
142 | and sets the tracker object state to ODEBUG_STATE_INIT. It | ||
143 | verifies that the object is not on the callers stack. If it is | ||
144 | on the callers stack then a limited number of warnings | ||
145 | including a full stack trace is printk'ed. The calling code | ||
146 | must use debug_object_init_on_stack() and remove the object | ||
147 | before leaving the function which allocated it. See next | ||
148 | section. | ||
149 | </para> | ||
150 | </sect1> | ||
151 | |||
152 | <sect1 id="debug_object_init_on_stack"> | ||
153 | <title>debug_object_init_on_stack</title> | ||
154 | <para> | ||
155 | This function is called whenever the initialization function | ||
156 | of a real object which resides on the stack is called. | ||
157 | </para> | ||
158 | <para> | ||
159 | When the real object is already tracked by debugobjects it is | ||
160 | checked, whether the object can be initialized. Initializing | ||
161 | is not allowed for active and destroyed objects. When | ||
162 | debugobjects detects an error, then it calls the fixup_init | ||
163 | function of the object type description structure if provided | ||
164 | by the caller. The fixup function can correct the problem | ||
165 | before the real initialization of the object happens. E.g. it | ||
166 | can deactivate an active object in order to prevent damage to | ||
167 | the subsystem. | ||
168 | </para> | ||
169 | <para> | ||
170 | When the real object is not yet tracked by debugobjects | ||
171 | debugobjects allocates a tracker object for the real object | ||
172 | and sets the tracker object state to ODEBUG_STATE_INIT. It | ||
173 | verifies that the object is on the callers stack. | ||
174 | </para> | ||
175 | <para> | ||
176 | An object which is on the stack must be removed from the | ||
177 | tracker by calling debug_object_free() before the function | ||
178 | which allocates the object returns. Otherwise we keep track of | ||
179 | stale objects. | ||
180 | </para> | ||
181 | </sect1> | ||
182 | |||
183 | <sect1 id="debug_object_activate"> | ||
184 | <title>debug_object_activate</title> | ||
185 | <para> | ||
186 | This function is called whenever the activation function of a | ||
187 | real object is called. | ||
188 | </para> | ||
189 | <para> | ||
190 | When the real object is already tracked by debugobjects it is | ||
191 | checked, whether the object can be activated. Activating is | ||
192 | not allowed for active and destroyed objects. When | ||
193 | debugobjects detects an error, then it calls the | ||
194 | fixup_activate function of the object type description | ||
195 | structure if provided by the caller. The fixup function can | ||
196 | correct the problem before the real activation of the object | ||
197 | happens. E.g. it can deactivate an active object in order to | ||
198 | prevent damage to the subsystem. | ||
199 | </para> | ||
200 | <para> | ||
201 | When the real object is not yet tracked by debugobjects then | ||
202 | the fixup_activate function is called if available. This is | ||
203 | necessary to allow the legitimate activation of statically | ||
204 | allocated and initialized objects. The fixup function checks | ||
205 | whether the object is valid and calls the debug_objects_init() | ||
206 | function to initialize the tracking of this object. | ||
207 | </para> | ||
208 | <para> | ||
209 | When the activation is legitimate, then the state of the | ||
210 | associated tracker object is set to ODEBUG_STATE_ACTIVE. | ||
211 | </para> | ||
212 | </sect1> | ||
213 | |||
214 | <sect1 id="debug_object_deactivate"> | ||
215 | <title>debug_object_deactivate</title> | ||
216 | <para> | ||
217 | This function is called whenever the deactivation function of | ||
218 | a real object is called. | ||
219 | </para> | ||
220 | <para> | ||
221 | When the real object is tracked by debugobjects it is checked, | ||
222 | whether the object can be deactivated. Deactivating is not | ||
223 | allowed for untracked or destroyed objects. | ||
224 | </para> | ||
225 | <para> | ||
226 | When the deactivation is legitimate, then the state of the | ||
227 | associated tracker object is set to ODEBUG_STATE_INACTIVE. | ||
228 | </para> | ||
229 | </sect1> | ||
230 | |||
231 | <sect1 id="debug_object_destroy"> | ||
232 | <title>debug_object_destroy</title> | ||
233 | <para> | ||
234 | This function is called to mark an object destroyed. This is | ||
235 | useful to prevent the usage of invalid objects, which are | ||
236 | still available in memory: either statically allocated objects | ||
237 | or objects which are freed later. | ||
238 | </para> | ||
239 | <para> | ||
240 | When the real object is tracked by debugobjects it is checked, | ||
241 | whether the object can be destroyed. Destruction is not | ||
242 | allowed for active and destroyed objects. When debugobjects | ||
243 | detects an error, then it calls the fixup_destroy function of | ||
244 | the object type description structure if provided by the | ||
245 | caller. The fixup function can correct the problem before the | ||
246 | real destruction of the object happens. E.g. it can deactivate | ||
247 | an active object in order to prevent damage to the subsystem. | ||
248 | </para> | ||
249 | <para> | ||
250 | When the destruction is legitimate, then the state of the | ||
251 | associated tracker object is set to ODEBUG_STATE_DESTROYED. | ||
252 | </para> | ||
253 | </sect1> | ||
254 | |||
255 | <sect1 id="debug_object_free"> | ||
256 | <title>debug_object_free</title> | ||
257 | <para> | ||
258 | This function is called before an object is freed. | ||
259 | </para> | ||
260 | <para> | ||
261 | When the real object is tracked by debugobjects it is checked, | ||
262 | whether the object can be freed. Free is not allowed for | ||
263 | active objects. When debugobjects detects an error, then it | ||
264 | calls the fixup_free function of the object type description | ||
265 | structure if provided by the caller. The fixup function can | ||
266 | correct the problem before the real free of the object | ||
267 | happens. E.g. it can deactivate an active object in order to | ||
268 | prevent damage to the subsystem. | ||
269 | </para> | ||
270 | <para> | ||
271 | Note that debug_object_free removes the object from the | ||
272 | tracker. Later usage of the object is detected by the other | ||
273 | debug checks. | ||
274 | </para> | ||
275 | </sect1> | ||
276 | </chapter> | ||
277 | <chapter id="fixupfunctions"> | ||
278 | <title>Fixup functions</title> | ||
279 | <sect1 id="debug_obj_descr"> | ||
280 | <title>Debug object type description structure</title> | ||
281 | !Iinclude/linux/debugobjects.h | ||
282 | </sect1> | ||
283 | <sect1 id="fixup_init"> | ||
284 | <title>fixup_init</title> | ||
285 | <para> | ||
286 | This function is called from the debug code whenever a problem | ||
287 | in debug_object_init is detected. The function takes the | ||
288 | address of the object and the state which is currently | ||
289 | recorded in the tracker. | ||
290 | </para> | ||
291 | <para> | ||
292 | Called from debug_object_init when the object state is: | ||
293 | <itemizedlist> | ||
294 | <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem> | ||
295 | </itemizedlist> | ||
296 | </para> | ||
297 | <para> | ||
298 | The function returns 1 when the fixup was successful, | ||
299 | otherwise 0. The return value is used to update the | ||
300 | statistics. | ||
301 | </para> | ||
302 | <para> | ||
303 | Note, that the function needs to call the debug_object_init() | ||
304 | function again, after the damage has been repaired in order to | ||
305 | keep the state consistent. | ||
306 | </para> | ||
307 | </sect1> | ||
308 | |||
309 | <sect1 id="fixup_activate"> | ||
310 | <title>fixup_activate</title> | ||
311 | <para> | ||
312 | This function is called from the debug code whenever a problem | ||
313 | in debug_object_activate is detected. | ||
314 | </para> | ||
315 | <para> | ||
316 | Called from debug_object_activate when the object state is: | ||
317 | <itemizedlist> | ||
318 | <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem> | ||
319 | <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem> | ||
320 | </itemizedlist> | ||
321 | </para> | ||
322 | <para> | ||
323 | The function returns 1 when the fixup was successful, | ||
324 | otherwise 0. The return value is used to update the | ||
325 | statistics. | ||
326 | </para> | ||
327 | <para> | ||
328 | Note that the function needs to call the debug_object_activate() | ||
329 | function again after the damage has been repaired in order to | ||
330 | keep the state consistent. | ||
331 | </para> | ||
332 | <para> | ||
333 | The activation of statically initialized objects is a special | ||
334 | case. When debug_object_activate() has no tracked object for | ||
335 | this object address then fixup_activate() is called with | ||
336 | object state ODEBUG_STATE_NOTAVAILABLE. The fixup function | ||
337 | needs to check whether this is a legitimate case of a | ||
338 | statically initialized object or not. In case it is it calls | ||
339 | debug_object_init() and debug_object_activate() to make the | ||
340 | object known to the tracker and marked active. In this case | ||
341 | the function should return 0 because this is not a real fixup. | ||
342 | </para> | ||
343 | </sect1> | ||
344 | |||
345 | <sect1 id="fixup_destroy"> | ||
346 | <title>fixup_destroy</title> | ||
347 | <para> | ||
348 | This function is called from the debug code whenever a problem | ||
349 | in debug_object_destroy is detected. | ||
350 | </para> | ||
351 | <para> | ||
352 | Called from debug_object_destroy when the object state is: | ||
353 | <itemizedlist> | ||
354 | <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem> | ||
355 | </itemizedlist> | ||
356 | </para> | ||
357 | <para> | ||
358 | The function returns 1 when the fixup was successful, | ||
359 | otherwise 0. The return value is used to update the | ||
360 | statistics. | ||
361 | </para> | ||
362 | </sect1> | ||
363 | <sect1 id="fixup_free"> | ||
364 | <title>fixup_free</title> | ||
365 | <para> | ||
366 | This function is called from the debug code whenever a problem | ||
367 | in debug_object_free is detected. Further it can be called | ||
368 | from the debug checks in kfree/vfree, when an active object is | ||
369 | detected from the debug_check_no_obj_freed() sanity checks. | ||
370 | </para> | ||
371 | <para> | ||
372 | Called from debug_object_free() or debug_check_no_obj_freed() | ||
373 | when the object state is: | ||
374 | <itemizedlist> | ||
375 | <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem> | ||
376 | </itemizedlist> | ||
377 | </para> | ||
378 | <para> | ||
379 | The function returns 1 when the fixup was successful, | ||
380 | otherwise 0. The return value is used to update the | ||
381 | statistics. | ||
382 | </para> | ||
383 | </sect1> | ||
384 | </chapter> | ||
385 | <chapter id="bugs"> | ||
386 | <title>Known Bugs And Assumptions</title> | ||
387 | <para> | ||
388 | None (knock on wood). | ||
389 | </para> | ||
390 | </chapter> | ||
391 | </book> | ||
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 488dd4a4945b..b7b1482f6e04 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl | |||
@@ -119,7 +119,7 @@ X!Ilib/string.c | |||
119 | !Elib/string.c | 119 | !Elib/string.c |
120 | </sect1> | 120 | </sect1> |
121 | <sect1><title>Bit Operations</title> | 121 | <sect1><title>Bit Operations</title> |
122 | !Iinclude/asm-x86/bitops_32.h | 122 | !Iinclude/asm-x86/bitops.h |
123 | </sect1> | 123 | </sect1> |
124 | </chapter> | 124 | </chapter> |
125 | 125 | ||
@@ -645,4 +645,58 @@ X!Idrivers/video/console/fonts.c | |||
645 | !Edrivers/i2c/i2c-core.c | 645 | !Edrivers/i2c/i2c-core.c |
646 | </chapter> | 646 | </chapter> |
647 | 647 | ||
648 | <chapter id="clk"> | ||
649 | <title>Clock Framework</title> | ||
650 | |||
651 | <para> | ||
652 | The clock framework defines programming interfaces to support | ||
653 | software management of the system clock tree. | ||
654 | This framework is widely used with System-On-Chip (SOC) platforms | ||
655 | to support power management and various devices which may need | ||
656 | custom clock rates. | ||
657 | Note that these "clocks" don't relate to timekeeping or real | ||
658 | time clocks (RTCs), each of which have separate frameworks. | ||
659 | These <structname>struct clk</structname> instances may be used | ||
660 | to manage for example a 96 MHz signal that is used to shift bits | ||
661 | into and out of peripherals or busses, or otherwise trigger | ||
662 | synchronous state machine transitions in system hardware. | ||
663 | </para> | ||
664 | |||
665 | <para> | ||
666 | Power management is supported by explicit software clock gating: | ||
667 | unused clocks are disabled, so the system doesn't waste power | ||
668 | changing the state of transistors that aren't in active use. | ||
669 | On some systems this may be backed by hardware clock gating, | ||
670 | where clocks are gated without being disabled in software. | ||
671 | Sections of chips that are powered but not clocked may be able | ||
672 | to retain their last state. | ||
673 | This low power state is often called a <emphasis>retention | ||
674 | mode</emphasis>. | ||
675 | This mode still incurs leakage currents, especially with finer | ||
676 | circuit geometries, but for CMOS circuits power is mostly used | ||
677 | by clocked state changes. | ||
678 | </para> | ||
679 | |||
680 | <para> | ||
681 | Power-aware drivers only enable their clocks when the device | ||
682 | they manage is in active use. Also, system sleep states often | ||
683 | differ according to which clock domains are active: while a | ||
684 | "standby" state may allow wakeup from several active domains, a | ||
685 | "mem" (suspend-to-RAM) state may require a more wholesale shutdown | ||
686 | of clocks derived from higher speed PLLs and oscillators, limiting | ||
687 | the number of possible wakeup event sources. A driver's suspend | ||
688 | method may need to be aware of system-specific clock constraints | ||
689 | on the target sleep state. | ||
690 | </para> | ||
691 | |||
692 | <para> | ||
693 | Some platforms support programmable clock generators. These | ||
694 | can be used by external chips of various kinds, such as other | ||
695 | CPUs, multimedia codecs, and devices with strict requirements | ||
696 | for interface clocking. | ||
697 | </para> | ||
698 | |||
699 | !Iinclude/linux/clk.h | ||
700 | </chapter> | ||
701 | |||
648 | </book> | 702 | </book> |
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl index b9e143e28c64..54eb26b57372 100644 --- a/Documentation/DocBook/rapidio.tmpl +++ b/Documentation/DocBook/rapidio.tmpl | |||
@@ -133,7 +133,6 @@ | |||
133 | !Idrivers/rapidio/rio-sysfs.c | 133 | !Idrivers/rapidio/rio-sysfs.c |
134 | </sect1> | 134 | </sect1> |
135 | <sect1 id="PPC32_support"><title>PPC32 support</title> | 135 | <sect1 id="PPC32_support"><title>PPC32 support</title> |
136 | !Iarch/powerpc/kernel/rio.c | ||
137 | !Earch/powerpc/sysdev/fsl_rio.c | 136 | !Earch/powerpc/sysdev/fsl_rio.c |
138 | !Iarch/powerpc/sysdev/fsl_rio.c | 137 | !Iarch/powerpc/sysdev/fsl_rio.c |
139 | </sect1> | 138 | </sect1> |
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt new file mode 100644 index 000000000000..000b0fbdc105 --- /dev/null +++ b/Documentation/braille-console.txt | |||
@@ -0,0 +1,34 @@ | |||
1 | Linux Braille Console | ||
2 | |||
3 | To get early boot messages on a braille device (before userspace screen | ||
4 | readers can start), you first need to compile the support for the usual serial | ||
5 | console (see serial-console.txt), and for braille device (in Device Drivers - | ||
6 | Accessibility). | ||
7 | |||
8 | Then you need to specify a console=brl, option on the kernel command line, the | ||
9 | format is: | ||
10 | |||
11 | console=brl,serial_options... | ||
12 | |||
13 | where serial_options... are the same as described in serial-console.txt | ||
14 | |||
15 | So for instance you can use console=brl,ttyS0 if the braille device is connected | ||
16 | to the first serial port, and console=brl,ttyS0,115200 to override the baud rate | ||
17 | to 115200, etc. | ||
18 | |||
19 | By default, the braille device will just show the last kernel message (console | ||
20 | mode). To review previous messages, press the Insert key to switch to the VT | ||
21 | review mode. In review mode, the arrow keys permit to browse in the VT content, | ||
22 | page up/down keys go at the top/bottom of the screen, and the home key goes back | ||
23 | to the cursor, hence providing very basic screen reviewing facility. | ||
24 | |||
25 | Sound feedback can be obtained by adding the braille_console.sound=1 kernel | ||
26 | parameter. | ||
27 | |||
28 | For simplicity, only one braille console can be enabled, other uses of | ||
29 | console=brl,... will be discarded. Also note that it does not interfere with | ||
30 | the console selection mecanism described in serial-console.txt | ||
31 | |||
32 | For now, only the VisioBraille device is supported. | ||
33 | |||
34 | Samuel Thibault <samuel.thibault@ens-lyon.org> | ||
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 31d12e21ff8a..c298a6690e0d 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt | |||
@@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking. | |||
500 | 500 | ||
501 | void fork(struct cgroup_subsy *ss, struct task_struct *task) | 501 | void fork(struct cgroup_subsy *ss, struct task_struct *task) |
502 | 502 | ||
503 | Called when a task is forked into a cgroup. Also called during | 503 | Called when a task is forked into a cgroup. |
504 | registration for all existing tasks. | ||
505 | 504 | ||
506 | void exit(struct cgroup_subsys *ss, struct task_struct *task) | 505 | void exit(struct cgroup_subsys *ss, struct task_struct *task) |
507 | 506 | ||
diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt new file mode 100644 index 000000000000..4dcea42432c2 --- /dev/null +++ b/Documentation/controllers/devices.txt | |||
@@ -0,0 +1,48 @@ | |||
1 | Device Whitelist Controller | ||
2 | |||
3 | 1. Description: | ||
4 | |||
5 | Implement a cgroup to track and enforce open and mknod restrictions | ||
6 | on device files. A device cgroup associates a device access | ||
7 | whitelist with each cgroup. A whitelist entry has 4 fields. | ||
8 | 'type' is a (all), c (char), or b (block). 'all' means it applies | ||
9 | to all types and all major and minor numbers. Major and minor are | ||
10 | either an integer or * for all. Access is a composition of r | ||
11 | (read), w (write), and m (mknod). | ||
12 | |||
13 | The root device cgroup starts with rwm to 'all'. A child device | ||
14 | cgroup gets a copy of the parent. Administrators can then remove | ||
15 | devices from the whitelist or add new entries. A child cgroup can | ||
16 | never receive a device access which is denied its parent. However | ||
17 | when a device access is removed from a parent it will not also be | ||
18 | removed from the child(ren). | ||
19 | |||
20 | 2. User Interface | ||
21 | |||
22 | An entry is added using devices.allow, and removed using | ||
23 | devices.deny. For instance | ||
24 | |||
25 | echo 'c 1:3 mr' > /cgroups/1/devices.allow | ||
26 | |||
27 | allows cgroup 1 to read and mknod the device usually known as | ||
28 | /dev/null. Doing | ||
29 | |||
30 | echo a > /cgroups/1/devices.deny | ||
31 | |||
32 | will remove the default 'a *:* mrw' entry. | ||
33 | |||
34 | 3. Security | ||
35 | |||
36 | Any task can move itself between cgroups. This clearly won't | ||
37 | suffice, but we can decide the best way to adequately restrict | ||
38 | movement as people get some experience with this. We may just want | ||
39 | to require CAP_SYS_ADMIN, which at least is a separate bit from | ||
40 | CAP_MKNOD. We may want to just refuse moving to a cgroup which | ||
41 | isn't a descendent of the current one. Or we may want to use | ||
42 | CAP_MAC_ADMIN, since we really are trying to lock down root. | ||
43 | |||
44 | CAP_SYS_ADMIN is needed to modify the whitelist or move another | ||
45 | task to a new cgroup. (Again we'll probably want to change that). | ||
46 | |||
47 | A cgroup may not be granted more permissions than the cgroup's | ||
48 | parent has. | ||
diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt new file mode 100644 index 000000000000..f196ac1d7d25 --- /dev/null +++ b/Documentation/controllers/resource_counter.txt | |||
@@ -0,0 +1,181 @@ | |||
1 | |||
2 | The Resource Counter | ||
3 | |||
4 | The resource counter, declared at include/linux/res_counter.h, | ||
5 | is supposed to facilitate the resource management by controllers | ||
6 | by providing common stuff for accounting. | ||
7 | |||
8 | This "stuff" includes the res_counter structure and routines | ||
9 | to work with it. | ||
10 | |||
11 | |||
12 | |||
13 | 1. Crucial parts of the res_counter structure | ||
14 | |||
15 | a. unsigned long long usage | ||
16 | |||
17 | The usage value shows the amount of a resource that is consumed | ||
18 | by a group at a given time. The units of measurement should be | ||
19 | determined by the controller that uses this counter. E.g. it can | ||
20 | be bytes, items or any other unit the controller operates on. | ||
21 | |||
22 | b. unsigned long long max_usage | ||
23 | |||
24 | The maximal value of the usage over time. | ||
25 | |||
26 | This value is useful when gathering statistical information about | ||
27 | the particular group, as it shows the actual resource requirements | ||
28 | for a particular group, not just some usage snapshot. | ||
29 | |||
30 | c. unsigned long long limit | ||
31 | |||
32 | The maximal allowed amount of resource to consume by the group. In | ||
33 | case the group requests for more resources, so that the usage value | ||
34 | would exceed the limit, the resource allocation is rejected (see | ||
35 | the next section). | ||
36 | |||
37 | d. unsigned long long failcnt | ||
38 | |||
39 | The failcnt stands for "failures counter". This is the number of | ||
40 | resource allocation attempts that failed. | ||
41 | |||
42 | c. spinlock_t lock | ||
43 | |||
44 | Protects changes of the above values. | ||
45 | |||
46 | |||
47 | |||
48 | 2. Basic accounting routines | ||
49 | |||
50 | a. void res_counter_init(struct res_counter *rc) | ||
51 | |||
52 | Initializes the resource counter. As usual, should be the first | ||
53 | routine called for a new counter. | ||
54 | |||
55 | b. int res_counter_charge[_locked] | ||
56 | (struct res_counter *rc, unsigned long val) | ||
57 | |||
58 | When a resource is about to be allocated it has to be accounted | ||
59 | with the appropriate resource counter (controller should determine | ||
60 | which one to use on its own). This operation is called "charging". | ||
61 | |||
62 | This is not very important which operation - resource allocation | ||
63 | or charging - is performed first, but | ||
64 | * if the allocation is performed first, this may create a | ||
65 | temporary resource over-usage by the time resource counter is | ||
66 | charged; | ||
67 | * if the charging is performed first, then it should be uncharged | ||
68 | on error path (if the one is called). | ||
69 | |||
70 | c. void res_counter_uncharge[_locked] | ||
71 | (struct res_counter *rc, unsigned long val) | ||
72 | |||
73 | When a resource is released (freed) it should be de-accounted | ||
74 | from the resource counter it was accounted to. This is called | ||
75 | "uncharging". | ||
76 | |||
77 | The _locked routines imply that the res_counter->lock is taken. | ||
78 | |||
79 | |||
80 | 2.1 Other accounting routines | ||
81 | |||
82 | There are more routines that may help you with common needs, like | ||
83 | checking whether the limit is reached or resetting the max_usage | ||
84 | value. They are all declared in include/linux/res_counter.h. | ||
85 | |||
86 | |||
87 | |||
88 | 3. Analyzing the resource counter registrations | ||
89 | |||
90 | a. If the failcnt value constantly grows, this means that the counter's | ||
91 | limit is too tight. Either the group is misbehaving and consumes too | ||
92 | many resources, or the configuration is not suitable for the group | ||
93 | and the limit should be increased. | ||
94 | |||
95 | b. The max_usage value can be used to quickly tune the group. One may | ||
96 | set the limits to maximal values and either load the container with | ||
97 | a common pattern or leave one for a while. After this the max_usage | ||
98 | value shows the amount of memory the container would require during | ||
99 | its common activity. | ||
100 | |||
101 | Setting the limit a bit above this value gives a pretty good | ||
102 | configuration that works in most of the cases. | ||
103 | |||
104 | c. If the max_usage is much less than the limit, but the failcnt value | ||
105 | is growing, then the group tries to allocate a big chunk of resource | ||
106 | at once. | ||
107 | |||
108 | d. If the max_usage is much less than the limit, but the failcnt value | ||
109 | is 0, then this group is given too high limit, that it does not | ||
110 | require. It is better to lower the limit a bit leaving more resource | ||
111 | for other groups. | ||
112 | |||
113 | |||
114 | |||
115 | 4. Communication with the control groups subsystem (cgroups) | ||
116 | |||
117 | All the resource controllers that are using cgroups and resource counters | ||
118 | should provide files (in the cgroup filesystem) to work with the resource | ||
119 | counter fields. They are recommended to adhere to the following rules: | ||
120 | |||
121 | a. File names | ||
122 | |||
123 | Field name File name | ||
124 | --------------------------------------------------- | ||
125 | usage usage_in_<unit_of_measurement> | ||
126 | max_usage max_usage_in_<unit_of_measurement> | ||
127 | limit limit_in_<unit_of_measurement> | ||
128 | failcnt failcnt | ||
129 | lock no file :) | ||
130 | |||
131 | b. Reading from file should show the corresponding field value in the | ||
132 | appropriate format. | ||
133 | |||
134 | c. Writing to file | ||
135 | |||
136 | Field Expected behavior | ||
137 | ---------------------------------- | ||
138 | usage prohibited | ||
139 | max_usage reset to usage | ||
140 | limit set the limit | ||
141 | failcnt reset to zero | ||
142 | |||
143 | |||
144 | |||
145 | 5. Usage example | ||
146 | |||
147 | a. Declare a task group (take a look at cgroups subsystem for this) and | ||
148 | fold a res_counter into it | ||
149 | |||
150 | struct my_group { | ||
151 | struct res_counter res; | ||
152 | |||
153 | <other fields> | ||
154 | } | ||
155 | |||
156 | b. Put hooks in resource allocation/release paths | ||
157 | |||
158 | int alloc_something(...) | ||
159 | { | ||
160 | if (res_counter_charge(res_counter_ptr, amount) < 0) | ||
161 | return -ENOMEM; | ||
162 | |||
163 | <allocate the resource and return to the caller> | ||
164 | } | ||
165 | |||
166 | void release_something(...) | ||
167 | { | ||
168 | res_counter_uncharge(res_counter_ptr, amount); | ||
169 | |||
170 | <release the resource> | ||
171 | } | ||
172 | |||
173 | In order to keep the usage value self-consistent, both the | ||
174 | "res_counter_ptr" and the "amount" in release_something() should be | ||
175 | the same as they were in the alloc_something() when the releasing | ||
176 | resource was allocated. | ||
177 | |||
178 | c. Provide the way to read res_counter values and set them (the cgroups | ||
179 | still can help with it). | ||
180 | |||
181 | c. Compile and run :) | ||
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index af3b925ece08..6c442d8426b5 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt | |||
@@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another | |||
154 | that some governors won't load - they only | 154 | that some governors won't load - they only |
155 | work on some specific architectures or | 155 | work on some specific architectures or |
156 | processors. | 156 | processors. |
157 | |||
158 | cpuinfo_cur_freq : Current speed of the CPU, in KHz. | ||
159 | |||
160 | scaling_available_frequencies : List of available frequencies, in KHz. | ||
161 | |||
157 | scaling_min_freq and | 162 | scaling_min_freq and |
158 | scaling_max_freq show the current "policy limits" (in | 163 | scaling_max_freq show the current "policy limits" (in |
159 | kHz). By echoing new values into these | 164 | kHz). By echoing new values into these |
@@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in | |||
162 | first set scaling_max_freq, then | 167 | first set scaling_max_freq, then |
163 | scaling_min_freq. | 168 | scaling_min_freq. |
164 | 169 | ||
170 | affected_cpus : List of CPUs that require software coordination | ||
171 | of frequency. | ||
172 | |||
173 | related_cpus : List of CPUs that need some sort of frequency | ||
174 | coordination, whether software or hardware. | ||
175 | |||
176 | scaling_driver : Hardware driver for cpufreq. | ||
177 | |||
178 | scaling_cur_freq : Current frequency of the CPU, in KHz. | ||
165 | 179 | ||
166 | If you have selected the "userspace" governor which allows you to | 180 | If you have selected the "userspace" governor which allows you to |
167 | set the CPU operating frequency to a specific value, you can read out | 181 | set the CPU operating frequency to a specific value, you can read out |
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index aa854b9b18cd..fb7b361e6eea 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt | |||
@@ -171,6 +171,7 @@ files describing that cpuset: | |||
171 | - memory_migrate flag: if set, move pages to cpusets nodes | 171 | - memory_migrate flag: if set, move pages to cpusets nodes |
172 | - cpu_exclusive flag: is cpu placement exclusive? | 172 | - cpu_exclusive flag: is cpu placement exclusive? |
173 | - mem_exclusive flag: is memory placement exclusive? | 173 | - mem_exclusive flag: is memory placement exclusive? |
174 | - mem_hardwall flag: is memory allocation hardwalled | ||
174 | - memory_pressure: measure of how much paging pressure in cpuset | 175 | - memory_pressure: measure of how much paging pressure in cpuset |
175 | 176 | ||
176 | In addition, the root cpuset only has the following file: | 177 | In addition, the root cpuset only has the following file: |
@@ -222,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than | |||
222 | a direct ancestor or descendent, may share any of the same CPUs or | 223 | a direct ancestor or descendent, may share any of the same CPUs or |
223 | Memory Nodes. | 224 | Memory Nodes. |
224 | 225 | ||
225 | A cpuset that is mem_exclusive restricts kernel allocations for | 226 | A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", |
226 | page, buffer and other data commonly shared by the kernel across | 227 | i.e. it restricts kernel allocations for page, buffer and other data |
227 | multiple users. All cpusets, whether mem_exclusive or not, restrict | 228 | commonly shared by the kernel across multiple users. All cpusets, |
228 | allocations of memory for user space. This enables configuring a | 229 | whether hardwalled or not, restrict allocations of memory for user |
229 | system so that several independent jobs can share common kernel data, | 230 | space. This enables configuring a system so that several independent |
230 | such as file system pages, while isolating each jobs user allocation in | 231 | jobs can share common kernel data, such as file system pages, while |
231 | its own cpuset. To do this, construct a large mem_exclusive cpuset to | 232 | isolating each job's user allocation in its own cpuset. To do this, |
232 | hold all the jobs, and construct child, non-mem_exclusive cpusets for | 233 | construct a large mem_exclusive cpuset to hold all the jobs, and |
233 | each individual job. Only a small amount of typical kernel memory, | 234 | construct child, non-mem_exclusive cpusets for each individual job. |
234 | such as requests from interrupt handlers, is allowed to be taken | 235 | Only a small amount of typical kernel memory, such as requests from |
235 | outside even a mem_exclusive cpuset. | 236 | interrupt handlers, is allowed to be taken outside even a |
237 | mem_exclusive cpuset. | ||
236 | 238 | ||
237 | 239 | ||
238 | 1.5 What is memory_pressure ? | 240 | 1.5 What is memory_pressure ? |
@@ -707,7 +709,7 @@ Now you want to do something with this cpuset. | |||
707 | 709 | ||
708 | In this directory you can find several files: | 710 | In this directory you can find several files: |
709 | # ls | 711 | # ls |
710 | cpus cpu_exclusive mems mem_exclusive tasks | 712 | cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks |
711 | 713 | ||
712 | Reading them will give you information about the state of this cpuset: | 714 | Reading them will give you information about the state of this cpuset: |
713 | the CPUs and Memory Nodes it can use, the processes that are using | 715 | the CPUs and Memory Nodes it can use, the processes that are using |
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 354aec047c0e..881e6dd03aea 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff | |||
@@ -141,6 +141,7 @@ mkprep | |||
141 | mktables | 141 | mktables |
142 | mktree | 142 | mktree |
143 | modpost | 143 | modpost |
144 | modules.order | ||
144 | modversions.h* | 145 | modversions.h* |
145 | offset.h | 146 | offset.h |
146 | offsets.h | 147 | offsets.h |
@@ -171,6 +172,7 @@ sm_tbl* | |||
171 | split-include | 172 | split-include |
172 | tags | 173 | tags |
173 | tftpboot.img | 174 | tftpboot.img |
175 | timeconst.h | ||
174 | times.h* | 176 | times.h* |
175 | tkparse | 177 | tkparse |
176 | trix_boot.h | 178 | trix_boot.h |
diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt new file mode 100644 index 000000000000..2f640903bbb2 --- /dev/null +++ b/Documentation/fb/gxfb.txt | |||
@@ -0,0 +1,52 @@ | |||
1 | [This file is cloned from VesaFB/aty128fb] | ||
2 | |||
3 | What is gxfb? | ||
4 | ================= | ||
5 | |||
6 | This is a graphics framebuffer driver for AMD Geode GX2 based processors. | ||
7 | |||
8 | Advantages: | ||
9 | |||
10 | * No need to use AMD's VSA code (or other VESA emulation layer) in the | ||
11 | BIOS. | ||
12 | * It provides a nice large console (128 cols + 48 lines with 1024x768) | ||
13 | without using tiny, unreadable fonts. | ||
14 | * You can run XF68_FBDev on top of /dev/fb0 | ||
15 | * Most important: boot logo :-) | ||
16 | |||
17 | Disadvantages: | ||
18 | |||
19 | * graphic mode is slower than text mode... | ||
20 | |||
21 | |||
22 | How to use it? | ||
23 | ============== | ||
24 | |||
25 | Switching modes is done using gxfb.mode_option=<resolution>... boot | ||
26 | parameter or using `fbset' program. | ||
27 | |||
28 | See Documentation/fb/modedb.txt for more information on modedb | ||
29 | resolutions. | ||
30 | |||
31 | |||
32 | X11 | ||
33 | === | ||
34 | |||
35 | XF68_FBDev should generally work fine, but it is non-accelerated. | ||
36 | |||
37 | |||
38 | Configuration | ||
39 | ============= | ||
40 | |||
41 | You can pass kernel command line options to gxfb with gxfb.<option>. | ||
42 | For example, gxfb.mode_option=800x600@75. | ||
43 | Accepted options: | ||
44 | |||
45 | mode_option - specify the video mode. Of the form | ||
46 | <x>x<y>[-<bpp>][@<refresh>] | ||
47 | vram - size of video ram (normally auto-detected) | ||
48 | vt_switch - enable vt switching during suspend/resume. The vt | ||
49 | switch is slow, but harmless. | ||
50 | |||
51 | -- | ||
52 | Andres Salomon <dilinger@debian.org> | ||
diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt index da5ee74219e8..27a3160650a4 100644 --- a/Documentation/fb/intelfb.txt +++ b/Documentation/fb/intelfb.txt | |||
@@ -14,6 +14,8 @@ graphics devices. These would include: | |||
14 | Intel 915GM | 14 | Intel 915GM |
15 | Intel 945G | 15 | Intel 945G |
16 | Intel 945GM | 16 | Intel 945GM |
17 | Intel 965G | ||
18 | Intel 965GM | ||
17 | 19 | ||
18 | B. List of available options | 20 | B. List of available options |
19 | 21 | ||
diff --git a/Documentation/fb/lxfb.txt b/Documentation/fb/lxfb.txt new file mode 100644 index 000000000000..38b3ca6f6ca7 --- /dev/null +++ b/Documentation/fb/lxfb.txt | |||
@@ -0,0 +1,52 @@ | |||
1 | [This file is cloned from VesaFB/aty128fb] | ||
2 | |||
3 | What is lxfb? | ||
4 | ================= | ||
5 | |||
6 | This is a graphics framebuffer driver for AMD Geode LX based processors. | ||
7 | |||
8 | Advantages: | ||
9 | |||
10 | * No need to use AMD's VSA code (or other VESA emulation layer) in the | ||
11 | BIOS. | ||
12 | * It provides a nice large console (128 cols + 48 lines with 1024x768) | ||
13 | without using tiny, unreadable fonts. | ||
14 | * You can run XF68_FBDev on top of /dev/fb0 | ||
15 | * Most important: boot logo :-) | ||
16 | |||
17 | Disadvantages: | ||
18 | |||
19 | * graphic mode is slower than text mode... | ||
20 | |||
21 | |||
22 | How to use it? | ||
23 | ============== | ||
24 | |||
25 | Switching modes is done using lxfb.mode_option=<resolution>... boot | ||
26 | parameter or using `fbset' program. | ||
27 | |||
28 | See Documentation/fb/modedb.txt for more information on modedb | ||
29 | resolutions. | ||
30 | |||
31 | |||
32 | X11 | ||
33 | === | ||
34 | |||
35 | XF68_FBDev should generally work fine, but it is non-accelerated. | ||
36 | |||
37 | |||
38 | Configuration | ||
39 | ============= | ||
40 | |||
41 | You can pass kernel command line options to lxfb with lxfb.<option>. | ||
42 | For example, lxfb.mode_option=800x600@75. | ||
43 | Accepted options: | ||
44 | |||
45 | mode_option - specify the video mode. Of the form | ||
46 | <x>x<y>[-<bpp>][@<refresh>] | ||
47 | vram - size of video ram (normally auto-detected) | ||
48 | vt_switch - enable vt switching during suspend/resume. The vt | ||
49 | switch is slow, but harmless. | ||
50 | |||
51 | -- | ||
52 | Andres Salomon <dilinger@debian.org> | ||
diff --git a/Documentation/fb/metronomefb.txt b/Documentation/fb/metronomefb.txt index b9a2e7b7e838..237ca412582d 100644 --- a/Documentation/fb/metronomefb.txt +++ b/Documentation/fb/metronomefb.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | Metronomefb | 1 | Metronomefb |
2 | ----------- | 2 | ----------- |
3 | Maintained by Jaya Kumar <jayakumar.lkml.gmail.com> | 3 | Maintained by Jaya Kumar <jayakumar.lkml.gmail.com> |
4 | Last revised: Nov 20, 2007 | 4 | Last revised: Mar 10, 2008 |
5 | 5 | ||
6 | Metronomefb is a driver for the Metronome display controller. The controller | 6 | Metronomefb is a driver for the Metronome display controller. The controller |
7 | is from E-Ink Corporation. It is intended to be used to drive the E-Ink | 7 | is from E-Ink Corporation. It is intended to be used to drive the E-Ink |
@@ -11,20 +11,18 @@ display media here http://www.e-ink.com/products/matrix/metronome.html . | |||
11 | Metronome is interfaced to the host CPU through the AMLCD interface. The | 11 | Metronome is interfaced to the host CPU through the AMLCD interface. The |
12 | host CPU generates the control information and the image in a framebuffer | 12 | host CPU generates the control information and the image in a framebuffer |
13 | which is then delivered to the AMLCD interface by a host specific method. | 13 | which is then delivered to the AMLCD interface by a host specific method. |
14 | Currently, that's implemented for the PXA's LCDC controller. The display and | 14 | The display and error status are each pulled through individual GPIOs. |
15 | error status are each pulled through individual GPIOs. | ||
16 | 15 | ||
17 | Metronomefb was written for the PXA255/gumstix/lyre combination and | 16 | Metronomefb is platform independent and depends on a board specific driver |
18 | therefore currently has board set specific code in it. If other boards based on | 17 | to do all physical IO work. Currently, an example is implemented for the |
19 | other architectures are available, then the host specific code can be separated | 18 | PXA board used in the AM-200 EPD devkit. This example is am200epd.c |
20 | and abstracted out. | ||
21 | 19 | ||
22 | Metronomefb requires waveform information which is delivered via the AMLCD | 20 | Metronomefb requires waveform information which is delivered via the AMLCD |
23 | interface to the metronome controller. The waveform information is expected to | 21 | interface to the metronome controller. The waveform information is expected to |
24 | be delivered from userspace via the firmware class interface. The waveform file | 22 | be delivered from userspace via the firmware class interface. The waveform file |
25 | can be compressed as long as your udev or hotplug script is aware of the need | 23 | can be compressed as long as your udev or hotplug script is aware of the need |
26 | to uncompress it before delivering it. metronomefb will ask for waveform.wbf | 24 | to uncompress it before delivering it. metronomefb will ask for metronome.wbf |
27 | which would typically go into /lib/firmware/waveform.wbf depending on your | 25 | which would typically go into /lib/firmware/metronome.wbf depending on your |
28 | udev/hotplug setup. I have only tested with a single waveform file which was | 26 | udev/hotplug setup. I have only tested with a single waveform file which was |
29 | originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for. | 27 | originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for. |
30 | Caution should be exercised when manipulating the waveform as there may be | 28 | Caution should be exercised when manipulating the waveform as there may be |
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt index 4fcdb4cf4cca..ec4dee75a354 100644 --- a/Documentation/fb/modedb.txt +++ b/Documentation/fb/modedb.txt | |||
@@ -125,8 +125,12 @@ There may be more modes. | |||
125 | amifb - Amiga chipset frame buffer | 125 | amifb - Amiga chipset frame buffer |
126 | aty128fb - ATI Rage128 / Pro frame buffer | 126 | aty128fb - ATI Rage128 / Pro frame buffer |
127 | atyfb - ATI Mach64 frame buffer | 127 | atyfb - ATI Mach64 frame buffer |
128 | pm2fb - Permedia 2/2V frame buffer | ||
129 | pm3fb - Permedia 3 frame buffer | ||
130 | sstfb - Voodoo 1/2 (SST1) chipset frame buffer | ||
128 | tdfxfb - 3D Fx frame buffer | 131 | tdfxfb - 3D Fx frame buffer |
129 | tridentfb - Trident (Cyber)blade chipset frame buffer | 132 | tridentfb - Trident (Cyber)blade chipset frame buffer |
133 | vt8623fb - VIA 8623 frame buffer | ||
130 | 134 | ||
131 | BTW, only a few drivers use this at the moment. Others are to follow | 135 | BTW, only a few drivers use this at the moment. Others are to follow |
132 | (feel free to send patches). | 136 | (feel free to send patches). |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 448729fcaeb1..3c35d452b1a9 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -128,15 +128,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com> | |||
128 | 128 | ||
129 | --------------------------- | 129 | --------------------------- |
130 | 130 | ||
131 | What: vm_ops.nopage | ||
132 | When: Soon, provided in-kernel callers have been converted | ||
133 | Why: This interface is replaced by vm_ops.fault, but it has been around | ||
134 | forever, is used by a lot of drivers, and doesn't cost much to | ||
135 | maintain. | ||
136 | Who: Nick Piggin <npiggin@suse.de> | ||
137 | |||
138 | --------------------------- | ||
139 | |||
140 | What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment | 131 | What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment |
141 | When: October 2008 | 132 | When: October 2008 |
142 | Why: The stacking of class devices makes these values misleading and | 133 | Why: The stacking of class devices makes these values misleading and |
@@ -147,6 +138,24 @@ Who: Kay Sievers <kay.sievers@suse.de> | |||
147 | 138 | ||
148 | --------------------------- | 139 | --------------------------- |
149 | 140 | ||
141 | What: find_task_by_pid | ||
142 | When: 2.6.26 | ||
143 | Why: With pid namespaces, calling this funciton will return the | ||
144 | wrong task when called from inside a namespace. | ||
145 | |||
146 | The best way to save a task pid and find a task by this | ||
147 | pid later, is to find this task's struct pid pointer (or get | ||
148 | it directly from the task) and call pid_task() later. | ||
149 | |||
150 | If someone really needs to get a task by its pid_t, then | ||
151 | he most likely needs the find_task_by_vpid() to get the | ||
152 | task from the same namespace as the current task is in, but | ||
153 | this may be not so in general. | ||
154 | |||
155 | Who: Pavel Emelyanov <xemul@openvz.org> | ||
156 | |||
157 | --------------------------- | ||
158 | |||
150 | What: ACPI procfs interface | 159 | What: ACPI procfs interface |
151 | When: July 2008 | 160 | When: July 2008 |
152 | Why: ACPI sysfs conversion should be finished by January 2008. | 161 | Why: ACPI sysfs conversion should be finished by January 2008. |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 42d4b30b1045..c2992bc54f2f 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -511,7 +511,6 @@ prototypes: | |||
511 | void (*open)(struct vm_area_struct*); | 511 | void (*open)(struct vm_area_struct*); |
512 | void (*close)(struct vm_area_struct*); | 512 | void (*close)(struct vm_area_struct*); |
513 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | 513 | int (*fault)(struct vm_area_struct*, struct vm_fault *); |
514 | struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); | ||
515 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); | 514 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); |
516 | 515 | ||
517 | locking rules: | 516 | locking rules: |
@@ -519,7 +518,6 @@ locking rules: | |||
519 | open: no yes | 518 | open: no yes |
520 | close: no yes | 519 | close: no yes |
521 | fault: no yes | 520 | fault: no yes |
522 | nopage: no yes | ||
523 | page_mkwrite: no yes no | 521 | page_mkwrite: no yes no |
524 | 522 | ||
525 | ->page_mkwrite() is called when a previously read-only page is | 523 | ->page_mkwrite() is called when a previously read-only page is |
@@ -537,4 +535,3 @@ NULL. | |||
537 | 535 | ||
538 | ipc/shm.c::shm_delete() - may need BKL. | 536 | ipc/shm.c::shm_delete() - may need BKL. |
539 | ->read() and ->write() in many drivers are (probably) missing BKL. | 537 | ->read() and ->write() in many drivers are (probably) missing BKL. |
540 | drivers/sgi/char/graphics.c::sgi_graphics_nopage() - may need BKL. | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2a99116edc47..dbc3c6a3650f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -463,11 +463,17 @@ SwapTotal: 0 kB | |||
463 | SwapFree: 0 kB | 463 | SwapFree: 0 kB |
464 | Dirty: 968 kB | 464 | Dirty: 968 kB |
465 | Writeback: 0 kB | 465 | Writeback: 0 kB |
466 | AnonPages: 861800 kB | ||
466 | Mapped: 280372 kB | 467 | Mapped: 280372 kB |
467 | Slab: 684068 kB | 468 | Slab: 284364 kB |
469 | SReclaimable: 159856 kB | ||
470 | SUnreclaim: 124508 kB | ||
471 | PageTables: 24448 kB | ||
472 | NFS_Unstable: 0 kB | ||
473 | Bounce: 0 kB | ||
474 | WritebackTmp: 0 kB | ||
468 | CommitLimit: 7669796 kB | 475 | CommitLimit: 7669796 kB |
469 | Committed_AS: 100056 kB | 476 | Committed_AS: 100056 kB |
470 | PageTables: 24448 kB | ||
471 | VmallocTotal: 112216 kB | 477 | VmallocTotal: 112216 kB |
472 | VmallocUsed: 428 kB | 478 | VmallocUsed: 428 kB |
473 | VmallocChunk: 111088 kB | 479 | VmallocChunk: 111088 kB |
@@ -503,8 +509,17 @@ VmallocChunk: 111088 kB | |||
503 | on the disk | 509 | on the disk |
504 | Dirty: Memory which is waiting to get written back to the disk | 510 | Dirty: Memory which is waiting to get written back to the disk |
505 | Writeback: Memory which is actively being written back to the disk | 511 | Writeback: Memory which is actively being written back to the disk |
512 | AnonPages: Non-file backed pages mapped into userspace page tables | ||
506 | Mapped: files which have been mmaped, such as libraries | 513 | Mapped: files which have been mmaped, such as libraries |
507 | Slab: in-kernel data structures cache | 514 | Slab: in-kernel data structures cache |
515 | SReclaimable: Part of Slab, that might be reclaimed, such as caches | ||
516 | SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure | ||
517 | PageTables: amount of memory dedicated to the lowest level of page | ||
518 | tables. | ||
519 | NFS_Unstable: NFS pages sent to the server, but not yet committed to stable | ||
520 | storage | ||
521 | Bounce: Memory used for block device "bounce buffers" | ||
522 | WritebackTmp: Memory used by FUSE for temporary writeback buffers | ||
508 | CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'), | 523 | CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'), |
509 | this is the total amount of memory currently available to | 524 | this is the total amount of memory currently available to |
510 | be allocated on the system. This limit is only adhered to | 525 | be allocated on the system. This limit is only adhered to |
@@ -531,8 +546,6 @@ Committed_AS: The amount of memory presently allocated on the system. | |||
531 | above) will not be permitted. This is useful if one needs | 546 | above) will not be permitted. This is useful if one needs |
532 | to guarantee that processes will not fail due to lack of | 547 | to guarantee that processes will not fail due to lack of |
533 | memory once that memory has been successfully allocated. | 548 | memory once that memory has been successfully allocated. |
534 | PageTables: amount of memory dedicated to the lowest level of page | ||
535 | tables. | ||
536 | VmallocTotal: total size of vmalloc memory area | 549 | VmallocTotal: total size of vmalloc memory area |
537 | VmallocUsed: amount of vmalloc area which is used | 550 | VmallocUsed: amount of vmalloc area which is used |
538 | VmallocChunk: largest contigious block of vmalloc area which is free | 551 | VmallocChunk: largest contigious block of vmalloc area which is free |
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index 145e44086358..222437efd75a 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt | |||
@@ -92,6 +92,18 @@ NodeList format is a comma-separated list of decimal numbers and ranges, | |||
92 | a range being two hyphen-separated decimal numbers, the smallest and | 92 | a range being two hyphen-separated decimal numbers, the smallest and |
93 | largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 | 93 | largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 |
94 | 94 | ||
95 | NUMA memory allocation policies have optional flags that can be used in | ||
96 | conjunction with their modes. These optional flags can be specified | ||
97 | when tmpfs is mounted by appending them to the mode before the NodeList. | ||
98 | See Documentation/vm/numa_memory_policy.txt for a list of all available | ||
99 | memory allocation policy mode flags. | ||
100 | |||
101 | =static is equivalent to MPOL_F_STATIC_NODES | ||
102 | =relative is equivalent to MPOL_F_RELATIVE_NODES | ||
103 | |||
104 | For example, mpol=bind=static:NodeList, is the equivalent of an | ||
105 | allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES. | ||
106 | |||
95 | Note that trying to mount a tmpfs with an mpol option will fail if the | 107 | Note that trying to mount a tmpfs with an mpol option will fail if the |
96 | running kernel does not support NUMA; and will fail if its nodelist | 108 | running kernel does not support NUMA; and will fail if its nodelist |
97 | specifies a node which is not online. If your system relies on that | 109 | specifies a node which is not online. If your system relies on that |
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index fcc123ffa252..2d5e1e582e13 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt | |||
@@ -17,6 +17,21 @@ dmask=### -- The permission mask for the directory. | |||
17 | fmask=### -- The permission mask for files. | 17 | fmask=### -- The permission mask for files. |
18 | The default is the umask of current process. | 18 | The default is the umask of current process. |
19 | 19 | ||
20 | allow_utime=### -- This option controls the permission check of mtime/atime. | ||
21 | |||
22 | 20 - If current process is in group of file's group ID, | ||
23 | you can change timestamp. | ||
24 | 2 - Other users can change timestamp. | ||
25 | |||
26 | The default is set from `dmask' option. (If the directory is | ||
27 | writable, utime(2) is also allowed. I.e. ~dmask & 022) | ||
28 | |||
29 | Normally utime(2) checks current process is owner of | ||
30 | the file, or it has CAP_FOWNER capability. But FAT | ||
31 | filesystem doesn't have uid/gid on disk, so normal | ||
32 | check is too unflexible. With this option you can | ||
33 | relax it. | ||
34 | |||
20 | codepage=### -- Sets the codepage number for converting to shortname | 35 | codepage=### -- Sets the codepage number for converting to shortname |
21 | characters on FAT filesystem. | 36 | characters on FAT filesystem. |
22 | By default, FAT_DEFAULT_CODEPAGE setting is used. | 37 | By default, FAT_DEFAULT_CODEPAGE setting is used. |
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 54630095aa3c..c35ca9e40d4c 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt | |||
@@ -107,6 +107,16 @@ type of GPIO controller, and on one particular board 80-95 with an FPGA. | |||
107 | The numbers need not be contiguous; either of those platforms could also | 107 | The numbers need not be contiguous; either of those platforms could also |
108 | use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. | 108 | use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. |
109 | 109 | ||
110 | If you want to initialize a structure with an invalid GPIO number, use | ||
111 | some negative number (perhaps "-EINVAL"); that will never be valid. To | ||
112 | test if a number could reference a GPIO, you may use this predicate: | ||
113 | |||
114 | int gpio_is_valid(int number); | ||
115 | |||
116 | A number that's not valid will be rejected by calls which may request | ||
117 | or free GPIOs (see below). Other numbers may also be rejected; for | ||
118 | example, a number might be valid but unused on a given board. | ||
119 | |||
110 | Whether a platform supports multiple GPIO controllers is currently a | 120 | Whether a platform supports multiple GPIO controllers is currently a |
111 | platform-specific implementation issue. | 121 | platform-specific implementation issue. |
112 | 122 | ||
diff --git a/Documentation/hwmon/w83l785ts b/Documentation/hwmon/w83l785ts index 1841cedc25b2..bd1fa9d4468d 100644 --- a/Documentation/hwmon/w83l785ts +++ b/Documentation/hwmon/w83l785ts | |||
@@ -33,7 +33,8 @@ Known Issues | |||
33 | ------------ | 33 | ------------ |
34 | 34 | ||
35 | On some systems (Asus), the BIOS is known to interfere with the driver | 35 | On some systems (Asus), the BIOS is known to interfere with the driver |
36 | and cause read errors. The driver will retry a given number of times | 36 | and cause read errors. Or maybe the W83L785TS-S chip is simply unreliable, |
37 | we don't really know. The driver will retry a given number of times | ||
37 | (5 by default) and then give up, returning the old value (or 0 if | 38 | (5 by default) and then give up, returning the old value (or 0 if |
38 | there is no old value). It seems to work well enough so that you should | 39 | there is no old value). It seems to work well enough so that you should |
39 | not notice anything. Thanks to James Bolt for helping test this feature. | 40 | not notice anything. Thanks to James Bolt for helping test this feature. |
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index bfb0a5520817..ee75cbace28d 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients | |||
@@ -164,7 +164,8 @@ I2C device drivers using this binding model work just like any other | |||
164 | kind of driver in Linux: they provide a probe() method to bind to | 164 | kind of driver in Linux: they provide a probe() method to bind to |
165 | those devices, and a remove() method to unbind. | 165 | those devices, and a remove() method to unbind. |
166 | 166 | ||
167 | static int foo_probe(struct i2c_client *client); | 167 | static int foo_probe(struct i2c_client *client, |
168 | const struct i2c_device_id *id); | ||
168 | static int foo_remove(struct i2c_client *client); | 169 | static int foo_remove(struct i2c_client *client); |
169 | 170 | ||
170 | Remember that the i2c_driver does not create those client handles. The | 171 | Remember that the i2c_driver does not create those client handles. The |
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt index 0fac3465f2e3..95ad15c3b01f 100644 --- a/Documentation/i386/boot.txt +++ b/Documentation/i386/boot.txt | |||
@@ -40,9 +40,17 @@ Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. | |||
40 | Introduce relocatable_kernel and kernel_alignment fields. | 40 | Introduce relocatable_kernel and kernel_alignment fields. |
41 | 41 | ||
42 | Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of | 42 | Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of |
43 | the boot command line | 43 | the boot command line. |
44 | 44 | ||
45 | Protocol 2.09: (kernel 2.6.26) Added a field of 64-bit physical | 45 | Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol. |
46 | Introduced hardware_subarch and hardware_subarch_data | ||
47 | and KEEP_SEGMENTS flag in load_flags. | ||
48 | |||
49 | Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format | ||
50 | payload. Introduced payload_offset and payload length | ||
51 | fields to aid in locating the payload. | ||
52 | |||
53 | Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical | ||
46 | pointer to single linked list of struct setup_data. | 54 | pointer to single linked list of struct setup_data. |
47 | 55 | ||
48 | **** MEMORY LAYOUT | 56 | **** MEMORY LAYOUT |
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index 649cb8799890..00b950d1c193 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt | |||
@@ -104,14 +104,15 @@ applicable everywhere (see syntax). | |||
104 | Reverse dependencies can only be used with boolean or tristate | 104 | Reverse dependencies can only be used with boolean or tristate |
105 | symbols. | 105 | symbols. |
106 | Note: | 106 | Note: |
107 | select is evil.... select will by brute force set a symbol | 107 | select should be used with care. select will force |
108 | equal to 'y' without visiting the dependencies. So abusing | 108 | a symbol to a value without visiting the dependencies. |
109 | select you are able to select a symbol FOO even if FOO depends | 109 | By abusing select you are able to select a symbol FOO even |
110 | on BAR that is not set. In general use select only for | 110 | if FOO depends on BAR that is not set. |
111 | non-visible symbols (no prompts anywhere) and for symbols with | 111 | In general use select only for non-visible symbols |
112 | no dependencies. That will limit the usefulness but on the | 112 | (no prompts anywhere) and for symbols with no dependencies. |
113 | other hand avoid the illegal configurations all over. kconfig | 113 | That will limit the usefulness but on the other hand avoid |
114 | should one day warn about such things. | 114 | the illegal configurations all over. |
115 | kconfig should one day warn about such things. | ||
115 | 116 | ||
116 | - numerical ranges: "range" <symbol> <symbol> ["if" <expr>] | 117 | - numerical ranges: "range" <symbol> <symbol> ["if" <expr>] |
117 | This allows to limit the range of possible input values for int | 118 | This allows to limit the range of possible input values for int |
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index d0ac72cc19ff..b8e52c0355d3 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt | |||
@@ -245,6 +245,8 @@ The syntax is: | |||
245 | crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset] | 245 | crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset] |
246 | range=start-[end] | 246 | range=start-[end] |
247 | 247 | ||
248 | 'start' is inclusive and 'end' is exclusive. | ||
249 | |||
248 | For example: | 250 | For example: |
249 | 251 | ||
250 | crashkernel=512M-2G:64M,2G-:128M | 252 | crashkernel=512M-2G:64M,2G-:128M |
@@ -253,10 +255,11 @@ This would mean: | |||
253 | 255 | ||
254 | 1) if the RAM is smaller than 512M, then don't reserve anything | 256 | 1) if the RAM is smaller than 512M, then don't reserve anything |
255 | (this is the "rescue" case) | 257 | (this is the "rescue" case) |
256 | 2) if the RAM size is between 512M and 2G, then reserve 64M | 258 | 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M |
257 | 3) if the RAM size is larger than 2G, then reserve 128M | 259 | 3) if the RAM size is larger than 2G, then reserve 128M |
258 | 260 | ||
259 | 261 | ||
262 | |||
260 | Boot into System Kernel | 263 | Boot into System Kernel |
261 | ======================= | 264 | ======================= |
262 | 265 | ||
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e5f3d918316f..a3c35446e755 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -496,6 +496,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
496 | switching to the matching ttyS device later. The | 496 | switching to the matching ttyS device later. The |
497 | options are the same as for ttyS, above. | 497 | options are the same as for ttyS, above. |
498 | 498 | ||
499 | If the device connected to the port is not a TTY but a braille | ||
500 | device, prepend "brl," before the device type, for instance | ||
501 | console=brl,ttyS0 | ||
502 | For now, only VisioBraille is supported. | ||
503 | |||
499 | earlycon= [KNL] Output early console device and options. | 504 | earlycon= [KNL] Output early console device and options. |
500 | uart[8250],io,<addr>[,options] | 505 | uart[8250],io,<addr>[,options] |
501 | uart[8250],mmio,<addr>[,options] | 506 | uart[8250],mmio,<addr>[,options] |
@@ -556,6 +561,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
556 | 1 will print _a lot_ more information - normally | 561 | 1 will print _a lot_ more information - normally |
557 | only useful to kernel developers. | 562 | only useful to kernel developers. |
558 | 563 | ||
564 | debug_objects [KNL] Enable object debugging | ||
565 | |||
559 | decnet.addr= [HW,NET] | 566 | decnet.addr= [HW,NET] |
560 | Format: <area>[,<node>] | 567 | Format: <area>[,<node>] |
561 | See also Documentation/networking/decnet.txt. | 568 | See also Documentation/networking/decnet.txt. |
@@ -627,8 +634,7 @@ and is between 256 and 4096 characters. It is defined in the file | |||
627 | eata= [HW,SCSI] | 634 | eata= [HW,SCSI] |
628 | 635 | ||
629 | edd= [EDD] | 636 | edd= [EDD] |
630 | Format: {"of[f]" | "sk[ipmbr]"} | 637 | Format: {"off" | "on" | "skip[mbr]"} |
631 | See comment in arch/i386/boot/edd.S | ||
632 | 638 | ||
633 | eisa_irq_edge= [PARISC,HW] | 639 | eisa_irq_edge= [PARISC,HW] |
634 | See header of drivers/parisc/eisa.c. | 640 | See header of drivers/parisc/eisa.c. |
@@ -1389,6 +1395,13 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1389 | 1395 | ||
1390 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. | 1396 | nr_uarts= [SERIAL] maximum number of UARTs to be registered. |
1391 | 1397 | ||
1398 | olpc_ec_timeout= [OLPC] ms delay when issuing EC commands | ||
1399 | Rather than timing out after 20 ms if an EC | ||
1400 | command is not properly ACKed, override the length | ||
1401 | of the timeout. We have interrupts disabled while | ||
1402 | waiting for the ACK, so if this is set too high | ||
1403 | interrupts *may* be lost! | ||
1404 | |||
1392 | opl3= [HW,OSS] | 1405 | opl3= [HW,OSS] |
1393 | Format: <io> | 1406 | Format: <io> |
1394 | 1407 | ||
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt index 266955d23ee6..09b55e461740 100644 --- a/Documentation/keys-request-key.txt +++ b/Documentation/keys-request-key.txt | |||
@@ -11,26 +11,29 @@ request_key*(): | |||
11 | 11 | ||
12 | struct key *request_key(const struct key_type *type, | 12 | struct key *request_key(const struct key_type *type, |
13 | const char *description, | 13 | const char *description, |
14 | const char *callout_string); | 14 | const char *callout_info); |
15 | 15 | ||
16 | or: | 16 | or: |
17 | 17 | ||
18 | struct key *request_key_with_auxdata(const struct key_type *type, | 18 | struct key *request_key_with_auxdata(const struct key_type *type, |
19 | const char *description, | 19 | const char *description, |
20 | const char *callout_string, | 20 | const char *callout_info, |
21 | size_t callout_len, | ||
21 | void *aux); | 22 | void *aux); |
22 | 23 | ||
23 | or: | 24 | or: |
24 | 25 | ||
25 | struct key *request_key_async(const struct key_type *type, | 26 | struct key *request_key_async(const struct key_type *type, |
26 | const char *description, | 27 | const char *description, |
27 | const char *callout_string); | 28 | const char *callout_info, |
29 | size_t callout_len); | ||
28 | 30 | ||
29 | or: | 31 | or: |
30 | 32 | ||
31 | struct key *request_key_async_with_auxdata(const struct key_type *type, | 33 | struct key *request_key_async_with_auxdata(const struct key_type *type, |
32 | const char *description, | 34 | const char *description, |
33 | const char *callout_string, | 35 | const char *callout_info, |
36 | size_t callout_len, | ||
34 | void *aux); | 37 | void *aux); |
35 | 38 | ||
36 | Or by userspace invoking the request_key system call: | 39 | Or by userspace invoking the request_key system call: |
diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 51652d39e61c..d5c7a57d1700 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt | |||
@@ -170,7 +170,8 @@ The key service provides a number of features besides keys: | |||
170 | amount of description and payload space that can be consumed. | 170 | amount of description and payload space that can be consumed. |
171 | 171 | ||
172 | The user can view information on this and other statistics through procfs | 172 | The user can view information on this and other statistics through procfs |
173 | files. | 173 | files. The root user may also alter the quota limits through sysctl files |
174 | (see the section "New procfs files"). | ||
174 | 175 | ||
175 | Process-specific and thread-specific keyrings are not counted towards a | 176 | Process-specific and thread-specific keyrings are not counted towards a |
176 | user's quota. | 177 | user's quota. |
@@ -329,6 +330,27 @@ about the status of the key service: | |||
329 | <bytes>/<max> Key size quota | 330 | <bytes>/<max> Key size quota |
330 | 331 | ||
331 | 332 | ||
333 | Four new sysctl files have been added also for the purpose of controlling the | ||
334 | quota limits on keys: | ||
335 | |||
336 | (*) /proc/sys/kernel/keys/root_maxkeys | ||
337 | /proc/sys/kernel/keys/root_maxbytes | ||
338 | |||
339 | These files hold the maximum number of keys that root may have and the | ||
340 | maximum total number of bytes of data that root may have stored in those | ||
341 | keys. | ||
342 | |||
343 | (*) /proc/sys/kernel/keys/maxkeys | ||
344 | /proc/sys/kernel/keys/maxbytes | ||
345 | |||
346 | These files hold the maximum number of keys that each non-root user may | ||
347 | have and the maximum total number of bytes of data that each of those | ||
348 | users may have stored in their keys. | ||
349 | |||
350 | Root may alter these by writing each new limit as a decimal number string to | ||
351 | the appropriate file. | ||
352 | |||
353 | |||
332 | =============================== | 354 | =============================== |
333 | USERSPACE SYSTEM CALL INTERFACE | 355 | USERSPACE SYSTEM CALL INTERFACE |
334 | =============================== | 356 | =============================== |
@@ -711,6 +733,27 @@ The keyctl syscall functions are: | |||
711 | The assumed authoritative key is inherited across fork and exec. | 733 | The assumed authoritative key is inherited across fork and exec. |
712 | 734 | ||
713 | 735 | ||
736 | (*) Get the LSM security context attached to a key. | ||
737 | |||
738 | long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, | ||
739 | size_t buflen) | ||
740 | |||
741 | This function returns a string that represents the LSM security context | ||
742 | attached to a key in the buffer provided. | ||
743 | |||
744 | Unless there's an error, it always returns the amount of data it could | ||
745 | produce, even if that's too big for the buffer, but it won't copy more | ||
746 | than requested to userspace. If the buffer pointer is NULL then no copy | ||
747 | will take place. | ||
748 | |||
749 | A NUL character is included at the end of the string if the buffer is | ||
750 | sufficiently big. This is included in the returned count. If no LSM is | ||
751 | in force then an empty string will be returned. | ||
752 | |||
753 | A process must have view permission on the key for this function to be | ||
754 | successful. | ||
755 | |||
756 | |||
714 | =============== | 757 | =============== |
715 | KERNEL SERVICES | 758 | KERNEL SERVICES |
716 | =============== | 759 | =============== |
@@ -771,7 +814,7 @@ payload contents" for more information. | |||
771 | 814 | ||
772 | struct key *request_key(const struct key_type *type, | 815 | struct key *request_key(const struct key_type *type, |
773 | const char *description, | 816 | const char *description, |
774 | const char *callout_string); | 817 | const char *callout_info); |
775 | 818 | ||
776 | This is used to request a key or keyring with a description that matches | 819 | This is used to request a key or keyring with a description that matches |
777 | the description specified according to the key type's match function. This | 820 | the description specified according to the key type's match function. This |
@@ -793,24 +836,28 @@ payload contents" for more information. | |||
793 | 836 | ||
794 | struct key *request_key_with_auxdata(const struct key_type *type, | 837 | struct key *request_key_with_auxdata(const struct key_type *type, |
795 | const char *description, | 838 | const char *description, |
796 | const char *callout_string, | 839 | const void *callout_info, |
840 | size_t callout_len, | ||
797 | void *aux); | 841 | void *aux); |
798 | 842 | ||
799 | This is identical to request_key(), except that the auxiliary data is | 843 | This is identical to request_key(), except that the auxiliary data is |
800 | passed to the key_type->request_key() op if it exists. | 844 | passed to the key_type->request_key() op if it exists, and the callout_info |
845 | is a blob of length callout_len, if given (the length may be 0). | ||
801 | 846 | ||
802 | 847 | ||
803 | (*) A key can be requested asynchronously by calling one of: | 848 | (*) A key can be requested asynchronously by calling one of: |
804 | 849 | ||
805 | struct key *request_key_async(const struct key_type *type, | 850 | struct key *request_key_async(const struct key_type *type, |
806 | const char *description, | 851 | const char *description, |
807 | const char *callout_string); | 852 | const void *callout_info, |
853 | size_t callout_len); | ||
808 | 854 | ||
809 | or: | 855 | or: |
810 | 856 | ||
811 | struct key *request_key_async_with_auxdata(const struct key_type *type, | 857 | struct key *request_key_async_with_auxdata(const struct key_type *type, |
812 | const char *description, | 858 | const char *description, |
813 | const char *callout_string, | 859 | const char *callout_info, |
860 | size_t callout_len, | ||
814 | void *aux); | 861 | void *aux); |
815 | 862 | ||
816 | which are asynchronous equivalents of request_key() and | 863 | which are asynchronous equivalents of request_key() and |
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index be89f393274f..6877e7187113 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt | |||
@@ -37,6 +37,11 @@ registration function such as register_kprobe() specifies where | |||
37 | the probe is to be inserted and what handler is to be called when | 37 | the probe is to be inserted and what handler is to be called when |
38 | the probe is hit. | 38 | the probe is hit. |
39 | 39 | ||
40 | There are also register_/unregister_*probes() functions for batch | ||
41 | registration/unregistration of a group of *probes. These functions | ||
42 | can speed up unregistration process when you have to unregister | ||
43 | a lot of probes at once. | ||
44 | |||
40 | The next three subsections explain how the different types of | 45 | The next three subsections explain how the different types of |
41 | probes work. They explain certain things that you'll need to | 46 | probes work. They explain certain things that you'll need to |
42 | know in order to make the best use of Kprobes -- e.g., the | 47 | know in order to make the best use of Kprobes -- e.g., the |
@@ -190,10 +195,11 @@ code mapping. | |||
190 | 4. API Reference | 195 | 4. API Reference |
191 | 196 | ||
192 | The Kprobes API includes a "register" function and an "unregister" | 197 | The Kprobes API includes a "register" function and an "unregister" |
193 | function for each type of probe. Here are terse, mini-man-page | 198 | function for each type of probe. The API also includes "register_*probes" |
194 | specifications for these functions and the associated probe handlers | 199 | and "unregister_*probes" functions for (un)registering arrays of probes. |
195 | that you'll write. See the files in the samples/kprobes/ sub-directory | 200 | Here are terse, mini-man-page specifications for these functions and |
196 | for examples. | 201 | the associated probe handlers that you'll write. See the files in the |
202 | samples/kprobes/ sub-directory for examples. | ||
197 | 203 | ||
198 | 4.1 register_kprobe | 204 | 4.1 register_kprobe |
199 | 205 | ||
@@ -319,6 +325,43 @@ void unregister_kretprobe(struct kretprobe *rp); | |||
319 | Removes the specified probe. The unregister function can be called | 325 | Removes the specified probe. The unregister function can be called |
320 | at any time after the probe has been registered. | 326 | at any time after the probe has been registered. |
321 | 327 | ||
328 | NOTE: | ||
329 | If the functions find an incorrect probe (ex. an unregistered probe), | ||
330 | they clear the addr field of the probe. | ||
331 | |||
332 | 4.5 register_*probes | ||
333 | |||
334 | #include <linux/kprobes.h> | ||
335 | int register_kprobes(struct kprobe **kps, int num); | ||
336 | int register_kretprobes(struct kretprobe **rps, int num); | ||
337 | int register_jprobes(struct jprobe **jps, int num); | ||
338 | |||
339 | Registers each of the num probes in the specified array. If any | ||
340 | error occurs during registration, all probes in the array, up to | ||
341 | the bad probe, are safely unregistered before the register_*probes | ||
342 | function returns. | ||
343 | - kps/rps/jps: an array of pointers to *probe data structures | ||
344 | - num: the number of the array entries. | ||
345 | |||
346 | NOTE: | ||
347 | You have to allocate(or define) an array of pointers and set all | ||
348 | of the array entries before using these functions. | ||
349 | |||
350 | 4.6 unregister_*probes | ||
351 | |||
352 | #include <linux/kprobes.h> | ||
353 | void unregister_kprobes(struct kprobe **kps, int num); | ||
354 | void unregister_kretprobes(struct kretprobe **rps, int num); | ||
355 | void unregister_jprobes(struct jprobe **jps, int num); | ||
356 | |||
357 | Removes each of the num probes in the specified array at once. | ||
358 | |||
359 | NOTE: | ||
360 | If the functions find some incorrect probes (ex. unregistered | ||
361 | probes) in the specified array, they clear the addr field of those | ||
362 | incorrect probes. However, other probes in the array are | ||
363 | unregistered correctly. | ||
364 | |||
322 | 5. Kprobes Features and Limitations | 365 | 5. Kprobes Features and Limitations |
323 | 366 | ||
324 | Kprobes allows multiple probes at the same address. Currently, | 367 | Kprobes allows multiple probes at the same address. Currently, |
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 76cb428435da..01c6c3d8a7e3 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt | |||
@@ -1,7 +1,7 @@ | |||
1 | ThinkPad ACPI Extras Driver | 1 | ThinkPad ACPI Extras Driver |
2 | 2 | ||
3 | Version 0.19 | 3 | Version 0.20 |
4 | January 06th, 2008 | 4 | April 09th, 2008 |
5 | 5 | ||
6 | Borislav Deianov <borislav@users.sf.net> | 6 | Borislav Deianov <borislav@users.sf.net> |
7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> | 7 | Henrique de Moraes Holschuh <hmh@hmh.eng.br> |
@@ -18,6 +18,11 @@ This driver used to be named ibm-acpi until kernel 2.6.21 and release | |||
18 | moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel | 18 | moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel |
19 | 2.6.22, and release 0.14. | 19 | 2.6.22, and release 0.14. |
20 | 20 | ||
21 | The driver is named "thinkpad-acpi". In some places, like module | ||
22 | names, "thinkpad_acpi" is used because of userspace issues. | ||
23 | |||
24 | "tpacpi" is used as a shorthand where "thinkpad-acpi" would be too | ||
25 | long due to length limitations on some Linux kernel versions. | ||
21 | 26 | ||
22 | Status | 27 | Status |
23 | ------ | 28 | ------ |
@@ -571,6 +576,47 @@ netlink interface and the input layer interface, and don't bother at all | |||
571 | with hotkey_report_mode. | 576 | with hotkey_report_mode. |
572 | 577 | ||
573 | 578 | ||
579 | Brightness hotkey notes: | ||
580 | |||
581 | These are the current sane choices for brightness key mapping in | ||
582 | thinkpad-acpi: | ||
583 | |||
584 | For IBM and Lenovo models *without* ACPI backlight control (the ones on | ||
585 | which thinkpad-acpi will autoload its backlight interface by default, | ||
586 | and on which ACPI video does not export a backlight interface): | ||
587 | |||
588 | 1. Don't enable or map the brightness hotkeys in thinkpad-acpi, as | ||
589 | these older firmware versions unfortunately won't respect the hotkey | ||
590 | mask for brightness keys anyway, and always reacts to them. This | ||
591 | usually work fine, unless X.org drivers are doing something to block | ||
592 | the BIOS. In that case, use (3) below. This is the default mode of | ||
593 | operation. | ||
594 | |||
595 | 2. Enable the hotkeys, but map them to something else that is NOT | ||
596 | KEY_BRIGHTNESS_UP/DOWN or any other keycode that would cause | ||
597 | userspace to try to change the backlight level, and use that as an | ||
598 | on-screen-display hint. | ||
599 | |||
600 | 3. IF AND ONLY IF X.org drivers find a way to block the firmware from | ||
601 | automatically changing the brightness, enable the hotkeys and map | ||
602 | them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN, and feed that to | ||
603 | something that calls xbacklight. thinkpad-acpi will not be able to | ||
604 | change brightness in that case either, so you should disable its | ||
605 | backlight interface. | ||
606 | |||
607 | For Lenovo models *with* ACPI backlight control: | ||
608 | |||
609 | 1. Load up ACPI video and use that. ACPI video will report ACPI | ||
610 | events for brightness change keys. Do not mess with thinkpad-acpi | ||
611 | defaults in this case. thinkpad-acpi should not have anything to do | ||
612 | with backlight events in a scenario where ACPI video is loaded: | ||
613 | brightness hotkeys must be disabled, and the backlight interface is | ||
614 | to be kept disabled as well. This is the default mode of operation. | ||
615 | |||
616 | 2. Do *NOT* load up ACPI video, enable the hotkeys in thinkpad-acpi, | ||
617 | and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process | ||
618 | these keys on userspace somehow (e.g. by calling xbacklight). | ||
619 | |||
574 | Bluetooth | 620 | Bluetooth |
575 | --------- | 621 | --------- |
576 | 622 | ||
@@ -647,16 +693,31 @@ while others are still having problems. For more information: | |||
647 | 693 | ||
648 | https://bugs.freedesktop.org/show_bug.cgi?id=2000 | 694 | https://bugs.freedesktop.org/show_bug.cgi?id=2000 |
649 | 695 | ||
650 | ThinkLight control -- /proc/acpi/ibm/light | 696 | ThinkLight control |
651 | ------------------------------------------ | 697 | ------------------ |
698 | |||
699 | procfs: /proc/acpi/ibm/light | ||
700 | sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED | ||
652 | 701 | ||
653 | The current status of the ThinkLight can be found in this file. A few | 702 | procfs notes: |
654 | models which do not make the status available will show it as | 703 | |
655 | "unknown". The available commands are: | 704 | The ThinkLight status can be read and set through the procfs interface. A |
705 | few models which do not make the status available will show the ThinkLight | ||
706 | status as "unknown". The available commands are: | ||
656 | 707 | ||
657 | echo on > /proc/acpi/ibm/light | 708 | echo on > /proc/acpi/ibm/light |
658 | echo off > /proc/acpi/ibm/light | 709 | echo off > /proc/acpi/ibm/light |
659 | 710 | ||
711 | sysfs notes: | ||
712 | |||
713 | The ThinkLight sysfs interface is documented by the LED class | ||
714 | documentation, in Documentation/leds-class.txt. The ThinkLight LED name | ||
715 | is "tpacpi::thinklight". | ||
716 | |||
717 | Due to limitations in the sysfs LED class, if the status of the thinklight | ||
718 | cannot be read or if it is unknown, thinkpad-acpi will report it as "off". | ||
719 | It is impossible to know if the status returned through sysfs is valid. | ||
720 | |||
660 | Docking / undocking -- /proc/acpi/ibm/dock | 721 | Docking / undocking -- /proc/acpi/ibm/dock |
661 | ------------------------------------------ | 722 | ------------------------------------------ |
662 | 723 | ||
@@ -815,28 +876,63 @@ The cmos command interface is prone to firmware split-brain problems, as | |||
815 | in newer ThinkPads it is just a compatibility layer. Do not use it, it is | 876 | in newer ThinkPads it is just a compatibility layer. Do not use it, it is |
816 | exported just as a debug tool. | 877 | exported just as a debug tool. |
817 | 878 | ||
818 | LED control -- /proc/acpi/ibm/led | 879 | LED control |
819 | --------------------------------- | 880 | ----------- |
881 | |||
882 | procfs: /proc/acpi/ibm/led | ||
883 | sysfs attributes: as per LED class, see below for names | ||
884 | |||
885 | Some of the LED indicators can be controlled through this feature. On | ||
886 | some older ThinkPad models, it is possible to query the status of the | ||
887 | LED indicators as well. Newer ThinkPads cannot query the real status | ||
888 | of the LED indicators. | ||
820 | 889 | ||
821 | Some of the LED indicators can be controlled through this feature. The | 890 | procfs notes: |
822 | available commands are: | 891 | |
892 | The available commands are: | ||
823 | 893 | ||
824 | echo '<led number> on' >/proc/acpi/ibm/led | 894 | echo '<LED number> on' >/proc/acpi/ibm/led |
825 | echo '<led number> off' >/proc/acpi/ibm/led | 895 | echo '<LED number> off' >/proc/acpi/ibm/led |
826 | echo '<led number> blink' >/proc/acpi/ibm/led | 896 | echo '<LED number> blink' >/proc/acpi/ibm/led |
827 | 897 | ||
828 | The <led number> range is 0 to 7. The set of LEDs that can be | 898 | The <LED number> range is 0 to 7. The set of LEDs that can be |
829 | controlled varies from model to model. Here is the mapping on the X40: | 899 | controlled varies from model to model. Here is the common ThinkPad |
900 | mapping: | ||
830 | 901 | ||
831 | 0 - power | 902 | 0 - power |
832 | 1 - battery (orange) | 903 | 1 - battery (orange) |
833 | 2 - battery (green) | 904 | 2 - battery (green) |
834 | 3 - UltraBase | 905 | 3 - UltraBase/dock |
835 | 4 - UltraBay | 906 | 4 - UltraBay |
907 | 5 - UltraBase battery slot | ||
908 | 6 - (unknown) | ||
836 | 7 - standby | 909 | 7 - standby |
837 | 910 | ||
838 | All of the above can be turned on and off and can be made to blink. | 911 | All of the above can be turned on and off and can be made to blink. |
839 | 912 | ||
913 | sysfs notes: | ||
914 | |||
915 | The ThinkPad LED sysfs interface is described in detail by the LED class | ||
916 | documentation, in Documentation/leds-class.txt. | ||
917 | |||
918 | The leds are named (in LED ID order, from 0 to 7): | ||
919 | "tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt", | ||
920 | "tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt", | ||
921 | "tpacpi::unknown_led", "tpacpi::standby". | ||
922 | |||
923 | Due to limitations in the sysfs LED class, if the status of the LED | ||
924 | indicators cannot be read due to an error, thinkpad-acpi will report it as | ||
925 | a brightness of zero (same as LED off). | ||
926 | |||
927 | If the thinkpad firmware doesn't support reading the current status, | ||
928 | trying to read the current LED brightness will just return whatever | ||
929 | brightness was last written to that attribute. | ||
930 | |||
931 | These LEDs can blink using hardware acceleration. To request that a | ||
932 | ThinkPad indicator LED should blink in hardware accelerated mode, use the | ||
933 | "timer" trigger, and leave the delay_on and delay_off parameters set to | ||
934 | zero (to request hardware acceleration autodetection). | ||
935 | |||
840 | ACPI sounds -- /proc/acpi/ibm/beep | 936 | ACPI sounds -- /proc/acpi/ibm/beep |
841 | ---------------------------------- | 937 | ---------------------------------- |
842 | 938 | ||
@@ -1090,6 +1186,15 @@ it there will be the following attributes: | |||
1090 | dim the display. | 1186 | dim the display. |
1091 | 1187 | ||
1092 | 1188 | ||
1189 | WARNING: | ||
1190 | |||
1191 | Whatever you do, do NOT ever call thinkpad-acpi backlight-level change | ||
1192 | interface and the ACPI-based backlight level change interface | ||
1193 | (available on newer BIOSes, and driven by the Linux ACPI video driver) | ||
1194 | at the same time. The two will interact in bad ways, do funny things, | ||
1195 | and maybe reduce the life of the backlight lamps by needlessly kicking | ||
1196 | its level up and down at every change. | ||
1197 | |||
1093 | Volume control -- /proc/acpi/ibm/volume | 1198 | Volume control -- /proc/acpi/ibm/volume |
1094 | --------------------------------------- | 1199 | --------------------------------------- |
1095 | 1200 | ||
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 4c1fc65a8b3d..3be8ab2a886a 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c | |||
@@ -131,6 +131,9 @@ struct device | |||
131 | /* Any queues attached to this device */ | 131 | /* Any queues attached to this device */ |
132 | struct virtqueue *vq; | 132 | struct virtqueue *vq; |
133 | 133 | ||
134 | /* Handle status being finalized (ie. feature bits stable). */ | ||
135 | void (*ready)(struct device *me); | ||
136 | |||
134 | /* Device-specific data. */ | 137 | /* Device-specific data. */ |
135 | void *priv; | 138 | void *priv; |
136 | }; | 139 | }; |
@@ -925,24 +928,40 @@ static void enable_fd(int fd, struct virtqueue *vq) | |||
925 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); | 928 | write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); |
926 | } | 929 | } |
927 | 930 | ||
928 | /* When the Guest asks us to reset a device, it's is fairly easy. */ | 931 | /* When the Guest tells us they updated the status field, we handle it. */ |
929 | static void reset_device(struct device *dev) | 932 | static void update_device_status(struct device *dev) |
930 | { | 933 | { |
931 | struct virtqueue *vq; | 934 | struct virtqueue *vq; |
932 | 935 | ||
933 | verbose("Resetting device %s\n", dev->name); | 936 | /* This is a reset. */ |
934 | /* Clear the status. */ | 937 | if (dev->desc->status == 0) { |
935 | dev->desc->status = 0; | 938 | verbose("Resetting device %s\n", dev->name); |
936 | 939 | ||
937 | /* Clear any features they've acked. */ | 940 | /* Clear any features they've acked. */ |
938 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, | 941 | memset(get_feature_bits(dev) + dev->desc->feature_len, 0, |
939 | dev->desc->feature_len); | 942 | dev->desc->feature_len); |
940 | 943 | ||
941 | /* Zero out the virtqueues. */ | 944 | /* Zero out the virtqueues. */ |
942 | for (vq = dev->vq; vq; vq = vq->next) { | 945 | for (vq = dev->vq; vq; vq = vq->next) { |
943 | memset(vq->vring.desc, 0, | 946 | memset(vq->vring.desc, 0, |
944 | vring_size(vq->config.num, getpagesize())); | 947 | vring_size(vq->config.num, getpagesize())); |
945 | vq->last_avail_idx = 0; | 948 | vq->last_avail_idx = 0; |
949 | } | ||
950 | } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { | ||
951 | warnx("Device %s configuration FAILED", dev->name); | ||
952 | } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { | ||
953 | unsigned int i; | ||
954 | |||
955 | verbose("Device %s OK: offered", dev->name); | ||
956 | for (i = 0; i < dev->desc->feature_len; i++) | ||
957 | verbose(" %08x", get_feature_bits(dev)[i]); | ||
958 | verbose(", accepted"); | ||
959 | for (i = 0; i < dev->desc->feature_len; i++) | ||
960 | verbose(" %08x", get_feature_bits(dev) | ||
961 | [dev->desc->feature_len+i]); | ||
962 | |||
963 | if (dev->ready) | ||
964 | dev->ready(dev); | ||
946 | } | 965 | } |
947 | } | 966 | } |
948 | 967 | ||
@@ -954,9 +973,9 @@ static void handle_output(int fd, unsigned long addr) | |||
954 | 973 | ||
955 | /* Check each device and virtqueue. */ | 974 | /* Check each device and virtqueue. */ |
956 | for (i = devices.dev; i; i = i->next) { | 975 | for (i = devices.dev; i; i = i->next) { |
957 | /* Notifications to device descriptors reset the device. */ | 976 | /* Notifications to device descriptors update device status. */ |
958 | if (from_guest_phys(addr) == i->desc) { | 977 | if (from_guest_phys(addr) == i->desc) { |
959 | reset_device(i); | 978 | update_device_status(i); |
960 | return; | 979 | return; |
961 | } | 980 | } |
962 | 981 | ||
@@ -1170,6 +1189,7 @@ static struct device *new_device(const char *name, u16 type, int fd, | |||
1170 | dev->handle_input = handle_input; | 1189 | dev->handle_input = handle_input; |
1171 | dev->name = name; | 1190 | dev->name = name; |
1172 | dev->vq = NULL; | 1191 | dev->vq = NULL; |
1192 | dev->ready = NULL; | ||
1173 | 1193 | ||
1174 | /* Append to device list. Prepending to a single-linked list is | 1194 | /* Append to device list. Prepending to a single-linked list is |
1175 | * easier, but the user expects the devices to be arranged on the bus | 1195 | * easier, but the user expects the devices to be arranged on the bus |
@@ -1398,7 +1418,7 @@ static bool service_io(struct device *dev) | |||
1398 | struct vblk_info *vblk = dev->priv; | 1418 | struct vblk_info *vblk = dev->priv; |
1399 | unsigned int head, out_num, in_num, wlen; | 1419 | unsigned int head, out_num, in_num, wlen; |
1400 | int ret; | 1420 | int ret; |
1401 | struct virtio_blk_inhdr *in; | 1421 | u8 *in; |
1402 | struct virtio_blk_outhdr *out; | 1422 | struct virtio_blk_outhdr *out; |
1403 | struct iovec iov[dev->vq->vring.num]; | 1423 | struct iovec iov[dev->vq->vring.num]; |
1404 | off64_t off; | 1424 | off64_t off; |
@@ -1416,7 +1436,7 @@ static bool service_io(struct device *dev) | |||
1416 | head, out_num, in_num); | 1436 | head, out_num, in_num); |
1417 | 1437 | ||
1418 | out = convert(&iov[0], struct virtio_blk_outhdr); | 1438 | out = convert(&iov[0], struct virtio_blk_outhdr); |
1419 | in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); | 1439 | in = convert(&iov[out_num+in_num-1], u8); |
1420 | off = out->sector * 512; | 1440 | off = out->sector * 512; |
1421 | 1441 | ||
1422 | /* The block device implements "barriers", where the Guest indicates | 1442 | /* The block device implements "barriers", where the Guest indicates |
@@ -1430,7 +1450,7 @@ static bool service_io(struct device *dev) | |||
1430 | * It'd be nice if we supported eject, for example, but we don't. */ | 1450 | * It'd be nice if we supported eject, for example, but we don't. */ |
1431 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { | 1451 | if (out->type & VIRTIO_BLK_T_SCSI_CMD) { |
1432 | fprintf(stderr, "Scsi commands unsupported\n"); | 1452 | fprintf(stderr, "Scsi commands unsupported\n"); |
1433 | in->status = VIRTIO_BLK_S_UNSUPP; | 1453 | *in = VIRTIO_BLK_S_UNSUPP; |
1434 | wlen = sizeof(*in); | 1454 | wlen = sizeof(*in); |
1435 | } else if (out->type & VIRTIO_BLK_T_OUT) { | 1455 | } else if (out->type & VIRTIO_BLK_T_OUT) { |
1436 | /* Write */ | 1456 | /* Write */ |
@@ -1453,7 +1473,7 @@ static bool service_io(struct device *dev) | |||
1453 | errx(1, "Write past end %llu+%u", off, ret); | 1473 | errx(1, "Write past end %llu+%u", off, ret); |
1454 | } | 1474 | } |
1455 | wlen = sizeof(*in); | 1475 | wlen = sizeof(*in); |
1456 | in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); | 1476 | *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); |
1457 | } else { | 1477 | } else { |
1458 | /* Read */ | 1478 | /* Read */ |
1459 | 1479 | ||
@@ -1466,10 +1486,10 @@ static bool service_io(struct device *dev) | |||
1466 | verbose("READ from sector %llu: %i\n", out->sector, ret); | 1486 | verbose("READ from sector %llu: %i\n", out->sector, ret); |
1467 | if (ret >= 0) { | 1487 | if (ret >= 0) { |
1468 | wlen = sizeof(*in) + ret; | 1488 | wlen = sizeof(*in) + ret; |
1469 | in->status = VIRTIO_BLK_S_OK; | 1489 | *in = VIRTIO_BLK_S_OK; |
1470 | } else { | 1490 | } else { |
1471 | wlen = sizeof(*in); | 1491 | wlen = sizeof(*in); |
1472 | in->status = VIRTIO_BLK_S_IOERR; | 1492 | *in = VIRTIO_BLK_S_IOERR; |
1473 | } | 1493 | } |
1474 | } | 1494 | } |
1475 | 1495 | ||
diff --git a/Documentation/md.txt b/Documentation/md.txt index 396cdd982c26..a8b430627473 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -450,3 +450,9 @@ These currently include | |||
450 | there are upper and lower limits (32768, 16). Default is 128. | 450 | there are upper and lower limits (32768, 16). Default is 128. |
451 | strip_cache_active (currently raid5 only) | 451 | strip_cache_active (currently raid5 only) |
452 | number of active entries in the stripe cache | 452 | number of active entries in the stripe cache |
453 | preread_bypass_threshold (currently raid5 only) | ||
454 | number of times a stripe requiring preread will be bypassed by | ||
455 | a stripe that does not require preread. For fairness defaults | ||
456 | to 1. Setting this to 0 disables bypass accounting and | ||
457 | requires preread stripes to wait until all full-width stripe- | ||
458 | writes are complete. Valid values are 0 to stripe_cache_size. | ||
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index 7f60dfe642ca..b152e81da592 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt | |||
@@ -253,6 +253,10 @@ characters, each representing a particular tainted value. | |||
253 | 253 | ||
254 | 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. | 254 | 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. |
255 | 255 | ||
256 | 9: 'A' if the ACPI table has been overridden. | ||
257 | |||
258 | 10: 'W' if a warning has previously been issued by the kernel. | ||
259 | |||
256 | The primary reason for the 'Tainted: ' string is to tell kernel | 260 | The primary reason for the 'Tainted: ' string is to tell kernel |
257 | debuggers if this is a clean kernel or if anything unusual has | 261 | debuggers if this is a clean kernel or if anything unusual has |
258 | occurred. Tainting is permanent: even if an offending module is | 262 | occurred. Tainting is permanent: even if an offending module is |
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index cf89e8cfd5bf..1d2a772506cf 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt | |||
@@ -2836,6 +2836,39 @@ platforms are moved over to use the flattened-device-tree model. | |||
2836 | big-endian; | 2836 | big-endian; |
2837 | }; | 2837 | }; |
2838 | 2838 | ||
2839 | r) Freescale Display Interface Unit | ||
2840 | |||
2841 | The Freescale DIU is a LCD controller, with proper hardware, it can also | ||
2842 | drive DVI monitors. | ||
2843 | |||
2844 | Required properties: | ||
2845 | - compatible : should be "fsl-diu". | ||
2846 | - reg : should contain at least address and length of the DIU register | ||
2847 | set. | ||
2848 | - Interrupts : one DIU interrupt should be describe here. | ||
2849 | |||
2850 | Example (MPC8610HPCD) | ||
2851 | display@2c000 { | ||
2852 | compatible = "fsl,diu"; | ||
2853 | reg = <0x2c000 100>; | ||
2854 | interrupts = <72 2>; | ||
2855 | interrupt-parent = <&mpic>; | ||
2856 | }; | ||
2857 | |||
2858 | s) Freescale on board FPGA | ||
2859 | |||
2860 | This is the memory-mapped registers for on board FPGA. | ||
2861 | |||
2862 | Required properities: | ||
2863 | - compatible : should be "fsl,fpga-pixis". | ||
2864 | - reg : should contain the address and the lenght of the FPPGA register | ||
2865 | set. | ||
2866 | |||
2867 | Example (MPC8610HPCD) | ||
2868 | board-control@e8000000 { | ||
2869 | compatible = "fsl,fpga-pixis"; | ||
2870 | reg = <0xe8000000 32>; | ||
2871 | }; | ||
2839 | 2872 | ||
2840 | VII - Marvell Discovery mv64[345]6x System Controller chips | 2873 | VII - Marvell Discovery mv64[345]6x System Controller chips |
2841 | =========================================================== | 2874 | =========================================================== |
diff --git a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt index 5e03610e186f..cda7a7dffa6d 100644 --- a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt +++ b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt | |||
@@ -186,6 +186,12 @@ Recommended soc5200 child nodes; populate as needed for your board | |||
186 | name device_type compatible Description | 186 | name device_type compatible Description |
187 | ---- ----------- ---------- ----------- | 187 | ---- ----------- ---------- ----------- |
188 | gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers | 188 | gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers |
189 | gpt@<addr> gpt fsl,mpc5200-gpt-gpio General purpose | ||
190 | timers in GPIO mode | ||
191 | gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio | ||
192 | controller | ||
193 | gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio | ||
194 | controller | ||
189 | rtc@<addr> rtc mpc5200-rtc Real time clock | 195 | rtc@<addr> rtc mpc5200-rtc Real time clock |
190 | mscan@<addr> mscan mpc5200-mscan CAN bus controller | 196 | mscan@<addr> mscan mpc5200-mscan CAN bus controller |
191 | pci@<addr> pci mpc5200-pci PCI bridge | 197 | pci@<addr> pci mpc5200-pci PCI bridge |
@@ -225,6 +231,12 @@ PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in | |||
225 | i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the | 231 | i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the |
226 | compatible field. | 232 | compatible field. |
227 | 233 | ||
234 | 7) GPIO controller nodes | ||
235 | Each GPIO controller node should have the empty property gpio-controller and | ||
236 | #gpio-cells set to 2. First cell is the GPIO number which is interpreted | ||
237 | according to the bit numbers in the GPIO control registers. The second cell | ||
238 | is for flags which is currently unsused. | ||
239 | |||
228 | IV - Extra Notes | 240 | IV - Extra Notes |
229 | ================ | 241 | ================ |
230 | 242 | ||
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index fd4c32a031c9..0bbee38acd26 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt | |||
@@ -795,6 +795,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. | |||
795 | lg-lw LG LW20/LW25 laptop | 795 | lg-lw LG LW20/LW25 laptop |
796 | tcl TCL S700 | 796 | tcl TCL S700 |
797 | clevo Clevo laptops (m520G, m665n) | 797 | clevo Clevo laptops (m520G, m665n) |
798 | medion Medion Rim 2150 | ||
798 | test for testing/debugging purpose, almost all controls can be | 799 | test for testing/debugging purpose, almost all controls can be |
799 | adjusted. Appearing only when compiled with | 800 | adjusted. Appearing only when compiled with |
800 | $CONFIG_SND_DEBUG=y | 801 | $CONFIG_SND_DEBUG=y |
diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev index 5c8e1b988a08..ed2da5e5b28a 100644 --- a/Documentation/spi/spidev +++ b/Documentation/spi/spidev | |||
@@ -126,8 +126,8 @@ NOTES: | |||
126 | FULL DUPLEX CHARACTER DEVICE API | 126 | FULL DUPLEX CHARACTER DEVICE API |
127 | ================================ | 127 | ================================ |
128 | 128 | ||
129 | See the sample program below for one example showing the use of the full | 129 | See the spidev_fdx.c sample program for one example showing the use of the |
130 | duplex programming interface. (Although it doesn't perform a full duplex | 130 | full duplex programming interface. (Although it doesn't perform a full duplex |
131 | transfer.) The model is the same as that used in the kernel spi_sync() | 131 | transfer.) The model is the same as that used in the kernel spi_sync() |
132 | request; the individual transfers offer the same capabilities as are | 132 | request; the individual transfers offer the same capabilities as are |
133 | available to kernel drivers (except that it's not asynchronous). | 133 | available to kernel drivers (except that it's not asynchronous). |
@@ -141,167 +141,3 @@ and bitrate for each transfer segment.) | |||
141 | 141 | ||
142 | To make a full duplex request, provide both rx_buf and tx_buf for the | 142 | To make a full duplex request, provide both rx_buf and tx_buf for the |
143 | same transfer. It's even OK if those are the same buffer. | 143 | same transfer. It's even OK if those are the same buffer. |
144 | |||
145 | |||
146 | SAMPLE PROGRAM | ||
147 | ============== | ||
148 | |||
149 | -------------------------------- CUT HERE | ||
150 | #include <stdio.h> | ||
151 | #include <unistd.h> | ||
152 | #include <stdlib.h> | ||
153 | #include <fcntl.h> | ||
154 | #include <string.h> | ||
155 | |||
156 | #include <sys/ioctl.h> | ||
157 | #include <sys/types.h> | ||
158 | #include <sys/stat.h> | ||
159 | |||
160 | #include <linux/types.h> | ||
161 | #include <linux/spi/spidev.h> | ||
162 | |||
163 | |||
164 | static int verbose; | ||
165 | |||
166 | static void do_read(int fd, int len) | ||
167 | { | ||
168 | unsigned char buf[32], *bp; | ||
169 | int status; | ||
170 | |||
171 | /* read at least 2 bytes, no more than 32 */ | ||
172 | if (len < 2) | ||
173 | len = 2; | ||
174 | else if (len > sizeof(buf)) | ||
175 | len = sizeof(buf); | ||
176 | memset(buf, 0, sizeof buf); | ||
177 | |||
178 | status = read(fd, buf, len); | ||
179 | if (status < 0) { | ||
180 | perror("read"); | ||
181 | return; | ||
182 | } | ||
183 | if (status != len) { | ||
184 | fprintf(stderr, "short read\n"); | ||
185 | return; | ||
186 | } | ||
187 | |||
188 | printf("read(%2d, %2d): %02x %02x,", len, status, | ||
189 | buf[0], buf[1]); | ||
190 | status -= 2; | ||
191 | bp = buf + 2; | ||
192 | while (status-- > 0) | ||
193 | printf(" %02x", *bp++); | ||
194 | printf("\n"); | ||
195 | } | ||
196 | |||
197 | static void do_msg(int fd, int len) | ||
198 | { | ||
199 | struct spi_ioc_transfer xfer[2]; | ||
200 | unsigned char buf[32], *bp; | ||
201 | int status; | ||
202 | |||
203 | memset(xfer, 0, sizeof xfer); | ||
204 | memset(buf, 0, sizeof buf); | ||
205 | |||
206 | if (len > sizeof buf) | ||
207 | len = sizeof buf; | ||
208 | |||
209 | buf[0] = 0xaa; | ||
210 | xfer[0].tx_buf = (__u64) buf; | ||
211 | xfer[0].len = 1; | ||
212 | |||
213 | xfer[1].rx_buf = (__u64) buf; | ||
214 | xfer[1].len = len; | ||
215 | |||
216 | status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer); | ||
217 | if (status < 0) { | ||
218 | perror("SPI_IOC_MESSAGE"); | ||
219 | return; | ||
220 | } | ||
221 | |||
222 | printf("response(%2d, %2d): ", len, status); | ||
223 | for (bp = buf; len; len--) | ||
224 | printf(" %02x", *bp++); | ||
225 | printf("\n"); | ||
226 | } | ||
227 | |||
228 | static void dumpstat(const char *name, int fd) | ||
229 | { | ||
230 | __u8 mode, lsb, bits; | ||
231 | __u32 speed; | ||
232 | |||
233 | if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) { | ||
234 | perror("SPI rd_mode"); | ||
235 | return; | ||
236 | } | ||
237 | if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) { | ||
238 | perror("SPI rd_lsb_fist"); | ||
239 | return; | ||
240 | } | ||
241 | if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) { | ||
242 | perror("SPI bits_per_word"); | ||
243 | return; | ||
244 | } | ||
245 | if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) { | ||
246 | perror("SPI max_speed_hz"); | ||
247 | return; | ||
248 | } | ||
249 | |||
250 | printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n", | ||
251 | name, mode, bits, lsb ? "(lsb first) " : "", speed); | ||
252 | } | ||
253 | |||
254 | int main(int argc, char **argv) | ||
255 | { | ||
256 | int c; | ||
257 | int readcount = 0; | ||
258 | int msglen = 0; | ||
259 | int fd; | ||
260 | const char *name; | ||
261 | |||
262 | while ((c = getopt(argc, argv, "hm:r:v")) != EOF) { | ||
263 | switch (c) { | ||
264 | case 'm': | ||
265 | msglen = atoi(optarg); | ||
266 | if (msglen < 0) | ||
267 | goto usage; | ||
268 | continue; | ||
269 | case 'r': | ||
270 | readcount = atoi(optarg); | ||
271 | if (readcount < 0) | ||
272 | goto usage; | ||
273 | continue; | ||
274 | case 'v': | ||
275 | verbose++; | ||
276 | continue; | ||
277 | case 'h': | ||
278 | case '?': | ||
279 | usage: | ||
280 | fprintf(stderr, | ||
281 | "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n", | ||
282 | argv[0]); | ||
283 | return 1; | ||
284 | } | ||
285 | } | ||
286 | |||
287 | if ((optind + 1) != argc) | ||
288 | goto usage; | ||
289 | name = argv[optind]; | ||
290 | |||
291 | fd = open(name, O_RDWR); | ||
292 | if (fd < 0) { | ||
293 | perror("open"); | ||
294 | return 1; | ||
295 | } | ||
296 | |||
297 | dumpstat(name, fd); | ||
298 | |||
299 | if (msglen) | ||
300 | do_msg(fd, msglen); | ||
301 | |||
302 | if (readcount) | ||
303 | do_read(fd, readcount); | ||
304 | |||
305 | close(fd); | ||
306 | return 0; | ||
307 | } | ||
diff --git a/Documentation/spi/spidev_fdx.c b/Documentation/spi/spidev_fdx.c new file mode 100644 index 000000000000..fc354f760384 --- /dev/null +++ b/Documentation/spi/spidev_fdx.c | |||
@@ -0,0 +1,158 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <unistd.h> | ||
3 | #include <stdlib.h> | ||
4 | #include <fcntl.h> | ||
5 | #include <string.h> | ||
6 | |||
7 | #include <sys/ioctl.h> | ||
8 | #include <sys/types.h> | ||
9 | #include <sys/stat.h> | ||
10 | |||
11 | #include <linux/types.h> | ||
12 | #include <linux/spi/spidev.h> | ||
13 | |||
14 | |||
15 | static int verbose; | ||
16 | |||
17 | static void do_read(int fd, int len) | ||
18 | { | ||
19 | unsigned char buf[32], *bp; | ||
20 | int status; | ||
21 | |||
22 | /* read at least 2 bytes, no more than 32 */ | ||
23 | if (len < 2) | ||
24 | len = 2; | ||
25 | else if (len > sizeof(buf)) | ||
26 | len = sizeof(buf); | ||
27 | memset(buf, 0, sizeof buf); | ||
28 | |||
29 | status = read(fd, buf, len); | ||
30 | if (status < 0) { | ||
31 | perror("read"); | ||
32 | return; | ||
33 | } | ||
34 | if (status != len) { | ||
35 | fprintf(stderr, "short read\n"); | ||
36 | return; | ||
37 | } | ||
38 | |||
39 | printf("read(%2d, %2d): %02x %02x,", len, status, | ||
40 | buf[0], buf[1]); | ||
41 | status -= 2; | ||
42 | bp = buf + 2; | ||
43 | while (status-- > 0) | ||
44 | printf(" %02x", *bp++); | ||
45 | printf("\n"); | ||
46 | } | ||
47 | |||
48 | static void do_msg(int fd, int len) | ||
49 | { | ||
50 | struct spi_ioc_transfer xfer[2]; | ||
51 | unsigned char buf[32], *bp; | ||
52 | int status; | ||
53 | |||
54 | memset(xfer, 0, sizeof xfer); | ||
55 | memset(buf, 0, sizeof buf); | ||
56 | |||
57 | if (len > sizeof buf) | ||
58 | len = sizeof buf; | ||
59 | |||
60 | buf[0] = 0xaa; | ||
61 | xfer[0].tx_buf = (__u64) buf; | ||
62 | xfer[0].len = 1; | ||
63 | |||
64 | xfer[1].rx_buf = (__u64) buf; | ||
65 | xfer[1].len = len; | ||
66 | |||
67 | status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer); | ||
68 | if (status < 0) { | ||
69 | perror("SPI_IOC_MESSAGE"); | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | printf("response(%2d, %2d): ", len, status); | ||
74 | for (bp = buf; len; len--) | ||
75 | printf(" %02x", *bp++); | ||
76 | printf("\n"); | ||
77 | } | ||
78 | |||
79 | static void dumpstat(const char *name, int fd) | ||
80 | { | ||
81 | __u8 mode, lsb, bits; | ||
82 | __u32 speed; | ||
83 | |||
84 | if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) { | ||
85 | perror("SPI rd_mode"); | ||
86 | return; | ||
87 | } | ||
88 | if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) { | ||
89 | perror("SPI rd_lsb_fist"); | ||
90 | return; | ||
91 | } | ||
92 | if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) { | ||
93 | perror("SPI bits_per_word"); | ||
94 | return; | ||
95 | } | ||
96 | if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) { | ||
97 | perror("SPI max_speed_hz"); | ||
98 | return; | ||
99 | } | ||
100 | |||
101 | printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n", | ||
102 | name, mode, bits, lsb ? "(lsb first) " : "", speed); | ||
103 | } | ||
104 | |||
105 | int main(int argc, char **argv) | ||
106 | { | ||
107 | int c; | ||
108 | int readcount = 0; | ||
109 | int msglen = 0; | ||
110 | int fd; | ||
111 | const char *name; | ||
112 | |||
113 | while ((c = getopt(argc, argv, "hm:r:v")) != EOF) { | ||
114 | switch (c) { | ||
115 | case 'm': | ||
116 | msglen = atoi(optarg); | ||
117 | if (msglen < 0) | ||
118 | goto usage; | ||
119 | continue; | ||
120 | case 'r': | ||
121 | readcount = atoi(optarg); | ||
122 | if (readcount < 0) | ||
123 | goto usage; | ||
124 | continue; | ||
125 | case 'v': | ||
126 | verbose++; | ||
127 | continue; | ||
128 | case 'h': | ||
129 | case '?': | ||
130 | usage: | ||
131 | fprintf(stderr, | ||
132 | "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n", | ||
133 | argv[0]); | ||
134 | return 1; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if ((optind + 1) != argc) | ||
139 | goto usage; | ||
140 | name = argv[optind]; | ||
141 | |||
142 | fd = open(name, O_RDWR); | ||
143 | if (fd < 0) { | ||
144 | perror("open"); | ||
145 | return 1; | ||
146 | } | ||
147 | |||
148 | dumpstat(name, fd); | ||
149 | |||
150 | if (msglen) | ||
151 | do_msg(fd, msglen); | ||
152 | |||
153 | if (readcount) | ||
154 | do_read(fd, readcount); | ||
155 | |||
156 | close(fd); | ||
157 | return 0; | ||
158 | } | ||
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 10c8f6922ef4..5ce0952aa065 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt | |||
@@ -85,6 +85,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: | |||
85 | 'k' - Secure Access Key (SAK) Kills all programs on the current virtual | 85 | 'k' - Secure Access Key (SAK) Kills all programs on the current virtual |
86 | console. NOTE: See important comments below in SAK section. | 86 | console. NOTE: See important comments below in SAK section. |
87 | 87 | ||
88 | 'l' - Shows a stack backtrace for all active CPUs. | ||
89 | |||
88 | 'm' - Will dump current memory info to your console. | 90 | 'm' - Will dump current memory info to your console. |
89 | 91 | ||
90 | 'n' - Used to make RT tasks nice-able | 92 | 'n' - Used to make RT tasks nice-able |
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index d9f28be75403..70d68ce8640a 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt | |||
@@ -108,10 +108,12 @@ and throttle appropriate devices. | |||
108 | RO read only value | 108 | RO read only value |
109 | RW read/write value | 109 | RW read/write value |
110 | 110 | ||
111 | All thermal sysfs attributes will be represented under /sys/class/thermal | 111 | Thermal sysfs attributes will be represented under /sys/class/thermal. |
112 | Hwmon sysfs I/F extension is also available under /sys/class/hwmon | ||
113 | if hwmon is compiled in or built as a module. | ||
112 | 114 | ||
113 | Thermal zone device sys I/F, created once it's registered: | 115 | Thermal zone device sys I/F, created once it's registered: |
114 | |thermal_zone[0-*]: | 116 | /sys/class/thermal/thermal_zone[0-*]: |
115 | |-----type: Type of the thermal zone | 117 | |-----type: Type of the thermal zone |
116 | |-----temp: Current temperature | 118 | |-----temp: Current temperature |
117 | |-----mode: Working mode of the thermal zone | 119 | |-----mode: Working mode of the thermal zone |
@@ -119,7 +121,7 @@ Thermal zone device sys I/F, created once it's registered: | |||
119 | |-----trip_point_[0-*]_type: Trip point type | 121 | |-----trip_point_[0-*]_type: Trip point type |
120 | 122 | ||
121 | Thermal cooling device sys I/F, created once it's registered: | 123 | Thermal cooling device sys I/F, created once it's registered: |
122 | |cooling_device[0-*]: | 124 | /sys/class/thermal/cooling_device[0-*]: |
123 | |-----type : Type of the cooling device(processor/fan/...) | 125 | |-----type : Type of the cooling device(processor/fan/...) |
124 | |-----max_state: Maximum cooling state of the cooling device | 126 | |-----max_state: Maximum cooling state of the cooling device |
125 | |-----cur_state: Current cooling state of the cooling device | 127 | |-----cur_state: Current cooling state of the cooling device |
@@ -130,10 +132,19 @@ They represent the relationship between a thermal zone and its associated coolin | |||
130 | They are created/removed for each | 132 | They are created/removed for each |
131 | thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. | 133 | thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. |
132 | 134 | ||
133 | |thermal_zone[0-*] | 135 | /sys/class/thermal/thermal_zone[0-*] |
134 | |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone | 136 | |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone |
135 | |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with | 137 | |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with |
136 | 138 | ||
139 | Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, | ||
140 | the generic thermal driver also creates a hwmon sysfs I/F for each _type_ of | ||
141 | thermal zone device. E.g. the generic thermal driver registers one hwmon class device | ||
142 | and build the associated hwmon sysfs I/F for all the registered ACPI thermal zones. | ||
143 | /sys/class/hwmon/hwmon[0-*]: | ||
144 | |-----name: The type of the thermal zone devices. | ||
145 | |-----temp[1-*]_input: The current temperature of thermal zone [1-*]. | ||
146 | |-----temp[1-*]_critical: The critical trip point of thermal zone [1-*]. | ||
147 | Please read Documentation/hwmon/sysfs-interface for additional information. | ||
137 | 148 | ||
138 | *************************** | 149 | *************************** |
139 | * Thermal zone attributes * | 150 | * Thermal zone attributes * |
@@ -141,7 +152,10 @@ thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful e | |||
141 | 152 | ||
142 | type Strings which represent the thermal zone type. | 153 | type Strings which represent the thermal zone type. |
143 | This is given by thermal zone driver as part of registration. | 154 | This is given by thermal zone driver as part of registration. |
144 | Eg: "ACPI thermal zone" indicates it's a ACPI thermal device | 155 | Eg: "acpitz" indicates it's an ACPI thermal device. |
156 | In order to keep it consistent with hwmon sys attribute, | ||
157 | this should be a short, lowercase string, | ||
158 | not containing spaces nor dashes. | ||
145 | RO | 159 | RO |
146 | Required | 160 | Required |
147 | 161 | ||
@@ -218,7 +232,7 @@ the sys I/F structure will be built like this: | |||
218 | /sys/class/thermal: | 232 | /sys/class/thermal: |
219 | 233 | ||
220 | |thermal_zone1: | 234 | |thermal_zone1: |
221 | |-----type: ACPI thermal zone | 235 | |-----type: acpitz |
222 | |-----temp: 37000 | 236 | |-----temp: 37000 |
223 | |-----mode: kernel | 237 | |-----mode: kernel |
224 | |-----trip_point_0_temp: 100000 | 238 | |-----trip_point_0_temp: 100000 |
@@ -243,3 +257,10 @@ the sys I/F structure will be built like this: | |||
243 | |-----type: Fan | 257 | |-----type: Fan |
244 | |-----max_state: 2 | 258 | |-----max_state: 2 |
245 | |-----cur_state: 0 | 259 | |-----cur_state: 0 |
260 | |||
261 | /sys/class/hwmon: | ||
262 | |||
263 | |hwmon0: | ||
264 | |-----name: acpitz | ||
265 | |-----temp1_input: 37000 | ||
266 | |-----temp1_crit: 100000 | ||
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134 index 44d84dd15ad6..67937df1e974 100644 --- a/Documentation/video4linux/CARDLIST.saa7134 +++ b/Documentation/video4linux/CARDLIST.saa7134 | |||
@@ -128,7 +128,7 @@ | |||
128 | 127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] | 128 | 127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] |
129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] | 129 | 128 -> Beholder BeholdTV Columbus TVFM [0000:5201] |
130 | 129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] | 130 | 129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] |
131 | 130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193] | 131 | 130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193,5ace:6191] |
132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] | 132 | 131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] |
133 | 132 -> Genius TVGO AM11MCE | 133 | 132 -> Genius TVGO AM11MCE |
134 | 133 -> NXP Snake DVB-S reference design | 134 | 133 -> NXP Snake DVB-S reference design |
@@ -140,3 +140,4 @@ | |||
140 | 139 -> Compro VideoMate T750 [185b:c900] | 140 | 139 -> Compro VideoMate T750 [185b:c900] |
141 | 140 -> Avermedia DVB-S Pro A700 [1461:a7a1] | 141 | 140 -> Avermedia DVB-S Pro A700 [1461:a7a1] |
142 | 141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2] | 142 | 141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2] |
143 | 142 -> Beholder BeholdTV H6 [5ace:6290] | ||
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt new file mode 100644 index 000000000000..077d56ec3f3d --- /dev/null +++ b/Documentation/video4linux/cx18.txt | |||
@@ -0,0 +1,34 @@ | |||
1 | Some notes regarding the cx18 driver for the Conexant CX23418 MPEG | ||
2 | encoder chip: | ||
3 | |||
4 | 1) The only hardware currently supported is the Hauppauge HVR-1600. | ||
5 | |||
6 | 2) Some people have problems getting the i2c bus to work. Cause unknown. | ||
7 | The symptom is that the eeprom cannot be read and the card is | ||
8 | unusable. | ||
9 | |||
10 | 3) The audio from the analog tuner is mono only. Probably caused by | ||
11 | incorrect audio register information in the datasheet. We are | ||
12 | waiting for updated information from Conexant. | ||
13 | |||
14 | 4) VBI (raw or sliced) has not yet been implemented. | ||
15 | |||
16 | 5) MPEG indexing is not yet implemented. | ||
17 | |||
18 | 6) The driver is still a bit rough around the edges, this should | ||
19 | improve over time. | ||
20 | |||
21 | |||
22 | Firmware: | ||
23 | |||
24 | The firmware needs to be extracted from the Windows Hauppauge HVR-1600 | ||
25 | driver, available here: | ||
26 | |||
27 | http://hauppauge.lightpath.net/software/install_cd/hauppauge_cd_3.4d1.zip | ||
28 | |||
29 | Unzip, then copy the following files to the firmware directory | ||
30 | and rename them as follows: | ||
31 | |||
32 | Drivers/Driver18/hcw18apu.rom -> v4l-cx23418-apu.fw | ||
33 | Drivers/Driver18/hcw18enc.rom -> v4l-cx23418-cpu.fw | ||
34 | Drivers/Driver18/hcw18mlC.rom -> v4l-cx23418-dig.fw | ||
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt index dd4986497996..bad16d3f6a47 100644 --- a/Documentation/vm/numa_memory_policy.txt +++ b/Documentation/vm/numa_memory_policy.txt | |||
@@ -135,77 +135,58 @@ most general to most specific: | |||
135 | 135 | ||
136 | Components of Memory Policies | 136 | Components of Memory Policies |
137 | 137 | ||
138 | A Linux memory policy is a tuple consisting of a "mode" and an optional set | 138 | A Linux memory policy consists of a "mode", optional mode flags, and an |
139 | of nodes. The mode determine the behavior of the policy, while the | 139 | optional set of nodes. The mode determines the behavior of the policy, |
140 | optional set of nodes can be viewed as the arguments to the behavior. | 140 | the optional mode flags determine the behavior of the mode, and the |
141 | optional set of nodes can be viewed as the arguments to the policy | ||
142 | behavior. | ||
141 | 143 | ||
142 | Internally, memory policies are implemented by a reference counted | 144 | Internally, memory policies are implemented by a reference counted |
143 | structure, struct mempolicy. Details of this structure will be discussed | 145 | structure, struct mempolicy. Details of this structure will be discussed |
144 | in context, below, as required to explain the behavior. | 146 | in context, below, as required to explain the behavior. |
145 | 147 | ||
146 | Note: in some functions AND in the struct mempolicy itself, the mode | ||
147 | is called "policy". However, to avoid confusion with the policy tuple, | ||
148 | this document will continue to use the term "mode". | ||
149 | |||
150 | Linux memory policy supports the following 4 behavioral modes: | 148 | Linux memory policy supports the following 4 behavioral modes: |
151 | 149 | ||
152 | Default Mode--MPOL_DEFAULT: The behavior specified by this mode is | 150 | Default Mode--MPOL_DEFAULT: This mode is only used in the memory |
153 | context or scope dependent. | 151 | policy APIs. Internally, MPOL_DEFAULT is converted to the NULL |
154 | 152 | memory policy in all policy scopes. Any existing non-default policy | |
155 | As mentioned in the Policy Scope section above, during normal | 153 | will simply be removed when MPOL_DEFAULT is specified. As a result, |
156 | system operation, the System Default Policy is hard coded to | 154 | MPOL_DEFAULT means "fall back to the next most specific policy scope." |
157 | contain the Default mode. | ||
158 | |||
159 | In this context, default mode means "local" allocation--that is | ||
160 | attempt to allocate the page from the node associated with the cpu | ||
161 | where the fault occurs. If the "local" node has no memory, or the | ||
162 | node's memory can be exhausted [no free pages available], local | ||
163 | allocation will "fallback to"--attempt to allocate pages from-- | ||
164 | "nearby" nodes, in order of increasing "distance". | ||
165 | 155 | ||
166 | Implementation detail -- subject to change: "Fallback" uses | 156 | For example, a NULL or default task policy will fall back to the |
167 | a per node list of sibling nodes--called zonelists--built at | 157 | system default policy. A NULL or default vma policy will fall |
168 | boot time, or when nodes or memory are added or removed from | 158 | back to the task policy. |
169 | the system [memory hotplug]. These per node zonelist are | ||
170 | constructed with nodes in order of increasing distance based | ||
171 | on information provided by the platform firmware. | ||
172 | 159 | ||
173 | When a task/process policy or a shared policy contains the Default | 160 | When specified in one of the memory policy APIs, the Default mode |
174 | mode, this also means "local allocation", as described above. | 161 | does not use the optional set of nodes. |
175 | 162 | ||
176 | In the context of a VMA, Default mode means "fall back to task | 163 | It is an error for the set of nodes specified for this policy to |
177 | policy"--which may or may not specify Default mode. Thus, Default | 164 | be non-empty. |
178 | mode can not be counted on to mean local allocation when used | ||
179 | on a non-shared region of the address space. However, see | ||
180 | MPOL_PREFERRED below. | ||
181 | |||
182 | The Default mode does not use the optional set of nodes. | ||
183 | 165 | ||
184 | MPOL_BIND: This mode specifies that memory must come from the | 166 | MPOL_BIND: This mode specifies that memory must come from the |
185 | set of nodes specified by the policy. | 167 | set of nodes specified by the policy. Memory will be allocated from |
186 | 168 | the node in the set with sufficient free memory that is closest to | |
187 | The memory policy APIs do not specify an order in which the nodes | 169 | the node where the allocation takes place. |
188 | will be searched. However, unlike "local allocation", the Bind | ||
189 | policy does not consider the distance between the nodes. Rather, | ||
190 | allocations will fallback to the nodes specified by the policy in | ||
191 | order of numeric node id. Like everything in Linux, this is subject | ||
192 | to change. | ||
193 | 170 | ||
194 | MPOL_PREFERRED: This mode specifies that the allocation should be | 171 | MPOL_PREFERRED: This mode specifies that the allocation should be |
195 | attempted from the single node specified in the policy. If that | 172 | attempted from the single node specified in the policy. If that |
196 | allocation fails, the kernel will search other nodes, exactly as | 173 | allocation fails, the kernel will search other nodes, in order of |
197 | it would for a local allocation that started at the preferred node | 174 | increasing distance from the preferred node based on information |
198 | in increasing distance from the preferred node. "Local" allocation | 175 | provided by the platform firmware. |
199 | policy can be viewed as a Preferred policy that starts at the node | ||
200 | containing the cpu where the allocation takes place. | 176 | containing the cpu where the allocation takes place. |
201 | 177 | ||
202 | Internally, the Preferred policy uses a single node--the | 178 | Internally, the Preferred policy uses a single node--the |
203 | preferred_node member of struct mempolicy. A "distinguished | 179 | preferred_node member of struct mempolicy. When the internal |
204 | value of this preferred_node, currently '-1', is interpreted | 180 | mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and |
205 | as "the node containing the cpu where the allocation takes | 181 | the policy is interpreted as local allocation. "Local" allocation |
206 | place"--local allocation. This is the way to specify | 182 | policy can be viewed as a Preferred policy that starts at the node |
207 | local allocation for a specific range of addresses--i.e. for | 183 | containing the cpu where the allocation takes place. |
208 | VMA policies. | 184 | |
185 | It is possible for the user to specify that local allocation is | ||
186 | always preferred by passing an empty nodemask with this mode. | ||
187 | If an empty nodemask is passed, the policy cannot use the | ||
188 | MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described | ||
189 | below. | ||
209 | 190 | ||
210 | MPOL_INTERLEAVED: This mode specifies that page allocations be | 191 | MPOL_INTERLEAVED: This mode specifies that page allocations be |
211 | interleaved, on a page granularity, across the nodes specified in | 192 | interleaved, on a page granularity, across the nodes specified in |
@@ -231,6 +212,154 @@ Components of Memory Policies | |||
231 | the temporary interleaved system default policy works in this | 212 | the temporary interleaved system default policy works in this |
232 | mode. | 213 | mode. |
233 | 214 | ||
215 | Linux memory policy supports the following optional mode flags: | ||
216 | |||
217 | MPOL_F_STATIC_NODES: This flag specifies that the nodemask passed by | ||
218 | the user should not be remapped if the task or VMA's set of allowed | ||
219 | nodes changes after the memory policy has been defined. | ||
220 | |||
221 | Without this flag, anytime a mempolicy is rebound because of a | ||
222 | change in the set of allowed nodes, the node (Preferred) or | ||
223 | nodemask (Bind, Interleave) is remapped to the new set of | ||
224 | allowed nodes. This may result in nodes being used that were | ||
225 | previously undesired. | ||
226 | |||
227 | With this flag, if the user-specified nodes overlap with the | ||
228 | nodes allowed by the task's cpuset, then the memory policy is | ||
229 | applied to their intersection. If the two sets of nodes do not | ||
230 | overlap, the Default policy is used. | ||
231 | |||
232 | For example, consider a task that is attached to a cpuset with | ||
233 | mems 1-3 that sets an Interleave policy over the same set. If | ||
234 | the cpuset's mems change to 3-5, the Interleave will now occur | ||
235 | over nodes 3, 4, and 5. With this flag, however, since only node | ||
236 | 3 is allowed from the user's nodemask, the "interleave" only | ||
237 | occurs over that node. If no nodes from the user's nodemask are | ||
238 | now allowed, the Default behavior is used. | ||
239 | |||
240 | MPOL_F_STATIC_NODES cannot be combined with the | ||
241 | MPOL_F_RELATIVE_NODES flag. It also cannot be used for | ||
242 | MPOL_PREFERRED policies that were created with an empty nodemask | ||
243 | (local allocation). | ||
244 | |||
245 | MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed | ||
246 | by the user will be mapped relative to the set of the task or VMA's | ||
247 | set of allowed nodes. The kernel stores the user-passed nodemask, | ||
248 | and if the allowed nodes changes, then that original nodemask will | ||
249 | be remapped relative to the new set of allowed nodes. | ||
250 | |||
251 | Without this flag (and without MPOL_F_STATIC_NODES), anytime a | ||
252 | mempolicy is rebound because of a change in the set of allowed | ||
253 | nodes, the node (Preferred) or nodemask (Bind, Interleave) is | ||
254 | remapped to the new set of allowed nodes. That remap may not | ||
255 | preserve the relative nature of the user's passed nodemask to its | ||
256 | set of allowed nodes upon successive rebinds: a nodemask of | ||
257 | 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of | ||
258 | allowed nodes is restored to its original state. | ||
259 | |||
260 | With this flag, the remap is done so that the node numbers from | ||
261 | the user's passed nodemask are relative to the set of allowed | ||
262 | nodes. In other words, if nodes 0, 2, and 4 are set in the user's | ||
263 | nodemask, the policy will be effected over the first (and in the | ||
264 | Bind or Interleave case, the third and fifth) nodes in the set of | ||
265 | allowed nodes. The nodemask passed by the user represents nodes | ||
266 | relative to task or VMA's set of allowed nodes. | ||
267 | |||
268 | If the user's nodemask includes nodes that are outside the range | ||
269 | of the new set of allowed nodes (for example, node 5 is set in | ||
270 | the user's nodemask when the set of allowed nodes is only 0-3), | ||
271 | then the remap wraps around to the beginning of the nodemask and, | ||
272 | if not already set, sets the node in the mempolicy nodemask. | ||
273 | |||
274 | For example, consider a task that is attached to a cpuset with | ||
275 | mems 2-5 that sets an Interleave policy over the same set with | ||
276 | MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the | ||
277 | interleave now occurs over nodes 3,5-6. If the cpuset's mems | ||
278 | then change to 0,2-3,5, then the interleave occurs over nodes | ||
279 | 0,3,5. | ||
280 | |||
281 | Thanks to the consistent remapping, applications preparing | ||
282 | nodemasks to specify memory policies using this flag should | ||
283 | disregard their current, actual cpuset imposed memory placement | ||
284 | and prepare the nodemask as if they were always located on | ||
285 | memory nodes 0 to N-1, where N is the number of memory nodes the | ||
286 | policy is intended to manage. Let the kernel then remap to the | ||
287 | set of memory nodes allowed by the task's cpuset, as that may | ||
288 | change over time. | ||
289 | |||
290 | MPOL_F_RELATIVE_NODES cannot be combined with the | ||
291 | MPOL_F_STATIC_NODES flag. It also cannot be used for | ||
292 | MPOL_PREFERRED policies that were created with an empty nodemask | ||
293 | (local allocation). | ||
294 | |||
295 | MEMORY POLICY REFERENCE COUNTING | ||
296 | |||
297 | To resolve use/free races, struct mempolicy contains an atomic reference | ||
298 | count field. Internal interfaces, mpol_get()/mpol_put() increment and | ||
299 | decrement this reference count, respectively. mpol_put() will only free | ||
300 | the structure back to the mempolicy kmem cache when the reference count | ||
301 | goes to zero. | ||
302 | |||
303 | When a new memory policy is allocated, it's reference count is initialized | ||
304 | to '1', representing the reference held by the task that is installing the | ||
305 | new policy. When a pointer to a memory policy structure is stored in another | ||
306 | structure, another reference is added, as the task's reference will be dropped | ||
307 | on completion of the policy installation. | ||
308 | |||
309 | During run-time "usage" of the policy, we attempt to minimize atomic operations | ||
310 | on the reference count, as this can lead to cache lines bouncing between cpus | ||
311 | and NUMA nodes. "Usage" here means one of the following: | ||
312 | |||
313 | 1) querying of the policy, either by the task itself [using the get_mempolicy() | ||
314 | API discussed below] or by another task using the /proc/<pid>/numa_maps | ||
315 | interface. | ||
316 | |||
317 | 2) examination of the policy to determine the policy mode and associated node | ||
318 | or node lists, if any, for page allocation. This is considered a "hot | ||
319 | path". Note that for MPOL_BIND, the "usage" extends across the entire | ||
320 | allocation process, which may sleep during page reclaimation, because the | ||
321 | BIND policy nodemask is used, by reference, to filter ineligible nodes. | ||
322 | |||
323 | We can avoid taking an extra reference during the usages listed above as | ||
324 | follows: | ||
325 | |||
326 | 1) we never need to get/free the system default policy as this is never | ||
327 | changed nor freed, once the system is up and running. | ||
328 | |||
329 | 2) for querying the policy, we do not need to take an extra reference on the | ||
330 | target task's task policy nor vma policies because we always acquire the | ||
331 | task's mm's mmap_sem for read during the query. The set_mempolicy() and | ||
332 | mbind() APIs [see below] always acquire the mmap_sem for write when | ||
333 | installing or replacing task or vma policies. Thus, there is no possibility | ||
334 | of a task or thread freeing a policy while another task or thread is | ||
335 | querying it. | ||
336 | |||
337 | 3) Page allocation usage of task or vma policy occurs in the fault path where | ||
338 | we hold them mmap_sem for read. Again, because replacing the task or vma | ||
339 | policy requires that the mmap_sem be held for write, the policy can't be | ||
340 | freed out from under us while we're using it for page allocation. | ||
341 | |||
342 | 4) Shared policies require special consideration. One task can replace a | ||
343 | shared memory policy while another task, with a distinct mmap_sem, is | ||
344 | querying or allocating a page based on the policy. To resolve this | ||
345 | potential race, the shared policy infrastructure adds an extra reference | ||
346 | to the shared policy during lookup while holding a spin lock on the shared | ||
347 | policy management structure. This requires that we drop this extra | ||
348 | reference when we're finished "using" the policy. We must drop the | ||
349 | extra reference on shared policies in the same query/allocation paths | ||
350 | used for non-shared policies. For this reason, shared policies are marked | ||
351 | as such, and the extra reference is dropped "conditionally"--i.e., only | ||
352 | for shared policies. | ||
353 | |||
354 | Because of this extra reference counting, and because we must lookup | ||
355 | shared policies in a tree structure under spinlock, shared policies are | ||
356 | more expensive to use in the page allocation path. This is expecially | ||
357 | true for shared policies on shared memory regions shared by tasks running | ||
358 | on different NUMA nodes. This extra overhead can be avoided by always | ||
359 | falling back to task or system default policy for shared memory regions, | ||
360 | or by prefaulting the entire shared memory region into memory and locking | ||
361 | it down. However, this might not be appropriate for all applications. | ||
362 | |||
234 | MEMORY POLICY APIs | 363 | MEMORY POLICY APIs |
235 | 364 | ||
236 | Linux supports 3 system calls for controlling memory policy. These APIS | 365 | Linux supports 3 system calls for controlling memory policy. These APIS |
@@ -251,7 +380,9 @@ Set [Task] Memory Policy: | |||
251 | Set's the calling task's "task/process memory policy" to mode | 380 | Set's the calling task's "task/process memory policy" to mode |
252 | specified by the 'mode' argument and the set of nodes defined | 381 | specified by the 'mode' argument and the set of nodes defined |
253 | by 'nmask'. 'nmask' points to a bit mask of node ids containing | 382 | by 'nmask'. 'nmask' points to a bit mask of node ids containing |
254 | at least 'maxnode' ids. | 383 | at least 'maxnode' ids. Optional mode flags may be passed by |
384 | combining the 'mode' argument with the flag (for example: | ||
385 | MPOL_INTERLEAVE | MPOL_F_STATIC_NODES). | ||
255 | 386 | ||
256 | See the set_mempolicy(2) man page for more details | 387 | See the set_mempolicy(2) man page for more details |
257 | 388 | ||
@@ -303,29 +434,19 @@ MEMORY POLICIES AND CPUSETS | |||
303 | Memory policies work within cpusets as described above. For memory policies | 434 | Memory policies work within cpusets as described above. For memory policies |
304 | that require a node or set of nodes, the nodes are restricted to the set of | 435 | that require a node or set of nodes, the nodes are restricted to the set of |
305 | nodes whose memories are allowed by the cpuset constraints. If the nodemask | 436 | nodes whose memories are allowed by the cpuset constraints. If the nodemask |
306 | specified for the policy contains nodes that are not allowed by the cpuset, or | 437 | specified for the policy contains nodes that are not allowed by the cpuset and |
307 | the intersection of the set of nodes specified for the policy and the set of | 438 | MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes |
308 | nodes with memory is the empty set, the policy is considered invalid | 439 | specified for the policy and the set of nodes with memory is used. If the |
309 | and cannot be installed. | 440 | result is the empty set, the policy is considered invalid and cannot be |
310 | 441 | installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped | |
311 | The interaction of memory policies and cpusets can be problematic for a | 442 | onto and folded into the task's set of allowed nodes as previously described. |
312 | couple of reasons: | 443 | |
313 | 444 | The interaction of memory policies and cpusets can be problematic when tasks | |
314 | 1) the memory policy APIs take physical node id's as arguments. As mentioned | 445 | in two cpusets share access to a memory region, such as shared memory segments |
315 | above, it is illegal to specify nodes that are not allowed in the cpuset. | 446 | created by shmget() of mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and |
316 | The application must query the allowed nodes using the get_mempolicy() | 447 | any of the tasks install shared policy on the region, only nodes whose |
317 | API with the MPOL_F_MEMS_ALLOWED flag to determine the allowed nodes and | 448 | memories are allowed in both cpusets may be used in the policies. Obtaining |
318 | restrict itself to those nodes. However, the resources available to a | 449 | this information requires "stepping outside" the memory policy APIs to use the |
319 | cpuset can be changed by the system administrator, or a workload manager | 450 | cpuset information and requires that one know in what cpusets other task might |
320 | application, at any time. So, a task may still get errors attempting to | 451 | be attaching to the shared region. Furthermore, if the cpusets' allowed |
321 | specify policy nodes, and must query the allowed memories again. | 452 | memory sets are disjoint, "local" allocation is the only valid policy. |
322 | |||
323 | 2) when tasks in two cpusets share access to a memory region, such as shared | ||
324 | memory segments created by shmget() of mmap() with the MAP_ANONYMOUS and | ||
325 | MAP_SHARED flags, and any of the tasks install shared policy on the region, | ||
326 | only nodes whose memories are allowed in both cpusets may be used in the | ||
327 | policies. Obtaining this information requires "stepping outside" the | ||
328 | memory policy APIs to use the cpuset information and requires that one | ||
329 | know in what cpusets other task might be attaching to the shared region. | ||
330 | Furthermore, if the cpusets' allowed memory sets are disjoint, "local" | ||
331 | allocation is the only valid policy. | ||
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 22d7e3e4d60c..d3ce295bffac 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c | |||
@@ -31,7 +31,7 @@ struct slabinfo { | |||
31 | int hwcache_align, object_size, objs_per_slab; | 31 | int hwcache_align, object_size, objs_per_slab; |
32 | int sanity_checks, slab_size, store_user, trace; | 32 | int sanity_checks, slab_size, store_user, trace; |
33 | int order, poison, reclaim_account, red_zone; | 33 | int order, poison, reclaim_account, red_zone; |
34 | unsigned long partial, objects, slabs; | 34 | unsigned long partial, objects, slabs, objects_partial, objects_total; |
35 | unsigned long alloc_fastpath, alloc_slowpath; | 35 | unsigned long alloc_fastpath, alloc_slowpath; |
36 | unsigned long free_fastpath, free_slowpath; | 36 | unsigned long free_fastpath, free_slowpath; |
37 | unsigned long free_frozen, free_add_partial, free_remove_partial; | 37 | unsigned long free_frozen, free_add_partial, free_remove_partial; |
@@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s) | |||
540 | return; | 540 | return; |
541 | 541 | ||
542 | store_size(size_str, slab_size(s)); | 542 | store_size(size_str, slab_size(s)); |
543 | snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); | 543 | snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, |
544 | s->partial, s->cpu_slabs); | ||
544 | 545 | ||
545 | if (!line++) | 546 | if (!line++) |
546 | first_line(); | 547 | first_line(); |
@@ -776,7 +777,6 @@ void totals(void) | |||
776 | unsigned long used; | 777 | unsigned long used; |
777 | unsigned long long wasted; | 778 | unsigned long long wasted; |
778 | unsigned long long objwaste; | 779 | unsigned long long objwaste; |
779 | long long objects_in_partial_slabs; | ||
780 | unsigned long percentage_partial_slabs; | 780 | unsigned long percentage_partial_slabs; |
781 | unsigned long percentage_partial_objs; | 781 | unsigned long percentage_partial_objs; |
782 | 782 | ||
@@ -790,18 +790,11 @@ void totals(void) | |||
790 | wasted = size - used; | 790 | wasted = size - used; |
791 | objwaste = s->slab_size - s->object_size; | 791 | objwaste = s->slab_size - s->object_size; |
792 | 792 | ||
793 | objects_in_partial_slabs = s->objects - | ||
794 | (s->slabs - s->partial - s ->cpu_slabs) * | ||
795 | s->objs_per_slab; | ||
796 | |||
797 | if (objects_in_partial_slabs < 0) | ||
798 | objects_in_partial_slabs = 0; | ||
799 | |||
800 | percentage_partial_slabs = s->partial * 100 / s->slabs; | 793 | percentage_partial_slabs = s->partial * 100 / s->slabs; |
801 | if (percentage_partial_slabs > 100) | 794 | if (percentage_partial_slabs > 100) |
802 | percentage_partial_slabs = 100; | 795 | percentage_partial_slabs = 100; |
803 | 796 | ||
804 | percentage_partial_objs = objects_in_partial_slabs * 100 | 797 | percentage_partial_objs = s->objects_partial * 100 |
805 | / s->objects; | 798 | / s->objects; |
806 | 799 | ||
807 | if (percentage_partial_objs > 100) | 800 | if (percentage_partial_objs > 100) |
@@ -823,8 +816,8 @@ void totals(void) | |||
823 | min_objects = s->objects; | 816 | min_objects = s->objects; |
824 | if (used < min_used) | 817 | if (used < min_used) |
825 | min_used = used; | 818 | min_used = used; |
826 | if (objects_in_partial_slabs < min_partobj) | 819 | if (s->objects_partial < min_partobj) |
827 | min_partobj = objects_in_partial_slabs; | 820 | min_partobj = s->objects_partial; |
828 | if (percentage_partial_slabs < min_ppart) | 821 | if (percentage_partial_slabs < min_ppart) |
829 | min_ppart = percentage_partial_slabs; | 822 | min_ppart = percentage_partial_slabs; |
830 | if (percentage_partial_objs < min_ppartobj) | 823 | if (percentage_partial_objs < min_ppartobj) |
@@ -848,8 +841,8 @@ void totals(void) | |||
848 | max_objects = s->objects; | 841 | max_objects = s->objects; |
849 | if (used > max_used) | 842 | if (used > max_used) |
850 | max_used = used; | 843 | max_used = used; |
851 | if (objects_in_partial_slabs > max_partobj) | 844 | if (s->objects_partial > max_partobj) |
852 | max_partobj = objects_in_partial_slabs; | 845 | max_partobj = s->objects_partial; |
853 | if (percentage_partial_slabs > max_ppart) | 846 | if (percentage_partial_slabs > max_ppart) |
854 | max_ppart = percentage_partial_slabs; | 847 | max_ppart = percentage_partial_slabs; |
855 | if (percentage_partial_objs > max_ppartobj) | 848 | if (percentage_partial_objs > max_ppartobj) |
@@ -864,7 +857,7 @@ void totals(void) | |||
864 | 857 | ||
865 | total_objects += s->objects; | 858 | total_objects += s->objects; |
866 | total_used += used; | 859 | total_used += used; |
867 | total_partobj += objects_in_partial_slabs; | 860 | total_partobj += s->objects_partial; |
868 | total_ppart += percentage_partial_slabs; | 861 | total_ppart += percentage_partial_slabs; |
869 | total_ppartobj += percentage_partial_objs; | 862 | total_ppartobj += percentage_partial_objs; |
870 | 863 | ||
@@ -1160,6 +1153,8 @@ void read_slab_dir(void) | |||
1160 | slab->hwcache_align = get_obj("hwcache_align"); | 1153 | slab->hwcache_align = get_obj("hwcache_align"); |
1161 | slab->object_size = get_obj("object_size"); | 1154 | slab->object_size = get_obj("object_size"); |
1162 | slab->objects = get_obj("objects"); | 1155 | slab->objects = get_obj("objects"); |
1156 | slab->objects_partial = get_obj("objects_partial"); | ||
1157 | slab->objects_total = get_obj("objects_total"); | ||
1163 | slab->objs_per_slab = get_obj("objs_per_slab"); | 1158 | slab->objs_per_slab = get_obj("objs_per_slab"); |
1164 | slab->order = get_obj("order"); | 1159 | slab->order = get_obj("order"); |
1165 | slab->partial = get_obj("partial"); | 1160 | slab->partial = get_obj("partial"); |